aboutsummaryrefslogtreecommitdiffstats
path: root/container-search/src/main/java/com/yahoo/search/query/Sorting.java
blob: b393b3a471ec6e08adc633311312bc7d72b1a008 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query;

import com.ibm.icu.text.Collator;
import com.ibm.icu.util.ULocale;
import com.yahoo.prelude.IndexFacts;
import com.yahoo.processing.IllegalInputException;
import com.yahoo.search.Query;
import com.yahoo.text.Utf8;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

/**
 * Specifies how a query is sorted by a list of fields with a sort order
 *
 * @author Arne Bergene Fossaa
 */
public class Sorting implements Cloneable {

    public static final String STRENGTH_IDENTICAL = "identical";
    public static final String STRENGTH_QUATERNARY = "quaternary";
    public static final String STRENGTH_TERTIARY = "tertiary";
    public static final String STRENGTH_SECONDARY = "secondary";
    public static final String STRENGTH_PRIMARY = "primary";
    public static final String UCA = "uca";
    public static final String RAW = "raw";
    public static final String LOWERCASE = "lowercase";

    private final List<FieldOrder> fieldOrders = new ArrayList<>(2);

    /** Creates an empty sort spec */
    public Sorting() { }

    public Sorting(List<FieldOrder> fieldOrders) {
        this.fieldOrders.addAll(fieldOrders);
    }

    /** Creates a sort spec from a string */
    public Sorting(String sortSpec) {
        setSpec(sortSpec, null);
    }

    /** Creates a sort spec from a string, for a given query. */
    public Sorting(String sortSpec, Query query) {
        IndexFacts.Session session = null;
        if (query != null && query.getModel().getExecution().context().getIndexFacts() != null)
            session = query.getModel().getExecution().context().getIndexFacts().newSession(query);
        setSpec(sortSpec, session);
    }

    /**
     * Creates a new sorting from the given string and returns it, or returns null if the argument does not contain
     * any sorting criteria (e.g it is null or the empty string)
     */
    public static Sorting fromString(String sortSpec) {
        if (sortSpec == null) return null;
        if ("".equals(sortSpec)) return null;
        return new Sorting(sortSpec);
    }

    private void setSpec(String rawSortSpec, IndexFacts.Session indexFacts) {
        for (String sortString : rawSortSpec.split(" ")) {
            // A sortspec element must be at least two characters long,
            // a sorting order and an attribute vector name
            if (sortString.length() < 1) continue;

            char orderMarker = sortString.charAt(0);
            int funcAttrStart = 0;
            if ((orderMarker == '+') || (orderMarker == '-')) {
                funcAttrStart = 1;
            }
            AttributeSorter sorter;
            int startPar = sortString.indexOf('(',funcAttrStart);
            int endPar = sortString.lastIndexOf(')');
            if ((startPar > 0) && (endPar > startPar)) {
                String functionName = sortString.substring(funcAttrStart, startPar);
                if (LOWERCASE.equalsIgnoreCase(functionName)) {
                    sorter = new LowerCaseSorter(canonic(sortString.substring(startPar+1, endPar), indexFacts));
                } else if (RAW.equalsIgnoreCase(functionName)) {
                    sorter = new RawSorter(canonic(sortString.substring(startPar+1, endPar), indexFacts));
                } else if (UCA.equalsIgnoreCase(functionName)) {
                    int commaPos = sortString.indexOf(',', startPar+1);
                    if ((startPar+1 < commaPos) && (commaPos < endPar)) {
                        int commaopt = sortString.indexOf(',', commaPos + 1);
                        UcaSorter.Strength strength = UcaSorter.Strength.UNDEFINED;
                        if (commaopt > 0) {
                            String s = sortString.substring(commaopt+1, endPar);
                            if (STRENGTH_PRIMARY.equalsIgnoreCase(s)) {
                                strength = UcaSorter.Strength.PRIMARY;
                            } else if (STRENGTH_SECONDARY.equalsIgnoreCase(s)) {
                                strength = UcaSorter.Strength.SECONDARY;
                            } else if (STRENGTH_TERTIARY.equalsIgnoreCase(s)) {
                                strength = UcaSorter.Strength.TERTIARY;
                            } else if (STRENGTH_QUATERNARY.equalsIgnoreCase(s)) {
                                strength = UcaSorter.Strength.QUATERNARY;
                            } else if (STRENGTH_IDENTICAL.equalsIgnoreCase(s)) {
                                strength = UcaSorter.Strength.IDENTICAL;
                            } else {
                                throw new IllegalInputException("Unknown collation strength: '" + s + "'");
                            }
                            sorter = new UcaSorter(canonic(sortString.substring(startPar+1, commaPos), indexFacts),
                                                   sortString.substring(commaPos+1, commaopt), strength);
                        } else {
                            sorter = new UcaSorter(canonic(sortString.substring(startPar+1, commaPos), indexFacts),
                                                   sortString.substring(commaPos+1, endPar), strength);
                        }
                    } else {
                        sorter = new UcaSorter(canonic(sortString.substring(startPar+1, endPar), indexFacts));
                    }
                } else {
                    if (functionName.isEmpty()) {
                        throw new IllegalInputException("No sort function specified");
                    } else {
                        throw new IllegalInputException("Unknown sort function '" + functionName + "'");
                    }
                }
            } else {
                sorter = new AttributeSorter(canonic(sortString.substring(funcAttrStart), indexFacts));
            }
            Order order = Order.UNDEFINED;
            if (funcAttrStart != 0) {
                // Override in sortspec
                order = (orderMarker == '+') ? Order.ASCENDING : Order.DESCENDING;
            }
            fieldOrders.add(new FieldOrder(sorter, order));
        }
    }

    private String canonic(String attributeName, IndexFacts.Session indexFacts) {
        if (indexFacts == null) return attributeName;
        return indexFacts.getCanonicName(attributeName);
    }

    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();

        String space = "";
        for (FieldOrder spec : fieldOrders)   {
            sb.append(space);
            if (spec.getSortOrder() == Order.DESCENDING) {
                sb.append("-");
            } else {
                sb.append("+");
            }
            sb.append(spec.getFieldName());
            space = " ";
        }
        return sb.toString();
    }

    public enum Order { ASCENDING, DESCENDING, UNDEFINED}

    /**
     * Returns the field orders of this sort specification as list. This is never null but can be empty.
     * This list can be modified to change this sort spec.
     */
    public List<FieldOrder> fieldOrders() { return fieldOrders; }

    @Override
    public Sorting clone() {
        return new Sorting(this.fieldOrders);
    }

    @Override
    public int hashCode() {
        return fieldOrders.hashCode();
    }

    @Override
    public boolean equals(Object o) {
        if (o == this) return true;
        if( ! (o instanceof Sorting ss)) return false;
        return fieldOrders.equals(ss.fieldOrders);
    }

    public int encode(ByteBuffer buffer) {
        int usedBytes = 0;
        byte[] nameBuffer;
        byte space = '.';
        for (FieldOrder fieldOrder : fieldOrders) {
            if (space == ' ')   {
                buffer.put(space);
                usedBytes++;
            }
            if (fieldOrder.getSortOrder() == Order.ASCENDING) {
                buffer.put((byte) '+');
            } else {
                buffer.put((byte) '-');
            }
            usedBytes++;
            nameBuffer = Utf8.toBytes(fieldOrder.getSorter().toSerialForm());
            buffer.put(nameBuffer);
            usedBytes += nameBuffer.length;
            // If this isn't the last element, append a separating space
            //if (i + 1 < sortSpec.size()) {
            space = ' ';
        }
        return usedBytes;
    }

    public static class AttributeSorter implements Cloneable {

        private static final Pattern legalAttributeName = Pattern.compile("[\\[]*[a-zA-Z_][\\.a-zA-Z0-9_-]*[\\]]*");

        private String fieldName;

        public AttributeSorter(String fieldName) {
            if ( ! legalAttributeName.matcher(fieldName).matches())
                throw new IllegalInputException("Illegal attribute name '" + fieldName + "' for sorting. Requires '" + legalAttributeName.pattern() + "'");
            this.fieldName = fieldName;
        }

        public String getName() { return fieldName; }
        public void setName(String fieldName) { this.fieldName = fieldName; }

        /** Returns the serial form of this which contains all information needed to reconstruct this sorter */
        public String toSerialForm() { return fieldName; }

        @Override
        public String toString() { return toSerialForm(); }

        @Override
        public int hashCode() { return fieldName.hashCode(); }

        @Override
        public boolean equals(Object other) {
            if (!(other instanceof AttributeSorter sorter)) {
                return false;
            }
            return sorter.fieldName.equals(fieldName);
        }

        @Override
        public AttributeSorter clone() {
            try {
                return (AttributeSorter)super.clone();
            }
            catch (CloneNotSupportedException e) {
                throw new RuntimeException(e);
            }

        }

        @SuppressWarnings({ "rawtypes", "unchecked" })
        public int compare(Comparable a, Comparable b) {
            return a.compareTo(b);
        }

    }

    public static class RawSorter extends AttributeSorter {

        public RawSorter(String fieldName) { super(fieldName); }

        @Override
        public boolean equals(Object other) {
            if (!(other instanceof RawSorter)) {
                return false;
            }
            return super.equals(other);
        }
    }

    public static class LowerCaseSorter extends AttributeSorter {

        public LowerCaseSorter(String fieldName) { super(fieldName); }

        @Override
        public String toSerialForm() { return "lowercase(" + getName() + ')'; }

        @Override
        public int hashCode() { return 1 + 3*super.hashCode(); }

        @Override
        public boolean equals(Object other) {
            if (!(other instanceof LowerCaseSorter)) {
                return false;
            }
            return super.equals(other);
        }

        @SuppressWarnings({ "rawtypes", "unchecked" })
        public int compare(Comparable a, Comparable b) {
            if ((a instanceof String) && (b instanceof String)) {
                return ((String)a).compareToIgnoreCase((String) b);
            }
            return a.compareTo(b);
        }
    }

    public static class UcaSorter extends AttributeSorter {

        public enum Strength { PRIMARY, SECONDARY, TERTIARY, QUATERNARY, IDENTICAL, UNDEFINED };
        private String locale = null;
        private Strength strength = Strength.UNDEFINED;
        private Collator collator;
        public UcaSorter(String fieldName, String locale, Strength strength) { super(fieldName); setLocale(locale, strength); }
        public UcaSorter(String fieldName) { super(fieldName); }

        static private int strength2Collator(Strength strength) {
            return switch (strength) {
                case PRIMARY -> Collator.PRIMARY;
                case SECONDARY -> Collator.SECONDARY;
                case TERTIARY -> Collator.TERTIARY;
                case QUATERNARY -> Collator.QUATERNARY;
                case IDENTICAL -> Collator.IDENTICAL;
                case UNDEFINED -> Collator.PRIMARY;
            };
        }

        public void setLocale(String locale, Strength strength) {
            this.locale = locale;
            this.strength = strength;
            ULocale uloc;
            try {
                uloc = new ULocale(locale);
            } catch (Throwable e) {
                throw new IllegalArgumentException("ULocale '" + locale + "' failed", e);
            }
            try {
                collator = Collator.getInstance(uloc);
                if (collator == null) {
                    throw new IllegalArgumentException("No collator available for locale '" + locale + "'");
                }
            } catch (Throwable e) {
                throw new RuntimeException("Collator.getInstance(ULocale(" + locale + ")) failed", e);
            }
            collator.setStrength(strength2Collator(strength));
            // collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
        }

        public String getLocale() { return locale; }
        public Strength getStrength() { return strength; }
        Collator getCollator() { return collator; }
        public String getDecomposition() { return (collator.getDecomposition() == Collator.CANONICAL_DECOMPOSITION) ? "CANONICAL_DECOMPOSITION" : "NO_DECOMPOSITION"; }

        @Override
        public String toSerialForm() {
            return "uca(" + getName() + ',' + locale + ',' +
                   ((strength != Strength.UNDEFINED) ? strength.toString() : "PRIMARY") + ')';
        }

        @Override
        public int hashCode() { return 1 + 3*locale.hashCode() + 5*strength.hashCode() + 7*super.hashCode(); }

        @Override
        public boolean equals(Object other) {
            if (this == other) return true;
            if (!(other instanceof UcaSorter)) return false;
            return super.equals(other) && locale.equals(((UcaSorter)other).locale) && (strength == ((UcaSorter)other).strength);
        }

        @Override
        public UcaSorter clone() {
            UcaSorter clone = (UcaSorter)super.clone();
            if (locale != null) {
                clone.setLocale(locale, strength);
            }
            return clone;
        }

        @SuppressWarnings({ "rawtypes", "unchecked" })
        @Override
        public int compare(Comparable a, Comparable b) {
            if ((a instanceof String) && (b instanceof String)) {
                return collator.compare((String)a, (String) b);
            }
            return a.compareTo(b);
        }
    }

    /**
     * An attribute (field) and how it should be sorted
     */
    public static class FieldOrder implements Cloneable {

        private AttributeSorter fieldSorter;
        private Order sortOrder;

        /**
         * Creates an attribute vector
         *
         * @param fieldSorter the sorter of this attribute
         * @param sortOrder    whether to sort this ascending or descending
         */
        public FieldOrder(AttributeSorter fieldSorter, Order sortOrder) {
            this.fieldSorter = fieldSorter;
            this.sortOrder = sortOrder;
        }

        /**
         * Returns the name of this attribute
         */
        public String getFieldName() {
            return fieldSorter.getName();
        }

        /**
         * Returns the sorter of this attribute
         */
        public AttributeSorter getSorter() { return fieldSorter; }
        public void setSorter(AttributeSorter sorter) { fieldSorter = sorter; }

        /**
         * Returns the sorting order of this attribute
         */
        public Order getSortOrder() {
            return sortOrder;
        }

        /**
         * Decide if sortorder is ascending or not.
         */
        public void setAscending(boolean asc) {
            sortOrder = asc ? Order.ASCENDING : Order.DESCENDING;
        }

        @Override
        public String toString() {
            return sortOrder.toString() + ":" + fieldSorter.toString();
        }

        @Override
        public int hashCode() {
            return sortOrder.hashCode() + 17 * fieldSorter.hashCode();
        }

        @Override
        public boolean equals(Object o) {
            if (!(o instanceof FieldOrder other)) return false;
            return other.sortOrder.equals(sortOrder) && other.fieldSorter.equals(fieldSorter);
        }

        @Override
        public FieldOrder clone() {
            return new FieldOrder(fieldSorter.clone(), sortOrder);
        }

    }

}