// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.yql;
import static com.yahoo.search.yql.YqlParser.ACCENT_DROP;
import static com.yahoo.search.yql.YqlParser.ALTERNATIVES;
import static com.yahoo.search.yql.YqlParser.AND_SEGMENTING;
import static com.yahoo.search.yql.YqlParser.BOUNDS;
import static com.yahoo.search.yql.YqlParser.BOUNDS_LEFT_OPEN;
import static com.yahoo.search.yql.YqlParser.BOUNDS_OPEN;
import static com.yahoo.search.yql.YqlParser.BOUNDS_RIGHT_OPEN;
import static com.yahoo.search.yql.YqlParser.CONNECTION_ID;
import static com.yahoo.search.yql.YqlParser.CONNECTION_WEIGHT;
import static com.yahoo.search.yql.YqlParser.CONNECTIVITY;
import static com.yahoo.search.yql.YqlParser.DISTANCE;
import static com.yahoo.search.yql.YqlParser.DOT_PRODUCT;
import static com.yahoo.search.yql.YqlParser.EQUIV;
import static com.yahoo.search.yql.YqlParser.FILTER;
import static com.yahoo.search.yql.YqlParser.HIT_LIMIT;
import static com.yahoo.search.yql.YqlParser.IMPLICIT_TRANSFORMS;
import static com.yahoo.search.yql.YqlParser.LABEL;
import static com.yahoo.search.yql.YqlParser.NEAR;
import static com.yahoo.search.yql.YqlParser.NORMALIZE_CASE;
import static com.yahoo.search.yql.YqlParser.ONEAR;
import static com.yahoo.search.yql.YqlParser.ORIGIN;
import static com.yahoo.search.yql.YqlParser.ORIGIN_LENGTH;
import static com.yahoo.search.yql.YqlParser.ORIGIN_OFFSET;
import static com.yahoo.search.yql.YqlParser.ORIGIN_ORIGINAL;
import static com.yahoo.search.yql.YqlParser.PHRASE;
import static com.yahoo.search.yql.YqlParser.PREFIX;
import static com.yahoo.search.yql.YqlParser.RANGE;
import static com.yahoo.search.yql.YqlParser.RANK;
import static com.yahoo.search.yql.YqlParser.RANKED;
import static com.yahoo.search.yql.YqlParser.SCORE_THRESHOLD;
import static com.yahoo.search.yql.YqlParser.SIGNIFICANCE;
import static com.yahoo.search.yql.YqlParser.STEM;
import static com.yahoo.search.yql.YqlParser.SUBSTRING;
import static com.yahoo.search.yql.YqlParser.SUFFIX;
import static com.yahoo.search.yql.YqlParser.TARGET_NUM_HITS;
import static com.yahoo.search.yql.YqlParser.THRESHOLD_BOOST_FACTOR;
import static com.yahoo.search.yql.YqlParser.UNIQUE_ID;
import static com.yahoo.search.yql.YqlParser.USE_POSITION_DATA;
import static com.yahoo.search.yql.YqlParser.WAND;
import static com.yahoo.search.yql.YqlParser.WEAK_AND;
import static com.yahoo.search.yql.YqlParser.WEIGHT;
import static com.yahoo.search.yql.YqlParser.WEIGHTED_SET;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Deque;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Map.Entry;
import com.google.common.collect.ImmutableMap;
import com.yahoo.prelude.query.AndItem;
import com.yahoo.prelude.query.AndSegmentItem;
import com.yahoo.prelude.query.DotProductItem;
import com.yahoo.prelude.query.EquivItem;
import com.yahoo.prelude.query.ExactStringItem;
import com.yahoo.prelude.query.IndexedItem;
import com.yahoo.prelude.query.IntItem;
import com.yahoo.prelude.query.Item;
import com.yahoo.prelude.query.MarkerWordItem;
import com.yahoo.prelude.query.NearItem;
import com.yahoo.prelude.query.NotItem;
import com.yahoo.prelude.query.NullItem;
import com.yahoo.prelude.query.ONearItem;
import com.yahoo.prelude.query.OrItem;
import com.yahoo.prelude.query.PhraseItem;
import com.yahoo.prelude.query.PhraseSegmentItem;
import com.yahoo.prelude.query.PredicateQueryItem;
import com.yahoo.prelude.query.PrefixItem;
import com.yahoo.prelude.query.RangeItem;
import com.yahoo.prelude.query.RankItem;
import com.yahoo.prelude.query.RegExpItem;
import com.yahoo.prelude.query.SegmentingRule;
import com.yahoo.prelude.query.Substring;
import com.yahoo.prelude.query.SubstringItem;
import com.yahoo.prelude.query.SuffixItem;
import com.yahoo.prelude.query.TaggableItem;
import com.yahoo.prelude.query.ToolBox;
import com.yahoo.prelude.query.ToolBox.QueryVisitor;
import com.yahoo.prelude.query.WandItem;
import com.yahoo.prelude.query.WeakAndItem;
import com.yahoo.prelude.query.WeightedSetItem;
import com.yahoo.prelude.query.WordAlternativesItem;
import com.yahoo.prelude.query.WordItem;
import com.yahoo.search.Query;
import com.yahoo.search.grouping.Continuation;
import com.yahoo.search.grouping.GroupingRequest;
import edu.umd.cs.findbugs.annotations.NonNull;
/**
* Serialize Vespa query trees to YQL+ strings.
*
* @author Steinar Knutsen
*/
public class VespaSerializer {
// TODO refactor, too much copy/paste
private static class AndSegmentSerializer extends Serializer {

    /** No-op: {@code serialize} writes the complete expression, closing parentheses included. */
    @Override
    void onExit(StringBuilder destination, Item item) {
        // nothing left to close
    }

    /** Serializes the segment including the leading "field contains " part. */
    @Override
    boolean serialize(StringBuilder destination, Item item) {
        return serialize(destination, item, true);
    }

    /**
     * Writes an AND segment as an annotated phrase expression carrying an
     * origin annotation and the AND_SEGMENTING flag set to true.
     *
     * @param destination buffer receiving the serialized form
     * @param item the item to write, cast to {@link AndSegmentItem}
     * @param includeField whether to emit the index name and " contains " first
     * @return always false
     */
    static boolean serialize(StringBuilder destination, Item item,
            boolean includeField) {
        AndSegmentItem segment = (AndSegmentItem) item;
        Substring origin = segment.getOrigin();
        final String image;
        final int offset;
        final int length;
        if (origin != null) {
            image = origin.getSuperstring();
            offset = origin.start;
            length = origin.end - origin.start;
        } else {
            // no recorded origin: fall back to the raw word, spanning all of it
            image = segment.getRawWord();
            offset = 0;
            length = image.length();
        }
        if (includeField) {
            destination.append(normalizeIndexName(segment.getIndexName()));
            destination.append(" contains ");
        }
        destination.append("([{");
        serializeOrigin(destination, image, offset, length);
        destination.append(", \"");
        destination.append(AND_SEGMENTING);
        destination.append("\": true");
        destination.append("}]").append(PHRASE).append('(');
        serializeWords(destination, segment);
        destination.append("))");
        return false;
    }

    /**
     * Appends every word of the segment as a quoted string, comma separated.
     *
     * @throws IllegalArgumentException if a child is not a {@link WordItem}
     */
    private static void serializeWords(StringBuilder destination,
            AndSegmentItem segment) {
        String delimiter = "";
        for (int index = 0; index < segment.getItemCount(); ++index) {
            destination.append(delimiter);
            delimiter = ", ";
            Item child = segment.getItem(index);
            if (!(child instanceof WordItem)) {
                throw new IllegalArgumentException(
                        "Serializing of "
                                + child.getClass().getSimpleName()
                                + " in segment AND expressions not implemented, please report this as a bug.");
            }
            destination.append('"');
            escape(((WordItem) child).getIndexedString(), destination)
                    .append('"');
        }
    }
}
private static class AndSerializer extends Serializer {

    /** Opens the parenthesis that wraps the AND group. */
    @Override
    boolean serialize(StringBuilder destination, Item item) {
        destination.append('(');
        // NOTE(review): true presumably asks the traversal to visit the child
        // items; the driver is outside this view.
        return true;
    }

    /** Text written between two consecutive operands. */
    @Override
    String separator(Deque state) {
        final String conjunction = " AND ";
        return conjunction;
    }

    /** Closes the parenthesis opened by {@code serialize}. */
    @Override
    void onExit(StringBuilder destination, Item item) {
        destination.append(")");
    }
}
private static class DotProductSerializer extends Serializer {

    /**
     * Delegates to {@code serializeWeightedSetContents} using the DOT_PRODUCT
     * operator name.
     *
     * @return always false
     */
    @Override
    boolean serialize(StringBuilder destination, Item item) {
        WeightedSetItem dotProduct = (WeightedSetItem) item;
        serializeWeightedSetContents(destination, DOT_PRODUCT, dotProduct);
        return false;
    }

    /** No-op: nothing is written when leaving the item. */
    @Override
    void onExit(StringBuilder destination, Item item) {
        // intentionally empty
    }
}
private static class EquivSerializer extends Serializer {

    /** No-op: {@code serialize} writes the complete expression. */
    @Override
    void onExit(StringBuilder destination, Item item) {
    }

    /**
     * Writes an EQUIV item as {@code field contains equiv("a", phrase(...), ...)},
     * wrapped in an annotated group when the item has leaf annotations.
     * The field name is taken from the first child.
     *
     * @param destination buffer receiving the serialized form
     * @param item the item to write, cast to {@link EquivItem}
     * @return always false
     */
    @Override
    boolean serialize(StringBuilder destination, Item item) {
        EquivItem e = (EquivItem) item;
        String annotations = leafAnnotations(e);
        boolean annotated = annotations.length() > 0;
        destination.append(getIndexName(e.getItem(0))).append(" contains ");
        if (annotated) {
            destination.append("([{").append(annotations).append("}]");
        }
        destination.append(EQUIV).append('(');
        // anything written past this position means an alternative was already emitted
        int initLen = destination.length();
        for (Iterator<Item> i = e.getItemIterator(); i.hasNext();) {
            Item x = i.next();
            if (destination.length() > initLen) {
                destination.append(", ");
            }
            if (x instanceof PhraseItem) {
                // phrases are nested without repeating the field name
                PhraseSerializer.serialize(destination, x, false);
            } else {
                destination.append('"');
                escape(((IndexedItem) x).getIndexedString(), destination);
                destination.append('"');
            }
        }
        if (annotated) {
            // closes the "(" opened together with the annotation block
            destination.append(')');
        }
        destination.append(')');
        return false;
    }
}
private static class NearSerializer extends Serializer {

    /** No-op: {@code serialize} writes the complete expression. */
    @Override
    void onExit(StringBuilder destination, Item item) {
    }

    /**
     * Writes a NEAR item as {@code field contains near("a", "b", ...)}. When
     * the distance differs from the default, the expression is wrapped in
     * parentheses and prefixed with a distance annotation. The field name is
     * taken from the first child.
     *
     * @param destination buffer receiving the serialized form
     * @param item the item to write, cast to {@link NearItem}; every child is
     *        cast to {@link WordItem}
     * @return always false
     */
    @Override
    boolean serialize(StringBuilder destination, Item item) {
        NearItem n = (NearItem) item;
        String annotations = nearAnnotations(n);
        boolean annotated = annotations.length() > 0;
        destination.append(getIndexName(n.getItem(0))).append(" contains ");
        if (annotated) {
            destination.append('(').append(annotations);
        }
        destination.append(NEAR).append('(');
        // anything written past this position means a word was already emitted
        int initLen = destination.length();
        for (ListIterator<Item> i = n.getItemIterator(); i.hasNext();) {
            WordItem close = (WordItem) i.next();
            if (destination.length() > initLen) {
                destination.append(", ");
            }
            destination.append('"');
            escape(close.getIndexedString(), destination).append('"');
        }
        destination.append(')');
        if (annotated) {
            destination.append(')');
        }
        return false;
    }

    /**
     * Builds the annotation block for a NEAR/ONEAR item.
     *
     * @param n the item whose distance is inspected
     * @return a complete {@code [{"<DISTANCE>": n}]} block when the distance
     *         is not {@code NearItem.defaultDistance}, otherwise the empty string
     */
    static String nearAnnotations(NearItem n) {
        if (n.getDistance() != NearItem.defaultDistance) {
            return "[{\"" + DISTANCE + "\": " + n.getDistance() + "}]";
        } else {
            return "";
        }
    }
}
private static class NotSerializer extends Serializer {

    /** Closes the parenthesis left open by {@code serialize} or by the negation separator. */
    @Override
    void onExit(StringBuilder destination, Item item) {
        destination.append(')');
    }

    /**
     * Chooses the text between operands: when the wrapper at the head of
     * {@code state} has counted exactly one sub item, the positive group is
     * closed and the negated group opened; otherwise operands are joined with OR.
     *
     * @param state traversal stack; its head is read as a {@code SerializerWrapper}
     * @return {@code ") AND !("} or {@code " OR "}
     */
    @Override
    String separator(Deque state) {
        // The parameter is declared without a type argument here, so the head
        // element must be cast explicitly before its counter can be read.
        SerializerWrapper current = (SerializerWrapper) state.peekFirst();
        if (current.subItems == 1) {
            return ") AND !(";
        } else {
            return " OR ";
        }
    }

    /** Opens the parenthesis for the positive operand. */
    @Override
    boolean serialize(StringBuilder destination, Item item) {
        destination.append("(");
        // NOTE(review): true presumably asks the traversal to visit the child
        // items; the driver is outside this view.
        return true;
    }
}
private static class NullSerializer extends Serializer {

    /**
     * Always fails: this serializer never produces output.
     *
     * @throws NullItemException unconditionally
     */
    @Override
    boolean serialize(StringBuilder destination, Item item) {
        String message = "NullItem encountered in query tree. "
                + "This is usually a symptom of an invalid query "
                + "or an error in a query transformer.";
        throw new NullItemException(message);
    }

    /** No-op. */
    @Override
    void onExit(StringBuilder destination, Item item) {
        // intentionally empty
    }
}
private static class NumberSerializer extends Serializer {

    /** No-op: {@code serialize} writes the complete expression. */
    @Override
    void onExit(StringBuilder destination, Item item) {
    }

    /**
     * Writes a numeric item as an equality, a one-sided comparison, or a
     * range expression, depending on its limits.
     *
     * @param destination buffer receiving the serialized form
     * @param item the item to write, cast to {@link IntItem}
     * @return always false
     */
    @Override
    boolean serialize(StringBuilder destination, Item item) {
        IntItem intItem = (IntItem) item;
        if (intItem.getFromLimit().number()
                .equals(intItem.getToLimit().number())) {
            // both limits carry the same number: a plain equality
            destination.append(normalizeIndexName(intItem.getIndexName()))
                    .append(" = ");
            annotatedNumberImage(intItem, intItem.getFromLimit().number()
                    .toString(), destination);
        } else if (intItem.getFromLimit().isInfinite()) {
            // unbounded below: only the upper limit is written
            destination.append(normalizeIndexName(intItem.getIndexName()));
            destination.append(intItem.getToLimit().isInclusive() ? " <= "
                    : " < ");
            annotatedNumberImage(intItem, intItem.getToLimit().number()
                    .toString(), destination);
        } else if (intItem.getToLimit().isInfinite()) {
            // unbounded above: only the lower limit is written
            destination.append(normalizeIndexName(intItem.getIndexName()));
            destination
                    .append(intItem.getFromLimit().isInclusive() ? " >= "
                            : " > ");
            annotatedNumberImage(intItem, intItem.getFromLimit().number()
                    .toString(), destination);
        } else {
            serializeAsRange(destination, intItem);
        }
        return false;
    }

    /**
     * Writes {@code range(field, from, to)}, preceded by a {@code [{...}]}
     * block when there are leaf annotations and/or a non-closed bound.
     *
     * The annotation block is opened and closed by the same condition. The
     * closing bracket is therefore never emitted without its opening one,
     * whatever the buffer already contains, and no separator is left dangling
     * when only one kind of annotation is present.
     */
    private void serializeAsRange(StringBuilder destination, IntItem intItem) {
        String annotations = leafAnnotations(intItem);
        boolean leftOpen = !intItem.getFromLimit().isInclusive();
        boolean rightOpen = !intItem.getToLimit().isInclusive();
        String boundsAnnotation = "";
        if (leftOpen && rightOpen) {
            boundsAnnotation = "\"" + BOUNDS + "\": " + "\"" + BOUNDS_OPEN
                    + "\"";
        } else if (leftOpen) {
            boundsAnnotation = "\"" + BOUNDS + "\": " + "\""
                    + BOUNDS_LEFT_OPEN + "\"";
        } else if (rightOpen) {
            boundsAnnotation = "\"" + BOUNDS + "\": " + "\""
                    + BOUNDS_RIGHT_OPEN + "\"";
        }
        boolean hasAnnotations = annotations.length() > 0;
        boolean hasBounds = boundsAnnotation.length() > 0;
        if (hasAnnotations || hasBounds) {
            destination.append("[{");
            destination.append(annotations);
            if (hasAnnotations && hasBounds) {
                destination.append(", ");
            }
            destination.append(boundsAnnotation);
            destination.append("}]");
        }
        destination.append(RANGE).append('(')
                .append(normalizeIndexName(intItem.getIndexName()))
                .append(", ").append(intItem.getFromLimit().number())
                .append(", ").append(intItem.getToLimit().number())
                .append(")");
    }

    /**
     * Appends the number, parenthesized when it starts with a minus sign and
     * wrapped in an annotated group when the item has leaf annotations.
     */
    private void annotatedNumberImage(IntItem item, String rawNumber,
            StringBuilder image) {
        String annotations = leafAnnotations(item);
        boolean annotated = annotations.length() > 0;
        // startsWith is safe for an empty image, unlike charAt(0)
        boolean negative = rawNumber.startsWith("-");
        if (annotated) {
            image.append("([{").append(annotations).append("}]");
        }
        if (negative) {
            image.append('(');
        }
        image.append(rawNumber);
        appendLongIfNecessary(rawNumber, image);
        if (negative) {
            image.append(')');
        }
        if (annotated) {
            image.append(')');
        }
    }

    /**
     * Appends the {@code L} suffix when the number is integral and falls
     * outside the {@code int} range.
     */
    private void appendLongIfNecessary(String rawNumber, StringBuilder image) {
        // floating point
        if (rawNumber.indexOf('.') >= 0) {
            return;
        }
        try {
            long l = Long.parseLong(rawNumber);
            if (l < Integer.MIN_VALUE || l > Integer.MAX_VALUE) {
                image.append('L');
            }
        } catch (NumberFormatException ignored) {
            // somebody has managed to init an IntItem containing noise,
            // just give up
        }
    }
}
private static class RegExpSerializer extends Serializer {

    /**
     * Writes {@code field matches } followed by the term as produced by
     * {@code annotatedTerm}.
     *
     * @param destination buffer receiving the serialized form
     * @param item the item to write, cast to {@link RegExpItem}
     * @return always false
     */
    @Override
    boolean serialize(StringBuilder destination, Item item) {
        RegExpItem expression = (RegExpItem) item;
        String termAnnotations = leafAnnotations(expression);
        destination.append(normalizeIndexName(expression.getIndexName()));
        destination.append(" matches ");
        annotatedTerm(destination, expression, termAnnotations);
        return false;
    }

    /** No-op: nothing is written when leaving the item. */
    @Override
    void onExit(StringBuilder destination, Item item) {
        // intentionally empty
    }
}
private static class ONearSerializer extends Serializer {

    /** No-op: {@code serialize} writes the complete expression. */
    @Override
    void onExit(StringBuilder destination, Item item) {
    }

    /**
     * Writes an ONEAR item as {@code field contains onear("a", "b", ...)}.
     * When {@code NearSerializer.nearAnnotations} yields an annotation block,
     * the expression is wrapped in parentheses and prefixed with it. The
     * field name is taken from the first child.
     *
     * @param destination buffer receiving the serialized form
     * @param item the item to write, cast to {@link NearItem}; every child is
     *        cast to {@link WordItem}
     * @return always false
     */
    @Override
    boolean serialize(StringBuilder destination, Item item) {
        NearItem n = (NearItem) item;
        String annotations = NearSerializer.nearAnnotations(n);
        boolean annotated = annotations.length() > 0;
        destination.append(getIndexName(n.getItem(0))).append(" contains ");
        if (annotated) {
            destination.append('(').append(annotations);
        }
        destination.append(ONEAR).append('(');
        // anything written past this position means a word was already emitted
        int initLen = destination.length();
        for (ListIterator<Item> i = n.getItemIterator(); i.hasNext();) {
            WordItem close = (WordItem) i.next();
            if (destination.length() > initLen) {
                destination.append(", ");
            }
            destination.append('"');
            escape(close.getIndexedString(), destination).append('"');
        }
        destination.append(')');
        if (annotated) {
            destination.append(')');
        }
        return false;
    }
}
private static class OrSerializer extends Serializer {

    /** Opens the parenthesis that wraps the OR group. */
    @Override
    boolean serialize(StringBuilder destination, Item item) {
        destination.append('(');
        // NOTE(review): true presumably asks the traversal to visit the child
        // items; the driver is outside this view.
        return true;
    }

    /** Text written between two consecutive operands. */
    @Override
    String separator(Deque state) {
        final String disjunction = " OR ";
        return disjunction;
    }

    /** Closes the parenthesis opened by {@code serialize}. */
    @Override
    void onExit(StringBuilder destination, Item item) {
        destination.append(")");
    }
}
private static class PhraseSegmentSerializer extends Serializer {

    /** No-op: {@code serialize} writes the complete expression. */
    @Override
    void onExit(StringBuilder destination, Item item) {
        // nothing left to close
    }

    /** Serializes the segment including the leading "field contains " part. */
    @Override
    boolean serialize(StringBuilder destination, Item item) {
        return serialize(destination, item, true);
    }

    /**
     * Writes a phrase segment as an annotated phrase expression. The
     * annotation block always holds the origin, followed by any leaf
     * annotations, and the AND_SEGMENTING flag when the segmenting rule is
     * {@code SegmentingRule.BOOLEAN_AND}.
     *
     * @param destination buffer receiving the serialized form
     * @param item the item to write, cast to {@link PhraseSegmentItem}
     * @param includeField whether to emit the index name and " contains " first
     * @return always false
     */
    static boolean serialize(StringBuilder destination, Item item, boolean includeField) {
        PhraseSegmentItem segment = (PhraseSegmentItem) item;
        Substring origin = segment.getOrigin();
        if (includeField) {
            destination.append(normalizeIndexName(segment.getIndexName()));
            destination.append(" contains ");
        }
        // without a recorded origin the raw word stands in, spanning all of it
        boolean missingOrigin = (origin == null);
        String image = missingOrigin ? segment.getRawWord() : origin.getSuperstring();
        int offset = missingOrigin ? 0 : origin.start;
        int length = missingOrigin ? image.length() : origin.end - origin.start;

        destination.append("([{");
        serializeOrigin(destination, image, offset, length);
        String annotations = leafAnnotations(segment);
        if (!annotations.isEmpty()) {
            destination.append(", ").append(annotations);
        }
        if (segment.getSegmentingRule() == SegmentingRule.BOOLEAN_AND) {
            destination.append(", \"").append(AND_SEGMENTING)
                    .append("\": true");
        }
        destination.append("}]").append(PHRASE).append('(');
        serializeWords(destination, segment);
        destination.append("))");
        return false;
    }

    /**
     * Appends every word of the segment as a quoted string, comma separated.
     *
     * @throws IllegalArgumentException if a child is not a {@link WordItem}
     */
    private static void serializeWords(StringBuilder destination,
            PhraseSegmentItem segment) {
        String delimiter = "";
        for (int index = 0; index < segment.getItemCount(); ++index) {
            destination.append(delimiter);
            delimiter = ", ";
            Item child = segment.getItem(index);
            if (!(child instanceof WordItem)) {
                throw new IllegalArgumentException(
                        "Serializing of "
                                + child.getClass().getSimpleName()
                                + " in phrases not implemented, please report this as a bug.");
            }
            destination.append('"');
            escape(((WordItem) child).getIndexedString(), destination)
                    .append('"');
        }
    }
}
private static class PhraseSerializer extends Serializer {

    /** No-op: {@code serialize} writes the complete expression. */
    @Override
    void onExit(StringBuilder destination, Item item) {
        // nothing left to close
    }

    /** Serializes the phrase including the leading "field contains " part. */
    @Override
    boolean serialize(StringBuilder destination, Item item) {
        return serialize(destination, item, true);
    }

    /**
     * Writes a phrase as {@code phrase(child, child, ...)}, wrapped in an
     * annotated group when the phrase has leaf annotations.
     *
     * @param destination buffer receiving the serialized form
     * @param item the item to write, cast to {@link PhraseItem}
     * @param includeField whether to emit the index name and " contains " first
     * @return always false
     * @throws IllegalArgumentException if a child is neither a word, a phrase
     *         segment nor a word alternatives item
     */
    static boolean serialize(StringBuilder destination, Item item,
            boolean includeField) {
        PhraseItem phrase = (PhraseItem) item;
        String annotations = leafAnnotations(phrase);
        boolean annotated = !annotations.isEmpty();
        if (includeField) {
            destination.append(normalizeIndexName(phrase.getIndexName()));
            destination.append(" contains ");
        }
        if (annotated) {
            destination.append("([{");
            destination.append(annotations);
            destination.append("}]");
        }
        destination.append(PHRASE).append('(');
        String delimiter = "";
        for (int index = 0; index < phrase.getItemCount(); ++index) {
            destination.append(delimiter);
            delimiter = ", ";
            serializeChild(destination, phrase.getItem(index));
        }
        destination.append(')');
        if (annotated) {
            // closes the "(" opened together with the annotation block
            destination.append(')');
        }
        return false;
    }

    /** Dispatches one phrase child to the matching serializer, without the field name. */
    private static void serializeChild(StringBuilder destination, Item child) {
        if (child instanceof WordItem) {
            WordSerializer.serializeWordWithoutIndex(destination, child);
            return;
        }
        if (child instanceof PhraseSegmentItem) {
            PhraseSegmentSerializer.serialize(destination, child, false);
            return;
        }
        if (child instanceof WordAlternativesItem) {
            WordAlternativesSerializer.serialize(destination, (WordAlternativesItem) child, false);
            return;
        }
        throw new IllegalArgumentException(
                "Serializing of "
                        + child.getClass().getSimpleName()
                        + " in phrases not implemented, please report this as a bug.");
    }
}
private static class PredicateQuerySerializer extends Serializer {
@Override
void onExit(StringBuilder destination, Item item) {
}
@Override
boolean serialize(StringBuilder destination, Item item) {
PredicateQueryItem pItem = (PredicateQueryItem) item;
destination.append("predicate(").append(pItem.getIndexName())
.append(',');
appendFeatures(destination, pItem.getFeatures());
destination.append(',');
appendFeatures(destination, pItem.getRangeFeatures());
destination.append(')');
return false;
}
private void appendFeatures(StringBuilder destination,
Collection extends PredicateQueryItem.EntryBase> features) {
if (features.isEmpty()) {
destination.append('0'); // Workaround for empty maps.
return;
}
destination.append('{');
boolean first = true;
for (PredicateQueryItem.EntryBase entry : features) {
if (!first) {
destination.append(',');
}
if (entry.getSubQueryBitmap() != PredicateQueryItem.ALL_SUB_QUERIES) {
destination.append("\"0x").append(
Long.toHexString(entry.getSubQueryBitmap()));
destination.append("\":{");
appendKeyValue(destination, entry);
destination.append('}');
} else {
appendKeyValue(destination, entry);
}
first = false;
}
destination.append('}');
}
private void appendKeyValue(StringBuilder destination,
PredicateQueryItem.EntryBase entry) {
destination.append('"');
escape(entry.getKey(), destination);
destination.append("\":");
if (entry instanceof PredicateQueryItem.Entry) {
destination.append('"');
escape(((PredicateQueryItem.Entry) entry).getValue(),
destination);
destination.append('"');
} else {
destination.append(((PredicateQueryItem.RangeEntry) entry)
.getValue());
destination.append('L');
}
}
}
private static class RangeSerializer extends Serializer {

    /**
     * Writes {@code range(field, from, to)}, preceded by a {@code [{...}]}
     * block when the item has leaf annotations.
     *
     * @param destination buffer receiving the serialized form
     * @param item the item to write, cast to {@link RangeItem}
     * @return always false
     */
    @Override
    boolean serialize(StringBuilder destination, Item item) {
        RangeItem rangeItem = (RangeItem) item;
        String annotations = leafAnnotations(rangeItem);
        if (!annotations.isEmpty()) {
            destination.append("[{");
            destination.append(annotations);
            destination.append("}]");
        }
        destination.append(RANGE);
        destination.append('(');
        destination.append(normalizeIndexName(rangeItem.getIndexName()));
        destination.append(", ");
        // TODO: Serialize inclusive/exclusive
        appendNumberImage(destination, rangeItem.getFrom());
        destination.append(", ");
        appendNumberImage(destination, rangeItem.getTo());
        destination.append(')');
        return false;
    }

    /** No-op: nothing is written when leaving the item. */
    @Override
    void onExit(StringBuilder destination, Item item) {
        // intentionally empty
    }

    /** Appends the number's string form, adding an {@code L} suffix for {@link Long} values. */
    private void appendNumberImage(StringBuilder destination, Number number) {
        String image = number.toString();
        destination.append(image);
        boolean isLong = number instanceof Long;
        if (isLong) {
            destination.append('L');
        }
    }
}
private static class RankSerializer extends Serializer {

    /** Writes the RANK operator name and its opening parenthesis. */
    @Override
    boolean serialize(StringBuilder destination, Item item) {
        destination.append(RANK);
        destination.append('(');
        // NOTE(review): true presumably asks the traversal to visit the child
        // items; the driver is outside this view.
        return true;
    }

    /** Text written between two consecutive arguments. */
    @Override
    String separator(Deque state) {
        final String argumentDelimiter = ", ";
        return argumentDelimiter;
    }

    /** Closes the parenthesis opened by {@code serialize}. */
    @Override
    void onExit(StringBuilder destination, Item item) {
        destination.append(")");
    }
}
private static class WordAlternativesSerializer extends Serializer {

    /** No-op: {@code serialize} writes the complete expression. */
    @Override
    void onExit(StringBuilder destination, Item item) {
    }

    /** Serializes the item including the leading "field contains " part. */
    @Override
    boolean serialize(StringBuilder destination, Item item) {
        return serialize(destination, (WordAlternativesItem) item, true);
    }

    /**
     * Writes a word alternatives item as {@code alternatives({"word": exactness, ...})},
     * listing alternatives by descending exactness. The expression is wrapped
     * in an annotated group when there are leaf annotations, a recorded
     * origin, or the item is not from the query.
     *
     * @param destination buffer receiving the serialized form
     * @param alternatives the item to write
     * @param includeField whether to emit the index name and " contains " first
     * @return always false
     */
    static boolean serialize(StringBuilder destination, WordAlternativesItem alternatives, boolean includeField) {
        String annotations = leafAnnotations(alternatives);
        Substring origin = alternatives.getOrigin();
        boolean isFromQuery = alternatives.isFromQuery();
        boolean needsAnnotations = annotations.length() > 0 || origin != null || !isFromQuery;
        if (includeField) {
            destination.append(normalizeIndexName(alternatives.getIndexName())).append(" contains ");
        }
        if (needsAnnotations) {
            destination.append("([{");
            // start of the annotation contents, handed to comma() before each later entry
            int annotationStart = destination.length();
            if (origin != null) {
                String image = origin.getSuperstring();
                int offset = origin.start;
                int length = origin.end - origin.start;
                serializeOrigin(destination, image, offset, length);
            }
            if (!isFromQuery) {
                comma(destination, annotationStart);
                destination.append('"').append(IMPLICIT_TRANSFORMS).append("\": false");
            }
            if (annotations.length() > 0) {
                comma(destination, annotationStart);
                destination.append(annotations);
            }
            destination.append("}]");
        }
        destination.append(ALTERNATIVES).append("({");
        int alternativesStart = destination.length();
        // The element type must be explicit: the comparator below and the
        // typed loop both read fields of the elements.
        List<WordAlternativesItem.Alternative> sortedAlternatives =
                new ArrayList<>(alternatives.getAlternatives());
        // ensure most precise forms first
        Collections.sort(sortedAlternatives, (x, y) -> Double.compare(y.exactness, x.exactness));
        for (WordAlternativesItem.Alternative alternative : sortedAlternatives) {
            comma(destination, alternativesStart);
            destination.append('"');
            escape(alternative.word, destination);
            destination.append("\": ").append(Double.toString(alternative.exactness));
        }
        destination.append("})");
        if (needsAnnotations) {
            // closes the "(" opened together with the annotation block
            destination.append(')');
        }
        return false;
    }
}
/**
 * Base type for the per-item serializers in this class. Each implementation
 * appends the textual form of one kind of query item to a buffer.
 */
private static abstract class Serializer {

    /**
     * Hook for output that must follow an item; implementations in this class
     * either append a closing parenthesis or do nothing.
     *
     * @param destination buffer receiving the serialized form
     * @param item the item being left
     */
    abstract void onExit(StringBuilder destination, Item item);

    /**
     * Returns the text to place between sibling operands. The default
     * rejects the call; serializers that emit operands one by one override it.
     *
     * @param state the stack of serializer wrappers
     * @return the separator text
     * @throws UnsupportedOperationException always, unless overridden
     */
    String separator(Deque<SerializerWrapper> state) {
        throw new UnsupportedOperationException(
                "Having several items for this query operator serializer, "
                        + this.getClass().getSimpleName()
                        + ", not yet implemented.");
    }

    /**
     * Appends the serialized form of {@code item}, or its opening part.
     *
     * @param destination buffer receiving the serialized form
     * @param item the item to write
     * @return NOTE(review): implementations that only open a group return
     *         true and those that write a complete expression return false,
     *         so this presumably tells the traversal whether to visit the
     *         children — confirm against the driver
     */
    abstract boolean serialize(StringBuilder destination, Item item);
}
/** Pairs an item with the serializer chosen for it, plus a mutable sub item counter. */
private static final class SerializerWrapper {

    /** The serializer associated with {@link #item}. */
    final Serializer type;

    /** The wrapped item. */
    final Item item;

    /** Sub item counter; starts at zero. */
    int subItems = 0;

    SerializerWrapper(Serializer type, Item item) {
        this.item = item;
        this.type = type;
    }
}
private static final class TokenComparator implements
Comparator> {
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
public int compare(Entry