diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
commit | 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch) | |
tree | 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /container-search/src/main/java |
Publish
Diffstat (limited to 'container-search/src/main/java')
771 files changed, 84974 insertions, 0 deletions
diff --git a/container-search/src/main/java/com/yahoo/.gitignore b/container-search/src/main/java/com/yahoo/.gitignore new file mode 100644 index 00000000000..3fcbf90a554 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/.gitignore @@ -0,0 +1,2 @@ +/ + diff --git a/container-search/src/main/java/com/yahoo/component/chain/dependencies/.gitignore b/container-search/src/main/java/com/yahoo/component/chain/dependencies/.gitignore new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/component/chain/dependencies/.gitignore diff --git a/container-search/src/main/java/com/yahoo/component/provider/.gitignore b/container-search/src/main/java/com/yahoo/component/provider/.gitignore new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/component/provider/.gitignore diff --git a/container-search/src/main/java/com/yahoo/config/.gitignore b/container-search/src/main/java/com/yahoo/config/.gitignore new file mode 100644 index 00000000000..28c283f2219 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/config/.gitignore @@ -0,0 +1,34 @@ +/QueryProfilesConfig.java +/QrBinaryCacheRegionConfig.java +/ClusterConfig.java +/QrSearchersConfig.java +/PhysicalmappingConfig.java +/QrMonitorConfig.java +/BundlesConfig.java +/SummaryConfig.java +/RankProfilesConfig.java +/FieldInfoConfig.java +/QrStartConfig.java +/StringConfig.java +/QrFileserverConfig.java +/QrBinaryCacheConfig.java +/SemanticRulesConfig.java +/FederationConfig.java +/QrConfig.java +/IndexInfoConfig.java +/QueryplannerConfig.java +/AttributesConfig.java +/QrLoggingConfig.java +/ProviderConfig.java +/IntConfig.java +/SearchChainsConfig.java +/PageTemplatesConfig.java +/HandlersConfig.java +/QrTemplatesConfig.java +/TilingSearcherConfig.java +/TimingSearcherConfig.java +/RewritesConfig.java +/UnmarshallingConfig.java +/SpecialtokensConfig.java +/QrQuotetableConfig.java 
+/ComponentsConfig.java diff --git a/container-search/src/main/java/com/yahoo/container/config/webapp/.gitignore b/container-search/src/main/java/com/yahoo/container/config/webapp/.gitignore new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/container/config/webapp/.gitignore diff --git a/container-search/src/main/java/com/yahoo/container/handler/config/.gitignore b/container-search/src/main/java/com/yahoo/container/handler/config/.gitignore new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/container/handler/config/.gitignore diff --git a/container-search/src/main/java/com/yahoo/container/handler/observability/.gitignore b/container-search/src/main/java/com/yahoo/container/handler/observability/.gitignore new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/container/handler/observability/.gitignore diff --git a/container-search/src/main/java/com/yahoo/container/http/.gitignore b/container-search/src/main/java/com/yahoo/container/http/.gitignore new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/container/http/.gitignore diff --git a/container-search/src/main/java/com/yahoo/container/jrt/.gitignore b/container-search/src/main/java/com/yahoo/container/jrt/.gitignore new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/container/jrt/.gitignore diff --git a/container-search/src/main/java/com/yahoo/container/logging/.gitignore b/container-search/src/main/java/com/yahoo/container/logging/.gitignore new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/container/logging/.gitignore diff --git a/container-search/src/main/java/com/yahoo/container/osgi/.gitignore 
b/container-search/src/main/java/com/yahoo/container/osgi/.gitignore new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/container/osgi/.gitignore diff --git a/container-search/src/main/java/com/yahoo/container/protect/.gitignore b/container-search/src/main/java/com/yahoo/container/protect/.gitignore new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/container/protect/.gitignore diff --git a/container-search/src/main/java/com/yahoo/data/JsonProducer.java b/container-search/src/main/java/com/yahoo/data/JsonProducer.java new file mode 100644 index 00000000000..488f2b4c720 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/data/JsonProducer.java @@ -0,0 +1,26 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.data; + +/** + * Generic API for classes that contain data representable as JSON. + **/ +public interface JsonProducer { + + /** + * Append the JSON representation of this object's data to a StringBuilder. + * Note that when passing compact=false the generated string will + * be human-readable and containing embedded newlines; also the + * exact indentation etc may change, so use compact=true for a + * canonical format. + * @param target the StringBuilder to append to. + * @return the target passed in is also returned (to allow chaining). + **/ + public StringBuilder writeJson(StringBuilder target); + + /** + * Convenience method equivalent to: + * writeJson(new StringBuilder()).toString() + * @return String containing JSON representation of this object's data. 
+ **/ + public String toJson(); +} diff --git a/container-search/src/main/java/com/yahoo/data/XmlProducer.java b/container-search/src/main/java/com/yahoo/data/XmlProducer.java new file mode 100644 index 00000000000..d1d65aab095 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/data/XmlProducer.java @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.data; + +/** + * Generic API for classes that contain data representable as XML. + **/ +public interface XmlProducer { + + /** + * Append the XML representation of this object's data to a StringBuilder. + * @param target the StringBuilder to append to. + * @return the target passed in is also returned (to allow chaining). + **/ + public StringBuilder writeXML(StringBuilder target); + + /** + * Convenience method equivalent to: + * writeXML(new StringBuilder()).toString() + * @return String containing XML representation of this object's data. + **/ + public String toXML(); +} + diff --git a/container-search/src/main/java/com/yahoo/data/package-info.java b/container-search/src/main/java/com/yahoo/data/package-info.java new file mode 100644 index 00000000000..e1ba65bada2 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/data/package-info.java @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * Generic interfaces for all types of data. + **/ +@ExportPackage +@PublicApi +package com.yahoo.data; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/fs4/BasicPacket.java b/container-search/src/main/java/com/yahoo/fs4/BasicPacket.java new file mode 100644 index 00000000000..a02fc5af2a7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/BasicPacket.java @@ -0,0 +1,334 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.fs4; + +import com.yahoo.compress.CompressionType; +import com.yahoo.compress.Compressor; +import com.yahoo.log.LogLevel; +import net.jpountz.lz4.LZ4Compressor; +import net.jpountz.lz4.LZ4Factory; + +import java.nio.ByteBuffer; +import java.util.Optional; +import java.util.logging.Logger; + +/** + * Superclass of fs4 packets + * + * @author bratseth + */ +public abstract class BasicPacket { + + private final Compressor compressor = new Compressor(); + + private static Logger log = Logger.getLogger(QueryResultPacket.class.getName()); + private static int DEFAULT_WRITE_BUFFER_SIZE = (10 * 1024); + public static final int CODE_MASK = 0x00ff_ffff; // Reserve upper byte for flags. + + protected byte[] encodedBody; + + protected ByteBuffer encodingBuffer; + + /** + * The length of this packet in bytes or -1 if not known + */ + protected int length=-1; + + /** + * A timestamp which can be set or inspected by clients of this class + * but which is never updated by the class itself. This is mostly + * a convenience for when you need to queue packets or retain them + * in some structure where their validity is limited by a timeout + * or similar. + */ + private long timeStamp = -1; + + private int compressionLimit = 0; + + private CompressionType compressionType; + + /** + * Sets the number of bytes the package must be before activating compression. + * A value of 0 means no compression. + * @param limit smallest package size that triggers compression. 
+ */ + public void setCompressionLimit(int limit) { compressionLimit = limit; } + + public void setCompressionType(String type) { + compressionType = CompressionType.valueOf(type); + } + + /** + * Fills this package from a byte buffer positioned at the first byte of the package + * + * @return this for convenience + * @throws UnsupportedOperationException if not implemented in the subclass + */ + public BasicPacket decode(ByteBuffer buffer) { + length=buffer.getInt()+4; // Streamed packet length is the length-4 + int code=buffer.getInt(); + + decodeAndDecompressBody(buffer, code, length - 2*4); + return this; + } + + protected void decodeAndDecompressBody(ByteBuffer buffer, int code, int packetLength) { + byte compressionType = (byte)((code & ~CODE_MASK) >> 24); + boolean isCompressed = compressionType != 0; + codeDecodedHook(code & CODE_MASK); + if (isCompressed) { + int uncompressedSize = buffer.getInt(); + int compressedSize = packetLength - 4; + int offset = 0; + byte[] compressedData; + if (buffer.hasArray()) { + compressedData = buffer.array(); + offset = buffer.arrayOffset() + buffer.position(); + buffer.position(buffer.position() + compressedSize); + } else { + compressedData = new byte[compressedSize]; + buffer.get(compressedData); + } + byte[] body = compressor.decompress(CompressionType.valueOf(compressionType), compressedData, offset, uncompressedSize, Optional.of(compressedSize)); + ByteBuffer bodyBuffer = ByteBuffer.wrap(body); + length += uncompressedSize - (compressedSize + 4); + decodeBody(bodyBuffer); + } else { + decodeBody(buffer); + } + } + + /** + * Decodes the body of this package from a byte buffer + * positioned at the first byte of the package. + * + * @throws UnsupportedOperationException if not implemented in the subclass + */ + public void decodeBody(ByteBuffer buffer) { + throw new UnsupportedOperationException("Decoding of " + this + + " is not implemented"); + } + + /** + * Called when the packet code is decoded. 
+ * This default implementation just throws an exception if the code + * is not the code of this packet. Packets which have several possible codes + * will use this method to store the code. + */ + protected void codeDecodedHook(int code) { + if (code!=getCode()) + throw new RuntimeException("Can not decode " + code + " into " + this); + } + + /** + * <p>Encodes this package onto the given buffer at the current position. + * The position of the buffer after encoding is the byte following + * the last encoded byte.</p> + * + * <p>This method will ensure that everything is written provided + * sufficient capacity regardless of the buffer limit. + * When returning, the limit is at the end of the package (equal to the + * position).</p> + * + * @return this for convenience + * @throws UnsupportedOperationException if not implemented in the subclass + */ + public BasicPacket encode(ByteBuffer buffer) + throws BufferTooSmallException + { + int oldLimit = buffer.limit(); + int startPosition = buffer.position(); + + buffer.limit(buffer.capacity()); + try { + buffer.putInt(4); // Real length written later, when we know it + buffer.putInt(getCode()); + + encodeAndCompressBody(buffer, startPosition); + } + catch (java.nio.BufferOverflowException e) { + // reset buffer to expected state + buffer.position(startPosition); + buffer.limit(oldLimit); + throw new BufferTooSmallException("Destination buffer too small while encoding packet"); + } + + return this; + } + + protected void encodeAndCompressBody(ByteBuffer buffer, int startPosition) { + int startOfBody = buffer.position(); + encodeBody(buffer); + setEncodedBody(buffer, startOfBody, buffer.position() - startOfBody); + length = buffer.position() - startPosition; + + if (compressionLimit != 0 && length-4 > compressionLimit) { + byte[] compressedBody; + compressionType = CompressionType.LZ4; + LZ4Factory factory = LZ4Factory.fastestInstance(); + LZ4Compressor compressor = factory.fastCompressor(); + compressedBody = 
compressor.compress(encodedBody); + + log.log(LogLevel.DEBUG, "Uncompressed size: " + encodedBody.length + ", Compressed size: " + compressedBody.length); + if (compressedBody.length + 4 < encodedBody.length) { + buffer.position(startPosition); + buffer.putInt(compressedBody.length + startOfBody - startPosition + 4 - 4); // +4 for compressed size + buffer.putInt(getCompressedCode(compressionType)); + buffer.position(startOfBody); + buffer.putInt(encodedBody.length); + buffer.put(compressedBody); + buffer.limit(buffer.position()); + return; + } + } + buffer.putInt(startPosition, length - 4); // Encoded length 4 less than actual length + buffer.limit(buffer.position()); + } + + private int getCompressedCode(CompressionType compression) { + int code = compression.getCode(); + return getCode() | (code << 24); + } + + /** + * Encodes the body of this package onto the given buffer at the current position. + * The position of the buffer after encoding is the byte following + * the last encoded byte. + * + * @throws UnsupportedOperationException if not implemented in the subclass + */ + protected void encodeBody(ByteBuffer buffer) { + throw new UnsupportedOperationException("Encoding of " + this + " is not implemented"); + } + + protected void setEncodedBody(ByteBuffer b, int start, int length) { + encodedBody = new byte[length]; + b.position(start); + b.get(encodedBody); + } + + public boolean isEncoded() { + return encodedBody != null; + } + + /** + * Just a place holder to make the APIs simpler. + */ + public Packet encode(ByteBuffer buffer, int channel) throws BufferTooSmallException { + throw new UnsupportedOperationException( + "This class does not support a channel ID"); + } + + /** + * Allocate the needed buffers and encode the packet using the given + * channel ID (if pertinent). + * + * If this packet does not use a channel ID, the ID will be ignored. 
+ */ + public final void allocateAndEncode(int channelId) { + allocateAndEncode(channelId, DEFAULT_WRITE_BUFFER_SIZE); + } + + private final void allocateAndEncode(int channelId, int initialSize) { + if (encodingBuffer != null) { + patchChannelId(encodingBuffer, channelId); + return; + } + + int size = initialSize; + ByteBuffer buffer = ByteBuffer.allocate(size); + while (true) { + try { + if (hasChannelId()) { + encode(buffer, channelId); + } else { + encode(buffer); + } + buffer.flip(); + encodingBuffer = buffer; + break; + } + catch (BufferTooSmallException e) { + size *= 2; + buffer = ByteBuffer.allocate(size); + } + } + } + + // No channel ID for BasicPacket instances, so it's a NOP + protected void patchChannelId(ByteBuffer buf, int channelId) {} + + /** + * Return buffer containing the encoded form of this package and + * remove internal reference to it. + */ + public final ByteBuffer grantEncodingBuffer(int channelId) { + ByteBuffer b; + if (encodingBuffer == null) { + allocateAndEncode(channelId); + } else { + patchChannelId(encodingBuffer, channelId); + } + b = encodingBuffer; + encodingBuffer = null; + return b; + } + + public final ByteBuffer grantEncodingBuffer(int channelId, int initialSize) { + ByteBuffer b; + if (encodingBuffer == null) { + allocateAndEncode(channelId, initialSize); + } else { + patchChannelId(encodingBuffer, channelId); + } + b = encodingBuffer; + encodingBuffer = null; + return b; + } + + /** Returns the code of this package */ + public abstract int getCode(); + + /** + * Returns the length of this body (including header (8 bytes) and body), + * or -1 if not known. + * Note that the streamed packet format length is 4 bytes less than this length, + * for unknown reasons. + * The length is always known when decodeBody is called. + */ + public int getLength() { + return length; + } + + /** + * Set the timestamp field of the packet. 
+ * + * A timestamp which can be set or inspected by clients of this class + * but which is never updated by the class itself. This is mostly + * a convenience for when you need to queue packets or retain them + * in some structure where their validity is limited by a timeout + * or similar. + */ + public void setTimestamp (long timeStamp) { + this.timeStamp = timeStamp; + } + + /** + * Get the timestamp field of this packet. Note that this is + * <b>not</b> part of the FS4 protocol. @see #setTimestamp for + * more information + * + */ + public long getTimestamp () { + return timeStamp; + } + + public String toString() { + return "packet with code " + getCode(); + } + + /** Whether this is a packet which can encode a channel ID. */ + public boolean hasChannelId() { + return false; + } + +} diff --git a/container-search/src/main/java/com/yahoo/fs4/BufferTooSmallException.java b/container-search/src/main/java/com/yahoo/fs4/BufferTooSmallException.java new file mode 100644 index 00000000000..f169ce7f961 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/BufferTooSmallException.java @@ -0,0 +1,17 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// -*- mode: java; folded-file: t; c-basic-offset: 4 -*- +// +// + +package com.yahoo.fs4; +/** + * Signal that the buffer used to hold a packet is too small + * + * @author <a href="mailto:borud@yahoo-inc.com">Bjorn Borud</a> + */ +@SuppressWarnings("serial") +public class BufferTooSmallException extends Exception { + public BufferTooSmallException (String message) { + super(message); + } +} diff --git a/container-search/src/main/java/com/yahoo/fs4/ChannelTimeoutException.java b/container-search/src/main/java/com/yahoo/fs4/ChannelTimeoutException.java new file mode 100644 index 00000000000..791a9f668de --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/ChannelTimeoutException.java @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// -*- mode: java; folded-file: t; c-basic-offset: 4 -*- +// + +package com.yahoo.fs4; + + +/** + * Signal that a timeout occurred in the Channel2 communication + * + * @author <a href="mailto:borud@yahoo-inc.com">Bjorn Borud</a> + * + */ +@SuppressWarnings("serial") +public class ChannelTimeoutException extends Exception +{ + public ChannelTimeoutException (String msg) { + super(msg); + } + + public ChannelTimeoutException () { + } +} diff --git a/container-search/src/main/java/com/yahoo/fs4/DocsumPacket.java b/container-search/src/main/java/com/yahoo/fs4/DocsumPacket.java new file mode 100644 index 00000000000..4ebe0819d9e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/DocsumPacket.java @@ -0,0 +1,59 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.fs4; + +import com.yahoo.document.GlobalId; + +import java.nio.ByteBuffer; + +/** + * An "extended query result" packet. This is the query result + * packets used today, they allow more flexible sets of parameters + * to be shipped with query results. This packet can be decoded only. 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class DocsumPacket extends Packet { + + private GlobalId globalId = new GlobalId(new byte[GlobalId.LENGTH]); + + private byte[] data; + + private DocsumPacket() { + } + + /** + * Constructor used by streaming search + */ + public DocsumPacket(byte[] buffer) { + data = buffer.clone(); + } + + public static DocsumPacket create() { + return new DocsumPacket(); + } + + public int getCode() { return 205; } + + /** + * Fills this packet from a byte buffer positioned at the + * first byte of the packet + */ + public void decodeBody(ByteBuffer buffer) { + byte[] rawGid = new byte[GlobalId.LENGTH]; + buffer.get(rawGid); + globalId = new GlobalId(rawGid); + data=new byte[getLength()-12-GlobalId.LENGTH]; + buffer.get(data); + } + + public GlobalId getGlobalId() { return globalId; } + + public byte[] getData() { return data; } + + public String toString() { + return "docsum packet [globalId: " + globalId.toString() + + ", size: " + (data==null ? "(no data)" : data.length + + " bytes") + " ]"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/fs4/DocumentInfo.java b/container-search/src/main/java/com/yahoo/fs4/DocumentInfo.java new file mode 100644 index 00000000000..0576f507ffc --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/DocumentInfo.java @@ -0,0 +1,68 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.fs4; + +import com.yahoo.document.GlobalId; + +import java.nio.ByteBuffer; + +/** + * Meta attributes on documents (not the document summaries themselves). 
+ * Used in query results and get docsum packages + * + * @author bratseth + */ +public class DocumentInfo implements Cloneable { + + private final GlobalId globalId; + private final double metric; + private final int partId; + private final int distributionKey; + + DocumentInfo(ByteBuffer buffer, QueryResultPacket owner) { + byte[] rawGid = new byte[GlobalId.LENGTH]; + buffer.get(rawGid); + globalId = new GlobalId(rawGid); + metric = decodeMetric(buffer); + partId = owner.getMldFeature() ? buffer.getInt() : 0; + distributionKey = owner.getMldFeature() ? buffer.getInt() : 0; + } + + public DocumentInfo(GlobalId globalId, int metric, int partId, int distributionKey) { + this.globalId=globalId; + this.metric=metric; + this.partId=partId; + this.distributionKey = distributionKey; + } + + private double decodeMetric(ByteBuffer buffer) { + return buffer.getDouble(); + } + + public GlobalId getGlobalId() { return globalId; } + + /** Raw rank score */ + public double getMetric() { return metric; } + + /** Partition this document resides on */ + public int getPartId() { return partId; } + + /** Unique key for the node this document resides on */ + public int getDistributionKey() { return distributionKey; } + + public String toString() { + return "document info [globalId=" + globalId + ", metric=" + metric + "]"; + } + + /** + * Implements the Cloneable interface + */ + public Object clone() { + try { + DocumentInfo docInfo=(DocumentInfo) super.clone(); + return docInfo; + } + catch (CloneNotSupportedException e) { + throw new RuntimeException("Someone inserted a nonclonable superclass"); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/fs4/EolPacket.java b/container-search/src/main/java/com/yahoo/fs4/EolPacket.java new file mode 100644 index 00000000000..14a33d5414b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/EolPacket.java @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.fs4; + +import java.nio.ByteBuffer; + +/** + * An EOL packet signaling end of transmission. + * This package has no body. + * + * @author bratseth + */ +public class EolPacket extends Packet { + + private EolPacket() { + } + + public static EolPacket create() { + return new EolPacket(); + } + + public int getCode() { return 200; } + + public void decodeBody(ByteBuffer buffer) { + // No body + } + + public void encodeBody(ByteBuffer buffer) { + // No body + } + + public String toString() { + return "EOL packet"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/fs4/ErrorPacket.java b/container-search/src/main/java/com/yahoo/fs4/ErrorPacket.java new file mode 100644 index 00000000000..f191d192ae2 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/ErrorPacket.java @@ -0,0 +1,48 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.fs4; + +import java.nio.ByteBuffer; + +import com.yahoo.text.Utf8; + +/** + * + * An error packet signaling that an error occurred. 
+ * + * @author Bjørn Borud + */ +public class ErrorPacket extends Packet { + private int errorCode; + private int errmsgLen; + private String message; + + private ErrorPacket() { + } + + public static ErrorPacket create() { + return new ErrorPacket(); + } + + public int getCode() { return 203; } + + public void decodeBody(ByteBuffer buffer) { + errorCode = buffer.getInt(); + errmsgLen = buffer.getInt(); + + byte[] tmp = new byte[errmsgLen]; + buffer.get(tmp); + + message = Utf8.toString(tmp); + } + + public int getErrorCode () { return errorCode; } + + public void encodeBody(ByteBuffer buffer) { + // No body + } + + public String toString() { + return (message + " (" + errorCode + ")"); + } + +} diff --git a/container-search/src/main/java/com/yahoo/fs4/FS4Properties.java b/container-search/src/main/java/com/yahoo/fs4/FS4Properties.java new file mode 100644 index 00000000000..45ded52c7c2 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/FS4Properties.java @@ -0,0 +1,59 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.fs4; + +import com.yahoo.text.Utf8; + +import java.nio.ByteBuffer; + +public class FS4Properties { + private String name; + + static public class Entry { + public final String key; + public final String val; + public Entry(byte[] k, byte[] v) { + key = Utf8.toString(k); + val = Utf8.toString(v); + } + }; + + private Entry[] entries; + + void decode(ByteBuffer buffer) { + int nameLen = buffer.getInt(); + byte[] utf8name = new byte[nameLen]; + buffer.get(utf8name); + this.setName(Utf8.toString(utf8name)); + + int n = buffer.getInt(); + setEntries(new Entry[n]); + for (int j = 0; j < n; j++) { + int keyLen = buffer.getInt(); + byte[] key = new byte[keyLen]; + buffer.get(key); + + int valLen = buffer.getInt(); + byte[] value = new byte[valLen]; + buffer.get(value); + + getEntries()[j] = new Entry(key, value); + } + } + + public Entry[] getEntries() { + return entries; + } + + public void setEntries(Entry[] entries) { + this.entries = entries; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + +} diff --git a/container-search/src/main/java/com/yahoo/fs4/GetDocSumsPacket.java b/container-search/src/main/java/com/yahoo/fs4/GetDocSumsPacket.java new file mode 100644 index 00000000000..fb909a480b1 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/GetDocSumsPacket.java @@ -0,0 +1,229 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.fs4; + +import com.yahoo.document.GlobalId; +import com.yahoo.log.LogLevel; +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.prelude.query.Item; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.result.Hit; +import com.yahoo.text.Utf8; + +import java.nio.ByteBuffer; +import java.util.Iterator; +import java.util.logging.Logger; + +/** + * <p>A packet for requesting a list of document summaries. 
+ * This packet can be encoded only.</p> + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class GetDocSumsPacket extends Packet { + + /** Session id key. Yep, putting this here is ugly as hell */ + public static final String sessionIdKey = "sessionId"; + + private static final Logger log = Logger.getLogger(GetDocSumsPacket.class.getName()); + private final Result result; + private final Query query; + private final String summaryClass; + private QueryPacketData queryPacketData = null; + private int flags = 0; + + /** + * True if we should send the query with this docsum, false otherwise. + * Sending the query is necessary if we need to return summary features or generate a dynamic summary + */ + private final boolean sendQuery; + + private GetDocSumsPacket(Result result, String summaryClass, boolean sendQuery) { + this.result = result; + this.query = result.getQuery(); + this.summaryClass = summaryClass; + this.sendQuery = sendQuery; + } + + /** + * Creates a get docsums packet for a certain result + */ + public static GetDocSumsPacket create(Result result, String summaryClass, boolean sendQuery) { + return new GetDocSumsPacket(result, summaryClass, sendQuery); + } + + /** + * features bits, as given in searchlib/src/searchlib/common/packets.h + * definition of enum getdocsums_features + */ + public static final int GDF_MLD = 0x00000001; + public static final int GDF_QUERYSTACK = 0x00000004; + public static final int GDF_RANKP_QFLAGS = 0x00000010; + public static final int GDF_LOCATION = 0x00000080; + public static final int GDF_RESCLASSNAME = 0x00000800; + public static final int GDF_PROPERTIES = 0x00001000; + public static final int GDF_FLAGS = 0x00002000; + + /** + * flag bits, as given in fastserver4/src/network/transport.h + * definition of enum getdocsums_flags + */ + public static final int GDFLAG_IGNORE_ROW = 0x00000001; + public static final int GDFLAG_ALLOW_SLIME = 0x00000002; + + public void encodeBody(ByteBuffer 
buffer) { + setFieldsFromHits(); + + boolean useQueryCache = query.getRanking().getQueryCache(); + // If feature cache is used we need to include the sessionId as key. + if (useQueryCache) { // TODO: Move this decision (and the key) to ranking + query.getRanking().getProperties().put(sessionIdKey, query.getSessionId(false).asUtf8String().toString()); + } + + // always allow slime docsums + flags |= GDFLAG_ALLOW_SLIME; + + // set the default features + long features = GDF_MLD; + if (sendQuery) + features |= GDF_QUERYSTACK; + features |= GDF_RANKP_QFLAGS; + + // do we want a specific result class? + if (summaryClass != null) + features |= GDF_RESCLASSNAME; + if (query.getRanking().getLocation() != null) + features |= GDF_LOCATION; + if (query.hasEncodableProperties()) + features |= GDF_PROPERTIES; + if (flags != 0) { + features |= GDF_FLAGS; + } + buffer.putInt((int)features); + buffer.putInt(0); //Unused, was docstamp + long timeLeft = query.getTimeLeft(); + final int minTimeout = 50; + buffer.putInt(Math.max(minTimeout, (int)timeLeft)); + if (log.isLoggable(LogLevel.DEBUG)) { + log.log(LogLevel.DEBUG, "Timeout from query(" + query.getTimeout() + "), sent to backend: " + Math.max(minTimeout, timeLeft)); + } + + if (queryPacketData != null) + encodeQueryFromPacketData(buffer, useQueryCache); + else + encodeQuery(buffer); + + if (flags != 0) + buffer.putInt(flags); + encodeDocIds(buffer); + } + + private void setFieldsFromHits() { + for (Iterator<Hit> i = result.hits().unorderedDeepIterator(); i.hasNext(); ) { + Hit h = i.next(); + if (h instanceof FastHit) { + FastHit hit = (FastHit)h; + if (hit.shouldIgnoreRowBits()) { + flags |= GDFLAG_IGNORE_ROW; + } + QueryPacketData tag = hit.getQueryPacketData(); + if (tag != null) { + this.queryPacketData = tag; + break; + } + } + } + } + + private void encodeQueryFromPacketData(ByteBuffer buffer, boolean reencodePropertyMaps) { + queryPacketData.encodeRankProfile(buffer); + queryPacketData.encodeQueryFlags(buffer); + + 
encodeSummaryClass(buffer); + + if (reencodePropertyMaps || ! sendQuery) // re-encode when we're not sending query, to avoid resending all encoded properties + query.encodeAsProperties(buffer, sendQuery); + else + queryPacketData.encodePropertyMaps(buffer); + + if (sendQuery) + queryPacketData.encodeQueryStack(buffer); + queryPacketData.encodeLocation(buffer); + } + + private void encodeSummaryClass(ByteBuffer buffer) { + if (summaryClass != null) { + byte[] tmp = Utf8.toBytes(summaryClass); + buffer.putInt(tmp.length); + buffer.put(tmp); + } + } + + private void encodeQuery(ByteBuffer buffer) { + Item.putString(query.getRanking().getProfile(), buffer); + buffer.putInt(QueryPacket.getQueryFlags(query)); + + encodeSummaryClass(buffer); + + query.encodeAsProperties(buffer, sendQuery); + + if (sendQuery) { + // The stack must be resubmitted to generate dynamic docsums + int itemCountPosition = buffer.position(); + buffer.putInt(0); + int dumpLengthPosition = buffer.position(); + buffer.putInt(0); + int count = query.encode(buffer); + buffer.putInt(itemCountPosition, count); + buffer.putInt(dumpLengthPosition, buffer.position() - dumpLengthPosition - 4); + } + + if (query.getRanking().getLocation() != null) { + int locationLengthPosition = buffer.position(); + buffer.putInt(0); + int locationLength = query.getRanking().getLocation().encode(buffer); + buffer.putInt(locationLengthPosition, locationLength); + } + } + + private void encodeDocIds(ByteBuffer buffer) { + byte[] emptyGid = new byte[GlobalId.LENGTH]; + for (Iterator<Hit> i = result.hits().unorderedDeepIterator(); i.hasNext(); ) { + Hit hit = i.next(); + if (hit instanceof FastHit && !hit.isFilled(summaryClass)) { + FastHit fastHit = (FastHit)hit; + buffer.put(fastHit.getGlobalId() != null ? 
fastHit.getGlobalId().getRawId() : emptyGid); + buffer.putInt(fastHit.getPartId()); + buffer.putInt(0); //Unused, was docstamp + } + } + } + + public int getCode() { + return 219; + } + + public String toString() { + return "Get docsums x packet"; + } + + public int getNumDocsums() { + int num = 0; + for (Iterator<Hit> i = result.hits().unorderedDeepIterator(); i.hasNext(); ) { + Hit hit = i.next(); + if (hit instanceof FastHit && !hit.isFilled(summaryClass)) { + num++; + } + } + return num; + } + + /** + * Return the document summary class we want the fdispatch + * to use when replying to us + */ + @SuppressWarnings("UnusedDeclaration") + public String getSummaryClass() { + return summaryClass; + } +} diff --git a/container-search/src/main/java/com/yahoo/fs4/HexByteIterator.java b/container-search/src/main/java/com/yahoo/fs4/HexByteIterator.java new file mode 100644 index 00000000000..b1d5b1073af --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/HexByteIterator.java @@ -0,0 +1,42 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.fs4; + +import java.nio.ByteBuffer; +import java.util.Iterator; + +/** + * Provides sequential access to each byte of a buffer + * as a hexadecimal string of length 2. 
/**
 * Provides sequential access to each byte of a buffer
 * as a hexadecimal string of length 2.
 *
 * <p>The iterator reads from a {@link ByteBuffer#slice() slice} of the buffer it is given,
 * so iterating does not move the position of the caller's buffer. Only the bytes between
 * the position and the limit at construction time are visited.</p>
 *
 * @author tonytv
 */
public final class HexByteIterator implements Iterator<String> {

    /** Upper-case hexadecimal digits indexed by nibble value; avoids locale-dependent case mapping. */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    private final ByteBuffer buffer;

    /**
     * Creates an iterator over the remaining bytes of the given buffer.
     *
     * @param buffer the buffer to read; its position, limit and mark are left untouched
     */
    public HexByteIterator(ByteBuffer buffer) {
        this.buffer = buffer.slice();
    }

    /** Returns true as long as there are unread bytes left. */
    @Override
    public boolean hasNext() {
        return buffer.hasRemaining();
    }

    /**
     * Returns the next byte as two upper-case hexadecimal characters ("00" to "FF").
     *
     * @throws java.util.NoSuchElementException if all bytes have been consumed
     */
    @Override
    public String next() {
        // The Iterator contract requires NoSuchElementException rather than the
        // BufferUnderflowException a bare buffer.get() would raise.
        if ( ! buffer.hasRemaining())
            throw new java.util.NoSuchElementException("No more bytes in buffer");
        return hexByte(buffer.get());
    }

    /** Removal is not supported. */
    @Override
    public void remove() {
        throw new UnsupportedOperationException();
    }

    /** Formats a byte, treated as unsigned, as exactly two hexadecimal digits. */
    private static String hexByte(byte b) {
        int unsignedValue = b & 0xff;
        char[] digits = { HEX_DIGITS[unsignedValue >>> 4], HEX_DIGITS[unsignedValue & 0x0f] };
        return new String(digits);
    }

}
+ * + * Returns the number of maps encoded - 0 or 1 + */ + public static int encodeSingleValue(String mapName, String key, Object value, ByteBuffer buffer) { + if (value == null) return 0; + + byte [] utf8 = Utf8.toBytes(mapName); + buffer.putInt(utf8.length); + buffer.put(utf8); + buffer.putInt(1); + utf8 = Utf8.toBytes(key); + buffer.putInt(utf8.length); + buffer.put(utf8); + utf8 = Utf8.toBytes(value.toString()); + buffer.putInt(utf8.length); + buffer.put(utf8); + + return 1; + } + + /** + * Encodes a map as binary. + * Does nothing if the value is null. + * + * Returns the number of maps encoded - 0 or 1 + */ + public static int encodeMap(String mapName, Map<String,?> map, ByteBuffer buffer) { + if (map.isEmpty()) return 0; + + byte [] utf8 = Utf8.toBytes(mapName); + buffer.putInt(utf8.length); + buffer.put(utf8); + buffer.putInt(map.size()); + for (Map.Entry<String, ?> property : map.entrySet()) { + String key = property.getKey(); + utf8 = Utf8.toBytes(key); + buffer.putInt(utf8.length); + buffer.put(utf8); + utf8 = Utf8.toBytes(property.getValue() != null ? property.getValue().toString() : ""); + buffer.putInt(utf8.length); + buffer.put(utf8); + } + + return 1; + } + + /** + * Encodes a multi-map as binary. + * Does nothing if the value is null. + * + * Returns the number of maps encoded - 0 or 1 + */ + public static int encodeStringMultiMap(String mapName, Map<String,List<String>> map, ByteBuffer buffer) { + if (map.isEmpty()) return 0; + + byte [] utf8 = Utf8.toBytes(mapName); + buffer.putInt(utf8.length); + buffer.put(utf8); + buffer.putInt(countStringEntries(map)); + for (Map.Entry<String, List<String>> property : map.entrySet()) { + String key = property.getKey(); + for (Object value : property.getValue()) { + utf8 = Utf8.toBytes(key); + buffer.putInt(utf8.length); + buffer.put(utf8); + utf8 = Utf8.toBytes(value.toString()); + buffer.putInt(utf8.length); + buffer.put(utf8); + } + } + + return 1; + } + + /** + * Encodes a multi-map as binary. 
+ * Does nothing if the value is null. + * + * Returns the number of maps encoded - 0 or 1 + */ + public static int encodeObjectMultiMap(String mapName, Map<String,List<Object>> map, ByteBuffer buffer) { + if (map.isEmpty()) return 0; + + byte [] utf8 = Utf8.toBytes(mapName); + buffer.putInt(utf8.length); + buffer.put(utf8); + addTensorTypeInfo(map); + buffer.putInt(countObjectEntries(map)); + for (Map.Entry<String, List<Object>> property : map.entrySet()) { + String key = property.getKey(); + for (Object value : property.getValue()) { + utf8 = Utf8.toBytes(key); + buffer.putInt(utf8.length); + buffer.put(utf8); + if (value instanceof Tensor) { + utf8 = TypedBinaryFormat.encode((Tensor)value); + } else { + utf8 = Utf8.toBytes(value.toString()); + } + buffer.putInt(utf8.length); + buffer.put(utf8); + } + } + + return 1; + } + + private static void addTensorTypeInfo(Map<String, List<Object>> map) { + Map<String, Tensor> tensorsToTag = new HashMap<>(); + for (Map.Entry<String, List<Object>> entry : map.entrySet()) { + for (Object value : entry.getValue()) { + if (value instanceof Tensor) { + tensorsToTag.put(entry.getKey(), (Tensor)value); + } + } + } + for (Map.Entry<String, Tensor> entry : tensorsToTag.entrySet()) { + // Ensure that we only have a single tensor associated with each key + map.put(entry.getKey(), Arrays.asList(entry.getValue())); + map.put(entry.getKey() + TYPE_SUFFIX, Arrays.asList(TENSOR_TYPE)); + } + } + + private static int countStringEntries(Map<String, List<String>> value) { + int entries = 0; + for (Map.Entry<String, List<String>> property : value.entrySet()) + entries += property.getValue().size(); + return entries; + } + + private static int countObjectEntries(Map<String, List<Object>> value) { + int entries = 0; + for (Map.Entry<String, List<Object>> property : value.entrySet()) + entries += property.getValue().size(); + return entries; + } + +} diff --git a/container-search/src/main/java/com/yahoo/fs4/Packet.java 
b/container-search/src/main/java/com/yahoo/fs4/Packet.java new file mode 100644 index 00000000000..fa25fe3df72 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/Packet.java @@ -0,0 +1,128 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.fs4; + +import java.nio.ByteBuffer; +import java.util.logging.Logger; + +/** + * Superclass of fs4 packets containing channel/query ID + * + * @author bratseth + */ +public abstract class Packet extends BasicPacket { + private static Logger log = Logger.getLogger(Packet.class.getName()); + /** + * The channel at which this packet will be sent or was received, + * or -1 when this is not known + */ + protected int channel=-1; + + protected static final int CHANNEL_ID_OFFSET = 8; + + /** + * Fills this package from a byte buffer positioned at the first + * byte of the package + * + * @return this Packet (as a BasicPacket) for convenience + * @throws UnsupportedOperationException if not implemented in the subclass + */ + public BasicPacket decode(ByteBuffer buffer) { + int originalPos = buffer.position(); + length=buffer.getInt()+4; // Streamed packet length is the length-4 + int packetLength = length; + try { + int code=buffer.getInt(); + channel=buffer.getInt(); + + decodeAndDecompressBody(buffer, code, length - 3*4); + } + finally { + int targetPosition = (originalPos + packetLength); + if (buffer.position() != targetPosition) { + log.warning(" position in buffer, is " + + buffer.position() + + " should be " + + targetPosition); + buffer.position(targetPosition); + } + } + + return this; + } + + /** + * <p>Encodes this package onto the given buffer at the current + * position. The position of the buffer after encoding is the + * byte following the last encoded byte.</p> + * + * <p>This method will ensure that everything is written provided + * sufficient capacity regardless of the buffer limit. 
+ * When returning, the limit is at the end of the package (qual to the + * position).</p> + * + * @return this for convenience + * @throws UnsupportedOperationException if not implemented in the subclass + */ + public final Packet encode(ByteBuffer buffer,int channel) throws BufferTooSmallException { + this.channel=channel; + int oldLimit = buffer.limit(); + int startPosition = buffer.position(); + + buffer.limit(buffer.capacity()); + try { + buffer.putInt(8); // Real length written later, when we know it + buffer.putInt(getCode()); + buffer.putInt(channel); + + encodeAndCompressBody(buffer, startPosition); + } + catch (java.nio.BufferOverflowException e) { + // reset buffer to expected state + buffer.position(startPosition); + buffer.limit(oldLimit); + throw new BufferTooSmallException("Destination buffer too small while encoding packet"); + } + return this; + } + + /** + * Get the channel id of the packet. In the FS4 transport protocol, + * there is the concept of a channel. This must <b>not</b> be confused + * with all the other channels we have floating around this code (aargh!). + * <P> + * The channel can be thought of as a way to pair up requests and + * responses in the FS4 protocol: A response always belongs to + * to a channel and it is the clients responsibility to not re-use + * channel ids within the same connection. + * <p> + * Summary: This "channel" means "session id" + * + * @return FS4 channel id + * + */ + public int getChannel() { return channel; } + + public void setChannel(int channel) { this.channel=channel; } + + + /** Informs that this packets needs a channel ID. */ + public boolean hasChannelId() { + return true; + } + + /** + * Only for use with encodingBuffer magic. + * + * This is only called from allocateAndEncode and grantEncodingBuffer, + * therefore an assumption about the packet starting at the beginning of the + * buffer is made. 
+ */ + protected void patchChannelId(ByteBuffer buf, int channelId) { + buf.putInt(CHANNEL_ID_OFFSET, channelId); + } + + public String toString() { + return "packet with code " + getCode() + ", channelId=" + getChannel(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/fs4/PacketDecoder.java b/container-search/src/main/java/com/yahoo/fs4/PacketDecoder.java new file mode 100644 index 00000000000..f63ef59e093 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/PacketDecoder.java @@ -0,0 +1,207 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.fs4; + +import java.nio.ByteBuffer; + +/** + * Returns the correct package for a package byte stream + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + * @author <a href="mailto:borud@yahoo-inc.com">Bj\u00f8rn Borud</a> + */ +public class PacketDecoder { + + /** Represents a packet and the data used to construct it */ + public static class DecodedPacket { + public BasicPacket packet; + public ByteBuffer consumedBytes; + + DecodedPacket(BasicPacket packet, ByteBuffer consumedBytes) { + this.packet = packet; + this.consumedBytes = consumedBytes; + } + } + + private PacketDecoder() {} + + /** + * Returns the package starting at the current position in the buffer + * + * @throws IllegalArgumentException if an unknown package code is + * encountered + * @throws java.nio.BufferUnderflowException if the buffer contains too little + * data to decode the pcode. 
+ */ + public static BasicPacket decode(ByteBuffer buffer) { + int packetCode = buffer.getInt(buffer.position()+4); + packetCode &= BasicPacket.CODE_MASK; + + switch (packetCode) { + case 200: + return EolPacket.create().decode(buffer); + + case 203: + return ErrorPacket.create().decode(buffer); + + case 205: + return DocsumPacket.create().decode(buffer); + + case 202: + case 208: + case 214: + case 217: + return QueryResultPacket.create().decode(buffer); + + case 210: + case 221: + return PongPacket.create().decode(buffer); + + case 207: + return SearchNodePongPacket.create().decode(buffer); + + default: + throw new IllegalArgumentException("No support for packet " + packetCode); + } + } + + /** Gives the packet along with the bytes consumed to construct it. */ + public static DecodedPacket decodePacket(ByteBuffer buffer) { + ByteBuffer dataUsed = buffer.slice(); + int start = buffer.position(); + + BasicPacket packet = decode(buffer); + dataUsed.limit(buffer.position() - start); + return new DecodedPacket(packet, dataUsed); + } + + /** Sniff channel ID for query result packets */ + public static int sniffChannel(ByteBuffer buffer) { + int remaining = buffer.remaining(); + if (remaining < 12) { + return 0; + } + int packetCode = buffer.getInt(buffer.position()+4); + packetCode &= BasicPacket.CODE_MASK; + switch (packetCode) { + case 202: + case 208: + case 214: + case 217: + return buffer.getInt(buffer.position()+8); + default: + return 0; + } + } + + /** + * Test whether the buffer contains (the start of) a pong packet. + * + * Returns false if there is not enough data to determine the + * answer. 
+ */ + public static boolean isPongPacket(ByteBuffer buffer) { + + int remaining = buffer.remaining(); + if (remaining < 8) + return false; + int packetCode = buffer.getInt(buffer.position()+4); + packetCode &= BasicPacket.CODE_MASK; + if (packetCode == 210 || packetCode == 221) + return true; + else + return false; + } + + /** + * Note that it assumes that the position of the ByteBuffer is at the + * start of a packet and that we have enough data to actually read + * an integer out of the buffer. + * + * @return Return the length of the fs4 packet. Returns -1 if length + * could not be determined because we had too little + * data in the buffer. + * + */ + public static int packetLength(ByteBuffer buffer) + { + if (buffer.remaining() < 4) { + return -1; + } + return (buffer.getInt(buffer.position()) + 4); + } + + /** + * Takes a buffer possibly containing a packet. + * + * <P> + * If we return a packet when we return: + * <UL> + * <LI> the buffer is positioned at the beginning of the next + * packet when we return. + * <LI> limit is unchanged + * </UL> + * + * <P> + * If we return <code>null</code> there were no more packets + * there to decode and the following is true of the buffer + * <UL> + * <LI> the buffer is compacted, ie. partial packet is + * moved to the start, or if no more data is available + * the buffer is cleared. + * <LI> the position is set to the next byte after the valid + * data so the buffer is ready for reading. + * </UL> + * + * If there are no packets to be returned the buffer is compacted + * (ie. content is moved to the start and read-pointer is positioned + * + * @return Returns the next available packet from the buffer or + * <code>null</code> if there are no more <b>complete</b> + * packets in the buffer at this time. 
+ */ + public static DecodedPacket extractPacket(ByteBuffer buffer) + throws BufferTooSmallException + { + int remaining = buffer.remaining(); + + // if we are empty we can reset the buffer + if (remaining == 0) { + buffer.clear(); + return null; + } + + // if we can't figure out the size because we have less than + // 4 bytes we prepare the buffer for more data reading. + if (remaining < 4) { + buffer.compact(); + return null; + } + + int plen = packetLength(buffer); + + // -1 means that we do not have enough data to read the packet + // size yet + if (plen == -1) { + buffer.compact(); + return null; + } + + // if we haven't read an entire packet yet, we compact and return + // (same as above but separate code for clarity). note that this + // also occurs when there is no physical room for the packet, so + // clients of this API need to be aware of this and check for it + if (remaining < plen) { + + // if the read buffer is too small we must take drastic action + if (buffer.capacity() < plen) { + throw new BufferTooSmallException("Buffer too small to hold packet"); + } + + buffer.compact(); + return null; + } + + return PacketDecoder.decodePacket(buffer); + } + +} diff --git a/container-search/src/main/java/com/yahoo/fs4/PacketDumper.java b/container-search/src/main/java/com/yahoo/fs4/PacketDumper.java new file mode 100644 index 00000000000..f8a5e79e290 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/PacketDumper.java @@ -0,0 +1,135 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.fs4; + +import java.io.BufferedOutputStream; +import java.io.DataOutputStream; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.util.HashMap; +import java.util.Map; +import java.util.logging.Logger; + +import com.yahoo.fs4.mplex.FS4Channel; +import com.yahoo.log.LogLevel; +import com.yahoo.search.Query; + +/** + * Responsible for dumping query & query result packets + * + * @author tonytv + */ +public class PacketDumper implements PacketListener { + /** High level representation of packet types (e.g. query, result, ...) */ + public static enum PacketType { + query(QueryPacket.class), + result(QueryResultPacket.class); + + Class<? extends BasicPacket> implementationType; + + PacketType(Class<? extends BasicPacket> implementationType) { + this.implementationType = implementationType; + } + } + + private static Logger log = Logger.getLogger(PacketDumper.class.getSimpleName()); + + private volatile boolean disabled = true; + private final File logDirectory; + private final Map<Class<? 
extends BasicPacket>, DataOutputStream> dumpFiles = + new HashMap<>(); + private final String fileNamePattern; + + private void handlePacket(FS4Channel channel, BasicPacket packet, ByteBuffer serializedForm, String direction) { + //minimize overhead when disabled: + if (disabled) + return; + + try { + DataOutputStream stream = getOutputStream(packet); + if (stream != null) { + synchronized (stream) { + stream.writeChars(packet.getTimestamp() + " " + direction + " packet on channel " + channel.getChannelId()); + String indent = " "; + Query query = channel.getQuery(); + if (query != null) + stream.writeChars('\n' + indent + "Query: '" + query.getModel().getQueryString()); + hexDump(indent, stream, serializedForm); + + stream.writeChar('\n'); + stream.flush(); + } + } + } catch (IOException e) { + log.log(LogLevel.WARNING, "Could not log packet.", e); + } + } + + private void hexDump(String indent, DataOutputStream stream, ByteBuffer serializedForm) throws IOException { + HexByteIterator hexByteIterator = new HexByteIterator(serializedForm); + + long count = 0; + final int maxNumCharacters = 80; + while (hexByteIterator.hasNext()) { + if (count++ % maxNumCharacters == 0) + stream.writeChar('\n'); + stream.writeChars(hexByteIterator.next()); + } + } + + private synchronized DataOutputStream getOutputStream(BasicPacket packet) { + return dumpFiles.get(packet.getClass()); + } + + public void packetSent(FS4Channel channel, BasicPacket packet, ByteBuffer serializedForm) { + handlePacket(channel, packet, serializedForm, "Sent"); + } + + public void packetReceived(FS4Channel channel, BasicPacket packet, ByteBuffer serializedForm) { + handlePacket(channel, packet, serializedForm, "Received"); + } + + public synchronized void dumpPackets(PacketType packetType, boolean on) throws IOException { + OutputStream stream = dumpFiles.get(packetType.implementationType); + if (!on && stream != null) + closeFile(stream, packetType); + else if (on && stream == null) + 
openFile(packetType); + } + + private void openFile(PacketType packetType) throws FileNotFoundException { + if (!logDirectory.exists() || + logDirectory.mkdirs()) { + + throw new RuntimeException("PacketDumper: Could not create log directory " + logDirectory); + } + String fileName = fileNamePattern.replace("%s", packetType.toString()); + boolean append = true; + DataOutputStream outputStream = + new DataOutputStream( + new BufferedOutputStream( + new FileOutputStream(new File(logDirectory, fileName), append))); + dumpFiles.put(packetType.implementationType, outputStream); + + disabled = dumpFiles.isEmpty(); + } + + private void closeFile(OutputStream stream, PacketType packetType) throws IOException { + try { + synchronized (stream) { + stream.close(); + } + } finally { + dumpFiles.remove(packetType.implementationType); + disabled = dumpFiles.isEmpty(); + } + } + + public PacketDumper(File logDirectory, String fileNamePattern) { + this.logDirectory = logDirectory; + this.fileNamePattern = fileNamePattern; + } +} diff --git a/container-search/src/main/java/com/yahoo/fs4/PacketListener.java b/container-search/src/main/java/com/yahoo/fs4/PacketListener.java new file mode 100644 index 00000000000..b81118e99c9 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/PacketListener.java @@ -0,0 +1,16 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.fs4; + +import java.nio.ByteBuffer; + +import com.yahoo.fs4.mplex.FS4Channel; + +/** + * Interface for recieving notifications of packets sent or recieved. 
+ * + * @author tonytv + */ +public interface PacketListener { + void packetSent(FS4Channel channel, BasicPacket packet, ByteBuffer serializedForm); + void packetReceived(FS4Channel channel, BasicPacket packet, ByteBuffer serializedForm); +} diff --git a/container-search/src/main/java/com/yahoo/fs4/PacketNotificationsBroadcaster.java b/container-search/src/main/java/com/yahoo/fs4/PacketNotificationsBroadcaster.java new file mode 100644 index 00000000000..fe868a2de1b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/PacketNotificationsBroadcaster.java @@ -0,0 +1,34 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.fs4; + +import java.nio.ByteBuffer; + +import com.yahoo.fs4.mplex.FS4Channel; + +/** + * Broadcasts packet notifications to a list of listeners. + * + * @author tonytv + */ +public class PacketNotificationsBroadcaster implements PacketListener { + + private final PacketListener[] listeners; + + public PacketNotificationsBroadcaster(PacketListener... listeners) { + this.listeners = listeners; + } + + @Override + public void packetSent(FS4Channel channel, BasicPacket packet, ByteBuffer serializedForm) { + if (channel == null) return; + for (PacketListener listener : listeners) + listener.packetSent(channel, packet, serializedForm); + } + + @Override + public void packetReceived(FS4Channel channel, BasicPacket packet, ByteBuffer serializedForm) { + if (channel == null) return; + for (PacketListener listener : listeners) + listener.packetReceived(channel, packet, serializedForm); + } +} diff --git a/container-search/src/main/java/com/yahoo/fs4/PacketQueryTracer.java b/container-search/src/main/java/com/yahoo/fs4/PacketQueryTracer.java new file mode 100644 index 00000000000..3eeebb43a6f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/PacketQueryTracer.java @@ -0,0 +1,53 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.fs4; + +import java.nio.ByteBuffer; + +import com.yahoo.fs4.mplex.FS4Channel; +import com.yahoo.search.Query; + +/** + * Adds packets to the query context + * + * @author tonytv + */ +public class PacketQueryTracer implements PacketListener { + + private final static int traceLevel = 10; + + private void addTrace(FS4Channel channel, BasicPacket packet, ByteBuffer serializedForm) { + Query query = channel.getQuery(); + if (query != null && query.getTraceLevel() >= traceLevel) { + StringBuilder traceString = new StringBuilder(); + traceString.append(packet.getClass().getSimpleName()).append(": "); + hexDump(serializedForm, traceString); + + final boolean includeQuery = true; + query.trace(traceString.toString(), includeQuery, traceLevel); + } + } + + private void hexDump(ByteBuffer serializedForm, StringBuilder traceString) { + HexByteIterator hexByteIterator = new HexByteIterator(serializedForm); + + long count = 0; + final int maxNumCharacters = 80; + while (hexByteIterator.hasNext()) { + if (++count % maxNumCharacters == 0) + traceString.append('\n'); + traceString.append(hexByteIterator.next()); + } + } + + @Override + public void packetSent(FS4Channel channel, BasicPacket packet, ByteBuffer serializedForm) { + addTrace(channel, packet, serializedForm); + } + + @Override + public void packetReceived(FS4Channel channel, BasicPacket packet, ByteBuffer serializedForm) { + addTrace(channel, packet, serializedForm); + } + +} + diff --git a/container-search/src/main/java/com/yahoo/fs4/PingPacket.java b/container-search/src/main/java/com/yahoo/fs4/PingPacket.java new file mode 100644 index 00000000000..cf58d066ff1 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/PingPacket.java @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.fs4; + +import java.nio.ByteBuffer; + +/** + * A ping packet for FS4. This packet has no data. It maps to + * PCODE_MONITORQUERY the C++ implementation of the protocol. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ + +public class PingPacket extends BasicPacket { + private int flags = 0; + + public int getCode() { return (flags == 0) ? 206 : 220; } + + public void encodeBody(ByteBuffer buffer) { + if (flags != 0) { + buffer.putInt(MQF_QFLAGS); + buffer.putInt(flags); + } + } + + /** feature bits, taken from searchlib/common/transport.h */ + static final int MQF_QFLAGS = 0x00000002; + + /** flag bits, taken from searchlib/common/transport.h */ + static final int MQFLAG_REPORT_ACTIVEDOCS = 0x00000020; + + /** ask the backend to report active (searchable) documents */ + public void enableActivedocsReporting() { + flags |= MQFLAG_REPORT_ACTIVEDOCS; + } +} diff --git a/container-search/src/main/java/com/yahoo/fs4/PongPacket.java b/container-search/src/main/java/com/yahoo/fs4/PongPacket.java new file mode 100644 index 00000000000..adb7931948c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/PongPacket.java @@ -0,0 +1,94 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.fs4; + +import java.nio.ByteBuffer; +import java.util.Optional; + +/** + * A pong packet for FS4. It maps to PCODE_MLD_MONITORRESULT + * and PCODE_MONITORRESULTX in the C++ implementation of the protocol. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ + +public class PongPacket extends BasicPacket { + @SuppressWarnings("unused") + private int lowPartitionId; // ignored (historical field) + + private int dispatchTimestamp; + + @SuppressWarnings("unused") + private int totalNodes; // configured nodes + private Optional<Integer> activeNodes = Optional.empty(); // number of nodes that are up + @SuppressWarnings("unused") + private int totalPartitions; // configured partitions + private Optional<Integer> activePartitions = Optional.empty(); // number of partitions that are up + + private Optional<Long> activeDocs = Optional.empty(); // how many documents are searchable (sum) + + public PongPacket() { + } + + private int code; + protected void codeDecodedHook(int code) { this.code = code; } + public int getCode() { return code; } + + public void decodeBody(ByteBuffer buffer) { + int features = MRF_MLD; + if (code == PCODE_MONITORRESULTX) { + features = buffer.getInt(); + } + lowPartitionId = buffer.getInt(); + dispatchTimestamp = buffer.getInt(); + if ((features & MRF_MLD) != 0) { + totalNodes = buffer.getInt(); + activeNodes = Optional.of(buffer.getInt()); + totalPartitions = buffer.getInt(); + activePartitions = Optional.of(buffer.getInt()); + } + if ((features & MRF_RFLAGS) != 0) { + buffer.getInt(); // ignore rflags (historical field) + } + if ((features & MRF_ACTIVEDOCS) != 0) { + activeDocs = Optional.of(Long.valueOf(buffer.getLong())); + } + } + + public static PongPacket create() { + return new PongPacket(); + } + + /** + * Return current docstamp for backend to make cache invalidation + * possible. + * */ + public int getDocstamp() { + return dispatchTimestamp; + } + + /** + * retrieve the reported number of active (searchable) documents + * in the monitored backend. 
+ **/ + public Optional<Long> getActiveDocuments() { + return activeDocs; + } + + public Optional<Integer> getActiveNodes() { + return activeNodes; + } + + public Optional<Integer> getActivePartitions() { + return activePartitions; + } + + /** feature bits, taken from searchlib/common/transport.h */ + static final int MRF_MLD = 0x00000001; + static final int MRF_RFLAGS = 0x00000008; + static final int MRF_ACTIVEDOCS = 0x00000010; + + /** packet codes, taken from searchlib/common/transport.h */ + static final int PCODE_MLD_MONITORRESULT = 210; + static final int PCODE_MONITORRESULTX = 221; + +} diff --git a/container-search/src/main/java/com/yahoo/fs4/QueryPacket.java b/container-search/src/main/java/com/yahoo/fs4/QueryPacket.java new file mode 100644 index 00000000000..be0eb0bc2e9 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/QueryPacket.java @@ -0,0 +1,268 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.fs4; + +import com.yahoo.compress.IntegerCompressor; +import com.yahoo.io.GrowableByteBuffer; +import com.yahoo.prelude.query.Item; +import com.yahoo.search.Query; +import com.yahoo.search.grouping.vespa.GroupingExecutor; +import com.yahoo.search.query.Ranking; +import com.yahoo.searchlib.aggregation.Grouping; +import com.yahoo.text.Utf8; +import com.yahoo.vespa.objects.BufferSerializer; + +import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.List; + + +/** + * An "extended query" packet. This is the query packets used today, + * they allow more flexible sets of parameters to be shipped with queries. + * This packet can be encoded only. 
+ * + * @author bratseth + * @author Bjørn Borud + */ +public class QueryPacket extends Packet { + + private Query query; + private QueryPacketData queryPacketData; + + private QueryPacket(Query query) { + this.query = query; + } + + /** Returns the query from which this packet is populated */ + public Query getQuery() { + return query; + } + + /** + * Creates and returns a query packet + * + * @param query the query to convert to a packet + */ + public static QueryPacket create(Query query) { + return new QueryPacket(query); + } + + + /** Returns the first offset requested */ + public int getOffset() { + return query.getOffset(); + } + + /** + * Returns the <i>last</i> offset requested (inclusively), that is + * getOffset() + getHits() + */ + public int getLastOffset() { + return getOffset() + getHits(); + } + + /** Returns the number of hits requested */ + public int getHits() { + return query.getHits(); + } + + private byte[] getSummaryClassAsUtf8() { + if (query.getPresentation().getSummary() != null) { + return Utf8.toBytes(query.getPresentation().getSummary()); + } + return new byte[0]; + } + + /** + * Returns an opaque cache key for the query represented by this + * (pre-serialized) packet. + */ + public byte[] getOpaqueCacheKey() { + + // the cache key is generated by taking the serialized packet + // body and switching out the offset/hits/timeout fields with + // the requested summary class. We add a single '\0' byte in + // addition to the utf8 encoded summaryclass name to avoid the + // need to fiddle with feature flags to handle a non-existing + // summary class. 
+ + int skipOffset = 4; // offset of offset/hits/timestamp fields + int skipLength = 12; // length of offset/hits/timestamp fields + byte[] utf8Summary = getSummaryClassAsUtf8(); + byte[] stripped = new byte[encodedBody.length - skipLength + utf8Summary.length + 1]; + + System.arraycopy(encodedBody, 0, stripped, 0, skipOffset); + System.arraycopy(utf8Summary, 0, stripped, skipOffset, utf8Summary.length); + stripped[skipOffset + utf8Summary.length] = 0; + System.arraycopy(encodedBody, skipOffset + skipLength, + stripped, skipOffset + utf8Summary.length + 1, + encodedBody.length - (skipOffset + skipLength)); + return stripped; + } + + public void encodeBody(ByteBuffer buffer) { + queryPacketData = new QueryPacketData(); + int startOfFieldToSave; + + boolean sendSessionKey = query.getGroupingSessionCache() || query.getRanking().getQueryCache(); + buffer.putInt(getFeatureInt(sendSessionKey)); + + IntegerCompressor.putCompressedPositiveNumber(getOffset(), buffer); + IntegerCompressor.putCompressedPositiveNumber(getHits(), buffer); + // store the cutoff time in the tag object, and then do a similar Math.max there + buffer.putInt(Math.max(50, (int)query.getTimeLeft())); + buffer.putInt(getFlagInt()); + + startOfFieldToSave = buffer.position(); + Item.putString(query.getRanking().getProfile(), buffer); + queryPacketData.setRankProfile(buffer, startOfFieldToSave); + + if ( query.hasEncodableProperties()) { + startOfFieldToSave = buffer.position(); + query.encodeAsProperties(buffer, true); + queryPacketData.setPropertyMaps(buffer, startOfFieldToSave); + } + + // Language not needed when sending query stacks + + if (query.getRanking().getSorting() != null) { + int sortSpecLengthPosition=buffer.position(); + buffer.putInt(0); + int sortSpecLength = query.getRanking().getSorting().encode(buffer); + buffer.putInt(sortSpecLengthPosition, sortSpecLength); + } + + if (getGroupingList(query).size() > 0) { + BufferSerializer gbuf = new BufferSerializer(new GrowableByteBuffer()); + 
gbuf.putInt(null, getGroupingList(query).size()); + for (Grouping g: getGroupingList(query)){ + g.serialize(gbuf); + } + gbuf.getBuf().flip(); + byte[] blob = new byte [gbuf.getBuf().limit()]; + gbuf.getBuf().get(blob); + buffer.putInt(blob.length); + buffer.put(blob); + } + + if (sendSessionKey) { + buffer.putInt(query.getSessionId(true).asUtf8String().getByteLength()); + buffer.put(query.getSessionId(true).asUtf8String().getBytes()); + } + + if (query.getRanking().getLocation() != null) { + startOfFieldToSave = buffer.position(); + int locationLengthPosition=buffer.position(); + buffer.putInt(0); + int locationLength= query.getRanking().getLocation().encode(buffer); + buffer.putInt(locationLengthPosition, locationLength); + queryPacketData.setLocation(buffer, startOfFieldToSave); + } + + startOfFieldToSave = buffer.position(); + int stackItemPosition=buffer.position(); + buffer.putInt(0); // Number of stack items written below + int stackLengthPosition = buffer.position(); + buffer.putInt(0); + int stackPosition = buffer.position(); + int stackItemCount=query.encode(buffer); + int stackLength = buffer.position() - stackPosition; + buffer.putInt(stackItemPosition,stackItemCount); + buffer.putInt(stackLengthPosition, stackLength); + queryPacketData.setQueryStack(buffer, startOfFieldToSave); + } + + /** + * feature bits, taken from searchlib/common/transport.h + **/ + static final int QF_PARSEDQUERY = 0x00000002; + static final int QF_RANKP = 0x00000004; + static final int QF_SORTSPEC = 0x00000080; + static final int QF_LOCATION = 0x00000800; + static final int QF_PROPERTIES = 0x00100000; + static final int QF_WARMUP = 0x00200000; + static final int QF_GROUPSPEC = 0x00400000; + static final int QF_SESSIONID = 0x00800000; + + private int getFeatureInt(boolean sendSessionId) { + int features = QF_PARSEDQUERY; // this bitmask means "parsed query" in query packet. 
+ // we always use a parsed query here + + features |= QF_RANKP; // hasRankProfile + + features |= (query.getRanking().getSorting() != null) ? QF_SORTSPEC : 0; + features |= (query.getRanking().getLocation() != null) ? QF_LOCATION : 0; + features |= (query.hasEncodableProperties()) ? QF_PROPERTIES : 0; + features |= (getGroupingList(query).size() > 0) ? QF_GROUPSPEC : 0; + features |= (sendSessionId) ? QF_SESSIONID : 0; + + return features; + } + + /** + * query flag bits, taken from searchlib/common/transport.h + **/ + static final int QFLAG_ALLOW_ERRORPACKET = 0x00000004; + static final int QFLAG_ESTIMATE = 0x00000080; + static final int QFLAG_DROP_SORTDATA = 0x00004000; + static final int QFLAG_REPORT_COVERAGE = 0x00008000; + static final int QFLAG_NO_RESULTCACHE = 0x00010000; + static final int QFLAG_DUMP_FEATURES = 0x00040000; + + private int getFlagInt() { + int flags = getQueryFlags(query); + queryPacketData.setQueryFlags(flags); + + flags |= QFLAG_ALLOW_ERRORPACKET; + + /** + * QFLAG_DROP_SORTDATA + * SORTDATA is a mangling of data from the attribute vectors + * which were used in the search which is byte comparable in + * such a way the comparing SORTDATA for two different hits + * will reproduce the order in which the data were returned when + * using sortspec. For now we simple drop these, but if they + * should be necessary at later date, QueryResultPacket must be + * updated to be able to parse result packets correctly. + */ + flags |= QFLAG_DROP_SORTDATA; + return flags; + } + + + public int getCode() { + return 218; + } + + public String toString() { + return "Query x packet [query: " + query + "]"; + } + + private static List<Grouping> getGroupingList(Query query) { + return Collections.unmodifiableList(GroupingExecutor.getGroupingList(query)); + } + + static int getQueryFlags(Query query) { + int flags = QFLAG_REPORT_COVERAGE; + + flags |= query.properties().getBoolean(com.yahoo.search.query.Model.ESTIMATE) ? 
QFLAG_ESTIMATE : 0; + flags |= query.getNoCache() ? QFLAG_NO_RESULTCACHE : 0; + flags |= query.properties().getBoolean(Ranking.RANKFEATURES, false) ? QFLAG_DUMP_FEATURES : 0; + return flags; + } + + /** + * Fetch a binary wrapper containing data from encoding process for use in + * creating a summary request. + * + * @return wrapper object suitable for creating a summary fetch packet + * @throws IllegalStateException + * if no wrapper has been generated + */ + public QueryPacketData getQueryPacketData() { + if (queryPacketData == null) { + throw new IllegalStateException("Trying to fetch a hit tag without having encoded the packet first."); + } + return queryPacketData; + } +} diff --git a/container-search/src/main/java/com/yahoo/fs4/QueryPacketData.java b/container-search/src/main/java/com/yahoo/fs4/QueryPacketData.java new file mode 100644 index 00000000000..9d6fd2a2795 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/QueryPacketData.java @@ -0,0 +1,91 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.fs4; + +import java.nio.ByteBuffer; + +/** + * Class for storing data which has to be constant between query and summary + * fetch for a Vespa hit. Used to avoid to tagging Vespa summary hits with + * the entire query as an immutable. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public final class QueryPacketData { + + private byte[] rankProfile = null; + private int queryFlags = 0; + private byte[] queryStack = null; + private byte[] location = null; + private byte[] propertyMaps = null; + + /** + * Given src.position() bigger than startOfField, allocate a fresh byte + * array, and copy the data from startOfField to src.position() into it. 
+ * + * @param src + * the ByteBuffer to copy from + * @param startOfField + * the position of the buffer at which the field starts + * @return a copy of the data between startOfField and the buffer position + * before invokation + * @throws IllegalArgumentException + * if startOfField is somewhere after src.position() + */ + private byte[] copyField(final ByteBuffer src, final int startOfField) { + if (startOfField > src.position()) { + throw new IllegalArgumentException("startOfField after src.position()"); + } + final byte[] dst = new byte[src.position() - startOfField]; + + src.position(startOfField); + src.get(dst); + return dst; + } + + ByteBuffer encodeRankProfile(final ByteBuffer buffer) { + return buffer.put(rankProfile); + } + + void setRankProfile(final ByteBuffer src, final int startOfField) { + rankProfile = copyField(src, startOfField); + } + + ByteBuffer encodeQueryFlags(final ByteBuffer buffer) { + return buffer.putInt(queryFlags); + } + + void setQueryFlags(final int queryFlags) { + this.queryFlags = queryFlags; + } + + ByteBuffer encodeQueryStack(final ByteBuffer buffer) { + return buffer.put(queryStack); + } + + void setQueryStack(final ByteBuffer src, final int startOfField) { + queryStack = copyField(src, startOfField); + } + + ByteBuffer encodePropertyMaps(final ByteBuffer buffer) { + if (propertyMaps != null) { + buffer.put(propertyMaps); + } + return buffer; + } + + void setPropertyMaps(final ByteBuffer src, final int startOfField) { + propertyMaps = copyField(src, startOfField); + } + + void setLocation(final ByteBuffer src, final int startOfField) { + this.location = copyField(src, startOfField); + } + + ByteBuffer encodeLocation(final ByteBuffer buffer) { + if (location != null) { + buffer.put(location); + } + return buffer; + } + +} diff --git a/container-search/src/main/java/com/yahoo/fs4/QueryResultPacket.java b/container-search/src/main/java/com/yahoo/fs4/QueryResultPacket.java new file mode 100644 index 00000000000..2e153f36288 --- 
/dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/QueryResultPacket.java @@ -0,0 +1,221 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.fs4; + +import com.yahoo.search.Query; + +import java.nio.ByteBuffer; +import java.nio.IntBuffer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.logging.Logger; + + +/** + * An "extended query result" packet. This is the query result packets used today, + * they allow more flexible sets of parameters to be shipped with query results. + * This packet can be decoded only. + * + * @author bratseth + */ +public class QueryResultPacket extends Packet { + + /** This may have code 202, 208, 214 or 217 of historical reasons */ + private int code; + + /** Whether mld stuff, whatever that is, is included in this result */ + private boolean mldFeature=false; + + /** A feature of no apparent utility */ + private boolean datasetFeature=false; + + /** Whether coverage information is included in this result */ + private boolean coverageFeature = false; + private long coverageDocs = 0; + private long activeDocs = 0; + + /** Whether the result contains grouping results **/ + private boolean groupDataFeature = false; + + /** Whether the result contains properties **/ + private boolean propsFeature = false; + + private long totalDocumentCount; + + private Number maxRank; + + private int docstamp; + + private int dataset=-1; + + private byte[] groupData = null; + + private List<DocumentInfo> documents=new ArrayList<>(10); + + public FS4Properties[] propsArray; + + private int offset; + + private QueryResultPacket() { + } + + public static QueryResultPacket create() { + return new QueryResultPacket(); + } + + public void setDocstamp(int docstamp){ this.docstamp=docstamp; } + + public int getDocstamp() { return docstamp; } + + /** Returns whether this has the mysterious mld feature */ + public boolean 
getMldFeature() { return mldFeature; } + + /** Returns whether this result has the dataset feature */ + public boolean getDatasetFeature() { return datasetFeature; } + + public boolean getCoverageFeature() { + return coverageFeature; + } + + public long getCoverageDocs() { return coverageDocs; } + + public long getActiveDocs() { return activeDocs; } + + public boolean getCoverageFull() { + return coverageDocs == activeDocs; + } + + + /** @return offset returned by backend */ + public int getOffset() { return offset; } + + /** Only for testing. */ + public void setOffset(int offset) { + this.offset = offset; + } + + @Override + public void decodeBody(ByteBuffer buffer) { + IntBuffer ints=buffer.asIntBuffer(); + + decodeFeatures(ints); + offset = ints.get(); + int documentCount=ints.get(); + buffer.position(buffer.position() + ints.position()*4); + totalDocumentCount = buffer.getLong(); + maxRank = decodeMaxRank(buffer); + ints = buffer.asIntBuffer(); + docstamp=ints.get(); + if (datasetFeature) dataset=ints.get(); + buffer.position(buffer.position() + ints.position()*4); + if (groupDataFeature) { + int len = buffer.getInt(); + groupData = new byte[len]; + buffer.get(groupData); + } + if (coverageFeature) { + coverageDocs = buffer.getLong(); + activeDocs = buffer.getLong(); + } + decodeDocuments(buffer,documentCount); + if (propsFeature) { + int numMaps = buffer.getInt(); + propsArray = new FS4Properties[numMaps]; + for (int i = 0; i < numMaps; i++) { + propsArray[i] = new FS4Properties(); + propsArray[i].decode(buffer); + } + } + } + + private Number decodeMaxRank(ByteBuffer buffer) { + return Double.valueOf(buffer.getDouble()); + } + + /** + * feature bits + */ + public static final int QRF_MLD = 0x00000001; + public static final int QRF_SORTDATA = 0x00000010; + public static final int QRF_AGGRDATA = 0x00000020; + public static final int QRF_COVERAGE = 0x00000040; + public static final int QRF_GROUPDATA = 0x00000200; + public static final int QRF_PROPERTIES = 
0x00000400; + + /** + * Sets the features of this package. + * Features are either encoded by different package codes + * or by a feature int, for reasons not easily comprehended. + */ + private void decodeFeatures(IntBuffer buffer) { + switch (getCode()) { + case 202: + mldFeature=false; + datasetFeature=false; + break; + case 208: + mldFeature=true; + datasetFeature=false; + break; + case 214: + mldFeature=true; + datasetFeature=true; + break; + case 217: + int features=buffer.get(); + mldFeature = (QRF_MLD & features) != 0; + datasetFeature = (0x002 & features) != 0; + // Data given by sortFeature not currently used by QRS: + // sortFeature = (QRF_SORTDATA & features) != 0; + coverageFeature = (QRF_COVERAGE & features) != 0; + groupDataFeature = (QRF_GROUPDATA & features) != 0; + propsFeature = (QRF_PROPERTIES & features) != 0; + break; + default: + throw new RuntimeException("Programming error"); + } + } + + private void decodeDocuments(ByteBuffer buffer, int documentCount) { + for (int i=0; i<documentCount; i++) { + documents.add(new DocumentInfo(buffer, this)); + } + } + + public int getCode() { return code; } + + protected void codeDecodedHook(int code) { this.code=code; } + + public int getDocumentCount() { return documents.size(); } + + public String toString() { + return "Query result x packet [" + getDocumentCount() + " documents]"; + } + + /** Returns the opaque grouping results **/ + public byte[] getGroupData() { return groupData; } + + + /** Returns the total number of documents avalable for this query */ + public long getTotalDocumentCount() { return totalDocumentCount; } + + /** Only for testing. 
*/ + public void setTotalDocumentCount(long totalDocumentCount) { + this.totalDocumentCount = totalDocumentCount; + } + + /** Returns a read-only list containing the DocumentInfo objects of this result */ + public List<DocumentInfo> getDocuments() { + return Collections.unmodifiableList(documents); + } + + public void addDocument(DocumentInfo document) { + documents.add(document); + } + + // TODO: Handle new maxRank intelligently + public int getMaxRank() { return maxRank.intValue(); } + + public int getDataset() { return dataset; } + +} diff --git a/container-search/src/main/java/com/yahoo/fs4/SearchNodePongPacket.java b/container-search/src/main/java/com/yahoo/fs4/SearchNodePongPacket.java new file mode 100644 index 00000000000..dce0e84de95 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/SearchNodePongPacket.java @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.fs4; + +import java.nio.ByteBuffer; + +/** + * Responsible for decoding pong packets from a search node. + * @author tonytv + */ +public class SearchNodePongPacket extends PongPacket { + private int timeStamp; + + public static SearchNodePongPacket create() { + return new SearchNodePongPacket(); + } + + @Override + public int getCode() { return 207; } + + @Override + public void decodeBody(ByteBuffer buffer) { + @SuppressWarnings("unused") + int partitionId = buffer.getInt(); + + timeStamp = buffer.getInt(); + } + + @Override + public int getDocstamp() { + return timeStamp; + } +} diff --git a/container-search/src/main/java/com/yahoo/fs4/mplex/Backend.java b/container-search/src/main/java/com/yahoo/fs4/mplex/Backend.java new file mode 100644 index 00000000000..604c962a2b3 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/mplex/Backend.java @@ -0,0 +1,410 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.fs4.mplex; + + +import com.yahoo.fs4.*; +import com.yahoo.io.Connection; +import com.yahoo.io.ConnectionFactory; +import com.yahoo.io.Listener; +import com.yahoo.vespa.defaults.Defaults; +import com.yahoo.yolean.Exceptions; + +import java.io.File; +import java.io.IOException; +import java.net.InetSocketAddress; +import java.nio.channels.SocketChannel; +import java.util.*; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * @author <a href="mailto:borud@yahoo-inc.com">Bjorn Borud</a> + */ +public class Backend implements ConnectionFactory { + + public static final class BackendStatistics { + + public final int activeConnections; + public final int passiveConnections; + + public BackendStatistics(int activeConnections, int passiveConnections) { + this.activeConnections = activeConnections; + this.passiveConnections = passiveConnections; + } + + @Override + public String toString() { + return activeConnections + "/" + totalConnections(); + } + + public int totalConnections() { + return activeConnections + passiveConnections; + } + } + + private static final Logger log = Logger.getLogger(Backend.class.getName()); + + private ListenerPool listeners; + private final InetSocketAddress address; + private final String host; + private final int port; + private final Map<Integer, FS4Channel> activeChannels = new HashMap<>(); + private int channelId = 0; + private boolean shutdownInitiated = false; + + /** Whether we are currently in the state of not being able to connect, to avoid repeated logging */ + private boolean areInSocketNotConnectableState = false; + + private final LinkedList<FS4Channel> pingChannels = new LinkedList<>(); + private final PacketListener packetListener; + private final ConnectionPool connectionPool; + final PacketDumper packetDumper; + private AtomicInteger connectionCount = new AtomicInteger(0); + + + /** + * For unit testing. 
do not use + */ + protected Backend() { + this.host = null; + this.port = 0; + this.packetListener = null; + this.packetDumper = null; + this.address = null; + this.connectionPool = new ConnectionPool(); + } + + public Backend(String host, int port, String serverDiscriminator, ListenerPool listenerPool, ConnectionPool connectionPool) { + final String fileNamePattern = "qrs." + serverDiscriminator + '.' + host + ":" + port + ".%s" + ".dump"; + packetDumper = new PacketDumper(new File(Defaults.getDefaults().vespaHome() + "logs/vespa/qrs/"), fileNamePattern); + packetListener = new PacketNotificationsBroadcaster(packetDumper, new PacketQueryTracer()); + this.listeners = listenerPool; + this.host = host; + this.port = port; + address = new InetSocketAddress(host, port); + this.connectionPool = connectionPool; + } + + private void logWarning(String attemptDescription, Exception e) { + log.log(Level.WARNING, "Exception on " + attemptDescription + " '" + host + ":" + port + "': " + Exceptions.toMessageString(e)); + } + + private void logInfo(String attemptDescription, Exception e) { + log.log(Level.INFO, "Exception on " + attemptDescription + " '" + host + ":" + port + "': " + Exceptions.toMessageString(e)); + } + + // ============================================================ + // ==== connection pool stuff + // ============================================================ + + + /** + * Fetch a connection from the connection pool. If the pool + * is empty we create a connection. + */ + private FS4Connection getConnection() throws IOException { + FS4Connection connection = connectionPool.getConnection(); + if (connection == null) { + // if pool was empty create one: + connection = createConnection(); + } + return connection; + } + + /** + * Return a connection to the connection pool. If the + * connection is not valid anymore we drop it, ie. do not + * put it into the pool. 
+ */ + public void returnConnection(FS4Connection connection) { + connectionPool.releaseConnection(connection); + } + + /** + * Create a new connection to the target for this backend. + */ + private FS4Connection createConnection() throws IOException { + SocketChannel socket = SocketChannel.open(); + try { + connectSocket(socket); + } catch (Exception e) { + // was warning, see VESPA-1922 + if ( ! areInSocketNotConnectableState) + logInfo("connecting to", e); + areInSocketNotConnectableState = true; + socket.close(); + return null; + } + areInSocketNotConnectableState = false; + int listenerId = connectionCount.getAndIncrement()%listeners.size(); + Listener listener = listeners.get(listenerId); + FS4Connection connection = new FS4Connection(socket, listener, this, packetListener); + listener.registerConnection(connection); + + log.fine("Created new connection to " + host + ":" + port); + connectionPool.createdConnection(); + return connection; + } + + private void connectSocket(SocketChannel socket) throws IOException { + socket.configureBlocking(false); + + boolean connected = socket.connect(address); + + // wait for connection + if (!connected) { + long timeBarrier = System.currentTimeMillis() + 20L; + while (true) { + try { + Thread.sleep(5L); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new IllegalStateException("Received InterruptedException while waiting for socket to connect.", e); + } + // don't care whether it's spurious wakeup + connected = socket.finishConnect(); + if (connected || System.currentTimeMillis() > timeBarrier) { + break; + } + } + } + + // did we get a connection? + if(!connected) + + { + throw new IllegalArgumentException("Could not create connection to dispatcher on " + + address.getHostName() + ":" + address.getPort()); + } + + socket.socket(). 
+ + setTcpNoDelay(true); + } + + + //============================================================ + //==== channel management + //============================================================ + + /** + * Open a new channel to fdispatch. Analogous to the "Channel" + * concept as used in FS4. + */ + public FS4Channel openChannel () { + int cachedChannelId; + synchronized (this) { + if (channelId >= ((1 << 31) - 2)) { + channelId = 0; + } + cachedChannelId = channelId; + channelId += 2; + } + Integer id = cachedChannelId; + FS4Channel chan = new FS4Channel(this, id); + synchronized (activeChannels) { + activeChannels.put(id, chan); + } + return chan; + } + + public FS4Channel openPingChannel () { + FS4Channel chan = FS4Channel.createPingChannel(this); + synchronized (pingChannels) { + pingChannels.add(chan); + } + return chan; + } + + /** + * Get the remote address for this Backend. This method + * has package access only, because it is really only of + * importance to FS4Channel for writing slightly more sensible + * log messages. + * @return Returns the address (host, port) for this Backend. + */ + InetSocketAddress getAddress() { + return address; + } + + /** + * Get an active channel by id. + * + * @param id the (fs4) channel id + * @return returns the (fs4) channel associated with this id + * or <code>null</code> if the channel is not in the + * set of active channels. + */ + public FS4Channel getChannel(Integer id) { + synchronized (activeChannels) { + return activeChannels.get(id); + } + } + + /** + * Return the first channel in the queue waiting for pings or + * <code>null</code> if none. + */ + public FS4Channel getPingChannel () { + synchronized (pingChannels) { + if (pingChannels.isEmpty()) + return null; + return pingChannels.getFirst(); + } + } + + /** + * Get an active channel by id. This is a wrapper for the method + * that takes the id as an Integer. 
+ * + * @param id The (fs4) channel id + * @return Returns the (fs4) channel associated with this id + * or <code>null</code> if the channel is not in the + * set of active channels. + */ + public FS4Channel getChannel (int id) { + return getChannel(new Integer(id)); + } + + /** + * Remove a channel. We do not want this method to be called + * directly by the client -- removal of channels should be done + * by calling the close() method of the channel. + * + * @param id The (fs4) channel id + * @return Removes and returns the (fs4) channel associated + * with this id or <code>null</code> if the channel is + * not in the set of active channels. + */ + protected FS4Channel removeChannel (Integer id) { + synchronized (activeChannels) { + return activeChannels.remove(id); + } + } + + /** + * Remove a ping channel. We do not want this method to be called + * directly by the client -- removal of channels should be done + * by calling the close() method of the channel. + * + * @return Removes and returns the (fs4) channel first in + * the queue of ping channels or <code>null</code> + * if there are no active ping channels. + */ + protected FS4Channel removePingChannel () { + synchronized (pingChannels) { + if (pingChannels.isEmpty()) + return null; + return pingChannels.removeFirst(); + } + } + //============================================================ + //==== packet sending and reception + //============================================================ + + protected boolean sendPacket(BasicPacket packet, Integer channelId) throws IOException { + if (shutdownInitiated) { + log.warning("Tried to send packet after shutdown initiated. 
Ignored."); + return false; + } + + FS4Connection connection = null; + try { + connection = getConnection(); + if (connection == null) { + return false; + } + connection.sendPacket(packet, channelId); + } + finally { + if (connection != null) { + returnConnection(connection); + } + } + + return true; + } + + /** + * When a connection receives a packet, it uses this method to + * dispatch the packet to the right FS4Channel. If the corresponding + * FS4Channel does not exist the packet is dropped and a message is + * logged saying so. + */ + protected void receivePacket(BasicPacket packet) { + FS4Channel fs4; + if (packet.hasChannelId()) + fs4 = getChannel(((Packet)packet).getChannel()); + else + fs4 = getPingChannel(); + + // channel does not exist + if (fs4 == null) { + return; + } + try { + fs4.addPacket(packet); + } + catch (InterruptedException e) { + log.info("Interrupted during packet adding. Packet = " + packet.toString()); + Thread.currentThread().interrupt(); + } + catch (InvalidChannelException e) { + log.log(Level.WARNING, "Channel was invalid. Packet = " + packet.toString() + + " Backend probably sent data pertaining an old request," + + " system may be overloaded."); + } + } + + /** + * This method should be used to ensure graceful shutdown of the backend. + */ + public void shutdown() { + log.info("shutting down"); + if (shutdownInitiated) { + throw new IllegalStateException("Shutdown already in progress"); + } + shutdownInitiated = true; + } + + public void close() { + for (Connection c = connectionPool.getConnection(); c != null; c = connectionPool.getConnection()) { + try { + c.close(); + } catch (IOException e) { + logWarning("closing", e); + } + } + } + + /** + * Connection factory used by the Listener class. 
+ */ + public Connection newConnection(SocketChannel channel, Listener listener) { + return new FS4Connection(channel, listener, this, packetListener); + } + + public String toString () { + return("Backend/" + host + ":" + port); + } + + public BackendStatistics getStatistics() { + synchronized (connectionPool) { //ensure consistent values + return new BackendStatistics(connectionPool.activeConnections(), connectionPool.passiveConnections()); + } + } + + public void dumpPackets(final PacketDumper.PacketType packetType, final boolean on) throws IOException { + packetDumper.dumpPackets(packetType, on); + } + + public String getHost() { + return host; + } + + public int getPort() { + return port; + } + +}
\ No newline at end of file diff --git a/container-search/src/main/java/com/yahoo/fs4/mplex/ConnectionPool.java b/container-search/src/main/java/com/yahoo/fs4/mplex/ConnectionPool.java new file mode 100644 index 00000000000..f39df0d5cec --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/mplex/ConnectionPool.java @@ -0,0 +1,89 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.fs4.mplex; + +import java.util.Queue; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.logging.Logger; +import java.util.Iterator; +import java.util.Timer; +import java.util.TimerTask; + +import com.yahoo.log.LogLevel; +/** + * Pool of FS4 connections. + * + * @author tonytv + */ +public class ConnectionPool { + final static int CLEANINGPERIOD = 1000; // Execute every second + private final Queue<FS4Connection> connections = new ConcurrentLinkedQueue<>(); + private final AtomicInteger activeConnections = new AtomicInteger(0); + private final AtomicInteger passiveConnections = new AtomicInteger(0); + private static final Logger log = Logger.getLogger(ConnectionPool.class.getName()); + + class PoolCleanerTask extends TimerTask { + private final ConnectionPool connectionPool; + public PoolCleanerTask(ConnectionPool connectionPool) { + this.connectionPool = connectionPool; + } + + public void run() { + try { + connectionPool.dropInvalidConnections(); + } catch (Exception e) { + log.log(LogLevel.WARNING, + "Caught exception in connection pool cleaner, ignoring.", + e); + } + } + } + + public ConnectionPool() { + } + + public ConnectionPool(Timer timer) { + timer.schedule(new PoolCleanerTask(this), CLEANINGPERIOD, CLEANINGPERIOD); + } + + private void dropInvalidConnections() { + for (Iterator<FS4Connection> i = connections.iterator(); i.hasNext();) { + FS4Connection connection = i.next(); + if (!connection.isValid()) { + 
i.remove(); + } + } + } + + private FS4Connection registerAsActiveIfNonZero(FS4Connection connection) { + activeConnections.incrementAndGet(); + passiveConnections.decrementAndGet(); + return connection; + } + + public FS4Connection getConnection() { + return registerAsActiveIfNonZero(connections.poll()); + } + + void releaseConnection(FS4Connection connection) { + assert(connection != null); + activeConnections.decrementAndGet(); + if (connection.isValid()) { + passiveConnections.incrementAndGet(); + connections.add(connection); + } + } + + void createdConnection() { + activeConnections.incrementAndGet(); + } + + int activeConnections() { + return activeConnections.get(); + } + + //unused connections in the pool + int passiveConnections() { + return passiveConnections.get(); + } +} diff --git a/container-search/src/main/java/com/yahoo/fs4/mplex/FS4Channel.java b/container-search/src/main/java/com/yahoo/fs4/mplex/FS4Channel.java new file mode 100644 index 00000000000..6077f3c3da2 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/mplex/FS4Channel.java @@ -0,0 +1,254 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.fs4.mplex; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; +import java.util.logging.Logger; + +import com.yahoo.concurrent.SystemTimer; +import com.yahoo.fs4.BasicPacket; +import com.yahoo.fs4.ChannelTimeoutException; +import com.yahoo.fs4.Packet; +import com.yahoo.search.Query; + + + +/** + * + * This class is used to represent a "channel" in the FS4 protocol. + * A channel represents a session between a client and the fdispatch. 
+ * Internally this class has a response queue used by the backend + * for queueing up FS4 packets that belong to this channel (or + * <em>session</em>, which might be a more appropriate name for it). + * + * <P> + * Outbound packets are handed off to the FS4Connection. + * + * @author <a href="mailto:borud@yahoo-inc.com">Bjorn Borud</a> + */ +public class FS4Channel +{ + private static Logger log = Logger.getLogger(FS4Channel.class.getName()); + + private Integer channelId; + private Backend backend; + volatile private BlockingQueue<BasicPacket> responseQueue; + private Query query; + private boolean isPingChannel = false; + + /** for unit testing. do not use */ + protected FS4Channel () { + } + + protected FS4Channel (Backend backend, Integer channelId) { + this.channelId = channelId; + this.backend = backend; + this.responseQueue = new LinkedBlockingQueue<>(); + } + + static public FS4Channel createPingChannel(Backend backend) { + FS4Channel pingChannel = new FS4Channel(backend, new Integer(0)); + pingChannel.isPingChannel = true; + return pingChannel; + } + + /** Set the query currently associated with this channel */ + public void setQuery(Query query) { + this.query = query; + } + + /** Get the query currently associated with this channel */ + public Query getQuery() { + return query; + } + + /** + * @return returns an Integer representing the (fs4) channel id + */ + public Integer getChannelId () { + return channelId; + } + + /** + * Closes the channel + */ + public void close () { + BlockingQueue<BasicPacket> q = responseQueue; + responseQueue = null; + query = null; + if (isPingChannel) { + backend.removePingChannel(); + } else { + backend.removeChannel(channelId); + } + if (q != null) { + q.clear(); + } + } + + /** + * Legacy interface. 
+ */ + public boolean sendPacket(BasicPacket packet) throws InvalidChannelException, IOException { + ensureValid(); + return backend.sendPacket(packet, channelId); + } + + /** + * Receives the given number of packets and returns them, OR + * <ul> + * <li>Returns a smaller number of packets if an error or eol packet is received + * <li>Throws a ChannelTimeoutException if timeout occurs before all packets + * are received. Packets received with the wrong channel id are ignored. + * </ul> + * + * @param timeout the number of ms to attempt to get packets before throwing an exception + * @param packetCount the number of packets to receive, or -1 to receive any number up to eol/error + */ + public BasicPacket[] receivePackets(long timeout, int packetCount) + throws InvalidChannelException, ChannelTimeoutException + { + ensureValid(); + + List<BasicPacket> packets = new ArrayList<>(12); + long startTime = SystemTimer.INSTANCE.milliTime(); + long timeLeft = timeout; + + try { + while (timeLeft >= 0) { + BasicPacket p = nextPacket(timeLeft); + if (p == null) throw new ChannelTimeoutException("Timed out"); + + if (!isPingChannel + && ((Packet)p).getChannel() != getChannelId().intValue()) + { + log.warning("Ignoring received " + p + ", when excepting channel " + getChannelId()); + continue; + } + + packets.add(p); + if (isLastPacket(p) || hasEnoughPackets(packetCount, packets)) { + BasicPacket[] packetArray = new BasicPacket[packets.size()]; + packets.toArray(packetArray); + return packetArray; + } + + // doing this last might save us one system call for the last + // packet. + timeLeft = timeout - (SystemTimer.INSTANCE.milliTime() - startTime); + } + } + catch (InvalidChannelException e) { + // nop. if we get this we want to return the default + // zero length packet array indicating that we have no + // valid response + log.info("FS4Channel was invalid. 
timeLeft=" + + timeLeft + ", timeout=" + timeout); + } + catch (InterruptedException e) { + log.info("FS4Channel was interrupted. timeLeft=" + + timeLeft + ", timeout=" + timeout); + Thread.currentThread().interrupt(); + } + + // default return, we only hit this if we timed out and + // did not get the end of the packet stream + throw new ChannelTimeoutException(); + } + + private static boolean hasEnoughPackets(int packetCount,List<BasicPacket> packets) { + if (packetCount<0) return false; + return packets.size()>=packetCount; + } + + /** + * Returns true if we will definitely receive more packets on this stream + * + * Shouldn't that be "_not_ receive more packets"? + */ + private static boolean isLastPacket (BasicPacket packet) { + if (packet instanceof com.yahoo.fs4.ErrorPacket) return true; + if (packet instanceof com.yahoo.fs4.EolPacket) return true; + if (packet instanceof com.yahoo.fs4.PongPacket) return true; + return false; + } + + /** + * Return the next available packet from the response queue. If there + * are no packets available we wait a maximum of <code>timeout</code> + * milliseconds before returning a <code>null</code> + * + * @param timeout Number of milliseconds to wait for a packet + * to become available. + * + * @return Returns the next available <code>BasicPacket</code> or + * <code>null</code> if we timed out. + */ + public BasicPacket nextPacket(long timeout) + throws InterruptedException, InvalidChannelException + { + return ensureValidQ().poll(timeout, TimeUnit.MILLISECONDS); + } + + /** + * Add incoming packet to the response queue. This is to be used + * by the listener for placing incoming packets in the response + * queue. + * + * @param packet BasicPacket to be placed in the response queue. + * + */ + protected void addPacket (BasicPacket packet) + throws InterruptedException, InvalidChannelException + { + ensureValidQ().put(packet); + } + + /** + * A valid FS4Channel is one that has not yet been closed. 
+ * + * @return Returns <code>true</code> if the FS4Channel is valid. + */ + public boolean isValid () { + return responseQueue != null; + } + + /** + * This method is called whenever we want to perform an operation + * which assumes that the FS4Channel object is valid. An exception + * is thrown if the opposite turns out to be the case. + * + * @throws InvalidChannelException if the channel is no longer valid. + */ + private void ensureValid () throws InvalidChannelException { + if (isValid()) { + return; + } + throw new InvalidChannelException("Channel is no longer valid"); + } + + /** + * This method is called whenever we want to perform an operation + * which assumes that the FS4Channel object is valid. An exception + * is thrown if the opposite turns out to be the case. + * + * @throws InvalidChannelException if the channel is no longer valid. + */ + private BlockingQueue<BasicPacket> ensureValidQ () throws InvalidChannelException { + BlockingQueue<BasicPacket> q = responseQueue; + if (q != null) { + return q; + } + throw new InvalidChannelException("Channel is no longer valid"); + } + + public String toString() { + return "fs4 channel " + channelId + (isValid() ? " [valid]" : " [invalid]"); + } + +} diff --git a/container-search/src/main/java/com/yahoo/fs4/mplex/FS4Connection.java b/container-search/src/main/java/com/yahoo/fs4/mplex/FS4Connection.java new file mode 100644 index 00000000000..78c534140a2 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/mplex/FS4Connection.java @@ -0,0 +1,374 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +package com.yahoo.fs4.mplex; + + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.SelectionKey; +import java.nio.channels.SocketChannel; +import java.util.LinkedList; +import java.util.logging.Level; +import java.util.logging.Logger; + +import com.yahoo.fs4.BasicPacket; +import com.yahoo.fs4.BufferTooSmallException; +import com.yahoo.fs4.PacketDecoder; +import com.yahoo.fs4.PacketListener; +import com.yahoo.io.Connection; +import com.yahoo.io.Listener; +import com.yahoo.log.LogLevel; +import com.yahoo.search.Query; + +/** + * + * This class is used to represent a connection to an fdispatch + * + * @author <a href="mailto:borud@yahoo-inc.com">Bjorn Borud</a> + */ +public class FS4Connection implements Connection +{ + private static Logger log = Logger.getLogger(FS4Connection.class.getName()); + private Backend backend; + private Listener listener; + private SocketChannel channel; + + private boolean shouldWrite = false; + + private static int idCounter = 1; + private int idNumber; + private int maxInitialSize = 1024; + + // outbound data + private ByteBuffer writeBuffer; + private LinkedList<ByteBuffer> writeBufferList = new LinkedList<>(); + + // inbound data + private ByteBuffer fixedReadBuffer = ByteBuffer.allocateDirect(256 * 1024); + private ByteBuffer readBuffer = fixedReadBuffer; + + private volatile boolean valid = true; + + private final PacketListener packetListener; + + + /** + * Create an FS4 Connection. + */ + public FS4Connection (SocketChannel channel, Listener listener, Backend backend, PacketListener packetListener) { + this.backend = backend; + this.listener = listener; + this.channel = channel; + this.idNumber = idCounter++; + this.packetListener = packetListener; + + log.log(Level.FINER, "new: "+this+", id="+idNumber + ", address=" + backend.getAddress()); + } + + + /** + * Packet sending interface. 
+ */ + public void sendPacket (BasicPacket packet, Integer channelId) throws IOException { + ByteBuffer buffer = packet.grantEncodingBuffer(channelId.intValue(), maxInitialSize); + ByteBuffer viewForPacketListener = buffer.slice(); + synchronized (this) { + if (!(valid && channel.isOpen())) { + throw new IllegalStateException("Connection is not valid. " + + "Address = " + backend.getAddress() + + ", valid = " + valid + + ", isOpen = " + channel.isOpen()); + } + + if (buffer.capacity() > maxInitialSize) { + maxInitialSize = buffer.limit(); + } + if (writeBuffer == null) { + writeBuffer = buffer; + } else { + writeBufferList.addLast(buffer); + enableWrite(); + } + write(); + } + + if (packetListener != null) + packetListener.packetSent(backend.getChannel(channelId), packet, viewForPacketListener); + } + + + /** + * The write event handler. This can be called both from the client + * thread and from the IO thread, so it needs to be synchronized. It + * assumes that IO is nonblocking, and will attempt to keep writing + * data until the system won't accept more data. + * + */ + public synchronized void write () throws IOException { + if (! channel.isOpen()) { + throw new IllegalStateException("Channel not open in write(), address=" + backend.getAddress()); + } + + try { + int bytesWritten = 0; + boolean isFinished = false; + do { + // if writeBuffer is not set we need to fetch the next buffer + if (writeBuffer == null) { + + // if the list is empty, signal the selector we do not need + // to do any writing for a while yet and bail + if (writeBufferList.isEmpty()) { + disableWrite(); + isFinished = true; + break; + } + writeBuffer = writeBufferList.removeFirst(); + } + + // invariants: we have a writeBuffer + bytesWritten = channel.write(writeBuffer); + + // buffer drained so we forget it and see what happens when we + // go around. 
if indeed we go around + if (!writeBuffer.hasRemaining()) { + writeBuffer = null; + } + } while (bytesWritten > 0); + if (!isFinished) { + enableWrite(); + } + } catch (IOException e) { + log.log(LogLevel.DEBUG, "Failed writing to channel for backend " + backend.getAddress() + + ". Closing channel", e); + try { + close(); + } catch (IOException ignored) {} + + throw e; + } + } + + + private void disableWrite() { + if (shouldWrite) { + listener.modifyInterestOpsBatch(this, SelectionKey.OP_WRITE, false); + shouldWrite = false; + } + } + + + private void enableWrite() { + if (!shouldWrite) { + listener.modifyInterestOps(this, SelectionKey.OP_WRITE, true); + shouldWrite = true; + } + } + + + + public void read () throws IOException { + if (! channel.isOpen()) { + throw new IOException("Channel not open in read(), address=" + backend.getAddress()); + } + + int bytesRead = 0; + + do { + try { + if (readBuffer == fixedReadBuffer) { + bytesRead = channel.read(readBuffer); + } else { + fixedReadBuffer.clear(); + if (readBuffer.remaining() < fixedReadBuffer.capacity()) { + fixedReadBuffer.limit(readBuffer.remaining()); + } + bytesRead = channel.read(fixedReadBuffer); + fixedReadBuffer.flip(); + readBuffer.put(fixedReadBuffer); + fixedReadBuffer.clear(); + } + } + catch (IOException e) { + // this is the "normal" way that connection closes. + log.log(Level.FINER, "Read exception address=" + backend.getAddress() + " id="+idNumber+": "+ + e.getClass().getName()+" / ", e); + bytesRead = -1; + } + + // end of file + if (bytesRead == -1) { + log.log(LogLevel.DEBUG, "Dispatch closed connection" + + " (id="+idNumber+", address=" + backend.getAddress() + ")"); + try { + close(); + } catch (Exception e) { + log.log(Level.WARNING, "Close failed, address=" + backend.getAddress(), e); + } + } + + // no more read + if (bytesRead == 0) { + // buffer too small? + if (! 
readBuffer.hasRemaining()) { + log.fine("Buffer possibly too small, extending"); + readBuffer.flip(); + extendReadBuffer(readBuffer.capacity() * 2); + } + } + + } while (bytesRead > 0); + + readBuffer.flip(); + + // hand off packet extraction + extractPackets(readBuffer); + } + + private void extractPackets(ByteBuffer readBuffer) { + for (;;) { + PacketDecoder.DecodedPacket packet = null; + try { + FS4Channel receiver = null; + int queryId = PacketDecoder.sniffChannel(readBuffer); + if (queryId == 0) { + if (PacketDecoder.isPongPacket(readBuffer)) + receiver = backend.getPingChannel(); + } + else { + receiver = backend.getChannel(new Integer(queryId)); + } + packet = PacketDecoder.extractPacket(readBuffer); + + if (packet != null) + packetListener.packetReceived(receiver, packet.packet, packet.consumedBytes); + } + catch (BufferTooSmallException e) { + log.fine("Unable to decode, extending readBuffer"); + extendReadBuffer(PacketDecoder.packetLength(readBuffer)); + return; + } + + // break out of loop if we did not get a packet out of the + // buffer so we can select and read some more + if (packet == null) { + + // if the buffer has been cleared, we can do a reset + // of the readBuffer + if ((readBuffer.position() == 0) + && (readBuffer.limit() == readBuffer.capacity())) + { + resetReadBuffer(); + } + break; + } + + backend.receivePacket(packet.packet); + } + } + + /** + * This is called when we close the connection to do any + * pending cleanup work. Closing a connection marks it as + * not valid. + */ + public void close () throws IOException { + valid = false; + channel.close(); + log.log(Level.FINER, "invalidated id="+idNumber + " address=" + backend.getAddress()); + } + + /** + * Upon asynchronous connect completion this method is called by + * the Listener. + */ + public void connect() throws IOException { + throw new RuntimeException("connect() was called, address=" + backend.getAddress() + ". 
" + + "asynchronous connect in NIO is flawed!"); + } + + /** + * Since we are performing an asynchronous connect we are initially + * only interested in the <code>OP_CONNECT</code> event. + */ + public int selectOps () { + return SelectionKey.OP_READ; + } + + /** + * Return the underlying SocketChannel used by this connection. + */ + public SocketChannel socketChannel() { + return channel; + } + + + public String toString () { + return FS4Connection.class.getName() + "/" + channel; + } + + + //============================================================ + //==== readbuffer management + //============================================================ + + + /** + * Extend the readBuffer. Make a new buffer of the requested size + * copy the contents of the readBuffer into it and assign reference + * to readBuffer instance variable. + * + * <P> + * <b>The readBuffer needs to be in "readable" (flipped) state before + * this is called and it will be in the "writeable" state when it + * returns.</b> + */ + private void extendReadBuffer (int size) { + // we specifically check this because packetLength() can return -1 + // and someone might alter the code so that we do in fact get -1 + // ...which never happens as the code is now + // + if (size == -1) { + throw new RuntimeException("Invalid buffer size requested: -1"); + } + + // if we get a size that is smaller than the current + // readBuffer capacity we just double it. not sure how wise this + // might be. + // + if (size < readBuffer.capacity()) { + size = readBuffer.capacity() * 2; + } + + ByteBuffer tmp = ByteBuffer.allocate(size); + tmp.put(readBuffer); + log.fine("Extended readBuffer to " + size + " bytes" + + "from " + readBuffer.capacity() + " bytes"); + readBuffer = tmp; + } + + /** + * Clear the readBuffer, and if temporarily allocated bigger + * buffer is in use: ditch it and reset the reference to the + * fixed readBuffer. 
+ */ + private void resetReadBuffer () { + fixedReadBuffer.clear(); + if (readBuffer == fixedReadBuffer) { + return; + } + log.fine("Resetting readbuffer"); + readBuffer = fixedReadBuffer; + } + + /** + * This method is used to determine whether the connection is still + * viable or not. All connections are initially valid, but they + * become invalid if we close the connection or something bad happens + * and the connection needs to be ditched. + */ + public boolean isValid() { + return valid; + } + +} diff --git a/container-search/src/main/java/com/yahoo/fs4/mplex/InvalidChannelException.java b/container-search/src/main/java/com/yahoo/fs4/mplex/InvalidChannelException.java new file mode 100644 index 00000000000..a5a5b01a09a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/mplex/InvalidChannelException.java @@ -0,0 +1,15 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// -*- mode: java; folded-file: t; c-basic-offset: 4 -*- + +package com.yahoo.fs4.mplex; + +/** + * @author <a href="mailto:borud@yahoo-inc.com">Bj\u00f8rn Borud</a> + */ +@SuppressWarnings("serial") +public class InvalidChannelException extends Exception +{ + public InvalidChannelException (String message) { + super(message); + } +} diff --git a/container-search/src/main/java/com/yahoo/fs4/mplex/ListenerPool.java b/container-search/src/main/java/com/yahoo/fs4/mplex/ListenerPool.java new file mode 100644 index 00000000000..5a42d26901f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/fs4/mplex/ListenerPool.java @@ -0,0 +1,56 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.fs4.mplex; + +import com.yahoo.io.FatalErrorHandler; +import com.yahoo.io.Listener; + +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Pool of com.yahoo.io.Listener instances for shared use by Vespa backend + * searchers. + * + * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a> + * @since 5.3.0 + */ +public final class ListenerPool { + private final static Logger logger = Logger.getLogger(ListenerPool.class.getName()); + private final List<Listener> listeners; + + public ListenerPool(String name, int numListeners) { + listeners = new ArrayList<>(numListeners); + FatalErrorHandler fatalErrorHandler = new FatalErrorHandler(); + for (int i = 0; i < numListeners; i++) { + Listener listener = new Listener(name + "-" + i); + listener.setFatalErrorHandler(fatalErrorHandler); + listener.start(); + listeners.add(listener); + } + } + + public Listener get(int index) { + return listeners.get(index); + } + + public int size() { + return listeners.size(); + } + + public void close() { + for (Listener listener : listeners) { + listener.interrupt(); + } + try { + for (Listener listener : listeners) { + listener.join(); + } + } catch (InterruptedException e) { + logger.log(Level.WARNING, "Got interrupted", e); + Thread.currentThread().interrupt(); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/ConfigurationException.java b/container-search/src/main/java/com/yahoo/prelude/ConfigurationException.java new file mode 100644 index 00000000000..24492831156 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/ConfigurationException.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude; + +/** + * Thrown at events which are likely caused by misconfiguration + * + * @author bratseth + */ +public class ConfigurationException extends RuntimeException { + + public ConfigurationException(String message) { + super(message); + } + + public ConfigurationException(String message, Throwable cause) { + super(message,cause); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/Freshness.java b/container-search/src/main/java/com/yahoo/prelude/Freshness.java new file mode 100644 index 00000000000..9d0b3ec06c8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/Freshness.java @@ -0,0 +1,83 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude; + +import java.util.Calendar; + +import static com.yahoo.text.Lowercase.toLowerCase; + +/** + * The parameters for a freshness query (uses the datetime http parameter) + * Parses the string part of the "datetime=<string>", converts it to seconds + * since epoch and send that plus sets the flag in the QueryX packet that + * enables freshnessboost in fsearch. + * <p> + * This is a value object + * + * @author <a href="mailto:pauran@yahoo-inc.com">Per G. 
Auran</a> + */ +public class Freshness { + + private long refSecondsSinceEpoch = 0; // reference time + + private void parse(String dateTime) { + + /** Convert dateTime string to seconds since epoch */ + if (dateTime.startsWith("now")) { + + /** Case 1: if string starts with now: special case read system time */ + refSecondsSinceEpoch = getSystemTimeInSecondsSinceEpoch(); + + /** Case 2: now can be followed by -seconds for time offset */ + if (dateTime.startsWith("now-")) { + // offset in seconds may be given + String offsetStr = dateTime.substring(4); + long timeOffset; + if ( offsetStr.length() > 0) { + timeOffset = Long.parseLong(offsetStr); + } else { + timeOffset = 1; + } + refSecondsSinceEpoch = refSecondsSinceEpoch - timeOffset; + } + } else { /** Case 3: Reftime explicitly given seconds since epoch */ + refSecondsSinceEpoch = Long.parseLong(dateTime); + } + // Need to activate freshness in the QueryX packet if enabled: See QueryPacket.java + } + + public Freshness(String dateTime) { + parse(toLowerCase(dateTime)); // Set reference time + } + + /** Calculates the current time since epoch in seconds */ + public long getSystemTimeInSecondsSinceEpoch() { + long msSinceEpochNow = Calendar.getInstance().getTimeInMillis(); + return (msSinceEpochNow/1000); + } + + /** Get the reference time as a long value (in seconds since epoch) */ + public long getRefTime() {return refSecondsSinceEpoch;} + + /** Set the reference time as a string value */ + @Override + public String toString() { + StringBuilder ser = new StringBuilder(); + /** convert long value to string */ + String dateTime = Long.toString(refSecondsSinceEpoch); + ser.append(dateTime); + return ser.toString().trim(); + } + + @Override + public boolean equals(Object other) { + if (this == other) return true; + if (! 
(other instanceof Freshness)) return false; + return ((Freshness)other).refSecondsSinceEpoch == this.refSecondsSinceEpoch; + } + + @Override + public int hashCode() { + return (int)refSecondsSinceEpoch; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/Index.java b/container-search/src/main/java/com/yahoo/prelude/Index.java new file mode 100644 index 00000000000..5b1944c352b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/Index.java @@ -0,0 +1,325 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude; + + +import com.yahoo.language.process.StemMode; + +import java.util.Iterator; +import java.util.Set; + + +/** + * Information about configured settings of a field or field collection (an actual index or not) in a search definition. + * There are two types of settings: + * <ul> + * <li><i>Typed commands</i> are checked using a particular is/get method + * <li><i>Untyped commands</i> are checked using hasCommand and commandIterator + * </ul> + * addCommand sets both types. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class Index { + + public static class Attribute { + private boolean tokenizedContent = false; + public final String name; + + public Attribute(String name) { + this.name = name; + } + + public boolean isTokenizedContent() { + return tokenizedContent; + } + + public void setTokenizedContent(boolean tokenizedContent) { + this.tokenizedContent = tokenizedContent; + } + } + + /** The null index - don't use this for name lookups */ + public static final Index nullIndex = new Index("(null)"); + + private String name; + private boolean uriIndex = false; + private boolean hostIndex = false; + private StemMode stemMode = StemMode.NONE; + private Attribute[] matchGroup = null; + private boolean isAttribute = false; + private boolean isDefaultPosition = false; + private boolean dynamicSummary=false; + private boolean highlightSummary=false; + private boolean lowercase = false; + private boolean plainTokens = false; + private boolean multivalue = false; + private boolean fastSearch = false; + private boolean normalize = false; + private boolean literalBoost = false; + private boolean numerical = false; + private long predicateUpperBound = Long.MAX_VALUE; + private long predicateLowerBound = Long.MIN_VALUE; + + /** + * True if this is an <i>exact</i> index - which should match + * tokens containing any characters + */ + private boolean exact = false; + + private boolean isNGram = false; + private int gramSize=2; + + /** + * The string terminating an exact token in this index, + * or null to use the default (space) + */ + private String exactTerminator = null; + + private Set<String> commands = new java.util.HashSet<>(); + + public Index(String name) { + this.name = name; + } + + /** + * Returns the canonical name of this index, unless it + * is the null index, which doesn't have a canonical name + */ + public String 
getName() { + return name; + } + + public boolean isUriIndex() { + return uriIndex; + } + + public boolean isDefaultPosition() { + return isDefaultPosition; + } + + public void setDefaultPosition(boolean v) { + isDefaultPosition = v; + } + + public void setUriIndex(boolean uriIndex) { + this.uriIndex = uriIndex; + } + + public boolean isHostIndex() { + return hostIndex; + } + + public void setHostIndex(boolean hostIndex) { + this.hostIndex = hostIndex; + } + + public StemMode getStemMode() { + return stemMode; + } + + public void setStemMode(StemMode stemMode) { + this.stemMode = stemMode; + } + + public void setStemMode(String name) { + this.stemMode = StemMode.valueOf(name); + } + + /** Adds a type or untyped command string to this */ + public Index addCommand(String commandString) { + if ("fullurl".equals(commandString)) { + setUriIndex(true); + } else if ("urlhost".equals(commandString)) { + setHostIndex(true); + } else if (commandString.startsWith("stem ")) { + setStemMode(commandString.substring(5)); + } else if (commandString.startsWith("stem:")) { + setStemMode(commandString.substring(5)); + } else if ("stem".equals(commandString)) { + setStemMode(StemMode.SHORTEST); + } else if ("word".equals(commandString)) { + setExact(true, null); + } else if ("exact".equals(commandString)) { + setExact(true, " "); + } else if ("dynteaser".equals(commandString)) { + setDynamicSummary(true); + } else if ("highlight".equals(commandString)) { + setHighlightSummary(true); + } else if ("lowercase".equals(commandString)) { + setLowercase(true); + } else if (commandString.startsWith("exact ")) { + setExact(true, commandString.substring(6)); + } else if (commandString.startsWith("ngram ")) { + setNGram(true,Integer.parseInt(commandString.substring(6))); + } else if (commandString.equals("attribute")) { + setAttribute(true); + } else if (commandString.equals("default-position")) { + setDefaultPosition(true); + } else if (commandString.startsWith("match-group ")) { + 
setMatchGroup(commandString.substring(12).split(" ")); + } else if (commandString.equals("plain-tokens")) { + setPlainTokens(true); + } else if (commandString.equals("multivalue")) { + setMultivalue(true); + } else if (commandString.equals("fast-search")) { + setFastSearch(true); + } else if (commandString.equals("normalize")) { + setNormalize(true); + } else if (commandString.equals("literal-boost")) { + setLiteralBoost(true); + } else if (commandString.equals("numerical")) { + setNumerical(true); + } else if (commandString.startsWith("predicate-bounds ")) { + setPredicateBounds(commandString.substring(17)); + } else { + commands.add(commandString); + } + return this; + } + + private void setPredicateBounds(String bounds) { + if ( ! bounds.startsWith("[..")) { + predicateLowerBound = Long.parseLong(bounds.substring(1, bounds.indexOf(".."))); + } else { + predicateLowerBound = Long.MIN_VALUE; + } + if ( ! bounds.endsWith("..]")) { + predicateUpperBound = Long.parseLong(bounds.substring(bounds.indexOf("..") + 2, bounds.length() - 1)); + } else { + predicateUpperBound = Long.MAX_VALUE; + } + + } + + /** + * Whether terms in this field are lower cased when indexing. + * + * @param lowercase true if terms are lowercased + */ + public void setLowercase(boolean lowercase) { + this.lowercase = lowercase; + } + + /** + * Whether terms in this field are lower cased when indexing. 
+ * + * @return true if terms are lowercased + */ + public boolean isLowercase() { + return lowercase; + } + + /** Returns an iterator of all the untyped commands of this */ + public Iterator<String> commandIterator() { + return commands.iterator(); + } + + /** Checks whether this has the given (exact) <i>untyped</i> command string */ + public boolean hasCommand(String commandString) { + return commands.contains(commandString); + } + + /** + * Set whether this index should match any kind of characters + * + * @param exact true to make this index match any kind of characters, not just word and digit ones + * @param terminator the terminator of an exact sequence (one or more characters), + * or null to use the default (space) + */ + public void setExact(boolean exact, String terminator) { + this.exact = exact; + this.exactTerminator = terminator; + } + + /** Returns whether this is an exact index, which should match tokens containing any characters */ + public boolean isExact() { return exact; } + + /** Returns the string terminating an exact sequence in this index, or null to use the default (space) */ + public String getExactTerminator() { return exactTerminator; } + + /** Returns true if this is an ngram index (default: false) */ + public boolean isNGram() { return isNGram; } + + /** Returns the gram size. 
Only used if isNGram is true (default: 2)*/ + public int getGramSize() { return gramSize; } + + public void setNGram(boolean nGram,int gramSize) { + this.isNGram=nGram; + this.gramSize=gramSize; + } + + public void setDynamicSummary(boolean dynamicSummary) { this.dynamicSummary=dynamicSummary; } + public boolean getDynamicSummary() { return dynamicSummary; } + + public void setHighlightSummary(boolean highlightSummary) { this.highlightSummary=highlightSummary; } + public boolean getHighlightSummary() { return highlightSummary; } + + /** Returns true if this is the null index */ + // TODO: Replace by == Index.null + public boolean isNull() { + return "(null)".equals(name); + } + + public Attribute[] getMatchGroup() { // TODO: Not in use on Vespa 6 + return matchGroup; + } + + public void setMatchGroup(String[] attributes) { + Attribute[] a = new Attribute[attributes.length]; + + for (int i = 0; i < attributes.length; i++) { + a[i] = new Attribute(attributes[i].trim()); + } + this.matchGroup = a; + } + + public boolean isAttribute() { + return isAttribute; + } + + public void setAttribute(boolean isAttribute) { + this.isAttribute = isAttribute; + } + + public boolean hasPlainTokens() { + return plainTokens; + } + + public void setPlainTokens(boolean plainTokens) { + this.plainTokens = plainTokens; + } + + public void setMultivalue(boolean multivalue) { this.multivalue = multivalue; } + + /** Returns true if this is a multivalue field */ + public boolean isMultivalue() { return multivalue; } + + public void setFastSearch(boolean fastSearch) { this.fastSearch = fastSearch; } + + /** Returns true if this is an attribute with fastsearch turned on */ + public boolean isFastSearch() { return fastSearch; } + + public void setNormalize(boolean normalize) { this.normalize = normalize; } + + /** Returns true if the content of this index is normalized */ + public boolean getNormalize() { return normalize; } + + public boolean getLiteralBoost() { return literalBoost; } + + 
public void setLiteralBoost(boolean literalBoost) { this.literalBoost = literalBoost; } + + public void setNumerical(boolean numerical) { this.numerical = numerical; } + + public boolean isNumerical() { return numerical; } + + public long getPredicateUpperBound() { return predicateUpperBound; } + + public long getPredicateLowerBound() { return predicateLowerBound; } + + @Override + public String toString() { + return "index '" + getName() + "'"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java b/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java new file mode 100644 index 00000000000..9a079c0d23b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java @@ -0,0 +1,440 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude; + + +import com.google.common.collect.ImmutableList; +import com.yahoo.language.process.StemMode; +import com.yahoo.search.Query; + +import java.util.*; + +import static com.yahoo.text.Lowercase.toLowerCase; + +/** + * A central repository for information about indices. Standard usage is + * + * <pre><code> + * IndexFacts.Session session = indexFacts.newSession(query); // once when starting to process a query + * session.getIndex(indexName).[get index info] + * </code></pre> + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +// TODO: We should replace this with a better representation of search definitions +// which is immutable, models clusters and search definitions inside clusters properly, +// and uses better names. 
+public class IndexFacts { + + private Map<String, List<String>> clusterByDocument; + + private static class DocumentTypeListOffset { + public final int offset; + public final SearchDefinition searchDefinition; + + public DocumentTypeListOffset(int offset, SearchDefinition searchDefinition) { + this.offset = offset; + this.searchDefinition = searchDefinition; + } + } + + /** A Map of all known search definitions indexed by name */ + private Map<String, SearchDefinition> searchDefinitions = new LinkedHashMap<>(); + + /** A map of document types contained in each cluster indexed by cluster name */ + private Map<String, List<String>> clusters = new LinkedHashMap<>(); + + /** + * The name of the default search definition, which is the union of all + * known document types. + */ + public static final String unionName = "unionOfAllKnown"; + + /** A search definition which contains the union of all settings. */ + private SearchDefinition unionSearchDefinition=new SearchDefinition(unionName); + + private boolean frozen; + + /** Whether this has (any) NGram indexes. Calculated at freeze time. 
*/ + private boolean hasNGramIndices; + + public IndexFacts() {} + + @SuppressWarnings({"deprecation"}) + public IndexFacts(IndexModel indexModel) { + if (indexModel.getSearchDefinitions() != null && indexModel.getUnionSearchDefinition() != null) { + setSearchDefinitions(indexModel.getSearchDefinitions(), indexModel.getUnionSearchDefinition()); + } + if (indexModel.getMasterClusters() != null) { + setMasterClusters(indexModel.getMasterClusters()); + } + } + + private void setMasterClusters(Map<String, List<String>> clusters) { + // TODO: clusters should probably be a separate class + this.clusters = clusters; + clusterByDocument = invert(clusters); + } + + private static Map<String, List<String>> invert(Map<String, List<String>> clusters) { + Map<String, List<String>> result = new HashMap<>(); + for (Map.Entry<String,List<String>> entry : clusters.entrySet()) { + for (String value : entry.getValue()) { + addEntry(result, value, entry.getKey()); + } + } + return result; + } + + private static void addEntry(Map<String, List<String>> result, String key, String value) { + List<String> values = result.get(key); + if (values == null) { + values = new ArrayList<>(); + result.put(key, values); + } + values.add(value); + } + + // Assumes that document names are equal to the search definition that contain them. + public List<String> clustersHavingSearchDefinition(String searchDefinitionName) { + if (clusterByDocument == null) + return Collections.emptyList(); + + List<String> clusters = clusterByDocument.get(searchDefinitionName); + return clusters != null ? clusters : Collections.<String>emptyList(); + } + + /** + * Public only for testing. 
+ */ + public void setClusters(Map<String, List<String>> clusters) { + ensureNotFrozen(); + this.clusters = clusters; + clusterByDocument = invert(clusters); + } + + public void setSearchDefinitions(Map<String, SearchDefinition> searchDefinitions, + SearchDefinition unionSearchDefinition) { + ensureNotFrozen(); + this.searchDefinitions = searchDefinitions; + this.unionSearchDefinition = unionSearchDefinition; + } + + private boolean isInitialized() { + return searchDefinitions.size() > 0; + } + + private boolean isIndexFromDocumentTypes(String indexName, List<String> documentTypes) { + if (!isInitialized()) return true; + + if (documentTypes.isEmpty()) { + return unionSearchDefinition.getIndex(indexName) != null; + } + + DocumentTypeListOffset sd = chooseSearchDefinition(documentTypes, 0); + while (sd != null) { + Index index = sd.searchDefinition.getIndex(indexName); + if (index != null) { + return true; + } + sd = chooseSearchDefinition(documentTypes, sd.offset); + } + + return false; + } + + private String getCanonicNameFromDocumentTypes(String indexName, List<String> documentTypes) { + if (!isInitialized()) return indexName; + + if (documentTypes.isEmpty()) { + Index index = unionSearchDefinition.getIndexByLowerCase(toLowerCase(indexName)); + return index == null ? indexName : index.getName(); + } + DocumentTypeListOffset sd = chooseSearchDefinition(documentTypes, 0); + while (sd != null) { + Index index = sd.searchDefinition.getIndexByLowerCase(toLowerCase(indexName)); + if (index != null) return index.getName(); + sd = chooseSearchDefinition(documentTypes, sd.offset); + } + return indexName; + } + + private Index getIndexFromDocumentTypes(String indexName, List<String> documentTypes) { + if (indexName==null || indexName.isEmpty()) + indexName="default"; + + return getIndexByCanonicNameFromDocumentTypes(indexName, documentTypes); + } + + private Index getIndexByCanonicNameFromDocumentTypes(String canonicName, List<String> documentTypes) { + if ( ! 
isInitialized()) return Index.nullIndex; + + if (documentTypes.isEmpty()) { + Index index = unionSearchDefinition.getIndex(canonicName); + if (index == null) return Index.nullIndex; + return index; + } + + DocumentTypeListOffset sd = chooseSearchDefinition(documentTypes, 0); + while (sd != null) { + Index index = sd.searchDefinition.getIndex(canonicName); + + if (index != null) return index; + sd = chooseSearchDefinition(documentTypes, sd.offset); + } + return Index.nullIndex; + } + + /** Calls resolveDocumentTypes(query.getModel().getSources(), query.getModel().getRestrict()) */ + private Set<String> resolveDocumentTypes(Query query) { + // Assumption: Search definition name equals document name. + return resolveDocumentTypes(query.getModel().getSources(), query.getModel().getRestrict(), + searchDefinitions.keySet()); + } + + /** + * Given a search list which is a mixture of document types and cluster + * names, and a restrict list which is a list of document types, return a + * set of all valid document types for this combination. Most use-cases for + * fetching index settings will involve calling this method with the the + * incoming query's {@link com.yahoo.search.query.Model#getSources()} and + * {@link com.yahoo.search.query.Model#getRestrict()} as input parameters + * before calling any other method of this class. + * + * @param sources the search list for a query + * @param restrict the restrict list for a query + * @return a (possibly empty) set of valid document types + */ + private Set<String> resolveDocumentTypes(Collection<String> sources, Collection<String> restrict, + Set<String> candidateDocumentTypes) { + sources = emptyCollectionIfNull(sources); + restrict = emptyCollectionIfNull(restrict); + + if (sources.isEmpty()) { + if ( ! 
restrict.isEmpty()) { + return new TreeSet<>(restrict); + } else { + return candidateDocumentTypes; + } + } + + Set<String> toSearch = new TreeSet<>(); + for (String source : sources) { // source: a document type or a cluster containing them + List<String> clusterDocTypes = clusters.get(source); + if (clusterDocTypes == null) { // source was a document type + if (candidateDocumentTypes.contains(source)) { + toSearch.add(source); + } + } else { // source was a cluster, having document types + for (String documentType : clusterDocTypes) { + if (candidateDocumentTypes.contains(documentType)) { + toSearch.add(documentType); + } + } + } + } + + if ( ! restrict.isEmpty()) { + toSearch.retainAll(restrict); + } + + return toSearch; + } + + private Collection<String> emptyCollectionIfNull(Collection<String> collection) { + return collection == null ? Collections.<String>emptyList() : collection; + } + + /** + * Chooses the correct search definition, default if in doubt. + * + * @return the search definition to use + */ + private DocumentTypeListOffset chooseSearchDefinition(List<String> documentTypes, int index) { + while (index < documentTypes.size()) { + String docName = documentTypes.get(index++); + SearchDefinition sd = searchDefinitions.get(docName); + if (sd != null) { + return new DocumentTypeListOffset(index, sd); + } + } + return null; + } + + /** + * Freeze this to prevent further changes. + */ + public void freeze() { + hasNGramIndices = hasNGramIndices(); + // TODO: Freeze content! + frozen = true; + } + + /** Whether this contains any index which has isNGram()==true. This is free to ask on a frozen instance. 
*/ + public boolean hasNGramIndices() { + if (frozen) return hasNGramIndices; + for (Map.Entry<String,SearchDefinition> searchDefinition : searchDefinitions.entrySet()) { + for (Index index : searchDefinition.getValue().indices().values()) + if (index.isNGram()) return true; + } + return false; + } + + /** + * @return whether it is permissible to update this object + */ + public boolean isFrozen() { + return frozen; + } + + private void ensureNotFrozen() { + if (frozen) { + throw new IllegalStateException("Tried to modify frozen IndexFacts instance."); + } + } + + + /** + * Add a string to be accepted as an index name when parsing a + * query. + * + * For testing only. + * + * @param sdName name of search definition containing index, if null, modify default set + * @param indexName name of index, actual or otherwise + */ + public void addIndex(String sdName, String indexName) { + ensureNotFrozen(); + + SearchDefinition sd; + if (sdName == null) { + sd = unionSearchDefinition; + } else if (searchDefinitions.containsKey(sdName)) { + sd = searchDefinitions.get(sdName); + } else { + sd = new SearchDefinition(sdName); + searchDefinitions.put(sdName, sd); + } + sd.getOrCreateIndex(indexName); + unionSearchDefinition.getOrCreateIndex(indexName); + } + + /** + * Adds an index to the specified index, and the default index settings, + * overriding any current settings for this index + */ + public void addIndex(String sdName, Index index) { + ensureNotFrozen(); + + SearchDefinition sd; + if (sdName == null) { + sd = unionSearchDefinition; + } else if (searchDefinitions.containsKey(sdName)) { + sd = searchDefinitions.get(sdName); + } else { + sd = new SearchDefinition(sdName); + searchDefinitions.put(sdName, sd); + } + sd.addIndex(index); + unionSearchDefinition.addIndex(index); + } + + public String getDefaultPosition(String sdName) { + SearchDefinition sd; + if (sdName == null) { + sd = unionSearchDefinition; + } else if (searchDefinitions.containsKey(sdName)) { + sd = 
searchDefinitions.get(sdName); + } else { + return null; + } + + return sd.getDefaultPosition(); + } + + public Session newSession(Query query) { + return new Session(query); + } + + public Session newSession(Collection<String> sources, Collection<String> restrict) { + return new Session(sources, restrict); + } + + public Session newSession(Collection<String> sources, Collection<String> restrict, + Set<String> candidateDocumentTypes) { + return new Session(sources, restrict, candidateDocumentTypes); + } + + /** + * Create an instance of this to look up index facts with a given query. + * Note that if the model.source or model.restrict parameters of the query + * is changed another session should be created. This is immutable. + */ + public class Session { + + private final List<String> documentTypes; + + private Session(Query query) { + documentTypes = ImmutableList.copyOf(resolveDocumentTypes(query)); + } + + private Session(Collection<String> sources, Collection<String> restrict) { + // Assumption: Search definition name equals document name. + documentTypes = ImmutableList.copyOf(resolveDocumentTypes(sources, restrict, searchDefinitions.keySet())); + } + + private Session(Collection<String> sources, Collection<String> restrict, Set<String> candidateDocumentTypes) { + documentTypes = ImmutableList.copyOf(resolveDocumentTypes(sources, restrict, candidateDocumentTypes)); + } + + /** + * Returns the index for this name. + * + * @param indexName the name of the index. If this is null or empty the index + * named "default" is returned + * @return the index best matching the input parameters or the nullIndex + * (never null) if none is found + */ + public Index getIndex(String indexName) { + return IndexFacts.this.getIndexFromDocumentTypes(indexName, documentTypes); + } + + /** Returns an index given from a given search definition */ + // Note: This does not take the context into account currently. 
+ // Ideally, we should be able to resolve the right search definition name + // in the context of the searched clusters, but this cannot be modelled + // currently by the flat structure in IndexFacts. + // That can be fixed without changing this API. + public Index getIndex(String indexName, String documentType) { + return IndexFacts.this.getIndexFromDocumentTypes(indexName, Collections.singletonList(documentType)); + } + + /** + * Returns the canonical form of the index name (Which may be the same as + * the input). + * + * @param indexName index name or alias + */ + public String getCanonicName(String indexName) { + return IndexFacts.this.getCanonicNameFromDocumentTypes(indexName, documentTypes); + } + + /** + * Returns whether the given name is an index. + * + * @param indexName index name candidate + */ + public boolean isIndex(String indexName) { + return IndexFacts.this.isIndexFromDocumentTypes(indexName, documentTypes); + } + + /** Returns an immutable list of the document types this has resolved to */ + public List<String> documentTypes() { return documentTypes; } + + @Override + public String toString() { + return "index facts for search definitions " + documentTypes; + } + + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/IndexModel.java b/container-search/src/main/java/com/yahoo/prelude/IndexModel.java new file mode 100644 index 00000000000..a4e08accd48 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/IndexModel.java @@ -0,0 +1,133 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.logging.Logger; + +import com.yahoo.log.LogLevel; +import com.yahoo.search.config.IndexInfoConfig; +import com.yahoo.container.QrSearchersConfig; + +/** + * Parameter class used for construction IndexFacts. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public final class IndexModel { + private static final Logger log = Logger.getLogger(IndexModel.class.getName()); + + // Copied from MasterClustersInfoUpdater. It's a temporary workaround for IndexFacts. + private Map<String, List<String>> masterClusters; + private Map<String, SearchDefinition> searchDefinitions; + private SearchDefinition unionSearchDefinition; + + /** + * Use IndexModel as a pure wrapper for the parameters given. + */ + public IndexModel(Map<String, List<String>> masterClusters, + Map<String, SearchDefinition> searchDefinitions, + SearchDefinition unionSearchDefinition) { + this.masterClusters = masterClusters; + this.searchDefinitions = searchDefinitions; + this.unionSearchDefinition = unionSearchDefinition; + } + + public IndexModel(IndexInfoConfig indexInfo, Map<String, List<String>> clusters) { + if (indexInfo != null) { + setDefinitions(indexInfo); + } else { + searchDefinitions = null; + unionSearchDefinition = null; + } + this.masterClusters = clusters; + } + + public IndexModel(IndexInfoConfig indexInfo, QrSearchersConfig clusters) { + if (indexInfo != null) { + setDefinitions(indexInfo); + } else { + searchDefinitions = null; + unionSearchDefinition = null; + } + if (clusters != null) { + setMasterClusters(clusters); + } else { + masterClusters = null; + } + } + + private void setMasterClusters(QrSearchersConfig config) { + masterClusters = new HashMap<>(); + for (int i = 0; i < config.searchcluster().size(); ++i) { + List<String> docTypes = new ArrayList<>(); + String 
clusterName = config.searchcluster(i).name(); + for (int j = 0; j < config.searchcluster(i).searchdef().size(); ++j) { + docTypes.add(config.searchcluster(i).searchdef(j)); + } + masterClusters.put(clusterName, docTypes); + } + } + + private void setDefinitions(IndexInfoConfig c) { + searchDefinitions = new HashMap<>(); + unionSearchDefinition = new SearchDefinition(IndexFacts.unionName); + + for (Iterator<IndexInfoConfig.Indexinfo> i = c.indexinfo().iterator(); i.hasNext();) { + IndexInfoConfig.Indexinfo info = i.next(); + + SearchDefinition sd = new SearchDefinition(info.name()); + + for (Iterator<IndexInfoConfig.Indexinfo.Command> j = info.command().iterator(); j.hasNext();) { + IndexInfoConfig.Indexinfo.Command command = j.next(); + sd.addCommand(command.indexname(),command.command()); + unionSearchDefinition.addCommand(command.indexname(),command.command()); + } + + sd.fillMatchGroups(); + searchDefinitions.put(info.name(), sd); + } + unionSearchDefinition.fillMatchGroups(); + + for (IndexInfoConfig.Indexinfo info : c.indexinfo()) { + + SearchDefinition sd = searchDefinitions.get(info.name()); + + for (IndexInfoConfig.Indexinfo.Alias alias : info.alias()) { + String aliasString = alias.alias(); + String indexString = alias.indexname(); + + sd.addAlias(aliasString, indexString); + try { + unionSearchDefinition.addAlias(aliasString, indexString); + } catch (RuntimeException e) { + log.log(LogLevel.WARNING, + "Ignored the alias \"" + + aliasString + + "\" for \"" + + indexString + + "\" in the union of all search definitions," + + " source has to be explicitly set to \"" + + sd.getName() + + "\" for that alias to work.", e); + } + } + } + } + + public Map<String, List<String>> getMasterClusters() { + return masterClusters; + } + + public Map<String, SearchDefinition> getSearchDefinitions() { + return searchDefinitions; + } + + public SearchDefinition getUnionSearchDefinition() { + return unionSearchDefinition; + } + +} diff --git 
a/container-search/src/main/java/com/yahoo/prelude/Location.java b/container-search/src/main/java/com/yahoo/prelude/Location.java new file mode 100644 index 00000000000..10d63051cbe --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/Location.java @@ -0,0 +1,379 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude; + +import com.yahoo.text.Utf8; + +import java.nio.ByteBuffer; +import java.util.StringTokenizer; + +/** + * Location data for a geographical query. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author arnej27959 + */ +public class Location { + + // 1 or 2 + private int dimensions = 0; + + // line elements and rectangles + private int x1 = 0; + private int y1 = 0; + private int x2 = 1; + private int y2 = 1; + + // center(x,y), radius + private int x = 1; + private int y = 1; + private int r = 1; + + // next three are now UNUSED + // ranking table, rank multiplier (scale) + // {0, 1} an int to make parsing and rendering the hit even simpler + private int tableId = 0; + private int s = 1; + private int replace = 0; + + private boolean renderCircle = false; + private boolean renderRectangle = false; + private long aspect = 0; + + private String attribute; + + public boolean equals(Object other) { + if (! 
(other instanceof Location)) return false; + Location l = (Location)other; + return dimensions == l.dimensions + && renderCircle == l.renderCircle + && renderRectangle == l.renderRectangle + && aspect == l.aspect + && x1 == l.x1 + && x2 == l.x2 + && y1 == l.y1 + && y2 == l.y2 + && x == l.x + && y == l.y + && r == l.r; + } + + public boolean hasDimensions() { + return dimensions != 0; + } + public void setDimensions(int d) { + if (hasDimensions() && dimensions != d) { + throw new IllegalArgumentException("already has dimensions="+dimensions+", cannot change it to "+d); + } + if (d == 1 || d == 2) { + dimensions = d; + } else { + throw new IllegalArgumentException("Illegal location, dimensions must be 1 or 2, but was: "+d); + } + } + public int getDimensions() { + return dimensions; + } + + // input data are degrees n/e (if positive) or s/w (if negative) + public void setBoundingBox(double n, double s, + double e, double w) + { + setDimensions(2); + if (hasBoundingBox()) { + throw new IllegalArgumentException("can only set bounding box once"); + } + int px1 = (int) (Math.round(w * 1000000)); + int px2 = (int) (Math.round(e * 1000000)); + int py1 = (int) (Math.round(s * 1000000)); + int py2 = (int) (Math.round(n * 1000000)); + if (px1 > px2) { + throw new IllegalArgumentException("cannot have w > e"); + } + x1 = px1; + x2 = px2; + if (py1 > py2) { + throw new IllegalArgumentException("cannot have s > n"); + } + y1 = py1; + y2 = py2; + renderRectangle = true; + } + + private void adjustAspect() { + //calculate aspect based on latitude (elevation angle) + //no need to "optimize" for special cases, exactly 0, 30, 45, 60, or 90 degrees won't be input anyway + double degrees = (double) y / 1000000d; + if (degrees <= -90.0 || degrees >= +90.0) { + aspect = 0; + return; + } + double radians = degrees * Math.PI / 180d; + double cosLatRadians = Math.cos(radians); + aspect = (long) (cosLatRadians * 4294967295L); + } + + public void setGeoCircle(double ns, double ew, double 
radius_in_degrees) { + setDimensions(2); + if (isGeoCircle()) { + throw new IllegalArgumentException("can only set geo circle once"); + } + int px = (int) (ew * 1000000); + int py = (int) (ns * 1000000); + int pr = (int) (radius_in_degrees * 1000000); + if (ew < -180.1 || ew > +180.1) { + throw new IllegalArgumentException("e/w location must be in range [-180,+180]"); + } + if (ns < -90.1 || ns > +90.1) { + throw new IllegalArgumentException("n/s location must be in range [-90,+90]"); + } + if (radius_in_degrees < 0 || radius_in_degrees > 180.0) { + throw new IllegalArgumentException("radius must be in range [0,180] degrees, approximately upto 20000km"); + } + x = px; + y = py; + r = pr; + renderCircle = true; + adjustAspect(); + } + + public void setXyCircle(int px, int py, int radius_in_units) { + setDimensions(2); + if (isGeoCircle()) { + throw new IllegalArgumentException("can only set geo circle once"); + } + if (radius_in_units < 0) { + throw new IllegalArgumentException("radius must be positive"); + } + x = px; + y = py; + r = radius_in_units; + renderCircle = true; + } + + private void parseRectangle(String rectangle) { + int endof = rectangle.indexOf(']'); + if (endof == -1) { + throw new IllegalArgumentException("Illegal location syntax: "+rectangle); + } + String rectPart = rectangle.substring(1,endof); + StringTokenizer tokens = new StringTokenizer(rectPart, ","); + setDimensions(Integer.parseInt(tokens.nextToken())); + if (dimensions == 1) { + x1 = Integer.parseInt(tokens.nextToken()); + x2 = Integer.parseInt(tokens.nextToken()); + if (tokens.hasMoreTokens()) { + throw new IllegalArgumentException("Illegal location syntax: "+rectangle); + } + } else if (dimensions == 2) { + x1 = Integer.parseInt(tokens.nextToken()); + y1 = Integer.parseInt(tokens.nextToken()); + x2 = Integer.parseInt(tokens.nextToken()); + y2 = Integer.parseInt(tokens.nextToken()); + } + renderRectangle = true; + String theRest = rectangle.substring(endof+1).trim(); + if 
(theRest.length() >= 15 && theRest.charAt(0) == '(') { + parseCircle(theRest); + } + } + + private void parseCircle(String circle) { + int endof = circle.indexOf(')'); + if (endof == -1) { + throw new IllegalArgumentException("Illegal location syntax: "+circle); + } + String circlePart = circle.substring(1,endof); + StringTokenizer tokens = new StringTokenizer(circlePart, ","); + setDimensions(Integer.parseInt(tokens.nextToken())); + x = Integer.parseInt(tokens.nextToken()); + if (dimensions == 2) { + y = Integer.parseInt(tokens.nextToken()); + } + r = Integer.parseInt(tokens.nextToken()); + Integer.parseInt(tokens.nextToken()); // was "tableId" + Integer.parseInt(tokens.nextToken()); // was "scale" (multiplier) + Integer.parseInt(tokens.nextToken()); // was "replace" + + if (dimensions == 1) { + if (tokens.hasMoreTokens()) { + throw new IllegalArgumentException("Illegal location syntax: "+circle); + } + } + else { + if (tokens.hasMoreTokens()) { + String aspectToken = tokens.nextToken(); + if (aspectToken.equalsIgnoreCase("CalcLatLon")) { + adjustAspect(); + } else { + try { + aspect = Long.parseLong(aspectToken); + } catch (NumberFormatException nfe) { + throw new IllegalArgumentException("Aspect "+aspectToken+" for location must be an integer or 'CalcLatLon' for automatic aspect calculation.", nfe); + } + if (aspect > 4294967295L || aspect < 0) { + throw new IllegalArgumentException("Aspect "+aspect+" for location parameter must be less than 4294967296 (2^32)"); + } + } + } + } + renderCircle = true; + String theRest = circle.substring(endof+1).trim(); + if (theRest.length() > 5 && theRest.charAt(0) == '[') { + parseRectangle(theRest); + } + } + + public Location() {} + + public Location(String rawLocation) { + int attributeSepPos = rawLocation.indexOf(':'); + String locationSpec = rawLocation; + if (attributeSepPos != -1) { + String tempAttribute = rawLocation.substring(0, attributeSepPos); + if (tempAttribute != null && !tempAttribute.isEmpty()) { + attribute 
= tempAttribute; + } + locationSpec = rawLocation.substring(attributeSepPos+1); + } + + if (locationSpec.charAt(0) == '[') { + parseRectangle(locationSpec); + } + else if (locationSpec.charAt(0) == '(') { + parseCircle(locationSpec); + } + else { + throw new IllegalArgumentException("Illegal location syntax"); + } + } + + public String toString() { + StringBuilder ser = new StringBuilder(); + if (attribute != null) { + ser.append(attribute).append(':'); + } + if (renderRectangle) { + ser.append("[").append(dimensions).append(","); + if (dimensions == 1) { + ser.append(x1).append(","). + append(x2); + } + else { + ser.append(x1).append(","). + append(y1).append(","). + append(x2).append(","). + append(y2); + } + ser.append("]"); + } + if (renderCircle) { + ser.append("(").append(dimensions).append(",").append(x); + if (dimensions == 2) { + ser.append(",").append(y); + } + ser.append(",").append(r). + append(",").append(tableId). + append(",").append(s). + append(",").append(replace); + if (dimensions == 2 && aspect != 0) { + ser.append(",").append(aspect); + } + ser.append(")"); + } + return ser.toString(); + } + + /** + * Returns width of bounding box (actual width if rectangle, bounding square if circle) + * @return width of bounding box + */ + public int getBoundingWidth() { + if (renderCircle) { + return r * 2; + } else { + return x2 - x1; + } + } + + /** + * Returns height of bounding box (actual height if rectangle, bounding square if circle) + * @return height of bounding box + */ + public int getBoundingHeight() { + if (renderCircle) { + return r * 2; + } else { + return y2 - y1; + } + } + + public int hashCode() { + return toString().hashCode(); + } + + public boolean hasAttribute() { + return attribute != null; + } + public String getAttribute() { + return attribute; + } + public void setAttribute(String attributeName) { + attribute = attributeName; + } + + /** check whether this Location contains a 2D circle */ + public boolean isGeoCircle() { + return 
(renderCircle && dimensions==2); + } + + public boolean hasBoundingBox() { + return renderRectangle; + } + + private void checkGeoCircle() { + if (!isGeoCircle()) { + throw new IllegalArgumentException("only geo circles support this api"); + } + } + + /** + * Obtain degrees latitude (North-South direction); negative numbers are degrees South. + * Expected range is [-90.0,+90.0] only. + * May only be called when isGeoCircle() returns true. + **/ + public double degNS() { + checkGeoCircle(); + return 0.000001 * y; + } + + /** + * Obtain degrees longitude (East-West direction); negative numbers are degrees West. + * Expected range is [-180.0,+180.0] only. + * May only be called when isGeoCircle() returns true. + **/ + public double degEW() { + checkGeoCircle(); + return 0.000001 * x; + } + + /** + * Obtain circle radius (in degrees). + * May only be called when isGeoCircle() returns true. + **/ + public double degRadius() { + checkGeoCircle(); + return 0.000001 * r; + } + + /** + * Encodes the location to the given buffer and returns the length. + * For internal use. + */ + public int encode(ByteBuffer buffer) { + byte[] loc = Utf8.toBytes(toString()); + buffer.put(loc); + return loc.length; + } + + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/Ping.java b/container-search/src/main/java/com/yahoo/prelude/Ping.java new file mode 100644 index 00000000000..ce8f1cba399 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/Ping.java @@ -0,0 +1,29 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude; + +/** + * A ping, typically to ask whether backend is alive. 
/**
 * A ping, typically to ask whether backend is alive.
 * Instances are immutable.
 *
 * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
 */
public class Ping {

    /** The timeout used when none is specified */
    private static final long DEFAULT_TIMEOUT = 500;

    /** How long to wait for a pong */
    // NOTE(review): the unit is not stated anywhere in this class - presumably milliseconds; confirm against callers
    private final long timeout;

    /** Creates a ping with the default timeout (500) */
    public Ping() {
        this(DEFAULT_TIMEOUT);
    }

    /**
     * Creates a ping with the given timeout.
     *
     * @param timeout how long to wait for a pong
     */
    public Ping(long timeout) {
        this.timeout = timeout;
    }

    /** Returns how long to wait for a pong */
    public long getTimeout() {
        return timeout;
    }

    @Override
    public String toString() {
        return "Ping(timeout = " + timeout + ")";
    }

}
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class Pong { + + private String pingInfo=""; + private List<ErrorMessage> errors = new ArrayList<>(1); + private List<PongPacket> pongPackets = new ArrayList<>(1); + private ElapsedTime elapsed = new ElapsedTime(); + + public Pong() { + } + public void addError(ErrorMessage error) { + errors.add(error); + } + public ErrorMessage getError(int i) { + return errors.get(i); + } + public int getErrorSize() { + return errors.size(); + } + public void addPongPacket(PongPacket pongPacket) { + pongPackets.add(pongPacket); + } + public PongPacket getPongPacket(int i) { + return pongPackets.get(i); + } + public int getPongPacketsSize() { + return pongPackets.size(); + } + /** Merge all information from another pong into this */ + public void merge(Pong pong) { + if (pong.badResponse()) { + errors.addAll(pong.getErrors()); + } + pongPackets.addAll(pong.getPongPackets()); + } + public List<ErrorMessage> getErrors() { + return Collections.unmodifiableList(errors); + } + public List<PongPacket> getPongPackets() { + return Collections.unmodifiableList(pongPackets); + } + /** @return whether there is an error or not */ + public boolean badResponse() { + return !errors.isEmpty(); + } + + /** Sets information about the ping used to produce this. This is included when returning the tostring of this. 
*/ + public void setPingInfo(String pingInfo) { + if (pingInfo==null) + pingInfo=""; + this.pingInfo=pingInfo; + } + + /** Returns information about the ping use, or "" (never null) if none */ + public String getPingInfo() { return pingInfo; } + + public ElapsedTime getElapsedTime() { + return elapsed; + } + + /** Returns a string which included the ping info (if any) and any errors added to this */ + public @Override String toString() { + StringBuffer m=new StringBuffer("Result of pinging"); + if (pingInfo.length() > 0) { + m.append(" using "); + m.append(pingInfo); + } + m.append(" "); + for (int i=0; i<errors.size(); i++) { + m.append(errors.get(i).toString()); + if (i<errors.size()-1) + m.append(", "); + } + return m.toString(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/SearchDefinition.java b/container-search/src/main/java/com/yahoo/prelude/SearchDefinition.java new file mode 100644 index 00000000000..0cec7cfc19d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/SearchDefinition.java @@ -0,0 +1,125 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude; + +import com.yahoo.prelude.Index.Attribute; + +import java.util.HashMap; +import java.util.Map; + +import static com.yahoo.text.Lowercase.toLowerCase; + +/** + * An object for storing information about search definitions in the centralized + * directory services. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +// TODO: Make freezable! +public class SearchDefinition { + + private String name; + + /** A map of all indices in this search definition, indexed by name */ + private Map<String, Index> indices = new HashMap<>(); + + /* + * A map of all indices in this search definition, indexed by lower cased + * name. 
+ */ + private Map<String, Index> lowerCase = new HashMap<>(); + + private String defaultPosition; + + public SearchDefinition(String name) { + this.name = name; + } + + public String getName() { return name; } + + public String getDefaultPosition() { + return defaultPosition; + } + + public void addIndex(Index index) { + indices.put(index.getName(), index); + lowerCase.put(toLowerCase(index.getName()), index); + if (index.isDefaultPosition()) { + defaultPosition = index.getName(); + } + } + + public void addAlias(String alias, String indexName) { + Index old; + + if ((old = indices.get(alias)) != null) { + if (old.getName().equals(indexName)) { + return; + } else { + throw new IllegalArgumentException("Tried adding the alias \"" + + alias + "\" for the index name \"" + indexName + + "\" when the name \"" + alias + + "\" already maps to \"" + old.getName() + "\"."); + } + } + Index index = indices.get(indexName); + if (index == null) { + throw new IllegalArgumentException("Failed adding alias \"" + alias + + "\" for the index name \"" + indexName + + "\" as there is no index with that name available."); + } + indices.put(alias, index); + String lca = toLowerCase(alias); + if (lowerCase.get(lca) == null) { + lowerCase.put(lca, index); + } + } + + public Index getIndex(String name) { + return indices.get(name); + } + + public Index getIndexByLowerCase(String name) { + return lowerCase.get(name); + } + + /** Returns the indices of this as a map */ + public Map<String,Index> indices() { + return indices; + } + + public Index getOrCreateIndex(String name) { + Index idx = getIndex(name); + if (idx != null) { + return idx; + } + idx = new Index(name); + addIndex(idx); + return idx; + } + + public void addCommand(String indexName, String commandString) { + Index index = getOrCreateIndex(indexName); + index.addCommand(commandString); + if (index.isDefaultPosition()) { + defaultPosition = index.getName(); + } + } + + public void fillMatchGroups() { + for (Index i : 
/**
 * Retrieves Vespa-Version from the manifest file.
 *
 * <p>The version is read once, when this class is initialized. If the manifest
 * cannot be found or read, or has no Vespa-Version attribute, the version is
 * reported as "not available." rather than failing class initialization.</p>
 *
 * @author tonytv
 */
public class VespaSVersionRetriever {

    /** The value reported when no version can be determined */
    private static final String NOT_AVAILABLE = "not available.";

    private static final String version = retrieveVersion();

    /** Returns the Vespa version from the manifest, or "not available." (never null) */
    public static String getVersion() {
        return version;
    }

    private static String retrieveVersion() {
        // getResourceAsStream returns null (it does not throw) when the resource is missing,
        // and try-with-resources skips close() for a null resource.
        try (java.io.InputStream manifestStream =
                     VespaSVersionRetriever.class.getResourceAsStream("/META-INF/MANIFEST.MF")) {
            if (manifestStream == null) {
                return NOT_AVAILABLE;
            }
            String found = new Manifest(manifestStream).getMainAttributes().getValue("Vespa-Version");
            return found != null ? found : NOT_AVAILABLE;
        } catch (IOException e) {
            return NOT_AVAILABLE;
        }
    }
}
+package com.yahoo.prelude.cache; + +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.Map; + +import com.yahoo.cache.SizeCalculator; +import com.yahoo.search.Result; +import com.yahoo.statistics.Statistics; +import com.yahoo.statistics.Value; + +/** + * <p>A generic cache which keeps the total memory consumed by its content + * below a configured maximum.</p> + * + * <p>Thread safe.</p> + * + * @author vegardh + */ +public class Cache<K, V> { + private Value elems = null; + private Value entrySizes = null; + + private Map<CacheKey<K>,CacheValue<K, V>> content=new LinkedHashMap<>(12500, 1.0f, true); + private SizeCalculator calc = new SizeCalculator(); + private long maxSizeBytes; + + private long currentSizeBytes=0; + + /** The time an element is allowed to live, negative for indefinite lifespan */ + private long timeToLiveMillis=-1; + + /** The max allowed size of an entry */ + private long maxEntrySizeBytes=10000; + + /** + * Creates a new cache + * + * @param maxSizeBytes the max size in bytes this cache is permitted to consume, + * including Result objects and Query keys + * @param timeToLiveMillis a negative value means unlimited time + * @param manager the current Statistics manager acquired by injection + */ + public Cache(long maxSizeBytes,long timeToLiveMillis, long maxEntrySizeBytes, Statistics manager) { + this.maxSizeBytes=maxSizeBytes; + this.timeToLiveMillis=timeToLiveMillis; + this.maxEntrySizeBytes=maxEntrySizeBytes; + initStats(manager); + } + + private void initStats(Statistics manager) { + elems = new Value("querycache_elems", manager, new Value.Parameters() + .setLogRaw(true).setNameExtension(true).setLogMax(true)); + entrySizes = new Value("querycache_entry_sizes", manager, + new Value.Parameters().setLogRaw(false).setLogMean(true) + .setNameExtension(true).setLogMax(true)); + } + + private synchronized CacheValue<K, V> synchGet(CacheKey<K> k) { + return content.get(k); + } + + private synchronized boolean 
synchPut(K key,V value, long keySizeBytes, long valueSizeBytes) { + // log.info("Put "+key.toString()+ " key size:"+keySizeBytes+" val size:"+valueSizeBytes); + makeRoomForBytes(valueSizeBytes+keySizeBytes); + CacheKey<K> cacheKey = new CacheKey<>(keySizeBytes, key); + CacheValue<K, V> cacheValue; + if (timeToLiveMillis<0) { + cacheValue=new CacheValue<>(valueSizeBytes,value, cacheKey); + } else { + cacheValue=new AgingCacheValue<>(valueSizeBytes,value, cacheKey); + } + currentSizeBytes+=(valueSizeBytes+keySizeBytes); + elems.put(content.size()); + content.put(cacheKey, cacheValue); + return true; + } + + /** + * Attempts to add a value to the cache + * + * @param key the key of the value + * @param value the value to add + * @return true if the value was added, false if it could not be added + */ + public boolean put(K key,V value) { + if (value instanceof Result) { // Optimized for CachingSearcher. Assuming the key is the Query. + long totalSizeBytes = calc.sizeOf(value); // Result has a Query field + if (tooBigToCache(totalSizeBytes)) { + return false; + } + entrySizes.put(totalSizeBytes); + return synchPut(key, value, 0, totalSizeBytes); + } + long keySizeBytes=calc.sizeOf(key); + long valueSizeBytes=calc.sizeOf(value); + if (tooBigToCache(keySizeBytes+valueSizeBytes)) { + return false; + } + entrySizes.put(keySizeBytes+valueSizeBytes); + return synchPut(key, value, keySizeBytes, valueSizeBytes); + } + + /** + * Don't cache elems that are too big, even if there's space + */ + private boolean tooBigToCache(long totalSize) { + if (totalSize > maxEntrySizeBytes) { + return true; + } + if (totalSize > maxSizeBytes) { + return true; + } + return false; + } + + private void makeRoomForBytes(long bytes) { + if ((maxSizeBytes-currentSizeBytes) > bytes) { + return; + } + if (content.isEmpty()) { + return; + } + for (Iterator<Map.Entry<CacheKey<K>, CacheValue<K, V>>> i = content.entrySet().iterator() ; i.hasNext() ; ) { + Map.Entry<CacheKey<K>, CacheValue<K, V>> entry = 
i.next(); + CacheKey<K> key = entry.getKey(); + CacheValue<K, V> value = entry.getValue(); + // Can't call this.removeField(), breaks iterator. + i.remove(); // Access order: first ones are LRU. + currentSizeBytes-=key.sizeBytes(); + currentSizeBytes-=value.sizeBytes(); + if ((maxSizeBytes-currentSizeBytes) > bytes) { + break; + } + } + } + + public boolean containsKey(K k) { + return content.containsKey(new CacheKey<>(-1, k)); + } + + /** Returns a value, if it is present in the cache */ + public V get(K key) { + // Currently it works to make a new CacheKey object without size + // because we have changed hashCode() there. + CacheKey<K> cacheKey = new CacheKey<>(-1, key); + CacheValue<K, V> value=synchGet(cacheKey); + if (value==null) { + return null; + } + if (timeToLiveMillis<0) { + return value.value(); + } + + if (value.expired(timeToLiveMillis)) { + // There was a value, which has now expired + remove(key); + return null; + } else { + return value.value(); + } + } + + /** + * Removes a cache value if present + * + * @return true if the value was removed, false if it was not present + */ + public synchronized boolean remove(K key) { + CacheValue<K, V> value=content.remove(key); + if (value==null) { + return false; + } + currentSizeBytes-=value.sizeBytes(); + currentSizeBytes-=value.getKey().sizeBytes(); + elems.put(content.size()); + return true; + } + + public int size() { + return content.size(); + } + + private static class CacheKey<K> { + private long sizeBytes; + private K key; + public CacheKey(long sizeBytes,K key) { + this.sizeBytes=sizeBytes; + this.key=key; + } + + public long sizeBytes() { + return sizeBytes; + } + + public K getKey() { + return key; + } + + public int hashCode() { + return key.hashCode(); + } + + @SuppressWarnings("rawtypes") + public boolean equals(Object k) { + if (key==null) { + return false; + } + if (k==null) { + return false; + } + if (k instanceof CacheKey) { + return key.equals(((CacheKey)k).getKey()); + } + return false; + 
} + + public String toString() { + return key.toString(); + } + + } + + private static class CacheValue<K, V> { + private long sizeBytes; + private V value; + private CacheKey<K> key; + public CacheValue(long sizeBytes, V value, CacheKey<K> key) { + this.sizeBytes=sizeBytes; + this.value=value; + this.key = key; + } + + public boolean expired(long ttl) { + return false; + } + + public V value() { + return value; + } + + public long sizeBytes() { + return sizeBytes; + } + + public CacheKey<K> getKey() { + return key; + } + + public String toString() { + return value.toString(); + } + + } + + private static class AgingCacheValue<K, V> extends CacheValue<K, V> { + private long birthTimeMillis; + + public AgingCacheValue(long sizeBytes,V value, CacheKey<K> key) { + super(sizeBytes,value, key); + this.birthTimeMillis=System.currentTimeMillis(); + } + + public long ageMillis() { + return System.currentTimeMillis()-birthTimeMillis; + } + + public boolean expired(long ttl) { + return (ageMillis() >= ttl); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/cache/QueryCacheKey.java b/container-search/src/main/java/com/yahoo/prelude/cache/QueryCacheKey.java new file mode 100644 index 00000000000..d885422ce57 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/cache/QueryCacheKey.java @@ -0,0 +1,73 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.cache; + +import com.yahoo.search.Query; + +public class QueryCacheKey { + private Query query; + private int offset; + private int hits; + + public QueryCacheKey(Query query) { + this.query = query; + this.offset = query.getOffset(); + this.hits = query.getHits(); + } + + public boolean equals(Object key) { + if (key==null) { + return false; + } + if (query==null) { + return false; + } + if (key instanceof QueryCacheKey) { + QueryCacheKey ckey = (QueryCacheKey)key; + boolean res = equalQueryWith(ckey) && equalPathWith(ckey); + return res; + } + return false; + } + + private boolean equalQueryWith(QueryCacheKey other) { + return query.equals(other.getQuery()); + } + + private boolean equalPathWith(QueryCacheKey other) { + if (other == null) return false; + if (other.getQuery() == null) return false; + + return query.getHttpRequest().getUri().getPath().equals(other.getQuery().getHttpRequest().getUri().getPath()); + } + + public int getHits() { + return hits; + } + + public int getOffset() { + return offset; + } + + public Query getQuery() { + return query; + } + + public void setQuery(Query newQuery) { + query = newQuery; + } + + public String toString() { + if (query==null) { + return super.toString(); + } + return query.toString(); + } + + public int hashCode() { + if (query==null) { + return super.hashCode(); + } + int ret = query.hashCode(); + return ret; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java new file mode 100644 index 00000000000..77ee36785f7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java @@ -0,0 +1,146 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.cluster; + +import java.util.Map; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; +import java.util.logging.Level; +import java.util.logging.Logger; + +import com.yahoo.component.provider.Freezable; +import com.yahoo.container.handler.VipStatus; +import com.yahoo.prelude.fastsearch.VespaBackEndSearcher; +import com.yahoo.search.result.ErrorMessage; + +/** + * Monitors of a cluster of remote nodes. The monitor uses an internal thread + * for node monitoring. + * + * @author <a href="mailto:jon.bratseth@yahoo-inc.com">Jon S Bratseth</a> + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class ClusterMonitor implements Runnable, Freezable { + + private final MonitorConfiguration configuration; + + private final static Logger log = Logger.getLogger(ClusterMonitor.class.getName()); + + private final ClusterSearcher nodeManager; + + private final VipStatus vipStatus; + + /** A map from Node to corresponding MonitoredNode */ + private final Map<VespaBackEndSearcher, NodeMonitor> nodeMonitors = new java.util.IdentityHashMap<>(); + ScheduledFuture<?> future; + + private boolean isFrozen = false; + + ClusterMonitor(final ClusterSearcher manager, final QrMonitorConfig monitorConfig, VipStatus vipStatus) { + configuration = new MonitorConfiguration(monitorConfig); + nodeManager = manager; + this.vipStatus = vipStatus; + log.fine("checkInterval is " + configuration.getCheckInterval() + " ms"); + } + + /** Returns the configuration of this cluster monitor */ + MonitorConfiguration getConfiguration() { + return configuration; + } + + void startPingThread() { + if (!isFrozen()) { + throw new IllegalStateException( + "Do not start the monitoring thread before the set of" + +" nodes to monitor is complete/the ClusterMonitor is frozen."); + } + future = nodeManager.getScheduledExecutor().scheduleAtFixedRate(this, 30 * 1000, configuration.getCheckInterval(), TimeUnit.MILLISECONDS); + } + + 
/** + * Adds a new node for monitoring. + */ + void add(final VespaBackEndSearcher node) { + if (isFrozen()) { + throw new IllegalStateException( + "Can not add new nodes after ClusterMonitor has been frozen."); + } + final NodeMonitor monitor = new NodeMonitor(node); + nodeMonitors.put(node, monitor); + } + + /** Called from ClusterSearcher/NodeManager when a node failed */ + void failed(final VespaBackEndSearcher node, final ErrorMessage error) { + final NodeMonitor monitor = nodeMonitors.get(node); + final boolean wasWorking = monitor.isWorking(); + monitor.failed(error); + if (wasWorking && !monitor.isWorking()) { + // was warning, see VESPA-1922 + log.info("Failed monitoring node '" + node + "' due to '" + error); + nodeManager.failed(node); + } + updateVipStatus(); + } + + /** Called when a node responded */ + void responded(final VespaBackEndSearcher node, boolean hasDocumentsOnline) { + final NodeMonitor monitor = nodeMonitors.get(node); + final boolean wasFailing = !monitor.isWorking(); + monitor.responded(hasDocumentsOnline); + if (wasFailing && monitor.isWorking()) { + log.info("Failed node '" + node + "' started working again."); + nodeManager.working(monitor.getNode()); + } + updateVipStatus(); + } + + private void updateVipStatus() { + boolean hasWorkingNodesWithDocumentsOnline = false; + for (NodeMonitor node : nodeMonitors.values()) { + if (node.isWorking() && node.searchNodesOnline()) { + hasWorkingNodesWithDocumentsOnline = true; + break; + } + } + if (hasWorkingNodesWithDocumentsOnline) { + vipStatus.addToRotation(this); + } else { + vipStatus.removeFromRotation(this); + } + } + + /** + * Ping all nodes which needs pinging to discover state changes + */ + private void ping() throws InterruptedException { + for (final NodeMonitor monitor : nodeMonitors.values()) { + nodeManager.ping(monitor.getNode()); + } + } + + @Override + public void run() { + log.finest("Activating ping"); + try { + ping(); + } catch (final Exception e) { + 
log.log(Level.WARNING, "Error in monitor thread", e); + } + } + + public void shutdown() throws InterruptedException { + if (future != null) { + future.cancel(true); + } + } + + @Override + public void freeze() { + isFrozen = true; + + } + + @Override + public boolean isFrozen() { + return isFrozen; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java new file mode 100644 index 00000000000..88982fa1b69 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java @@ -0,0 +1,657 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.cluster; + +import com.yahoo.collections.Tuple2; +import com.yahoo.component.ComponentId; +import com.yahoo.component.chain.Chain; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.concurrent.Receiver; +import com.yahoo.concurrent.Receiver.MessageState; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.container.handler.VipStatus; +import com.yahoo.container.protect.Error; +import com.yahoo.fs4.PacketDumper; +import com.yahoo.fs4.PongPacket; +import com.yahoo.fs4.mplex.Backend; +import com.yahoo.container.search.LegacyEmulationConfig; +import com.yahoo.log.LogLevel; +import com.yahoo.search.dispatch.Dispatcher; +import com.yahoo.prelude.fastsearch.FS4ResourcePool; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.Ping; +import com.yahoo.prelude.Pong; +import com.yahoo.prelude.fastsearch.CacheControl; +import com.yahoo.prelude.fastsearch.CacheParams; +import com.yahoo.prelude.fastsearch.ClusterParams; +import com.yahoo.prelude.fastsearch.DocumentdbInfoConfig; +import com.yahoo.prelude.fastsearch.FastSearcher; +import com.yahoo.prelude.fastsearch.SummaryParameters; +import com.yahoo.prelude.fastsearch.VespaBackEndSearcher; +import 
com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.config.ClusterConfig; +import com.yahoo.search.query.ParameterParser; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.statistics.Statistics; +import com.yahoo.statistics.Value; +import com.yahoo.vespa.config.search.DispatchConfig; +import com.yahoo.vespa.streamingvisitors.VdsStreamingSearcher; + +import java.io.IOException; +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.*; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ScheduledExecutorService; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.apache.commons.lang.StringUtils; + +import static com.yahoo.container.QrSearchersConfig.Searchcluster.Indexingmode.STREAMING; + +/** + * A searcher which forwards to a cluster of monitored native Vespa backends. + * + * @author bratseth + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author <a href="mailto:geirst@yahoo-inc.com">Geir Storli</a> + */ +@After("*") +public class ClusterSearcher extends Searcher { + + private final static Logger log = Logger.getLogger(ClusterSearcher.class.getName()); + + private final ClusterMonitor monitor; + + private final Hasher hasher; + + private final Value cacheHitRatio; + + private final String clusterModelName; + + private final List<Backend> backends = new ArrayList<>(); + + // The set of document types contained in this search cluster + private final Set<String> documentTypes; + + // Mapping from rank profile names to document types containing them + private final Map<String, Set<String>> rankProfiles = new HashMap<>(); + + private final boolean failoverToRemote; + + private final FS4ResourcePool fs4ResourcePool; + + private final long maxQueryTimeout; // in milliseconds + private final static long DEFAULT_MAX_QUERY_TIMEOUT = 
600000L; + + private final long maxQueryCacheTimeout; // in milliseconds + private final static long DEFAULT_MAX_QUERY_CACHE_TIMEOUT = 10000L; + + /** + * Creates a new ClusterSearcher. + */ + public ClusterSearcher(ComponentId id, + QrSearchersConfig qrsConfig, + ClusterConfig clusterConfig, + DocumentdbInfoConfig documentDbConfig, + LegacyEmulationConfig emulationConfig, + QrMonitorConfig monitorConfig, + DispatchConfig dispatchConfig, + Statistics manager, + FS4ResourcePool listeners, + VipStatus vipStatus) { + super(id); + this.hasher = new Hasher(); + this.fs4ResourcePool = listeners; + monitor = new ClusterMonitor(this, monitorConfig, vipStatus); + final int searchClusterIndex = clusterConfig.clusterId(); + clusterModelName = clusterConfig.clusterName(); + final QrSearchersConfig.Searchcluster searchClusterConfig = getSearchClusterConfigFromClusterName(qrsConfig, clusterModelName); + documentTypes = new LinkedHashSet<>(); + failoverToRemote = clusterConfig.failoverToRemote(); + Dispatcher dispatcher = new Dispatcher(dispatchConfig); + + final String eventName = clusterModelName + ".cache_hit_ratio"; + cacheHitRatio = new Value(eventName, manager, new Value.Parameters() + .setNameExtension(false).setLogRaw(false).setLogMean(true)); + + maxQueryTimeout = ParameterParser.asMilliSeconds(clusterConfig.maxQueryTimeout(), DEFAULT_MAX_QUERY_TIMEOUT); + maxQueryCacheTimeout = ParameterParser.asMilliSeconds(clusterConfig.maxQueryCacheTimeout(), + DEFAULT_MAX_QUERY_CACHE_TIMEOUT); + + final CacheParams cacheParams = new CacheParams(createCache(clusterConfig, clusterModelName)); + final SummaryParameters docSumParams = new SummaryParameters(qrsConfig + .com().yahoo().prelude().fastsearch().FastSearcher().docsum() + .defaultclass()); + + for (final DocumentdbInfoConfig.Documentdb docDb : documentDbConfig.documentdb()) { + String docTypeName = docDb.name(); + documentTypes.add(docTypeName); + + for (final DocumentdbInfoConfig.Documentdb.Rankprofile profile : 
docDb.rankprofile()) { + addValidRankProfile(profile.name(), docTypeName); + } + } + + boolean gotExpectedBackend = false; + if (searchClusterConfig.indexingmode() == STREAMING) { + final VdsStreamingSearcher searcher = vdsCluster(searchClusterIndex, + searchClusterConfig, cacheParams, emulationConfig, docSumParams, + documentDbConfig); + addBackendSearcher(searcher); + gotExpectedBackend = true; + } else { + for (int i = 0; i < searchClusterConfig.dispatcher().size(); i++) { + final Backend b = createBackend( + searchClusterConfig.dispatcher(i)); + final FastSearcher searcher = searchDispatch(searchClusterIndex, + searchClusterConfig, cacheParams, emulationConfig, docSumParams, + documentDbConfig, b, dispatcher, i); + try { + searcher.setLocalDispatching(!isRemote(searchClusterConfig.dispatcher(i).host())); + } catch (UnknownHostException e) { + throw new RuntimeException(e); + } + backends.add(b); + addBackendSearcher(searcher); + gotExpectedBackend |= searcher.isLocalDispatching(); + } + } + if (!gotExpectedBackend) { + log.log(Level.SEVERE, "ClusterSearcher should have a local top level dispatch." + + " The possibility to configure dispatchers explicitly will be removed" + + " in a future release."); + } + hasher.running = true; + monitor.freeze(); + monitor.startPingThread(); + } + + private static QrSearchersConfig.Searchcluster getSearchClusterConfigFromClusterName(QrSearchersConfig config, String name) { + for (QrSearchersConfig.Searchcluster searchCluster : config.searchcluster()) { + if (searchCluster.name().equals(name)) { + return searchCluster; + } + } + return null; + } + + /** + * Returns false if this host is local. 
+ */ + boolean isRemote(String host) throws UnknownHostException { + InetAddress dispatchHost = InetAddress.getByName(host); + if (dispatchHost.isLoopbackAddress()) { + return false; + } else { + String localName; + try { + localName = InetAddress.getLocalHost().getCanonicalHostName(); + } catch (UnknownHostException e) { + // Macs unfortunately can tell their own name but does not know + // it, so if we run this model on a mac we'll end up here. + return false; + } + return !localName.equals(dispatchHost.getCanonicalHostName()); + } + } + + private static ClusterParams makeClusterParams(int searchclusterIndex, + QrSearchersConfig.Searchcluster searchClusterConfig, + LegacyEmulationConfig emulConfig, + int dispatchIndex) { + return new ClusterParams(searchclusterIndex, + "sc" + searchclusterIndex + ".num" + dispatchIndex, + searchClusterConfig.rowbits(), + emulConfig); + } + + private static FastSearcher searchDispatch(int searchclusterIndex, + QrSearchersConfig.Searchcluster searchClusterConfig, + CacheParams cacheParams, + LegacyEmulationConfig emulConfig, + SummaryParameters docSumParams, + DocumentdbInfoConfig documentdbInfoConfig, + Backend backend, + Dispatcher dispatcher, + int i) { + ClusterParams clusterParams = makeClusterParams(searchclusterIndex, + searchClusterConfig, + emulConfig, i); + return new FastSearcher(backend, dispatcher, docSumParams, clusterParams, cacheParams, documentdbInfoConfig); + } + + private static VdsStreamingSearcher vdsCluster(int searchclusterIndex, + QrSearchersConfig.Searchcluster searchClusterConfig, + CacheParams cacheParams, + LegacyEmulationConfig emulConfig, + SummaryParameters docSumParams, + DocumentdbInfoConfig documentdbInfoConfig) { + ClusterParams clusterParams = makeClusterParams(searchclusterIndex, + searchClusterConfig, + emulConfig, 0); + VdsStreamingSearcher searcher = (VdsStreamingSearcher) VespaBackEndSearcher + .getSearcher("com.yahoo.vespa.streamingvisitors.VdsStreamingSearcher"); + 
searcher.setSearchClusterConfigId(searchClusterConfig + .rankprofiles().configid()); + searcher.setStorageClusterRouteSpec(searchClusterConfig + .storagecluster().routespec()); + searcher.init(docSumParams, clusterParams, cacheParams, documentdbInfoConfig); + return searcher; + } + + /** Do not use, for internal testing purposes only. **/ + ClusterSearcher(Set<String> documentTypes) { + this.hasher = new Hasher(); + this.failoverToRemote = false; + this.documentTypes = documentTypes; + monitor = new ClusterMonitor(this, new QrMonitorConfig(new QrMonitorConfig.Builder()), new VipStatus()); + cacheHitRatio = new Value( + "com.yahoo.prelude.cluster.ClusterSearcher.ClusterSearcher().dummy", + Statistics.nullImplementation, new Value.Parameters()); + clusterModelName = "testScenario"; + fs4ResourcePool = null; + maxQueryTimeout = DEFAULT_MAX_QUERY_TIMEOUT; + maxQueryCacheTimeout = DEFAULT_MAX_QUERY_CACHE_TIMEOUT; + } + + public Map<String, Backend.BackendStatistics> getBackendStatistics() { + Map<String, Backend.BackendStatistics> backendStatistics = new TreeMap<>(); + for (final Backend backend : backends) { + backendStatistics.put(backend.toString(), backend.getStatistics()); + } + return backendStatistics; + } + + private Backend createBackend(final QrSearchersConfig.Searchcluster.Dispatcher disp) { + return fs4ResourcePool.getBackend(disp.host(), disp.port()); + } + + private static CacheControl createCache(ClusterConfig config, String clusterModelName) { + log.log(Level.INFO, "Enabling cache for search cluster " + + clusterModelName + " (size=" + config.cacheSize() + + ", timeout=" + config.cacheTimeout() + ")"); + + return new CacheControl(config.cacheSize(), config.cacheTimeout()); + } + + public String getClusterModelName() { + return clusterModelName; + } + + ClusterMonitor getMonitor() { + return monitor; + } + + void addBackendSearcher(VespaBackEndSearcher searcher) { + monitor.add(searcher); + hasher.add(searcher); + } + + void addValidRankProfile(String 
profileName, String docTypeName) { + if (!rankProfiles.containsKey(profileName)) { + rankProfiles.put(profileName, new HashSet<>()); + } + rankProfiles.get(profileName).add(docTypeName); + } + + void setValidRankProfile(String profileName, Set<String> documentTypes) { + rankProfiles.put(profileName, documentTypes); + } + + /** + * Returns an error if the document types do not have the requested rank + * profile. For the case of multiple document types, only returns an + * error if we have restricted the set of documents somehow. This is + * because when searching over all doc types, common ancestors might + * not have the requested rank profile and failing on that basis is + * probably not reasonable. + * + * @param query query + * @param docTypes set of requested doc types for this query + * @return null if request rank profile is ok for the requested + * doc types, a result with error message if not. + */ + private Result checkValidRankProfiles(Query query, Set<String> docTypes) { + String rankProfile = query.getRanking().getProfile(); + Set<String> invalidInDocTypes = null; + Set<String> rankDocTypes = rankProfiles.get(rankProfile); + + if (rankDocTypes == null) { + // ranking profile does not exist in any document type + invalidInDocTypes = docTypes; + } + else if (docTypes.size() == 1) { + // one document type, fails if invalid rank profile + if (!rankDocTypes.contains(docTypes.iterator().next())) { + invalidInDocTypes = docTypes; + } + } + else { + // multiple document types, only fail when restricting doc types + Set<String> restrict = query.getModel().getRestrict(); + Set<String> sources = query.getModel().getSources(); + boolean validate = restrict != null && !restrict.isEmpty(); + validate = validate || sources != null && !sources.isEmpty(); + if (validate && !rankDocTypes.containsAll(docTypes)) { + invalidInDocTypes = new HashSet<>(docTypes); + invalidInDocTypes.removeAll(rankDocTypes); + } + } + + if (invalidInDocTypes != null && 
!invalidInDocTypes.isEmpty()) { + String plural = invalidInDocTypes.size() > 1 ? "s" : ""; + return new Result(query, ErrorMessage.createInvalidQueryParameter( + "Requested rank profile '" + rankProfile + + "' is undefined for document type" + plural + " '" + + StringUtils.join(invalidInDocTypes.iterator(), ", ") + "'")); + } + + return null; + } + + @Override + public void fill(com.yahoo.search.Result result, String summaryClass, Execution execution) { + Query query = result.getQuery(); + int tries = 0; + + do { + // The loop is in case there are other searchers available + // able to produce results + VespaBackEndSearcher searcher = hasher.select(tries++); + if (searcher != null) { + if (query.getTimeLeft() > 0) { + doFill(searcher, result, summaryClass, execution); + } else { + if (result.hits().getErrorHit() == null) { + result.hits().setError(ErrorMessage.createTimeout("No time left to get summaries")); + } + } + } else { + if (result.hits().getErrorHit() == null) { + result.hits().setError(ErrorMessage.createNoBackendsInService("Could not fill result")); + } + } + // no error: good result, let's return + if (result.hits().getError() == null) { + return; + } + + } while (tries < hasher.getNodeCount() && failoverToRemote); + } + + public void doFill(Searcher searcher, Result result, String summaryClass, Execution execution) { + searcher.fill(result, summaryClass, execution); + updateCacheHitRatio(result, result.getQuery()); + } + + private void updateCacheHitRatio(Result result, Query query) { + // result.isCached() looks at the contained hits, so if there are no + // hits, the result will be treated as cached, even though the backend + // was queried. 
+ if (result.hits().getError() == null + && result.hits().getConcreteSize() > 0) { + + if (result.isCached()) { + cacheHit(); + } else if (!query.getNoCache()) { + cacheMiss(); + } + } + } + + @Override + public Result search(com.yahoo.search.Query query, Execution execution) { + Result result; + int tries = 0; + + do { + // The loop is in case there are other searchers available + // able to produce results + validateQueryTimeout(query); + validateQueryCache(query); + VespaBackEndSearcher searcher = hasher.select(tries++); + if (searcher == null) { + return new Result(query, ErrorMessage.createNoBackendsInService("Could not search")); + } + if (query.getTimeLeft() <= 0) { + return new Result(query, ErrorMessage.createTimeout("No time left for searching")); + } + + result = doSearch(searcher, query, execution); + + // no error: good result, let's return + if (result.hits().getError() == null) { + return result; + } + if (result.hits().getError().getCode() == Error.TIMEOUT.code) { + return result; // Retry is unlikely to help + } + if (result.hits().getError().getCode() == Error.INVALID_QUERY_PARAMETER.code) { + return result; // Retry is unlikely to help here as well + } + } while (tries < hasher.getNodeCount()); + + // only error-result gets returned here. + return result; + } + + private void validateQueryTimeout(Query query) { + if (query.getTimeout() > maxQueryTimeout) { + log.warning("Query timeout (" + query.getTimeout() + " ms) > max query timeout (" + maxQueryTimeout + " ms) for '" + + query.toString() + "'. Setting timeout to " + maxQueryTimeout + " ms."); + query.setTimeout(maxQueryTimeout); + } + } + + private void validateQueryCache(Query query) { + if (query.getRanking().getQueryCache() && query.getTimeout() > maxQueryCacheTimeout) { + log.warning("Query timeout (" + query.getTimeout() + " ms) > max query cache timeout (" + maxQueryCacheTimeout + " ms) for '" + + query.toString() + "'. 
Disabling query cache."); + query.getRanking().setQueryCache(false); + } + } + + private Result doSearch(Searcher searcher, Query query, Execution execution) { + Result result; + if (documentTypes.size() > 1) { + result = searchMultipleDocumentTypes(searcher, query, execution); + } else { + String docType = documentTypes.iterator().next(); + + Result invalidRankProfile = checkValidRankProfiles(query, documentTypes); + if (invalidRankProfile != null) { + return invalidRankProfile; + } + + query.getModel().setRestrict(docType); + result = searcher.search(query, execution); + } + updateCacheHitRatio(result, query); + return result; + } + + + private Result searchMultipleDocumentTypes(Searcher searcher, Query query, Execution execution) { + Set<String> docTypes = resolveDocumentTypes(query, execution.context().getIndexFacts()); + + Result invalidRankProfile = checkValidRankProfiles(query, docTypes); + if (invalidRankProfile != null) { + return invalidRankProfile; + } + + List<Query> queries = createQueries(query, docTypes); + if (queries.size() == 1) { + return searcher.search(queries.get(0), execution); + } else { + Result mergedResult = new Result(query.clone()); + for (Query q : queries) { + Result result = searcher.search(q, execution); + mergedResult.mergeWith(result); + mergedResult.hits().addAll(result.hits().asUnorderedHits()); + } + // Should we trim the merged result? 
+ if (query.getOffset() > 0 || query.getHits() < mergedResult.hits().size()) { + if (mergedResult.getHitOrderer() != null) { + // Make sure we have the necessary data for sorting + searcher.fill(mergedResult, Execution.ATTRIBUTEPREFETCH, execution); + } + mergedResult.hits().trim(query.getOffset(), query.getHits()); + } + return mergedResult; + } + } + + Set<String> resolveDocumentTypes(Query query, IndexFacts indexFacts) { + Set<String> restrict = query.getModel().getRestrict(); + if (restrict == null || restrict.isEmpty()) { + Set<String> sources = query.getModel().getSources(); + if (sources == null || sources.isEmpty()) { + return documentTypes; + } else { + return new HashSet<>(indexFacts.newSession(sources, Collections.emptyList(), documentTypes).documentTypes()); + } + } else { + return filterValidDocumentTypes(restrict); + } + } + + private Set<String> filterValidDocumentTypes(Collection<String> restrict) { + Set<String> retval = new LinkedHashSet<>(); + for (String docType : restrict) { + if (docType != null && documentTypes.contains(docType)) { + retval.add(docType); + } + } + return retval; + } + + private List<Query> createQueries(Query query, Set<String> docTypes) { + List<Query> retval = new ArrayList<>(docTypes.size()); + if (docTypes.size() == 1) { + query.getModel().setRestrict(docTypes.iterator().next()); + retval.add(query); + } else if ( ! docTypes.isEmpty() ) { + for (String docType : docTypes) { + Query q = query.clone(); + q.setOffset(0); + q.setHits(query.getOffset() + query.getHits()); + q.getModel().setRestrict(docType); + retval.add(q); + } + } + return retval; + } + + private void cacheHit() { + cacheHitRatio.put(1.0); + } + + private void cacheMiss() { + cacheHitRatio.put(0.0); + } + + /** NodeManager method, called from ClusterMonitor. */ + void working(VespaBackEndSearcher node) { + hasher.add(node); + } + + /** Called from ClusterMonitor. 
*/ + void failed(VespaBackEndSearcher node) { + hasher.remove(node); + } + + /** + * Pinging a node, called from ClusterMonitor. + */ + void ping(VespaBackEndSearcher node) throws InterruptedException { + log.fine("Sending ping to: " + node); + Pinger pinger = new Pinger(node); + + getExecutor().execute(pinger); + Pong pong = pinger.getPong(); // handles timeout + if (pong == null) { + monitor.failed(node, ErrorMessage.createNoAnswerWhenPingingNode("Ping thread timed out.")); + } else if (pong.badResponse()) { + monitor.failed(node, pong.getError(0)); + } else { + monitor.responded(node, backendCanServeDocuments(pong)); + } + } + + private boolean backendCanServeDocuments(Pong pong) { + List<PongPacket> wireReply = pong.getPongPackets(); + if (wireReply.size() == 0) { + return true; // streaming search does not add PongPacket instances + } + if (wireReply.size() > 1) { + log.log(LogLevel.ERROR, "ClusterSearcher ping got more than one pong packet (" + wireReply.size() + + "), this means basic implementation assumptions now are out of sync."); + } + + PongPacket pongPacket = wireReply.get(0); + if (pongPacket.getActiveNodes().isPresent() && pongPacket.getActiveNodes().get() == 0) { + return false; + } else { + return true; + } + } + + public void dumpPackets(PacketDumper.PacketType packetType, boolean on) throws IOException { + for (Backend b : backends) { + b.dumpPackets(packetType, on); + } + } + + @Override + public void deconstruct() { + try { + monitor.shutdown(); + } catch (final InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + ExecutorService getExecutor() { + return fs4ResourcePool.getExecutor(); + } + + ScheduledExecutorService getScheduledExecutor() { + return fs4ResourcePool.getScheduledExecutor(); + } + + private class Pinger implements Runnable { + + private final Searcher searcher; + private final Ping pingChallenge = new Ping(monitor.getConfiguration().getRequestTimeout()); + private final Receiver<Pong> pong = new 
Receiver<>(); + + public Pinger(final Searcher searcher) { + this.searcher = searcher; + } + + @Override + public void run() { + pong.put(createExecution().ping(pingChallenge)); + } + + private Execution createExecution() { + return new Execution(new Chain<>(searcher), + new Execution.Context(null, null, null, null, null)); + } + + public Pong getPong() throws InterruptedException { + Tuple2<MessageState, Pong> reply = pong.get(pingChallenge.getTimeout() + 150); + if (reply.first != MessageState.VALID) { + return null; + } else { + return reply.second; + } + } + + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/Hasher.java b/container-search/src/main/java/com/yahoo/prelude/cluster/Hasher.java new file mode 100644 index 00000000000..a78b5d6e1b5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/cluster/Hasher.java @@ -0,0 +1,135 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.cluster; + +import java.util.Random; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.logging.Logger; + +import com.yahoo.container.handler.VipStatus; +import com.yahoo.log.LogLevel; +import com.yahoo.prelude.fastsearch.VespaBackEndSearcher; + +/** + * Failover between multiple Vespa backends. + * + * @author bratseth + * @author Prashanth B. Bhat + * @author Steinar Knutsen + */ +public class Hasher { + + boolean running = false; + + private static final Logger log = Logger.getLogger(Hasher.class.getName()); + private static final Random tldSeeder = new Random(); + + private volatile VespaBackEndSearcher[] allNodes = new VespaBackEndSearcher[0]; + private volatile VespaBackEndSearcher[] localNodes = new VespaBackEndSearcher[0]; + + private AtomicInteger avoidAllQrsHitSameTld = new AtomicInteger(tldSeed()); + + /** + * Creates a hasher independent of the {@linkplain VipStatus programmatic VIP API}. 
+ */ + public Hasher() { + } + + private static synchronized int tldSeed() { + return tldSeeder.nextInt(); + } + + static private VespaBackEndSearcher[] addNode(VespaBackEndSearcher node, VespaBackEndSearcher[] oldNodes) { + VespaBackEndSearcher[] newNodes = new VespaBackEndSearcher[oldNodes.length + 1]; + System.arraycopy(oldNodes, 0, newNodes, 0, oldNodes.length); + newNodes[oldNodes.length] = node; + return newNodes; + } + /** + * Make a node available for search. + */ + public void add(VespaBackEndSearcher node) { + allNodes = addNode(node, allNodes); + + if (node.isLocalDispatching()) { + localNodes = addNode(node, localNodes); + } + } + + private VespaBackEndSearcher[] removeNode(VespaBackEndSearcher node, VespaBackEndSearcher[] nodes) { + VespaBackEndSearcher[] newNodes = null; + for (VespaBackEndSearcher n : nodes) { + if (n == node) { + newNodes = new VespaBackEndSearcher[nodes.length - 1]; + break; + } + } + if (newNodes != null) { + int numToKeep = 0; + + for (VespaBackEndSearcher n : nodes) { + if (n != node) { + newNodes[numToKeep++] = n; + } + } + return newNodes; + } + return nodes; + } + + /** Removes a node */ + public void remove(VespaBackEndSearcher node) { + if (allNodes.length == 0) { + return; + } + + VespaBackEndSearcher[] newNodes = removeNode(node, allNodes); + if (newNodes != allNodes) { + if (running && newNodes.length == 0) { + log.log(LogLevel.WARNING, "No longer any nodes for this cluster when" + + " removing malfunctioning " + node.toString() + "."); + } + allNodes = newNodes; + } + + newNodes = removeNode(node, localNodes); + if (newNodes != localNodes) { + if (running && localNodes.length == 0) { + log.log(LogLevel.WARNING, "Removing malfunctioning " + node.toString() + + " from traffic leaves no local dispatchers, performance" + + " degradation is to expected."); + } + localNodes = newNodes; + } + } + + public int getNodeCount() { + return allNodes.length; + } + + /** + * Return a node, prefer local nodes, try to skip already hit 
nodes. + * + * @param trynum + * hint to skip already used nodes + * @return the selected node, or null if this hasher has no nodes + */ + public VespaBackEndSearcher select(int trynum) { + VespaBackEndSearcher[] nodes = allNodes; + + if (nodes.length == 0) { + return null; + } else { + if (localNodes.length > 0) { + nodes = localNodes; + if (localNodes.length == 1) { + return nodes[0]; + } else { + return nodes[Math.abs(avoidAllQrsHitSameTld.incrementAndGet() % nodes.length)]; + } + } else { + return nodes[Math.abs(avoidAllQrsHitSameTld.incrementAndGet() % nodes.length)]; + } + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/MonitorConfiguration.java b/container-search/src/main/java/com/yahoo/prelude/cluster/MonitorConfiguration.java new file mode 100644 index 00000000000..9d2074cb2c0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/cluster/MonitorConfiguration.java @@ -0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.cluster; + +/** + * The configuration of a cluster monitor instance + * + * @author bratseth + * @author Steinar Knutsen + */ +public class MonitorConfiguration { + + /** + * The interval in ms between consecutive checks of the monitored nodes + */ + private final long checkInterval = 1000; + + /** + * The number of milliseconds to attempt to complete a request before giving + * up + */ + private long requestTimeout = 2700; + + public MonitorConfiguration(final QrMonitorConfig config) { + requestTimeout = config.requesttimeout(); + } + + /** + * Returns the interval between each ping of idle or failing nodes Default + * is 1000ms + */ + public long getCheckInterval() { + return checkInterval; + } + + /** + * Sets the number of milliseconds to attempt to service a request (at + * different nodes) before giving up. 
+ */ + public void setRequestTimeout(final long timeout) { + requestTimeout = timeout; + } + + /** + * Returns the number of milliseconds to attempt to service a request (at + * different nodes) before giving up. Default is 2700 ms. + */ + public long getRequestTimeout() { + return requestTimeout; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java b/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java new file mode 100644 index 00000000000..b6fe4b69052 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java @@ -0,0 +1,123 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.cluster; + +import static com.yahoo.container.protect.Error.BACKEND_COMMUNICATION_ERROR; +import static com.yahoo.container.protect.Error.NO_ANSWER_WHEN_PINGING_NODE; + +import java.util.logging.Logger; + +import com.yahoo.prelude.fastsearch.VespaBackEndSearcher; +import com.yahoo.search.result.ErrorMessage; + +/** + * A node monitor is responsible for maintaining the state of a monitored node. 
+ * It has the following properties: + * <ul> + * <li>A node is taken out of operation if it gives no response in 10 s</li> + * <li>A node is put back in operation when it responds correctly again</li> + * </ul> + * + * @author bratseth + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class NodeMonitor { + + protected static Logger log = Logger.getLogger(NodeMonitor.class.getName()); + + /** The object representing the monitored node */ + private final VespaBackEndSearcher node; + + private boolean isWorking = true; + + /** The last time this node responded successfully */ + private long succeededAt = 0; + + /** Whether it is assumed the node has documents available to serve */ + private boolean searchNodesOnline = true; + + /** + * Creates a new node monitor for a node + */ + public NodeMonitor(final VespaBackEndSearcher node) { + this.node = node; + } + + /** + * Returns whether this node is currently in a state suitable for receiving + * traffic. As far as we know, that is + */ + public boolean isWorking() { + return isWorking; + } + + // Whether or not dispatch has ever responded successfully + private boolean atStartUp = true; + + public VespaBackEndSearcher getNode() { + return node; + } + + /** + * Called when this node fails. + * + * @param error + * A container which should contain a short description + */ + public void failed(final ErrorMessage error) { + long respondedAt = System.currentTimeMillis(); + + if (error.getCode() == BACKEND_COMMUNICATION_ERROR.code + || error.getCode() == NO_ANSWER_WHEN_PINGING_NODE.code) { + // Only count not being able to talk to backend at all + // as errors we care about + if ((respondedAt - succeededAt) > 10000) { + setWorking(false, "Not working for 10 s: " + error.toString()); + } + } else { + succeededAt = respondedAt; + } + } + + /** + * Called when a response is received from this node. 
+ */ + public void responded(boolean searchNodesOnline) { + succeededAt = System.currentTimeMillis(); + this.searchNodesOnline = searchNodesOnline; + atStartUp = false; + + if (!isWorking) { + setWorking(true, "Responds correctly"); + } + } + + /** Changes the state of this node if required */ + private void setWorking(final boolean working, String explanation) { + String explanationToLog; + if (isWorking == working) { + return; // Old news + } + + if (explanation == null) { + explanationToLog = ""; + } else { + explanationToLog = ": " + explanation; + } + + if (working) { + log.info("Putting " + node + " in service" + explanationToLog); + } else { + if (!atStartUp) { + // was warning, see VESPA-1922 + log.info("Taking " + node + " out of service" + explanationToLog); + } + } + + isWorking = working; + } + + boolean searchNodesOnline() { + return searchNodesOnline; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/dispatchprototype/DispatchClusterSearcher.java b/container-search/src/main/java/com/yahoo/prelude/cluster/dispatchprototype/DispatchClusterSearcher.java new file mode 100644 index 00000000000..d47a5a82023 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/cluster/dispatchprototype/DispatchClusterSearcher.java @@ -0,0 +1,137 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.cluster.dispatchprototype; + +import com.google.common.annotations.Beta; +import com.yahoo.component.ComponentId; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.provider.ComponentRegistry; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.container.handler.VipStatus; +import com.yahoo.container.search.LegacyEmulationConfig; +import com.yahoo.prelude.cluster.ClusterSearcher; +import com.yahoo.prelude.cluster.QrMonitorConfig; +import com.yahoo.prelude.fastsearch.DocumentdbInfoConfig; +import com.yahoo.prelude.fastsearch.FS4ResourcePool; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.config.ClusterConfig; +import com.yahoo.search.config.dispatchprototype.SearchNodesConfig; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.statistics.Statistics; +import com.yahoo.vespa.config.search.DispatchConfig; + +import static com.yahoo.container.QrSearchersConfig.Searchcluster; + +/** + * This class modifies ClusterSearcher behavior to talk directly to search nodes instead of dispatchers. + * + * This means that queries are sent to a single search node only. Obviously, this will not give correct + * results - it is just a single step towards eliminating top-level dispatch as a separate process. 
+ * + * @author <a href="mailto:bakksjo@yahoo-inc.com">Oyvind Bakksjo</a> + */ +@Beta +@After("*") +public class DispatchClusterSearcher extends Searcher { + private final ClusterSearcher clusterSearcher; + + public DispatchClusterSearcher( + final ComponentId id, + final SearchNodesConfig searchNodesConfig, + final QrSearchersConfig qrsConfig, + final ClusterConfig clusterConfig, + final DocumentdbInfoConfig documentDbConfig, + final LegacyEmulationConfig emulationConfig, + final QrMonitorConfig monitorConfig, + final DispatchConfig dispatchConfig, + final Statistics manager, + final FS4ResourcePool listeners, + final ComponentRegistry<ClusterSearcher> otherClusterSearchers, + final VipStatus vipStatus) { + + clusterSearcher = new ClusterSearcher( + id, + makeQrSearchersConfigWithSearchNodesInsteadOfDispatcherNodes( + qrsConfig, + searchNodesConfig, + clusterConfig.clusterName()), + clusterConfig, + documentDbConfig, + emulationConfig, + monitorConfig, + dispatchConfig, + manager, + listeners, + vipStatus); + + //Prevent the ClusterSearcher(s) implicitly set up by the model from warning that it can't contact + //the c++ TLD when we disable it in the system test. 
+ otherClusterSearchers.allComponents().stream() + .forEach(ClusterSearcher::deconstruct); + } + + + @Override + public Result search(Query query, Execution execution) { + return clusterSearcher.search(query, execution); + } + + @Override + public void fill(Result result, String summaryClass, Execution execution) { + clusterSearcher.fill(result, summaryClass, execution); + } + + private static QrSearchersConfig makeQrSearchersConfigWithSearchNodesInsteadOfDispatcherNodes( + final QrSearchersConfig qrsConfig, + final SearchNodesConfig searchNodesConfig, + final String clusterName) { + final QrSearchersConfig.Builder qrSearchersConfigBuilder = new QrSearchersConfig.Builder(); + copyEverythingExceptSearchclusters(qrsConfig, qrSearchersConfigBuilder); + + // We only "copy" (with modifications) a single Searchcluster. + final Searchcluster originalSearchcluster = getSearchclusterByName(qrsConfig, clusterName); + final Searchcluster.Builder searchclusterBuilder = new Searchcluster.Builder(); + copyEverythingExceptDispatchers(originalSearchcluster, searchclusterBuilder); + // Here comes the trick: Substitute search nodes for dispatchers. 
+ for (final SearchNodesConfig.Search_node searchNodeConfig : searchNodesConfig.search_node()) { + searchclusterBuilder.dispatcher( + new Searchcluster.Dispatcher.Builder() + .host(searchNodeConfig.host()) + .port(searchNodeConfig.port())); + } + qrSearchersConfigBuilder.searchcluster(searchclusterBuilder); + + return new QrSearchersConfig(qrSearchersConfigBuilder); + } + + private static void copyEverythingExceptSearchclusters( + final QrSearchersConfig source, + final QrSearchersConfig.Builder destination) { + destination.tag(new QrSearchersConfig.Tag.Builder(source.tag())); + destination.com(new QrSearchersConfig.Com.Builder(source.com())); + destination.customizedsearchers(new QrSearchersConfig.Customizedsearchers.Builder(source.customizedsearchers())); + for (final QrSearchersConfig.External external : source.external()) { + destination.external(new QrSearchersConfig.External.Builder(external)); + } + } + + private static Searchcluster getSearchclusterByName(final QrSearchersConfig qrsConfig, final String clusterName) { + return qrsConfig.searchcluster().stream() + .filter(cluster -> clusterName.equals(cluster.name())) + .findAny() + .orElseThrow(() -> new IllegalStateException("No cluster found with name " + clusterName)); + } + + private static void copyEverythingExceptDispatchers( + final Searchcluster source, + final Searchcluster.Builder destination) { + destination + .name(source.name()) + .searchdef(source.searchdef()) + .rankprofiles(new Searchcluster.Rankprofiles.Builder(source.rankprofiles())) + .indexingmode(source.indexingmode()) + // Deliberately excluding storagecluster here because it's not relevant. 
+ .rowbits(source.rowbits()); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/package-info.java b/container-search/src/main/java/com/yahoo/prelude/cluster/package-info.java new file mode 100644 index 00000000000..e4dbfbb3a1b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/cluster/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.prelude.cluster; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/ByteField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/ByteField.java new file mode 100644 index 00000000000..44107499b40 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/ByteField.java @@ -0,0 +1,53 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+/** + * Class representing a byte field in the result set + * + */ + +package com.yahoo.prelude.fastsearch; + + +import java.nio.ByteBuffer; + +import com.yahoo.search.result.NanNumber; +import com.yahoo.data.access.Inspector; + +/** + * @author <a href="mailto:borud@yahoo-inc.com">Bj\u00f8rn Borud</a> + */ +public class ByteField extends DocsumField { + static final byte EMPTY_VALUE = Byte.MIN_VALUE; + + public ByteField(String name) { + super(name); + } + + private Object convert(byte value) { + if (value == EMPTY_VALUE) { + return NanNumber.NaN; + } else { + return Byte.valueOf(value); + } + } + + public Object decode(ByteBuffer b) { + return convert(b.get()); + } + + public Object decode(ByteBuffer b, FastHit hit) { + Object field = decode(b); + hit.setField(name, field); + return field; + } + + public int getLength(ByteBuffer b) { + int offset = b.position(); + final int bytelength = Byte.SIZE >> 3; + b.position(offset + bytelength); + return bytelength; + } + + public Object convert(Inspector value) { + return convert((byte)value.asLong(EMPTY_VALUE)); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/CacheControl.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/CacheControl.java new file mode 100644 index 00000000000..fdc76835e1e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/CacheControl.java @@ -0,0 +1,117 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.fastsearch; + + +import com.yahoo.fs4.Packet; +import com.yahoo.fs4.QueryPacket; +import com.yahoo.fs4.QueryResultPacket; +import com.yahoo.search.Query; +import com.yahoo.processing.request.CompoundName; + + +/** + * The cache control logic for FastSearcher + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class CacheControl { + + private static final CompoundName nocachewrite=new CompoundName("nocachewrite"); + + /** Whether this CacheControl actually should cache hits at all. */ + private final boolean activeCache; + + /** Direct unsychronized cache access */ + private final PacketCache packetCache; + + public CacheControl(int sizeMegaBytes, double cacheTimeOutSeconds) { + activeCache = sizeMegaBytes > 0 && cacheTimeOutSeconds > 0.0d; + if (activeCache) { + packetCache = new PacketCache(sizeMegaBytes, 0, cacheTimeOutSeconds); + } else { + packetCache = null; + } + } + + /** Returns the capacity of the packet cache in megabytes */ + public final int capacity() { + return packetCache.getCapacity(); + } + + public final boolean useCache(Query query) { + return (activeCache && !query.getNoCache()); + } + + public final PacketWrapper lookup(CacheKey key, Query query) { + if ((key != null) && useCache(query)) { + long now = System.currentTimeMillis(); + synchronized (packetCache) { + return packetCache.get(key, now); + } + } + return null; + } + + // updates first phase in multi phase search + void updateCacheEntry(CacheKey key, Query query, QueryResultPacket resultPacket) { + long oldTimestamp; + if (!activeCache) return; + + PacketWrapper wrapper = lookup(key, query); + if (wrapper == null) return; + + // The timestamp is owned by the QueryResultPacket, this is why this + // update method puts entries into the cache differently from elsewhere + oldTimestamp = wrapper.getTimestamp(); + wrapper = (PacketWrapper) wrapper.clone(); + wrapper.addResultPacket(resultPacket); + synchronized (packetCache) { + 
packetCache.put(key, wrapper, oldTimestamp); + } + } + + // updates phases after first phase phase in multi phase search + void updateCacheEntry(CacheKey key, Query query, DocsumPacketKey[] packetKeys, Packet[] packets) { + if (!activeCache) return; + + PacketWrapper wrapper = lookup(key, query); + if (wrapper== null) return; + + wrapper = (PacketWrapper) wrapper.clone(); + wrapper.addDocsums(packetKeys, packets); + synchronized (packetCache) { + packetCache.put(key, wrapper, wrapper.getTimestamp()); + } + } + + void cache(CacheKey key, Query query, DocsumPacketKey[] packetKeys, Packet[] packets) { + if ( ! activeCache) return; + + if (query.getNoCache()) return; + if (query.properties().getBoolean(nocachewrite)) return; + + PacketWrapper wrapper = lookup(key, query); + if (wrapper == null) { + wrapper = new PacketWrapper(key, packetKeys,packets); + long now = System.currentTimeMillis(); + synchronized (packetCache) { + packetCache.put(key, wrapper, now); + } + } else { + wrapper = (PacketWrapper) wrapper.clone(); + wrapper.addResultPacket((QueryResultPacket) packets[0]); + wrapper.addDocsums(packetKeys, packets, 1); + synchronized (packetCache) { + packetCache.put(key, wrapper, wrapper.getTimestamp()); + } + } + } + + /** Test method. */ + public void clear() { + if (packetCache != null) { + packetCache.clear(); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/CacheKey.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/CacheKey.java new file mode 100644 index 00000000000..cd330603b3d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/CacheKey.java @@ -0,0 +1,81 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + + +import java.util.Arrays; + +import com.yahoo.collections.BobHash; +import com.yahoo.fs4.QueryPacket; + + +/** + * The key used in the packet cache. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class CacheKey { + private int hashCode; + private byte[] serialized = null; + + /** + * Create a cache key from the query packet. + */ + public CacheKey(QueryPacket queryPacket) { + if (!queryPacket.isEncoded()) { + queryPacket.allocateAndEncode(0); + } + this.serialized = queryPacket.getOpaqueCacheKey(); + hashCode = calculateHashCode(); + } + + private int calculateHashCode() { + return BobHash.hash(serialized, 0); + } + + public boolean equals(Object o) { + if (o == null) { + return false; + } + if (!(o instanceof CacheKey)) { + return false; + } + + CacheKey k = (CacheKey) o; + return Arrays.equals(serialized, k.serialized); + // // The following is used for detailed debugging + // boolean state = true; + // if (serialized.length != k.serialized.length) { + // System.out.println("this " + serialized.length + " other " + k.serialized.length); + // return false; + // } + // System.out.println("start of arrays"); + // for (int i = 0; i < serialized.length; ++i) { + // System.out.print("serialized " + serialized[i] + " " + k.serialized[i]); + // if (serialized[i] != k.serialized[i]) { + // System.out.println(" diff at index " + i); + // state = false; // want to see all the data + // } else { + // System.out.println(""); + // } + // } + // return state; + } + + public int hashCode() { + return hashCode; + } + + public byte[] getCopyOfFullKey() { + return Arrays.copyOf(serialized, serialized.length); + } + + /** + * Return an estimate of the memory used by this object. Ie the sum of + * the internal data fields. 
+ */ + public int byteSize() { + // 4 = sizeOf(hashCode) + return serialized.length + 4; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/CacheParams.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/CacheParams.java new file mode 100644 index 00000000000..f7714ce1457 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/CacheParams.java @@ -0,0 +1,25 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + + +/** + * Helper class for carrying around cache-related + * config parameters to the FastSearcher class. + * + * @author arnej27959 + */ +public class CacheParams { + public int cacheMegaBytes = 0; + public double cacheTimeOutSeconds = 0; + public CacheControl cacheControl = null; + + public CacheParams(int megabytes, double timeoutseconds) { + this.cacheMegaBytes = megabytes; + this.cacheTimeOutSeconds = timeoutseconds; + } + + public CacheParams(CacheControl cacheControl) { + this.cacheControl = cacheControl; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/ClusterParams.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/ClusterParams.java new file mode 100644 index 00000000000..d5a17060dd6 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/ClusterParams.java @@ -0,0 +1,42 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + +import com.yahoo.container.search.LegacyEmulationConfig; + +/** + * Helper class for carrying around cluster-related + * config parameters to the FastSearcher class. 
+ * + * @author arnej27959 + */ +public class ClusterParams { + public final int clusterNumber; + public final String searcherName; + public final int rowBits; + public final LegacyEmulationConfig emulation; + + /** + * for compatibility + **/ + public ClusterParams(int number, String name, int rowbits) { + this(number, name, rowbits, new LegacyEmulationConfig()); + } + + /** + * for testcases only + **/ + public ClusterParams(String name) { + this(0, name, 0); + } + + /** + * make up full ClusterParams + **/ + public ClusterParams(int number, String name, int rowbits, LegacyEmulationConfig cfg) { + this.clusterNumber = number; + this.searcherName = name; + this.rowBits = rowbits; + this.emulation = cfg; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DataField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DataField.java new file mode 100644 index 00000000000..0e54adae932 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DataField.java @@ -0,0 +1,70 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * Class representing a data field in the result set. a data field + * is basically the same thing as a string field, only that we + * treat it like a raw buffer. Well we SHOULD. we don't actually + * do so. yet. we should probably do some defensive copying and + * return a ByteBuffer...hmm... 
+ * + */ + +package com.yahoo.prelude.fastsearch; + +import java.nio.ByteBuffer; + +import com.yahoo.prelude.hitfield.RawData; +import com.yahoo.data.access.simple.Value; +import com.yahoo.data.access.Inspector; + + +/** + * @author <a href="mailto:borud@yahoo-inc.com">Bj\u00f8rn Borud</a> + */ +public class DataField extends DocsumField implements VariableLengthField { + public DataField(String name) { + super(name); + } + + private Object convert(byte[] value) { + return new RawData(value); + } + + @Override + public Object decode(ByteBuffer b) { + int len = ((int) b.getShort()) & 0xffff; + + byte[] tmp = new byte[len]; + b.get(tmp); + return convert(tmp); + } + + @Override + public Object decode(ByteBuffer b, FastHit hit) { + Object field = decode(b); + hit.setField(name, field); + return field; + } + + @Override + public String toString() { + return "field " + getName() + " type data"; + } + + @Override + public int getLength(ByteBuffer b) { + int offset = b.position(); + int len = ((int) b.getShort()) & 0xffff; + b.position(offset + len + (Short.SIZE >> 3)); + return len + (Short.SIZE >> 3); + } + + @Override + public int sizeOfLength() { + return Short.SIZE >> 3; + } + + @Override + public Object convert(Inspector value) { + return convert(value.asData(Value.empty().asData())); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/Docsum.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/Docsum.java new file mode 100644 index 00000000000..2941baf40f5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/Docsum.java @@ -0,0 +1,95 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.fastsearch; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + + +/** + * An instance of a document summary, backed by binary data, which decodes and returns fields on request, + * using the (shared) definition of this docsum. + * + * @author <a href="mailt:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public final class Docsum { + + private final DocsumDefinition definition; + private final byte[] packet; + /** The offsets into the packet data of each field, given the fields index, computed lazily */ + private final int[] fieldOffsets; + /** The largest stored offset */ + private int largestStoredOffset=-1; + + public Docsum(DocsumDefinition definition, byte[] packet) { + this.definition = definition; + this.packet = packet; + fieldOffsets=new int[definition.getFieldCount()]; + } + + public DocsumDefinition getDefinition() { return definition; } + + public Integer getFieldIndex(String fieldName) { + return definition.getFieldIndex(fieldName); + } + + public Object decode(int fieldIndex) { + ByteBuffer b=packetAsBuffer(); + setAndReturnOffsetToField(b, fieldIndex); + return definition.getField(fieldIndex).decode(b); + } + + /** Fetches the field as raw utf-8 if it is a text field. Returns null otherwise */ + public FastHit.RawField fetchFieldAsUtf8(int fieldIndex) { + DocsumField dataType = definition.getField(fieldIndex); + if ( ! 
(dataType instanceof LongstringField || dataType instanceof XMLField || dataType instanceof StringField)) + return null; + + ByteBuffer b=packetAsBuffer(); + DocsumField field = definition.getField(fieldIndex); + int fieldStart = setAndReturnOffsetToField(b, fieldIndex); // set buffer.pos = start of field + if (field.isCompressed(b)) return null; + int length = field.getLength(b); // scan to end of field + if (field instanceof VariableLengthField) { + int fieldLength = ((VariableLengthField) field).sizeOfLength(); + b.position(fieldStart + fieldLength); // reset to start of field + length -= fieldLength; + } else { + b.position(fieldStart); // reset to start of field + } + byte[] bufferView = new byte[length]; + b.get(bufferView); + return new FastHit.RawField(dataType, bufferView); + } + + public ByteBuffer packetAsBuffer() { + ByteBuffer buffer = ByteBuffer.wrap(packet); + buffer.order(ByteOrder.LITTLE_ENDIAN); + buffer.getInt(); // Skip class id + return buffer; + } + + /** Returns the offset of a given field in the buffer, and sets the position of the buffer to that field start */ + private int setAndReturnOffsetToField(ByteBuffer b, int fieldIndex) { + // find and store missing offsets up to fieldIndex + if (largestStoredOffset<0) { // initial case + fieldOffsets[0]=b.position(); + largestStoredOffset++; + } + while (largestStoredOffset < fieldIndex) { // induction + int offsetOfLargest=fieldOffsets[largestStoredOffset]; + b.position(offsetOfLargest); + fieldOffsets[largestStoredOffset+1]=offsetOfLargest+definition.getField(largestStoredOffset).getLength(b); + largestStoredOffset++; + } + + // return the stored offset + int offset=fieldOffsets[fieldIndex]; + b.position(offset); + return offset; + } + + public String toString() { + return "docsum [definition: " + definition + "]"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumDefinition.java 
b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumDefinition.java new file mode 100644 index 00000000000..bef0069d525 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumDefinition.java @@ -0,0 +1,82 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.yahoo.vespa.config.search.SummaryConfig; +import com.yahoo.container.search.LegacyEmulationConfig; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * A docsum definition which knows how to decode a certain class of document + * summaries. The docsum definition has a name and a list of field definitions + * + * @author bratseth + * @author Bjørn Borud + */ +public class DocsumDefinition { + + private String name; + private final List<DocsumField> fields; + + /** True if this contains dynamic fields */ + private boolean dynamic = false; + + // Mapping between field names and their index in this.fields + private final Map<String,Integer> fieldNameToIndex; + + DocsumDefinition(DocumentdbInfoConfig.Documentdb.Summaryclass config, LegacyEmulationConfig emulConfig) { + this.name = config.name(); + List<DocsumField> fieldsBuilder = new ArrayList<>(); + Map<String,Integer> fieldNameToIndexBuilder = new HashMap<>(); + + for (DocumentdbInfoConfig.Documentdb.Summaryclass.Fields field : config.fields()) { + // no, don't switch the order of the two next lines :) + fieldNameToIndexBuilder.put(field.name(), fieldsBuilder.size()); + fieldsBuilder.add(DocsumField.create(field.name(), field.type(), emulConfig)); + if (field.dynamic()) + dynamic = true; + } + fields = ImmutableList.copyOf(fieldsBuilder); + fieldNameToIndex = ImmutableMap.copyOf(fieldNameToIndexBuilder); + } + + 
/** Returns the field at this index, or null if none */ + public DocsumField getField(int fieldIndex) { + if (fieldIndex >= fields.size()) return null; + return fields.get(fieldIndex); + } + + /** Returns the index of a field name */ + public Integer getFieldIndex(String fieldName) { + return fieldNameToIndex.get(fieldName); + } + + @Override + public String toString() { + return "docsum definition '" + getName() + "'"; + } + + public String getName() { + return name; + } + + public int getFieldCount() { + return fields.size(); + } + + public List<DocsumField> getFields() { + return fields; + } + + /** Returns whether this summary contains one or more dynamic fields */ + public boolean isDynamic() { + return dynamic; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumDefinitionSet.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumDefinitionSet.java new file mode 100644 index 00000000000..2f0768d4e8b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumDefinitionSet.java @@ -0,0 +1,140 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.fastsearch; + +import com.yahoo.slime.BinaryFormat; +import com.yahoo.slime.Slime; +import com.yahoo.data.access.slime.SlimeAdapter; +import com.yahoo.vespa.config.search.SummaryConfig; +import com.yahoo.prelude.ConfigurationException; +import com.yahoo.container.search.LegacyEmulationConfig; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; +import java.util.logging.Logger; + +/** + * A set of docsum definitions + * + * @author bratseth + * @author Bjørn Borud + */ +public final class DocsumDefinitionSet { + public static final int SLIME_MAGIC_ID = 0x55555555; + private final static Logger log = Logger.getLogger(DocsumDefinitionSet.class.getName()); + + private final HashMap<Long, DocsumDefinition> definitions = new HashMap<>(); + private final HashMap<String, DocsumDefinition> definitionsByName = new HashMap<>(); + private final LegacyEmulationConfig emulationConfig; + + public DocsumDefinitionSet(DocumentdbInfoConfig.Documentdb config) { + this.emulationConfig = new LegacyEmulationConfig(); + configure(config); + } + + public DocsumDefinitionSet(DocumentdbInfoConfig.Documentdb config, LegacyEmulationConfig emulConfig) { + this.emulationConfig = emulConfig; + configure(config); + } + + /** Returns a docsum definition by id + * @param id document summary class id + * @return a DocsumDefinition for the id, if found. 
+ */ + public final DocsumDefinition getDocsumDefinition(long id) { + return definitions.get(new Long(id)); + } + + /** + * Returns a docsum definition by name, or null if not found + * + * @param name the name of the summary class to use, or null to use the name "default" + * @return the summary class found, or null if none + */ + public final DocsumDefinition getDocsumDefinition(String name) { + if (name == null) + name="default"; + return definitionsByName.get(name); + } + + /** + * Makes data available for decoding for the given hit. + * + * @param summaryClass the requested summary class + * @param data docsum data from backend + * @param hit the Hit corresponding to this document summary + * @throws ConfigurationException if the summary class of this hit is missing + */ + public final void lazyDecode(String summaryClass, byte[] data, FastHit hit) { + ByteBuffer buffer = ByteBuffer.wrap(data); + buffer.order(ByteOrder.LITTLE_ENDIAN); + long docsumClassId = buffer.getInt(); + if (docsumClassId != SLIME_MAGIC_ID) { + DocsumDefinition docsumDefinition = lookupDocsum(docsumClassId); + Docsum docsum = new Docsum(docsumDefinition, data); + hit.addSummary(docsum); + } else { + DocsumDefinition docsumDefinition = lookupDocsum(summaryClass); + Slime value = BinaryFormat.decode(buffer.array(), buffer.arrayOffset()+buffer.position(), buffer.remaining()); + hit.addSummary(docsumDefinition, new SlimeAdapter(value.get())); + } + } + + private DocsumDefinition lookupDocsum(long docsumClassId) { + DocsumDefinition docsumDefinition = getDocsumDefinition(docsumClassId); + if (docsumDefinition == null) { + throw new ConfigurationException("Received hit with summary id " + docsumClassId + + ", but this summary class is not in current summary config (" + toString() + ")" + + " (that is, the system is in an inconsistent state)"); + } + return docsumDefinition; + } + + private DocsumDefinition lookupDocsum(String summaryClass) { + DocsumDefinition ds = 
definitionsByName.get(summaryClass); + if (ds == null) { + ds = definitionsByName.get("default"); + } + if (ds == null) { + throw new ConfigurationException("Fetched hit with summary class " + summaryClass + + ", but this summary class is not in current summary config (" + toString() + ")" + + " (that is, you asked for something unknown, and no default was found)"); + } + return ds; + } + + public String toString() { + StringBuilder sb = new StringBuilder(); + Set<Map.Entry<Long, DocsumDefinition>> entrySet = definitions.entrySet(); + boolean first = true; + for (Iterator<Map.Entry<Long, DocsumDefinition>> itr = entrySet.iterator(); itr.hasNext(); ) { + if (!first) { + sb.append(","); + } else { + first = false; + } + Map.Entry<Long, DocsumDefinition> entry = itr.next(); + sb.append("[").append(entry.getKey()).append(",").append(entry.getValue().getName()).append("]"); + } + return sb.toString(); + } + + public int size() { + return definitions.size(); + } + + private void configure(DocumentdbInfoConfig.Documentdb config) { + for (int i = 0; i < config.summaryclass().size(); ++i) { + DocumentdbInfoConfig.Documentdb.Summaryclass sc = config.summaryclass(i); + DocsumDefinition docSumDef = new DocsumDefinition(sc, emulationConfig); + definitions.put((long) sc.id(), docSumDef); + definitionsByName.put(sc.name(), docSumDef); + } + if (definitions.size() == 0) { + log.warning("No summary classes found in DocumentdbInfoConfig.Documentdb"); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumField.java new file mode 100644 index 00000000000..3aa02f57a1e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumField.java @@ -0,0 +1,119 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.fastsearch; + +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.nio.ByteBuffer; +import java.util.HashMap; +import java.util.Map; +import java.util.logging.Logger; +import com.yahoo.data.access.Inspector; +import com.yahoo.container.search.LegacyEmulationConfig; + +import com.yahoo.log.LogLevel; + +/** + * @author <a href="mailto:borud@yahoo-inc.com">Bj\u00f8rn Borud</a> + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public abstract class DocsumField { + + private static final Logger log = Logger.getLogger(DocsumField.class.getName()); + private static FieldFactory fieldFactory; + + private static class FieldFactory { + Map<String, Constructor<? extends DocsumField>> constructors = new HashMap<>(); + + void put(final String typename, + final Class<? extends DocsumField> fieldClass) + throws NoSuchMethodException, SecurityException + { + final Constructor<? extends DocsumField> constructor = fieldClass.getConstructor(String.class); + constructors.put(typename, constructor); + } + + DocsumField create(final String typename, final String name, final LegacyEmulationConfig emulConfig) + throws InstantiationException, IllegalAccessException, + IllegalArgumentException, InvocationTargetException + { + DocsumField f = constructors.get(typename).newInstance(name); + f.emulConfig = emulConfig; + return f; + } + } + private LegacyEmulationConfig emulConfig; + final LegacyEmulationConfig getEmulConfig() { return emulConfig; } + + static { + fieldFactory = new FieldFactory(); + + try { + fieldFactory.put("byte", ByteField.class); + fieldFactory.put("short", ShortField.class); + fieldFactory.put("integer", IntegerField.class); + fieldFactory.put("int64", Int64Field.class); + fieldFactory.put("float", FloatField.class); + fieldFactory.put("double", DoubleField.class); + fieldFactory.put("string", StringField.class); + fieldFactory.put("data", DataField.class); + 
fieldFactory.put("longstring", LongstringField.class); + fieldFactory.put("longdata", LongdataField.class); + fieldFactory.put("jsonstring", StructDataField.class); + fieldFactory.put("featuredata", FeatureDataField.class); + fieldFactory.put("xmlstring", XMLField.class); + } catch (final Exception e) { + log.log(LogLevel.ERROR, + "Could not initialize docsum decoding properly.", e); + } + } + + protected String name; + + protected DocsumField(final String name) { + this.name = name; + } + + /* for unit test only */ + static DocsumField create(final String name, final String typename) { + return create(name, typename, new LegacyEmulationConfig()); + } + + public static DocsumField create(final String name, final String typename, LegacyEmulationConfig emulConfig) { + try { + return fieldFactory.create(typename, name, emulConfig); + } catch (final Exception e) { + throw new RuntimeException("Unknown field type '" + typename + "'", e); + } + } + + public String getName() { + return name; + } + + public boolean isCompressed(final ByteBuffer b) { + return false; + } + + /** + * Decode the field at the current buffer position into the fast hit. + */ + public abstract Object decode(ByteBuffer b, FastHit hit); + + /** + * Decode the field at the current buffer position and simply return the + * value. + */ + public abstract Object decode(ByteBuffer b); + + /** + * Get the number of bytes this field occupies in the given buffer and set + * the position of the first byte after this field. + */ + public abstract int getLength(ByteBuffer b); + + /** + * Convert a generic value into an object of the appropriate type + * for this field. 
+ **/ + public abstract Object convert(Inspector value); +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumPacketKey.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumPacketKey.java new file mode 100644 index 00000000000..1e76207e370 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumPacketKey.java @@ -0,0 +1,64 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + +import com.yahoo.document.GlobalId; + + +/** + * Key for each entry in the packetcache. + * + * @author <a href="mailto:mathiasm@yahoo-inc.com">Mathias M\u00f8lster Lidal</a> + */ +public class DocsumPacketKey { + private GlobalId globalId; + private int partid; + private int docstamp; + private String summaryClass; + + private static boolean strEquals(String a, String b) { + if (a == null || b == null) { + return (a == null && b == null); + } + return a.equals(b); + } + + private static int strHashCode(String s) { + if (s == null) { + return 0; + } + return s.hashCode(); + } + + public DocsumPacketKey(GlobalId globalId, int partid, String summaryClass) { + this.globalId = globalId; + this.partid = partid; + this.summaryClass = summaryClass; + } + + public GlobalId getGlobalId() { + return globalId; + } + + public int getPartid() { + return partid; + } + + public boolean equals(Object o) { + if (o instanceof DocsumPacketKey) { + DocsumPacketKey other = (DocsumPacketKey) o; + + if (globalId.equals(other.getGlobalId()) + && partid == other.getPartid() + && strEquals(summaryClass, other.summaryClass)) + { + return true; + } + } + return false; + } + + public int hashCode() { + return globalId.hashCode() + 10 * partid + strHashCode(summaryClass); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocumentDatabase.java 
b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocumentDatabase.java new file mode 100644 index 00000000000..c48a8804f9f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocumentDatabase.java @@ -0,0 +1,54 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + +import com.google.common.collect.ImmutableMap; +import com.yahoo.container.search.LegacyEmulationConfig; + +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Representation of a back-end document database. + * + * @author <a href="mailto:geirst@yahoo-inc.com">Geir Storli</a> + */ +public class DocumentDatabase { + + // TODO: What about name conflicts when different search defs have the same rank profile/docsum? + + public static final String MATCH_PROPERTY = "match"; + public static final String SEARCH_DOC_TYPE_KEY = "documentdb.searchdoctype"; + + private final String name; + private final DocsumDefinitionSet docsumDefSet; + + private final Map<String, RankProfile> rankProfiles; + + public DocumentDatabase(DocumentdbInfoConfig.Documentdb documentDb, LegacyEmulationConfig emulConfig) { + this.name = documentDb.name(); + this.docsumDefSet = new DocsumDefinitionSet(documentDb, emulConfig); + this.rankProfiles = ImmutableMap.copyOf(toRankProfiles(documentDb.rankprofile())); + } + + public String getName() { + return name; + } + + public DocsumDefinitionSet getDocsumDefinitionSet() { + return docsumDefSet; + } + + /** Returns an unmodifiable map of all the rank profiles in this indexed by rank profile name */ + public Map<String, RankProfile> rankProfiles() { return rankProfiles; } + + private Map<String, RankProfile> toRankProfiles(List<DocumentdbInfoConfig.Documentdb.Rankprofile> rankProfileConfigList) { + Map<String, RankProfile> rankProfiles = new HashMap<>(); + for 
(DocumentdbInfoConfig.Documentdb.Rankprofile c : rankProfileConfigList) { + rankProfiles.put(c.name(), new RankProfile(c.name(), c.hasSummaryFeatures(), c.hasRankFeatures())); + } + return rankProfiles; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DoubleField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DoubleField.java new file mode 100644 index 00000000000..d42d5567718 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DoubleField.java @@ -0,0 +1,48 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + + +import java.nio.ByteBuffer; + +import com.yahoo.search.result.NanNumber; +import com.yahoo.data.access.Inspector; + +/** + * @author <a href="mailto:mathiasm@yahoo-inc.com">Mathias M\u00f8lster Lidal</a> + */ +public class DoubleField extends DocsumField { + static final double EMPTY_VALUE = Double.NaN; + + public DoubleField(String name) { + super(name); + } + + private Object convert(double value) { + if (Double.isNaN(value)) { + return NanNumber.NaN; + } else { + return Double.valueOf(value); + } + } + + public Object decode(ByteBuffer b) { + return convert(b.getDouble()); + } + + public Object decode(ByteBuffer b, FastHit hit) { + Object field = decode(b); + hit.setField(name, field); + return field; + } + + public int getLength(ByteBuffer b) { + int offset = b.position(); + final int byteLength = Double.SIZE >> 3; + b.position(offset + byteLength); + return byteLength; + } + + public Object convert(Inspector value) { + return convert(value.asDouble(EMPTY_VALUE)); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/FS4ResourcePool.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FS4ResourcePool.java new file mode 100644 index 00000000000..1aa226dbeb8 --- /dev/null +++ 
b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FS4ResourcePool.java @@ -0,0 +1,88 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + +import com.yahoo.component.AbstractComponent; +import com.yahoo.concurrent.ThreadFactoryFactory; +import com.yahoo.container.Server; +import com.yahoo.container.search.Fs4Config; +import com.yahoo.fs4.mplex.Backend; +import com.yahoo.fs4.mplex.ConnectionPool; +import com.yahoo.fs4.mplex.ListenerPool; +import com.yahoo.io.Connection; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Timer; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Provider for {@link com.yahoo.fs4.mplex.ListenerPool}. All users will get the same pool instance. 
+ * + * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a> + * @since 5.4.0 + */ +public class FS4ResourcePool extends AbstractComponent { + private static final Logger logger = Logger.getLogger(FS4ResourcePool.class.getName()); + private static final AtomicInteger instanceCounter = new AtomicInteger(0); + private final int instanceId; + private final ListenerPool listeners; + private final Timer timer = new Timer(); // This is a timer for cleaning the closed connections + private Map<String, Backend> connectionPoolMap = new HashMap<>(); + private final ExecutorService executor; + private final ScheduledExecutorService scheduledExecutor; + + public FS4ResourcePool(Fs4Config fs4Config) { + instanceId = instanceCounter.getAndIncrement(); + logger.log(Level.INFO, "Constructing an FS4ResourcePool with id '" + instanceId + "' with config '" + fs4Config.toString() + "'"); + String name = "FS4-" + instanceId; + listeners = new ListenerPool(name, fs4Config.numlistenerthreads()); + executor = Executors.newCachedThreadPool(ThreadFactoryFactory.getDaemonThreadFactory(name)); + scheduledExecutor = Executors.newScheduledThreadPool(1, ThreadFactoryFactory.getDaemonThreadFactory(name + ".scheduled")); + } + + public ExecutorService getExecutor() { + return executor; + } + public ScheduledExecutorService getScheduledExecutor() { + return scheduledExecutor; + } + public Backend getBackend(String host, int port) { + + String key = host + ":" + port; + synchronized (connectionPoolMap) { + Backend pool = connectionPoolMap.get(key); + if (pool == null) { + pool = new Backend(host, port, Server.get().getServerDiscriminator(), listeners, new ConnectionPool(timer)); + connectionPoolMap.put(key, pool); + } + return pool; + } + } + + @Override + public void deconstruct() { + logger.log(Level.INFO, "Deconstructing FS4ResourcePool with id '" + instanceId + "'."); + super.deconstruct(); + listeners.close(); + timer.cancel(); + for (Backend backend : 
connectionPoolMap.values()) { + backend.shutdown(); + backend.close(); + } + executor.shutdown(); + scheduledExecutor.shutdown(); + try { + executor.awaitTermination(10, TimeUnit.SECONDS); + scheduledExecutor.awaitTermination(10, TimeUnit.SECONDS); + } catch (InterruptedException e) { + logger.warning("Executors failed terminating within timeout of 10 seconds : " + e); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/FastHit.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FastHit.java new file mode 100644 index 00000000000..ee3f9ac0583 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FastHit.java @@ -0,0 +1,442 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + +import com.google.common.annotations.Beta; +import com.yahoo.document.GlobalId; +import com.yahoo.fs4.QueryPacketData; +import com.yahoo.net.URI; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.Relevance; +import com.yahoo.data.access.Inspector; +import com.yahoo.data.access.Type; +import com.yahoo.data.access.simple.Value.StringValue; + +/** + * A regular hit from a Vespa backend + * + * @author bratseth + * @author steinar + */ +public class FastHit extends Hit { + + public static final String SUMMARY = "summary"; + + private static final long serialVersionUID = 298098891191029589L; + + /** The global id of this document in the backend node which produced it */ + private GlobalId globalId = new GlobalId(new byte[GlobalId.LENGTH]); + + /** Part ID */ + private int partId; + + /** DistributionKey (needed to generate getDocsumPacket, for two-phase search) */ + private int distributionKey = 0; + + /** The index uri of this. 
Lazily set */ + private URI indexUri = null; + + /** + * The number of least significant bits in the part id which specifies the + * row in the search cluster which produced this hit. The other bits + * specifies the column. 0 if not known. + */ + private int rowBits = 0; + + /** + * Whether or not to ignore the row bits. If this is set, FastSearcher is + * allowed to choose an appropriate row. + */ + private boolean ignoreRowBits = false; + + /** + * Whether to use the row number in the index uri, see FastSearcher for + * details + */ + private boolean useRowInIndexUri = true; + + private transient QueryPacketData queryPacketData = null; + private transient CacheKey cacheKey = null; + + /** + * Creates an empty and temporarily invalid summary hit + */ + public FastHit() { } + + public FastHit(String uri, double relevancy) { + this(uri, relevancy, null); + } + + public FastHit(String uri, double relevance, String source) { + setId(uri); + super.setField("uri", uri); + setRelevance(new Relevance(relevance)); + setSource(source); + types().add(SUMMARY); + setPartId(0, 0); + } + + public String toString() { + return super.toString() + " [fasthit, globalid: " + globalId + ", partId: " + + partId + ", distributionkey: " + distributionKey + "]"; + } + + public static String asHexString(GlobalId gid) { + StringBuilder sb = new StringBuilder(); + byte[] rawGid = gid.getRawId(); + for (byte b : rawGid) { + String hex = Integer.toHexString(0xFF & b); + if (hex.length() == 1) { + sb.append('0'); + } + sb.append(hex); + } + return sb.toString(); + } + + @Override + public int hashCode() { + if (getId() == null) { + throw new IllegalStateException("This hit must have a 'uri' field, and this fild must be filled through " + + "Execution.fill(Result)) before hashCode() is accessed."); + } else { + return super.hashCode(); + } + } + + @Override + public URI getId() { + return getUri(); // Make sure we decode it if the id is encoded + } + + /** + * Returns the explicitly set uri if 
available, returns + * "index:[source]/[partid]/[id]" otherwise + * @return uri of hit + */ + public URI getUri() { + URI uri = super.getId(); + if (uri != null) return uri; + + // TODO: Remove, this should be one of the last vestiges of URL field magic + if (fields().containsKey("uri")) { + // trigger decoding + Object o = getField("uri"); + setId(o.toString()); + return super.getId(); + } + + return getIndexUri(); + } + + /** + * The uri of the index location of this hit + * ("index:[source]/[partid]/[id]"). This is the uri if no other uri is + * assigned + * @return uri to the index. + */ + public URI getIndexUri() { + if (indexUri != null) return indexUri; + + String rowString = "-"; + if (useRowInIndexUri) + rowString = String.valueOf(getRow()); + + return new URI("index:" + getSourceNumber() + "/" + getColumn() + "/" + rowString + "/" + asHexString(getGlobalId())); + } + + /** Returns the global id of this document in the backend node which produced it */ + public GlobalId getGlobalId() { + return globalId; + } + + public void setGlobalId(GlobalId globalId) { + this.globalId = globalId; + } + + public int getPartId() { + return partId; + } + + /** + * Sets the part id number, which specifies the node where this hit is + * found. The row count is used to decode the part id into a column and a + * row number: the number of n least significant bits required to hold the + * highest row number are the row bits, the rest are column bits. + * + * @param partId partition id + * @param rowBits number of bits to encode row number + */ + public void setPartId(int partId, int rowBits) { + this.partId = partId; + this.rowBits = rowBits; + } + + /** + * + * @param useRowInIndexUri Sets whether to use the row in the index uri. See FastSearcher for details. 
+ */ + public void setUseRowInIndexUri(boolean useRowInIndexUri) { + this.useRowInIndexUri = useRowInIndexUri; + } + + /** + * @return Returns the column number where this hit originated, or partId if not known + */ + public int getColumn() { + return partId >>> rowBits; + } + + /** + * @return the row number where this hit originated, or 0 if not known + * */ + public int getRow() { + if (rowBits == 0) { + return 0; + } + + return partId & ((1 << rowBits) - 1); + } + + /** + * <p>Returns a field value from this Hit. The value is either a stored value from the Document represented by + * this Hit, or a generated value added during later processing.</p> + * + * <p>The values available from the matching Document are a <i>subset</i> of the values set in the document, + * determined by the {@link #getFilled() filled} status of this Hit. More fields may be requested by requesting + * further filling.</p> + * + * <p>Lookups on names which does not exists in the document and is not added by later processing + * return null.</p> + * + * <p>Lookups on fields which exist in the document, in a summary class which is already requested + * filled returns the following types, even when the field has no actual value:</p> + * + * <ul> + * <li><b>Dynamic summary string fields</b>: A Java String before JuniperSearcher and a HitField after.</li> + * <li><b>string/uri/content</b>: A Java String.<br> + * The empty string ("") if no value is assigned in the document. + * + * <li><b>Numerics</b>: The corresponding numeric Java type.<br> + * If the field has <i>no value</i> assigned in the document, + * the special numeric {@link com.yahoo.search.result.NanNumber#NaN} is returned. 
+ * + * <li><b>raw</b>: A {@link com.yahoo.prelude.hitfield.RawData} instance + * + * <li><b>multivalue fields</b>: A {@link com.yahoo.prelude.hitfield.JSONString} instance + * </ul> + */ + @Override + public Object getField(String key) { + Object value = super.getField(key); + + if (value instanceof LazyValue) { + return getAndCacheLazyValue(key, (LazyValue) value); + } else { + return value; + } + } + + private Object getAndCacheLazyValue(String key, LazyValue value) { + Object forcedValue = value.getValue(key); + setField(key, forcedValue); + return forcedValue; + } + + /** Returns false - this is a concrete hit containing requested content */ + public boolean isMeta() { + return false; + } + + /** + * Only needed when fetching summaries in 2 phase. + * + * @return distribution key of node where the hit originated from + */ + public int getDistributionKey() { + return distributionKey; + } + + /** + * Only needed when fetching summaries in 2 phase. + * @param distributionKey Of node where you find this hit. 
+ */ + public void setDistributionKey(int distributionKey) { + this.distributionKey = distributionKey; + } + + public void addSummary(Docsum docsum) { + LazyDocsumValue lazyDocsumValue = new LazyDocsumValue(docsum); + for (DocsumField field : docsum.getDefinition().getFields()) { + setDocsumFieldIfNotPresent(field.getName(), lazyDocsumValue); + } + } + + void addSummary(DocsumDefinition docsumDef, Inspector value) { + for (DocsumField field : docsumDef.getFields()) { + String fieldName = field.getName(); + if (value.type() == Type.STRING && + (field instanceof LongstringField || + field instanceof StringField || + field instanceof XMLField)) + { + setDocsumFieldIfNotPresent(fieldName, new LazyString(field, value)); + } else { + Inspector f = value.field(fieldName); + if (field.getEmulConfig().forceFillEmptyFields() || f.valid()) { + setDocsumFieldIfNotPresent(fieldName, field.convert(f)); + } + } + } + } + + private void setDocsumFieldIfNotPresent(String fieldName, Object value) { + if (super.getField(fieldName) == null) { + setField(fieldName, value); + } + } + + /** + * Set a field to behave like a string type summary field, not decoding raw + * data till actually used. Added to make testing lazy docsum functionality + * easier. This is not a method to be used for efficiency, as it causes + * object allocations. + * + * @param fieldName + * the name of the field to insert undecoded UTF-8 into + * @param value + * an array of valid UTF-8 data + */ + @Beta + public void setLazyStringField(String fieldName, byte[] value) { + setField(fieldName, new LazyString(new StringField(fieldName), new StringValue(value))); + } + + public static final class RawField { + private final boolean needXmlEscape; + + private final byte[] contents; + + public RawField(DocsumField fieldType, byte[] contents) { + needXmlEscape = ! 
(fieldType instanceof XMLField); + this.contents = contents; + } + public RawField(byte [] contents) { + needXmlEscape = true; + this.contents = contents; + } + + public byte [] getUtf8() { return contents; } + public boolean needXmlEscape() { return needXmlEscape; } + } + + /** + * Add the binary data common for the query packet to a Vespa backend and a + * summary fetch packet to a Vespa backend. This method can only be called + * once for a single hit. + * + * @param queryPacketData binary data from a query packet resulting in this hit + * @throws IllegalStateException if the method is called more than once + * @throws NullPointerException if trying to set query packet data to null + */ + public void setQueryPacketData(QueryPacketData queryPacketData) { + if (this.queryPacketData != null) + throw new IllegalStateException("Query packet data already set to " + + this.queryPacketData + ", tried to set it to " + queryPacketData); + if (queryPacketData == null) + throw new NullPointerException("Query packet data reference can not be set to null."); + this.queryPacketData = queryPacketData; + } + + /** + * Fetch binary data from the query packet which produced this hit. These + * data may not be available, this method will then return null. 
+ * + * @return wrapped binary data from a query packet, or null + */ + public QueryPacketData getQueryPacketData() { + return queryPacketData; + } + + public void clearQueryPacketData() { + queryPacketData = null; + } + + CacheKey getCacheKey() { + return cacheKey; + } + + void setCacheKey(CacheKey cacheKey) { + this.cacheKey = cacheKey; + } + + public void setIgnoreRowBits(boolean ignoreRowBits) { + this.ignoreRowBits = ignoreRowBits; + } + + public boolean shouldIgnoreRowBits() { + return ignoreRowBits; + } + + public boolean fieldIsNotDecoded(String name) { + return super.getField(name) instanceof LazyValue; + } + + public RawField fetchFieldAsUtf8(String fieldName) { + Object value = super.getField(fieldName); + if (value instanceof LazyValue) { + return ((LazyValue) value).getFieldAsUtf8(fieldName); + } else { + throw new IllegalStateException("Field " + fieldName + " has already been decoded:" + value); + } + } + + private static abstract class LazyValue { + abstract Object getValue(String fieldName); + abstract RawField getFieldAsUtf8(String fieldName); + } + + /** + * Represents a value that resides in the docsum. 
+ */ + private static class LazyDocsumValue extends LazyValue { + private final Docsum docsum; + + LazyDocsumValue(Docsum docsum) { + this.docsum = docsum; + } + + Object getValue(String fieldName) { + return docsum.decode(getFieldIndex(fieldName)); + } + + private int getFieldIndex(String fieldName) { + Integer index = docsum.getFieldIndex(fieldName); + if (index == null) throw new AssertionError("Invalid fieldName " + fieldName); + return index; + } + + RawField getFieldAsUtf8(String fieldName) { + return docsum.fetchFieldAsUtf8(getFieldIndex(fieldName)); + } + } + + private static class LazyString extends LazyValue { + private final Inspector value; + private final DocsumField fieldType; + + LazyString(DocsumField fieldType, Inspector value) { + assert(value.type() == Type.STRING); + this.value = value; + this.fieldType = fieldType; + } + + Object getValue(String fieldName) { + return value.asString(); + } + + RawField getFieldAsUtf8(String fieldName) { + return new RawField(fieldType, value.asUtf8()); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/FastSearcher.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FastSearcher.java new file mode 100644 index 00000000000..dfca9c49cba --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FastSearcher.java @@ -0,0 +1,566 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.fastsearch; + +import java.util.Optional; + +import com.yahoo.compress.CompressionType; +import com.yahoo.fs4.BasicPacket; +import com.yahoo.fs4.ChannelTimeoutException; +import com.yahoo.fs4.GetDocSumsPacket; +import com.yahoo.fs4.Packet; +import com.yahoo.fs4.PingPacket; +import com.yahoo.fs4.PongPacket; +import com.yahoo.fs4.QueryPacket; +import com.yahoo.fs4.QueryResultPacket; +import com.yahoo.fs4.mplex.Backend; +import com.yahoo.fs4.mplex.FS4Channel; +import com.yahoo.fs4.mplex.InvalidChannelException; +import com.yahoo.prelude.Ping; +import com.yahoo.prelude.Pong; +import com.yahoo.prelude.querytransform.QueryRewrite; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.dispatch.Dispatcher; +import com.yahoo.search.query.Ranking; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.HitGroup; +import com.yahoo.search.searchchain.Execution; +import edu.umd.cs.findbugs.annotations.NonNull; + +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.Iterator; +import java.util.TimeZone; +import java.util.logging.Level; + +import static com.yahoo.container.util.Util.quote; + +/** + * The searcher which forwards queries to fdispatch nodes, using the fnet/fs4 + * network layer. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +// TODO: Clean up all the duplication in the various search methods by +// switching to doing all the error handling using exceptions below doSearch2. +// Right now half is done by exceptions handled in doSearch2 and half by setting +// errors on results and returning them. It could be handy to create a QueryHandlingErrorException +// or similar which could wrap an error message, and then just always throw that and +// catch and unwrap into a results with an error in high level methods. 
-Jon +public class FastSearcher extends VespaBackEndSearcher { + + /** If this is turned on this will fill summaries by dispatching directly to search nodes over RPC */ + private final static CompoundName dispatchSummaries = new CompoundName("dispatch.summaries"); + + /** The compression method which will be used with rpc dispatch. "lz4" (default) and "none" is supported. */ + private final static CompoundName dispatchCompression = new CompoundName("dispatch.compression"); + + /** Used to dispatch directly to search nodes over RPC, replacing the old fnet communication path */ + private final Dispatcher dispatcher; + + /** Time (in ms) at which the index of this searcher was last modified */ + private volatile long editionTimeStamp = 0; + + /** Edition of the index */ + private int docstamp; + + private Backend backend; + + /** + * Creates a Fastsearcher. + * + * @param backend The backend object for this FastSearcher + * @param docSumParams Document summary parameters + * @param clusterParams The cluster number, and other cluster backend parameters + * @param cacheParams The size, lifetime, and controller of our cache + * @param documentdbInfoConfig Document database parameters + */ + public FastSearcher(Backend backend, Dispatcher dispatcher, SummaryParameters docSumParams, ClusterParams clusterParams, + CacheParams cacheParams, DocumentdbInfoConfig documentdbInfoConfig) { + init(docSumParams, clusterParams, cacheParams, documentdbInfoConfig); + this.backend = backend; + this.dispatcher = dispatcher; + } + + /** Clears the packet cache if the received timestamp is older than our timestamp */ + private void checkTimestamp(QueryResultPacket resultPacket) { + checkTimestamp(resultPacket.getDocstamp()); + } + + /** Clears the packet cache if the received timestamp is older than our timestamp */ + private void checkTimestamp(int newDocstamp) { + if (docstamp < newDocstamp) { + long currentTimeMillis = System.currentTimeMillis(); + + docstamp = newDocstamp; + 
setEditionTimeStamp(currentTimeMillis); + } + } + + private static SimpleDateFormat isoDateFormat; + + static { + isoDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss z"); + isoDateFormat.setTimeZone(TimeZone.getTimeZone("GMT")); + } + + private int countNumberOfFastHits(Result result) { + int numFastHits = 0; + + for (Iterator<com.yahoo.search.result.Hit> i = hitIterator(result); i.hasNext();) { + com.yahoo.search.result.Hit hit = i.next(); + + if (hit instanceof FastHit) { + numFastHits++; + } + } + return numFastHits; + } + + /** + * Pings the backend. Does not propagate to other searchers. + */ + @Override + public Pong ping(Ping ping, Execution execution) { + // If you want to change this code, you need to understand + // com.yahoo.prelude.cluster.ClusterSearcher.ping(Searcher) and + // com.yahoo.prelude.cluster.TrafficNodeMonitor.failed(ErrorMessage) + FS4Channel channel = backend.openPingChannel(); + + try { + PingPacket pingPacket = new PingPacket(); + pingPacket.enableActivedocsReporting(); + Pong pong = new Pong(); + + try { + boolean couldSend = channel.sendPacket(pingPacket); + if (!couldSend) { + pong.addError(ErrorMessage.createBackendCommunicationError("Could not ping in " + getName())); + return pong; + } + } catch (InvalidChannelException e) { + pong.addError(ErrorMessage.createBackendCommunicationError("Invalid channel " + getName())); + return pong; + } catch (IllegalStateException e) { + pong.addError( + ErrorMessage.createBackendCommunicationError("Illegal state in FS4: " + e.getMessage())); + return pong; + } catch (IOException e) { + pong.addError(ErrorMessage.createBackendCommunicationError("IO error while sending ping: " + e.getMessage())); + return pong; + } + // We should only get a single packet + BasicPacket[] packets; + + try { + packets = channel.receivePackets(ping.getTimeout(), 1); + } catch (ChannelTimeoutException e) { + pong.addError(ErrorMessage.createNoAnswerWhenPingingNode("timeout while waiting for fdispatch for " + 
getName())); + return pong; + } catch (InvalidChannelException e) { + pong.addError(ErrorMessage.createBackendCommunicationError("Invalid channel for " + getName())); + return pong; + + } + + if (packets.length == 0) { + pong.addError(ErrorMessage.createBackendCommunicationError(getName() + " got no packets back")); + return pong; + } + + if (isLoggingFine()) { + getLogger().finest("got packets " + packets.length + " packets"); + } + + try { + ensureInstanceOf(PongPacket.class, packets[0]); + } catch (TimeoutException e) { + pong.addError(ErrorMessage.createTimeout(e.getMessage())); + return pong; + } catch (IOException e) { + pong.addError(ErrorMessage.createBackendCommunicationError("Unexpected packet class returned after ping: " + e.getMessage())); + return pong; + } + pong.addPongPacket((PongPacket) packets[0]); + checkTimestamp(((PongPacket) packets[0]).getDocstamp()); + return pong; + } finally { + if (channel != null) { + channel.close(); + } + } + } + + protected void transformQuery(Query query) { + QueryRewrite.rewriteSddocname(query); + } + + @Override + public Result doSearch2(Query query, QueryPacket queryPacket, CacheKey cacheKey, Execution execution) { + FS4Channel channel = null; + try { + channel = backend.openChannel(); + channel.setQuery(query); + + // If not found, then fetch from the source. The call to + // insert into cache will be made from within searchTwoPhase + Result result = searchTwoPhase(channel, query, queryPacket, cacheKey); + + if (query.properties().getBoolean(Ranking.RANKFEATURES, false)) { + // There is currently no correct choice for which + // summary class we want to fetch at this point. If we + // fetch the one selected by the user it may not + // contain the data we need. If we fetch the default + // one we end up fetching docsums twice unless the + // user also requested the default one. 
+ fill(result, query.getPresentation().getSummary(), execution); // ARGH + } + return result; + } catch (TimeoutException e) { + return new Result(query,ErrorMessage.createTimeout(e.getMessage())); + } catch (IOException e) { + Result result = new Result(query); + if (query.getTraceLevel() >= 1) + query.trace(getName() + " error response: " + result, false, 1); + result.hits().addError(ErrorMessage.createBackendCommunicationError(getName() + " failed: "+ e.getMessage())); + return result; + } finally { + if (channel != null) { + channel.close(); + } + } + } + + /** + * Only used to fill the sddocname field when using direct dispatching as that is normally done in VespaBackEndSearcher.decodeSummary + * @param result The result + */ + private void fillSDDocName(Result result) { + DocumentDatabase db = getDocumentDatabase(result.getQuery()); + for (Iterator<Hit> i = hitIterator(result); i.hasNext();) { + Hit hit = i.next(); + if (hit instanceof FastHit) { + hit.setField(Hit.SDDOCNAME_FIELD, db.getName()); + } + } + } + /** + * Perform a partial docsum fill for a temporary result + * representing a partition of the complete fill request. 
+ * + * @param result result containing a partition of the unfilled hits + * @param summaryClass the summary class we want to fill with + **/ + protected void doPartialFill(Result result, String summaryClass) { + if (result.isFilled(summaryClass)) return; + + Query query = result.getQuery(); + traceQuery(getName(), "fill", query, query.getOffset(), query.getHits(), 2, quotedSummaryClass(summaryClass)); + + if (query.properties().getBoolean(dispatchSummaries)) { + CompressionType compression = + CompressionType.valueOf(query.properties().getString(dispatchCompression, "LZ4").toUpperCase()); + fillSDDocName(result); + dispatcher.fill(result, summaryClass, compression); + return; + } + + CacheKey cacheKey = null; + PacketWrapper packetWrapper = null; + if (getCacheControl().useCache(query)) { + cacheKey = fetchCacheKeyFromHits(result.hits(), summaryClass); + if (cacheKey == null) { + QueryPacket queryPacket = QueryPacket.create(query); + cacheKey = new CacheKey(queryPacket); + } + packetWrapper = cacheLookupTwoPhase(cacheKey, result,summaryClass); + } + + FS4Channel channel = backend.openChannel(); + channel.setQuery(query); + Packet[] receivedPackets; + try { + DocsumPacketKey[] packetKeys; + + if (countNumberOfFastHits(result) > 0) { + packetKeys = getPacketKeys(result, summaryClass, false); + if (packetKeys.length == 0) { + receivedPackets = new Packet[0]; + } else { + try { + receivedPackets = fetchSummaries(channel, result, summaryClass); + } catch (InvalidChannelException e) { + result.hits().addError(ErrorMessage.createBackendCommunicationError("Invalid channel " + getName() + " (summary fetch)")); + return; + } catch (ChannelTimeoutException e) { + result.hits().addError(ErrorMessage.createTimeout("timeout waiting for summaries from " + getName())); + return; + } catch (IOException e) { + result.hits().addError(ErrorMessage.createBackendCommunicationError( + "IO error while talking on channel " + getName() + " (summary fetch): " + e.getMessage())); + return; + 
} + if (receivedPackets.length == 0) { + result.hits().addError(ErrorMessage.createBackendCommunicationError(getName() + " got no packets back (summary fetch)")); + return; + } + } + } else { + packetKeys = new DocsumPacketKey[0]; + receivedPackets = new Packet[0]; + } + + int skippedHits; + try { + skippedHits = fillHits(result, 0, receivedPackets, summaryClass); + } catch (TimeoutException e) { + result.hits().addError(ErrorMessage.createTimeout(e.getMessage())); + return; + } catch (IOException e) { + result.hits().addError(ErrorMessage.createBackendCommunicationError("Error filling hits with summary fields, source: " + getName())); + return; + } + if (skippedHits==0 && packetWrapper != null) { + cacheControl.updateCacheEntry(cacheKey, query, packetKeys, receivedPackets); + } + + if ( skippedHits>0 ) { + getLogger().info("Could not fill summary '" + summaryClass + "' for " + skippedHits + " hits for query: " + result.getQuery()); + result.hits().addError(com.yahoo.search.result.ErrorMessage.createEmptyDocsums("Missing hit data for summary '" + summaryClass + "' for " + skippedHits + " hits")); + } + result.analyzeHits(); + + if (query.getTraceLevel() >= 3) { + int hitNumber = 0; + for (Iterator<com.yahoo.search.result.Hit> i = hitIterator(result); i.hasNext();) { + com.yahoo.search.result.Hit hit = i.next(); + if ( ! (hit instanceof FastHit)) continue; + FastHit fastHit = (FastHit) hit; + + String traceMsg = "Hit: " + (hitNumber++) + " from " + (fastHit.isCached() ? "cache" : "backend" ); + if ( ! fastHit.isFilled(summaryClass)) + traceMsg += ". Error, hit, not filled"; + query.trace(traceMsg, false, 3); + } + } + } finally { + channel.close(); + } + } + + private static @NonNull Optional<String> quotedSummaryClass(String summaryClass) { + return Optional.of(summaryClass == null ? 
"[null]" : quote(summaryClass)); + } + + private CacheKey fetchCacheKeyFromHits(HitGroup hits, String summaryClass) { + for (Iterator<Hit> i = hits.unorderedDeepIterator(); i.hasNext();) { + Hit h = i.next(); + if (h instanceof FastHit) { + FastHit hit = (FastHit) h; + if (hit.isFilled(summaryClass)) { + continue; + } + if (hit.getCacheKey() != null) { + return hit.getCacheKey(); + } + } + } + return null; + } + + private Result searchTwoPhase(FS4Channel channel, Query query, QueryPacket queryPacket, CacheKey cacheKey) throws IOException { + + if (isLoggingFine()) + getLogger().finest("sending query packet"); + + try { + boolean couldSend = channel.sendPacket(queryPacket); + if ( ! couldSend) + return new Result(query,ErrorMessage.createBackendCommunicationError("Could not reach '" + getName() + "'")); + } catch (InvalidChannelException e) { + return new Result(query,ErrorMessage.createBackendCommunicationError("Invalid channel " + getName())); + } catch (IllegalStateException e) { + return new Result(query, ErrorMessage.createBackendCommunicationError("Illegal state in FS4: " + e.getMessage())); + } + + BasicPacket[] basicPackets; + + try { + basicPackets = channel.receivePackets(Math.max(50, query.getTimeLeft()), 1); + } catch (ChannelTimeoutException e) { + return new Result(query,ErrorMessage.createTimeout("Timeout while waiting for " + getName())); + } catch (InvalidChannelException e) { + return new Result(query,ErrorMessage.createBackendCommunicationError("Invalid channel for " + getName())); + } + + if (basicPackets.length == 0) { + return new Result(query,ErrorMessage.createBackendCommunicationError(getName() + " got no packets back")); + } + + if (isLoggingFine()) + getLogger().finest("got packets " + basicPackets.length + " packets"); + + ensureInstanceOf(QueryResultPacket.class, basicPackets[0]); + QueryResultPacket resultPacket = (QueryResultPacket) basicPackets[0]; + + checkTimestamp(resultPacket); + + if (isLoggingFine()) + getLogger().finest("got 
query packet. " + "docsumClass=" + query.getPresentation().getSummary()); + + if (query.getPresentation().getSummary() == null) + query.getPresentation().setSummary(getDefaultDocsumClass()); + + Result result = new Result(query); + + addMetaInfo(query, queryPacket.getQueryPacketData(), resultPacket, result, false); + + addUnfilledHits(result, resultPacket.getDocuments(), false, queryPacket.getQueryPacketData(), cacheKey); + Packet[] packets; + PacketWrapper packetWrapper = cacheControl.lookup(cacheKey, query); + + if (packetWrapper != null) { + cacheControl.updateCacheEntry(cacheKey, query, resultPacket); + } + else { + if (resultPacket.getCoverageFeature() && ! resultPacket.getCoverageFull()) { + // Don't add error here, it was done in first phase + // No check if packetWrapper already exists, since incomplete + // first phase data won't be cached anyway. + } else { + packets = new Packet[1]; + packets[0] = resultPacket; + cacheControl.cache(cacheKey, query, new DocsumPacketKey[0], packets); + } + } + return result; + } + + private Packet[] convertBasicPackets(BasicPacket[] basicPackets) throws ClassCastException { + // trying to cast a BasicPacket[] to Packet[] will compile, + // but lead to a runtime error. At least that's what I got + // from testing and reading the specification. I'm just happy + // if someone tells me what's the proper Java way of doing + // this. -SK + Packet[] packets = new Packet[basicPackets.length]; + + for (int i = 0; i < basicPackets.length; i++) { + packets[i] = (Packet) basicPackets[i]; + } + return packets; + } + + private Packet[] fetchSummaries(FS4Channel channel, Result result, String summaryClass) + throws InvalidChannelException, ChannelTimeoutException, ClassCastException, IOException { + + BasicPacket[] receivedPackets; + boolean summaryNeedsQuery = summaryNeedsQuery(result.getQuery()); + if (result.getQuery().getTraceLevel() >=3) + result.getQuery().trace((summaryNeedsQuery ? 
"Resending " : "Not resending ") + "query during document summary fetching", 3); + + GetDocSumsPacket docsumsPacket = GetDocSumsPacket.create(result, summaryClass, summaryNeedsQuery); + int compressionLimit = result.getQuery().properties().getInteger(PACKET_COMPRESSION_LIMIT, 0); + docsumsPacket.setCompressionLimit(compressionLimit); + if (compressionLimit != 0) { + docsumsPacket.setCompressionType(result.getQuery().properties().getString(PACKET_COMPRESSION_TYPE, "lz4")); + } + + if (isLoggingFine()) + getLogger().finest("Sending " + docsumsPacket + " on " + channel); + + boolean couldSend = channel.sendPacket(docsumsPacket); + if ( ! couldSend) throw new IOException("Could not successfully send GetDocSumsPacket."); + receivedPackets = channel.receivePackets(Math.max(50, result.getQuery().getTimeLeft()), docsumsPacket.getNumDocsums() + 1); + + if (isLoggingFine()) + getLogger().finest("got " + receivedPackets.length + "docsumPackets"); + + return convertBasicPackets(receivedPackets); + } + + /** + * Returns whether we need to send the query when fetching summaries. + * This is necessary if the query requests summary features or dynamic snippeting + */ + private boolean summaryNeedsQuery(Query query) { + if (query.getRanking().getQueryCache()) return false; // Query is cached in backend + + DocumentDatabase documentDb = getDocumentDatabase(query); + + // Needed to generate a dynamic summary? + DocsumDefinition docsumDefinition = documentDb.getDocsumDefinitionSet().getDocsumDefinition(query.getPresentation().getSummary()); + if (docsumDefinition == null) return true; // stay safe + if (docsumDefinition.isDynamic()) return true; + + // Needed to generate ranking features? 
+ RankProfile rankProfile = documentDb.rankProfiles().get(query.getRanking().getProfile()); + if (rankProfile == null) return true; // stay safe + if (rankProfile.hasSummaryFeatures()) return true; + if (query.getRanking().getListFeatures()) return true; + + // (Don't just add other checks here as there is a return false above) + + return false; + } + + /** + * Whether to mask out the row id from the index uri. + * Masking out the row number is useful when it is necessary to deduplicate + * across rows. That is necessary with searchers which issues several queries + * to produce one result in the first phase, as the grouping searcher - when + * some of those searchers go to different rows, a mechanism is needed to detect + * duplicates returned from different rows before the summary is requested. + * Producing an index id which is the same across rows and using that as the + * hit uri provides this. Note that this only works if the document ids are the + * same for all the nodes (rows) in a column. This is usually the case for + * batch and incremental indexing, but not for realtime. 
+ */ + + public long getEditionTimeStamp() { + return editionTimeStamp; + } + + public void setEditionTimeStamp(long editionTime) { + this.editionTimeStamp = editionTime; + } + + public String toString() { + return "fast searcher (" + getName() + ") " + backend; + } + + /** + * Returns an array of the hits contained in this result + * + * @param filled true to return all hits, false to return only unfilled hits + * @return array of docids, empty array if no hits + */ + private DocsumPacketKey[] getPacketKeys(Result result, String summaryClass, boolean filled) { + DocsumPacketKey[] packetKeys = new DocsumPacketKey[result.getHitCount()]; + int x = 0; + + for (Iterator<com.yahoo.search.result.Hit> i = hitIterator(result); i.hasNext();) { + com.yahoo.search.result.Hit hit = i.next(); + if (hit instanceof FastHit) { + FastHit fastHit = (FastHit) hit; + if(filled || !fastHit.isFilled(summaryClass)) { + packetKeys[x] = new DocsumPacketKey(fastHit.getGlobalId(), fastHit.getPartId(), summaryClass); + x++; + } + } + } + if (x < packetKeys.length) { + DocsumPacketKey[] tmp = new DocsumPacketKey[x]; + + System.arraycopy(packetKeys, 0, tmp, 0, x); + return tmp; + } else { + return packetKeys; + } + } + + protected boolean isLoggingFine() { + return getLogger().isLoggable(Level.FINE); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/FeatureDataField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FeatureDataField.java new file mode 100644 index 00000000000..b622f5c62c5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FeatureDataField.java @@ -0,0 +1,45 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.fastsearch; + +import com.yahoo.data.access.Inspector; +import com.yahoo.data.access.Type; +import com.yahoo.container.search.LegacyEmulationConfig; +import com.yahoo.search.result.FeatureData; + +/** + * Class representing a "feature data" field. This was historically + * just a string containing JSON; now it's a structure of + * data (that will be rendered as JSON by default). + */ +public class FeatureDataField extends LongstringField { + + public FeatureDataField (String name) { + super(name); + } + + @Override + public String toString() { + return "field " + getName() + " type FeatureDataField"; + } + + public Object convert(Inspector value) { + if (! value.valid()) { + if (getEmulConfig().stringBackedFeatureData()) { + return ""; + } else if (getEmulConfig().forceFillEmptyFields()) { + return new FeatureData(com.yahoo.data.access.simple.Value.empty()); + } else { + return null; + } + } + if (value.type() == Type.STRING) { + return value.asString(); + } + FeatureData obj = new FeatureData(value); + if (getEmulConfig().stringBackedFeatureData()) { + return obj.toJson(); + } else { + return obj; + } + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/FloatField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FloatField.java new file mode 100644 index 00000000000..ed5c7edd4da --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FloatField.java @@ -0,0 +1,49 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.fastsearch; + + +import java.nio.ByteBuffer; + +import com.yahoo.search.result.NanNumber; +import com.yahoo.data.access.Inspector; + + +/** + * @author <a href="mailto:mathiasm@yahoo-inc.com">Mathias M\u00f8lster Lidal</a> + */ +public class FloatField extends DocsumField { + static final double EMPTY_VALUE = Float.NaN; + + public FloatField(String name) { + super(name); + } + + private Object convert(float value) { + if (Float.isNaN(value)) { + return NanNumber.NaN; + } else { + return Float.valueOf(value); + } + } + + public Object decode(ByteBuffer b) { + return convert(b.getFloat()); + } + + public Object decode(ByteBuffer b, FastHit hit) { + Object field = decode(b); + hit.setField(name, field); + return field; + } + + public int getLength(ByteBuffer b) { + int offset = b.position(); + final int bytelength = Float.SIZE >> 3; + b.position(offset + bytelength); + return bytelength; + } + + public Object convert(Inspector value) { + return convert((float)value.asDouble(EMPTY_VALUE)); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/GroupingListHit.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/GroupingListHit.java new file mode 100644 index 00000000000..f8425ba8cfd --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/GroupingListHit.java @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + +import java.util.List; + +import com.yahoo.fs4.QueryPacketData; +import com.yahoo.search.result.Hit; +import com.yahoo.searchlib.aggregation.Grouping; + +// TODO: Author! 
+public class GroupingListHit extends Hit { + private static final long serialVersionUID = -6645125887873082234L; + + /** for unit tests only, may give problems if grouping contains docsums */ + public GroupingListHit(List<Grouping> groupingList) { + this(groupingList, null); + } + + public GroupingListHit(List<Grouping> groupingList, + DocsumDefinitionSet defs) + { + super("meta:grouping", 0); + this.groupingList = groupingList; + this.defs = defs; + } + public boolean isMeta() { return true; } + + public List<Grouping> getGroupingList() { return groupingList; } + public DocsumDefinitionSet getDocsumDefinitionSet() { return defs; } + + private final List<Grouping> groupingList; + private final DocsumDefinitionSet defs; + private QueryPacketData queryPacketData; + + public void setQueryPacketData(QueryPacketData queryPacketData) { + this.queryPacketData = queryPacketData; + } + + /** Returns encoded query data from the query used to create this, or null if none present */ + public QueryPacketData getQueryPacketData() { + return queryPacketData; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/Int64Field.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/Int64Field.java new file mode 100644 index 00000000000..2759f313d52 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/Int64Field.java @@ -0,0 +1,57 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+/** + * Class representing a integer field in the result set + * + */ +package com.yahoo.prelude.fastsearch; + + +import java.nio.ByteBuffer; + +import com.yahoo.search.result.NanNumber; +import com.yahoo.data.access.Inspector; + + +/** + * @author <a href="mailto:borud@yahoo-inc.com">Bj\u00f8rn Borud</a> + */ +public class Int64Field extends DocsumField { + static final long EMPTY_VALUE = Long.MIN_VALUE; + + public Int64Field(String name) { + super(name); + } + + private Object convert(long value) { + if (value == EMPTY_VALUE) { + return NanNumber.NaN; + } else { + return Long.valueOf(value); + } + } + + public Object decode(ByteBuffer b) { + return convert(b.getLong()); + } + + public Object decode(ByteBuffer b, FastHit hit) { + Object field = decode(b); + hit.setField(name, field); + return field; + } + + public String toString() { + return "field " + getName() + " type int64"; + } + + public int getLength(ByteBuffer b) { + int offset = b.position(); + final int bytelength = Long.SIZE >> 3; + b.position(offset + bytelength); + return bytelength; + } + + public Object convert(Inspector value) { + return convert(value.asLong(EMPTY_VALUE)); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/IntegerField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/IntegerField.java new file mode 100644 index 00000000000..b134ea49bac --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/IntegerField.java @@ -0,0 +1,56 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+/** + * Class representing a integer field in the result set + * + */ +package com.yahoo.prelude.fastsearch; + + +import java.nio.ByteBuffer; + +import com.yahoo.search.result.NanNumber; +import com.yahoo.data.access.Inspector; + +/** + * @author <a href="mailto:borud@yahoo-inc.com">Bj\u00f8rn Borud</a> + */ +public class IntegerField extends DocsumField { + static final int EMPTY_VALUE = Integer.MIN_VALUE; + + public IntegerField(String name) { + super(name); + } + + private Object convert(int value) { + if (value == EMPTY_VALUE) { + return NanNumber.NaN; + } else { + return Integer.valueOf(value); + } + } + + public Object decode(ByteBuffer b) { + return convert(b.getInt()); + } + + public Object decode(ByteBuffer b, FastHit hit) { + Object field = decode(b); + hit.setField(name, field); + return field; + } + + public String toString() { + return "field " + getName() + " type int"; + } + + public int getLength(ByteBuffer b) { + int offset = b.position(); + final int bytelength = Integer.SIZE >> 3; + b.position(offset + bytelength); + return bytelength; + } + + public Object convert(Inspector value) { + return convert((int)value.asLong(EMPTY_VALUE)); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/JSONField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/JSONField.java new file mode 100644 index 00000000000..d61a15723ac --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/JSONField.java @@ -0,0 +1,180 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.fastsearch; + + +import java.nio.ByteBuffer; + +import com.yahoo.io.SlowInflate; +import com.yahoo.prelude.hitfield.JSONString; +import com.yahoo.text.Utf8; +import com.yahoo.data.access.*; +import com.yahoo.data.access.simple.Value; + + +/** + * Class representing a JSON string field in the result set + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class JSONField extends DocsumField implements VariableLengthField { + public JSONField(String name) { + super(name); + } + + @Override + public Object decode(ByteBuffer b) { + long dataLen = 0; + long len = ((long) b.getInt()) & 0xffffffffL; + boolean compressed; + JSONString field; + + // if MSB is set this is a compressed field. set the compressed + // flag accordingly and decompress the data + compressed = ((len & 0x80000000) != 0); + if (compressed) { + len &= 0x7fffffff; + dataLen = b.getInt(); + len -= 4; + } + + byte[] tmp = new byte[(int) len]; + + b.get(tmp); + + if (compressed) { + SlowInflate inf = new SlowInflate(); + + tmp = inf.unpack(tmp, (int) dataLen); + } + + field = new JSONString(Utf8.toString(tmp)); + return field; + } + + @Override + public Object decode(ByteBuffer b, FastHit hit) { + Object field = decode(b); + hit.setField(name, field); + return field; + } + + @Override + public String toString() { + return "field " + getName() + " type JSONString"; + } + + @Override + public int getLength(ByteBuffer b) { + int offset = b.position(); + // MSB = compression flag, re decode + int len = b.getInt() & 0x7fffffff; + b.position(offset + len + (Integer.SIZE >> 3)); + return len + (Integer.SIZE >> 3); + } + + @Override + public boolean isCompressed(ByteBuffer b) { + int offset = b.position(); + // MSB = compression flag, re decode + int compressed = b.getInt() & 0x80000000; + b.position(offset); + return compressed != 0; + } + + @Override + public int sizeOfLength() { + return Integer.SIZE >> 3; + } + + private static class 
CompatibilityConverter { + Value.ArrayValue target = new Value.ArrayValue(); + + Inspector stringify(Inspector value) { + if (value.type() == Type.STRING) return value; + if (value.type() == Type.LONG) { + String str = String.valueOf(value.asLong()); + return new Value.StringValue(str); + } + if (value.type() == Type.DOUBLE) { + String str = String.valueOf(value.asDouble()); + return new Value.StringValue(str); + } + String str = value.toString(); + return new Value.StringValue(str); + } + } + + private static class ArrConv extends CompatibilityConverter + implements ArrayTraverser + { + @Override + public void entry(int idx, Inspector value) { + target.add(stringify(value)); + } + } + + private static class WsConv1 extends CompatibilityConverter + implements ArrayTraverser + { + @Override + public void entry(int idx, Inspector value) { + Value.ArrayValue obj = new Value.ArrayValue(); + obj.add(stringify(value.entry(0))); + obj.add(value.entry(1)); + target.add(obj); + } + } + + private static class WsConv2 extends CompatibilityConverter + implements ArrayTraverser + { + @Override + public void entry(int idx, Inspector value) { + Value.ArrayValue obj = new Value.ArrayValue(); + obj.add(stringify(value.field("item"))); + obj.add(value.field("weight")); + target.add(obj); + } + } + + static Inspector convertTop(Inspector value) { + if (value.type() == Type.ARRAY && value.entryCount() > 0) { + Inspector first = value.entry(0); + if (first.type() == Type.ARRAY && first.entryCount() == 2) { + // old style weighted set + WsConv1 conv = new WsConv1(); + value.traverse(conv); + return conv.target; + } + if (first.type() == Type.OBJECT && + first.fieldCount() == 2 && + first.field("item").valid() && + first.field("weight").valid()) + { + // new style weighted set + WsConv2 conv = new WsConv2(); + value.traverse(conv); + return conv.target; + } + if (first.type() == Type.LONG) { + ArrConv conv = new ArrConv(); + value.traverse(conv); + return conv.target; + } + if 
(first.type() == Type.DOUBLE) { + ArrConv conv = new ArrConv(); + value.traverse(conv); + return conv.target; + } + } + return value; + } + + public Object convert(Inspector value) { + if (value.valid()) { + return new JSONString(convertTop(value)); + } else { + return new JSONString(""); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongdataField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongdataField.java new file mode 100644 index 00000000000..617f382f462 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongdataField.java @@ -0,0 +1,90 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * Class representing a long data field in the result set. + * + */ +package com.yahoo.prelude.fastsearch; + + +import java.nio.ByteBuffer; + +import com.yahoo.io.SlowInflate; +import com.yahoo.prelude.hitfield.RawData; +import com.yahoo.data.access.simple.Value; +import com.yahoo.data.access.Inspector; + + +/** + * @author <a href="mailto:borud@yahoo-inc.com">Bj\u00f8rn Borud</a> + */ +public class LongdataField extends DocsumField implements VariableLengthField { + public LongdataField(String name) { + super(name); + } + + private Object convert(byte[] value) { + return new RawData(value); + } + + @Override + public Object decode(ByteBuffer b) { + long dataLen = 0; + long len = ((long) b.getInt()) & 0xffffffffL; + boolean compressed; + + // if MSB is set this is a compressed field. 
set the compressed + // flag accordingly and decompress the data + compressed = ((len & 0x80000000) != 0); + if (compressed) { + len &= 0x7fffffff; + dataLen = b.getInt(); + len -= 4; + } + + byte[] tmp = new byte[(int) len]; + + b.get(tmp); + + if (compressed) { + SlowInflate inf = new SlowInflate(); + + tmp = inf.unpack(tmp, (int) dataLen); + } + return convert(tmp); + } + + @Override + public Object decode(ByteBuffer b, FastHit hit) { + Object field = decode(b); + hit.setField(name, field); + return field; + } + + @Override + public int getLength(ByteBuffer b) { + int offset = b.position(); + // MSB = compression flag, re decode + int len = b.getInt() & 0x7fffffff; + b.position(offset + len + (Integer.SIZE >> 3)); + return len + (Integer.SIZE >> 3); + } + + @Override + public boolean isCompressed(ByteBuffer b) { + int offset = b.position(); + // MSB = compression flag, re decode + int compressed = b.getInt() & 0x80000000; + b.position(offset); + return compressed != 0; + } + + @Override + public int sizeOfLength() { + return Integer.SIZE >> 3; + } + + @Override + public Object convert(Inspector value) { + return convert(value.asData(Value.empty().asData())); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongstringField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongstringField.java new file mode 100644 index 00000000000..744476beaa5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongstringField.java @@ -0,0 +1,87 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * Class representing a long string field in the result set. 
+ * + */ +package com.yahoo.prelude.fastsearch; + + +import java.nio.ByteBuffer; + +import com.yahoo.io.SlowInflate; +import com.yahoo.text.Utf8; +import com.yahoo.data.access.Inspector; + + +/** + * @author <a href="mailto:borud@yahoo-inc.com">Bj\u00f8rn Borud</a> + */ +public class LongstringField extends DocsumField implements VariableLengthField { + public LongstringField(String name) { + super(name); + } + + @Override + public Object decode(ByteBuffer b) { + long dataLen = 0; + long len = ((long) b.getInt()) & 0xffffffffL; + boolean compressed; + String field; + + // if MSB is set this is a compressed field. set the compressed + // flag accordingly and decompress the data + compressed = ((len & 0x80000000) != 0); + if (compressed) { + len &= 0x7fffffff; + dataLen = b.getInt(); + len -= 4; + } + + byte[] tmp = new byte[(int) len]; + + b.get(tmp); + + if (compressed) { + SlowInflate inf = new SlowInflate(); + + tmp = inf.unpack(tmp, (int) dataLen); + } + field = Utf8.toString(tmp); + return field; + } + + @Override + public Object decode(ByteBuffer b, FastHit hit) { + Object field = decode(b); + hit.setField(name, field); + return field; + } + + @Override + public int getLength(ByteBuffer b) { + int offset = b.position(); + // MSB = compression flag, re decode + int len = b.getInt() & 0x7fffffff; + b.position(offset + len + (Integer.SIZE >> 3)); + return len + (Integer.SIZE >> 3); + } + + @Override + public boolean isCompressed(ByteBuffer b) { + int offset = b.position(); + // MSB = compression flag, re decode + int compressed = b.getInt() & 0x80000000; + b.position(offset); + return compressed != 0; + } + + @Override + public int sizeOfLength() { + return Integer.SIZE >> 3; + } + + @Override + public Object convert(Inspector value) { + return value.asString(""); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/PacketCache.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/PacketCache.java new file mode 100644 index 
00000000000..e5a7d433324 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/PacketCache.java @@ -0,0 +1,189 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + + +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.logging.Logger; + +import com.yahoo.log.LogLevel; + + +/** + * An LRU cache using number of hits cached inside the results as + * size limiting factor. Directly modelled after com.yahoo.collections.Cache. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class PacketCache extends LinkedHashMap<CacheKey, PacketWrapper> { + + /** + * + */ + private static final long serialVersionUID = -7403077211906108356L; + + /** The <i>current</i> number of bytes of packets in this cache */ + private int totalSize; + + /** The maximum number of bytes of packets in this cache */ + private final int capacity; + + /** The max size of a cached item compared to the total size */ + private int maxCacheItemPercentage = 1; + + /** The max age for a valid cache entry, 0 mean infinite */ + private final long maxAge; + + private static final Logger log = Logger.getLogger(PacketCache.class.getName()); + + public void clear() { + super.clear(); + totalSize = 0; + } + + /** + * Sets the max size of a cached item compared to the total size + * Cache requests for larger objects will be ignored + */ + public void setMaxCacheItemPercentage(int maxCapacityPercentage) { + maxCacheItemPercentage = maxCapacityPercentage; + } + + /** + * Creates a cache with a size given by + * cachesizemegabytes*2^20+cachesizebytes + * + * @param capacityMegaBytes the cache size, measured in megabytes + * @param capacityBytes additional number of bytes to add to the cache size + * @param maxAge seconds a cache entry is 
valid, 0 or less are illegal arguments + */ + public PacketCache(int capacityMegaBytes,int capacityBytes,double maxAge) { + // hardcoded inital entry capacity, won't matter much anyway + // after a while + super(12500, 1.0f, true); + if (maxAge <= 0.0d) { + throw new IllegalArgumentException("maxAge <= 0 not legal on 5.1, use some very large number for no timeout."); + } + if (capacityMegaBytes > (Integer.MAX_VALUE >> 20)) { + log.log(LogLevel.INFO, "Packet cache of more than 2 GB requested. Reverting to 2 GB packet cache."); + this.capacity = Integer.MAX_VALUE; + } else { + this.capacity = (capacityMegaBytes << 20) + capacityBytes; + } + if (this.capacity <= 0) { + throw new IllegalArgumentException("Total cache size set to 0 or less bytes. If no caching is desired, avoid creating this object instead."); + } + this.maxAge = (long) (maxAge * 1000.0d); + } + + /** + * Overrides LinkedHashMap.removeEldestEntry as suggested to implement LRU cache. + */ + protected boolean removeEldestEntry(Map.Entry<CacheKey, PacketWrapper> eldest) + { + if (totalSize > capacity) { + totalSize -= eldest.getValue().getPacketsSize(); + return true; + } + return false; + } + + private void removeOverflow() { + if (totalSize < capacity) return; + + for (Iterator<PacketWrapper> i = values().iterator(); i.hasNext();) { + PacketWrapper eldestEntry = i.next(); + totalSize -= eldestEntry.getPacketsSize(); + + i.remove(); + if (totalSize < capacity) { + break; + } + } + } + + public int getCapacity() { + return capacity >> 20; + } + + public int getByteCapacity() { + return capacity; + } + + /** + * Adds a PacketWrapper object to this cache, + * unless the size is more than maxCacheItemPercentage of the total size + */ + public PacketWrapper put(CacheKey key, PacketWrapper value) { + return put(key, value, System.currentTimeMillis()); + } + + /** + * Adds a BasicPacket array to this cache, + * unless the size is more than maxCacheItemPercentage of the total size + * + * @param timestamp the 
timestamp for the first packet in the array, + * unit milliseconds + */ + public PacketWrapper put(CacheKey key, PacketWrapper result, long timestamp) { + int size = result.getPacketsSize(); + + if (size > 0) { + result.setTimestamp(timestamp); + } + + // don't insert if it is too big + if (size * 100 > capacity * maxCacheItemPercentage) { + // removeField the old one since that is now stale. + return remove(key); + } + + totalSize += size; + PacketWrapper previous = super.put(key, result); + if (previous != null) { + totalSize -= previous.getPacketsSize(); + } + if (totalSize > (capacity * 1.1)) { + removeOverflow(); + } + + return previous; + } + + public PacketWrapper get(CacheKey key) { + return get(key, System.currentTimeMillis()); + } + + public PacketWrapper get(CacheKey key, long now) { + PacketWrapper result = super.get(key); + + if (result == null) { + return result; + } + + long timestamp = result.getTimestamp(); + + if ((now - timestamp) > maxAge) { + remove(key); + return null; + } else { + return result; + } + } + + public PacketWrapper remove(CacheKey key) { + PacketWrapper removed = super.remove(key); + + if (removed != null) { + totalSize -= removed.getPacketsSize(); + } + return removed; + } + + public int totalPacketSize() { + return totalSize; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/PacketWrapper.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/PacketWrapper.java new file mode 100644 index 00000000000..1cc9678843c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/PacketWrapper.java @@ -0,0 +1,300 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.fastsearch; + + +import java.util.*; +import java.util.logging.Logger; + +import com.yahoo.fs4.BasicPacket; +import com.yahoo.fs4.DocsumPacket; +import com.yahoo.fs4.DocumentInfo; +import com.yahoo.fs4.Packet; +import com.yahoo.fs4.QueryResultPacket; +import com.yahoo.document.GlobalId; +import com.yahoo.document.DocumentId; + + +/** + * A wrapper for cache entries to make it possible to check whether the + * hits are truly correct. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author <a href="mailto:mathiasm@yahoo-inc.com">Mathias Lidal</a> + */ +public class PacketWrapper implements Cloneable { + private static Logger log = Logger.getLogger(PacketWrapper.class.getName()); + + final int keySize; + // associated result packets, sorted in regard to offset + private ArrayList<BasicPacket> resultPackets = new ArrayList<>(3); // length = "some small number" + LinkedHashMap<DocsumPacketKey, BasicPacket> packets; + + private static class ResultPacketComparator<T extends BasicPacket> implements Comparator<T> { + @Override + public int compare(T o1, T o2) { + QueryResultPacket r1 = (QueryResultPacket) o1; + QueryResultPacket r2 = (QueryResultPacket) o2; + return r1.getOffset() - r2.getOffset(); + } + } + + private static ResultPacketComparator<BasicPacket> resultPacketComparator = new ResultPacketComparator<>(); + + public PacketWrapper(CacheKey key, DocsumPacketKey[] packetKeys, BasicPacket[] bpackets) { + // Should not support key == null + this.keySize = key.byteSize(); + resultPackets.add(bpackets[0]); + this.packets = new LinkedHashMap<>(); + Packet[] ppackets = new Packet[packetKeys.length]; + + for (int i = 0; i < packetKeys.length; i++) { + ppackets[i] = (Packet) bpackets[i + 1]; + } + addDocsums(packetKeys, ppackets); + } + + /** + * Only used by PacketCacheTestCase, should not be used otherwise + */ + public PacketWrapper(CacheKey key, BasicPacket[] packets) { + // Should support key == null as this is 
for testing + if (key == null) { + keySize = 0; + } else { + this.keySize = key.byteSize(); + } + resultPackets.add(packets[0]); + this.packets = new LinkedHashMap<>(); + for (int i = 0; i < packets.length - 1; i++) { + this.packets.put(new DocsumPacketKey(new GlobalId(new DocumentId("doc:test:" + i).getGlobalId()), i, null), packets[i + 1]); + } + + } + + public QueryResultPacket getFirstResultPacket() { + if (resultPackets.size() > 0) { + return (QueryResultPacket) resultPackets.get(0); + } else { + return null; + } + } + + /** + * @return list of documents, null if not all are available + */ + public List<DocumentInfo> getDocuments(int offset, int hits) { + // speculatively allocate list for the hits + List<DocumentInfo> docs = new ArrayList<>(hits); + int currentOffset = 0; + QueryResultPacket r = getFirstResultPacket(); + if (offset >= r.getTotalDocumentCount()) { + // shortcut especially for results with 0 hits + // >= both necessary for end of result sets and + // offset == 0 && totalDocumentCount == 0 + return docs; + } + for (Iterator<BasicPacket> i = resultPackets.iterator(); i.hasNext();) { + QueryResultPacket result = (QueryResultPacket) i.next(); + if (result.getOffset() > offset + currentOffset) { + // we haven't got all the requested document info objects + return null; + } + if (result.getOffset() + result.getDocumentCount() + <= currentOffset + offset) { + // no new hits available + continue; + } + List<DocumentInfo> documents = result.getDocuments(); + int packetOffset = (offset + currentOffset) - result.getOffset(); + int afterLastDoc = Math.min(documents.size(), packetOffset + hits); + for (Iterator<DocumentInfo> j = documents.subList(packetOffset, afterLastDoc).iterator(); + docs.size() < hits && j.hasNext(); + ++currentOffset) { + docs.add(j.next()); + } + if (hits == docs.size() + || offset + docs.size() >= result.getTotalDocumentCount()) { + // We have the hits we need, or there are no more hits available + return docs; + } + } + return 
null; + } + + public void addResultPacket(QueryResultPacket resultPacket) { + // This function only keeps the internal list sorted according + // to offset + int insertionPoint; + QueryResultPacket r; + + if (resultPacket.getDocumentCount() == 0) { + return; // do not add a packet which does not contain new info + } + + insertionPoint = Collections.binarySearch(resultPackets, + resultPacket, + resultPacketComparator); + if (insertionPoint < 0) { + // new offset + insertionPoint = ~insertionPoint; // (insertionPoint + 1) * -1; + resultPackets.add(insertionPoint, resultPacket); + cleanResultPackets(); + } else { + // there exists a packet with same offset + r = (QueryResultPacket) resultPackets.get(insertionPoint); + if (resultPacket.getDocumentCount() > r.getDocumentCount()) { + resultPackets.set(insertionPoint, resultPacket); + cleanResultPackets(); + } + } + } + + private void cleanResultPackets() { + int marker; + QueryResultPacket previous; + if (resultPackets.size() == 1) { + return; + } + + // we know the list is sorted with regard to offset + // First ensure the list grows in regards to lastOffset as well. + // Could have done this addResultPacket, but this makes the code + // simpler. 
+ previous = (QueryResultPacket) resultPackets.get(0); + for (int i = 1; i < resultPackets.size(); ++i) { + QueryResultPacket r = (QueryResultPacket) resultPackets.get(i); + if (r.getOffset() + r.getDocumentCount() + <= previous.getOffset() + previous.getDocumentCount()) { + resultPackets.remove(i--); + } else { + previous = r; + } + } + + marker = 0; + while (marker < (resultPackets.size() - 2)) { + QueryResultPacket r0 = (QueryResultPacket) resultPackets.get(marker); + QueryResultPacket r1 = (QueryResultPacket) resultPackets.get(marker + 1); + QueryResultPacket r2 = (QueryResultPacket) resultPackets.get(marker + 2); + int nextOffset = r0.getOffset() + r0.getDocumentCount(); + + if (r1.getOffset() < nextOffset + && r2.getOffset() <= nextOffset) { + resultPackets.remove(marker + 1); + } + ++marker; + } + } + + /** Only for testing. */ + public List<BasicPacket> getResultPackets() { + return resultPackets; + } + + public void addDocsums(DocsumPacketKey[] packetKeys, BasicPacket[] bpackets, + int offset) { + Packet[] ppackets = new Packet[packetKeys.length]; + + for (int i = 0; i < packetKeys.length; i++) { + ppackets[i] = (Packet) bpackets[i + offset]; + } + addDocsums(packetKeys, ppackets); + } + + public void addDocsums(DocsumPacketKey[] packetKeys, Packet[] packets) { + if (packetKeys == null || packets == null) { + log.warning( + "addDocsums called with " + + (packetKeys == null ? "packetKeys == null " : "") + + (packets == null ? 
"packets == null" : "")); + return; + } + for (int i = 0; i < packetKeys.length && i < packets.length; i++) { + if (packetKeys[i] == null) { + log.warning( + "addDocsums called, but packetsKeys[" + i + "] is null"); + } else if (packets[i] instanceof DocsumPacket) { + DocsumPacket dp = (DocsumPacket) packets[i]; + + if (packetKeys[i].getGlobalId().equals(dp.getGlobalId()) + && dp.getData().length > 0) + { + this.packets.put(packetKeys[i], packets[i]); + log.fine("addDocsums " + i + " globalId: " + dp.getGlobalId()); + } else { + log.warning("not caching bad Docsum for globalId " + packetKeys[i].getGlobalId() + ": " + dp); + } + } else { + log.warning( + "addDocsums called, but packets[" + i + + "] is not a DocsumPacket instance"); + } + } + } + + public int getNumPackets() { + return packets.size(); + } + + BasicPacket getPacket(GlobalId globalId, int partid, String summaryClass) { + return getPacket( + new DocsumPacketKey(globalId, partid, summaryClass)); + } + + BasicPacket getPacket(DocsumPacketKey packetKey) { + return packets.get(packetKey); + } + + long getTimestamp() { + return getFirstResultPacket().getTimestamp(); + } + + public void setTimestamp(long timestamp) { + getFirstResultPacket().setTimestamp(timestamp); + } + + public int getPacketsSize() { + int size = 0; + + for (Iterator<BasicPacket> i = resultPackets.iterator(); i.hasNext();) { + QueryResultPacket r = (QueryResultPacket) i.next(); + int l = r.getLength(); + + if (l < 0) { + log.warning("resultpacket length " + l); + l = 10240; + } + size += l; + } + for (Iterator<BasicPacket> i = packets.values().iterator(); i.hasNext();) { + BasicPacket packet = i.next(); + int l = packet.getLength(); + + if (l < 0) { + log.warning("BasicPacket length " + l); + l = 10240; + } + size += l; + } + size += keySize; + return size; + } + + /** + * Straightforward shallow copy. 
+ */ + @SuppressWarnings("unchecked") + public Object clone() { + try { + PacketWrapper other = (PacketWrapper) super.clone(); + other.resultPackets = (ArrayList<BasicPacket>) resultPackets.clone(); + if (packets != null) { + other.packets = (LinkedHashMap<DocsumPacketKey, BasicPacket>) packets.clone(); + } + return other; + } catch (CloneNotSupportedException e) { + throw new RuntimeException("A non-cloneable superclass has been inserted.", + e); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/RankProfile.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/RankProfile.java new file mode 100644 index 00000000000..66931f37369 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/RankProfile.java @@ -0,0 +1,31 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + +/** + * Information about a rank profile + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +class RankProfile { + + private final String name; + + private final boolean hasSummaryFeatures; + + private final boolean hasRankFeatures; + + public RankProfile(String name, boolean hasSummaryFeatures, boolean hasRankFeatures) { + this.name = name; + this.hasSummaryFeatures = hasSummaryFeatures; + this.hasRankFeatures = hasRankFeatures; + } + + public String getName() { return name; } + + /** Returns true if this rank profile has summary features */ + public boolean hasSummaryFeatures() { return hasSummaryFeatures; } + + /** Returns true if this rank profile has rank features */ + public boolean hasRankFeatures() { return hasRankFeatures; } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/ShortField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/ShortField.java new file mode 100644 index 00000000000..e9c19590102 --- /dev/null +++ 
b/container-search/src/main/java/com/yahoo/prelude/fastsearch/ShortField.java @@ -0,0 +1,53 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * Class representing a short field in the result set + * + */ +package com.yahoo.prelude.fastsearch; + + +import java.nio.ByteBuffer; + +import com.yahoo.search.result.NanNumber; +import com.yahoo.data.access.Inspector; + +/** + * @author <a href="mailto:borud@yahoo-inc.com">Bj\u00f8rn Borud</a> + */ + +public class ShortField extends DocsumField { + static final short EMPTY_VALUE = Short.MIN_VALUE; + + public ShortField(String name) { + super(name); + } + + private Object convert(short value) { + if (value == EMPTY_VALUE) { + return NanNumber.NaN; + } else { + return Short.valueOf(value); + } + } + + public Object decode(ByteBuffer b) { + return convert(b.getShort()); + } + + public Object decode(ByteBuffer b, FastHit hit) { + Object field = decode(b); + hit.setField(name, field); + return field; + } + + public int getLength(ByteBuffer b) { + int offset = b.position(); + final int bytelength = Short.SIZE >> 3; + b.position(offset + bytelength); + return bytelength; + } + + public Object convert(Inspector value) { + return convert((short)value.asLong(EMPTY_VALUE)); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/StringField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/StringField.java new file mode 100644 index 00000000000..671188e4cae --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/StringField.java @@ -0,0 +1,62 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+/** + * Class representing a string field in the result set + * + */ +package com.yahoo.prelude.fastsearch; + + +import java.nio.ByteBuffer; + +import com.yahoo.text.Utf8; +import com.yahoo.data.access.Inspector; + + +/** + * @author <a href="mailto:borud@yahoo-inc.com">Bj\u00f8rn Borud</a> + */ +public class StringField extends DocsumField implements VariableLengthField { + public StringField(String name) { + super(name); + } + + @Override + public Object decode(ByteBuffer b) { + int length = ((int) b.getShort()) & 0xffff; + Object field; + + field = Utf8.toString(b.array(), b.arrayOffset() + b.position(), length); + b.position(b.position() + length); + return field; + } + + @Override + public Object decode(ByteBuffer b, FastHit hit) { + Object field = decode(b); + hit.setField(name, field); + return field; + } + + @Override + public String toString() { + return "field " + getName() + " type string"; + } + + @Override + public int getLength(ByteBuffer b) { + int offset = b.position(); + int len = ((int) b.getShort()) & 0xffff; + b.position(offset + len + (Short.SIZE >> 3)); + return len + (Short.SIZE >> 3); + } + + @Override + public int sizeOfLength() { + return Short.SIZE >> 3; + } + + @Override + public Object convert(Inspector value) { + return value.asString(""); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/StructDataField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/StructDataField.java new file mode 100644 index 00000000000..f0f4b82c22a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/StructDataField.java @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.fastsearch; + +import com.yahoo.search.result.StructuredData; +import com.yahoo.data.access.Inspector; +import com.yahoo.data.access.Type; +import com.yahoo.container.search.LegacyEmulationConfig; +import com.yahoo.prelude.hitfield.JSONString; + +/** + * Class representing an XML-rendered structured data field in the result set + */ +public class StructDataField extends JSONField { + + public StructDataField(String name) { + super(name); + } + + @Override + public String toString() { + return "field " + getName() + " type StructDataField"; + } + + public Object convert(Inspector value) { + if (getEmulConfig().stringBackedStructuredData() || + value.type() == Type.STRING) + { + return super.convert(value); + } + return new StructuredData(value); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/SummaryParameters.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/SummaryParameters.java new file mode 100644 index 00000000000..97a711d8590 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/SummaryParameters.java @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + + +/** + * Wrapper for document summary parameters and configuration. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class SummaryParameters { + + public final String defaultClass; + + public SummaryParameters(String defaultClass) { + if (defaultClass != null && defaultClass.isEmpty()) + this.defaultClass = null; + else + this.defaultClass = defaultClass; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/TimeoutException.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/TimeoutException.java new file mode 100644 index 00000000000..8c3d587a059 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/TimeoutException.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + +import java.io.IOException; + +/** + * Thrown on communication timeouts + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +@SuppressWarnings("serial") +public class TimeoutException extends IOException { + + public TimeoutException(String message) { + super(message); + } +} + diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/VariableLengthField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/VariableLengthField.java new file mode 100644 index 00000000000..f169533f8db --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/VariableLengthField.java @@ -0,0 +1,12 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + +/** + * Interface to easier find the start of the actual data for variable length + * fields. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public interface VariableLengthField { + public int sizeOfLength(); +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/VespaBackEndSearcher.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/VespaBackEndSearcher.java new file mode 100644 index 00000000000..820c764de06 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/VespaBackEndSearcher.java @@ -0,0 +1,653 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + +import java.util.Optional; +import com.yahoo.collections.TinyIdentitySet; +import com.yahoo.fs4.BasicPacket; +import com.yahoo.fs4.DocsumPacket; +import com.yahoo.fs4.DocumentInfo; +import com.yahoo.fs4.ErrorPacket; +import com.yahoo.fs4.QueryPacketData; +import com.yahoo.fs4.Packet; +import com.yahoo.fs4.QueryPacket; +import com.yahoo.fs4.QueryResultPacket; +import com.yahoo.io.GrowableByteBuffer; +import com.yahoo.io.HexDump; +import com.yahoo.log.LogLevel; +import com.yahoo.prelude.ConfigurationException; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.NullItem; +import com.yahoo.prelude.query.textualrepresentation.TextualQueryRepresentation; +import com.yahoo.prelude.querytransform.QueryRewrite; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.protect.Validator; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.cluster.PingableSearcher; +import com.yahoo.search.grouping.vespa.GroupingExecutor; +import com.yahoo.search.result.Coverage; +import com.yahoo.search.result.ErrorHit; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.Relevance; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.searchlib.aggregation.Grouping; +import 
com.yahoo.vespa.objects.BufferSerializer; + +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.logging.Level; + + +/** + * Superclass for backend searchers. + * + * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a> + */ +@SuppressWarnings("deprecation") +public abstract class VespaBackEndSearcher extends PingableSearcher { + + private static final CompoundName grouping=new CompoundName("grouping"); + private static final CompoundName combinerows=new CompoundName("combinerows"); + protected static final CompoundName PACKET_COMPRESSION_LIMIT = new CompoundName("packetcompressionlimit"); + protected static final CompoundName PACKET_COMPRESSION_TYPE = new CompoundName("packetcompressiontype"); + protected static final CompoundName TRACE_DISABLE = new CompoundName("trace.disable"); + + /** The set of all document databases available in the backend handled by this searcher */ + private Map<String, DocumentDatabase> documentDbs = new LinkedHashMap<>(); + private DocumentDatabase defaultDocumentDb = null; + + /** Default docsum class. null means "unset" and is the default value */ + private String defaultDocsumClass = null; + + /** Returns an iterator which returns all hits below this result **/ + protected Iterator<Hit> hitIterator(Result result) { + return result.hits().unorderedDeepIterator(); + } + + private boolean localDispatching = true; + + /** The name of this source */ + private String name; + + /** Cache wrapper */ + protected CacheControl cacheControl = null; + /** + * The number of last significant bits in the partId which specifies the + * row number in this backend, + * the rest specifies the column. 0 if not known. 
+ */ + private int rowBits = 0; + /** Searchcluster number */ + private int sourceNumber; + + protected final String getName() { return name; } + protected final String getDefaultDocsumClass() { return defaultDocsumClass; } + + /** Sets default document summary class. Default is null */ + private void setDefaultDocsumClass(String docsumClass) { defaultDocsumClass = docsumClass; } + + /** Returns the packet cache controller of this */ + public final CacheControl getCacheControl() { return cacheControl; } + + /** + * Searches a search cluster + * This is an endpoint - searchers will never propagate the search to any nested searcher. + * + * @param query the query to search + * @param queryPacket the serialized query representation to pass to the search cluster + * @param cacheKey the cache key created from the query packet, or null if caching is not used + * @param execution the query execution context + */ + protected abstract Result doSearch2(Query query, QueryPacket queryPacket, CacheKey cacheKey, Execution execution); + + protected abstract void doPartialFill(Result result, String summaryClass); + + private Result cacheLookupFirstPhase(CacheKey key, QueryPacketData queryPacketData, Query query, int offset, int hits, String summaryClass) throws IOException { + PacketWrapper packetWrapper = cacheControl.lookup(key, query); + + if (packetWrapper == null) return null; + + // Check if the cache entry contains the requested hits + List<DocumentInfo> documents = packetWrapper.getDocuments(offset, hits); + if (documents == null) return null; + + if (query.getPresentation().getSummary() == null) + query.getPresentation().setSummary(getDefaultDocsumClass()); + Result result = new Result(query); + QueryResultPacket resultPacket = packetWrapper.getFirstResultPacket(); + + addMetaInfo(query, queryPacketData, resultPacket, result, true); + if (packetWrapper.getNumPackets() == 0) + addUnfilledHits(result, documents, true, queryPacketData, key); + else + addCachedHits(result, 
packetWrapper, summaryClass, documents); + return result; + } + + + protected DocumentDatabase getDocumentDatabase(Query query) { + if (query.getModel().getRestrict().size() == 1) { + String docTypeName = (String)query.getModel().getRestrict().toArray()[0]; + DocumentDatabase db = documentDbs.get(docTypeName); + if (db != null) { + return db; + } + } + return defaultDocumentDb; + } + + private void resolveDocumentDatabase(Query query) { + DocumentDatabase docDb = getDocumentDatabase(query); + if (docDb != null) { + query.getModel().setDocumentDb(docDb.getName()); + } + } + + public final void init(SummaryParameters docSumParams, ClusterParams clusterParams, CacheParams cacheParams, + DocumentdbInfoConfig documentdbInfoConfig) { + this.name = clusterParams.searcherName; + this.sourceNumber = clusterParams.clusterNumber; + this.rowBits = clusterParams.rowBits; + + Validator.ensureNotNull("Name of Vespa backend integration", getName()); + + setDefaultDocsumClass(docSumParams.defaultClass); + + if (documentdbInfoConfig != null) { + for (DocumentdbInfoConfig.Documentdb docDb : documentdbInfoConfig.documentdb()) { + DocumentDatabase db = new DocumentDatabase(docDb, clusterParams.emulation); + if (documentDbs.isEmpty()) { + defaultDocumentDb = db; + } + documentDbs.put(docDb.name(), db); + } + } + + if (cacheParams.cacheControl == null) { + this.cacheControl = new CacheControl(cacheParams.cacheMegaBytes, cacheParams.cacheTimeOutSeconds); + } else { + this.cacheControl = cacheParams.cacheControl; + } + } + + protected void transformQuery(Query query) { } + + public Result search(Query query, Execution execution) { + // query root should not be null here + Item root = query.getModel().getQueryTree().getRoot(); + if (root == null || root instanceof NullItem) { + return new Result(query, ErrorMessage.createNullQuery(query.getHttpRequest().getUri().toString())); + } + + QueryRewrite.optimizeByRestrict(query); + QueryRewrite.optimizeAndNot(query); + 
QueryRewrite.collapseSingleComposites(query); + + root = query.getModel().getQueryTree().getRoot(); + if (root == null || root instanceof NullItem) // root can become null after optimization + return new Result(query); + + resolveDocumentDatabase(query); + transformQuery(query); + traceQuery(name, "search", query, query.getOffset(), query.getHits(), 1, Optional.<String>empty()); + + root = query.getModel().getQueryTree().getRoot(); + if (root == null || root instanceof NullItem) // root can become null after resolving and transformation? + return new Result(query); + + QueryPacket queryPacket = QueryPacket.create(query); + int compressionLimit = query.properties().getInteger(PACKET_COMPRESSION_LIMIT, 0); + queryPacket.setCompressionLimit(compressionLimit); + if (compressionLimit != 0) { + queryPacket.setCompressionType(query.properties().getString(PACKET_COMPRESSION_TYPE, "lz4")); + } + + if (isLoggingFine()) + getLogger().fine("made QueryPacket: " + queryPacket); + + Result result = null; + CacheKey cacheKey = null; + if (cacheControl.useCache(query)) { + cacheKey = new CacheKey(queryPacket); + result = getCached(cacheKey, queryPacket.getQueryPacketData(), query); + } + + if (result == null) { + String next = null; + result = doSearch2(query, queryPacket, cacheKey, execution); + if (isLoggingFine()) { + getLogger().fine("Result NOT retrieved from cache"); + } + + if (query.getTraceLevel() >= 1) { + query.trace(getName() + " dispatch response: " + result, false, 1); + } + result.trace(getName()); + } + return result; + } + + /** + * Returns a cached result, or null if no result was cached for this key + * + * @param cacheKey the cache key created from the query packet + * @param queryPacketData a serialization of the query, to avoid having to recompute this, or null if not available + * @param query the query, used for tracing, lookup of result window and result creation + */ + private Result getCached(CacheKey cacheKey, QueryPacketData queryPacketData, Query 
query) { + if (query.getTraceLevel() >= 6) { + query.trace("Cache key hash: " + cacheKey.hashCode(), 6); + if (query.getTraceLevel() >= 8) { + query.trace("Cache key: " + HexDump.toHexString(cacheKey.getCopyOfFullKey()), 8); + } + } + + try { + Result result = cacheLookupFirstPhase(cacheKey, queryPacketData, query, query.getOffset(), query.getHits(), query.getPresentation().getSummary()); + if (result == null) return null; + + if (isLoggingFine()) { + getLogger().fine("Result retrieved from cache: " + result); + } + if (query.getTraceLevel() >= 1) { + query.trace(getName() + " cached response: " + result, false, 1); + } + result.trace(getName()); + return result; + } + catch (IOException e) { + Result result = new Result(query); + + if (result.hits().getErrorHit() == null) { + result.hits().setError(ErrorMessage.createBackendCommunicationError( + "Fast Search (" + getName() + ") failed: " + e.getMessage())); + } + if (query.getTraceLevel() >= 1) { + query.trace(getName() + " error response: " + result, false, 1); + } + return result; + } + } + + private List<Result> partitionHits(Result result, String summaryClass) { + List<Result> parts = new ArrayList<>(); + TinyIdentitySet<Query> queryMap = new TinyIdentitySet<>(4); + + for (Iterator<Hit> itr = hitIterator(result); itr.hasNext(); ) { + Hit hit = itr.next(); + if (hit instanceof FastHit) { + FastHit fastHit = (FastHit) hit; + if (!fastHit.isFilled(summaryClass)) { + Query q = fastHit.getQuery(); + if (q == null) { + q = result.hits().getQuery(); // fallback for untagged hits + } + int idx = queryMap.indexOf(q); + if (idx < 0) { + idx = queryMap.size(); + Result r = new Result(q); + parts.add(r); + queryMap.add(q); + } + parts.get(idx).hits().add(fastHit); + } + } + } + return parts; + } + + @Override + public void fill(Result result, String summaryClass, Execution execution) { + if (result.isFilled(summaryClass)) return; // TODO: Checked in the superclass - remove + + List<Result> parts= partitionHits(result, 
summaryClass); + if (parts.size() > 0) { // anything to fill at all? + for (Result r : parts) { + doPartialFill(r, summaryClass); + mergeErrorsInto(result, r); + } + result.hits().setSorted(false); + result.analyzeHits(); + } + } + + private void mergeErrorsInto(Result destination, Result source) { + ErrorHit eh = source.hits().getErrorHit(); + if (eh != null) { + for (ErrorMessage error : eh.errors()) + destination.hits().addError(error); + } + } + + static void traceQuery(String sourceName, String type, Query query, int offset, int hits, int level, Optional<String> quotedSummaryClass) { + if ((query.getTraceLevel()<level) || query.properties().getBoolean(TRACE_DISABLE)) return; + + StringBuilder s = new StringBuilder(); + s.append(sourceName).append(" " + type + " to dispatch: ") + .append("query=[") + .append(query.getModel().getQueryTree().getRoot().toString()) + .append("]"); + + s.append(" timeout=").append(query.getTimeout()).append("ms"); + + s.append(" offset=") + .append(offset) + .append(" hits=") + .append(hits); + + if (query.getRanking().hasRankProfile()) { + s.append(" rankprofile[") + .append(query.getRanking().getProfile()) + .append("]"); + } + + if (query.getRanking().getFreshness() != null) { + s.append(" freshness=") + .append(query.getRanking().getFreshness().getRefTime()); + } + + if (query.getRanking().getSorting() != null) { + s.append(" sortspec=") + .append(query.getRanking().getSorting().fieldOrders().toString()); + } + + if (query.getRanking().getLocation() != null) { + s.append(" location=") + .append(query.getRanking().getLocation().toString()); + } + + List<Grouping> grouping = GroupingExecutor.getGroupingList(query); + s.append(" grouping=").append(grouping.size()).append(" : "); + for(Grouping g : grouping) { + s.append(g.toString()); + } + + if ( ! query.getRanking().getProperties().isEmpty()) { + s.append(" rankproperties=") + .append(query.getRanking().getProperties().toString()); + } + + if ( ! 
query.getRanking().getFeatures().isEmpty()) { + s.append(" rankfeatures=") + .append(query.getRanking().getFeatures().toString()); + } + + if (query.getModel().getRestrict() != null) { + s.append(" restrict=").append(query.getModel().getRestrict().toString()); + } + + if (quotedSummaryClass.isPresent()) { + s.append(" summary=").append(quotedSummaryClass.get()); + } + + query.trace(s.toString(), false, level); + if (query.isTraceable(level + 1)) { + query.trace("Current state of query tree: " + + new TextualQueryRepresentation(query.getModel().getQueryTree().getRoot()), + false, level+1); + } + if (query.isTraceable(level + 2)) { + query.trace("YQL+ representation: " + query.yqlRepresentation(), level+2); + } + } + + protected void addMetaInfo(Query query, QueryPacketData queryPacketData, QueryResultPacket resultPacket, Result result, boolean fromCache) { + result.setTotalHitCount(resultPacket.getTotalDocumentCount()); + + // Grouping + if (resultPacket.getGroupData() != null) { + byte[] data = resultPacket.getGroupData(); + ArrayList<Grouping> list = new ArrayList<>(); + BufferSerializer buf = new BufferSerializer(new GrowableByteBuffer(ByteBuffer.wrap(data))); + int cnt = buf.getInt(null); + for (int i = 0; i < cnt; i++) { + Grouping g = new Grouping(); + g.deserialize(buf); + list.add(g); + } + GroupingListHit hit = new GroupingListHit(list, getDocsumDefinitionSet(query)); + hit.setQuery(result.getQuery()); + hit.setSource(getName()); + hit.setSourceNumber(sourceNumber); + hit.setQueryPacketData(queryPacketData); + result.hits().add(hit); + } + + if (resultPacket.getCoverageFeature()) { + result.setCoverage(new Coverage(resultPacket.getCoverageDocs(), resultPacket.getActiveDocs())); + } + } + + private boolean fillHit(FastHit hit, DocsumPacket packet, String summaryClass) { + if (packet != null) { + byte[] docsumdata = packet.getData(); + if (docsumdata.length > 0) { + decodeSummary(summaryClass, hit, docsumdata); + return true; + } + } + return false; + } + + 
/** + * Fills the hits. + * + * @return the number of hits that we did not return data for, i.e + * when things are working normally we return 0. + */ + protected int fillHits(Result result, int packetIndex, Packet[] packets, String summaryClass) throws IOException { + int skippedHits=0; + for (Iterator<Hit> i = hitIterator(result); i.hasNext();) { + Hit hit = i.next(); + + if (hit instanceof FastHit && !hit.isFilled(summaryClass)) { + FastHit fastHit = (FastHit) hit; + + ensureInstanceOf(DocsumPacket.class, packets[packetIndex]); + DocsumPacket docsum = (DocsumPacket) packets[packetIndex]; + + packetIndex++; + if ( ! fillHit(fastHit, docsum, summaryClass)) + skippedHits++; + } + } + result.hits().setSorted(false); + return skippedHits; + } + + /** + * Throws an IOException if the packet is not of the expected type + */ + protected final void ensureInstanceOf(Class<? extends BasicPacket> type, BasicPacket packet) throws IOException { + if ((type.isAssignableFrom(packet.getClass()))) return; + + if (packet instanceof ErrorPacket) { + ErrorPacket errorPacket=(ErrorPacket)packet; + if (errorPacket.getErrorCode() == 8) + throw new TimeoutException("Query timed out in " + getName()); + else + throw new IOException("Received error from backend in " + getName() + ": " + packet); + } else { + throw new IOException("Received " + packet + " when expecting " + type); + } + } + + private boolean addCachedHits(Result result, + PacketWrapper packetWrapper, + String summaryClass, + List<DocumentInfo> documents) { + boolean filledAllOfEm = true; + Query myQuery = result.getQuery(); + + for (DocumentInfo document : documents) { + FastHit hit = new FastHit(); + hit.setQuery(myQuery); + + hit.setUseRowInIndexUri(useRowInIndexUri(result)); + hit.setFillable(); + hit.setCached(true); + + extractDocumentInfo(hit, document); + + DocsumPacket docsum = (DocsumPacket) packetWrapper.getPacket(document.getGlobalId(), document.getPartId(), summaryClass); + + if (docsum != null) { + byte[] 
docsumdata = docsum.getData(); + + if (docsumdata.length > 0) { + decodeSummary(summaryClass, hit, docsumdata); + } else { + filledAllOfEm = false; + } + } else { + filledAllOfEm = false; + } + + result.hits().add(hit); + + } + + return filledAllOfEm; + } + + private boolean useRowInIndexUri(Result result) { + return ! ((result.getQuery().properties().getString(grouping) != null) || result.getQuery().properties().getBoolean(combinerows)); + } + + private void extractDocumentInfo(FastHit hit, DocumentInfo document) { + hit.setSourceNumber(sourceNumber); + hit.setSource(getName()); + + Number rank = document.getMetric(); + + hit.setRelevance(new Relevance(rank.doubleValue())); + + hit.setDistributionKey(document.getDistributionKey()); + hit.setGlobalId(document.getGlobalId()); + hit.setPartId(document.getPartId(), rowBits); + } + + protected PacketWrapper cacheLookupTwoPhase(CacheKey cacheKey, Result result, String summaryClass) { + Query query = result.getQuery(); + PacketWrapper packetWrapper = cacheControl.lookup(cacheKey, query); + + if (packetWrapper == null) { + return null; + } + if (packetWrapper.getNumPackets() != 0) { + for (Iterator<Hit> i = hitIterator(result); i.hasNext();) { + Hit hit = i.next(); + + if (hit instanceof FastHit) { + FastHit fastHit = (FastHit) hit; + DocsumPacketKey key = new DocsumPacketKey(fastHit.getGlobalId(), fastHit.getPartId(), summaryClass); + + if (fillHit(fastHit, + (DocsumPacket) packetWrapper.getPacket(key), + summaryClass)) { + fastHit.setCached(true); + } + + } + } + result.hits().setSorted(false); + result.analyzeHits(); + } + + return packetWrapper; + } + + protected DocsumDefinitionSet getDocsumDefinitionSet(Query query) { + DocumentDatabase db = getDocumentDatabase(query); + return db.getDocsumDefinitionSet(); + } + + private void decodeSummary(String summaryClass, FastHit hit, byte[] docsumdata) { + DocumentDatabase db = getDocumentDatabase(hit.getQuery()); + hit.setField(Hit.SDDOCNAME_FIELD, db.getName()); + 
decodeSummary(summaryClass, hit, docsumdata, db.getDocsumDefinitionSet()); + } + + private void decodeSummary(String summaryClass, FastHit hit, byte[] docsumdata, DocsumDefinitionSet docsumSet) { + docsumSet.lazyDecode(summaryClass, docsumdata, hit); + hit.setFilled(summaryClass); + } + + /** + * Creates unfilled hits from a List of DocumentInfo instances. Do note + * cacheKey should be available if a cache is active, even if the hit is not + * created from a cache in the current call path. + * + * @param queryPacketData binary data from first phase of search, or null + * @param cacheKey the key this hit should match in the packet cache, or null + */ + protected boolean addUnfilledHits(Result result, List<DocumentInfo> documents, boolean fromCache, QueryPacketData queryPacketData, CacheKey cacheKey) { + boolean allHitsOK = true; + Query myQuery = result.getQuery(); + + for (DocumentInfo document : documents) { + + try { + FastHit hit = new FastHit(); + hit.setQuery(myQuery); + if (queryPacketData != null) + hit.setQueryPacketData(queryPacketData); + hit.setCacheKey(cacheKey); + + hit.setUseRowInIndexUri(useRowInIndexUri(result)); + hit.setFillable(); + hit.setCached(fromCache); + + extractDocumentInfo(hit, document); + + result.hits().add(hit); + } catch (ConfigurationException e) { + allHitsOK = false; + getLogger().log(LogLevel.WARNING, "Skipping hit", e); + } catch (Exception e) { + allHitsOK = false; + getLogger().log(LogLevel.ERROR, "Skipping malformed hit", e); + } + } + return allHitsOK; + } + + @SuppressWarnings("rawtypes") + public static VespaBackEndSearcher getSearcher(String s) { + try { + Class c = Class.forName(s); + if (VespaBackEndSearcher.class.isAssignableFrom(c)) { + Constructor[] constructors = c.getConstructors(); + for (Constructor constructor : constructors) { + Class[] parameters = constructor.getParameterTypes(); + if (parameters.length == 0) { + return (VespaBackEndSearcher) constructor.newInstance(); + } + } + throw new 
RuntimeException("Failed initializing " + s); + + } else { + throw new RuntimeException(s + " is not com.yahoo.prelude.fastsearch.VespaBackEndSearcher"); + } + } catch (Exception e) { + throw new RuntimeException("Failure loading class " + s + ", exception :" + e); + } + } + + protected boolean isLoggingFine() { + return getLogger().isLoggable(Level.FINE); + } + public boolean isLocalDispatching() { + return localDispatching; + } + public void setLocalDispatching(boolean localDispatching) { + this.localDispatching = localDispatching; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/XMLField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/XMLField.java new file mode 100644 index 00000000000..0ccc8b03e3b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/XMLField.java @@ -0,0 +1,95 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * Class representing an XML field in the result set + * + */ +package com.yahoo.prelude.fastsearch; + + +import java.nio.ByteBuffer; + +import com.yahoo.io.SlowInflate; +import com.yahoo.prelude.hitfield.XMLString; +import com.yahoo.text.Utf8; +import com.yahoo.data.access.Inspector; + + +/** + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class XMLField extends DocsumField implements VariableLengthField { + public XMLField(String name) { + super(name); + } + + private Object convert(String value) { + return new XMLString(value); + } + + @Override + public Object decode(ByteBuffer b) { + long dataLen = 0; + long len = ((long) b.getInt()) & 0xffffffffL; + boolean compressed; + + // if MSB is set this is a compressed field. 
set the compressed + // flag accordingly and decompress the data + compressed = ((len & 0x80000000) != 0); + if (compressed) { + len &= 0x7fffffff; + dataLen = b.getInt(); + len -= 4; + } + + byte[] tmp = new byte[(int) len]; + + b.get(tmp); + + if (compressed) { + SlowInflate inf = new SlowInflate(); + + tmp = inf.unpack(tmp, (int) dataLen); + } + return convert(Utf8.toString(tmp)); + } + + @Override + public Object decode(ByteBuffer b, FastHit hit) { + Object field = decode(b); + hit.setField(name, field); + return field; + } + + @Override + public String toString() { + return "field " + getName() + " type XMLString"; + } + + @Override + public int getLength(ByteBuffer b) { + int offset = b.position(); + // MSB = compression flag, re decode + int len = b.getInt() & 0x7fffffff; + b.position(offset + len + (Integer.SIZE >> 3)); + return len + (Integer.SIZE >> 3); + } + + @Override + public boolean isCompressed(ByteBuffer b) { + int offset = b.position(); + // MSB = compression flag, re decode + int compressed = b.getInt() & 0x80000000; + b.position(offset); + return compressed != 0; + } + + @Override + public int sizeOfLength() { + return Integer.SIZE >> 3; + } + + public Object convert(Inspector value) { + return convert(value.asString("")); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/package-info.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/package-info.java new file mode 100644 index 00000000000..b34b74ccae3 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+@ExportPackage +package com.yahoo.prelude.fastsearch; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/grouping/legacy/.gitignore b/container-search/src/main/java/com/yahoo/prelude/grouping/legacy/.gitignore new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/grouping/legacy/.gitignore diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/AnnotateStringFieldPart.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/AnnotateStringFieldPart.java new file mode 100644 index 00000000000..8361cb722e9 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/AnnotateStringFieldPart.java @@ -0,0 +1,45 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.hitfield; + +/** TODO: Class header! */ +public class AnnotateStringFieldPart implements FieldPart { + + public static final char RAW_ANNOTATE_BEGIN_CHAR = '\uFFF9'; + public static final char RAW_ANNOTATE_SEPARATOR_CHAR = '\uFFFA'; + public static final char RAW_ANNOTATE_END_CHAR = '\uFFFB'; + + private String content; + private String rawContent; + + public AnnotateStringFieldPart(String source, int index) { + content = ""; + rawContent = ""; + if (source.charAt(index) == RAW_ANNOTATE_BEGIN_CHAR) { + int sep = source.indexOf(RAW_ANNOTATE_SEPARATOR_CHAR, index); + int end = source.indexOf(RAW_ANNOTATE_END_CHAR, index); + + if (sep != -1) { + rawContent = source.substring(index + 1, sep); + if (end != -1 && end > sep) { + content = source.substring(sep + 1, end); + } + else { + content = rawContent; + } + } + } + } + + public boolean isFinal() { return false; } + + public boolean isToken() { return true; } + + public String getContent() { return rawContent; } + + public void setContent(String content) { + this.content = content; + } + + public String 
toString() { return content; } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/BoldCloseFieldPart.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/BoldCloseFieldPart.java new file mode 100644 index 00000000000..1b306c26f3e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/BoldCloseFieldPart.java @@ -0,0 +1,14 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.hitfield; + +/** + * Represents an element of a hit property which is markup, representing + * end of a bolded area. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class BoldCloseFieldPart extends MarkupFieldPart { + public BoldCloseFieldPart(String content) { + super(content); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/BoldOpenFieldPart.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/BoldOpenFieldPart.java new file mode 100644 index 00000000000..b4e8d1cfbf3 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/BoldOpenFieldPart.java @@ -0,0 +1,14 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.hitfield; + +/** + * Represents an element of a hit property which is markup representing + * the start of a bolded area. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class BoldOpenFieldPart extends MarkupFieldPart { + public BoldOpenFieldPart(String content) { + super(content); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/FieldIterator.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/FieldIterator.java new file mode 100644 index 00000000000..b1d3abb73a7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/FieldIterator.java @@ -0,0 +1,61 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.hitfield; + +import java.util.List; +import java.util.ListIterator; + +/** + * A specialized list iterator to manipulate FieldParts in HitField objects. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class FieldIterator implements ListIterator<FieldPart> { + + private final ListIterator<FieldPart> realIterator; + private final HitField hitField; + + public FieldIterator(List<FieldPart> fieldList, HitField hitField) { + this.hitField = hitField; + realIterator = fieldList.listIterator(); + } + + public void add(FieldPart o) { + realIterator.add(o); + hitField.markDirty(); + } + + public boolean hasNext() { + return realIterator.hasNext(); + } + + public boolean hasPrevious() { + return realIterator.hasPrevious(); + } + + public FieldPart next() { + return realIterator.next(); + } + + public int nextIndex() { + return realIterator.nextIndex(); + } + + public FieldPart previous() { + return realIterator.previous(); + } + + public int previousIndex() { + return realIterator.previousIndex(); + } + + public void remove() { + realIterator.remove(); + hitField.markDirty(); + } + + public void set(FieldPart o) { + realIterator.set(o); + hitField.markDirty(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/FieldPart.java 
b/container-search/src/main/java/com/yahoo/prelude/hitfield/FieldPart.java new file mode 100644 index 00000000000..17c39de01f3 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/FieldPart.java @@ -0,0 +1,14 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.hitfield; + +/** + * Represents an element of a hit property + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public interface FieldPart { + public abstract boolean isFinal(); + public abstract boolean isToken(); + public abstract String getContent(); + public abstract String toString(); +} diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/HitField.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/HitField.java new file mode 100644 index 00000000000..638376c791d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/HitField.java @@ -0,0 +1,417 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.hitfield; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.ListIterator; + +import com.yahoo.prelude.searcher.JuniperSearcher; +import com.yahoo.text.XML; + +/** + * Represents a Field in a Hit. The original raw content and the field + * name cannot be modified. But the tokenized version can be retrieved + * and set. 
+ * + * @author <a href="mailto:larschr@yahoo-inc.com">Lars Christian Jensen</a> + */ +public class HitField { + + private final String name; + private final String rawContent; + private final boolean isCJK; + + private boolean xmlProperty; + + private List<FieldPart> tokenizedContent = null; + private String content = null; + + + private Object original; + + /** + * @param f The field name + * @param c The field content + */ + public HitField(String f, String c) { + this(f, c, c.indexOf(JuniperSearcher.RAW_HIGHLIGHT_CHAR) > -1); + } + + /** + * @param f The field name + * @param c The field content + */ + public HitField(String f, XMLString c) { + this(f, c, c.toString().indexOf(JuniperSearcher.RAW_HIGHLIGHT_CHAR) > -1); + } + + /** + * @param f The field name + * @param c The field content + * @param cjk true if this is a cjk-document + */ + public HitField(String f, String c, boolean cjk) { + this(f, c, cjk, false); + } + + /** + * @param f The field name + * @param c The field content + * @param cjk true if this is a cjk-document + */ + public HitField(String f, XMLString c, boolean cjk) { + this(f, c.toString(), cjk, true); + } + + /** + * @param f The field name + * @param c The field content + * @param cjk true if this is a cjk-document + * @param xmlProperty true if this should not quote XML syntax + */ + public HitField(String f, String c, boolean cjk, boolean xmlProperty) { + name = f; + rawContent = c; + content = null; + isCJK = cjk; + this.xmlProperty = xmlProperty; + } + + + /** + * @return the name of this field + */ + public String getName() { + return name; + } + + /** + * @return the raw/original content of this field + */ + public String getRawContent() { + return rawContent; + } + + private List<FieldPart> tokenizeUnknown() { + List<FieldPart> pre = new ArrayList<>(); + if (rawContent.length() == 0) + return pre; + int i = 0; + int j = 0; + i = rawContent.indexOf('\u001E'); + if (i == 0) { + pre.add(new 
SeparatorFieldPart(rawContent.substring(0,1))); + j = 1; + i = rawContent.indexOf('\u001E', j); + } + while(i != -1) { + tokenizeSnippet(pre, rawContent.substring(j, i)); + pre.add(new SeparatorFieldPart(rawContent.substring(i,i+1))); + i++; + j = i; + i = rawContent.indexOf('\u001E', j); + } + if (j < rawContent.length()) { + tokenizeSnippet(pre, rawContent.substring(j)); + } + return pre; + } + + private boolean isAnnotationChar(char c) { + return c == AnnotateStringFieldPart.RAW_ANNOTATE_BEGIN_CHAR || + c == AnnotateStringFieldPart.RAW_ANNOTATE_SEPARATOR_CHAR || + c == AnnotateStringFieldPart.RAW_ANNOTATE_END_CHAR; + } + + private void tokenizeSnippet(List<FieldPart> resultParts, String content) { + int head = 0; + int tail = 0; + boolean justFinishedIncompleteAnnotation = false; + int numRawHighLightChars = 0; + List<FieldPart> localParts = new ArrayList<>(); + if (content.length() == 0) { + return; + } + + boolean prevHeadLetterOrDigital = Character.isLetterOrDigit(content.charAt(0)); + + for ( ;head < content.length(); head++) { + char headChar = content.charAt(head); + if (isAnnotationChar(headChar)) { + if (headChar == AnnotateStringFieldPart.RAW_ANNOTATE_BEGIN_CHAR) { + int nextHead = content.indexOf(AnnotateStringFieldPart.RAW_ANNOTATE_END_CHAR, head); + boolean incompleteAnnotation = (nextHead == -1); + boolean skippedInvalidHighlightChar = false; + if (head > tail) { + int currHead = head; + if (incompleteAnnotation && + content.charAt(head-1) == JuniperSearcher.RAW_HIGHLIGHT_CHAR && + numRawHighLightChars % 2 == 1) + { + currHead--; // skip invalid highlight char + skippedInvalidHighlightChar = true; + } + localParts.add(createToken(content.substring(tail, currHead), prevHeadLetterOrDigital)); + } + if (!skippedInvalidHighlightChar) { + localParts.add(new AnnotateStringFieldPart(content, head)); + } + head = nextHead; + } else if (headChar == AnnotateStringFieldPart.RAW_ANNOTATE_SEPARATOR_CHAR) { + localParts.clear(); + head = 
content.indexOf(AnnotateStringFieldPart.RAW_ANNOTATE_END_CHAR, head); + justFinishedIncompleteAnnotation = true; + } else if (headChar == AnnotateStringFieldPart.RAW_ANNOTATE_END_CHAR) { + localParts.clear(); + justFinishedIncompleteAnnotation = true; + } + if (head == -1) { + head = content.length(); + } else { + if (head + 1 < content.length()) { + prevHeadLetterOrDigital = Character.isLetterOrDigit(content.charAt(head + 1)); + } + } + tail = head + 1; + } else { + if (headChar == JuniperSearcher.RAW_HIGHLIGHT_CHAR) { + if (justFinishedIncompleteAnnotation) { + tail = head + 1; // skip invalid highlight char + } else { + ++numRawHighLightChars; + } + } + boolean currHeadLetterOrDigital = Character.isLetterOrDigit(headChar); + if (currHeadLetterOrDigital != prevHeadLetterOrDigital & head > tail) { + localParts.add(createToken(content.substring(tail, head), prevHeadLetterOrDigital)); + tail = head; + prevHeadLetterOrDigital = currHeadLetterOrDigital; + } + justFinishedIncompleteAnnotation = false; + } + } + if (head > tail) { + localParts.add(createToken(content.substring(tail), prevHeadLetterOrDigital)); + } + resultParts.addAll(localParts); + } + + private FieldPart createToken(String substring, boolean isToken) { + if (xmlProperty) { + // TODO: Model this with something better than ImmutableFieldPart + return new ImmutableFieldPart(substring, isToken); + } else { + return new StringFieldPart(substring, isToken); + } + } + + private List<FieldPart> tokenizePretokenized() { + String[] pre = rawContent.split("\u001F+"); + List<FieldPart> tokenized = new ArrayList<>(pre.length); + for (int i = 0; i < pre.length; i++) { + tokenized.add(createToken(pre[i], true)); + } + return tokenized; + } + + private void tokenizeContent() { + List<FieldPart> pre; + if (isCJK) { + pre = tokenizePretokenized(); + } else { + pre = tokenizeUnknown(); + } + setTokenizedContentUnchecked(pre); + } + /** + * Get a list representation of the tokens in the content. 
This is + * only a copy, changes here will not affect the HitField. + * + * @return a list containing the content in tokenized form. + */ + public List<FieldPart> getTokenizedContent() { + List<FieldPart> l = new ArrayList<>(); + for (ListIterator<FieldPart> i = tokenIterator(); i.hasNext(); ) { + l.add(i.next()); + } + return l; + } + + private List<FieldPart> ensureTokenized() { + if (tokenizedContent == null) { + tokenizeContent(); + } + return tokenizedContent; + } + /** + * Return an iterator for the tokens, delimiters and markup elements + * of the field. + */ + public ListIterator<FieldPart> listIterator() { + return new FieldIterator(ensureTokenized(), + this); + } + + /** + * Return an iterator for the tokens in the field + */ + public ListIterator<FieldPart> tokenIterator() { + return new TokenFieldIterator(ensureTokenized(), + this); + } + + /** + * Only FieldPart objects must be present in the list. + * + * @param list contains the new content of this HitField in tokenized form. 
+ */ + public void setTokenizedContent(List<FieldPart> list) { + tokenizedContent = new ArrayList<>(list.size()); + for (Iterator<FieldPart> i = list.iterator(); i.hasNext(); ) { + tokenizedContent.add(i.next()); + } + // Must null content reference _before_ calling getContent() + content = null; + } + + private void setTokenizedContentUnchecked(List<FieldPart> list) { + tokenizedContent = list; + // Must null content reference _before_ calling getContent() + content = null; + } + /** + * @return the content of this field + */ + public String getContent() { + if (content == null) { + StringBuilder buf = new StringBuilder(); + Iterator<FieldPart> iter = ensureTokenized().iterator(); + while(iter.hasNext()) { + buf.append(iter.next().getContent()); + } + content = buf.toString(); + } + return content; + } + + /** + * @return the content of this field, using the arguments as bolding + * tags + */ + public String getContent(String boldOpenTag, + String boldCloseTag, + String separatorTag) { + StringBuilder buf = new StringBuilder(); + Iterator<FieldPart> iter = ensureTokenized().iterator(); + while(iter.hasNext()) { + FieldPart f = iter.next(); + if (f instanceof BoldOpenFieldPart + && boldOpenTag != null + && boldOpenTag.length() > 0) + buf.append(boldOpenTag); + else if (f instanceof BoldCloseFieldPart + && boldCloseTag != null + && boldCloseTag.length() > 0) + buf.append(boldCloseTag); + else if (f instanceof SeparatorFieldPart + && separatorTag != null + && separatorTag.length() > 0) + buf.append(separatorTag); + else + buf.append(f.getContent()); + } + return buf.toString(); + } + + public void markDirty() { + content = null; + } + + /** + * @param inAttribute whether to quote quotation marks + * @return the content of this field as an XML string + */ + public String quotedContent(boolean inAttribute) { + StringBuilder xml = new StringBuilder(); + Iterator<FieldPart> iter = ensureTokenized().iterator(); + while(iter.hasNext()) { + FieldPart f = iter.next(); + if 
(f.isFinal()) + xml.append(f.getContent()); + else + xml.append(XML.xmlEscape(f.getContent(), inAttribute)); + } + return xml.toString(); + } + + /** + * @return the content of this field, using the arguments as bolding + * tags, as an XML string + */ + public String quotedContent(String boldOpenTag, + String boldCloseTag, + String separatorTag, + boolean inAttribute) { + StringBuilder xml = new StringBuilder(); + Iterator<FieldPart> iter = ensureTokenized().iterator(); + while(iter.hasNext()) { + FieldPart f = iter.next(); + if (f instanceof BoldOpenFieldPart + && boldOpenTag != null + && boldOpenTag.length() > 0) + xml.append(boldOpenTag); + else if (f instanceof BoldCloseFieldPart + && boldCloseTag != null + && boldCloseTag.length() > 0) + xml.append(boldCloseTag); + else if (f instanceof SeparatorFieldPart + && separatorTag != null + && separatorTag.length() > 0) + xml.append(separatorTag); + else if (f.isFinal()) + xml.append(f.getContent()); + else + xml.append(XML.xmlEscape(f.getContent(), inAttribute)); + } + return xml.toString(); + } + /** + * @return the content of the field, stripped of markup + */ + public String bareContent(boolean XMLQuote, boolean inAttribute) { + StringBuilder bareContent = new StringBuilder(); + Iterator<FieldPart> iter = ensureTokenized().iterator(); + while(iter.hasNext()) { + FieldPart f = iter.next(); + if (f instanceof MarkupFieldPart) + continue; + + if (XMLQuote) + bareContent.append(XML.xmlEscape(f.getContent(), inAttribute)); + else + bareContent.append(f.getContent()); + } + return bareContent.toString(); + } + + public String toString() { + return getContent(); + } + + /** + * Fetch the object which (the String representation of) this HitField was + * built from. This may be null as setting the original is optional. + */ + public Object getOriginal() { + return original; + } + + /** + * Optionally set the object which this HitField should represent. 
+ */ + public void setOriginal(Object original) { + this.original = original; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/ImmutableFieldPart.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/ImmutableFieldPart.java new file mode 100644 index 00000000000..d7bfe0e287d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/ImmutableFieldPart.java @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.hitfield; + +/** + * Represents an element of a hit property which is a possibly + * mutable string element + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class ImmutableFieldPart implements FieldPart { + private final String content; + private final String initContent; + // Whether this element represents a (part of) a token or a + // delimiter string. When splitting existing parts, the new + // parts should inherit this state from the object they were + // split from. 
+ private boolean tokenOrDelimiter; + public ImmutableFieldPart(String initContent, + boolean tokenOrDelimiter) { + this(initContent, initContent, tokenOrDelimiter); + } + public ImmutableFieldPart(String initContent, + String content, + boolean tokenOrDelimiter) { + + this.initContent = initContent; + this.content = content; + this.tokenOrDelimiter = tokenOrDelimiter; + } + public boolean isFinal() { return true; } + public boolean isToken() { return tokenOrDelimiter; } + public String getContent() { return content; } + public String getInitContent() { return initContent; } + public String toString() { return content; } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/JSONString.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/JSONString.java new file mode 100644 index 00000000000..f8992c7004c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/JSONString.java @@ -0,0 +1,449 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.hitfield; + +import com.yahoo.prelude.query.WeightedSetItem; +import com.yahoo.text.Utf8; +import com.yahoo.text.XML; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import com.yahoo.data.access.Inspector; +import com.yahoo.data.access.Inspectable; +import com.yahoo.data.access.Type; +import com.yahoo.data.access.simple.Value; +import com.yahoo.data.access.slime.SlimeAdapter; +import com.yahoo.slime.Slime; +import com.yahoo.slime.JsonDecoder; +import java.util.Iterator; + +/** + * A JSON wrapper. Contains XML-style rendering of a JSON structure. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class JSONString implements Inspectable { + + private static final long serialVersionUID = -3929383619752472712L; + private Inspector value; + private String content; + private boolean didInitContent = false; + private Object parsedJSON; + private boolean didInitJSON = false; + + public JSONString(Inspector value) { + if (value == null) { + throw new IllegalArgumentException("JSONString does not accept null value."); + } + this.value = value; + } + + public Inspector inspect() { + if (value == null) { + JsonDecoder decoder = new JsonDecoder(); + Slime slime = decoder.decode(new Slime(), Utf8.toBytes(content)); + if (slime.get().field("error_message").valid() && + slime.get().field("partial_result").valid() && + slime.get().field("offending_input").valid()) + { + // probably a json parse error... + value = new Value.StringValue(content); + } else if (slime.get().type() == com.yahoo.slime.Type.OBJECT || + slime.get().type() == com.yahoo.slime.Type.ARRAY) + { + // valid json object or array + value = new SlimeAdapter(slime.get()); + } else { + // 'valid' json, but leaf value + value = new Value.StringValue(content); + } + } + return value; + } + + private void initContent() { + if (didInitContent) { + return; + } + didInitContent = true; + if (value.type() == Type.EMPTY) { + content = ""; + } else if (value.type() == Type.STRING) { + content = value.asString(); + } else { + // This will be json, because we know there is Slime below + content = value.toString(); + } + } + + /** + * @throws IllegalArgumentException Does not accept null content + */ + public JSONString(String content) { + if (content == null) { + throw new IllegalArgumentException("JSONString does not accept null content."); + } + this.content = content; + didInitContent = true; + } + + public String toString() { + if (value != null) { + return renderFromInspector(); + } + initContent(); + if (content.length() == 0) { 
+ return content; + } + initJSON(); + if (parsedJSON == null) { + return content; + } else if (parsedJSON.getClass() == JSONArray.class) { + return render((JSONArray) parsedJSON); + } else if (parsedJSON.getClass() == JSONObject.class) { + return render((JSONObject) parsedJSON); + } else { + return content; + } + } + + public boolean fillWeightedSetItem(WeightedSetItem item) { + initContent(); + initJSON(); + try { + if (parsedJSON instanceof JSONArray) { + JSONArray seq = (JSONArray)parsedJSON; + for (int i = 0; i < seq.length(); i++) { + JSONArray wsi = seq.getJSONArray(i); + String name = (String)wsi.get(0); + Number weight = (Number) wsi.get(1); + item.addToken(name, weight.intValue()); + } + return true; + } + } catch (JSONException | ClassCastException e) { + } + return false; + } + + private void initJSON() { + initContent(); + if (didInitJSON) { + return; + } + didInitJSON = true; + if (content.charAt(0) == '[') { + try { + parsedJSON = new JSONArray(content); + } catch (JSONException e) { + // System.err.println("bad json: "+e); + return; + } + } else { + try { + parsedJSON = new JSONObject(content); + } catch (JSONException e) { + // System.err.println("bad json: "+e); + return; + } + } + } + + private static String render(JSONArray sequence) { + return FieldRenderer.renderMapOrArray(new StringBuilder(), sequence, 2).toString(); + } + + private static String render(JSONObject structure) { + return FieldRenderer.renderStruct(new StringBuilder(), structure, 2).toString(); + } + + private static abstract class FieldRenderer { + + protected static void indent(StringBuilder renderTarget, int nestingLevel) { + for (int i = 0; i < nestingLevel; ++i) { + renderTarget.append(" "); + } + } + + public static StringBuilder renderMapOrArray(StringBuilder renderTarget, + JSONArray sequence, + int nestingLevel) + { + if (sequence.length() == 0) return renderTarget; + + if (MapFieldRenderer.isMap(sequence)) { + MapFieldRenderer.renderMap(renderTarget, sequence, 
nestingLevel + 1); + } else { + ArrayFieldRenderer.renderArray(renderTarget, sequence, nestingLevel + 1); + } + indent(renderTarget, nestingLevel); + return renderTarget; + } + + public static StringBuilder renderStruct(StringBuilder renderTarget, JSONObject object, int nestingLevel) { + StructureFieldRenderer.renderStructure(renderTarget, object, nestingLevel + 1); + indent(renderTarget, nestingLevel); + return renderTarget; + } + + public abstract void render(StringBuilder renderTarget, Object value, int nestingLevel); + + public abstract void closeTag(StringBuilder renderTarget, int nestingLevel, String closing); + + /** Returns a value from an object, or null if not found */ + protected static Object get(String field,JSONObject source) { + try { + return source.get(field); + } + catch (JSONException e) { // not found + return null; + } + } + + protected static void renderValue(Object value,StringBuilder renderTarget,int nestingLevel) { + if (value.getClass() == JSONArray.class) { + renderMapOrArray(renderTarget, (JSONArray) value, nestingLevel); + } else if (value instanceof Number) { + NumberFieldRenderer.renderNumber(renderTarget, (Number) value); + } else if (value.getClass() == String.class) { + StringFieldRenderer.renderString(renderTarget, (String) value); + } else if (value.getClass() == JSONObject.class) { + renderStruct(renderTarget, (JSONObject) value, nestingLevel); + } else { + renderTarget.append(value.toString()); + } + } + + } + + private static class MapFieldRenderer extends FieldRenderer { + + @Override + public void render(StringBuilder renderTarget, Object value, int nestingLevel) { + renderMap(renderTarget, (JSONArray) value, nestingLevel); + } + + /** Returns true if the given JSON object contains a map - a list of pairs called "key" and "value" */ + private static boolean isMap(JSONArray array) { + Object firstObject=get(0,array); + if ( ! 
(firstObject instanceof JSONObject)) return false; + JSONObject first=(JSONObject)firstObject; + if (first.length()!=2) return false; + if ( ! first.has("key")) return false; + if ( ! first.has("value")) return false; + return true; + } + + public static void renderMap(StringBuilder renderTarget, JSONArray sequence, int nestingLevel) { + int limit = sequence.length(); + if (limit == 0) return; + for (int i = 0; i < limit; ++i) + renderMapItem(renderTarget, (JSONObject)get(i,sequence), nestingLevel); + renderTarget.append("\n"); + } + + public static void renderMapItem(StringBuilder renderTarget, JSONObject object, int nestingLevel) { + renderTarget.append('\n'); + indent(renderTarget, nestingLevel); + renderTarget.append("<item><key>"); + renderValue(get("key",object), renderTarget, nestingLevel); + renderTarget.append("</key><value>"); + renderValue(get("value",object), renderTarget, nestingLevel); + renderTarget.append("</value></item>"); + } + + /** Returns a value from an array, or null if it does not exist */ + private static Object get(int index,JSONArray source) { + try { + return source.get(index); + } + catch (JSONException e) { // not found + return null; + } + } + + @Override + public void closeTag(StringBuilder renderTarget, int nestingLevel, String closing) { + indent(renderTarget, nestingLevel); + renderTarget.append(closing); + } + } + + private static class StructureFieldRenderer extends FieldRenderer { + @Override + public void render(StringBuilder renderTarget, Object value, int nestingLevel) { + renderStructure(renderTarget, (JSONObject) value, nestingLevel); + } + + public static void renderStructure(StringBuilder renderTarget, JSONObject structure, int nestingLevel) { + for (Iterator<?> i = structure.keys(); i.hasNext();) { + String key = (String) i.next(); + Object value=get(key,structure); + if (value==null) continue; + renderTarget.append('\n'); + indent(renderTarget, nestingLevel); + renderTarget.append("<struct-field 
name=\"").append(key).append("\">"); + renderValue(value, renderTarget, nestingLevel); + renderTarget.append("</struct-field>"); + } + renderTarget.append('\n'); + } + + @Override + public void closeTag(StringBuilder renderTarget, int nestingLevel, String closing) { + indent(renderTarget, nestingLevel); + renderTarget.append(closing); + } + } + + private static class NumberFieldRenderer extends FieldRenderer { + @Override + public void render(StringBuilder renderTarget, Object value, int nestingLevel) { + renderNumber(renderTarget, (Number) value); + } + + public static void renderNumber(StringBuilder renderTarget, Number number) { + renderTarget.append(number.toString()); + } + + @Override + public void closeTag(StringBuilder renderTarget, int nestingLevel, String closing) { + renderTarget.append(closing); + } + } + + private static class StringFieldRenderer extends FieldRenderer { + @Override + public void render(StringBuilder renderTarget, Object value, int nestingLevel) { + renderString(renderTarget, (String) value); + } + + public static void renderString(StringBuilder renderTarget, String value) { + renderTarget.append(XML.xmlEscape(value, false)); + } + + @Override + public void closeTag(StringBuilder renderTarget, int nestingLevel, String closing) { + renderTarget.append(closing); + } + } + + private static class ArrayFieldRenderer extends FieldRenderer { + protected static FieldRenderer structureFieldRenderer = new StructureFieldRenderer(); + protected static FieldRenderer stringFieldRenderer = new StringFieldRenderer(); + protected static FieldRenderer numberFieldRenderer = new NumberFieldRenderer(); + + @Override + public void render(StringBuilder renderTarget, Object value, int nestingLevel) { + // Only for completeness + renderArray(renderTarget, (JSONArray) value, nestingLevel); + } + + public static void renderArray(StringBuilder renderTarget, JSONArray seq, int nestingLevel) { + FieldRenderer renderer; + int limit = seq.length(); + if (limit == 0) 
return; + Object sniffer; + try { + sniffer = seq.get(0); + } catch (JSONException e) { + return; + } + if (sniffer.getClass() == JSONArray.class) { + renderWeightedSet(renderTarget, seq, nestingLevel); + return; + } else if (sniffer.getClass() == JSONObject.class) { + renderer = structureFieldRenderer; + } else if (sniffer instanceof Number) { + renderer = numberFieldRenderer; + } else if (sniffer.getClass() == String.class) { + renderer = stringFieldRenderer; + } else { + return; + } + renderTarget.append('\n'); + for (int i = 0; i < limit; ++i) { + Object value; + try { + value = seq.get(i); + } catch (JSONException e) { + continue; + } + indent(renderTarget, nestingLevel); + renderTarget.append("<item>"); + renderer.render(renderTarget, value, nestingLevel + 1); + renderer.closeTag(renderTarget, nestingLevel, "</item>\n"); + } + } + + protected static void renderWeightedSet(StringBuilder renderTarget, + JSONArray seq, int nestingLevel) { + int limit = seq.length(); + Object sniffer; + FieldRenderer renderer; + + try { + JSONArray first = seq.getJSONArray(0); + sniffer = first.get(0); + } catch (JSONException e) { + return; + } + + if (sniffer.getClass() == JSONObject.class) { + renderer = structureFieldRenderer; + } else if (sniffer instanceof Number) { + renderer = numberFieldRenderer; + } else if (sniffer.getClass() == String.class) { + renderer = stringFieldRenderer; + } else { + return; + } + renderTarget.append('\n'); + for (int i = 0; i < limit; ++i) { + JSONArray value; + Object name; + Number weight; + + try { + value = seq.getJSONArray(i); + name = value.get(0); + weight = (Number) value.get(1); + + } catch (JSONException e) { + continue; + } + indent(renderTarget, nestingLevel); + renderTarget.append("<item weight=\"").append(weight).append("\">"); + renderer.render(renderTarget, name, nestingLevel + 1); + renderer.closeTag(renderTarget, nestingLevel, "</item>\n"); + } + } + + @Override + public void closeTag(StringBuilder renderTarget, int 
nestingLevel, String closing) { + indent(renderTarget, nestingLevel); + renderTarget.append(closing); + } + } + + public String getContent() { + initContent(); + return content; + } + + public Object getParsedJSON() { + initContent(); + if (parsedJSON == null) { + initJSON(); + } + return parsedJSON; + } + + public void setParsedJSON(Object parsedJSON) { + this.parsedJSON = parsedJSON; + } + + public String renderFromInspector() { + return XmlRenderer.render(new StringBuilder(), value).toString(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/MarkupFieldPart.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/MarkupFieldPart.java new file mode 100644 index 00000000000..6fdf7662b9b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/MarkupFieldPart.java @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.hitfield; + +/** + * Represents an element of a hit property which is markup, not content. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class MarkupFieldPart implements FieldPart { + private String content; + public MarkupFieldPart(String content) { + this.content = content; + } + public boolean isFinal() { return true; } + // Markup is never part of tokens as such + public boolean isToken() { return false; } + public void setContent(String content) { + this.content = content; + } + public String getContent() { return content; } + public String toString() { return content; } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/RawData.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/RawData.java new file mode 100644 index 00000000000..26787e442fc --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/RawData.java @@ -0,0 +1,55 @@ +// Copyright 2016 Yahoo Inc. 
// Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.hitfield; +
/**
 * A representation of some random data with unknown semantics.
 *
 * <p>The wrapped array is neither copied on the way in nor on the way out, so the
 * caller and this object share the same bytes.</p>
 *
 * @author arnej27959
 */
public final class RawData {

    private final byte[] content;

    /**
     * Constructor, takes ownership of the given array (no defensive copy is made).
     *
     * @param content some bytes, handed over to this object
     */
    public RawData(byte[] content) {
        this.content = content;
    }

    /**
     * Returns the internal byte array containing the actual data received.
     *
     * @return the same array instance that was passed to the constructor
     */
    public byte[] getInternalData() {
        return content;
    }

    /**
     * Renders the data as an ASCII string. Printable ASCII (32..126), newline and tab
     * are emitted as-is; every other byte is written as {@code \xHH} with two lowercase
     * hex digits.
     *
     * <p>NB: not always uniquely reversible, since a literal backslash in the data is
     * emitted unescaped.</p>
     *
     * @return the escaped string form of the data
     */
    @Override
    public String toString() {
        StringBuilder buf = new StringBuilder(content.length);
        for (byte b : content) {
            // Treat the byte as unsigned: the value is always in 0..255, so no
            // further range check is needed below.
            int i = b & 0xFF;
            if ((i > 31 && i < 127) || i == '\n' || i == '\t') {
                buf.append((char) i);
            } else {
                // Integer.toHexString drops the leading zero, so pad values below 16.
                buf.append(i < 16 ? "\\x0" : "\\x").append(Integer.toHexString(i));
            }
        }
        return buf.toString();
    }
}
// diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/SeparatorFieldPart.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/SeparatorFieldPart.java new file mode 100644 index 00000000000..30a82bdf323 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/SeparatorFieldPart.java @@ -0,0 +1,14 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.hitfield; + +/** + * Represents an element of a hit property which is markup for + * separating dynamic snippets.
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class SeparatorFieldPart extends MarkupFieldPart { + public SeparatorFieldPart(String content) { + super(content); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/StringFieldPart.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/StringFieldPart.java new file mode 100644 index 00000000000..2d04fa3d08d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/StringFieldPart.java @@ -0,0 +1,31 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.hitfield; + +/** + * Represents an element of a hit property which is a possibly + * mutable string element + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class StringFieldPart implements FieldPart { + private String content; + private final String initContent; + // Whether this element represents a (part of) a token or a + // delimiter string. When splitting existing parts, the new + // parts should inherit this state from the object they were + // split from. 
+ private boolean tokenOrDelimiter; + public StringFieldPart(String content, boolean tokenOrDelimiter) { + this.content = content; + initContent = content; + this.tokenOrDelimiter = tokenOrDelimiter; + } + public boolean isFinal() { return false; } + public boolean isToken() { return tokenOrDelimiter; } + public String getContent() { return content; } + public void setContent(String content) { + this.content = content; + } + public String getInitContent() { return initContent; } + public String toString() { return content; } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/TokenFieldIterator.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/TokenFieldIterator.java new file mode 100644 index 00000000000..3c055472337 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/TokenFieldIterator.java @@ -0,0 +1,109 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.hitfield; + +import java.util.List; +import java.util.ListIterator; +import java.util.NoSuchElementException; + +/** + * A specialized list iterator to manipulate tokens in HitField objects. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class TokenFieldIterator implements ListIterator<FieldPart> { + + private int index = 0; + private int prevReturned = 0; + private final List<FieldPart> fieldList; + private final HitField hitField; + + public TokenFieldIterator(List<FieldPart> fieldList, HitField hitField) { + this.fieldList = fieldList; + this.hitField = hitField; + } + + public void add(FieldPart o) { + fieldList.add(index, o); + index++; + hitField.markDirty(); + } + + public boolean hasNext() { + int i = index; + while (i < fieldList.size()) { + if (fieldList.get(i).isToken()) + return true; + i++; + } + return false; + } + + public boolean hasPrevious() { + int i = index; + while (i > 0) { + i--; + if (fieldList.get(i).isToken()) + return true; + } + return false; + } + + public FieldPart next() { + int i = index; + while (i < fieldList.size()) { + if (fieldList.get(i).isToken()) { + index = i + 1; + prevReturned = i; + return fieldList.get(i); + } + i++; + } + throw new NoSuchElementException("No more tokens available."); + } + + public int nextIndex() { + int i = index; + while (i < fieldList.size()) { + if (fieldList.get(i).isToken()) + return i; + i++; + } + return fieldList.size(); + } + + public FieldPart previous() { + int i = index; + while (i > 0) { + i--; + if (fieldList.get(i).isToken()) { + index = i; + prevReturned = i; + return fieldList.get(i); + } + } + throw new NoSuchElementException("Trying to go before first token available."); + } + + public int previousIndex() { + int i = index; + while (i > 0) { + i--; + if (fieldList.get(i).isToken()) + return i; + } + return -1; + } + + public void remove() { + fieldList.remove(prevReturned); + if (prevReturned < index) + index--; + hitField.markDirty(); + } + + public void set(FieldPart o) { + fieldList.set(prevReturned, o); + hitField.markDirty(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/XMLString.java 
b/container-search/src/main/java/com/yahoo/prelude/hitfield/XMLString.java new file mode 100644 index 00000000000..9338c8ca53a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/XMLString.java @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.hitfield; + +/** + * A representation of an XML chunk. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class XMLString { + + private final String content; + + public XMLString(String content) { + this.content = content; + } + + public String toString() { + return content; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/XmlRenderer.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/XmlRenderer.java new file mode 100644 index 00000000000..13f94769d1f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/XmlRenderer.java @@ -0,0 +1,201 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.hitfield; + +import com.yahoo.text.Utf8; +import com.yahoo.text.XML; +import com.yahoo.data.access.Inspector; +import com.yahoo.data.access.Inspectable; +import com.yahoo.data.access.Type; +import com.yahoo.data.access.simple.Value; +import com.yahoo.data.access.slime.SlimeAdapter; +import java.nio.charset.StandardCharsets; + +import java.util.Iterator; +import java.util.Map; + +/** + * Utility class for converting accessible data into the historical "prelude" xml format. 
+ **/ +public class XmlRenderer { + + public static StringBuilder render(StringBuilder target, Inspector value) { + new InspectorRenderer(target).renderInspector(value, 2); + return target; + } + + private static class InspectorRenderer { + + private final StringBuilder renderTarget; + + InspectorRenderer(StringBuilder target) { + this.renderTarget = target; + } + + void renderInspector(Inspector value, int nestingLevel) { + if (value.type() == Type.ARRAY) { + renderMapOrArray(value, nestingLevel); + } else if (value.type() == Type.OBJECT) { + renderStruct(value, nestingLevel); + } else if (value.type() == Type.STRING) { + renderTarget.append(XML.xmlEscape(value.asString(), false)); + } else if (value.type() == Type.LONG) { + long l = value.asLong(); + renderTarget.append(String.valueOf(l)); + } else if (value.type() == Type.DOUBLE) { + double d = value.asDouble(); + renderTarget.append(String.valueOf(d)); + } else if (value.type() == Type.BOOL) { + boolean b = value.asBool(); + renderTarget.append(b ? "true" : "false"); + } else if (value.type() == Type.DATA) { + byte[] data = value.asData(); + renderTarget.append("<data length=\"").append(data.length); + renderTarget.append("\" encoding=\"hex\">"); + for (int i = 0; i < data.length; i++) { + for (int sh = 4; sh >= 0; sh -= 4) { + int val = (data[i] >> sh) & 0xF; + char hexdigit = (val < 10) ? 
((char)('0' + val)) : ((char)('A' + val - 10)); + renderTarget.append(hexdigit); + } + } + renderTarget.append("</data>"); + } + } + + private void renderMapItem(Inspector object, int nestingLevel) { + renderTarget.append('\n'); + indent(nestingLevel); + renderTarget.append("<item><key>"); + renderInspector(object.field("key"), nestingLevel); + renderTarget.append("</key><value>"); + renderInspector(object.field("value"), nestingLevel); + renderTarget.append("</value></item>"); + } + + private void renderStructure(Inspector structure, int nestingLevel) { + for (Map.Entry<String,Inspector> entry : structure.fields()) { + String key = entry.getKey(); + Inspector value = entry.getValue(); + renderTarget.append('\n'); + indent(nestingLevel); + renderTarget.append("<struct-field name=\"").append(key).append("\">"); + renderInspector(value, nestingLevel); + renderTarget.append("</struct-field>"); + } + renderTarget.append('\n'); + } + + private void renderStruct(Inspector object, int nestingLevel) { + renderStructure(object, nestingLevel + 1); + indent(nestingLevel); + } + + private void indent(int nestingLevel) { + for (int i = 0; i < nestingLevel; ++i) { + renderTarget.append(" "); + } + } + + private void renderMap(Inspector sequence, int nestingLevel) { + int limit = sequence.entryCount(); + if (limit == 0) return; + for (int i = 0; i < limit; ++i) + renderMapItem(sequence.entry(i), nestingLevel); + renderTarget.append("\n"); + } + + /** Returns true if the given array represents a map - a list of pairs called "key" and "value" */ + private boolean isMap(Inspector array) { + Inspector firstObject = array.entry(0); + if (firstObject.type() != Type.OBJECT) return false; + if (firstObject.fieldCount() != 2) return false; + if (! firstObject.field("key").valid()) return false; + if (! 
firstObject.field("value").valid()) return false; + return true; + } + + /** + * Returns true if the given array represents a weighted set, + * as a list of pairs called "item" and "weight" + **/ + private boolean isWeightedSetObjects(Inspector array) { + Inspector firstObject = array.entry(0); + if (firstObject.type() != Type.OBJECT) return false; + if (firstObject.fieldCount() != 2) return false; + if (! firstObject.field("item").valid()) return false; + if (! firstObject.field("weight").valid()) return false; + return true; + } + + /** + * Returns true if the given array represents a weighted set, + * as a list of tuples + **/ + private boolean isWeightedSetArrays(Inspector array) { + Inspector firstObject = array.entry(0); + if (firstObject.type() != Type.ARRAY) return false; + if (firstObject.entryCount() != 2) return false; + return true; + } + + private void renderMapOrArray(Inspector sequence, int nestingLevel) + { + if (sequence.entryCount() == 0) return; + if (isMap(sequence)) { + renderMap(sequence, nestingLevel + 1); + } else if (isWeightedSetArrays(sequence)) { + renderWeightedSet(sequence, nestingLevel + 1, true); + } else if (isWeightedSetObjects(sequence)) { + renderWeightedSet(sequence, nestingLevel + 1, false); + } else { + renderArray(sequence, nestingLevel + 1); + } + indent(nestingLevel); + } + + private void renderWeightedSet(Inspector seq, int nestingLevel, boolean nestedarray) + { + int limit = seq.entryCount(); + renderTarget.append('\n'); + for (int i = 0; i < limit; ++i) { + Inspector value = nestedarray ? seq.entry(i).entry(0) : seq.entry(i).field("item"); + Inspector weight = nestedarray ? 
seq.entry(i).entry(1) : seq.entry(i).field("weight"); + long lw = 0; + double dw = 0; + if (weight.type() == Type.LONG) { + lw = weight.asLong(); + dw = (double)lw; + } + if (weight.type() == Type.DOUBLE) { + dw = weight.asDouble(); + lw = (long)dw; + } + indent(nestingLevel); + renderTarget.append("<item weight=\""); + if (dw == (double)lw || weight.type() == Type.LONG) { + renderTarget.append(lw); + } else { + renderTarget.append(dw); + } + renderTarget.append("\">"); + renderInspector(value, nestingLevel); + renderTarget.append("</item>\n"); + } + } + + private void renderArray(Inspector seq, int nestingLevel) { + int limit = seq.entryCount(); + if (limit == 0) return; + renderTarget.append('\n'); + for (int i = 0; i < limit; ++i) { + Inspector value = seq.entry(i); + indent(nestingLevel); + renderTarget.append("<item>"); + renderInspector(value, nestingLevel); + renderTarget.append("</item>\n"); + } + } + + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/package-info.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/package-info.java new file mode 100644 index 00000000000..9e5a5b08c6b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/package-info.java @@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +@PublicApi +package com.yahoo.prelude.hitfield; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/logging/AccessLogEntry.java b/container-search/src/main/java/com/yahoo/prelude/logging/AccessLogEntry.java new file mode 100644 index 00000000000..b327fed1c4b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/logging/AccessLogEntry.java @@ -0,0 +1,15 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.logging; + +/** + * Hollow compatibility class for com.yahoo.container.logging.AccessLogEntry. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class AccessLogEntry extends com.yahoo.container.logging.AccessLogEntry { + + public AccessLogEntry() { + super(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/logging/package-info.java b/container-search/src/main/java/com/yahoo/prelude/logging/package-info.java new file mode 100644 index 00000000000..6ba2f1ce648 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/logging/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.prelude.logging; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/package-info.java b/container-search/src/main/java/com/yahoo/prelude/package-info.java new file mode 100644 index 00000000000..fdb1a2068a4 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/package-info.java @@ -0,0 +1,8 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * The main classes of the Vespa Query Result Server. As an external API, this is <b>deprecated</b> starting with Vespa 4.2 + */ +@ExportPackage +package com.yahoo.prelude; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/query/AndItem.java b/container-search/src/main/java/com/yahoo/prelude/query/AndItem.java new file mode 100644 index 00000000000..ad891f821f6 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/AndItem.java @@ -0,0 +1,20 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.query; + + +/** + * An and'ing of a collection of sub-expressions + * + * @author bratseth + */ +public class AndItem extends CompositeItem { + + public ItemType getItemType() { + return ItemType.AND; + } + + public String getName() { + return "AND"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/AndSegmentItem.java b/container-search/src/main/java/com/yahoo/prelude/query/AndSegmentItem.java new file mode 100644 index 00000000000..189c8260785 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/AndSegmentItem.java @@ -0,0 +1,60 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import java.util.Iterator; + +import edu.umd.cs.findbugs.annotations.NonNull; + +/** + * An immutable and'ing of a collection of sub-expressions. It does not extend + * AndItem to avoid code using instanceof handling it as an AndItem. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class AndSegmentItem extends SegmentItem implements BlockItem { + + public AndSegmentItem(String rawWord, boolean isFromQuery, boolean stemmed) { + super(rawWord, rawWord, isFromQuery, stemmed, null); + } + + public AndSegmentItem(String rawWord, String current, boolean isFromQuery, boolean stemmed) { + super(rawWord, current, isFromQuery, stemmed, null); + } + + public AndSegmentItem(PhraseSegmentItem item) { + super(item.getRawWord(), item.stringValue(), item.isFromQuery(), item.isStemmed(), null); + int weight = item.getWeight(); + if (item.getItemCount() > 0) { + for (Iterator<Item> i = item.getItemIterator(); i.hasNext();) { + WordItem word = (WordItem) i.next(); + word.setWeight(weight); + addItem(word); + } + } + } + + public ItemType getItemType() { + return ItemType.AND; + } + + public String getName() { + return "SAND"; + } + + @NonNull + public String getIndexName() { + if (getItemCount() == 0) { + return ""; + } else { + return ((IndexedItem) getItem(0)).getIndexName(); + } + } + + // TODO: Is it necessary to override equals? + + public void setWeight(int w) { + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + i.next().setWeight(w); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/BlockItem.java b/container-search/src/main/java/com/yahoo/prelude/query/BlockItem.java new file mode 100644 index 00000000000..6b6fabafac5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/BlockItem.java @@ -0,0 +1,42 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + + +/** + * An interface used for anything which represents a single block + * of query input. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public interface BlockItem extends HasIndexItem { + + /** + * The untransformed raw text from the user serving as base for + * this item. + */ + String getRawWord(); + + /** Returns the substring which is the origin of this item, or null if none */ + public Substring getOrigin(); + + /** Returns the value of this term as a string */ + public abstract String stringValue(); + + /** + * Is this block of text conceptually from the user query? + */ + boolean isFromQuery(); + + boolean isStemmed(); + + /** + * Does this item represent "usual words"? + */ + boolean isWords(); + + /** + * If the block has to be resegmented, what operator should be chosen if it + * is necessary to change operator? + */ + SegmentingRule getSegmentingRule(); +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/CompositeIndexedItem.java b/container-search/src/main/java/com/yahoo/prelude/query/CompositeIndexedItem.java new file mode 100644 index 00000000000..6dbaa129f66 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/CompositeIndexedItem.java @@ -0,0 +1,80 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import com.yahoo.prelude.query.textualrepresentation.Discloser; + +import edu.umd.cs.findbugs.annotations.NonNull; + +import java.nio.ByteBuffer; + +/** + * Common implementation for Item classes implementing the IndexedItem interface. + * Note that this file exist in 3 copies that should be kept in sync: + * + * CompositeIndexedItem.java + * SimpleIndexedItem.java + * IndexedSegmentItem.java + * + * These should only have trivial differences. + * (multiple inheritance or mixins would have been nice). 
+ * + * @author arnej27959 + */ +public abstract class CompositeIndexedItem extends CompositeTaggableItem implements IndexedItem { + + @NonNull + private String index = ""; + + /** + * The name of the index this belongs to, or "" (never null) if not specified + **/ + @NonNull + public String getIndexName() { + return index; + } + + // encode index bytes + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + putString(index, buffer); + } + + /** Sets the name of the index to search */ + public void setIndexName(String index) { + if (index == null) { + index = ""; + } + this.index = index; + } + + /** Appends the index prefix if necessary */ + protected void appendIndexString(StringBuilder buffer) { + if (!getIndexName().equals("")) { + buffer.append(getIndexName()); + buffer.append(":"); + } + } + + public boolean equals(Object object) { + if (!super.equals(object)) { + return false; + } + IndexedItem other = (IndexedItem) object; // Ensured by superclass + if (!this.index.equals(other.getIndexName())) { + return false; + } + return true; + } + + public int hashCode() { + return super.hashCode() + 31 * index.hashCode(); + } + + public abstract String getIndexedString(); + + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("index", index); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/CompositeItem.java b/container-search/src/main/java/com/yahoo/prelude/query/CompositeItem.java new file mode 100644 index 00000000000..99e388d8db3 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/CompositeItem.java @@ -0,0 +1,379 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.query; + + +import com.yahoo.compress.IntegerCompressor; +import com.yahoo.prelude.query.textualrepresentation.Discloser; +import com.yahoo.protect.Validator; +import com.yahoo.search.query.QueryTree; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.ListIterator; + + +/** + * Superclass of expressions which contains a collection of sub-expressions + * + * @author bratseth + */ +public abstract class CompositeItem extends Item { + + private List<Item> subitems = new java.util.ArrayList<>(4); + + /** Sets the index name of all subitems of this */ + public void setIndexName(String index) { + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + Item item = i.next(); + + item.setIndexName(index); + } + } + + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + for (Item item : subitems) + discloser.addChild(item); + } + + public void ensureNotInSubtree(CompositeItem item) { + for (Iterator<Item> i = item.getItemIterator(); i.hasNext();) { + Item possibleCycle = i.next(); + + if (this == possibleCycle) { + throw new QueryException("Tried to create a cycle in a tree."); + } else if (possibleCycle instanceof CompositeItem) { + ensureNotInSubtree((CompositeItem) possibleCycle); + } + } + } + + public void addItem(Item item) { + adding(item); + subitems.add(item); + } + + protected void adding(Item item) { + Validator.ensureNotNull("Composite item", item); + Validator.ensure("Attempted to add a composite to itself", item != this); + if (item instanceof CompositeItem) { + ensureNotInSubtree((CompositeItem) item); + } + item.setParent(this); + } + + /** + * Inserts the item at a position and increases the index of existing items + * starting on this position by one + * + * @throws IndexOutOfBoundsException if the index is out of range + */ + public void addItem(int index, Item item) { + if (index > subitems.size() || index < 0) { 
+ throw new IndexOutOfBoundsException( + "Could not add a subitem at position " + index + " to " + this); + } + adding(item); + subitems.add(index, item); + } + + /** For NOT items, which may wish to insert nulls */ + void insertNullFirstItem() { + subitems.add(0, null); + } + + /** + * Returns a subitem + * + * @param index the (0-base) index of the item to return + * @throws IndexOutOfBoundsException if there is no subitem at index + */ + public Item getItem(int index) { + return subitems.get(index); + } + + /** + * Replaces the item at the given index + * + * @param index the (0-base) index of the item to replace + * @param item the new item + * @return the old item at this position. The parent of the old item is <i>not</i> cleared + * @throws IndexOutOfBoundsException if there is no item at this index + */ + public Item setItem(int index, Item item) { + if (index >= subitems.size() || index < 0) + throw new IndexOutOfBoundsException("Could not add a subitem at position " + index + " to " + this); + + adding(item); + Item old = subitems.set(index, item); + if (old!=item) + removing(old); + return old; + } + + /** + * Returns the index of a subitem + * + * @param item The child item to find the index of + * @return the 0-base index of the child or -1 if there is no such child + */ + public int getItemIndex(Item item) { + return subitems.indexOf(item); + } + + /** + * Removes the item at the given index + * + * @param index the index of the item to remove + * @return the removed item + * @throws IndexOutOfBoundsException if there is no item at the given index + */ + public Item removeItem(int index) { + Item item = subitems.remove(index); + + removing(item); + return item; + } + + /** Always call on every remove */ + private void removing(Item item) { + if (item == null) { + return; + } + if (item.getParent() == this) { // Otherwise, this belongs to somebody else now (somebody are doing addField, removeField) + item.setParent(null); + } + } + + /** + * Removes the 
given item. Does nothing if the item is not present. + * + * @param item the item to remove + * @return whether the item was removed + */ + public boolean removeItem(Item item) { + boolean removed = subitems.remove(item); + + if (removed) { + removing(item); + } + return removed; + } + + /** Returns the number of direct ancestors of this item */ + public int getItemCount() { + return subitems.size(); + } + + /** Returns a modifiable list iterator */ + public ListIterator<Item> getItemIterator() { + return new ListIteratorWrapper(this); + } + + public int encode(ByteBuffer buffer) { + encodeThis(buffer); + int itemCount = 1; + + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + Item subitem = i.next(); + + itemCount += subitem.encode(buffer); + } + return itemCount; + } + + /** + * Encodes just this item, not it's usual subitems, to the given buffer. + */ + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + IntegerCompressor.putCompressedPositiveNumber(encodingArity(), buffer); + } + + protected int encodingArity() { + return subitems.size(); + } + + protected void appendBodyString(StringBuilder buffer) { + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + Item item = i.next(); + + buffer.append(item.toString()); + if (i.hasNext()) { + buffer.append(" "); + } + } + } + + /** Composite items should be parenthized when not on the top level */ + protected boolean shouldParenthize() { + return getParent()!= null && ! 
(getParent() instanceof QueryTree); + } + + /** Returns a deep copy of this item */ + public CompositeItem clone() { + CompositeItem copy = (CompositeItem) super.clone(); + + copy.subitems = new java.util.ArrayList<>(); + for (Item subItem : subitems) { + Item subItemCopy = subItem.clone(); + copy.adding(subItemCopy); + copy.subitems.add(subItemCopy); + } + fixConnexity(copy); + return copy; + } + + private void fixConnexity(CompositeItem copy) { + List<Item> flatland = new ArrayList<>(); + List<Item> flatCopy = new ArrayList<>(); + taggingFlatten(this, flatland); + taggingFlatten(copy, flatCopy); + int barrier = flatland.size(); + for (int i = 0; i < barrier; ++i) { + Item orig = flatland.get(i); + int connectedTo = find(orig.connectedItem, flatland); + if (connectedTo >= 0) { + TaggableItem tagged = (TaggableItem) flatCopy.get(i); + tagged.setConnectivity(flatCopy.get(connectedTo), orig.connectivity); + } + } + } + + private void taggingFlatten(Item tree, List<Item> container) { + if (tree.hasUniqueID()) { + container.add(tree); + } else if (tree instanceof CompositeItem) { + CompositeItem asComposite = (CompositeItem) tree; + for (Iterator<Item> i = asComposite.getItemIterator(); i.hasNext();) { + taggingFlatten(i.next(), container); + } + } + } + + private int find(Item needle, List<Item> haystack) { + if (needle == null) { + return -1; + } + int barrier = haystack.size(); + for (int i = 0; i < barrier; ++i) { + if (haystack.get(i) == needle) { + return i; + } + } + return -1; + } + + public int hashCode() { + int code = getName().hashCode() + subitems.size() * 17; + + for (int i = 0; i < subitems.size() && i <= 5; i++) { + code += subitems.get(i).hashCode(); + } + return code; + } + + /** + * Returns whether this item is of the same class and + * contains the same state as the given item + */ + public boolean equals(Object object) { + if (!super.equals(object)) { + return false; + } + + CompositeItem other = (CompositeItem) object; // Ensured by superclass + + 
if (!this.subitems.equals(other.subitems)) { + return false; + } + + return true; + } + + /** Make composite immutable if this is supported. */ + public void lock() {} + + /** Whether this composite is in a mutable state. */ + public boolean isLocked() { + return false; + } + + /** Handles mutator calls correctly */ + private static class ListIteratorWrapper implements ListIterator<Item> { + + private CompositeItem owner; + + private ListIterator<Item> wrapped; + + private Item current = null; + + public ListIteratorWrapper(CompositeItem owner) { + this.owner = owner; + wrapped = owner.subitems.listIterator(); + } + + public boolean hasNext() { + return wrapped.hasNext(); + } + + public Item next() { + current = wrapped.next(); + return current; + } + + public boolean hasPrevious() { + return wrapped.hasPrevious(); + } + + public Item previous() { + Item current = wrapped.previous(); + + return current; + } + + public int nextIndex() { + return wrapped.nextIndex(); + } + + public int previousIndex() { + return wrapped.previousIndex(); + } + + public void remove() { + owner.removing(current); + wrapped.remove(); + } + + public void set(Item o) { + Item newItem = o; + + owner.removing(current); + owner.adding(newItem); + current = newItem; + wrapped.set(newItem); + } + + public void add(Item o) { + Item newItem = o; + + owner.adding(newItem); + // TODO: Change current here? Check javadoc + wrapped.add(o); + } + + } + + @Override + public int getTermCount() { + int terms = 0; + for (Item item : subitems) { + terms += item.getTermCount(); + } + return terms; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/CompositeTaggableItem.java b/container-search/src/main/java/com/yahoo/prelude/query/CompositeTaggableItem.java new file mode 100644 index 00000000000..186f9686150 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/CompositeTaggableItem.java @@ -0,0 +1,71 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +/** + * Common implementation for Item classes implementing the TaggableItem interface. + * Note that this file exist in 3 copies that should be kept in sync: + * + * CompositeTaggableItem.java + * SimpleTaggableItem.java + * TaggableSegmentItem.java + * + * These should only have trivial differences. + * (multiple inheritance or mixins would have been nice). + * + * @author arnej27959 + */ +public abstract class CompositeTaggableItem extends CompositeItem implements TaggableItem { + + public int getUniqueID() { + return uniqueID; + } + + public void setUniqueID(int id) { + setHasUniqueID(true); + uniqueID = id; + } + + /** See {@link TaggableItem#setConnectivity} */ + public void setConnectivity(Item item, double connectivity) { + setHasUniqueID(true); + item.setHasUniqueID(true); + if (connectedItem != null) { + // untangle old connectivity + connectedItem.connectedBacklink = null; + } + this.connectivity = connectivity; + connectedItem = item; + connectedItem.connectedBacklink = this; + } + + public Item getConnectedItem() { + return connectedItem; + } + + public double getConnectivity() { + return connectivity; + } + + public void setSignificance(double significance) { + setHasUniqueID(true); + setExplicitSignificance(true); + this.significance = significance; + } + + public void setExplicitSignificance(boolean explicitSignificance) { + this.explicitSignificance = explicitSignificance; + } + + public boolean hasExplicitSignificance() { + return explicitSignificance; + } + + public double getSignificance() { + return significance; + } + + //Change access privilege from protected to public. 
+ public boolean hasUniqueID() { + return super.hasUniqueID(); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/DotProductItem.java b/container-search/src/main/java/com/yahoo/prelude/query/DotProductItem.java new file mode 100644 index 00000000000..fd494dba491 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/DotProductItem.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +/** + * A weighted set query item to be evaluated as a sparse dot product. + * + * The resulting dot product will be available as a raw score in the rank framework. + * + * @author havardpe + */ +public class DotProductItem extends WeightedSetItem { + + public DotProductItem(String indexName) { super(indexName); } + + @Override + public ItemType getItemType() { return ItemType.DOTPRODUCT; } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/EquivItem.java b/container-search/src/main/java/com/yahoo/prelude/query/EquivItem.java new file mode 100644 index 00000000000..a847ba1b288 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/EquivItem.java @@ -0,0 +1,90 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import com.yahoo.protect.Validator; + +import java.util.Collection; + +/** + * An Item where each child is an <i>alternative</i> which can be matched. + * Produces the same recall as Or, but differs in that the relevance of a match + * does not increase if more than one children is matched: With Equiv, matching one child perfectly is a perfect match. + * <p> + * This can only have Word, Int or Phrase children. 
+ * + * @author <a href="mailto:havardpe@yahoo-inc.com">havardpe</a> + */ +public class EquivItem extends CompositeTaggableItem { + + public ItemType getItemType() { + return ItemType.EQUIV; + } + + public String getName() { + return "EQUIV"; + } + + @Override + protected void adding(Item item) { + super.adding(item); + Validator.ensure("Equiv can only have word/int/phrase as children", + item.getItemType() == ItemType.WORD || + item.getItemType() == ItemType.INT || + item.getItemType() == ItemType.PHRASE); + } + + /** make an EQUIV item with no children */ + public EquivItem() {} + + /** + * create an EQUIV with the given item as child. + * The new EQUIV will take connectivity, + * significance and weight from the given item. + * + * @param item Will be modified and added as a child. + **/ + public EquivItem(Item item) { + addItem(item); + + // steal other item's connectivity: + if (item.connectedItem != null) { + setConnectivity(item.connectedItem, item.connectivity); + item.connectedItem = null; + item.connectivity = 0.0; + } + TaggableItem back = (TaggableItem)item.connectedBacklink; + if (back != null) { + back.setConnectivity(this, back.getConnectivity()); + item.connectedBacklink = null; + } + + // steal other item's significance: + if (item.explicitSignificance) { + setSignificance(item.significance); + } + + // steal other item's weight: + setWeight(item.getWeight()); + + // we have now stolen all of the other item's unique id needs: + item.setHasUniqueID(false); + } + + /** + * create an EQUIV with the given item and a set + * of alternate words as children. + * The new EQUIV will take connectivity, + * significance and weight from the given item. + * + * @param item Will be modified and added as a child. + * @param words Set of words to create WordItems from. 
+ **/ + public EquivItem(Item item, Collection<String> words) { + this(item); + String idx = ((IndexedItem)item).getIndexName(); + for (String word : words) { + WordItem witem = new WordItem(word, idx); + addItem(witem); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/ExactstringItem.java b/container-search/src/main/java/com/yahoo/prelude/query/ExactstringItem.java new file mode 100644 index 00000000000..3972d2b808e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/ExactstringItem.java @@ -0,0 +1,29 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +/** + * @author balder + */ +// TODO: balder to fix javadoc +public class ExactstringItem extends WordItem { + + public ExactstringItem(String substring) { + this(substring, false); + } + + public ExactstringItem(String substring, boolean isFromQuery) { + super(substring, isFromQuery); + } + + public ItemType getItemType() { + return ItemType.EXACT; + } + + public String getName() { + return "EXACTSTRING"; + } + + public String stringValue() { + return getWord(); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/FalseItem.java b/container-search/src/main/java/com/yahoo/prelude/query/FalseItem.java new file mode 100644 index 00000000000..993c395b191 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/FalseItem.java @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import java.nio.ByteBuffer; + +/** + * A query item which never matches. This is sometimes an useful output of query rewriting. 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class FalseItem extends Item { + + @Override + public void setIndexName(String index) { } + + @Override + public ItemType getItemType() { + return ItemType.WORD; // Implemented as a non-matching word as the backend does not support FalseItem + } + + @Override + public String getName() { return "FALSE"; } + + /** Override to only return "FALSE" rather than "FALSE " */ + @Override + protected void appendHeadingString(StringBuilder buffer) { + buffer.append(getName()); + } + + @Override + public int encode(ByteBuffer buffer) { + super.encodeThis(buffer); + putString(" ", buffer); // searching for space will not match + return 1; + } + + @Override + public int getTermCount() { return 1; } + + @Override + protected void appendBodyString(StringBuilder buffer) { } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/HasIndexItem.java b/container-search/src/main/java/com/yahoo/prelude/query/HasIndexItem.java new file mode 100644 index 00000000000..2608e6ec58e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/HasIndexItem.java @@ -0,0 +1,20 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import edu.umd.cs.findbugs.annotations.NonNull; + + +/** + * An interface for items where it is useful to access an associated + * index name. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public interface HasIndexItem { + + @NonNull + public String getIndexName(); + + public int getNumWords(); + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/Highlight.java b/container-search/src/main/java/com/yahoo/prelude/query/Highlight.java new file mode 100644 index 00000000000..509b6f04a66 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/Highlight.java @@ -0,0 +1,140 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import java.util.*; + +import static com.yahoo.language.LinguisticsCase.toLowerCase; + +/** + * Class encapsulating information on extra highlight-terms for a query + * + * @author <a href="mailto:mathiasm@yahoo-inc.com">Mathias Lidal</a> + */ +public class Highlight implements Cloneable { + + /** + * The name of the property map which contains extra highlight terms + */ + public static final String HIGHLIGHTTERMS = "highlightterms"; + + private Map<String, AndItem> highlightItems = new LinkedHashMap<>(); + + private Map<String, List<String>> highlightTerms = new LinkedHashMap<>(); + + public Highlight() {} + + private void addHighlightItem(String key, Item value) { + /*List<IndexedItem> l = highlightItems.get(key); + if (l == null) { + l = new ArrayList<IndexedItem>(); + highlightItems.put(key, l); + } + l.addField(value);*/ + AndItem item = highlightItems.get(key); + if (item == null) { + item = new AndItem(); + highlightItems.put(key, item); + } + item.addItem(value); + } + + /** + * Add custom highlight term + * + * @param field Field name + * @param item Term to be highlighted + */ + public void addHighlightTerm(String field, String item) { + addHighlightItem(field, new WordItem(toLowerCase(item), field, true)); + } + + /** + * Add custom highlight phrase + * @param field Field name + * @param phrase List of 
terms to be highlighted as a phrase + */ + public void addHighlightPhrase(String field, List<String> phrase) { + PhraseItem pi = new PhraseItem(); + pi.setIndexName(field); + for (String s : phrase) { + pi.addItem(new WordItem(toLowerCase(s), field, true)); + } + addHighlightItem(field, pi); + } + + /** + * Returns the modifiable map of highlight items (never null) + * + * @return Map of highlight items + */ + public Map<String, AndItem> getHighlightItems() { + return highlightItems; + } + + @Override + public Highlight clone() { + try { + Highlight clone = (Highlight) super.clone(); + + clone.highlightItems = new LinkedHashMap<>(); + for (Map.Entry<String,AndItem> entry: highlightItems.entrySet()) { + clone.highlightItems.put(entry.getKey(),(AndItem)entry.getValue().clone()); + } + + clone.highlightTerms = new LinkedHashMap<>(); + for (Map.Entry<String, List<String>> entry : highlightTerms.entrySet()) + clone.highlightTerms.put(entry.getKey(), new ArrayList<>(entry.getValue())); + + return clone; + + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + } + + public Map<String, List<String>> getHighlightTerms() { return highlightTerms; } + + /** Prepares this for binary serialization. 
For internal use - see {@link com.yahoo.search.Query#prepare} */ + public void prepare() { + this.highlightTerms.clear(); + + for (String index : getHighlightItems().keySet()) { + AndItem root = getHighlightItems().get(index); + List<WordItem> words = new ArrayList<>(); + List<CompositeItem> phrases = new ArrayList<>(); + for (Iterator<Item> i = root.getItemIterator(); i.hasNext(); ) { + Item item = i.next(); + if (item instanceof WordItem) { + words.add((WordItem)item); + } else if (item instanceof CompositeItem) { + phrases.add((CompositeItem)item); + } + } + + List<String> terms = new ArrayList<>(); + terms.add(String.valueOf(words.size() + phrases.size())); + for (WordItem item : words) { + terms.add(item.getWord()); + } + + for (CompositeItem item : phrases) { + terms.add("\""); + terms.add(String.valueOf(item.getItemCount())); + for (Iterator<Item> i = item.getItemIterator(); i.hasNext(); ) { + terms.add(((IndexedItem)i.next()).getIndexedString()); + } + terms.add("\""); + } + + if (terms.size() > 1) + this.highlightTerms.put(index, terms); + } + } + + + +} + + + + diff --git a/container-search/src/main/java/com/yahoo/prelude/query/IndexedItem.java b/container-search/src/main/java/com/yahoo/prelude/query/IndexedItem.java new file mode 100644 index 00000000000..6f873e28f8a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/IndexedItem.java @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + + +/** + * Interface for Items that is indexed + * + * @author Lars Christian Jensen + */ +public interface IndexedItem extends HasIndexItem { + + /** Sets the name of the index to search */ + public void setIndexName(String index); + + /** + * Return the searchable term contents of this item. 
+ * + * @return a string representation of what is presumably stored in an index + * which will match this item + */ + public String getIndexedString(); + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/IndexedSegmentItem.java b/container-search/src/main/java/com/yahoo/prelude/query/IndexedSegmentItem.java new file mode 100644 index 00000000000..af6108b819d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/IndexedSegmentItem.java @@ -0,0 +1,84 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import com.yahoo.prelude.query.textualrepresentation.Discloser; + +import edu.umd.cs.findbugs.annotations.NonNull; + +import java.nio.ByteBuffer; + +/** + * Common implementation for Item classes implementing the IndexedItem interface. + * Note that this file exist in 3 copies that should be kept in sync: + * + * CompositeIndexedItem.java + * SimpleIndexedItem.java + * IndexedSegmentItem.java + * + * These should only have trivial differences. + * (multiple inheritance or mixins would have been nice). 
+ * + * @author arnej27959 + */ +public abstract class IndexedSegmentItem extends TaggableSegmentItem implements IndexedItem { + + @NonNull + private String index = ""; + + protected IndexedSegmentItem(String rawWord, String current, boolean isFromQuery, boolean stemmed, Substring origin) { + super(rawWord, current, isFromQuery, stemmed, origin); + } + + /** + * The name of the index this belongs to, or "" (never null) if not specified + **/ + @NonNull + public String getIndexName() { + return index; + } + + // encode index bytes + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + putString(index, buffer); + } + + /** Sets the name of the index to search */ + public void setIndexName(String index) { + if (index == null) { + index = ""; + } + this.index = index; + } + + /** Appends the index prefix if necessary */ + protected void appendIndexString(StringBuilder buffer) { + if (!getIndexName().equals("")) { + buffer.append(getIndexName()); + buffer.append(":"); + } + } + + public boolean equals(Object object) { + if (!super.equals(object)) { + return false; + } + IndexedItem other = (IndexedItem) object; // Ensured by superclass + if (!this.index.equals(other.getIndexName())) { + return false; + } + return true; + } + + public int hashCode() { + return super.hashCode() + 31 * index.hashCode(); + } + + public abstract String getIndexedString(); + + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("index", index); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/IntItem.java b/container-search/src/main/java/com/yahoo/prelude/query/IntItem.java new file mode 100644 index 00000000000..90a9ce5a07f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/IntItem.java @@ -0,0 +1,292 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.query; + + +import java.math.BigInteger; +import java.nio.ByteBuffer; + + +/** + * This represents either + * <ul> + * <li>a number (integer or floating point) + * <li>a partial range, given by "<number" or ">number", where the numbers are exclusive, or + * <li>a full or open range "[number;number], "[number;]" or "[;number]" where the numbers are inclusive, + * or exclusive if a square bracket is replaced with a pointy one + * </ul> + * + * If a range is specified in brackets, it is also permissible to add a third number specifying the number of hits this + * will match on each node - [from;to;hitLimit] + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class IntItem extends TermItem { + + /** The inclusive lower end of this range */ + private Limit from; + + /** The inclusive upper end of this range */ + private Limit to; + + private int hitLimit = 0; + + /** The number expression of this */ + private String expression; + + /** + * Creates an int item which must be equal to the given int number - + * that is both the lower and upper limit is this number + */ + public IntItem(int number, String indexName) { + this(new Limit(number, true), new Limit(number, true), indexName); + } + + /** + * Creates an int item which must be equal to the given long number - + * that is both the lower and upper limit is this number + */ + public IntItem(long number, String indexName) { + this(new Limit(number, true), new Limit(number, true), indexName); + } + + public IntItem(Limit from, Limit to, String indexName) { + super(indexName, false); + this.from = from; + this.to = to; + expression = toExpression(from, to, 0); + } + + /** Returns the simplest expression matching this */ + private String toExpression(Limit from, Limit to, int hitLimit) { + if (from.equals(to) && hitLimit == 0) return from.number().toString(); + + String expression = from.toRangeStart() + ";" + to.toRangeEnd(); + if (hitLimit == 0) return 
expression; + + // Insert ;hitLimit at the end inside the brackets + return expression.substring(0, expression.length()-1) + ";" + hitLimit + expression.substring(expression.length()-1); + } + + public IntItem(String expression) { + this(expression, ""); + } + + public IntItem(String expression, boolean isFromQuery) { + this(expression, "", isFromQuery); + } + + public IntItem(String expression, String indexName) { + this(expression, indexName, false); + } + + public IntItem(String expression, String indexName, boolean isFromQuery) { + super(indexName, isFromQuery); + setNumber(expression); + } + + public IntItem(Limit from, Limit to, int hitLimit, String indexName, boolean isFromQuery) { + super(indexName, isFromQuery); + setLimits(from, to); + this.hitLimit = hitLimit; + this.expression = toExpression(from, to, hitLimit); + } + + /** Sets limit and flip them if "from" is greater than "to" */ + private final void setLimits(Limit from, Limit to) { + if (from.number().doubleValue() > to.number().doubleValue()) { + this.from = to; + this.to = from; + } + else { + this.from = from; + this.to = to; + } + } + + /** Sets the number expression of this - a number or range following the syntax specified in the class javadoc */ + public void setNumber(String expression) { + try { + this.expression = expression; + parseAndAssignLimits(expression.trim()); + } + catch (IllegalArgumentException e) { + throw new IllegalArgumentException("'" + expression + "' is not an int item expression: " + + "Expected NUMBER, '<'NUMBER, '>'NUMBER or ('['|'<')NUMBER;NUMBER(;NUMBER)?(']'|'>')", e); + + } + } + + private void parseAndAssignLimits(String e) { + if (e.startsWith("<") && ! e.contains(";")) { + from = Limit.NEGATIVE_INFINITY; + to = new Limit(asNumber(e.substring(1)), false); + } + else if (e.startsWith(">")) { + from = new Limit(asNumber(e.substring(1)), false); + to = Limit.POSITIVE_INFINITY; + } + else if (e.startsWith("[") || e.startsWith("<")) { + if ( ! 
(e.endsWith("]") || e.endsWith(">"))) throw new IllegalArgumentException("No closing bracket"); + + boolean inclusiveStart = e.startsWith("["); + boolean inclusiveEnd = e.startsWith("["); + + String[] limits = e.substring(1, e.length()-1).split(";"); + if (limits.length < 1 || limits.length > 3) throw new IllegalArgumentException("Unexpected bracket content"); + Limit from = new Limit(getOr(Double.NEGATIVE_INFINITY, 0, limits), inclusiveStart); + Limit to = new Limit(getOr(Double.POSITIVE_INFINITY, 1, limits), inclusiveEnd); + setLimits(from, to); + hitLimit = getOr(0, 2, limits).intValue(); + } + else { + to = from = new Limit(asNumber(e), true); + } + } + + private Number getOr(Number defaultValue, int valueIndex, String[] values) { + if (valueIndex >= values.length) return defaultValue; + if (values[valueIndex] == null) return defaultValue; + if (values[valueIndex].isEmpty()) return defaultValue; + return asNumber(values[valueIndex]); + } + + private Number asNumber(String numberString) { + try { + if (!numberString.contains(".")) return new Long(numberString); + } + catch (NumberFormatException e) { + return new BigInteger(numberString); + } + return new Double(numberString); + } + + /** Sets the number expression of this - a number or range */ + public String getNumber() { return expression; } + + /** Returns the lower limit of this range, which may be negative infinity */ + public final Limit getFromLimit() { + return from; + } + + /** Returns the upper limit of this range, which may be positive infinity */ + public final Limit getToLimit() { + return to; + } + + /** + * Returns the number of hits this will match, or 0 if all should be matched. + * If this number is positive, the hits closest to <code>from</code> are returned, and if + * this number is negative the hits closest to <code>to</code> are returned. + */ + public final int getHitLimit() { + return hitLimit; + } + + /** + * Sets the number of hits this will match, or 0 if all should be + * matched. 
If this number is positive, the hits closest to + * <code>from</code> are returned, and if this number is negative the hits + * closest to <code>to</code> are returned. + * + * @param hitLimit + * number of hits to match for this operator + */ + public final void setHitLimit(int hitLimit) { + this.hitLimit = hitLimit; + this.expression = toExpression(from, to, hitLimit); + } + + @Override + public String getRawWord() { + return getNumber(); + } + + @Override + public ItemType getItemType() { + return ItemType.INT; + } + + @Override + public String getName() { + return "INT"; + } + + @Override + public String stringValue() { + return expression; + } + + /** Same as {@link #setNumber} */ + @Override + public void setValue(String value) { setNumber(value); } + + /** Int items uses a empty heading instead of "INT " */ + protected void appendHeadingString(StringBuilder buffer) {} + + @Override + public int hashCode() { + return super.hashCode() + 199 * expression.hashCode(); + } + + @Override + public boolean equals(Object object) { + if ( ! super.equals(object)) return false; + + IntItem other = (IntItem) object; // Ensured by superclass + if ( ! getFromLimit().equals(other.getFromLimit())) return false; + if ( ! getToLimit().equals(other.getToLimit())) return false; + if ( getHitLimit() != other.getHitLimit()) return false; + return true; + } + + /** Returns the number for encoding; the number expression as-is. */ + protected String getEncodedInt() { + return getIndexedString(); + } + + @Override + public String getIndexedString() { + return expression; + } + + @Override + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); // takes care of index bytes + putString(getEncodedInt(), buffer); + } + + @Override + public int getNumWords() { + return 1; + } + + @Override + public boolean isStemmed() { + return true; + } + + @Override + public boolean isWords() { + return false; + } + + /** + * Creates an int item from arguments. 
+ * This will return an instance of the RankItem subclass if either <code>hitLimit</code> or both <code>from</code> + * and <code>to</code> is set to a value other than defaults (respectively 0, double negative and positive infinity). + * And different from each other. + * + * @param indexName the index this searches + * @param from the lower limit (inclusive) on hits + * @param to the higher limit (inclusive) on hits + * @param hitLimit the number of hits to match, or 0 to return all + */ + public static IntItem from(String indexName, Limit from, Limit to, int hitLimit) { + if (hitLimit == 0 && (from.equals(Limit.NEGATIVE_INFINITY) || to.equals(Limit.POSITIVE_INFINITY) || from.equals(to))) + return new IntItem(from, to, indexName); + else { + return new RangeItem(from, to, hitLimit, indexName, false); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/Item.java b/container-search/src/main/java/com/yahoo/prelude/query/Item.java new file mode 100644 index 00000000000..d9f0dcb7b1c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/Item.java @@ -0,0 +1,507 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + + +import com.yahoo.collections.CopyOnWriteHashMap; +import com.yahoo.compress.IntegerCompressor; +import com.yahoo.prelude.query.textualrepresentation.Discloser; +import com.yahoo.search.query.QueryTree; +import com.yahoo.text.Utf8; + +import java.nio.ByteBuffer; + + +/** + * <p>A term of the query language. As "term" is also the common term (sorry) + * for a literal to be found (or not) in a search index, the term <i>item</i> + * is used for <i>query language</i> terms.</p> + * + * <p>The query is represented as a composite tree of + * Item subclasses. 
This allow arbitrary complex combinations of ands, + * nots, phrases and so on.</p> + * + * <p>Items are in general mutable and not thread safe.</p> + * + * @author bratseth + * @author havardpe + */ +public abstract class Item implements Cloneable { + + /** + * The definitions in Item.ItemType must match the ones in + * searchlib/src/searchlib/parsequery/parse.h + */ + public static enum ItemType { + OR(0), + AND(1), + NOT(2), + RANK(3), + WORD(4), + INT(5), + PHRASE(6), + PAREN(7), + PREFIX(8), + SUBSTRING(9), + NEAR(11), + ONEAR(12), + SUFFIX(13), + EQUIV(14), + WEIGHTEDSET(15), + WEAK_AND(16), + EXACT(17), + LEGACY_RISE_QUERY_NOT_USED_ANYMORE_BUT_DO_NOT_REUSE_FOR_A_WHILE(18), + PURE_WEIGHTED_STRING(19), + PURE_WEIGHTED_INTEGER(20), + DOTPRODUCT(21), + WAND(22), + PREDICATE_QUERY(23), + REGEXP(24), + WORD_ALTERNATIVES(25); + + public final int code; + + private ItemType(int code) { + this.code = code; + } + + } + + public static final int DEFAULT_WEIGHT = 100; + + /** The relative importancy of this term in the query. Default is 100 */ + private int weight = DEFAULT_WEIGHT; + + /** + * The definitions in Item.ItemCreator must match the ones in + * searchlib/src/searchlib/parsequery/parse.h + */ + public static enum ItemCreator { + ORIG(0), + FILTER(1); + + public final int code; + + private ItemCreator(int code) { + this.code = code; + } + } + + private boolean fromSpecialToken = false; + + private ItemCreator creator = ItemCreator.ORIG; + + /** The parent in the query tree, or null if this is a root */ + private CompositeItem parent = null; + + /** The annotations made on this item */ + private CopyOnWriteHashMap<String, Object> annotations; + + /** Whether or not this item should affect ranking. 
*/ + private boolean isRanked = true; + + /** Whether or not position data should be used when ranking this item */ + private boolean usePositionData = true; + + /** Whether the item should encode a unique ID */ + private boolean hasUniqueID = false; + + /** Optional symbolic name for this item, requires unique id */ + private String label = null; + + /** Unique identifier to address the item for external annotation */ + protected int uniqueID = 0; + + /** Items for explicit connectivity */ + // TODO: Don't use protected members, especially not for something like this + // Move this to an object which can take care of being a weighted bidirectional reference more elegantly and safely. + protected Item connectedItem; + protected Item connectedBacklink; + protected double connectivity; + + /** Explicit term significance */ + protected double significance; + protected boolean explicitSignificance = false; + + /** Whether this item is eligible for change by query rewriters (false) or should be kept as-is (true) */ + private boolean isProtected; + + /** Sets the index name of this item */ + public abstract void setIndexName(String index); + + /** Returns the int code of this item */ + public final int getCode() { + return getItemType().code; + } + + /** Return the enumerated type of this item. */ + public abstract ItemType getItemType(); + + /** Returns the name of this item */ + public abstract String getName(); + + /** + * Sets whether this is a filter term. + * This indicates that the term origins from the filter parameter in the search API. + * The search backend does to handle filter terms any different than non-filter terms. + */ + public void setFilter(boolean filter) { + if (filter) { + creator = ItemCreator.FILTER; + } else { + creator = ItemCreator.ORIG; + } + } + + /** Returns whether this is a filter term */ + public boolean isFilter() { + return creator == ItemCreator.FILTER; + } + + /** Returns the item creator value. 
*/ + public ItemCreator getCreator() { + return creator; + } + + /** Sets the item creator value. */ + public void setCreator(ItemCreator creator) { + this.creator = creator; + } + + /** Sets the relative importance of this term */ + public void setWeight(int w) { + weight = w; + } + + /** Returns the relative importance of this term. Default is 100. */ + public int getWeight() { + return weight; + } + + /** + * Annotate this item + * + * @param key the annotation key + * @param value the value, or null to set a valueless annotation + */ + public void addAnnotation(String key, Object value) { + if (annotations == null) + annotations = new CopyOnWriteHashMap<>(); + annotations.put(key, value); + } + + /** + * Returns an annotation on this item, or null if the annotation is not set + */ + public Object getAnnotation(String annotation) { + if (annotations == null) { + return null; + } + return annotations.get(annotation); + } + + /** + * Returns whether this has an annotation + */ + public boolean hasAnnotation(String annotation) { + if (annotations == null) return false; + return annotations.containsKey(annotation); + } + + /** Set whether this should be protected from change/remove by query rewriters */ + public void setProtected(boolean isProtected) { this.isProtected=isProtected; } + + /** Returns whether this is to be protected from change/remove by query rewriters. default is false */ + public boolean isProtected() { return isProtected; } + + + /** Sets the parent in the tree. 
Do not use: Only to be called from CompositeItem/QueryTree */ + public void setParent(CompositeItem parent) { + this.parent = parent; + } + + /** Returns the parent in the query tree, or null if this node has no parent */ + public CompositeItem getParent() { + return parent; + } + + public abstract int encode(ByteBuffer buffer); + + protected void encodeThis(ByteBuffer buffer) { + int FEAT_SHIFT = 5; + int CODE_MASK = 0x1f; + int FEAT_MASK = 0xe0; + int FEAT_WEIGHT = 0x01; + int FEAT_UNIQUEID = 0x02; + int FEAT_FLAGS = 0x04; + + int features = 0; + + if (weight != DEFAULT_WEIGHT) { + features |= FEAT_WEIGHT; + } + if (hasUniqueID()) { + features |= FEAT_UNIQUEID; + } + byte flags = getFlagsFeature(); + if (flags != 0) { + features |= FEAT_FLAGS; + } + byte type = (byte)(((getCode() & CODE_MASK) + | ((features << FEAT_SHIFT) & FEAT_MASK)) & 0xff); + + buffer.put(type); + if ((features & FEAT_WEIGHT) != 0) { + IntegerCompressor.putCompressedNumber(weight, buffer); + } + if ((features & FEAT_UNIQUEID) != 0) { + IntegerCompressor.putCompressedPositiveNumber(uniqueID, buffer); + } + if (flags != 0) { + buffer.put(flags); + } + } + + /** + * Returns an integer that contains all feature flags for this item. This must be kept in sync with the flags + * defined in searchlib/parsequery/parse.h. + * + * @return The feature flags. 
+ */ + private byte getFlagsFeature() { + byte FLAGS_NORANK = 0x01; + byte FLAGS_SPECIALTOKEN = 0x02; + byte FLAGS_NOPOSITIONDATA = 0x04; + byte FLAGS_ISFILTER = 0x08; + + byte ret = 0; + if (!isRanked()) { + ret |= FLAGS_NORANK; + } + if (isFromSpecialToken()) { + ret |= FLAGS_SPECIALTOKEN; + } + if (!usePositionData()) { + ret |= FLAGS_NOPOSITIONDATA; + } + if (isFilter()) { + ret |= FLAGS_ISFILTER; + } + return ret; + } + + + /** Utility method for turning a string into utf-8 bytes */ + protected static final byte[] getBytes(String string) { + return Utf8.toBytes(string); + } + public static void putString(String s, ByteBuffer buffer) { + putBytes(Utf8.toBytes(s), buffer); + } + public static void putBytes(byte [] bytes, ByteBuffer buffer) { + IntegerCompressor.putCompressedPositiveNumber(bytes.length, buffer); + buffer.put(bytes); + } + + public abstract int getTermCount(); + + /** + * <p>Returns the canonical query language string of this item.</p> + * + * <p>The canonical language represent an item by the string + * <pre> + * ([itemName] [body]) + * </pre> + * where the body may recursively be other items. + * + * <p> + * TODO: Change the output query language into a canonical form of the input + * query language + */ + public String toString() { + StringBuilder buffer = new StringBuilder(); + + if (shouldParenthize()) { + buffer.append("("); + } + if (isFilter()) { + buffer.append("|"); + } + appendHeadingString(buffer); + appendBodyString(buffer); + if (shouldParenthize()) { + buffer.append(")"); + } + + if (weight != DEFAULT_WEIGHT) { + buffer.append("!"); + buffer.append(weight); + } + + return buffer.toString(); + } + + /** + * Returns whether or not this item should be parethized when printed. + * Default is false - no parentheses + */ + protected boolean shouldParenthize() { + return false; + } + + /** Appends the heading of this string. As default getName() followed by a space. 
*/ + protected void appendHeadingString(StringBuilder buffer) { + buffer.append(getName()); + buffer.append(" "); + } + + /** + * Override to append the item body in the canonical query language of this item. + * An item is usually represented by the string + * <pre> + * ([itemName] [body]) + * </pre> + * The body must be appended appended by this method. + */ + protected abstract void appendBodyString(StringBuilder buffer); + + /** Returns a deep copy of this item */ + public Item clone() { + try { + Item clone = (Item)super.clone(); + if (this.annotations != null) + clone.annotations = this.annotations.clone(); + // note: connectedItem and connectedBacklink references are corrected in CompositeItem.clone() + return clone; + } catch (CloneNotSupportedException e) { + throw new RuntimeException("Someone made Item unclonable"); + } + } + + /** + * Returns whether this item is of the same class and + * contains the same state as the given item + */ + public boolean equals(Object object) { + if (object == null) { + return false; + } + if (object.getClass() != this.getClass()) { + return false; + } // Fails on different c.l.'s + + Item other = (Item) object; + + if (this.creator != other.creator) { + return false; + } + if (this.weight != other.weight) { + return false; + } + // if (this.termIndex!=other.termIndex) return false; + + return true; + } + + public int hashCode() { + return weight * 29 + creator.code; + } + + protected boolean hasUniqueID() { + return hasUniqueID; + } + + protected void setHasUniqueID(boolean hasUniqueID) { + this.hasUniqueID = hasUniqueID; + } + + /** + * Label this item with a symbolic name which can later be used by + * the back-end to identify specific items for ranking purposes. + * + * @param label label for this item + **/ + public void setLabel(String label) { + setHasUniqueID(true); + this.label = label; + } + + /** + * Obtain the label for this item. This method will return null if + * no label has been set. 
+ * + * @return label for this item + **/ + public String getLabel() { + return label; + } + + /** + * Sets whether or not this term item should affect ranking. + * If set to false this term is not exposed to the ranking framework in the search backend. + */ + public void setRanked(boolean isRanked) { + this.isRanked = isRanked; + } + + /** Returns whether or not this item should affect ranking. */ + public boolean isRanked() { + return isRanked; + } + + /** + * Sets whether or not position data should be used when ranking this term item. + * If set to false the search backend uses fast bit vector data structures when matching on this term + * and only a few simple ranking features will be available when ranking this term. + * Note that setting this to false also saves a lot of CPU during matching as bit vector data structures are used. + */ + public void setPositionData(boolean usePositionData) { + this.usePositionData = usePositionData; + } + + /** Returns whether or not position data should be used when ranking this item */ + public boolean usePositionData() { + return usePositionData; + } + + public void disclose(Discloser discloser) { + discloser.addProperty("connectivity", connectivity); + discloser.addProperty("connectedItem", connectedItem); //reference + + discloser.addProperty("creator", creator); + discloser.addProperty("explicitSignificance", explicitSignificance); + discloser.addProperty("isRanked", isRanked); + discloser.addProperty("usePositionData", usePositionData); + discloser.addProperty("significance", significance); + discloser.addProperty("weight", weight); + + if (label != null) { + discloser.addProperty("label", label); + } + if (hasUniqueID) { + discloser.addProperty("uniqueID", uniqueID); + } + } + + public boolean isFromSpecialToken() { + return fromSpecialToken; + } + + public void setFromSpecialToken(boolean fromSpecialToken) { + this.fromSpecialToken = fromSpecialToken; + } + + /** + * DO NOT USE + */ + public boolean 
hasConnectivityBackLink() { + return connectedBacklink != null; + } + + /** Returns true if this is the root item - that is if the parent is the QueryTree (or null for legacy reasons)*/ + public boolean isRoot() { + if (getParent()==null) return true; + if (getParent() instanceof QueryTree) return true; + return false; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/ItemHelper.java b/container-search/src/main/java/com/yahoo/prelude/query/ItemHelper.java new file mode 100644 index 00000000000..a2a140abb29 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/ItemHelper.java @@ -0,0 +1,81 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import java.util.Iterator; +import java.util.List; + +/** + * Helper function for Item + * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> + */ +public class ItemHelper { + + /* + We could have exchanged the following 3 functions with this + But this introspection is a bit too much of a hack, so we'll leave it with this. 
+ + + public static <T extends CompositeItem> T ensureIsItem(Item unknown,Class<T> tClass) { + + if(unknown != null && tClass.isInstance(unknown)) { + return (T) unknown; + } + T item; + + try { + Constructor<T> n = tClass.getConstructor(); + item = n.newInstance(); + } catch (NoSuchMethodException e) { + return null; + } catch (InvocationTargetException e) { + return null; + } catch (IllegalAccessException e) { + return null; + } catch (InstantiationException e) { + return null; + } + if(item != null) { + item.addItem(unknown); + } + return item; + + } + */ + + /** Traverse the query tree and return total number of terms */ + int getNumTerms(Item rootNode) { + int numTerms = 0; + + if (rootNode == null) { + return 0; + } else if (rootNode instanceof CompositeItem) { + CompositeItem composite = (CompositeItem) rootNode; + + for (Iterator<Item> i = composite.getItemIterator(); i.hasNext();) { + numTerms += getNumTerms(i.next()); + } + } else if (rootNode instanceof TermItem) { + return 1; + } else { + return 0; + } + return numTerms; + } + + public void getPositiveTerms(Item item, List<IndexedItem> terms) { + if (item instanceof NotItem) { + getPositiveTerms(((NotItem) item).getPositiveItem(), terms); + } else if (item instanceof PhraseItem) { + PhraseItem pItem = (PhraseItem)item; + terms.add(pItem); + } else if (item instanceof CompositeItem) { + for (Iterator<Item> i = ((CompositeItem) item).getItemIterator(); i.hasNext();) { + getPositiveTerms(i.next(), terms); + } + } else if (item instanceof TermItem) { + terms.add((TermItem)item); + } + } + + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/Limit.java b/container-search/src/main/java/com/yahoo/prelude/query/Limit.java new file mode 100644 index 00000000000..b830ba13ed5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/Limit.java @@ -0,0 +1,88 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.query; + +/** + * An immutable numeric range limit which can be inclusive or exclusive + * + * @author bratseth + */ +public class Limit { + + public static final Limit NEGATIVE_INFINITY = new Limit(Double.NEGATIVE_INFINITY, false); + public static final Limit POSITIVE_INFINITY = new Limit(Double.POSITIVE_INFINITY, false); + + private final Number number; + private final boolean inclusive; + private final boolean infinite; + + public Limit(Number number, boolean inclusive) { + this.number = number; + this.infinite = Double.isInfinite(number.doubleValue()); + this.inclusive = inclusive || infinite; + } + + public Number number() { return number; } + + /** Returns true if this limit includes its number, false if it excludes it */ + public boolean isInclusive() { return inclusive; } + + String toRangeStart() { + return (inclusive ? "[" : "<" ) + (isInfinite() ? "" : number.toString()); + } + + String toRangeEnd() { + return (isInfinite() ? "" : number.toString()) + (inclusive ? "]" : ">" ); + } + + /** Returns the smaller of this and the given limit */ + public Limit min(Limit other) { + return this.isSmallerOrEqualTo(other) ? this : other; + } + + /** Returns the larger of this and the given limit */ + public Limit max(Limit other) { + return this.isLargerOrEqualTo(other) ? this : other; + } + + public boolean isSmallerOrEqualTo(Limit other) { + double thisNumber = this.number().doubleValue(); + double otherNumber = other.number().doubleValue(); + if (thisNumber == otherNumber) { + if ( ! other.isInclusive()) return false; + return true; + } + return thisNumber < otherNumber; + } + + public boolean isLargerOrEqualTo(Limit other) { + double thisNumber = this.number().doubleValue(); + double otherNumber = other.number().doubleValue(); + if (thisNumber == otherNumber) { + if ( ! 
other.isInclusive()) return false; + return true; + } + return thisNumber > otherNumber; + } + + public boolean isInfinite() { return infinite; } + + @Override + public String toString() { + return number + " (" + (inclusive ? "inclusive" : "exclusive") + ")"; + } + + @Override + public boolean equals(Object o) { + if (o == this) return true; + if ( ! (o instanceof Limit)) return false; + Limit other = (Limit)o; + if (Boolean.compare(other.inclusive, this.inclusive) != 0) return false; + return this.number.equals(other.number); + } + + @Override + public int hashCode() { + return number.hashCode() + (inclusive ? 1 : 0); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/MarkerWordItem.java b/container-search/src/main/java/com/yahoo/prelude/query/MarkerWordItem.java new file mode 100644 index 00000000000..8fb16e8a3ba --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/MarkerWordItem.java @@ -0,0 +1,60 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import com.yahoo.net.UrlTokenizer; +import com.yahoo.prelude.query.textualrepresentation.Discloser; + + +/** + * Special words known by the index used for marking things. + * The reserved word itself is not public, while a symbol representation is. 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class MarkerWordItem extends WordItem { + + /** Creates a special word item which marks the start of a host name */ + public static WordItem createStartOfHost() { + return new MarkerWordItem("^", UrlTokenizer.TERM_STARTHOST); + } + + /** Creates a special word item which marks the end of a host name */ + public static WordItem createEndOfHost() { + return new MarkerWordItem("$", UrlTokenizer.TERM_ENDHOST); + } + + private String markerWord; + + private MarkerWordItem(String publicSymbol, String markerWord) { + super(publicSymbol); + this.markerWord = markerWord; + } + + /** Returns the marker word for encoding */ + protected String getEncodedWord() { + return markerWord; + } + + public boolean equals(Object o) { + if (!super.equals(o)) { + return false; + } + if (!(o instanceof MarkerWordItem)) { + return false; + } + + MarkerWordItem other = (MarkerWordItem) o; + + return markerWord.equals(other.markerWord); + } + + public int hashCode() { + return super.hashCode() + 499 * markerWord.hashCode(); + } + + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("markerWord", markerWord); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/NearItem.java b/container-search/src/main/java/com/yahoo/prelude/query/NearItem.java new file mode 100644 index 00000000000..9fa42f90a29 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/NearItem.java @@ -0,0 +1,93 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.query; + +import com.yahoo.compress.IntegerCompressor; +import com.yahoo.prelude.query.textualrepresentation.Discloser; + +import java.nio.ByteBuffer; + + +/** + * <p>A set of terms which must be near each other to match.</p> + * + * @author bratseth + * @author havardpe + */ +public class NearItem extends CompositeItem { + + protected int distance; + + /** The default distance used if none is specified: 2 */ + public static final int defaultDistance=2; + + /** Creates a NEAR item with distance 2 */ + public NearItem() { + setDistance(defaultDistance); + } + + /** + * Creates a <i>near</i> item with a limit to the distance + * between the words. + * + * @param distance the number of word position which may separate + * the words for this near item to match + */ + public NearItem(int distance) { + setDistance(distance); + } + + public void setDistance(int distance) { + if (distance < 0) { + throw new IllegalArgumentException("Can not use negative distance '" + distance + "'."); + } + this.distance = distance; + } + + public int getDistance() { + return distance; + } + + public ItemType getItemType() { + return ItemType.NEAR; + } + + public String getName() { + return "NEAR"; + } + + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + IntegerCompressor.putCompressedPositiveNumber(distance, buffer); + } + + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("limit", distance); + } + + /** Appends the heading of this string - <code>[getName()]([limit]) </code> */ + protected void appendHeadingString(StringBuilder buffer) { + buffer.append(getName()); + buffer.append("("); + buffer.append(distance); + buffer.append(")"); + buffer.append(" "); + } + + public int hashCode() { + return super.hashCode() + 23* distance; + } + + /** + * Returns whether this item is of the same class and + * contains the same state as the given item + */ + public boolean equals(Object 
object) { + if (!super.equals(object)) return false; + NearItem other = (NearItem) object; // Ensured by superclass + if (this.distance !=other.distance) return false; + return true; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/NonReducibleCompositeItem.java b/container-search/src/main/java/com/yahoo/prelude/query/NonReducibleCompositeItem.java new file mode 100644 index 00000000000..abac8200f49 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/NonReducibleCompositeItem.java @@ -0,0 +1,16 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +/** + * A composite item which specifies semantics which are not maintained + * if an instance with a single child is replaced by the single child. + * <p> + * Most composites, like AND and OR, are reducible as e.g (AND a) is semantically equal to (a). + * <p> + * This type functions as a marked interfaces for query rewriters. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + * @since 5.1.22 + */ +public abstract class NonReducibleCompositeItem extends CompositeItem { +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/NotItem.java b/container-search/src/main/java/com/yahoo/prelude/query/NotItem.java new file mode 100644 index 00000000000..0432795b716 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/NotItem.java @@ -0,0 +1,143 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import com.yahoo.protect.Validator; + +import java.util.Iterator; + + +/** + * <p>A composite item where the first item is positive and the following + * items are negative items which should be excluded from the result. 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +// TODO: Handle nulls by creating nullItem or checking in encode/toString +public class NotItem extends CompositeItem { + + public ItemType getItemType() { + return ItemType.NOT; + } + + public String getName() { + return "NOT"; + } + + /** + * Adds an item. The first item is the positive + * the rest is negative + */ + public void addItem(Item item) { + super.addItem(item); + } + + /** + * Adds a negative item. Like addItem but skips the first position + * (position 0) if it is not already set. + */ + public void addNegativeItem(Item negative) { + if (getItemCount() == 0) { + insertNullFirstItem(); + } + addItem(negative); + } + + /** + * Returns the positive item (the first subitem), + * or null if no positive items has been added + */ + public Item getPositiveItem() { + if (getItemCount() == 0) { + return null; + } + return getItem(0); + } + + /** + * Sets the positive item (the first item) + * + * @return the old positive item, or null if there was no items + */ + public Item setPositiveItem(Item item) { + Validator.ensureNotNull("Positive item of " + this, item); + if (getItemCount() == 0) { + addItem(item); + return null; + } else { + return setItem(0, item); + } + } + + /** + * Convenience method for adding a positive item. 
+ * If a positive item is already present + * the positive item becomes an AndItem with the items added + */ + public void addPositiveItem(Item item) { + if (getPositiveItem() == null) { + setPositiveItem(item); + } else if (getPositiveItem() instanceof AndItem) { + ((AndItem) getPositiveItem()).addItem(item); + } else { + AndItem positives = new AndItem(); + + positives.addItem(getPositiveItem()); + positives.addItem(item); + setPositiveItem(positives); + } + } + + public boolean removeItem(Item item) { + int removedIndex = getItemIndex(item); + boolean removed = super.removeItem(item); + + if (removed && removedIndex == 0) { + insertNullFirstItem(); + } + return removed; + } + + public Item removeItem(int index) { + Item removed = super.removeItem(index); + + if (index == 0) { // Don't make the first negative the positive + insertNullFirstItem(); + } + return removed; + } + + /** Not items uses a empty heading instead of "NOT " */ + protected void appendHeadingString(StringBuilder buffer) {} + + /** + * Overridden to tolerate nulls and to append "+" + * to the first item and "-" to the rest + */ + protected void appendBodyString(StringBuilder buffer) { + boolean isFirstItem = true; + + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + Item item = i.next(); + + if (isFirstItem) { + buffer.append("+"); + } else { + buffer.append(" -"); + } + if (item == null) { + buffer.append("(null)"); + } else { + buffer.append(item.toString()); + } + isFirstItem = false; + } + } + + /** Returns the number of actual *positive* terms in this */ + @Override + public int getTermCount() { + Item positive = getPositiveItem(); + return positive == null ? 
0 : positive.getTermCount(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/NullItem.java b/container-search/src/main/java/com/yahoo/prelude/query/NullItem.java new file mode 100644 index 00000000000..aa3a04d670f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/NullItem.java @@ -0,0 +1,49 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + + +import java.nio.ByteBuffer; + + +/** + * A place holder for null queries to make searchers easier to write. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class NullItem extends Item { + + public NullItem() {} + + /** Does nothing */ + public void setIndexName(String index) {} + + public int encode(ByteBuffer buffer) { + throw new RuntimeException( + "A NullItem was attempted encoded. " + + "This is probably a misbehaving " + "searcher."); + } + + public ItemType getItemType() { + throw new RuntimeException( + "Packet code access attempted. " + + "A NullItem has no packet code. " + + "This is probably a misbehaving " + "searcher."); + } + + public void appendBodyString(StringBuilder buffer) { + // No body for this Item + return; + } + + public void appendHeadingString(StringBuilder buffer) { + buffer.append(getName()); + } + + public String getName() { + return "NULL"; + } + + @Override + public int getTermCount() { return 0; } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/ONearItem.java b/container-search/src/main/java/com/yahoo/prelude/query/ONearItem.java new file mode 100644 index 00000000000..c7caa9acc8f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/ONearItem.java @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + + +/** + * Ordered NearItem. 
+ * <p> + * Matches as a near operator, but also demands that the operands have the + * same order in the document as in the query. + * + * @author bratseth + */ +public class ONearItem extends NearItem { + + /** Creates a ordered NEAR item with limit 2 */ + public ONearItem() { + setDistance(2); + } + + /** + * Creates a ordered near item which matches if there are at most <code>distance</code> + * separation between the words, in the right direction. + */ + public ONearItem(int distance) { + super(distance); + } + + public ItemType getItemType() { + return ItemType.ONEAR; + } + + public String getName() { + return "ONEAR"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/OrItem.java b/container-search/src/main/java/com/yahoo/prelude/query/OrItem.java new file mode 100644 index 00000000000..20d29cd9c0e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/OrItem.java @@ -0,0 +1,20 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + + +/** + * An or'ing of a collection of sub-expressions + * + * @author bratseth + */ +public class OrItem extends CompositeItem { + + public ItemType getItemType() { + return ItemType.OR; + } + + public String getName() { + return "OR"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/PhraseItem.java b/container-search/src/main/java/com/yahoo/prelude/query/PhraseItem.java new file mode 100644 index 00000000000..130eafe49ef --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/PhraseItem.java @@ -0,0 +1,266 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.query; + +import com.yahoo.prelude.query.textualrepresentation.Discloser; + +import java.nio.ByteBuffer; +import java.util.Iterator; + +/** + * A term which contains a phrase - a collection of word terms + * + * @author bratseth + * @author havardpe + */ +public class PhraseItem extends CompositeIndexedItem { + + /** Whether this was explicitly written as a phrase using quotes by the user */ + private boolean explicit = false; + + /** Creates an empty phrase */ + public PhraseItem() {} + + /** Creates an empty phrase which will search the given index */ + public PhraseItem(String indexName) { + setIndexName(indexName); + } + + /** Creates a phrase containing the given words */ + public PhraseItem(String[] words) { + for (int i = 0; i < words.length; i++) { + addIndexedItem(new WordItem(words[i])); + } + } + + public ItemType getItemType() { + return ItemType.PHRASE; + } + + public String getName() { + return "PHRASE"; + } + + public void setIndexName(String index) { + super.setIndexName(index); + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + IndexedItem word = (IndexedItem) i.next(); + word.setIndexName(index); + } + } + + /** + * Sets whether this was explicitly written as a phrase using quotes by the + * user + */ + public void setExplicit(boolean explicit) { + this.explicit = explicit; + } + + /** + * Returns whether this was explicitly written as a phrase using quotes by + * the user Default is false + */ + public boolean isExplicit() { + return explicit; + } + + private IndexedItem convertIntToWord(Item orig) { + IntItem o = (IntItem) orig; + return new WordItem(o.stringValue(), o.getIndexName(), o.isFromQuery()); + } + + /** + * Adds subitem. The word will have its index name set to the index name of + * this phrase. If the item is a word, it will simply be added, if the item + * is a phrase, each of the words of the phrase will be added. 
+ * + * @throws IllegalArgumentException + * if the given item is not a WordItem or PhraseItem + */ + public void addItem(Item item) { + if (item instanceof WordItem || item instanceof PhraseSegmentItem || item instanceof WordAlternativesItem) { + addIndexedItem((IndexedItem) item); + } else if (item instanceof IntItem) { + addIndexedItem(convertIntToWord(item)); + } else if (item instanceof PhraseItem) { + PhraseItem phrase = (PhraseItem) item; + + for (Iterator<Item> i = phrase.getItemIterator(); i.hasNext();) { + addIndexedItem((IndexedItem) i.next()); + } + } else { + throw new IllegalArgumentException("Can not add " + item + + " to a phrase"); + } + } + + @Override + public void addItem(int index, Item item) { + if (item instanceof WordItem || item instanceof PhraseSegmentItem) { + addIndexedItem(index, (IndexedItem) item); + } else if (item instanceof IntItem) { + addIndexedItem(index, convertIntToWord(item)); + } else if (item instanceof PhraseItem) { + PhraseItem phrase = (PhraseItem) item; + + for (Iterator<Item> i = phrase.getItemIterator(); i.hasNext();) { + addIndexedItem(index++, (WordItem) i.next()); + } + } else { + throw new IllegalArgumentException("Can not add " + item + + " to a phrase"); + } + } + + @Override + public Item setItem(int index, Item item) { + if (item instanceof WordItem || item instanceof PhraseSegmentItem) { + return setIndexedItem(index, (IndexedItem) item); + } else if (item instanceof IntItem) { + return setIndexedItem(index, convertIntToWord(item)); + } else if (item instanceof PhraseItem) { + PhraseItem phrase = (PhraseItem) item; + Iterator<Item> i = phrase.getItemIterator(); + // we assume we don't try to add empty phrases + IndexedItem firstItem = (IndexedItem) i.next(); + Item toReturn = setIndexedItem(index++, firstItem); + + while (i.hasNext()) { + addIndexedItem(index++, (IndexedItem) i.next()); + } + return toReturn; + } else { + throw new IllegalArgumentException("Can not add " + item + + " to a phrase"); + } + } + 
+ private void addIndexedItem(IndexedItem word) { + word.setIndexName(this.getIndexName()); + super.addItem((Item) word); + } + + private void addIndexedItem(int index, IndexedItem word) { + word.setIndexName(this.getIndexName()); + super.addItem(index, (Item) word); + } + + private Item setIndexedItem(int index, IndexedItem word) { + word.setIndexName(this.getIndexName()); + return super.setItem(index, (Item) word); + } + + /** + * Returns a subitem as a word item + * + * @param index + * the (0-base) index of the item to return + * @throws IndexOutOfBoundsException + * if there is no subitem at index + */ + public WordItem getWordItem(int index) { + return (WordItem) getItem(index); + } + + /** + * Returns a subitem as a block item, + * + * @param index + * the (0-base) index of the item to return + * @throws IndexOutOfBoundsException + * if there is no subitem at index + */ + public BlockItem getBlockItem(int index) { + return (BlockItem) getItem(index); + } + + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); // takes care of index bytes + } + + public int encode(ByteBuffer buffer) { + encodeThis(buffer); + int itemCount = 1; + + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + Item subitem = i.next(); + + if (subitem instanceof PhraseSegmentItem) { + PhraseSegmentItem seg = (PhraseSegmentItem) subitem; + + // "What encode does, minus what encodeThis does" + itemCount += seg.encodeContent(buffer); + } else { + itemCount += subitem.encode(buffer); + } + } + return itemCount; + } + + /** + * Returns false, no parenthezes for phrases + */ + protected boolean shouldParenthize() { + return false; + } + + /** Phrase items uses a empty heading instead of "PHRASE " */ + protected void appendHeadingString(StringBuilder buffer) { + } + + protected void appendBodyString(StringBuilder buffer) { + appendIndexString(buffer); + + buffer.append("\""); + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + Item item = i.next(); + + 
if (item instanceof WordItem) { + WordItem wordItem = (WordItem) item; + + buffer.append(wordItem.getWord()); + } else { + PhraseSegmentItem seg = (PhraseSegmentItem) item; + + seg.appendContentsString(buffer); + } + if (i.hasNext()) { + buffer.append(" "); + } + } + buffer.append("\""); + } + + public String getIndexedString() { + StringBuilder buf = new StringBuilder(); + + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + IndexedItem indexedItem = (IndexedItem) i.next(); + + buf.append(indexedItem.getIndexedString()); + if (i.hasNext()) { + buf.append(' '); + } + } + return buf.toString(); + } + + protected int encodingArity() { + return getNumWords(); + } + + public int getNumWords() { + int numWords = 0; + + for (Iterator<Item> j = getItemIterator(); j.hasNext();) { + numWords += ((IndexedItem) j.next()).getNumWords(); + } + return numWords; + } + + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("explicit", explicit); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/PhraseSegmentItem.java b/container-search/src/main/java/com/yahoo/prelude/query/PhraseSegmentItem.java new file mode 100644 index 00000000000..7defe67eede --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/PhraseSegmentItem.java @@ -0,0 +1,202 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import com.yahoo.prelude.query.textualrepresentation.Discloser; + +import java.nio.ByteBuffer; +import java.util.Iterator; + + +/** + * A term which contains a fixed length phrase, a collection of word terms, + * resulting from a single segmentation operation. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class PhraseSegmentItem extends IndexedSegmentItem { + + /** Whether this was explicitly written as a phrase using quotes by the user */ + private boolean explicit = false; + + /** + * Creates a phrase containing the same words and state (as pertinent) as + * the given SegmentAndItem. + */ + public PhraseSegmentItem(AndSegmentItem segAnd) { + super(segAnd.getRawWord(), segAnd.stringValue(), segAnd.isFromQuery(), segAnd.isStemmed(), segAnd.getOrigin()); + if (segAnd.getItemCount() > 0) { + WordItem w = (WordItem) segAnd.getItem(0); + setIndexName(w.getIndexName()); + for (Iterator<Item> i = segAnd.getItemIterator(); i.hasNext();) { + WordItem word = (WordItem) i.next(); + addWordItem(word); + } + } + } + + public PhraseSegmentItem(String rawWord, boolean isFromQuery, boolean stemmed) { + super(rawWord, rawWord, isFromQuery, stemmed, null); + } + + /** + * Creates a phrase segment from strings + * + * @param rawWord the raw text as received in the request + * @param current the normalized form of the raw text, or the raw text repeated if no normalized form is known + * @param isFromQuery whether this originates in the request + * @param stemmed whether this is stemmed + */ + public PhraseSegmentItem(String rawWord, String current, boolean isFromQuery, boolean stemmed) { + super(rawWord, current, isFromQuery, stemmed, null); + } + + public PhraseSegmentItem(String rawWord, String current, boolean isFromQuery, + boolean stemmed, Substring substring) { + super(rawWord, current, isFromQuery, stemmed, substring); + } + + public ItemType getItemType() { + return ItemType.PHRASE; + } + + public String getName() { + return "SPHRASE"; + } + + public void setIndexName(String index) { + super.setIndexName(index); + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + WordItem word = (WordItem) i.next(); + word.setIndexName(index); + } + } + + @Override + public void setWeight(int 
weight) { + super.setWeight(weight); + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + Item word = i.next(); + word.setWeight(weight); + } + } + + /** + * Adds subitem. The word will have its index name set to the index name + * of this phrase. If the item is a word, it will simply be added, + * if the item is a phrase, each of the words of the phrase will be added. + * + * @throws IllegalArgumentException if the given item is not a WordItem or PhraseItem + */ + public void addItem(Item item) { + if (item instanceof WordItem) { + addWordItem((WordItem) item); + } else { + throw new IllegalArgumentException( + "Can not add " + item + " to a segment phrase"); + } + } + + private void addWordItem(WordItem word) { + word.setIndexName(this.getIndexName()); + super.addItem(word); + } + + // TODO: Override addItem(index,item), setItem(index,item) + + /** + * Returns a subitem as a word item + * + * @param index the (0-base) index of the item to return + * @throws IndexOutOfBoundsException if there is no subitem at index + */ + public WordItem getWordItem(int index) { + return (WordItem) getItem(index); + } + + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); // takes care of index bytes + } + + public int encode(ByteBuffer buffer) { + encodeThis(buffer); + return encodeContent(buffer, 1); + } + + public int encodeContent(ByteBuffer buffer) { + return encodeContent(buffer, 0); + } + + private int encodeContent(ByteBuffer buffer, int itemCount) { + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + Item subitem = i.next(); + itemCount += subitem.encode(buffer); + } + return itemCount; + } + + + /** + * Returns false, no parenthezes for phrases + */ + protected boolean shouldParenthize() { + return false; + } + + /** Segment phrase items uses a empty heading instead of "SPHRASE " */ + protected void appendHeadingString(StringBuilder buffer) {} + + protected void appendBodyString(StringBuilder buffer) { + 
appendIndexString(buffer); + appendContentsString(buffer); + } + + void appendContentsString(StringBuilder buffer) { + buffer.append("'"); + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + WordItem wordItem = (WordItem) i.next(); + + buffer.append(wordItem.getWord()); + if (i.hasNext()) { + buffer.append(" "); + } + } + buffer.append("'"); + } + + // TODO: Must check all pertinent items + public boolean equals(Object object) { + if (!super.equals(object)) { + return false; + } + // PhraseSegmentItem other = (PhraseSegmentItem) object; // Ensured by superclass + return true; + } + + public String getIndexedString() { + StringBuilder buf = new StringBuilder(); + + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + IndexedItem indexedItem = (IndexedItem) i.next(); + + buf.append(indexedItem.getIndexedString()); + if (i.hasNext()) { + buf.append(' '); + } + } + return buf.toString(); + } + + public boolean isExplicit() { + return explicit; + } + + public void setExplicit(boolean explicit) { + this.explicit = explicit; + } + + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("explicit", explicit); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/PredicateQueryItem.java b/container-search/src/main/java/com/yahoo/prelude/query/PredicateQueryItem.java new file mode 100644 index 00000000000..6a1306ddb2d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/PredicateQueryItem.java @@ -0,0 +1,246 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import com.yahoo.compress.IntegerCompressor; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collection; + +/** + * A PredicateQueryItem is a collection of feature/value-pairs + * that are used to query predicate fields, which contains boolean + * constraints. 
If the feature/value-pairs from the PredicateQueryItem + * satisfies the boolean constraints, the document is a match. + * + * @author Magnar Nedland + */ +public class PredicateQueryItem extends SimpleTaggableItem { + + private String fieldName = "predicate"; + private ArrayList<Entry> features = new ArrayList<>(); + private ArrayList<RangeEntry> rangeFeatures = new ArrayList<>(); + public static final long ALL_SUB_QUERIES = 0xffffffffffffffffL; + + /** + * Sets the field name to be used for the predicates. + * @param index name of the field. + */ + @Override + public void setIndexName(String index) { + this.fieldName = index; + } + + /** + * @return the field name used for the predicates. + */ + public String getIndexName() { + return fieldName; + } + + /** + * Adds a feature/value-pair to the predicate query. This feature is applied to all sub queries. + * @param key name of the feature to be set in this query. + * @param value value of the feature. + */ + public void addFeature(String key, String value) { + addFeature(key, value, ALL_SUB_QUERIES); + } + + /** + * Adds a feature/value-pair to the predicate query. + * @param key name of the feature to be set in this query. + * @param value value of the feature. + * @param subQueryBitmap bitmap specifying which sub queries this feature applies to. + */ + public void addFeature(String key, String value, long subQueryBitmap) { + addFeature(new Entry(key, value, subQueryBitmap)); + } + + /** + * Adds a feature/value-pair to the predicate query. + * @param entry the feature to add. + */ + public void addFeature(Entry entry) { + features.add(entry); + } + + /** + * Adds a range feature with a given value to the predicate query. + * This feature is applied to all sub queries. + * @param key name of the feature to be set in this query. + * @param value value of the feature. 
+ */ + public void addRangeFeature(String key, long value) { + addRangeFeature(key, value, ALL_SUB_QUERIES); + } + + /** + * Adds a range feature with a given value to the predicate query. + * @param key name of the feature to be set in this query. + * @param value value of the feature. + * @param subQueryBitmap bitmap specifying which sub queries this feature applies to. + */ + public void addRangeFeature(String key, long value, long subQueryBitmap) { + addRangeFeature(new RangeEntry(key, value, subQueryBitmap)); + } + + /** + * Adds a range feature with a given value to the predicate query. + * @param entry the feature to add. + */ + public void addRangeFeature(RangeEntry entry) { + rangeFeatures.add(entry); + } + + /** + * @return a mutable collection of feature entries. + */ + public Collection<Entry> getFeatures() { + return features; + } + + /** + * @return a mutable collection of range feature entries. + */ + public Collection<RangeEntry> getRangeFeatures() { + return rangeFeatures; + } + + @Override + public ItemType getItemType() { + return ItemType.PREDICATE_QUERY; + } + + @Override + public String getName() { + return "PREDICATE_QUERY_ITEM"; + } + + @Override + public int encode(ByteBuffer buffer) { + super.encodeThis(buffer); + putString(fieldName, buffer); + encodeFeatures(features, buffer); + encodeFeatures(rangeFeatures, buffer); + return 1; // number of encoded stack dump items + } + + private void encodeFeatures(ArrayList<? 
extends EntryBase> features, ByteBuffer buffer) { + IntegerCompressor.putCompressedPositiveNumber(features.size(), buffer); + for (EntryBase e : features) { + e.encode(buffer); + } + } + + @Override + public int getTermCount() { + return 1; // one big term + } + + @Override + protected void appendBodyString(StringBuilder buffer) { + boolean first = true; + for (Entry e : features) { + if (!first) { + buffer.append(", "); + } else { + first = false; + } + buffer.append(e.getKey()).append('=').append(e.getValue()); + if (e.getSubQueryBitmap() != ALL_SUB_QUERIES) { + buffer.append("[0x").append(Long.toHexString(e.getSubQueryBitmap())).append(']'); + } + } + for (RangeEntry e : rangeFeatures) { + if (!first) { + buffer.append(", "); + } else { + first = false; + } + buffer.append(e.getKey()).append(':').append(e.getValue()); + if (e.getSubQueryBitmap() != ALL_SUB_QUERIES) { + buffer.append("[0x").append(Long.toHexString(e.getSubQueryBitmap())).append(']'); + } + } + } + + @Override + public PredicateQueryItem clone() { + PredicateQueryItem clone = (PredicateQueryItem)super.clone(); + clone.features = new ArrayList<>(this.features); + clone.rangeFeatures = new ArrayList<>(this.rangeFeatures); + return clone; + } + + public abstract static class EntryBase { + private String key; + private long subQueryBitmap; + + public EntryBase(String key, long subQueryBitmap) { + this.key = key; + this.subQueryBitmap = subQueryBitmap; + } + + public String getKey() { + return key; + } + + public long getSubQueryBitmap() { + return subQueryBitmap; + } + + public void setSubQueryBitmap(long subQueryBitmap) { + this.subQueryBitmap = subQueryBitmap; + } + + public abstract void encode(ByteBuffer buffer); + } + + public static class Entry extends EntryBase { + private String value; + + public Entry(String key, String value) { + this(key, value, ALL_SUB_QUERIES); + } + public Entry(String key, String value, long subQueryBitmap) { + super(key, subQueryBitmap); + this.value = value; + } + + 
public String getValue() { + return value; + } + + @Override + public void encode(ByteBuffer buffer) { + putString(getKey(), buffer); + putString(getValue(), buffer); + buffer.putLong(getSubQueryBitmap()); + } + } + + public static class RangeEntry extends EntryBase { + private long value; + + public RangeEntry(String key, long value) { + this(key, value, ALL_SUB_QUERIES); + } + + public RangeEntry(String key, long value, long subQueryBitmap) { + super(key, subQueryBitmap); + this.value = value; + } + + public long getValue() { + return value; + } + + @Override + public void encode(ByteBuffer buffer) { + putString(getKey(), buffer); + buffer.putLong(getValue()); + buffer.putLong(getSubQueryBitmap()); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/PrefixItem.java b/container-search/src/main/java/com/yahoo/prelude/query/PrefixItem.java new file mode 100644 index 00000000000..9c3a88178f7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/PrefixItem.java @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.query; + + +/** + * A word which matches beginnings of words instead of complete words + * + * @author bratseth + */ +public class PrefixItem extends WordItem { + + public PrefixItem(String prefix) { + this(prefix, false); + } + + public PrefixItem(String prefix, boolean isFromQuery) { + super(prefix, isFromQuery); + } + + public ItemType getItemType() { + return ItemType.PREFIX; + } + + public String getName() { + return "PREFIX"; + } + + public String stringValue() { + return getWord() + "*"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/PureWeightedInteger.java b/container-search/src/main/java/com/yahoo/prelude/query/PureWeightedInteger.java new file mode 100644 index 00000000000..9a78d4c8765 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/PureWeightedInteger.java @@ -0,0 +1,50 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import java.nio.ByteBuffer; + +/** + * Created with IntelliJ IDEA. + * User: balder + * Date: 07.12.12 + * Time: 13:42 + * To change this template use File | Settings | File Templates. 
+ */ +// TODO: Fix javadoc +public class PureWeightedInteger extends PureWeightedItem { + + private final long value; + + public PureWeightedInteger(long value) { + this(value, 100); + } + public PureWeightedInteger(long value, int weight) { + super(weight); + this.value = value; + } + + @Override + public ItemType getItemType() { + return ItemType.PURE_WEIGHTED_INTEGER; + } + + @Override + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + buffer.putLong(value); + } + + @Override + public int getTermCount() { + return 1; + } + + @Override + protected void appendBodyString(StringBuilder buffer) { + buffer.append(value); + super.appendBodyString(buffer); + } + public long getValue() { + return value; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/PureWeightedItem.java b/container-search/src/main/java/com/yahoo/prelude/query/PureWeightedItem.java new file mode 100644 index 00000000000..16f38159235 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/PureWeightedItem.java @@ -0,0 +1,39 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import java.nio.ByteBuffer; + +/** + * Created with IntelliJ IDEA. + * User: balder + * Date: 07.12.12 + * Time: 13:24 + * To change this template use File | Settings | File Templates. + */ +// TODO: Fix javadoc +public abstract class PureWeightedItem extends Item { + + public PureWeightedItem(int weight) { + setWeight(weight); + } + @Override + public void setIndexName(String index) { + // No index + } + + @Override + public String getName() { + return getItemType().name(); //To change body of implemented methods use File | Settings | File Templates. 
+ } + + @Override + public int encode(ByteBuffer buffer) { + encodeThis(buffer); + return 1; + } + + @Override + protected void appendBodyString(StringBuilder buffer) { + buffer.append(':').append(getWeight()); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/PureWeightedString.java b/container-search/src/main/java/com/yahoo/prelude/query/PureWeightedString.java new file mode 100644 index 00000000000..368ccd25483 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/PureWeightedString.java @@ -0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import java.nio.ByteBuffer; + +/** + * Created with IntelliJ IDEA. + * User: balder + * Date: 07.12.12 + * Time: 13:15 + * To change this template use File | Settings | File Templates. + */ +// TODO: Fix javadoc +public class PureWeightedString extends PureWeightedItem { + + private final String value; + + public PureWeightedString(String value) { + this(value, 100); + } + public PureWeightedString(String value, int weight) { + super(weight); + this.value = value; + } + + @Override + public ItemType getItemType() { + return ItemType.PURE_WEIGHTED_STRING; + } + + @Override + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + putString(value, buffer); + } + + @Override + public int getTermCount() { + return 1; + } + + @Override + protected void appendBodyString(StringBuilder buffer) { + buffer.append(value); + super.appendBodyString(buffer); + } + + public String getString() { + return value; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java b/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java new file mode 100644 index 00000000000..410eb1c35f5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java @@ -0,0 +1,219 @@ +// 
Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + + +import com.yahoo.search.Query; +import com.yahoo.search.query.QueryTree; + +import java.util.*; + + +/** + * A class which canonicalizes and validates queries. + * This class is multithread safe. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class QueryCanonicalizer { + + /** The name of the operation performed by this (for use in search chain ordering) */ + public static final String queryCanonicalization = "queryCanonicalization"; + + /** + * Validates this query and carries out possible operations on this query + * which simplifies it without changing its semantics. + * + * @return null if the query is valid, an error message if it is invalid + */ + public static String canonicalize(Query query) { + Item root = query.getModel().getQueryTree().getRoot(); + return canonicalize(query, root); + } + + /** + * Validates this query and carries out possible operations on this query + * which simplifies it without changing its semantics. + * + * @return null if the query is valid, an error message if it is invalid + */ + public static String canonicalize(QueryTree query) { + QueryWrapper q = new QueryWrapper(); + q.setRoot(query.getRoot()); // Could get rid of the wrapper... + treeCanonicalize(q, query.getRoot(), null); + query.setRoot(q.root); + return q.error; + } + + /** + * Validates this query and + * carries out possible operations on this query which simplifies it + * without changing its semantics. 
+ * + * @param item the item to canonicalize + * @return null if the query is valid, an error message if it is invalid + */ + private static String canonicalize(Query query, Item item) { + QueryWrapper q = new QueryWrapper(); + q.setRoot(item); + treeCanonicalize(q, query.getModel().getQueryTree().getRoot(), null); + if (q.root == null) + q.root = new NullItem(); + query.getModel().getQueryTree().setRoot(q.root); + return q.error; + } + + /** + * @param bag wrapper for error message and query root + * @param item the item to canonicalize + * @param iterator iterator for the above item if pertinent + * @return whether the query could be canonicalized into something + */ + public static boolean treeCanonicalize(QueryWrapper bag, Item item, ListIterator<Item> iterator) { + if (iterator == null && (item == null || item instanceof NullItem)) { + bag.setError("No query"); + return false; + } + + if (item instanceof TermItem) { + return true; + } + + if (item instanceof NullItem) { + iterator.remove(); + } + + if ( ! (item instanceof CompositeItem)) { + return true; + } // Impossible yet + CompositeItem composite = (CompositeItem) item; + + for (ListIterator<Item> i = composite.getItemIterator(); i.hasNext();) { + Item child = i.next(); + boolean subtreeOK = treeCanonicalize(bag, child, i); + + if (!subtreeOK) { + return false; + } + } + + if (composite instanceof EquivItem) { + removeDuplicates((EquivItem) composite); + } + else if (composite instanceof RankItem) { + makeDuplicatesCheap((RankItem)composite); + } + else if (composite instanceof NotItem) { + if (((NotItem) composite).getPositiveItem() == null) { + bag.setError("Can not search for only negative items"); + return false; + } + } + + if (composite.getItemCount() == 0) { + if (iterator == null) { + bag.setRoot(new NullItem()); + bag.setError("No query: Contained an empty " + composite.getName() + " only"); + return false; + } else { + iterator.remove(); + } + } + + if (composite.getItemCount() == 1 && ! 
(composite instanceof NonReducibleCompositeItem)) { + if (composite instanceof PhraseItem || composite instanceof PhraseSegmentItem) { + composite.getItem(0).setWeight(composite.getWeight()); + } + if (iterator == null) { + bag.setRoot(composite.getItem(0)); + } else { + iterator.set(composite.getItem(0)); + } + } + + return true; + } + + private static void removeDuplicates(EquivItem composite) { + int origSize = composite.getItemCount(); + for (int i = origSize - 1; i >= 1; --i) { + Item deleteCandidate = composite.getItem(i); + for (int j = 0; j < i; ++j) { + Item check = composite.getItem(j); + if (deleteCandidate.getClass() == check.getClass()) { + if (deleteCandidate instanceof PhraseItem) { + PhraseItem phraseDeletionCandidate = (PhraseItem) deleteCandidate; + PhraseItem phraseToCheck = (PhraseItem) check; + if (phraseDeletionCandidate.getIndexedString().equals(phraseToCheck.getIndexedString())) { + composite.removeItem(i); + break; + } + } else if (deleteCandidate instanceof PhraseSegmentItem) { + PhraseSegmentItem phraseSegmentDeletionCandidate = (PhraseSegmentItem) deleteCandidate; + PhraseSegmentItem phraseSegmentToCheck = (PhraseSegmentItem) check; + if (phraseSegmentDeletionCandidate.getIndexedString().equals(phraseSegmentToCheck.getIndexedString())) { + composite.removeItem(i); + break; + } + } else if (deleteCandidate instanceof BlockItem) { + BlockItem blockDeletionCandidate = (BlockItem) deleteCandidate; + BlockItem blockToCheck = (BlockItem) check; + if (blockDeletionCandidate.stringValue().equals(blockToCheck.stringValue())) { + composite.removeItem(i); + break; + } + } + } + } + } + } + + /** + * If a term is present as both a rank term (i.e not the first child) and in + * the match condition (first child), then turn off any rank calculation for + * the term during matching, as it will be made available anyway for matches + * by the same term in the rank part. 
+ * + * @param rankItem + * an item which will be simplified in place + */ + private static void makeDuplicatesCheap(RankItem rankItem) { + // Collect terms used for ranking + Set<TermItem> rankTerms = new HashSet<>(); + for (int i = 1; i < rankItem.getItemCount(); i++) { + if (rankItem.getItem(i) instanceof TermItem) + rankTerms.add((TermItem)rankItem.getItem(i)); + } + + // Make terms used for matching cheap if they also are ranking terms + makeDuplicatesCheap(rankItem.getItem(0), rankTerms); + } + + private static void makeDuplicatesCheap(Item item, Set<TermItem> rankTerms) { + if (item instanceof CompositeItem) { + for (ListIterator<Item> i = ((CompositeItem)item).getItemIterator(); i.hasNext();) + makeDuplicatesCheap(i.next(), rankTerms); + } + else if (rankTerms.contains(item)) { + item.setRanked(false); + item.setPositionData(false); + } + } + + public static class QueryWrapper { + private Item root = null; + private String error = null; + + public Item getRoot() { return root; } + public void setRoot(Item root) { + this.root = root; + } + public String getError() { + return error; + } + public void setError(String error) { + this.error = error; + } + + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/QueryException.java b/container-search/src/main/java/com/yahoo/prelude/query/QueryException.java new file mode 100644 index 00000000000..58dc73cd0e1 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/QueryException.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +/** + * Runtime exception to mark errors in query parsing. 
/**
 * Runtime exception to mark errors in query parsing.
 *
 * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
 */
public class QueryException extends RuntimeException {

    private static final long serialVersionUID = -2975856668328596533L;

    /**
     * Creates a query exception without a cause.
     *
     * @param message a description of what is wrong with the query
     */
    public QueryException(String message) {
        super(message);
    }

    /**
     * Creates a query exception wrapping an underlying cause.
     *
     * @param message a description of what is wrong with the query
     * @param cause the exception which led to this one
     */
    public QueryException(String message, Throwable cause) {
        super(message, cause);
    }

}
+public class RangeItem extends IntItem { + + /** + * Creates a new range operator + * + * @param from inclusive start point for range + * @param to inclusive end point for range + * @param indexName the index to search for this range + */ + public RangeItem(Number from, Number to, String indexName) { + this(from, to, indexName, false); + } + + /** + * Creates a new range operator + * + * @param from start point for range + * @param to end point for range + * @param indexName the index to search for this range + */ + public RangeItem(Limit from, Limit to, String indexName) { + this(from, to, indexName, false); + } + + /** + * Creates a new range operator + * + * @param from inclusive start point for range + * @param to inclusive end point for range + * @param indexName the index to search for this range + * @param isFromQuery Indicate if this stems directly from the user given query, + * or if you have constructed it at will. + */ + public RangeItem(Number from, Number to, String indexName, boolean isFromQuery) { + this(from, to, 0, indexName, isFromQuery); + } + + /** + * Creates a new range operator + * + * @param from start point for range + * @param to end point for range + * @param indexName the index to search for this range + * @param isFromQuery Indicate if this stems directly from the user given query, + * or if you have constructed it at will. + */ + public RangeItem(Limit from, Limit to, String indexName, boolean isFromQuery) { + this(from, to, 0, indexName, isFromQuery); + } + + /** + * + * @param from inclusive start point for range + * @param to inclusive end point for range + * @param hitLimit This tells how many results you want included from this range as a minimum. + * You might get less if there are not enough, or you might get more. It will use the dictionary and + * include enough entries to satisfy your request. + * Positive number will start from left (@from) and work right. + * Negative number will start from right and work its way left. 
+ * 0 means no limit. + * @param indexName the index to search for this range + * @param isFromQuery Indicate if this stems directly from the user given query, + * or if you have constructed it at will. + */ + public RangeItem(Number from, Number to, int hitLimit, String indexName, boolean isFromQuery) { + this(new Limit(from, true), new Limit(to, true), hitLimit, indexName, isFromQuery); + } + + /** + * + * @param from start point for range + * @param to end point for range + * @param hitLimit This tells how many results you want included from this range as a minimum. + * You might get less if there are not enough, or you might get more. It will use the dictionary and + * include enough entries to satisfy your request. + * Positive number will start from left (@from) and work right. + * Negative number will start from right and work its way left. + * 0 means no limit. + * @param indexName the index to search for this range + * @param isFromQuery Indicate if this stems directly from the user given query, + * or if you have constructed it at will. + */ + public RangeItem(Limit from, Limit to, int hitLimit, String indexName, boolean isFromQuery) { + super(from, to, hitLimit, indexName, isFromQuery); + } + + /** Returns the lower limit of this range, which may be negative infinity */ + public final Number getFrom() { + return getFromLimit().number(); + } + + /** Returns the upper limit of this range, which may be positive infinity */ + public final Number getTo() { + return getToLimit().number(); + } + + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/RankItem.java b/container-search/src/main/java/com/yahoo/prelude/query/RankItem.java new file mode 100644 index 00000000000..3ff2857b915 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/RankItem.java @@ -0,0 +1,25 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.query; + + +/** + * Represents the rank operator, which only orders the result set and + * does not change which hits are returned. + * + * The first argument is the part selecting the result set, the + * following operands are used to order the result and does not affect + * which hits are returned. + * + * @author bratseth + */ +public class RankItem extends CompositeItem { + + public ItemType getItemType() { + return ItemType.RANK; + } + + public String getName() { + return "RANK"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/RegExpItem.java b/container-search/src/main/java/com/yahoo/prelude/query/RegExpItem.java new file mode 100644 index 00000000000..5a611a8a927 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/RegExpItem.java @@ -0,0 +1,106 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import java.nio.ByteBuffer; + +/** + * Match a field with the contained regular expression. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class RegExpItem extends TermItem { + private String expression; + + public RegExpItem(String indexName, boolean isFromQuery, String expression) { + super(indexName, isFromQuery, null); + this.expression = expression; + } + + @Override + public String stringValue() { + return expression; + } + + @Override + public boolean isStemmed() { + return true; + } + + @Override + public int getNumWords() { + return 1; + } + + @Override + public void setValue(String expression) { + this.expression = expression; + } + + @Override + public String getRawWord() { + return stringValue(); + } + + @Override + public boolean isWords() { + return false; + } + + @Override + public String getIndexedString() { + return stringValue(); + } + + @Override + public ItemType getItemType() { + return ItemType.REGEXP; + } + + @Override + public String getName() { + return ItemType.REGEXP.name(); + } + + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + putString(getIndexedString(), buffer); + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + builder.append("RegExpItem [expression=").append(expression).append("]"); + return builder.toString(); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = super.hashCode(); + result = prime * result + ((expression == null) ? 
0 : expression.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (!super.equals(obj)) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + RegExpItem other = (RegExpItem) obj; + if (expression == null) { + if (other.expression != null) { + return false; + } + } else if (!expression.equals(other.expression)) { + return false; + } + return true; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/SegmentItem.java b/container-search/src/main/java/com/yahoo/prelude/query/SegmentItem.java new file mode 100644 index 00000000000..0dd8e1c36cc --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/SegmentItem.java @@ -0,0 +1,176 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + + +import com.yahoo.prelude.query.textualrepresentation.Discloser; + + +/** + * An immutable and'ing of a collection of sub-expressions. It does not + * extend AndItem to avoid code using instanceof handling it as an + * AndItem. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public abstract class SegmentItem extends CompositeItem implements BlockItem { + + private boolean locked = false; + private String rawWord; + private String value; + private boolean isFromQuery; + private boolean isFromUser; + private boolean stemmed; + private SegmentingRule segmentingRule = SegmentingRule.LANGUAGE_DEFAULT; + private Substring origin; + + /** + * Creates a new segment item + * + * @param rawWord the raw form of this segment as received in the request + * @param current the current transformed version of the raw form, or the raw form repeated if no normalized form is known + * @param isFromQuery whether this segment stems from the query received in the request + * @param stemmed whether this is stemmed + */ + public SegmentItem(String rawWord, String current, boolean isFromQuery, boolean stemmed) { + this(rawWord, current, isFromQuery, stemmed, null); + } + + /** + * Creates a new segment item + * + * @param rawWord the raw form of this segment as received in the request + * @param current the current transformed version of the raw form, or the raw form repeated if no normalized form is known + * @param isFromQuery whether this segment stems from the query received in the request + * @param stemmed whether this is stemmed + * @param origin TODO + */ + public SegmentItem(String rawWord, String current, boolean isFromQuery, boolean stemmed, Substring origin) { + this.rawWord = rawWord; + this.value = current; + this.stemmed = stemmed; + this.isFromQuery = isFromQuery; + isFromUser = isFromQuery; + this.origin = origin; + } + + public String getRawWord() { + return rawWord; + } + + public String getNormalized() { + return value; + } + + @Override + public String stringValue() { + return value; + } + + public boolean isFromQuery() { + return isFromQuery; + } + + public boolean isStemmed() { + return stemmed; + } + + public void lock() { + locked = true; + } + + public 
boolean isLocked() { + return locked; + } + + public int getNumWords() { + return getItemCount(); + } + + public void addItem(Item item) { + if (locked) { + dontAdd(); + } + super.addItem(item); + } + + public void addItem(int index, Item item) { + if (locked) { + dontAdd(); + } + super.addItem(index, item); + } + + private void dontAdd() { + throw new QueryException("Tried to add item to an immutable segment."); + } + + public Item removeItem(int index) { + if (locked) { + dontRemove(); + } + return super.removeItem(index); + } + + public boolean removeItem(Item item) { + if (locked) { + dontRemove(); + } + return super.removeItem(item); + } + + private void dontRemove() { + throw new QueryException("Tried to remove an item from an immutable segment."); + } + + // TODO: Add a getItemIterator which is safe for immutability + + /** Return a deep copy of this object */ + public SegmentItem clone() { + SegmentItem copy; + synchronized(this) { + boolean tmpLock = locked; + + locked = false; + copy = (SegmentItem) super.clone(); + locked = tmpLock; + copy.locked = tmpLock; + } + return copy; + } + + public boolean isWords() { + return true; + } + + public boolean isFromUser() { + return isFromUser; + } + + public void setFromUser(boolean isFromUser) { + this.isFromUser = isFromUser; + } + + /** Returns null right now */ + public Substring getOrigin() { + return origin; + } + + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("isFromQuery", isFromQuery); + discloser.addProperty("isFromUser", isFromUser); + discloser.addProperty("locked", locked); + discloser.addProperty("rawWord", rawWord); + discloser.addProperty("stemmed", stemmed); + } + + @Override + public SegmentingRule getSegmentingRule() { + return segmentingRule; + } + + public void setSegmentingRule(SegmentingRule segmentingRule) { + this.segmentingRule = segmentingRule; + } +} diff --git 
/**
 * How the result should be handled in the query tree when a term has to be
 * resegmented and the result is more than one word. For Western languages the
 * default is creating a phrase, but for business reasons, some East Asian
 * languages use an AND instead.
 *
 * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
 * @since 5.1.28
 */
public enum SegmentingRule {

    /** Use whatever is the default for the language in question. */
    LANGUAGE_DEFAULT,

    /** Handle the resulting words as a phrase. */
    PHRASE,

    /** Combine the resulting words with AND. */
    BOOLEAN_AND

}
+ * + * @author arnej27959 + */ +public abstract class SimpleIndexedItem extends SimpleTaggableItem implements IndexedItem { + + @NonNull + private String index = ""; + + /** + * The name of the index this belongs to, or "" (never null) if not specified + **/ + @NonNull + public String getIndexName() { + return index; + } + + // encode index bytes + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + putString(index, buffer); + } + + /** Sets the name of the index to search */ + public void setIndexName(String index) { + if (index == null) { + index = ""; + } + this.index = index; + } + + /** Appends the index prefix if necessary */ + protected void appendIndexString(StringBuilder buffer) { + if (!getIndexName().equals("")) { + buffer.append(getIndexName()); + buffer.append(":"); + } + } + + public boolean equals(Object object) { + if (!super.equals(object)) { + return false; + } + IndexedItem other = (IndexedItem) object; // Ensured by superclass + if (!this.index.equals(other.getIndexName())) { + return false; + } + return true; + } + + public int hashCode() { + return super.hashCode() + 113 * index.hashCode(); + } + + public abstract String getIndexedString(); + + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("index", index); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/SimpleTaggableItem.java b/container-search/src/main/java/com/yahoo/prelude/query/SimpleTaggableItem.java new file mode 100644 index 00000000000..adaa4f614e5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/SimpleTaggableItem.java @@ -0,0 +1,71 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +/** + * Common implementation for Item classes implementing the TaggableItem interface. 
+ * Note that this file exist in 3 copies that should be kept in sync: + * + * CompositeTaggableItem.java + * SimpleTaggableItem.java + * TaggableSegmentItem.java + * + * These should only have trivial differences. + * (multiple inheritance or mixins would have been nice). + * + * @author arnej27959 + */ +public abstract class SimpleTaggableItem extends Item implements TaggableItem { + + public int getUniqueID() { + return uniqueID; + } + + public void setUniqueID(int id) { + setHasUniqueID(true); + uniqueID = id; + } + + /** See {@link TaggableItem#setConnectivity} */ + public void setConnectivity(Item item, double connectivity) { + setHasUniqueID(true); + item.setHasUniqueID(true); + if (connectedItem != null) { + // untangle old connectivity + connectedItem.connectedBacklink = null; + } + this.connectivity = connectivity; + connectedItem = item; + connectedItem.connectedBacklink = this; + } + + public Item getConnectedItem() { + return connectedItem; + } + + public double getConnectivity() { + return connectivity; + } + + public void setSignificance(double significance) { + setHasUniqueID(true); + setExplicitSignificance(true); + this.significance = significance; + } + + public void setExplicitSignificance(boolean explicitSignificance) { + this.explicitSignificance = explicitSignificance; + } + + public boolean hasExplicitSignificance() { + return explicitSignificance; + } + + public double getSignificance() { + return significance; + } + + //Change access privilege from protected to public. + public boolean hasUniqueID() { + return super.hasUniqueID(); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/Substring.java b/container-search/src/main/java/com/yahoo/prelude/query/Substring.java new file mode 100644 index 00000000000..fa304d6b63a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/Substring.java @@ -0,0 +1,62 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
/**
 * A substring which also provides access to the full (query) string it is a substring of.
 * This is immutable.
 *
 * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
 */
public class Substring {

    /** The start offset of the substring into {@link #string}, as passed to String.substring */
    public final int start;

    /** The end offset of the substring into {@link #string}, as passed to String.substring */
    public final int end;

    /** The string this is a substring of */
    public final String string;

    /**
     * Creates a substring. The offsets are stored as given, without validation.
     *
     * @param start the start offset into the superstring
     * @param end the end offset into the superstring
     * @param string the superstring the offsets refer to
     */
    public Substring(int start, int end, String string) {
        this.start = start;
        this.end = end;
        this.string = string;
    }

    /** Returns the part of the superstring between the start and end offsets */
    public String getValue() {
        return string.substring(start, end);
    }

    /** Returns the entire string this is a substring of. The start and end offsets are into this string. */
    public String getSuperstring() {
        return string;
    }

    /**
     * Returns the character n places (0 base) after the end of the value substring into the superstring.
     * For example charAfter(0) returns the first character after the end of the substring
     *
     * @return the char n places after the end of the substring
     * @throws IndexOutOfBoundsException if the string is not long enough to have a character at this position
     */
    public char charAfter(int n) {
        int position = end + n;
        return string.charAt(position);
    }

    /**
     * Returns the character n places (0 base) before the start of the value substring into the superstring.
     * For example charBefore(0) returns the first character before the start of the substring
     *
     * @return the char n places before the start of the substring
     * @throws IndexOutOfBoundsException if the string does not have a character at this position
     */
    public char charBefore(int n) {
        int position = start - 1 - n;
        return string.charAt(position);
    }

    /** Returns the offsets on the form <code>(start end)</code> */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append('(').append(start).append(' ').append(end).append(')');
        return text.toString();
    }

}
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ + +public class SuffixItem extends WordItem { + + public SuffixItem(String suffix) { + this(suffix, false); + } + + public SuffixItem(String suffix, boolean isFromQuery) { + super(suffix, isFromQuery); + } + + public ItemType getItemType() { + return ItemType.SUFFIX; + } + + public String getName() { + return "SUFFIX"; + } + + public String stringValue() { + return "*" + getWord(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/TaggableItem.java b/container-search/src/main/java/com/yahoo/prelude/query/TaggableItem.java new file mode 100644 index 00000000000..a71bd909000 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/TaggableItem.java @@ -0,0 +1,48 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + + +/** + * An interface used for anything which may be addressed using an external, + * unique ID in the query tree in the backend. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public interface TaggableItem { + + public int getUniqueID(); + public void setUniqueID(int id); + public boolean hasUniqueID(); + + /** + * Set the connectivity to another term in the same query tree. + * This is used to influence ranking features taking proximity into account: nativeRank and a subset of the + * fieldMatch features. + * <p> + * By default consecutive query terms are 'somewhat' connected, meaning ranking features will be better in documents + * where the terms are found close to each other. This effect can be increased or decreased by manipulating the + * connectivity value. Typical use is to increase the connectivity between terms in the query that we believe are + * semantically connected. 
E.g in the query 'new york hotel', it is a good idea to increase the connectivity between + * "new" and "york" to ensure that a document containing "List of hotels in New York" is ranked above one containing + * "List of new hotels in York". + * + * @param item the item this should be connected to - in practice the next consecutive item in the query + * @param connectivity a value between 0 (none) and 1 (maximal), defining the connectivity between this and the + * argument item. The default connectivity is 0.1. + */ + public void setConnectivity(Item item, double connectivity); + public Item getConnectedItem(); + public double getConnectivity(); + + + /** + * Used for setting explicit term significance (in the tf/idf sense) to a single term or phrase, + * relative to the rest of the query. + * This influences ranking features which take term significance into account and overrides the default + * partial corpus based term significance computation happening in the backend. + */ + public void setSignificance(double significance); + public boolean hasExplicitSignificance(); + public void setExplicitSignificance(boolean significance); + public double getSignificance(); +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/TaggableSegmentItem.java b/container-search/src/main/java/com/yahoo/prelude/query/TaggableSegmentItem.java new file mode 100644 index 00000000000..1346fc9de7f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/TaggableSegmentItem.java @@ -0,0 +1,75 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +/** + * Common implementation for Item classes implementing the TaggableItem interface. + * Note that this file exist in 3 copies that should be kept in sync: + * + * CompositeTaggableItem.java + * SimpleTaggableItem.java + * TaggableSegmentItem.java + * + * These should only have trivial differences. 
+ * (multiple inheritance or mixins would have been nice). + * + * @author arnej27959 + */ +public abstract class TaggableSegmentItem extends SegmentItem implements TaggableItem { + + protected TaggableSegmentItem(String rawWord, String current, boolean isFromQuery, boolean stemmed, Substring origin) { + super(rawWord, current, isFromQuery, stemmed, origin); + } + + public int getUniqueID() { + return uniqueID; + } + + public void setUniqueID(int id) { + setHasUniqueID(true); + uniqueID = id; + } + + /** See {@link TaggableItem#setConnectivity} */ + public void setConnectivity(Item item, double connectivity) { + setHasUniqueID(true); + item.setHasUniqueID(true); + if (connectedItem != null) { + // untangle old connectivity + connectedItem.connectedBacklink = null; + } + this.connectivity = connectivity; + connectedItem = item; + connectedItem.connectedBacklink = this; + } + + public Item getConnectedItem() { + return connectedItem; + } + + public double getConnectivity() { + return connectivity; + } + + public void setSignificance(double significance) { + setHasUniqueID(true); + setExplicitSignificance(true); + this.significance = significance; + } + + public void setExplicitSignificance(boolean explicitSignificance) { + this.explicitSignificance = explicitSignificance; + } + + public boolean hasExplicitSignificance() { + return explicitSignificance; + } + + public double getSignificance() { + return significance; + } + + //Change access privilege from protected to public. + public boolean hasUniqueID() { + return super.hasUniqueID(); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/TermItem.java b/container-search/src/main/java/com/yahoo/prelude/query/TermItem.java new file mode 100644 index 00000000000..d20ee304b57 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/TermItem.java @@ -0,0 +1,138 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.query; + + +import com.yahoo.prelude.query.textualrepresentation.Discloser; + +import java.nio.ByteBuffer; + + +/** + * <p>A query term, that is, not only a term in the query language + * (an <i>item</i>), but also a term to be found in (or excluded from) + * the search index.</p> + * + * <p>Optionally, a TermItem may also specify the name of an + * index backend to search.</p> + * + * @author bratseth + * @author havardpe + */ +public abstract class TermItem extends SimpleIndexedItem implements BlockItem { + + /** Whether the term is from the raw query or is synthetic. */ + private final boolean isFromQuery; + + /** Whether accent dropping should be performed */ + private boolean normalizable = true; + + /** The substring which is the raw form of the source of this token, or null if none. */ + private Substring origin; + + private SegmentingRule segmentingRule = SegmentingRule.LANGUAGE_DEFAULT; + + public TermItem() { + this(""); + } + + public TermItem(String indexName) { + this(indexName, false); + } + + public TermItem(String indexName, boolean isFromQuery) { + this(indexName, isFromQuery, null); + } + + protected TermItem(String indexName, boolean isFromQuery, Substring origin) { + setIndexName(indexName); + this.isFromQuery = isFromQuery; + this.origin = origin; + } + + final public int encode(ByteBuffer buffer) { + encodeThis(buffer); + return 1; + } + + /** Appends the index prefix if necessary and delegates to the subclass */ + protected final void appendBodyString(StringBuilder buffer) { + appendIndexString(buffer); + buffer.append(stringValue()); + } + + /** + * Sets the value of this item from a string. 
+ * + * @throws UnsupportedOperationException if this is not supported on this kind of item + */ + public abstract void setValue(String value); + + /** Returns the raw form of the text leading to this term, exactly as received, including original casing */ + public abstract String getRawWord(); + + /** + * Returns the substring which is the raw form of the text leading to this token. This substring also contains + * the superstring this substring was a part of, e.g the whole query string. + * If this did not originate directly from a user string, this is null. + */ + public Substring getOrigin() { return origin; } + + /** + * Whether this term is from the query or has been added by a searcher. + * Only terms from the user should be modified by query rewriters which attempts to improve the + * precision or recall of the user's query. + */ + public boolean isFromQuery() { return isFromQuery; } + + public abstract boolean isWords(); + + /** Sets the origin of this */ + public void setOrigin(Substring origin) { + this.origin = origin; + } + + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("origin", origin); + discloser.setValue(stringValue()); + } + + @Override + public int getTermCount() { return 1; } + + /** + * This refers to whether accent removal is a meaningful and possible + * operation for this word. It should be named "isTransformable" or similar, + * but for historical reasons that is not the case. This method has nothing + * to do with Unicode normalization. + * + * @return true if accent removal can/should be performed + */ + public boolean isNormalizable() { + return normalizable; + } + + /** + * This refers to whether accent removal is a meaningful and possible + * operation for this word. It should be named "isTransformable" or similar, + * but for historical reasons that is not the case. This method has nothing + * to do with Unicode normalization. 
+ * + * @param normalizable + * set to true if accent removal can/should be performed + */ + public void setNormalizable(boolean normalizable) { + this.normalizable = normalizable; + } + + @Override + public SegmentingRule getSegmentingRule() { + return segmentingRule; + } + + public void setSegmentingRule(SegmentingRule segmentingRule) { + this.segmentingRule = segmentingRule; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/TermType.java b/container-search/src/main/java/com/yahoo/prelude/query/TermType.java new file mode 100644 index 00000000000..f2b38d5eacb --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/TermType.java @@ -0,0 +1,92 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + + +/** + * A term type enumeration + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class TermType { + + public static TermType RANK = new TermType("rank", RankItem.class, null, "$"); + + public static TermType AND = new TermType("and", AndItem.class, null, "+"); + + public static TermType OR = new TermType("or", OrItem.class, null, "?"); + + public static TermType NOT = new TermType("not", NotItem.class, null, "-"); + + public static TermType PHRASE = new TermType("phrase", PhraseItem.class, null, "\""); + + public static TermType DEFAULT = new TermType("", CompositeItem.class, AndItem.class, ""); + + public final String name; + + private final String sign; + private final Class<? extends CompositeItem> instanceClass; + private final Class<? extends CompositeItem> itemClass; + + private TermType(String name, Class<? extends CompositeItem> itemClass, Class<? 
extends CompositeItem> instanceClass, String sign) { + this.name = name; + this.itemClass = itemClass; + if (instanceClass == null) { + this.instanceClass = itemClass; + } else { + this.instanceClass = instanceClass; + } + this.sign = sign; + } + + public String getName() { + return name; + } + + /** Returns the CompositeItem type this type corresponds to, or CompositeItem if it's the default */ + public Class<? extends CompositeItem> getItemClass() { + return itemClass; + } + + /** Returns true if the class corresponding to this type is the given class */ + public boolean hasItemClass(Class<?> theClass) { + return getItemClass()==theClass; + } + + /** + * Returns an instance of the class corresponding to the given type, AndItem + * if this is the DEFAULT type + * + * @throws RuntimeException + * if an instance could not be created + */ + public Item createItemClass() { + try { + return instanceClass.newInstance(); + } catch (Exception e) { + throw new RuntimeException("Could not create an instance for item " + + this, e); + } + } + + public String toSign() { + return sign; + } + + public boolean equals(Object o) { + if (!(o instanceof TermType)) { + return false; + } + + TermType other = (TermType) o; + + return name.equals(other.name); + } + + public int hashCode() { + return name.hashCode(); + } + + public @Override String toString() { return "term type '" + name + "'"; } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/ToolBox.java b/container-search/src/main/java/com/yahoo/prelude/query/ToolBox.java new file mode 100644 index 00000000000..32205135f04 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/ToolBox.java @@ -0,0 +1,47 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import com.google.common.annotations.Beta; + +/** + * Query tree helper methods and factories. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@Beta +public final class ToolBox { + + public static abstract class QueryVisitor { + /** + * Called for each item in the query tree given to + * {@link ToolBox#visit(QueryVisitor, Item)}. Return true to visit the + * sub-items of the given item, return false to ignore the sub-items. + * + * @param item + * each item in the query tree + * @return whether or not to visit the sub-items of the argument item + * (and then invoke the {@link #onExit()} method) + */ + public abstract boolean visit(Item item); + + /** + * Invoked when all sub-items have been visited, or immediately after + * visit() if there are no sub-items or visit() returned false. + */ + public abstract void onExit(); + } + + public static void visit(QueryVisitor visitor, Item item) { + if (item instanceof CompositeItem) { + if (visitor.visit(item)) { + CompositeItem composite = (CompositeItem) item; + for (int i = 0; i < composite.getItemCount(); ++i) { + visit(visitor, composite.getItem(i)); + } + } + } else { + visitor.visit(item); + } + visitor.onExit(); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/WandItem.java b/container-search/src/main/java/com/yahoo/prelude/query/WandItem.java new file mode 100644 index 00000000000..df967464230 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/WandItem.java @@ -0,0 +1,103 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import com.yahoo.compress.IntegerCompressor; +import com.yahoo.prelude.query.textualrepresentation.Discloser; + +import java.nio.ByteBuffer; + +/** + * A weighted set query item to be evaluated as a Wand with dot product scoring. + * + * The dot product is calculated between the matched tokens of the weighted set field searched + * and the weights associated with the tokens of this WandItem. 
+ * The resulting dot product will be available as a raw score in the rank framework. + * + * @since 5.1.27 + * @author <a href="mailto:geirst@yahoo-inc.com">Geir Storli</a> + */ +public class WandItem extends WeightedSetItem { + + private int targetNumHits; + private double scoreThreshold = 0; + private double thresholdBoostFactor = 1; + + /** + * Creates an empty WandItem. + * @param fieldName The name of the weighted set field to search with this WandItem. + * @param targetNumHits The target for minimum number of hits to produce by the backend search operator handling this WandItem. + */ + public WandItem(String fieldName, int targetNumHits) { + super(fieldName); + this.targetNumHits = targetNumHits; + } + + /** + * Sets the initial score threshold used by the backend search operator handling this WandItem. + * The score of a document must be larger than this threshold in order to be considered a match. + * Default value is 0.0. + * @param scoreThreshold the initial score threshold. + */ + public void setScoreThreshold(double scoreThreshold) { + this.scoreThreshold = scoreThreshold; + } + + /** + * Sets the boost factor used by the backend search operator to boost the threshold before + * comparing it with the upper bound score of the document being evaluated. + * A large value of this factor results in fewer full evaluations and in an expected loss in precision. + * Similarly, a gain in performance might be expected. Default value is 1.0. + * + * NOTE: This boost factor is only used when this WandItem is searching a Vespa field. + * @param thresholdBoostFactor the boost factor. 
+ */ + public void setThresholdBoostFactor(double thresholdBoostFactor) { + this.thresholdBoostFactor = thresholdBoostFactor; + } + + public int getTargetNumHits() { + return targetNumHits; + } + + public double getScoreThreshold() { + return scoreThreshold; + } + + public double getThresholdBoostFactor() { + return thresholdBoostFactor; + } + + @Override + public ItemType getItemType() { + return ItemType.WAND; + } + + @Override + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + IntegerCompressor.putCompressedPositiveNumber(targetNumHits, buffer); + buffer.putDouble(scoreThreshold); + buffer.putDouble(thresholdBoostFactor); + } + + @Override + protected void appendHeadingString(StringBuilder buffer) { + buffer.append(getName()); + buffer.append("("); + buffer.append(targetNumHits); + buffer.append(","); + buffer.append(scoreThreshold); + buffer.append(","); + buffer.append(thresholdBoostFactor); + buffer.append(")"); + buffer.append(" "); + } + + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("targetNumHits", targetNumHits); + discloser.addProperty("scoreThreshold", scoreThreshold); + discloser.addProperty("thresholdBoostFactor", thresholdBoostFactor); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/WeakAndItem.java b/container-search/src/main/java/com/yahoo/prelude/query/WeakAndItem.java new file mode 100644 index 00000000000..967b9d17256 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/WeakAndItem.java @@ -0,0 +1,139 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.query; + +import com.yahoo.compress.IntegerCompressor; +import com.yahoo.prelude.query.textualrepresentation.Discloser; + +import edu.umd.cs.findbugs.annotations.NonNull; + +import java.nio.ByteBuffer; + +/** + * Weak And of a collection of sub-expressions: + * this behaves like an OR unless many hits are returned and then + * it starts acting more like an AND. + * Alternately it can be viewed as an n-of-m operator where n + * is 1 at first and then increases gradually to m as more hits + * are seen. + * + * @author arnej27959 + */ +public final class WeakAndItem extends NonReducibleCompositeItem { + + private int N; + @NonNull + private String index; + private int scoreThreshold = 0; + + public ItemType getItemType() { + return ItemType.WEAK_AND; + } + + public String getName() { + return "WAND"; + } + + /** + * Make a WAND item with no children. You can mention a common index or you can mention it on each child. + * @param index The index it shall search. + * @param N the target for minimum number of hits to produce; + * a backend will not suppress any hits in the operator + * until N hits have been produced. 
+ **/ + public WeakAndItem(String index, int N) { + this.N = N; + if (index == null) { + this.index = ""; + } else { + this.index = index; + } + } + public WeakAndItem(int N) { + this("", N); + } + + /** Sets the index name of all subitems of this */ + public void setIndexName(String index) { + String toSet; + if (index == null) { + toSet = ""; + } else { + toSet = index; + } + super.setIndexName(toSet); + this.index = toSet; + } + + @NonNull + public String getIndexName() { + return index; + } + + /** Appends the heading of this string - <code>[getName()]([limit]) </code> */ + protected void appendHeadingString(StringBuilder buffer) { + buffer.append(getName()); + buffer.append("("); + buffer.append(N); + buffer.append(")"); + buffer.append(" "); + } + + /** The default N used if none is specified: 100 */ + public static final int defaultN = 100; + + /** Creates a WAND item with default N */ + public WeakAndItem() { + this(defaultN); + } + + public int getN() { + return N; + } + + public void setN(int N) { + this.N = N; + } + + public int getScoreThreshold() { + return scoreThreshold; + } + + /** + * Sets the score threshold used by the backend search operator handling this WeakAndItem. + * This threshold is currently only used if the WeakAndItem is searching a RISE index field. + * The score threshold then specifies the minimum dot product score a match needs to be part of the result set. + * Default value is 0. + * @param scoreThreshold the score threshold. 
+ */ + public void setScoreThreshold(int scoreThreshold) { + this.scoreThreshold = scoreThreshold; + } + + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + IntegerCompressor.putCompressedPositiveNumber(N, buffer); + putString(index, buffer); + } + + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("N", N); + } + + public int hashCode() { + return super.hashCode() + 31 * N; + } + + /** + * Returns whether this item is of the same class and + * contains the same state as the given item + */ + public boolean equals(Object object) { + if (!super.equals(object)) return false; + WeakAndItem other = (WeakAndItem) object; // Ensured by superclass + if (this.N != other.N) return false; + return true; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/WeightedSetItem.java b/container-search/src/main/java/com/yahoo/prelude/query/WeightedSetItem.java new file mode 100644 index 00000000000..eb6737ba9d8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/WeightedSetItem.java @@ -0,0 +1,175 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import com.yahoo.collections.CopyOnWriteHashMap; +import com.yahoo.compress.IntegerCompressor; +import com.yahoo.prelude.query.textualrepresentation.Discloser; + +import edu.umd.cs.findbugs.annotations.NonNull; + +import java.nio.ByteBuffer; +import java.util.Iterator; +import java.util.Map; + +/** + * A term which contains a weighted set. + * + * When using a weighted set to search a field, all tokens present in + * the searched field will be reverse matched against the weighted + * set. This means that using a weighted set to search a single-value + * attribute field will have similar semantics to using a normal term + * to search a weighted set field. 
The low-level matching information + * resulting from matching a document with a weighted set term will + * contain the weights of all the matched tokens in descending + * order. Each matched weight will be represented as a standard + * occurrence on position 0 in element 0. + * + */ +public class WeightedSetItem extends SimpleTaggableItem { + + @NonNull + private String indexName = ""; + + private CopyOnWriteHashMap<Object,Integer> set = new CopyOnWriteHashMap<>(1000); + + /** Creates an empty weighted set; note you must provide an index name up front */ + public WeightedSetItem(String indexName) { + if (indexName == null) { + this.indexName = ""; + } else { + this.indexName = indexName; + } + } + + public Integer addToken(long value, int weight) { + return addInternal(value, weight); + } + /** + * Add weighted token. + * If token is already in the set, the maximum weight is kept. + * NOTE: The weight must be 1 or more; negative values (and zero) are not allowed. + * @return weight of added token (might be old value, if kept) + */ + public Integer addToken(String token, int weight) { + if (token == null) throw new IllegalArgumentException("token must be a string"); + return addInternal(token, weight); + } + private Integer addInternal(Object token, int weight) { + Integer newWeight = weight; + Integer oldWeight = set.put(token, newWeight); + if (oldWeight != null && oldWeight > newWeight) { + set.put(token, oldWeight); + return oldWeight; + } + return newWeight; + } + + /** + * Add token with weight 1. 
+ */ + public Integer addToken(String token) { + return addToken(token, 1); + } + + public Integer getTokenWeight(String token) { + return set.get(token); + } + + public Integer removeToken(String token) { + return set.remove(token); + } + + public int getNumTokens() { + return set.size(); + } + + public Iterator<Map.Entry<Object,Integer>> getTokens() { + return set.entrySet().iterator(); + } + + @Override + public void setIndexName(String index) { + if (index == null) { + this.indexName = ""; + } else { + this.indexName = index; + } + } + + @NonNull + public String getIndexName() { + return indexName; + } + + @Override + public ItemType getItemType() { + return ItemType.WEIGHTEDSET; + } + + @Override + public String getName() { + return getItemType().name(); + } + + // for tracing - random text format + @Override + protected void appendBodyString(StringBuilder buffer) { + buffer.append(indexName); + buffer.append("{"); + for (Map.Entry<Object, Integer> entry : set.entrySet()) { + buffer.append("["); + buffer.append(entry.getValue()); + buffer.append("]:\""); + buffer.append(entry.getKey()); + buffer.append("\","); + } + buffer.deleteCharAt(buffer.length() - 1); // remove extra "," + buffer.append("}"); + } + + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("index", indexName); + for (Map.Entry<Object, Integer> entry : set.entrySet()) { + WordItem subitem = new WordItem(entry.getKey().toString(), indexName); + subitem.setWeight(entry.getValue()); + discloser.addChild(subitem); + } + } + + @Override + public int encode(ByteBuffer buffer) { + encodeThis(buffer); + int itemCount = 1; + for (Map.Entry<Object, Integer> entry : set.entrySet()) { + Object key = entry.getKey(); + if (key instanceof Long) { + new PureWeightedInteger((Long)key, entry.getValue()).encode(buffer); + } else { + new PureWeightedString(key.toString(), entry.getValue()).encode(buffer); + } + itemCount++; + } + return itemCount; + } + + 
@Override + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + IntegerCompressor.putCompressedPositiveNumber(set.size(), buffer); + putString(indexName, buffer); + } + + @Override + public int getTermCount() { + return 1; // this is just one (big) term + } + + @Override + public WeightedSetItem clone() { + WeightedSetItem clone = (WeightedSetItem)super.clone(); + clone.set = this.set.clone(); + return clone; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/WordAlternativesItem.java b/container-search/src/main/java/com/yahoo/prelude/query/WordAlternativesItem.java new file mode 100644 index 00000000000..b31dd2bd18e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/WordAlternativesItem.java @@ -0,0 +1,183 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +import com.google.common.collect.ImmutableList; +import com.yahoo.compress.IntegerCompressor; + +/** + * A set words with differing exactness scores to be used for literal boost + * ranking. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class WordAlternativesItem extends TermItem { + + private List<Alternative> alternatives; + private int maxIndex; + + public static final class Alternative { + public final String word; + public final double exactness; + + public Alternative(String word, double exactness) { + super(); + this.word = word; + this.exactness = exactness; + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + builder.append("Alternative [word=").append(word).append(", exactness=").append(exactness).append("]"); + return builder.toString(); + } + } + + public WordAlternativesItem(String indexName, boolean isFromQuery, Substring origin, Collection<Alternative> terms) { + super(indexName, isFromQuery, origin); + setAlternatives(terms); + } + + public void setAlternatives(Collection<Alternative> terms) { + this.alternatives = uniqueAlternatives(terms); + setMaxIndex(); + } + + private static ImmutableList<Alternative> uniqueAlternatives(Collection<Alternative> terms) { + List<Alternative> uniqueTerms = new ArrayList<Alternative>(terms.size()); + for (Alternative term : terms) { + int i = Collections.binarySearch(uniqueTerms, term, (t0, t1) -> t0.word.compareTo(t1.word)); + if (i >= 0) { + Alternative old = uniqueTerms.get(i); + if (old.exactness < term.exactness) { + uniqueTerms.set(i, term); + } + } else { + uniqueTerms.add(~i, term); + } + } + return ImmutableList.copyOf(uniqueTerms); + } + + private void setMaxIndex() { + int maxIndex = 0; + int currentIndex = 0; + double maxScore = 0.0d; + boolean first = true; + for (Alternative val : this.alternatives) { + if (first) { + first = false; + maxIndex = 0; + maxScore = val.exactness; + } else { + if (val.exactness > maxScore) { + maxScore = val.exactness; + maxIndex = currentIndex; + } + } + ++currentIndex; + } + this.maxIndex = maxIndex; + } + + @Override + public String stringValue() { + return 
alternatives.get(maxIndex).word; + } + + @Override + public boolean isStemmed() { + return true; + } + + @Override + public int getNumWords() { + return alternatives.size(); + } + + @Override + public void setValue(String value) { + throw new UnsupportedOperationException("semantics for setting to a string would be brittle, use setAlternatives()"); + } + + @Override + public String getRawWord() { + if (getOrigin() == null) { + return stringValue(); + } else { + return getOrigin().getValue(); + } + } + + @Override + public boolean isWords() { + return true; + } + + @Override + public String getIndexedString() { + return alternatives.stream().map((x) -> x.word).collect(Collectors.joining(" ")); + } + + @Override + public ItemType getItemType() { + return ItemType.WORD_ALTERNATIVES; // placeholder + } + + @Override + public String getName() { + return "WORD_ALTERNATIVES"; + } + + /** + * Return an immutable snapshot of the contained terms. This list will not + * reflect later changes to the item. + * + * @return an immutable list of word alternatives and their respective + * scores + */ + public List<Alternative> getAlternatives() { + return alternatives; + } + + + @Override + public void encodeThis(ByteBuffer target) { + super.encodeThis(target); + IntegerCompressor.putCompressedPositiveNumber(getNumWords(), target); + for (Alternative a : alternatives) { + Item p = new PureWeightedString(a.word, (int) (getWeight() * a.exactness + 0.5)); + p.setFilter(isFilter()); + p.encode(target); + } + } + + /** + * Add a new alternative iff the term string is not already present with an + * equal or higher exactness score. If the term string is present with a + * lower exactness score, the new, higher score will take precedence. 
+ * + * @param term + * one of several string interpretations of the input word + * @param exactness + * how close the term string matches what the user input + */ + public void addTerm(String term, double exactness) { + // do note, Item is Cloneable, and overwriting the reference is what + // saves us from overriding the method + if (alternatives.stream().anyMatch((a) -> a.word.equals(term) && a.exactness >= exactness )) { + return; + } + List<Alternative> newTerms = new ArrayList<>(alternatives.size() + 1); + newTerms.addAll(alternatives); + newTerms.add(new Alternative(term, exactness)); + setAlternatives(newTerms); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/WordItem.java b/container-search/src/main/java/com/yahoo/prelude/query/WordItem.java new file mode 100644 index 00000000000..361993900ba --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/WordItem.java @@ -0,0 +1,188 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + + +import com.yahoo.prelude.query.parser.Token; +import com.yahoo.prelude.query.textualrepresentation.Discloser; +import com.yahoo.protect.Validator; + +import java.nio.ByteBuffer; + +/** + * A term item containing a single word. + * + * @author bratseth + * @author havardpe + */ +public class WordItem extends TermItem { + + /** True if this is <b>not</b> part of the special tokens dictionary */ + private boolean words = true; + + /** Is this word stemmed? */ + private boolean stemmed = false; + + /** Is this word produced from segmenting a block of word characters? 
*/ + private boolean fromSegmented = false; + + /** If fromSegmented is true, this is the index into the list of segments */ + private int segmentIndex = 0; + + /** The word as it should be searched, never null */ + private String word; + + private boolean lowercased = false; + + public ItemType getItemType() { + return ItemType.WORD; + } + + public String getName() { + return "WORD"; + } + + public WordItem(String word) { + this(word, ""); + } + + public WordItem(String word, String indexName) { + this(word, indexName, false, null); + } + + public WordItem(String word, boolean isFromQuery) { + this(word, null, isFromQuery, null); + } + + public WordItem(String word, String indexName, boolean isFromQuery) { + this(word, indexName, isFromQuery, null); + } + + public WordItem(Token word, boolean isFromQuery) { + this(word.toString(), "", isFromQuery, word.substring); + } + + public WordItem(String word, boolean isFromQuery, Substring origin) { + this(word, "", isFromQuery, origin); + } + + public WordItem(String word, String indexName, boolean isFromQuery, Substring origin) { + super(indexName, isFromQuery, origin); + setWord(word); + } + + public void setWord(String word) { + Validator.ensureNotNull("Word item word", word); + this.word = word; + } + + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); // takes care of index bytes + putString(getEncodedWord(), buffer); + } + + /** Returns the word for encoding. By default simply the word */ + protected String getEncodedWord() { + return getIndexedString(); + } + + /** Returns the same as {@link #stringValue} */ + public String getWord() { return word; } + + /** + * Returns this word as it should be used in executing the query. 
+ * This is usually (but not always) a normalized and stemmed form + */ + public @Override String stringValue() { return word; } + + /** Same as #setWord */ + public @Override void setValue(String value) { setWord(value); } + + /** + * Get the word exactly as received in the request. + * This returns the same as getWord if no other raw form is known + * + * @return the raw form of this word, never null + */ + @Override + public String getRawWord() { + if (getOrigin()!=null) return getOrigin().getValue(); + return word; + } + + public boolean isStemmed() { return stemmed; } + + public void setStemmed(boolean stemmed) { this.stemmed = stemmed; } + + public boolean isFromSegmented() { + return fromSegmented; + } + + public void setFromSegmented(boolean fromSegmented) { + this.fromSegmented = fromSegmented; + } + + public boolean isLowercased() { + return lowercased; + } + + public void setLowercased(boolean lowercased) { + this.lowercased = lowercased; + } + + public int getSegmentIndex() { + return segmentIndex; + } + + public void setSegmentIndex(int segmentIndex) { + this.segmentIndex = segmentIndex; + } + + /** Word items uses a empty heading instead of "WORD " */ + protected void appendHeadingString(StringBuilder buffer) {} + + public int hashCode() { + return word.hashCode() + 71 * super.hashCode(); + } + + public boolean equals(Object object) { + if (!super.equals(object)) { + return false; + } + + WordItem other = (WordItem) object; // Ensured by superclass + + if (!this.word.equals(other.word)) { + return false; + } + + return true; + } + + public int getNumWords() { + return 1; + } + + @Override + public String getIndexedString() { + return word; + } + + /** Returns true if this consists of regular word characters. 
Returns false if this represents a "special token" */ + public boolean isWords() { + return words; + } + + /** Sets if this consists of regular word characters (true) or represents a "special token" (false) */ + public void setWords(boolean words) { + this.words = words; + } + + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("fromSegmented", fromSegmented); + discloser.addProperty("segmentIndex", segmentIndex); + discloser.addProperty("stemmed", stemmed); + discloser.addProperty("words", words); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/package-info.java b/container-search/src/main/java/com/yahoo/prelude/query/package-info.java new file mode 100644 index 00000000000..95dbd62849f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/package-info.java @@ -0,0 +1,11 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * The query model representing a boolean combination of selection criterions, where elements may be + * simple criterions, nested boolean operators, or annotated pieces of natural language text. + */ +@ExportPackage +@PublicApi +package com.yahoo.prelude.query; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java new file mode 100644 index 00000000000..fb56e10445a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java @@ -0,0 +1,311 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.query.parser; + +import com.yahoo.language.Language; +import com.yahoo.language.process.Segmenter; +import com.yahoo.log.event.*; +import com.yahoo.prelude.Index; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.*; +import com.yahoo.search.query.QueryTree; +import com.yahoo.search.query.parser.Parsable; +import com.yahoo.search.query.parser.ParserEnvironment; + +import java.util.*; + +/** + * The Vespa query parser. + * + * @author bratseth + * @author Steinar Knutsen + */ +@SuppressWarnings("deprecation") +public abstract class AbstractParser implements CustomParser { + + /** The current submodes of this parser */ + protected Submodes submodes = new Submodes(); + + /** + * The current language of this parser. Used to decide whether and how to + * use the CJKSegmenter + */ + protected Language language = Language.UNKNOWN; + + /** The IndexFacts.Session of this query */ + protected IndexFacts.Session indexFacts; + + /** + * The counter for braces in URLs, braces in URLs are accepted so long as + * they are balanced. + */ + protected int braceLevelURL = 0; + + protected final ParserEnvironment environment; + protected final TokenPosition tokens = new TokenPosition(); + + /** + * An enumeration of the parser index-controlled submodes. Any combination + * of these may be active at the same time. SubModes are activated or + * deactivated by specifying special indexes in the query. + */ + final class Submodes { + + /** + * Url mode allows "_" and "-" as word characters. Default is false + */ + public boolean url = false; + + /** + * Site mode - host names get start of host and end of host markers. + * Default is false + */ + public boolean site = false; + + /** + * Sets submodes from an index. + * + * @param indexName the index name which should decide the submodes, or null to do nothing. 
+ * @param session the session used to look up information about this index + */ + @SuppressWarnings({"deprecation"}) + // To avoid this we need to pass an IndexFacts.session down instead - easily done but not without breaking API's + public void setFromIndex(final String indexName, IndexFacts.Session session) { + if (indexName == null) { + return; + } + + reset(); + + final Index current = session.getIndex(indexName); + + if (current.isUriIndex()) { + url = true; + } else if (current.isHostIndex()) { + site = true; + } + } + + /** Sets default values for all submodes */ + public void reset() { + url = false; + site = false; + } + + /** + * Returns whether we are in a mode which allows explicit anchoring + * markers, ^ and $ + * + * @return True if we are doing explicit anchoring. + */ + public boolean explicitAnchoring() { + return site; + } + } + + /** + * <p>Creates a new instance of this class, storing the given {@link ParserEnvironment} for parse-time access to the + * environment.</p> + * + * @param environment The environment settings to attach to the Parser. 
+ */ + protected AbstractParser(ParserEnvironment environment) { + this.environment = ParserEnvironment.fromParserEnvironment(environment); + if (this.environment.getIndexFacts() == null) { + this.environment.setIndexFacts(new IndexFacts()); + } + } + + @Override + public final QueryTree parse(Parsable query) { + Item root = null; + if (query != null) { + root = parse(query.getQuery(), + query.getFilter(), + query.getLanguage(), + environment.getIndexFacts().newSession(query.getSources(), query.getRestrict()), + query.getDefaultIndexName()); + } + if (root == null) { + root = new NullItem(); + } + return new QueryTree(root); + } + + @Override + public final Item parse(String queryToParse, String filterToParse, Language parsingLanguage, + IndexFacts.Session indexFacts, String defaultIndexName) { + if (queryToParse == null) { + return null; + } + if (parsingLanguage == null) { + parsingLanguage = environment.getLinguistics().getDetector().detect(queryToParse, null).getLanguage(); + } + setState(parsingLanguage, indexFacts); + tokenize(queryToParse, defaultIndexName, indexFacts); + Item root = parseItems(); + if (filterToParse != null) { + AnyParser filterParser = new AnyParser(environment); + if (root == null) { + root = filterParser.parseFilter(filterToParse, parsingLanguage, indexFacts); + } else { + root = filterParser.applyFilter(root, filterToParse, parsingLanguage, indexFacts); + } + } + root = simplifyPhrases(root); + if (defaultIndexName != null) { + assignDefaultIndex(indexFacts.getCanonicName(defaultIndexName), root); + } + return root; + } + + protected abstract Item parseItems(); + + /** + * Assigns the default index to query terms having no default index The + * parser _should_ have done this, for some reason it doesn't + * + * @param defaultIndex The default index to assign. + * @param item The item to check. 
+ */ + private static void assignDefaultIndex(final String defaultIndex, + final Item item) { + if (defaultIndex == null || item == null) { + return; + } + + if (item instanceof IndexedItem) { + final IndexedItem indexName = (IndexedItem) item; + + if ("".equals(indexName.getIndexName())) { + indexName.setIndexName(defaultIndex); + } + } else if (item instanceof CompositeItem) { + final Iterator<Item> items = ((CompositeItem) item) + .getItemIterator(); + while (items.hasNext()) { + final Item i = items.next(); + assignDefaultIndex(defaultIndex, i); + } + } + } + + /** + * Unicode normalizes some piece of natural language text. The chosen form + * is compatibility decomposition, canonical composition (NFKC). + * + * @param input The string to normalize. + * @return The normalized string. + */ + protected String normalize(final String input) { + if (input == null || input.length() == 0) { + return input; + } + return environment.getLinguistics().getNormalizer().normalize(input); + } + + protected void setState(final Language queryLanguage, IndexFacts.Session indexFacts) { + this.indexFacts = indexFacts; + language = queryLanguage; + submodes.reset(); + } + + /** + * Tokenizes the given string and initializes tokens with the found tokens. + * + * @param query the string to tokenize. + * @param defaultIndexName the name of the index to use as default. + * @param indexFacts resolved information about the index we are searching + */ + protected void tokenize(String query, String defaultIndexName, IndexFacts.Session indexFacts) { + Tokenizer tokenizer = new Tokenizer(environment.getLinguistics()); + tokenizer.setSubstringSpecialTokens(language.isCjk()); + tokenizer.setSpecialTokens(environment.getSpecialTokens()); + tokens.initialize(tokenizer.tokenize(query, defaultIndexName, indexFacts)); + } + + /** + * Collapses single item phrases in the tree to the contained item. + * + * @param unwashed The item whose phrases to simplify. + * @return The simplified item. 
+ */ + public static Item simplifyPhrases(final Item unwashed) { + if (unwashed == null) { + return unwashed; + } else if (unwashed instanceof PhraseItem) { + return collapsePhrase((PhraseItem) unwashed); + } else if (unwashed instanceof CompositeItem) { + final CompositeItem composite = (CompositeItem) unwashed; + final ListIterator<Item> i = composite.getItemIterator(); + + while (i.hasNext()) { + final Item original = i.next(); + final Item transformed = simplifyPhrases(original); + + if (original != transformed) { + i.set(transformed); + } + } + return unwashed; + } else { + return unwashed; + } + } + + private static Item collapsePhrase(final PhraseItem phrase) { + if (phrase.getItemCount() == 1 && phrase.getItem(0) instanceof WordItem) { + // TODO: Other stuff which needs propagation? + final WordItem word = (WordItem) phrase.getItem(0); + + word.setWeight(phrase.getWeight()); + return word; + } else { + return phrase; + } + } + + // TODO: The segmenting stuff is a mess now, this will fix it: + // - Make Segmenter a class which is instantiated per parsing + // - Make the instance know the language, etc and do all dispatching + // internally + // -JSB + // TODO: Use segmenting for forced phrase searches? 
+ protected Item segment(final Token token) { + final String normalizedToken = normalize(token.toString()); + + if (token.isSpecial()) { + final WordItem w = new WordItem(token.toString(), true, token.substring); + w.setWords(false); + w.setFromSpecialToken(true); + return w; + } + + if (language == Language.UNKNOWN) { + return new WordItem(normalizedToken, true, token.substring); + } + + + Segmenter segmenter = environment.getLinguistics().getSegmenter(); + List<String> segments = segmenter.segment(normalizedToken, language); + if (segments.size() == 0) { + return null; + } + if (segments.size() == 1) { + return new WordItem(segments.get(0), "", true, token.substring); + } + + final CompositeItem composite = new PhraseSegmentItem(token.toString(), + normalizedToken, true, false, token.substring); + int n = 0; + for (final String segment : segments) { + final WordItem w = new WordItem(segment, "", true, token.substring); + w.setFromSegmented(true); + w.setSegmentIndex(n++); + w.setStemmed(false); + composite.addItem(w); + } + composite.lock(); + return composite; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/AdvancedParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/AdvancedParser.java new file mode 100644 index 00000000000..411565ee32c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/AdvancedParser.java @@ -0,0 +1,214 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query.parser; + +import com.yahoo.prelude.query.*; +import com.yahoo.search.query.parser.ParserEnvironment; + +import static com.yahoo.prelude.query.parser.Token.Kind.LBRACE; +import static com.yahoo.prelude.query.parser.Token.Kind.NUMBER; + +/** + * Parser for queries of type advanced. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @deprecated since 5.11, YQL+ should be used for formal queries + */ +@Deprecated // OK DO NOT REMOVE (we'll keep this around longer) +public class AdvancedParser extends StructuredParser { + + public AdvancedParser(ParserEnvironment environment) { + super(environment); + } + + protected Item parseItems() { + return advancedItems(true); + } + + protected Item handleComposite(boolean topLevel) { + return advancedItems(false); + } + + /** + * A collection of one or more advanced items. + */ + private Item advancedItems(boolean topLevel) { + int position = tokens.getPosition(); + Item item = null; + + try { + item = advancedItemsBody(topLevel); + return item; + } finally { + if (item == null) { + tokens.setPosition(position); + } + } + } + + private Item advancedItemsBody(boolean topLevel) { + Item topLevelItem = null; + Item item; + boolean itemIsComposite; + boolean topLevelIsClosed = false; + boolean expectingOperator = false; + + do { + item = null; + + if (item == null) { + item = indexableItem(); + if (item == null) { + item = compositeItem(); + itemIsComposite = true; + } else { + itemIsComposite = false; + } + if (item != null) { + Item newTop = null; + + if (expectingOperator) { + newTop = handleAdvancedOperator(topLevelItem, item, + topLevelIsClosed); + } + if (newTop != null) { // Operator found + topLevelIsClosed = false; + expectingOperator = false; + topLevelItem = newTop; + } else if (topLevelItem == null) { + topLevelItem = item; + if (itemIsComposite) { + topLevelIsClosed = true; + } + expectingOperator = true; + } else if (topLevelItem instanceof CompositeItem + && !(topLevelItem instanceof SegmentItem)) { + ((CompositeItem) topLevelItem).addItem(item); + expectingOperator = true; + } else { + AndItem and = new AndItem(); + + and.addItem(topLevelItem); + and.addItem(item); + topLevelItem = and; + topLevelIsClosed = false; + expectingOperator = true; + } + } + } + + if 
(topLevel && item == null) { + tokens.skip(); + } + } while (tokens.hasNext() && (topLevel || item != null)); + + // Optimize away composites containing one item only + // (including nots with only a positive) + if (topLevelItem instanceof CompositeItem + && ((CompositeItem) topLevelItem).getItemCount() == 1) { + return ((CompositeItem) topLevelItem).removeItem(0); + } + + return topLevelItem; + } + + /** Returns whether the item is a specific word item */ + private boolean isTheWord(String word, Item item) { + if (!(item instanceof WordItem)) { + return false; + } + return word.equalsIgnoreCase(((WordItem) item).getRawWord()); // TODO: Why not search for getWord w.o lowercasing? + } + + + + /** Returns the new top level, or null if the current item is not an operator */ + private Item handleAdvancedOperator(Item topLevelItem, Item item, boolean topLevelIsClosed) { + if (isTheWord("and", item)) { + if (topLevelIsClosed || !(topLevelItem instanceof AndItem)) { + AndItem and = new AndItem(); + + and.addItem(topLevelItem); + return and; + } + return topLevelItem; + } else if (isTheWord("or", item)) { + if (topLevelIsClosed || !(topLevelItem instanceof OrItem)) { + OrItem or = new OrItem(); + + or.addItem(topLevelItem); + return or; + } + return topLevelItem; + } else if (isTheWord("equiv", item)) { + if (topLevelIsClosed || !(topLevelItem instanceof EquivItem)) { + EquivItem equiv = new EquivItem(); + + equiv.addItem(topLevelItem); + return equiv; + } + return topLevelItem; + } else if (isTheWord("wand", item)) { + int n=consumeNumericArgument(); + if (n==0) + n=WeakAndItem.defaultN; + if (topLevelIsClosed || !(topLevelItem instanceof WeakAndItem) || n!=((WeakAndItem)topLevelItem).getN()) { + WeakAndItem wand = new WeakAndItem(); + wand.setN(n); + wand.addItem(topLevelItem); + return wand; + } + return topLevelItem; + } else if (isTheWord("andnot", item)) { + if (topLevelIsClosed || !(topLevelItem instanceof NotItem)) { + NotItem not = new NotItem(); + + 
not.addPositiveItem(topLevelItem); + return not; + } + return topLevelItem; + } else if (isTheWord("rank", item)) { + if (topLevelIsClosed || !(topLevelItem instanceof RankItem)) { + RankItem rank = new RankItem(); + + rank.addItem(topLevelItem); + return rank; + } + return topLevelItem; + } else if (isTheWord("near", item)) { + int distance = consumeNumericArgument(); + if (distance==0) + distance=NearItem.defaultDistance; + if (topLevelIsClosed || !(topLevelItem instanceof NearItem) || distance!=((NearItem)topLevelItem).getDistance()) { + NearItem near = new NearItem(distance); + + near.addItem(topLevelItem); + return near; + } + return topLevelItem; + } else if (isTheWord("onear", item)) { + int distance = consumeNumericArgument(); + if (distance==0) + distance=ONearItem.defaultDistance; + if (topLevelIsClosed || !(topLevelItem instanceof ONearItem) || distance!=((ONearItem)topLevelItem).getDistance()) { + ONearItem oNear = new ONearItem(distance); + + oNear.addItem(topLevelItem); + return oNear; + } + return topLevelItem; + } + + return null; + } + + /** Returns the argument to this operator or 0 if none */ + private int consumeNumericArgument() { + if (!tokens.currentIs(LBRACE)) return 0; + tokens.skip(LBRACE); + if (!tokens.currentIsNoIgnore(NUMBER)) throw new IllegalArgumentException("Expected an integer argument"); + int distance=Integer.valueOf(tokens.next().image); + if (!tokens.skip(Token.Kind.RBRACE)) throw new IllegalArgumentException("Expected a right brace following the argument"); + return distance; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/AllParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/AllParser.java new file mode 100644 index 00000000000..cb540a1f982 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/AllParser.java @@ -0,0 +1,186 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.prelude.query.parser; + +import com.yahoo.prelude.query.*; +import com.yahoo.search.query.QueryTree; +import com.yahoo.search.query.parser.ParserEnvironment; + +import java.util.Iterator; + +import static com.yahoo.prelude.query.parser.Token.Kind.MINUS; +import static com.yahoo.prelude.query.parser.Token.Kind.SPACE; + +/** + * Parser for queries of type all. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class AllParser extends SimpleParser { + + public AllParser(ParserEnvironment environment) { + super(environment); + } + + protected Item parseItems() { + int position = tokens.getPosition(); + try { + return parseItemsBody(); + } finally { + tokens.setPosition(position); + } + } + + protected Item parseItemsBody() { + // Algorithm: Collect positive, negative, and and'ed items, then combine. + AndItem and=null; + NotItem not=null; // Store negatives here as we go + Item current; + + // Find all items + do { + current=negativeItem(); + if (current!=null) { + not=addNot(current,not); + continue; + } + + current=positiveItem(); + if (current==null) + current = indexableItem(); + if (current == null) + current = compositeItem(); + + if (current!=null) + and=addAnd(current,and); + + if (current == null) + tokens.skip(); + } while (tokens.hasNext()); + + // Combine the items + Item topLevel=and; + + if (not!=null && topLevel!=null) { + not.setPositiveItem(topLevel); + topLevel=not; + } + + return simplifyUnnecessaryComposites(topLevel); + } + + // Simplify if there are unnecessary composites due to single elements + protected final Item simplifyUnnecessaryComposites(Item item) { + if (item == null) return null; + + QueryTree root = new QueryTree(item); + QueryCanonicalizer.canonicalize(root); + + return root.getRoot() instanceof NullItem ? 
null : root.getRoot(); + } + + protected AndItem addAnd(Item item,AndItem and) { + if (and==null) + and=new AndItem(); + and.addItem(item); + return and; + } + + protected OrItem addOr(Item item,OrItem or) { + if (or==null) + or=new OrItem(); + or.addItem(item); + return or; + } + + protected NotItem addNot(Item item,NotItem not) { + if (not==null) + not=new NotItem(); + not.addNegativeItem(item); + return not; + } + + protected Item negativeItem() { + int position = tokens.getPosition(); + Item item = null; + try { + if (!tokens.skipMultiple(MINUS)) return null; + + if (tokens.currentIsNoIgnore(SPACE)) return null; + + item = indexableItem(); + if (item == null) { + item = compositeItem(); + + if (item != null) { + if (item instanceof OrItem) { // Turn into And + AndItem and = new AndItem(); + + for (Iterator<Item> i = ((OrItem) item).getItemIterator(); i.hasNext();) { + and.addItem(i.next()); + } + item = and; + } + } + } + if (item!=null) + item.setProtected(true); + return item; + } finally { + if (item == null) { + tokens.setPosition(position); + } + } + } + + /** + * Returns the top level item resulting from combining the given top + * level item and the new item. This implements most of the weird transformation + * rules of the parser. 
+ */ + protected Item combineItems(Item topLevelItem, Item item) { + if (topLevelItem == null) { + return item; + } else if (topLevelItem instanceof OrItem && item instanceof OrItem) { + OrItem newTopOr = new OrItem(); + + newTopOr.addItem(topLevelItem); + newTopOr.addItem(item); + return newTopOr; + } else if (item instanceof OrItem && topLevelItem instanceof RankItem) { + for (Iterator<Item> i = ((RankItem) topLevelItem).getItemIterator(); i.hasNext();) { + ((OrItem) item).addItem(0, i.next()); + } + return item; + } else if (item instanceof OrItem && topLevelItem instanceof PhraseItem) { + OrItem newTopOr = new OrItem(); + + newTopOr.addItem(topLevelItem); + newTopOr.addItem(item); + return newTopOr; + } else if (!(topLevelItem instanceof RankItem)) { + RankItem rank = new RankItem(); + + if (topLevelItem instanceof NotItem) { // Strange rule, but that's how it is + rank.addItem(topLevelItem); + rank.addItem(item); + } else { + rank.addItem(item); + rank.addItem(topLevelItem); + } + return rank; + } else if ((item instanceof RankItem) && (((RankItem)item).getItem(0) instanceof OrItem)) { + RankItem itemAsRank = (RankItem) item; + OrItem or = (OrItem) itemAsRank.getItem(0); + + ((RankItem) topLevelItem).addItem(0, or); + for (int i = 1; i < itemAsRank.getItemCount(); i++) { + or.addItem(0, itemAsRank.getItem(i)); + } + return topLevelItem; + } else { + ((RankItem) topLevelItem).addItem(0, item); + return topLevelItem; + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java new file mode 100644 index 00000000000..3043cb27247 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java @@ -0,0 +1,266 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.query.parser; + +import com.yahoo.language.Language; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.*; +import com.yahoo.search.query.parser.ParserEnvironment; + +import java.util.Collections; +import java.util.Iterator; +import java.util.Set; + +import static com.yahoo.prelude.query.parser.Token.Kind.*; + +/** + * Parser for queries of type any. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class AnyParser extends SimpleParser { + + public AnyParser(ParserEnvironment environment) { + super(environment); + } + + protected Item parseItems() { + return anyItems(true); + } + + Item parseFilter(String filter, Language queryLanguage, Set<String> searchDefinitions) { + return parseFilter(filter, queryLanguage, environment.getIndexFacts().newSession(searchDefinitions, Collections.emptySet())); + } + + Item parseFilter(String filter, Language queryLanguage, IndexFacts.Session indexFacts) { + Item filterRoot; + + setState(queryLanguage, indexFacts); + tokenize(filter, null, indexFacts); + + filterRoot = anyItems(true); + + if (filterRoot == null) { + return null; + } + + markAllTermsAsFilters(filterRoot); + return filterRoot; + } + + protected Item negativeItem() { + int position = tokens.getPosition(); + Item item = null; + + try { + tokens.skipMultiple(PLUS); + + if (!tokens.skipMultiple(MINUS)) { + return null; + } + + if (tokens.currentIsNoIgnore(SPACE)) { + return null; + } + + if (item == null) { + item = indexableItem(); + } + + if (item == null) { + item = compositeItem(); + + if (item != null) { + if (item instanceof OrItem) { // Turn into And + AndItem and = new AndItem(); + + for (Iterator<Item> i = ((OrItem) item).getItemIterator(); i.hasNext();) { + and.addItem(i.next()); + } + item = and; + } + } + } + if (item!=null) + item.setProtected(true); + return item; + } finally { + if (item == null) { + tokens.setPosition(position); + } + } + } + + /** + * Returns the top 
level item resulting from combining the given top + * level item and the new item. This implements most of the weird transformation + * rules of the parser. + */ + protected Item combineItems(Item topLevelItem, Item item) { + if (topLevelItem == null) { + return item; + } else if (topLevelItem instanceof OrItem && item instanceof OrItem) { + OrItem newTopOr = new OrItem(); + + newTopOr.addItem(topLevelItem); + newTopOr.addItem(item); + return newTopOr; + } else if (!(topLevelItem instanceof RankItem)) { + RankItem rank = new RankItem(); + + if (topLevelItem instanceof NotItem) { // Strange rule, but that's how it is + rank.addItem(topLevelItem); + rank.addItem(item); + } else { + rank.addItem(item); + rank.addItem(topLevelItem); + } + return rank; + } else if ((topLevelItem instanceof RankItem) + && (item instanceof RankItem) + && (((RankItem) item).getItem(0) instanceof OrItem)) { + RankItem itemAsRank = (RankItem) item; + OrItem or = (OrItem) itemAsRank.getItem(0); + + ((RankItem) topLevelItem).addItem(0, or); + for (int i = 1; i < itemAsRank.getItemCount(); i++) { + or.addItem(0, itemAsRank.getItem(i)); + } + return topLevelItem; + } else { + ((RankItem) topLevelItem).addItem(0, item); + return topLevelItem; + } + } + + Item applyFilter(Item root, String filter, Language queryLanguage, IndexFacts.Session indexFacts) { + setState(queryLanguage, indexFacts); + tokenize(filter, null, indexFacts); + return filterItems(root); + } + + private void markAllTermsAsFilters(Item root) { + if (root instanceof BlockItem) { + root.setFilter(true); + } + + if (root instanceof TermItem) { + root.setFilter(true); + } else { + if (root instanceof PhraseItem) { + root.setFilter(true); + } + for (Iterator<Item> i = ((CompositeItem) root).getItemIterator(); i.hasNext();) { + markAllTermsAsFilters(i.next()); + } + } + } + + private Item filterItems(Item root) { + while (tokens.hasNext()) { + Item item = null; + + item = positiveItem(); + root = addAndFilter(root, item); + if (item == 
null) { + item = negativeItem(); + root = addNotFilter(root, item); + } + if (item == null) { + item = indexableItem(); + root = addRankFilter(root, item); + } + + if (item != null) { + markAllTermsAsFilters(item); + } else { + tokens.skip(); + } + } + return root; + } + + private Item addAndFilter(Item root, Item item) { + if (item == null) { + return root; + } + + if (root instanceof AndItem) { + ((AndItem) root).addItem(item); + return root; + } + + if (root instanceof RankItem) { + Item firstChild = ((RankItem) root).getItem(0); + + if (firstChild instanceof AndItem) { + ((AndItem) firstChild).addItem(item); + return root; + } else if (firstChild instanceof NotItem) { + ((NotItem) firstChild).addPositiveItem(item); + return root; + } + } + + AndItem and = new AndItem(); + + and.addItem(root); + and.addItem(item); + return and; + } + + private Item addNotFilter(Item root, Item item) { + if (item == null) { + return root; + } + + if (root instanceof NotItem) { + ((NotItem) root).addNegativeItem(item); + return root; + } + + if (root instanceof RankItem) { + RankItem rootAsRank = (RankItem) root; + Item firstChild = rootAsRank.getItem(0); + + if (firstChild instanceof NotItem) { + ((NotItem) firstChild).addNegativeItem(item); + return root; + } else { + NotItem not = new NotItem(); + + not.addPositiveItem(rootAsRank.removeItem(0)); + not.addNegativeItem(item); + if (rootAsRank.getItemCount() == 0) { + return not; + } else { + rootAsRank.addItem(0, not); + return root; + } + } + } + + NotItem not = new NotItem(); + + not.addPositiveItem(root); + not.addNegativeItem(item); + return not; + } + + private Item addRankFilter(Item root, Item item) { + if (item == null) { + return root; + } + + if (root instanceof RankItem) { + ((RankItem) root).addItem(item); + return root; + } + + RankItem rank = new RankItem(); + + rank.addItem(root); + rank.addItem(item); + return rank; + } + +} diff --git 
a/container-search/src/main/java/com/yahoo/prelude/query/parser/CustomParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/CustomParser.java new file mode 100644 index 00000000000..a658d35e6de --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/CustomParser.java @@ -0,0 +1,34 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query.parser; + +import com.yahoo.language.Language; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.Item; +import com.yahoo.search.query.parser.Parser; + +import java.util.Collections; +import java.util.Objects; +import java.util.Set; + +/** + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + * @since 5.1.4 + */ +public interface CustomParser extends Parser { + + /** + * Returns the raw result from parsing, <i>not</i> wrapped in a QueryTree + * instance. This may also be null, as opposed to using + * {@link Parser#parse(com.yahoo.search.query.parser.Parsable)}. + */ + default Item parse(String queryToParse, String filterToParse, Language parsingLanguage, + Set<String> toSearch, IndexFacts indexFacts, String defaultIndexName) { + if (indexFacts == null) + indexFacts = new IndexFacts(); + return parse(queryToParse, filterToParse, parsingLanguage, indexFacts.newSession(toSearch, Collections.emptySet()), defaultIndexName); + } + + Item parse(String queryToParse, String filterToParse, Language parsingLanguage, + IndexFacts.Session indexFacts, String defaultIndexName); + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/ParseException.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/ParseException.java new file mode 100644 index 00000000000..17ee905400e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/ParseException.java @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query.parser; + + +/** + * Parser exceptions. JavaCC legacy, never thrown. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +@SuppressWarnings("serial") +public class ParseException extends RuntimeException { + + public ParseException(String message) { + super(message); + } + + public ParseException(String message, Throwable cause) { + super(message, cause); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/PhraseParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/PhraseParser.java new file mode 100644 index 00000000000..ba10b7b6ee1 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/PhraseParser.java @@ -0,0 +1,60 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query.parser; + +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.PhraseItem; +import com.yahoo.search.query.parser.ParserEnvironment; + +/** + * Parser for queries of type phrase. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class PhraseParser extends AbstractParser { + + public PhraseParser(ParserEnvironment environment) { + super(environment); + } + + protected Item parseItems() { + return forcedPhrase(); + } + + /** + * Ignores everything but words and numbers + * + * @return a phrase item if several words/numbers was found, + * a word item if only one was found + */ + private Item forcedPhrase() { + Item firstWord = null; + PhraseItem phrase = null; + + while (tokens.hasNext()) { + Token token = tokens.next(); + + if (token.kind != Token.Kind.WORD && token.kind != Token.Kind.NUMBER) { + continue; + } + // Note, this depends on segment never creating AndItems when quoted + // (the second argument) is true. 
+ Item newWord = segment(token); + + if (firstWord == null) { // First pass + firstWord = newWord; + } else if (phrase == null) { // Second pass + phrase = new PhraseItem(); + phrase.addItem(firstWord); + phrase.addItem(newWord); + } else { // Following passes + phrase.addItem(newWord); + } + } + if (phrase != null) { + return phrase; + } else { + return firstWord; + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/ProgrammaticParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/ProgrammaticParser.java new file mode 100644 index 00000000000..f509825d14c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/ProgrammaticParser.java @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query.parser; + +import com.yahoo.language.Language; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.NullItem; +import com.yahoo.search.query.QueryTree; +import com.yahoo.search.query.parser.Parsable; +import com.yahoo.search.query.textserialize.TextSerialize; + +import java.util.Set; + +/** + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + * @since 5.1.4 + */ +public final class ProgrammaticParser implements CustomParser { + + @Override + public QueryTree parse(Parsable query) { + Item root = parse(query.getQuery(), null, null, null, null, null); + if (root == null) { + root = new NullItem(); + } + return new QueryTree(root); + + } + + @Override + public Item parse(String queryToParse, String filterToParse, Language parsingLanguage, + IndexFacts.Session indexFacts, String defaultIndexName) { + if (queryToParse == null) return null; + return TextSerialize.parse(queryToParse); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/SimpleParser.java 
b/container-search/src/main/java/com/yahoo/prelude/query/parser/SimpleParser.java new file mode 100644 index 00000000000..6117e8e29ed --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/SimpleParser.java @@ -0,0 +1,250 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query.parser; + +import com.yahoo.prelude.query.*; +import com.yahoo.search.query.parser.ParserEnvironment; + +import java.util.Iterator; + +import static com.yahoo.prelude.query.parser.Token.Kind.PLUS; +import static com.yahoo.prelude.query.parser.Token.Kind.SPACE; + +/** + * Base class for parsers of the "simple" query languages (query types + * ANY and ALL). + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +abstract class SimpleParser extends StructuredParser { + + protected SimpleParser(ParserEnvironment environment) { + super(environment); + } + + protected Item handleComposite(boolean topLevel) { + return anyItems(false); // Nesteds are any even if all on top level + } + + + protected abstract Item negativeItem(); + + /** + * A collection of one or more items. + * More items are collected in the default composite - or. 
+ * If there's a explicit composite and some other terms, + * a rank terms combines them + */ + protected Item anyItems(boolean topLevel) { + int position = tokens.getPosition(); + Item item = null; + + try { + item = anyItemsBody(topLevel); + return item; + } finally { + if (item == null) { + tokens.setPosition(position); + } + } + } + + private Item anyItemsBody(boolean topLevel) { + Item topLevelItem = null; + NotItem not = null; + Item item; + + do { + item = null; + + if (item == null) { + item = positiveItem(); + if (item != null) { + if (not == null) { + not = new NotItem(); + not.addPositiveItem(item); + topLevelItem = combineItems(topLevelItem, not); + } else { + not.addPositiveItem(item); + } + } + } + + if (item == null) { + item = negativeItem(); + if (item != null) { + if (not == null && item != null) { + not = new NotItem(); + not.addNegativeItem(item); + topLevelItem = combineItems(topLevelItem, not); + } else if (item != null) { + not.addNegativeItem(item); + } + } + } + + if (item == null) { + item = compositeItem(); + if (item != null) { + if (topLevelItem == null) { + topLevelItem = item; + } else { + topLevelItem = combineItems(topLevelItem, item); + } + } + } + + if (item == null) { + item = indexableItem(); + if (item != null) { + if (topLevelItem == null) { + topLevelItem = item; + } else if (needNewTopLevel(topLevelItem, item)) { + CompositeItem newTop = new OrItem(); + + newTop.addItem(topLevelItem); + newTop.addItem(item); + topLevelItem = newTop; + } else if (topLevelItem instanceof NotItem) { + topLevelItem = combineItems(topLevelItem, item); + } else { + ((CompositeItem) topLevelItem).addItem(item); + } + } + } + + if (topLevel && item == null) { + tokens.skip(); + } + } while (tokens.hasNext() && (topLevel || item != null)); + + if (not != null && not.getItemCount() == 1) { + // Incomplete not, only positive + // => pass the positive upwards instead, drop the not + if (topLevelItem == null || topLevelItem == not) { + return 
not.removeItem(0); // The positive + } else if (topLevelItem instanceof RankItem) { + removeNot((RankItem) topLevelItem); + return combineItems(topLevelItem, not.getPositiveItem()); + } + } + if (not != null && not.getPositiveItem() == null) { + // Incomplete not, only negatives - + + if (topLevelItem != null && topLevelItem != not) { + // => neutral rank items becomes implicit positives + not.addPositiveItem(getItemAsPositiveItem(topLevelItem, not)); + return not; + } else { // Only negatives - ignore them + return null; + } + } + if (topLevelItem != null) { + return topLevelItem; + } else { + return not; + } + } + + + /** Says whether we need a new top level item given the new item */ + private boolean needNewTopLevel(Item topLevelItem, Item item) { + if (item == null) { + return false; + } + if (topLevelItem instanceof TermItem) { + return true; + } + if (topLevelItem instanceof PhraseItem) { + return true; + } + if (topLevelItem instanceof BlockItem) { + return true; + } + return false; + } + + + /** + * Removes and returns the first <i>not</i> found in the composite, + * or returns null if there's none + */ + private NotItem removeNot(CompositeItem composite) { + for (int i = 0; i < composite.getItemCount(); i++) { + if (composite.getItem(i) instanceof NotItem) { + return (NotItem) composite.removeItem(i); + } + } + return null; + } + + protected abstract Item combineItems(Item topLevelItem, Item item); + + protected Item positiveItem() { + int position = tokens.getPosition(); + Item item = null; + + try { + if (!tokens.skipMultiple(PLUS)) { + return null; + } + + if (tokens.currentIsNoIgnore(SPACE)) { + return null; + } + + if (item == null) { + item = indexableItem(); + } + + if (item == null) { + item = compositeItem(); + } + if (item!=null) + item.setProtected(true); + return item; + } finally { + if (item == null) { + tokens.setPosition(position); + } + } + } + + /** + * Returns the content of the given item as an item to be added as a positive item. 
+ * Used to turn a top level item into implicit positives when explicit positives + * (+ items) are not found, but negatives are. + */ + private Item getItemAsPositiveItem(Item item, NotItem not) { + if (!(item instanceof RankItem)) { + return item; + } + + RankItem rank = (RankItem) item; + + // Remove the not from the rank item, the rank should generally + // be the first, but this is not always the case + int limit = rank.getItemCount(); + int n = 0; + + while (n < limit) { + if (rank.getItem(n) == not) { + rank.removeItem(n); + break; + } + n++; + } + + if (rank.getItemCount() == 1) { + return rank.getItem(0); + } + + // Several items - or together + OrItem or = new OrItem(); + + for (Iterator<Item> i = rank.getItemIterator(); i.hasNext();) { + or.addItem(i.next()); + } + return or; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/SpecialTokenRegistry.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/SpecialTokenRegistry.java new file mode 100644 index 00000000000..d2640e64821 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/SpecialTokenRegistry.java @@ -0,0 +1,137 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query.parser; + +import com.yahoo.config.subscription.ConfigGetter; +import com.yahoo.config.subscription.ConfigSubscriber; +import com.yahoo.vespa.configdefinition.SpecialtokensConfig; +import com.yahoo.vespa.configdefinition.SpecialtokensConfig.Tokenlist; +import com.yahoo.vespa.configdefinition.SpecialtokensConfig.Tokenlist.Tokens; + +import java.util.*; +import java.util.logging.Logger; + + +/** + * A <i>registry</i> which is responsible for knowing the current + * set of special tokens. The default registry returns empty token lists + * for all names. Usage of this registry is multithread safe. 
+ * + * @author bratseth + */ +public class SpecialTokenRegistry { + + /** The log of this */ + private static Logger log = Logger.getLogger(SpecialTokens.class.getName()); + + private static final SpecialTokens nullSpecialTokens = new SpecialTokens(); + + /** + * The current authorative special token lists, indexed on name. + * These lists are unmodifiable and used directly by clients of this + */ + private Map<String,SpecialTokens> specialTokenMap = new HashMap<>(); + + private boolean frozen = false; + + /** + * Creates an empty special token registry which + * does not subscribe to any configuration + */ + public SpecialTokenRegistry() {} + + /** + * Create a special token registry which subscribes to the specialtokens + * configuration. Only used for testing. + */ + public SpecialTokenRegistry(String configId) { + try { + build(new ConfigGetter<>(SpecialtokensConfig.class).getConfig(configId)); + } catch (Exception e) { + log.config( + "No special tokens are configured (" + e.getMessage() + ")"); + } + } + + /** + * Create a special token registry from a configuration object. This is the production code path. + */ + public SpecialTokenRegistry(SpecialtokensConfig config) { + if (config != null) { + build(config); + } + freeze(); + } + + private void freeze() { + frozen = true; + } + + private void build(SpecialtokensConfig config) { + List<SpecialTokens> list = new ArrayList<>(); + for (Iterator<Tokenlist> i = config.tokenlist().iterator(); i.hasNext();) { + Tokenlist tokenList = i.next(); + SpecialTokens tokens = new SpecialTokens(tokenList.name()); + + for (Iterator<Tokens> j = tokenList.tokens().iterator(); j.hasNext();) { + Tokens token = j.next(); + tokens.addSpecialToken(token.token(), token.replace()); + } + tokens.freeze(); + list.add(tokens); + } + addSpecialTokens(list); + } + + /** + * Adds a SpecialTokens instance to the registry. That is, add the + * tokens contained for the name of the SpecialTokens instance + * given. 
+ * + * @param specialTokens the SpecialTokens object to add + */ + public void addSpecialTokens(SpecialTokens specialTokens) { + ensureNotFrozen(); + List<SpecialTokens> list = new ArrayList<>(); + list.add(specialTokens); + addSpecialTokens(list); + + } + + private void ensureNotFrozen() { + if (frozen) { + throw new IllegalStateException("Tried to modify a frozen SpecialTokenRegistry instance."); + } + } + + private void addSpecialTokens(List<SpecialTokens> list) { + HashMap<String,SpecialTokens> tokens = new HashMap<>(specialTokenMap); + for(SpecialTokens t: list) { + tokens.put(t.getName(),t); + } + specialTokenMap = tokens; + } + + + /** + * Returns the currently authorative list of special tokens for + * a given name. + * + * @param name the name of the special tokens to return + * null, the empth string or the string "default" returns + * the default ones + * @return a read-only list of SpecialToken instances, an empty list if this name + * has no special tokens + */ + public SpecialTokens getSpecialTokens(String name) { + if (name == null || name.trim().equals("")) { + name = "default"; + } + SpecialTokens specialTokens = specialTokenMap.get(name); + + if (specialTokens == null) { + return nullSpecialTokens; + } + return specialTokens; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/SpecialTokens.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/SpecialTokens.java new file mode 100644 index 00000000000..2db7afc36a1 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/SpecialTokens.java @@ -0,0 +1,161 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.prelude.query.parser;

import com.yahoo.log.LogLevel;
import com.yahoo.prelude.query.Substring;

import java.util.*;
import java.util.logging.Logger;

import static com.yahoo.language.LinguisticsCase.toLowerCase;

/**
 * A named list of special tokens - strings that should be treated as a single word
 * no matter what characters they contain. Special tokens are case insensitive:
 * both the token and its replacement are stored lowercased.
 *
 * <p>The list is kept sorted by descending token length, so that {@link #tokenize}
 * returns the longest special token matching at the start of the input.</p>
 *
 * @author bratseth
 */
public class SpecialTokens {

    private static final Logger log = Logger.getLogger(SpecialTokens.class.getName());

    private final String name;

    /** The tokens of this, longest first */
    private final List<SpecialToken> specialTokens = new ArrayList<>();

    private boolean frozen = false;

    /** The length of the longest token added so far; bounds how much input tokenize needs to lowercase */
    private int currentMaximumLength = 0;

    /** Creates a null list of special tokens, named "(null)" */
    public SpecialTokens() {
        this.name = "(null)";
    }

    /** Creates an empty list of special tokens having the given name */
    public SpecialTokens(String name) {
        this.name = name;
    }

    /** Returns the name of this special tokens list */
    public String getName() {
        return name;
    }

    /**
     * Adds a special token to this.
     * A token whose length changes when it is lower- or uppercased is logged and ignored.
     *
     * @param token the special token string to add
     * @param replace the token to replace instances of the special token with,
     *                or null to keep the token
     * @throws IllegalStateException if this is frozen
     */
    public void addSpecialToken(String token, String replace) {
        ensureNotFrozen();
        if (!caseIndependentLength(token)) {
            return;
        }
        // TODO are special tokens correctly unicode normalized in regards to query parsing?
        final SpecialToken specialTokenToAdd = new SpecialToken(token, replace);
        currentMaximumLength = Math.max(currentMaximumLength, specialTokenToAdd.token.length());
        specialTokens.add(specialTokenToAdd);
        // Keep longest-first order; the sort is stable, so equal-length tokens keep insertion order
        Collections.sort(specialTokens);
    }

    /** Returns whether the token has the same length in its given form, lowercased and uppercased */
    private boolean caseIndependentLength(String token) {
        // XXX not fool proof length test, should test codepoint by codepoint for mixed case user input? not even that will necessarily be 100% robust...
        String asLow = toLowerCase(token);
        // TODO put along with the global toLowerCase
        String asHigh = token.toUpperCase(Locale.ENGLISH);
        if (asLow.length() != token.length() || asHigh.length() != token.length()) {
            log.log(LogLevel.ERROR, "Special token '" + token + "' has case sensitive length. Ignoring the token."
                    + " Please report this message in a bug to the Vespa team.");
            return false;
        } else {
            return true;
        }
    }

    /**
     * Returns the special token starting at the start of the given string, or null if no
     * special token starts at this string
     *
     * @param string the string to search for a special token at the start position
     * @param substring true to allow the special token to be followed by a character which does not
     *                  mark the end of a token
     * @return the longest matching special token, or null if there is none
     */
    public SpecialToken tokenize(String string, boolean substring) {
        // XXX detonator pattern token.length may be != the length of the
        // matching data in string, ref caseIndependentLength(String)
        final String input = toLowerCase(string.substring(0, Math.min(string.length(), currentMaximumLength)));
        for (SpecialToken special : specialTokens) {
            if (input.startsWith(special.token())) {
                if (string.length() == special.token().length() || substring || tokenEndsAt(special.token().length(), string))
                    return special;
            }
        }
        return null;
    }

    /**
     * Returns whether the character at the given position of the string marks the end of a token,
     * i.e. is not a letter or digit.
     *
     * The match in tokenize is done on the lowercased input, whose length may differ from the raw
     * string (see the XXX notes above), so the position may be outside the raw string. That case is
     * treated as "no token end here" (no match) rather than throwing StringIndexOutOfBoundsException.
     */
    private boolean tokenEndsAt(int position, String string) {
        if (position >= string.length()) {
            return false;
        }
        return !Character.isLetterOrDigit(string.charAt(position));
    }

    /** Returns the number of special tokens in this */
    public int size() {
        return specialTokens.size();
    }

    private void ensureNotFrozen() {
        if (frozen) {
            throw new IllegalStateException("Tried to modify a frozen SpecialTokens instance.");
        }
    }

    /** Freezes this: later attempts to add tokens will throw IllegalStateException */
    public void freeze() {
        frozen = true;
    }

    /** An immutable special token */
    public final static class SpecialToken implements Comparable<SpecialToken> {

        private final String token;

        private final String replace;

        /**
         * Creates a special token. Both arguments are stored lowercased.
         *
         * @param token the special token string
         * @param replace the replacement, or null or a blank string to use the token itself
         */
        public SpecialToken(String token, String replace) {
            this.token = toLowerCase(token);
            if (replace == null || replace.trim().equals("")) {
                this.replace = this.token;
            } else {
                this.replace = toLowerCase(replace);
            }
        }

        /** Returns the special token */
        public String token() {
            return token;
        }

        /** Returns the right replace value, never null or an empty string */
        public String replace() {
            return replace;
        }

        /**
         * Orders special tokens by descending token length (longer tokens sort first).
         * Note that this ordering is not consistent with equals: tokens of equal length compare as 0.
         */
        @Override
        public int compareTo(SpecialToken other) {
            return Integer.compare(other.token().length(), this.token().length());
        }

        /**
         * Returns this as a special WORD token having the replace value as image
         *
         * @param start the start position of this token in rawSource
         * @param rawSource the string this token was found in
         */
        public Token toToken(int start, String rawSource) {
            return new Token(Token.Kind.WORD, replace(), true, new Substring(start, start + token.length(), rawSource)); // XXX: Unsafe?
        }

    }

}
/**
 * Base class for parsers of the query languages which can be used
 * for structured queries (types ANY, ALL and ADVANCED).
 *
 * <p>The parse methods here follow a common backtracking pattern: each records the
 * token position on entry and restores it in a finally block if it did not produce an item.</p>
 *
 * <p>NOTE(review): the members <code>tokens</code>, <code>submodes</code>, <code>indexFacts</code>,
 * <code>braceLevelURL</code> and <code>segment(Token)</code> used below are not declared in this class;
 * presumably they are inherited from AbstractParser - confirm there.</p>
 *
 * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
 */
abstract class StructuredParser extends AbstractParser {

    protected StructuredParser(ParserEnvironment environment) {
        super(environment);
    }

    /**
     * Parses a composite item. Implemented by the concrete query language parsers.
     *
     * @param topLevel false when called for a parenthesized sub-expression (see {@link #compositeItem})
     */
    protected abstract Item handleComposite(boolean topLevel);

    /**
     * Parses a parenthesized composite: skips any leading PLUS tokens, requires an LBRACE,
     * delegates the content to handleComposite(false) and finally skips an RBRACE if present.
     *
     * @return the composite item, or null (with the token position restored) if there is none here
     */
    protected Item compositeItem() {
        int position = tokens.getPosition();
        Item item = null;

        try {
            tokens.skipMultiple(PLUS);
            if (!tokens.skip(LBRACE)) {
                return null;
            }

            item = handleComposite(false);

            tokens.skip(RBRACE);
            return item;
        } finally {
            if (item == null) {
                tokens.setPosition(position);
            }
        }
    }

    /** Sets the submodes used for url parsing. Override this to influence when such submodes are used. */
    protected void setSubmodeFromIndex(String indexName, IndexFacts.Session indexFacts) {
        submodes.setFromIndex(indexName, indexFacts);
    }

    /**
     * Parses an optional index prefix followed by a number or a phrase/word,
     * and then an optional weight suffix.
     * The index name and weight, when present, are set on the returned item.
     *
     * @return the item, or null (with the token position restored) if there is none here
     */
    protected Item indexableItem() {
        int position = tokens.getPosition();
        Item item = null;

        try {
            String indexName = indexPrefix();
            setSubmodeFromIndex(indexName, indexFacts);

            item = number(indexName != null);

            if (item == null) {
                item = phrase();
            }

            // With an explicit index we try once more if there are words further ahead
            if (item == null && indexName != null) {
                if (wordsAhead()) {
                    item = phrase();
                }
            }

            submodes.reset();

            int weight = -1;

            if (item != null) {
                weight = weightSuffix();
            }

            if (indexName != null && item != null) {
                item.setIndexName(indexName);
            }

            if (weight != -1 && item != null) {
                item.setWeight(weight);
            }

            return item;
        } finally {
            if (item == null) {
                tokens.setPosition(position);
            }
        }
    }

    // scan forward for terms while ignoring noise
    // Returns true, positioned at the term, if a NUMBER or WORD is found before any SPACE.
    // Note that the position is not restored when this returns false.
    private boolean wordsAhead() {
        while (tokens.hasNext()) {
            if (tokens.currentIsNoIgnore(SPACE)) {
                return false;
            }
            if (tokens.currentIsNoIgnore(NUMBER)
                    || tokens.currentIsNoIgnore(WORD)) {
                return true;
            }
            tokens.skipNoIgnore();
        }
        return false;
    }

    // wordsAhead and nothingAhead... uhm... so similar...
    /**
     * Returns true if no NUMBER or WORD token is found before the next unquoted SPACE
     * (or before the tokens run out).
     *
     * @param skip if true the tokens scanned are consumed, otherwise the token position is restored
     */
    private boolean nothingAhead(boolean skip) {
        int position = tokens.getPosition();
        try {
            boolean quoted = false;
            while (tokens.hasNext()) {
                if (tokens.currentIsNoIgnore(QUOTE)) {
                    tokens.skipMultiple(QUOTE);
                    quoted = !quoted;
                } else {
                    if (!quoted && tokens.currentIsNoIgnore(SPACE)) {
                        return true;
                    }
                    if (tokens.currentIsNoIgnore(NUMBER)
                            || tokens.currentIsNoIgnore(WORD)) {
                        return false;
                    }
                    tokens.skipNoIgnore();
                }
            }
            return true;
        } finally {
            if (!skip) {
                tokens.setPosition(position);
            }
        }
    }

    /**
     * Parses an index prefix of the form <code>name:</code> or <code>name.subname:</code>,
     * where the name parts consist of WORD, NUMBER and UNDERSCORE tokens.
     *
     * @return the canonical name of the index, or null (with the token position restored)
     *         if there is no prefix here or the name is not an index according to indexFacts
     */
    private String indexPrefix() {
        int position = tokens.getPosition();
        String item = null;

        try {
            List<Token> firstWord = new ArrayList<>();
            List<Token> secondWord = new ArrayList<>();

            tokens.skip(LSQUAREBRACKET); // For test 93 and 60

            if (!tokens.currentIs(WORD) && !tokens.currentIs(NUMBER)
                    && !tokens.currentIs(UNDERSCORE)) {
                return null;
            }

            firstWord.add(tokens.next());

            while (tokens.currentIsNoIgnore(UNDERSCORE)
                    || tokens.currentIsNoIgnore(WORD)
                    || tokens.currentIsNoIgnore(NUMBER)) {
                firstWord.add(tokens.next());
            }

            if (tokens.currentIsNoIgnore(DOT)) {
                tokens.skip();
                if (tokens.currentIsNoIgnore(WORD)
                        || tokens.currentIsNoIgnore(NUMBER)) {
                    secondWord.add(tokens.next());
                } else {
                    return null;
                }
                while (tokens.currentIsNoIgnore(UNDERSCORE)
                        || tokens.currentIsNoIgnore(WORD)
                        || tokens.currentIsNoIgnore(NUMBER)) {
                    secondWord.add(tokens.next());
                }
            }

            if (!tokens.skipNoIgnore(COLON)) {
                return null;
            }

            if (secondWord.size() == 0) {
                item = concatenate(firstWord);
            } else {
                item = concatenate(firstWord) + "." + concatenate(secondWord);
            }

            item = indexFacts.getCanonicName(item);

            if ( ! indexFacts.isIndex(item)) { // Only if this really is an index
                // Marker for the finally block
                item = null;
                return null;
            } else {
                if (nothingAhead(false)) {
                    // correct index syntax, correct name, but followed
                    // by noise. Let's skip this.
                    nothingAhead(true);
                    // Backtrack to after the noise, and use the next index prefix (if any) instead
                    position = tokens.getPosition();
                    item = indexPrefix();
                }
            }

            return item;
        } finally {
            if (item == null) {
                tokens.setPosition(position);
            }
        }
    }

    /** Returns the string images of the given tokens appended to each other, with no separator */
    private String concatenate(List<Token> tokens) {
        StringBuilder s = new StringBuilder();
        for (Token t : tokens) {
            s.append(t.toString());
        }
        return s.toString();
    }

    /**
     * Returns the specified term weight, or -1 if there is no weight suffix.
     * A single "!" gives 150 and each additional "!" adds 50, while "!" followed
     * by a number gives that number. If the number cannot be parsed as an int,
     * -1 is returned and the token position is restored.
     */
    private int weightSuffix() {
        int position = tokens.getPosition();
        int item = -1;

        try {
            if (!tokens.skipNoIgnore(EXCLAMATION)) {
                return -1;
            }
            item = 150;

            if (tokens.currentIsNoIgnore(NUMBER)) {
                try {
                    item = Integer.parseInt(tokens.next().toString());
                } catch (NumberFormatException e) {
                    item = -1;
                }
            } else {
                while (tokens.currentIsNoIgnore(EXCLAMATION)) {
                    item += 50;
                    tokens.skipNoIgnore();
                }
            }
            return item;

        } finally {
            if (item == -1) {
                tokens.setPosition(position);
            }
        }
    }

    /** Returns whether the current token (not ignoring spaces) is one which may follow directly after a number */
    private boolean endOfNumber() {
        return tokens.currentIsNoIgnore(SPACE)
                || tokens.currentIsNoIgnore(RSQUAREBRACKET)
                || tokens.currentIsNoIgnore(SEMICOLON)
                || tokens.currentIsNoIgnore(RBRACE)
                || tokens.currentIsNoIgnore(EOF)
                || tokens.currentIsNoIgnore(EXCLAMATION);
    }

    /**
     * Consumes a DOT directly followed by a NUMBER, if present.
     *
     * @return "." followed by the number, or the empty string (with the token position restored)
     *         if there is no decimal part here
     */
    private String decimalPart() {
        int position = tokens.getPosition();
        boolean consumed = false;

        try {
            if (!tokens.skipNoIgnore(DOT)) return "";
            if (tokens.currentIsNoIgnore(NUMBER)) {
                consumed = true;
                return "." + tokens.next().toString();
            }
            return "";
        } finally {
            if ( ! consumed)
                tokens.setPosition(position);
        }
    }

    /**
     * Parses a numeric item: a range, a plain number, a negative number (only when an index
     * is given), or a "smaller than"/"greater than" expression.
     * The result is only accepted if it is followed by an end-of-number token, see {@link #endOfNumber}.
     *
     * @param hasIndex whether an index prefix was given; required to accept a leading minus
     * @return the item, or null (with the token position restored) if there is no number here
     */
    private IntItem number(boolean hasIndex) {
        int position = tokens.getPosition();
        IntItem item = null;

        try {
            if (item == null) {
                item = numberRange();
            }

            tokens.skip(LSQUAREBRACKET); // For test 93 and 60

            // TODO: Better definition of start and end of numeric items
            if (item == null && hasIndex && tokens.currentIsNoIgnore(MINUS) && (tokens.currentNoIgnore(1).kind == NUMBER)) {
                tokens.skipNoIgnore();
                Token t = tokens.next();
                item = new IntItem("-" + t.toString() + decimalPart(), true);
                item.setOrigin(t.substring);
            } else if (item == null && tokens.currentIs(NUMBER)) {
                Token t = tokens.next();
                item = new IntItem(t.toString() + decimalPart(), true);
                item.setOrigin(t.substring);
            }

            if (item == null) {
                item = numberSmaller();
            }

            if (item == null) {
                item = numberGreater();
            }
            if (item != null && !endOfNumber()) {
                item = null;
            }
            return item;
        } finally {
            if (item == null) {
                tokens.setPosition(position);
            }
        }
    }

    /**
     * Parses a range of the form <code>[start;end]</code> or <code>[start;end;limit]</code>,
     * where start and end may be empty or negative and may have a decimal part.
     * The closing bracket is skipped if present but is not required.
     *
     * @return the range item, or null (with the token position restored) if there is no range here
     */
    private IntItem numberRange() {
        int position = tokens.getPosition();
        IntItem item = null;
        boolean negative = false;

        try {
            Token initial = tokens.next();
            if (initial.kind != LSQUAREBRACKET) {
                return null;
            }

            String rangeStart = "";

            negative = tokens.skip(MINUS);

            if (tokens.currentIs(NUMBER)) {
                rangeStart = (negative ? "-" : "") + tokens.next().toString() + decimalPart();
            }

            if (!tokens.skip(SEMICOLON)) {
                return null;
            }

            String rangeEnd = "";

            negative = tokens.skip(MINUS);

            if (tokens.currentIs(NUMBER)) {
                rangeEnd = (negative ? "-" : "") + tokens.next().toString() + decimalPart();
            }


            String range = "[" + rangeStart + ";" + rangeEnd;
            if (tokens.skip(SEMICOLON)) {
                negative = tokens.skip(MINUS);
                if (tokens.currentIs(NUMBER)) {
                    String rangeLimit = (negative ? "-" : "") + tokens.next().toString();
                    range += ";" + rangeLimit;
                }
            }
            tokens.skip(RSQUAREBRACKET);

            item = new IntItem(range + "]", true);
            item.setOrigin(new Substring(initial.substring.start, tokens.currentNoIgnore().substring.start,
                                         initial.getSubstring().getSuperstring())); // XXX: Unsafe end?

            return item;
        } finally {
            if (item == null) {
                tokens.setPosition(position);
            }
        }
    }

    /**
     * Parses a "smaller than" expression: SMALLER, an optional MINUS and a number with an optional decimal part.
     *
     * @return the item, or null (with the token position restored) if there is no such expression here
     */
    private IntItem numberSmaller() {
        int position = tokens.getPosition();
        IntItem item = null;
        boolean negative = false;

        try {
            Token initial = tokens.next();
            if (initial.kind != SMALLER) {
                return null;
            }
            negative = tokens.skipNoIgnore(MINUS);
            if (!tokens.currentIs(NUMBER)) {
                return null;
            }

            item = new IntItem("<" + (negative ? "-" : "") + tokens.next() + decimalPart(), true);
            item.setOrigin(new Substring(initial.substring.start, tokens.currentNoIgnore().substring.start,
                                         initial.getSubstring().getSuperstring())); // XXX: Unsafe end?
            return item;
        } finally {
            if (item == null) {
                tokens.setPosition(position);
            }
        }
    }

    /**
     * Parses a "greater than" expression: GREATER, an optional MINUS and a number with an optional decimal part.
     *
     * @return the item, or null (with the token position restored) if there is no such expression here
     */
    private IntItem numberGreater() {
        int position = tokens.getPosition();
        IntItem item = null;
        boolean negative = false;

        try {
            Token t = tokens.next();
            if (t.kind != GREATER) {
                return null;
            }

            negative = tokens.skipNoIgnore(MINUS);
            if (!tokens.currentIs(NUMBER)) {
                return null;
            }

            Token number = tokens.next();
            item = new IntItem(">" + (negative ? "-" : "") + number + decimalPart(), true);
            item.setOrigin(new Substring(t.substring.start, tokens.currentNoIgnore().substring.start, t.getSubstring().getSuperstring())); // XXX: Unsafe end?
            return item;
        } finally {
            if (item == null) {
                tokens.setPosition(position);
            }
        }
    }

    /**
     * Words for phrases also permits numerals as words
     *
     * @param insidePhrase if true a NUMBER token becomes a WordItem, otherwise an IntItem
     * @return the item, or null (with the token position restored) if the current token is
     *         neither a word nor a number
     */
    private Item phraseWord(boolean insidePhrase) {
        int position = tokens.getPosition();
        Item item = null;

        try {
            if (item == null) {
                item = word();
            }

            if (item == null && tokens.currentIs(NUMBER)) {
                Token t = tokens.next();
                if (insidePhrase) {
                    item = new WordItem(t, true);
                } else {
                    item = new IntItem(t.toString(), true);
                    ((TermItem) item).setOrigin(t.substring);
                }
            }

            return item;
        } finally {
            if (item == null) {
                tokens.setPosition(position);
            }
        }
    }

    /**
     * Returns a WordItem if this is a non CJK query,
     * a WordItem or PhraseSegmentItem if this is a CJK query,
     * null if the current item is not a word
     *
     * <p>In the url and site submodes, NUMBER, MINUS and UNDERSCORE tokens are also accepted as the
     * start of a word, and directly following WORD, NUMBER, MINUS and UNDERSCORE tokens are appended
     * to form a single word.</p>
     */
    private Item word() {
        int position = tokens.getPosition();
        Item item = null;

        try {
            if (!tokens.currentIs(WORD)
                    && ((!tokens.currentIs(NUMBER) && !tokens.currentIs(MINUS)
                    && !tokens.currentIs(UNDERSCORE)) || (!submodes.url && !submodes.site))) {
                return null;
            }
            Token word = tokens.next();

            if (submodes.url) {
                item = new WordItem(word, true);
            } else {
                item = segment(word);
            }

            if (submodes.url || submodes.site) {
                StringBuilder buffer = null;
                Token token = tokens.currentNoIgnore();

                while (token.kind == WORD || token.kind == NUMBER || token.kind == MINUS || token.kind == UNDERSCORE) {
                    // Created lazily, such that the item is only replaced if something was appended
                    if (buffer == null) {
                        buffer = getStringContents(item);
                    }
                    buffer.append(token.toString());
                    tokens.skipNoIgnore();
                    token = tokens.currentNoIgnore();
                }
                if (buffer != null) {
                    Substring termSubstring = ((BlockItem) item).getOrigin();
                    Substring substring = new Substring(termSubstring.start, token.substring.start, termSubstring.getSuperstring()); // XXX: Unsafe end?
                    String str = buffer.toString();
                    item = new WordItem(str, "", true, substring);
                }
            }
            return item;
        } finally {
            if (item == null) {
                tokens.setPosition(position);
            }
        }
    }

    /**
     * Returns a new buffer initialized with the string content of the given item:
     * the string value of a TermItem, or the raw word of a SegmentItem.
     *
     * @throws RuntimeException if the item is of neither type
     */
    private StringBuilder getStringContents(Item item) {
        if (item instanceof TermItem) {
            return new StringBuilder(
                    ((TermItem) item).stringValue());
        } else if (item instanceof SegmentItem) {
            return new StringBuilder(
                    ((SegmentItem) item).getRawWord());
        } else {
            throw new RuntimeException("Parser bug. Unexpected item type, send stack trace in a bug ticket to the Vespa team.");
        }
    }


    /**
     * A phrase or word, either marked by quotes or by non-spaces between
     * words or by a combination.
     *
     * @return a word if there's only one word, a phrase if there is
     *         several quoted or non-space-separated words, or null otherwise
     */
    private Item phrase() {
        int position = tokens.getPosition();
        Item item = null;

        try {
            item = phraseBody();
            return item;
        } finally {
            if (item == null) {
                tokens.setPosition(position);
            }
        }
    }

    /**
     * Returns a word, a phrase or another composite.
     * Does not restore the token position on failure; that is done by {@link #phrase}.
     */
    private Item phraseBody() {
        boolean quoted = false;
        PhraseItem phrase = null;
        Item firstWord = null;
        boolean starAfterFirst = false;
        boolean starBeforeFirst = false;

        if (tokens.skipMultiple(QUOTE)) {
            quoted = !quoted;
        }
        boolean addStartOfHostMarker = addStartMarking();

        braceLevelURL = 0;

        do {
            starBeforeFirst = tokens.skip(STAR);

            if (tokens.skipMultiple(QUOTE)) {
                quoted = !quoted;
            }

            Item word = phraseWord((firstWord != null) || (phrase != null));

            if (word == null) {
                if (tokens.skipMultiple(QUOTE)) {
                    quoted = !quoted;
                }
                // Inside quotes anything which is not a word is skipped
                if (quoted && tokens.hasNext()) {
                    tokens.skipNoIgnore();
                    continue;
                } else {
                    break;
                }
            } else if (quoted && word instanceof PhraseSegmentItem) {
                ((PhraseSegmentItem) word).setExplicit(true);
            }

            if (phrase != null) {
                phrase.addItem(word);
            } else if (firstWord != null) {
                // Second word found: turn the first word and this into a phrase
                phrase = new PhraseItem();
                if (quoted || submodes.site || submodes.url) {
                    phrase.setExplicit(true);
                }
                if (addStartOfHostMarker) {
                    phrase.addItem(MarkerWordItem.createStartOfHost());
                }
                if (firstWord instanceof IntItem) {
                    IntItem asInt = (IntItem) firstWord;
                    firstWord = new WordItem(asInt.stringValue(), asInt.getIndexName(),
                                             true, asInt.getOrigin());
                }
                phrase.addItem(firstWord);
                phrase.addItem(word);
            } else if (word instanceof PhraseItem) {
                phrase = (PhraseItem) word;
            } else {
                firstWord = word;
                starAfterFirst = tokens.skipNoIgnore(STAR);
            }
            if (!quoted && tokens.currentIs(QUOTE)) {
                break;
            }

            boolean atWord = skipToNextPhraseWord(quoted);

            if (!atWord && tokens.skipMultipleNoIgnore(QUOTE)) {
                quoted = !quoted;
            }

            if (!atWord && !quoted) {
                break;
            }

            if (quoted && tokens.skipMultiple(QUOTE)) {
                break;
            }

        } while (tokens.hasNext());

        braceLevelURL = 0;

        if (phrase != null) {
            if (addEndMarking()) {
                phrase.addItem(MarkerWordItem.createEndOfHost());
            }
            return phrase;
        } else if (firstWord != null && submodes.site) {
            if (starAfterFirst && !addStartOfHostMarker) {
                return firstWord;
            } else {
                // A single word in site mode becomes an explicit phrase with host markers
                phrase = new PhraseItem();
                if (addStartOfHostMarker) {
                    phrase.addItem(MarkerWordItem.createStartOfHost());
                }
                if (firstWord instanceof IntItem) {
                    IntItem asInt = (IntItem) firstWord;
                    firstWord = new WordItem(asInt.stringValue(), asInt.getIndexName(), true, asInt.getOrigin());
                }
                phrase.addItem(firstWord);
                if (!starAfterFirst) {
                    phrase.addItem(MarkerWordItem.createEndOfHost());
                }
                phrase.setExplicit(true);
                return phrase;
            }
        } else {
            if (firstWord != null && firstWord instanceof TermItem && (starAfterFirst || starBeforeFirst)) {
                // prefix, suffix or substring
                TermItem firstTerm = (TermItem) firstWord;
                if (starAfterFirst) {
                    if (starBeforeFirst) {
                        return new SubstringItem(firstTerm.stringValue(), true);
                    } else {
                        return new PrefixItem(firstTerm.stringValue(), true);
                    }
                } else {
                    return new SuffixItem(firstTerm.stringValue(), true);
                }
            }
            return firstWord;
        }
    }

    /**
     * Consumes a HAT at the current position when the submode uses explicit anchoring.
     *
     * @return true if a start (of host) marker should be added
     */
    private boolean addStartMarking() {
        if (submodes.explicitAnchoring() && tokens.currentIs(HAT)) {
            tokens.skip();
            return true;
        }
        return false;
    }

    /**
     * Decides whether an end (of host) marker should be added after a phrase: true on a DOLLAR
     * (consumed) with explicit anchoring, false on a STAR (consumed) in site mode, and otherwise
     * true in site mode unless the current token is a DOT.
     */
    private boolean addEndMarking() {
        if (submodes.explicitAnchoring() && tokens.currentIs(DOLLAR)) {
            tokens.skip();
            return true;
        } else if (submodes.site && tokens.currentIs(STAR)) {
            tokens.skip();
            return false;
        } else if (submodes.site && !tokens.currentIs(DOT)) {
            return true;
        }
        return false;
    }

    /**
     * Skips one or multiple phrase separators
     *
     * @param quoted whether we are inside quotes, in which case braces are also skipped as separators
     *               (outside the url and site submodes)
     * @return true if the item we land at after skipping zero or more is
     *         a phrase word
     */
    private boolean skipToNextPhraseWord(boolean quoted) {
        boolean skipped = false;

        do {
            skipped = false;
            if (submodes.url) {
                // Track brace nesting, such that skipping stops at an unbalanced closing brace
                if (tokens.currentIsNoIgnore(RBRACE)) {
                    braceLevelURL--;
                }
                if (tokens.currentIsNoIgnore(LBRACE)) {
                    braceLevelURL++;
                }
                if (tokens.hasNext() && !tokens.currentIsNoIgnore(SPACE)
                        && braceLevelURL >= 0) {
                    tokens.skip();
                    skipped = true;
                }
            } else if (submodes.site) {
                if (tokens.hasNext() && !tokens.currentIsNoIgnore(SPACE)
                        && !tokens.currentIsNoIgnore(STAR)
                        && !tokens.currentIsNoIgnore(HAT)
                        && !tokens.currentIsNoIgnore(DOLLAR)
                        && !tokens.currentIsNoIgnore(RBRACE)) {
                    tokens.skip();
                    skipped = true;
                }
            } else {
                if (tokens.skipMultipleNoIgnore(DOT)) {
                    skipped = true;
                }
                if (tokens.skipMultipleNoIgnore(COMMA)) {
                    skipped = true;
                }
                if (tokens.skipMultipleNoIgnore(PLUS)) {
                    skipped = true;
                }
                if (tokens.skipMultipleNoIgnore(MINUS)) {
                    skipped = true;
                }
                if (tokens.skipMultipleNoIgnore(UNDERSCORE)) {
                    skipped = true;
                }
                if (tokens.skipMultipleNoIgnore(HAT)) {
                    skipped = true;
                }
                if (tokens.skipMultipleNoIgnore(DOLLAR)) {
                    skipped = true;
                }
                ;
                if (tokens.skipMultipleNoIgnore(STAR)) {
                    skipped = true;
                }
                if (tokens.skipMultipleNoIgnore(COLON)) {
                    skipped = true;
                }
                if (quoted) {
                    if (tokens.skipMultipleNoIgnore(RBRACE)) {
                        skipped = true;
                    }
                    if (tokens.skipMultipleNoIgnore(LBRACE)) {
                        skipped = true;
                    }
                }
                if (tokens.skipMultipleNoIgnore(NOISE)) {
                    skipped = true;
                }
            }
        } while (skipped && !tokens.currentIsNoIgnore(WORD)
                && !tokens.currentIsNoIgnore(NUMBER) && !URLModeWordChar());

        return tokens.currentIsNoIgnore(WORD)
                || tokens.currentIsNoIgnore(NUMBER) || URLModePhraseChar();
    }

    /** Returns whether we are in url mode and the current token is an UNDERSCORE or MINUS */
    private boolean URLModeWordChar() {
        if (!submodes.url) {
            return false;
        }
        return tokens.currentIsNoIgnore(UNDERSCORE)
                || tokens.currentIsNoIgnore(MINUS);
    }

    /** Returns whether we are in url mode and the current token is neither an RBRACE nor a SPACE */
    private boolean URLModePhraseChar() {
        if (!submodes.url) {
            return false;
        }
        return !(tokens.currentIsNoIgnore(RBRACE)
                || tokens.currentIsNoIgnore(SPACE));
    }


}
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class Token { + + public static enum Kind { + EOF("<EOF>"), + NUMBER("<NUMBER>"), + WORD("<WORD>"), + LETTER("<LETTER>"), + DIGIT("<DIGIT>"), + SPACE("\" \""), + NOISE("<NOISE>"), + LATINSIGN("<LATINSIGN>"), + QUOTE("\"\\\"\""), + MINUS("\"-\""), + PLUS("\"+\""), + DOT("\".\""), + COMMA("\",\""), + COLON("\":\""), + LBRACE("\"(\""), + RBRACE("\")\""), + LSQUAREBRACKET("\"[\""), + RSQUAREBRACKET("\"]\""), + SEMICOLON("\";\""), + GREATER("\">\""), + SMALLER("\"<\""), + EXCLAMATION("\"!\""), + UNDERSCORE("\"_\""), + HAT("\"^\""), + STAR("\"*\""), + DOLLAR("\"$\""), + DEFAULT(""); + + public final String image; + + private Kind(String image) { + this.image = image; + } + } + + /** The raw substring causing this token, never null */ + public final Substring substring; + + public final Token.Kind kind; + + /** Lowercase image */ + public final String image; + + /** True if this is a <i>special token</i> */ + private final boolean special; + + /** Crates a token which fails to know its origin (as a substring). Do not use, except for testing. 
*/ + public Token(Token.Kind kind, String image) { + this(kind,image,false,null); + } + + public Token(Token.Kind kind, String image, Substring substring) { + this(kind,image,false,substring); + } + + public Token(Token.Kind kind, String image, boolean special, Substring substring) { + this.kind = kind; + this.image = image; + this.special = special; + this.substring = substring; + } + + /** Returns whether this is a <i>special token</i> */ + public boolean isSpecial() { return special; } + + public String toString() { return image; } + + public boolean equals(Object object) { + if (this == object) { + return true; + } + if (object == null) { + return false; + } + if (object.getClass() != this.getClass()) { + return false; + } + + Token other = (Token) object; + + if (this.kind != other.kind) { + return false; + } + if (!(this.image.equals(other.image))) { + return false; + } + + return true; + } + + /** + * Returns the substring containing the image ins original form (including casing), + * as well as all the text surrounding the token + * + * @return the image in original casing, never null + */ + public Substring getSubstring() { return substring; } + + public int hashCode() { + return image.hashCode() ^ kind.hashCode(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/TokenPosition.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/TokenPosition.java new file mode 100644 index 00000000000..a1ad4983f34 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/TokenPosition.java @@ -0,0 +1,218 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.prelude.query.parser;


import java.util.List;


/**
 * An iterator-like view of a list, but typed, random-accessible
 * and with more convenience methods.
 *
 * <p>Most methods come in two variants: the plain one ignores (skips past) SPACE tokens,
 * while the <code>NoIgnore</code> one treats SPACE like any other token.</p>
 *
 * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
 */
final class TokenPosition {

    private List<Token> tokenList;

    private int position = 0;

    /**
     * Creates an empty token position which must be {@link #initialize initialized}
     * before use
     */
    public TokenPosition() {}

    /**
     * Initializes this token position. Must be done once or more before use
     *
     * @param tokens a list of tokens, which is not modified, and not used
     *               outside the calling thread
     */
    public void initialize(List<Token> tokens) {
        this.tokenList = tokens;
        position = 0;
    }

    /**
     * Returns the current token without changing the position.
     * SPACE tokens are ignored: this is the first non-space token at or after the position.
     * Returns null (no exception) if there are no more tokens.
     */
    public Token current() {
        return current(0);
    }

    /**
     * Returns the current token without changing the position,
     * and without ignoring spaces.
     * Returns null (no exception) if there are no more tokens.
     */
    public Token currentNoIgnore() {
        return currentNoIgnore(0);
    }

    /**
     * Returns the first non-space token found at or after <code>offset</code> list
     * positions from here, without changing the position.
     * Null (no exception) if there is no such token
     */
    public Token current(int offset) {
        for (int i = position + offset; i < tokenList.size(); i++) {
            Token token = tokenList.get(i);
            if (token.kind != Token.Kind.SPACE) {
                return token;
            }
        }
        return null;
    }

    /**
     * Returns the token at <code>offset</code> steps from here,
     * without ignoring spaces.
     * Null (no exception) if there is no token at that position
     */
    public Token currentNoIgnore(int offset) {
        if (tokenList.size() <= position + offset) {
            return null;
        }
        return tokenList.get(position + offset);
    }

    /**
     * Returns whether the current token is of the given kind.
     * False also if there is no token at the current position
     */
    public boolean currentIs(Token.Kind kind) {
        Token current = current();
        return current != null && current.kind == kind;
    }

    /**
     * Returns whether the current token is of the given kind,
     * without skipping spaces.
     * False also if there is no token at the current position
     */
    public boolean currentIsNoIgnore(Token.Kind kind) {
        Token current = currentNoIgnore();
        return current != null && current.kind == kind;
    }

    /**
     * Returns whether there is at least one more token <i>after</i> the current one.
     * This is false when positioned at the last token of the list.
     */
    public boolean hasNext() {
        return tokenList.size() > (position + 1);
    }

    /**
     * Returns the current token and increases the position by one.
     * SPACE tokens are ignored: they are stepped past before the token to return is found.
     * Returns null (no exception) if there are no more tokens
     */
    public Token next() {
        // Go to the next-non-space. Then set token, then increase position by one
        while (position < tokenList.size()) {
            Token current = tokenList.get(position++);

            if (current.kind != Token.Kind.SPACE) {
                return current;
            }
        }
        return null;
    }

    /** Skips past the current token (and any spaces before it) */
    public void skip() {
        next();
    }

    /** Skips to the next token, even if the next is a space */
    public void skipNoIgnore() {
        position++;
    }

    /** Sets the position */
    public void setPosition(int position) {
        this.position = position;
    }

    /** Returns the current position */
    public int getPosition() {
        return position;
    }

    /**
     * Skips one or more tokens of the given kind
     *
     * @return true if at least one was skipped, false if there was none
     */
    public boolean skipMultiple(Token.Kind kind) {
        boolean skipped = false;

        // currentIs is null safe: current() is null when only spaces remain, even if hasNext() is true
        while (hasNext() && currentIs(kind)) {
            skipped = true;
            skip();
        }
        return skipped;
    }

    /**
     * Skips one or more tokens of the given kind, without ignoring spaces
     * when inspecting the current token.
     *
     * NOTE(review): the skipping itself is done by the space-ignoring {@link #skip()}, so if
     * called with kind SPACE this also consumes the first non-space token following the spaces.
     *
     * @return true if at least one was skipped, false if there was none
     */
    public boolean skipMultipleNoIgnore(Token.Kind kind) {
        boolean skipped = false;

        while (hasNext() && currentIsNoIgnore(kind)) {
            skipped = true;
            skip();
        }
        return skipped;
    }

    /**
     * Skips one or zero items of the given kind.
     *
     * @return true if one item was skipped, false if none was,
     *         or if there are no more tokens
     */
    public boolean skip(Token.Kind kind) {
        if (!currentIs(kind)) {
            return false;
        }

        skip();
        return true;
    }

    /**
     * Skips one or zero items of the given kind, without ignoring
     * spaces when inspecting the current token.
     *
     * NOTE(review): the skipping itself is done by the space-ignoring {@link #skip()}, so if
     * called with kind SPACE this also consumes the first non-space token following the spaces.
     *
     * @return true if one item was skipped, false if none was,
     *         or if there are no more tokens
     */
    public boolean skipNoIgnore(Token.Kind kind) {
        if (!currentIsNoIgnore(kind)) {
            return false;
        }

        skip();
        return true;
    }

}
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public final class Tokenizer { + + private List<Token> tokens = new java.util.ArrayList<>(); + + private String source; + + /** Tokens which should be words, regardless of which characters they contain */ + private SpecialTokens specialTokens = null; + + /** Whether to recognize tokens also as substrings of other tokens, needed for cjk */ + private boolean substringSpecialTokens=false; + + private final CharacterClasses characterClasses; + + private int parensToEat = 0; + + private int indexLastExplicitlyChangedAt = 0; + + /** Creates a tokenizer which initializes from a given Linguistics */ + public Tokenizer(Linguistics linguistics) { + this.characterClasses=linguistics.getCharacterClasses(); + } + + /** + * Sets a list of tokens (Strings) which should be returned as WORD tokens regardless + * of their content. This list is used directly by the Tokenizer and should not be changed + * after calling this. The tokenizer will not change it. Special tokens are case + * sensitive. + */ + public void setSpecialTokens(SpecialTokens specialTokens) { + this.specialTokens = specialTokens; + } + + /** Sets whether to recognize tokens also as substrings of other tokens, needed for cjk. Default false. */ + public void setSubstringSpecialTokens(boolean substringSpecialTokens) { + this.substringSpecialTokens=substringSpecialTokens; + } + + /** + * Resets this tokenizer and create tokens from the given string, using + * "default" as the default index, and using no index information. + * + * @return a read-only list of tokens. This list can only be used by this thread + */ + public List<Token> tokenize(String string) { + return tokenize(string, new IndexFacts().newSession(Collections.emptySet(), Collections.emptySet())); + } + + /** + * Resets this tokenizer and create tokens from the given string, using + * "default" as the default index + * + * @return a read-only list of tokens. 
This list can only be used by this thread + */ + public List<Token> tokenize(String string, IndexFacts.Session indexFacts) { + return tokenize(string, "default", indexFacts); + } + + /** + * Resets this tokenizer and create tokens from the given string. + * + * @param string the string to tokenize + * @param defaultIndexName the name of the index to use as default + * @param indexFacts information about the indexes we will search + * @return a read-only list of tokens. This list can only be used by this thread + */ + @SuppressWarnings({"deprecation"}) + // To avoid this we need to pass an IndexFacts.session down instead - easily done but not without breaking API's + public List<Token> tokenize(String string, String defaultIndexName, IndexFacts.Session indexFacts) { + this.source = string; + tokens.clear(); + parensToEat = 0; + Index topLevelIndex = Index.nullIndex; + Index defaultIndex = indexFacts.getIndex(defaultIndexName); + if (defaultIndexName != null) { + topLevelIndex = defaultIndex; + } + Index currentIndex = topLevelIndex; + for (int i = 0; i < source.length(); i++) { + if (currentIndex.isExact()) { + i = consumeExact(i, currentIndex); // currentIndex may change after seeing a colon below + currentIndex = topLevelIndex; + } + else { + i = consumeSpecialToken(i); + } + + if (i >= source.length()) break; + + int c = source.codePointAt(i); + if (characterClasses.isLetterOrDigit(c) + || (c == '\'' && acceptApostropheAsWordCharacter(currentIndex))) { + i = consumeWordOrNumber(i, currentIndex); + } else if (Character.isWhitespace(c)) { + addToken(SPACE, " ", i, i + 1); + } else if (c == '"' || c == '\u201C' || c == '\u201D' + || c == '\u201E' || c == '\u201F' || c == '\u2039' + || c == '\u203A' || c == '\u00AB' || c == '\u00BB' + || c == '\u301D' || c == '\u301E' || c == '\u301F' + || c == '\uFF02') { + addToken(QUOTE, "\"", i, i + 1); + } else if (c == '-' || c == '\uFF0D') { + addToken(MINUS, "-", i, i + 1); + } else if (c == '+' || c == '\uFF0B') { + 
addToken(PLUS, "+", i, i + 1); + } else if (c == '.' || c == '\uFF0E') { + addToken(DOT, ".", i, i + 1); + } else if (c == ',' || c == '\uFF0C') { + addToken(COMMA, ",", i, i + 1); + } else if (c == ':' || c == '\uFF1A') { + currentIndex = determineCurrentIndex(defaultIndex, indexFacts); + addToken(COLON, ":", i, i + 1); + } else if (c == '(' || c == '\uFF08') { + addToken(LBRACE, "(", i, i + 1); + parensToEat++; + } else if (c == ')' || c == '\uFF09') { + addToken(RBRACE, ")", i, i + 1); + parensToEat--; + if (parensToEat < 0) parensToEat = 0; + } else if (c == '[' || c == '\uFF3B') { + addToken(LSQUAREBRACKET, "[", i, i + 1); + } else if (c == ']' || c == '\uFF3D') { + addToken(RSQUAREBRACKET, "]", i, i + 1); + } else if (c == ';' || c == '\uFF1B') { + addToken(SEMICOLON, ";", i, i + 1); + } else if (c == '>' || c == '\uFF1E') { + addToken(GREATER, ">", i, i + 1); + } else if (c == '<' || c == '\uFF1C') { + addToken(SMALLER, "<", i, i + 1); + } else if (c == '!' || c == '\uFF01') { + addToken(EXCLAMATION, "!", i, i + 1); + } else if (c == '_' || c == '\uFF3F') { + addToken(UNDERSCORE, "_", i, i + 1); + } else if (c == '^' || c == '\uFF3E') { + addToken(HAT, "^", i, i + 1); + } else if (c == '*' || c == '\uFF0A') { + addToken(STAR, "*", i, i + 1); + } else if (c == '$' || c == '\uFF04') { + addToken(DOLLAR, "$", i, i + 1); + } else { + addToken(NOISE, "<NOISE>", i, i + 1); + } + } + addToken(EOF, "<EOF>", source.length(), source.length()); + source = null; + return tokens; + } + + private boolean acceptApostropheAsWordCharacter(Index currentIndex) { + if (!(currentIndex.isUriIndex() || currentIndex.isHostIndex())) { + return true; + } + // this is a heuristic to check whether we probably have reached the end of an URL element + for (int i = tokens.size() - 1; i >= 0; --i) { + Token lookAt = tokens.get(i); + switch (lookAt.kind) { + case COLON: + if (i == indexLastExplicitlyChangedAt) { + return false; + } + case SPACE: + return true; + default: + // do nothing + } 
+ } + // really not sure whether we should choose false instead, on cause of the guard at + // the start, but this seems like the conservative choice + return true; + } + + @SuppressWarnings({"deprecation"}) + private Index determineCurrentIndex(Index defaultIndex, IndexFacts.Session indexFacts) { + int backtrack = tokens.size(); + int tokencnt = 0; + for (int i = 1; i <= tokens.size(); i++) { + backtrack = tokens.size() - i; + Token lookAt = tokens.get(backtrack); + if (lookAt.kind != WORD && lookAt.kind != UNDERSCORE && lookAt.kind != NUMBER && lookAt.kind != DOT) { + // do not use this token + backtrack++; + break; + } + tokencnt++; + } + StringBuilder tmp = new StringBuilder(); + for (int i = 0; i < tokencnt; i++) { + Token useToken = tokens.get(backtrack+i); + tmp.append(useToken.image); + } + String indexName = tmp.toString(); + if (indexName.length() > 0) { + String canonicIndexName = indexFacts.getCanonicName(indexName); + Index index = indexFacts.getIndex(canonicIndexName); + if (! 
index.isNull()) { + indexLastExplicitlyChangedAt = tokens.size(); + return index; + } + } + return defaultIndex; + } + + private int consumeSpecialToken(int start) { + SpecialTokens.SpecialToken specialToken=getSpecialToken(start); + if (specialToken==null) return start; + tokens.add(specialToken.toToken(start,source)); + return start + specialToken.token().length(); + } + + private SpecialTokens.SpecialToken getSpecialToken(int start) { + if (specialTokens == null) { + return null; + } + return specialTokens.tokenize(source.substring(start), substringSpecialTokens); + } + + private int consumeExact(int start,Index index) { + if (index.getExactTerminator() == null) return consumeHeuristicExact(start); + return consumeToTerminator(start,index.getExactTerminator()); + } + + private boolean looksLikeExactEnd(int end) { + int parens = parensToEat; + boolean wantStar = true; + boolean wantBang = true; + boolean eatDigit = false; + + int endLimit = source.length(); + + while (end < endLimit) { + char c = source.charAt(end++); + + if (Character.isWhitespace(c)) { + // ends in whitespace + return true; + } + // handle digits (after a ! sign) + if (eatDigit && Character.isDigit(c)) { + continue; + } + eatDigit = false; + + // ! digits or any number of ! signs: + if (wantBang && c == '!') { + eatDigit = true; + while (end < endLimit) { + c = source.charAt(end); + if (c == '!') { + end++; + // more than one ! 
/**
 * Consumes the content of an exact-match field which has no configured
 * terminator, guessing where the content ends.
 * <p>
 * Leading whitespace is skipped. The content is either a double-quoted string,
 * ended by a quote which {@link #looksLikeExactEnd} accepts as an end, or an
 * unquoted run ended by whitespace or by a '!', star or ')' which
 * looksLikeExactEnd accepts as an end. A star seen before any content is
 * emitted as a separate STAR token ahead of the word. If a start quote is
 * never matched by an end quote the input is scanned a second time with no
 * quote handling.
 * <p>
 * Exactly one WORD token is always added, even when it ends up containing only
 * whitespace or nothing at all.
 *
 * @param start the position in source to start consuming from
 * @return the position after the consumed content, including a terminating
 *         quote if the content was quoted
 */
private int consumeHeuristicExact(int start) {
    int curPos = -1;
    int actualStart = -1; // where the token content starts, -1 until known
    int starPos = -1;     // position of a star seen before any content
    int endLimit = source.length();

    boolean suffStar = false; // a star was seen before the content
    boolean isQuoted = false; // content was enclosed in a matched pair of quotes
    boolean seenSome = false; // some token content has been found

    boolean wantStartQuote = true; // a quote here would open a quoted token
    boolean wantEndQuote = false;  // inside quotes, waiting for the closing one
    boolean wantStartStar = true;  // a star here counts as a star before the content

    // ignore whitespace at start until we see something else
    boolean ignWS = true;

    for (curPos = start; curPos < endLimit; curPos++) {
        char c = source.charAt(curPos);

        if (Character.isWhitespace(c)) {
            if (ignWS) continue;
            // ends exact token unless quoted
            if (!wantEndQuote) break;
        }
        ignWS = false;

        if (c == '"') {
            if (wantStartQuote) {
                // starts actual token
                wantStartQuote = false;
                wantEndQuote = true;
                actualStart = curPos+1;
            } else if (wantEndQuote && looksLikeExactEnd(curPos+1)) {
                // System.err.println("seen quoted token from "+actualStart+" to "+curPos);
                seenSome = true;
                wantEndQuote = false;
                isQuoted = true;
                // ends token
                break;
            }
            // else: part of exact token
            continue;
        }
        // no processing of non-quotes inside quotes
        if (wantEndQuote) continue;

        // a star (plain or full-width) before any content is remembered, not consumed as content
        if (c == '*' || c == '\uFF0A') {
            if (wantStartStar) {
                suffStar = true;
                wantStartStar = false;
                starPos = curPos;
                continue;
            }
        }

        if (c == '!' || c == '*' || c == '\uFF0A') {
            // ends token if non-empty
            if (seenSome && looksLikeExactEnd(curPos)) break;
        }

        if (c == ')' && seenSome && looksLikeExactEnd(curPos)) {
            break;
        }
        if (!seenSome) {
            // everything else: something that starts the actual token
            actualStart = curPos;
            seenSome = true;
            wantStartQuote = false;
            wantStartStar = false;
        }
    }

    int end = curPos;

    // handle some ill-formed inputs:

    if (wantEndQuote) {
        // missing end quote: reprocess without quote handling
        isQuoted = false;
        actualStart = -1;
        starPos = -1;
        suffStar = false;
        seenSome = false;
        wantStartStar = true;

        // ignore whitespace at start until we see something else
        ignWS = true;

        for (curPos = start; curPos < endLimit; curPos++) {
            char c = source.charAt(curPos);

            if (Character.isWhitespace(c)) {
                if (ignWS) continue;
                // ends exact token
                break;
            }
            ignWS = false;

            if (c == '*' || c == '\uFF0A') {
                if (wantStartStar) {
                    suffStar = true;
                    wantStartStar = false;
                    starPos = curPos;
                    continue;
                }
            }

            // unlike the first pass, these end the token without consulting looksLikeExactEnd
            if (c == '!' || c == '*' || c == '\uFF0A') {
                // ends token if non-empty
                if (seenSome) break;
            }

            // a closing paren only ends the token if there is an open paren to match
            if (c == ')' && seenSome && parensToEat > 0) {
                break;
            }
            if (!seenSome) {
                // everything else: something that starts the actual token
                actualStart = curPos;
                seenSome = true;
                wantStartStar = false;
            }
        }
        end = curPos;
    }

    if (! seenSome) {
        // no token content: may need to include stars or whitespace
        if (suffStar) {
            // use the star as token:
            suffStar = false;
            actualStart = starPos;
        } else {
            // just include all we have (possibly whitespace or an empty string):
            actualStart = start;
        }
    }

    // the star goes in first, so it precedes the word in the token list
    if (suffStar) {
        addToken(STAR, "*", starPos, starPos + 1);
    }
    tokens.add(new Token(WORD, source.substring(actualStart, end), true, new Substring(actualStart, end, source))); // XXX: Unsafe?

    // skip terminating quote
    if (isQuoted) {
        end++;
    }
    return end;
}
+ // underscoresOnly = false; + quotesOnly = false; + } else if (c == '\'') { + if (!acceptApostropheAsWordCharacter(currentIndex)) { + break; + } + // Otherwise consume apostrophes... + digitsOnly = false; + } else { + break; + } + tokenEnd += Character.charCount(c); + } + // if (underscores > 3 && !underscoresOnly) { + // tokenEnd -= underscores; + // } + if (tokenEnd>start) { + // if (underscoresOnly) { + // addToken(NOISE, source.substring(start, tokenEnd), start, tokenEnd); + // } else + if (quotesOnly) { + addToken(NOISE, source.substring(start, tokenEnd), start, tokenEnd); + } else { + addToken(digitsOnly ? NUMBER : WORD, source.substring(start, tokenEnd), start, tokenEnd); + } + } + + if (substringSpecialToken==null) + return --tokenEnd; + // TODO: test the logic around tokenEnd with friends + addToken(substringSpecialToken.toToken(tokenEnd,source)); + return --tokenEnd+substringSpecialToken.token().length(); + } + + private void addToken(Token.Kind kind, String word, int start, int end) { + addToken(new Token(kind, word, false, new Substring(start, end, source))); // XXX: Unsafe? + } + + private void addToken(Token token) { + tokens.add(token); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/UnicodePropertyDump.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/UnicodePropertyDump.java new file mode 100644 index 00000000000..6c48e980aff --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/UnicodePropertyDump.java @@ -0,0 +1,111 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
/**
 * Dump properties of unicode characters in a format compatible
 * with fastlib/text/unicode_propertydump
 *
 * <p>Arguments:</p>
 *
 * <ol>
 * <li>start-char-number (inclusive, default 0)</li>
 * <li>end-char-number (exclusive, default 0xffff)</li>
 * <li>debug true|false (default false)</li>
 * </ol>
 *
 * @author <a href="mailto:vlarsen@yahoo-inc.com">Vidar Larsen</a>
 */
class UnicodePropertyDump {

    /** Property bit: white space */
    private static final int WHITE_SPACE = 0x01;
    /** Property bit: word character */
    private static final int WORD_CHAR = 0x02;
    /** Property bit: ideographic */
    private static final int IDEOGRAPHIC = 0x04;
    /** Property bits of a decimal digit: both digit (0x08) and word character (0x02) */
    private static final int DECIMAL_DIGIT = 0x0A;
    /** Property bit: ignorable control */
    private static final int IGNORABLE_CONTROL = 0x10;

    /**
     * Dumps the requested range to standard out.
     *
     * @param arg optional start, end and debug arguments, see the class documentation
     * @throws NumberFormatException if start or end is not a decimal integer
     */
    public static void main(String[] arg) {
        int start = 0;
        int end = 0xffff;
        boolean debug = false;

        if (arg.length > 0) {
            start = Integer.parseInt(arg[0]);
        }
        if (arg.length > 1) {
            end = Integer.parseInt(arg[1]);
        }
        if (arg.length > 2) {
            debug = Boolean.parseBoolean(arg[2]);
        }
        dumpProperties(start, end, debug, System.out);
    }

    /**
     * Writes one line per code point: the code point as 8 hex digits, a space
     * and the property bitmap as 4 hex digits. In debug mode a space and the
     * numeric {@link Character#getType(int) general category} are appended.
     *
     * @param start the first code point to dump
     * @param end the code point to stop before (exclusive)
     * @param debug whether to append the general category to each line
     * @param out where to write the dump
     */
    static void dumpProperties(int start, int end, boolean debug, PrintStream out) {
        for (int codePoint = start; codePoint < end; codePoint++) {
            out.print(zeroPaddedHex(codePoint, 8) + " ");
            out.print(zeroPaddedHex(propertyMap(codePoint), 4));
            if (debug) {
                out.print(" " + Character.getType(codePoint));
            }
            out.println();
        }
    }

    /**
     * Computes the property bitmap of a code point, fastlib-style:
     *
     * <pre>
     * bit 0 = white space
     * bit 1 = word char
     * bit 2 = ideographic
     * bit 3 = decimal digit
     * bit 4 = ignorable control
     * </pre>
     *
     * The int (code point) overloads of the Character methods are used
     * throughout: narrowing to char would make every code point above 0xffff
     * be classified as the unrelated character in its low 16 bits.
     */
    private static int propertyMap(int codePoint) {
        int type = Character.getType(codePoint);
        boolean whitespace = Character.isWhitespace(codePoint);
        int map = 0;

        if (whitespace) {
            map |= WHITE_SPACE;
        }
        if (Character.isLetter(codePoint)) {
            map |= WORD_CHAR;
        }
        if (type == Character.OTHER_LETTER) {
            map |= IDEOGRAPHIC;
        }
        if (Character.isDigit(codePoint)) {
            map |= DECIMAL_DIGIT;
        }

        boolean ignorableType = type == Character.CONTROL || type == Character.FORMAT
                                || type == Character.SURROGATE || type == Character.UNASSIGNED;
        if (ignorableType && !whitespace) {
            map |= IGNORABLE_CONTROL;
        }
        return map;
    }

    /** Returns the lower case hex form of value, left padded with zeroes to at least width digits */
    private static String zeroPaddedHex(int value, int width) {
        String hex = Integer.toHexString(value);
        StringBuilder padded = new StringBuilder(Math.max(width, hex.length()));
        for (int length = hex.length(); length < width; length++) {
            padded.append('0');
        }
        return padded.append(hex).toString();
    }

}
/**
 * Does nothing: this query language has no submodes, so both arguments are ignored.
 * NOTE(review): presumably this implements a hook declared by a parent parser
 * class - confirm, there is no Override annotation to verify it.
 *
 * @param indexName ignored
 * @param searchDefinitions ignored
 */
protected void setSubmodeFromIndex(String indexName, Set<String> searchDefinitions) {
    // No submodes in this language
}
/**
 * Allows an item to disclose its properties and children/value.
 * An item is handed a Discloser and reports its content through these callbacks.
 *
 * @author tonytv
 */
public interface Discloser {
    /**
     * Discloses one named property of the item.
     * NOTE(review): the implementation in TextualQueryRepresentation asserts
     * that the key contains no space character.
     *
     * @param key the name of the property
     * @param value the value of the property
     */
    void addProperty(String key, Object value);

    //A given item should either call setValue or addChild, not both.

    /**
     * Discloses the value of an item. Not to be combined with addChild.
     *
     * @param value the value of the item
     */
    void setValue(Object value);

    /**
     * Discloses one child of an item. Not to be combined with setValue.
     *
     * @param item the child item
     */
    void addChild(Item item);
}
*/ + private class ItemDiscloser implements Discloser { + private final Item item; + + final Map<String, Object> properties = new TreeMap<>(); + final String name; + + Object value; + final List<ItemDiscloser> children = new ArrayList<>(); + + ItemDiscloser(Item item) { + this.item = item; + name = item.getName(); + } + + public void addProperty(String key, Object value) { + assert(key.indexOf(' ') == -1); + properties.put(key, value); + + if (value instanceof Item) + setItemReference((Item)value); + } + + public void setValue(Object value) { + assert(children.isEmpty()); + this.value = value; + } + + public void addChild(Item child) { + assert(value == null); + children.add(expose(child)); + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + builder.append(name); + + if (!properties.isEmpty() || itemReferences.get(item) != null) { + builder.append('['); + addPropertiesString(builder); + builder.append(']'); + } + + if (value != null || !children.isEmpty()) { + builder.append("{\n"); + addBody(builder); + builder.append("}\n"); + } + return builder.toString(); + } + + private void addBody(StringBuilder builder) { + if (value != null) { + addIndented(builder, valueString(value)); + } else { + for (ItemDiscloser child : children) { + addIndented(builder, child.toString()); + } + } + } + + //for each line: add "<indentation><line><newline>" + private void addIndented(StringBuilder builder, String toAdd) { + String indent = " "; + for (String line : toAdd.split(Pattern.quote("\n"))) + builder.append(indent).append(line).append('\n'); + } + + private void addPropertiesString(StringBuilder s) { + boolean firstTime = true; + + Integer itemReference = itemReferences.get(item); + if (itemReference != null) { + addPropertyString(s, "%id", itemReference); + firstTime = false; + } + + for (Map.Entry<String,Object> entry : properties.entrySet()) { + if (!firstTime) { + s.append(' '); + } + addPropertyString(s, entry.getKey(), 
entry.getValue()); + firstTime = false; + } + } + + private void addPropertyString(StringBuilder s, String key, Object value) { + s.append(key).append('=').append(valueString(value)); + } + + private void setItemReference(Item item) { + if (itemReferences.get(item) == null) + itemReferences.put(item, nextItemReference++); + } + + } + + + @SuppressWarnings("rawtypes") + private String valueString(Object value) { + if (value == null) + return null; + else if (value instanceof String) + return '"' + quote((String)value) + '"'; + else if (value instanceof Number || value instanceof Boolean || value instanceof Enum) + return value.toString(); + else if (value instanceof Item) + return itemReference((Item)value); + else if (value.getClass().isArray()) + return listString(arrayToList(value).iterator()); + else if ( value instanceof List ) + return listString(((List)value).iterator()); + else if ( value instanceof Set ) + return listString( ((Set)value).iterator()); + else if ( value instanceof Map ) + return mapString((Map)value); + else + return '"' + quote(value.toString()) + '"'; + } + + //handles both primitive and object arrays. 
+ @SuppressWarnings({ "rawtypes", "unchecked" }) + private List arrayToList(Object array) { + int length = Array.getLength(array); + List list = new ArrayList(); + for (int i = 0; i<length; ++i) + list.add(Array.get(array, i)); + return list; + } + + private String mapString(Map<?, ?> map) { + StringBuilder result = new StringBuilder(); + final String mapBegin = "map("; + result.append(mapBegin); + + boolean firstTime = true; + for (Map.Entry<?,?> entry: map.entrySet()) { + if (!firstTime) + result.append(' '); + firstTime = false; + + result.append(valueString(entry.getKey())).append("=>").append(valueString(entry.getValue())); + } + + result.append(')'); + return result.toString(); + } + + private String listString(Iterator<?> iterator) { + StringBuilder result = new StringBuilder(); + result.append('('); + + boolean firstTime = true; + while (iterator.hasNext()) { + if (!firstTime) + result.append(' '); + firstTime = false; + + result.append(valueString(iterator.next())); + } + + result.append(')'); + return result.toString(); + } + + private String itemReference(Item item) { + Integer reference = itemReferences.get(item); + return reference != null ? 
reference.toString() : "Unknown item: '" + System.identityHashCode(item) + "'"; + } + + private static String quote(String s) { + return s.replaceAll("\"", "\\\\\"" ); + } + + private ItemDiscloser expose(Item item) { + ItemDiscloser itemDiscloser = new ItemDiscloser(item); + item.disclose(itemDiscloser); + return itemDiscloser; + } + + public TextualQueryRepresentation(Item root) { + rootDiscloser = expose(root); + } + + @Override + public String toString() { + return rootDiscloser.toString(); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/CJKSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/CJKSearcher.java new file mode 100644 index 00000000000..009c11ab1fd --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/CJKSearcher.java @@ -0,0 +1,111 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.querytransform; + +import static com.yahoo.prelude.querytransform.StemmingSearcher.STEMMING; + +import java.util.Iterator; +import java.util.ListIterator; + +import com.yahoo.language.Language; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.prelude.query.AndItem; +import com.yahoo.prelude.query.AndSegmentItem; +import com.yahoo.prelude.query.CompositeItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.PhraseItem; +import com.yahoo.prelude.query.PhraseSegmentItem; +import com.yahoo.prelude.query.SegmentItem; +import com.yahoo.prelude.query.WordItem; +import com.yahoo.search.Searcher; +import com.yahoo.search.query.QueryTree; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +/** + * Search to do necessary transforms if 
the query is in segmented in + * a "CJK language". + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@After(PhaseNames.UNBLENDED_RESULT) +@Before(STEMMING) +@Provides(CJKSearcher.TERM_ORDER_RELAXATION) +public class CJKSearcher extends Searcher { + public static final String TERM_ORDER_RELAXATION = "TermOrderRelaxation"; + + @Override + public Result search(Query query, Execution execution) { + Language l = query.getModel().getParsingLanguage(); + if (!l.isCjk()) return execution.search(query); + + QueryTree tree = query.getModel().getQueryTree(); + tree.setRoot(transform(tree.getRoot())); + query.trace("Rewriting for CJK behavior for implicit phrases", true, 2); + return execution.search(query); + } + + private Item transform(Item root) { + if (root instanceof PhraseItem) { + PhraseItem asPhrase = (PhraseItem) root; + if (asPhrase.isExplicit() || hasOverlappingTokens(asPhrase)) return root; + + AndItem replacement = new AndItem(); + for (ListIterator<Item> i = ((CompositeItem) root).getItemIterator(); i.hasNext();) { + Item item = i.next(); + if (item instanceof WordItem) replacement.addItem(item); + else if (item instanceof PhraseSegmentItem) { + replacement.addItem(new AndSegmentItem((PhraseSegmentItem) item)); + } + else replacement.addItem(item); // should never run, but hey... 
just convert and hope it's OK :) + } + return replacement; + } else if (root instanceof PhraseSegmentItem) { + PhraseSegmentItem asSegment = (PhraseSegmentItem) root; + if (asSegment.isExplicit() || hasOverlappingTokens(asSegment)) return root; + else return new AndSegmentItem(asSegment); + } else if (root instanceof SegmentItem) { + return root; // avoid descending into AndSegmentItems and similar + } else if (root instanceof CompositeItem) { + for (ListIterator<Item> i = ((CompositeItem) root).getItemIterator(); i.hasNext();) { + Item item = i.next(); + Item transformedItem = transform(item); + if (item != transformedItem) { + i.set(transformedItem); + } + } + return root; + } + return root; + } + + + private boolean hasOverlappingTokens(PhraseItem phrase) { + boolean has = false; + for (Iterator<Item> i = phrase.getItemIterator(); i.hasNext(); ) { + Item segment = i.next(); + if (segment instanceof PhraseSegmentItem) has = hasOverlappingTokens((PhraseSegmentItem) segment); + if (has) return true; + } + return has; + } + + /* + * We have overlapping tokens (see + * com.yahoo.prelude.querytransform.test.CJKSearcherTestCase + * .testCjkQueryWithOverlappingTokens and ParseTestCase for an explanation) + * if the sum of length of tokens is greater than the lenght of the original + * word + */ + private boolean hasOverlappingTokens(PhraseSegmentItem segments) { + int segmentsLength=0; + for (Iterator<Item> i = segments.getItemIterator(); i.hasNext(); ) { + WordItem segment = (WordItem) i.next(); + segmentsLength += segment.getWord().length(); + } + return segmentsLength > segments.getRawWord().length(); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/CollapsePhraseSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/CollapsePhraseSearcher.java new file mode 100644 index 00000000000..0bddaf5ff51 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/CollapsePhraseSearcher.java @@ 
-0,0 +1,67 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.querytransform; + +import java.util.ListIterator; + +import com.yahoo.prelude.query.CompositeItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.PhraseItem; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.query.QueryTree; +import com.yahoo.search.searchchain.Execution; + +/** + * Make single item phrases in query into single word items. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class CollapsePhraseSearcher extends Searcher { + public Result search(Query query, Execution execution) { + QueryTree tree = query.getModel().getQueryTree(); + Item root = tree.getRoot(); + if (root != null) { + Item newRoot = root.clone(); + newRoot = simplifyPhrases(newRoot); + // Sets new root instead of transforming the query tree + // to make code nicer if the root is a single term phrase + if (!root.equals(newRoot)) { + tree.setRoot(newRoot); + query.trace("Collapsing single term phrases to single terms", + true, 2); + } + } + return execution.search(query); + } + + + private Item simplifyPhrases(Item root) { + if (root == null) { + return root; + } + else if (root instanceof PhraseItem) { + return collapsePhrase((PhraseItem)root); + } + else if (root instanceof CompositeItem) { + CompositeItem composite = (CompositeItem)root; + ListIterator<Item> i = composite.getItemIterator(); + while (i.hasNext()) { + Item original = i.next(); + Item transformed = simplifyPhrases(original); + if (original != transformed) + i.set(transformed); + } + return root; + } + else { + return root; + } + } + private Item collapsePhrase(PhraseItem root) { + if (root.getItemCount() == 1) + return root.getItem(0); + else + return root; + } +} diff --git 
a/container-search/src/main/java/com/yahoo/prelude/querytransform/IndexCombinatorSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/IndexCombinatorSearcher.java new file mode 100644 index 00000000000..e56303e60f8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/IndexCombinatorSearcher.java @@ -0,0 +1,358 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.querytransform; + +import static com.yahoo.prelude.querytransform.PhrasingSearcher.PHRASE_REPLACEMENT; + +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.log.LogLevel; +import com.yahoo.prelude.Index; +import com.yahoo.prelude.Index.Attribute; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.*; +import com.yahoo.search.Query; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +import java.util.*; + +/** + * Searcher to rewrite queries to achieve mixed recall between indices and + * memory attributes. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@After({PhaseNames.RAW_QUERY, PHRASE_REPLACEMENT}) +@Before(PhaseNames.TRANSFORMED_QUERY) +@Provides(IndexCombinatorSearcher.MIXED_RECALL_REWRITE) +// TODO: This is not necessary on Vespa 6, we should probably remove it from the default chain but keep it +// around until Vespa 6 to avoid breaking those who refer to it. +public class IndexCombinatorSearcher extends Searcher { + public static final String MIXED_RECALL_REWRITE = "MixedRecallRewrite"; + + private static class ArrayComparator implements Comparator<Attribute[]> { + /** + * Note, this ignores if there is a difference in whether to + * attributes have tokenized content. 
(If this is the case, + * we are having worse problems anyway.) + */ + public int compare(Attribute[] o1, Attribute[] o2 ) { + if (o1.length < o2.length) { + return -1; + } else if (o1.length > o2.length) { + return 1; + } + int limit = o1.length; + for (int i = 0; i < limit; ++i) { + int r = o1[i].name.compareTo(o2[i].name); + if (r != 0) { + return r; + } + } + return 0; + } + } + + private final ArrayComparator comparator = new ArrayComparator(); + + private enum RewriteStrategies { + NONE, CHEAP_AND, EXPENSIVE_AND, FLAT + } + + @Override + public com.yahoo.search.Result search(Query query, Execution execution) { + Item root = query.getModel().getQueryTree().getRoot(); + IndexFacts.Session session = execution.context().getIndexFacts().newSession(query); + String oldQuery = (query.getTraceLevel() >= 2) ? root.toString() : ""; + + if (root instanceof BlockItem || root instanceof PhraseItem) { + root = convertSinglePhraseOrBlock(root, session); + } else if (root instanceof CompositeItem) { + root = rewrite((CompositeItem) root, session); + } + query.getModel().getQueryTree().setRoot(root); + + if ((query.getTraceLevel() >= 2) && !(oldQuery.equals(root.toString()))) { + query.trace("Rewriting for mixed recall between indices and attributes", true, 2); + } + return execution.search(query); + } + + private RewriteStrategies chooseRewriteStrategy(CompositeItem c, IndexFacts.Session session) { + if (c instanceof OrItem) { + return RewriteStrategies.FLAT; + } else if (!(c instanceof AndItem)) { + return RewriteStrategies.NONE; + } + Map<Attribute[], Integer> m = new TreeMap<>(comparator); + for (Iterator<Item> i = c.getItemIterator(); i.hasNext();) { + Item j = i.next(); + if (j instanceof BlockItem || j instanceof PhraseItem) { + Attribute[] attributes= getIndices((HasIndexItem) j, session); + if (attributes == null) { + continue; + } + Integer count = m.get(attributes); + if (count == null) { + count = 1; + } else { + count = count.intValue() + 1; + } + 
m.put(attributes, count); + } + } + + if (m.size() == 0) { + return RewriteStrategies.NONE; + } + + int singles = 0; + int pairs = 0; + int higher = 0; + // count the number of sets being associated with 1, 2 or more terms + for (Integer i : m.values()) { + switch (i.intValue()) { + case 1: + ++singles; + break; + case 2: + pairs += 2; + break; + default: + ++higher; + break; + } + } + if (higher == 0 && pairs + singles <= 2) { + return RewriteStrategies.EXPENSIVE_AND; + } else { + return RewriteStrategies.CHEAP_AND; + } + } + + private CompositeItem rewriteNot(NotItem not, IndexFacts.Session session) { + Item positive = not.getItem(0); + if (positive instanceof BlockItem || positive instanceof PhraseItem) { + positive = convertSinglePhraseOrBlock(positive, session); + not.setItem(0, positive); + } else if (positive instanceof CompositeItem) { + CompositeItem c = (CompositeItem) positive; + positive = rewrite(c, session); + not.setItem(0, positive); + } + + int length = not.getItemCount(); + // no need for keeping proximity in the negative branches, so we + // convert them one by one, _and_ always uses cheap transform + for (int i = 1; i < length; ++i) { + Item exclusion = not.getItem(i); + if (exclusion instanceof BlockItem || exclusion instanceof PhraseItem) { + exclusion = convertSinglePhraseOrBlock(exclusion, session); + not.setItem(i, exclusion); + } else if (exclusion instanceof CompositeItem) { + CompositeItem c = (CompositeItem) exclusion; + switch (chooseRewriteStrategy(c, session)) { + case NONE: + c = traverse(c, session); + break; + case CHEAP_AND: + case EXPENSIVE_AND: + c = cheapTransform(c, session); + break; + default: + c = flatTransform(c, session); + break; + } + not.setItem(i, c); + } + } + return not; + } + + private Item rewrite(CompositeItem c, IndexFacts.Session session) { + if (c instanceof NotItem) { + c = rewriteNot((NotItem) c, session); + return c; + } else if (c instanceof CompositeItem) { + switch (chooseRewriteStrategy(c, session)) { 
+ case NONE: + c = traverse(c, session); + break; + case CHEAP_AND: + c = cheapTransform(c, session); + break; + case EXPENSIVE_AND: + c = expensiveTransform((AndItem) c, session); + break; + case FLAT: + c = flatTransform(c, session); + default: + break; + } + } + return c; + } + + private CompositeItem traverse(CompositeItem c, IndexFacts.Session session) { + int length = c.getItemCount(); + for (int i = 0; i < length; ++i) { + Item word = c.getItem(i); + if (word instanceof CompositeItem && !(word instanceof PhraseItem) + && !(word instanceof BlockItem)) { + c.setItem(i, rewrite((CompositeItem) word, session)); + } + } + return c; + } + + private CompositeItem expensiveTransform(AndItem c, IndexFacts.Session session) { + int[] indices = new int[2]; + int items = 0; + int length = c.getItemCount(); + Attribute[][] names = new Attribute[2][]; + CompositeItem result = null; + for (int i = 0; i < length; ++i) { + Item word = c.getItem(i); + if (word instanceof BlockItem || word instanceof PhraseItem) { + Attribute[] attributes = getIndices((HasIndexItem) word, session); + if (attributes == null) { + continue; + } + // this throwing an out of bounds if more than two candidates is intentional + names[items] = attributes; + indices[items++] = i; + } else if (word instanceof CompositeItem) { + c.setItem(i, rewrite((CompositeItem) word, session)); + } + } + switch (items) { + case 1: + result = linearAnd(c, names[0], indices[0]); + break; + case 2: + result = quadraticAnd(c, names[0], names[1], indices[0], indices[1]); + break; + default: + // should never happen + getLogger().log( + LogLevel.WARNING, + "Unexpected number of items for mixed recall, got " + items + + ", expected 1 or 2."); + break; + } + return result; + } + + private Attribute[] getIndices(HasIndexItem block, IndexFacts.Session session) { + return session.getIndex(block.getIndexName()).getMatchGroup(); + } + + private OrItem linearAnd(AndItem c, Attribute[] names, int brancherIndex) { + OrItem or = new 
OrItem(); + for (int i = 0; i < names.length; ++i) { + AndItem duck = (AndItem) c.clone(); + Item b = retarget(duck.getItem(brancherIndex), names[i]); + duck.setItem(brancherIndex, b); + or.addItem(duck); + } + return or; + } + + private OrItem quadraticAnd(AndItem c, Attribute[] firstNames, Attribute[] secondNames, int firstBrancher, int secondBrancher) { + OrItem or = new OrItem(); + for (int i = 0; i < firstNames.length; ++i) { + for (int j = 0; j < secondNames.length; ++j) { + AndItem duck = (AndItem) c.clone(); + Item b = retarget(duck.getItem(firstBrancher), firstNames[i]); + duck.setItem(firstBrancher, b); + b = retarget(duck.getItem(secondBrancher), secondNames[j]); + duck.setItem(secondBrancher, b); + or.addItem(duck); + } + } + return or; + } + + private CompositeItem flatTransform(CompositeItem c, IndexFacts.Session session) { + int maxIndex = c.getItemCount() - 1; + for (int i = maxIndex; i >= 0; --i) { + Item word = c.getItem(i); + if (word instanceof BlockItem || word instanceof PhraseItem) { + Attribute[] attributes = getIndices((HasIndexItem) word, session); + if (attributes == null) { + continue; + } + c.removeItem(i); + for (Attribute name : attributes) { + Item term = word.clone(); + Item forNewIndex = retarget(term, name); + c.addItem(forNewIndex); + } + } else if (word instanceof CompositeItem) { + c.setItem(i, rewrite((CompositeItem) word, session)); + } + } + return c; + } + + private CompositeItem cheapTransform(CompositeItem c, IndexFacts.Session session) { + if (c instanceof OrItem) { + return flatTransform(c, session); + } + int length = c.getItemCount(); + for (int i = 0; i < length; ++i) { + Item j = c.getItem(i); + if (j instanceof BlockItem || j instanceof PhraseItem) { + Attribute[] attributes = getIndices((HasIndexItem) j, session); + if (attributes == null) { + continue; + } + CompositeItem or = searchAllForItem(j, attributes); + c.setItem(i, or); + } else if (j instanceof CompositeItem) { + c.setItem(i, rewrite((CompositeItem) j, 
session)); + } + } + return c; + } + + private OrItem searchAllForItem(Item word, Attribute[] attributes) { + OrItem or = new OrItem(); + for (Attribute name : attributes) { + Item term = word.clone(); + term = retarget(term, name); + or.addItem(term); + } + return or; + } + + private Item retarget(Item word, Attribute newIndex) { + if (word instanceof PhraseItem && !newIndex.isTokenizedContent()) { + PhraseItem asPhrase = (PhraseItem) word; + WordItem newWord = new WordItem(asPhrase.getIndexedString(), newIndex.name, false); + return newWord; + } else if (word instanceof IndexedItem) { + word.setIndexName(newIndex.name); + } else if (word instanceof CompositeItem) { + CompositeItem asComposite = (CompositeItem) word; + for (Iterator<Item> i = asComposite.getItemIterator(); i.hasNext();) { + Item segment = i.next(); + segment.setIndexName(newIndex.name); + } + } + return word; + } + + private Item convertSinglePhraseOrBlock(Item item, IndexFacts.Session session) { + Item newItem; + Attribute[] attributes = getIndices((HasIndexItem) item, session); + if (attributes == null) { + return item; + } + newItem = searchAllForItem(item, attributes); + return newItem; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/LiteralBoostSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/LiteralBoostSearcher.java new file mode 100644 index 00000000000..152a7565cb9 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/LiteralBoostSearcher.java @@ -0,0 +1,91 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.querytransform; + +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.prelude.Index; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.CompositeItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.NotItem; +import com.yahoo.prelude.query.RankItem; +import com.yahoo.prelude.query.TermItem; +import com.yahoo.prelude.query.WordItem; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +import java.util.Iterator; + +import static com.yahoo.prelude.querytransform.StemmingSearcher.STEMMING; +import static com.yahoo.language.LinguisticsCase.toLowerCase; + +/** + * Adds rank terms to boost hits matching exact literals fields using info + * from indexing commands. + * + * @author bratseth + */ +@Before(STEMMING) +@After(PhaseNames.UNBLENDED_RESULT) +public class LiteralBoostSearcher extends Searcher { + + @Override + public Result search(Query query, Execution execution) { + addRankTerms(query, execution.context().getIndexFacts().newSession(query)); + return execution.search(query); + } + + private void addRankTerms(Query query, IndexFacts.Session indexFacts) { + RankItem newRankTerms = new RankItem(); + addLiterals(newRankTerms, query.getModel().getQueryTree().getRoot(), indexFacts); + if (newRankTerms.getItemCount() > 0) + addTopLevelRankTerms(newRankTerms, query); + + if (query.getTraceLevel() >= 2 && newRankTerms.getItemCount() > 0) + query.trace("Added rank terms for possible literal field matches.", true, 2); + } + + /** + * Adds a RankItem at the root of a query, but only if there is + * at least one rank term in the specified RankItem. + * If the root is already a RankItem, just append the new rank terms. + * + * @param rankTerms the new rank item to add. 
+ * @param query the query to add to + */ + private void addTopLevelRankTerms(RankItem rankTerms, Query query) { + Item root = query.getModel().getQueryTree().getRoot(); + if (root instanceof RankItem) { + for (Iterator<Item> i = rankTerms.getItemIterator(); i.hasNext(); ) { + ((RankItem)root).addItem(i.next()); + } + } + else { + rankTerms.addItem(0, root); + query.getModel().getQueryTree().setRoot(rankTerms); + + } + } + + private void addLiterals(RankItem rankTerms, Item item, IndexFacts.Session indexFacts) { + if (item == null) return; + + if (item instanceof NotItem) { + addLiterals(rankTerms, ((NotItem) item).getPositiveItem(), indexFacts); + } + else if (item instanceof CompositeItem) { + for (Iterator<Item> i = ((CompositeItem)item).getItemIterator(); i.hasNext(); ) + addLiterals(rankTerms, i.next(), indexFacts); + } + else if (item instanceof TermItem) { + TermItem termItem = (TermItem)item; + Index index = indexFacts.getIndex(termItem.getIndexName()); + if (index.getLiteralBoost()) + rankTerms.addItem(new WordItem(toLowerCase(termItem.getRawWord()), index.getName() + "_literal")); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/NoRankingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/NoRankingSearcher.java new file mode 100644 index 00000000000..7df98fdd093 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/NoRankingSearcher.java @@ -0,0 +1,42 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.querytransform; + + +import java.util.List; + +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.search.Searcher; +import com.yahoo.search.query.Sorting.FieldOrder; +import com.yahoo.search.searchchain.Execution; + + +/** + * Avoid doing relevance calculations if sorting only + * on attributes. 
+ * + * @author Steinar Knutsen + */ +@After("rawQuery") +@Before("transformedQuery") +public class NoRankingSearcher extends Searcher { + + private static final String RANK = "[rank]"; + private static final String UNRANKED = "unranked"; + + @Override + public com.yahoo.search.Result search(com.yahoo.search.Query query, Execution execution) { + List<FieldOrder> s = (query.getRanking().getSorting() != null) ? query.getRanking().getSorting().fieldOrders() : null; + if (s == null) { + return execution.search(query); + } + for (FieldOrder f : s) { + if (RANK.equals(f.getFieldName())) { + return execution.search(query); + } + } + query.getRanking().setProfile(UNRANKED); + return execution.search(query); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/NonPhrasingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/NonPhrasingSearcher.java new file mode 100644 index 00000000000..c79933dbbd0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/NonPhrasingSearcher.java @@ -0,0 +1,74 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.querytransform; + +import com.yahoo.component.ComponentId; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.searchchain.Execution; + +import java.util.List; + +/** + * <p>Detects and removes certain phrases from the query.</p> + * + * @author bratseth + */ +@After("rawQuery") +@Before("transformedQuery") +public class NonPhrasingSearcher extends Searcher { + + private static final CompoundName suggestonly=new CompoundName("suggestonly"); + + private PhraseMatcher phraseMatcher; + + public NonPhrasingSearcher(ComponentId id, QrSearchersConfig config) { + super(id); + setupAutomatonFile(config.com().yahoo().prelude().querytransform().NonPhrasingSearcher().automatonfile()); + } + + /** + * Creates a nonphrasing searcher + * + * @param phraseAutomatonFile the file containing phrases which should be removed + * @throws IllegalStateException if the automata component is unavailable + * in the current environment + * @throws IllegalArgumentException if the file is not found + */ + public NonPhrasingSearcher(String phraseAutomatonFile) { + setupAutomatonFile(phraseAutomatonFile); + } + + private void setupAutomatonFile(String phraseAutomatonFile) { + if (phraseAutomatonFile == null || phraseAutomatonFile.trim().equals("")) { + //no file, just use dummy matcher + phraseMatcher = PhraseMatcher.getNullMatcher(); + } else { + //use real matcher + phraseMatcher = new PhraseMatcher(phraseAutomatonFile); + } + } + + @Override + public com.yahoo.search.Result search(com.yahoo.search.Query query, Execution execution) { + List<PhraseMatcher.Phrase> phrases=phraseMatcher.matchPhrases(query.getModel().getQueryTree().getRoot()); + if (phrases!=null && !query.properties().getBoolean(suggestonly, false)) { + remove(phrases); + 
query.trace("Removing stop words",true,2); + } + return execution.search(query); + } + + private void remove(List<PhraseMatcher.Phrase> phrases) { + // Removing the leaf replace phrases first to preserve + // the start index of each replace phrase until removing + for (int i=phrases.size()-1; i>=0; i-- ) { + PhraseMatcher.Phrase phrase= phrases.get(i); + if (phrase.getLength()<phrase.getOwner().getItemCount()) // Don't removeField all + phrase.remove(); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/NormalizingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/NormalizingSearcher.java new file mode 100644 index 00000000000..1d77b9184a3 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/NormalizingSearcher.java @@ -0,0 +1,167 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.querytransform; + +import java.util.*; + +import com.google.inject.Inject; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.prelude.Index; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.IndexFacts.Session; +import com.yahoo.prelude.query.*; +import com.yahoo.prelude.query.WordAlternativesItem.Alternative; +import com.yahoo.search.Searcher; +import com.yahoo.language.Language; +import com.yahoo.language.Linguistics; +import com.yahoo.search.Query; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +import static com.yahoo.prelude.querytransform.StemmingSearcher.STEMMING; + +/** + * Normalizes accents + * + * @author bratseth + */ +@After({ PhaseNames.UNBLENDED_RESULT, STEMMING }) +@Provides(NormalizingSearcher.ACCENT_REMOVAL) +public class NormalizingSearcher extends Searcher { + + public static final String ACCENT_REMOVAL = "AccentRemoval"; + private final 
Linguistics linguistics; + + @Inject + public NormalizingSearcher(Linguistics linguistics) { + this.linguistics = linguistics; + } + + protected boolean handles(String command) { + return "normalize".equals(command); + } + + public String getFunctionName() { + return "Normalizing accents"; + } + + @Override + public com.yahoo.search.Result search(com.yahoo.search.Query query, Execution execution) { + normalize(query, execution.context().getIndexFacts().newSession(query)); + return execution.search(query); + } + + protected void normalize(Query query, IndexFacts.Session indexFacts) { + String oldQuery = (query.getTraceLevel() >= 2) ? query.getModel().getQueryTree().getRoot().toString() : ""; + normalizeBody(query, indexFacts); + if (query.getTraceLevel() >= 2) + if (!(oldQuery.equals(query.getModel().getQueryTree().getRoot().toString()))) query.trace(getFunctionName(), true, 2); + } + + private Query normalizeBody(Query query, IndexFacts.Session indexFacts) { + Item root = query.getModel().getQueryTree().getRoot(); + Language language = query.getModel().getParsingLanguage(); + if (root instanceof BlockItem) { + List<Item> rootItems = new ArrayList<>(1); + + rootItems.add(root); + ListIterator<Item> i = rootItems.listIterator(); + + i.next(); + normalizeBlocks(language, indexFacts, (BlockItem) root, i); + query.getModel().getQueryTree().setRoot(rootItems.get(0)); + } else if (root instanceof CompositeItem) { + query.getModel().getQueryTree().setRoot(normalizeComposite(language, indexFacts, (CompositeItem) root)); + } + return query; + } + + private Item normalizeComposite(Language language, IndexFacts.Session indexFacts, CompositeItem item) { + if (item instanceof PhraseItem) { + return normalizePhrase(language, indexFacts, (PhraseItem) item); + } + else { + for (ListIterator<Item> i = item.getItemIterator(); i.hasNext(); ) { + Item current = i.next(); + + if (current instanceof BlockItem) { + normalizeBlocks(language, indexFacts, (BlockItem) current, i); + } else if 
(current instanceof CompositeItem) { + Item currentProcessed = normalizeComposite(language, indexFacts, (CompositeItem) current); + i.set(currentProcessed); + } + } + return item; + } + } + + private void normalizeBlocks(Language language, IndexFacts.Session indexFacts, BlockItem block, ListIterator<Item> i) { + if (block instanceof TermItem) { + if (block instanceof WordAlternativesItem) { + normalizeAlternatives(language, indexFacts, (WordAlternativesItem) block); + } else { + normalizeWord(language, indexFacts, (TermItem) block, i); + } + } else { + for (ListIterator<Item> j = ((SegmentItem) block).getItemIterator(); j.hasNext();) + normalizeWord(language, indexFacts, (TermItem) j.next(), j); + } + } + + private void normalizeAlternatives(Language language, Session indexFacts, WordAlternativesItem block) { + if (!block.isNormalizable()) { + return; + } + { + Index index = indexFacts.getIndex(block.getIndexName()); + if (index.isAttribute()) { + return; + } + if (!index.getNormalize()) { + return; + } + } + + List<Alternative> terms = block.getAlternatives(); + for (Alternative term : terms) { + String accentDropped = linguistics.getTransformer().accentDrop(term.word, language); + if (!term.word.equals(accentDropped) && accentDropped.length() > 0) { + block.addTerm(accentDropped, term.exactness * .7d); + } + } + } + + private Item normalizePhrase(Language language, IndexFacts.Session indexFacts, PhraseItem phrase) { + if ( ! 
indexFacts.getIndex(phrase.getIndexName()).getNormalize()) return phrase; + + for (ListIterator<Item> i = phrase.getItemIterator(); i.hasNext();) { + IndexedItem content = (IndexedItem) i.next(); + + if (content instanceof TermItem) { + normalizeWord(language, indexFacts, (TermItem) content, i); + } + else { + PhraseSegmentItem segment = (PhraseSegmentItem) content; + for (ListIterator<Item> j = segment.getItemIterator(); j.hasNext();) + normalizeWord(language, indexFacts, (TermItem) j.next(), j); + } + } + return phrase; + } + + private void normalizeWord(Language language, IndexFacts.Session indexFacts, TermItem term, ListIterator<Item> i) { + if ( ! (term instanceof WordItem)) return; + if ( ! term.isNormalizable()) return; + Index index = indexFacts.getIndex(term.getIndexName()); + if (index.isAttribute()) return; + if ( ! index.getNormalize()) return; + + WordItem word = (WordItem) term; + String accentDropped = linguistics.getTransformer().accentDrop(word.getWord(), language); + if (accentDropped.length() == 0) + i.remove(); + else + word.setWord(accentDropped); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/PhraseMatcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/PhraseMatcher.java new file mode 100644 index 00000000000..da969986394 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/PhraseMatcher.java @@ -0,0 +1,556 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.querytransform; + +import com.yahoo.fsa.FSA; +import com.yahoo.prelude.query.*; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; + +import static com.yahoo.language.LinguisticsCase.toLowerCase; + +/** + * <p>Detects query phrases using an automaton. 
This class is thread safe.</p> + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class PhraseMatcher { + + private FSA phraseFSA; + + private boolean matchPhraseItems=false; + + private boolean matchSingleItems=false; + + /** Whether this should ignore regular plural/singular form differences when matching */ + private boolean ignorePluralForm=false; + + /** False to matche the longest phrase, true to match <i>all</i> phrases */ + private boolean matchAll =false; + + /** For null subclass only */ + PhraseMatcher() { + } + + /** + * Creates a phrase matcher. This will not ignore plural/singular form differences when matching + * + * @param phraseAutomatonFile the file containing phrases to match + * @throws IllegalArgumentException if the file is not found + */ + public PhraseMatcher(String phraseAutomatonFile) { + this(phraseAutomatonFile,false); + } + + /** + * Creates a phrase matcher + * + * @param phraseAutomatonFile the file containing phrases to match + * @param ignorePluralForm whether we should ignore plural and singular forms as matches + * @throws IllegalArgumentException if the file is not found + */ + public PhraseMatcher(String phraseAutomatonFile,boolean ignorePluralForm) { + this.ignorePluralForm=ignorePluralForm; + phraseFSA=new FSA(phraseAutomatonFile); + } + + /** + * Creates a phrase matcher + * + * @param phraseAutomatonFSA the fsa containing phrases to match + * @param ignorePluralForm whether we should ignore plural and singular forms as matches + * @throws IllegalArgumentException if FSA is null + */ + public PhraseMatcher(FSA phraseAutomatonFSA,boolean ignorePluralForm) { + if(phraseAutomatonFSA==null) throw new IllegalArgumentException("FSA is null"); + this.ignorePluralForm=ignorePluralForm; + phraseFSA=phraseAutomatonFSA; + } + + /** + * Set whether to match words contained in phrase items as well. 
+ * Default is false - don't match words contained in phrase items + */ + public void setMatchPhraseItems(boolean matchPhraseItems) { + this.matchPhraseItems=matchPhraseItems; + } + + /** + * Sets whether single items should be matched and returned as phrase matches. + * Default is false. + */ + public void setMatchSingleItems(boolean matchSingleItems) { + this.matchSingleItems=matchSingleItems; + } + + /** Sets whether we should ignore plural/singular form when matching */ + public void setIgnorePluralForm(boolean ignorePluralForm) { this.ignorePluralForm=ignorePluralForm; } + + /** + * Sets whether to return the longest matching phrase when there are overlapping matches (default), + * or <i>all</i> matching phrases + */ + public void setMatchAll(boolean matchAll) { this.matchAll =matchAll; } + + /** + * Finds all phrases (word sequences of length 1 or higher) + * of the same index, not negative items of a notitem, + * which constitutes a complete entry in the automaton of this matcher + * + * @param queryItem the root query item in which to match phrases + * @return the matched phrases, or <b>null</b> if there was no matches + */ + public List<Phrase> matchPhrases(Item queryItem) { + if (matchSingleItems && (queryItem instanceof TermItem)) { + return matchSingleItem((TermItem)queryItem); + } + else { + MatchedPhrases phrases=new MatchedPhrases(); + recursivelyMatchPhrases(queryItem,phrases); + return phrases.toList(); + } + } + + /** Returns null if this word does not match the automaton, a single-item list if it does */ + private List<Phrase> matchSingleItem(TermItem termItem) { + String matchWord=toLowerCase(termItem.stringValue()); + String replaceWord=null; + FSA.State state = phraseFSA.getState(); + if (!matches(state,matchWord)) { + if (!ignorePluralForm) return null; + matchWord=switchForm(matchWord); + if (!matches(state,matchWord)) return null; + replaceWord=matchWord; + } + + List<Phrase> itemList=new java.util.ArrayList<>(1); + itemList.add(new 
Phrase(termItem,replaceWord,state.dataString())); + return itemList; + + } + + private boolean matches(FSA.State state,String word) { + state.start(); + state.delta(word); + return state.isFinal(); + } + + /** Find matches within a composite */ + private void recursivelyMatchPhrases(Item item,MatchedPhrases phrases) { + if (item==null) return; + if ( ! (item instanceof CompositeItem) ) return; + if ( !matchPhraseItems && item instanceof PhraseItem ) return; + + CompositeItem owner=(CompositeItem)item; + int i=0; + int checkItemCount=owner.getItemCount(); + if (owner instanceof NotItem) + checkItemCount=1; // Skip negatives + + while (i<checkItemCount) { + int largestFoundLength=findPhrasesAtStartpoint(i,owner,phrases); + + if (largestFoundLength==0 || matchAll) { + recursivelyMatchPhrases(owner.getItem(i),phrases); + i=i+1; + } + else { + i=i+largestFoundLength; + } + } + } + + /** + * If (!matchAll), finds longest possible phrase starting at the + * given index in the owner and adds it to phrases. + * + * If (matchAll), finds all possible phrases starting at the given index + * + * @return the length of the largest phrase found at this starting point, or 0 if none + */ + private int findPhrasesAtStartpoint(int startIndex,CompositeItem owner,MatchedPhrases phrases) { + FSA.State state = phraseFSA.getState(); + int currentIndex=startIndex; + Phrase phrase=null; + List<String> replaceList=null; + + String index=null; + state.start(); + + while (currentIndex<owner.getItemCount()) { // Loop until the largest possible phrase is passed + Item current=owner.getItem(currentIndex); + if (! 
(current instanceof TermItem) ) break; + + TermItem termItem=(TermItem)current; + + if (state.isStartState()) + index=termItem.getIndexName(); + else + if (!termItem.getIndexName().equals(index)) break; + + String lowercased = toLowerCase(termItem.stringValue()); + boolean matched=state.tryDeltaWord(lowercased); + if (!matched && ignorePluralForm) { + String invertedWord=switchForm(lowercased); + matched=state.tryDeltaWord(invertedWord); + if (matched) + replaceList=setReplace(replaceList,currentIndex-startIndex,invertedWord); + } + if (!matched) break; + + if (state.isFinal()) // Legal return point reached, but we'll look for longer ones too + phrase=new Phrase(owner,replaceList,startIndex,currentIndex-startIndex+1,state.dataString()); + if (matchAll) + phrases.add(phrase); + currentIndex++; + } + + if (phrase==null) return 0; + if (!matchAll) + phrases.add(phrase); + return phrase.getLength(); + } + + /** Adds a replace word at an index, and any required null's to get to this item. Creates the list if it is null */ + private List<String> setReplace(List<String> replaceList,int index,String invertedWord) { + if (replaceList==null) + replaceList=new ArrayList<>(); + while (replaceList.size()<index) + replaceList.add(null); + replaceList.add(invertedWord); + return replaceList; + } + + /** Makes this plural if it is singular and vice-versa */ + private String switchForm(String word) { + if (word.endsWith("s") && word.length()>2) + return word.substring(0,word.length()-1); + return word + "s"; + } + + /** Holder of a lazily created list of matched phrases */ + private static class MatchedPhrases { + + private List<Phrase> phrases=null; + + private void add(Phrase phrase) { + if (phrase==null) return; + if (phrases==null) + phrases=new java.util.ArrayList<>(5); + phrases.add(phrase); + } + + /** Returns the list of contained phrases, or null */ + public List<Phrase> toList() { return phrases; } + + } + + /** + * Points to a collection of word items (one or more) + * 
which is matches a complete listing in an automat + */ + public static class Phrase { + + /** Points to the single or multiple words matched by this phrase */ + private Matched matched; + + private String data; + + + private Phrase(Matched matched,String data) { + this.matched=matched; + this.data=data; + } + + + public Phrase(TermItem item,String replace,String data) { + this(new MatchedWord(item,replace),data); + } + + /** + * Creates a phrase match + * + * @param owner the composite we have matched within + * @param replace the list of string to replace the matched by, or null to not replace. + * This transfers ownership of this list to this class - it can not subsequently be accessed + * by the caller. If this list is set, it must have the same length as <code>length</code>. + * No replacement is represented by null items within the list. + * @param startIndex the first index in composite to match + * @param length the length of the matched terms + * @param data the data accompanying this match + */ + private Phrase(CompositeItem owner,List<String> replace,int startIndex,int length,String data) { + this(new MatchedComposite(owner,replace,startIndex,length),data); + } + + /** Returns the owner, or null if this is a single item phrase with no owner */ + public CompositeItem getOwner() { return matched.getOwner(); } + + public int getStartIndex() { return matched.getStartIndex(); } + + public int getLength() { return matched.getLength(); } + + /** Returns the data stored by the automaton for this phrase at this position, or null if none */ + public String getData() { return data; } + + /** Returns the n'th item in this, throws if index out of bounds */ + public TermItem getItem(int index) { + return matched.getItem(index); + } + + /** Returns true if this phrase contains all the words of the owner, or if there is no owner */ + public boolean isComplete() { + return matched.isComplete(); + } + + /** Replaces the words items of this phrase with a phrase item. 
Does nothing if this is not a composite match */ + public void replace() { + matched.replace(); + } + + /** Removes the word items of this phrase. Does nothing nuless this is a composite */ + public void remove() { + matched.remove(); + } + + /** Returns the length of the underlying phrase */ + public int getBackedLength() { + return matched.getBackedLength(); + } + + /** Returns the items of this phrase as a read-only iterator */ + public MatchIterator itemIterator() { + return new MatchIterator(this); + } + + public String toString() { + StringBuilder buffer=new StringBuilder("\""); + for (Iterator<Item> i=itemIterator(); i.hasNext(); ) { + buffer.append(i.next().toString()); + if (i.hasNext()) + buffer.append(" "); + } + buffer.append("\""); + return buffer.toString(); + } + + private abstract static class Matched { + + public abstract CompositeItem getOwner(); + + public abstract int getStartIndex(); + + public abstract int getLength(); + + public abstract boolean isComplete(); + + /** Returns whether there is an index at the current item */ + public abstract boolean hasItemAt(int index); + + public void replace() {} + + public void remove() {} + + public abstract TermItem getItem(int index); + + public abstract String getReplace(int index); + + /** Returns the length of the underlying item */ + public abstract int getBackedLength(); + + public abstract boolean hasReplaces(); + + } + + private static class MatchedWord extends Matched { + + /** The term matched by this */ + private TermItem item; + + /** The word to replace the matched word by, or null to not replace */ + private String replace; + + public MatchedWord(TermItem item,String replace) { + this.item=item; + this.replace=replace; + } + + public Item getItem() { return item; } + + public boolean hasItemAt(int index) { + return index==0; + } + + public CompositeItem getOwner() { return null; } + + public int getStartIndex() { return 0; } + + public int getLength() { return 1; } + + public @Override 
TermItem getItem(int index) { + if (index!=0) throw new IndexOutOfBoundsException("No word at " + index + " in " + this); + return item; + } + + public boolean isComplete() { return true; } + + public int getBackedLength() { return 1; } + + public String getReplace(int index) { return replace; } + + public boolean hasReplaces() { return replace!=null; } + + } + + private static class MatchedComposite extends Matched { + + /** The item having the phrase words as direct descendants */ + private CompositeItem owner; + + /** The number of phrase items */ + private int length; + + private int initialOwnerLength; + + /** The (0-base) index of the first phrase word item in the owner */ + private int startIndex; + + /** The first matched item */ + private Item startItem; + + /** + * The word to replace by at the given index, or null if none of the phrase words should be replaced + * This is either null, or of length <code>length</code>, with null values where nothing should be replaced + */ + private List<String> replace=null; + + public MatchedComposite(CompositeItem owner,List<String> replace,int startIndex,int length) { + this.owner=owner; + this.initialOwnerLength=owner.getItemCount(); + this.replace = replace; + this.startIndex=startIndex; + this.startItem=owner.getItem(startIndex); + this.length=length; + } + + public CompositeItem getOwner() { return owner; } + + public int getStartIndex() { return startIndex; } + + public int getLength() { return length; } + + public int getBackedLength() { return owner.getItemCount()-startIndex; } + + public boolean hasItemAt(int index) { + adjustIfBackingChanged(); + if (startIndex<0) return false; // Invalid state because of backing changes + if ( index >= length ) return false; + if ( index+startIndex >= owner.getItemCount() ) return false; + return true; + } + + public boolean isComplete() { + return startIndex==0 && length==owner.getItemCount(); + } + + public @Override TermItem getItem(int index) { + 
adjustIfBackingChanged(); + return (TermItem)owner.getItem(startIndex+index); + } + + public String getReplace(int index) { + if (replace==null) return null; + return replace.get(index); + } + + public void replace() { + PhraseItem phrase=new PhraseItem(); + TermItem firstWord=(TermItem)owner.setItem(startIndex,phrase); + replace(firstWord,0); + phrase.setIndexName(firstWord.getIndexName()); + phrase.addItem(firstWord); + for (int i=1; i<length; i++) { + TermItem followingWord=(TermItem)owner.removeItem(startIndex+1); + replace(followingWord,i); + phrase.addItem(followingWord); + } + } + + private void replace(TermItem item,int index) { + if (replace==null) return; + String replaceString=replace.get(index); + if (replaceString==null) return; + item.setValue(replaceString); + } + + public void remove() { + for (int i=startIndex+length-1; i>=startIndex; i--) + owner.removeItem(i); + } + + public boolean hasReplaces() { return replace!=null; } + + /** + * Detects and attemts to compensate for a changed backing. 
Stop-gap measure until we get a through + * design for this + */ + private void adjustIfBackingChanged() { + if (owner.getItemCount()==initialOwnerLength) return; + startIndex=owner.getItemIndex(startItem); + } + + } + + public static class MatchIterator implements Iterator<Item> { + + private Phrase phrase; + + private int currentIndex=0; + + public MatchIterator(Phrase phrase) { + this.phrase=phrase; + } + + public boolean hasNext() { + return phrase.matched.hasItemAt(currentIndex); + //return (currentIndex<phrase.getLength()); + //return phrase.matched.hasItemAt(currentIndex); + } + + /** Returns the value to replace the item last returned by next(), or null to keep it as-is */ + public String getReplace() { + return phrase.matched.getReplace(currentIndex-1); + } + + public Item next() { + if (!hasNext()) + throw new NoSuchElementException(this + " has no more elements"); + + currentIndex++; + if ((phrase.matched instanceof MatchedWord)) + return ((MatchedWord)phrase.matched).getItem(); + else + return phrase.getOwner().getItem(phrase.getStartIndex()+currentIndex-1); + } + + public void remove() { + throw new UnsupportedOperationException("Can not remove from a phrasematcher phrase"); + } + + } + + } + + /** Returns a phrase matcher which (quickly) never matches anything */ + public static PhraseMatcher getNullMatcher() { + return new PhraseMatcher() { + + public List<Phrase> matchPhrases(Item item) { + return null; + } + }; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/PhrasingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/PhrasingSearcher.java new file mode 100644 index 00000000000..f3d4b09c65c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/PhrasingSearcher.java @@ -0,0 +1,76 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.querytransform; + +import com.google.inject.Inject; +import com.yahoo.component.ComponentId; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + + +import java.util.List; + +/** + * <p>Detects query phrases. When a phrase is detected in the query, + * the query is mutated to reflect this fact.</p> + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + * @author <a href="mailto:einarmr@yahoo-inc.com">Einar M R Rosenvinge</a> + */ +@After(PhaseNames.RAW_QUERY) +@Before(PhaseNames.TRANSFORMED_QUERY) +@Provides(PhrasingSearcher.PHRASE_REPLACEMENT) +public class PhrasingSearcher extends Searcher { + + private static final CompoundName suggestonly=new CompoundName("suggestonly"); + + public static final String PHRASE_REPLACEMENT = "PhraseReplacement"; + + private PhraseMatcher phraseMatcher; + + @Inject + public PhrasingSearcher(ComponentId id, QrSearchersConfig config) { + super(id); + setupAutomatonFile(config.com().yahoo().prelude().querytransform().PhrasingSearcher().automatonfile()); + } + + public PhrasingSearcher(String phraseAutomatonFile) { + setupAutomatonFile(phraseAutomatonFile); + } + + private void setupAutomatonFile(String phraseAutomatonFile) { + if (phraseAutomatonFile == null || phraseAutomatonFile.trim().equals("")) { + //no file, just use dummy matcher + phraseMatcher = PhraseMatcher.getNullMatcher(); + } else { + //use real matcher + phraseMatcher = new PhraseMatcher(phraseAutomatonFile,true); + } + } + + @Override + public com.yahoo.search.Result search(com.yahoo.search.Query query, Execution execution) { + List<PhraseMatcher.Phrase> replacePhrases = 
phraseMatcher.matchPhrases(query.getModel().getQueryTree().getRoot()); + if (replacePhrases != null && !query.properties().getBoolean(suggestonly, false)) { + replace(replacePhrases); + query.trace("Replacing phrases", true, 2); + } + return execution.search(query); + } + + /** Replaces all phrases longer than one word with a PhraseItem */ + private void replace(List<PhraseMatcher.Phrase> phrases) { + // Replacing the leaf replace phrases first to preserve + // the start index of each replace phrase until replacement + for (int i = phrases.size()-1; i >= 0; i--) { + PhraseMatcher.Phrase phrase = phrases.get(i); + if (phrase.getLength() > 1) + phrase.replace(); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/QueryRewrite.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/QueryRewrite.java new file mode 100644 index 00000000000..fe680bd5ad0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/QueryRewrite.java @@ -0,0 +1,241 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.querytransform; + +import com.yahoo.prelude.query.AndItem; +import com.yahoo.prelude.query.CompositeItem; +import com.yahoo.prelude.query.EquivItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.NearItem; +import com.yahoo.prelude.query.NotItem; +import com.yahoo.prelude.query.NullItem; +import com.yahoo.prelude.query.OrItem; +import com.yahoo.prelude.query.RankItem; +import com.yahoo.prelude.query.SimpleIndexedItem; +import com.yahoo.prelude.query.SubstringItem; +import com.yahoo.search.Query; +import com.yahoo.search.query.Model; +import com.yahoo.search.result.Hit; + +/** + * @author balder + */ +public class QueryRewrite { + + private static enum Recall { + RECALLS_EVERYTHING, + RECALLS_NOTHING, + UNKNOWN_RECALL + } + + /** + * Optimize multiple NotItems under and or by collapsing them in to one and leaving + * the positive ones behind in its place and moving itself with the original and as its positive item + * and the union of all the negative items of all the original NotItems as its negative items. 
+ * + * @param query to optimize + */ + public static void optimizeAndNot(Query query) { + Item root = query.getModel().getQueryTree().getRoot(); + Item possibleNewRoot = optimizeAndNot(root); + if (root != possibleNewRoot) { + query.getModel().getQueryTree().setRoot(possibleNewRoot); + } + } + private static Item optimizeAndNot(Item node) { + if (node instanceof CompositeItem) { + return extractAndNotRecursively((CompositeItem) node); + } + return node; + } + private static CompositeItem extractAndNotRecursively(CompositeItem parent) { + for (int i = 0; i < parent.getItemCount(); i++) { + Item child = parent.getItem(i); + Item possibleNewChild = optimizeAndNot(child); + if (child != possibleNewChild) { + parent.setItem(i, possibleNewChild); + } + } + if (parent instanceof AndItem) { + return extractAndNot((AndItem) parent); + } + return parent; + } + private static CompositeItem extractAndNot(AndItem parent) { + NotItem theOnlyNot = null; + for (int i = 0; i < parent.getItemCount(); i++) { + Item child = parent.getItem(i); + if (child instanceof NotItem) { + NotItem thisNot = (NotItem) child; + parent.setItem(i, thisNot.getPositiveItem()); + if (theOnlyNot == null) { + theOnlyNot = thisNot; + theOnlyNot.setPositiveItem(parent); + } else { + for (int j=1; j < thisNot.getItemCount(); j++) { + theOnlyNot.addNegativeItem(thisNot.getItem(j)); + } + } + } + } + return (theOnlyNot != null) ? theOnlyNot : parent; + } + /** + * Optimizes the given query tree based on its {@link Model#getRestrict()} parameter, if any. + * + * @param query to optimize. 
+ */ + public static void optimizeByRestrict(Query query) { + if (query.getModel().getRestrict().size() != 1) { + return; + } + Item root = query.getModel().getQueryTree().getRoot(); + if (optimizeByRestrict(root, query.getModel().getRestrict().iterator().next()) == Recall.RECALLS_NOTHING) { + query.getModel().getQueryTree().setRoot(new NullItem()); + } + } + + private static Recall optimizeByRestrict(Item item, String restrictParam) { + if (item instanceof SimpleIndexedItem) { + return optimizeIndexedItemByRestrict((SimpleIndexedItem)item, restrictParam); + } else if (item instanceof NotItem) { + return optimizeNotItemByRestrict((NotItem)item, restrictParam); + } else if (item instanceof CompositeItem) { + return optimizeCompositeItemByRestrict((CompositeItem)item, restrictParam); + } else { + return Recall.UNKNOWN_RECALL; + } + } + + private static Recall optimizeIndexedItemByRestrict(SimpleIndexedItem item, String restrictParam) { + if (!Hit.SDDOCNAME_FIELD.equals(item.getIndexName())) { + return Recall.UNKNOWN_RECALL; + } + // a query term searching for sddocname will either recall everything or nothing, depending on whether + // the term matches the restrict parameter or not + return restrictParam.equals(item.getIndexedString()) + ? 
Recall.RECALLS_EVERYTHING + : Recall.RECALLS_NOTHING; + } + + private static Recall optimizeNotItemByRestrict(NotItem item, String restrictParam) { + // first item is the positive one + if (optimizeByRestrict(item.getItem(0), restrictParam) == Recall.RECALLS_NOTHING) { + return Recall.RECALLS_NOTHING; + } + // all the remaining items are negative ones + for (int i = item.getItemCount(); --i >= 1; ) { + Item child = item.getItem(i); + switch (optimizeByRestrict(child, restrictParam)) { + case RECALLS_EVERYTHING: + return Recall.RECALLS_NOTHING; + case RECALLS_NOTHING: + item.removeItem(i); + break; + } + } + return Recall.UNKNOWN_RECALL; + } + + private static Recall optimizeCompositeItemByRestrict(CompositeItem item, String restrictParam) { + for (int i = item.getItemCount(); --i >= 0; ) { + switch (optimizeByRestrict(item.getItem(i), restrictParam)) { + case RECALLS_EVERYTHING: + if ((item instanceof OrItem) || (item instanceof EquivItem)) { + retainChild(item, i); + return Recall.RECALLS_EVERYTHING; + } else if ((item instanceof AndItem) || (item instanceof NearItem)) { + item.removeItem(i); + } else if (item instanceof RankItem) { + // empty + } else { + throw new UnsupportedOperationException(item.getClass().getName()); + } + break; + case RECALLS_NOTHING: + if ((item instanceof OrItem) || (item instanceof EquivItem)) { + item.removeItem(i); + } else if ((item instanceof AndItem) || (item instanceof NearItem)) { + return Recall.RECALLS_NOTHING; + } else if (item instanceof RankItem) { + item.removeItem(i); + } else { + throw new UnsupportedOperationException(item.getClass().getName()); + } + break; + } + } + return Recall.UNKNOWN_RECALL; + } + + private static void retainChild(CompositeItem item, int childIdx) { + Item child = item.removeItem(childIdx); + for (int i = item.getItemCount(); --i >= 0; ) { + item.removeItem(i); + } + item.addItem(child); + } + + /** + * Collapses all single-child {@link CompositeItem}s into their parent item. 
+ * + * @param query The query whose composites to collapse. + */ + public static void collapseSingleComposites(Query query) { + Item oldRoot = query.getModel().getQueryTree().getRoot(); + Item newRoot = collapseSingleComposites(oldRoot); + if (oldRoot != newRoot) { + query.getModel().getQueryTree().setRoot(newRoot); + } + } + + private static Item collapseSingleComposites(Item item) { + if (!(item instanceof CompositeItem)) { + return item; + } + CompositeItem parent = (CompositeItem)item; + int numChildren = parent.getItemCount(); + for (int i = 0; i < numChildren; ++i) { + Item oldChild = parent.getItem(i); + Item newChild = collapseSingleComposites(oldChild); + if (oldChild != newChild) { + parent.setItem(i, newChild); + } + } + return numChildren == 1 ? parent.getItem(0) : item; + } + + /** + * Replaces and {@link SimpleIndexedItem} searching in the {@link Hit#SDDOCNAME_FIELD} with an item + * appropriate for the search node. + * + * @param query The query to rewrite. + */ + public static void rewriteSddocname(Query query) { + Item oldRoot = query.getModel().getQueryTree().getRoot(); + Item newRoot = rewriteSddocname(oldRoot); + if (oldRoot != newRoot) { + query.getModel().getQueryTree().setRoot(newRoot); + } + } + + private static Item rewriteSddocname(Item item) { + if (item instanceof CompositeItem) { + CompositeItem parent = (CompositeItem)item; + for (int i = 0, len = parent.getItemCount(); i < len; ++i) { + Item oldChild = parent.getItem(i); + Item newChild = rewriteSddocname(oldChild); + if (oldChild != newChild) { + parent.setItem(i, newChild); + } + } + } else if (item instanceof SimpleIndexedItem) { + SimpleIndexedItem oldItem = (SimpleIndexedItem)item; + if (Hit.SDDOCNAME_FIELD.equals(oldItem.getIndexName())) { + SubstringItem newItem = new SubstringItem(oldItem.getIndexedString()); + newItem.setIndexName("[documentmetastore]"); + return newItem; + } + } + return item; + } +} diff --git 
a/container-search/src/main/java/com/yahoo/prelude/querytransform/RecallSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/RecallSearcher.java new file mode 100644 index 00000000000..4a47b23d30d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/RecallSearcher.java @@ -0,0 +1,156 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.querytransform;

import com.yahoo.component.chain.dependencies.After;
import com.yahoo.component.chain.dependencies.Before;
import com.yahoo.prelude.query.*;
import com.yahoo.prelude.query.parser.AnyParser;
import com.yahoo.search.Query;
import com.yahoo.search.Searcher;
import com.yahoo.processing.request.CompoundName;
import com.yahoo.search.query.QueryTree;
import com.yahoo.search.query.parser.Parsable;
import com.yahoo.search.query.parser.ParserEnvironment;
import com.yahoo.search.result.ErrorMessage;
import com.yahoo.search.searchchain.Execution;
import com.yahoo.search.searchchain.PhaseNames;

import java.util.Iterator;
import java.util.Stack;

import static com.yahoo.prelude.querytransform.NormalizingSearcher.ACCENT_REMOVAL;
import static com.yahoo.prelude.querytransform.StemmingSearcher.STEMMING;

/**
 * This searcher parses the content of the "recall" query property as a filter expression alongside a placeholder
 * query string. The node corresponding to the placeholder query is then swapped with the current query tree. This
 * allows us to parse "recall" using the same rules as "filter" without modifying the parser.
 *
 * If the "recall" property is unset, this searcher does nothing. If the property cannot be turned into a usable
 * tree, a result holding an invalid-query-parameter error is returned and the query is not passed on.
 *
 * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
 */
@After("com.yahoo.search.querytransform.WandSearcher")
@Before({STEMMING, ACCENT_REMOVAL})
public class RecallSearcher extends Searcher {

    private static final CompoundName recallName=new CompoundName("recall");

    /** The query string parsed together with the recall filter, later located and swapped for the real query */
    private static final String PLACEHOLDER_QUERY = "foo";

    /**
     * Combines the tree parsed from the "recall" property with the current query tree.
     *
     * @param query the query to rewrite
     * @param execution the execution to pass the rewritten query on to
     * @return the result of passing the query on, or a result holding only an error if "recall" is unusable
     */
    @Override
    public com.yahoo.search.Result search(Query query, Execution execution) {
        String recall = query.properties().getString(recallName);
        if (recall == null) {
            return execution.search(query);
        }
        AnyParser parser = new AnyParser(
                ParserEnvironment.fromExecutionContext(execution.context()));
        QueryTree root = parser.parse(Parsable.fromQueryModel(query.getModel())
                                              .setQuery(PLACEHOLDER_QUERY).setFilter(recall));
        Item recallRoot = root.getRoot();
        String err;
        if (recallRoot instanceof NullItem) {
            err = "Failed to parse recall parameter.";
        } else if (!(recallRoot instanceof CompositeItem)) {
            // Report the class of the offending root item. The QueryTree wrapper itself is always
            // the same class, so naming it (as this message used to) tells the caller nothing.
            err = "Expected CompositeItem root node, got " +
                  (recallRoot == null ? "null" : recallRoot.getClass().getSimpleName()) + ".";
        } else if (hasRankItem(recallRoot)) {
            // NOTE(review): the query tree is replaced by the recall tree even though an error is
            // returned below. Kept as-is; confirm whether this is intentional.
            query.getModel().getQueryTree().setRoot(recallRoot);

            err = "Recall contains at least one rank item.";
        } else {
            WordItem placeholder = findOrigWordItem(recallRoot, PLACEHOLDER_QUERY);
            if (placeholder == null) {
                err = "Could not find placeholder workQuery root.";
            } else {
                updateFilterTerms(root);
                // Put the current query tree where the placeholder was, then make the recall tree the query tree
                CompositeItem parent = placeholder.getParent();
                parent.setItem(parent.getItemIndex(placeholder), query
                        .getModel().getQueryTree().getRoot());
                query.getModel().getQueryTree().setRoot(recallRoot);

                query.trace("ANDed recall tree with root workQuery node.",
                            true, 3);
                return execution.search(query);
            }
        }
        com.yahoo.search.Result ret = new com.yahoo.search.Result(query);
        ret.hits().addError(ErrorMessage.createInvalidQueryParameter(err));
        return ret;
    }

    /**
     * Returns true if the given item tree contains at least one instance of {@link RankItem}.
     *
     * @param root The root of the tree to check.
     * @return True if a rank item was found.
     */
    private static boolean hasRankItem(Item root) {
        Stack<Item> stack = new Stack<>();
        stack.push(root);
        while (!stack.isEmpty()) {
            Item item = stack.pop();
            if (item instanceof RankItem) {
                return true;
            }
            if (item instanceof CompositeItem) {
                CompositeItem lst = (CompositeItem)item;
                for (Iterator<Item> it = lst.getItemIterator(); it.hasNext();) {
                    stack.push(it.next());
                }
            }
        }
        return false;
    }

    /**
     * Returns the first item encountered in the given item tree which is a {@link WordItem} created from the
     * original query ({@link Item.ItemCreator#ORIG}) and holds the given word.
     *
     * @param root The root of the tree to check.
     * @param value The word to look for.
     * @return The first node found, or null if there is none.
     */
    private static WordItem findOrigWordItem(Item root, String value) {
        Stack<Item> stack = new Stack<>();
        stack.push(root);
        while (!stack.isEmpty()) {
            Item item = stack.pop();
            if (item.getCreator() == Item.ItemCreator.ORIG &&
                item instanceof WordItem)
            {
                WordItem word = (WordItem)item;
                if (word.getWord().equals(value)) {
                    return word;
                }
            }
            if (item instanceof CompositeItem) {
                CompositeItem lst = (CompositeItem)item;
                for (Iterator<Item> it = lst.getItemIterator(); it.hasNext();) {
                    stack.push(it.next());
                }
            }
        }
        return null;
    }

    /**
     * Marks all filter terms in the given query tree as unranked.
     *
     * @param root The root of the tree to update.
     */
    private static void updateFilterTerms(Item root) {
        Stack<Item> stack = new Stack<>();
        stack.push(root);
        while (!stack.isEmpty()) {
            Item item = stack.pop();
            if (item.getCreator() == Item.ItemCreator.FILTER) {
                item.setRanked(false);
            }
            if (item instanceof CompositeItem) {
                CompositeItem lst = (CompositeItem)item;
                for (Iterator<Item> it = lst.getItemIterator(); it.hasNext();) {
                    stack.push(it.next());
                }
            }
        }
    }
}
diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java new file mode 100644 index 00000000000..dfa7a024224 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java @@ -0,0 +1,431 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.querytransform; + +import com.google.inject.Inject; +import com.yahoo.component.ComponentId; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.language.Language; +import com.yahoo.language.Linguistics; +import com.yahoo.language.process.StemMode; +import com.yahoo.language.process.StemList; +import com.yahoo.log.LogLevel; +import com.yahoo.prelude.Index; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.*; +import com.yahoo.prelude.query.WordAlternativesItem.Alternative; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +import java.util.*; + +import static com.yahoo.prelude.querytransform.CJKSearcher.TERM_ORDER_RELAXATION; + + +/** + * Replaces query terms with their stems + * + * @author <a href="mailto:mathiasm@yahoo-inc.com">Mathias 
Lidal</a> + * @author bratseth + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@After({PhaseNames.UNBLENDED_RESULT, TERM_ORDER_RELAXATION}) +@Provides(StemmingSearcher.STEMMING) +public class StemmingSearcher extends Searcher { + + public static final String STEMMING = "Stemming"; + public static final CompoundName DISABLE = new CompoundName("nostemming"); + private final Linguistics linguistics; + + public StemmingSearcher(Linguistics linguistics) { + this.linguistics = linguistics; + } + + @Inject + public StemmingSearcher(ComponentId id, Linguistics linguistics) { + super(id); + this.linguistics = linguistics; + } + + @Override + public Result search(Query query, Execution execution) { + if (query.properties().getBoolean(DISABLE)) return execution.search(query); + + IndexFacts.Session indexFacts = execution.context().getIndexFacts().newSession(query); + Item newRoot = replaceTerms(query, indexFacts); + query.getModel().getQueryTree().setRoot(newRoot); + + query.trace(getFunctionName(), true, 2); + + Highlight highlight = query.getPresentation().getHighlight(); + if (highlight != null) { + Set<String> highlightFields = highlight.getHighlightItems().keySet(); + for (String field : highlightFields) { + StemMode stemMode = indexFacts.getIndex(field).getStemMode(); + if (stemMode != StemMode.NONE) { + Item newHighlight = scan(highlight.getHighlightItems().get(field), false, Language.ENGLISH, indexFacts, null); + highlight.getHighlightItems().put(field, (AndItem)newHighlight); + } + } + } + return execution.search(query); + } + + public String getFunctionName() { return "Stemming"; } + + private Item replaceTerms(Query q, IndexFacts.Session indexFacts) { + Language l = q.getModel().getParsingLanguage(); + if (l == Language.UNKNOWN) { + return q.getModel().getQueryTree().getRoot(); + } + return scan(q.getModel().getQueryTree().getRoot(), l.isCjk(), l, indexFacts, + createReverseConnectivities(q.getModel().getQueryTree().getRoot())); + } + + 
private Map<Item, TaggableItem> createReverseConnectivities(Item root) { + return populateReverseConnectivityMap(root, new IdentityHashMap<>()); + } + + private Map<Item, TaggableItem> populateReverseConnectivityMap(Item root, Map<Item, TaggableItem> reverseConnectivity) { + if (root instanceof TaggableItem) { + TaggableItem asTaggable = (TaggableItem) root; + Item connectsTo = asTaggable.getConnectedItem(); + if (connectsTo != null) { + reverseConnectivity.put(connectsTo, asTaggable); + } + } + if (root instanceof CompositeItem && !(root instanceof BlockItem)) { + CompositeItem c = (CompositeItem) root; + for (Iterator<Item> i = c.getItemIterator(); i.hasNext();) { + Item item = i.next(); + populateReverseConnectivityMap(item, reverseConnectivity); + } + } + return reverseConnectivity; + } + + private Item scan(Item item, + boolean isCJK, + Language l, + IndexFacts.Session indexFacts, + Map<Item, TaggableItem> reverseConnectivity) { + if (item == null) { + return null; + } else if (item instanceof BlockItem) { + return checkBlock((BlockItem) item, isCJK, l, indexFacts, reverseConnectivity); + } else if (item instanceof CompositeItem) { + CompositeItem comp = (CompositeItem) item; + ListIterator<Item> i = comp.getItemIterator(); + + while (i.hasNext()) { + Item original = i.next(); + Item transformed = scan(original, isCJK, l, indexFacts, reverseConnectivity); + if (original != transformed) + i.set(transformed); + } + return item; + } else { + return item; + } + } + + private Item checkBlock(BlockItem b, boolean isCJK, Language language, + IndexFacts.Session indexFacts, Map<Item, TaggableItem> reverseConnectivity) { + if (b instanceof PrefixItem || !b.isWords()) return (Item) b; + + if (b.isFromQuery() && !b.isStemmed()) { + final Index index = indexFacts.getIndex(b.getIndexName()); + StemMode stemMode = index.getStemMode(); + if (stemMode != StemMode.NONE) return stem(b, isCJK, language, reverseConnectivity, index); + } + return (Item) b; + } + + private Substring 
getOffsets(BlockItem b) { + if (b instanceof TermItem) { + return b.getOrigin(); + } else if (b instanceof CompositeItem) { + Item i = ((CompositeItem) b).getItem(0); + if (i instanceof TermItem) { + return ((TermItem) i).getOrigin(); // this should always be the case + } else { + getLogger().log(LogLevel.WARNING, + "Weird, BlockItem '" + b + "' was a composite containing " + i.getClass().getName() + + ", expected TermItem."); + } + } + return null; + } + + // The rewriting logic is here + private Item stem(BlockItem current, boolean isCJK, + Language language, Map<Item, TaggableItem> reverseConnectivity, Index index) { + Item blockAsItem = (Item)current; + CompositeItem composite; + List<StemList> segments = linguistics.getStemmer().stem(current.stringValue(), index.getStemMode(), language); + String indexName = current.getIndexName(); + Substring substring = getOffsets(current); + + if (segments.size() == 1) { + TaggableItem w = singleWordSegment(current, segments.get(0), index, substring); + setMetaData(current, reverseConnectivity, w); + return (Item) w; + } + + if (isCJK) { + composite = chooseCompositeForCJK(current, + ((Item) current).getParent(), + indexName); + } else { + composite = phraseSegment(current, indexName); + } + + for (StemList segment : segments) { + TaggableItem w = singleWordSegment(current, segment, index, substring); + + if (composite instanceof AndSegmentItem) { + setSignificance(w, current); + } + composite.addItem((Item) w); + } + if (composite instanceof AndSegmentItem) { + andSegmentConnectivity(current, reverseConnectivity, composite); + } + copyAttributes(blockAsItem, composite); + composite.lock(); + + if (composite instanceof PhraseSegmentItem) { + PhraseSegmentItem replacement = (PhraseSegmentItem) composite; + setSignificance(replacement, current); + phraseSegmentConnectivity(current, reverseConnectivity, replacement); + } + + return composite; + } + + private void phraseSegmentConnectivity(BlockItem current, + Map<Item, 
TaggableItem> reverseConnectivity, + PhraseSegmentItem replacement) { + Connectivity c = getConnectivity(current); + if (c != null) { + replacement.setConnectivity(c.word, c.value); + reverseConnectivity.put(c.word, replacement); + } + setConnectivity(current, reverseConnectivity, replacement); + } + + private void andSegmentConnectivity(BlockItem current, + Map<Item, TaggableItem> reverseConnectivity, CompositeItem composite) { + // if the original has connectivity to something, add to last word + Connectivity connectivity = getConnectivity(current); + if (connectivity != null) { + TaggableItem w = lastWord(composite); + if (w != null) { + w.setConnectivity(connectivity.word, connectivity.value); + reverseConnectivity.put(connectivity.word, w); + } + } + // If we create an AND from something taggable, add connectivity to the first word + TaggableItem w = firstWord(composite); + if (w != null) { + setConnectivity(current, reverseConnectivity, (Item) w); + } + } + + private Connectivity getConnectivity(BlockItem current) { + if (!(current instanceof TaggableItem)) { + return null; + } + TaggableItem t = (TaggableItem) current; + if (t.getConnectedItem() == null) { + return null; + } + return new Connectivity(t.getConnectedItem(), t.getConnectivity()); + } + + private TaggableItem firstWord(CompositeItem composite) { + // yes, this assumes only WordItem instances in the CompositeItem + int l = composite.getItemCount(); + if (l == 0) { + return null; + } else { + return (TaggableItem) composite.getItem(0); + } + } + + private TaggableItem lastWord(CompositeItem composite) { + // yes, this assumes only WordItem instances in the CompositeItem + int l = composite.getItemCount(); + if (l == 0) { + return null; + } else { + return (TaggableItem) composite.getItem(l - 1); + } + } + + private TaggableItem singleWordSegment(BlockItem current, + StemList segment, + Index index, + Substring substring) + { + String indexName = current.getIndexName(); + if 
(index.getLiteralBoost() || index.getStemMode() == StemMode.ALL) { + // Yes, this will create a new WordAlternativesItem even if stemmed + // and original form are identical. This is to decrease complexity + // in accent removal and lowercasing. + List<Alternative> terms = new ArrayList<>(segment.size() + 1); + terms.add(new Alternative(current.stringValue(), 1.0d)); + for (String term : segment) { + terms.add(new Alternative(term, 0.7d)); + } + WordAlternativesItem alternatives = new WordAlternativesItem(indexName, current.isFromQuery(), substring, terms); + return alternatives; + } else { + WordItem first = singleStemSegment((Item) current, segment.get(0), indexName, substring); + return first; + } + } + + private void setMetaData(BlockItem current, Map<Item, TaggableItem> reverseConnectivity, TaggableItem replacement) { + copyAttributes((Item) current, (Item) replacement); + setSignificance(replacement, current); + Connectivity c = getConnectivity(current); + if (c != null) { + replacement.setConnectivity(c.word, c.value); + reverseConnectivity.put(c.word, replacement); + } + setConnectivity(current, reverseConnectivity, (Item) replacement); + } + + private WordItem singleStemSegment(Item blockAsItem, String stem, String indexName, + Substring substring) + { + WordItem replacement = new WordItem(stem, indexName, true, substring); + replacement.setStemmed(true); + copyAttributes(blockAsItem, replacement); + return replacement; + } + + private void setConnectivity(BlockItem current, + Map<Item, TaggableItem> reverseConnectivity, + Item replacement) + { + if (reverseConnectivity != null && !reverseConnectivity.isEmpty()) { + // This Map<Item, TaggableItem>.get(BlockItem) is technically wrong, but the Item API ensures its correctness + TaggableItem connectedTo = reverseConnectivity.get(current); + if (connectedTo != null) { + double connectivity = connectedTo.getConnectivity(); + connectedTo.setConnectivity(replacement, connectivity); + } + } + } + + private 
CompositeItem chooseCompositeForCJK(BlockItem current, + CompositeItem parent, String indexName) { + CompositeItem composite; + if (current.getSegmentingRule() == SegmentingRule.LANGUAGE_DEFAULT) { + if (parent instanceof PhraseItem + || current instanceof PhraseSegmentItem) { + composite = phraseSegment(current, indexName); + } else + composite = createAndSegment(current); + } else { + switch (current.getSegmentingRule()) { + case PHRASE: + composite = phraseSegment(current, indexName); + break; + case BOOLEAN_AND: + composite = createAndSegment(current); + break; + default: + throw new IllegalArgumentException( + "Unknown segmenting rule: " + + current.getSegmentingRule() + + ". This is a bug in Vespa, as the implementation has gotten out of sync." + + " Please create a ticket as soon as possible."); + } + } + return composite; + } + + private AndSegmentItem createAndSegment(BlockItem current) { + return new AndSegmentItem(current.stringValue(), true, true); + } + + private CompositeItem phraseSegment(BlockItem current, String indexName) { + CompositeItem composite; + composite = new PhraseSegmentItem(current.getRawWord(), current.stringValue(), true, true); + composite.setIndexName(indexName); + return composite; + } + + private void copyAttributes(Item blockAsItem, Item replacement) { + copyWeight(blockAsItem, replacement); + replacement.setCreator(blockAsItem.getCreator()); + replacement.setRanked(blockAsItem.isRanked()); + replacement.setPositionData(blockAsItem.usePositionData()); + } + + private void copyWeight(Item block, Item replacement) { + int weight = getWeight(block); + setWeight(replacement, weight); + } + + private int getWeight(Item block) { + if (block instanceof AndSegmentItem + && ((AndSegmentItem) block).getItemCount() > 0) { + return ((AndSegmentItem) block).getItem(0).getWeight(); + } else { + return block.getWeight(); + } + } + + // this smells like an extension of AndSegmentItem... 
+ private void setWeight(Item replacement, int weight) { + if (replacement instanceof AndSegmentItem) { + for (Iterator<Item> i = ((AndSegmentItem) replacement).getItemIterator(); + i.hasNext();) { + i.next().setWeight(weight); + } + } else { + replacement.setWeight(weight); + } + } + + // TODO: Next four methods indicate Significance should be bubbled up the class hierarchy + // TODO: Perhaps Significance should bubble up, but the real problem is the class/interface hierarchy for queries is in dire need of restructuring + private void setSignificance(PhraseSegmentItem target, BlockItem original) { + if (hasExplicitSignificance(original)) target.setSignificance(getSignificance(original)); + } + + private void setSignificance(TaggableItem target, BlockItem original) { + if (hasExplicitSignificance(original)) target.setSignificance(getSignificance(original)); //copy + } + + private boolean hasExplicitSignificance(BlockItem blockItem) { + if (blockItem instanceof TermItem ) return ((TermItem)blockItem).hasExplicitSignificance(); + if (blockItem instanceof PhraseSegmentItem ) return ((PhraseSegmentItem)blockItem).hasExplicitSignificance(); + return false; + } + + //assumes blockItem instanceof TermItem or PhraseSegmentItem + private double getSignificance(BlockItem blockItem) { + if (blockItem instanceof TermItem) return ((TermItem)blockItem).getSignificance(); + else return ((PhraseSegmentItem)blockItem).getSignificance(); + } + + private static class Connectivity { + public final Item word; + public final double value; + + public Connectivity(Item connectedItem, double connectivity) { + this.word = connectedItem; + this.value = connectivity; + } + + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/package-info.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/package-info.java new file mode 100644 index 00000000000..1d7dbb946d9 --- /dev/null +++ 
b/container-search/src/main/java/com/yahoo/prelude/querytransform/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.prelude.querytransform; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/BlendingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/BlendingSearcher.java new file mode 100644 index 00000000000..268fe5f4ea5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/BlendingSearcher.java @@ -0,0 +1,276 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + + +import com.google.inject.Inject; +import com.yahoo.component.ComponentId; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.HitGroup; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +import java.util.*; + + +/** + * Flattens a result consisting of multiple hit groups containing hits + * into a single flat list of hits. 
+ * + * @author Bob Travis + * @author Steinar Knutsen + * @author Arne Fossaa + */ +@After(PhaseNames.BLENDED_RESULT) +@Before(PhaseNames.UNBLENDED_RESULT) +@Provides(BlendingSearcher.BLENDING) +public class BlendingSearcher extends Searcher { + + public static final String BLENDING = "Blending"; + + private final String documentId; + + @Inject + public BlendingSearcher(ComponentId id, QrSearchersConfig cfg) { + super(id); + QrSearchersConfig.Com.Yahoo.Prelude.Searcher.BlendingSearcher s = cfg.com().yahoo().prelude().searcher().BlendingSearcher(); + documentId = s.docid().length() > 0 ? s.docid() : null; + + } + + /** + * Only for legacy tests. + */ + public BlendingSearcher(String blendingDocumentId) { + this.documentId = blendingDocumentId; + } + + @Override + public com.yahoo.search.Result search(com.yahoo.search.Query query, Execution execution) { + Result result = execution.search(query); + + Result blended = blendResults(result, query, query.getOffset(), query.getHits(), execution); + blended.trace("Blended result"); + return blended; + } + + /** + * Fills this result by forwarding to the right chained searchers + */ + @Override + public void fill(com.yahoo.search.Result result, String summaryClass, Execution execution) { + execution.fill(result, summaryClass); + result.analyzeHits(); + } + + /** + * Produce a single blended result list from a group of hitgroups. + * + * It is assumed that the results are ordered in hitgroups. 
If not, the blend will not be performed + */ + protected Result blendResults(Result result, Query q, int offset, int hits, Execution execution) { + + //Assert that there are more than one hitgroup and that there are only hitgroups on the lowest level + + boolean foundNonGroup = false; + Iterator<Hit> hitIterator = result.hits().iterator(); + List<HitGroup> groups = new ArrayList<>(); + while (hitIterator.hasNext()) { + Hit hit = hitIterator.next(); + if (hit instanceof HitGroup) { + groups.add((HitGroup)hit); + hitIterator.remove(); + } else if(!hit.isMeta()) { + foundNonGroup = true; + } + } + + if(foundNonGroup) { + result.hits().addError(ErrorMessage.createUnspecifiedError("Blendingsearcher could not blend - there are toplevel hits" + + " that are not hitgroups")); + return result; + } + if (groups.size() == 0) { + return result; + } else if (groups.size() == 1) { + result.hits().addAll(groups.get(0).asUnorderedHits()); + result.hits().setOrderer(groups.get(0).getOrderer()); + return result; + } else { + if (documentId != null) { + return blendResultsUniquely(result, q, offset, hits, groups, execution); + } else { + return blendResultsDirectly(result, q, offset, hits, groups, execution); + } + } + } + + private Result sortAndTrimResults(Result result, Query q, int offset, int hits, Execution execution) { + if (q.getRanking().getSorting() != null) { + execution.fillAttributes(result); // Always correct as we can only sort on attributes + result.hits().sort(); + } + result.hits().trim(offset, hits); + return result; + } + + private abstract class DocumentMerger { + protected Set<String> documentsToStrip; + protected Result result; + protected HitGroup group; + + abstract void put(HitGroup source, Hit hit, Execution execution); + + abstract void scan(Hit hit, int i, Execution execution); + + Result getResult() { + return result; + } + + //Since we cannot use prelude.hit#getProperty, we'll have to improvise + private String getProperty(Hit hit, String field) { + 
Object o = hit.getField(field); + return o == null ? null : o.toString(); + } + + + protected void storeID(Hit hit, Execution execution) { + String id = getProperty(hit, documentId); + + if (id != null) { + documentsToStrip.add(id); + } else { + if (!result.isFilled(result.getQuery().getPresentation().getSummary())) { + fill(result, result.getQuery().getPresentation().getSummary(), execution); + id = getProperty(hit, documentId); + if (id != null) { + documentsToStrip.add(id); + } + } + } + } + + protected boolean known(HitGroup source, Hit hit, Execution execution) { + String stripID = getProperty(hit, documentId); + + if (stripID == null) { + if (!source.isFilled(result.getQuery().getPresentation().getSummary())) { + Result nResult = new Result(result.getQuery()); + nResult.hits().add(source); + fill(nResult, nResult.getQuery().getPresentation().getSummary(), execution); + stripID = getProperty(hit, documentId); + if (stripID == null) { + return false; + } + } else { + return false; + } + } + + if (documentsToStrip.contains(stripID)) { + return true; + } + + documentsToStrip.add(stripID); + return false; + } + + void scanResult(Execution execution) { + List<Hit> hits = group.asUnorderedHits(); + for (int i = hits.size()-1; i >= 0; i--) { + Hit sniffHit = hits.get(i); + if (!sniffHit.isMeta()) { + scan(sniffHit, i, execution); + } else { + result.hits().add(sniffHit); + } + } + } + + void mergeResults(List<HitGroup> groups, Execution execution) { + // note, different loop direction from scanResult() + for(HitGroup group : groups.subList(1, groups.size())) { + for(Hit hit : group.asList()) { + if(hit.isMeta()) { + result.hits().add(hit); + } else { + put(group, hit, execution); + } + } + } + } + } + + + private class BasicMerger extends DocumentMerger { + BasicMerger(Result result, HitGroup group) { + this.result = result; + this.group = group; + } + + void put(HitGroup source, Hit hit, Execution execution) { + result.hits().add(hit); + } + + void scan(Hit hit, int 
i, Execution execution) { + result.hits().add(hit); + } + } + + + private class UniqueMerger extends DocumentMerger { + UniqueMerger(Result result, HitGroup group, Set<String> documentsToStrip) { + this.documentsToStrip = documentsToStrip; + this.result = result; + this.group = group; + } + + void scan(Hit hit, int i, Execution execution) { + result.hits().add(hit); + if (!hit.isMeta()) { + storeID(hit, execution); + } + } + + void put(HitGroup source, Hit hit, Execution execution) { + if (!hit.isMeta()) { + if (!known(source, hit, execution)) { + addHit(hit); + } + } else { + result.hits().add(hit); + } + } + + protected void addHit(Hit hit) { + result.hits().add(hit); + } + + } + + private Result blendResultsDirectly(Result result, Query q, int offset, + int hits, List<HitGroup> groups, Execution execution) { + DocumentMerger m = new BasicMerger(result, groups.get(0)); + + m.scanResult(execution); + m.mergeResults(groups, execution); + return sortAndTrimResults(m.getResult(), q, offset, hits, execution); + } + + private Result blendResultsUniquely(Result result, Query q, int offset, + int hits, List<HitGroup> groups, Execution execution) { + DocumentMerger m = new UniqueMerger(result, groups.get(0), new HashSet<>(20)); + + m.scanResult(execution); + m.mergeResults(groups, execution); + return sortAndTrimResults(m.getResult(), q, offset, hits, execution); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/CachingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/CachingSearcher.java new file mode 100644 index 00000000000..1152246a32e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/CachingSearcher.java @@ -0,0 +1,77 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.searcher; + +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.prelude.cache.Cache; +import com.yahoo.prelude.cache.QueryCacheKey; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.statistics.Statistics; +import com.yahoo.statistics.Value; + +/** + * A generic caching searcher which caches all passing results. + * + * @author vegardh + */ +@After("rawQuery") +@Before("transformedQuery") +public class CachingSearcher extends Searcher { + + private static final CompoundName nocachewrite=new CompoundName("nocachewrite"); + + private Cache<QueryCacheKey, Result> cache; + private Value cacheHitRatio = null; + + public CachingSearcher(QrSearchersConfig config, Statistics manager) { + long maxSizeBytes = config.com().yahoo().prelude().searcher().CachingSearcher().cachesizemegabytes()*1024*1024; + long timeToLiveMillis = config.com().yahoo().prelude().searcher().CachingSearcher().timetoliveseconds()*1000; + long maxEntrySizeBytes = config.com().yahoo().prelude().searcher().CachingSearcher().maxentrysizebytes(); + cache=new Cache<>(maxSizeBytes, timeToLiveMillis, maxEntrySizeBytes, manager); + initRatio(manager); + } + + private void initRatio(Statistics manager) { + cacheHitRatio = new Value("querycache_hit_ratio", manager, + new Value.Parameters().setNameExtension(false).setLogRaw(false).setLogMean(true)); + } + + private synchronized void cacheHit() { + cacheHitRatio.put(1.0d); + } + + private synchronized void cacheMiss() { + cacheHitRatio.put(0.0d); + } + + private boolean noCacheWrite(Query query) { + return query.properties().getBoolean(nocachewrite); + } + + public Result search(com.yahoo.search.Query query, Execution execution) { + if 
(query.getNoCache()) { + return execution.search(query); + } + QueryCacheKey queryKey = new QueryCacheKey(query); + Result cachedResult=cache.get(queryKey); + if (cachedResult!=null) { + cacheHit(); + return cachedResult; + } + cacheMiss(); + Query originalQuery = query.clone(); // Need a copy, as cache hash key later on, maybe. + Result result = execution.search(query); + execution.fill(result); + if (!noCacheWrite(query)) { + queryKey.setQuery(originalQuery); // Because the query member has changed state + cache.put(queryKey,result); + } + return result; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/DocumentSourceSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/DocumentSourceSearcher.java new file mode 100644 index 00000000000..f4b3ab3406a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/DocumentSourceSearcher.java @@ -0,0 +1,222 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.result.Hit; +import com.yahoo.search.searchchain.Execution; + +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + + +/** + * <p>Implements a document source. You pass in a query and a Result + * set. When this Searcher is called with that query it will return + * that result set.</p> + * + * <p>This supports multi-phase search.</p> + * + * <p>To avoid having to add type information for the fields, a quck hack is used to + * support testing of attribute prefetching. 
+ * Any field in the configured hits which has a name starting by attribute + * will be returned when attribute prefetch filling is requested.</p> + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +@SuppressWarnings({"rawtypes"}) +public class DocumentSourceSearcher extends Searcher { + // as for the SuppressWarnings annotation above, we are inside + // com.yahoo.prelude, this is old stuff, really no point firing off those + // warnings here... + + private Result defaultFilledResult; + private Map<Query, Result> completelyFilledResults = new HashMap<>(); + private Map<Query, Result> attributeFilledResults = new HashMap<>(); + private Map<Query, Result> unFilledResults = new HashMap<>(); + //private Result defaultUnfilledResult; + + /** Time (in ms) at which the index of this searcher was last modified */ + long editionTimeStamp=0; + + private int queryCount; + + public DocumentSourceSearcher() { + addDefaultResults(); + } + + /** + * Adds a result which can be returned either as empty, + * filled or attribute only filled later. + * Summary fields starting by "a" are attributes, others are not. + * + * @return true when replacing an existing <query, result> pair. 
+ */ + public boolean addResultSet(Query query, Result fullResult) { + Result emptyResult = new Result(query.clone()); + Result attributeResult = new Result(query.clone()); + emptyResult.setTotalHitCount(fullResult.getTotalHitCount()); + attributeResult.setTotalHitCount(fullResult.getTotalHitCount()); + int counter=0; + for (Iterator i = fullResult.hits().deepIterator();i.hasNext();) { + Hit fullHit = (Hit)i.next(); + + Hit emptyHit = (Hit)fullHit.clone(); + emptyHit.clearFields(); + emptyHit.setFillable(); + emptyHit.setRelevance(fullHit.getRelevance()); + + Hit attributeHit = (Hit)fullHit.clone(); + removePropertiesNotStartingByA(attributeHit); + attributeHit.setFillable(); + attributeHit.setRelevance(fullHit.getRelevance()); + for (Object propertyKeyObject : (Set) fullHit.fields().keySet()) { + String propertyKey=propertyKeyObject.toString(); + if (propertyKey.startsWith("attribute")) + attributeHit.setField(propertyKey, fullHit.getField(propertyKey)); + } + if (fullHit.getField(Hit.SDDOCNAME_FIELD)!=null) + attributeHit.setField(Hit.SDDOCNAME_FIELD, fullHit.getField(Hit.SDDOCNAME_FIELD)); + + // A simple summary lookup mechanism, similar to FastSearch's + emptyHit.setField("summaryid", String.valueOf(counter)); + attributeHit.setField("summaryid", String.valueOf(counter)); + fullHit.setField("summaryid", String.valueOf(counter)); + + counter++; + emptyResult.hits().add(emptyHit); + attributeResult.hits().add(attributeHit); + } + unFilledResults.put(getQueryKeyClone(query), emptyResult); + attributeFilledResults.put(getQueryKeyClone(query), attributeResult); + if (completelyFilledResults.put(getQueryKeyClone(query), fullResult.clone()) != null) { + setEditionTimeStamp(System.currentTimeMillis()); + return true; + } + return false; + } + + /** + * Returns a query clone which has offset and hits set to null. 
This is used by access to + * the maps using the query as key to achieve lookup independent of offset/hits value + */ + private com.yahoo.search.Query getQueryKeyClone(com.yahoo.search.Query query) { + com.yahoo.search.Query key=query.clone(); + key.setWindow(0,0); + key.getModel().setSources(""); + return key; + } + + private void removePropertiesNotStartingByA(Hit hit) { + List<String> toRemove=new java.util.ArrayList<>(); + for (Iterator i= ((Set) hit.fields().keySet()).iterator(); i.hasNext(); ) { + String key=(String)i.next(); + if (!key.startsWith("a")) + toRemove.add(key); + } + for (Iterator<String> i=toRemove.iterator(); i.hasNext(); ) { + String propertyName=i.next(); + hit.removeField(propertyName); + } + } + + private void addDefaultResults() { + Query q = new Query("?query=default"); + Result r = new Result(q); + r.hits().add(new Hit("http://default-1.html")); + r.hits().add(new Hit("http://default-2.html")); + r.hits().add(new Hit("http://default-3.html")); + r.hits().add(new Hit("http://default-4.html")); + defaultFilledResult = r; + addResultSet(q, r); + } + + public long getEditionTimeStamp(){ + long myEditionTime; + synchronized(this){ + myEditionTime=this.editionTimeStamp; + } + return myEditionTime; + } + + public void setEditionTimeStamp(long editionTime) { + synchronized(this){ + this.editionTimeStamp=editionTime; + } + } + + public Result search(com.yahoo.search.Query query, Execution execution) { + queryCount++; + Result r; + r = unFilledResults.get(getQueryKeyClone(query)); + if (r == null) { + r = defaultFilledResult.clone(); + } else { + r = r.clone(); + } + r.setQuery(query); + r.hits().trim(query.getOffset(), query.getHits()); + return r; + } + + @Override + public void fill(com.yahoo.search.Result result, String summaryClass, Execution execution) { + Result filledResult; + if ("attributeprefetch".equals(summaryClass)) + filledResult=attributeFilledResults.get(getQueryKeyClone(result.getQuery())); + else + filledResult = 
completelyFilledResults.get(getQueryKeyClone(result.getQuery())); + + if (filledResult == null) { + filledResult = defaultFilledResult; + } + fillHits(filledResult,result,summaryClass); + } + + private void fillHits(Result source,Result target,String summaryClass) { + for (Iterator hitsToFill= target.hits().deepIterator() ; hitsToFill.hasNext();) { + Hit hitToFill = (Hit) hitsToFill.next(); + String summaryId= (String) hitToFill.getField("summaryid"); + if (summaryId==null) continue; // Can not fill this + Hit filledHit = lookupBySummaryId(source,summaryId); + if (filledHit==null) + throw new RuntimeException("Can't fill hit with summaryid '" + summaryId + "', not present"); + + for (Iterator props= filledHit.fieldIterator();props.hasNext();) { + Map.Entry propertyEntry = (Map.Entry)props.next(); + hitToFill.setField(propertyEntry.getKey().toString(), + propertyEntry.getValue()); + } + hitToFill.setFilled(summaryClass); + } + target.analyzeHits(); + } + + private Hit lookupBySummaryId(Result result,String summaryId) { + for (Iterator i= result.hits().deepIterator(); i.hasNext(); ) { + Hit hit=(Hit)i.next(); + if (summaryId.equals(hit.getField("summaryid"))) { + return hit; + } + } + return null; + } + + /** + * Returns the number of queries made to this searcher since the last + * reset. For testing - not reliable if multiple threads makes + * queries simultaneously + */ + public int getQueryCount() { + return queryCount; + } + + public void resetQueryCount() { + queryCount=0; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java new file mode 100644 index 00000000000..10a436b3ae8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java @@ -0,0 +1,190 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.searcher; + +import com.google.inject.Inject; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.result.Hit; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +import java.util.Iterator; +import java.util.Map; + + +/** + * A searcher which does parametrized collapsing. Based on + * SiteCollapsingSearcher. Deprecated - use grouping. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@SuppressWarnings("deprecation") +@After(PhaseNames.RAW_QUERY) +@Before(PhaseNames.TRANSFORMED_QUERY) +public class FieldCollapsingSearcher extends Searcher { + + private static final CompoundName collapse = new CompoundName("collapse"); + private static final CompoundName collapsefield=new CompoundName("collapsefield"); + private static final CompoundName collapsesize=new CompoundName("collapsesize"); + private static final CompoundName collapseSummaryName=new CompoundName("collapse.summary"); + + /** Maximum number of queries to send next searcher */ + private int maxQueries = 4; + + /** + * The max number of hits that will be preserved per unique + * value of the collapsing parameter. + */ + private int defaultCollapseSize; + + /** + * The factor by which to scale up the requested number of hits + * from the next searcher in the chain, because collapsing will + * likely delete many hits. 
+ */ + private double extraFactor; + + /** Create this searcher using default values for all settings */ + public FieldCollapsingSearcher() { + this((String) null); + } + + /** + * Creates a collapser + * + * @param collapseField the default field to collapse on, or null to not collapse as default + */ + public FieldCollapsingSearcher(String collapseField) { + this(1, 2.0, collapseField); + } + + @Inject + public FieldCollapsingSearcher(QrSearchersConfig config) { + QrSearchersConfig.Com.Yahoo.Prelude.Searcher.FieldCollapsingSearcher + s = config.com().yahoo().prelude().searcher().FieldCollapsingSearcher(); + + init(s.collapsesize(), s.extrafactor()); + } + + /** + * Creates a collapser + * + * @param collapseSize the maximum number of hits to keep per + * field the default max number of hits in each collapsed group + * @param extraFactor the percentage by which to scale up the + * requested number of hits, to allow some hits to be removed + * without refetching + * @param collapseField the field to collapse on. This is currently <b>ignored</b>. + */ + public FieldCollapsingSearcher(int collapseSize, double extraFactor, String collapseField) { + init(collapseSize, extraFactor); + } + + private void init(int collapseSize, double extraFactor) { + this.defaultCollapseSize = collapseSize; + this.extraFactor = extraFactor; + } + + /** + * First fetch result from the next searcher in the chain. + * If collapse is active, do collapsing. + * Otherwise, act as a simple pass through + */ + public Result search(com.yahoo.search.Query query, Execution execution) { + String collapseField = query.properties().getString(collapsefield); + + if (collapseField==null) return execution.search(query); + + int collapseSize = query.properties().getInteger(collapsesize,defaultCollapseSize); + query.properties().set(collapse, "0"); + + int hitsToRequest = query.getHits() != 0 ? 
(int) Math.ceil((query.getOffset() + query.getHits() + 1) * extraFactor) : 0; + int nextOffset = 0; + int hitsAfterCollapse; + boolean moreHitsAvailable = true; + Map<String, Integer> knownCollapses = new java.util.HashMap<>(); + Result result = new Result(query); + int performedQueries = 0; + Result resultSource; + String collapseSummary = query.properties().getString(collapseSummaryName); + + do { + resultSource = search(query.clone(), execution, nextOffset, hitsToRequest); + String summaryClass = (collapseSummary == null) + ? query.getPresentation().getSummary() : collapseSummary; + fill(resultSource, summaryClass, execution); + collapse(result, knownCollapses, resultSource, collapseField, collapseSize); + + hitsAfterCollapse = result.getHitCount(); + if (resultSource.getTotalHitCount() < (hitsToRequest + nextOffset)) { + // the searcher downstream has no more hits + moreHitsAvailable = false; + } + nextOffset += hitsToRequest; + if (hitsAfterCollapse < query.getOffset() + query.getHits()) { + hitsToRequest = (int) Math.ceil(hitsToRequest * extraFactor); + } + ++performedQueries; + + } while (hitsToRequest != 0 + && (hitsAfterCollapse < query.getOffset() + query.getHits()) + && moreHitsAvailable + && (performedQueries <= maxQueries)); + + // Set correct meta information + result.mergeWith(resultSource); + // Keep only (offset,.. offset+hits) hits + result.hits().trim(query.getOffset(), query.getHits()); + // Mark query as query with collapsing + query.properties().set(collapse, "1"); + return result; + } + + private Result search(Query query, Execution execution, int offset , int hits) { + query.setOffset(offset); + query.setHits(hits); + return execution.search(query); + } + + /** + * Collapse logic. Preserves only maxHitsPerField hits + * for each unique value of the collapsing parameter. 
+ */ + private void collapse(Result result, Map<String, Integer> knownCollapses, + Result resultSource, String collapseField, int collapseSize) { + for (Iterator<Hit> it = resultSource.hits().iterator(); it.hasNext();) { + Hit unknownHit = it.next(); + + if (!(unknownHit instanceof FastHit)) { + result.hits().add(unknownHit); + continue; + } + FastHit hit = (FastHit) unknownHit; + Object peek = hit.getField(collapseField); + String collapseId = peek != null ? peek.toString() : null; + if (collapseId == null) { + result.hits().add(hit); + continue; + } + + if (knownCollapses.containsKey(collapseId)) { + int numHitsThisField = knownCollapses.get(collapseId).intValue(); + + if (numHitsThisField < collapseSize) { + result.hits().add(hit); + ++numHitsThisField; + knownCollapses.put(collapseId, new Integer(numHitsThisField)); + } + } else { + knownCollapses.put(collapseId, new Integer(1)); + result.hits().add(hit); + } + } + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/FillSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/FillSearcher.java new file mode 100644 index 00000000000..f7bff5b481c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/FillSearcher.java @@ -0,0 +1,52 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import com.yahoo.component.ComponentId; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; + +/** + * This searcher fills the results in the first phase. May be put into + * a search chain to ensure full results are present at an earlier + * time than they would normally be. 
+ * + * @author <a href="mailto:havardpe@yahoo-inc.com">havardpe</a> + **/ +public class FillSearcher extends Searcher { + private final Searcher next; + + public FillSearcher() { + next = null; + } + + public FillSearcher(Searcher next) { + this.next = next; + } + + @Override + public Result search(Query query, Execution execution) { + Result result; + if (next == null) { + result = execution.search(query); + execution.fill(result); + } else { + Execution e = new Execution(next, execution.context()); + result = e.search(query); + e.fill(result); + } + return result; + } + + // TODO: Remove this method as it does nothing new + @Override + public void fill(Result result, String summaryClass, Execution execution) { + if (next == null) { + execution.fill(result, summaryClass); + } else { + Execution e = new Execution(next, execution.context()); + e.fill(result, summaryClass); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/JSONDebugSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/JSONDebugSearcher.java new file mode 100644 index 00000000000..dbfde502b75 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/JSONDebugSearcher.java @@ -0,0 +1,56 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.prelude.hitfield.JSONString; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.FeatureData; +import com.yahoo.search.result.StructuredData; +import com.yahoo.search.searchchain.Execution; + +import java.util.Iterator; + +/** + * Save the query in the incoming state to a meta hit in the result. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ + +public class JSONDebugSearcher extends Searcher { + public static final String JSON_FIELD = "JSON field: "; + public static final String STRUCT_FIELD = "Structured data field (as json): "; + public static final String FEATURE_FIELD = "Feature data field (as json): "; + + private static CompoundName PROPERTYNAME = new CompoundName("dumpjson"); + + public Result search(com.yahoo.search.Query query, Execution execution) { + Result r = execution.search(query); + String propertyName = query.properties().getString(PROPERTYNAME); + if (propertyName != null) { + execution.fill(r); + for (Iterator<Hit> i = r.hits().deepIterator(); i.hasNext();) { + Hit h = i.next(); + if (h instanceof FastHit) { + FastHit hit = (FastHit) h; + Object o = hit.getField(propertyName); + if (o instanceof JSONString) { + JSONString j = (JSONString) o; + r.getQuery().trace(JSON_FIELD + j.getContent(), false, 5); + } + if (o instanceof StructuredData) { + StructuredData d = (StructuredData) o; + r.getQuery().trace(STRUCT_FIELD + d.toJson(), false, 5); + } + if (o instanceof FeatureData) { + FeatureData d = (FeatureData) o; + r.getQuery().trace(FEATURE_FIELD + d.toJson(), false, 5); + } + } + } + } + return r; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/JuniperSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/JuniperSearcher.java new file mode 100644 index 00000000000..75ae960cac0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/JuniperSearcher.java @@ -0,0 +1,212 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.searcher; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.ListIterator; + +import com.google.inject.Inject; +import com.yahoo.component.ComponentId; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.prelude.Index; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.search.Searcher; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.prelude.hitfield.BoldCloseFieldPart; +import com.yahoo.prelude.hitfield.BoldOpenFieldPart; +import com.yahoo.prelude.hitfield.FieldPart; +import com.yahoo.prelude.hitfield.HitField; +import com.yahoo.prelude.hitfield.SeparatorFieldPart; +import com.yahoo.prelude.hitfield.StringFieldPart; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.result.Hit; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +/** + * Converts juniper highlighting to XML style + * <p> + * Note: This searcher only converts backend binary highlighting and separators + * to the configured highlighting and separator tags. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@After(PhaseNames.RAW_QUERY) +@Before(PhaseNames.TRANSFORMED_QUERY) +@Provides(JuniperSearcher.JUNIPER_TAG_REPLACING) +public class JuniperSearcher extends Searcher { + + public final static char RAW_HIGHLIGHT_CHAR = '\u001F'; + public final static char RAW_SEPARATOR_CHAR = '\u001E'; + + private static final String ELLIPSIS = "..."; + + // The name of the field containing document type + private static final String MAGIC_FIELD = Hit.SDDOCNAME_FIELD; + + public static final String JUNIPER_TAG_REPLACING = "JuniperTagReplacing"; + + private String boldOpenTag; + private String boldCloseTag; + private String separatorTag; + + @Inject + public JuniperSearcher(ComponentId id, QrSearchersConfig config) { + super(id); + + boldOpenTag = config.tag().bold().open(); + boldCloseTag = config.tag().bold().close(); + separatorTag = config.tag().separator(); + } + + /** + * Convert Juniper style property highlighting to XML style. + */ + @Override + public Result search(Query query, Execution execution) { + Result result = execution.search(query); + highlight(query.getPresentation().getBolding(), result.hits().deepIterator(), null, + execution.context().getIndexFacts().newSession(query)); + return result; + } + + @Override + public void fill(Result result, String summaryClass, Execution execution) { + Result workResult = result; + final int worstCase = workResult.getHitCount(); + final List<Hit> hits = new ArrayList<>(worstCase); + for (final Iterator<Hit> i = workResult.hits().deepIterator(); i.hasNext();) { + final Hit sniffHit = i.next(); + if ( ! 
(sniffHit instanceof FastHit)) continue; + + final FastHit hit = (FastHit) sniffHit; + if (hit.isFilled(summaryClass)) continue; + + hits.add(hit); + } + execution.fill(workResult, summaryClass); + highlight(workResult.getQuery().getPresentation().getBolding(), hits.iterator(), summaryClass, + execution.context().getIndexFacts().newSession(result.getQuery())); + } + + private void highlight(boolean bolding, Iterator<Hit> hitsToHighlight, + String summaryClass, IndexFacts.Session indexFacts) { + while (hitsToHighlight.hasNext()) { + Hit sniffHit = hitsToHighlight.next(); + if ( ! (sniffHit instanceof FastHit)) continue; + + FastHit hit = (FastHit) sniffHit; + if (summaryClass != null && ! hit.isFilled(summaryClass)) continue; + + Object searchDefinitionField = hit.getField(MAGIC_FIELD); + if (searchDefinitionField == null) continue; + String searchDefinitionName = searchDefinitionField.toString(); + + for (String fieldName : hit.fields().keySet()) { + Index index = indexFacts.getIndex(fieldName, searchDefinitionName); + if (index.getDynamicSummary() || index.getHighlightSummary()) + insertTags(hit.buildHitField(fieldName, true, true), bolding, index.getDynamicSummary()); + } + } + } + + private void insertTags(final HitField oldProperty, final boolean bolding, final boolean dynteaser) { + boolean insideHighlight = false; + for (final ListIterator<FieldPart> i = oldProperty.listIterator(); i.hasNext();) { + final FieldPart f = i.next(); + if (f instanceof SeparatorFieldPart) { + setSeparatorString(bolding, (SeparatorFieldPart) f); + } + if (f.isFinal()) { + continue; + } + + final String toQuote = f.getContent(); + List<FieldPart> newFieldParts = null; + int previous = 0; + for (int j = 0; j < toQuote.length(); j++) { + final char key = toQuote.charAt(j); + switch (key) { + case RAW_HIGHLIGHT_CHAR: + newFieldParts = initFieldParts(newFieldParts); + addBolding(bolding, insideHighlight, f, toQuote, newFieldParts, previous, j); + previous = j + 1; + insideHighlight = 
!insideHighlight; + break; + case RAW_SEPARATOR_CHAR: + newFieldParts = initFieldParts(newFieldParts); + addSeparator(bolding, dynteaser, f, toQuote, newFieldParts, + previous, j); + previous = j + 1; + break; + default: + // no action + break; + } + } + if (previous > 0 && previous < toQuote.length()) { + newFieldParts.add(new StringFieldPart(toQuote.substring(previous), f.isToken())); + } + if (newFieldParts != null) { + i.remove(); + for (final Iterator<FieldPart> j = newFieldParts.iterator(); j.hasNext();) { + i.add(j.next()); + } + } + } + } + + private void setSeparatorString(final boolean bolding,final SeparatorFieldPart f) { + if (bolding) { + f.setContent(separatorTag); + } else { + f.setContent(ELLIPSIS); + } + } + + private void addSeparator(final boolean bolding, final boolean dynteaser, + final FieldPart f, final String toQuote, + final List<FieldPart> newFieldParts, final int previous, final int j) { + if (previous != j) { + newFieldParts.add(new StringFieldPart(toQuote.substring(previous, j), f.isToken())); + } + if (dynteaser) { + final FieldPart s = (bolding ? 
new SeparatorFieldPart(separatorTag) : new SeparatorFieldPart(ELLIPSIS)); + newFieldParts.add(s); + } + } + + private void addBolding(final boolean bolding, + final boolean insideHighlight, final FieldPart f, + final String toQuote, final List<FieldPart> newFieldParts, + final int previous, final int j) { + if (previous != j) { + newFieldParts.add(new StringFieldPart(toQuote.substring(previous, j), f.isToken())); + } + if (bolding) { + if (insideHighlight) { + newFieldParts.add(new BoldCloseFieldPart(boldCloseTag)); + } else { + if (newFieldParts.size() > 0 + && newFieldParts.get(newFieldParts.size() - 1) instanceof BoldCloseFieldPart) { + newFieldParts.remove(newFieldParts.size() - 1); + } else { + newFieldParts.add(new BoldOpenFieldPart(boldOpenTag)); + } + } + } + } + + private List<FieldPart> initFieldParts(List<FieldPart> newFieldParts) { + if (newFieldParts == null) { + newFieldParts = new ArrayList<>(); + } + return newFieldParts; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/KeyValueSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/KeyValueSearcher.java new file mode 100644 index 00000000000..a282dc22b53 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/KeyValueSearcher.java @@ -0,0 +1,166 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.prelude.searcher;

import com.yahoo.document.BucketId;
import com.yahoo.document.BucketIdFactory;
import com.yahoo.document.DocumentId;
import com.yahoo.document.GlobalId;
import com.yahoo.document.idstring.IdString;
import com.yahoo.documentapi.messagebus.protocol.SearchColumnPolicy;
import com.yahoo.prelude.fastsearch.FastHit;
import com.yahoo.prelude.query.IntItem;
import com.yahoo.prelude.query.QueryCanonicalizer;
import com.yahoo.search.Query;
import com.yahoo.search.Result;
import com.yahoo.search.Searcher;
import com.yahoo.search.grouping.vespa.GroupingExecutor;
import com.yahoo.search.query.Model;
import com.yahoo.search.query.QueryTree;
import com.yahoo.search.result.DefaultErrorHit;
import com.yahoo.search.result.ErrorMessage;
import com.yahoo.search.result.Hit;
import com.yahoo.search.result.HitGroup;
import com.yahoo.search.searchchain.Execution;
import com.yahoo.vdslib.BucketDistribution;
import com.yahoo.component.chain.dependencies.Before;

import java.util.Iterator;
import java.util.logging.Logger;


/**
 * Searcher that does efficient key/value lookup using Vespa search as a
 * backend. It does so by bypassing the first phase ranking, and only performs
 * the second phase summary fetching.
 *
 * The keys to find are input as a comma-separated list using the <i>keys</i>
 * query parameter. Each key should match a part of a document id. Given the key
 * 'foo', and document id namespace 'mynamespace', the document id matched will
 * be 'id:mynamespace:keyvalue::foo'.
 *
 * To scale the throughput with the number of partitions, the searcher uses the
 * same hashing mechanisms as the document API to find out which node each key
 * belongs to. The searcher then dispatches a summary request to retrieve keys
 * and returns the result.
 *
 * @author <a href="lulf@yahoo-inc.com">Ulf Lilleengen</a>
 */
@Before(GroupingExecutor.COMPONENT_NAME)
public class KeyValueSearcher extends Searcher {

    private static final Logger log = Logger.getLogger(KeyValueSearcher.class.getName());
    private final BucketIdFactory factory = new BucketIdFactory();
    private final BucketDistribution distribution;
    private final String summaryClass;
    private final String idSchemePrefix;
    private final int numRowBits;
    private final int traceLevel = 5;

    /**
     * Creates a key/value searcher from configuration.
     *
     * @param config supplies the summary name, document id scheme, number of
     *               partitions and number of rows
     * @throws IllegalArgumentException if the configured number of rows is less than 1
     */
    public KeyValueSearcher(KeyvalueConfig config) {
        this.summaryClass = config.summaryName();
        this.idSchemePrefix = createIdSchemePrefix(config);
        this.distribution = new BucketDistribution(config.numparts(), SearchColumnPolicy.DEFAULT_NUM_BUCKET_BITS);
        this.numRowBits = calcNumRowBits(config.numrows());
        log.config("Configuring " + KeyValueSearcher.class.getName() + " with " + config.numparts() + " partitions and doc id scheme '" + idSchemePrefix + "'");
    }

    /** Returns the text to put in front of a key to turn it into a document id */
    private String createIdSchemePrefix(KeyvalueConfig config) {
        if (config.docIdScheme().equals(KeyvalueConfig.DocIdScheme.Enum.DOC_SCHEME)) {
            return "doc:" + config.docIdNameSpace() + ":";
        } else {
            return "id:" + config.docIdNameSpace() + ":" + config.docIdType() + "::";
        }
    }

    /**
     * Creates an unfilled, fillable hit addressing the document of the given key,
     * routed to the partition owning the bucket of that document.
     *
     * @param query the query the hit belongs to
     * @param key the key to look up; surrounding whitespace is ignored
     * @return a hit ready to be filled
     */
    public Hit createHit(Query query, String key) {
        String docId = createDocId(key.trim());
        BucketId id = factory.getBucketId(new DocumentId(docId));
        int partition = getPartition(id);

        FastHit hit = new FastHit();
        hit.setGlobalId(new GlobalId(IdString.createIdString(docId)));
        hit.setQuery(query);
        hit.setFillable();
        hit.setCached(false);
        hit.setPartId(partition << numRowBits, numRowBits);
        hit.setRelevance(1.0);
        hit.setIgnoreRowBits(true);
        // NOTE(review): 42 looks like a placeholder distribution key - confirm
        hit.setDistributionKey(42);
        return hit;
    }

    private String createDocId(String key) {
        return idSchemePrefix + key;
    }


    /**
     * Creates one hit per key in the <i>keys</i> property, fills them, and returns
     * those which could be filled. The result carries an error if the <i>keys</i>
     * property is missing or empty, or if some keys could not be fetched.
     */
    @Override
    public Result search(Query query, Execution execution) {
        String keyProp = query.properties().getString("keys");
        query.getPresentation().setSummary(summaryClass);
        if (keyProp == null || keyProp.length() == 0) {
            return new Result(query, new ErrorMessage(ErrorMessage.NULL_QUERY, "'keys' parameter not set or empty."));
        }
        String[] keyList = keyProp.split(",");
        Model model = query.getModel();
        QueryTree tree = model.getQueryTree();
        QueryCanonicalizer.canonicalize(tree);
        if (tree.isEmpty()) {
            // presumably the fill request needs a non-empty query tree - TODO confirm
            tree.setRoot(new IntItem(String.valueOf(keyProp.hashCode())));
        }

        Result result = new Result(query);
        for (String key : keyList) {
            result.hits().add(createHit(query, key));
        }
        execution.fill(result, summaryClass);
        if (query.isTraceable(traceLevel)) {
            traceResult(query, result);
        }
        // Keep only the hits which were actually filled
        int totalHits = 0;
        Iterator<Hit> hitIterator = result.hits().iterator();
        while (hitIterator.hasNext()) {
            Hit hit = hitIterator.next();
            if (hit.isFillable() && hit.isFilled(summaryClass)) {
                totalHits++;
            } else {
                hitIterator.remove();
            }
        }
        if (totalHits != keyList.length) {
            ErrorMessage error = new ErrorMessage(1, "Some keys could not be fetched");
            result.hits().setError(error);
        }
        result.setTotalHitCount(totalHits);
        return result;
    }

    /** Traces every hit, stating whether it was filled, followed by the error hit of the result */
    private void traceResult(Query query, Result result) {
        Iterator<Hit> hitIterator = result.hits().iterator();
        while (hitIterator.hasNext()) {
            Hit hit = hitIterator.next();
            if (hit.isFillable() && hit.isFilled(summaryClass)) {
                query.trace("Found filled hit: " + hit, traceLevel);
            } else {
                query.trace("Found hit that was not filled/fillable: " + hit, traceLevel);
            }
        }
        query.trace("Error hit: " + result.hits().getErrorHit(), traceLevel);
    }

    private int getPartition(BucketId bucketId) {
        return distribution.getColumn(bucketId);
    }

    /**
     * Returns the number of bits needed to represent the row indexes
     * 0 .. numRows - 1, that is the smallest i such that numRows - 1 &lt; 2^i.
     *
     * @param numRows the number of rows, at least 1
     * @return the number of row bits, in the range 0 to 31
     * @throws IllegalArgumentException if numRows is less than 1
     */
    private static int calcNumRowBits(int numRows) {
        if (numRows < 1) {
            throw new IllegalArgumentException("Number of rows must be at least 1, but was " + numRows);
        }
        // The position of the highest set bit of numRows - 1, counted from 1.
        // Also yields 30 for row counts above 2^29, which a loop bounded at 30 skipped.
        return Integer.SIZE - Integer.numberOfLeadingZeros(numRows - 1);
    }
}
diff --git
a/container-search/src/main/java/com/yahoo/prelude/searcher/MultipleResultsSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/MultipleResultsSearcher.java new file mode 100644 index 00000000000..ac2196bb9f5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/MultipleResultsSearcher.java @@ -0,0 +1,376 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.HitGroup; +import com.yahoo.search.searchchain.Execution; + +import java.util.*; + +/** + * <p> Groups hits according to sddocname. </p> + * + * <p> For each group, the desired number of hits can be specified. </p> + * + * @author tonytv + */ +public class MultipleResultsSearcher extends Searcher { + + private final static String propertyPrefix = "multipleresultsets."; + private static final CompoundName additionalHitsFactorName=new CompoundName(propertyPrefix + "additionalHitsFactor"); + private static final CompoundName maxTimesRetrieveHeterogeneousHitsName=new CompoundName(propertyPrefix + "maxTimesRetrieveHeterogeneousHits"); + private static final CompoundName numHits=new CompoundName(propertyPrefix + "numHits"); + + public @Override Result search(Query query, Execution e) { + try { + Parameters parameters = new Parameters(query); + + query.trace("MultipleResultsSearcher: " + parameters, false, 2); + HitsRetriever hitsRetriever = new HitsRetriever(query,e,parameters); + + for (DocumentGroup documentGroup : parameters.documentGroups) { + if ( hitsRetriever.numHits(documentGroup) < documentGroup.targetNumberOfDocuments) { + hitsRetriever.retrieveMoreHits(documentGroup); + } + } + + return 
hitsRetriever.createMultipleResultSets(); + } catch(ParameterException exception) { + Result result = new Result(query); + result.hits().setError(ErrorMessage.createInvalidQueryParameter(exception.msg)); + return result; + } + } + + private class HitsRetriever { + + PartitionedResult partitionedResult; + + private int numRetrieveMoreHitsCalls = 0; + private int nextOffset; + private Query query; + private final Parameters parameters; + private final int hits; + private final int offset; + private Execution execution; + private Result initialResult; + + HitsRetriever(Query query, Execution execution, Parameters parameters) throws ParameterException { + this.offset=query.getOffset(); + this.hits=query.getHits(); + this.nextOffset = query.getOffset() + query.getHits(); + this.query = query; + this.parameters = parameters; + this.execution = execution; + + initialResult = retrieveHits(); + partitionedResult = new PartitionedResult(parameters.documentGroups, initialResult); + + this.query = query; + } + + void retrieveMoreHits(DocumentGroup documentGroup) { + if ( ++numRetrieveMoreHitsCalls < + parameters.maxTimesRetrieveHeterogeneousHits) { + + retrieveHeterogenousHits(); + + if (numHits(documentGroup) < + documentGroup.targetNumberOfDocuments) { + + retrieveMoreHits(documentGroup); + } + + } else { + retrieveRemainingHitsForGroup(documentGroup); + } + } + + void retrieveHeterogenousHits() { + int numHitsToRetrieve = (int)(hits * parameters.additionalHitsFactor); + + final int maxNumHitsToRetrieve = 1000; + numHitsToRetrieve = Math.min(numHitsToRetrieve,maxNumHitsToRetrieve); + + try { + query.setWindow(nextOffset,numHitsToRetrieve); + partitionedResult.addHits(retrieveHits()); + } + finally { + restoreWindow(); + nextOffset += numHitsToRetrieve; + } + } + + private void restoreWindow() { + query.setWindow(offset,hits); + } + + void retrieveRemainingHitsForGroup(DocumentGroup documentGroup) { + Set<String> oldRestrictList = query.getModel().getRestrict(); + try { + int 
numMissingHits = documentGroup.targetNumberOfDocuments - numHits(documentGroup); + int offset = numHits(documentGroup); + + query.getModel().getRestrict().clear(); + query.getModel().getRestrict().add(documentGroup.documentName); + query.setWindow(offset, numMissingHits); + partitionedResult.addHits(retrieveHits()); + + } finally { + restoreWindow(); + query.getModel().getRestrict().clear(); + query.getModel().getRestrict().addAll(oldRestrictList); + } + } + + int numHits(DocumentGroup documentGroup) { + return partitionedResult.numHits(documentGroup.documentName); + } + + Result createMultipleResultSets() { + Iterator<Hit> i = initialResult.hits().iterator(); + while (i.hasNext()) { + i.next(); + i.remove(); + } + + for (DocumentGroup group: parameters.documentGroups) { + partitionedResult.cropResultSet(group.documentName,group.targetNumberOfDocuments); + } + + partitionedResult.insertInto(initialResult.hits()); + return initialResult; + } + + private Result retrieveHits() { + Result result = execution.search(query); + // ensure that field sddocname is available + execution.fill(result); // TODO: Suffices to fill attributes + + if (result.hits().getErrorHit() != null) + initialResult.hits().getErrorHit().addErrors( + result.hits().getErrorHit()); + + + return result; + } + } + + // Assumes that field sddocname is available + private static class PartitionedResult { + + private Map<String, HitGroup> resultSets = new HashMap<>(); + + private List<Hit> otherHits = new ArrayList<>(); + + PartitionedResult(List<DocumentGroup> documentGroups,Result result) throws ParameterException { + for (DocumentGroup group : documentGroups) + addGroup(group); + + addHits(result, true); + } + + void addHits(Result result, boolean addOtherHits) { + Iterator<Hit> i = result.hits().iterator(); + while (i.hasNext()) { + add(i.next(), addOtherHits); + } + } + + void addHits(Result result) { + addHits(result, false); + } + + + void add(Hit hit, boolean addOtherHits) { + String documentName 
= (String)hit.getField(Hit.SDDOCNAME_FIELD); + + if (documentName != null) { + HitGroup resultSet = resultSets.get(documentName); + + if (resultSet != null) { + resultSet.add(hit); + return; + } + } + + if (addOtherHits) { + otherHits.add(hit); + } + } + + int numHits(String documentName) { + return resultSets.get(documentName).size(); + } + + void insertInto(HitGroup group) { + for (Hit hit: otherHits) { + group.add(hit); + } + + for (HitGroup hit: resultSets.values() ) { + hit.copyOrdering(group); + group.add(hit); + } + } + + void cropResultSet(String documentName, int numDocuments) { + resultSets.get(documentName).trim(0, numDocuments); + } + + private void addGroup(DocumentGroup group) throws ParameterException { + final String documentName = group.documentName; + if ( resultSets.put(group.documentName, + new HitGroup(documentName) { + /** + * + */ + private static final long serialVersionUID = 5732822886080288688L; + }) + != null ) { + + throw new ParameterException("Document name " + group.documentName + "mentioned multiple times"); + } + } + + } + + + //examples: + //multipleresultsets.numhits=music:10,movies:20 + //multipleresultsets.additionalhitsFactor=0.8 + //multipleresultsets.maxtimesretrieveheterogeneoushits=2 + private static class Parameters { + Parameters(Query query) + throws ParameterException { + + readNumHitsSpecification(query); + readMaxTimesRetrieveHeterogeneousHits(query); + readAdditionalHitsFactor(query); + } + + + List<DocumentGroup> documentGroups = new ArrayList<>(); + double additionalHitsFactor = 0.8; + int maxTimesRetrieveHeterogeneousHits = 2; + + private void readAdditionalHitsFactor(Query query) + throws ParameterException { + + String additionalHitsFactorStr = query.properties().getString(additionalHitsFactorName); + + if (additionalHitsFactorStr == null) + return; + + try { + additionalHitsFactor = + Double.parseDouble(additionalHitsFactorStr); + } catch (NumberFormatException e) { + throw new ParameterException( + "Expected 
floating point number, got '" + + additionalHitsFactorStr + "'."); + } + } + + private void readMaxTimesRetrieveHeterogeneousHits(Query query) { + maxTimesRetrieveHeterogeneousHits = query.properties().getInteger( + maxTimesRetrieveHeterogeneousHitsName, + maxTimesRetrieveHeterogeneousHits); + } + + + private void readNumHitsSpecification(Query query) + throws ParameterException { + + //example numHitsSpecification: "music:10,movies:20" + String numHitsSpecification = + query.properties().getString(numHits); + + if (numHitsSpecification == null) + return; + + String[] numHitsForDocumentNames = numHitsSpecification.split(","); + + for (String s:numHitsForDocumentNames) { + handleDocumentNameWithNumberOfHits(s); + } + + } + + public String toString() { + String s = "additionalHitsFactor=" + additionalHitsFactor + + ", maxTimesRetrieveHeterogeneousHits=" + + maxTimesRetrieveHeterogeneousHits + + ", numHitsSpecification='"; + + for (DocumentGroup group : documentGroups) { + s += group.documentName + ":" + + group.targetNumberOfDocuments + ", "; + } + + s += "'"; + + return s; + } + + //example input: music:10 + private void handleDocumentNameWithNumberOfHits(String s) + throws ParameterException { + + String[] documentNameWithNumberOfHits = s.split(":"); + + if (documentNameWithNumberOfHits.length != 2) { + String msg = "Expected a single ':' in '" + s + "'."; + + if (documentNameWithNumberOfHits.length > 2) + msg += " Please check for missing commas."; + + throw new ParameterException(msg); + } else { + String documentName = + documentNameWithNumberOfHits[0].trim(); + try { + int numHits = Integer.parseInt( + documentNameWithNumberOfHits[1].trim()); + + numRequestedHits(documentName, numHits); + } catch (NumberFormatException e) { + throw new ParameterException( + "Excpected an integer but got '" + + documentNameWithNumberOfHits[1] + "'"); + } + } + } + + private void numRequestedHits(String documentName, int numHits) { + documentGroups.add(new 
DocumentGroup(documentName, numHits)); + } + + } + + private static class DocumentGroup { + String documentName; + int targetNumberOfDocuments; + + DocumentGroup(String documentName, int targetNumberOfDocuments) { + this.documentName = documentName; + this.targetNumberOfDocuments = targetNumberOfDocuments; + } + } + + @SuppressWarnings("serial") + private static class ParameterException extends Exception { + String msg; + + ParameterException(String msg) { + this.msg = msg; + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/PosSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/PosSearcher.java new file mode 100644 index 00000000000..03e212fc854 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/PosSearcher.java @@ -0,0 +1,174 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.geo.DegreesParser; +import com.yahoo.geo.BoundingBoxParser; +import com.yahoo.yolean.Exceptions; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; +import com.yahoo.prelude.Location; + +/** + * A searcher converting human-readable position parameters + * into internal format. 
+ * <br> + * Reads the following query properties: + * <ul> + * <li> pos.ll (geographical latitude and longitude) + * <li> pos.xy (alternate to pos.ll - direct x and y in internal units) + * <li> pos.radius (distance in one of: + * internal units (no suffix), meter (m), kilometer (km) or miles (mi) + * </ul> + * + * @author Arne J + */ +@After(PhaseNames.RAW_QUERY) +@Before(PhaseNames.TRANSFORMED_QUERY) +@Provides(PosSearcher.POSITION_PARSING) +public class PosSearcher extends Searcher { + public static final String POSITION_PARSING = "PositionParsing"; + + private static final CompoundName posBb = new CompoundName("pos.bb"); + private static final CompoundName posLl = new CompoundName("pos.ll"); + private static final CompoundName posXy = new CompoundName("pos.xy"); + private static final CompoundName posAttributeName = new CompoundName("pos.attribute"); + private static final CompoundName posRadius = new CompoundName("pos.radius"); + private static final CompoundName posUnits = new CompoundName("pos.units"); + + // according to wikipedia: + // Earth's equatorial radius = 6378137 meter - not used + // meters per mile = 1609.344 + // 180 degrees equals one half diameter equals PI*r + // Earth's polar radius = 6356752 meter + + public final static double km2deg = 1000.000 * 180.0 / (Math.PI * 6356752.0); + public final static double mi2deg = 1609.344 * 180.0 / (Math.PI * 6356752.0); + + + public Result search(Query query, Execution execution) { + String bb = query.properties().getString(posBb); + String ll = query.properties().getString(posLl); + String xy = query.properties().getString(posXy); + + if (ll == null && xy == null && bb == null) { + return execution.search(query); // Nothing to do + } + if (query.getRanking().getLocation() != null) { + // this searcher is a NOP if there is already a location + // in the query + query.trace("query already has a location set, not processing 'pos' params", false, 1); + return execution.search(query); + } + + Location loc = 
new Location(); + loc.setDimensions(2); + String posAttribute = query.properties().getString(posAttributeName); + loc.setAttribute(posAttribute); + + try { + if (ll == null && xy == null && bb != null) { + parseBoundingBox(bb, loc); + } else { + if (ll != null && xy != null) { + throw new IllegalArgumentException("Cannot handle both lat/long and xy coords at the same time"); + } + if (ll != null) { + handleGeoCircle(query, ll, loc); + } + if (xy != null) { + handleXyCircle(query, xy, loc); + } + if (bb != null) { + parseBoundingBox(bb, loc); + } + } + } + catch (IllegalArgumentException e) { + // System.err.println("error: "+e); + return new Result(query, ErrorMessage.createInvalidQueryParameter( + "Error in pos parameters: " + Exceptions.toMessageString(e))); + } + // and finally: + query.getRanking().setLocation(loc); + return execution.search(query); + } + + private void handleGeoCircle(Query query, String ll, Location target) { + double ewCoord = 0; + double nsCoord = 0; + try { + DegreesParser parsed = new DegreesParser(ll); + ewCoord = parsed.longitude; + nsCoord = parsed.latitude; + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException("Unable to parse lat/long string '" +ll + "'", e); + } + String radius = query.properties().getString(posRadius); + double radiusdegrees = 0.0; + + if (radius == null) { + radiusdegrees = 50.0 * km2deg; + } else if (radius.endsWith("km")) { + double radiuskm = Double.valueOf(radius.substring(0, radius.length()-2)); + radiusdegrees = radiuskm * km2deg; + } else if (radius.endsWith("m")) { + double radiusm = Double.valueOf(radius.substring(0, radius.length()-1)); + radiusdegrees = radiusm * km2deg / 1000.0; + } else if (radius.endsWith("mi")) { + double radiusmiles = Double.valueOf(radius.substring(0, radius.length()-2)); + radiusdegrees = radiusmiles * mi2deg; + } else { + radiusdegrees = Integer.parseInt(radius) * 0.000001; + } + target.setGeoCircle(nsCoord, ewCoord, radiusdegrees); + } + + + private void 
handleXyCircle(Query query, String xy, Location target) { + int xcoord = 0; + int ycoord = 0; + // parse xy + int semipos = xy.indexOf(';'); + if (semipos > 0 && semipos < xy.length()) { + xcoord = Integer.parseInt(xy.substring(0, semipos)); + ycoord = Integer.parseInt(xy.substring(semipos+1, xy.length())); + } else { + throw new IllegalArgumentException("pos.xy must be in the format 'digits;digits' but was: '"+xy+"'"); + } + String radius = query.properties().getString(posRadius); + int radiusUnits = 0; + if (radius == null) { + radiusUnits = 5000; + } else if (radius.endsWith("km")) { + double radiuskm = Double.valueOf(radius.substring(0, radius.length()-2)); + double radiusdegrees = radiuskm * km2deg; + radiusUnits = (int)(radiusdegrees * 1000000); + } else if (radius.endsWith("m")) { + double radiusm = Double.valueOf(radius.substring(0, radius.length()-1)); + double radiusdegrees = radiusm * km2deg / 1000.0; + radiusUnits = (int)(radiusdegrees * 1000000); + } else if (radius.endsWith("mi")) { + double radiusmiles = Double.valueOf(radius.substring(0, radius.length()-2)); + double radiusdegrees = radiusmiles * mi2deg; + radiusUnits = (int)(radiusdegrees * 1000000); + } else { + radiusUnits = Integer.parseInt(radius); + } + target.setXyCircle(xcoord, ycoord, radiusUnits); + } + + + private static void parseBoundingBox(String bb, Location target) { + BoundingBoxParser parser = new BoundingBoxParser(bb); + target.setBoundingBox(parser.n, parser.s, parser.e, parser.w); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/QuerySnapshotSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/QuerySnapshotSearcher.java new file mode 100644 index 00000000000..7d0ae0a6d99 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/QuerySnapshotSearcher.java @@ -0,0 +1,29 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.searcher; + +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.Relevance; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; + +/** + * Save the query in the incoming state to a meta hit in the result. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ + +public class QuerySnapshotSearcher extends Searcher { + + public Result search(Query query, Execution execution) { + Query q = query.clone(); + Result r = execution.search(query); + Hit h = new Hit("meta:querysnapshot", new Relevance( + Double.POSITIVE_INFINITY)); + h.setMeta(true); + h.setField("query", q); + r.hits().add(h); + return r; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/QueryValidatingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/QueryValidatingSearcher.java new file mode 100644 index 00000000000..5678cc918da --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/QueryValidatingSearcher.java @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; + +/** + * Ensures hits is 1000 or less and offset is 1000 or less. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class QueryValidatingSearcher extends Searcher { + + public Result search(Query query, Execution execution) { + if (query.getHits() > 1000) { + Result result = new Result(query); + ErrorMessage error + = ErrorMessage.createInvalidQueryParameter("Too many hits (more than 1000) requested."); + result.hits().addError(error); + return result; + } + if (query.getOffset() > 1000) { + Result result = new Result(query); + ErrorMessage error + = ErrorMessage.createInvalidQueryParameter("Offset too high (above 1000)."); + result.hits().addError(error); + return result; + } + return execution.search(query); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/QuotingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/QuotingSearcher.java new file mode 100644 index 00000000000..6c5a6492b92 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/QuotingSearcher.java @@ -0,0 +1,193 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import java.util.*; + +import com.yahoo.component.ComponentId; +import com.yahoo.search.result.Hit; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.prelude.hitfield.FieldPart; +import com.yahoo.prelude.hitfield.HitField; +import com.yahoo.prelude.hitfield.ImmutableFieldPart; +import com.yahoo.prelude.hitfield.StringFieldPart; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; + +/** + * A searcher which does quoting based on a quoting table. + * + * May be extended to do quoting template sensitive. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class QuotingSearcher extends Searcher { + + // Char to String + private QuoteTable quoteTable; + + private synchronized void setQuoteTable(QuoteTable quoteTable) { + this.quoteTable = quoteTable; + } + private synchronized QuoteTable getQuoteTable() { + return quoteTable; + } + + private static class QuoteTable { + private final int lowerUncachedBound; + private final int upperUncachedBound; + private final Map<Character, String> quoteMap; + private final String[] lowerTable; + private final boolean useMap; + private final boolean isEmpty; + + public QuoteTable(QrQuotetableConfig config) { + int minOrd = 0; + int maxOrd = 0; + String[] newLowerTable = new String[256]; + boolean newUseMap = false; + boolean newIsEmpty = true; + Map<Character, String> newQuoteMap = new HashMap<>(); + for (Iterator<?> i = config.character().iterator(); i.hasNext(); ) { + QrQuotetableConfig.Character character + = (QrQuotetableConfig.Character)i.next(); + if (character.ordinal() > 256) { + newIsEmpty = false; + newQuoteMap.put(new Character((char)character.ordinal()), + character.quoting()); + newUseMap = true; + if (minOrd == 0 || character.ordinal() < minOrd) + minOrd = character.ordinal(); + if (maxOrd == 0 || character.ordinal() > maxOrd) + maxOrd = character.ordinal(); + } + else { + newIsEmpty = false; + newLowerTable[character.ordinal()] + = character.quoting(); + } + } + lowerUncachedBound = minOrd; + upperUncachedBound = maxOrd; + quoteMap = newQuoteMap; + useMap = newUseMap; + isEmpty = newIsEmpty; + lowerTable = newLowerTable; + } + public String get(char c) { + if (isEmpty) + return null; + int ord = (int)c; + if (ord < 256) { + return lowerTable[ord]; + } + else { + if ((!useMap) || ord < lowerUncachedBound + || ord > upperUncachedBound) + { + return null; + } + else { + return quoteMap.get(new Character(c)); + } + } + } + public boolean isEmpty() { + return isEmpty; + } + } + + public 
QuotingSearcher(ComponentId id, QrQuotetableConfig config) { + super(id); + setQuoteTable(new QuoteTable(config)); + } + + public Result search(Query query, Execution execution) { + Result result = execution.search(query); + execution.fill(result); + QuoteTable translations = getQuoteTable(); + if (translations == null || translations.isEmpty()) { + return result; + } + for (Iterator<Hit> i = result.hits().deepIterator(); i.hasNext(); ) { + Hit h = i.next(); + if (h instanceof FastHit) { + quoteProperties((FastHit)h, translations); + } + } + return result; + } + + private void quoteProperties(FastHit hit, QuoteTable translations) { + for (Iterator<?> i = ((Set<?>) hit.fields().keySet()).iterator(); i.hasNext(); ) { + String propertyName = (String) i.next(); + Object entry = hit.getField(propertyName); + if (entry == null) { + continue; + } + Class<? extends Object> propertyType = entry.getClass(); + if (propertyType.equals(HitField.class)) { + quoteField((HitField) entry, translations); + } else if (propertyType.equals(String.class)) { + quoteProperty(hit, propertyName, (String)entry, translations); + } + } + } + + private void quoteProperty(Hit hit, String fieldname, String toQuote, QuoteTable translations) { + List<FieldPart> l = translate(toQuote, translations, true); + if (l != null) { + HitField hf = new HitField(fieldname, toQuote); + hf.setTokenizedContent(l); + hit.setField(fieldname, hf); + } + } + + + private void quoteField(HitField field, QuoteTable translations) { + for (ListIterator<FieldPart> i = field.listIterator(); i.hasNext(); ) { + FieldPart f = i.next(); + if (!f.isFinal()) { + List<FieldPart> newFieldParts = translate(f.getContent(), translations, + f.isToken()); + if (newFieldParts != null) { + i.remove(); + for (Iterator<FieldPart> j = newFieldParts.iterator(); j.hasNext(); ) { + i.add(j.next()); + } + } + } + } + } + + private List<FieldPart> translate(String toQuote, QuoteTable translations, + boolean isToken) { + List<FieldPart> 
newFieldParts = null; + int lastIdx = 0; + for (int i = 0; i < toQuote.length(); i++) { + String quote = translations.get(toQuote.charAt(i)); + if (quote != null) { + if (newFieldParts == null) { + newFieldParts = new ArrayList<>(); + } + if (lastIdx != i) { + newFieldParts.add( + new StringFieldPart(toQuote.substring(lastIdx, i), + isToken)); + } + String initContent = Character.toString(toQuote.charAt(i)); + newFieldParts.add(new ImmutableFieldPart(initContent, + quote, + isToken)); + lastIdx = i+1; + } + } + if (lastIdx > 0 && lastIdx < toQuote.length()) { + newFieldParts.add( + new StringFieldPart(toQuote.substring(lastIdx), + isToken)); + } + return newFieldParts; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/ValidatePredicateSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/ValidatePredicateSearcher.java new file mode 100644 index 00000000000..3706f4fa9ea --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/ValidatePredicateSearcher.java @@ -0,0 +1,81 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import java.util.Optional; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.prelude.Index; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.PredicateQueryItem; +import com.yahoo.prelude.query.ToolBox; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.querytransform.BooleanSearcher; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; + +import java.util.Collection; + +/** + * Checks that predicate queries don't use values outside the defined upper/lower bounds. 
+ * + * @author <a href="mailto:magnarn@yahoo-inc.com">Magnar Nedland</a> + */ +@After(BooleanSearcher.PREDICATE) +public class ValidatePredicateSearcher extends Searcher { + + @Override + public Result search(Query query, Execution execution) { + Optional<ErrorMessage> e = validate(query, execution.context().getIndexFacts().newSession(query)); + if (e.isPresent()) { + Result r = new Result(query); + r.hits().addError(e.get()); + return r; + } + return execution.search(query); + } + + private Optional<ErrorMessage> validate(Query query, IndexFacts.Session indexFacts) { + ValidatePredicateVisitor visitor = new ValidatePredicateVisitor(indexFacts); + ToolBox.visit(visitor, query.getModel().getQueryTree().getRoot()); + return visitor.errorMessage; + } + + private static class ValidatePredicateVisitor extends ToolBox.QueryVisitor { + + private final IndexFacts.Session indexFacts; + + public Optional<ErrorMessage> errorMessage = Optional.empty(); + + public ValidatePredicateVisitor(IndexFacts.Session indexFacts) { + this.indexFacts = indexFacts; + } + + @Override + public boolean visit(Item item) { + if (item instanceof PredicateQueryItem) { + visit((PredicateQueryItem) item); + } + return true; + } + + private void visit(PredicateQueryItem item) { + Index index = getIndexFromUnionOfDocumentTypes(item); + for (PredicateQueryItem.RangeEntry entry : item.getRangeFeatures()) { + long value = entry.getValue(); + if (value < index.getPredicateLowerBound() || value > index.getPredicateUpperBound()) { + errorMessage = Optional.of(ErrorMessage.createIllegalQuery( + String.format("%s=%d outside configured predicate bounds.", entry.getKey(), value))); + } + } + } + + private Index getIndexFromUnionOfDocumentTypes(PredicateQueryItem item) { + return indexFacts.getIndex(item.getIndexName()); + } + + @Override + public void onExit() {} + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/ValidateSortingSearcher.java 
b/container-search/src/main/java/com/yahoo/prelude/searcher/ValidateSortingSearcher.java new file mode 100644 index 00000000000..ee8a896f73b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/ValidateSortingSearcher.java @@ -0,0 +1,191 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.vespa.config.search.AttributesConfig; +import com.yahoo.search.config.ClusterConfig; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.query.Sorting; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static com.yahoo.prelude.querytransform.NormalizingSearcher.ACCENT_REMOVAL; + + +/** + * Check sorting specification makes sense to the search cluster before + * passing it on to the backend. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@Before(PhaseNames.BACKEND) +@After(ACCENT_REMOVAL) +public class ValidateSortingSearcher extends Searcher { + + private Map<String, AttributesConfig.Attribute> attributeNames = null; + private String clusterName = ""; + private final QrSearchersConfig.Searchcluster.Indexingmode.Enum indexingMode; + + public String getClusterName() { + return clusterName; + } + + public void setClusterName(String clusterName) { + this.clusterName = clusterName; + } + + private Map<String, AttributesConfig.Attribute> getAttributeNames() { + return attributeNames; + } + + public void setAttributeNames(Map<String, AttributesConfig.Attribute> attributeNames) { + this.attributeNames = attributeNames; + } + + public void initAttributeNames(AttributesConfig config) { + HashMap<String, AttributesConfig.Attribute> attributes = new HashMap<>(config.attribute().size()); + + for (AttributesConfig.Attribute attr : config.attribute()) { + if (AttributesConfig.Attribute.Collectiontype.SINGLE != attr.collectiontype()) { + continue; // cannot sort on multivalue attributes + } + attributes.put(attr.name(), attr); + } + setAttributeNames(attributes); + } + + public ValidateSortingSearcher(QrSearchersConfig qrsConfig, ClusterConfig clusterConfig, + AttributesConfig attributesConfig) + { + initAttributeNames(attributesConfig); + setClusterName(qrsConfig.searchcluster(clusterConfig.clusterId()).name()); + indexingMode = qrsConfig.searchcluster(clusterConfig.clusterId()).indexingmode(); + } + + @Override + public Result search(Query query, Execution execution) { + if (indexingMode != QrSearchersConfig.Searchcluster.Indexingmode.STREAMING) { + ErrorMessage e = validate(query); + if (e != null) { + Result r = new Result(query); + r.hits().addError(e); + return r; + } + } + return execution.search(query); + } + + private static Sorting.UcaSorter.Strength config2Strength(AttributesConfig.Attribute.Sortstrength.Enum s) { + if(s 
== AttributesConfig.Attribute.Sortstrength.PRIMARY) { + return Sorting.UcaSorter.Strength.PRIMARY; + } else if(s == AttributesConfig.Attribute.Sortstrength.SECONDARY) { + return Sorting.UcaSorter.Strength.SECONDARY; + } else if(s == AttributesConfig.Attribute.Sortstrength.TERTIARY) { + return Sorting.UcaSorter.Strength.TERTIARY; + } else if(s == AttributesConfig.Attribute.Sortstrength.QUATERNARY) { + return Sorting.UcaSorter.Strength.QUATERNARY; + } else if(s == AttributesConfig.Attribute.Sortstrength.IDENTICAL) { + return Sorting.UcaSorter.Strength.IDENTICAL; + } + return Sorting.UcaSorter.Strength.PRIMARY; + } + private ErrorMessage validate(Query query) { + Sorting sorting = query.getRanking().getSorting(); + List<Sorting.FieldOrder> l = (sorting != null) ? sorting.fieldOrders() : null; + + if (l == null) { + return null; + } + Map<String, AttributesConfig.Attribute> names = getAttributeNames(); + if (names == null) { + return null; + } + + String queryLocale = null; + if (query.getModel().getLocale() != null) { + queryLocale = query.getModel().getLocale().toString(); + } + + for (Sorting.FieldOrder f : l) { + String name = f.getFieldName(); + if ("[rank]".equals(name) || "[docid]".equals(name)) { + } else if (names.containsKey(name)) { + AttributesConfig.Attribute attrConfig = names.get(name); + if (attrConfig != null) { + if (f.getSortOrder() == Sorting.Order.UNDEFINED) { + f.setAscending(attrConfig.sortascending()); + } + if (f.getSorter().getClass().equals(Sorting.AttributeSorter.class)) { + // This indicates that it shall use default. 
+ if ((attrConfig.datatype() == AttributesConfig.Attribute.Datatype.STRING)) { + if (attrConfig.sortfunction() == AttributesConfig.Attribute.Sortfunction.UCA) { + String locale = attrConfig.sortlocale(); + if (locale == null || locale.isEmpty()) { + locale = queryLocale; + } + // can only use UcaSorter if we have knowledge about wanted locale + if (locale != null) { + f.setSorter(new Sorting.UcaSorter(name, locale, Sorting.UcaSorter.Strength.UNDEFINED)); + } else { + // wanted UCA but no locale known, so use lowercase as fallback + f.setSorter(new Sorting.LowerCaseSorter(name)); + } + } else if (attrConfig.sortfunction() == AttributesConfig.Attribute.Sortfunction.LOWERCASE) { + f.setSorter(new Sorting.LowerCaseSorter(name)); + } else if (attrConfig.sortfunction() == AttributesConfig.Attribute.Sortfunction.RAW) { + f.setSorter(new Sorting.RawSorter(name)); + } else { + // default if no config found for this string attribute + f.setSorter(new Sorting.LowerCaseSorter(name)); + } + } + } + if (f.getSorter() instanceof Sorting.UcaSorter) { + Sorting.UcaSorter sorter = (Sorting.UcaSorter) f.getSorter(); + String locale = sorter.getLocale(); + + if (locale == null || locale.isEmpty()) { + // first fallback + locale = attrConfig.sortlocale(); + } + if (locale == null || locale.isEmpty()) { + // second fallback + locale = queryLocale; + } + // final fallback + if (locale == null || locale.isEmpty()) { + locale = "en_US"; + } + + // getLogger().info("locale = " + locale + " attrConfig.sortlocale.value() = " + attrConfig.sortlocale.value() + " query.getLanguage() = " + query.getModel().getLanguage()); + // getLogger().info("locale = " + locale); + + Sorting.UcaSorter.Strength strength = sorter.getStrength(); + if (sorter.getStrength() == Sorting.UcaSorter.Strength.UNDEFINED) { + strength = config2Strength(attrConfig.sortstrength()); + } + if ((sorter.getStrength() == Sorting.UcaSorter.Strength.UNDEFINED) || (sorter.getLocale() == null) || sorter.getLocale().isEmpty()) { + // 
getLogger().info("locale = " + locale + " strength = " + strength.toString()); + sorter.setLocale(locale, strength); + } + //getLogger().info("locale = " + locale + " strength = " + strength.toString() + "decompose = " + sorter.getDecomposition()); + } + } else { + return ErrorMessage.createInvalidQueryParameter("The cluster " + getClusterName() + " has attribute config for field: " + name); + } + } else { + return ErrorMessage.createInvalidQueryParameter("The cluster " + getClusterName() + " has no sortable attribute named: " + name); + } + } + return null; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/package-info.java b/container-search/src/main/java/com/yahoo/prelude/searcher/package-info.java new file mode 100644 index 00000000000..5a795e859af --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.prelude.searcher; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBase.java b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBase.java new file mode 100644 index 00000000000..d3f51e76712 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBase.java @@ -0,0 +1,432 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics; + +import com.yahoo.search.Query; +import com.yahoo.prelude.querytransform.PhraseMatcher; +import com.yahoo.prelude.semantics.engine.RuleEngine; +import com.yahoo.prelude.semantics.parser.ParseException; +import com.yahoo.prelude.semantics.rule.*; +import com.yahoo.protect.Validator; + +import java.io.File; +import java.util.*; + +/** + * A set of semantic production rules and named conditions used to analyze + * and rewrite queries + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class RuleBase { + + /** The globally identifying name of this rule base */ + private String name; + + /** The name of the source of this rules */ + private String source; + + /** The name of the automata file used, or null if none */ + protected String automataFileName=null; + + /** + * True if this rule base is default. + * The semantics of default is left to the surrounding framework + */ + private boolean isDefault=false; + + private List<ProductionRule> productionRules=new java.util.ArrayList<>(); + + private Map<String, NamedCondition> namedConditions=new java.util.LinkedHashMap<>(); + + /** The analyzer used to do evaluations over this rule base */ + private RuleEngine analyzer=new RuleEngine(this); + + private static final PhraseMatcher nullPhraseMatcher=PhraseMatcher.getNullMatcher(); + + /** + * The matcher using an automata to match terms and phrases prior to matching rules + * or the null matcher if no matcher is used. + */ + private PhraseMatcher phraseMatcher=nullPhraseMatcher; + + /** + * The names of the rule bases included indirectly or directly in this + * Ordered by first to last included + */ + private Set<String> includedNames=new java.util.LinkedHashSet<>(); + + /** + * True if this uses an automata, even if an automata is not present right now. Useful to validate without + * having automatas available + */ + private boolean usesAutomata=false; + + /** Should we allow stemmed matches? 
*/ + private boolean stemming=true; + + /** Creates an empty rule base. TODO: Disallow */ + public RuleBase() { + } + + /** Creates an empty rule base */ + public RuleBase(String name) { + setName(name); + } + + /** + * Creates a rule base from a file + * + * @param ruleFile the rule file to read. The name of the file (minus path) becomes the rule base name + * @param automataFile the automata file, or null to not use an automata + * @throws java.io.IOException if there is a problem reading one of the files + * @throws ParseException if the rule file can not be parsed correctly + * @throws RuleBaseException if the rule file contains inconsistencies + */ + public static RuleBase createFromFile(String ruleFile,String automataFile) throws java.io.IOException, ParseException { + return new RuleImporter().importFile(ruleFile,automataFile); + } + + /** + * Creates a rule base from a string + * + * @param name the name of the rule base + * @param ruleString the rule string to read + * @param automataFile the automata file, or null to not use an automata + * @throws java.io.IOException if there is a problem reading the automata file + * @throws com.yahoo.prelude.semantics.parser.ParseException if the rule file can not be parsed correctly + * @throws com.yahoo.prelude.semantics.RuleBaseException if the rule file contains inconsistencies + */ + public static RuleBase createFromString(String name,String ruleString,String automataFile) throws java.io.IOException, ParseException { + RuleBase base=new RuleImporter().importString(ruleString,automataFile,new RuleBase()); + base.setName(name); + return base; + } + + /** Set to true to enable stemmed matches. True by default */ + public void setStemming(boolean stemming) { this.stemming=stemming; } + + /** Returns whether stemmed matches are allowed. True by default */ + public boolean getStemming() { return stemming; } + + /** + * <p>Include another rule base into this. 
This <b>transfers ownership</b> + * of the given rule base - it can not be subsequently used for any purpose + * (including accessing).</p> + * + * <p>Each rule base will only be included by the first include directive enountered + * for that rule base.</p> + */ + public void include(RuleBase include) { + productionRules.add(new IncludeDirective(include)); + includedNames.addAll(include.includedNames); + includedNames.add(include.getName()); + } + + /** Rules are order based - they are included recursively depth first */ + private void inlineIncluded() { + // Re-add our own conditions last to - added later overrides + Map<String, NamedCondition> thisConditions=namedConditions; + namedConditions=new LinkedHashMap<>(); + + Set<RuleBase> included=new HashSet<>(); + included.add(this); + for (ListIterator<ProductionRule> i=productionRules.listIterator(); i.hasNext(); ) { + ProductionRule rule=i.next(); + if ( ! (rule instanceof IncludeDirective) ) continue; + + i.remove(); + RuleBase toInclude=((IncludeDirective)rule).getIncludedBase(); + if ( ! included.contains(toInclude)) + toInclude.inlineIn(this,i,included); + } + + namedConditions.putAll(thisConditions); + } + + /** + * Recursively include this and everything it includes into the given rule base. + * Skips bases already included in this. 
+ */ + private void inlineIn(RuleBase receiver,ListIterator<ProductionRule> receiverRules,Set<RuleBase> included) { + if (included.contains(this)) return; + included.add(this); + + for (Iterator<ProductionRule> i=productionRules.iterator(); i.hasNext(); ) { + ProductionRule rule=i.next(); + if (rule instanceof IncludeDirective) + ((IncludeDirective)rule).getIncludedBase().inlineIn(receiver,receiverRules,included); + else + receiverRules.add(rule); + } + + receiver.namedConditions.putAll(namedConditions); + } + + /** Adds a named condition which can be referenced by rules */ + public void addCondition(NamedCondition namedCondition) { + namedConditions.put(namedCondition.getName(),namedCondition); + + Condition condition=namedCondition.getCondition(); + Condition superCondition=findIncludedCondition(namedCondition.getName()); + resolveSuper(condition,superCondition); + } + + private void resolveSuper(Condition condition,Condition superCondition) { + if (condition instanceof SuperCondition) { + ((SuperCondition)condition).setCondition(superCondition); + } + else if (condition instanceof CompositeCondition) { + for (Iterator<Condition> i=((CompositeCondition)condition).conditionIterator(); i.hasNext(); ) { + Condition subCondition=i.next(); + resolveSuper(subCondition,superCondition); + } + } + } + + private Condition findIncludedCondition(String name) { + for (Iterator<ProductionRule> i=productionRules.iterator(); i.hasNext(); ) { + ProductionRule rule=i.next(); + if ( ! 
(rule instanceof IncludeDirective) ) continue; + + RuleBase included=((IncludeDirective)rule).getIncludedBase(); + NamedCondition condition=included.getCondition(name); + if (condition!=null) return condition.getCondition(); + included.findIncludedCondition(name); + // FIXME: dead code commented out + // if (condition!=null) return condition.getCondition(); + } + return null; + } + + /** + * Returns whether this rule base - directly or through other includes - includes + * the rule base with the given name + */ + public boolean includes(String ruleBaseName) { + return includedNames.contains(ruleBaseName); + } + + /** + * Sets the name of this rule base. + * If this rule base is given to a searcher, it must be removed before the name + * change, and then re-added + */ + public void setName(String name) { + Validator.ensureNotNull("Rule base name",name); + this.name=name; + } + + /** Returns the name of this rule base. This is never null. */ + public String getName() { return name; } + + /** + * Sets the name of the automata file to use as a source of condition matches. + * To reload the automata, call this again. This can be done safely at any + * point by any thread while this rule base is in use. + * + * @throws IllegalArgumentException if the file is not found + */ + public void setAutomataFile(String automataFile) { + if ( ! 
new File(automataFile).exists()) + throw new IllegalArgumentException("Automata file '" + automataFile + "' " + + "included in " + this + " not found"); + phraseMatcher=new PhraseMatcher(automataFile); + phraseMatcher.setIgnorePluralForm(true); + phraseMatcher.setMatchAll(true); + phraseMatcher.setMatchPhraseItems(true); + phraseMatcher.setMatchSingleItems(true); + setPhraseMatcher(phraseMatcher); + this.automataFileName=automataFile; + } + + /** Returns the name of the automata file used, or null if none */ + public String getAutomataFile() { return automataFileName; } + + /** Sets whether this base is default, the semantics of default is left to the application */ + public void setDefault(boolean isDefault) { this.isDefault=isDefault; } + + /** Returns whether this base is default, the semantics of default is left to the application */ + public boolean isDefault() { return isDefault; } + + /** Thread safely sets the phrase matcher to use in this, or null to not use a phrase matcher */ + public synchronized void setPhraseMatcher(PhraseMatcher matcher) { + if (matcher==null) + this.phraseMatcher = nullPhraseMatcher; + else + this.phraseMatcher = matcher; + } + + /** Thread safely gets the phrase matcher to use in this */ + public synchronized PhraseMatcher getPhraseMatcher() { + return this.phraseMatcher; + } + + /** + * The identifying name of the source of this rule base. + * The absolute file name if this came from a file. + */ + public String getSource() { return source; } + + /** + * Sets the name of the source of this rule base. If this came from a file, + * the source must be set to the absolute file name of the rule base + */ + public void setSource(String source) { this.source = source; } + + /** Returns whether this uses a phrase matcher automata */ + public boolean usesAutomata() { + return usesAutomata || phraseMatcher!=nullPhraseMatcher; + } + + /** + * Set to truew if this uses an automata, even if an automata is not present right now. 
+ * Useful to validate without having automatas available + */ + void setUsesAutomata(boolean usesAutomata) { this.usesAutomata=usesAutomata; } + + // Note that included rules are added though a list iterator, not this */ + public void addRule(ProductionRule productionRule) { + productionRules.add(productionRule); + } + + /** Returns a named condition, or null if no condition with that name exists */ + public NamedCondition getCondition(String name) { + return namedConditions.get(name); + } + + /** + * Call this when all rules are added, before any rule evaluation starts. + * + * @throws RuleBaseException if there is an inconsistency in the rule base. + */ + public void initialize() { + inlineIncluded(); + makeReferences(); + } + + /** + * Analyzes a query over this rule base + * + * @param query the query to analyze + * @param traceLevel the level of tracing to add to the query + * @return the error caused by analyzing the query, or null if there was no error + * If there is an error, this query is destroyed (unusable) + */ + public String analyze(Query query,int traceLevel) { + int queryTraceLevel=query.getTraceLevel(); + if (traceLevel>0 && queryTraceLevel==0) + query.setTraceLevel(1); + + matchAutomata(query,traceLevel); + String error=analyzer.evaluate(query,traceLevel); + + query.setTraceLevel(queryTraceLevel); + return error; + } + + protected void matchAutomata(Query query,int traceLevel) { + List<PhraseMatcher.Phrase> matches=getPhraseMatcher().matchPhrases(query.getModel().getQueryTree().getRoot()); + if (matches==null || matches.size()==0) return; + for (Iterator<PhraseMatcher.Phrase> i=matches.iterator(); i.hasNext(); ) { + PhraseMatcher.Phrase phrase= i.next(); + if (traceLevel>=3) + query.trace("Semantic searcher automata matched " + phrase,false,1); + + annotatePhrase(phrase,query,traceLevel); + } + } + + // Note: When changing this method, change CompatibleRuleBase as well! 
+ // TODO: Values are not added right now + protected void annotatePhrase(PhraseMatcher.Phrase phrase,Query query,int traceLevel) { + for (StringTokenizer tokens=new StringTokenizer(phrase.getData(),"|",false) ; tokens.hasMoreTokens(); ) { + String token=tokens.nextToken(); + int semicolonIndex=token.indexOf(";"); + String annotation=token; + String value=""; + if (semicolonIndex>0) { + annotation=token.substring(0,semicolonIndex); + value=token.substring(semicolonIndex+1); + } + + // Annotate all matched items + phrase.getItem(0).addAnnotation(annotation,phrase); + if (traceLevel>=4) + query.trace(" Annotating '" + phrase + "' as " + annotation + + (value.equals("") ? "" :"=" + value),false,1); + } + } + + private void makeReferences() { + for (Iterator<ProductionRule> i=ruleIterator(); i.hasNext(); ) { + ProductionRule rule=i.next(); + rule.makeReferences(this); + } + for (Iterator<NamedCondition> i=conditionIterator(); i.hasNext(); ) { + NamedCondition namedCondition=i.next(); + namedCondition.getCondition().makeReferences(this); + } + } + + /** Returns the rules in added order */ + public ListIterator<ProductionRule> ruleIterator() { return productionRules.listIterator(); } + + /** Returns the rules unmodifiable */ + public List<ProductionRule> rules() { + return Collections.unmodifiableList(productionRules); + } + + /** Returns the named conditions in added order */ + public Iterator<NamedCondition> conditionIterator() { return namedConditions.values().iterator(); } + + /** Returns true if the given object is a rule base having the same name as this */ + public boolean equals(Object object) { + if ( ! 
(object instanceof RuleBase)) return false; + return ((RuleBase)object).getName().equals(this.getName()); + } + + public int hashCode() { + return getName().hashCode(); + } + + public String toString() { + return "rule base '" + getName() + "'"; + } + + /** + * Returns a string containing all the rules and conditions of this rule base + * in the form they will be evaluated, with all included rule bases inlined + */ + public String toContentString() { + StringBuilder b=new StringBuilder(); + for (Iterator<ProductionRule> i=productionRules.iterator(); i.hasNext(); ) { + b.append(i.next().toString()); + b.append("\n"); + } + b.append("\n"); + b.append("\n"); + for (Iterator<NamedCondition> i=namedConditions.values().iterator(); i.hasNext(); ) { + b.append(i.next().toString()); + b.append("\n"); + } + return b.toString(); + } + + /** A placeholder for an included rule base until it is inlined */ + private static class IncludeDirective extends ProductionRule { + + private RuleBase includedBase; + + public IncludeDirective(RuleBase ruleBase) { + this.includedBase=ruleBase; + } + + public RuleBase getIncludedBase() { return includedBase; } + + /** Not used */ + public String getSymbol() { return ""; } + + + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBaseException.java b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBaseException.java new file mode 100644 index 00000000000..34c113ceec8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBaseException.java @@ -0,0 +1,20 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics; + +/** + * Thrown on rule base consistency problems + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +@SuppressWarnings("serial") +public class RuleBaseException extends RuntimeException { + + public RuleBaseException(String message) { + super(message); + } + + public RuleBaseException(String message,Exception cause) { + super(message,cause); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/RuleImporter.java b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleImporter.java new file mode 100644 index 00000000000..1dab816f22b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleImporter.java @@ -0,0 +1,285 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.Reader; +import java.io.StringReader; +import java.util.Arrays; +import java.util.List; + +import com.yahoo.io.IOUtils; +import com.yahoo.io.reader.NamedReader; +import com.yahoo.prelude.semantics.parser.*; + +/** + * Imports rule bases from various sources. + * + * @author bratseth + */ +// Uses the JavaCC-generated parser to read rule bases. +// This is an intermediate between the parser and the rule base being loaded +// on implementation of some directives, for example, it knows where to find +// rule bases included into others, while neither the rule base or the parser knows. +public class RuleImporter { + + /** + * If this is set, imported rule bases are looked up in this config + * otherwise, they are looked up as files + */ + private SemanticRulesConfig config = null; + + /** + * Ignore requests to read automata files. 
+ * Useful to validate rule bases without having automatas present + */ + private boolean ignoreAutomatas = false; + + /** + * Ignore requests to include files. + * Useful to validate rule bases one by one in config + */ + private boolean ignoreIncludes = false; + + /** Create a rule importer which will read from file */ + public RuleImporter() { + this(null, false); + } + + /** Create a rule importer which will read from a config object */ + public RuleImporter(SemanticRulesConfig config) { + this(config, false); + } + + public RuleImporter(boolean ignoreAutomatas) { + this(null, ignoreAutomatas); + } + + public RuleImporter(boolean ignoreAutomatas, boolean ignoreIncludes) { + this(null, ignoreAutomatas, ignoreIncludes); + } + + public RuleImporter(SemanticRulesConfig config, boolean ignoreAutomatas) { + this.config=config; + this.ignoreAutomatas=ignoreAutomatas; + } + + public RuleImporter(SemanticRulesConfig config, boolean ignoreAutomatas, boolean ignoreIncludes) { + this.config = config; + this.ignoreAutomatas = ignoreAutomatas; + this.ignoreIncludes = ignoreIncludes; + } + + /** + * Imports semantic rules from a file + * + * @param fileName the rule file to use + * @throws java.io.IOException if the file can not be read for some reason + * @throws ParseException if the file does not contain a valid semantic rule set + */ + public RuleBase importFile(String fileName) throws IOException, ParseException { + return importFile(fileName,null); + } + + /** + * Imports semantic rules from a file + * + * @param fileName the rule file to use + * @param automataFile the automata file to use, or null to not use any + * @throws java.io.IOException if the file can not be read for some reason + * @throws ParseException if the file does not contain a valid semantic rule set + */ + public RuleBase importFile(String fileName,String automataFile) throws IOException, ParseException { + return importFile(fileName,automataFile,null); + } + + /** + * Imports semantic rules from a 
file + * + * @param fileName the rule file to use + * @param automataFile the automata file to use, or null to not use any + * @param ruleBase an existing rule base to import these rules into, or null + * to create a new + * @throws java.io.IOException if the file can not be read for some reason + * @throws ParseException if the file does not contain a valid semantic rule set + */ + public RuleBase importFile(String fileName,String automataFile,RuleBase ruleBase) throws IOException, ParseException { + ruleBase=privateImportFile(fileName,automataFile,ruleBase); + ruleBase.initialize(); + return ruleBase; + } + + public RuleBase privateImportFile(String fileName,String automataFile,RuleBase ruleBase) throws IOException, ParseException { + BufferedReader reader=null; + try { + reader= IOUtils.createReader(fileName, "utf-8"); + File file=new File(fileName); + String absoluteFileName=file.getAbsolutePath(); + if (ruleBase==null) + ruleBase=new RuleBase(); + ruleBase.setName(stripLastName(file.getName())); + privateImportFromReader(reader,absoluteFileName,automataFile,ruleBase); + return ruleBase; + } + finally { + IOUtils.closeReader(reader); + } + } + + /** Imports all the rule files (files ending by "sr") in the given directory */ + public List<RuleBase> importDir(String ruleBaseDir) throws IOException, ParseException { + File ruleBaseDirFile=new File(ruleBaseDir); + if (!ruleBaseDirFile.exists()) + throw new IOException("Rule base dir '" + ruleBaseDirFile.getAbsolutePath() + "' does not exist"); + File[] files=ruleBaseDirFile.listFiles(); + Arrays.sort(files); + List<RuleBase> ruleBases=new java.util.ArrayList<>(); + for (File file : files) { + if (!file.getName().endsWith(".sr")) continue; + RuleBase base = importFile(file.getAbsolutePath()); + ruleBases.add(base); + } + return ruleBases; + } + + /** Read and include a rule base in another */ + public void include(String ruleBaseName,RuleBase ruleBase) throws java.io.IOException, ParseException { + if 
(ignoreIncludes) return; + RuleBase include; + if (config==null) { + include=privateImportFromDirectory(ruleBaseName,ruleBase); + } + else { + include=privateImportFromConfig(ruleBaseName); + } + ruleBase.include(include); + } + + /** Returns an unitialized rule base */ + private RuleBase privateImportFromDirectory(String ruleBaseName,RuleBase ruleBase) throws IOException, ParseException { + RuleBase include = new RuleBase(); + String includeDir=new File(ruleBase.getSource()).getParentFile().getAbsolutePath(); + if (!ruleBaseName.endsWith(".sr")) + ruleBaseName=ruleBaseName + ".sr"; + File importFile=new File(includeDir,ruleBaseName); + if (!importFile.exists()) + throw new IOException("No file named '" + shortenPath(importFile.getPath()) + "'"); + return privateImportFile(importFile.getPath(),null,include); + } + + /** Returns an unitialized rule base */ + private RuleBase privateImportFromConfig(String ruleBaseName) throws IOException, ParseException { + SemanticRulesConfig.Rulebase ruleBaseConfig=findRuleBaseConfig(config,ruleBaseName); + if (ruleBaseConfig==null) + ruleBaseConfig=findRuleBaseConfig(config,stripLastName(ruleBaseName)); + if (ruleBaseConfig==null) + throw new ParseException("Could not find included rule base '" + ruleBaseName + "'"); + return privateImportConfig(ruleBaseConfig); + } + + private SemanticRulesConfig.Rulebase findRuleBaseConfig(SemanticRulesConfig config,String ruleBaseName) { + for (Object aRulebase : config.rulebase()) { + SemanticRulesConfig.Rulebase ruleBaseConfig = (SemanticRulesConfig.Rulebase) aRulebase; + if (ruleBaseConfig.name().equals(ruleBaseName)) + return ruleBaseConfig; + } + return null; + } + + public void setAutomata(RuleBase base,String automata) { + if (ignoreAutomatas) + base.setUsesAutomata(true); // Stop it from failing on automata condition references + else + base.setAutomataFile(automata); + } + + static String stripLastName(String fileName) { + int lastDotIndex=fileName.lastIndexOf("."); + if 
(lastDotIndex<0) return fileName; + return fileName.substring(0,lastDotIndex); + } + + public RuleBase importString(String string, String automataFile) throws IOException, ParseException { + return importString(string, automataFile, null, null); + } + + public RuleBase importString(String string, String automataFile, String sourceName) throws IOException, ParseException { + return importString(string, automataFile, sourceName, null); + } + + public RuleBase importString(String string, String automataFile, RuleBase ruleBase) throws IOException, ParseException { + return importString(string, automataFile, null, ruleBase); + } + + public RuleBase importString(String string, String automataFile, String sourceName, RuleBase ruleBase) throws IOException, ParseException { + return importFromReader(new StringReader(string), sourceName, automataFile, ruleBase); + } + + public RuleBase importConfig(SemanticRulesConfig.Rulebase ruleBaseConfig) throws IOException, ParseException { + RuleBase ruleBase=privateImportConfig(ruleBaseConfig); + ruleBase.initialize(); + return ruleBase; + } + + /** Imports an unitialized rule base */ + public RuleBase privateImportConfig(SemanticRulesConfig.Rulebase ruleBaseConfig) throws IOException, ParseException { + if (config==null) throw new IllegalStateException("Must initialize with config if importing from config"); + RuleBase ruleBase = new RuleBase(); + ruleBase.setName(ruleBaseConfig.name()); + return privateImportFromReader(new StringReader(ruleBaseConfig.rules()),"semantic-rules.cfg", + ruleBaseConfig.automata(),ruleBase); + } + + public RuleBase importFromReader(Reader reader,String sourceInfo,String automataFile) throws ParseException { + return importFromReader(reader,sourceInfo,automataFile,null); + } + + /** + * Imports rules from a reader + * + * @param reader the reader containing rules on the proper syntax + * @param sourceName a string describing the source of the rules used for error messages + * @param ruleBase an existing 
rule base to import the rules into, or null to create a new one + * @return the rule base containing the rules added from the reader + * @throws ParseException if the reader contains illegal rule syntax + */ + public RuleBase importFromReader(Reader reader, String sourceName, String automataFile, RuleBase ruleBase) throws ParseException { + ruleBase=privateImportFromReader(reader, sourceName, automataFile,ruleBase); + ruleBase.initialize(); + return ruleBase; + } + + /** Returns an unitialized rule base */ + public RuleBase privateImportFromReader(Reader reader, String sourceName, String automataFile, RuleBase ruleBase) throws ParseException { + try { + if (ruleBase==null) { + ruleBase=new RuleBase(); + if (sourceName == null) + sourceName = "anonymous"; + ruleBase.setName(sourceName); + } + ruleBase.setSource(sourceName.replace('\\','/')); + new SemanticsParser(reader).semanticRules(ruleBase, this); + if (automataFile!=null && !automataFile.isEmpty()) + ruleBase.setAutomataFile(automataFile.replace('\\','/')); + return ruleBase; + } catch (Throwable t) { // also catches token mgr errors + ParseException p=new ParseException("Could not parse '" + shortenPath(sourceName) + "'"); + p.initCause(t); + throw p; + } + } + + /** + * Snips what's in from of rules/ if "rules/" is present in the string + * to avoid displaying details about where application content is copied + * (if rules/ is present, these rules are read from an applicatino package) + */ + private static String shortenPath(String path) { + int rulesIndex=path.indexOf("rules/"); + if (rulesIndex<0) return path; + return path.substring(rulesIndex); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/SemanticSearcher.java b/container-search/src/main/java/com/yahoo/prelude/semantics/SemanticSearcher.java new file mode 100644 index 00000000000..f4858bbb9e1 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/SemanticSearcher.java @@ -0,0 +1,127 @@ +// 
Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics; + +import com.google.inject.Inject; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.prelude.ConfigurationException; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +import java.util.*; + +import static com.yahoo.prelude.querytransform.IndexCombinatorSearcher.MIXED_RECALL_REWRITE; +import static com.yahoo.prelude.querytransform.StemmingSearcher.STEMMING; + +/** + * Analyzes query semantics and enhances the query to reflect findings + * + * @author bratseth + */ +@After(PhaseNames.RAW_QUERY) +@Before({PhaseNames.TRANSFORMED_QUERY, STEMMING, MIXED_RECALL_REWRITE}) +public class SemanticSearcher extends Searcher { + + private static final CompoundName rulesRulebase=new CompoundName("rules.rulebase"); + private static final CompoundName rulesOff=new CompoundName("rules.off"); + private static final CompoundName tracelevelRules=new CompoundName("tracelevel.rules"); + + /** The default rule base of this */ + private RuleBase defaultRuleBase; + + /** All rule bases of this (always including the default) */ + private final Map<String, RuleBase> ruleBases = new java.util.HashMap<>(); + + /** Creates a semantic searcher using the given default rule base */ + public SemanticSearcher(RuleBase ruleBase) { + this(Collections.singletonList(ruleBase)); + defaultRuleBase = ruleBase; + } + + public SemanticSearcher(RuleBase ... 
ruleBases) { + this(Arrays.asList(ruleBases)); + } + + @Inject + public SemanticSearcher(SemanticRulesConfig config) { + this(toList(config)); + } + + public SemanticSearcher(List<RuleBase> ruleBases) { + for (RuleBase ruleBase : ruleBases) { + if (ruleBase.isDefault()) + defaultRuleBase = ruleBase; + this.ruleBases.put(ruleBase.getName(),ruleBase); + } + } + + private static List<RuleBase> toList(SemanticRulesConfig config) { + try { + RuleImporter ruleImporter = new RuleImporter(config); + List<RuleBase> ruleBaseList = new java.util.ArrayList<>(); + for (SemanticRulesConfig.Rulebase ruleBaseConfig : config.rulebase()) { + RuleBase ruleBase = ruleImporter.importConfig(ruleBaseConfig); + if (ruleBaseConfig.isdefault()) + ruleBase.setDefault(true); + ruleBaseList.add(ruleBase); + } + return ruleBaseList; + } + catch (Exception e) { + throw new ConfigurationException("Failed configuring semantic rules",e); + } + } + + @Override + public Result search(Query query, Execution execution) { + if (query.properties().getBoolean(rulesOff)) + return execution.search(query); + + int traceLevel= query.properties().getInteger(tracelevelRules, query.getTraceLevel()-2); + if (traceLevel<0) traceLevel=0; + RuleBase ruleBase=resolveRuleBase(query); + if (ruleBase==null) + return execution.search(query); + + String error=ruleBase.analyze(query,traceLevel); + if (error!=null) + return handleError(ruleBase, query,error); + else + return execution.search(query); + } + + private RuleBase resolveRuleBase(Query query) { + String ruleBaseName=query.properties().getString(rulesRulebase); + if (ruleBaseName==null || ruleBaseName.equals("")) return getDefaultRuleBase(); + RuleBase ruleBase=getRuleBase(ruleBaseName); + if (ruleBase==null) + throw new RuleBaseException("Requested rule base '" + ruleBaseName + "' does not exist"); + return ruleBase; + } + + private Result handleError(RuleBase ruleBase,Query query,String error) { + String message="Evaluation of query '" + 
query.getModel().getQueryTree() + + "' over '" + ruleBase + "' caused the invalid query '" + + query.getModel().getQueryTree().getRoot() + "': " + error; + getLogger().warning(message); + return new Result(query,ErrorMessage.createInvalidQueryTransformation(message)); + } + + /** Returns the default rule base */ + public RuleBase getDefaultRuleBase() { return defaultRuleBase; } + + /** + * Returns the rule base of the given name, or null if none. + * The part of the name following the last dot (if any) is removed before lookup. + */ + public RuleBase getRuleBase(String ruleBaseName) { + ruleBaseName=RuleImporter.stripLastName(ruleBaseName); + return ruleBases.get(ruleBaseName); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/RuleBaseBenchmark.java b/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/RuleBaseBenchmark.java new file mode 100644 index 00000000000..b04e693089a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/RuleBaseBenchmark.java @@ -0,0 +1,86 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics.benchmark; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Date; +import java.util.Iterator; + +import com.yahoo.search.Query; +import com.yahoo.prelude.semantics.RuleBase; +import com.yahoo.prelude.semantics.RuleImporter; +import com.yahoo.prelude.semantics.parser.ParseException; + +public class RuleBaseBenchmark { + + public void benchmark(String ruleBaseFile, String queryFile, int iterations) + throws IOException, ParseException { + + String fsaFile = null; + if(ruleBaseFile.endsWith(".sr")){ + fsaFile = ruleBaseFile.substring(0,ruleBaseFile.length()-3) + ".fsa"; + File fsa = new File(fsaFile); + if(!fsa.exists()){ + fsaFile = null; + } + } + RuleBase ruleBase = new RuleImporter().importFile(ruleBaseFile,fsaFile); + ArrayList<String> queries = new ArrayList<>(); + BufferedReader reader = new BufferedReader(new FileReader(queryFile)); + String line; + while((line=reader.readLine())!=null){ + queries.add(line); + } + Date start = new Date(); + for (int i=0;i<iterations;i++){ + for (Iterator<String> iter = queries.iterator(); iter.hasNext(); ){ + String queryString = iter.next(); + Query query = new Query("?query="+queryString); + ruleBase.analyze(query,0); + } + } + Date end = new Date(); + long elapsed = end.getTime()-start.getTime(); + System.out.print("BENCHMARK: rulebase=" + ruleBaseFile + + "\n fsa=" + fsaFile + + "\n queries=" + queryFile + + "\n iterations=" + iterations + + "\n elapsed=" + elapsed + "ms\n"); + } + + + public static void main(String[] args) { + if(args.length<3){ + System.out.println("USAGE: RuleBaseBenchmark ruleBaseFile queryFile iterations"); + System.exit(1); + } + + try { + new RuleBaseBenchmark().benchmark(args[0],args[1],Integer.parseInt(args[2])); + } + catch (Exception e) { + System.out.println("ERROR: " + collectMessage(e)); + //e.printStackTrace(); + System.exit(1); + } + } + + 
private static String collectMessage(Throwable e) { + if (e.getCause()==null) + return messageOrName(e); + else + return messageOrName(e) + ": " + collectMessage(e.getCause()); + } + + private static String messageOrName(Throwable e) { + if (e.getMessage()!=null) + return e.getMessage(); + else + return e.getClass().getName(); + } + + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/queries b/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/queries new file mode 100644 index 00000000000..3feebfb4698 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/queries @@ -0,0 +1,5 @@ +shop in geary street +foo +bar +aardwark +to be or not to be that is the question diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/rules.sr b/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/rules.sr new file mode 100644 index 00000000000..020699ba7cb --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/rules.sr @@ -0,0 +1,62 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +# Local use case + +[listing] [preposition] [place] -> listing:[listing] place:[place]; + +[listing] :- restaurant, shop, cafe, hotel; + +[preposition] :- in, at, near; + +[place] :- [street] [city], [street]; + +[street] :- geary street, geary; +[city] :- san francisco; + +# Shopping use case + +[brand] -> brand:[brand]; +[category] -> category:[category]; + +[brand] :- sony, dell; # Refer to automata later +[category] :- digital camera, camera, phone; # Ditto + +# Travel use case, note how explicit reference name overrides named condition as reference name + +# [from:place] [to:place] -> from:[from] to:[to] + +# Answers use case + +# why is [noun] ... 
[adjective] +> ?about:[noun] + +# Adding rule using the default query mode (and/or) + +[foobar] +> foobar:[foobar]; + +[foobar] :- foo, bar; + +# Adding rank rule + +[word] +> $foobar:[word]; + +[word] :- aardwark, word; + +# Literal production + +lotr -> lord of the rings; + +# Adding a negative + +java +> -coffee; + +# Adding another negative +# TODO: Term types in conditions +# java -coffee +> -island + +# "Stopwords" + +be -> ; +the -> ; + +[stopword] -> ; + +[stopword] :- to, or, not; diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/config/RuleConfigDeriver.java b/container-search/src/main/java/com/yahoo/prelude/semantics/config/RuleConfigDeriver.java new file mode 100644 index 00000000000..b0e50727773 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/config/RuleConfigDeriver.java @@ -0,0 +1,133 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.config; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.IOException; +import java.io.Writer; +import java.util.ArrayList; +import java.util.List; + +import com.yahoo.io.IOUtils; +import com.yahoo.io.reader.NamedReader; +import com.yahoo.prelude.semantics.RuleBase; +import com.yahoo.prelude.semantics.RuleImporter; +import com.yahoo.prelude.semantics.parser.ParseException; + +/** + * Reads the rule base files in the given directory and creates a + * semantic-rules.cfg file containing those rule bases in the given output dir. 
+ * + * @author bratseth + */ +// Note: This is not used by the config model any more and can be removed +public class RuleConfigDeriver { + + public void derive(String ruleBaseDir, String outputDir) throws IOException, ParseException { + // Validate output dir + File outputDirFile=new File(outputDir); + if (!outputDirFile.exists()) + throw new IOException("Output dir " + outputDirFile.getAbsolutePath() + + " does not exist"); + + List<RuleBase> ruleBases = derive(ruleBaseDir); + // Convert file to config + exportConfig(ruleBases,outputDir); + } + + public List<RuleBase> derive(String ruleBaseDir) throws IOException, ParseException { + // Validate the rule bases + boolean ignoreAutomatas=true; // Don't fail if they are not available in config + List<RuleBase> ruleBases = new RuleImporter(ignoreAutomatas).importDir(ruleBaseDir); + ensureZeroOrOneDefault(ruleBases); + return ruleBases; + } + + public List<RuleBase> derive(List<NamedReader> readers) throws IOException, ParseException { + // Validate the rule bases + boolean ignoreAutomatas = true; // Don't fail if they are not available in config + List<RuleBase> ruleBases = new ArrayList<>(); + RuleImporter importer = new RuleImporter(ignoreAutomatas); + for (NamedReader reader : readers) { + ruleBases.add(importer.importFromReader(reader, reader.getName(), null)); + } + ensureZeroOrOneDefault(ruleBases); + return ruleBases; + } + + private void ensureZeroOrOneDefault(List<RuleBase> ruleBases) throws ParseException { + String defaultName=null; + for (RuleBase ruleBase : ruleBases) { + if (defaultName != null && ruleBase.isDefault()) + throw new ParseException("Both '" + defaultName + "' and '" + ruleBase.getName() + + "' is marked as default, there can only be one"); + if (ruleBase.isDefault()) + defaultName = ruleBase.getName(); + } + } + + private void exportConfig(List<RuleBase> ruleBases, String outputDir) + throws IOException { + BufferedWriter writer=null; + try { + writer=IOUtils.createWriter(outputDir + 
"/semantic-rules.cfg","utf-8",false); + writer.write("rulebase[" + ruleBases.size() + "]\n"); + for (int i=0; i<ruleBases.size(); i++) { + RuleBase ruleBase= ruleBases.get(i); + writer.write("rulebase[" + i + "].name \"" + ruleBase.getName() + "\"\n"); + writer.write("rulebase[" + i + "].rules \""); + writeRuleBaseAsLine(ruleBase.getSource(),writer); + writer.write("\"\n"); + } + } + finally { + IOUtils.closeWriter(writer); + } + } + + private void writeRuleBaseAsLine(String file, Writer writer) throws IOException { + BufferedReader reader=null; + try { + reader=IOUtils.createReader(file,"utf-8"); + String line; + while (null!=(line=reader.readLine())) { + writer.write(line); + writer.write("\\n"); + } + } + finally { + IOUtils.closeReader(reader); + } + } + + public static void main(String[] args) { + if(args.length<2){ + System.out.println("USAGE: RuleConfigDeriver ruleBaseDir outputDir"); + System.exit(1); + } + + try { + new RuleConfigDeriver().derive(args[0],args[1]); + } + catch (Exception e) { + System.out.println("ERROR: " + collectMessage(e)); + System.exit(1); + } + } + + private static String collectMessage(Throwable e) { + if (e.getCause()==null) + return messageOrName(e); + else + return messageOrName(e) + ": " + collectMessage(e.getCause()); + } + + private static String messageOrName(Throwable e) { + if (e.getMessage()!=null) + return e.getMessage(); + else + return e.getClass().getName(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/config/package-info.java b/container-search/src/main/java/com/yahoo/prelude/semantics/config/package-info.java new file mode 100644 index 00000000000..6b2801d10d7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/config/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+@ExportPackage +package com.yahoo.prelude.semantics.config; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Choicepoint.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Choicepoint.java new file mode 100644 index 00000000000..f2650fef83a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Choicepoint.java @@ -0,0 +1,126 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +import com.yahoo.prelude.semantics.rule.Condition; + +/** + * A choice point in an rule evaluation. A choicepoint is open if there are other choices to make at the point, + * closed if there are no further choices. In addition it contains enough information to enable + * the rule evaluation to backtrack to this point + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class Choicepoint { + + /** Whether there are (or may be) open choices to explore at this choicepoint yet */ + private boolean open=true; + + /** The number of tries made at this choice point */ + private int tries=0; + + /** The condition creating this choicepoint */ + private Condition condition; + + /** The state this choice point can be rolled back to */ + private State state; + + private RuleEvaluation owner; + + public Choicepoint(RuleEvaluation e, Condition condition) { + this.owner=e; + state=new State(this,e); + this.condition=condition; + if (e.getTraceLevel()>=5) + e.trace(5,"Added choice point at " + e.currentItem() + " for '" + condition + "'"); + } + + /** Returns the condition which created this choice point */ + public Condition getCondition() { return condition; } + + /** Returns wether there are (or may be) open choices to explore at this choicepoint yet */ + public boolean isOpen() { return open; } + + /** Marks this choice point 
as closed (!open) - there are no further choices to explore */ + public void close() { this.open=false; } + + /** Returns the number open tries made at this point */ + public int tryCount() { return tries; } + + /** Registers that another try has been made */ + public void addTry() { + tries++; + } + + /** + * Backtrack to the evaluation state at the point where this choicepoint were instantiated. + */ + public void backtrack() { + state.backtrack(owner); + if (owner.getTraceLevel()>=5) + owner.trace(5,"Backtracked to " + owner.currentItem() + " for '" + condition + "'"); + } + + /** Backtracks the position only, not matches */ + public void backtrackPosition() { + state.backtrackPosition(owner); + } + + /** + * Updates the state of this choice point to the current state of its evaluation + */ + public void updateState() { + state.updateState(owner); + } + + /** Returns the state of this choice point */ + public State getState() { return state; } + + /** The state of this choicepoint */ + public final static class State { + + private int position=0; + + private int referencedMatchCount=0; + + private int nonreferencedMatchCount=0; + + public State(Choicepoint choicepoint,RuleEvaluation evaluation) { + updateState(evaluation); + } + + public void updateState(RuleEvaluation evaluation) { + position=evaluation.currentPosition(); + referencedMatchCount=evaluation.getReferencedMatchCount(); + nonreferencedMatchCount=evaluation.getNonreferencedMatchCount(); + } + + /** Backtrack to the evaluation state at the point where this choicepoint were instantiated */ + public void backtrack(RuleEvaluation e) { + backtrackPosition(e); + + // Is this check masking errors? + if (e.referencedMatches().size()>referencedMatchCount) + e.referencedMatches().subList(referencedMatchCount, + e.referencedMatches().size()) + .clear(); + // Is this check masking errors? 
+ if (e.nonreferencedMatches().size()>nonreferencedMatchCount) + e.nonreferencedMatches().subList(nonreferencedMatchCount, + e.nonreferencedMatches().size()) + .clear(); + } + + public void backtrackPosition(RuleEvaluation e) { + e.setPosition(position); + } + + public int getPosition() { return position; } + + public int getReferencedMatchCount() { return referencedMatchCount; } + + public int getNonreferencedMatchCount() { return nonreferencedMatchCount; } + + } + +} + diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java new file mode 100644 index 00000000000..fe3543fc655 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java @@ -0,0 +1,453 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +import com.yahoo.prelude.query.*; +import com.yahoo.search.Query; +import com.yahoo.search.query.QueryTree; + +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +/** + * An evaluation of a query over a rule base. There is one evaluation for each evaluation + * of one query over one rule base. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class Evaluation { + + // TODO: Retrofit query into the namespace construct + private ParameterNameSpace parameterNameSpace=null; + + private Query query; + + /** The current index into the flattened item list */ + private int currentIndex = 0; + + /** Query items flattened to a list iterator */ + private List<FlattenedItem> flattenedItems; + + /** The rule evaluation context, can be reset once the rule is evaluated */ + private RuleEvaluation ruleEvaluation; + + /** + * The amount of context information to collect about this evaluation. 
+ * 0 means no context information, higher numbers means more context information. + */ + private int traceLevel=0; + + private String traceIndentation=""; + + /** See RuleEngine */ + private Set<Integer> matchDigests=new HashSet<>(); + + /** The previous size of this query (see RuleEngine), set on matches only */ + private int previousQuerySize=0; + + /** Should we allow stemmed matches? */ + private boolean stemming=true; + + public Evaluation(Query query) { + this(query,0); + } + + /** + * Creates a new evaluation + * + * @param query the query this evaluation is for + * @param traceLevel the amount of tracing to do + */ + public Evaluation(Query query,int traceLevel) { + this.query=query; + this.traceLevel=traceLevel; + reset(); + ruleEvaluation=new RuleEvaluation(this); + } + + /** Resets the item iterator to point to the first item */ + public void reset() { + if (flattenedItems!=null) + previousQuerySize=flattenedItems.size(); + currentIndex=0; + traceIndentation=""; + flattenedItems=new java.util.ArrayList<>(); + flatten(query.getModel().getQueryTree().getRoot(),0,flattenedItems); + } + + /** Sets the item iterator to point to the last item: */ + public void setToLast() { // PGA + if (flattenedItems!=null) + currentIndex = flattenedItems.size()-1; + else + currentIndex = -1; + } + + /** Resets the item iterator to point to the last item: */ + public void resetToLast() { // PGA + if (flattenedItems!=null) + previousQuerySize=flattenedItems.size(); + traceIndentation=""; + flattenedItems=new java.util.ArrayList<>(); + flatten(query.getModel().getQueryTree().getRoot(),0,flattenedItems); + currentIndex = flattenedItems.size()-1; + } + + public Query getQuery() { return query; } + + /** Set to true to enable stemmed matches. True by default */ + public void setStemming(boolean stemming) { this.stemming=stemming; } + + /** Returns whether stemmed matches are allowed. 
True by default */ + public boolean getStemming() { return stemming; } + + void addMatchDigest(int digest) { matchDigests.add(new Integer(digest)); } + + boolean hasMatchDigest(int matchDigest) { return matchDigests.contains(new Integer(matchDigest)); } + + int getPreviousQuerySize() { return previousQuerySize; } + + public int getQuerySize() { return flattenedItems.size(); } + + /** Advances to the next item as current item */ + public void next() { + currentIndex++; + } + + public void previous() {//PGA + currentIndex--; + } + + + /** Returns the current item, or null if there is no more elements */ + public FlattenedItem currentItem() { + if ( (currentIndex>=flattenedItems.size()) || (currentIndex<0)) return null; //PGA + return flattenedItems.get(currentIndex); + } + + /** Returns a fresh rule evaluation starting at the current position of this */ + public RuleEvaluation freshRuleEvaluation() { + ruleEvaluation.initialize(flattenedItems,currentIndex); + return ruleEvaluation; + } + + /** Adds an item to the query being evaluated in a way consistent with the query type */ + // TODO: Add this functionality to Query? 
+ public void addItem(Item item, TermType termType) { + Item root= query.getModel().getQueryTree().getRoot(); + if (root==null) + query.getModel().getQueryTree().setRoot(item); + else + query.getModel().getQueryTree().setRoot(combineItems(root,item,termType)); + } + + /** Removes this item */ + public void removeItem(Item item) { + item.getParent().removeItem(item); + } + + /** + * Removes this item by identity to ensure we remove the right one if there are multiple + * equal items + */ + public void removeItemByIdentity(Item item) { + int position=findIndexByIdentity(item); + if (position>=0) + item.getParent().removeItem(position); + else + item.getParent().removeItem(item); // Fallback to removeField by equal() + } + + private int findIndexByIdentity(Item item) { + int position=0; + for (Iterator<Item> i=item.getParent().getItemIterator(); i.hasNext(); ) { + Item child=i.next(); + if (item==child) { + return position; + } + position++; + } + return -1; + } + + /** Removes an item, prefers the one at/close to the given position if there are multiple ones */ + public void removeItem(int position,Item item) { + Item removeCandidate=item.getParent().getItem(position); + if (removeCandidate.equals(item)) // Remove based on position + item.getParent().removeItem(position); + else + item.getParent().removeItem(item); // Otherwise, just removeField any such item + } + + /** + * Convert segment items into their mutable counterpart, do not update query tree. + * Non-segment items are returned directly. 
+ * + * @return a mutable CompositeItem instance + */ + private CompositeItem convertSegmentItem(CompositeItem item) { + if (!(item instanceof SegmentItem)) { + return item; + } + CompositeItem converted = null; + if (item instanceof AndSegmentItem) { + converted = new AndItem(); + } else if (item instanceof PhraseSegmentItem) { + PhraseItem p = new PhraseItem(); + PhraseSegmentItem old = (PhraseSegmentItem) item; + p.setIndexName(old.getIndexName()); + converted = p; + } else { + // TODO: Do something else than nothing for unknowns? + return item; + } + for (Iterator<Item> i = item.getItemIterator(); i.hasNext();) { + converted.addItem(i.next()); + } + return converted; + } + + + private void insertMutableInTree(CompositeItem mutable, CompositeItem original, CompositeItem parent) { + if (parent == null) { + query.getModel().getQueryTree().setRoot(mutable); + + } else { + int parentsIndex = parent.getItemIndex(original); + parent.setItem(parentsIndex, mutable); + } + } + + /** + * Convert The parent of this item into a mutable item. Note, this + * may change the shape of the query tree. (E.g. if the original parent is a + * segment phrase, and the original parent's parent is a phrase, the terms + * from the parent will be moved to the parent's parent.) 
+ * + * @param item The item for which the parent shall be made mutable + */ + public void makeParentMutable(TermItem item) { + CompositeItem parent = item.getParent(); + CompositeItem mutable = convertSegmentItem(parent); + if (parent != mutable) { + CompositeItem parentsParent = parent.getParent(); + insertMutableInTree(mutable, parent, parentsParent); + } + } + + /** + * Inserts an item to the query being evaluated in a way consistent with the query type + * + * @param item the item to insert + * @param parent the parent of this item, or null to set the root + * @param index the index at which to insert this into the parent + * @param desiredParentType the desired type of the composite which contains item when this returns + */ + public void insertItem(Item item, CompositeItem parent, int index, TermType desiredParentType) { + if (parent==null) { // TODO: Accommodate for termtype in this case too + query.getModel().getQueryTree().setRoot(item); + + return; + } + + if (parent.getItemCount()>0 && parent instanceof QueryTree && parent.getItem(0) instanceof CompositeItem) { + // combine with the existing root instead + parent=(CompositeItem)parent.getItem(0); + if (index==1) { // that means adding it after the existing root + index=parent.getItemCount(); + } + } + + if (( desiredParentType==TermType.DEFAULT || desiredParentType.hasItemClass(parent.getClass()) ) + && equalIndexNameIfParentIsPhrase(item,parent)) { + addItem(parent,index,item,desiredParentType); + } + else { + insertIncompatibleItem(item,parent,query,desiredParentType); + } + } + + private void addItem(CompositeItem parent,int index,Item item,TermType desiredParentType) { + if (parent instanceof NotItem) { + if (index==0 && parent.getItem(0)==null) { // Case 1: The current positive is null and we are adding a positive + parent.setItem(0,item); + } + else if (index<=1 && !(parent.getItem(0) instanceof CompositeItem)) { // Case 2: The positive must become a composite + CompositeItem 
positiveComposite=(CompositeItem)desiredParentType.createItemClass(); + positiveComposite.addItem(parent.getItem(0)); + positiveComposite.addItem(index,item); + parent.setItem(0,positiveComposite); + } + else if (parent.getItem(0)!=null && parent.getItem(0) instanceof CompositeItem // Case 3: Add to the positive composite + && index<=((CompositeItem)parent.getItem(0)).getItemCount()) { + ((CompositeItem)parent.getItem(0)).addItem(index,item); + } + else { // Case 4: Add negative + parent.addItem(index,item); + } + } + else if (parent.getItemCount()>0 && parent instanceof QueryTree) { + CompositeItem composite=(CompositeItem)desiredParentType.createItemClass(); + composite.addItem(parent.getItem(0)); + composite.addItem(index,item); + parent.setItem(0,composite); + } + else { + parent.addItem(index,item); + } + } + + /** A special purpose check used to simplify the above */ + private boolean equalIndexNameIfParentIsPhrase(Item item,CompositeItem parent) { + if ( ! (parent instanceof PhraseItem)) return true; + if ( ! 
(item instanceof IndexedItem)) return true; + + return ((PhraseItem)parent).getIndexName().equals(((IndexedItem)item).getIndexName()); + } + + private void insertIncompatibleItem(Item item,CompositeItem parent,Query query,TermType desiredParentType) { + // Create new parent + CompositeItem newParent; + if (desiredParentType==TermType.DEFAULT) + newParent=new AndItem(); + else + newParent=(CompositeItem)desiredParentType.createItemClass(); + + // Save previous parent parent + CompositeItem parentsParent=parent.getParent(); + + // Add items to new parent + newParent.addItem(parent); + newParent.addItem(item); + + // Insert new parent as root or child of old parents parent + if (parentsParent==null) { + query.getModel().getQueryTree().setRoot(newParent); + + } + else { + int parentIndex=0; + if (parentsParent!=null) { + parentIndex=parentsParent.getItemIndex(parent); + } + parentsParent.setItem(parentIndex,newParent); + } + } + + private Item combineItems(Item first,Item second,TermType termType) { + if (first instanceof NullItem) { + return second; + } else if (first instanceof NotItem) { + NotItem notItem=(NotItem)first; + if (termType==TermType.NOT) { + notItem.addNegativeItem(second); + } + else { + Item newPositive=combineItems(notItem.getPositiveItem(),second,termType); + notItem.setPositiveItem(newPositive); + } + return notItem; + } + else if (first instanceof CompositeItem) { + CompositeItem composite=(CompositeItem)first; + CompositeItem combined=createType(termType); + if (combined.getClass().equals(composite.getClass())) { + composite.addItem(second); + return composite; + } + else { + combined.addItem(first); + combined.addItem(second); // Also works for nots + return combined; + } + } + else if (first instanceof TermItem) { + CompositeItem combined=createType(termType); + combined.addItem(first); + combined.addItem(second); + return combined; + } + else { + throw new RuntimeException("Don't know how to add an item to type " + first.getClass()); + } + } + 
+ private CompositeItem createType(TermType termType) { + if (termType==TermType.DEFAULT) { + if (query.getModel().getType().equals(Query.Type.ANY)) + return new OrItem(); + else + return new AndItem(); + } + else if (termType==TermType.AND) { + return new AndItem(); + } + else if (termType==TermType.OR) { + return new OrItem(); + } + else if (termType==TermType.RANK) { + return new RankItem(); + } + else if (termType==TermType.NOT) { + return new NotItem(); + } + throw new IllegalArgumentException("Programing error, this method should be updated with add in RankType"); + } + + private void flatten(Item item,int position,List<FlattenedItem> toList) { + if (item==null) return; + if (item.isFilter()) return; + + if (item instanceof TermItem) { // make eligible for matching + toList.add(new FlattenedItem((TermItem)item,position)); + return; + } + + if (item instanceof CompositeItem) { // make children eligible for matching + CompositeItem composite=(CompositeItem)item; + int childPosition=0; + for (Iterator<?> i=composite.getItemIterator(); i.hasNext(); ) { + flatten((Item)i.next(),childPosition++,toList); + } + } + + // other terms are unmatchable + } + + public void trace(int level,String message) { + if (level>getTraceLevel()) return; + query.trace(traceIndentation + message,false,1); + } + + /** + * The amount of context information to collect about this evaluation. + * 0 (the default) means no context information, higher numbers means + * more context information. 
+ */ + public int getTraceLevel() { return traceLevel; } + + public void indentTrace() { + traceIndentation=traceIndentation + " "; + } + + public void unindentTrace() { + if (traceIndentation.length()<3) + traceIndentation=""; + else + traceIndentation=traceIndentation.substring(3); + } + + public NameSpace getNameSpace(String nameSpaceName) { + if (nameSpaceName.equals("parameter")) { + if (parameterNameSpace==null) + parameterNameSpace=new ParameterNameSpace(); + return parameterNameSpace; + } + + // That's all for now + throw new RuntimeException("Unknown namespace '" + nameSpaceName + "'"); + + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/EvaluationException.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/EvaluationException.java new file mode 100644 index 00000000000..00a66206b46 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/EvaluationException.java @@ -0,0 +1,15 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +/** + * Thrown on semantic exceptions on evaluation over a rule base + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +@SuppressWarnings("serial") +public class EvaluationException extends RuntimeException { + + public EvaluationException(String message) { + super(message); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/FlattenedItem.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/FlattenedItem.java new file mode 100644 index 00000000000..1631d60df6b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/FlattenedItem.java @@ -0,0 +1,31 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics.engine; + +import com.yahoo.prelude.query.TermItem; + +/** + * An item which knows its position in its parent + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class FlattenedItem { + + private TermItem item; + + /** The position of this item in its parent */ + private int position; + + public FlattenedItem(TermItem item,int position) { + this.item=item; + this.position=position; + } + + public TermItem getItem() { return item; } + + public int getPosition() { return position; } + + public String toString() { + return position + ":" + item; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Match.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Match.java new file mode 100644 index 00000000000..fc7aec62412 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Match.java @@ -0,0 +1,80 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics.engine; + +import com.yahoo.prelude.query.CompositeItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.TermItem; +import com.yahoo.prelude.query.WordItem; + +/** + * A match + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class Match { + + /** The start position of this match */ + private int position; + + private TermItem item; + + /** The string to replace the match by, usually item.getIndexedString() */ + private String replaceValue; + + /** The parent of the matched item */ + private CompositeItem parent=null; + + /** + * Creates a match + * + * @param item the match to add + * @param replaceValue the string to replace this match by, usually the item.getIndexedString() + * which is what the replace value will be if it is passed as null here + */ + public Match(FlattenedItem item,String replaceValue) { + this.item=item.getItem(); + if (replaceValue==null) + this.replaceValue=item.getItem().getIndexedString(); + else + this.replaceValue=replaceValue; + this.parent=this.item.getParent(); + this.position=item.getPosition(); + } + + public int getPosition() { return position; } + + public TermItem getItem() { return item; } + + public String getReplaceValue() { + return replaceValue; + } + + /** + * Returns the parent in which the item was matched, or null if the item was root. + * Note that the item may subsequently have been removed, so it does not necessarily + * have this parent + */ + public CompositeItem getParent() { return parent; } + + public int hashCode() { + return + 17*item.getIndexedString().hashCode()+ + 33*item.getIndexName().hashCode(); + } + + /** Returns a new item representing this match */ + public Item toItem(String label) { + return new WordItem(getReplaceValue(),label); + } + + public boolean equals(Object o) { + if (! 
(o instanceof Match)) return false; + + Match other=(Match)o; + if (other.position!=position) return false; + if (!other.item.equals(item)) return false; + + return true; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/NameSpace.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/NameSpace.java new file mode 100644 index 00000000000..76eea63bd68 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/NameSpace.java @@ -0,0 +1,16 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +/** + * A collection of facts (addressed by namespace.fact in conditions) + * over which we may write conditions + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public abstract class NameSpace { + + public abstract boolean matches(String term,RuleEvaluation e); + + // TODO: public abstract void produce(RuleEvaluation e); + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/ParameterNameSpace.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/ParameterNameSpace.java new file mode 100644 index 00000000000..35427250511 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/ParameterNameSpace.java @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics.engine; + +import com.yahoo.search.Query; + +/** + * A name space representing the (http) parameters following this query + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class ParameterNameSpace extends NameSpace { + + public boolean matches(String term,RuleEvaluation e) { + Query query=e.getEvaluation().getQuery(); + String value=query.properties().getString(term); + if (value==null) return false; + e.setValue(value); + return true; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/ReferencedMatches.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/ReferencedMatches.java new file mode 100644 index 00000000000..cb7d2af8d19 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/ReferencedMatches.java @@ -0,0 +1,52 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics.engine; + +import java.util.Iterator; +import java.util.List; + +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.PhraseItem; + +/** + * The Matches referenced by a particular context name in a rule evaluation + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class ReferencedMatches { + + private String contextName; + + private List<Match> matches=new java.util.ArrayList<>(1); + + public ReferencedMatches(String contextName) { + this.contextName=contextName; + } + + public void addMatch(Match match) { + matches.add(match); + } + + public String getContextName() { return contextName; } + + public Iterator<Match> matchIterator() { + return matches.iterator(); + } + + /** + * Returns the item to insert from these referenced matches, or null if none + * + * @param label the label of the matches + */ + public Item toItem(String label) { + if (matches.size()==0) return null; + if (matches.size()==1) return matches.get(0).toItem(label); + + PhraseItem phrase=new PhraseItem(); // TODO: Somehow allow AND items instead here + phrase.setIndexName(label); + for (Iterator<Match> i=matches.iterator(); i.hasNext(); ) { + phrase.addItem(i.next().toItem(label)); + } + return phrase; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEngine.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEngine.java new file mode 100644 index 00000000000..ee874b76ed6 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEngine.java @@ -0,0 +1,169 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics.engine; + +import com.yahoo.search.Query; +import com.yahoo.prelude.query.QueryCanonicalizer; +import com.yahoo.prelude.semantics.RuleBase; +import com.yahoo.prelude.semantics.RuleBaseException; +import com.yahoo.prelude.semantics.rule.ProductionRule; + +import java.util.ListIterator; + +/** + * Evaluates the rules of a rule base. This method is thread safe on analyze calls, but + * not on modification calls. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class RuleEngine { + + private RuleBase rules; + + public RuleEngine(RuleBase rules) { + this.rules=rules; + } + + /** + * Evaluates a rule base over a query + * + * @param query the query to evaluate + * @param traceLevel the level of tracing to do + * @return the error caused by analyzing the query, or null if there was no error + * If there is an error, this query is destroyed (unusable) + */ + public String evaluate(Query query,int traceLevel) { + // TODO: This is O(query size*rule base size). 
We'll eventually need to create indices + // on rules to look up rule candidates per term to make it O(query size) instead + // Probably create indices on the first term like Prolog implementations use to + + boolean matchedAnything=false; + Evaluation evaluation=new Evaluation(query,traceLevel); + evaluation.setStemming(rules.getStemming()); + evaluation.trace(2,"Evaluating query '" + evaluation.getQuery().getModel().getQueryTree().getRoot() + "':"); + for (ListIterator<ProductionRule> i=rules.ruleIterator(); i.hasNext(); ) { + evaluation.reset(); + ProductionRule rule=i.next(); + boolean matched=matchRuleAtAllStartPoints(evaluation,rule); + matchedAnything|=matched; + } + + if (!matchedAnything) return null; + + String error=QueryCanonicalizer.canonicalize(query); + + if (query.getTraceLevel()>=1) + query.trace("SemanticSearcher: Rewrote query",true,1); + + return error; + } + + /** Match a rule at any starting point in the query */ + private boolean matchRuleAtAllStartPoints(Evaluation evaluation, ProductionRule rule) { + boolean matchedAtLeastOnce=false; + int iterationCount=0; + + /** + * Test if it is a removal rule, if so iterate backwards so that precalculated + * replacement positions does not become invalid as the query shrink + */ + boolean removalRule = false; + if ( (rule instanceof com.yahoo.prelude.semantics.rule.ReplacingProductionRule) && + (rule.getProduction().toString().length() == 0) ) { // empty replacement + removalRule = true; + evaluation.setToLast(); + } + + int loopLimit=Math.max(15,evaluation.getQuerySize()*3); + + while (evaluation.currentItem() != null) { + boolean matched=matchRule(evaluation,rule); + if (matched) { + if (removalRule) + evaluation.resetToLast(); + else + evaluation.reset(); + matchedAtLeastOnce = true; + if (rule.isLoop()) break; + } + else { + if (removalRule) + evaluation.previous(); + else + evaluation.next(); + } + + if (matched && iterationCount++ > loopLimit) { + throw new RuleBaseException("Rule '" + rule + "' 
has matched '" + + evaluation.getQuery().getModel().getQueryTree().getRoot() + + "' " + loopLimit + " times, aborting"); + } + } + + return matchedAtLeastOnce; + } + + /** + * Matches a rule at the current starting point of the evaluation, and carries + * out the production if there is a match + * + * @return whether this rule matched + */ + // TODO: Code cleanup + private boolean matchRule(Evaluation evaluation, ProductionRule rule) { + RuleEvaluation ruleEvaluation=evaluation.freshRuleEvaluation(); + + ruleEvaluation.indentTrace(); + if (ruleEvaluation.getTraceLevel()>=3) { + ruleEvaluation.trace(3,"Evaluating rule '" + rule + + "' on '" + ruleEvaluation.getEvaluation().getQuery().getModel().getQueryTree().getRoot() + + "' at '" + ruleEvaluation.currentItem() + "':"); + } + + ruleEvaluation.indentTrace(); + + boolean matches=rule.matches(ruleEvaluation); + + boolean matchedBefore=false; + int currentMatchDigest=ruleEvaluation.calculateMatchDigest(rule); + if (evaluation.hasMatchDigest(currentMatchDigest)) + matchedBefore=true; + + boolean queryGotShorter=false; + if (evaluation.getPreviousQuerySize()>evaluation.getQuerySize()) + queryGotShorter=true; + + boolean doProduction=!matchedBefore || queryGotShorter; + + ruleEvaluation.unindentTrace(); + + if (ruleEvaluation.getTraceLevel()>=2) { + if (matches && doProduction) + ruleEvaluation.trace(2,"Matched rule '" + rule + "' at " + ruleEvaluation.previousItem()); + else if (!matches) + ruleEvaluation.trace(2,"Did not match rule '" + rule + "' at " + ruleEvaluation.currentItem()); + else if (!doProduction) + ruleEvaluation.trace(2,"Ignoring repeated match of '" + rule + "'"); + } + + ruleEvaluation.unindentTrace(); + + if (!matches || !doProduction) return false; + + // Do production barrier + + evaluation.addMatchDigest(currentMatchDigest); + String preQuery=null; + if (evaluation.getTraceLevel()>=1) { + preQuery= evaluation.getQuery().getModel().getQueryTree().getRoot().toString(); + } + 
rule.produce(ruleEvaluation); + if (evaluation.getTraceLevel()>=1) { + evaluation.trace(1,"Transforming '" + preQuery + "' to '" + + evaluation.getQuery().getModel().getQueryTree().getRoot().toString() + + "' since '" + rule + "' matched"); + } + + return true; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEvaluation.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEvaluation.java new file mode 100644 index 00000000000..a6b90f98879 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEvaluation.java @@ -0,0 +1,346 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +import com.yahoo.prelude.query.CompositeItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.TermType; +import com.yahoo.prelude.semantics.rule.Condition; +import com.yahoo.prelude.semantics.rule.ProductionRule; + +import java.util.*; + +/** + * A particular evalutation of a particular rule. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class RuleEvaluation { + + // TODO: Create a query builder (or something) though which all query manipulation + // here and in Evaluation is done. 
This class must also hold all the matches + // and probably be able to update the match positions to keep them in sync with changes + // to the query + + // Remember that whenever state is added to this class, you + // must consider whether/how to make that state backtrackable + // by saving information in choicepoint.state + + /** The items to match in this evaluation */ + private List<FlattenedItem> items; + + /** The current position into the list of items */ + private int position; + + /** The start position into the item list */ + private int startPosition; + + /** The references to matched contexts to be made in this evaluation */ + private Set<String> matchReferences; + + /** The current context of this evaluation, or null if we're currently not in an interesting context */ + private String currentContext; + + /** A list of referencedMatches */ + private List<ReferencedMatches> referencedMatchesList =new java.util.ArrayList<>(); + + private List<Match> nonreferencedMatches=new java.util.ArrayList<>(); + + /** The evaluation owning this */ + private Evaluation evaluation; + + /** The choice points saved in this evaluation */ + private Stack<Choicepoint> choicepoints=null; + + /** The last value returned by a condition evaluated in this, may be null */ + private Object value=null; + + /** True when we are evaluating inside a condition which inverts the truth value */ + private boolean inNegation=false; + + /** + * A label we should use to match candidate terms for. + * Used to propagate a label from e.g. 
reference conditions to named conditions + */ + private String currentLabel=null; + + public RuleEvaluation(Evaluation owner) { + this.evaluation=owner; + } + + public void initialize(List<FlattenedItem> list,int startPosition) { + this.startPosition=startPosition; + items=list; + reinitialize(); + } + + void reinitialize() { + position=startPosition; + currentContext=null; + referencedMatchesList.clear(); + nonreferencedMatches.clear(); + if (choicepoints!=null) + choicepoints.clear(); + } + + public void setMatchReferences(Set<String> matchReferences) { this.matchReferences=matchReferences; } + + /** + * <p>Calculates an id which is unique for each match (the totality of the matched terms) + * to a high probability. Why can we not simply look at the position + * of terms? Because rules are allowed to modify the query tree in ways that makes positions + * change.</p> + * + * <p>This digest is also problematic, because it's really the matching condition who should + * calculate a match digest for that term which incorporates the semantics of that kind + * of match (maybe not the word and index, but something else). 
This is a todo for when + * we add other kinds of conditions.</p> + */ + int calculateMatchDigest(ProductionRule rule) { + int matchDigest=rule.hashCode(); + int matchCounter=1; + for (Iterator<ReferencedMatches> i=referencedMatchesList.iterator(); i.hasNext(); ) { + ReferencedMatches matches=i.next(); + int termCounter=0; + for (Iterator<Match> j=matches.matchIterator(); j.hasNext(); ) { + Match match=j.next(); + matchDigest=7*matchDigest*matchCounter+ + 71*termCounter+ + match.hashCode(); + termCounter++; + } + matchCounter++; + } + for (Iterator<Match> i=nonreferencedMatches.iterator(); i.hasNext(); ) { + Match match=i.next(); + matchDigest=7*matchDigest*matchCounter+match.hashCode(); + matchCounter++; + } + return matchDigest; + } + + /** + * Returns the current term item to look at, + * or null if there are no more elements + */ + public FlattenedItem currentItem() { + if (position>=items.size()) return null; + return items.get(position); + } + + public FlattenedItem previousItem() { + if (position-1<0) return null; + return items.get(position-1); + } + + /** Returns the position of the current item */ + public int currentPosition() { + return position; + } + + /** Sets the current position */ + public void setPosition(int position) { + this.position=position; + } + + /** Returns the total number of items to match in this evaluation */ + public int itemCount() { + return items.size() - startPosition; + } + + /** Returns the last value returned by a condition in this evaluation, or null */ + public Object getValue() { return value; } + + /** Sets the last value returned by a condition in this evaluatiino, or null */ + public void setValue(Object value) { this.value=value; } + + /** Returns whether we are evaluating inside a condition which inverts the truth value */ + public boolean isInNegation() { return inNegation; } + + /** sets whether we are evaluating inside a condition which inverts the truth value */ + public void setInNegation(boolean inNegation) { 
this.inNegation=inNegation; } + + /** Returns the current position into the terms this evaluates over */ + public int getPosition() { return position; } + + /** Sets a new current label and returns the previous one (the label held before this call, possibly null) */ + public String setCurrentLabel(String currentLabel) { + String oldLabel=this.currentLabel; + this.currentLabel=currentLabel; + return oldLabel; + } + + public String getCurrentLabel() { return currentLabel; } + + /** + * Advances currentItem to the next term item and returns that item. + * If the current item before this call is the last item, this will + * return (and set currentItem to) null. + */ + public FlattenedItem next() { + position++; + + if (position>=items.size()) { + position=items.size(); + return null; + } + + return items.get(position); + } + + // TODO: Simplistic yet. Need to support context nesting etc. + public void entering(String context) { + if (context==null) return; + if (matchReferences!=null && matchReferences.contains(context)) + currentContext=context; + + } + + public void leaving(String context) { + if (context==null) return; + if (currentContext==null) return; + if (currentContext.equals(context)) + currentContext=null; + } + + /** + * Adds a match + * + * @param item the match to add + * @param replaceString the string to replace this match by, usually the item.getIndexedValue() + */ + public void addMatch(FlattenedItem item,String replaceString) { + evaluation.makeParentMutable(item.getItem()); + Match match=new Match(item,replaceString); + if (currentContext!=null) { + ReferencedMatches matches=getReferencedMatches(currentContext); + if (matches==null) { + matches=new ReferencedMatches(currentContext); + referencedMatchesList.add(matches); + } + matches.addMatch(match); + } + else { + nonreferencedMatches.add(match); + } + } + + /** Returns the referenced matches for a context name, or null if none */ + public ReferencedMatches getReferencedMatches(String name) { + for (Iterator<ReferencedMatches> 
i=referencedMatchesList.iterator(); i.hasNext(); ) { + ReferencedMatches matches=i.next(); + if (name.equals(matches.getContextName())) + return matches; + } + return null; + } + + public int getReferencedMatchCount() { return referencedMatchesList.size(); } + + public int getNonreferencedMatchCount() { return nonreferencedMatches.size(); } + + /** Returns the evaluation this belongs to */ + public Evaluation getEvaluation() { return evaluation; } + + /** Adds an item to the query being evaluated in a way consistent with the query type */ + public void addItem(Item item, TermType termType) { + evaluation.addItem(item,termType); + } + + public void removeItem(Item item) { + evaluation.removeItem(item); + } + + public void removeItemByIdentity(Item item) { + evaluation.removeItemByIdentity(item); + } + + /** Removes an item, prefers the one at/close to the given position if there are multiple ones */ + public void removeItem(int position,Item item) { + evaluation.removeItem(position,item); + } + + + /** + * Inserts an item to the query being evaluated in a way consistent with the query type + * + * @param item the item to insert + * @param parent the parent of this item, or null to set the root + * @param index the index at which to insert this into the parent + * @param termType the kind of item to index, this decides the resulting structure + */ + public void insertItem(Item item, CompositeItem parent, int index, TermType termType) { + evaluation.insertItem(item,parent,index,termType); + } + + /** Returns a read-only view of the items of this */ + public List<FlattenedItem> items() { + return Collections.unmodifiableList(items); + } + + public Match getNonreferencedMatch(int index) { + return nonreferencedMatches.get(index); + } + + public void trace(int level,String string) { + evaluation.trace(level,string); + } + + public int getTraceLevel() { + return evaluation.getTraceLevel(); + } + + public void indentTrace() { + evaluation.indentTrace(); + } + + public void 
unindentTrace() { + evaluation.unindentTrace(); + } + + /** + * Returns the choice point registered for the given condition in this evaluation, optionally creating it + * + * @param condition the creating condition + * @param create true to create this choicepoint if it is missing + * @return the choicepoint, or null if it is not present and create is false + */ + public Choicepoint getChoicepoint(Condition condition,boolean create) { + if (choicepoints==null) { + if (!create) return null; + choicepoints=new java.util.Stack<>(); + } + Choicepoint choicepoint=lookupChoicepoint(condition); + if (choicepoint==null) { + if (!create) return null; + choicepoint=new Choicepoint(this,condition); + choicepoints.push(choicepoint); + } + return choicepoint; + } + + private Choicepoint lookupChoicepoint(Condition condition) { + for (Iterator<Choicepoint> i=choicepoints.iterator(); i.hasNext(); ) { + Choicepoint choicepoint=i.next(); + if (condition==choicepoint.getCondition()) + return choicepoint; + } + return null; + } + + List<ReferencedMatches> referencedMatches() { + return referencedMatchesList; + } + + List<Match> nonreferencedMatches() { + return nonreferencedMatches; + } + + /** Removes, by identity, the item of each match in the given referenced matches */ + public void removeMatches(ReferencedMatches matches) { + for (Iterator<Match> i=matches.matchIterator(); i.hasNext(); ) { + Match match=i.next(); + removeItemByIdentity(match.getItem()); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/package-info.java b/container-search/src/main/java/com/yahoo/prelude/semantics/package-info.java new file mode 100644 index 00000000000..6adbd065352 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+@ExportPackage +package com.yahoo.prelude.semantics; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/parser/package-info.java b/container-search/src/main/java/com/yahoo/prelude/semantics/parser/package-info.java new file mode 100644 index 00000000000..309c3f7a456 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/parser/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.prelude.semantics.parser; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/AddingProductionRule.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/AddingProductionRule.java new file mode 100644 index 00000000000..91eef25a8b0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/AddingProductionRule.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics.rule; + +/** + * A production rule which <i>adds</i> the production to the matched query + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class AddingProductionRule extends ProductionRule { + + protected String getSymbol() { return "+>"; } + + public void setProduction(ProductionList productionList) { + super.setProduction(productionList); + productionList.setReplacing(false); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/AndCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/AndCondition.java new file mode 100644 index 00000000000..2c826df9196 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/AndCondition.java @@ -0,0 +1,39 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import java.util.Iterator; + +import com.yahoo.prelude.semantics.engine.Choicepoint; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A condition which consists of a list of alternatives to match at any location + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class AndCondition extends CompositeCondition { + + // TODO: Not in use. What was this for? Remove? + + public AndCondition() { + } + + public boolean doesMatch(RuleEvaluation e) { + Choicepoint choicepoint=e.getChoicepoint(this,true); + choicepoint.updateState(); + boolean matches=allSubConditionsMatches(e); + if (!matches) + choicepoint.backtrack(); + return matches; + } + + protected boolean useParentheses() { + return (getParent()!=null + && ! 
(getParent() instanceof ChoiceCondition)); + } + + protected String toInnerString() { + return toInnerString(" & "); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ChoiceCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ChoiceCondition.java new file mode 100644 index 00000000000..5cf3d4bf7a4 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ChoiceCondition.java @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import java.util.Iterator; + +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A condition which consists of a list of alternatives to match at a specific location + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class ChoiceCondition extends CompositeCondition { + + public ChoiceCondition() { + } + + public boolean doesMatch(RuleEvaluation e) { + //if (e.currentItem()==null) return false; + for (Iterator<Condition> i=conditionIterator(); i.hasNext(); ) { + Condition subCondition= i.next(); + if (subCondition.matches(e)) + return true; + } + + return false; + } + + protected String toInnerString() { + return toInnerString(", "); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ComparisonCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ComparisonCondition.java new file mode 100644 index 00000000000..0d24368cf28 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ComparisonCondition.java @@ -0,0 +1,170 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics.rule; + +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +import com.yahoo.prelude.semantics.engine.Choicepoint; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A condition which is true of the <i>values</i> of its two subconditions are true + * and both have the same value + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class ComparisonCondition extends CompositeCondition { + + private Operator operator; + + public ComparisonCondition(Condition leftCondition,String operatorString,Condition rightCondition) { + operator=Operator.get(operatorString); + addCondition(leftCondition); + addCondition(rightCondition); + } + + protected boolean doesMatch(RuleEvaluation evaluation) { + Object left=null; + Object right=null; + boolean matches=false; + Choicepoint choicepoint=evaluation.getChoicepoint(this,true); + try { + matches=getLeftCondition().matches(evaluation); + if (!matches) return false; + + left=evaluation.getValue(); + evaluation.setValue(null); + + choicepoint.backtrackPosition(); + matches=getRightCondition().matches(evaluation); + if (!matches) return false; + + right=evaluation.getValue(); + evaluation.setValue(right); + matches=operator.compare(left,right); + return matches; + } + finally { + if (!matches) + choicepoint.backtrack(); + traceResult(matches,evaluation,left,right); + } + } + + protected void traceResult(boolean matches,RuleEvaluation e) { + // Uses our own logging method instead + } + + protected void traceResult(boolean matches,RuleEvaluation e,Object left,Object right) { + if (matches && e.getTraceLevel()>=3) + e.trace(3,"Matched '" + this + "'" + getMatchInfoString(e) + " at " + e.previousItem() + " as " + left + operator + right + " is true"); + if (!matches && e.getTraceLevel()>=3) + e.trace(3,"Did not match '" + this + "' at " + e.currentItem() + " as " + left + operator + right + " is false"); + } + + public 
Condition getLeftCondition() { + return getCondition(0); + } + + public void setLeftCondition(Condition leftCondition) { + setCondition(0,leftCondition); + } + + public Condition getRightCondition() { + return getCondition(1); + } + + public void setRightCondition(Condition rightCondition) { + setCondition(1,rightCondition); + } + + protected String toInnerString() { + return toInnerString(operator.toString()); + } + + private static final class Operator { + + private String operatorString; + + private static Map<String, Operator> operators=new HashMap<>(); + + public static final Operator equals=new Operator("="); + public static final Operator largerequals=new Operator(">="); + public static final Operator smallerequals=new Operator("<="); + public static final Operator larger=new Operator(">"); + public static final Operator smaller=new Operator("<"); + public static final Operator different=new Operator("!="); + public static final Operator contains=new Operator("=~"); + + private Operator(String operator) { + this.operatorString=operator; + operators.put(operatorString,this); + } + + private static Operator get(String operatorString) { + Operator operator=operators.get(operatorString); + if (operator==null) + throw new IllegalArgumentException("Unknown operator '" + operatorString + "'"); + return operator; + } + + public boolean compare(Object left,Object right) { + if (this==equals) + return equals(left,right); + if (this==different) + return !equals(left,right); + + if (left==null || right==null) return false; + + if (this==contains) + return contains(left,right); + if (this==largerequals) + return larger(left,right) || equals(left,right); + if (this==smallerequals) + return !larger(left,right); + if (this==larger) + return larger(left,right); + if (this==smaller) + return !larger(left,right) && !equals(left,right); + throw new RuntimeException("Programming error, fix this method"); + } + + private boolean equals(Object left,Object right) { + if (left==null 
&& right==null) return true; + if (left==null) return false; + return left.equals(right); + } + + /** True if left contains right */ + private boolean contains(Object left,Object right) { + if (left instanceof Collection) + return ((Collection<?>)left).contains(right); + else + return left.toString().indexOf(right.toString())>=0; + } + + /** true if left is larger than right */ + private boolean larger(Object left,Object right) { + if ((left instanceof Number) && (right instanceof Number)) + return ((Number)left).doubleValue()>((Number)right).doubleValue(); + else + return left.toString().compareTo(right.toString())>0; + } + + public int hashCode() { + return operatorString.hashCode(); + } + + public boolean equals(Object other) { + if ( ! (other instanceof Operator)) return false; + return other.toString().equals(this.toString()); + } + + public String toString() { + return operatorString; + } + + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/CompositeCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/CompositeCondition.java new file mode 100644 index 00000000000..e7fd8d599d4 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/CompositeCondition.java @@ -0,0 +1,126 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics.rule; + +import java.util.Iterator; +import java.util.List; + +import com.yahoo.prelude.semantics.RuleBase; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A condition which contains a list of conditions + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public abstract class CompositeCondition extends Condition { + + private List<Condition> conditions=new java.util.ArrayList<>(); + + public CompositeCondition() { + } + + public void preMatchHook(RuleEvaluation e) { + super.preMatchHook(e); + if (e.getTraceLevel()>=3) { + e.trace(3,"Evaluating '" + this + "'" + " at " + e.currentItem()); + e.indentTrace(); + } + } + + public void postMatchHook(RuleEvaluation e) { + if (e.getTraceLevel()>=3) { + e.unindentTrace(); + } + } + + protected boolean hasOpenChoicepoint(RuleEvaluation evaluation) { + for (Iterator<Condition> i=conditionIterator(); i.hasNext(); ) { + Condition subCondition=i.next(); + if (subCondition.hasOpenChoicepoint(evaluation)) + return true; + } + return false; + } + + public void addCondition(Condition condition) { + conditions.add(condition); + condition.setParent(this); + } + + /** Sets the condition at the given index. Note that, unlike addCondition, this does not set the parent of the condition */ + public void setCondition(int index,Condition condition) { + conditions.set(index,condition); + } + + /** Returns the number of subconditions */ + public int conditionSize() { return conditions.size(); } + + /** + * Returns the condition at the given index + * + * @param i the 0-based index + * @return the condition at this index + * @throws IndexOutOfBoundsException if there is no condition at this index + */ + public Condition getCondition(int i) { + return conditions.get(i); + } + + /** + * Removes and returns the condition at the given index, clearing its parent + * + * @param i the 0-based index + * @return the removed condition + * @throws IndexOutOfBoundsException if there is no condition at this index + */ + public Condition removeCondition(int i) { + Condition 
condition=conditions.remove(i); + condition.setParent(null); + return condition; + } + + /** Returns an iterator of the immediate children of this condition */ + public Iterator<Condition> conditionIterator() { return conditions.iterator(); } + + public void makeReferences(RuleBase rules) { + for (Iterator<Condition> i=conditionIterator(); i.hasNext(); ) { + Condition condition=i.next(); + condition.makeReferences(rules); + } + } + + /** Whether this should be output with parentheses, default is parent!=null */ + protected boolean useParentheses() { + return getParent()!=null; + } + + protected String toInnerString(String conditionSeparator) { + if (getLabel()!=null) + return getLabel() + ":(" + conditionsToString(conditionSeparator) + ")"; + else if (useParentheses()) + return "(" + conditionsToString(conditionSeparator) + ")"; + else + return conditionsToString(conditionSeparator); + } + + protected final String conditionsToString(String conditionSeparator) { + StringBuilder buffer=new StringBuilder(); + for (Iterator<Condition> i=conditionIterator(); i.hasNext(); ) { + buffer.append(i.next().toString()); + if (i.hasNext()) + buffer.append(conditionSeparator); + } + return buffer.toString(); + } + + /** Returns whether all the conditions of this matches the current evaluation state */ + protected final boolean allSubConditionsMatches(RuleEvaluation e) { + for (Iterator<Condition> i=conditionIterator(); i.hasNext(); ) { + Condition subCondition=i.next(); + if (!subCondition.matches(e)) + return false; + } + return true; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/CompositeItemCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/CompositeItemCondition.java new file mode 100644 index 00000000000..18fbbb04412 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/CompositeItemCondition.java @@ -0,0 +1,42 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.query.PhraseItem; +import com.yahoo.prelude.semantics.engine.Choicepoint; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A condition on the presense of a particular kind of composite item (possibly also with a particular content) + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + * @since 5.1.15 + */ +public class CompositeItemCondition extends CompositeCondition { + + @Override + protected boolean doesMatch(RuleEvaluation e) { + Choicepoint choicepoint = e.getChoicepoint(this,true); + choicepoint.updateState(); + boolean matches = e.currentItem().getItem().getParent() instanceof PhraseItem + && allSubConditionsMatches(e); + if ( ! matches) + choicepoint.backtrack(); + return matches; + + } + + @Override + protected String toInnerString() { + if (getLabel()!=null) + return getLabel() + ":(" + toInnerStringBody() + ")"; + else if (useParentheses()) + return "(" + toInnerStringBody() + ")"; + else + return toInnerStringBody(); + } + + private String toInnerStringBody() { + return "\"" + conditionsToString(" ") + "\""; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/Condition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/Condition.java new file mode 100644 index 00000000000..f2029ede6fa --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/Condition.java @@ -0,0 +1,255 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.query.TermItem; +import com.yahoo.prelude.semantics.RuleBase; +import com.yahoo.prelude.semantics.engine.FlattenedItem; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * Superclass of all kinds of conditions of production rules + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public abstract class Condition { + + /** The parent of this condition, or null if this is not nested */ + private CompositeCondition parent=null; + + /** + * The label of this condition, or null if none. + * Specified by label:condition + * The label is also the default context is no context is speficied explicitly + */ + private String label=null; + + /** + * The name space refered by this match, or null if the default (query) + * Specified by namespace.condition in rules. + */ + private String nameSpace=null; + + /** + * The name of the context created by this, or null if none + * Specified by context/condition in rules + */ + private String contextName; + + /** Position constraints of the terms matched by this condition */ + private Anchor anchor=Anchor.NONE; + + public static enum Anchor { + NONE, START, END, BOTH; + public static Anchor create(boolean start,boolean end) { + if (start && end) return Anchor.BOTH; + if (start) return Anchor.START; + if (end) return Anchor.END; + return NONE; + } + } + + public Condition() { + this(null,null); + } + + public Condition(String label) { + this(label,null); + } + + public Condition(String label,String context) { + this.label=label; + this.contextName=context; + } + + /** + * Sets the name whatever is matched by this condition can be refered as, or null + * to make it unreferable + */ + public void setContextName(String contextName) { this.contextName=contextName; } + + /** + * Returns the name whatever is matched by this condition can be refered as, or null + * if it is unreferable + */ + public String getContextName() { 
return contextName; } + + /** Returns whether this is referable, returns context!=null by default */ + protected boolean isReferable() { return contextName!=null; } + + /** Sets the label of this. Set to null to use the default */ + public String getLabel() { return label; } + + /** Returns the label of this, or null if none (the default) */ + public void setLabel(String label) { this.label = label; } + + /** Returns the name of the namespace of this, or null if default (query) */ + public String getNameSpace() { return nameSpace; } + + /** Sets the name of the namespace of this */ + public void setNameSpace(String nameSpace) { this.nameSpace=nameSpace; } + + /** Returns the condition this is nested within, or null if it is not nested */ + public CompositeCondition getParent() { return parent; } + + /** Called by CompositeCondition.addCondition() */ + void setParent(CompositeCondition parent) { this.parent=parent; } + + /** Sets a positional constraint on this condition */ + public void setAnchor(Anchor anchor) { this.anchor=anchor; } + + /** Returns the positional constraint on this anchor. This is never null */ + public Anchor getAnchor() { return anchor; } + + /** + * <p>Returns whether this condition matches the given evaluation + * at the <i>current</i> location of the evaluation. 
Calls the doesMatch + * method of each condition subtype.</p> + */ + public final boolean matches(RuleEvaluation e) { + // TODO: With this algoritm, each choice point will move to the next choice on each reevaluation + // In the case where there are multiple ellipses, we may want to do globally coordinated + // moves of all the choice points instead + try { + preMatchHook(e); + + if (!matchesStartAnchor(e)) return false; + + String higherLabel=e.getCurrentLabel(); + if (getLabel()!=null) + e.setCurrentLabel(getLabel()); + + boolean matches=doesMatch(e); + while (!matches && hasOpenChoicepoint(e)) { + matches=doesMatch(e); + } + + e.setCurrentLabel(higherLabel); + + if (!matchesEndAnchor(e)) return false; + + traceResult(matches,e); + return matches; + } + finally { + postMatchHook(e); + } + + } + + /** Check start anchor. Trace level 4 if no match */ + protected boolean matchesStartAnchor(RuleEvaluation e) { + if (anchor!=Anchor.START && anchor!=Anchor.BOTH) return true; + if (e.getPosition()==0) return true; + if (e.getTraceLevel()>=4) + e.trace(4,this + " must be at the start, which " + e.currentItem() + " isn't"); + return false; + } + + /** Check start anchor. 
Trace level 4 if no match */ + protected boolean matchesEndAnchor(RuleEvaluation e) { + if (anchor!=Anchor.END && anchor!=Anchor.BOTH) return true; + if (e.getPosition()>=e.items().size()) return true; + if (e.getTraceLevel()>=4) + e.trace(4,this + " must be at the end, which " + e.currentItem() + " isn't"); + return false; + } + + protected void traceResult(boolean matches,RuleEvaluation e) { + if (matches && e.getTraceLevel()>=3) + e.trace(3,"Matched '" + this + "'" + getMatchInfoString(e) + " at " + e.previousItem()); + if (!matches && e.getTraceLevel()>=4) + e.trace(4,"Did not match '" + this + "' at " + e.currentItem()); + } + + protected String getMatchInfoString(RuleEvaluation e) { + String matchInfo=getMatchInfo(e); + if (matchInfo==null) return ""; + return " as '" + matchInfo + "'"; + } + + /** + * Called when match is called, before anything else. + * Always call super.preMatchHook when overriding. + */ + protected void preMatchHook(RuleEvaluation e) { + e.entering(contextName); + } + + /** + * Called just before match returns, on any return condition including exceptions. + * Always call super.postMatchHook when overriding + */ + protected void postMatchHook(RuleEvaluation e) { + e.leaving(contextName); + } + + /** + * Override this to return a string describing what this condition has matched in this evaluation. + * Will only be called when this condition is actually matched in this condition + * + * @return info about what is matched, or null if there is no info to return (default) + */ + protected String getMatchInfo(RuleEvaluation e) { return null; } + + /** + * Returns whether this condition matches the given evaluation + * at the <i>current</i> location of the evaluation. If there is a + * match, the evaluation must be advanced to the location beyond + * the matching item(s) before this method returns. 
+ */ + protected abstract boolean doesMatch(RuleEvaluation e); + + /** + * Returns whether there is an <i>open choice</i> in this or any of its subconditions. + * Returns false by default, must be overriden by conditions which may generate + * choices open accross multiple calls to matches, or contain such conditions. + */ + protected boolean hasOpenChoicepoint(RuleEvaluation e) { + return false; + } + + /** Override if references needs to be set in this condition of its children */ + public void makeReferences(RuleBase rules) { } + + protected String getLabelString() { + if (label==null) return ""; + return label + ":"; + } + + /** Whether the label matches the current item, true if there is no current item */ + protected boolean labelMatches(RuleEvaluation e) { + FlattenedItem flattenedItem=e.currentItem(); + if (flattenedItem==null) return true; + TermItem item=flattenedItem.getItem(); + if (item==null) return true; + return labelMatches(item,e); + } + + protected boolean labelMatches(TermItem evaluationTerm,RuleEvaluation e) { + String indexName=evaluationTerm.getIndexName(); + String label=getLabel(); + if (label==null) + label=e.getCurrentLabel(); + if ("".equals(indexName) && label==null) return true; + if (indexName.equals(label)) return true; + if (e.getTraceLevel()>=4) + e.trace(4,"'" + this + "' does not match, label of " + e.currentItem() + " was required to be " + label); + return false; + } + + /** All instances of this produces a parseable string output */ + protected abstract String toInnerString(); + + protected boolean isDefaultContextName() { return false; } + + public String toString() { + String contextString=""; + String nameSpaceString=""; + if (contextName!=null && !isDefaultContextName()) + contextString=contextName + "/"; + if (getNameSpace()!=null) + nameSpaceString=getNameSpace() + "."; + return contextString + nameSpaceString + toInnerString(); + } + +} diff --git 
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.semantics.rule;

import com.yahoo.prelude.query.TermItem;
import com.yahoo.prelude.querytransform.PhraseMatcher;
import com.yahoo.prelude.semantics.RuleBase;
import com.yahoo.prelude.semantics.RuleBaseException;
import com.yahoo.prelude.semantics.engine.Choicepoint;
import com.yahoo.prelude.semantics.engine.EvaluationException;
import com.yahoo.prelude.semantics.engine.FlattenedItem;
import com.yahoo.prelude.semantics.engine.RuleEvaluation;
import com.yahoo.protect.Validator;

import java.util.Map;

/**
 * A reference to a named condition, or to an automata annotation with that name
 *
 * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a>
 */
public class ConditionReference extends Condition {

    /** The name of the referenced condition */
    private String conditionName;

    /**
     * The condition this refers to. This is null until makeReferences is called,
     * and stays null if this turns out to be an automata reference.
     */
    private NamedCondition namedCondition;

    /**
     * Whether this is resolved through the automata annotations of the current item
     * rather than through a named condition
     */
    private boolean automataLookup = false;

    /** Creates an unlabeled reference to the condition having the given name */
    public ConditionReference(String conditionName) {
        this(null, conditionName);
    }

    /**
     * Creates a reference to the condition having the given name.
     * The condition name is also used as the context name of this.
     */
    public ConditionReference(String label, String conditionName) {
        super(label);
        Validator.ensureNotNull("Name of referenced condition", conditionName);
        this.conditionName = conditionName;
        setContextName(conditionName);
    }

    /** Returns the name of the referenced rule, never null */
    public String getConditionName() { return conditionName; }

    public void setConditionName(String name) { this.conditionName = name; }

    /**
     * Matches by automata annotation if this is an automata reference, and by delegating
     * to the referenced named condition otherwise.
     *
     * @throws EvaluationException if this is not an automata reference and has no referenced condition
     */
    public boolean doesMatch(RuleEvaluation e) {
        if (automataLookup)
            return automataMatch(e);
        if (namedCondition != null)
            return namedCondition.matches(e);
        throw new EvaluationException("Condition reference '" + conditionName +
                                      "' not found or not initialized");
    }

    /** Matches the phrase annotated on the current item under the condition name, if any */
    private boolean automataMatch(RuleEvaluation e) {
        FlattenedItem start = e.currentItem();
        if (start == null) return false;

        // instanceof is false for null, so a missing annotation is rejected here too
        Object annotation = start.getItem().getAnnotation(conditionName);
        if ( ! (annotation instanceof PhraseMatcher.Phrase)) return false;

        Choicepoint choicePoint = e.getChoicepoint(this, true);
        boolean matches = automataMatchPhrase((PhraseMatcher.Phrase)annotation, e);

        if (!matches && e.isInNegation()) { // TODO: Temporary hack! Works for single items only
            e.addMatch(start, null);
        }

        // Backtrack on a failed match outside negation, and on a successful match inside negation
        if (matches == e.isInNegation())
            choicePoint.backtrackPosition();

        return matches;
    }

    /** Steps through the evaluation in parallel with the items of the phrase, recording matches unless in negation */
    private boolean automataMatchPhrase(PhraseMatcher.Phrase phrase, RuleEvaluation e) {
        PhraseMatcher.Phrase.MatchIterator phraseItems = phrase.itemIterator();
        while (phraseItems.hasNext()) {
            phraseItems.next();
            FlattenedItem current = e.currentItem();
            if (current == null) return false;
            if (!labelMatches(e.currentItem().getItem(), e)) return false;
            if (!e.isInNegation())
                e.addMatch(current, phraseItems.getReplace());
            e.next();
        }
        // A length above the backed length means the underlying composite item has changed
        return phrase.getLength() <= phrase.getBackedLength();
    }

    /**
     * Resolves the condition name in the given rule base. If there is no such condition
     * this becomes an automata reference, provided the rule base uses automata.
     *
     * @throws RuleBaseException if the name can not be resolved in either way
     */
    public void makeReferences(RuleBase ruleBase) {
        namedCondition = ruleBase.getCondition(conditionName);
        if (namedCondition != null) return;

        if ( ! ruleBase.usesAutomata())
            throw new RuleBaseException("Referenced condition '" + conditionName +
                                        "' does not exist in " + ruleBase);
        automataLookup = true;
    }

    /** Returns whether the referenced condition has an open choicepoint, false if there is no referenced condition */
    protected boolean hasOpenChoicepoint(RuleEvaluation e) {
        return namedCondition != null && namedCondition.getCondition().hasOpenChoicepoint(e);
    }

    /** The context name is the default when it is unset or equal to the condition name */
    protected boolean isDefaultContextName() {
        String contextName = getContextName();
        return contextName == null || contextName.equals(conditionName);
    }

    protected String toInnerString() {
        return "[" + conditionName + "]";
    }

}
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.semantics.rule;

import java.util.Iterator;
import java.util.List;

import com.yahoo.prelude.semantics.engine.Choicepoint;
import com.yahoo.prelude.semantics.engine.FlattenedItem;
import com.yahoo.prelude.semantics.engine.RuleEvaluation;

/**
 * A condition which greedily matches anything, represented as "..."
 *
 * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a>
 */
public class EllipsisCondition extends Condition {

    /** Whether this ellipsis is actually referable (enclosed in []) or not */
    private boolean referable;

    /** Creates a referable ellipsis condition with no label */
    public EllipsisCondition() {
        this(true);
    }

    /** Creates an ellipsis condition with no label */
    public EllipsisCondition(boolean referable) {
        this(null, referable);
    }

    /** Creates an ellipsis condition. A referable ellipsis gets "..." as context name. */
    public EllipsisCondition(String label, boolean referable) {
        super(label);
        this.referable = referable;
        if (referable)
            setContextName("...");
    }

    /** Creates an ellipsis condition with the given label and context name. The referable flag is left false. */
    public EllipsisCondition(String label, String context) {
        super(label, context);
    }

    /**
     * Matches all the remaining items on the first try, then one item less on each subsequent try.
     * The matched terms, separated by space, are set as the value of the evaluation.
     */
    public boolean doesMatch(RuleEvaluation e) {
        // The choice point remembers how many alternatives have been tried.
        // We never need to backtrack to this choice - backtracking is done by the parent
        // if this choice gives a global invalid state
        Choicepoint choicepoint = e.getChoicepoint(this, false);
        if (choicepoint != null) {
            if (!choicepoint.isOpen()) return false;
        }
        else { // First try
            choicepoint = e.getChoicepoint(this, true);
        }

        int remaining = e.itemCount() - e.currentPosition() - choicepoint.tryCount();
        if (remaining < 0) { // All alternatives are exhausted
            choicepoint.close();
            return false;
        }
        choicepoint.addTry();

        e.setValue(matchTerms(remaining, e));
        return true;
    }

    /** Adds the given number of items as matches, advancing the evaluation past them, and returns them space separated */
    private String matchTerms(int numberOfTerms, RuleEvaluation e) {
        StringBuilder matched = new StringBuilder();
        int last = numberOfTerms - 1;
        for (int term = 0; term <= last; term++) {
            e.addMatch(e.currentItem(), e.currentItem().getItem().getIndexedString());
            matched.append(e.currentItem().getItem().stringValue());
            if (term < last)
                matched.append(" ");
            e.next();
        }
        return matched.toString();
    }

    /** Returns the space separated items covered by the latest try, or null if this has not been tried */
    public String getMatchInfo(RuleEvaluation e) {
        Choicepoint choicepoint = e.getChoicepoint(this, false);
        if (choicepoint == null) return null;

        int from = choicepoint.getState().getPosition();
        int to = e.itemCount() - choicepoint.tryCount() + 1;
        return spaceSeparated(e.items().subList(from, to));
    }

    private String spaceSeparated(List<FlattenedItem> items) {
        StringBuilder buffer = new StringBuilder();
        String separator = "";
        for (FlattenedItem item : items) {
            buffer.append(separator).append(item.toString());
            separator = " ";
        }
        return buffer.toString();
    }

    /** Returns whether this ellipsis condition can be referred from a production */
    public boolean isReferable() {
        return referable || super.isReferable();
    }

    /**
     * Sets whether this ellipsis condition can be referred from a production or not.
     * The default "..." context name is set or removed accordingly; any other context name is kept.
     */
    public void setReferable(boolean referable) {
        this.referable = referable;
        if (referable) {
            if (getContextName() == null)
                setContextName("...");
        }
        else if ("...".equals(getContextName())) {
            setContextName(null);
        }
    }

    /** Returns true if this has been tried and its choicepoint is still open */
    protected boolean hasOpenChoicepoint(RuleEvaluation e) {
        Choicepoint choicepoint = e.getChoicepoint(this, false);
        return choicepoint != null && choicepoint.isOpen();
    }

    protected boolean isDefaultContextName() {
        String contextName = getContextName();
        return contextName == null || contextName.equals("...");
    }

    protected String toInnerString() {
        return referable ? "[...]" : "...";
    }

}
+package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A condition which is always true, and which has it's own value as return value + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class LiteralCondition extends Condition { + + private String value; + + public LiteralCondition(String value) { + this.value=value; + } + + protected boolean doesMatch(RuleEvaluation e) { + e.setValue(value); + return true; + } + + public void setValue(String value) { this.value=value; } + + public String getValue() { return value; } + + public String toInnerString() { return "'" + value + "'"; } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralPhraseProduction.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralPhraseProduction.java new file mode 100644 index 00000000000..23404fbc6e2 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralPhraseProduction.java @@ -0,0 +1,79 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.semantics.rule;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

import com.yahoo.prelude.query.PhraseItem;
import com.yahoo.prelude.query.WordItem;
import com.yahoo.prelude.semantics.engine.Match;
import com.yahoo.prelude.semantics.engine.RuleEvaluation;
import com.yahoo.protect.Validator;

/**
 * A literal phrase produced by a production rule
 *
 * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
 */
public class LiteralPhraseProduction extends TermProduction {

    /** The words of the produced phrase, in order */
    private List<String> terms = new ArrayList<>();

    /** Creates a new produced literal phrase */
    public LiteralPhraseProduction() {
        super();
    }

    /**
     * Creates a new produced literal phrase
     *
     * @param label the label of the produced term
     */
    public LiteralPhraseProduction(String label) {
        super(label);
    }

    /** Adds a term to this phrase. The term can not be null. */
    public void addTerm(String term) {
        Validator.ensureNotNull("A term in a produced phrase", term);
        terms.add(term);
    }

    /** Returns a read only view of the terms produced by this, never null */
    public List<String> getTerms() { return Collections.unmodifiableList(terms); }

    /**
     * Produces a phrase of the terms of this, having the label of this as index name.
     * When replacing, the phrase is inserted at the first nonreferenced match.
     * Otherwise it is given the weight of this and added as an item of the term type of this.
     */
    public void produce(RuleEvaluation e, int offset) {
        PhraseItem phrase = new PhraseItem();
        phrase.setIndexName(getLabel());
        for (Iterator<String> i = terms.iterator(); i.hasNext(); )
            phrase.addItem(new WordItem(i.next()));

        if (replacing) {
            insertMatch(e, e.getNonreferencedMatch(0), phrase, offset);
            return;
        }

        phrase.setWeight(getWeight());
        if (e.getTraceLevel() >= 6)
            e.trace(6, "Adding '" + phrase + "'");
        e.addItem(phrase, getTermType());
    }

    /** Returns the label prefix (if any) followed by the space separated terms in double quotes */
    public String toInnerTermString() {
        return getLabelString() + "\"" + getSpaceSeparated(terms) + "\"";
    }

    private String getSpaceSeparated(List<String> terms) {
        StringBuilder builder = new StringBuilder();
        String separator = "";
        for (String term : terms) {
            builder.append(separator).append(term);
            separator = " ";
        }
        return builder.toString();
    }

}

// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.semantics.rule;

import com.yahoo.prelude.query.TermType;
import com.yahoo.prelude.query.WordItem;
import com.yahoo.prelude.semantics.engine.Match;
import com.yahoo.prelude.semantics.engine.RuleEvaluation;
import com.yahoo.protect.Validator;

/**
 * A literal term produced by a production rule
 *
 * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a>
 */
public class LiteralTermProduction extends TermProduction {

    /** The term word produced by this, never null after construction */
    private String literal;

    /**
     * Creates a new produced literal term
     *
     * @param literal the term word to produce
     */
    public LiteralTermProduction(String literal) {
        super();
        setLiteral(literal);
    }

    /**
     * Creates a new produced literal term
     *
     * @param literal the term word to produce
     * @param termType the type of term to produce
     */
    public LiteralTermProduction(String literal, TermType termType) {
        super(termType);
        setLiteral(literal);
    }

    /**
     * Creates a new produced literal term
     *
     * @param label the label of the produced term
     * @param literal this term word
     * @param termType the type of term to produce
     */
    public LiteralTermProduction(String label, String literal, TermType termType) {
        super(label, termType);
        setLiteral(literal);
    }

    /** Sets the literal term value, which can not be null */
    public void setLiteral(String literal) {
        Validator.ensureNotNull("A produced term", literal);
        this.literal = literal;
    }

    /** Returns the term word produced, never null */
    public String getLiteral() { return literal; }

    /**
     * Produces a word item of the literal of this, in the index given by the label of this.
     * When replacing, the word is inserted at the first nonreferenced match.
     * Otherwise it is given the weight of this and added as an item of the term type of this.
     */
    public void produce(RuleEvaluation e, int offset) {
        WordItem word = new WordItem(literal, getLabel());
        if (replacing) {
            insertMatch(e, e.getNonreferencedMatch(0), word, offset);
            return;
        }

        word.setWeight(getWeight());
        if (e.getTraceLevel() >= 6)
            e.trace(6, "Adding '" + word + "'");
        e.addItem(word, getTermType());
    }

    /** Returns the label prefix (if any) followed by the literal */
    public String toInnerTermString() {
        return getLabelString() + literal;
    }

}
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class NamedCondition { + + private String conditionName; + + private Condition condition; + + public NamedCondition(String name,Condition condition) { + this.conditionName=name; + this.condition=condition; + } + + public String getName() { return conditionName; } + + public void setName(String name) { this.conditionName = name; } + + public Condition getCondition() { return condition; } + + public void setCondition(Condition condition) { this.condition = condition; } + + public boolean matches(RuleEvaluation e) { + if (e.getTraceLevel()>=3) { + e.trace(3,"Evaluating '" + this + "' at " + e.currentItem()); + e.indentTrace(); + } + + boolean matches=condition.matches(e); + + if (e.getTraceLevel()>=3) { + e.unindentTrace(); + if (matches) + e.trace(3,"Matched '" + this + "' at " + e.previousItem()); + else if (e.getTraceLevel()>=4) + e.trace(4,"Did not match '" + this + "' at " + e.currentItem()); + } + return matches; + } + + /** + * Returns the canonical string representation of this named condition. + * This string representation can always be reparsed to produce an + * identical rule to this one. + */ + public String toString() { + return "[" + conditionName + "] :- " + condition.toString(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamespaceProduction.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamespaceProduction.java new file mode 100644 index 00000000000..0c73427ad82 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamespaceProduction.java @@ -0,0 +1,56 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.semantics.RuleBaseException; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A production in a specified namespace + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class NamespaceProduction extends Production { + + /** The label in this namespace */ + private String namespace; + + /** The key ito set in the namespace */ + private String key; + + /** The value to set in the namespace */ + private String value=null; + + /** Creates a produced template term with no label and the default type */ + public NamespaceProduction(String namespace,String key,String value) { + setNamespace(namespace); + this.key=key; + this.value=value; + } + + public String getNamespace() { return namespace; } + + public final void setNamespace(String namespace) { + if (!namespace.equals("parameter")) + throw new RuleBaseException("Can not produce into namespace '" + namespace + + ". Only the 'parameter' name space can be referenced currently"); + this.namespace = namespace; + } + + public String getKey() { return key; } + + public void setKey(String key) { this.key = key; } + + public String getValue() { return value; } + + public void setValue(String value) { this.value = value; } + + public void produce(RuleEvaluation e,int offset) { + e.getEvaluation().getQuery().properties().set(key, value); + } + + /** All instances of this produces a parseable string output */ + public String toInnerString() { + return namespace + "." + key + "='" + value + "'"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NotCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NotCondition.java new file mode 100644 index 00000000000..64a10ea821a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NotCondition.java @@ -0,0 +1,47 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.semantics.RuleBase; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A condition which matches if its contained condition doesn't. + * NotCondition inverts the term checking but not the label checking. + * That is, it means "label:!term", it does not mean "!label:term". + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class NotCondition extends Condition { + + private Condition condition; + + public NotCondition(Condition condition) { + this.condition=condition; + } + + public Condition getCondtiion() { return condition; } + + public void setCondition(Condition condition) { this.condition=condition; } + + protected boolean doesMatch(RuleEvaluation e) { + e.setInNegation(!e.isInNegation()); + boolean matches=!condition.matches(e); + e.setInNegation(!e.isInNegation()); + return matches; + } + + public String toInnerString() { + return "!" + condition; + } + + public void makeReferences(RuleBase ruleBase) { + condition.makeReferences(ruleBase); + } + + protected boolean hasOpenChoicepoint(RuleEvaluation evaluation) { + return condition.hasOpenChoicepoint(evaluation); + } + + + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/Production.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/Production.java new file mode 100644 index 00000000000..cc6a9c87fb0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/Production.java @@ -0,0 +1,65 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics.rule; + +import java.util.Set; + +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A new term produced by a production rule + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public abstract class Production { + + /** True to replace, false to add, default true */ + protected boolean replacing=true; + + /** The (0-base) position of this term in the productions of this rule */ + private int position=0; + + /** The weight (strength) of this production as a percentage (default is 100) */ + private int weight=100; + + /** Creates a production with position 0 and weight 100 */ + public Production() { + } + + /** True to replace, false to add, if this production can do both. Default true. */ + public void setReplacing(boolean replacing) { this.replacing=replacing; } + + public int getPosition() { return position; } + + public void setPosition(int position) { this.position = position; } + + /** Sets the weight of this production as a percentage (default is 100) */ + public void setWeight(int weight) { this.weight=weight; } + + /** Returns the weight of this production as a percentage (default is 100) */ + public int getWeight() { return weight; } + + /** + * Produces this at the current match + * + * @param e the evaluation context containing the current match and the query + * @param offset the offset position at which to produce this. Offsets are used to produce multiple items + * at one position, inserted in the right order. + */ + public abstract void produce(RuleEvaluation e,int offset); + + /** + * Called to add the references into the condition of this rule made by this production + * into the given set. 
The default implementation does nothing, override for productions + * which refer to the condition + */ + void addMatchReferences(Set<String> matchReferences) { } + + /** All instances of this produce a parseable string output: the inner string, followed by an exclamation mark and the weight if the weight is not 100 */ + public final String toString() { + return toInnerString() + (getWeight()!=100 ? ("!" + getWeight()) : ""); + } + + /** All instances of this produce a parseable string output */ + protected abstract String toInnerString(); + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ProductionList.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ProductionList.java new file mode 100644 index 00000000000..3397a9ada1e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ProductionList.java @@ -0,0 +1,67 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics.rule; + +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A list of the productions of a rule + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class ProductionList { + + private List<Production> productions =new java.util.ArrayList<>(); + + /** True to replace by the production, false to add it */ + private boolean replacing=true; + + public void addProduction(Production term) { + term.setReplacing(replacing); + term.setPosition(productions.size()); + productions.add(term); + } + + /** Sets whether to replace (true) or add (false) on this and on all productions already in this, default true */ + void setReplacing(boolean replacing) { + for (Iterator<Production> i=productions.iterator(); i.hasNext(); ) { + Production production=i.next(); + production.setReplacing(replacing); + } + + this.replacing=replacing; + } + + /** Returns an unmodifiable view of the productions in this */ + public List<Production> 
productionList() { return Collections.unmodifiableList(productions); } + + public int getTermCount() { return productions.size(); } + + void addMatchReferences(Set<String> matchReferences) { + for (Iterator<Production> i=productions.iterator(); i.hasNext(); ) { + Production term=i.next(); + term.addMatchReferences(matchReferences); + } + } + + public void produce(RuleEvaluation e) { + for (int i=0; i<productions.size(); i++) { + productions.get(i).produce(e,i); + } + } + + public String toString() { + StringBuilder buffer=new StringBuilder(); + for (Iterator<Production> i=productions.iterator(); i.hasNext(); ) { + buffer.append(i.next().toString()); + if (i.hasNext()) + buffer.append(" "); + } + return buffer.toString(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ProductionRule.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ProductionRule.java new file mode 100644 index 00000000000..55be2aa2afd --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ProductionRule.java @@ -0,0 +1,100 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import java.util.Collections; +import java.util.Iterator; +import java.util.Set; + +import com.yahoo.prelude.semantics.RuleBase; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A query rewriting rule. 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public abstract class ProductionRule { + + /** What must be true for this rule to be true */ + private Condition condition; + + /** What is produced when this rule is true */ + private ProductionList production=new ProductionList(); + + /** The set of match name Strings which the production part of this rule references */ + private Set<String> matchReferences=new java.util.LinkedHashSet<>(); + + /** Sets what must be true for this rule to be true */ + public void setCondition(Condition condition) { this.condition=condition; } + + public Condition getCondition() { return condition; } + + /** Sets what is produced when this rule is true */ + public void setProduction(ProductionList production) { this.production=production; } + + public ProductionList getProduction() { return production; } + + /** Returns whether this rule matches the given query */ + public boolean matches(RuleEvaluation e) { + e.setMatchReferences(matchReferences); + return condition.matches(e); + } + + /** + * Returns the set of context names the production of this rule references + * + * @return an unmodifiable Set of condition context name Strings + */ + public Set<String> matchReferences() { + return Collections.unmodifiableSet(matchReferences); + } + + public void makeReferences(RuleBase rules) { + condition.makeReferences(rules); + production.addMatchReferences(matchReferences); + } + + /** Carries out the production of this rule */ + public void produce(RuleEvaluation e) { + production.produce(e); + } + + /** + * Returns the canonical string representation of this rule. + * This string representation can always be reparsed to produce an + * identical rule to this one. + */ + public String toString() { + return condition.toString() + " " + getSymbol() + " " + production.toString(); + } + + /** + * Returns the symbol of this production rule. + * All rules are on the form <code>condition symbol production</code>. 
+ */ + protected abstract String getSymbol(); + + /** + * Returns true if it is known that this rule matches its own output. + * If it does, it will only be evaluated once, to avoid infinite loops. + * This default implementation returns true only if the condition is an ellipsis condition, or a composite condition which (recursively) has such a condition among its children with no name space, and false otherwise. + */ + public boolean isLoop() { + // TODO: There are many more possible loops, we should probably detect + // a few more obvious ones + if (conditionIsEllipsAndOtherNameSpacesOnly(getCondition())) return true; + return false; + } + + private boolean conditionIsEllipsAndOtherNameSpacesOnly(Condition condition) { + if (condition instanceof EllipsisCondition) return true; + if (! (condition instanceof CompositeCondition)) return false; + for (Iterator<Condition> i=((CompositeCondition)condition).conditionIterator(); i.hasNext(); ) { + Condition child= i.next(); + if (child.getNameSpace()==null && conditionIsEllipsAndOtherNameSpacesOnly(child)) + return true; + } + return false; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReferenceTermProduction.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReferenceTermProduction.java new file mode 100644 index 00000000000..319e1969174 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReferenceTermProduction.java @@ -0,0 +1,110 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics.rule; + +import java.util.Set; + +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.TermType; +import com.yahoo.prelude.semantics.engine.EvaluationException; +import com.yahoo.prelude.semantics.engine.Match; +import com.yahoo.prelude.semantics.engine.ReferencedMatches; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; +import com.yahoo.protect.Validator; + +/** + * A term produced by a production rule which takes its actual term value + * from one or more terms matched in the condition + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class ReferenceTermProduction extends TermProduction { + + private String reference; + + /** + * Creates a new produced reference term + * + * @param reference the label of the condition this should take its value from + */ + public ReferenceTermProduction(String reference) { + super(); + setReference(reference); + } + + /** + * Creates a new produced reference term + * + * @param reference the label of the condition this should take its value from + * @param termType the type of the term to produce + */ + public ReferenceTermProduction(String reference, TermType termType) { + super(termType); + setReference(reference); + } + + /** + * Creates a new produced reference term + * + * @param label the label of the produced term + * @param reference the label of the condition this should take its value from + */ + public ReferenceTermProduction(String label,String reference) { + super(label); + setReference(reference); + } + + /** + * Creates a new produced reference term + * + * @param label the label of the produced term + * @param reference the label of the condition this should take its value from + * @param termType the type of term to produce + */ + public ReferenceTermProduction(String label,String reference, TermType termType) { + super(label,termType); + setReference(reference); + } + + /** Sets the label of the condition this 
should take its value from, never null */ + public void setReference(String reference) { + Validator.ensureNotNull("reference name of a produced reference term",reference); + this.reference =reference; + } + + /** Returns the label of the condition this should take its value from, never null */ + public String getReference() { return reference; } + + void addMatchReferences(Set<String> matchReferences) { + matchReferences.add(reference); + } + + public void produce(RuleEvaluation e,int offset) { + ReferencedMatches referencedMatches=e.getReferencedMatches(reference); + if (referencedMatches==null) + throw new EvaluationException("Referred match '" + reference + "' not found"); + if (replacing) { + replaceMatches(e,referencedMatches); + } + else { + addMatches(e,referencedMatches); + } + } + + public void replaceMatches(RuleEvaluation e,ReferencedMatches referencedMatches) { + Item referencedItem=referencedMatches.toItem(getLabel()); + if (referencedItem==null) return; + e.removeMatches(referencedMatches); + insertMatch(e, referencedMatches.matchIterator().next(),referencedItem,0); + } + + private void addMatches(RuleEvaluation e,ReferencedMatches referencedMatches) { + Item referencedItem=referencedMatches.toItem(getLabel()); + if (referencedItem==null) return; + e.addItem(referencedItem,getTermType()); + } + + public String toInnerTermString() { + return getLabelString() + "[" + reference + "]"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReplacingProductionRule.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReplacingProductionRule.java new file mode 100644 index 00000000000..76433ec693c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReplacingProductionRule.java @@ -0,0 +1,41 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.semantics.engine.Match; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A production rule which <i>replaces</i> matched terms by the production + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class ReplacingProductionRule extends ProductionRule { + + /** Carries out the production of this rule: removes the nonreferenced matches, then runs the production */ + public void produce(RuleEvaluation e) { + removeNonreferencedMatches(e); + if (e.getTraceLevel()>=5) { + e.trace(5,"Removed terms to get '" + e.getEvaluation().getQuery().getModel().getQueryTree().getRoot() + "', will add terms"); + } + super.produce(e); + } + + /** Removes the items of the nonreferenced matches, last match first, but stops when only one item is left in the query if this rule produces no terms */ + private void removeNonreferencedMatches(RuleEvaluation e) { + int itemCount=e.getEvaluation().getQuerySize(); + + // Remove items backwards to ease index handling + for (int i=e.getNonreferencedMatchCount()-1; i>=0; i--) { + // Ensure we don't produce an empty query + if (getProduction().getTermCount()==0 && itemCount==1) + break; + itemCount--; + + Match match=e.getNonreferencedMatch(i); + match.getItem().getParent().removeItem(match.getPosition()); + } + } + + protected String getSymbol() { return "->"; } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/SequenceCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/SequenceCondition.java new file mode 100644 index 00000000000..3ba929da021 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/SequenceCondition.java @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics.rule; + +import java.util.Iterator; + +import com.yahoo.prelude.semantics.engine.Choicepoint; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A set of conditions which must match the query in sequence + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class SequenceCondition extends CompositeCondition { + + public SequenceCondition() { + } + + public boolean doesMatch(RuleEvaluation e) { + Choicepoint choicepoint=e.getChoicepoint(this,true); + choicepoint.updateState(); + boolean matches=allSubConditionsMatches(e); + if (!matches) + choicepoint.backtrack(); + return matches; + } + + protected boolean useParentheses() { + return (getParent()!=null + && ! (getParent() instanceof ChoiceCondition)); + } + + public String toInnerString() { + return toInnerString(" "); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/SuperCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/SuperCondition.java new file mode 100644 index 00000000000..0b7b3b4a30b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/SuperCondition.java @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A condition which evaluates the <i>last included</i> version of + * the named condition this is a premise of. 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class SuperCondition extends Condition { + + private Condition condition; + + public void setCondition(Condition condition) { + this.condition=condition; + } + + public Condition getCondition() { + return condition; + } + + public boolean doesMatch(RuleEvaluation e) { + return condition.matches(e); + } + + public String toInnerString() { + if (condition==null) + return "@super"; + else + return condition.toString(); + } + + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermCondition.java new file mode 100644 index 00000000000..3558ef2b227 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermCondition.java @@ -0,0 +1,92 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.query.TermItem; +import com.yahoo.prelude.semantics.engine.NameSpace; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A term in a rule + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class TermCondition extends Condition { + + private String term,termPlusS; + + /** Creates an invalid term: the term is not set */ + public TermCondition() { } + + public TermCondition(String term) { + this(null,term); + } + + public TermCondition(String label,String term) { + super(label); + this.term=term; + termPlusS=term + "s"; + } + + public String getTerm() { return term; } + + public void setTerm(String term) { + this.term=term; + termPlusS=term + "s"; + } + + protected boolean doesMatch(RuleEvaluation e) { + // TODO: Move this into the respective namespaces when query becomes one + if (getNameSpace()!=null) { + NameSpace nameSpace=e.getEvaluation().getNameSpace(getNameSpace()); + return 
nameSpace.matches(term,e); + } + else { + if (e.currentItem()==null) + return false; + + if (!labelMatches(e)) return false; + + String matchedValue=termMatches(e.currentItem().getItem(),e.getEvaluation().getStemming()); + boolean matches=matchedValue!=null && labelMatches(e.currentItem().getItem(),e); + if ((matches && !e.isInNegation() || (!matches && e.isInNegation()))) { + e.addMatch(e.currentItem(),matchedValue); + e.setValue(term); + e.next(); + } + return matches; + } + } + + /** Returns this term if the query term equals it, or if stemming is on and the two differ only by a trailing s (query terms shorter than 3 characters are not stemmed), null otherwise */ + private String termMatches(TermItem queryTerm,boolean stemming){ + String queryTermString=queryTerm.stringValue(); + + // The terms are the same + boolean matches=queryTermString.equals(term); + if (matches) return term; + + if (stemming) + if (termMatchesWithStemming(queryTermString)) return term; + + return null; + } + + private boolean termMatchesWithStemming(String queryTermString) { + if (queryTermString.length()<3) return false; // Don't stem very short terms + + // The query term is this term plus s + boolean matches=queryTermString.equals(termPlusS); + if (matches) return true; + + // The query term plus s is this term + matches=term.equals(queryTermString + "s"); + if (matches) return true; + + return false; + } + + public String toInnerString() { + return getLabelString() + term; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermProduction.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermProduction.java new file mode 100644 index 00000000000..6490d21e319 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermProduction.java @@ -0,0 +1,93 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.TermType; +import com.yahoo.prelude.semantics.engine.Match; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; +import com.yahoo.protect.Validator; + +/** + * A new term produced by a production rule + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public abstract class TermProduction extends Production { + + /** The label of this term, or null if none */ + private String label=null; + + /** The type of term to produce */ + private TermType termType; + + /** Creates a produced template term with no label and the default type */ + public TermProduction() { + this(null,TermType.DEFAULT); + } + + /** Creates a produced template term with the default term type */ + public TermProduction(String label) { + this(label,TermType.DEFAULT); + } + + /** Creates a produced template term with no label */ + public TermProduction(TermType termType) { + this(null,termType); + } + + public TermProduction(String label, TermType termType) { + this.label=label; + setTermType(termType); + } + + /** Returns the label of this, or null if none (the default) */ + public String getLabel() { return label; } + + /** Sets the label of this. Set to null to use the default */ + public void setLabel(String label) { this.label = label; } + + /** Returns the type of term to produce, never null. Default is DEFAULT */ + public TermType getTermType() { return termType; } + + /** Sets the term type to produce */ + public void setTermType(TermType termType) { + Validator.ensureNotNull("Type of produced Term",termType); + this.termType=termType; + } + + /** + * Inserts newItem, with its weight set to the weight of this, at the position of the given match plus the given offset + * TODO: Move to ruleevaluation + */ + protected void insertMatch(RuleEvaluation e,Match matched, Item newItem,int offset) { + newItem.setWeight(getWeight()); + int insertPosition=matched.getPosition()+offset; + + // This check is necessary (?) 
because earlier items may have been removed + // after we recorded the match position. It is sort of hackish. A cleaner + // solution would be to update the match position on changes + if (insertPosition>matched.getParent().getItemCount()) { + insertPosition=matched.getParent().getItemCount(); + } + + e.insertItem(newItem,matched.getParent(),insertPosition,getTermType()); + if (e.getTraceLevel()>=6) + e.trace(6,"Inserted item '" + newItem + "' at position " + insertPosition + " producing " + e.getEvaluation().getQuery().getModel().getQueryTree()); + } + + protected String getLabelString() { + if (label==null) return ""; + return label + ":"; + } + + /** All instances of this produce a parseable string output */ + public final String toInnerString() { + if (termType==null) + return toInnerTermString(); + else + return termType.toSign() + toInnerTermString(); + } + + protected abstract String toInnerTermString(); + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/statistics/StatisticsSearcher.java b/container-search/src/main/java/com/yahoo/prelude/statistics/StatisticsSearcher.java new file mode 100644 index 00000000000..408707db2f3 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/statistics/StatisticsSearcher.java @@ -0,0 +1,314 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.statistics; + +import com.yahoo.component.ComponentId; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.concurrent.CopyOnWriteHashMap; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.container.Server; +import com.yahoo.container.protect.Error; +import com.yahoo.jdisc.Metric; +import com.yahoo.log.LogLevel; +import com.yahoo.metrics.simple.MetricSettings; +import com.yahoo.metrics.simple.MetricReceiver; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.result.ErrorHit; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; +import com.yahoo.statistics.Callback; +import com.yahoo.statistics.Counter; +import com.yahoo.statistics.Handle; +import com.yahoo.statistics.Value; + +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import java.util.logging.Level; + +import static com.yahoo.container.protect.Error.*; + + +/** + * <p>A searcher to gather statistics such as queries completed and query latency. There + * may be more than 1 StatisticsSearcher in the Searcher chain, each identified by a + * Searcher ID. The statistics accumulated by all StatisticsSearchers are stored + * in the singleton StatisticsManager object. </p> + * <p> + * TODO: Fix events to handle more than one of these searchers properly. 
+ * + * @author Gene Meyers + * @author Steinar Knutsen + * @author bergum + */ +@Before(PhaseNames.RAW_QUERY) +public class StatisticsSearcher extends Searcher { + + private static final String MAX_QUERY_LATENCY_METRIC = "max_query_latency"; + private static final String EMPTY_RESULTS_METRIC = "empty_results"; + private static final String HITS_PER_QUERY_METRIC = "hits_per_query"; + private static final String FAILED_QUERIES_METRIC = "failed_queries"; + private static final String MEAN_QUERY_LATENCY_METRIC = "mean_query_latency"; + private static final String QUERY_LATENCY_METRIC = "query_latency"; + private static final String QUERIES_METRIC = "queries"; + private static final String ACTIVE_QUERIES_METRIC = "active_queries"; + private static final String PEAK_QPS_METRIC = "peak_qps"; + + private Counter queries; // basic counter + private Counter failedQueries; // basic counter + private Counter nullQueries; // basic counter + private Counter illegalQueries; // basic counter + private Value queryLatency; // mean per 5 min + private Value queryLatencyBuckets; + private Value maxQueryLatency; // separate to avoid name mangling + @SuppressWarnings("unused") // all the work is done by the callback + private Value activeQueries; // raw measure every 5 minutes + private Value peakQPS; // peak 10s QPS + private Counter emptyResults; // number of results containing no concrete hits + private Value hitsPerQuery; // mean number of hits per query + private long prevMaxQPSTime; // previous measurement time of QPS + private double queriesForQPS = 0.0; + private final Object peakQpsLock = new Object(); + + private Metric metric; + private Map<String, Metric.Context> chainContexts = new CopyOnWriteHashMap<>(); + private Map<String, Metric.Context> yamasOnlyContexts = new CopyOnWriteHashMap<>(); + + + private void initEvents(com.yahoo.statistics.Statistics manager, MetricReceiver metricReceiver) { + queries = new Counter(QUERIES_METRIC, manager, false); + failedQueries = new 
Counter(FAILED_QUERIES_METRIC, manager, false); + nullQueries = new Counter("null_queries", manager, false); + illegalQueries = new Counter("illegal_queries", manager, false); + queryLatency = new Value(MEAN_QUERY_LATENCY_METRIC, manager, + new Value.Parameters().setLogRaw(false).setLogMean(true).setNameExtension(false)); + maxQueryLatency = new Value(MAX_QUERY_LATENCY_METRIC, manager, + new Value.Parameters().setLogRaw(false).setLogMax(true).setNameExtension(false)); + queryLatencyBuckets = Value.buildValue("query_latency", manager, null); + activeQueries = new Value(ACTIVE_QUERIES_METRIC, manager, + new Value.Parameters().setLogRaw(true).setCallback(new ActivitySampler())); + peakQPS = new Value(PEAK_QPS_METRIC, manager, new Value.Parameters().setLogRaw(false).setLogMax(true) + .setNameExtension(false)); + hitsPerQuery = new Value(HITS_PER_QUERY_METRIC, manager, + new Value.Parameters().setLogRaw(false).setLogMean(true).setNameExtension(false)); + emptyResults = new Counter(EMPTY_RESULTS_METRIC, manager, false); + metricReceiver.declareGauge(QUERY_LATENCY_METRIC, Optional.empty(), new MetricSettings.Builder().histogram(true).build()); + } + + // Callback to measure queries in flight every five minutes + private class ActivitySampler implements Callback { + public void run(Handle h, boolean firstRun) { + if (firstRun) { + metric.set(ACTIVE_QUERIES_METRIC, Integer.valueOf(0), null); + return; + } + // TODO Server.get() is to be removed + final int searchQueriesInFlight = Server.get().searchQueriesInFlight(); + ((Value) h).put(searchQueriesInFlight); + metric.set(ACTIVE_QUERIES_METRIC, searchQueriesInFlight, null); + } + } + + StatisticsSearcher(Metric metric) { + this(com.yahoo.statistics.Statistics.nullImplementation, metric, MetricReceiver.nullImplementation); + } + + public StatisticsSearcher(com.yahoo.statistics.Statistics manager, Metric metric, MetricReceiver metricReceiver) { + this.metric = metric; + initEvents(manager, metricReceiver); + } + + public 
String getMyID() { + return (getId().stringValue()); + } + + private void qps(long now, Metric.Context metricContext) { + // We can either have peakQpsLock _or_ have prevMaxQpsTime as a volatile + // and queriesForQPS as an AtomicInteger. That would lead to no locking, + // but two memory barriers in the common case. Don't change till we know + // that is actually better. + synchronized (peakQpsLock) { + if ((now - prevMaxQPSTime) >= (10 * 1000)) { + double ms = (double) (now - prevMaxQPSTime); + final double peakQPS = queriesForQPS / (ms / 1000); + this.peakQPS.put(peakQPS); + metric.set(PEAK_QPS_METRIC, peakQPS, metricContext); + queriesForQPS = 1.0d; + prevMaxQPSTime = now; + } else { + queriesForQPS += 1.0d; + } + } + } + + private Metric.Context getChainMetricContext(String chainName) { + Metric.Context context = chainContexts.get(chainName); + if (context == null) { + Map<String, String> dimensions = new HashMap<>(); + dimensions.put("chain", chainName); + context = this.metric.createContext(dimensions); + chainContexts.put(chainName, context); + } + return context; + } + + /** + * Generate statistics for the query passing through this Searcher + * 1) Add 1 to total query count + * 2) Add response time to total response time (time from entry to return) + * 3) Count failed queries, hits per query and empty results + */ + public Result search(com.yahoo.search.Query query, Execution execution) { + Metric.Context metricContext = getChainMetricContext(execution.chain().getId().stringValue()); + + incrQueryCount(metricContext); + logQuery(query); + long start = System.currentTimeMillis(); // Start time, in millisecs. 
+ qps(start, metricContext); + Result result; + //handle exceptions thrown below in searchers + try { + result = execution.search(query); // Pass on down the chain + } catch (Exception e) { + incrErrorCount(null, metricContext); + throw e; + } + + + long end = System.currentTimeMillis(); // End time, in millisecs. 
+ long latency = end - start; + if (latency >= 0) { + addLatency(latency, metricContext); + } else { + getLogger().log( + LogLevel.WARNING, + "Apparently negative latency measure, start: " + start + + ", end: " + end + ", for query: " + query.toString()); + } + if (result.hits().getError() != null) { + incrErrorCount(result, metricContext); + incrementYamasOnlyErrors(result, execution); + } + int hitCount = result.getConcreteHitCount(); + hitsPerQuery.put((double) hitCount); + metric.set(HITS_PER_QUERY_METRIC, (double) hitCount, metricContext); + if (hitCount == 0) { + emptyResults.increment(); + metric.add(EMPTY_RESULTS_METRIC, 1, metricContext); + } + + // Update running averages + //setAverages(); + + return result; + } + + private void logQuery(com.yahoo.search.Query query) { + // Don't parse the query if it's not necessary for the logging, as Query.toString triggers parsing + if (getLogger().isLoggable(Level.FINER)) { + getLogger().finer("Query: " + query.toString()); + } + } + + private void addLatency(long latency, Metric.Context metricContext) { + //myStats.addLatency(latency); + queryLatency.put(latency); + metric.set(QUERY_LATENCY_METRIC, latency, metricContext); + metric.set(MEAN_QUERY_LATENCY_METRIC, latency, metricContext); + maxQueryLatency.put(latency); + metric.set(MAX_QUERY_LATENCY_METRIC, latency, metricContext); + queryLatencyBuckets.put(latency); + } + + private void incrQueryCount(Metric.Context metricContext) { + //myStats.incrQueryCnt(); + queries.increment(); + metric.add(QUERIES_METRIC, 1, metricContext); + } + + private void incrErrorCount(Result result, Metric.Context metricContext) { + //If result is null an exception was thrown further down + if (result == null) { + //myStats.incrErrorCount(); + failedQueries.increment(); + metric.add(FAILED_QUERIES_METRIC, 1, metricContext); + metric.add("error.unhandled_exception", 1, metricContext); + return; + } + + if (result.hits().getErrorHit().hasOnlyErrorCode(Error.NULL_QUERY.code)) { + 
nullQueries.increment(); + return; + } else if (result.hits().getErrorHit().hasOnlyErrorCode(3)) { + illegalQueries.increment(); + return; + } + //myStats.incrErrorCount(); + failedQueries.increment(); + metric.add(FAILED_QUERIES_METRIC, 1, metricContext); + } + + /** + * Creates error metrics for Yamas only: adds 1 to the metric matching the code of each error in the result. These metrics are only logged to state health page + * and not forwarded to the log file. + * + * @param result The result to check for errors, nothing is done if it is null or has no error hit + */ + private void incrementYamasOnlyErrors(Result result, Execution execution) { + if(result == null) + return; + + ErrorHit error = result.hits().getErrorHit(); + if (error == null) + return; + for (ErrorMessage m : error.errors()) { + int code = m.getCode(); + Metric.Context c = getDimensions(m.getSource(), result, execution); + if (code == TIMEOUT.code) { + metric.add("error.timeout", 1, c); + } else if (code == NO_BACKENDS_IN_SERVICE.code) { + metric.add("error.backends_oos", 1, c); + } else if (code == ERROR_IN_PLUGIN.code) { + metric.add("error.plugin_failure", 1, c); + } else if (code == BACKEND_COMMUNICATION_ERROR.code) { + metric.add("error.backend_communication_error", 1, c); + } else if (code == EMPTY_DOCUMENTS.code) { + metric.add("error.empty_document_summaries", 1, c); + } else if (code == ILLEGAL_QUERY.code) { + metric.add("error.illegal_query", 1, c); + } else if (code == INVALID_QUERY_PARAMETER.code) { + metric.add("error.invalid_query_parameter", 1, c); + } else if (code == INTERNAL_SERVER_ERROR.code) { + metric.add("error.internal_server_error", 1, c); + } else if (code == SERVER_IS_MISCONFIGURED.code) { + metric.add("error.misconfigured_server", 1, c); + } else if (code == INVALID_QUERY_TRANSFORMATION.code) { + metric.add("error.invalid_query_transformation", 1, c); + } else if (code == RESULT_HAS_ERRORS.code) { + metric.add("error.result_with_errors", 1, c); + } else if (code == 
UNSPECIFIED.code) { + metric.add("error.unspecified", 1, c); + } + } + } + + + private Metric.Context 
getDimensions(String source, Result r, Execution execution) { + Metric.Context context = yamasOnlyContexts.get(source == null ? "" : source); + if (context == null) { + Map<String, String> dims = new HashMap<>(); + if (source != null) { + dims.put("source", source); + } + context = this.metric.createContext(dims); + yamasOnlyContexts.put(source == null ? "" : source, context); + } + //TODO add other relevant metric dimensions + //Would be nice to have chain as a dimension as + //we can separate errors from different chains + return context; + } + +} + diff --git a/container-search/src/main/java/com/yahoo/prelude/statistics/package-info.java b/container-search/src/main/java/com/yahoo/prelude/statistics/package-info.java new file mode 100644 index 00000000000..a2e7f98c002 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/statistics/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.prelude.statistics; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/Context.java b/container-search/src/main/java/com/yahoo/prelude/templates/Context.java new file mode 100644 index 00000000000..be26e3230c7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/templates/Context.java @@ -0,0 +1,112 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.templates; + +import java.util.Collection; + +import com.yahoo.text.XML; + +/** + * A set of variable bindings for template rendering + * + * @author bratseth + */ +public abstract class Context { + + private boolean xmlEscape = true; + + // These may be wrapped in an object if it gets unruly like this... 
+ private String boldOpenTag; + private String boldCloseTag; + private String separatorTag; + + private boolean utf8Output = false; + + //prevent sub-classing outside of this package. + Context() {} + + // set|getXmlEscape no longer final because of HitContext subclassing _and_ wrapping Context + /** Sets whether this context should xml-escape returned values */ + public void setXmlEscape(boolean xmlEscape) { this.xmlEscape=xmlEscape; } + + /** Returns whether this context xml-escapes returned values. Default is true */ + public boolean getXmlEscape() { return xmlEscape; } + + /** + * Makes a <b>secondary</b> binding + * + * @return the old value bound to this key, or null if the key was previously unbound + */ + public abstract Object put(String key,Object value); + + /** + * <p>Returns a value by looking it up in the primary, + * and thereafter in secondary sources.</p> + * + * <p>If xml escaping is on and this is a string, xml attribute escaping is done + * </p> + */ + abstract public Object get(String key); + + /** + * Removes a <b>secondary</b> binding + * + * @return the removed value, or null if it wasn't bound + */ + public abstract Object remove(Object key); + + + // These three may be collapsed to one method + public void setBoldOpenTag(String boldOpenTag) { + this.boldOpenTag = boldOpenTag; + } + public void setBoldCloseTag(String boldCloseTag) { + this.boldCloseTag = boldCloseTag; + } + public void setSeparatorTag(String separatorTag) { + this.separatorTag = separatorTag; + } + + + protected Object normalizeValue(Object value) { + if (value == null) { + return ""; + } else if (xmlEscape && value instanceof String) { + return XML.xmlEscape((String) value, true, null); + } else { + return value; + } + } + + public String getBoldOpenTag() { + return boldOpenTag; + } + + public String getBoldCloseTag() { + return boldCloseTag; + } + + public String getSeparatorTag() { + return separatorTag; + } + + public abstract Collection<? 
extends Object> getKeys(); + + /** + * Used by the template to decide whether to use UTF-8 optimizations. + * + * @return whether the result encoding is UTF-8 + */ + public boolean isUtf8Output() { + return utf8Output; + } + + /** + * Used by the template to decide whether to use UTF-8 optimizations. + * TODO: TVT: Make this package private again + * @param utf8Output whether the output encoding is UTF-8 + */ + public void setUtf8Output(boolean utf8Output) { + this.utf8Output = utf8Output; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/DefaultTemplateSet.java b/container-search/src/main/java/com/yahoo/prelude/templates/DefaultTemplateSet.java new file mode 100644 index 00000000000..1a7c5a738be --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/templates/DefaultTemplateSet.java @@ -0,0 +1,301 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.templates; + +import com.yahoo.concurrent.CopyOnWriteHashMap; +import com.yahoo.io.ByteWriter; +import com.yahoo.net.URI; +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.search.Result; +import com.yahoo.search.grouping.result.HitRenderer; +import com.yahoo.search.result.*; +import com.yahoo.text.Utf8String; +import com.yahoo.text.XMLWriter; + +import java.io.IOException; +import java.io.Writer; +import java.util.Iterator; +import java.util.Map; + +/** + * <p>A template set which provides a default rendering of results and hits.</p> + * + * <p>This can be extended to create custom programmatic templates. + * Create a subclass which has static inner classes extending DefaultTemplate for the templates + * you wish to override and call the set method for those templates in the subclass template set + * constructor. Some of the default templates contained utility functions, and can be overridden + * in place of DefaultTemplate to gain access to these. 
See TiledTemplateSet for an example.</p> + * + * @author bratseth + */ +public class DefaultTemplateSet extends UserTemplate<XMLWriter> { + + private static final Utf8String RESULT = new Utf8String("result"); + private static final Utf8String GROUP = new Utf8String("group"); + private static final Utf8String ID = new Utf8String("id"); + private static final Utf8String FIELD = new Utf8String("field"); + private static final Utf8String HIT = new Utf8String("hit"); + private static final Utf8String ERROR = new Utf8String("error"); + private static final Utf8String TOTAL_HIT_COUNT = new Utf8String("total-hit-count"); + private static final Utf8String QUERY_TIME = new Utf8String("querytime"); + private static final Utf8String SUMMARY_FETCH_TIME = new Utf8String("summaryfetchtime"); + private static final Utf8String SEARCH_TIME = new Utf8String("searchtime"); + private static final Utf8String NAME = new Utf8String("name"); + private static final Utf8String CODE = new Utf8String("code"); + private static final Utf8String COVERAGE_DOCS = new Utf8String("coverage-docs"); + private static final Utf8String COVERAGE_NODES = new Utf8String("coverage-nodes"); + private static final Utf8String COVERAGE_FULL = new Utf8String("coverage-full"); + private static final Utf8String COVERAGE = new Utf8String("coverage"); + private static final Utf8String RESULTS_FULL = new Utf8String("results-full"); + private static final Utf8String RESULTS = new Utf8String("results"); + private static final Utf8String TYPE = new Utf8String("type"); + private static final Utf8String RELEVANCY = new Utf8String("relevancy"); + private static final Utf8String SOURCE = new Utf8String("source"); + + private final CopyOnWriteHashMap<String, Utf8String> fieldNameMap = new CopyOnWriteHashMap<>(); + + + /** + * Create a template set with a name. This will be initialized with the default templates - + * use the set methods from the subclass constructor to override any of these with other template classes. 
+ */ + protected DefaultTemplateSet(String name) { + super(name, + DEFAULT_MIMETYPE, + DEFAULT_ENCODING + ); + } + + public DefaultTemplateSet() { + this("default"); + } + + /** Uses an XML writer in this template */ + @Override + public XMLWriter wrapWriter(Writer writer) { + return XMLWriter.from(writer, 10, -1); + } + + @Override + public void header(Context context, XMLWriter writer) throws IOException { + Result result=(Result)context.get("result"); + // TODO: move setting this to Result + context.setUtf8Output("utf-8".equalsIgnoreCase(getRequestedEncoding(result.getQuery()))); + writer.xmlHeader(getRequestedEncoding(result.getQuery())); + writer.openTag(RESULT).attribute(TOTAL_HIT_COUNT,String.valueOf(result.getTotalHitCount())); + if (result.getQuery().getPresentation().getReportCoverage()) { + renderCoverageAttributes(result.getCoverage(false), writer); + } + renderTime(writer, result); + writer.closeStartTag(); + } + + private void renderTime(final XMLWriter writer, final Result result) { + if (!result.getQuery().getPresentation().getTiming()) { + return; + } + + final String threeDecimals = "%.3f"; + final double milli = .001d; + final long now = System.currentTimeMillis(); + final long searchTime = now - result.getElapsedTime().first(); + final double searchSeconds = ((double) searchTime) * milli; + + if (result.getElapsedTime().firstFill() != 0L) { + final long queryTime = result.getElapsedTime().weightedSearchTime(); + final long summaryFetchTime = result.getElapsedTime().weightedFillTime(); + final double querySeconds = ((double) queryTime) * milli; + final double summarySeconds = ((double) summaryFetchTime) * milli; + writer.attribute(QUERY_TIME, String.format(threeDecimals, querySeconds)); + writer.attribute(SUMMARY_FETCH_TIME, String.format(threeDecimals, summarySeconds)); + } + writer.attribute(SEARCH_TIME, String.format(threeDecimals, searchSeconds)); + } + + @Override + public void footer(Context context, XMLWriter writer) throws IOException { + 
writer.closeTag(); + } + + @Override + /** + * Renders the header of a hit.<br/> + * Post-condition: The hit tag is open in this XML writer + */ + public void hit(Context context, XMLWriter writer) throws IOException { + Hit hit=(Hit)context.get("hit"); + + if (hit instanceof HitGroup) { + renderHitGroup((HitGroup) hit, context, writer); + } else { + writer.openTag(HIT); + renderHitAttributes(hit,writer); + writer.closeStartTag(); + renderHitFields(context, hit, writer); + } + } + + + @Override + /** + * Renders the footer of a hit. + * + * Pre-condition: The hit tag is open in this XML writer.<br/> + * Post-condition: The hit tag is closed + */ + public void hitFooter(Context context, XMLWriter writer) throws IOException { + writer.closeTag(); + } + + @Override + public void error(Context context, XMLWriter writer) throws IOException { + ErrorMessage error=((Result)context.get("result")).hits().getError(); + writer.openTag(ERROR).attribute(CODE,error.getCode()).content(error.getMessage(),false).closeTag(); + } + + @Override + public void noHits(Context context, XMLWriter writer) throws IOException { + // no hits, do nothing :) + } + + protected static void renderCoverageAttributes(Coverage coverage, XMLWriter writer) throws IOException { + if (coverage == null) return; + writer.attribute(COVERAGE_DOCS,coverage.getDocs()); + writer.attribute(COVERAGE_NODES,coverage.getNodes()); + writer.attribute(COVERAGE_FULL,coverage.getFull()); + writer.attribute(COVERAGE,coverage.getResultPercentage()); + writer.attribute(RESULTS_FULL,coverage.getFullResultSets()); + writer.attribute(RESULTS,coverage.getResultSets()); + } + + /** + * Writes a hit's default attributes like 'type', 'source', 'relevancy'. 
+ */ + protected void renderHitAttributes(Hit hit,XMLWriter writer) throws IOException { + writer.attribute(TYPE,hit.getTypeString()); + if (hit.getRelevance() != null) { + writer.attribute(RELEVANCY, hit.getRelevance().toString()); + } + writer.attribute(SOURCE, hit.getSource()); + } + + /** Opens (but does not close) the group hit tag */ + protected void renderHitGroup(HitGroup hit, Context context, XMLWriter writer) throws IOException { + if (HitRenderer.renderHeader(hit, writer)) { + // empty + } else if (hit.types().contains("grouphit")) { + // TODO Keep this? + renderHitGroupOfTypeGroupHit(context, hit, writer); + } else { + renderGroup(hit, writer); + } + } + + + /** + * Renders a hit group. + */ + protected void renderGroup(HitGroup hit, XMLWriter writer) throws IOException { + writer.openTag(GROUP); + renderHitAttributes(hit, writer); + writer.closeStartTag(); + } + + // Can't name this renderGroupHit as GroupHit is a class having nothing to do with HitGroup. + // Confused yet? Good! + protected void renderHitGroupOfTypeGroupHit(Context context, HitGroup hit, XMLWriter writer) throws IOException { + writer.openTag(HIT); + renderHitAttributes(hit, writer); + renderId(hit.getId(), writer); + writer.closeStartTag(); + } + + + protected void renderId(URI uri, XMLWriter writer) throws IOException { + if (uri != null) { + writer.openTag(ID).content(uri.stringValue(),false).closeTag(); + } + } + + /** + * Renders all fields of a hit. + * Simply calls {@link #renderField(Context, Hit, java.util.Map.Entry, XMLWriter)} for every field. 
+ */ + protected void renderHitFields(Context context, Hit hit, XMLWriter writer) throws IOException { + renderSyntheticRelevancyField(hit, writer); + for (Iterator<Map.Entry<String, Object>> it = hit.fieldIterator(); it.hasNext(); ) { + renderField(context, hit, it.next(), writer); + } + } + + private void renderSyntheticRelevancyField(Hit hit, XMLWriter writer) throws IOException { + final String relevancyFieldName = "relevancy"; + final Relevance relevance = hit.getRelevance(); + + if (shouldRenderField(hit, relevancyFieldName) && relevance != null) { + renderSimpleField(relevancyFieldName, relevance, writer); + } + } + + protected void renderField(Context context, Hit hit, Map.Entry<String, Object> entry, XMLWriter writer) throws IOException { + String fieldName = entry.getKey(); + + if (!shouldRenderField(hit, fieldName)) return; + if (fieldName.startsWith("$")) return; // Don't render fields that start with $ // TODO: Move to should render + + writeOpenFieldElement(fieldName, writer); + renderFieldContent(context, hit, fieldName, writer); + writeCloseFieldElement(writer); + } + + private void writeOpenFieldElement(String fieldName, XMLWriter writer) throws IOException { + Utf8String utf8 = fieldNameMap.get(fieldName); + if (utf8 == null) { + utf8 = new Utf8String(fieldName); + fieldNameMap.put(fieldName, utf8); + } + writer.openTag(FIELD).attribute(NAME, utf8); + writer.closeStartTag(); + } + + private void writeCloseFieldElement(XMLWriter writer) throws IOException { // TODO: Collapse + writer.closeTag(); + } + + protected void renderFieldContent(Context context, Hit hit, + String name, XMLWriter writer) + throws IOException { + + boolean dumpedRaw = false; + if (hit instanceof FastHit && ((FastHit)hit).fieldIsNotDecoded(name)) { + writer.closeStartTag(); + if ((writer.getWriter() instanceof ByteWriter) && context.isUtf8Output()) { + dumpedRaw = dumpBytes((ByteWriter) writer.getWriter(), (FastHit) hit, name); + } + if (dumpedRaw) { + 
+ writer.content("",false); // let the xml writer note that this tag had content + } + } + if (!dumpedRaw) { + String xmlval = hit.getFieldXML(name); + if (xmlval == null) { + xmlval = "(null)"; + } + writer.escapedContent(xmlval,false); + } + } + + private void renderSimpleField(String fieldName, Object fieldValue, XMLWriter writer) throws IOException { + writeOpenFieldElement(fieldName, writer); + writer.content(fieldValue.toString(),false); + writeCloseFieldElement(writer); + } + + /** Returns whether a field should be rendered. This default implementation always returns true */ + protected boolean shouldRenderField(Hit hit, String fieldName) { + // skip depending on hit type + return true; + } + + + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/FormattingOptions.java b/container-search/src/main/java/com/yahoo/prelude/templates/FormattingOptions.java new file mode 100644 index 00000000000..307378f2106 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/templates/FormattingOptions.java @@ -0,0 +1,189 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.templates; + +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.Map; +import java.util.Set; + +/** + * Defines formatting options used with special kinds of hits. + * + * @author laboisse + */ +public class FormattingOptions { + + public static final String DEFAULT_TYPE_ATTRIBUTE_NAME = "type"; + + /** + * A structure that defines the tag name and attribute name for a field + * that should be formatted as a field with a subtype. 
+ * @author laboisse + * + */ + static class SubtypeField { + String tagName; + String attributeName; + String attributeValue; + } + + static class SubtypeFieldWithPrefix extends SubtypeField { + + /* Note: attributeValue should always be null for instances of this class */ + + int prefixLength; + } + + private Map<String, String> fieldsAsAttributes = new LinkedHashMap<>(); + + private Map<String, SubtypeField> fieldsWithSubtypes = new LinkedHashMap<>(); + private Map<String, SubtypeFieldWithPrefix> prefixedFieldsWithSubtypes = new LinkedHashMap<>(); + + private Set<String> fieldsNotRendered = new LinkedHashSet<>(); + private Set<String> fieldsRendered = new LinkedHashSet<>(); + + /** + * Tells to format a field as an attribute of the hit's tag. + * + * For instance, field 'query-latency' could be rendered as an attribute 'latency' by + * invoking {@code formatFieldAsAttribute("query-latency", "latency")}. + * + * Output would be: + * <pre> + * <hit latency="100"></hit> + * </pre> + * instead of: + * <pre> + * <hit><latency>100</latency></hit> + * </pre> + */ + public void formatFieldAsAttribute(String fieldName, String attributeName) { + fieldsAsAttributes.put(fieldName, attributeName); + } + + public Set<Map.Entry<String, String>> fieldsAsAttributes() { + return Collections.unmodifiableSet(this.fieldsAsAttributes.entrySet()); + } + + public String getAttributeName(String fieldName) { + return this.fieldsAsAttributes.get(fieldName); + } + + /** + * Tells to format a field using a subtype. A subtype is used when there is kind of a grouping + * for a set of fields. + * + * For instance, fields 'latency-connect', 'latency-finish' all belong to the same 'latency' logical group. + * So invoking {@code formatFieldWithSubtype("latency-connect", "latency", "type", "connect")}, + * {@code formatFieldWithSubtype("latency-finish", "latency", "type", "finish")} and so on, + * allows to have a common 'latency' tag name for all fields of the same kind. 
+ * Note that it does no collapsing on tags. + * + * Output would be: + * <pre> + * <latency type="connect">50</latency> + * <latency type="finish">250</latency> + * </pre> + * Instead of: + * <pre> + * <hit> + * <latency-connect>50</latency-connect> + * <latency-finish>250</latency-finish> + * </pre> + */ + public void formatFieldWithSubtype(String fieldName, String tagName, String typeAttributeName, String typeAttributeValue) { + SubtypeField names = new SubtypeField(); + names.attributeName = typeAttributeName; + names.attributeValue = typeAttributeValue; + names.tagName = tagName; + fieldsWithSubtypes.put(fieldName, names); + } + + public SubtypeField getSubtype(String fieldName) { + return this.fieldsWithSubtypes.get(fieldName); + } + + /** + * Same as {@link #formatFieldWithSubtype(String, String, String, String)} except that fields + * are selected based on the beginning of their name and the type attribute value is deduced + * from the rest of their name. So this may select many fields instead of only one. + * Invoking {@code formatFieldWithSubtype("latency-", "latency", "type")} only once allows to have a common 'latency' + * tag name for all fields that start with 'latency-'. Type attribute value will be 'start' for field 'latency-start'. + * Note that it does no collapsing on tags. + * + * This is mostly used when you don't know all field names ahead. + * + * Output would be: + * <pre> + * <latency type="connect">50</latency> + * <latency type="finish">250</latency> + * </pre> + * Instead of: + * <pre> + * <hit> + * <latency-connect>50</latency-connect> + * <latency-finish>250</latency-finish> + * </pre> + * + * Note: don't use this with prefixes that start with a common substring (e.g. 'http', 'http_proxy'): lookup returns the first registered prefix that matches, so the longer prefix may never be selected. 
+ */ + public void formatFieldWithSubtype(String fieldNamePrefix, String tagName, String typeAttributeName) { + SubtypeFieldWithPrefix names = new SubtypeFieldWithPrefix(); + names.attributeName = typeAttributeName; + names.tagName = tagName; + names.prefixLength = fieldNamePrefix.length(); + prefixedFieldsWithSubtypes.put(fieldNamePrefix, names); + } + + public SubtypeFieldWithPrefix getSubtypeWithPrefix(String fieldName) { + for(Map.Entry<String, SubtypeFieldWithPrefix> e : this.prefixedFieldsWithSubtypes.entrySet()) { + if(fieldName.startsWith(e.getKey())) + return e.getValue(); + } + return null; + } + + /** + * Tells whether a field should be rendered. + * + * @see #setFieldNotToRender(String) + * @see #setFieldToRender(String) + */ + public boolean shouldRenderField(String fieldName) { + if(fieldName == null) + return false; + if (fieldName.startsWith("$")) { + return false; + } + if(!this.fieldsRendered.isEmpty()) + return this.fieldsRendered.contains(fieldName); + return !this.fieldsNotRendered.contains(fieldName); + } + + /** + * Tells a field should be rendered. + * + * <p> + * Note: if at least one field is set to render, then only + * these fields should be rendered. Use {@link #setFieldNotToRender(String)} + * to only exclude specific fields. + */ + public void setFieldToRender(String fieldName) { + this.fieldsRendered.add(fieldName); + } + + /** + * Tells a field should not be rendered. + * + * <p> + * Note: all other fields should be rendered. Use {@link #setFieldToRender(String)} + * to only include specific fields. 
+ */ + public void setFieldNotToRender(String fieldName) { + this.fieldsNotRendered.add(fieldName); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/GenericTemplateSet.java b/container-search/src/main/java/com/yahoo/prelude/templates/GenericTemplateSet.java new file mode 100644 index 00000000000..fd43fc83a12 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/templates/GenericTemplateSet.java @@ -0,0 +1,155 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.templates; + +import com.yahoo.protect.Validator; +import com.yahoo.search.Query; + +import java.io.Writer; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Properties; + +/** + * Superclass of a set of templates for rendering (serializing) results + */ +// TODO: Deprecate everything having to do with Templates - we should only support Renderers to serialize a Result. +public class GenericTemplateSet { + + public static final String DEFAULT_MIMETYPE = "text/xml"; + public static final String DEFAULT_ENCODING = "utf-8"; + + /** Templates */ + private HashMap<String, Template<? 
extends Writer>> templates; + + /** The text MIME subtype this template returns, xml, plain or html */ + private String mimeType; + + /** The charset encoding this template should have */ + private String encoding; + + private String boldOpenTag = null; + private String boldCloseTag = null; + private String separatorTag = null; + + /** + * Document summary class for this template + */ + private String summaryClass = null; + + /** + * The unique name of this template set + */ + private final String name; + + /** + * Creates a template set containing no templates + */ + public GenericTemplateSet(String name, String mimeType, String encoding) { + this.mimeType = mimeType; + this.encoding = encoding; + this.name = name; + + templates = new LinkedHashMap<>(); + } + + + public String getName() { + return name; + } + + /** + * Returns the MIME type given at construction + */ + public String getMimeType() { return mimeType; } + + /** + * Returns the text encoding + */ + public String getEncoding() { return encoding; } + + /** Returns the encoding of the query, or the encoding given by the template if none is set */ + public final String getRequestedEncoding(Query query) { + String encoding = query.getModel().getEncoding(); + if (encoding != null) return encoding; + return getEncoding(); + } + + /** + * Returns the selected template + * + * @return the template registered under the given name, or null if there is none + */ + public Template<? extends Writer> getTemplate(String templateName) { + return templates.get(templateName); + } + + /** + * Sets the selected template + * + * Unlike {@code setTemplateNotNull}, no null check is done on the given template + */ + public void setTemplate(String templateName, Template<? extends Writer> template) { + templates.put(templateName,template); + } + + /** + * Sets the selected template + * + * @throws NullPointerException if the given template is null + */ + public void setTemplateNotNull(String templateName, Template<? 
extends Writer> template) { + Validator.ensureNotNull("Template "+templateName,template); + templates.put(templateName,template); + } + + + /** + * Sets the highlighting marks for this template + * + * @param start the highlighting start mark + * @param end the highlighting end mark + * @param sep the highlighting separator mark + */ + public void setHighlightTags(String start, String end, String sep) { + boldOpenTag = start; + boldCloseTag = end; + separatorTag = sep; + } + + // may return null + public String getBoldOpenTag() { + return boldOpenTag; + } + + // may return null + public String getBoldCloseTag() { + return boldCloseTag; + } + + // may return null + public String getSeparatorTag() { + return separatorTag; + } + + + /** + * Set the default summary class to use with this template. + */ + public void setSummaryClass(String summaryClass) { + this.summaryClass = summaryClass; + } + + /** + * Type safe accessor to get the default document summary class for this + * template set. This is also here to insulate the rest of the code + * against changes in the naming of the properties in the property file. Returns null if no summary class is set or it is empty. + */ + public String getSummaryClass() { + if (summaryClass != null && ! summaryClass.isEmpty()) { + return summaryClass; + } else { + return null; + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/HitContext.java b/container-search/src/main/java/com/yahoo/prelude/templates/HitContext.java new file mode 100644 index 00000000000..745a5ad85fe --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/templates/HitContext.java @@ -0,0 +1,144 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.templates; + +import com.yahoo.prelude.hitfield.HitField; +import com.yahoo.prelude.hitfield.JSONString; +import com.yahoo.prelude.hitfield.XMLString; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.StructuredData; +import com.yahoo.search.result.FeatureData; +import com.yahoo.text.XML; + +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; + +/** + * A context providing all the fields of a hit, and falls back to MapContext behavior for all other keys. + * + * @author tonytv + */ +public class HitContext extends Context { + + private final Hit hit; + private final Context fallbackContext; + + public HitContext(Hit hit, Context fallbackContext) { + this.hit = hit; + this.fallbackContext = fallbackContext; + } + + @Override + public Object put(String key, Object value) { + return fallbackContext.put(key, value); + } + + @Override + public Object get(String key) { + Object value = normalizedHitProperty(key); + return value != null ? + value : + fallbackContext.get(key); + } + + @Override + public Object remove(Object key) { + return fallbackContext.remove(key); + } + + @Override + public Collection<? 
extends Object> getKeys() { + Set<Object> keys = new HashSet<>(fallbackContext.getKeys()); + keys.addAll(hit.fieldKeys()); + return keys; + } + + @Override + public void setBoldOpenTag(String boldOpenTag) { + fallbackContext.setBoldOpenTag(boldOpenTag); + } + + @Override + public void setBoldCloseTag(String boldCloseTag) { + fallbackContext.setBoldCloseTag(boldCloseTag); + } + + @Override + public void setSeparatorTag(String separatorTag) { + fallbackContext.setSeparatorTag(separatorTag); + } + + @Override + public String getBoldOpenTag() { + return fallbackContext.getBoldOpenTag(); + } + + @Override + public String getBoldCloseTag() { + return fallbackContext.getBoldCloseTag(); + } + + @Override + public String getSeparatorTag() { + return fallbackContext.getSeparatorTag(); + } + + @Override + //TVT: TODO: Make this package private again. + public boolean isUtf8Output() { + return fallbackContext.isUtf8Output(); + } + + @Override + //TODO: TVT: make this package private again + public void setUtf8Output(boolean utf8Output) { + fallbackContext.setUtf8Output(utf8Output); + } + + @Override + public void setXmlEscape(boolean xmlEscape) { + fallbackContext.setXmlEscape(xmlEscape); + } + + @Override + public boolean getXmlEscape() { + return fallbackContext.getXmlEscape(); + } + + @Override + protected Object normalizeValue(Object value) { + return fallbackContext.normalizeValue(value); + } + + private Object normalizedHitProperty(String key) { + Object value = hit.getField(key); + return value == null ? 
+ null : + normalizeHitFieldValue(value); + } + + private Object normalizeHitFieldValue(Object value) { + if (value instanceof HitField) { + HitField hf = (HitField) value; + if (getXmlEscape()) { + return hf.quotedContent(getBoldOpenTag(), + getBoldCloseTag(), + getSeparatorTag(), + true); + } else { + return hf.getContent(getBoldOpenTag(), + getBoldCloseTag(), + getSeparatorTag()); + } + } else if (value instanceof StructuredData) { + return value.toString(); + } else if (value instanceof XMLString || value instanceof JSONString) { + return value.toString(); + } else if (getXmlEscape()) { + return XML.xmlEscape(value.toString(), true, null); + } else { + return value.toString(); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/LogExceptionUserTemplateDelegator.java b/container-search/src/main/java/com/yahoo/prelude/templates/LogExceptionUserTemplateDelegator.java new file mode 100644 index 00000000000..49163e8fa90 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/templates/LogExceptionUserTemplateDelegator.java @@ -0,0 +1,196 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.templates; + +import com.yahoo.log.LogLevel; +import com.yahoo.yolean.Exceptions; + +import java.io.IOException; +import java.io.Writer; +import java.util.Properties; +import java.util.logging.Logger; + +/** + * Delegates to another UserTemplate, but handles any exceptions(except IOException) by logging them. 
+ * @author tonytv + */ +public class LogExceptionUserTemplateDelegator<T extends Writer> extends UserTemplate<T> { + + private static Logger log = Logger.getLogger(LogExceptionUserTemplateDelegator.class.getName()); + private final UserTemplate<T> delegate; + + public LogExceptionUserTemplateDelegator(UserTemplate<T> delegate) { + super(LogExceptionUserTemplateDelegator.class.getSimpleName()); + this.delegate = delegate; + } + + @Override + public Context createContext() { + return delegate.createContext(); + } + + @Override + public T wrapWriter(Writer writer) { + return delegate.wrapWriter(writer); + } + + @Override + public boolean isDefaultTemplateSet() { + return delegate.isDefaultTemplateSet(); + } + + @Override + public String getSummaryClass() { + return delegate.getSummaryClass(); + } + + @Override + public String getBoldOpenTag() { + return delegate.getBoldOpenTag(); + } + + @Override + public String getBoldCloseTag() { + return delegate.getBoldCloseTag(); + } + + @Override + public String getSeparatorTag() { + return delegate.getSeparatorTag(); + } + + @Override + public void setSummaryClass(String summaryClass) { + delegate.setSummaryClass(summaryClass); + } + + @Override + public void setHighlightTags(String start, String end, String sep) { + delegate.setHighlightTags(start, end, sep); + } + + @Override + public String getName() { + return delegate.getName(); + } + + @Override + public String getMimeType() { + return delegate.getMimeType(); + } + + @Override + public String getEncoding() { + return delegate.getEncoding(); + } + + @Override + public Template<T> getTemplate(String templateName) { + throw new UnsupportedOperationException(); + } + + @Override + public void setTemplate(String templateName, Template<? extends Writer> template) { + throw new UnsupportedOperationException(); + } + + @Override + public void setTemplateNotNull(String templateName, Template<? 
extends Writer> template) { + throw new UnsupportedOperationException(); + } + + /*** Template + + @Override + public void <methodName>(Context context, T writer) throws IOException { + try { + delegate.<methodName>(context, writer); + } catch (Exception e) { + handleException(e); + } + } + + ***/ + + /*** Begin expanded template for + header, footer, hit, hitFooter, error, noHits, queryContext, + Thanks java, for giving me the opportunely to use copy-paste ***/ + + + @Override + public void header(Context context, T writer) throws IOException { + try { + delegate.header(context, writer); + } catch (Exception e) { + handleException(e); + } + } + + @Override + public void footer(Context context, T writer) throws IOException { + try { + delegate.footer(context, writer); + } catch (Exception e) { + handleException(e); + } + } + + @Override + public void hit(Context context, T writer) throws IOException { + try { + delegate.hit(context, writer); + } catch (Exception e) { + handleException(e); + } + } + + @Override + public void hitFooter(Context context, T writer) throws IOException { + try { + delegate.hitFooter(context, writer); + } catch (Exception e) { + handleException(e); + } + } + + @Override + public void error(Context context, T writer) throws IOException { + try { + delegate.error(context, writer); + } catch (Exception e) { + handleException(e); + } + } + + @Override + public void noHits(Context context, T writer) throws IOException { + try { + delegate.noHits(context, writer); + } catch (Exception e) { + handleException(e); + } + } + + @Override + public void queryContext(Context context, T writer) throws IOException { + try { + delegate.queryContext(context, writer); + } catch (Exception e) { + handleException(e); + } + } + + /*** End expanded template. 
***/ + + private void handleException(Exception e) throws IOException { + if (e instanceof IOException) { + throw (IOException) e; + } else { + log.log(LogLevel.WARNING, "Exception thrown in " + getName() + + ": " + Exceptions.toMessageString(e), e); + } + } + + UserTemplate<T> getDelegate() { + return delegate; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/MapContext.java b/container-search/src/main/java/com/yahoo/prelude/templates/MapContext.java new file mode 100644 index 00000000000..328faee5c29 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/templates/MapContext.java @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.templates; + +import java.util.Collection; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Map; + +/** A context having a map as secondary storage */ +public class MapContext extends Context { + + private Map<String, Object> map = new LinkedHashMap<>(); + + @Override + public Object get(String key) { + return normalizeValue(map.get(key)); + } + + public Object put(String name, Object value) { + return map.put(name, value); + } + + public Object remove(Object name) { + return map.remove(name); + } + + @Override + public Collection<? extends Object> getKeys() { + return map.keySet(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/PageTemplateSet.java b/container-search/src/main/java/com/yahoo/prelude/templates/PageTemplateSet.java new file mode 100644 index 00000000000..26b51187954 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/templates/PageTemplateSet.java @@ -0,0 +1,72 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.templates; + +import com.yahoo.search.Result; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.HitGroup; +import com.yahoo.text.XMLWriter; + +import java.io.IOException; +import java.io.Writer; + +/** + * A template implementing the 'page' format. + * This is a variant of the tiled template set - see that class for details. + * + * @author bratseth + */ +public class PageTemplateSet extends TiledTemplateSet { + + public PageTemplateSet() { + super("page"); + } + + @Override + /** Uses an XML writer in this */ + public XMLWriter wrapWriter(Writer writer) { return new XMLWriter(super.wrapWriter(writer)); } + + @Override + public void header(Context context,XMLWriter writer) throws IOException { + Result result=(Result)context.get("result"); + writer.xmlHeader(getRequestedEncoding(result.getQuery())); + writer.openTag("page").attribute("version","1.0").attribute("layout",result.hits().getField("layout")); + renderCoverageAttributes(result.getCoverage(false), writer); + writer.closeStartTag(); + renderSectionContent(result.hits(),writer); + } + + @Override + public void footer(Context context,XMLWriter writer) throws IOException { + if (writer.isIn("content")) + writer.closeTag(); + super.footer(context,writer); + } + + @Override + protected void renderSection(HitGroup hit, XMLWriter writer) throws IOException { + writer.openTag("section"); + writer.attribute("id",hit.getDisplayId()); + writer.attribute("layout",hit.getField("layout")); + writer.attribute("region",hit.getField("region")); + writer.closeStartTag(); + renderSectionContent(hit,writer); + } + + @Override + public void hit(Context context, XMLWriter writer) throws IOException { + Hit hit = (Hit) context.get("hit"); + if (!hit.isMeta() && !writer.isIn("content")) + writer.openTag("content"); + super.hit(context,writer); + } + + @Override + public void hitFooter(Context context, XMLWriter writer) throws IOException { + if (writer.isIn("content")) + 
writer.closeTag(); + super.hitFooter(context, writer); + } + + public String toString() { return "page template"; } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/SearchRendererAdaptor.java b/container-search/src/main/java/com/yahoo/prelude/templates/SearchRendererAdaptor.java new file mode 100644 index 00000000000..ca9dba6fc0f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/templates/SearchRendererAdaptor.java @@ -0,0 +1,256 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.templates; + +import com.yahoo.prelude.fastsearch.GroupingListHit; +import com.yahoo.search.Result; +import com.yahoo.search.query.context.QueryContext; +import com.yahoo.search.rendering.Renderer; +import com.yahoo.search.result.*; +import com.yahoo.search.result.ErrorHit; +import com.yahoo.processing.request.ErrorMessage; +import com.yahoo.search.result.Hit; +import com.yahoo.text.XMLWriter; + +import java.io.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; + +/** + * Renders a search result using the old templates API. + * + * @author tonytv + */ +@SuppressWarnings({ "rawtypes", "deprecation", "unchecked" }) +public final class SearchRendererAdaptor extends Renderer { + + private final LogExceptionUserTemplateDelegator templates; + + //Per instance members, must be created at rendering time, not construction time due to cloning. + private Context context; + + public SearchRendererAdaptor(UserTemplate userTemplate) { + templates = new LogExceptionUserTemplateDelegator(userTemplate); + } + + @Override + public void init() { + super.init(); + context = templates.createContext(); + } + + /** A legacy test utility - do not use. 
*/ + public static void callRender(OutputStream stream, Result result) throws IOException { + Renderer rendererAdaptor = new SearchRendererAdaptor(result.getTemplating().getTemplates()); + rendererAdaptor.init(); + result.getTemplating().setRenderer(rendererAdaptor); + rendererAdaptor.render(stream, result, result.getQuery().getModel().getExecution(), result.getQuery()); + } + + @Override + public String getEncoding() { + return templates.getEncoding(); + } + + @Override + public String getMimeType() { + return templates.getMimeType(); + } + + @Override + public String getDefaultSummaryClass() { + return templates.getSummaryClass(); + } + + /** + * Renders this result + */ + public void render(Writer writer, Result result) throws java.io.IOException { + Writer wrappedWriter = wrapWriter(writer); + + beginResult(wrappedWriter, result); + + if (result.hits().getError() != null || result.hits().getQuery().errors().size() > 0) { + error(wrappedWriter, Collections.unmodifiableCollection( + all(result.hits().getQuery().errors(), result.hits().getError()))); + } + + if (result.getConcreteHitCount() == 0) { + emptyResult(wrappedWriter, result); + } + + if (result.getContext(false) != null) { + queryContext(wrappedWriter, result.getContext(false)); + } + + renderHitGroup(wrappedWriter, result.hits(), result.hits().getQuery().getOffset() + 1); + + endResult(wrappedWriter, result); + } + + + private <T> Collection<T> all(Collection<T> collection, T extra) { + Collection<T> result = new ArrayList<>(collection); + result.add(extra); + return result; + } + + + public Writer wrapWriter(Writer writer) { + return templates.wrapWriter(writer); + } + + + public void beginResult(Writer writer, Result result) throws IOException { + context.put("context", context); + context.put("result", result); + context.setBoldOpenTag(templates.getBoldOpenTag()); + context.setBoldCloseTag(templates.getBoldCloseTag()); + context.setSeparatorTag(templates.getSeparatorTag()); + + 
templates.header(context, writer); + } + + public void endResult(Writer writer, Result result) throws IOException { + templates.footer(context, writer); + } + + public void error(Writer writer, Collection<ErrorMessage> errorMessages) throws IOException { + templates.error(context, writer); + } + + + public void emptyResult(Writer writer, Result result) throws IOException { + templates.noHits(context, writer); + } + + public void queryContext(Writer writer, QueryContext queryContext) throws IOException { + templates.queryContext(context, writer); + } + + private void renderHitGroup(Writer writer, HitGroup hitGroup, int hitnumber) + throws IOException { + boolean defaultTemplate = templates.isDefaultTemplateSet(); + for (Hit hit : hitGroup.asList()) { + if (!defaultTemplate && hit instanceof ErrorHit) continue; // TODO: Stop doing this + + renderHit(writer, hit, hitnumber); + if (!hit.isAuxiliary()) + hitnumber++; + } + } + + + /** + * Renders this hit as xml. The default implementation will call the simpleRender() + * hook. If it returns true, nothing more is done, otherwise the + * given template set will be used for rendering. 
+ * + * + * @param writer the writer to append this hit to + * @throws java.io.IOException if rendering fails + */ + public void renderHit(Writer writer, Hit hit, int hitno) throws IOException { + renderRegularHit(writer, hit, hitno); + } + + private void renderRegularHit(Writer writer, Hit hit, int hitno) throws IOException { + boolean renderedSimple = simpleRenderHit(writer, hit); + + if (renderedSimple) { + return; + } + + HitContext hitContext = new HitContext(hit, context); + hitContext.put("hit", hit); + hitContext.put("hitno", new Integer(hitno)); + hitContext.put("relevancy",hit.getRelevance()); + templates.hit(hitContext, writer); + + if (hit instanceof HitGroup) + renderHitGroup(writer, (HitGroup) hit, hitno); + + // Put these back - may have been changed by nested rendering + hitContext.put("hit", hit); + hitContext.put("hitno", new Integer(hitno)); + templates.hitFooter(hitContext, writer); + + + hitContext.remove("hit"); + hitContext.remove("hitno"); + } + + private boolean simpleRenderHit(Writer writer, Hit hit) throws IOException { + if (hit instanceof DefaultErrorHit) { + return simpleRenderDefaultErrorHit(writer, (DefaultErrorHit) hit); + } else if (hit instanceof GroupingListHit) { + return true; + } else { + return false; + } + } + + public static boolean simpleRenderDefaultErrorHit(Writer writer, ErrorHit defaultErrorHit) throws IOException { + XMLWriter xmlWriter=(writer instanceof XMLWriter) ? 
(XMLWriter)writer : new XMLWriter(writer,10,-1); + xmlWriter.openTag("errordetails"); + for (Iterator i = defaultErrorHit.errorIterator(); i.hasNext();) { + ErrorMessage error = (ErrorMessage) i.next(); + renderMessageDefaultErrorHit(xmlWriter, error); + } + xmlWriter.closeTag(); + return true; + } + + public static void renderMessageDefaultErrorHit(XMLWriter writer, ErrorMessage error) throws IOException { + writer.openTag("error"); + if (error instanceof com.yahoo.search.result.ErrorMessage) + writer.attribute("source",((com.yahoo.search.result.ErrorMessage)error).getSource()); + writer.attribute("error",error.getMessage()); + writer.attribute("code",Integer.toString(error.getCode())); + writer.content(error.getDetailedMessage(),false); + if (error.getCause()!=null) { + writer.openTag("cause"); + writer.content("\n",true); + StringWriter stackTrace=new StringWriter(); + error.getCause().printStackTrace(new PrintWriter(stackTrace)); + writer.content(stackTrace.toString(),true); + writer.closeTag(); + } + writer.closeTag(); + } + + /** + * Renders this hit as XML, disregarding the given template. + * The main error will be rendered first, the all the following errors. + */ + public boolean simpleRenderErrorHit(Writer writer, com.yahoo.search.result.ErrorHit errorHit) throws IOException { + XMLWriter xmlWriter=(writer instanceof XMLWriter) ? 
(XMLWriter)writer : new XMLWriter(writer,10,-1); + xmlWriter.openTag("errordetails"); + for (Iterator i = errorHit.errorIterator(); i.hasNext();) { + ErrorMessage error = (ErrorMessage) i.next(); + rendererErrorHitMessageMessage(xmlWriter, errorHit, error); + } + xmlWriter.closeTag(); + return true; + } + + public static void rendererErrorHitMessageMessage(XMLWriter writer, com.yahoo.search.result.ErrorHit errorHit, ErrorMessage error) throws IOException { + writer.openTag("error"); + if (errorHit instanceof Hit) { + writer.attribute("source", ((Hit) errorHit).getSource()); + } + writer.attribute("error",error.getMessage()); + writer.attribute("code",Integer.toString(error.getCode())); + writer.content(error.getDetailedMessage(),false); + writer.closeTag(); + } + + /** + * For internal use only + */ + public UserTemplate getAdaptee() { + return templates.getDelegate(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/Template.java b/container-search/src/main/java/com/yahoo/prelude/templates/Template.java new file mode 100644 index 00000000000..7052671a584 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/templates/Template.java @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.templates; + +import java.io.Writer; + + +/** + * A template turns a template string and some state into + * an instantiated string. Add support for a particular + * template mechanism by subclassing this. 
+ * + * @author bratseth + */ +public abstract class Template<T extends Writer> { + + /** + * Renders this template + * + * @param context the context to evaluate in + * @param writer the writer to render to + */ + public abstract void render(Context context,T writer) + throws java.io.IOException; + + + /** + * Get template name + * + * @return template name + */ + public abstract String getName(); + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/TemplateSet.java b/container-search/src/main/java/com/yahoo/prelude/templates/TemplateSet.java new file mode 100644 index 00000000000..6cf6ee640a7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/templates/TemplateSet.java @@ -0,0 +1,214 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.templates; + +import com.yahoo.search.Result; +import com.yahoo.search.result.Hit; +import com.yahoo.text.GenericWriter; +import com.yahoo.text.XMLWriter; + +import java.io.IOException; +import java.io.Writer; + +/** + * <p>A template set contains instances of the various templates + * required to render a result.</p> + * + * <p>Normal usage is to create an instance and populate it with templates, + * but this class also supports subclassing to refine the behaviour, + * like returning different templates for different hit types.</p> + * + * @author bratseth + */ +public class TemplateSet<T extends Writer> extends UserTemplate<T> { + + private static final String queryContextTemplateName = "queryContext"; + + private static final DefaultTemplateSet defaultTemplateSet=new DefaultTemplateSet(); + + /** + * Creates a template set containing no templates + * + * @param name the unique name of this template set, used for + * refering to it by clients + */ + public TemplateSet(String name, + String mimeType, + String encoding) { + super(name, mimeType,encoding); + } + + /** + * Returns the default template 
set. This is a template set which renders in + * the default xml format + */ + public static UserTemplate<XMLWriter> getDefault() { + return defaultTemplateSet; + } + + /** + * Returns the result header template + * + * @param result the result which will use the template + * @return the template to use, never null + */ + @SuppressWarnings("unchecked") + public Template<T> getHeader(Result result) { return (Template<T>) getTemplate("header"); } + + /** + * Sets the header template + * + * @param header the template to use for rendering getHeaders + * @throws NullPointerException if the given template is null + */ + public void setHeader(Template<T> header) { + setTemplateNotNull("header",header); + } + + /** + * Returns the result footer template + * + * @param result the result which will use the template + * @return the template to use, never null + */ + @SuppressWarnings("unchecked") + public Template<T> getFooter(Result result) { return (Template<T>) getTemplate("footer"); } + + /** + * Sets the footer template + * + * @param footer the template to use for rendering footers + * @throws NullPointerException if the given template is null + */ + public void setFooter(Template<T> footer) { + setTemplateNotNull("footer",footer); + } + + /** + * Returns the empty body template + * + * @param result the result which will use the template + * @return the template to use, never null + */ + @SuppressWarnings("unchecked") + public Template<T> getNohits(Result result) { return (Template<T>) getTemplate("nohits"); } + + + /** + * @return the template for rendering the query context, never null + */ + @SuppressWarnings("unchecked") + public Template<T> getQueryContext(Result result) { + return (Template<T>) getTemplate(queryContextTemplateName); + } + + /** + * @param template The template to be used for rendering query contexts, never null. 
+ */ + public void setQueryContext(Template<T> template) { + setTemplateNotNull(queryContextTemplateName, template); + } + + /** + * Sets the nohits template + * + * @param nohits the template to use for rendering empty results + * @throws NullPointerException if the given template is null + */ + public void setNohits(Template<T> nohits) { + setTemplateNotNull("nohits",nohits); + } + + /** + * Returns the error body template + * + * @param result the result which will use the template + * @return the template to use, never null + */ + @SuppressWarnings("unchecked") + public Template<T> getError(Result result) { return (Template<T>) getTemplate("error"); } + + /** + * Sets the error template + * + * @param error the template to use for rendering errors + * @throws NullPointerException if the given template is null + */ + public void setError(Template<T> error) { + setTemplateNotNull("error",error); + } + + /** + * Returns the hit template + * + * @param resultHit the hit which will use the template + * @return the template to use, never null + */ + @SuppressWarnings("unchecked") + public Template<T> getHit(Hit resultHit) { return (Template<T>) getTemplate("hit"); } + + /** + * Sets the hit template + * + * @param hit the template to use for rendering hits + * @throws NullPointerException if the given template is null + */ + public void setHit(Template<T> hit) { + setTemplateNotNull("hit",hit); + } + + /** + * Returns the hit footer template + * + * @param hit the hit which will use the template + * @return the template to use, or null if no hit footer is used + */ + @SuppressWarnings("unchecked") + public Template<T> getHitFooter(Hit hit) { return (Template<T>) getTemplate("hitfooter"); } + + public String toString() { + return "template set " + getName() + " of type " + getMimeType() + + " [header=" + getTemplate("header") + + ",footer=" + getTemplate("footer") + + ",nohits=" + getTemplate("nohits") + + ",error=" + getTemplate("error") + + ",hit=" + 
getTemplate("hit") + "]"; + } + + @Override + public void header(Context context, T writer) throws IOException { + getHeader(null).render(context, writer); + } + + @Override + public void footer(Context context, T writer) throws IOException { + getFooter(null).render(context, writer); + } + + @Override + public void hit(Context context, T writer) throws IOException { + getHit(null).render(context, writer); + } + + @Override + public void error(Context context, T writer) throws IOException { + getError(null).render(context, writer); + } + + @Override + public void hitFooter(Context context, T writer) throws IOException { + Template<T> hitFooter = getHitFooter(null); + if (hitFooter != null) + hitFooter.render(context, writer); + } + + @Override + public void noHits(Context context, T writer) throws IOException { + getNohits(null).render(context, writer); + } + + @Override + public void queryContext(Context context, T writer) throws IOException { + getQueryContext(null).render(context, writer); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/TiledTemplateSet.java b/container-search/src/main/java/com/yahoo/prelude/templates/TiledTemplateSet.java new file mode 100644 index 00000000000..b2564beeb7a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/templates/TiledTemplateSet.java @@ -0,0 +1,337 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.templates; + +import com.yahoo.container.ConfigHack; +import com.yahoo.prelude.templates.FormattingOptions.SubtypeFieldWithPrefix; +import com.yahoo.search.Result; +import com.yahoo.search.pagetemplates.model.Renderer; +import com.yahoo.search.pagetemplates.model.Source; +import com.yahoo.search.pagetemplates.result.SectionHitGroup; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.HitGroup; +import com.yahoo.text.XMLWriter; + +import java.io.IOException; +import java.io.Writer; +import java.util.Iterator; +import java.util.Map; + +/** + * A template set which implements the 'tiled' format. + * + * This template implementation requires a few rules to be observed for it to work properly: + * <ul> + * <li>As hit fields are rendered as XML tag names, their name must be compatible with XML tag names.</li> + * <li>Results sections, meta section, provider tags are rendered based on hits having specific types (as in {@link Hit#types()}, + * see table below for a list of hit types that are needed in order for hits to render properly.</li> + * <li>Some fields inside hits corresponding to provider tags (/result/meta/provider) are formatted in a specific way, see provider fields formatting options + * below. 
Other fields are rendered the usual way.</li> + * </ul> + * + * <p>Hit types required for proper rendering</p> + * <table summary="Hit types required for proper rendering"> + * <tr><td>XML tag path</td><td>Required hit type</td></tr> + * <tr><td>/result/section</td><td>A hit group and have a "section" type</td></tr> + * <tr><td>/result/meta</td><td>A hit group and have a "meta" type</td></tr> + * <tr><td>/result/meta/provider</td><td>A hit that has a "logging" type</td></tr> + * </table> + * + * <p>Provider fields formatting options</p> + * <table summary="Provider fields formatting options"> + * <tr><td>Field</td><td>Formatting</td><td>Field type</td></tr> + * <tr><td>provider</td><td>name attribute of <provider> tag</td><td>Provided by container</td></tr> + * <tr><td>scheme</td><td>scheme attribute of <provider> tag</td><td>Provided by container</td></tr> + * <tr><td>host</td><td>host attribute of <provider> tag</td><td>Provided by container</td></tr> + * <tr><td>port</td><td>port attribute of <provider> tag</td><td>Provided by container</td></tr> + * <tr><td>path</td><td>path attribute of <provider> tag</td><td>Provided by container</td></tr> + * <tr><td>status</td><td>result attribute of <provider> tag</td><td>Provided by container</td></tr> + * <tr><td>latency_connect</td><td><latency type="connect"> tag</td><td>Provided by container</td></tr> + * <tr><td>latency_start</td><td><latency type="start"> tag</td><td>Provided by container</td></tr> + * <tr><td>latency_finish</td><td><latency type="finish"> tag</td><td>Provided by container</td></tr> + * <tr><td>query_param_*</td><td><parameter name="..."> tag</td><td>Provided by container</td></tr> + * <tr><td>header_*</td><td><header name="..."> tag</td><td>Provided by container</td></tr> + * <tr><td>response_header_*</td><td><response-header name="..."> tag</td><td>Provided by container</td></tr> + * <tr><td>count_first</td><td><count type="first"> tag</td><td>Provided by container</td></tr> + * 
<tr><td>count_last</td><td><count type="last"> tag</td><td>Provided by container</td></tr> + * <tr><td>count_total</td><td><count type="total"> tag</td><td>Provided by container</td></tr> + * <tr><td>count_deep</td><td><count type="deep"> tag</td><td>Provided by container</td></tr> + * <tr><td>queryattrs_xorronum</td><td><queryattrs name="xorronum"> tag</td><td>Provided by YST searcher</td></tr> + * <tr><td>queryattrs_RankFeaturesRewriterAttr</td><td><queryattrs name="RankFeaturesRewriterAttr"> tag</td><td>Provided by YST searcher</td></tr> + * <tr><td>queryattrs_intlannotator</td><td><queryattrs name="intlannotator"> tag</td><td>Provided by YST searcher</td></tr> + * <tr><td>queryattrs_category</td><td><queryattrs name="category"> tag</td><td>Provided by YST searcher</td></tr> + * <tr><td>wordcounts_*</td><td><wordcounts word="..."> tag</td><td>Provided by YST searcher</td></tr> + * </table> + * + * @author bratseth + * @author laboisse + */ +public class TiledTemplateSet extends DefaultTemplateSet { + + private FormattingOptions hitOptionsForProvider; + private FormattingOptions hitOptions; + + public TiledTemplateSet() { + this(ConfigHack.TILED_TEMPLATE); + } + + public TiledTemplateSet(String templateName) { + super(templateName); + + // Define formatting options that will be used by various rendering methods + hitOptions = new FormattingOptions(); + // Render provider field as an attribute, not as a regular field + hitOptions.formatFieldAsAttribute("provider", "provider"); + hitOptions.setFieldNotToRender("provider"); + + + // Define formatting options that will be used by various rendering methods, for /result/meta/provider tags + hitOptionsForProvider = new FormattingOptions(); + hitOptionsForProvider.formatFieldAsAttribute("provider", "name"); // Provider name is rendered a provider/@name + // hitOptionsForProvider.formatFieldAsAttribute("uri", "query"); // FIXME Issue with attribute formatting, keeping as regular field for now + 
hitOptionsForProvider.formatFieldAsAttribute("scheme", "scheme"); + hitOptionsForProvider.formatFieldAsAttribute("host", "host"); + hitOptionsForProvider.formatFieldAsAttribute("port", "port"); + hitOptionsForProvider.formatFieldAsAttribute("path", "path"); + hitOptionsForProvider.formatFieldAsAttribute("status", "result"); + // Latency fields are not defined using prefixes as we know all the field names and prefixes are expensive + hitOptionsForProvider.formatFieldWithSubtype("latency_connect", "latency", "type", "connect"); + hitOptionsForProvider.formatFieldWithSubtype("latency_start", "latency", "type", "start"); + hitOptionsForProvider.formatFieldWithSubtype("latency_finish", "latency", "type", "finish"); + // Must use prefix for query parameters + hitOptionsForProvider.formatFieldWithSubtype("query_param_", "parameter", "name"); + // Must use prefix for getHeaders + hitOptionsForProvider.formatFieldWithSubtype("header_", "header", "name"); + // Must use prefix for response getHeaders + hitOptionsForProvider.formatFieldWithSubtype("response_header_", "response-header", "name"); + // Count fields are not defined using prefixes as we know all the field names and prefixes are expensive + hitOptionsForProvider.formatFieldWithSubtype("count_first", "count", "type", "first"); + hitOptionsForProvider.formatFieldWithSubtype("count_last", "count", "type", "last"); + hitOptionsForProvider.formatFieldWithSubtype("count_total", "count", "type", "total"); + hitOptionsForProvider.formatFieldWithSubtype("count_deep", "count", "type", "deep"); + + hitOptionsForProvider.formatFieldWithSubtype("queryattrs_xorronum", "queryattrs", "name", "xorronum"); + hitOptionsForProvider.formatFieldWithSubtype("queryattrs_RankFeaturesRewriterAttr", "queryattrs", "name", "RankFeaturesRewriterAttr"); + hitOptionsForProvider.formatFieldWithSubtype("queryattrs_intlannotator", "queryattrs", "name", "intlannotator"); + hitOptionsForProvider.formatFieldWithSubtype("queryattrs_category", 
"queryattrs", "name", "category"); + + hitOptionsForProvider.formatFieldWithSubtype("wordcounts_", "wordcounts", "word"); + // Provider field should not be rendered in logging hits as we already have <provider name="..."> + hitOptionsForProvider.setFieldNotToRender("provider"); + } + + @Override + /** Uses an XML writer in this template */ + public XMLWriter wrapWriter(Writer writer) { return new XMLWriter(super.wrapWriter(writer)); } + + @Override + public void header(Context context,XMLWriter writer) throws IOException { + Result result=(Result)context.get("result"); + writer.xmlHeader(getRequestedEncoding(result.getQuery())); + writer.openTag("result").attribute("version","1.0"); + writer.attribute("layout", result.hits().getField("layout")); + renderCoverageAttributes(result.getCoverage(false), writer); + writer.closeStartTag(); + renderSectionContent(result.hits(),writer); + } + + /** + * Augments default hit attributes rendering with formatting options. + * There's also a hacky part: if hit is actually a hit group, tries to use + * the 'type' field in place of the hit's type, to avoid having the 'group' hit type. 
+ */ + @Override + protected void renderHitAttributes(Hit hit, XMLWriter writer) throws IOException { + if (hit instanceof HitGroup) { + String type = hit.getTypeString(); // TODO: This logic is somewhat crazy + if("group".equals(type)) + type = String.valueOf(hit.getField("type")); + writer.attribute("type", type); + } + else { + writer.attribute("type", hit.getTypeString()); + } + + if (hit.getRelevance() != null) + writer.attribute("relevance", hit.getRelevance()); + writer.attribute("source", hit.getSource()); + + for (Map.Entry<String, String> attr : hitOptions.fieldsAsAttributes()) { + Object val = hit.getField(attr.getKey()); + if (val != null) + writer.attribute(attr.getValue(), String.valueOf(val)); + } + } + + @Override + protected void renderField(Context context, Hit hit, Map.Entry<String, Object> entry, XMLWriter writer) throws IOException { + String fieldName = entry.getKey(); + + if ( !shouldRenderField(hit, fieldName)) return; + + writer.openTag(fieldName); + renderFieldContent(context, hit, fieldName, writer); + writer.closeTag(); + } + + /** Renders all fields of the hit */ + @Override + protected void renderHitFields(Context context, Hit hit, XMLWriter writer) throws IOException { + renderId(hit.getId(), writer); + for (Iterator<Map.Entry<String, Object>> it = hit.fieldIterator(); it.hasNext(); ) { + Map.Entry<String, Object> entry = it.next(); + // Exclude fields that should not be rendered + if (hitOptions.shouldRenderField(entry.getKey())) + renderField(context, hit, entry, writer); + } + } + + @Override + protected boolean shouldRenderField(Hit hit, String fieldName) { + if (fieldName.equals("relevancy")) return false; + if (fieldName.equals("collapseId")) return false; + return true; + } + + /** + * Overrides {@link DefaultTemplateSet#hit(Context, Writer)} + * to print 'logging' type meta hits as /result/meta/provider tags. + * Fails back to {@code super.hit(context, writer)} in other cases. 
+ */ + @Override + public void hit(Context context, XMLWriter writer) throws IOException { + Hit hit = (Hit) context.get("hit"); + if (hit.isMeta() && hit.types().contains("logging")) + renderProvider(context, hit, writer); + else + super.hit(context, writer); + } + + /** + * Overrides {@link DefaultTemplateSet#renderHitGroup(HitGroup, Context, XMLWriter)} + * for /result/section and /result/meta hit groups. + * Falls back to {@code super.renderHitGroup(hit, context, writer)} otherwise. + */ + @Override + protected void renderHitGroup(HitGroup hit, Context context, XMLWriter writer) throws IOException { + if (hit.types().contains("section")) { + renderSection(hit, writer); // Renders /result/section + } + else if (hit.types().contains("meta")) { + writer.openTag("meta"); // renders /result/meta + writer.closeStartTag(); + } + else { + super.renderHitGroup(hit, context, writer); + } + } + + /** + * Renders /result/section: the section start tag with its id, layout, region and placement attributes, + * followed by the section content. + * Doesn't use {@link #renderHitAttributes(Hit, XMLWriter)}. + */ + protected void renderSection(HitGroup hit, XMLWriter writer) throws IOException { + writer.openTag("section"); + writer.attribute("id",hit.getDisplayId()); + writer.attribute("layout",hit.getField("layout")); + writer.attribute("region",hit.getField("region")); + writer.attribute("placement",hit.getField("placement")); // deprecated in 5.0 + writer.closeStartTag(); + renderSectionContent(hit,writer); + } + + /** + * Renders a source element for each source and a renderer element for each renderer of the group, + * each with its parameters, if the group is a SectionHitGroup. Writes nothing for other groups. + */ + protected void renderSectionContent(HitGroup hit,XMLWriter writer) throws IOException { + if (hit instanceof SectionHitGroup) { // render additional information + SectionHitGroup sectionGroup=(SectionHitGroup)hit; + for (Source source : sectionGroup.sources()) { + writer.openTag("source").attribute("url",source.getUrl()); + renderParameters(source.parameters(),writer); + writer.closeTag(); + } + for (Renderer renderer : sectionGroup.renderers()) { + writer.openTag("renderer").attribute("for",renderer.getRendererFor()).attribute("name",renderer.getName()); + 
renderParameters(renderer.parameters(),writer); + writer.closeTag(); + } + } + } + + private void renderParameters(Map<String,String> parameters,XMLWriter writer) throws IOException { + // Render content + for (Map.Entry<String, String> parameter : parameters.entrySet()) + writer.openTag("parameter").attribute("name",parameter.getKey()).content(parameter.getValue(),false).closeTag(); + } + + /** + * Renders /result/meta/provider. + * Uses {@link #renderProviderHitAttributes(Hit, XMLWriter)} instead of the default {@link #renderHitAttributes(Hit, XMLWriter)}. + * @see #renderProviderHitAttributes(Hit, XMLWriter) + * @see #renderProviderHitFields(Context, Hit, XMLWriter) + */ + protected void renderProvider(Context context, Hit hit, XMLWriter writer) + throws IOException { + writer.openTag("provider"); + renderProviderHitAttributes(hit, writer); + writer.closeStartTag(); + renderProviderHitFields(context, hit, writer); + } + + /** + * Specific hit attributes rendering for 'provider' meta hits under /result/meta. + */ + protected void renderProviderHitAttributes(Hit hit, XMLWriter writer) throws IOException { + // Browse through fields that should be rendered as attributes + for (Map.Entry<String, String> attr : hitOptionsForProvider.fieldsAsAttributes()) + writer.attribute(attr.getValue(),hit.getField(attr.getKey())); + } + + + /** + * Renders fields under /result/meta/provider. 
+ * + * @see #renderProviderField(Context, Hit, java.util.Map.Entry, XMLWriter) + */ + protected void renderProviderHitFields(Context context, Hit hit, XMLWriter writer) + throws IOException { + renderId(hit.getId(), writer); + for (Iterator<Map.Entry<String, Object>> it = hit.fieldIterator(); it.hasNext(); ) { + Map.Entry<String, Object> entry = it.next(); + // Exclude fields that have already been rendered as attributes and + // fields that should not be rendered + if (hitOptionsForProvider.getAttributeName(entry.getKey()) == null + && hitOptionsForProvider.shouldRenderField(entry.getKey())) + renderProviderField(context, hit, entry, writer); + } + } + + /** + * Renders one field under /result/meta/provider. + */ + protected void renderProviderField(Context context, Hit hit, + Map.Entry<String, Object> entry, XMLWriter writer) throws IOException { + + String name = entry.getKey(); + FormattingOptions.SubtypeField subtypeField = hitOptionsForProvider.getSubtype(name); + if (subtypeField == null) + subtypeField = hitOptionsForProvider.getSubtypeWithPrefix(name); + + if (subtypeField != null) { + writer.openTag(subtypeField.tagName); + if (subtypeField.attributeValue != null) { + writer.attribute(subtypeField.attributeName,subtypeField.attributeValue); + } + else if (subtypeField instanceof SubtypeFieldWithPrefix) { + // This is a subtype field that was defined using a prefix + // get the remaining part of the field name + writer.attribute(subtypeField.attributeName, + name.substring(((SubtypeFieldWithPrefix)subtypeField).prefixLength)); + } + } else { + writer.openTag(name); + } + writer.escapedContent(hit.getFieldXML(name),false).closeTag(); + } + + + public String toString() { return "tiled result template"; } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/UserTemplate.java b/container-search/src/main/java/com/yahoo/prelude/templates/UserTemplate.java new file mode 100644 index 00000000000..bcc3b3c6390 --- /dev/null +++ 
b/container-search/src/main/java/com/yahoo/prelude/templates/UserTemplate.java @@ -0,0 +1,323 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.templates; + +import com.yahoo.io.ByteWriter; +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.prelude.fastsearch.XMLField; +import com.yahoo.search.Result; +import com.yahoo.text.Utf8String; +import com.yahoo.text.XMLWriter; + +import java.io.IOException; +import java.io.Writer; +import java.util.logging.Logger; + + +/** + * A wrapper for a template set, suitable for subclassing. + * + * <p> + * A subclass of UserTemplate must implement header(), footer(), hit(), + * hitFooter(), error() and noHits(). + * + * @author Steinar Knutsen + */ +@SuppressWarnings("deprecation") +public abstract class UserTemplate<T extends Writer> extends GenericTemplateSet { + + // The XML entity "&amp;" as ASCII bytes, written in place of an ampersand + private static final byte[] ampersand = new byte[] { 38, 97, 109, 112, 59 }; + + // The XML entity "&lt;" as ASCII bytes, written in place of a less than sign + private static final byte[] lessThan = new byte[] { 38, 108, 116, 59 }; + // The XML entity "&gt;" as ASCII bytes, written in place of a greater than sign + private static final byte[] greaterThan = new byte[] { 38, 103, 116, 59 }; + + // \\u00 (backslash, 'u', '0', '0') as ASCII bytes: the prefix quoteByte writes before its two hex digits + private static final byte[] quotePrefix = new byte[] { 92, 117, 48, 48 }; + + private static final Logger log = Logger.getLogger(UserTemplate.class.getName()); + + /** + * The signature of this constructor is the one which is invoked + * in a production setting. + */ + public UserTemplate(String name, String mimeType, + String encoding) { + super(name, mimeType, encoding); + } + + /** Creates a template with the given name, using DEFAULT_MIMETYPE and DEFAULT_ENCODING */ + public UserTemplate(String name) { + this(name, + DEFAULT_MIMETYPE, + DEFAULT_ENCODING + ); + } + + /** + * This is called once before each result is rendered using this template. + * The returned writer is used in all subsequent calls. Use this if another (wrapper) + * writer of the raw incoming writer is desired in the implementation of this template. 
+ * The class of the returned type must be given as a type argument to the template class, + * to be able to implement methods taking this wrapper writer as the argument type. + * This default implementation returns an XMLWriter. + */ + @SuppressWarnings("unchecked") + public T wrapWriter(Writer writer) { + //FIXME: Hack + return (T) XMLWriter.from(writer, 10, -1); + } + + /** + * Creates a new context suitable for this template. + * The context may be reused for several evaluations, but not multiple + * concurrent evaluations + */ + public Context createContext() { + return new MapContext(); + } + + + /** + * For internal use only + * TODO: get rid of this method * + */ + public boolean isDefaultTemplateSet() { + return getClass().equals(TemplateSet.getDefault().getClass()); + } + + /** + * Render the result set header. + * + * <p> + * The result set is available in the context object under the name + * "result". + * + * @param context + * wrapper which will contain, among other thing, the result + * set instance + * @param writer + * the destination for rendering the result + * @throws IOException + * may be propagated from the writer + */ + public abstract void header(Context context, T writer) + throws IOException; + + /** + * Render the result set footer. + * + * <p> + * The result set is available in the context object under the name + * "result". + * + * @param context + * wrapper which will contain, among other thing, the result + * set instance + * @param writer + * the destination for rendering the result + * @throws IOException + * may be propagated from the writer + */ + public abstract void footer(Context context, T writer) + throws IOException; + + /** + * Render a single top level hit. + * + * <p> + * The result set is available in the context object under the name + * "result". The hit itself as "hit", the index of the hit as "hitno", and + * all the fields under their normal names. 
+ * + * @param context + * wrapper which will contain, among other thing, the hit + * instance + * @param writer + * the destination for rendering the hit + * @throws IOException + * may be propagated from the writer + */ + public abstract void hit(Context context, T writer) throws IOException; + + /** + * Render a footer for a single top level hit. A typical implementation may + * do nothing. + * + * <p> + * The result set is available in the context object under the name + * "result". The hit itself as "hit", the index of the hit as "hitno", and + * all the fields under their normal names. + * + * @param context + * wrapper which will contain, among other thing, the hit + * instance + * @param writer + * the destination for rendering the hit + * @throws IOException + * may be propagated from the writer + */ + public abstract void hitFooter(Context context, T writer) + throws IOException; + + /** + * Render the error message for a result set. + * + * <p> + * The result set is available in the context object under the name + * "result". + * + * @param context + * wrapper which will contain, among other thing, main error + * and result set instances. + * @param writer + * the destination for rendering the hit + * @throws IOException + * may be propagated from the writer + */ + public abstract void error(Context context, T writer) + throws IOException; + + /** + * Invoked when the result set has no hits. + * + * <p> + * The result set is available in the context object under the name + * "result". + * + * @param context + * wrapper which will contain, among other thing, the result + * set instance + * @param writer + * the destination for rendering the hit + * @throws IOException + * may be propagated from the writer + */ + public abstract void noHits(Context context, T writer) + throws IOException; + + /** + * Override this to add custom rendering for the query context of the result. + * Only called when the query context is present. 
+ * + * <p> + * The result set is available in the context object under the name + * "result". The query context is retrieved from the result by calling + * result.getContext(false) + * + * @param context + * wrapper which will contain, among other things, the result + * set instance + * @param writer + * the destination for rendering the query context + * @throws IOException + * may be propagated from the writer + */ + public void queryContext(Context context, T writer) throws IOException { + Result result = (Result) context.get("result"); + result.getContext(false).render(writer); + } + + /** + * Dump UTF-8 byte array to writer, but escape low ASCII codes except + * TAB, NL and CR, and escape ampersand, less than and greater than. + * + * <p> + * It is presumed the writer is buffered (which is the case in normal + * result rendering), as the method may perform a large number of write + * operations. + * + * <p> + * public only for testing. + */ + public static void dumpAndXMLQuoteUTF8(ByteWriter writer, byte[] utf) throws java.io.IOException { + int startDump = 0; // index of the first byte which has not yet been written to the writer + + for (int i = 0; i < utf.length; ++i) { + byte b = utf[i]; + if (b < 0) { + // Not ASCII, above character 127 + // Don't try to do something smart with UNICODE characters, + // just pass them through. 
+ } else if (b < 32) { + switch (b) { + case 9: + case 10: + case 13: + break; + default: + writer.append(utf, startDump, i - startDump); + startDump = i + 1; + quoteByte(writer, b); + break; + } + } else { + // printable ASCII + // quote special characters, otherwise do nothing + switch (b) { + // case 34: // double quote + // writer.append(utf, startDump, i - startDump); + // startDump = i + 1; + // writer.append(doubleQuote); + // break; + case 38: // ampersand + writer.append(utf, startDump, i - startDump); + startDump = i + 1; + writer.append(ampersand); + break; + case 60: // less than + writer.append(utf, startDump, i - startDump); + startDump = i + 1; + writer.append(lessThan); + break; + case 62: // greater than + writer.append(utf, startDump, i - startDump); + startDump = i + 1; + writer.append(greaterThan); + break; + } + } + } + if (startDump < utf.length) { + writer.append(utf, startDump, utf.length - startDump); + } + } + + /** + * If the field is available as a UTF-8 byte array, + * dump it to the writer. 
+ */ + public static boolean dumpBytes(ByteWriter writer, + FastHit hit, + String fieldName) throws java.io.IOException { + FastHit.RawField asBytes; + try { + asBytes = hit.fetchFieldAsUtf8(fieldName); + } catch (RuntimeException e) { + asBytes = null; + } + if (asBytes != null) { + if (asBytes.needXmlEscape()) { + dumpAndXMLQuoteUTF8(writer, asBytes.getUtf8()); + } else { + writer.append(asBytes.getUtf8()); + } + return true; + } + return false; + } + + private static void quoteByte(ByteWriter writer, byte b) throws java.io.IOException { + byte[] quoted = new byte[2]; + writer.append(quotePrefix); + quoted[0] = (byte) ((b >>> 4) + 0x30); + if (quoted[0] > 0x39) { + quoted[0] = (byte) (quoted[0] + 7); + } + quoted[1] = (byte) ((b & 0x0f) + 0x30); + if (quoted[1] > 0x39) { + quoted[1] = (byte) (quoted[1] + 7); + } + writer.append(quoted); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/package-info.java b/container-search/src/main/java/com/yahoo/prelude/templates/package-info.java new file mode 100644 index 00000000000..7a273c6415f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/templates/package-info.java @@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +@PublicApi +package com.yahoo.prelude.templates; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/Query.java b/container-search/src/main/java/com/yahoo/search/Query.java new file mode 100644 index 00000000000..20831e743b9 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/Query.java @@ -0,0 +1,1060 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search; + +import com.google.common.collect.ImmutableMap; +import com.yahoo.collections.Tuple2; +import com.yahoo.component.Version; +import com.yahoo.container.jdisc.HttpRequest; +import com.yahoo.fs4.MapEncoder; +import com.yahoo.log.LogLevel; +import com.yahoo.prelude.fastsearch.DocumentDatabase; +import com.yahoo.prelude.query.Highlight; +import com.yahoo.prelude.query.QueryException; +import com.yahoo.prelude.query.textualrepresentation.TextualQueryRepresentation; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.query.profile.types.FieldType; +import com.yahoo.search.query.properties.PropertyMap; +import com.yahoo.search.query.Model; +import com.yahoo.search.query.ParameterParser; +import com.yahoo.search.query.Presentation; +import com.yahoo.search.query.QueryTree; +import com.yahoo.search.query.Ranking; +import com.yahoo.search.query.SessionId; +import com.yahoo.search.query.Sorting; +import com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry; +import com.yahoo.search.query.profile.types.FieldDescription; +import com.yahoo.search.query.profile.types.QueryProfileFieldType; +import com.yahoo.search.query.profile.types.QueryProfileType; +import com.yahoo.search.query.profile.types.QueryProfileTypeRegistry; +import com.yahoo.search.query.properties.DefaultProperties; +import com.yahoo.search.query.properties.QueryProperties; +import com.yahoo.search.query.properties.QueryPropertyAliases; +import com.yahoo.search.query.properties.RequestContextProperties; +import com.yahoo.yolean.Exceptions; +import com.yahoo.search.federation.FederationSearcher; +import com.yahoo.search.query.Properties; +import com.yahoo.search.query.Sorting.AttributeSorter; +import com.yahoo.search.query.Sorting.FieldOrder; +import com.yahoo.search.query.Sorting.Order; +import com.yahoo.search.query.context.QueryContext; +import com.yahoo.search.query.profile.ModelObjectMap; +import 
com.yahoo.search.query.profile.QueryProfileProperties; +import com.yahoo.search.query.profile.compiled.CompiledQueryProfile; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.yql.NullItemException; +import com.yahoo.search.yql.VespaSerializer; +import com.yahoo.search.yql.YqlParser; + +import edu.umd.cs.findbugs.annotations.Nullable; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.logging.Logger; + +/** + * A search query containing all the information required to produce a Result. + * <p> + * The Query contains: + * <ul> + * <li>the selection criterion received in the request - which may be a structured boolean tree of operators, + * an annotated piece of natural language text received from a user, or a combination of both + * <li>a set of field containing the additional general parameters of a query - number of hits, + * ranking, presentation etc. + * <li>a Map of properties, which can be of any object type + * </ul> + * + * <p> + * The properties has three sources + * <ol> + * <li>They may be set in some Searcher component already executed for this Query - the properties acts as + * a blackboard for communicating arbitrary objects between Searcher components. + * <li>Properties set in the search Request received - the properties acts as a way to parametrize Searcher + * components from the Request. + * <li>Properties defined in the selected {@link com.yahoo.search.query.profile.QueryProfile} - this provides + * defaults for the parameters to Searcher components. Note that by using query profile types, the components may + * define the set of parameters they support. + * </ol> + * When looked up, the properties are accessed in the priority order listed above. + * <p> + * The identity of a query is determined by its content. 
+ * + * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> + * @author bratseth + */ +public class Query extends com.yahoo.processing.Request implements Cloneable { + + // Note to developers: If you think you should add something here you are probably wrong + // To add state to the query: Do properties.set("myNewState",new MyNewState()) instead. + + /** The type of the query */ + public enum Type { + + ALL(0,"all"), + ANY(1,"any"), + PHRASE(2,"phrase"), + ADVANCED(3,"adv"), + WEB(4,"web"), + PROGRAMMATIC(5, "prog"), + YQL(6, "yql"); + + private final int intValue; + private final String stringValue; + + Type(int intValue,String stringValue) { + this.intValue = intValue; + this.stringValue = stringValue; + } + + /** Converts a type argument value into a query type */ + public static Type getType(String typeString) { + for (Type type:Type.values()) + if(type.stringValue.equals(typeString)) + return type; + return ALL; + } + + public int asInt() { return intValue; } + + public String toString() { return stringValue; } + + } + + //-------------- Query properties treated as fields in Query --------------- + + /** The offset from the most relevant hits found from this query */ + private int offset = 0; + + /** The number of hits to return */ + private int hits = 10; + + /** The query context level, 0 means no tracing */ + private int traceLevel = 0; + + // The timeout to be used when dumping rank features + private static final long dumpTimeout = (6 * 60 * 1000); // 6 minutes + private static final long defaultTimeout = 5000; + /** The timeout of the query, in milliseconds */ + private long timeout = defaultTimeout; + + + /** Whether this query is forbidden to access cached information */ + private boolean noCache=false; + + /** Whether or not grouping should use a session cache */ + private boolean groupingSessionCache=false; + + //-------------- Generic property containers -------------------------------- + + /** + * The synchronous view of the 
JDisc request causing this query. + * + * @since 5.1 + */ + private final HttpRequest httpRequest; + + /** The context, or null if there is no context */ + private QueryContext context = null; + + /** Used for downstream session caches */ + private SessionId sessionId = null; + + //--------------- Owned sub-objects containing query properties ---------------- + + /** The ranking requested in this query */ + private Ranking ranking = new Ranking(this); + + /** The query itself and/or the query program declaration */ + private Model model = new Model(this); + + /** How results of this query should be presented */ + private Presentation presentation = new Presentation(this); + + //---------------- Tracing ---------------------------------------------------- + + private static Logger log = Logger.getLogger(Query.class.getName()); + + /** The time this query was created, or the time of the last call to resetTimeout(), in milliseconds since epoch */ + private long startTime; + + /** Error conditions stemming from the query itself */ + private List<ErrorMessage> errors = new ArrayList<>(0); + + //---------------- Static property handling ------------------------------------ + + public static final CompoundName OFFSET = new CompoundName("offset"); + public static final CompoundName HITS = new CompoundName("hits"); + + public static final CompoundName SEARCH_CHAIN = new CompoundName("searchChain"); + public static final CompoundName TRACE_LEVEL = new CompoundName("traceLevel"); + public static final CompoundName NO_CACHE = new CompoundName("noCache"); + public static final CompoundName GROUPING_SESSION_CACHE = new CompoundName("groupingSessionCache"); + public static final CompoundName TIMEOUT = new CompoundName("timeout"); + + /** The builtin query profile type named "native", declaring the top level arguments of a query */ + private static QueryProfileType argumentType; + static { + argumentType=new QueryProfileType("native"); + argumentType.setBuiltin(true); + + argumentType.addField(new FieldDescription(OFFSET.toString(), "integer", "offset start")); + argumentType.addField(new FieldDescription(HITS.toString(), "integer", "hits count")); + // TODO: 
Should this be added to com.yahoo.search.query.properties.QueryProperties? If not, why not? + argumentType.addField(new FieldDescription(SEARCH_CHAIN.toString(), "string")); + argumentType.addField(new FieldDescription(TRACE_LEVEL.toString(), "integer", "tracelevel")); + argumentType.addField(new FieldDescription(NO_CACHE.toString(), "boolean", "nocache")); + argumentType.addField(new FieldDescription(GROUPING_SESSION_CACHE.toString(), "boolean", "groupingSessionCache")); + argumentType.addField(new FieldDescription(TIMEOUT.toString(), "string", "timeout")); + argumentType.addField(new FieldDescription(FederationSearcher.SOURCENAME.toString(),"string")); + argumentType.addField(new FieldDescription(FederationSearcher.PROVIDERNAME.toString(),"string")); + argumentType.addField(new FieldDescription(Presentation.PRESENTATION,new QueryProfileFieldType(Presentation.getArgumentType()))); + argumentType.addField(new FieldDescription(Ranking.RANKING,new QueryProfileFieldType(Ranking.getArgumentType()))); + argumentType.addField(new FieldDescription(Model.MODEL,new QueryProfileFieldType(Model.getArgumentType()))); + argumentType.freeze(); + } + public static QueryProfileType getArgumentType() { return argumentType; } + + /** The aliases of query properties, these are always the same */ + // Note: Don't make static for now as GSM calls this through reflection + private static Map<String,CompoundName> propertyAliases; + static { + Map<String,CompoundName> propertyAliasesBuilder = new HashMap<>(); + addAliases(Query.getArgumentType(), propertyAliasesBuilder); + addAliases(Ranking.getArgumentType(), propertyAliasesBuilder); + addAliases(Model.getArgumentType(), propertyAliasesBuilder); + addAliases(Presentation.getArgumentType(), propertyAliasesBuilder); + propertyAliases = ImmutableMap.copyOf(propertyAliasesBuilder); + } + private static void addAliases(QueryProfileType arguments,Map<String,CompoundName> aliases) { + String prefix=getPrefix(arguments); + for (FieldDescription 
field : arguments.fields().values()) { + for (String alias : field.getAliases()) + aliases.put(alias,new CompoundName(prefix+field.getName())); + } + } + private static String getPrefix(QueryProfileType type) { + if (type.getId().getName().equals("native")) return ""; // The arguments of this directly + return type.getId().getName() + "."; + } + + public static void addNativeQueryProfileTypesTo(QueryProfileTypeRegistry registry) { + // Add modifiable copies to allow query profile types in this to add to these + registry.register(Query.getArgumentType().unfrozen()); + registry.register(Ranking.getArgumentType().unfrozen()); + registry.register(Model.getArgumentType().unfrozen()); + registry.register(Presentation.getArgumentType().unfrozen()); + registry.register(DefaultProperties.argumentType.unfrozen()); + } + + //---------------- Construction ------------------------------------ + + /** + * Constructs an empty (null) query + */ + public Query() { + this(""); + } + + /** + * Construct a query from a string formatted in the http style, e.g <code>?query=test&offset=10&hits=13</code> + * The query must be uri encoded. + */ + public Query(String query) { + this(query, null); + } + + /** + * Construct a query from a string formatted in the http style, e.g <code>?query=test&offset=10&hits=13</code> + * The query must be uri encoded. + */ + public Query(String query, CompiledQueryProfile queryProfile) { + this(HttpRequest.createTestRequest(query, com.yahoo.jdisc.http.HttpRequest.Method.GET), queryProfile); + } + + /** + * Creates a query from a request + * + * @param request the HTTP request from which this is created + * @param queryProfile the query profile to use for this query, or null if none. 
+ */ + public Query(HttpRequest request, CompiledQueryProfile queryProfile) { + super(new QueryPropertyAliases(propertyAliases)); + this.httpRequest = request; + init(request.propertyMap(), queryProfile); + } + + /** + * Creates a query from a request + * + * @param request the HTTP request from which this is created + */ + public Query(HttpRequest request) { + this(request, null); + } + + private void init(Map<String, String> requestMap, CompiledQueryProfile queryProfile) { + startTime = System.currentTimeMillis(); + if (queryProfile != null) { + // Move all request parameters to the query profile just to validate that the parameter settings are legal + Properties queryProfileProperties=new QueryProfileProperties(queryProfile); + properties().chain(queryProfileProperties); + // TODO: Just checking legality rather than actually setting would be faster + setPropertiesFromRequestMap(requestMap, properties()); // Adds errors to the query for illegal set attempts + + // Create the full chain + properties().chain(new QueryProperties(this, queryProfile.getRegistry())). + chain(new ModelObjectMap()). + chain(new RequestContextProperties(requestMap)). + chain(queryProfileProperties). + chain(new DefaultProperties()); + + // Pass the values from the query profile which maps through a field in the Query object model + // through the property chain to cause those values to be set in the Query object model + setFieldsFrom(queryProfileProperties, requestMap); + } + else { // bypass these complications if there is no query profile to get values from and validate against + properties(). + chain(new QueryProperties(this, new CompiledQueryProfileRegistry())). + chain(new PropertyMap()). 
+ chain(new DefaultProperties()); + setPropertiesFromRequestMap(requestMap, properties()); + } + + properties().setParentQuery(this); + traceProperties(); + } + + public Query(Query query) { + this(query, query.getStartTime()); + } + + private Query(Query query, long startTime) { + super(query.properties().clone()); + this.startTime = startTime; + this.httpRequest = query.httpRequest; + query.copyPropertiesTo(this); + } + + /** + * Creates a new query from another query, but with time sensitive + * fields reset. + * + * @return new query + */ + public static Query createNewQuery(Query query) { + return new Query(query, System.currentTimeMillis()); + } + + /** + * Calls properties().set on each value in the given properties which is declared in this query or + * one of its dependent objects. This will ensure the appropriate setters are called on this and all + * dependent objects for the appropriate subset of the given property values + */ + private void setFieldsFrom(Properties properties, Map<String,String> context) { + setFrom(properties,Query.getArgumentType(), context); + setFrom(properties,Model.getArgumentType(), context); + setFrom(properties,Presentation.getArgumentType(), context); + setFrom(properties,Ranking.getArgumentType(), context); + } + + /** + * For each field in the given query profile type, take the corresponding value from originalProperties + * (if any) set it to properties(). + */ + private void setFrom(Properties originalProperties,QueryProfileType arguments,Map<String,String> context) { + String prefix=getPrefix(arguments); + for (FieldDescription field : arguments.fields().values()) { + String fullName=prefix + field.getName(); + if (field.getType() == FieldType.genericQueryProfileType) { + for (Map.Entry<String, Object> entry : originalProperties.listProperties(fullName,context).entrySet()) { + try { + properties().set(fullName + "." 
+ entry.getKey(), entry.getValue(), context); + } catch (IllegalArgumentException e) { + throw new QueryException("Invalid request parameter", e); + } + } + } else { + Object value=originalProperties.get(fullName,context); + if (value!=null) { + try { + properties().set(fullName,value,context); + } catch (IllegalArgumentException e) { + throw new QueryException("Invalid request parameter", e); + } + } + } + } + } + + /** Calls properties.set on all entries in requestMap */ + private void setPropertiesFromRequestMap(Map<String, String> requestMap, Properties properties) { + for (Map.Entry<String, String> entry : requestMap.entrySet()) { + try { + if (entry.getKey().equals("queryProfile")) continue; + properties.set(entry.getKey(), entry.getValue(), requestMap); + } + catch (IllegalArgumentException e) { + throw new QueryException("Invalid request parameter", e); + } + } + } + + /** Returns the properties of this query. The properties are modifiable */ + @Override + public Properties properties() { return (Properties)super.properties(); } + + /** + * Traces how properties was resolved and from where. 
Done after the fact to avoid special handling + * of tracelevel, which is the property deciding whether this needs to be done + */ + private void traceProperties() { + if (traceLevel==0) return; + CompiledQueryProfile profile=null; + QueryProfileProperties profileProperties=properties().getInstance(QueryProfileProperties.class); + if (profileProperties!=null) + profile=profileProperties.getQueryProfile(); + + if (profile==null) + trace("No query profile is used", false, 1); + else + trace("Using " + profile.toString(), false, 1); + if (traceLevel<4) return; + + StringBuilder b=new StringBuilder("Resolved properties:\n"); + Set<String> mentioned=new HashSet<>(); + for (Map.Entry<String,String> requestProperty : requestProperties().entrySet() ) { + Object resolvedValue = properties().get(requestProperty.getKey(), requestProperties()); + if (resolvedValue == null && requestProperty.getKey().equals("queryProfile")) + resolvedValue = requestProperty.getValue(); + + b.append(requestProperty.getKey()); + b.append("="); + b.append(String.valueOf(resolvedValue)); // (may be null) + b.append(" ("); + + if (profile != null && ! profile.isOverridable(new CompoundName(requestProperty.getKey()), requestProperties())) + b.append("value from query profile - unoverridable, ignoring request value"); + else + b.append("value from request"); + b.append(")\n"); + mentioned.add(requestProperty.getKey()); + } + if (profile!=null) { + appendQueryProfileProperties(profile,mentioned,b); + } + trace(b.toString(),false,4); + } + + private Map<String, String> requestProperties() { + return httpRequest.propertyMap(); + } + + private void appendQueryProfileProperties(CompiledQueryProfile profile,Set<String> mentioned,StringBuilder b) { + for (Map.Entry<String,Object> property : profile.listValues("",requestProperties()).entrySet()) { + if ( ! 
mentioned.contains(property.getKey())) + b.append(property.getKey() + "=" + property.getValue() + " (value from query profile)<br/>\n"); + } + } + + /** + * Validates this query + * + * @return the reason if it is invalid, null if it is valid + */ + public String validate() { + // Validate the query profile + QueryProfileProperties queryProfileProperties = properties().getInstance(QueryProfileProperties.class); + if (queryProfileProperties == null) return null; // Valid + StringBuilder missingName = new StringBuilder(); + if (! queryProfileProperties.isComplete(missingName, httpRequest.propertyMap())) + return "Incomplete query: Parameter '" + missingName + "' is mandatory in " + + queryProfileProperties.getQueryProfile() + " but is not set"; + else + return null; // is valid + } + + /** Returns the time (in milliseconds since epoch) when this query was started */ + public long getStartTime() { return startTime; } + + /** Returns the time (in milliseconds) since the query was started/created */ + public long getDurationTime() { + return System.currentTimeMillis() - startTime; + } + + /** + * Get the appropriate timeout for the query. + * + * @return timeout in milliseconds + **/ + public long getTimeLeft() { + return getTimeout() - getDurationTime(); + } + + public boolean requestHasProperty(String name) { + return httpRequest.hasProperty(name); + } + + /** + * Returns the number of milliseconds to wait for a response from a search backend + * before timing it out. Default is 5000. + * <p> + * Note: If Ranking.RANKFEATURES is turned on, this is hardcoded to 6 minutes. + * + * @return timeout in milliseconds. + */ + public long getTimeout() { + return properties().getBoolean(Ranking.RANKFEATURES, false) ? dumpTimeout : timeout; + } + + /** + * Sets the number of milliseconds to wait for a response from a search backend + * before time out. Default is 5000. 
+ */ + public void setTimeout(long timeout) { + if (timeout > 1000000000 || timeout < 0) + throw new IllegalArgumentException("'timeout' must be positive and smaller than 1000000000 ms but was " + timeout); + this.timeout = timeout; + } + + /** + * Sets timeout from a string which will be parsed as a + */ + public void setTimeout(String timeoutString) { + setTimeout(ParameterParser.asMilliSeconds(timeoutString, timeout)); + } + + /** + * Resets the start time of the query. This will ensure that the query will run + * for the same amount of time as a newly created query. + */ + public void resetTimeout() { this.startTime = System.currentTimeMillis(); } + + /** + * Sets the context level of this query, 0 means no tracing + * Higher numbers means increasingly more tracing + */ + public void setTraceLevel(int traceLevel) { this.traceLevel = traceLevel; } + + /** + * Returns the context level of this query, 0 means no tracing + * Higher numbers means increasingly more tracing + */ + public int getTraceLevel() { return traceLevel; } + + /** + * Returns the context level of this query, 0 means no tracing + * Higher numbers means increasingly more tracing + */ + public final boolean isTraceable(int level) { return traceLevel >= level; } + + + /** Returns whether this query should never be served from a cache. Default is false */ + public boolean getNoCache() { return noCache; } + + /** Sets whether this query should never be server from a cache. Default is false */ + public void setNoCache(boolean noCache) { this.noCache = noCache; } + + /** Returns whether this query should use the grouping session cache. Default is false */ + public boolean getGroupingSessionCache() { return groupingSessionCache; } + + /** Sets whether this query should use the grouping session cache. 
Default is false */ + public void setGroupingSessionCache(boolean groupingSessionCache) { this.groupingSessionCache = groupingSessionCache; } + + /** + * Returns the offset from the most relevant hits requested by the submitter + * of this query. + * Default is 0 - to return the most relevant hits + */ + public int getOffset() { return offset; } + + /** + * Returns the number of hits requested by the submitter of this query. + * The default is 10. + */ + public int getHits() { return hits; } + + /** + * Sets the number of hits requested. If hits is less than 0, an + * IllegalArgumentException is thrown. Default number of hits is 10. + */ + public void setHits(int hits) { + if (hits < 0) + throw new IllegalArgumentException("Must be a positive number"); + this.hits = hits; + } + + /** + * Set the hit offset. Can not be less than 0. Default is 0. + */ + public void setOffset(int offset) { + if (offset < 0) + throw new IllegalArgumentException("Must be a positive number"); + this.offset = offset; + } + + /** Convenience method to set both the offset and the number of hits to return */ + public void setWindow(int offset,int hits) { + setOffset(offset); + setHits(hits); + } + + /** + * This is ignored - compression is controlled at the network level. + * + * @deprecated this is ignored + */ + @Deprecated + public void setCompress(boolean ignored) { } + + /** + * Returns false. 
+ * + * @deprecated this always returns false + */ + @Deprecated + public boolean getCompress() { return false; } + + /** Returns a string describing this query */ + @Override + public String toString() { + String queryTree; + // getQueryTree isn't exception safe + try { + queryTree = model.getQueryTree().toString(); + } catch (Exception e) { + queryTree = "[Could not parse user input: " + model.getQueryString() + "]"; + } + return "query '" + queryTree + "'"; + } + + /** Returns a string describing this query in more detail */ + public String toDetailString() { + String queryTree; + // getQueryTree isn't exception safe + try { + queryTree = model.getQueryTree().toString(); + } catch (Exception e) { + queryTree = "Could not parse user input: " + model.getQueryString(); + } + return "query=[" + queryTree + "]" + " offset=" + getOffset() + " hits=" + getHits() + "]"; + } + + /** + * Encodes this query onto the given buffer + * + * @param buffer The buffer to encode the query to + * @return the number of encoded items + */ + public int encode(ByteBuffer buffer) { + return model.getQueryTree().encode(buffer); + } + + /** + * Adds a context message to this query and to the info log, + * if the context level of the query is sufficiently high. + * The context information will be carried over to the result at creation. + * The message parameter will be included <i>with</i> XML escaping. + * + * @param message the message to add + * @param traceLevel the context level of the message, this method will do nothing + * if the traceLevel of the query is lower than this value + */ + public void trace(String message, int traceLevel) { + trace(message, false, traceLevel); + } + + /** + * Adds a trace message to this query + * if the trace level of the query is sufficiently high. 
+ * + * @param message the message to add + * @param includeQuery true to append the query root stringValue + * at the end of the message + * @param traceLevel the context level of the message, this method will do nothing + * if the traceLevel of the query is lower than this value + */ + public void trace(String message, boolean includeQuery, int traceLevel) { + if ( ! isTraceable(traceLevel)) return; + + if (includeQuery) + message += ": [" + queryTreeText() + "]"; + + log.log(LogLevel.DEBUG,message); + + // Pass 0 as traceLevel as the trace level check is already done above, + // and it is not propagated to trace until execution has started + // (it is done in the execution.search method) + getContext(true).trace(message, 0); + } + + /** + * Adds a trace message to this query + * if the trace level of the query is sufficiently high. + * + * @param includeQuery true to append the query root stringValue at the end of the message + * @param traceLevel the context level of the message, this method will do nothing + * if the traceLevel of the query is lower than this value + * @param messages the messages whose toStrings will be concatenated into the trace message. + * Concatenation will only happen if the trace level is sufficiently high. + */ + public void trace(boolean includeQuery, int traceLevel, Object... messages) { + if ( ! isTraceable(traceLevel)) return; + + StringBuilder concatenated = new StringBuilder(); + for (Object message : messages) + concatenated.append(String.valueOf(message)); + trace(concatenated.toString(), includeQuery, traceLevel); + } + + /** + * Set the context information for another query to be part of this query's + * context information. This is to be used if creating fresh query objects as + * part of a plug-in's execution. The query should be attached before it is + * used, in case an exception causes premature termination. This is enforced + * by an IllegalStateException. 
In other words, intended use is create the + * new query, and attach the context to the invoking query as soon as the new + * query is properly initialized. + * + * <p> + * This method will always set the argument query's context level to the context + * level of this query. + * + * @param query + * The query which should be traced as a part of this query. + * @throws IllegalStateException + * If the query given as argument already has context + * information. + */ + public void attachContext(Query query) throws IllegalStateException { + query.setTraceLevel(getTraceLevel()); + if (context == null) { + // Nothing to attach to. This is about the same as + // getTraceLevel() == 0, + // but is a direct test of what will make the function superfluous. + return; + } + if (query.getContext(false) != null) { + // If we added the other query's context info as a subnode in this + // query's context tree, we would have to check for loops in the + // context graph. If we simply created a new node without checking, + // we might silently overwrite useful information. + throw new IllegalStateException("Query to attach already has context information stored."); + } + query.context = context; + } + + private String queryTreeText() { + QueryTree root = getModel().getQueryTree(); + + if (getTraceLevel() < 2) + return root.toString(); + if (getTraceLevel() < 6) + return yqlRepresentation(); + else + return "\n" + yqlRepresentation() + "\n" + new TextualQueryRepresentation(root.getRoot()) + "\n"; + } + + /** + * Serialize this query as YQL+. This method will never throw exceptions, + * but instead return a human readable error message if a problem occured + * serializing the query. Hits and offset information will be included if + * different from default, while linguistics metadata are not added. 
+ * + * @return a valid YQL+ query string or a human readable error message + * @see Query#yqlRepresentation(Tuple2, boolean) + */ + public String yqlRepresentation() { + try { + return yqlRepresentation(null, true); + } catch (NullItemException e) { + return "Query currently a placeholder, NullItem encountered."; + } catch (RuntimeException e) { + return "Failed serializing query as YQL+, please file a ticket including the query causing this: " + + Exceptions.toMessageString(e); + } + } + + private void commaSeparated(StringBuilder yql, Set<String> fields) { + int initLen = yql.length(); + for (String field : fields) { + if (yql.length() > initLen) { + yql.append(", "); + } + yql.append(field); + } + } + + /** + * Serialize this query as YQL+. This will create a string representation + * which should always be legal YQL+. If a problem occurs, a + * RuntimeException is thrown. + * + * @param segmenterVersion + * linguistics metadata used in federation, set to null if the + * annotation is not necessary + * @param includeHitsAndOffset + * whether to include hits and offset parameters converted to a + * offset/limit slice + * @return a valid YQL+ query string + * @throws RuntimeException if there is a problem serializing the query tree + */ + public String yqlRepresentation(@Nullable Tuple2<String, Version> segmenterVersion, boolean includeHitsAndOffset) { + String q = VespaSerializer.serialize(this); + + Set<String> sources = getModel().getSources(); + Set<String> fields = getPresentation().getSummaryFields(); + StringBuilder yql = new StringBuilder("select "); + if (fields.isEmpty()) { + yql.append('*'); + } else { + commaSeparated(yql, fields); + } + yql.append(" from "); + if (sources.isEmpty()) { + yql.append("sources *"); + } else { + if (sources.size() > 1) { + yql.append("sources "); + } + commaSeparated(yql, sources); + } + yql.append(" where "); + if (segmenterVersion != null) { + yql.append("[{\"segmenter\": {\"version\": \"") + 
.append(segmenterVersion.second.toString()) + .append("\", \"backend\": \"") + .append(segmenterVersion.first).append("\"}}]("); + } + yql.append(q); + if (segmenterVersion != null) { + yql.append(')'); + } + if (getRanking().getSorting() != null && getRanking().getSorting().fieldOrders().size() > 0) { + serializeSorting(yql); + } + if (includeHitsAndOffset) { + if (getOffset() != 0) { + yql.append(" limit ") + .append(Integer.toString(getHits() + getOffset())) + .append(" offset ") + .append(Integer.toString(getOffset())); + } else if (getHits() != 10) { + yql.append(" limit ").append(Integer.toString(getHits())); + } + } + if (getTimeout() != 5000L) { + yql.append(" timeout ").append(Long.toString(getTimeout())); + } + yql.append(';'); + return yql.toString(); + } + + private void serializeSorting(StringBuilder yql) { + yql.append(" order by "); + int initLen = yql.length(); + for (FieldOrder f : getRanking().getSorting().fieldOrders()) { + if (yql.length() > initLen) { + yql.append(", "); + } + final Class<? 
extends AttributeSorter> sorterType = f.getSorter() + .getClass(); + if (sorterType == Sorting.RawSorter.class) { + yql.append("[{\"").append(YqlParser.SORTING_FUNCTION) + .append("\": \"").append(Sorting.RAW).append("\"}]"); + } else if (sorterType == Sorting.LowerCaseSorter.class) { + yql.append("[{\"").append(YqlParser.SORTING_FUNCTION) + .append("\": \"").append(Sorting.LOWERCASE) + .append("\"}]"); + } else if (sorterType == Sorting.UcaSorter.class) { + Sorting.UcaSorter uca = (Sorting.UcaSorter) f.getSorter(); + String ucaLocale = uca.getLocale(); + Sorting.UcaSorter.Strength ucaStrength = uca.getStrength(); + yql.append("[{\"").append(YqlParser.SORTING_FUNCTION) + .append("\": \"").append(Sorting.UCA).append("\""); + if (ucaLocale != null) { + yql.append(", \"").append(YqlParser.SORTING_LOCALE) + .append("\": \"").append(ucaLocale).append('"'); + } + if (ucaStrength != Sorting.UcaSorter.Strength.UNDEFINED) { + yql.append(", \"").append(YqlParser.SORTING_STRENGTH) + .append("\": \"").append(ucaStrength.name()) + .append('"'); + } + yql.append("}]"); + } + yql.append(f.getFieldName()); + if (f.getSortOrder() == Order.DESCENDING) { + yql.append(" desc"); + } + } + } + + /** Returns the context of this query, possibly creating it if missing. Returns the context, or null */ + public QueryContext getContext(boolean create) { + if (context==null && create) + context=new QueryContext(getTraceLevel(),this); + return context; + } + + /** Returns a hash of this query based on (some of) its content. */ + @Override + public int hashCode() { + return ranking.hashCode()+3*presentation.hashCode()+5* model.hashCode()+ 11*offset+ 13*hits; + } + + /** Returns whether the given query is equal to this */ + @Override + public boolean equals(Object other) { + if (this==other) return true; + + if ( ! (other instanceof Query)) return false; + Query q = (Query) other; + + if (getOffset() != q.getOffset()) return false; + if (getHits() != q.getHits()) return false; + if ( ! 
getPresentation().equals(q.getPresentation())) return false; + if ( ! getRanking().equals(q.getRanking())) return false; + if ( ! getModel().equals(q.getModel())) return false; + + // TODO: Compare property settings + + return true; + } + + /** Returns a clone of this query */ + @Override + public Query clone() { + Query clone = (Query) super.clone(); + copyPropertiesTo(clone); + return clone; + } + + private void copyPropertiesTo(Query clone) { + clone.model = model.cloneFor(clone); + clone.ranking = (Ranking) ranking.clone(); + clone.presentation = (Presentation) presentation.clone(); + clone.context = getContext(true).cloneFor(clone); + + if (errors != null) + clone.errors = new ArrayList<>(errors); + + // Correct the Query instance in properties + clone.properties().setParentQuery(clone); + assert (clone.properties().getParentQuery() == clone); + + clone.setTraceLevel(getTraceLevel()); + clone.setHits(getHits()); + clone.setOffset(getOffset()); + clone.setNoCache(getNoCache()); + clone.setGroupingSessionCache(getGroupingSessionCache()); + } + + /** Returns the presentation to be used for this query, never null */ + public Presentation getPresentation() { return presentation; } + + /** Returns the ranking to be used for this query, never null */ + public Ranking getRanking() { return ranking; } + + /** Returns the query representation model to be used for this query, never null */ + public Model getModel() { return model; } + + /** + * Return the HTTP request which caused this query. This will never be null + * when running with queries from the network. + * (Except when running with deprecated code paths, in which case this will + * return null but getRequest() will not.) + */ + public HttpRequest getHttpRequest() { return httpRequest; } + + /** + * Returns the unique and stable session id of this query. 
+ * + * @param create if true this is created if not already set + * @return the session id of this query, or null if not set and create is false + */ + public SessionId getSessionId(boolean create) { + if (sessionId == null && create) + this.sessionId = SessionId.next(); + return sessionId; + } + + public boolean hasEncodableProperties() { + if ( ! ranking.getProperties().isEmpty()) return true; + if ( ! ranking.getFeatures().isEmpty()) return true; + if ( ranking.getFreshness() != null) return true; + if ( model.getSearchPath() != null) return true; + if ( model.getDocumentDb() != null) return true; + if ( presentation.getHighlight() != null && ! presentation.getHighlight().getHighlightItems().isEmpty()) return true; + return false; + } + + /** + * Encodes properties of this query. + * + * @param buffer the buffer to encode to + * @param encodeQueryData true to encode all properties, false to only include session information, not actual query data + * @return the encoded length + */ + public int encodeAsProperties(ByteBuffer buffer, boolean encodeQueryData) { + // Make sure we don't encode anything here if we have turned the property feature off + // Due to sendQuery we sometimes end up turning this feature on and then encoding a 0 int as the number of + // property maps - that's ok (probably we should simplify by just always turning the feature on) + if (! 
hasEncodableProperties()) return 0; + + int start = buffer.position(); + + int mapCountPosition = buffer.position(); + buffer.putInt(0); // map count will go here + + int mapCount = 0; + + // TODO: Push down + mapCount += ranking.getProperties().encode(buffer, encodeQueryData); + if (encodeQueryData) mapCount += ranking.getFeatures().encode(buffer); + + // TODO: Push down + if (encodeQueryData && presentation.getHighlight() != null) mapCount += MapEncoder.encodeStringMultiMap(Highlight.HIGHLIGHTTERMS, presentation.getHighlight().getHighlightTerms(), buffer); + + // TODO: Push down + if (encodeQueryData) mapCount += MapEncoder.encodeSingleValue("model", "searchpath", model.getSearchPath(), buffer); + mapCount += MapEncoder.encodeSingleValue(DocumentDatabase.MATCH_PROPERTY, DocumentDatabase.SEARCH_DOC_TYPE_KEY, model.getDocumentDb(), buffer); + + mapCount += MapEncoder.encodeMap("caches", createCacheSettingMap(), buffer); + + buffer.putInt(mapCountPosition, mapCount); + + return buffer.position() - start; + } + + private Map<String, Boolean> createCacheSettingMap() { + if (getGroupingSessionCache() && ranking.getQueryCache()) { + Map<String, Boolean> cacheSettingMap = new HashMap<>(); + cacheSettingMap.put("grouping", true); + cacheSettingMap.put("query", true); + return cacheSettingMap; + } + if (getGroupingSessionCache()) + return Collections.singletonMap("grouping", true); + if (ranking.getQueryCache()) + return Collections.singletonMap("query", true); + return Collections.<String,Boolean>emptyMap(); + } + + /** + * Prepares this for binary serialization. + * <p> + * This must be invoked after all changes have been made to this query before it is passed + * on to a receiving backend. Calling it is somewhat expensive, so it should only happen once. + * If a prepared query is cloned, it stays prepared. 
+ */ + public void prepare() { + getModel().prepare(getRanking()); + getPresentation().prepare(); + getRanking().prepare(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/Result.java b/container-search/src/main/java/com/yahoo/search/Result.java new file mode 100644 index 00000000000..b6a88200084 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/Result.java @@ -0,0 +1,365 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search; + +import com.yahoo.collections.ListMap; +import com.yahoo.net.URI; +import com.yahoo.protect.Validator; +import com.yahoo.search.query.context.QueryContext; +import com.yahoo.search.result.*; +import com.yahoo.search.statistics.ElapsedTime; + +import java.util.Iterator; + +/** + * The Result contains all the data produced by executing a Query: Some very limited global information, and + * a single HitGroup containing hits of the result. The HitGroup may contain Hits, which are the individual + * result items, as well as further HitGroups, making up a <i>composite</i> structure. This allows the hits of a result + * to be hierarchically organized. A Hit is polymorphic and may contain any kind of information deemed + * an approriate partial answer to the Query. + * + * @author bratseth + */ +public final class Result extends com.yahoo.processing.Response implements Cloneable { + + // Note to developers: If you think you should add something here you are probably wrong + // To add some new kind of data, create a Hit subclass carrying the data and add that instead + + /** The top level hit group of this result */ + private HitGroup hits; + + /** The estimated total number of hits which would in theory be displayed this result is a part of */ + private long totalHitCount; + + /** + * The estimated total number of <i>deep</i> hits, which includes every object which matches the query. 
+ * This is always at least the same as totalHitCount. A lower value will cause hitCount to be returned. + */ + private long deepHitCount; + + /** The time spent producing this result */ + private ElapsedTime timeAccountant = new ElapsedTime(); + + /** Coverage information for this result. */ + private Coverage coverage = null; + + /** + * Headers containing "envelope" meta information to be returned with this result. + * Used for HTTP getHeaders when the return protocol is HTTP. + */ + private ListMap<String,String> headers=null; + + /** + * Result rendering infrastructure. + */ + private final Templating templating; + + /** Creates a new Result where the top level hit group has id "toplevel" */ + public Result(Query query) { + this(query, new HitGroup("toplevel")); + } + + /** + * Create an empty result. + * A source creating a result is <b>required</b> to call + * {@link #setTotalHitCount} before releasing this result. + * + * @param query the query which produced this result + * @param hits the hit container which this will return from {@link #hits()} + */ + @SuppressWarnings("deprecation") + public Result(Query query, HitGroup hits) { + super(query); + if (query==null) throw new NullPointerException("The query reference in a result cannot be null"); + this.hits=hits; + hits.setQuery(query); + if (query.getRanking().getSorting() != null) { + setHitOrderer(new HitSortOrderer(query.getRanking().getSorting())); + } + templating = new Templating(this); + } + + /** Create a result containing an error */ + public Result(Query query, ErrorMessage errorMessage) { + this(query); + hits.setError(errorMessage); + } + + /** + * Merges <b>meta information</b> from a result into this. + * This does not merge hits, but the other information associated + * with a result. It should <b>always</b> be called when adding + * hits from a result, but there is no constraints on the order of the calls. 
+ */ + @SuppressWarnings("deprecation") + public void mergeWith(Result result) { + if (templating.usesDefaultTemplate()) + templating.setRenderer(result.templating.getRenderer()); + totalHitCount += result.getTotalHitCount(); + deepHitCount += result.getDeepHitCount(); + timeAccountant.merge(result.getElapsedTime()); + boolean create=true; + if (result.getCoverage(!create) != null || getCoverage(!create) != null) + getCoverage(create).merge(result.getCoverage(create)); + } + + /** + * Merges meta information produced when a Hit already + * contained in this result has been filled using another + * result as an intermediary. @see mergeWith(Result) mergeWith. + */ + public void mergeWithAfterFill(Result result) { + timeAccountant.merge(result.getElapsedTime()); + } + + /** + * Returns the number of hit objects available in the top level group of this result. + * Note that this number is allowed to be higher than the requested number + * of hits, because a searcher is allowed to add <i>meta</i> hits as well + * as the requested number of concrete hits. + */ + public int getHitCount() { + return hits.size(); + } + + /** + * <p>Returns the total number of concrete hits contained (directly or in subgroups) in this result. + * This should equal the requested hits count if the query has that many matches.</p> + */ + public int getConcreteHitCount() { + return hits.getConcreteSize(); + } + + /** + * Returns the <b>estimated</b> total number of concrete hits which would be returned for this query. + */ + public long getTotalHitCount() { + return totalHitCount; + } + + /** + * Returns the estimated total number of <i>deep</i> hits, which includes every object which matches the query. + * This is always at least the same as totalHitCount. A lower value will cause hitCount to be returned. 
+ */ + public long getDeepHitCount() { + if (deepHitCount<totalHitCount) return totalHitCount; + return deepHitCount; + } + + + /** Sets the estimated total number of hits this result is a subset of */ + public void setTotalHitCount(long totalHitCount) { + this.totalHitCount = totalHitCount; + } + + /** Sets the estimated total number of deep hits this result is a subset of */ + public void setDeepHitCount(long deepHitCount) { + this.deepHitCount = deepHitCount; + } + + public ElapsedTime getElapsedTime() { + return timeAccountant; + } + + public void setElapsedTime(ElapsedTime t) { + timeAccountant = t; + } + + /** + * Returns true only if _all_ hits in this result originates from a cache. + */ + public boolean isCached() { + return hits.isCached(); + } + + /** + * Returns whether all hits in this result have been filled with + * the properties contained in the given summary class. Note that + * this method will also return true if no hits in this result are + * fillable. + */ + public boolean isFilled(String summaryClass) { + return hits.isFilled(summaryClass); + } + + /** Returns the query which produced this result */ + public Query getQuery() { return hits.getQuery(); } + + /** Sets a query for this result */ + public void setQuery(Query query) { hits.setQuery(query); } + + /** + * <p>Sets the hit orderer to be used for the top level hit group.</p> + * + * @param hitOrderer the new hit orderer, or null to use default relevancy ordering + */ + public void setHitOrderer(HitOrderer hitOrderer) { hits.setOrderer(hitOrderer); } + + /** Returns the orderer used by the top level group, or null if the default relevancy order is used */ + public HitOrderer getHitOrderer() { return hits.getOrderer(); } + + public void setDeletionBreaksOrdering(boolean flag) { hits.setDeletionBreaksOrdering(flag); } + + public boolean getDeletionBreaksOrdering() { return hits.getDeletionBreaksOrdering(); } + + /** Update cached and filled by iterating through the hits of this result */ + 
public void analyzeHits() { hits.analyze(); } + + /** Returns the top level hit group containing all the hits of this result */ + public HitGroup hits() { return hits; } + + @Override + public com.yahoo.processing.response.DataList<?> data() { + return hits; + } + + + /** Sets the top level hit group containing all the hits of this result */ + public void setHits(HitGroup hits) { + Validator.ensureNotNull("The top-level hit group of " + this,hits); + this.hits=hits; + } + + /** + * Deep clones this result - copies are made of all hits and subgroups of hits, + * <i>but not of the query referenced by this</i>. + */ + public Result clone() { + Result resultClone = (Result) super.clone(); + + resultClone.hits = hits.clone(); + + resultClone.getTemplating().setRenderer(null); // TODO: Kind of wrong + resultClone.setElapsedTime(new ElapsedTime()); + return resultClone; + } + + + public String toString() { + if (hits.getError() != null) { + return "Result: " + hits.getErrorHit().errors().iterator().next(); + } else { + return "Result (" + getConcreteHitCount() + " of total " + getTotalHitCount() + " hits)"; + } + } + + /** + * Adds a context message to this query containing the entire content of this result, + * if tracelevel is 5 or more. 
+ * + * @param name the name of the searcher instance returning this result + */ + public void trace(String name) { + if (hits().getQuery().getTraceLevel() < 5) { + return; + } + StringBuilder hitBuffer = new StringBuilder(name); + + hitBuffer.append(" returns:\n"); + int counter = 0; + + for (Iterator<Hit> i = hits.unorderedIterator(); i.hasNext();) { + Hit hit = i.next(); + + if (hit.isMeta()) continue; + + hitBuffer.append(" #: "); + hitBuffer.append(counter); + + traceExtraHitProperties(hitBuffer, hit); + + hitBuffer.append(", relevancy: "); + hitBuffer.append(hit.getRelevance()); + + hitBuffer.append(", addno: "); + hitBuffer.append(hit.getAddNumber()); + + hitBuffer.append(", source: "); + hitBuffer.append(hit.getSource()); + + hitBuffer.append(", uri: "); + URI uri = hit.getId(); + + if (uri != null) { + hitBuffer.append(uri.getHost()); + } else { + hitBuffer.append("(no uri)"); + } + hitBuffer.append("\n"); + counter++; + } + if (counter == 0) { + hitBuffer.append("(no hits)\n"); + } + hits.getQuery().trace(hitBuffer.toString(), false, 5); + } + + /** + * For tracing custom properties of a hit, see trace(String). An example of + * using this is in com.yahoo.prelude.Result. + * + * @param hitBuffer + * the render target + * @param hit + * the hit to be analyzed + */ + protected void traceExtraHitProperties(StringBuilder hitBuffer, Hit hit) { + return; + } + + /** Returns the context of this result - this is equal to getQuery().getContext(create) */ + public QueryContext getContext(boolean create) { return getQuery().getContext(create); } + + public void setCoverage(Coverage coverage) { this.coverage = coverage; } + + // Coverage a part of tracing? + // Coverage logic might me moved around, but it should not be a part of tracing. + // Coverage is status information about access to a corpus, tracing is voluntary, + // diagnostic search status. 
+ /** + * Returns coverage information + * + * @param create if true the coverage information of this result is created if missing + * @return the coverage information of this, or null if none and create is false + */ + public Coverage getCoverage(boolean create) { + if (coverage == null && create) { + if (hits.getError() == null) { + // No error here implies full coverage. + // Don't count this as a result set if there's no data - avoid counting empty results made + // to simplify code paths + coverage = new Coverage(0L, 0, true, (hits().size()==0 ? 0 : 1)); + } else { + coverage = new Coverage(0L, 0, false); + } + } + return coverage; + } + + /** + * Returns the set of "envelope" headers to be returned with this result. + * This returns the live map in modifiable form - modify this to change the + * headers. Or null if none, and it should not be created. + * <p> + * Used for HTTP headers when the return protocol is HTTP, e.g + * <pre>result.getHeaders(true).put("Cache-Control","max-age=120")</pre> + * + * @param create if true, create the header ListMap if it does not exist + * @return returns the ListMap of current headers, or null if no headers are set and <pre>create</pre> is false + */ + public ListMap<String, String> getHeaders(boolean create) { + if (headers == null && create) + headers = new ListMap<>(); + return headers; + } + + /** + * The Templating object contains helper methods and data containers for + * result rendering. + * + * @return helper object for result rendering + */ + public Templating getTemplating() { + return templating; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/Searcher.java b/container-search/src/main/java/com/yahoo/search/Searcher.java new file mode 100644 index 00000000000..95b4f92ca56 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/Searcher.java @@ -0,0 +1,175 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search;

import com.yahoo.component.ComponentId;
import com.yahoo.processing.Processor;
import com.yahoo.processing.Response;
import com.yahoo.search.searchchain.Execution;

import java.util.logging.Logger;

/**
 * Superclass of all {@link com.yahoo.component.Component Components} which produce Results in response to
 * Queries by calling the {@link #search search} method.
 * <p>
 * Searchers are participants in <i>chain of responsibility</i>
 * {@link com.yahoo.search.searchchain.SearchChain search chains}
 * where they pass the Queries downwards by synchronously calling the next Searcher in the chain, and return the
 * Results back up as the response.
 * <p>
 * Any Searcher may
 * <ul>
 * <li>Do modifications to the Query before passing it on (a <i>query rewriter</i>)
 * <li>Do modifications to the Result before passing it on up, e.g removing, altering, reorganizing or adding Hits
 * (a <i>result processor</i>)
 * <li>Pass the Query on to multiple other search chains, either in series
 * (by creating a new {@link com.yahoo.search.searchchain.Execution} for each chain), or in parallel (by creating a
 * {@link com.yahoo.search.searchchain.AsyncExecution}) (a <i>federator</i>)
 * <li>Create a Result and pass it back up, either by calling some other node(s) to get the data, or by creating the
 * Result from internal data (a <i>source</i>)
 * <li>Pass some query on downwards multiple times, or in different ways, typically each time depending on the Result
 * returned the last time (a <i>workflow</i>)
 * </ul>
 *
 * <p>...or some combination of the above of course. Note that as Searchers work synchronously, any information can be
 * retained on the stack in the Searcher from the Query is received until the Result is returned simply by declaring
 * variables for the data in the search method (or whatever it calls), and for the same reason workflows are
 * implemented as Java code. However, searchers are executed by many threads, for different Queries, in parallel, so
 * any mutable data shared between queries (and hence stored as instance members) must be accessed multithread safely.
 * In many cases, shared data can simply be instantiated in the constructor and used in read-only mode afterwards.
 * <p>
 * <b>Searcher lifecycle:</b> A searcher has a simple life-cycle:
 *
 * <ul>
 * <li><b>Construction: While a constructor is running.</b> A searcher is handed its id and configuration
 * (if any) in the constructor. During construction, the searcher should build any in-memory structures needed.
 * A new instance of the searcher will be created when the configuration is changed.
 * Constructors are called with this priority:
 *
 * <ul>
 * <li>The constructor taking a ComponentId, followed by the highest number of config classes (subclasses of
 * {@link com.yahoo.config.ConfigInstance}) as arguments.
 * <li>The constructor taking a string id followed by the highest number of config classes as arguments.
 * <li>The constructor taking only the highest number of config classes as arguments.
 * <li>The constructor taking a ComponentId as the only argument
 * <li>The constructor taking a string id as the only argument
 * <li>The default (no-argument) constructor.
 * </ul>
 *
 * If none of these constructors are declared, searcher construction will fail.
 *
 * <li><b>In service: After the constructor has returned.</b> In this phase, searcher service methods are
 * called at any time by multiple threads in parallel.
 * Implementations should avoid synchronization and access to volatiles as much as possible by keeping
 * data structures built during construction read-only.
 *
 * <li><b>Deconstruction: While deconstruct is running.</b> All Searcher service method calls have completed when
 * this method is called. When it returns, the searcher will be eligible for garbage collection.
 *
 * </ul>
 *
 * @author bratseth
 */
public abstract class Searcher extends Processor {

    // Note to developers: If you think you should add something here you are probably wrong
    // Create a subclass containing the new method instead.

    /** Logger named after the concrete (sub)class of this instance */
    private final Logger logger = Logger.getLogger(getClass().getName());

    public Searcher() {}

    /** Creates a searcher from an id */
    public Searcher(ComponentId id) {
        super();
        initId(id);
    }

    /**
     * Override this to implement your searcher.
     * <p>
     * Searcher implementation subclasses will, depending on their type of logic, do one of the following:
     * <ul>
     * <li><b>Query processors:</b> Access the query, then call execution.search and return the result
     * <li><b>Result processors:</b> Call execution.search to get the result, access it and return
     * <li><b>Sources</b> (which produce results): Create a result, add the desired hits and return it.
     * <li><b>Federators</b> (which forward the search to multiple subchains): Call search on the
     * desired subchains in parallel and get the results. Combine the results to one and return it.
     * <li><b>Workflows:</b> Call execution.search as many times as desired, using different queries.
     * Eventually return a result.
     * </ul>
     * <p>
     * Hits come in two kinds - <i>concrete hits</i> are actual
     * content of the kind requested by the user, <i>meta hits</i> are
     * hits which provide information about the collection of hits,
     * on the query, the service and so on.
     * <p>
     * The query specifies a window into a larger result list that must be returned from the searcher
     * through <i>hits</i> and <i>offset</i>;
     * Searchers which return a list of hits in the top level in the result
     * must return at least <i>hits</i> number of hits (or if impossible; all that are available),
     * starting at the given offset.
     * In addition, searchers are allowed to return
     * any number of meta hits (although this number is expected to be low).
     * For hits contained in nested hit groups, the concept of a window defined by hits and offset
     * is not well defined and does not apply.
     * <p>
     * Error handling in searchers:
     * <ul>
     * <li>Unexpected events: Throw any RuntimeException. This query will fail
     * with the exception message, and the error will be logged
     * <li>Expected events: Create (new Result(Query, ErrorMessage)) or add
     * (result.setErrorIfNoOtherErrors(ErrorMessage)) an error message to the Result.
     * <li>Recoverable user errors: Add a FeedbackHit explaining the condition
     * and how to correct it.
     * </ul>
     *
     * @param query the query
     * @param execution the execution used to pass the query further down the chain
     * @return the result of making this query
     */
    public abstract Result search(Query query, Execution execution);

    /** Use the search method in Searcher processors. This forwards to it. */
    @Override
    public final Response process(com.yahoo.processing.Request request,
                                  com.yahoo.processing.execution.Execution execution) {
        Query query = (Query) request;
        Execution searchExecution = (Execution) execution;
        return search(query, searchExecution);
    }

    /**
     * Fill hit properties with data using the given summary class.
     * Calling this on already filled results has no cost.
     * <p>
     * This needs to be overridden by <i>federating</i> searchers to contact search sources again by
     * propagating the fill call down through the search chain, and by <i>source</i> searchers
     * which talk to fill capable backends to request the data to be filled. Other searchers do
     * not need to override this.
     *
     * @param result the result to fill
     * @param summaryClass the name of the collection of fields to fetch the values of, or null to use the default
     * @param execution the execution the fill request is forwarded to
     */
    public void fill(Result result, String summaryClass, Execution execution) {
        execution.fill(result, summaryClass);
    }

    /**
     * Fills the result if it is not already filled for the given summary class.
     * A null summary class is replaced by the summary of the presentation of the result's query.
     * See the fill method.
     */
    public final void ensureFilled(Result result, String summaryClass, Execution execution) {
        String effectiveSummary = summaryClass != null
                                  ? summaryClass
                                  : result.getQuery().getPresentation().getSummary();
        if (result.isFilled(effectiveSummary)) return;

        fill(result, effectiveSummary, execution);
    }

    /** Returns a logger unique for the instance subclass */
    protected Logger getLogger() {
        return logger;
    }

    /** Returns "searcher 'getId()'" */
    @Override
    public String toString() {
        return "searcher '" + getIdString() + "'";
    }

}
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.cluster;

import java.util.logging.Logger;

import com.yahoo.search.result.ErrorMessage;


/**
 * A node monitor is responsible for maintaining the state of a monitored node.
 * It has the following properties:
 * <ul>
 * <li>A node is taken out of operation if it fails</li>
 * <li>A node is put back in operation when it responds correctly again
 * <i>responseAfterFailLimit</i> times <b>unless</b>
 * it has failed <i>failQuarantineLimit</i>. In the latter case it won't
 * be put into operation again before that time period has expired</li>
 * </ul>
 *
 * @author bratseth
 */
public abstract class BaseNodeMonitor<T> {

    protected static Logger log = Logger.getLogger(BaseNodeMonitor.class.getName());

    /** The object representing the monitored node */
    protected T node;

    /** Whether the node is currently considered working; starts out true */
    protected boolean isWorking = true;

    /** Whether this node is quarantined for instability */
    protected boolean isQuarantined = false;

    /** The last time this node failed, in ms */
    protected long failedAt = 0;

    /** The last time this node responded (failed or succeeded), in ms */
    protected long respondedAt = 0;

    /** The last time this node responded successfully */
    protected long succeededAt = 0;

    /** The configuration of this monitor */
    protected MonitorConfiguration configuration;

    /** Is the node we monitor part of an internal Vespa cluster or not */
    private final boolean internal;

    public BaseNodeMonitor(boolean internal) {
        this.internal = internal;
    }

    /** Returns the object representing the monitored node */
    public T getNode() {
        return node;
    }

    /**
     * Returns whether this node is currently in a state suitable
     * for receiving traffic. As far as we know, that is
     */
    public boolean isWorking() {
        return isWorking;
    }

    /** Returns whether this node is currently quarantined */
    public boolean isQuarantined() {
        return isQuarantined;
    }

    /**
     * Called when this node fails.
     *
     * @param error a description of the error
     */
    public abstract void failed(ErrorMessage error);

    /**
     * Called when a response is received from this node. If the node was
     * quarantined and it has been in that state for more than QuarantineTime
     * milliseconds, it is taken out of quarantine.
     *
     * If it is not in quarantine but is not working, it may be set to working
     * if this method is called at least responseAfterFailLimit times
     */
    public abstract void responded();

    /** Returns whether the time since the last response has reached the configured idle limit */
    public boolean isIdle() {
        long sinceLastResponse = now() - respondedAt;
        return sinceLastResponse >= configuration.getIdleLimit();
    }

    /** Returns the current time in milliseconds, from System.currentTimeMillis() */
    protected long now() {
        return System.currentTimeMillis();
    }

    /** Thread-safely changes the state of this node if required */
    protected abstract void setWorking(boolean working, String explanation);

    /** Returns whether or not this is monitoring an internal node, as given at construction. */
    public boolean isInternal() {
        return internal;
    }
}
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.cluster;


import com.yahoo.concurrent.DaemonThreadFactory;
import com.yahoo.concurrent.ThreadFactoryFactory;
import com.yahoo.search.result.ErrorMessage;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Monitors a cluster of remote nodes.
 * The monitor uses an internal thread for node monitoring.
 * All <i>public</i> methods of this class are multithread safe.
 *
 * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
 */
public class ClusterMonitor<T> {

    private static final Logger log = Logger.getLogger(ClusterMonitor.class.getName());

    private final MonitorConfiguration configuration = new MonitorConfiguration();

    private final NodeManager<T> nodeManager;

    private final MonitorThread monitorThread;

    /** Set by shutdown(); checked by the monitor thread so it stops even if its interrupt was consumed */
    private volatile boolean shutdown = false;

    /** A map from Node to corresponding MonitoredNode */
    private final Map<T, BaseNodeMonitor<T>> nodeMonitors =
            Collections.synchronizedMap(new LinkedHashMap<T, BaseNodeMonitor<T>>());

    /**
     * Creates a cluster monitor and starts its monitoring thread.
     *
     * @param manager the node manager which is asked to ping nodes and told about state changes
     * @param monitorConfigID not used by this implementation
     */
    public ClusterMonitor(NodeManager<T> manager, String monitorConfigID) {
        nodeManager = manager;
        monitorThread = new MonitorThread("search.clustermonitor");
        monitorThread.start();
        log.fine("checkInterval is " + configuration.getCheckInterval() + " ms");
    }

    /** Returns the configuration of this cluster monitor */
    public MonitorConfiguration getConfiguration() { return configuration; }

    /**
     * Adds a new node for monitoring.
     * The object representing the node must
     * <ul>
     * <li>Have a sensible toString</li>
     * <li>Have a sensible identity (equals and hashCode)</li>
     * </ul>
     *
     * @param node the object representing the node
     * @param internal whether or not this node is internal to this cluster
     */
    public void add(T node, boolean internal) {
        BaseNodeMonitor<T> monitor = new TrafficNodeMonitor<>(node, configuration, internal);
        nodeMonitors.put(node, monitor);
    }

    /**
     * Returns the monitor of the given node, or null if this node has not been added
     */
    public BaseNodeMonitor<T> getNodeMonitor(T node) {
        return nodeMonitors.get(node);
    }

    /**
     * Called from ClusterSearcher/NodeManager when a node failed.
     * The node manager is notified only when this takes the node from working to not working.
     * A node which has not been added to this monitor is ignored.
     */
    public synchronized void failed(T node, ErrorMessage error) {
        BaseNodeMonitor<T> monitor = nodeMonitors.get(node);
        if (monitor == null) { // not monitored: there is no state to update
            log.fine("Ignoring failure of unmonitored node " + node);
            return;
        }
        boolean wasWorking = monitor.isWorking();
        monitor.failed(error);
        if (wasWorking && !monitor.isWorking()) {
            nodeManager.failed(node);
        }
    }

    /**
     * Called when a node responded.
     * The node manager is notified only when this takes the node from not working to working.
     * A node which has not been added to this monitor is ignored.
     */
    public synchronized void responded(T node) {
        BaseNodeMonitor<T> monitor = nodeMonitors.get(node);
        if (monitor == null) { // not monitored: there is no state to update
            log.fine("Ignoring response from unmonitored node " + node);
            return;
        }
        boolean wasFailing = !monitor.isWorking();
        monitor.responded();
        if (wasFailing && monitor.isWorking()) {
            nodeManager.working(monitor.getNode());
        }
    }

    /**
     * Ping all nodes to discover state changes. Every monitored node is pinged on each call.
     *
     * @param executor the executor handed to the node manager for running the pings
     */
    public void ping(Executor executor) {
        for (BaseNodeMonitor<T> monitor : nodeMonitors()) {
            nodeManager.ping(monitor.getNode(), executor); // Cause call to failed or responded
        }
    }

    /** Returns an iterator over a thread-safe snapshot of the NodeMonitors of all added nodes */
    public Iterator<BaseNodeMonitor<T>> nodeMonitorIterator() {
        return nodeMonitors().iterator();
    }

    /** Returns a thread-safe snapshot of the NodeMonitors of all added nodes */
    public List<BaseNodeMonitor<T>> nodeMonitors() {
        synchronized (nodeMonitors) {
            return new ArrayList<>(nodeMonitors.values());
        }
    }

    /** Must be called when this goes out of use */
    public void shutdown() {
        shutdown = true;
        monitorThread.interrupt();
    }

    private class MonitorThread extends Thread {

        MonitorThread(String name) {
            super(name);
        }

        @Override
        public void run() {
            log.fine("Starting cluster monitor thread");
            // Pings must happen in a separate thread from this to handle timeouts
            // By using a cached thread pool we ensure that 1) a single thread will be used
            // for all pings when there are no problems (important because it ensures that
            // any thread local connections are reused) 2) a new thread will be started to execute
            // new pings when a ping is not responding
            ExecutorService pingExecutor =
                    Executors.newCachedThreadPool(ThreadFactoryFactory.getDaemonThreadFactory("search.ping"));
            try {
                // The shutdown flag is checked in addition to the interrupt status, because code
                // called from ping may catch the InterruptedException and thereby clear the status
                while (!shutdown && !isInterrupted()) {
                    try {
                        Thread.sleep(configuration.getCheckInterval());
                        log.finest("Activating ping");
                        ping(pingExecutor);
                    }
                    catch (Exception e) {
                        if (shutdown && e instanceof InterruptedException) {
                            break;
                        }
                        log.log(Level.WARNING, "Error in monitor thread", e);
                    }
                }
            }
            finally {
                pingExecutor.shutdownNow(); // don't leave ping threads behind when monitoring stops
            }
            log.fine("Stopped cluster monitor thread");
        }

    }

}
+package com.yahoo.search.cluster; + +import com.yahoo.component.ComponentId; +import com.yahoo.container.protect.Error; +import com.yahoo.log.LogLevel; +import com.yahoo.prelude.Ping; +import com.yahoo.prelude.Pong; +import com.yahoo.yolean.Exceptions; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.cluster.Hasher.NodeList; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; + +import java.util.List; +import java.util.concurrent.*; + +/** + * Implements clustering (failover and load balancing) over a set of client + * connections to a homogenous cluster of nodes. Searchers which wants to make + * clustered connections to some service should use this. + * <p> + * This replaces the usual searcher methods by ones which have the same contract + * and semantics but which takes an additional parameter which is the Connection + * selected by the cluster searcher which the method should use. Overrides of + * these connection methods <i>must not</i> call the super methods to pass on + * but must use the methods on execution. + * <p> + * The type argument is the class (of any type) representing the connections. + * The connection objects should implement a good toString to ease diagnostics. 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> + */ +public abstract class ClusterSearcher<T> extends PingableSearcher implements NodeManager<T> { + + private Hasher<T> hasher = new Hasher<>(); + private ClusterMonitor<T> monitor = new ClusterMonitor<>(this, "dummy"); + + /** + * Creates a new cluster searcher + * + * @param id + * the id of this searcher + * @param connections + * the connections of the cluster + * @param internal + * whether or not this cluster is internal (part of the same + * installation) + */ + public ClusterSearcher(ComponentId id, List<T> connections, boolean internal) { + this(id, connections, new Hasher<T>(), internal); + } + + public ClusterSearcher(ComponentId id, List<T> connections, Hasher<T> hasher, boolean internal) { + super(id); + this.hasher = hasher; + for (T connection : connections) { + monitor.add(connection, internal); + hasher.add(connection); + } + } + + /** + * Pinging a node by sending a query NodeManager method, called from + * ClusterMonitor + */ + public final @Override void ping(T p, Executor executor) { + log(LogLevel.FINE, "Sending ping to: ", p); + Pinger pinger = new Pinger(p); + FutureTask<Pong> future = new FutureTask<>(pinger); + + executor.execute(future); + Pong pong; + Throwable logThrowable = null; + + try { + pong = future.get(monitor.getConfiguration().getFailLimit(), + TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + pong = new Pong(); + pong.addError(ErrorMessage + .createUnspecifiedError("Ping was interrupted: " + p)); + logThrowable = e; + } catch (ExecutionException e) { + pong = new Pong(); + pong.addError(ErrorMessage + .createUnspecifiedError("Execution was interrupted: " + p)); + logThrowable = e; + } catch (LinkageError e) { // Typically Osgi woes + pong = new Pong(); + pong.addError(ErrorMessage.createErrorInPluginSearcher("Class loading problem",e)); + logThrowable=e; + } catch 
(TimeoutException e) { + pong = new Pong(); + pong.addError(ErrorMessage + .createNoAnswerWhenPingingNode("Ping thread timed out.")); + } + future.cancel(true); + + if (pong.badResponse()) { + monitor.failed(p, pong.getError(0)); + log(LogLevel.FINE, "Failed ping - ", pong); + } else { + monitor.responded(p); + log(LogLevel.FINE, "Answered ping - ", p); + } + + if (logThrowable != null) { // This looks strange, but yes - it is + // needed + String logMsg; + if (logThrowable instanceof TimeoutException) { + logMsg = "Ping timed out for " + getId().getName() + "."; + } else { + StackTraceElement[] trace = logThrowable.getStackTrace(); + String traceAsString = null; + if (trace != null) { + StringBuilder b = new StringBuilder(": "); + for (StackTraceElement k : trace) { + if (k == null) { + b.append("null\n"); + } else { + b.append(k.toString()).append('\n'); + } + } + traceAsString = b.toString(); + } + logMsg = "Caught " + logThrowable.getClass().getName() + + " exception in " + getId().getName() + " ping" + + (trace == null ? ", no stack trace available." : traceAsString); + } + getLogger().warning(logMsg); + } + + } + + /** + * Pings this connection. Pings may be sent "out of band" at any time by the + * monitoring subsystem to determine the status of this connection. If the + * ping fails, it is ok both to set an error in the pong or to throw an + * exception. 
+ */ + protected abstract Pong ping(Ping ping, T connection); + + protected T getFirstConnection(NodeList<T> nodes, int code, int trynum, Query query) { + return nodes.select(code, trynum); + } + + @Override + public final Result search(Query query, Execution execution) { + int tries = 0; + + Hasher.NodeList<T> nodes = getHasher().getNodes(); + + if (nodes.getNodeCount() == 0) + return search(query, execution, ErrorMessage + .createNoBackendsInService("No nodes in service in " + this + " (" + monitor.nodeMonitors().size() + + " was configured, none is responding)")); + + int code = query.hashCode(); + Result result; + T connection = getFirstConnection(nodes, code, tries, query); + do { + // The loop is in case there are other searchers available + // able to produce results + if (connection == null) + return search(query, execution, ErrorMessage + .createNoBackendsInService("No in node could handle " + query + " according to " + + hasher + " in " + this)); + if (timedOut(query)) + return new Result(query, ErrorMessage.createTimeout("No time left for searching")); + + if (query.getTraceLevel() >= 8) + query.trace("Trying " + connection, false, 8); + + result = robustSearch(query, execution, connection); + + if (!shouldRetry(query, result)) + return result; + + if (query.getTraceLevel() >= 6) + query.trace("Error from connection " + connection + " : " + result.hits().getError(), false, 6); + + if (result.hits().getError().getCode() == Error.TIMEOUT.code) + return result; // Retry is unlikely to help + + log(LogLevel.FINER, "No result, checking for timeout."); + tries++; + connection = nodes.select(code, tries); + } while (tries < nodes.getNodeCount()); + + // only error result gets returned here. + return result; + + } + + /** + * Returns whether this query and result should be retried against another + * connection if possible. This default implementation returns true if the + * result contains some error. 
+ */ + protected boolean shouldRetry(Query query, Result result) { + return result.hits().getError() != null; + } + + /** + * This is called (instead of search(quer,execution,connextion) to handle + * searches where no (suitable) backend was available. The default + * implementation returns an error result. + */ + protected Result search(Query query, Execution execution, ErrorMessage message) { + return new Result(query, message); + } + + /** + * Call search(Query,Execution,T) and handle any exceptions returned which + * we do not want to propagate upwards By default this catches all runtime + * exceptions and puts them into the result + */ + protected Result robustSearch(Query query, Execution execution, T connection) { + Result result; + try { + result = search(query, execution, connection); + } catch (RuntimeException e) { //TODO: Exceptions should not be used to signal backend communication errors + log(LogLevel.WARNING, "An exception occurred while invoking backend searcher.", e); + result = new Result(query, ErrorMessage + .createBackendCommunicationError("Failed calling " + + connection + " in " + this + " for " + query + + ": " + Exceptions.toMessageString(e))); + } + + if (result == null) + result = new Result(query, ErrorMessage + .createBackendCommunicationError("No result returned in " + + this + " from " + connection + " for " + query)); + + if (result.hits().getError() != null) { + log(LogLevel.FINE, "FAILED: ", query); + } else if (!result.isCached()) { + log(LogLevel.FINE, "WORKING: ", query); + } else { + log(LogLevel.FINE, "CACHE HIT: ", query); + } + return result; + } + + /** + * Perform the search against the given connection. Return a result + * containing an error or throw an exception on failures. 
+ */ + protected abstract Result search(Query query, Execution execution, T connection); + + public @Override + final void fill(Result result, String summaryClass, Execution execution) { + Query query = result.getQuery(); + Hasher.NodeList<T> nodes = getHasher().getNodes(); + int code = query.hashCode(); + + T connection = nodes.select(code, 0); + if (connection != null) { + if (timedOut(query)) { + result.hits().addError( + ErrorMessage.createTimeout( + "No time left to get summaries for " + + result)); + } else { + // query.setTimeout(getNodeTimeout(query)); + doFill(connection, result, summaryClass, execution); + } + } else { + result.hits().addError( + ErrorMessage.createNoBackendsInService("Could not fill '" + + result + "' in '" + this + "'")); + } + } + + private void doFill(T connection, Result result, String summaryClass, Execution execution) { + try { + fill(result, summaryClass, execution, connection); + } catch (RuntimeException e) { + result.hits().addError( + ErrorMessage + .createBackendCommunicationError("Error filling " + + result + " from " + connection + ": " + + Exceptions.toMessageString(e))); + } + if (result.hits().getError() != null) { + log(LogLevel.FINE, "FAILED: ", result.getQuery()); + } else if (!result.isCached()) { + log(LogLevel.FINE, "WORKING: ", result.getQuery()); + } else { + log(LogLevel.FINE, "CACHE HIT: " + result.getQuery()); + } + } + + /** + * Perform the fill against the given connection. Add an error to the result + * or throw an exception on failures. + */ + protected abstract void fill(Result result, String summaryClass, + Execution execution, T connection); + + /** NodeManager method, called from ClusterMonitor */ + public @Override + void working(T node) { + getHasher().add(node); + } + + /** NodeManager method, called from ClusterMonitor */ + public @Override + void failed(T node) { + getHasher().remove(node); + } + + /** + * Returns the hasher used internally in this. Do not mutate this hasher + * while in use. 
/**
 * Returns the hasher used internally in this. Do not mutate this hasher
 * while in use.
 */
public Hasher<T> getHasher() {
    return hasher;
}

/** Returns the monitor of these nodes */
public ClusterMonitor<T> getMonitor() {
    return monitor;
}

/** Returns true if the time spent on this query so far has reached its timeout */
protected boolean timedOut(Query query) {
    long elapsed = query.getDurationTime();
    return elapsed >= query.getTimeout();
}

/**
 * Logs the concatenation of the given objects at the given level.
 * Nothing is concatenated unless the level is loggable.
 */
protected void log(java.util.logging.Level level, Object... objects) {
    if (getLogger().isLoggable(level)) {
        StringBuilder message = new StringBuilder();
        for (int i = 0; i < objects.length; i++) {
            message.append(objects[i]);
        }
        getLogger().log(level, message.toString());
    }
}

/** Deconstructs the superclass state first, then shuts down the monitor */
@Override
public void deconstruct() {
    super.deconstruct();
    monitor.shutdown();
}

/** A task pinging one connection, turning runtime exceptions into an error pong */
private class Pinger implements Callable<Pong> {

    private T connection;

    public Pinger(T connection) {
        this.connection = connection;
    }

    public Pong call() {
        try {
            return ping(new Ping(monitor.getConfiguration().getRequestTimeout()), connection);
        } catch (RuntimeException e) {
            Pong failure = new Pong();
            failure.addError(
                    ErrorMessage.createBackendCommunicationError(
                            "Exception when pinging " + connection + ": " + Exceptions.toMessageString(e)));
            return failure;
        }
    }

}
}
/**
 * A hasher load balances between a set of nodes, represented by object ids.
 * <p>
 * The current node set is held in an immutable {@link NodeList} snapshot which
 * is replaced as a whole through a volatile field; mutations ({@code add},
 * {@code remove}) are synchronized. Nodes are compared by identity ({@code ==}).
 *
 * @author Arne B Fossaa
 * @author bratseth
 * @author Prashanth B. Bhat
 */
public class Hasher<T> {

    /** A node paired with its relative load factor */
    public static class NodeFactor<T> {

        private final T node;

        /**
         * The relative weight of the different nodes.
         * Hashing is based on the proportions of the weights.
         */
        private final int load;

        public NodeFactor(T node, int load) {
            this.node = node;
            this.load = load;
        }

        public final T getNode() { return node; }

        public final int getLoad() { return load; }

    }

    /** A snapshot of the nodes in a hasher, with the sum of their load factors */
    public static class NodeList<T> {

        private final NodeFactor<T>[] nodes;

        private final int totalLoadFactor;

        /**
         * Creates a node list.
         *
         * @param nodes the nodes of this list; null is treated as an empty list
         */
        @SuppressWarnings("unchecked") // an empty array holds no elements, so the cast is safe
        public NodeList(NodeFactor<T>[] nodes) {
            this.nodes = (nodes != null) ? nodes : (NodeFactor<T>[]) new NodeFactor[0];
            int sum = 0;
            for (NodeFactor<T> node : this.nodes) {
                sum += node.getLoad();
            }
            this.totalLoadFactor = sum;
        }

        /** Returns the number of nodes in this list */
        public int getNodeCount() {
            return nodes.length;
        }

        /**
         * Selects a node for the given hash code.
         *
         * @param code   the hash code to select a node for
         * @param trynum the number of nodes to skip past the hashed one
         *               (the ones tried before), wrapping around the list
         * @return the selected node, or null if the total load factor is not positive
         */
        public T select(int code, int trynum) {
            if (totalLoadFactor <= 0) return null;

            // Multiply by a prime number much bigger than the likely number of hosts.
            // Math.abs(Integer.MIN_VALUE) is negative, which would make every node's
            // cumulative load "exceed" the hash value; map that single case to 0.
            int product = code * 76103;
            int hashValue = (product == Integer.MIN_VALUE ? 0 : Math.abs(product)) % totalLoadFactor;

            int sumLoad = 0;
            int targetNode;
            for (targetNode = 0; targetNode < nodes.length; targetNode++) {
                sumLoad += nodes[targetNode].getLoad();
                if (sumLoad > hashValue)
                    break;
            }
            // Skip the ones we have tried before; floorMod keeps the index in range.
            targetNode = Math.floorMod(targetNode + trynum, nodes.length);
            return nodes[targetNode].getNode();
        }

        /** Returns whether this list contains the given node instance (identity comparison) */
        public boolean hasNode(T node) {
            for (NodeFactor<T> candidate : nodes) {
                if (node == candidate.getNode()) {
                    return true;
                }
            }
            return false;
        }

    }

    private volatile NodeList<T> nodes;

    @SuppressWarnings("unchecked")
    public Hasher() {
        this.nodes = new NodeList<T>(new NodeFactor[0]);
    }

    /** Adds a node with load factor 100 */
    public void add(T node) {
        add(node, 100);
    }

    /**
     * Adds a node with a load factor.
     * The load factor is relative to the load of the other added nodes
     * and determines how often this node will be selected compared
     * to the other nodes. Adding a node which is already present does nothing.
     */
    public synchronized void add(T node, int load) {
        assert (nodes != null);
        if (nodes.hasNode(node)) return;

        NodeFactor<T>[] oldNodes = nodes.nodes;
        @SuppressWarnings("unchecked")
        NodeFactor<T>[] newNodes = (NodeFactor<T>[]) new NodeFactor[oldNodes.length + 1];
        System.arraycopy(oldNodes, 0, newNodes, 0, oldNodes.length);
        newNodes[newNodes.length - 1] = new NodeFactor<>(node, load);

        // Atomic switch due to volatile
        nodes = new NodeList<>(newNodes);
    }

    /** Removes a node. Removing a node which is not present does nothing. */
    public synchronized void remove(T node) {
        if (!nodes.hasNode(node)) return;

        NodeFactor<T>[] oldNodes = nodes.nodes;
        @SuppressWarnings("unchecked")
        NodeFactor<T>[] newNodes = (NodeFactor<T>[]) new NodeFactor[oldNodes.length - 1];
        for (int i = 0, j = 0; i < oldNodes.length; i++) {
            if (oldNodes[i].getNode() != node) {
                newNodes[j++] = oldNodes[i];
            }
        }
        // An atomic switch due to volatile.
        nodes = new NodeList<>(newNodes);
    }

    /**
     * Returns the current list of nodes, i.e. those added and not since removed.
     */
    public NodeList<T> getNodes() {
        return nodes;
    }

}
/**
 * The configuration of a cluster monitor instance
 *
 * @author bratseth
 */
public class MonitorConfiguration {

    /**
     * The interval in ms between consecutive checks of the monitored
     * nodes
     */
    private long checkInterval = 1000;

    /**
     * The number of times a failed node must respond before getting
     * traffic again
     */
    private int responseAfterFailLimit = 3;

    /**
     * The number of ms a node is allowed to stay idle before it is
     * pinged
     */
    private long idleLimit = 3000;

    /**
     * The number of milliseconds to attempt to complete a request
     * before giving up
     */
    private long requestTimeout = 5000;

    /**
     * The number of milliseconds a node is allowed to fail before we
     * mark it as not working
     */
    private long failLimit = 5000;

    /**
     * The number of times a node is allowed to fail in one hour
     * before it is quarantined for an hour
     */
    private int failQuarantineLimit = 3;

    /**
     * The number of ms to quarantine an unstable node
     */
    private long quarantineTime = 1000 * 60 * 60;

    /**
     * Sets the interval between each ping of idle or failing nodes.
     * Default is 1000 ms.
     */
    public void setCheckInterval(long intervalMs) {
        this.checkInterval = intervalMs;
    }

    /**
     * Returns the interval between each ping of idle or failing nodes.
     * Default is 1000 ms.
     */
    public long getCheckInterval() {
        return checkInterval;
    }

    /**
     * Sets the number of times a failed node must respond before it is put
     * back in service. Default is 3.
     */
    public void setResponseAfterFailLimit(int responseAfterFailLimit) {
        this.responseAfterFailLimit = responseAfterFailLimit;
    }

    /**
     * Returns the number of times a failed node must respond before it is put
     * back in service. Default is 3.
     */
    public int getResponseAfterFailLimit() {
        return responseAfterFailLimit;
    }

    /**
     * Sets the number of ms a node (failing or working) is allowed to
     * stay idle before it is pinged. Default is 3000.
     */
    public void setIdleLimit(int idleLimit) {
        this.idleLimit = idleLimit;
    }

    /**
     * Sets the number of ms a node (failing or working) is allowed to
     * stay idle before it is pinged, using the full range of the underlying value.
     * Default is 3000.
     */
    public void setIdleLimit(long idleLimit) {
        this.idleLimit = idleLimit;
    }

    /**
     * Gets the number of ms a node (failing or working)
     * is allowed to stay idle before it is pinged. Default is 3000.
     */
    public long getIdleLimit() {
        return idleLimit;
    }

    /**
     * Sets the number of milliseconds to attempt to service a request
     * before giving up. Default is 5000 ms.
     */
    public void setRequestTimeout(long requestTimeout) {
        this.requestTimeout = requestTimeout;
    }

    /**
     * Returns the number of milliseconds to attempt to service a request
     * (at different nodes) before giving up. Default is 5000 ms.
     */
    public long getRequestTimeout() { return requestTimeout; }

    /**
     * Sets the number of milliseconds a node is allowed to fail before we
     * mark it as not working
     */
    public void setFailLimit(long failLimit) { this.failLimit = failLimit; }

    /**
     * Returns the number of milliseconds a node is allowed to fail before we
     * mark it as not working
     */
    public long getFailLimit() { return failLimit; }

    /**
     * The number of times a node must fail in one hour to be placed
     * in quarantine. Once in quarantine it won't be put back in
     * production before quarantineTime has expired even if it is
     * working. Default is 3.
     */
    public void setFailQuarantineLimit(int failQuarantineLimit) {
        this.failQuarantineLimit = failQuarantineLimit;
    }

    /** Returns the number of failures in one hour which places a node in quarantine. Default is 3. */
    public int getFailQuarantineLimit() {
        return failQuarantineLimit;
    }

    /**
     * The number of ms an unstable node is quarantined. Default is
     * 1000*60*60 (one hour).
     */
    public void setQuarantineTime(long quarantineTime) {
        this.quarantineTime = quarantineTime;
    }

    /** Returns the number of ms an unstable node is quarantined. Default is 1000*60*60 (one hour). */
    public long getQuarantineTime() {
        return quarantineTime;
    }

    @Override
    public String toString() {
        return "monitor configuration [" +
               "checkInterval: " + checkInterval +
               " responseAfterFailLimit: " + responseAfterFailLimit +
               " idleLimit: " + idleLimit +
               " requestTimeout " + requestTimeout +
               " failLimit " + failLimit +
               " failQuarantineLimit " + failQuarantineLimit +
               " quarantineTime " + quarantineTime +
               "]";
    }

}
/**
 * Must be implemented by a node collection which wants
 * its node state monitored by a ClusterMonitor
 *
 * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a>
 */
public interface NodeManager<T> {

    /** Called when a failed node is working (ready for production) again */
    void working(T node);

    /** Called when a working node fails */
    void failed(T node);

    /**
     * Called when a node should be pinged
     *
     * @param node     the node to ping
     * @param executor the executor made available for carrying out the ping
     */
    void ping(T node, Executor executor);

}
+package com.yahoo.search.cluster; + +import com.yahoo.component.ComponentId; +import com.yahoo.prelude.Ping; +import com.yahoo.prelude.Pong; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; + +/** + * A searcher to which we can send a ping to probe if it is alive + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public abstract class PingableSearcher extends Searcher { + + public PingableSearcher() { + } + + public PingableSearcher(ComponentId id) { + super(id); + } + + /** Send a ping request downwards to probe if this searcher chain is in functioning order */ + public Pong ping(Ping ping, Execution execution) { + return execution.ping(ping); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java b/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java new file mode 100644 index 00000000000..6464f0101be --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java @@ -0,0 +1,93 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.cluster; + +import com.yahoo.search.result.ErrorMessage; + + +/** + * This node monitor is responsible for maintaining the state of a monitored node. 
+ * It has the following properties: + * <ul> + * <li>A node is taken out of operation if it gives no response in 10 s</li> + * <li>A node is put back in operation when it responds correctly again + * </ul> + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class TrafficNodeMonitor<T> extends BaseNodeMonitor<T> { + /** + * Creates a new node monitor for a node + */ + public TrafficNodeMonitor(T node,MonitorConfiguration configuration,boolean internal) { + super(internal); + this.node=node; + this.configuration=configuration; + } + + /** Whether or not this has ever responded successfully */ + private boolean atStartUp = true; + + public T getNode() { return node; } + + /** + * Called when this node fails. + * + * @param error A container which should contain a short description + */ + @Override + public void failed(ErrorMessage error) { + respondedAt=now(); + + switch (error.getCode()) { + // TODO: Remove hard coded error messages. + // Refer to docs/errormessages + case 10: + case 11: + // Only count not being able to talk to backend at all + // as errors we care about + if ((respondedAt-succeededAt) > 10000) { + setWorking(false,"Not working for 10 s: " + error.toString()); + } + break; + default: + succeededAt = respondedAt; + break; + } + } + + /** + * Called when a response is received from this node. 
+ */ + public void responded() { + respondedAt=now(); + succeededAt=respondedAt; + atStartUp = false; + + if (!isWorking) { + setWorking(true,"Responds correctly"); + } + } + + /** Thread-safely changes the state of this node if required */ + protected synchronized void setWorking(boolean working,String explanation) { + if (this.isWorking==working) return; // Old news + + if (explanation==null) { + explanation=""; + } else { + explanation=": " + explanation; + } + + if (working) { + log.info("Putting " + node + " in service" + explanation); + } + else { + if (!atStartUp || !isInternal()) + log.warning("Taking " + node + " out of service" + explanation); + failedAt=now(); + } + + this.isWorking=working; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/cluster/package-info.java b/container-search/src/main/java/com/yahoo/search/cluster/package-info.java new file mode 100644 index 00000000000..b470d8c8150 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/cluster/package-info.java @@ -0,0 +1,12 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * Standard searchers to compose in <i>source</i> search chains (those containing searchers specific for one source and + * which ends with a call to some provider) which calls a cluster of provider nodes. These searchers provides hashing + * and failover of the provider nodes. 
+ */ +@ExportPackage +@PublicApi +package com.yahoo.search.cluster; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/config/dispatchprototype/package-info.java b/container-search/src/main/java/com/yahoo/search/config/dispatchprototype/package-info.java new file mode 100644 index 00000000000..2a7b4f96aa8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/config/dispatchprototype/package-info.java @@ -0,0 +1,9 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * Package for dispatchprototype config. + * @author tonytv + */ +@ExportPackage +package com.yahoo.search.config.dispatchprototype; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/config/package-info.java b/container-search/src/main/java/com/yahoo/search/config/package-info.java new file mode 100644 index 00000000000..84eb92be0ea --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/config/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.search.config; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/debug/BackendStatistics.java b/container-search/src/main/java/com/yahoo/search/debug/BackendStatistics.java new file mode 100644 index 00000000000..8086048890f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/debug/BackendStatistics.java @@ -0,0 +1,60 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.debug; + +import static com.yahoo.search.debug.SearcherUtils.clusterSearchers; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; + +import org.apache.commons.lang.ArrayUtils; + +import com.yahoo.fs4.mplex.Backend; +import com.yahoo.jrt.Int32Array; +import com.yahoo.jrt.Request; +import com.yahoo.jrt.StringArray; +import com.yahoo.jrt.Value; +import com.yahoo.jrt.Values; +import com.yahoo.prelude.cluster.ClusterSearcher; +import com.yahoo.yolean.Exceptions; + +/** + * @author tonytv + */ +public class BackendStatistics implements DebugMethodHandler { + public JrtMethodSignature getSignature() { + String returnTypes = "" + (char)Value.STRING_ARRAY + (char)Value.INT32_ARRAY + (char)Value.INT32_ARRAY; + String parametersTypes = "" + (char)Value.STRING; + + return new JrtMethodSignature(returnTypes, parametersTypes); + } + + public void invoke(Request request) { + try { + Collection<ClusterSearcher> searchers = clusterSearchers(request); + List<String> backendIdentificators = new ArrayList<>(); + List<Integer> activeConnections = new ArrayList<>(); + List<Integer> totalConnections = new ArrayList<>(); + + for (ClusterSearcher searcher : searchers) { + for (Map.Entry<String,Backend.BackendStatistics> statistics : searcher.getBackendStatistics().entrySet()) { + backendIdentificators.add(statistics.getKey()); + activeConnections.add(statistics.getValue().activeConnections); + totalConnections.add(statistics.getValue().totalConnections()); + } + } + Values returnValues = request.returnValues(); + returnValues.add(new StringArray(backendIdentificators.toArray(new String[0]))); + addInt32Array(returnValues, activeConnections); + addInt32Array(returnValues, totalConnections); + + } catch (Exception e) { + request.setError(1000, Exceptions.toMessageString(e)); + } + } + + private void addInt32Array(Values returnValues, List<Integer> ints) { + returnValues.add(new 
Int32Array(ArrayUtils.toPrimitive(ints.toArray(new Integer[0])))); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/debug/DebugMethodHandler.java b/container-search/src/main/java/com/yahoo/search/debug/DebugMethodHandler.java new file mode 100644 index 00000000000..55f36b9670e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/debug/DebugMethodHandler.java @@ -0,0 +1,13 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.debug; + +import com.yahoo.jrt.MethodHandler; + +/** + * A method handler that can describe its signature. + * + * @author tonytv + */ +interface DebugMethodHandler extends MethodHandler { + JrtMethodSignature getSignature(); +} diff --git a/container-search/src/main/java/com/yahoo/search/debug/DebugRpcAdaptor.java b/container-search/src/main/java/com/yahoo/search/debug/DebugRpcAdaptor.java new file mode 100644 index 00000000000..2309f23985c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/debug/DebugRpcAdaptor.java @@ -0,0 +1,42 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.debug; + +import com.yahoo.container.osgi.AbstractRpcAdaptor; +import com.yahoo.jrt.Method; +import com.yahoo.jrt.Supervisor; +import com.yahoo.fs4.PacketDumper.PacketType; + +/** + * Handles rpc calls for retrieving debug information. 
+ * + * @author tonytv + */ +public final class DebugRpcAdaptor extends AbstractRpcAdaptor { + private static final String debugPrefix = "debug."; + + public void bindCommands(Supervisor supervisor) { + addTraceMethod(supervisor, "query", PacketType.query); + addTraceMethod(supervisor, "result", PacketType.result); + addMethod(supervisor, "output-search-chain", new OutputSearchChain()); + addMethod(supervisor, "backend-statistics", new BackendStatistics()); + } + + private void addTraceMethod(Supervisor supervisor, String name, PacketType packetType) { + addMethod(supervisor, constructTraceMethodName(name), new TracePackets(packetType)); + } + + private void addMethod(Supervisor supervisor, String name, DebugMethodHandler handler) { + JrtMethodSignature typeStrings = handler.getSignature(); + supervisor.addMethod( + new Method(debugPrefix + name, + typeStrings.parametersTypes, + typeStrings.returnTypes, + handler)); + + } + + //example: debug.dump-query-packets + private String constructTraceMethodName(String name) { + return debugPrefix + "dump-" + name + "-packets"; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/debug/IndentStringBuilder.java b/container-search/src/main/java/com/yahoo/search/debug/IndentStringBuilder.java new file mode 100644 index 00000000000..acb9be8294f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/debug/IndentStringBuilder.java @@ -0,0 +1,102 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.debug; + +import java.io.Serializable; + +/** + * A StringBuilder that also handles indentation for append operations. 
/**
 * A StringBuilder that also handles indentation for append operations.
 * <p>
 * The first append after a newline (and the very first append) is preceded by
 * the single indentation string repeated once per current indent level.
 *
 * @author tonytv
 */
@SuppressWarnings("serial")
final class IndentStringBuilder implements Serializable, Appendable, CharSequence {

    private final StringBuilder builder = new StringBuilder();
    private final String singleIndentation;

    /** The current indent level: the number of times the indentation string is repeated */
    private int level = 0;

    /** Whether the next append starts a new line and hence must be indented */
    private boolean newline = true;

    /** Creates a builder using the given string as one level of indentation */
    public IndentStringBuilder(String singleIndentation) {
        this.singleIndentation = singleIndentation;
    }

    /** Creates a builder using the default indentation string */
    public IndentStringBuilder() {
        this(" ");
    }

    /** Writes the indentation of the current level if we are at the start of a line */
    private void appendIndentation() {
        if (newline) {
            for (int i = 0; i < level; i++) {
                builder.append(singleIndentation);
            }
        }
        newline = false;
    }

    /** Sets the indent level, typically to a value returned earlier by {@link #indent()} */
    public void resetIndentLevel(int level) {
        this.level = level;
    }

    /** Starts a new line and increases the indent level by one. Returns the indent level before indenting. */
    public int newlineAndIndent() {
        newline();
        return indent();
    }

    /** Increases the indent level by one. Returns the indent level before indenting. */
    public int indent() {
        return level++;
    }

    /** Ends the current line; the next append will be indented */
    public IndentStringBuilder newline() {
        newline = true;
        builder.append('\n');
        return this;
    }

    public IndentStringBuilder append(Object o) {
        appendIndentation();
        builder.append(o);
        return this;
    }

    public IndentStringBuilder append(String s) {
        appendIndentation();
        builder.append(s);
        return this;
    }

    @Override
    public IndentStringBuilder append(CharSequence charSequence) {
        appendIndentation();
        builder.append(charSequence);
        return this;
    }

    @Override
    public IndentStringBuilder append(CharSequence charSequence, int start, int end) {
        appendIndentation();
        builder.append(charSequence, start, end);
        return this;
    }

    @Override
    public IndentStringBuilder append(char c) {
        appendIndentation();
        builder.append(c);
        return this;
    }

    @Override
    public String toString() {
        return builder.toString();
    }

    @Override
    public int length() {
        return builder.length();
    }

    @Override
    public char charAt(int index) {
        return builder.charAt(index);
    }

    @Override
    public CharSequence subSequence(int start, int end) {
        return builder.subSequence(start, end);
    }

}
/**
 * Represents the signatures of a jrt method: the type string of its
 * return values and the type string of its parameters.
 *
 * @author tonytv
 */
final class JrtMethodSignature {

    /** The type string of the parameters */
    final String parametersTypes;

    /** The type string of the return values */
    final String returnTypes;

    JrtMethodSignature(String returnTypes, String parametersTypes) {
        this.parametersTypes = parametersTypes;
        this.returnTypes = returnTypes;
    }

}
+ * + * @author tonytv + */ +final class OutputSearchChain implements DebugMethodHandler { + private String getSearchChainName(Request request) { + final int numParameters = request.parameters().size(); + + if (numParameters == 0) + return SearchHandler.defaultSearchChainName; + else if (numParameters == 1) + return request.parameters().get(0).asString(); + else + throw new RuntimeException("Too many parameters given."); + } + + private SearchChain getSearchChain(SearchChainRegistry registry, String searchChainName) { + SearchChain searchChain = registry.getComponent(searchChainName); + ensureNotNull("There is no search chain named '" + searchChainName + "'", searchChain); + return searchChain; + } + + public JrtMethodSignature getSignature() { + String returnTypes = "" + (char)Value.STRING; + String parametersTypes = "*"; //optional string + return new JrtMethodSignature(returnTypes, parametersTypes); + } + + public void invoke(Request request) { + try { + SearchHandler searchHandler = SearcherUtils.getSearchHandler(); + SearchChainRegistry searchChainRegistry = searchHandler.getSearchChainRegistry(); + SearchChain searchChain = getSearchChain(searchChainRegistry, + getSearchChainName(request)); + + SearchChainTextRepresentation textRepresentation = new SearchChainTextRepresentation(searchChain, searchChainRegistry); + request.returnValues().add(new StringValue(textRepresentation.toString())); + } catch (Exception e) { + request.setError(1000, Exceptions.toMessageString(e)); + } + } + + +} + diff --git a/container-search/src/main/java/com/yahoo/search/debug/SearchChainTextRepresentation.java b/container-search/src/main/java/com/yahoo/search/debug/SearchChainTextRepresentation.java new file mode 100644 index 00000000000..2e9da99f85b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/debug/SearchChainTextRepresentation.java @@ -0,0 +1,88 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.search.debug; + +import com.yahoo.component.chain.Chain; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.ForkingSearcher; +import com.yahoo.search.searchchain.SearchChain; +import com.yahoo.search.searchchain.SearchChainRegistry; + +import java.util.Collection; + +/** + * Text representation of a given search chain intended for debugging purposes. + * + * @author tonytv + */ +public class SearchChainTextRepresentation { + + private final SearchChainRegistry searchChainRegistry; + + private static class Block { + private static final String openBlock = " {"; + private static final char closeBlock = '}'; + private final IndentStringBuilder str; + private final int level; + + Block(IndentStringBuilder str) { + this.str = str; + level = str.append(openBlock).newlineAndIndent(); + } + + void close() { + str.resetIndentLevel(level); + str.append(closeBlock).newline(); + } + } + + private final String textRepresentation; + + private void outputChain(IndentStringBuilder str, Chain<Searcher> chain) { + if (chain == null) { + str.append(" [Unresolved Searchchain]"); + } else { + str.append(chain.getId()).append(" [Searchchain] "); + Block block = new Block(str); + + for (Searcher searcher : chain.components()) + outputSearcher(str, searcher); + + block.close(); + } + } + + private void outputSearcher(IndentStringBuilder str, Searcher searcher) { + str.append(searcher.getId()).append(" [Searcher]"); + if ( ! 
(searcher instanceof ForkingSearcher) ) { + str.newline(); + return; + } + Collection<ForkingSearcher.CommentedSearchChain> chains = + ((ForkingSearcher)searcher).getSearchChainsForwarded(searchChainRegistry); + if (chains.isEmpty()) { + str.newline(); + return; + } + Block block = new Block(str); + for (ForkingSearcher.CommentedSearchChain chain : chains) { + if (chain.comment != null) + str.append(chain.comment).newline(); + outputChain(str, chain.searchChain); + } + block.close(); + } + + @Override + public String toString() { + return textRepresentation; + } + + public SearchChainTextRepresentation(SearchChain searchChain, SearchChainRegistry searchChainRegistry) { + this.searchChainRegistry = searchChainRegistry; + + IndentStringBuilder stringBuilder = new IndentStringBuilder(); + outputChain(stringBuilder, searchChain); + textRepresentation = stringBuilder.toString(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/debug/SearcherUtils.java b/container-search/src/main/java/com/yahoo/search/debug/SearcherUtils.java new file mode 100644 index 00000000000..1633196a585 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/debug/SearcherUtils.java @@ -0,0 +1,67 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.debug; + +import static com.yahoo.protect.Validator.ensureNotNull; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import com.yahoo.component.provider.ComponentRegistry; +import org.apache.commons.collections.CollectionUtils; + +import com.yahoo.container.Container; +import com.yahoo.jrt.Request; +import com.yahoo.prelude.cluster.ClusterSearcher; +import com.yahoo.search.Searcher; +import com.yahoo.search.handler.SearchHandler; +import com.yahoo.search.searchchain.SearchChainRegistry; + +/** + * Utility functions for searchers and search chains. 
+ * + * @author tonytv + */ +final class SearcherUtils { + private static Collection<Searcher> allSearchers() { + SearchChainRegistry searchChainRegistry = getSearchHandler().getSearchChainRegistry(); + ComponentRegistry<Searcher> searcherRegistry = searchChainRegistry.getSearcherRegistry(); + return searcherRegistry.allComponents(); + } + + private static Collection<ClusterSearcher> allClusterSearchers() { + return filter(allSearchers(), ClusterSearcher.class); + } + + private static <T> Collection<T> filter(Collection<?> collection, Class<T> classToMatch) { + List<T> filtered = new ArrayList<>(); + for (Object candidate : collection) { + if (classToMatch.isInstance(candidate)) + filtered.add(classToMatch.cast(candidate)); + } + return filtered; + } + + public static Collection<ClusterSearcher> clusterSearchers(final String clusterName) { + Collection<ClusterSearcher> searchers = allClusterSearchers(); + CollectionUtils.filter(searchers, + o -> clusterName.equalsIgnoreCase(((ClusterSearcher)o).getClusterModelName())); + return searchers; + } + + //Return value is never null + static SearchHandler getSearchHandler() { + SearchHandler searchHandler = (SearchHandler) Container.get().getRequestHandlerRegistry().getComponent("com.yahoo.search.handler.SearchHandler"); + ensureNotNull("The standard search handler is not available.", searchHandler); + return searchHandler; + } + + //Retrieve all the cluster searchers as specified by the first parameter of the request. 
+ static Collection<ClusterSearcher> clusterSearchers(Request request) { + String clusterName = request.parameters().get(0).asString(); + Collection<ClusterSearcher> searchers = clusterSearchers(clusterName); + if (searchers.isEmpty()) + throw new RuntimeException("No cluster named " + clusterName); + return searchers; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/debug/TracePackets.java b/container-search/src/main/java/com/yahoo/search/debug/TracePackets.java new file mode 100644 index 00000000000..de71b2e3f26 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/debug/TracePackets.java @@ -0,0 +1,44 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.debug; + +import static com.yahoo.search.debug.SearcherUtils.clusterSearchers; + +import java.util.Collection; + +import com.yahoo.jrt.Request; +import com.yahoo.jrt.Value; +import com.yahoo.prelude.cluster.ClusterSearcher; +import com.yahoo.fs4.PacketDumper; +import com.yahoo.yolean.Exceptions; + +/** + * Rpc method for enabling packet dumping for a specific packet type. 
+ * + * @author tonytv + */ +final class TracePackets implements DebugMethodHandler { + private final PacketDumper.PacketType packetType; + + public void invoke(Request request) { + try { + Collection<ClusterSearcher> searchers = clusterSearchers(request); + boolean on = request.parameters().get(1).asInt8() != 0; + + for (ClusterSearcher searcher : searchers) + searcher.dumpPackets(packetType, on); + + } catch (Exception e) { + request.setError(1000, Exceptions.toMessageString(e)); + } + } + + TracePackets(PacketDumper.PacketType packetType) { + this.packetType = packetType; + } + + public JrtMethodSignature getSignature() { + String returnTypes = ""; + String parametersTypes = "" + (char)Value.STRING + (char)Value.INT8; + return new JrtMethodSignature(returnTypes, parametersTypes); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/Client.java b/container-search/src/main/java/com/yahoo/search/dispatch/Client.java new file mode 100644 index 00000000000..19d6a0c523b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/dispatch/Client.java @@ -0,0 +1,90 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.dispatch; + +import com.yahoo.compress.CompressionType; +import com.yahoo.prelude.fastsearch.FastHit; + +import java.util.List; +import java.util.Optional; + +/** + * A dispatch client. 
+ * + * @author bratseth + */ +interface Client { + + void getDocsums(List<FastHit> hits, NodeConnection node, CompressionType compression, + int uncompressedLength, byte[] compressedSlime, Dispatcher.GetDocsumsResponseReceiver responseReceiver, + double timeoutSeconds); + + /** Creates a connection to a particular node in this */ + NodeConnection createConnection(String hostname, int port); + + class GetDocsumsResponseOrError { + + // One of these will be non empty and the other not + private Optional<GetDocsumsResponse> response; + private Optional<String> error; + + public static GetDocsumsResponseOrError fromResponse(GetDocsumsResponse response) { + return new GetDocsumsResponseOrError(Optional.of(response), Optional.empty()); + } + + public static GetDocsumsResponseOrError fromError(String error) { + return new GetDocsumsResponseOrError(Optional.empty(), Optional.of(error)); + } + + private GetDocsumsResponseOrError(Optional<GetDocsumsResponse> response, Optional<String> error) { + this.response = response; + this.error = error; + } + + /** Returns the response, or empty if there is an error */ + public Optional<GetDocsumsResponse> response() { return response; } + + /** Returns the error or empty if there is a response */ + public Optional<String> error() { return error; } + + } + + class GetDocsumsResponse { + + private final byte compression; + private final int uncompressedSize; + private final byte[] compressedSlimeBytes; + private final List<FastHit> hitsContext; + + public GetDocsumsResponse(byte compression, int uncompressedSize, byte[] compressedSlimeBytes, List<FastHit> hitsContext) { + this.compression = compression; + this.uncompressedSize = uncompressedSize; + this.compressedSlimeBytes = compressedSlimeBytes; + this.hitsContext = hitsContext; + } + + public byte compression() { + return compression; + } + + public int uncompressedSize() { + return uncompressedSize; + } + + public byte[] compressedSlimeBytes() { + return compressedSlimeBytes; + } + 
+ public List<FastHit> hitsContext() { + return hitsContext; + } + + } + + interface NodeConnection { + + /** Closes this connection */ + void close(); + + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java b/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java new file mode 100644 index 00000000000..e4d1fb0b1d5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java @@ -0,0 +1,228 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.dispatch; + +import com.google.common.collect.ImmutableMap; +import com.google.inject.Inject; +import com.yahoo.collections.ListMap; +import com.yahoo.component.AbstractComponent; +import com.yahoo.compress.CompressionType; +import com.yahoo.compress.Compressor; +import com.yahoo.data.access.slime.SlimeAdapter; +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.prelude.fastsearch.TimeoutException; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.result.Hit; +import com.yahoo.slime.BinaryFormat; +import com.yahoo.slime.Cursor; +import com.yahoo.slime.Slime; +import com.yahoo.data.access.Inspector; +import com.yahoo.vespa.config.search.DispatchConfig; + +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * A dispatcher communicates with search nodes to (in the future) perform queries and (now) fill hits. + * This class is multithread safe. 
+ * + * @author bratseth + */ +public class Dispatcher extends AbstractComponent { + + private final static Logger log = Logger.getLogger(Dispatcher.class.getName()); + private final Client client; + + /** Connections to the search nodes this talks to, indexed by node id ("partid") */ + private final ImmutableMap<Integer, Client.NodeConnection> nodes; + + private final Compressor compressor = new Compressor(); + + @Inject + public Dispatcher(DispatchConfig dispatchConfig) { + this.client = new RpcClient(); + ImmutableMap.Builder<Integer, Client.NodeConnection> nodesBuilder = new ImmutableMap.Builder<>(); + for (DispatchConfig.Node node : dispatchConfig.node()) { + nodesBuilder.put(node.key(), client.createConnection(node.host(), node.port())); + } + nodes = nodesBuilder.build(); + } + + /** For testing */ + public Dispatcher(Map<Integer, Client.NodeConnection> nodeConnections, Client client) { + this.nodes = ImmutableMap.copyOf(nodeConnections); + this.client = client; + } + + /** Fills the given summary class by sending RPC requests to the right search nodes */ + public void fill(Result result, String summaryClass, CompressionType compression) { + try { + ListMap<Integer, FastHit> hitsByNode = hitsByNode(result); + + GetDocsumsResponseReceiver responseReceiver = new GetDocsumsResponseReceiver(hitsByNode.size(), compressor, result); + for (Map.Entry<Integer, List<FastHit>> nodeHits : hitsByNode.entrySet()) { + sendGetDocsumsRequest(nodeHits.getKey(), nodeHits.getValue(), summaryClass, compression, result, responseReceiver); + } + responseReceiver.processResponses(result.getQuery()); + } + catch (TimeoutException e) { + result.hits().addError(ErrorMessage.createTimeout("Summary data is incomplete: " + e.getMessage())); + } + } + + /** Return a map of hits by their search node (partition) id */ + private ListMap<Integer, FastHit> hitsByNode(Result result) { + ListMap<Integer, FastHit> hitsByPartition = new ListMap<>(); + for (Iterator<Hit> i = 
result.hits().deepIterator() ; i.hasNext(); ) { + Hit h = i.next(); + if ( ! (h instanceof FastHit)) continue; + FastHit hit = (FastHit)h; + + hitsByPartition.put(hit.getDistributionKey(), hit); + } + return hitsByPartition; + } + + /** Send a getDocsums request to a node. Responses will be added to the given receiver. */ + private void sendGetDocsumsRequest(int nodeId, List<FastHit> hits, String summaryClass, + CompressionType compression, + Result result, GetDocsumsResponseReceiver responseReceiver) { + Client.NodeConnection node = nodes.get(nodeId); + if (node == null) { + result.hits().addError(ErrorMessage.createEmptyDocsums("Could not fill hits from unknown node " + nodeId)); + log.warning("Got hits with partid " + nodeId + ", which is not included in the current dispatch config"); + return; + } + + byte[] serializedSlime = BinaryFormat.encode(toSlime(summaryClass, hits)); + double timeoutSeconds = ((double)result.getQuery().getTimeLeft()-3.0)/1000.0; + Compressor.Compression compressionResult = compressor.compress(compression, serializedSlime); + client.getDocsums(hits, node, compressionResult.type(), + serializedSlime.length, compressionResult.data(), responseReceiver, timeoutSeconds); + } + + public Slime toSlime(String summaryClass, List<FastHit> hits) { + Slime slime = new Slime(); + Cursor root = slime.setObject(); + if (summaryClass != null) { + root.setString("class", summaryClass); + } + Cursor gids = root.setArray("gids"); + for (FastHit hit : hits) { + gids.addData(hit.getGlobalId().getRawId()); + } + return slime; + } + + @Override + public void deconstruct() { + for (Client.NodeConnection nodeConnection : nodes.values()) + nodeConnection.close(); + } + + /** Receiver of the responses to a set of getDocsums requests */ + public static class GetDocsumsResponseReceiver { + + private final BlockingQueue<Client.GetDocsumsResponseOrError> responses; + private final Compressor compressor; + private final Result result; + + /** Whether we have already 
logged/notified about an error - to avoid spamming */ + private boolean hasReportedError = false; + + /** The number of responses we should receive (and process) before this is complete */ + private int outstandingResponses; + + public GetDocsumsResponseReceiver(int requestCount, Compressor compressor, Result result) { + this.compressor = compressor; + responses = new LinkedBlockingQueue<>(requestCount); + outstandingResponses = requestCount; + this.result = result; + } + + /** Called by a thread belonging to the client when a valid response becomes available */ + public void receive(Client.GetDocsumsResponseOrError response) { + responses.add(response); + } + + private void throwTimeout() throws TimeoutException { + throw new TimeoutException("Timed out waiting for summary data. " + outstandingResponses + " responses outstanding."); + } + + /** + * Call this from the dispatcher thread to initiate and complete processing of responses. + * This will block until all responses are available and processed, or to timeout. 
+ */ + public void processResponses(Query query) throws TimeoutException { + try { + while (outstandingResponses > 0) { + long timeLeftMs = query.getTimeLeft(); + if (timeLeftMs <= 0) { + throwTimeout(); + } + Client.GetDocsumsResponseOrError response = responses.poll(timeLeftMs, TimeUnit.MILLISECONDS); + if (response == null) + throwTimeout(); + processResponse(response); + outstandingResponses--; + } + } + catch (InterruptedException e) { + // TODO: Add error + } + } + + private void processResponse(Client.GetDocsumsResponseOrError responseOrError) { + if (responseOrError.error().isPresent()) { + if (hasReportedError) return; + String error = responseOrError.error().get(); + result.hits().addError(ErrorMessage.createBackendCommunicationError(error)); + log.log(Level.WARNING, "Error fetching summary data: "+ error); + } + else { + Client.GetDocsumsResponse response = responseOrError.response().get(); + CompressionType compression = CompressionType.valueOf(response.compression()); + byte[] slimeBytes = compressor.decompress(response.compressedSlimeBytes(), compression, response.uncompressedSize()); + fill(response.hitsContext(), slimeBytes); + } + } + + private void fill(List<FastHit> hits, byte[] slimeBytes) { + Inspector summaries = new SlimeAdapter(BinaryFormat.decode(slimeBytes).get().field("docsums")); + if ( ! 
summaries.valid()) + throw new IllegalArgumentException("Expected a Slime root object containing a 'docsums' field"); + for (int i = 0; i < hits.size(); i++) { + fill(hits.get(i), summaries.entry(i).field("docsum")); + } + } + + private void fill(FastHit hit, Inspector summary) { + summary.traverse((String name, Inspector value) -> { + hit.setField(name, nativeTypeOf(value)); + }); + } + + private Object nativeTypeOf(Inspector inspector) { + switch (inspector.type()) { + case ARRAY: return inspector; + case OBJECT: return inspector; + case BOOL: return inspector.asBool(); + case DATA: return inspector.asData(); + case DOUBLE: return inspector.asDouble(); + case LONG: return inspector.asLong(); + case STRING: return inspector.asString(); // TODO: Keep as utf8 + case EMPTY : return null; + default: throw new IllegalArgumentException("Unexpected Slime type " + inspector.type()); + } + } + + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/RpcClient.java b/container-search/src/main/java/com/yahoo/search/dispatch/RpcClient.java new file mode 100644 index 00000000000..0305b06e92f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/dispatch/RpcClient.java @@ -0,0 +1,128 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.dispatch; + +import com.yahoo.compress.CompressionType; +import com.yahoo.jrt.DataValue; +import com.yahoo.jrt.Int32Value; +import com.yahoo.jrt.Int8Value; +import com.yahoo.jrt.Request; +import com.yahoo.jrt.RequestWaiter; +import com.yahoo.jrt.Spec; +import com.yahoo.jrt.Supervisor; +import com.yahoo.jrt.Target; +import com.yahoo.jrt.Transport; +import com.yahoo.jrt.Values; +import com.yahoo.prelude.fastsearch.FastHit; + +import java.util.List; +import java.util.concurrent.atomic.AtomicReference; + +/** + * A client which uses rpc request to search nodes to implement the Client API. 
+ * + * @author bratseth + */ +class RpcClient implements Client { + + private final Supervisor supervisor = new Supervisor(new Transport()); + + @Override + public NodeConnection createConnection(String hostname, int port) { + return new RpcNodeConnection(hostname, port, supervisor); + } + + @Override + public void getDocsums(List<FastHit> hits, NodeConnection node, CompressionType compression, int uncompressedLength, + byte[] compressedSlime, Dispatcher.GetDocsumsResponseReceiver responseReceiver, double timeoutSeconds) { + Request request = new Request("proton.getDocsums"); + request.parameters().add(new Int8Value(compression.getCode())); + request.parameters().add(new Int32Value(uncompressedLength)); + request.parameters().add(new DataValue(compressedSlime)); + + request.setContext(hits); + RpcNodeConnection rpcNode = ((RpcNodeConnection) node); + rpcNode.invokeAsync(request, timeoutSeconds, new RpcResponseWaiter(rpcNode, responseReceiver)); + } + + private static class RpcNodeConnection implements NodeConnection { + + // Information about the connected node + private final Supervisor supervisor; + private final String hostname; + private final int port; + private final String description; + + // The current shared connection. This will be recycled when it becomes invalid. + // All access to this must be synchronized + private Target target = null; + + public RpcNodeConnection(String hostname, int port, Supervisor supervisor) { + this.supervisor = supervisor; + this.hostname = hostname; + this.port = port; + description = "rpc node connection to " + hostname + ":" + port; + } + + public void invokeAsync(Request req, double timeout, RequestWaiter waiter) { + // TODO: Consider replacing this by a watcher on the target + synchronized(this) { // ensure we have exactly 1 valid connection across threads + if (target == null || ! 
target.isValid()) + target = supervisor.connect(new Spec(hostname, port)); + } + target.invokeAsync(req, timeout, waiter); + } + + @Override + public void close() { + target.close(); + } + + @Override + public String toString() { + return description; + } + + } + + private static class RpcResponseWaiter implements RequestWaiter { + + /** The node to which we made the request we are waiting for - for error messages only */ + private final RpcNodeConnection node; + + /** The handler to which the response is forwarded */ + private final Dispatcher.GetDocsumsResponseReceiver handler; + + public RpcResponseWaiter(RpcNodeConnection node, Dispatcher.GetDocsumsResponseReceiver handler) { + this.node = node; + this.handler = handler; + } + + @Override + public void handleRequestDone(Request requestWithResponse) { + if (requestWithResponse.isError()) { + handler.receive(GetDocsumsResponseOrError.fromError("Error response from " + node + ": " + + requestWithResponse.errorMessage())); + return; + } + + Values returnValues = requestWithResponse.returnValues(); + if (returnValues.size() < 3) { + handler.receive(GetDocsumsResponseOrError.fromError("Invalid getDocsums response from " + node + + ": Expected 3 return arguments, got " + + returnValues.size())); + return; + } + + byte compression = returnValues.get(0).asInt8(); + int uncompressedSize = returnValues.get(1).asInt32(); + byte[] compressedSlimeBytes = returnValues.get(2).asData(); + List<FastHit> hits = (List<FastHit>) requestWithResponse.getContext(); + handler.receive(GetDocsumsResponseOrError.fromResponse(new GetDocsumsResponse(compression, + uncompressedSize, + compressedSlimeBytes, + hits))); + } + + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/CommonFields.java b/container-search/src/main/java/com/yahoo/search/federation/CommonFields.java new file mode 100644 index 00000000000..912a1db6202 --- /dev/null +++ 
b/container-search/src/main/java/com/yahoo/search/federation/CommonFields.java @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.federation; +/** + * A set of string constants for common hit field names. + * @author laboisse + * + */ +public class CommonFields { + + public static final String TITLE = "title"; + public static final String URL = "url"; + public static final String DESCRIPTION = "description"; + public static final String DATE = "date"; + public static final String SIZE = "size"; + public static final String DISP_URL = "dispurl"; + public static final String BASE_URL = "baseurl"; + public static final String MIME_TYPE = "mimetype"; + public static final String RELEVANCY = "relevancy"; + public static final String THUMBNAIL_URL = "thumbnailUrl"; + public static final String THUMBNAIL_WIDTH = "thumbnailWidth"; + public static final String THUMBNAIL_HEIGHT = "thumbnailHeight"; +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/FederationSearcher.java b/container-search/src/main/java/com/yahoo/search/federation/FederationSearcher.java new file mode 100644 index 00000000000..4ec04d0d577 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/FederationSearcher.java @@ -0,0 +1,948 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.federation; + +import com.google.inject.Inject; +import com.yahoo.component.ComponentId; +import com.yahoo.component.ComponentSpecification; +import com.yahoo.component.chain.Chain; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.component.provider.ComponentRegistry; +import com.yahoo.concurrent.CopyOnWriteHashMap; +import com.yahoo.errorhandling.Results.Builder; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.federation.selection.FederationTarget; +import com.yahoo.search.federation.selection.TargetSelector; +import com.yahoo.search.federation.sourceref.SearchChainInvocationSpec; +import com.yahoo.search.federation.sourceref.SearchChainResolver; +import com.yahoo.search.federation.sourceref.SingleTarget; +import com.yahoo.search.federation.sourceref.SourceRefResolver; +import com.yahoo.search.federation.sourceref.SourcesTarget; +import com.yahoo.search.federation.sourceref.Target; +import com.yahoo.search.federation.sourceref.UnresolvedSearchChainException; +import com.yahoo.search.query.Properties; +import com.yahoo.search.query.properties.QueryProperties; +import com.yahoo.search.query.properties.SubProperties; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.HitGroup; +import com.yahoo.search.result.HitOrderer; +import com.yahoo.search.searchchain.AsyncExecution; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.ForkingSearcher; +import com.yahoo.search.searchchain.FutureResult; +import com.yahoo.search.searchchain.SearchChainRegistry; +import com.yahoo.search.searchchain.model.federation.FederationOptions; +import com.yahoo.errorhandling.Results; + +import org.apache.commons.lang.StringUtils; 
+ +import static com.google.common.base.Preconditions.checkNotNull; +import static com.yahoo.collections.CollectionUtil.first; +import static com.yahoo.container.util.Util.quote; +import static com.yahoo.search.federation.StrictContractsConfig.PropagateSourceProperties; + +import java.util.*; +import java.util.Map.Entry; +import java.util.concurrent.TimeUnit; +import java.util.logging.Logger; + +/** + * This searcher takes a set of sources, looks them up in config and fire off the correct searchchains. + * + * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> + * @author tonytv + */ +@Provides(FederationSearcher.FEDERATION) +@After("*") +public class FederationSearcher extends ForkingSearcher { + public static final String FEDERATION = "Federation"; + + private static abstract class TargetHandler { + abstract Chain<Searcher> getChain(); + abstract void modifyTargetQuery(Query query); + abstract void modifyTargetResult(Result result); + + ComponentId getId() { + return getChain().getId(); + } + + public abstract FederationOptions federationOptions(); + + @Override + public String toString() { + return getChain().getId().stringValue(); + } + + } + + private static class StandardTargetHandler extends TargetHandler { + private final SearchChainInvocationSpec target; + private final Chain<Searcher> chain; + + public StandardTargetHandler(SearchChainInvocationSpec target, Chain<Searcher> chain) { + this.target = target; + this.chain = chain; + } + + @Override + Chain<Searcher> getChain() { + return chain; + } + + @Override + void modifyTargetQuery(Query query) {} + @Override + void modifyTargetResult(Result result) {} + + @Override + public FederationOptions federationOptions() { + return target.federationOptions; + } + } + + + private static class CustomTargetHandler<T> extends TargetHandler { + private final TargetSelector<T> selector; + private final FederationTarget<T> target; + + CustomTargetHandler(TargetSelector<T> selector, 
FederationTarget<T> target) { + this.selector = selector; + this.target = target; + } + + @Override + Chain<Searcher> getChain() { + return target.getChain(); + } + + @Override + public void modifyTargetQuery(Query query) { + selector.modifyTargetQuery(target, query); + } + + @Override + public void modifyTargetResult(Result result) { + selector.modifyTargetResult(target, result); + } + + @Override + public FederationOptions federationOptions() { + return target.getFederationOptions(); + } + } + + + + private static class ExecutionInfo { + final TargetHandler targetHandler; + final FederationOptions federationOptions; + final FutureResult futureResult; + + public ExecutionInfo(TargetHandler targetHandler, FederationOptions federationOptions, FutureResult futureResult) { + this.targetHandler = targetHandler; + this.federationOptions = federationOptions; + this.futureResult = futureResult; + } + } + + private static class CompoundKey { + private final String sourceName; + private final String propertyName; + CompoundKey(String sourceName, String propertyName) { + this.sourceName = sourceName; + this.propertyName = propertyName; + } + + @Override + public int hashCode() { + return sourceName.hashCode() ^ propertyName.hashCode(); + } + + @Override + public boolean equals(Object o) { + CompoundKey rhs = (CompoundKey) o; + return sourceName.equals(rhs.sourceName) && propertyName.equals(rhs.propertyName); + } + + @Override + public String toString() { + return sourceName + '.' 
+ propertyName; + } + } + + private static class SourceKey extends CompoundKey { + public static final String SOURCE = "source."; + SourceKey(String sourceName, String propertyName) { + super(sourceName, propertyName); + } + + @Override + public int hashCode() { + return super.hashCode() ^ 7; + } + + @Override + public boolean equals(Object o) { + return (o instanceof SourceKey) && super.equals(o); + } + + @Override + public String toString() { + return SOURCE + super.toString(); + } + } + private static class ProviderKey extends CompoundKey { + public static final String PROVIDER = "provider."; + ProviderKey(String sourceName, String propertyName) { + super(sourceName, propertyName); + } + + @Override + public int hashCode() { + return super.hashCode() ^ 17; + } + + @Override + public boolean equals(Object o) { + return (o instanceof ProviderKey) && super.equals(o); + } + + @Override + public String toString() { + return PROVIDER + super.toString(); + } + } + + private static final Logger log = Logger.getLogger(FederationSearcher.class.getName()); + + /** The name of the query property containing the source name added to the query to each source by this */ + public final static CompoundName SOURCENAME = new CompoundName("sourceName"); + public final static CompoundName PROVIDERNAME = new CompoundName("providerName"); + + + /** Logging field name constants */ + public static final String LOG_COUNT_PREFIX = "count_"; + + private final SearchChainResolver searchChainResolver; + private final PropagateSourceProperties.Enum propagateSourceProperties; + private final SourceRefResolver sourceRefResolver; + private final CopyOnWriteHashMap<CompoundKey, CompoundName> map = new CopyOnWriteHashMap<>(); + + private final boolean strictSearchchain; + private final TargetSelector<?> targetSelector; + + + @Inject + public FederationSearcher(FederationConfig config, StrictContractsConfig strict, + ComponentRegistry<TargetSelector> targetSelectors) { + this(createResolver(config), 
strict.searchchains(), strict.propagateSourceProperties(), + resolveSelector(config.targetSelector(), targetSelectors)); + } + + private static TargetSelector resolveSelector(String selectorId, ComponentRegistry<TargetSelector> targetSelectors) { + if (selectorId.isEmpty()) + return null; + + return checkNotNull( + targetSelectors.getComponent(selectorId), + "Missing target selector with id" + quote(selectorId)); + } + + //for testing + public FederationSearcher(ComponentId id, SearchChainResolver searchChainResolver) { + this(searchChainResolver, false, PropagateSourceProperties.ALL, null); + } + + private FederationSearcher(SearchChainResolver searchChainResolver, boolean strictSearchchain, + PropagateSourceProperties.Enum propagateSourceProperties, + TargetSelector targetSelector) { + this.searchChainResolver = searchChainResolver; + sourceRefResolver = new SourceRefResolver(searchChainResolver); + this.strictSearchchain = strictSearchchain; + this.propagateSourceProperties = propagateSourceProperties; + this.targetSelector = targetSelector; + } + + + private static SearchChainResolver createResolver(FederationConfig config) { + SearchChainResolver.Builder builder = new SearchChainResolver.Builder(); + + for (FederationConfig.Target target : config.target()) { + boolean isDefaultProviderForSource = true; + + for (FederationConfig.Target.SearchChain searchChain : target.searchChain()) { + if (searchChain.providerId() == null || searchChain.providerId().isEmpty()) { + addSearchChain(builder, target, searchChain); + } else { + addSourceForProvider(builder, target, searchChain, isDefaultProviderForSource); + isDefaultProviderForSource = false; + } + } + + //Allow source groups to use by default. 
+ if (target.useByDefault()) + builder.useTargetByDefault(target.id()); + } + + return builder.build(); + } + + private static void addSearchChain(SearchChainResolver.Builder builder, + FederationConfig.Target target, FederationConfig.Target.SearchChain searchChain) { + if (!target.id().equals(searchChain.searchChainId())) + throw new RuntimeException("Invalid federation config, " + target.id() + " != " + searchChain.searchChainId()); + + builder.addSearchChain(ComponentId.fromString(searchChain.searchChainId()), + federationOptions(searchChain), searchChain.documentTypes()); + } + + private static void addSourceForProvider(SearchChainResolver.Builder builder, FederationConfig.Target target, + FederationConfig.Target.SearchChain searchChain, boolean isDefaultProvider) { + builder.addSourceForProvider( + ComponentId.fromString(target.id()), + ComponentId.fromString(searchChain.providerId()), + ComponentId.fromString(searchChain.searchChainId()), + isDefaultProvider, federationOptions(searchChain), + searchChain.documentTypes()); + } + + private static FederationOptions federationOptions(FederationConfig.Target.SearchChain searchChain) { + return new FederationOptions(). + setOptional(searchChain.optional()). + setUseByDefault(searchChain.useByDefault()). + setTimeoutInMilliseconds(searchChain.timeoutMillis()). 
+ setRequestTimeoutInMilliseconds(searchChain.requestTimeoutMillis()); + } + + private static long calculateTimeout(Query query, List<TargetHandler> targets) { + + class PartitionByOptional { + final List<TargetHandler> mandatoryTargets; + final List<TargetHandler> optionalTargets; + + PartitionByOptional(List<TargetHandler> targets) { + List<TargetHandler> mandatoryTargets = new ArrayList<>(); + List<TargetHandler> optionalTargets = new ArrayList<>(); + + for (TargetHandler target : targets) { + if (target.federationOptions().getOptional()) { + optionalTargets.add(target); + } else { + mandatoryTargets.add(target); + } + } + + this.mandatoryTargets = Collections.unmodifiableList(mandatoryTargets); + this.optionalTargets = Collections.unmodifiableList(optionalTargets); + } + } + + if (query.requestHasProperty("timeout") || targets.isEmpty()) { + return query.getTimeLeft(); + } else { + PartitionByOptional partition = new PartitionByOptional(targets); + long queryTimeout = query.getTimeout(); + + return partition.mandatoryTargets.isEmpty() ? 
+ maximumTimeout(partition.optionalTargets, queryTimeout) : + maximumTimeout(partition.mandatoryTargets, queryTimeout); + } + } + + private static long maximumTimeout(List<TargetHandler> invocationSpecs, long queryTimeout) { + long timeout = 0; + for (TargetHandler target : invocationSpecs) { + timeout = Math.max(timeout, + target.federationOptions().getSearchChainExecutionTimeoutInMilliseconds(queryTimeout)); + } + return timeout; + } + + private void addSearchChainTimedOutError(Query query, + ComponentId searchChainId) { + ErrorMessage timeoutMessage= + ErrorMessage.createTimeout("The search chain '" + searchChainId + "' timed out."); + timeoutMessage.setSource(searchChainId.stringValue()); + query.errors().add(timeoutMessage); + } + + private void mergeResult(Query query, TargetHandler targetHandler, + Result mergedResults, Result result) { + + + targetHandler.modifyTargetResult(result); + final ComponentId searchChainId = targetHandler.getId(); + Chain<Searcher> searchChain = targetHandler.getChain(); + + mergedResults.mergeWith(result); + HitGroup group = result.hits(); + group.setId("source:" + searchChainId.getName()); + + group.setSearcherSpecificMetaData(this, searchChain); + group.setMeta(false); // Set hit groups as non-meta as a default + group.setAuxiliary(true); // Set hit group as auxiliary so that it doesn't contribute to count + group.setSource(searchChainId.getName()); + group.setQuery(result.getQuery()); + + for (Iterator<Hit> it = group.unorderedDeepIterator(); it.hasNext();) { + Hit hit = it.next(); + hit.setSearcherSpecificMetaData(this, searchChain); + hit.setSource(searchChainId.stringValue()); + + // This is the backend request meta hit, that is holding logging information + // See HTTPBackendSearcher, where this hit is created + if (hit.isMeta() && hit.types().contains("logging")) { + // Augment this hit with count fields + hit.setField(LOG_COUNT_PREFIX + "deep", result.getDeepHitCount()); + hit.setField(LOG_COUNT_PREFIX + "total", 
result.getTotalHitCount()); + int offset = result.getQuery().getOffset(); + hit.setField(LOG_COUNT_PREFIX + "first", offset + 1); + hit.setField(LOG_COUNT_PREFIX + "last", result.getConcreteHitCount() + offset); + } + + } + if (query.getTraceLevel()>=4) + query.trace("Got " + group.getConcreteSize() + " hits from " + group.getId(),false, 4); + mergedResults.hits().add(group); + } + + private boolean successfullyCompleted(FutureResult result) { + return result.isDone() && !result.isCancelled(); + } + + private Query setupSingleQuery(Query query, long timeout, TargetHandler targetHandler) { + if (strictSearchchain) { + query.resetTimeout(); + return setupFederationQuery(query, query, + windowParameters(query.getHits(), query.getOffset()), timeout, targetHandler); + } else { + return cloneFederationQuery(query, + windowParameters(query.getHits(), query.getOffset()), timeout, targetHandler); + } + } + + private Result startExecuteSingleQuery(Query query, TargetHandler chain, long timeout, Execution execution) { + Query outgoing = setupSingleQuery(query, timeout, chain); + Execution exec = new Execution(chain.getChain(), execution.context()); + return exec.search(outgoing); + } + + private List<ExecutionInfo> startExecuteQueryForEachTarget( + Query query, Collection<TargetHandler> targets, long timeout, Execution execution) { + + List<ExecutionInfo> results = new ArrayList<>(); + + Map<String, Object> windowParameters; + if (targets.size()==1) // preserve requested top-level offset by default as an optimization + windowParameters = Collections.unmodifiableMap(windowParameters(query.getHits(), query.getOffset())); + else // request from offset 0 to enable correct upstream blending into a single top-level hit list + windowParameters = Collections.unmodifiableMap(windowParameters(query.getHits() + query.getOffset(), 0)); + + for (TargetHandler targetHandler : targets) { + long executeTimeout = timeout; + if 
(targetHandler.federationOptions().getRequestTimeoutInMilliseconds() != -1) + executeTimeout = targetHandler.federationOptions().getRequestTimeoutInMilliseconds(); + results.add(new ExecutionInfo(targetHandler, targetHandler.federationOptions(), + createFutureSearch(query, windowParameters, targetHandler, executeTimeout, execution))); + } + + return results; + } + + private Map<String, Object> windowParameters(int hits, int offset) { + Map<String, Object> params = new HashMap<>(); + params.put(Query.HITS.toString(), hits); + params.put(Query.OFFSET.toString(), offset); + return params; + } + + private FutureResult createFutureSearch(Query query, Map<String, Object> windowParameters, TargetHandler targetHandler, + long timeout, Execution execution) { + Query clonedQuery = cloneFederationQuery(query, windowParameters, timeout, targetHandler); + return new AsyncExecution(targetHandler.getChain(), execution).search(clonedQuery); + } + + + private Query cloneFederationQuery(Query query, + Map<String, Object> windowParameters, long timeout, TargetHandler targetHandler) { + Query clonedQuery = Query.createNewQuery(query); + return setupFederationQuery(query, clonedQuery, windowParameters, timeout, targetHandler); + } + + private Query setupFederationQuery(Query query, Query outgoing, + Map<String, Object> windowParameters, long timeout, TargetHandler targetHandler) { + + ComponentId chainId = targetHandler.getChain().getId(); + + String sourceName = chainId.getName(); + outgoing.properties().set(SOURCENAME, sourceName); + String providerName = chainId.getName(); + if (chainId.getNamespace() != null) + providerName = chainId.getNamespace().getName(); + outgoing.properties().set(PROVIDERNAME, providerName); + + outgoing.setTimeout(timeout); + + switch (propagateSourceProperties) { + case ALL: + propagatePerSourceQueryProperties(query, outgoing, windowParameters, sourceName, providerName, + QueryProperties.PER_SOURCE_QUERY_PROPERTIES); + break; + case OFFSET_HITS: + 
propagatePerSourceQueryProperties(query, outgoing, windowParameters, sourceName, providerName, + new CompoundName[]{Query.OFFSET, Query.HITS}); + break; + } + + //TODO: FederationTarget + //TODO: only for target produced by this, not others + targetHandler.modifyTargetQuery(outgoing); + return outgoing; + } + + private void propagatePerSourceQueryProperties(Query original, Query outgoing, + Map<String, Object> windowParameters, + String sourceName, String providerName, + CompoundName[] queryProperties) { + + for (CompoundName key : queryProperties) { + Object value = getSourceOrProviderProperty(original, key, sourceName, providerName, windowParameters.get(key.toString())); + if (value != null) { + outgoing.properties().set(key, value); + } + } + } + + private Object getSourceOrProviderProperty(Query query, CompoundName propertyName, + String sourceName, String providerName, + Object defaultValue) { + Object result = getProperty(query, new SourceKey(sourceName, propertyName.toString())); + if (result == null) + result = getProperty(query, new ProviderKey(providerName, propertyName.toString())); + if (result == null) + result = defaultValue; + + return result; + } + + private Object getProperty(Query query, CompoundKey key) { + + CompoundName name = map.get(key); + if (name == null) { + name = new CompoundName(key.toString()); + map.put(key, name); + } + return query.properties().get(name); + } + + private ErrorMessage missingSearchChainsErrorMessage(List<UnresolvedSearchChainException> unresolvedSearchChainExceptions) { + StringBuilder sb = new StringBuilder(); + sb.append(StringUtils.join(getMessagesSet(unresolvedSearchChainExceptions), ' ')); + + + sb.append(" Valid source refs are "); + sb.append( + StringUtils.join(allSourceRefDescriptions().iterator(), + ", ")).append('.'); + + return ErrorMessage.createInvalidQueryParameter(sb.toString()); + } + + private List<String> allSourceRefDescriptions() { + List<String> descriptions = new ArrayList<>(); + + for (Target 
target : searchChainResolver.allTopLevelTargets()) { + descriptions.add(target.searchRefDescription()); + } + return descriptions; + } + + private Set<String> getMessagesSet(List<UnresolvedSearchChainException> unresolvedSearchChainExceptions) { + Set<String> messages = new LinkedHashSet<>(); + for (UnresolvedSearchChainException exception : unresolvedSearchChainExceptions) { + messages.add(exception.getMessage()); + } + return messages; + } + + private void warnIfUnresolvedSearchChains(List<UnresolvedSearchChainException> missingTargets, + HitGroup errorHitGroup) { + + if (!missingTargets.isEmpty()) { + errorHitGroup.addError(missingSearchChainsErrorMessage(missingTargets)); + } + } + + @Override + public Collection<CommentedSearchChain> getSearchChainsForwarded(SearchChainRegistry registry) { + List<CommentedSearchChain> searchChains = new ArrayList<>(); + + for (Target target : searchChainResolver.allTopLevelTargets()) { + if (target instanceof SourcesTarget) { + searchChains.addAll(commentedSourceProviderSearchChains((SourcesTarget)target, registry)); + } else if (target instanceof SingleTarget) { + searchChains.add(commentedSearchChain((SingleTarget)target, registry)); + } else { + log.warning("Invalid target type " + target.getClass().getName()); + } + } + + return searchChains; + } + + private CommentedSearchChain commentedSearchChain(SingleTarget singleTarget, SearchChainRegistry registry) { + return new CommentedSearchChain("If source refs contains '" + singleTarget.getId() + "'.", + registry.getChain(singleTarget.getId())); + } + + private List<CommentedSearchChain> commentedSourceProviderSearchChains(SourcesTarget sourcesTarget, + SearchChainRegistry registry) { + + List<CommentedSearchChain> commentedSearchChains = new ArrayList<>(); + String ifMatchingSourceRefPrefix = "If source refs contains '" + sourcesTarget.getId() + "' and provider is '"; + + commentedSearchChains.add( + new CommentedSearchChain(ifMatchingSourceRefPrefix + 
sourcesTarget.defaultProviderSource().provider + + "'(or not given).", registry.getChain(sourcesTarget.defaultProviderSource().searchChainId))); + + for (SearchChainInvocationSpec providerSource : sourcesTarget.allProviderSources()) { + if (!providerSource.equals(sourcesTarget.defaultProviderSource())) { + commentedSearchChains.add( + new CommentedSearchChain(ifMatchingSourceRefPrefix + providerSource.provider + "'.", + registry.getChain(providerSource.searchChainId))); + } + } + return commentedSearchChains; + } + + /** Returns the set of properties set for the source or provider given in the query (if any). + * + * If the query has not set sourceName or providerName, null will be returned */ + public static Properties getSourceProperties(Query query) { + String sourceName = query.properties().getString(SOURCENAME); + String providerName = query.properties().getString(PROVIDERNAME); + if (sourceName == null || providerName == null) + return null; + Properties sourceProperties = new SubProperties("source." + sourceName, query.properties()); + Properties providerProperties = new SubProperties("provider." 
+ providerName, query.properties()); + sourceProperties.chain(providerProperties); + return sourceProperties; + } + + @Override + public void fill(final Result result, final String summaryClass, Execution execution) { + List<FutureResult> filledResults = new ArrayList<>(); + UniqueExecutionsToResults uniqueExecutionsToResults = new UniqueExecutionsToResults(); + addResultsToFill(result.hits(), result, summaryClass, uniqueExecutionsToResults); + final Set<Entry<Chain<Searcher>, Map<Query, Result>>> resultsForAllChains = uniqueExecutionsToResults.resultsToFill + .entrySet(); + int numberOfCallsToFillNeeded = 0; + + for (Entry<Chain<Searcher>, Map<Query, Result>> resultsToFillForAChain : resultsForAllChains) { + numberOfCallsToFillNeeded += resultsToFillForAChain.getValue().size(); + } + + for (Entry<Chain<Searcher>, Map<Query, Result>> resultsToFillForAChain : resultsForAllChains) { + Chain<Searcher> chain = resultsToFillForAChain.getKey(); + Execution chainExecution = (chain == null) ? execution : new Execution(chain, execution.context()); + + for (Entry<Query, Result> resultsToFillForAChainAndQuery : resultsToFillForAChain.getValue().entrySet()) { + Result resultToFill = resultsToFillForAChainAndQuery.getValue(); + if (numberOfCallsToFillNeeded == 1) { + chainExecution.fill(resultToFill, summaryClass); + propagateErrors(resultToFill, result); + } else { + AsyncExecution asyncFill = new AsyncExecution(chainExecution); + filledResults.add(asyncFill.fill(resultToFill, summaryClass)); + } + } + } + for (FutureResult filledResult : filledResults) { + propagateErrors(filledResult.get(result.getQuery().getTimeLeft(), TimeUnit.MILLISECONDS), result); + } + } + + private void propagateErrors(Result source, Result destination) { + ErrorMessage error = source.hits().getError(); + if (error != null) + destination.hits().addError(error); + } + + /** A map from a unique search chain and query instance to a result */ + private static class UniqueExecutionsToResults { + + /** 
Implemented as a nested identity hashmap */ + final Map<Chain<Searcher>,Map<Query,Result>> resultsToFill = new IdentityHashMap<>(); + + /** Returns a result to fill for a query and chain, by creating it if necessary */ + public Result get(Chain<Searcher> chain, Query query) { + Map<Query,Result> resultsToFillForAChain = resultsToFill.get(chain); + if (resultsToFillForAChain == null) { + resultsToFillForAChain = new IdentityHashMap<>(); + resultsToFill.put(chain,resultsToFillForAChain); + } + + Result resultsToFillForAChainAndQuery = resultsToFillForAChain.get(query); + if (resultsToFillForAChainAndQuery == null) { + resultsToFillForAChainAndQuery = new Result(query); + resultsToFillForAChain.put(query,resultsToFillForAChainAndQuery); + } + + return resultsToFillForAChainAndQuery; + } + + } + + private void addResultsToFill(HitGroup hitGroup, Result result, String summaryClass, + UniqueExecutionsToResults uniqueExecutionsToResults) { + for (Hit hit : hitGroup) { + if (hit instanceof HitGroup) { + addResultsToFill((HitGroup) hit, result, summaryClass, uniqueExecutionsToResults); + } else { + if ( ! hit.isFilled(summaryClass)) + getSearchChainGroup(hit,result,uniqueExecutionsToResults).hits().add(hit); + } + } + } + + private Result getSearchChainGroup(Hit hit, Result result, UniqueExecutionsToResults uniqueExecutionsToResults) { + @SuppressWarnings("unchecked") + Chain<Searcher> chain = (Chain<Searcher>) hit.getSearcherSpecificMetaData(this); + Query query = hit.getQuery() !=null ? 
hit.getQuery() : result.getQuery(); + + return uniqueExecutionsToResults.get(chain,query); + } + + private void searchMultipleTargets(Query query, Result mergedResults, + Collection<TargetHandler> targets, + long timeout, + Execution execution) { + + List<ExecutionInfo> executionInfos = startExecuteQueryForEachTarget(query, targets, timeout, execution); + waitForMandatoryTargets(executionInfos, query.getTimeout()); + + HitOrderer s=null; + for (ExecutionInfo executionInfo : executionInfos) { + if ( ! successfullyCompleted(executionInfo.futureResult)) { + addSearchChainTimedOutError(query, executionInfo.targetHandler.getId()); + } else { + if (s == null) { + s = dirtyCopyIfModifiedOrderer(mergedResults.hits(), executionInfo.futureResult.get().hits().getOrderer()); + } + mergeResult(query, executionInfo.targetHandler, mergedResults, executionInfo.futureResult.get()); + + } + } + } + + /** + * TODO This is probably a dirty hack for bug 4711376. There are probably better ways. + * But I will leave that to trd-processing@ + * + * @param group The merging hitgroup to be updated if necessary + * @param orderer The per provider hit orderer. + * @return The hitorderer chosen + */ + private HitOrderer dirtyCopyIfModifiedOrderer(HitGroup group, HitOrderer orderer) { + if (orderer != null) { + HitOrderer old = group.getOrderer(); + if ((old == null) || ! 
orderer.equals(old)) { + group.setOrderer(orderer); + } + } + + return orderer; + } + + private void waitForMandatoryTargets(List<ExecutionInfo> executionInfos, long queryTimeout) { + FutureWaiter futureWaiter = new FutureWaiter(); + + boolean hasMandatoryTargets = false; + for (ExecutionInfo executionInfo : executionInfos) { + if (isMandatory(executionInfo)) { + futureWaiter.add(executionInfo.futureResult, + getSearchChainExecutionTimeoutInMilliseconds(executionInfo, queryTimeout)); + hasMandatoryTargets = true; + } + } + + if (!hasMandatoryTargets) { + for (ExecutionInfo executionInfo : executionInfos) { + futureWaiter.add(executionInfo.futureResult, + getSearchChainExecutionTimeoutInMilliseconds(executionInfo, queryTimeout)); + } + } + + futureWaiter.waitForFutures(); + } + + private long getSearchChainExecutionTimeoutInMilliseconds(ExecutionInfo executionInfo, long queryTimeout) { + return executionInfo.federationOptions. + getSearchChainExecutionTimeoutInMilliseconds(queryTimeout); + } + + private boolean isMandatory(ExecutionInfo executionInfo) { + return !executionInfo.federationOptions.getOptional(); + } + + private void searchSingleTarget(Query query, Result mergedResults, + TargetHandler targetHandler, + long timeout, + Execution execution) { + Result result = startExecuteSingleQuery(query, targetHandler, timeout, execution); + mergeResult(query, targetHandler, mergedResults, result); + } + + + private Results<SearchChainInvocationSpec, UnresolvedSearchChainException> getTargets(Set<String> sources, Properties properties, IndexFacts indexFacts) { + return sources.isEmpty() ? 
+ defaultSearchChains(properties): + resolveSources(sources, properties, indexFacts); + } + + private Results<SearchChainInvocationSpec, UnresolvedSearchChainException> resolveSources(Set<String> sources, Properties properties, IndexFacts indexFacts) { + Results.Builder<SearchChainInvocationSpec, UnresolvedSearchChainException> result = new Builder<>(); + + for (String source : sources) { + try { + result.addAllData(sourceRefResolver.resolve(asSourceSpec(source), properties, indexFacts)); + } catch (UnresolvedSearchChainException e) { + result.addError(e); + } + } + + return result.build(); + } + + + public Results<SearchChainInvocationSpec, UnresolvedSearchChainException> defaultSearchChains(Properties sourceToProviderMap) { + Results.Builder<SearchChainInvocationSpec, UnresolvedSearchChainException> result = new Builder<>(); + + for (Target target : searchChainResolver.defaultTargets()) { + try { + result.addData(target.responsibleSearchChain(sourceToProviderMap)); + } catch (UnresolvedSearchChainException e) { + result.addError(e); + } + } + + return result.build(); + } + + + private ComponentSpecification asSourceSpec(String source) { + try { + return new ComponentSpecification(source); + } catch(Exception e) { + throw new IllegalArgumentException("The source ref '" + source + + "' used for federation is not valid.", e); + } + } + + @Override + public Result search(Query query, Execution execution) { + Result mergedResults = execution.search(query); + + Results<SearchChainInvocationSpec, UnresolvedSearchChainException> targets = + getTargets(query.getModel().getSources(), query.properties(), execution.context().getIndexFacts()); + warnIfUnresolvedSearchChains(targets.errors(), mergedResults.hits()); + + Collection<SearchChainInvocationSpec> prunedTargets = + pruneTargetsWithoutDocumentTypes(query.getModel().getRestrict(), targets.data()); + + Results<TargetHandler, ErrorMessage> regularTargetHandlers = resolveSearchChains(prunedTargets, 
execution.searchChainRegistry()); + query.errors().addAll(regularTargetHandlers.errors()); + + List<TargetHandler> targetHandlers = new ArrayList<>(regularTargetHandlers.data()); + targetHandlers.addAll(getAdditionalTargets(query, execution, targetSelector)); + + final long targetsTimeout = calculateTimeout(query, targetHandlers); + if (targetsTimeout < 0) + return new Result(query, ErrorMessage.createTimeout("Timed out when about to federate")); + + traceTargets(query, targetHandlers); + + if (targetHandlers.size() == 0) { + return mergedResults; + } else if (targetHandlers.size() == 1 && ! shouldExecuteTargetLongerThanThread(query, targetHandlers.get(0))) { + TargetHandler chain = first(targetHandlers); + searchSingleTarget(query, mergedResults, chain, targetsTimeout, execution); + } else { + searchMultipleTargets(query, mergedResults, targetHandlers, targetsTimeout, execution); + } + + return mergedResults; + } + + private void traceTargets(Query query, List<TargetHandler> targetHandlers) { + final int traceFederationLevel = 2; + if ( ! query.isTraceable(traceFederationLevel)) return; + query.trace("Federating to " + targetHandlers, traceFederationLevel); + } + + /** + * Returns true if we are requested to keep executing a target longer than we're waiting for it. + * This is useful to populate caches inside targets. 
+ */ + private boolean shouldExecuteTargetLongerThanThread(Query query, TargetHandler target) { + return target.federationOptions().getRequestTimeoutInMilliseconds() > query.getTimeout(); + } + + private static Results<TargetHandler, ErrorMessage> resolveSearchChains( + Collection<SearchChainInvocationSpec> prunedTargets, + SearchChainRegistry registry) { + + Results.Builder<TargetHandler, ErrorMessage> targetHandlers = new Results.Builder<>(); + + for (SearchChainInvocationSpec target: prunedTargets) { + Chain<Searcher> chain = registry.getChain(target.searchChainId); + if (chain == null) { + targetHandlers.addError(ErrorMessage.createIllegalQuery( + "Could not find search chain '" + target.searchChainId + "'")); + } else { + targetHandlers.addData(new StandardTargetHandler(target, chain)); + } + } + + return targetHandlers.build(); + } + + private static <T> List<TargetHandler> getAdditionalTargets(Query query, Execution execution, TargetSelector<T> targetSelector) { + if (targetSelector == null) + return Collections.emptyList(); + + ArrayList<TargetHandler> result = new ArrayList<>(); + for (FederationTarget<T> target: targetSelector.getTargets(query, execution.searchChainRegistry())) + result.add(new CustomTargetHandler<>(targetSelector, target)); + + return result; + } + + private Collection<SearchChainInvocationSpec> pruneTargetsWithoutDocumentTypes(Set<String> restrict, List<SearchChainInvocationSpec> targets) { + if (restrict.isEmpty()) + return targets; + + Collection<SearchChainInvocationSpec> prunedTargets = new ArrayList<>(); + + for (SearchChainInvocationSpec target : targets) { + if (target.documentTypes.isEmpty() || documentTypeIntersectionIsNonEmpty(restrict, target)) + prunedTargets.add(target); + } + + return prunedTargets; + } + + private boolean documentTypeIntersectionIsNonEmpty(Set<String> restrict, SearchChainInvocationSpec target) { + for (String documentType : target.documentTypes) { + if (restrict.contains(documentType)) + return true; + } 
+ + return false; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/ForwardingSearcher.java b/container-search/src/main/java/com/yahoo/search/federation/ForwardingSearcher.java new file mode 100644 index 00000000000..b43798113de --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/ForwardingSearcher.java @@ -0,0 +1,106 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.federation; + +import com.yahoo.component.ComponentSpecification; +import com.yahoo.component.chain.Chain; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.prelude.Ping; +import com.yahoo.prelude.Pong; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.cluster.PingableSearcher; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.result.HitGroup; +import com.yahoo.search.searchchain.Execution; + +/** + * A lightweight searcher to forward all incoming requests to a single search + * chain defined in config. An alternative to federation searcher when standard + * semantics are not necessary for the application. 
+ * + * @see FederationSearcher + * @since 5.0.13 + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@After("*") +public class ForwardingSearcher extends PingableSearcher { + private final ComponentSpecification target; + + public ForwardingSearcher(final SearchchainForwardConfig config) { + if (config.target() == null) { + throw new RuntimeException( + "Configuration value searchchain-forward.target was null."); + } + try { + target = new ComponentSpecification(config.target()); + } catch (RuntimeException e) { + throw new RuntimeException( + "Failed constructing the component specification from searchchain-forward.target: " + + config.target(), e); + } + } + + @Override + public Result search(final Query query, final Execution execution) { + Execution next = createForward(execution); + + if (next == null) { + return badResult(query); + } else { + return next.search(query); + } + } + + private Result badResult(final Query query) { + final ErrorMessage error = noSearchchain(); + return new Result(query, error); + } + + @Override + public Pong ping(final Ping ping, final Execution execution) { + Execution next = createForward(execution); + + if (next == null) { + return badPong(); + } else { + return next.ping(ping); + } + } + + private Pong badPong() { + final Pong pong = new Pong(); + pong.addError(noSearchchain()); + return pong; + } + + @Override + public void fill(final Result result, final String summaryClass, + final Execution execution) { + Execution next = createForward(execution); + if (next == null) { + badFill(result.hits()); + return; + } else { + next.fill(result, summaryClass); + } + } + + private void badFill(HitGroup hits) { + hits.addError(noSearchchain()); + } + + private Execution createForward(Execution execution) { + Chain<Searcher> targetChain = execution.context().searchChainRegistry() + .getComponent(target); + if (targetChain == null) { + return null; + } + return new Execution(targetChain, execution.context()); + 
} + + private ErrorMessage noSearchchain() { + return ErrorMessage + .createServerIsMisconfigured("Could not get search chain matching component specification: " + target); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/FutureWaiter.java b/container-search/src/main/java/com/yahoo/search/federation/FutureWaiter.java new file mode 100644 index 00000000000..52cd5397489 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/FutureWaiter.java @@ -0,0 +1,58 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.federation; + +import com.yahoo.search.searchchain.FutureResult; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.concurrent.TimeUnit; + +/** + * @author tonytv + */ +class FutureWaiter { + private class Future { + final FutureResult result; + final long timeoutInMilliseconds; + + public Future(FutureResult result, long timeoutInMilliseconds) { + this.result = result; + this.timeoutInMilliseconds = timeoutInMilliseconds; + } + } + + private List<Future> futures = new ArrayList<>(); + + public void add(FutureResult futureResult, long timeoutInMilliseconds) { + futures.add(new Future(futureResult, timeoutInMilliseconds)); + } + + public void waitForFutures() { + sortFuturesByTimeoutDescending(); + + final long startTime = System.currentTimeMillis(); + + for (Future future : futures) { + long timeToWait = startTime + future.timeoutInMilliseconds - System.currentTimeMillis(); + if (timeToWait <= 0) + break; + + future.result.get(timeToWait, TimeUnit.MILLISECONDS); + } + } + + private void sortFuturesByTimeoutDescending() { + Collections.sort(futures, new Comparator<Future>() { + @Override + public int compare(Future lhs, Future rhs) { + return -compareLongs(lhs.timeoutInMilliseconds, rhs.timeoutInMilliseconds); + } + + private int 
compareLongs(long lhs, long rhs) { + return new Long(lhs).compareTo(rhs); + } + }); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/TimeoutException.java b/container-search/src/main/java/com/yahoo/search/federation/TimeoutException.java new file mode 100644 index 00000000000..8b7e8a1d9d5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/TimeoutException.java @@ -0,0 +1,20 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.federation; + +/** + * Thrown on timeouts + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +@SuppressWarnings("serial") +public class TimeoutException extends RuntimeException { + + public TimeoutException(String message) { + super(message); + } + + public TimeoutException(String message,Throwable cause) { + super(message,cause); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/http/ConfiguredHTTPClientSearcher.java b/container-search/src/main/java/com/yahoo/search/federation/http/ConfiguredHTTPClientSearcher.java new file mode 100644 index 00000000000..576c16f68db --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/http/ConfiguredHTTPClientSearcher.java @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.federation.http; + +import java.util.Collections; + +import com.yahoo.component.ComponentId; +import com.yahoo.search.federation.ProviderConfig; +import com.yahoo.search.Result; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.statistics.Statistics; + + +/** + * Superclass for http client searchers which depends on config. All this is doing is translating + * the provider and cache configurations to parameters which are passed upwards. 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public abstract class ConfiguredHTTPClientSearcher extends HTTPClientSearcher { + + /** Create this from a configuraton */ + public ConfiguredHTTPClientSearcher(final ComponentId id, final ProviderConfig providerConfig, Statistics manager) { + super(id, ConfiguredSearcherHelper.toConnectionList(providerConfig), new HTTPParameters(providerConfig), manager); + } + + /** Create an instance from direct parameters having a single connection. Useful for testing */ + public ConfiguredHTTPClientSearcher(String idString,String host,int port,String path, Statistics manager) { + super(new ComponentId(idString), Collections.singletonList(new Connection(host,port)),path, manager); + } + + /** Forwards to the next in chain fill(result,summaryName) */ + public @Override void fill(Result result,String summaryName, Execution execution,Connection connection) { + execution.fill(result,summaryName); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/http/ConfiguredHTTPProviderSearcher.java b/container-search/src/main/java/com/yahoo/search/federation/http/ConfiguredHTTPProviderSearcher.java new file mode 100644 index 00000000000..25253f768bd --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/http/ConfiguredHTTPProviderSearcher.java @@ -0,0 +1,68 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.federation.http; + +import com.yahoo.component.ComponentId; +import com.yahoo.search.federation.ProviderConfig; +import com.yahoo.search.cache.QrBinaryCacheConfig; +import com.yahoo.search.cache.QrBinaryCacheRegionConfig; +import com.yahoo.search.Result; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.statistics.Statistics; + +import java.util.Collections; + + +/** + * Superclass for http provider searchers which depends on config. 
All this is doing is translating + * the provider and cache configurations to parameters which are passed upwards. + * + * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> + * @author bratseth + */ +public abstract class ConfiguredHTTPProviderSearcher extends HTTPProviderSearcher { + + /** Create this from a configuraton */ + public ConfiguredHTTPProviderSearcher(final ComponentId id, final ProviderConfig providerConfig, Statistics manager) { + super(id,ConfiguredSearcherHelper.toConnectionList(providerConfig),new HTTPParameters(providerConfig), manager); + } + + /** Create this from a configuraton */ + public ConfiguredHTTPProviderSearcher(final ComponentId id, final ProviderConfig providerConfig, + HTTPParameters parameters, Statistics manager) { + super(id,ConfiguredSearcherHelper.toConnectionList(providerConfig),parameters, manager); + } + + /** Create this from a configuraton with a configured cache */ + public ConfiguredHTTPProviderSearcher(final ComponentId id, final ProviderConfig providerConfig, + final QrBinaryCacheConfig cacheConfig, + final QrBinaryCacheRegionConfig regionConfig, Statistics manager) { + super(id,ConfiguredSearcherHelper.toConnectionList(providerConfig),new HTTPParameters(providerConfig), manager); + configureCache(cacheConfig,regionConfig); + } + + /** Create this from a configuraton with a configured cache */ + public ConfiguredHTTPProviderSearcher(final ComponentId id, final ProviderConfig providerConfig, + final QrBinaryCacheConfig cacheConfig, + final QrBinaryCacheRegionConfig regionConfig, HTTPParameters parameters, Statistics manager) { + super(id,ConfiguredSearcherHelper.toConnectionList(providerConfig),parameters, manager); + configureCache(cacheConfig,regionConfig); + } + + /** Create an instance from direct parameters having a single connection. 
Useful for testing */ + public ConfiguredHTTPProviderSearcher(String idString,String host,int port,String path, Statistics manager) { + super(new ComponentId(idString), Collections.singletonList(new Connection(host,port)),path, manager); + } + + /** Create an instance from direct parameters having a single connection. Useful for testing */ + public ConfiguredHTTPProviderSearcher(String idString,String host,int port,HTTPParameters parameters, Statistics manager) { + super(new ComponentId(idString), Collections.singletonList(new Connection(host,port)),parameters, manager); + } + + /** + * Override this to provider multi-phase result filling towards a backend. + * This default implementation does nothing. + */ + public @Override void fill(Result result,String summaryName, Execution execution,Connection connection) { + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/http/ConfiguredSearcherHelper.java b/container-search/src/main/java/com/yahoo/search/federation/http/ConfiguredSearcherHelper.java new file mode 100644 index 00000000000..8d3ee016b4f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/http/ConfiguredSearcherHelper.java @@ -0,0 +1,27 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.federation.http; + +import java.util.ArrayList; +import java.util.List; + +import com.yahoo.search.federation.ProviderConfig; + +/** + * Some static helper classes for configured*Searcher classes + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +class ConfiguredSearcherHelper { + + /** No instantiation */ + private ConfiguredSearcherHelper() { } + + public static List<Connection> toConnectionList(ProviderConfig providerConfig) { + List<Connection> connections=new ArrayList<>(); + for(ProviderConfig.Node node : providerConfig.node()) { + connections.add(new Connection(node.host(), node.port())); + } + return connections; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/http/Connection.java b/container-search/src/main/java/com/yahoo/search/federation/http/Connection.java new file mode 100644 index 00000000000..88e2c6ad0a0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/http/Connection.java @@ -0,0 +1,30 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.federation.http; + +/** + * Represents a connection to a particular node (host/port). + * Right now this is just a container of connection parameters, but might be extended to + * contain an open connection later. + * The host and port state is immutable. 
/**
 * Represents a connection to a particular node (host/port).
 * Right now this is just a container of connection parameters, but might be extended to
 * contain an open connection later.
 * The host and port state is immutable: both are assigned once, in the constructor.
 *
 * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
 */
public class Connection {

    // final enforces the immutability promised by the class documentation
    private final String host;
    private final int port;

    /**
     * Creates a connection description.
     *
     * @param host the host name of the node; stored as given, without validation
     * @param port the port of the node; stored as given, without validation
     */
    public Connection(String host, int port) {
        this.host = host;
        this.port = port;
    }

    /** Returns the host given at construction */
    public String getHost() { return host; }

    /** Returns the port given at construction */
    public int getPort() { return port; }

    /** Returns a description of the form <code>http connection 'host:port'</code> */
    @Override
    public String toString() {
        return "http connection '" + host + ":" + port + "'";
    }

}
+ * + * @author <a href="mailto:mainak@yahoo-inc.com">Mainak Mandal</a> + */ +public class GzipDecompressingEntity extends HttpEntityWrapper { + + private static class Resources { + + byte [] buffer; + int total; + + Resources() { + total = 0; + buffer = new byte[65536]; + } + void drain(InputStream zipStream) throws IOException { + int numRead = zipStream.read(buffer, total, buffer.length); + while (numRead != -1) { + total += numRead; + if ((total + 65536) > buffer.length) { + buffer = Arrays.copyOf(buffer, buffer.length + numRead); + } + numRead = zipStream.read(buffer, total, buffer.length - total); + } + } + + } + + private final Resources resources = new Resources(); + + public GzipDecompressingEntity(final HttpEntity entity) throws IllegalStateException, IOException { + super(entity); + GZIPInputStream gz = new GZIPInputStream(entity.getContent()); + InputStream zipStream = new BufferedInputStream(gz); + try { + resources.drain(zipStream); + } catch (IOException e) { + throw e; + } finally { + zipStream.close(); + } + } + + @Override + public InputStream getContent() throws IOException, IllegalStateException { + + final ByteBuffer buff = ByteBuffer.wrap(resources.buffer, 0, resources.total); + return new InputStream() { + + @Override + public int available() throws IOException { + return buff.remaining(); + } + + @Override + public int read() throws IOException { + if (buff.hasRemaining()) + return buff.get() & 0xFF; + + return -1; + } + + @Override + public int read(byte[] b) throws IOException { + if (!buff.hasRemaining()) + return -1; + + int len = b.length; + if (len > buff.remaining()) + len = buff.remaining(); + buff.get(b, 0, len); + return len; + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + if (!buff.hasRemaining()) + return -1; + + if (len > buff.remaining()) + len = buff.remaining(); + buff.get(b, off, len); + return len; + } + + @Override + public long skip(long n) throws IOException { + if 
(!buff.hasRemaining()) + return -1; + + if (n > buff.remaining()) + n = buff.remaining(); + + buff.position(buff.position() + (int) n); + return n; + } + }; + } + + @Override + public long getContentLength() { + return resources.total; + } + + @Override + public void writeTo(OutputStream outstream) throws IOException { + outstream.write(resources.buffer, 0, resources.total); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/http/HTTPClientSearcher.java b/container-search/src/main/java/com/yahoo/search/federation/http/HTTPClientSearcher.java new file mode 100644 index 00000000000..1459fb6f226 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/http/HTTPClientSearcher.java @@ -0,0 +1,276 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.federation.http; + +import com.yahoo.component.ComponentId; +import com.yahoo.jdisc.http.CertificateStore; +import com.yahoo.yolean.Exceptions; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.result.Hit; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.statistics.Statistics; + +import org.apache.http.HttpEntity; + +import java.io.IOException; +import java.io.InputStream; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.logging.Logger; + +/** + * A utility parent for searchers which gets data from web services which is incorporated into the query. + * This searcher will take care of implementing the search method while the extending class implements + * {@link #getQueryMap} and {@link #handleResponse} to create the http request and handle the response, respectively. 
+ * + * <p>This class automatically adds a meta hit containing latency and other + * meta information about the obtained HTTP data using createRequestMeta(). + * The fields available in the hit are:</p> + * + * <dl><dt> + * HTTPSearcher.LOG_LATENCY_START + * <dd> + * The latency of the external provider answering a request. + * <dt> + * HTTPSearcher.LOG_LATENCY_FINISH + * <dd> + * Total time of the HTTP traffic, but also decoding of the data, is this + * happens at the same time. + * <dt> + * HTTPSearcher.LOG_URI + * <dd> + * The complete URI used for external service. + * <dt> + * HTTPSearcher.LOG_SCHEME + * <dd> + * The scheme of the request URI sent. + * <dt> + * HTTPSearcher.LOG_HOST + * <dd> + * The host used for the request URI sent. + * <dt> + * HTTPSearcher.LOG_PORT + * <dd> + * The port used for the request URI sent. + * <dt> + * HTTPSearcher.LOG_PATH + * <dd> + * Path element of the request URI sent. + * <dt> + * HTTPSearcher.LOG_STATUS + * <dd> + * Status code of the HTTP response. + * <dt> + * HTTPSearcher.LOG_PROXY_TYPE + * <dd> + * The proxy type used, if any. Default is "http". + * <dt> + * HTTPSearcher.LOG_PROXY_HOST + * <dd> + * The proxy host, if any. + * <dt> + * HTTPSearcher.LOG_PROXY_PORT + * <dd> + * The proxy port, if any. + * <dt> + * HTTPSearcher.LOG_HEADER_PREFIX prepended to request header field name + * <dd> + * The content of any additional request header fields. + * <dt> + * HTTPSearcher.LOG_RESPONSE_HEADER_PREFIX prepended to response header field name + * <dd> + * The content of any additional response header fields. 
+ * </dl> + + * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> + * @author bratseth + */ +public abstract class HTTPClientSearcher extends HTTPSearcher { + + static final CompoundName REQUEST_META_CARRIER = new CompoundName("com.yahoo.search.federation.http.HTTPClientSearcher_requestMeta"); + + protected final static Logger log = Logger.getLogger(HTTPClientSearcher.class.getName()); + + /** + * Creates a client searcher + * + * @param id the id of this instance + * @param connections the connections this will load balance and fail over between + * @param path the path portion of the url to be used + */ + public HTTPClientSearcher(ComponentId id, List<Connection> connections,String path,Statistics statistics) { + super(id, connections, path, statistics); + } + + public HTTPClientSearcher(ComponentId id, List<Connection> connections,String path,Statistics statistics, + CertificateStore certificateStore) { + super(id, connections, path, statistics, certificateStore); + } + + public HTTPClientSearcher(ComponentId id, List<Connection> connections, HTTPParameters parameters, Statistics statistics) { + super(id, connections, parameters, statistics); + } + /** + * Creates a client searcher + * + * @param id the id of this instance + * @param connections the connections this will load balance and fail over between + * @param parameters the parameters to use when making http calls + * @param certificateStore the certificate store to use to pass certificates in requests + */ + public HTTPClientSearcher(ComponentId id, List<Connection> connections, HTTPParameters parameters, + Statistics statistics, CertificateStore certificateStore) { + super(id, connections, parameters, statistics, certificateStore); + } + + /** Overridden to avoid interfering with errors from nested searchers, which is inappropriate for a <i>client</i> */ + @Override + public Result robustSearch(Query query, Execution execution, Connection connection) { + return 
search(query,execution,connection); + } + + /** Implements a search towards the connection chosen by the cluster searcher for this query */ + @Override + public Result search(Query query, Execution execution, Connection connection) { + Hit requestMeta = doHttpRequest(query, connection); + Result result = execution.search(query); + result.hits().add(requestMeta); + return result; + } + + private Hit doHttpRequest(Query query, Connection connection) { + URI uri; + // Create default meta hit for holding logging information + Hit requestMeta = createRequestMeta(); + query.properties().set(REQUEST_META_CARRIER, requestMeta); + + query.trace("Created request information hit",false,9); + try { + uri = getURI(query, connection); + } catch (MalformedURLException e) { + query.errors().add(createMalformedUrlError(query,e)); + return requestMeta; + } catch (URISyntaxException e) { + query.errors().add(createMalformedUrlError(query,e)); + return requestMeta; + } + + HttpEntity entity; + try { + if (query.getTraceLevel()>=1) + query.trace("Fetching " + uri.toString(), false, 1); + entity = getEntity(uri, requestMeta, query); + } catch (IOException e) { + query.errors().add(ErrorMessage.createBackendCommunicationError( + "Error when trying to connect to HTTP backend in " + this + " using " + connection + " for " + + query + ": " + Exceptions.toMessageString(e))); + return requestMeta; + } catch (TimeoutException e) { + query.errors().add(ErrorMessage.createTimeout("HTTP traffic timed out in " + + this + " for " + query + ": " + e.getMessage())); + return requestMeta; + } + if (entity==null) { + query.errors().add(ErrorMessage.createBackendCommunicationError( + "No result from connecting to HTTP backend in " + this + " using " + connection + " for " + query)); + return requestMeta; + } + + try { + query = handleResponse(entity,query); + } + catch (IOException e) { + query.errors().add(ErrorMessage.createBackendCommunicationError( + "Error when trying to consume input in " + this + 
": " + Exceptions.toMessageString(e))); + } finally { + cleanupHttpEntity(entity); + } + return requestMeta; + } + + /** Overrides to pass the query on to the next searcher */ + @Override + public Result search(Query query, Execution execution, ErrorMessage error) { + query.errors().add(error); + return execution.search(query); + } + + /** Do nothing on fill in client searchers */ + @Override + public void fill(Result result,String summaryClass,Execution execution,Connection connection) { + } + + /** + * Convenience hook for unmarshalling the response and adding the information to the query. + * Implement this or <code>handleResponse(entity,query)</code> in any subclass. + * This default implementation throws an exception. + * + * @param inputStream the stream containing the data from the http service + * @param contentLength the length of the content in the stream in bytes, or a negative number if not known + * @param query the current query, to which information from the stream should be added + * @return query the query to propagate down the chain. This should almost always be the + * query instance given as a parameter. + */ + public Query handleResponse(InputStream inputStream, long contentLength, Query query) throws IOException { + throw new UnsupportedOperationException("handleResponse must be implemented by " + this); + } + + /** + * Unmarshals the response and adds the resulting data to the given query. + * This default implementation calls + * <code>return handleResponse(entity.getContent(), entity.getContentLength(), query);</code> + * (and does some detailed query tracing). + * + * @param query the current query, to which information from the stream should be added + * @return query the query to propagate down the chain. This should almost always be the + * query instance given as a parameter. 
+ */ + public Query handleResponse(HttpEntity entity, Query query) throws IOException { + long len = entity.getContentLength(); + if (query.getTraceLevel()>=4) + query.trace("Received " + len + " bytes response in " + this, false, 4); + query = handleResponse(entity.getContent(), len, query); + if (query.getTraceLevel()>=2) + query.trace("Handled " + len + " bytes response in " + this, false, 2); + return query; + } + + /** Never retry individual queries to clients for now */ + @Override + protected boolean shouldRetry(Query query,Result result) { return false; } + + /** + * numHits and offset should not be part of the cache key as cache supports + * partial read/write that is only one cache entry is maintained per query + * irrespective of the offset and numhits. + */ + public abstract Map<String, String> getCacheKey(Query q); + + /** + * Adds all key-values starting by "service." + getClientName() in query.properties(). + * Returns the empty map if {@link #getServiceName} is not overridden. + */ + @Override + public Map<String,String> getQueryMap(Query query) { + LinkedHashMap<String, String> queryMap=new LinkedHashMap<>(); + if (getServiceName().isEmpty()) return queryMap; + + for (Map.Entry<String,Object> objectProperty : query.properties().listProperties("service." + getServiceName()).entrySet()) // TODO: Make more efficient using CompoundName + queryMap.put(objectProperty.getKey(),objectProperty.getValue().toString()); + return queryMap; + } + + /** + * Override this to return the name of the service this is a client of. + * This is used to look up service specific properties as service.getServiceName.serviceSpecificProperty. + * This default implementation returns "", which means service specific parameters will not be used. 
+ */ + protected String getServiceName() { return ""; } + +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/http/HTTPParameters.java b/container-search/src/main/java/com/yahoo/search/federation/http/HTTPParameters.java new file mode 100644 index 00000000000..19fe1df3e2e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/http/HTTPParameters.java @@ -0,0 +1,315 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.federation.http; + +import com.google.common.base.Preconditions; +import com.yahoo.search.federation.ProviderConfig.PingOption; +import org.apache.http.conn.params.ConnManagerParams; +import org.apache.http.conn.params.ConnPerRouteBean; +import org.apache.http.params.BasicHttpParams; +import org.apache.http.params.HttpConnectionParams; +import org.apache.http.params.HttpParams; + +import com.yahoo.search.federation.ProviderConfig; + +/** + * A set of parameters for talking to an http backend + * + * @author bratseth + */ +public final class HTTPParameters { + + public static final String RETRIES = "com.yahoo.search.federation.http.retries"; + + private boolean frozen=false; + + // All timing parameters below are in milliseconds + /** The url request path portion */ + private String path="/"; + private int connectionTimeout=2000; + private int readTimeout=5000; + private boolean persistentConnections=true; + private boolean enableProxy = false; + private String proxyHost = "localhost"; + private int proxyPort = 1080; + private String method = "GET"; + private String schema = "http"; + private String inputEncoding = "utf-8"; + private String outputEncoding = "utf-8"; + private int maxTotalConnections=10000; + private int maxConnectionsPerRoute=10000; + private int socketBufferSizeBytes=-1; + private int retries = 1; + private int configuredReadTimeout = -1; + private int configuredConnectionTimeout = -1; + private int 
connectionPoolTimeout = -1; + private String ycaProxy = null; + private int ycaPort = 0; + private String ycaApplicationId = null; + private boolean ycaUseProxy = false; + private long ycaTtl = 0L; + private long ycaRetry = 0L; + + private PingOption.Enum pingOption = PingOption.NORMAL; + + + private boolean followRedirects = true; + + public HTTPParameters() {} + + public HTTPParameters(String path) { + setPath(path); + } + + public HTTPParameters(ProviderConfig providerConfig) { + configuredReadTimeout = (int) (providerConfig.readTimeout() * 1000.0d); + configuredConnectionTimeout = (int) (providerConfig.connectionTimeout() * 1000.0d); + connectionPoolTimeout = (int) (providerConfig.connectionPoolTimeout() * 1000.0d); + retries = providerConfig.retries(); + setPath(providerConfig.path()); + ycaUseProxy = providerConfig.yca().useProxy(); + if (ycaUseProxy) { + ycaProxy = providerConfig.yca().host(); + ycaPort = providerConfig.yca().port(); + } + ycaApplicationId = providerConfig.yca().applicationId(); + ycaTtl = providerConfig.yca().ttl() * 1000L; + ycaRetry = providerConfig.yca().retry() * 1000L; + followRedirects = providerConfig.followRedirects(); + pingOption = providerConfig.pingOption(); + } + + /** + * Set the url path to use in queries to this. If the argument is null or empty the path is set to "/". + * If a leading "/" is missing, it is added automatically. + */ + public final void setPath(String path) { + if (path==null || path.isEmpty()) path="/"; + + if (! path.startsWith("/")) + path="/" + path; + this.path = path; + } + + public PingOption.Enum getPingOption() { + return pingOption; + } + + public void setPingOption(PingOption.Enum pingOption) { + Preconditions.checkNotNull(pingOption); + ensureNotFrozen(); + this.pingOption = pingOption; + } + + /** Returns the url path. Default is "/". 
*/ + public String getPath() { return path; } + + public boolean getFollowRedirects() { + return followRedirects; + } + + public void setFollowRedirects(boolean followRedirects) { + ensureNotFrozen(); + this.followRedirects = followRedirects; + } + + + public void setConnectionTimeout(int connectionTimeout) { + ensureNotFrozen(); + this.connectionTimeout=connectionTimeout; + } + + /** Returns the connection timeout in milliseconds. Default is 2000. */ + public int getConnectionTimeout() { return connectionTimeout; } + + public void setReadTimeout(int readTimeout) { + ensureNotFrozen(); + this.readTimeout=readTimeout; + } + + /** Returns the read timeout in milliseconds. Default is 5000. */ + public int getReadTimeout() { return readTimeout; } + + /** + * <b>Note: This is currently largely a noop: Connections are reused even when this is set to true. + * The setting will change from sharing connections between threads to only reusing it within a thread + * but it is still reused.</b> + */ + public void setPersistentConnections(boolean persistentConnections) { + ensureNotFrozen(); + this.persistentConnections=persistentConnections; + } + + /** Returns whether this should use persistent connections. Default is true. */ + public boolean getPersistentConnections() { return persistentConnections; } + + /** Returns whether proxying should be enabled. Default is false. */ + public boolean getEnableProxy() { return enableProxy; } + + public void setEnableProxy(boolean enableProxy ) { + ensureNotFrozen(); + this.enableProxy=enableProxy; + } + + /** Returns the proxy type to use (if enabled). Default is "http". */ + public String getProxyType() { + return "http"; + } + + public void setProxyHost(String proxyHost) { + ensureNotFrozen(); + this.proxyHost=proxyHost; + } + + /** Returns the proxy host to use (if enabled). Default is "localhost". 
*/ + public String getProxyHost() { return proxyHost; } + + public void setProxyPort(int proxyPort) { + ensureNotFrozen(); + this.proxyPort=proxyPort; + } + + /** Returns the proxy port to use (if enabled). Default is 1080. */ + public int getProxyPort() { return proxyPort; } + + public void setMethod(String method) { + ensureNotFrozen(); + this.method=method; + } + + /** Returns the http method to use. Default is "GET". */ + public String getMethod() { return method; } + + public void setSchema(String schema) { + ensureNotFrozen(); + this.schema=schema; + } + + /** Returns the schema to use. Default is "http". */ + public String getSchema() { return schema; } + + public void setInputEncoding(String inputEncoding) { + ensureNotFrozen(); + this.inputEncoding=inputEncoding; + } + + /** Returns the input encoding. Default is "utf-8". */ + public String getInputEncoding() { return inputEncoding; } + + public void setOutputEncoding(String outputEncoding) { + ensureNotFrozen(); + this.outputEncoding=outputEncoding; + } + + /** Returns the output encoding. Default is "utf-8". */ + public String getOutputEncoding() { return outputEncoding; } + + /** Make this unmodifiable. Note that any thread synchronization must be done outside this object. 
*/ + public void freeze() { + frozen=true; + } + + private void ensureNotFrozen() { + if (frozen) throw new IllegalStateException("Cannot modify frozen " + this); + } + + /** + * Returns the eligible subset of this as a HttpParams snapshot + * AND configures the Apache HTTP library with the parameters of this + */ + public HttpParams toHttpParams() { + return toHttpParams(connectionTimeout, readTimeout); + } + + /** + * Returns the eligible subset of this as a HttpParams snapshot + * AND configures the Apache HTTP library with the parameters of this + */ + public HttpParams toHttpParams(int connectionTimeout, int readTimeout) { + HttpParams params = new BasicHttpParams(); + // force use of configured value if available + if (configuredConnectionTimeout > 0) { + HttpConnectionParams.setConnectionTimeout(params, configuredConnectionTimeout); + } else { + HttpConnectionParams.setConnectionTimeout(params, connectionTimeout); + } + if (configuredReadTimeout > 0) { + HttpConnectionParams.setSoTimeout(params, configuredReadTimeout); + } else { + HttpConnectionParams.setSoTimeout(params, readTimeout); + } + if (socketBufferSizeBytes > 0) { + HttpConnectionParams.setSocketBufferSize(params, socketBufferSizeBytes); + } + if (connectionPoolTimeout > 0) { + ConnManagerParams.setTimeout(params, connectionPoolTimeout); + } + ConnManagerParams.setMaxTotalConnections(params, maxTotalConnections); + ConnManagerParams.setMaxConnectionsPerRoute(params, new ConnPerRouteBean(maxConnectionsPerRoute)); + if (retries >= 0) { + params.setIntParameter(RETRIES, retries); + } + params.setParameter("http.protocol.handle-redirects", followRedirects); + return params; + } + + public int getMaxTotalConnections() { + return maxTotalConnections; + } + + public void setMaxTotalConnections(int maxTotalConnections) { + ensureNotFrozen(); + this.maxTotalConnections = maxTotalConnections; + } + + public int getMaxConnectionsPerRoute() { + return maxConnectionsPerRoute; + } + + public void 
setMaxConnectionsPerRoute(int maxConnectionsPerRoute) { + ensureNotFrozen(); + this.maxConnectionsPerRoute = maxConnectionsPerRoute; + } + + public int getSocketBufferSizeBytes() { + return socketBufferSizeBytes; + } + + public void setSocketBufferSizeBytes(int socketBufferSizeBytes) { + ensureNotFrozen(); + this.socketBufferSizeBytes = socketBufferSizeBytes; + } + + public int getRetries() { + return retries; + } + + public void setRetries(int retries) { + ensureNotFrozen(); + this.retries = retries; + } + + public String getYcaProxy() { + return ycaProxy; + } + + public int getYcaPort() { + return ycaPort; + } + + public String getYcaApplicationId() { + return ycaApplicationId; + } + + public boolean getYcaUseProxy() { + return ycaUseProxy; + } + + public long getYcaTtl() { + return ycaTtl; + } + + public long getYcaRetry() { + return ycaRetry; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/http/HTTPProviderSearcher.java b/container-search/src/main/java/com/yahoo/search/federation/http/HTTPProviderSearcher.java new file mode 100644 index 00000000000..c2bc6b2196b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/http/HTTPProviderSearcher.java @@ -0,0 +1,260 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.federation.http; + +import com.google.common.collect.ImmutableList; +import com.yahoo.component.ComponentId; +import com.yahoo.jdisc.http.CertificateStore; +import com.yahoo.search.cache.QrBinaryCacheConfig; +import com.yahoo.search.cache.QrBinaryCacheRegionConfig; +import com.yahoo.yolean.Exceptions; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.federation.FederationSearcher; +import com.yahoo.search.query.Properties; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.result.Hit; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.statistics.Counter; +import com.yahoo.statistics.Statistics; +import com.yahoo.statistics.Value; + +import org.apache.http.HttpEntity; + +import java.io.IOException; +import java.io.InputStream; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.*; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Superclass of searchers which talks to HTTP backends. Implement a subclass to talk to a backend + * over HTTP which is not supported by the platform out of the box. + * <p> + * Implementations must override one of the <code>unmarshal</code> methods to unmarshal the response. 
+ * </p> + * + * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> + * @author bratseth + */ +public abstract class HTTPProviderSearcher extends HTTPSearcher { + + private final Counter emptyResults; + private final Value hitsPerQuery; + private final Value responseLatency; + private final Counter readTimeouts; + + private final static List<String> excludedSourceProperties = ImmutableList.of("offset", "hits", "provider"); + + protected final static Logger log = Logger.getLogger(HTTPProviderSearcher.class.getName()); + + /** The name of the cache used (which is just getid().stringValue(), or null if no cache is used */ + protected String cacheName=null; + + public HTTPProviderSearcher(ComponentId id, List<Connection> connections,String path, Statistics statistics) { + this(id,connections,new HTTPParameters(path), statistics); + } + + /** Creates a http provider searcher using id.getName as provider name */ + public HTTPProviderSearcher(ComponentId id, List<Connection> connections, String path, + Statistics statistics, CertificateStore certificateStore) { + this(id, connections, new HTTPParameters(path), statistics, certificateStore); + } + + public HTTPProviderSearcher(ComponentId id, List<Connection> connections, HTTPParameters parameters, + Statistics statistics) { + this(id, connections, parameters, statistics, new ThrowingCertificateStore()); + } + + /** + * Creates a provider searcher + * + * @param id the id of this instance + * @param connections the connections this will load balance and fail over between + * @param parameters the parameters to use when making http calls + */ + public HTTPProviderSearcher(ComponentId id, List<Connection> connections, HTTPParameters parameters, + Statistics statistics, CertificateStore certificateStore) { + super(id, connections, parameters, statistics, certificateStore); + String suffix = "_" + getId().getName().replace('.', '_'); + hitsPerQuery = new Value("hits_per_query" + suffix, statistics, + new 
Value.Parameters().setLogRaw(false).setNameExtension(false).setLogMean(true)); + responseLatency = new Value(LOG_LATENCY_START + suffix, statistics, + new Value.Parameters().setLogRaw(false).setLogMean(true).setNameExtension(false)); + emptyResults = new Counter("empty_results" + suffix, statistics, false); + readTimeouts = new Counter(LOG_READ_TIMEOUT_PREFIX + suffix, statistics, false); + } + + /** @deprecated this method does nothing */ + @Deprecated + protected void configureCache(final QrBinaryCacheConfig cacheConfig,final QrBinaryCacheRegionConfig regionConfig) { + } + + /** + * Unmarshal the stream by converting it to hits and adding the hits to the given result. + * A convenience hook called by the default <code>unmarshal(entity,result).</code> + * Override this in subclasses which does not override <code>unmarshal(entity,result).</code> + * <p> + * This default implementation throws an exception. + * + * @param stream the stream of data returned + * @param contentLength the length of the content in bytes if known, or a negative number if unknown + * @param result the result to which unmarshalled data should be added + */ + public void unmarshal(final InputStream stream, long contentLength, final Result result) throws IOException { + throw new UnsupportedOperationException("Unmarshal must be implemented by " + this); + } + + /** + * Unmarshal the result from an http entity. This default implementation calls + * <code>unmarshal(entity.getContent(), entity.getContentLength(), result)</code> + * (and does some detailed query tracing). 
+ * + * @param entity the entity containing the data to unmarshal + * @param result the result to which unmarshalled data should be added + */ + public void unmarshal(HttpEntity entity,Result result) throws IOException { + Query query=result.getQuery(); + long len = entity.getContentLength(); + if (query.getTraceLevel()>=4) + query.trace("Received " + len + " bytes response in " + this, false, 4); + query.trace("Unmarshaling result.", false, 6); + unmarshal(entity.getContent(), len, result); + + if (query.getTraceLevel()>=2) + query.trace("Handled " + len + " bytes response in " + this, false, 2); + + } + + protected void addNonExcludedSourceProperties(Query query, Map<String, String> queryMap) { + Properties sourceProperties = FederationSearcher.getSourceProperties(query); + if (sourceProperties != null) { + for(Map.Entry<String, Object> entry : sourceProperties.listProperties("").entrySet()) { + if (!excludedSourceProperties.contains(entry.getKey())) { + queryMap.put(entry.getKey(), entry.getValue().toString()); + } + } + } + } + + /** + * Hook called at the moment the result is returned from this searcher. This default implementation + * does <code>return result</code>. 
+ * + * @param result the result which is to be returned + * @param requestMeta the request information hit, or null if none was created (e.g if this was a cache lookup) + * @param e the exception caused during execution of this query, or null if none + * @return the result which is returned upwards + */ + protected Result inspectAndReturnFinalResult(Result result, Hit requestMeta, Exception e) { + return result; + } + + private Result statisticsBeforeInspection(Result result, + Hit requestMeta, Exception e) { + int hitCount = result.getConcreteHitCount(); + if (hitCount == 0) { + emptyResults.increment(); + } + hitsPerQuery.put((double) hitCount); + + if (requestMeta != null) { + requestMeta.setField(LOG_HITCOUNT, Integer.valueOf(hitCount)); + } + + return inspectAndReturnFinalResult(result, + requestMeta, e); + } + + + @Override + protected void logResponseLatency(long latency) { + responseLatency.put((double) latency); + } + + @Override + public Result search(Query query, Execution execution,Connection connection) { + // Create default meta hit for holding logging information + Hit requestMeta = createRequestMeta(); + Result result = new Result(query); + result.hits().add(requestMeta); + query.trace("Created request information hit", false, 9); + + try { + URI uri = getURI(query, requestMeta, connection); + if (query.getTraceLevel()>=1) + query.trace("Fetching " + uri.toString(), false, 1); + long requestStartTime = System.currentTimeMillis(); + + HttpEntity entity = getEntity(uri, requestMeta, query); + + // Why should consumeEntity call inspectAndReturnFinalResult itself? + // Seems confusing to me. + return entity == null + ? 
statisticsBeforeInspection(result, requestMeta, null) + : consumeEntity(entity, query, result, requestMeta, requestStartTime); + + } catch (MalformedURLException|URISyntaxException e) { + result.hits().addError(createMalformedUrlError(query,e)); + return statisticsBeforeInspection(result, requestMeta, e); + } catch (TimeoutException e) { + result.hits().addError(ErrorMessage.createTimeout("No time left for HTTP traffic in " + + this + + " for " + query + ": " + e.getMessage())); + return statisticsBeforeInspection(result, requestMeta, e); + } catch (IOException e) { + result.hits().addError(ErrorMessage.createBackendCommunicationError( + "Error when trying to connect to HTTP backend in " + this + + " for " + query + ": " + Exceptions.toMessageString(e))); + return statisticsBeforeInspection(result, requestMeta, e); + } + } + + private Result consumeEntity(HttpEntity entity, Query query, Result result, Hit logHit, long requestStartTime) { + + try { + // remove some time from timeout to allow for close calls with return result + unmarshal(new TimedHttpEntity(entity, query.getStartTime(), Math.max(1, query.getTimeout() - 10)), result); + logHit.setField(LOG_LATENCY_FINISH, System.currentTimeMillis() - requestStartTime); + return statisticsBeforeInspection(result, logHit, null); + } catch (IOException e) { + result.hits().addError(ErrorMessage.createBackendCommunicationError( + "Error when trying to consume input in " + this + ": " + Exceptions.toMessageString(e))); + return statisticsBeforeInspection(result, logHit, e); + } catch (TimeoutException e) { + readTimeouts.increment(); + result.hits().addError(ErrorMessage + .createTimeout("Timed out while reading/unmarshaling from backend in " + + this + " for " + query + + ": " + e.getMessage())); + return statisticsBeforeInspection(result, logHit, e); + } finally { // TODO: The scope of this finally must be enlarged to release the connection also on errors + cleanupHttpEntity(entity); + } + } + + /** + * Returns the 
key-value pairs that should be added as properties to the request url sent to the service. + * Must be overridden in subclasses to add the key-values expected by the service in question, unless + * {@link #getURI} (from which this is called) is overridden. + * <p> + * This default implementation returns the query.properties() prefixed by + * "source.[sourceName]" or "property.[propertyName]" + * (by calling {@link #addNonExcludedSourceProperties}). + */ + @Override + public Map<String,String> getQueryMap(Query query) { + Map<String,String> queryMap = super.getQueryMap(query); + addNonExcludedSourceProperties(query, queryMap); + return queryMap; + } + + /** + * @deprecated the cache key is ignored as there is no built-in caching support + */ + @Deprecated + public abstract Map<String, String> getCacheKey(Query q); + +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/http/HTTPSearcher.java b/container-search/src/main/java/com/yahoo/search/federation/http/HTTPSearcher.java new file mode 100644 index 00000000000..65ce7b3647c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/http/HTTPSearcher.java @@ -0,0 +1,958 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.federation.http; + +import com.google.inject.Inject; +import com.yahoo.component.ComponentId; +import com.yahoo.jdisc.http.CertificateStore; +import com.yahoo.log.LogLevel; +import com.yahoo.prelude.Ping; +import com.yahoo.prelude.Pong; +import com.yahoo.yolean.Exceptions; +import com.yahoo.search.Query; +import com.yahoo.search.cluster.ClusterSearcher; +import com.yahoo.search.federation.ProviderConfig.PingOption; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.result.Hit; +import com.yahoo.statistics.Counter; +import com.yahoo.statistics.Statistics; +import com.yahoo.text.Utf8; + +import org.apache.http.*; +import org.apache.http.client.HttpClient; +import org.apache.http.client.HttpRequestRetryHandler; +import org.apache.http.client.methods.HttpRequestBase; +import org.apache.http.client.methods.HttpUriRequest; +import org.apache.http.conn.ClientConnectionManager; +import org.apache.http.conn.ConnectTimeoutException; +import org.apache.http.conn.params.ConnManagerParams; +import org.apache.http.conn.params.ConnRoutePNames; +import org.apache.http.conn.routing.HttpRoutePlanner; +import org.apache.http.conn.scheme.PlainSocketFactory; +import org.apache.http.conn.scheme.Scheme; +import org.apache.http.conn.scheme.SchemeRegistry; +import org.apache.http.conn.ssl.SSLSocketFactory; +import org.apache.http.impl.client.DefaultHttpClient; +import org.apache.http.impl.conn.DefaultHttpRoutePlanner; +import org.apache.http.impl.conn.SingleClientConnManager; +import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager; +import org.apache.http.params.HttpParams; +import org.apache.http.params.HttpProtocolParams; +import org.apache.http.protocol.BasicHttpContext; +import org.apache.http.protocol.ExecutionContext; +import org.apache.http.protocol.HttpContext; +import org.apache.http.protocol.HttpRequestExecutor; +import org.apache.http.util.EntityUtils; + +import javax.net.ssl.SSLHandshakeException; +import 
java.io.IOException; +import java.io.InterruptedIOException; +import java.io.UnsupportedEncodingException; +import java.net.*; +import java.nio.charset.Charset; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.concurrent.TimeUnit; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Generic superclass of searchers making connections to some HTTP service. This + * supports clustered connections - a list of alternative servers may be given, + * requests will be hashed across these and failed over in case some are down. + * <p> + * This simply provides some utility methods for working with http connections + * and implements ping against the service. + * + * <p>This searcher contains code from the Apache httpcomponents client library, + * licensed to the Apache Software Foundation under the Apache License, Version + * 2.0. Please refer to http://www.apache.org/licenses/LICENSE-2.0 for details. + * + * <p>This class automatically adds a meta hit containing latency and other + * meta information about the obtained HTTP data using createRequestMeta(). + * The fields available in the hit are:</p> + * + * <dl><dt> + * HTTPSearcher.LOG_LATENCY_START + * <dd> + * The latency of the external provider answering a request. + * <dt> + * HTTPSearcher.LOG_LATENCY_FINISH + * <dd> + * Total time of the HTTP traffic, but also decoding of the data, as this + * happens at the same time. + * <dt> + * HTTPSearcher.LOG_HITCOUNT + * <dd> + * Number of concrete hits in the result returned by this provider. + * <dt> + * HTTPSearcher.LOG_URI + * <dd> + * The complete URI used for external service. + * <dt> + * HTTPSearcher.LOG_SCHEME + * <dd> + * The scheme of the request URI sent. + * <dt> + * HTTPSearcher.LOG_HOST + * <dd> + * The host used for the request URI sent. 
+ * <dt> + * HTTPSearcher.LOG_PORT + * <dd> + * The port used for the request URI sent. + * <dt> + * HTTPSearcher.LOG_PATH + * <dd> + * Path element of the request URI sent. + * <dt> + * HTTPSearcher.LOG_STATUS + * <dd> + * Status code of the HTTP response. + * <dt> + * HTTPSearcher.LOG_PROXY_TYPE + * <dd> + * The proxy type used, if any. Default is "http". + * <dt> + * HTTPSearcher.LOG_PROXY_HOST + * <dd> + * The proxy host, if any. + * <dt> + * HTTPSearcher.LOG_PROXY_PORT + * <dd> + * The proxy port, if any. + * <dt> + * HTTPSearcher.LOG_HEADER_PREFIX prepended to request header field name + * <dd> + * The content of any additional request header fields. + * <dt> + * HTTPSearcher.LOG_RESPONSE_HEADER_PREFIX prepended to response header field name + * <dd> + * The content of any additional response header fields. + * </dl> + * + * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> + */ +public abstract class HTTPSearcher extends ClusterSearcher<Connection> { + + protected static final String YCA_HTTP_HEADER = "Yahoo-App-Auth"; + + private static final Charset iso8859Charset = Charset.forName("ISO-8859-1"); + + // Logging field name constants + public static final String LOG_PATH = "path"; + public static final String LOG_PORT = "port"; + public static final String LOG_HOST = "host"; + public static final String LOG_IP_ADDRESS = "ip_address"; + public static final String IP_ADDRESS_UNKNOWN = "unknown"; + + public static final String LOG_SCHEME = "scheme"; + public static final String LOG_URI = "uri"; + public static final String LOG_PROXY_PORT = "proxy_port"; + public static final String LOG_PROXY_HOST = "proxy_host"; + public static final String LOG_PROXY_TYPE = "proxy_type"; + public static final String LOG_STATUS = "status"; + public static final String LOG_LATENCY_FINISH = "latency_finish"; + public static final String LOG_LATENCY_START = "latency_start"; + public static final String LOG_LATENCY_CONNECT = "latency_connect"; + public static 
final String LOG_QUERY_PARAM_PREFIX = "query_param_"; + public static final String LOG_HEADER_PREFIX = "header_"; + public static final String LOG_RESPONSE_HEADER_PREFIX = "response_header_"; + public static final String LOG_HITCOUNT = "hit_count"; + public static final String LOG_CONNECT_TIMEOUT_PREFIX = "connect_timeout_"; + public static final String LOG_READ_TIMEOUT_PREFIX = "read_timeout_"; + + protected final Logger log = Logger.getLogger(HTTPSearcher.class.getName()); + + /** The HTTP parameters to use. Assigned in the constructor */ + private HTTPParameters httpParameters; + + private final Counter connectTimeouts; + + /** Whether to use certificates */ + protected boolean useCertificate = false; + + private final CertificateStore certificateStore; + + /** The (optional) YCA application ID. */ + private String ycaApplicationId = null; + + /** The (optional) YCA proxy */ + protected HttpHost ycaProxy = null; + + /** YCA cache TTL in ms */ + private long ycaTtl = 0L; + + /** YCA retry rate in the cache if no cert is found, in ms */ + private long ycaRetry = 0L; + + /** Set at construction if this is using persistent connections */ + private ClientConnectionManager sharedConnectionManager = null; + + /** Set at construction if using non-persistent connections */ + private ThreadLocal<SingleClientConnManager> singleClientConnManagerThreadLocal = null; + + private static final SchemeRegistry schemeRegistry = new SchemeRegistry(); + + static { + schemeRegistry.register(new Scheme("http", PlainSocketFactory + .getSocketFactory(), 80)); + schemeRegistry.register(new Scheme("https", SSLSocketFactory + .getSocketFactory(), 443)); + } + + public HTTPSearcher(ComponentId componentId, List<Connection> connections,String path, Statistics statistics) { + this(componentId, connections, new HTTPParameters(path), statistics, new ThrowingCertificateStore()); + } + + /** Creates a http searcher with default connection and read timeouts (currently 2 and 5s respectively) */ + 
public HTTPSearcher(ComponentId componentId, List<Connection> connections,String path, Statistics statistics, + CertificateStore certificateStore) { + this(componentId, connections, new HTTPParameters(path), statistics, certificateStore); + } + + public HTTPSearcher(ComponentId componentId, List<Connection> connections, HTTPParameters parameters, + Statistics statistics) { + this(componentId, connections, parameters, statistics, new ThrowingCertificateStore()); + } + /** + * Creates a http searcher + * + * @param componentId the id of this instance + * @param connections the connections to establish to the backend nodes + * @param parameters the http parameters to use. This object will be frozen if it isn't already + */ + @Inject + public HTTPSearcher(ComponentId componentId, List<Connection> connections, HTTPParameters parameters, + Statistics statistics, CertificateStore certificateStore) { + super(componentId,connections,false); + String suffix = "_" + getId().getName().replace('.', '_'); + + connectTimeouts = new Counter(LOG_CONNECT_TIMEOUT_PREFIX + suffix, statistics, false); + + parameters.freeze(); + this.httpParameters = parameters; + this.certificateStore = certificateStore; + + if (parameters.getPersistentConnections()) { + HttpParams params=parameters.toHttpParams(); + HttpProtocolParams.setVersion(params, HttpVersion.HTTP_1_1); + ConnManagerParams.setTimeout(params, 10); + sharedConnectionManager = new ThreadSafeClientConnManager(params, schemeRegistry); + Thread connectionPurgerThread = new Thread(() -> { + //this is the default value in yahoo jvm installations + long DNSTTLSec = 120; + while (true) { + try { + Thread.sleep(DNSTTLSec * 1000); + if (sharedConnectionManager == null) + continue; + + sharedConnectionManager.closeExpiredConnections(); + DNSTTLSec = Long.valueOf(java.security.Security + .getProperty("networkaddress.cache.ttl")); + //No DNS TTL, no need to close idle connections + if (DNSTTLSec <= 0) { + DNSTTLSec = 120; + continue; + } + 
sharedConnectionManager.closeIdleConnections(2 * DNSTTLSec, TimeUnit.SECONDS); + } catch (InterruptedException e) { + return; + } catch (NumberFormatException e) { + continue; + } + } + }); + connectionPurgerThread.setDaemon(true); + connectionPurgerThread.start(); + + } + else { + singleClientConnManagerThreadLocal =new ThreadLocal<>(); + } + + initializeYCA(httpParameters, certificateStore); + } + + /** + * Initialize YCA certificate and proxy if they have been set to non-null, + * non-empty values. It will wrap thrown exceptions from the YCA layer into + * RuntimeException and propagate them. + */ + private void initializeYCA(HTTPParameters parameters, CertificateStore certificateStore) { + String applicationId = parameters.getYcaApplicationId(); + String proxy = parameters.getYcaProxy(); + int port = parameters.getYcaPort(); + long ttl = parameters.getYcaTtl(); + long retry = parameters.getYcaRetry(); + + if (applicationId != null && !applicationId.trim().isEmpty()) { + initializeCertificate(applicationId, ttl, retry, certificateStore); + } + + if (parameters.getYcaUseProxy()) { + initializeProxy(proxy, port); + } + } + + /** Returns the HTTP parameters used in this. This is always frozen */ + public HTTPParameters getParameters() { return httpParameters; } + + /** + * Returns the key-value pairs that should be added as properties to the request url sent to the service. + * Must be overridden in subclasses to add the key-values expected by the service in question, unless + * {@link #getURI} (from which this is called) is overridden. + * <p> + * This default implementation returns an empty LinkedHashMap. + */ + public Map<String,String> getQueryMap(Query query) { + return new LinkedHashMap<>(); + } + + /** + * Initialize the YCA certificate. + * This will warn but not throw if certificates could not be loaded, as the certificates + * are external state which can fail independently. 
+ */ + private void initializeCertificate(String applicationId, long ttl, long retry, CertificateStore certificateStore) { + try { + // get the certificate, i.e. init the cache and check integrity + String certificate = certificateStore.getCertificate(applicationId, ttl, retry); + if (certificate == null) { + getLogger().log(LogLevel.WARNING, "No certificate found for application '" + applicationId + "'"); + return; + } + + this.useCertificate = true; + this.ycaApplicationId = applicationId; + this.ycaTtl = ttl; + this.ycaRetry = retry; + getLogger().log(LogLevel.CONFIG, "Got certificate: " + certificate); + } + catch (Exception e) { + getLogger().log(LogLevel.WARNING,"Exception while initializing certificate for application '" + + applicationId + "' in " + this, e); + } + } + + /** + * Initialize the YCA proxy setting. + */ + private void initializeProxy(String host, int port) { + ycaProxy = new HttpHost(host, port); + getLogger().log(LogLevel.CONFIG,"Proxy is configured; will use proxy: " + ycaProxy); + } + + /** + * Same a {@code getURI(query, offset, hits, null)}. + * @see #getURI(Query, Hit, Connection) + */ + protected URI getURI(Query query,Connection connection) throws MalformedURLException, URISyntaxException { + Hit requestMeta; + try { + requestMeta = (Hit) query.properties().get(HTTPClientSearcher.REQUEST_META_CARRIER); + } catch (ClassCastException e) { + requestMeta = null; + } + return getURI(query, requestMeta, connection); + } + + /** + * Creates the URI for a query. + * Populates the {@code requestMeta} meta hit with the created URI HTTP properties. + * + * @param requestMeta a meta hit that holds logging information about this request (may be {@code null}). 
+ */ + protected URI getURI(Query query, Hit requestMeta, Connection connection) + throws MalformedURLException, URISyntaxException { + StringBuilder parameters = new StringBuilder(); + + Map<String, String> queries = getQueryMap(query); + if (queries.size() > 0) { + Iterator<Map.Entry<String, String>> mapIterator = queries.entrySet().iterator(); + parameters.append("?"); + try { + Map.Entry<String, String> entry; + while (mapIterator.hasNext()) { + entry = mapIterator.next(); + + if (requestMeta != null) + requestMeta.setField(LOG_QUERY_PARAM_PREFIX + + entry.getKey(), entry.getValue()); + + parameters.append(entry.getKey() + "=" + URLEncoder.encode(entry.getValue(), + httpParameters.getInputEncoding())); + if (mapIterator.hasNext()) { + parameters.append("&"); + } + } + } catch (UnsupportedEncodingException e) { + throw new RuntimeException("Unknown input encoding set in " + this, e); + } + } + + URI uri = new URL(httpParameters.getSchema(), connection.getHost(), + connection.getPort(), getPath() + parameters.toString()).toURI(); + if (requestMeta != null) { + requestMeta.setField(LOG_URI, uri.toString()); + requestMeta.setField(LOG_SCHEME, uri.getScheme()); + requestMeta.setField(LOG_HOST, uri.getHost()); + requestMeta.setField(LOG_PORT, uri.getPort()); + requestMeta.setField(LOG_PATH, uri.getPath()); + } + return uri; + } + + /** + * Called by getURI() to get the path of the URI for the external service. + * The default implementation returns httpParameters.getPath(); subclasses + * which only wants to override the path from httpParameters may use this + * method instead of overriding all of getURI(). + * + * @return the path to use for getURI + */ + protected String getPath() { + return httpParameters.getPath(); + } + + /** + * The URI that is used to check if the provider is up or down. This will again be used in the + * checkPing method by checking that we get a response that has a good status code (below 300). 
If better + * validation than just status code checking is needed, override the checkPing method. + */ + protected URI getPingURI(Connection connection) throws MalformedURLException, URISyntaxException { + return new URL(httpParameters.getSchema(),connection.getHost(),connection.getPort(),getPingPath()).toURI(); + } + + /** + * Called by getPingURI() to get the path of the URI for pinging the + * external service. The default implementation returns + * httpParameters.getPath(); subclasses which only wants to override the + * path from httpParameters may use this method instead of overriding all of + * getPingURI(). + * + * @return the path to use for getPingURI + */ + protected String getPingPath() { + return httpParameters.getPath(); + } + + /** + * Checks if the response is valid. + * @param response The response from the ping request + * @param pong The pong result to return back to the calling method. This method + * will add an error to the pong result (using addError) if the status of the HTTP response is 300 or above. + */ + protected void checkPing(HttpResponse response, Pong pong) { + if (response.getStatusLine().getStatusCode() >= 300) { + pong.addError(com.yahoo.search.result.ErrorMessage.createBackendCommunicationError( + "Got error " + response.getStatusLine().getStatusCode() + + " when contacting backend") + ); + } + } + + /** + * Pinging in HTTPBackend is done by creating a PING uri from http://host:port/path. + * If this returns a status that is below 300, the ping is considered good. + * + * If another uri is needed for pinging, reimplement getPingURI. 
+ * + * Override either this method to change how ping + */ + @Override + public Pong ping(Ping ping, Connection connection) { + URI uri = null; + Pong pong = new Pong(); + HttpResponse response = null; + + if (httpParameters.getPingOption() == PingOption.DISABLE) + return pong; + + try { + uri = getPingURI(connection); + if (uri == null) + pong.addError(ErrorMessage.createIllegalQuery("Ping uri is null")); + if (uri.getHost()==null) { + pong.addError(ErrorMessage.createIllegalQuery("Ping uri has no host")); + uri=null; + } + } catch (MalformedURLException | URISyntaxException e) { + pong.addError(ErrorMessage.createIllegalQuery("Malformed ping uri '" + uri + "': " + + Exceptions.toMessageString(e))); + } catch (RuntimeException e) { + log.log(Level.WARNING,"Unexpected exception while attempting to ping " + connection + " using uri '" + uri + "'",e); + pong.addError(ErrorMessage.createIllegalQuery("Unexpected problem with ping uri '" + uri + "': " + + Exceptions.toMessageString(e))); + } + + if (uri == null) return pong; + pong.setPingInfo("using uri '" + uri + "'"); + + try { + response = getPingResponse(uri, ping); + checkPing(response, pong); + } catch (IOException e) { + //We do not have a valid ping + pong.addError(ErrorMessage.createBackendCommunicationError( + "Exception thrown when pinging with url '" + uri + "': " + Exceptions.toMessageString(e))); + } catch (TimeoutException e) { + pong.addError(ErrorMessage.createTimeout("Timeout for ping " + + uri + " in " + this + ": " + e.getMessage())); + } catch (RuntimeException e) { + log.log(Level.WARNING,"Unexpected exception while attempting to ping " + connection + " using uri '" + uri + "'",e); + pong.addError(ErrorMessage.createIllegalQuery("Unexpected problem with ping uri '" + uri + "': " + + Exceptions.toMessageString(e))); + } finally { + if (response != null) { + cleanupHttpEntity(response.getEntity()); + } + } + + return pong; + } + + private HttpResponse getPingResponse(URI uri, Ping ping) throws 
IOException { + long timeLeft = ping.getTimeout(); + int connectionTimeout = (int) (timeLeft / 4L); + int readTimeout = (int) (timeLeft * 3L / 4L); + + Map<String, String> requestHeaders = null; + if (httpParameters.getPingOption() == PingOption.YCA) + requestHeaders = generateYCAHeaders(); + + return getResponse(uri, null, requestHeaders, null, connectionTimeout, readTimeout); + } + + /** + * Same a {@code getEntity(uri, null)}. + * @param uri resource to fetch + * @param query the originating query + * @throws TimeoutException If query.timeLeft() equal to or lower than 0 + */ + protected HttpEntity getEntity(URI uri, Query query) throws IOException{ + return getEntity(uri, null, query); + } + + + /** + * Gets the HTTP entity that holds the response contents. + * @param uri the request URI. + * @param requestMeta a meta hit that holds logging information about this request (may be {@code null}). + * @param query the originating query + * @return the http entity, or null if none + * @throws java.io.IOException Whenever HTTP status code is in the 300 or higher range. 
+ * @throws TimeoutException If query.timeLeft() equal to or lower than 0 + */ + protected HttpEntity getEntity(URI uri, Hit requestMeta, Query query) throws IOException { + if (query.getTimeLeft() <= 0) { + throw new TimeoutException("No time left for querying external backend."); + } + HttpResponse response = getResponse(uri, requestMeta, query); + StatusLine statusLine = response.getStatusLine(); + + // Logging + if (requestMeta != null) { + requestMeta.setField(LOG_STATUS, statusLine.getStatusCode()); + for (HeaderIterator headers = response.headerIterator(); headers.hasNext(); ) { + Header h = headers.nextHeader(); + requestMeta.setField(LOG_RESPONSE_HEADER_PREFIX + h.getName(), h.getValue()); + } + } + + if (statusLine.getStatusCode() >= 300) { + HttpEntity entity = response.getEntity(); + String message = createServerReporterErrorMessage(statusLine, entity); + cleanupHttpEntity(response.getEntity()); + throw new IOException(message); + } + + return response.getEntity(); + } + + private String createServerReporterErrorMessage(StatusLine statusLine, HttpEntity entity) { + String message = "Error when trying to connect to HTTP backend: " + + statusLine.getStatusCode() + " : " + statusLine.getReasonPhrase(); + + try { + if (entity != null) { + message += "(Message = " + EntityUtils.toString(entity) + ")"; + } + } catch (Exception e) { + log.log(LogLevel.WARNING, "Could not get message.", e); + } + + return message; + } + + /** + * Creates a meta hit dedicated to holding logging information. This hit has + * the 'logging:[searcher's ID]' type. 
+ */ + protected Hit createRequestMeta() { + Hit requestMeta = new Hit("logging:" + getId().toString()); + requestMeta.setMeta(true); + requestMeta.types().add("logging"); + return requestMeta; + } + + protected void cleanupHttpEntity(HttpEntity entity) { + if (entity == null) return; + + try { + entity.consumeContent(); + } catch (IOException e) { + // It is ok if do not consume it, the resource will be freed after + // timeout. + // But log it just in case. + log.log(LogLevel.getVespaLogLevel(LogLevel.DEBUG), + "Not able to consume after processing: " + Exceptions.toMessageString(e)); + } + } + + /** + * Same as {@code getResponse(uri, null)}. + */ + protected HttpResponse getResponse(URI uri, Query query) throws IOException{ + return getResponse(uri, null, query); + } + + /** + * Executes an HTTP request and gets the response. + * @param uri the request URI. + * @param requestMeta a meta hit that holds logging information about this request (may be {@code null}). + * @param query the originating query, used to calculate timeouts + */ + protected HttpResponse getResponse(URI uri, Hit requestMeta, Query query) throws IOException { + long timeLeft = query.getTimeLeft(); + int connectionTimeout = (int) (timeLeft / 4L); + int readTimeout = (int) (timeLeft * 3L / 4L); + connectionTimeout = connectionTimeout <= 0 ? 1 : connectionTimeout; + readTimeout = readTimeout <= 0 ? 1 : readTimeout; + HttpEntity reqEntity = getRequestEntity(query, requestMeta); + Map<String, String> reqHeaders = getRequestHeaders(query, requestMeta); + if ((reqEntity == null) && (reqHeaders == null)) { + return getResponse(uri, requestMeta, connectionTimeout, readTimeout); + } else { + return getResponse(uri, reqEntity, reqHeaders, requestMeta, connectionTimeout, readTimeout); + } + } + + /** + * Returns the set of headers to be passed in the http request to provider backend. The default + * implementation returns null, unless YCA is in use. 
If YCA is used, it will return a map + * only containing the needed YCA headers. + */ + protected Map<String, String> getRequestHeaders(Query query, Hit requestMeta) { + if (useCertificate) { + return generateYCAHeaders(); + } + return null; + } + + /** + * Returns the HTTP request entity to use when making the request for this query. + * This default implementation returns null. + * + * <p> Do return a repeatable entity if HTTP retry is active. + * + * @return the http request entity to use, or null to use the default entity + */ + protected HttpEntity getRequestEntity(Query query, Hit requestMeta) { + return null; + } + + /** + * Executes an HTTP request and gets the response. + * @param uri the request URI. + * @param requestMeta a meta hit that holds logging information about this request (may be {@code null}). + * @param connectionTimeout how long to wait for getting a connection + * @param readTimeout timeout for reading HTTP data + */ + protected HttpResponse getResponse(URI uri, Hit requestMeta, int connectionTimeout, int readTimeout) + throws IOException { + return getResponse(uri, null, null, requestMeta, connectionTimeout, readTimeout); + } + + + /** + * Executes an HTTP request and gets the response. + * @param uri the request URI. + * @param requestMeta a meta hit that holds logging information about this request (may be {@code null}). 
+ * @param connectionTimeout how long to wait for getting a connection + * @param readTimeout timeout for reading HTTP data + */ + protected HttpResponse getResponse(URI uri, HttpEntity reqEntity, + Map<String, String> reqHeaders, Hit requestMeta, + int connectionTimeout, int readTimeout) throws IOException { + + HttpParams httpParams = httpParameters.toHttpParams(connectionTimeout, readTimeout); + HttpClient httpClient = createClient(httpParams); + long start = 0L; + HttpUriRequest request; + if (httpParameters.getEnableProxy() && "http".equals(httpParameters.getProxyType())) { + HttpHost proxy = new HttpHost(httpParameters.getProxyHost(), + httpParameters.getProxyPort(), httpParameters.getProxyType()); + httpClient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, proxy); + // Logging + if (requestMeta != null) { + requestMeta.setField(LOG_PROXY_TYPE, httpParameters.getProxyType()); + requestMeta.setField(LOG_PROXY_HOST, httpParameters.getProxyHost()); + requestMeta.setField(LOG_PROXY_PORT, httpParameters.getProxyPort()); + } + } + if (reqEntity == null) { + request = createRequest(httpParameters.getMethod(), uri); + } else { + request = createRequest(httpParameters.getMethod(), uri, reqEntity); + } + + if (reqHeaders != null) { + for (Entry<String, String> entry : reqHeaders.entrySet()) { + if (entry.getValue() == null || isAscii(entry.getValue())) { + request.addHeader(entry.getKey(), entry.getValue()); + } else { + byte[] asBytes = Utf8.toBytes(entry.getValue()); + String asLyingString = new String(asBytes, 0, asBytes.length, iso8859Charset); + request.addHeader(entry.getKey(), asLyingString); + } + } + } + + // Logging + if (requestMeta != null) { + for (HeaderIterator headers = request.headerIterator(); headers.hasNext();) { + Header h = headers.nextHeader(); + requestMeta.setField(LOG_HEADER_PREFIX + h.getName(), h.getValue()); + } + start = System.currentTimeMillis(); + } + + HttpResponse response; + + try { + HttpContext context = new 
BasicHttpContext(); + response = httpClient.execute(request, context); + + if (requestMeta != null) { + requestMeta.setField(LOG_IP_ADDRESS, getIpAddress(context)); + } + } catch (ConnectTimeoutException e) { + connectTimeouts.increment(); + throw e; + } + + // Logging + long latencyStart = System.currentTimeMillis() - start; + if (requestMeta != null) { + requestMeta.setField(LOG_LATENCY_START, latencyStart); + } + logResponseLatency(latencyStart); + return response; + } + + private String getIpAddress(HttpContext context) { + HttpConnection connection = (HttpConnection) context.getAttribute(ExecutionContext.HTTP_CONNECTION); + if (connection instanceof HttpInetConnection) { + InetAddress address = ((HttpInetConnection) connection).getRemoteAddress(); + String hostAddress = address.getHostAddress(); + return hostAddress == null ? + IP_ADDRESS_UNKNOWN: + hostAddress; + } else { + getLogger().log(LogLevel.DEBUG, "Unexpected connection type: " + connection.getClass().getName()); + return IP_ADDRESS_UNKNOWN; + } + } + + private boolean isAscii(String value) { + char[] scanBuffer = new char[value.length()]; + value.getChars(0, value.length(), scanBuffer, 0); + for (char c: scanBuffer) + if (c > 127) return false; + return true; + } + + protected void logResponseLatency(long latency) { } + + /** + * Creates a http client for one request. Override to customize the client + * to use, e.g for testing. This default implementation will add the YCA + * proxy to params if is necessary, and then do + * <code>return new SearcherHttpClient(getConnectionManager(params), params);</code> + */ + protected HttpClient createClient(HttpParams params) { + if (ycaProxy != null) { + params.setParameter(ConnRoutePNames.DEFAULT_PROXY, ycaProxy); + } + return new SearcherHttpClient(getConnectionManager(params), params); + } + + /** + * Creates a HttpRequest. Override to customize the request. 
+ * This default implementation does <code>return new HttpRequest(method,uri);</code> + */ + protected HttpUriRequest createRequest(String method,URI uri) { + return createRequest(method, uri, null); + } + + /** + * Creates a HttpRequest. Override to customize the request. + * This default implementation does <code>return new HttpRequest(method,uri);</code> + */ + protected HttpUriRequest createRequest(String method,URI uri, HttpEntity entity) { + return new SearcherHttpRequest(method,uri); + } + + /** Get a connection manager which may be used safely from this thread */ + protected ClientConnectionManager getConnectionManager(HttpParams params) { + if (sharedConnectionManager != null) {// We are using shared connections + return sharedConnectionManager; + } else { + SingleClientConnManager singleClientConnManager = singleClientConnManagerThreadLocal.get(); + if (singleClientConnManager == null) { + singleClientConnManager = new SingleClientConnManager(params, schemeRegistry); + singleClientConnManagerThreadLocal.set(singleClientConnManager); + } + return singleClientConnManager; + } + } + + /** Utility method for creating error messages when a url is incorrect */ + protected ErrorMessage createMalformedUrlError(Query query,Exception e) { + return ErrorMessage.createErrorInPluginSearcher("Malformed url in " + this + " for " + query + + ": " + Exceptions.toMessageString(e)); + } + + private Map<String, String> generateYCAHeaders() { + Map<String, String> headers = new HashMap<>(); + String certificate = certificateStore.getCertificate(ycaApplicationId, ycaTtl, ycaRetry); + headers.put(YCA_HTTP_HEADER, certificate); + return headers; + } + + protected static class SearcherHttpClient extends DefaultHttpClient { + + private final int retries; + + public SearcherHttpClient(final ClientConnectionManager conman, final HttpParams params) { + super(conman, params); + retries = params.getIntParameter(HTTPParameters.RETRIES, 1); + addRequestInterceptor((request, context) -> { 
+ if (!request.containsHeader("Accept-Encoding")) { + request.addHeader("Accept-Encoding", "gzip"); + } + }); + addResponseInterceptor((response, context) -> { + HttpEntity entity = response.getEntity(); + if (entity == null) return; + Header ceheader = entity.getContentEncoding(); + if (ceheader == null) return; + for (HeaderElement codec : ceheader.getElements()) { + if (codec.getName().equalsIgnoreCase("gzip")) { + response.setEntity(new GzipDecompressingEntity(response.getEntity())); + return; + } + } + }); + } + + @Override + protected HttpRequestExecutor createRequestExecutor() { + return new HttpRequestExecutor(); + } + + @Override + protected HttpRoutePlanner createHttpRoutePlanner() { + return new DefaultHttpRoutePlanner(getConnectionManager().getSchemeRegistry()); + } + + @Override + protected HttpRequestRetryHandler createHttpRequestRetryHandler() { + return new SearcherHttpRequestRetryHandler(retries); + } + } + + /** A retry handler which avoids retrying forever on errors misclassified as transient */ + private static class SearcherHttpRequestRetryHandler implements HttpRequestRetryHandler { + private final int retries; + + public SearcherHttpRequestRetryHandler(int retries) { + this.retries = retries; + } + + @Override + public boolean retryRequest(IOException e, int executionCount, HttpContext httpContext) { + if (e == null) { + throw new IllegalArgumentException("Exception parameter may not be null"); + } + if (executionCount > retries) { + return false; + } + if (e instanceof NoHttpResponseException) { + // Retry if the server dropped connection on us + return true; + } + if (e instanceof InterruptedIOException) { + // Timeout from federation layer + return false; + } + if (e instanceof UnknownHostException) { + // Unknown host + return false; + } + if (e instanceof SSLHandshakeException) { + // SSL handshake exception + return false; + } + return true; + } + + + } + + private static class SearcherHttpRequest extends HttpRequestBase { + String 
method; + + public SearcherHttpRequest(String method, final URI uri) { + super(); + this.method = method; + setURI(uri); + } + + @Override + public String getMethod() { + return method; + } + } + + /** + * Only for testing. + */ + public void shutdownConnectionManagers() { + ClientConnectionManager manager; + if (sharedConnectionManager != null) { + manager = sharedConnectionManager; + } else { + manager = singleClientConnManagerThreadLocal.get(); + } + if (manager != null) { + manager.shutdown(); + } + } + + protected static final class ThrowingCertificateStore implements CertificateStore { + + @Override + public String getCertificate(String key, long ttl, long retry) { + throw new UnsupportedOperationException("A certificate store is not available"); + } + + } + +} + diff --git a/container-search/src/main/java/com/yahoo/search/federation/http/TimedHttpEntity.java b/container-search/src/main/java/com/yahoo/search/federation/http/TimedHttpEntity.java new file mode 100644 index 00000000000..9d89a318c32 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/http/TimedHttpEntity.java @@ -0,0 +1,88 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.federation.http; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.http.Header; +import org.apache.http.HttpEntity; + +/** + * Wrapper for adding timeout to an HttpEntity instance. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class TimedHttpEntity implements HttpEntity { + /** + * The wrapped entity. Never null. 
+ */ + private final HttpEntity entity; + private final long startTime; + private final long timeout; + + public TimedHttpEntity(HttpEntity entity, long startTime, long timeout) { + if (entity == null) { + throw new IllegalArgumentException("TimedHttpEntity cannot be instantiated with null HttpEntity."); + } + this.entity = entity; + this.startTime = startTime; + this.timeout = timeout; + } + + + @Override + public InputStream getContent() throws IOException, IllegalStateException { + InputStream content = entity.getContent(); + if (content == null) { + return null; + } else { + return new TimedStream(content, startTime, timeout); + } + } + + + // START OF PURE FORWARDING METHODS + @Override + public void consumeContent() throws IOException { + entity.consumeContent(); + } + + + @Override + public Header getContentEncoding() { + return entity.getContentEncoding(); + } + + @Override + public long getContentLength() { + return entity.getContentLength(); + } + + @Override + public Header getContentType() { + return entity.getContentType(); + } + + @Override + public boolean isChunked() { + return entity.isChunked(); + } + + @Override + public boolean isRepeatable() { + return entity.isRepeatable(); + } + + @Override + public boolean isStreaming() { + return entity.isStreaming(); + } + + @Override + public void writeTo(OutputStream outstream) throws IOException { + entity.writeTo(outstream); + } + // END OF PURE FORWARDING METHODS + +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/http/TimedStream.java b/container-search/src/main/java/com/yahoo/search/federation/http/TimedStream.java new file mode 100644 index 00000000000..02777afb43c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/http/TimedStream.java @@ -0,0 +1,111 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.federation.http; + +import java.io.IOException; +import java.io.InputStream; + +/** + * A stream which throws a TimeoutException if query timeout has been reached. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class TimedStream extends InputStream { + + /** + * A time barrier value, the point in time from which on read operations will cause an exception. + */ + private final long limit; + + /** + * A wrapped InputStream instance. + */ + private final InputStream content; + + /** + * Wrap an InputStream to make read operations potentially fire off + * TimeoutException. + * + * <p>Typical use would be<br> + * <code>new TimedStream(httpEntity.getContent(), query.getStartTime(), query.getTimeout())</code> + * + * @param content + * the InputStream to wrap + * @param startTime + * start time of query + * @param timeout + * how long the query is allowed to run + */ + public TimedStream(InputStream content, long startTime, long timeout) { + if (content == null) { + throw new IllegalArgumentException("Cannot instantiate TimedStream with null InputStream"); + } + this.content = content; + // The reasion for doing it in here instead of outside the constructor + // is this makes the usage of the class more intuitive IMHO + this.limit = startTime + timeout; + } + + private void checkTime(String message) { + if (System.currentTimeMillis() >= limit) { + throw new TimeoutException(message); + } + } + + // START FORWARDING METHODS: + // All methods below are forwarding methods to the contained stream, where + // some do a timeout check. 
+ @Override + public int read() throws IOException { + int data = content.read(); + checkTime("Timed out during read()."); + return data; + } + + @Override + public int available() throws IOException { + return content.available(); + } + + @Override + public void close() throws IOException { + content.close(); + } + + @Override + public synchronized void mark(int readlimit) { + content.mark(readlimit); + } + + @Override + public boolean markSupported() { + return content.markSupported(); + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + int length = content.read(b, off, len); + checkTime("Timed out during read(byte[], int, int)"); + return length; + } + + @Override + public int read(byte[] b) throws IOException { + int length = content.read(b); + checkTime("Timed out during read(byte[])"); + return length; + } + + @Override + public synchronized void reset() throws IOException { + content.reset(); + } + + @Override + public long skip(long n) throws IOException { + long skipped = content.skip(n); + checkTime("Timed out during skip(long)"); + return skipped; + } + // END FORWARDING METHODS + +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/http/TimeoutException.java b/container-search/src/main/java/com/yahoo/search/federation/http/TimeoutException.java new file mode 100644 index 00000000000..9e0536ea053 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/http/TimeoutException.java @@ -0,0 +1,20 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.federation.http; + +/** + * Timeout marker for slow HTTP connections. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class TimeoutException extends RuntimeException { + + /** + * Auto-generated version ID. 
+ */ + private static final long serialVersionUID = 7084147598258586559L; + + public TimeoutException(String message) { + super(message); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/http/package-info.java b/container-search/src/main/java/com/yahoo/search/federation/http/package-info.java new file mode 100644 index 00000000000..aa3d249ab66 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/http/package-info.java @@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +@PublicApi +package com.yahoo.search.federation.http; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/federation/package-info.java b/container-search/src/main/java/com/yahoo/search/federation/package-info.java new file mode 100644 index 00000000000..008e339db4b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/package-info.java @@ -0,0 +1,17 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * The federation layer on top of the search container. 
This contains + * + * <ul> + * <li>A model of Sources which can be selected in and for a Query and which are implemented + * by a Search Chain, and Providers which represents the connection to specific backends (these + * two are often 1-1 but not always) + * <li>The federation searcher responsible for forking a query to multiple sources in parallel + * <li>A simple searcher which can talk to other vespa services + * </ul> + */ +@ExportPackage +package com.yahoo.search.federation; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/federation/selection/FederationTarget.java b/container-search/src/main/java/com/yahoo/search/federation/selection/FederationTarget.java new file mode 100644 index 00000000000..676292d6a3a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/selection/FederationTarget.java @@ -0,0 +1,68 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.federation.selection; + +import java.util.Optional; +import com.yahoo.component.chain.Chain; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.model.federation.FederationOptions; + +import static com.google.common.base.Preconditions.checkNotNull; + +/** + * Represents a search chain that the federation searcher should send a query to, + * along with a timeout and + * custom data reserved for use by the TargetSelector. 
+ * + * @author tonytv + */ +public final class FederationTarget<T> { + private final Chain<Searcher> chain; + private final FederationOptions federationOptions; + private final T customData; + + public FederationTarget(Chain<Searcher> chain, FederationOptions federationOptions, T customData) { + checkNotNull(chain); + checkNotNull(federationOptions); + + this.chain = chain; + this.federationOptions = federationOptions; + this.customData = customData; + } + + public Chain<Searcher> getChain() { + return chain; + } + + public FederationOptions getFederationOptions() { + return federationOptions; + } + + /** + * Any data that the TargetSelector wants to associate with this target. + * Owned exclusively by the TargetSelector that created this instance. + */ + public T getCustomData() { + return customData; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + FederationTarget that = (FederationTarget) o; + + if (!chain.equals(that.chain)) return false; + if (customData != null ? !customData.equals(that.customData) : that.customData != null) return false; + if (!federationOptions.equals(that.federationOptions)) return false; + + return true; + } + + @Override + public int hashCode() { + int result = chain.hashCode(); + result = 31 * result + federationOptions.hashCode(); + return result; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/selection/TargetSelector.java b/container-search/src/main/java/com/yahoo/search/federation/selection/TargetSelector.java new file mode 100644 index 00000000000..0f6bf2d5b71 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/selection/TargetSelector.java @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
/**
 * Allows adding extra targets that the federation searcher should federate to.
 *
 * <p>For each federation search call, the federation searcher will call {@link #getTargets}.
 * Then, for each target, it will:</p>
 * <ol>
 *   <li>call {@link #modifyTargetQuery modifyTargetQuery(target, query)}
 *   <li>call {@link #modifyTargetResult modifyTargetResult(target, result)}
 * </ol>
 *
 * @param <T> the type of the custom data this selector attaches to the targets it creates
 * @author tonytv
 */
public interface TargetSelector<T> {

    /**
     * Returns the extra targets to federate to for the given query.
     *
     * @param query the query being federated
     * @param searcherChainRegistry the registry of search chains
     */
    Collection<FederationTarget<T>> getTargets(Query query, ChainRegistry<Searcher> searcherChainRegistry);

    /**
     * For modifying the query before sending it to the target.
     */
    void modifyTargetQuery(FederationTarget<T> target, Query query);

    /**
     * For modifying the result produced by the target.
     */
    void modifyTargetResult(FederationTarget<T> target, Result result);
}
/**
 * Specifies which search chain should be run and how it should be run.
 *
 * @author tonytv
 */
public class SearchChainInvocationSpec implements Cloneable {
    /** The id of the search chain to invoke */
    public final ComponentId searchChainId;

    /** The id of the source this spec was created for, or null if it was not created for a source */
    public final ComponentId source;
    /** The id of the provider this spec was created for, or null if it was not created for a provider */
    public final ComponentId provider;

    public final FederationOptions federationOptions;
    /** The document types passed at construction. The list is stored as given, it is not copied. */
    public final List<String> documentTypes;

    SearchChainInvocationSpec(ComponentId searchChainId,
                              ComponentId source, ComponentId provider, FederationOptions federationOptions,
                              List<String> documentTypes) {
        this.searchChainId = searchChainId;
        this.source = source;
        this.provider = provider;
        this.federationOptions = federationOptions;
        this.documentTypes = documentTypes;
    }

    /** Returns a shallow copy of this: the copy refers to the same field values as this instance */
    @Override
    public SearchChainInvocationSpec clone() throws CloneNotSupportedException {
        return (SearchChainInvocationSpec)super.clone();
    }
}
00000000000..fc70fb5e5e7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/sourceref/SearchChainResolver.java @@ -0,0 +1,160 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.federation.sourceref; + +import com.yahoo.component.ComponentId; +import com.yahoo.component.ComponentSpecification; +import com.yahoo.component.provider.ComponentRegistry; +import com.yahoo.processing.request.Properties; +import com.yahoo.search.searchchain.model.federation.FederationOptions; + +import java.util.Collections; +import java.util.List; +import java.util.SortedSet; +import java.util.TreeSet; + +/** + * Resolves (source, provider) component specifications to a search chain invocation spec. + * The provider component specification is given by the entry in the queryMap with key + * 'source.<source-name>.provider'. + * + * <p> + * The diagram shows the relationship between source, provider and the result: + * (source is used to select row, provider is used to select column.) + * Provider id = null is used for regular search chains. 
+ * </p> + * + * <pre> + * Provider id + * null + * |----+---+---+---| + * | o | | | | + * |----+---+---+---| + * Source id | | o | o | | + * |----+---+---+---| + * | | | | o | + * |----+---+---+---| + * + * o: SearchChainInvocationSpec + * </pre> + * + * @author tonytv + */ +public class SearchChainResolver { + private final ComponentRegistry<Target> targets; + private final SortedSet<Target> defaultTargets; + + public static class Builder { + + private SortedSet<Target> defaultTargets = new TreeSet<>(); + + private final ComponentRegistry<Target> targets = new ComponentRegistry<Target>() { + @Override + public void freeze() { + for (Target target : allComponents()) { + target.freeze(); + } + super.freeze(); + } + }; + + public Builder addSearchChain(ComponentId searchChainId) { + return addSearchChain(searchChainId, Collections.<String>emptyList()); + } + + public Builder addSearchChain(ComponentId searchChainId, FederationOptions federationOptions) { + return addSearchChain(searchChainId, federationOptions, Collections.<String>emptyList()); + } + + public Builder addSearchChain(ComponentId searchChainId, List<String> documentTypes) { + return addSearchChain(searchChainId, new FederationOptions(), documentTypes); + } + + public Builder addSearchChain(ComponentId searchChainId, FederationOptions federationOptions, + List<String> documentTypes) { + registerTarget(new SingleTarget(searchChainId, + new SearchChainInvocationSpec(searchChainId, null, null, federationOptions, documentTypes), false)); + return this; + } + + private Builder registerTarget(SingleTarget singleTarget) { + targets.register(singleTarget.getId(), singleTarget); + if (singleTarget.useByDefault()) { + defaultTargets.add(singleTarget); + } + return this; + } + + public Builder addSourceForProvider(ComponentId sourceId, ComponentId providerId, ComponentId searchChainId, + boolean isDefaultProviderForSource, FederationOptions federationOptions, + List<String> documentTypes) { + + 
SearchChainInvocationSpec searchChainInvocationSpec = + new SearchChainInvocationSpec(searchChainId, sourceId, providerId, federationOptions, documentTypes); + + SourcesTarget sourcesTarget = getOrRegisterSourceTarget(sourceId); + sourcesTarget.addSource(providerId, searchChainInvocationSpec, isDefaultProviderForSource); + + registerTarget(new SingleTarget(searchChainId, searchChainInvocationSpec, true)); + return this; + } + + private SourcesTarget getOrRegisterSourceTarget(ComponentId sourceId) { + Target sourcesTarget = targets.getComponent(sourceId); + if (sourcesTarget == null) { + targets.register(sourceId, new SourcesTarget(sourceId)); + return getOrRegisterSourceTarget(sourceId); + } else if (sourcesTarget instanceof SourcesTarget) { + return (SourcesTarget) sourcesTarget; + } else { + throw new IllegalStateException("Expected " + sourceId + " to be a source."); + } + } + + public void useTargetByDefault(String targetId) { + Target target = targets.getComponent(targetId); + assert target != null : "Target not added yet."; + + defaultTargets.add(target); + } + + public SearchChainResolver build() { + targets.freeze(); + return new SearchChainResolver(targets, defaultTargets); + } + } + + private SearchChainResolver(ComponentRegistry<Target> targets, SortedSet<Target> defaultTargets) { + this.targets = targets; + this.defaultTargets = Collections.unmodifiableSortedSet(defaultTargets); + } + + + public SearchChainInvocationSpec resolve(ComponentSpecification sourceRef, Properties sourceToProviderMap) + throws UnresolvedSearchChainException { + + Target target = resolveTarget(sourceRef); + return target.responsibleSearchChain(sourceToProviderMap); + } + + private Target resolveTarget(ComponentSpecification sourceRef) throws UnresolvedSearchChainException { + Target target = targets.getComponent(sourceRef); + if (target == null) { + throw UnresolvedSourceRefException.createForMissingSourceRef(sourceRef); + } + return target; + } + + public SortedSet<Target> 
/**
 * A target which always resolves to one fixed search chain invocation spec,
 * regardless of the query properties.
 *
 * @author tonytv
 */
public class SingleTarget extends Target {
    private final SearchChainInvocationSpec searchChainInvocationSpec;

    /**
     * @param id the id of this target
     * @param searchChainInvocationSpec the spec this target resolves to
     * @param isDerived passed on to Target as its isDerived flag
     */
    public SingleTarget(ComponentId id, SearchChainInvocationSpec searchChainInvocationSpec, boolean isDerived) {
        super(id, isDerived);
        this.searchChainInvocationSpec = searchChainInvocationSpec;
    }

    /** Returns the spec given at construction; the query properties are not consulted */
    @Override
    public SearchChainInvocationSpec responsibleSearchChain(Properties queryProperties) {
        return searchChainInvocationSpec;
    }

    /** Returns the string form of the id of this target */
    @Override
    public String searchRefDescription() {
        return localId.toString();
    }

    /** Does nothing: this class has nothing to freeze */
    @Override
    void freeze() {}

    /** Returns the use-by-default setting of the federation options of the spec of this target */
    public final boolean useByDefault() {
        return searchChainInvocationSpec.federationOptions.getUseByDefault();
    }
}
b/container-search/src/main/java/com/yahoo/search/federation/sourceref/SourceRefResolver.java @@ -0,0 +1,71 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.federation.sourceref; + +import static com.yahoo.container.util.Util.quote; + +import java.util.Arrays; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Set; + +import com.yahoo.component.ComponentSpecification; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.processing.request.Properties; + +/** + * Maps a source reference to search chain invocation specs. + * + * @author tonytv + */ +public class SourceRefResolver { + private final SearchChainResolver searchChainResolver; + + public SourceRefResolver(SearchChainResolver searchChainResolver) { + this.searchChainResolver = searchChainResolver; + } + public Set<SearchChainInvocationSpec> resolve(ComponentSpecification sourceRef, Properties sourceToProviderMap, + IndexFacts indexFacts) + throws UnresolvedSearchChainException { + + try { + return new LinkedHashSet<>(Arrays.asList(searchChainResolver.resolve(sourceRef, sourceToProviderMap))); + } catch (UnresolvedSourceRefException e) { + return resolveClustersWithDocument(sourceRef, sourceToProviderMap, indexFacts); + } + } + + private Set<SearchChainInvocationSpec> resolveClustersWithDocument(ComponentSpecification sourceRef, + Properties sourceToProviderMap, + IndexFacts indexFacts) + throws UnresolvedSearchChainException { + + if (hasOnlyName(sourceRef)) { + Set<SearchChainInvocationSpec> clusterSearchChains = new LinkedHashSet<>(); + + List<String> clusters = indexFacts.clustersHavingSearchDefinition(sourceRef.getName()); + for (String cluster : clusters) { + clusterSearchChains.add(resolveClusterSearchChain(cluster, sourceRef, sourceToProviderMap)); + } + + if (!clusterSearchChains.isEmpty()) + return clusterSearchChains; + } + + throw 
UnresolvedSourceRefException.createForMissingSourceRef(sourceRef); + + } + + private SearchChainInvocationSpec resolveClusterSearchChain(String cluster, ComponentSpecification sourceRef, + Properties sourceToProviderMap) throws UnresolvedSearchChainException { + try { + return searchChainResolver.resolve(new ComponentSpecification(cluster), sourceToProviderMap); + } catch (UnresolvedSearchChainException e) { + throw new UnresolvedSearchChainException("Failed to resolve cluster search chain " + quote(cluster) + + " when using source ref " + quote(sourceRef) + " as a document name."); + } + } + + private boolean hasOnlyName(ComponentSpecification sourceSpec) { + return new ComponentSpecification(sourceSpec.getName()).equals(sourceSpec); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/sourceref/SourcesTarget.java b/container-search/src/main/java/com/yahoo/search/federation/sourceref/SourcesTarget.java new file mode 100644 index 00000000000..bb1de051ed0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/sourceref/SourcesTarget.java @@ -0,0 +1,112 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.federation.sourceref; + + +import com.google.common.base.Joiner; +import com.yahoo.component.ComponentId; +import com.yahoo.component.ComponentSpecification; +import com.yahoo.component.chain.model.ComponentAdaptor; +import com.yahoo.component.provider.ComponentRegistry; +import com.yahoo.processing.request.Properties; + +import java.util.ArrayList; +import java.util.List; +import java.util.SortedSet; +import java.util.TreeSet; + + +public class SourcesTarget extends Target { + private ComponentRegistry<ComponentAdaptor<SearchChainInvocationSpec>> providerSources = + new ComponentRegistry<ComponentAdaptor<SearchChainInvocationSpec>>() {}; + private SearchChainInvocationSpec defaultProviderSource; + + public SourcesTarget(ComponentId sourceId) { + super(sourceId); + } + + @Override + public SearchChainInvocationSpec responsibleSearchChain(Properties queryProperties) throws UnresolvedSearchChainException { + ComponentSpecification providerSpecification = providerSpecificationForSource(queryProperties); + if (providerSpecification == null) { + return defaultProviderSource; + } else { + return lookupProviderSource(providerSpecification); + } + } + + @Override + public String searchRefDescription() { + StringBuilder builder = new StringBuilder(sourceId().stringValue()); + builder.append("[provider = "). + append(Joiner.on(", ").join(allProviderIdsStringValue())). 
+ append("]"); + return builder.toString(); + } + + private SortedSet<String> allProviderIdsStringValue() { + SortedSet<String> result = new TreeSet<>(); + for (ComponentAdaptor<SearchChainInvocationSpec> providerSource : providerSources.allComponents()) { + result.add(providerSource.getId().stringValue()); + } + return result; + } + + private SearchChainInvocationSpec lookupProviderSource(ComponentSpecification providerSpecification) + throws UnresolvedSearchChainException { + ComponentAdaptor<SearchChainInvocationSpec> providerSource = providerSources.getComponent(providerSpecification); + + if (providerSource == null) + throw UnresolvedProviderException.createForMissingProvider(sourceId(), providerSpecification); + + return providerSource.model; + } + + public void freeze() { + if (defaultProviderSource == null) + throw new RuntimeException("Null default provider source for source " + sourceId() + "."); + + providerSources.freeze(); + } + + public void addSource(ComponentId providerId, SearchChainInvocationSpec searchChainInvocationSpec, + boolean isDefaultProviderForSource) { + providerSources.register(providerId, new ComponentAdaptor<>(providerId, searchChainInvocationSpec)); + + if (isDefaultProviderForSource) { + setDefaultProviderSource(searchChainInvocationSpec); + } + } + + private void setDefaultProviderSource(SearchChainInvocationSpec searchChainInvocationSpec) { + if (defaultProviderSource != null) + throw new RuntimeException("Tried to set two default providers for source " + sourceId() + "."); + + defaultProviderSource = searchChainInvocationSpec; + } + + ComponentId sourceId() { + return localId; + } + + + /** + * Looks up source.(sourceId).provider in the query properties. + * @return null if the default provider should be used + */ + private ComponentSpecification providerSpecificationForSource(Properties queryProperties) { + String spec = queryProperties.getString("source." 
+ sourceId().stringValue() + ".provider"); + return ComponentSpecification.fromString(spec); + } + + public SearchChainInvocationSpec defaultProviderSource() { + return defaultProviderSource; + } + + public List<SearchChainInvocationSpec> allProviderSources() { + List<SearchChainInvocationSpec> allProviderSources = new ArrayList<>(); + for (ComponentAdaptor<SearchChainInvocationSpec> component : providerSources.allComponents()) { + allProviderSources.add(component.model); + } + return allProviderSources; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/sourceref/Target.java b/container-search/src/main/java/com/yahoo/search/federation/sourceref/Target.java new file mode 100644 index 00000000000..4cf5d406959 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/sourceref/Target.java @@ -0,0 +1,31 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.federation.sourceref; + +import com.yahoo.component.AbstractComponent; +import com.yahoo.component.ComponentId; +import com.yahoo.processing.request.Properties; + +/** + * TODO: What's this? 
+ * +* @author tonytv +*/ +public abstract class Target extends AbstractComponent { + final ComponentId localId; + final boolean isDerived; + + Target(ComponentId localId, boolean derived) { + super(localId); + this.localId = localId; + isDerived = derived; + } + + Target(ComponentId localId) { + this(localId, false); + } + + public abstract SearchChainInvocationSpec responsibleSearchChain(Properties queryProperties) throws UnresolvedSearchChainException; + public abstract String searchRefDescription(); + + abstract void freeze(); +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/sourceref/UnresolvedProviderException.java b/container-search/src/main/java/com/yahoo/search/federation/sourceref/UnresolvedProviderException.java new file mode 100644 index 00000000000..50b2dc95660 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/sourceref/UnresolvedProviderException.java @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.federation.sourceref; + +import com.yahoo.component.ComponentId; +import com.yahoo.component.ComponentSpecification; + +import static com.yahoo.container.util.Util.quote; + +/** + * @author tonytv + */ +@SuppressWarnings("serial") +class UnresolvedProviderException extends UnresolvedSearchChainException { + UnresolvedProviderException(String msg) { + super(msg); + } + + static UnresolvedSearchChainException createForMissingProvider(ComponentId source, + ComponentSpecification provider) { + return new UnresolvedProviderException("No provider " + quote(provider) + " for source " + quote(source) + "."); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/sourceref/UnresolvedSearchChainException.java b/container-search/src/main/java/com/yahoo/search/federation/sourceref/UnresolvedSearchChainException.java new file mode 100644 index 00000000000..b8417a3d05a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/sourceref/UnresolvedSearchChainException.java @@ -0,0 +1,13 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.federation.sourceref; + +/** + * Thrown if a search chain can not be resolved from one or more ids. + * @author tonytv + */ +@SuppressWarnings("serial") +public class UnresolvedSearchChainException extends Exception { + public UnresolvedSearchChainException(String msg) { + super(msg); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/sourceref/UnresolvedSourceRefException.java b/container-search/src/main/java/com/yahoo/search/federation/sourceref/UnresolvedSourceRefException.java new file mode 100644 index 00000000000..4c15366914b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/sourceref/UnresolvedSourceRefException.java @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.search.federation.sourceref; + +import com.yahoo.component.ComponentSpecification; + +import static com.yahoo.container.util.Util.quote; + +/** + * @author tonytv + */ +@SuppressWarnings("serial") +class UnresolvedSourceRefException extends UnresolvedSearchChainException { + UnresolvedSourceRefException(String msg) { + super(msg); + } + + + static UnresolvedSearchChainException createForMissingSourceRef(ComponentSpecification source) { + return new UnresolvedSourceRefException("Could not resolve source ref " + quote(source) + "."); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/vespa/QueryMarshaller.java b/container-search/src/main/java/com/yahoo/search/federation/vespa/QueryMarshaller.java new file mode 100644 index 00000000000..554424c267f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/vespa/QueryMarshaller.java @@ -0,0 +1,170 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.federation.vespa; + +import java.util.Iterator; + +import com.yahoo.prelude.query.*; + +/** + * Marshal a query stack into an advanced query string suitable for + * passing to another QRS. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author <a href="mailto:rafan@yahoo-inc.com">Rong-En Fan</a> + */ +public class QueryMarshaller { + private boolean atRoot = true; + + public String marshal(Item root) { + if (root == null || root instanceof NullItem) { + return null; + } + StringBuilder s = new StringBuilder(); + marshal(root, s); + atRoot = true; + return s.toString(); + } + + /** + * We do not yet care about exact match indices + */ + private void marshal(Item root, StringBuilder s) { + switch (root.getItemType()) { + case OR: + marshalOr((OrItem) root, s); + break; + case AND: + marshalAnd((CompositeItem) root, s); + break; + case NOT: + marshalNot((NotItem) root, s); + break; + case RANK: + marshalRank((RankItem) root, s); + break; + case WORD: + case INT: + case PREFIX: + case SUBSTRING: + case SUFFIX: + marshalWord((TermItem) root, s); + break; + case PHRASE: + // PhraseItem and PhraseSegmentItem don't add quotes for segmented + // termse + if (root instanceof PhraseSegmentItem) { + marshalPhrase((PhraseSegmentItem) root, s); + } else { + marshalPhrase((PhraseItem) root, s); + } + break; + case NEAR: + marshalNear((NearItem) root, s); + break; + case ONEAR: + marshalNear((ONearItem) root, s); + break; + case WEAK_AND: + marshalWeakAnd((WeakAndItem)root, s); + default: + break; + } + } + + + private void marshalWord(TermItem item, StringBuilder s) { + String index = item.getIndexName(); + if (index.length() != 0) { + s.append(item.getIndexName()).append(':'); + } + s.append(item.stringValue()); + if (item.getWeight() != Item.DEFAULT_WEIGHT) + s.append("!").append(item.getWeight()); + } + + private void marshalRank(RankItem root, StringBuilder s) { + marshalComposite("RANK", root, s); + } + + private void marshalNot(NotItem root, StringBuilder s) { + marshalComposite("ANDNOT", root, s); + } + + private void marshalOr(OrItem root, StringBuilder s) { + marshalComposite("OR", root, s); + } + + /** + * Dump WORD items, 
and add space between each of them unless those + * words came from segmentation. + * + * @param root CompositeItem + * @param s current marshaled query + */ + private void dumpWords(CompositeItem root, StringBuilder s) { + for (Iterator<Item> i = root.getItemIterator(); i.hasNext();) { + Item word = i.next(); + boolean useSeparator = true; + if (word instanceof TermItem) { + s.append(((TermItem) word).stringValue()); + if (word instanceof WordItem) { + useSeparator = !((WordItem) word).isFromSegmented(); + } + } else { + dumpWords((CompositeItem) word, s); + } + if (useSeparator && i.hasNext()) { + s.append(' '); + } + } + } + + private void marshalPhrase(PhraseItem root, StringBuilder s) { + marshalPhrase(root, s, root.isExplicit(), false); + } + + private void marshalPhrase(PhraseSegmentItem root, StringBuilder s) { + marshalPhrase(root, s, root.isExplicit(), true); + } + + private void marshalPhrase(IndexedItem root, StringBuilder s, boolean isExplicit, boolean isSegmented) { + String index = root.getIndexName(); + if (index.length() != 0) { + s.append(root.getIndexName()).append(':'); + } + if (isExplicit || !isSegmented) s.append('"'); + dumpWords((CompositeItem) root, s); + if (isExplicit || !isSegmented) s.append('"'); + } + + private void marshalNear(NearItem root, StringBuilder s) { + marshalComposite(root.getName() + "(" + root.getDistance() + ")", root, s); + } + + // Not only AndItem returns ItemType.AND + private void marshalAnd(CompositeItem root, StringBuilder s) { + marshalComposite("AND", root, s); + } + + private void marshalWeakAnd(WeakAndItem root, StringBuilder s) { + marshalComposite("WAND(" + root.getN() + ")", root, s); + } + + private void marshalComposite(String operator, CompositeItem root, StringBuilder s) { + boolean useParen = !atRoot; + if (useParen) { + s.append("( "); + } else { + atRoot = false; + } + for (Iterator<Item> i = root.getItemIterator(); i.hasNext();) { + Item item = i.next(); + marshal(item, s); + if (i.hasNext()) + 
s.append(' ').append(operator).append(' '); + } + if (useParen) { + s.append(" )"); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/vespa/ResultBuilder.java b/container-search/src/main/java/com/yahoo/search/federation/vespa/ResultBuilder.java new file mode 100644 index 00000000000..1361c7c14db --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/vespa/ResultBuilder.java @@ -0,0 +1,642 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.federation.vespa; + +import com.yahoo.log.LogLevel; +import com.yahoo.prelude.hitfield.XMLString; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.HitGroup; +import com.yahoo.search.result.Relevance; +import com.yahoo.text.XML; +import com.yahoo.text.DoubleParser; +import org.xml.sax.*; +import org.xml.sax.helpers.DefaultHandler; +import org.xml.sax.helpers.XMLReaderFactory; + +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Deque; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.logging.Logger; + +import static com.yahoo.text.Lowercase.toLowerCase; + +/** + * Parse Vespa XML results and create Result instances. + * + * <p> TODO: Ripe for a rewrite or major refactoring. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@SuppressWarnings("deprecation") +public class ResultBuilder extends DefaultHandler { + private static final String ERROR = "error"; + + private static final String FIELD = "field"; + + private static Logger log = Logger.getLogger(ResultBuilder.class.getName()); + + /** Namespaces feature id (http://xml.org/sax/features/namespaces). 
*/ + protected static final String NAMESPACES_FEATURE_ID = "http://xml.org/sax/features/namespaces"; + + /** + * Namespace prefixes feature id + * (http://xml.org/sax/features/namespace-prefixes). + */ + protected static final String NAMESPACE_PREFIXES_FEATURE_ID = "http://xml.org/sax/features/namespace-prefixes"; + + /** Validation feature id (http://xml.org/sax/features/validation). */ + protected static final String VALIDATION_FEATURE_ID = "http://xml.org/sax/features/validation"; + + /** + * Schema validation feature id + * (http://apache.org/xml/features/validation/schema). + */ + protected static final String SCHEMA_VALIDATION_FEATURE_ID = "http://apache.org/xml/features/validation/schema"; + + /** + * Dynamic validation feature id + * (http://apache.org/xml/features/validation/dynamic). + */ + protected static final String DYNAMIC_VALIDATION_FEATURE_ID = "http://apache.org/xml/features/validation/dynamic"; + + // default settings + + /** Default parser name. */ + protected static final String DEFAULT_PARSER_NAME = "org.apache.xerces.parsers.SAXParser"; + + /** Default namespaces support (false). */ + protected static final boolean DEFAULT_NAMESPACES = false; + + /** Default namespace prefixes (false). */ + protected static final boolean DEFAULT_NAMESPACE_PREFIXES = false; + + /** Default validation support (false). */ + protected static final boolean DEFAULT_VALIDATION = false; + + /** Default Schema validation support (false). */ + protected static final boolean DEFAULT_SCHEMA_VALIDATION = false; + + /** Default dynamic validation support (false). 
*/ + protected static final boolean DEFAULT_DYNAMIC_VALIDATION = false; + + private StringBuilder fieldContent; + + private String fieldName; + + private int fieldLevel = 0; + + private boolean hasLiteralTags = false; + + private Map<String, Object> hitFields = new HashMap<>(); + private String hitType; + private String hitRelevance; + private String hitSource; + + private int offset = 0; + + private List<Tag> tagStack = new ArrayList<>(); + + private final XMLReader parser; + + private Query query; + + private Result result; + + private static enum ResultPart { + ROOT, ERRORDETAILS, HIT, HITGROUP; + } + + Deque<ResultPart> location = new ArrayDeque<>(10); + + private String currentErrorCode; + + private String currentError; + + private Deque<HitGroup> hitGroups = new ArrayDeque<>(5); + + private static class Tag { + public final String name; + + /** + * Offset is a number which is generated for all data and tags inside + * fields, used to determine whether a tag was closed without enclosing + * any characters or other tags. + */ + public final int offset; + + public Tag(final String name, final int offset) { + this.name = name; + this.offset = offset; + } + + @Override + public String toString() { + return name + '(' + Integer.valueOf(offset) + ')'; + } + } + + /** Default constructor. 
*/ + public ResultBuilder() throws RuntimeException { + this(createParser()); + } + + public ResultBuilder(XMLReader parser) { + this.parser = parser; + this.parser.setContentHandler(this); + this.parser.setErrorHandler(this); + } + + public static XMLReader createParser() { + ClassLoader savedContextClassLoader = Thread.currentThread().getContextClassLoader(); + Thread.currentThread().setContextClassLoader(org.apache.xerces.parsers.SAXParser.class.getClassLoader()); + + try { + XMLReader reader = XMLReaderFactory.createXMLReader(DEFAULT_PARSER_NAME); + setParserFeatures(reader); + return reader; + } catch (Exception e) { + throw new RuntimeException("error: Unable to instantiate parser (" + + DEFAULT_PARSER_NAME + ")", e); + } finally { + Thread.currentThread().setContextClassLoader(savedContextClassLoader); + } + } + + private static void setParserFeatures(XMLReader reader) { + try { + reader.setFeature(NAMESPACES_FEATURE_ID, DEFAULT_NAMESPACES); + } catch (SAXException e) { + log.log(LogLevel.WARNING, "warning: Parser does not support feature (" + + NAMESPACES_FEATURE_ID + ")"); + } + try { + reader.setFeature(NAMESPACE_PREFIXES_FEATURE_ID, + DEFAULT_NAMESPACE_PREFIXES); + } catch (SAXException e) { + log.log(LogLevel.WARNING, "warning: Parser does not support feature (" + + NAMESPACE_PREFIXES_FEATURE_ID + ")"); + } + try { + reader.setFeature(VALIDATION_FEATURE_ID, DEFAULT_VALIDATION); + } catch (SAXException e) { + log.log(LogLevel.WARNING, "warning: Parser does not support feature (" + + VALIDATION_FEATURE_ID + ")"); + } + try { + reader.setFeature(SCHEMA_VALIDATION_FEATURE_ID, + DEFAULT_SCHEMA_VALIDATION); + } catch (SAXNotRecognizedException e) { + log.log(LogLevel.WARNING, "warning: Parser does not recognize feature (" + + SCHEMA_VALIDATION_FEATURE_ID + ")"); + + } catch (SAXNotSupportedException e) { + log.log(LogLevel.WARNING, "warning: Parser does not support feature (" + + SCHEMA_VALIDATION_FEATURE_ID + ")"); + } + + try { + 
reader.setFeature(DYNAMIC_VALIDATION_FEATURE_ID, + DEFAULT_DYNAMIC_VALIDATION); + } catch (SAXNotRecognizedException e) { + log.log(LogLevel.WARNING, "warning: Parser does not recognize feature (" + + DYNAMIC_VALIDATION_FEATURE_ID + ")"); + + } catch (SAXNotSupportedException e) { + log.log(LogLevel.WARNING, "warning: Parser does not support feature (" + + DYNAMIC_VALIDATION_FEATURE_ID + ")"); + } + } + + @Override + public void startDocument() throws SAXException { + reset(); + result = new Result(query); + hitGroups.addFirst(result.hits()); + location.addFirst(ResultPart.ROOT); + return; + } + + private void reset() { + result = null; + fieldLevel = 0; + hasLiteralTags = false; + tagStack = null; + fieldContent = null; + offset = 0; + currentError = null; + currentErrorCode = null; + hitGroups.clear(); + location.clear(); + } + + @Override + public void startElement(String uri, String local, String raw, + Attributes attrs) throws SAXException { + // "Everybody" wants this switch to be moved into the + // enum class instead, but in this case, I find the classic + // approach more readable. 
+ switch (location.peekFirst()) { + case HIT: + if (fieldLevel > 0) { + tagInField(raw, attrs, FIELD); + ++offset; + return; + } + if (FIELD.equals(raw)) { + ++fieldLevel; + fieldName = attrs.getValue("name"); + fieldContent = new StringBuilder(); + hasLiteralTags = false; + } + break; + case ERRORDETAILS: + if (fieldLevel > 0) { + tagInField(raw, attrs, ERROR); + ++offset; + return; + } + if (ERROR.equals(raw)) { + if (attrs != null) { + currentErrorCode = attrs.getValue("code"); + currentError = attrs.getValue("error"); + } + ++fieldLevel; + fieldContent = new StringBuilder(); + hasLiteralTags = false; + } + break; + case HITGROUP: + if ("hit".equals(raw)) { + startHit(attrs); + } else if ("group".equals(raw)) { + startHitGroup(attrs); + } + break; + case ROOT: + if ("hit".equals(raw)) { + startHit(attrs); + } else if ("errordetails".equals(raw)) { + location.addFirst(ResultPart.ERRORDETAILS); + } else if ("result".equals(raw)) { + if (attrs != null) { + String total = attrs.getValue("total-hit-count"); + if (total != null) { + result.setTotalHitCount(Long.valueOf(total)); + } + } + } else if ("group".equals(raw)) { + startHitGroup(attrs); + } else if (ERROR.equals(raw)) { + if (attrs != null) { + currentErrorCode = attrs.getValue("code"); + fieldContent = new StringBuilder(); + } + } + break; + } + ++offset; + } + + private void startHitGroup(Attributes attrs) { + HitGroup g = new HitGroup(); + Set<String> types = g.types(); + + final String source; + if (attrs != null) { + String groupType = attrs.getValue("type"); + if (groupType != null) { + for (String s : groupType.split(" ")) { + if (s.length() > 0) { + types.add(s); + } + } + } + + source = attrs.getValue("source"); + } else { + source = null; + } + + g.setId((source != null) ? 
source : "dummy"); + + hitGroups.peekFirst().add(g); + hitGroups.addFirst(g); + location.addFirst(ResultPart.HITGROUP); + } + + private void startHit(Attributes attrs) { + hitFields.clear(); + location.addFirst(ResultPart.HIT); + if (attrs != null) { + hitRelevance = attrs.getValue("relevancy"); + hitSource = attrs.getValue("source"); + hitType = attrs.getValue("type"); + } else { + hitRelevance = null; + hitSource = null; + hitType = null; + } + } + + private void tagInField(String tag, Attributes attrs, String enclosingTag) { + if (!hasLiteralTags) { + hasLiteralTags = true; + String fieldTillNow = XML.xmlEscape(fieldContent.toString(), false); + fieldContent = new StringBuilder(fieldTillNow); + tagStack = new ArrayList<>(); + } + if (enclosingTag.equals(tag)) { + ++fieldLevel; + } + if (tagStack.size() > 0) { + Tag prevTag = tagStack.get(tagStack.size() - 1); + if (prevTag != null && (prevTag.offset + 1) == offset) { + fieldContent.append(">"); + } + } + fieldContent.append("<").append(tag); + if (attrs != null) { + int attrCount = attrs.getLength(); + for (int i = 0; i < attrCount; i++) { + fieldContent.append(" ").append(attrs.getQName(i)) + .append("=\"").append( + XML.xmlEscape(attrs.getValue(i), true)).append( + "\""); + } + } + tagStack.add(new Tag(tag, offset)); + } + + private void endElementInField(String qName, String enclosingTag) { + Tag prevTag = tagStack.get(tagStack.size() - 1); + if (qName.equals(prevTag.name) && offset == (prevTag.offset + 1)) { + fieldContent.append(" />"); + } else { + fieldContent.append("</").append(qName).append('>'); + } + if (prevTag.name.equals(qName)) { + tagStack.remove(tagStack.size() - 1); + } + } + + private void endElementInHitField(String qName) { + if (FIELD.equals(qName) && --fieldLevel == 0) { + Object content; + if (hasLiteralTags) { + content = new XMLString(fieldContent.toString()); + } else { + content = fieldContent.toString(); + } + hitFields.put(fieldName, content); + if ("collapseId".equals(fieldName)) 
{ + hitFields.put(fieldName, Integer.valueOf(content.toString())); + } + fieldName = null; + fieldContent = null; + tagStack = null; + } else { + Tag prevTag = tagStack.get(tagStack.size() - 1); + if (qName.equals(prevTag.name) && offset == (prevTag.offset + 1)) { + fieldContent.append(" />"); + } else { + fieldContent.append("</").append(qName).append('>'); + } + if (prevTag.name.equals(qName)) { + tagStack.remove(tagStack.size() - 1); + } + } + } + @Override + public void characters(char ch[], int start, int length) + throws SAXException { + + switch (location.peekFirst()) { + case ERRORDETAILS: + case HIT: + if (fieldLevel > 0) { + if (hasLiteralTags) { + if (tagStack.size() > 0) { + Tag tag = tagStack.get(tagStack.size() - 1); + if (tag != null && (tag.offset + 1) == offset) { + fieldContent.append(">"); + } + } + fieldContent.append( + XML.xmlEscape(new String(ch, start, length), false)); + } else { + fieldContent.append(ch, start, length); + } + } + break; + default: + if (fieldContent != null) { + fieldContent.append(ch, start, length); + } + break; + } + ++offset; + } + + @Override + public void ignorableWhitespace(char ch[], int start, int length) + throws SAXException { + return; + } + + @Override + public void processingInstruction(String target, String data) + throws SAXException { + return; + } + + @Override + public void endElement(String namespaceURI, String localName, String qName) + throws SAXException { + switch (location.peekFirst()) { + case HITGROUP: + if ("group".equals(qName)) { + hitGroups.removeFirst(); + location.removeFirst(); + } + break; + case HIT: + if (fieldLevel > 0) { + endElementInHitField(qName); + } else if ("hit".equals(qName)) { + //assert(hitKeys.size() == hitValues.size()); + //We try to get either uri or documentID and use that as id + Object docId = extractDocumentID(); + Hit newHit = new Hit(docId.toString()); + if (hitRelevance != null) newHit.setRelevance(new Relevance(DoubleParser.parse(hitRelevance))); + if(hitSource 
!= null) newHit.setSource(hitSource); + if(hitType != null) { + for(String type: hitType.split(" ")) { + newHit.types().add(type); + } + } + for(Map.Entry<String, Object> field : hitFields.entrySet()) { + newHit.setField(field.getKey(), field.getValue()); + } + + hitGroups.peekFirst().add(newHit); + location.removeFirst(); + } + break; + case ERRORDETAILS: + if (fieldLevel == 1 && ERROR.equals(qName)) { + ErrorMessage error = new ErrorMessage(Integer.valueOf(currentErrorCode), + currentError, + fieldContent.toString()); + hitGroups.peekFirst().addError(error); + currentError = null; + currentErrorCode = null; + fieldContent = null; + tagStack = null; + fieldLevel = 0; + } else if (fieldLevel > 0) { + endElementInField(qName, ERROR); + } else if ("errordetails".equals(qName)) { + location.removeFirst(); + } + break; + case ROOT: + if (ERROR.equals(qName)) { + ErrorMessage error = new ErrorMessage(Integer.valueOf(currentErrorCode), + fieldContent.toString()); + hitGroups.peekFirst().setError(error); + currentErrorCode = null; + fieldContent = null; + } + break; + default: + break; + } + ++offset; + } + + private Object extractDocumentID() { + Object docId = null; + if (hitFields.containsKey("uri")) { + docId = hitFields.get("uri"); + } else { + final String documentId = "documentId"; + if (hitFields.containsKey(documentId)) { + docId = hitFields.get(documentId); + } else { + final String lcDocumentId = toLowerCase(documentId); + for (Map.Entry<String, Object> e : hitFields.entrySet()) { + String key = e.getKey(); + // case insensitive matching, checking length first hoping to avoid some lowercasing + if (documentId.length() == key.length() && lcDocumentId.equals(toLowerCase(key))) { + docId = e.getValue(); + break; + } + } + } + } + if (docId == null) { + docId = "dummy"; + log.info("Results from vespa backend did not contain either uri or documentId"); + } + return docId; + } + + @Override + public void warning(SAXParseException ex) throws SAXException { + 
printError("Warning", ex); + } + + @Override + public void error(SAXParseException ex) throws SAXException { + printError("Error", ex); + } + + @Override + public void fatalError(SAXParseException ex) throws SAXException { + printError("Fatal Error", ex); + // throw ex; + } + + /** Prints the error message. */ + protected void printError(String type, SAXParseException ex) { + StringBuilder errorMessage = new StringBuilder(); + + errorMessage.append(type); + if (ex != null) { + String systemId = ex.getSystemId(); + if (systemId != null) { + int index = systemId.lastIndexOf('/'); + if (index != -1) + systemId = systemId.substring(index + 1); + errorMessage.append(' ').append(systemId); + } + } + errorMessage.append(':') + .append(ex.getLineNumber()) + .append(':') + .append(ex.getColumnNumber()) + .append(": ") + .append(ex.getMessage()); + log.log(LogLevel.WARNING, errorMessage.toString()); + + } + + public Result parse(String identifier, Query query) { + Result toReturn; + + setQuery(query); + try { + parser.parse(identifier); + } catch (SAXParseException e) { + // ignore + } catch (Exception e) { + log.log(LogLevel.WARNING, "Error parsing result from Vespa",e); + Exception se = e; + if (e instanceof SAXException) { + se = ((SAXException) e).getException(); + } + if (se != null) + se.printStackTrace(System.err); + else + e.printStackTrace(System.err); + } + toReturn = result; + reset(); + return toReturn; + } + + public Result parse(InputSource input, Query query) { + Result toReturn; + + setQuery(query); + try { + parser.parse(input); + } catch (SAXParseException e) { + // ignore + } catch (Exception e) { + log.log(LogLevel.WARNING, "Error parsing result from Vespa",e); + Exception se = e; + if (e instanceof SAXException) { + se = ((SAXException) e).getException(); + } + if (se != null) + se.printStackTrace(System.err); + else + e.printStackTrace(System.err); + } + toReturn = result; + reset(); + return toReturn; + } + + + private void setQuery(Query query) { + 
this.query = query; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/vespa/VespaSearcher.java b/container-search/src/main/java/com/yahoo/search/federation/vespa/VespaSearcher.java new file mode 100644 index 00000000000..26c9b8ad2cd --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/vespa/VespaSearcher.java @@ -0,0 +1,270 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.federation.vespa; + +import java.io.IOException; +import java.io.InputStream; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.util.Map; +import java.util.Set; + +import org.xml.sax.InputSource; +import org.xml.sax.XMLReader; + +import com.google.inject.Inject; +import com.yahoo.collections.Tuple2; +import com.yahoo.component.ComponentId; +import com.yahoo.component.Version; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.language.Linguistics; +import com.yahoo.log.LogLevel; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.QueryCanonicalizer; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.cache.QrBinaryCacheConfig; +import com.yahoo.search.cache.QrBinaryCacheRegionConfig; +import com.yahoo.search.federation.FederationSearcher; +import com.yahoo.search.federation.ProviderConfig; +import com.yahoo.search.federation.http.ConfiguredHTTPProviderSearcher; +import com.yahoo.search.federation.http.Connection; +import com.yahoo.search.intent.model.IntentModel; +import com.yahoo.search.query.QueryTree; +import com.yahoo.search.query.textserialize.TextSerialize; +import com.yahoo.search.yql.MinimalQueryInserter; +import com.yahoo.statistics.Statistics; + +import 
edu.umd.cs.findbugs.annotations.Nullable; + +/** + * Backend searcher for external Vespa clusters (queried over http). + * + * <p>If the "sources" argument should be honored on an external cluster + * when using YQL+, override {@link #chooseYqlSources(Set)}.</p> + * + * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@Provides("Vespa") +@After("*") +public class VespaSearcher extends ConfiguredHTTPProviderSearcher { + private final ThreadLocal<XMLReader> readerHolder = new ThreadLocal<>(); + private final Query.Type queryType; + private final Tuple2<String, Version> segmenterVersion; + + private static final CompoundName select = new CompoundName("select"); + private static final CompoundName streamingUserid = new CompoundName( + "streaming.userid"); + private static final CompoundName streamingGroupname = new CompoundName( + "streaming.groupname"); + private static final CompoundName streamingSelection = new CompoundName( + "streaming.selection"); + + /** Create an instance from configuration */ + public VespaSearcher(ComponentId id, ProviderConfig config, + QrBinaryCacheConfig c, QrBinaryCacheRegionConfig r, + Statistics statistics) { + this(id, config, c, r, statistics, null); + } + + /** + * Create an instance from configuration + * + * @param linguistics used for generating meta info for YQL+ + */ + @Inject + public VespaSearcher(ComponentId id, ProviderConfig config, + QrBinaryCacheConfig c, QrBinaryCacheRegionConfig r, + Statistics statistics, @Nullable Linguistics linguistics) { + super(id, config, c, r, statistics); + queryType = toQueryType(config.queryType()); + if (linguistics == null) { + segmenterVersion = null; + } else { + segmenterVersion = linguistics.getVersion(Linguistics.Component.SEGMENTER); + } + } + + /** + * Create an instance from direct parameters having a single connection. 
+ * Useful for testing + */ + public VespaSearcher(String idString, String host, int port, String path) { + super(idString, host, port, path, Statistics.nullImplementation); + queryType = toQueryType(ProviderConfig.QueryType.LEGACY); + segmenterVersion = null; + } + + void addProperty(Map<String, String> queryMap, Query query, + CompoundName property) { + Object o = query.properties().get(property); + if (o != null) { + queryMap.put(property.toString(), o.toString()); + } + } + + @Override + public Map<String, String> getQueryMap(Query query) { + Map<String, String> queryMap = getQueryMapWithoutHitsOffset(query); + queryMap.put("offset", Integer.toString(query.getOffset())); + queryMap.put("hits", Integer.toString(query.getHits())); + queryMap.put("presentation.format", "xml"); + + addProperty(queryMap, query, select); + addProperty(queryMap, query, streamingUserid); + addProperty(queryMap, query, streamingGroupname); + addProperty(queryMap, query, streamingSelection); + return queryMap; + } + + @Override + public Map<String, String> getCacheKey(Query q) { + return getQueryMapWithoutHitsOffset(q); + } + + private Map<String, String> getQueryMapWithoutHitsOffset(Query query) { + Map<String, String> queryMap = super.getQueryMap(query); + if (queryType == Query.Type.YQL) { + queryMap.put(MinimalQueryInserter.YQL.toString(), marshalQuery(query)); + } else { + queryMap.put("query", marshalQuery(query.getModel().getQueryTree())); + queryMap.put("type", queryType.toString()); + } + + addNonExcludedSourceProperties(query, queryMap); + return queryMap; + } + + Query.Type toQueryType(ProviderConfig.QueryType.Enum providerQueryType) { + if (providerQueryType == ProviderConfig.QueryType.LEGACY) { + return Query.Type.ADVANCED; + } else if (providerQueryType == ProviderConfig.QueryType.PROGRAMMATIC) { + return Query.Type.PROGRAMMATIC; + } else if (providerQueryType == ProviderConfig.QueryType.YQL) { + return Query.Type.YQL; + } else { + throw new RuntimeException("Query type " + 
providerQueryType + + " unsupported."); + } + } + + /** + * Serialize the query parameter for outgoing queries. For YQL+ queries, + * sources and fields will be set to all sources and all fields, to follow + * the behavior of other query types. + * + * @param query + * the current, outgoing query + * @return a string to include in an HTTP request + */ + public String marshalQuery(Query query) { + if (queryType != Query.Type.YQL) { + return marshalQuery(query.getModel().getQueryTree()); + } + + Query workQuery = query.clone(); + String error = QueryCanonicalizer.canonicalize(workQuery); + if (error != null) { + getLogger().log(LogLevel.WARNING, + "Could not normalize [" + query.toString() + "]: " + error); + // Just returning null here is the pattern from existing code... + return null; + } + chooseYqlSources(workQuery.getModel().getSources()); + chooseYqlSummaryFields(workQuery.getPresentation().getSummaryFields()); + return workQuery.yqlRepresentation(getSegmenterVersion(), false); + } + + public String marshalQuery(QueryTree root) { + QueryCanonicalizer.QueryWrapper qw = new QueryCanonicalizer.QueryWrapper(); + root = root.clone(); + qw.setRoot(root.getRoot()); + boolean could = QueryCanonicalizer.treeCanonicalize(qw, root.getRoot(), + null); + if (!could) { + return null; + } + return marshalRoot(qw.getRoot()); + } + + private String marshalRoot(Item root) { + switch (queryType) { + case ADVANCED: + QueryMarshaller marshaller = new QueryMarshaller(); + return marshaller.marshal(root); + case PROGRAMMATIC: + return TextSerialize.serialize(root); + default: + throw new RuntimeException("Unsupported query type."); + } + } + + private XMLReader getReader() { + XMLReader reader = readerHolder.get(); + if (reader == null) { + reader = ResultBuilder.createParser(); + readerHolder.set(reader); + } + return reader; + } + + @Override + public void unmarshal(InputStream stream, long contentLength, Result result) { + ResultBuilder parser = new ResultBuilder(getReader()); + 
Result mResult = parser.parse(new InputSource(stream), + result.getQuery()); + result.mergeWith(mResult); + result.hits().addAll(mResult.hits().asUnorderedHits()); + } + + /** Returns the canonical Vespa ping URI, http://host:port/status.html */ + @Override + public URI getPingURI(Connection connection) throws MalformedURLException, + URISyntaxException { + return new URL(getParameters().getSchema(), connection.getHost(), + connection.getPort(), "/status.html").toURI(); + } + + /** + * Get the segmenter version data used when creating YQL queries. Useful if + * overriding {@link #marshalQuery(Query)}. + * + * @return a tuple with the name of the segmenting engine in use, and its + * version + */ + protected Tuple2<String, Version> getSegmenterVersion() { + return segmenterVersion; + } + + /** + * Choose which source arguments to use for the external cluster when + * generating a YQL+ query string. This is called from + * {@link #marshalQuery(Query)}. The default implementation clears the set, + * i.e. requests all sources. Other implementations may modify the source + * set as they see fit, or simply do nothing. + * + * @param sources + * the set of source names to use for the outgoing query + */ + protected void chooseYqlSources(Set<String> sources) { + sources.clear(); + } + + /** + * Choose which summary fields to request from the external cluster when + * generating a YQL+ query string. This is called from + * {@link #marshalQuery(Query)}. The default implementation clears the set, + * i.e. requests all fields. Other implementations may modify the summary + * field set as they see fit, or simply do nothing. 
+ * + * @param summaryFields + * the set of summary field names to request in the outgoing query + */ + protected void chooseYqlSummaryFields(Set<String> summaryFields) { + summaryFields.clear(); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/federation/vespa/package-info.java b/container-search/src/main/java/com/yahoo/search/federation/vespa/package-info.java new file mode 100644 index 00000000000..6a9f1decb21 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/federation/vespa/package-info.java @@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +@PublicApi +package com.yahoo.search.federation.vespa; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/grouping/Continuation.java b/container-search/src/main/java/com/yahoo/search/grouping/Continuation.java new file mode 100644 index 00000000000..63139348ab3 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/Continuation.java @@ -0,0 +1,24 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping; + +import com.yahoo.search.grouping.vespa.ContinuationDecoder; + +/** + * <p>This class represents a piece of data stored by the grouping framework within a grouping result, which can + * subsequently be sent back along with the original request to navigate across a large result set. 
It is an opaque + * data object that is not intended to be human readable.</p> + * + * <p>To render a Continuation within a result set, you simply need to call {@link #toString()}.</p> + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public abstract class Continuation { + + public static final String NEXT_PAGE = "next"; + public static final String PREV_PAGE = "prev"; + public static final String THIS_PAGE = "this"; + + public static Continuation fromString(String str) { + return ContinuationDecoder.decode(str); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/GroupingQueryParser.java b/container-search/src/main/java/com/yahoo/search/grouping/GroupingQueryParser.java new file mode 100644 index 00000000000..39bdd48c05e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/GroupingQueryParser.java @@ -0,0 +1,89 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping; + +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.grouping.request.GroupingOperation; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +import java.util.*; + +/** + * This searcher is responsible for turning the "select" parameter into a corresponding {@link GroupingRequest}. It will + * also parse any "timezone" parameter as the timezone for time expressions such as {@link + * com.yahoo.search.grouping.request.DayOfMonthFunction} and {@link com.yahoo.search.grouping.request.HourOfDayFunction}. 
+ * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +@After(PhaseNames.RAW_QUERY) +@Before(PhaseNames.TRANSFORMED_QUERY) +@Provides(GroupingQueryParser.SELECT_PARAMETER_PARSING) +public class GroupingQueryParser extends Searcher { + + public static final String SELECT_PARAMETER_PARSING = "SelectParameterParsing"; + public static final CompoundName PARAM_CONTINUE = new CompoundName("continue"); + public static final CompoundName PARAM_REQUEST = new CompoundName("select"); + public static final CompoundName PARAM_TIMEZONE = new CompoundName("timezone"); + private static final ThreadLocal<ZoneCache> zoneCache = new ThreadLocal<>(); + + @Override + public Result search(Query query, Execution execution) { + String reqParam = query.properties().getString(PARAM_REQUEST); + if (reqParam == null) { + return execution.search(query); + } + List<Continuation> continuations = getContinuations(query.properties().getString(PARAM_CONTINUE)); + TimeZone zone = getTimeZone(query.properties().getString(PARAM_TIMEZONE, "utc")); + for (GroupingOperation op : GroupingOperation.fromStringAsList(reqParam)) { + GroupingRequest grpRequest = GroupingRequest.newInstance(query); + grpRequest.setRootOperation(op); + grpRequest.setTimeZone(zone); + grpRequest.continuations().addAll(continuations); + } + return execution.search(query); + } + + private List<Continuation> getContinuations(String param) { + if (param == null) { + return Collections.emptyList(); + } + List<Continuation> ret = new LinkedList<>(); + for (String str : param.split(" ")) { + ret.add(Continuation.fromString(str)); + } + return ret; + } + + private TimeZone getTimeZone(String name) { + ZoneCache cache = zoneCache.get(); + if (cache == null) { + cache = new ZoneCache(); + zoneCache.set(cache); + } + TimeZone timeZone = cache.get(name); + if (timeZone == null) { + timeZone = TimeZone.getTimeZone(name); + cache.put(name, timeZone); + } + return timeZone; + } + + @SuppressWarnings("serial") + private 
static class ZoneCache extends LinkedHashMap<String, TimeZone> { + + ZoneCache() { + super(16, 0.75f, true); + } + + @Override + protected boolean removeEldestEntry(Map.Entry<String, TimeZone> entry) { + return size() > 128; // large enough to cache common cases + } + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/GroupingRequest.java b/container-search/src/main/java/com/yahoo/search/grouping/GroupingRequest.java new file mode 100644 index 00000000000..8ace3ed72de --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/GroupingRequest.java @@ -0,0 +1,164 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping; + +import com.yahoo.net.URI; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.grouping.request.GroupingOperation; +import com.yahoo.search.grouping.result.Group; +import com.yahoo.search.grouping.result.RootGroup; +import com.yahoo.search.result.Hit; + +import java.util.*; + +/** + * An instance of this class represents one of many grouping requests that are attached to a {@link Query}. Use the + * factory method {@link #newInstance(com.yahoo.search.Query)} to create a new instance of this, then create and set the + * {@link GroupingOperation} using {@link #setRootOperation(GroupingOperation)}. Once the search returns, access the + * result {@link Group} using the {@link #getResultGroup(Result)} method. 
+ * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class GroupingRequest { + + private final static CompoundName PROP_REQUEST = new CompoundName(GroupingRequest.class.getName() + ".Request"); + private final List<Continuation> continuations = new ArrayList<>(); + private final int requestId; + private GroupingOperation root; + private TimeZone timeZone; + private URI resultId; + + private GroupingRequest(int requestId) { + this.requestId = requestId; + } + + /** + * Returns the id of this GroupingRequest. This id is injected into the {@link RootGroup} of the final result, and + * allows tracking of per-request meta data. + * + * @return The id of this. + */ + public int getRequestId() { + return requestId; + } + + /** + * Returns the root {@link GroupingOperation} that defines this request. As long as this remains unset, the request + * is void. + * + * @return The root operation. + */ + public GroupingOperation getRootOperation() { + return root; + } + + /** + * Sets the root {@link GroupingOperation} that defines this request. As long as this remains unset, the request is + * void. + * + * @param root The root operation to set. + * @return This, to allow chaining. + */ + public GroupingRequest setRootOperation(GroupingOperation root) { + this.root = root; + return this; + } + + /** + * Returns the {@link TimeZone} used when resolving time expressions such as {@link + * com.yahoo.search.grouping.request.DayOfMonthFunction} and {@link com.yahoo.search.grouping.request.HourOfDayFunction}. + * + * @return The time zone in use. + */ + public TimeZone getTimeZone() { + return timeZone; + } + + /** + * Sets the {@link TimeZone} used when resolving time expressions such as {@link + * com.yahoo.search.grouping.request.DayOfMonthFunction} and {@link com.yahoo.search.grouping.request.HourOfDayFunction}. + * + * @param timeZone The time zone to set. + * @return This, to allow chaining. 
+ */ + public GroupingRequest setTimeZone(TimeZone timeZone) { + this.timeZone = timeZone; + return this; + } + + /** + * Returns the root result {@link RootGroup} that corresponds to this request. This is not available until the + * search returns. Because searchers are allowed to modify both {@link Result} and {@link Hit} objects freely, this + * method requires that you pass it the current {@link Result} object as argument. + * + * @param result The search result that contains the root group. + * @return The result {@link RootGroup} of this request, or null if not found. + */ + public RootGroup getResultGroup(Result result) { + Hit root = result.hits().get(resultId, -1); + if (!(root instanceof RootGroup)) { + return null; + } + return (RootGroup)root; + } + + /** + * Sets the result {@link RootGroup} of this request. This is used by the executing grouping searcher, and should + * not be called by a requesting searcher. + * + * @param group The result to set. + * @return This, to allow chaining. + */ + public GroupingRequest setResultGroup(RootGroup group) { + this.resultId = group.getId(); + return this; + } + + /** + * Returns the list of {@link Continuation}s of this request. This is used by the executing grouping searcher to + * allow pagination of grouping results. + * + * @return The list of Continuations. + */ + public List<Continuation> continuations() { + return continuations; + } + + /** + * Creates and attaches a new instance of this class to the given {@link Query}. This is necessary to allow {@link + * #getRequests(Query)} to return all created requests. + * + * @param query The query to attach the request to. + * @return The created request. 
+ */ + public static GroupingRequest newInstance(Query query) { + List<GroupingRequest> lst = getRequests(query); + if (lst.isEmpty()) { + lst = new LinkedList<>(); + query.properties().set(PROP_REQUEST, lst); + } + GroupingRequest ret = new GroupingRequest(lst.size()); + lst.add(ret); + return ret; + } + + /** + * Returns all instances of this class that have been attached to the given {@link Query}. If no requests have been + * attached to the {@link Query}, this method returns an empty list. + * + * @param query The query whose requests to return. + * @return The list of grouping requests. + */ + @SuppressWarnings({ "unchecked" }) + public static List<GroupingRequest> getRequests(Query query) { + Object lst = query.properties().get(PROP_REQUEST); + if (lst == null) { + return Collections.emptyList(); + } + if (!(lst instanceof List)) { + throw new IllegalArgumentException("Expected " + GroupingRequest.class + ", got " + lst.getClass() + "."); + } + return (List<GroupingRequest>)lst; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/GroupingValidator.java b/container-search/src/main/java/com/yahoo/search/grouping/GroupingValidator.java new file mode 100644 index 00000000000..1366fe1201b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/GroupingValidator.java @@ -0,0 +1,85 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.grouping; + +import com.google.inject.Inject; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.vespa.config.search.AttributesConfig; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.config.ClusterConfig; +import com.yahoo.search.grouping.request.AttributeValue; +import com.yahoo.search.grouping.request.ExpressionVisitor; +import com.yahoo.search.grouping.request.GroupingExpression; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +import java.util.HashSet; +import java.util.Set; + +import static com.yahoo.search.grouping.GroupingQueryParser.SELECT_PARAMETER_PARSING; + +/** + * This searcher ensures that all {@link GroupingRequest} objects attached to a {@link Query} make sense to the search + * cluster for which this searcher has been deployed. This searcher uses exceptions to signal invalid grouping + * requests. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +@Before(PhaseNames.BACKEND) +@After(SELECT_PARAMETER_PARSING) +@Provides(GroupingValidator.GROUPING_VALIDATED) +public class GroupingValidator extends Searcher { + + public static final String GROUPING_VALIDATED = "GroupingValidated"; + public static final CompoundName PARAM_ENABLED = new CompoundName("validate_" + GroupingQueryParser.PARAM_REQUEST); + private final Set<String> attributeNames = new HashSet<>(); + private final String clusterName; + private final boolean enabled; + + /** + * Constructs a new instance of this searcher with the given config. + * + * @param qrsConfig The shared config for all searchers. 
+ * @param clusterConfig The config for the cluster that this searcher is deployed for. + */ + @Inject + public GroupingValidator(QrSearchersConfig qrsConfig, ClusterConfig clusterConfig, + AttributesConfig attributesConfig) { + int clusterId = clusterConfig.clusterId(); + QrSearchersConfig.Searchcluster.Indexingmode.Enum indexingMode = qrsConfig.searchcluster(clusterId).indexingmode(); + enabled = (indexingMode != QrSearchersConfig.Searchcluster.Indexingmode.STREAMING); + clusterName = enabled ? qrsConfig.searchcluster(clusterId).name() : null; + for (AttributesConfig.Attribute attr : attributesConfig.attribute()) { + attributeNames.add(attr.name()); + } + } + + @Override + public Result search(Query query, Execution execution) { + if (enabled && query.properties().getBoolean(PARAM_ENABLED, true)) { + ExpressionVisitor visitor = new MyVisitor(); + for (GroupingRequest req : GroupingRequest.getRequests(query)) { + req.getRootOperation().visitExpressions(visitor); + } + } + return execution.search(query); + } + + private class MyVisitor implements ExpressionVisitor { + + @Override + public void visitExpression(GroupingExpression exp) { + if (exp instanceof AttributeValue) { + String name = ((AttributeValue)exp).getAttributeName(); + if (!attributeNames.contains(name)) { + throw new UnavailableAttributeException(clusterName, name); + } + } + } + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/UnavailableAttributeException.java b/container-search/src/main/java/com/yahoo/search/grouping/UnavailableAttributeException.java new file mode 100644 index 00000000000..7e147c88625 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/UnavailableAttributeException.java @@ -0,0 +1,46 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.grouping; + +/** + * This exception is thrown by the {@link GroupingValidator} if a {@link GroupingRequest} contains a reference to an + * unavailable attribute. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +@SuppressWarnings("serial") +public class UnavailableAttributeException extends RuntimeException { + + private final String clusterName; + private final String attributeName; + + /** + * Constructs a new instance of this class. + * + * @param clusterName The name of the cluster for which the request is illegal. + * @param attributeName The name of the attribute which is referenced but not available. + */ + public UnavailableAttributeException(String clusterName, String attributeName) { + super("Grouping request references attribute '" + attributeName + "' which is not available " + + "in cluster '" + clusterName + "'."); + this.clusterName = clusterName; + this.attributeName = attributeName; + } + + /** + * Returns the name of the cluster for which the request is illegal. + * + * @return The cluster name. + */ + public String getClusterName() { + return clusterName; + } + + /** + * Returns the name of the attribute which is referenced but not available. + * + * @return The attribute name. + */ + public String getAttributeName() { + return attributeName; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/UniqueGroupingSearcher.java b/container-search/src/main/java/com/yahoo/search/grouping/UniqueGroupingSearcher.java new file mode 100644 index 00000000000..f4145a31f33 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/UniqueGroupingSearcher.java @@ -0,0 +1,279 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.grouping; + +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.log.LogLevel; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.grouping.request.AllOperation; +import com.yahoo.search.grouping.request.AttributeValue; +import com.yahoo.search.grouping.request.CountAggregator; +import com.yahoo.search.grouping.request.EachOperation; +import com.yahoo.search.grouping.request.GroupingExpression; +import com.yahoo.search.grouping.request.GroupingOperation; +import com.yahoo.search.grouping.request.MaxAggregator; +import com.yahoo.search.grouping.request.MinAggregator; +import com.yahoo.search.grouping.request.NegFunction; +import com.yahoo.search.grouping.request.SummaryValue; +import com.yahoo.search.grouping.result.Group; +import com.yahoo.search.grouping.result.GroupList; +import com.yahoo.search.query.Sorting; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.HitOrderer; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.logging.Logger; + +/** + * Implements 'unique' using a grouping expression. + * + * It doesn't work for multi-level sorting. 
+ * + * @author andreer + */ +@After(PhaseNames.RAW_QUERY) +@Before(PhaseNames.TRANSFORMED_QUERY) +public class UniqueGroupingSearcher extends Searcher { + + public static final CompoundName PARAM_UNIQUE = new CompoundName("unique"); + private static final Logger log = Logger.getLogger(UniqueGroupingSearcher.class.getName()); + private static final HitOrderer NOP_ORDERER = new HitOrderer() { + + @Override + public void order(List<Hit> hits) { + // The order of hits is given by the grouping framework, and should not be re-ordered when we copy the hits + // from the groups to the base HitGroup in the result. + } + }; + static final String LABEL_COUNT = "uniqueCount"; + static final String LABEL_GROUPS = "uniqueGroups"; + static final String LABEL_HITS = "uniqueHits"; + + /** + * Implements the deprecated "unique" api for deduplication by using grouping. We create a grouping expression on + * the field we wish to dedup on (which must be an attribute). + * Total hits is calculated using the new count unique groups functionality. + */ + @Override + public Result search(Query query, Execution execution) { + // Determine if we should remove duplicates + String unique = query.properties().getString(PARAM_UNIQUE); + if (unique == null || unique.trim().isEmpty()) { + return execution.search(query); + } + query.trace("Performing deduping by attribute '" + unique + "'.", true, 3); + return dedupe(query, execution, unique); + } + + /** + * Until we can use the grouping pagination features in 5.1, we'll have to support offset + * by simply requesting and discarding hit #0 up to hit #offset. 
+ */ + private static Result dedupe(Query query, Execution execution, String dedupField) { + Sorting sorting = query.getRanking().getSorting(); + if (sorting != null && sorting.fieldOrders().size() > 1) { + query.trace("Can not use grouping for deduping with multi-level sorting.", 3); + // To support this we'd have to generate a grouping expression with as many levels + // as there are levels in the sort spec. This is probably too slow and costly for + // us to ever want to actually use it (and a bit harder to implement as well). + return execution.search(query); + } + + int hits = query.getHits(); + int offset = query.getOffset(); + int groupingHits = hits + offset; + + GroupingRequest groupingRequest = GroupingRequest.newInstance(query); + groupingRequest.setRootOperation( + buildGroupingExpression( + dedupField, + groupingHits, + query.getPresentation().getSummary(), + sorting)); + + query.setHits(0); + query.setOffset(0); + Result result = execution.search(query); + + query = result.getQuery(); // query could have changed further down in the chain + query.setHits(hits); + query.setOffset(offset); + + Group root = groupingRequest.getResultGroup(result); + if (null == root) { + String msg = "Result group not found for deduping grouping request, returning empty result."; + query.trace(msg, 3); + log.log(LogLevel.WARNING, msg); + throw new IllegalStateException("Failed to produce deduped result set."); + } + result.hits().remove(root.getId().toString()); // hide our tracks + + GroupList resultGroups = root.getGroupList(dedupField); + if (resultGroups == null) { + query.trace("Deduping grouping request returned no hits, returning empty result.", 3); + return result; + } + + // Make sure that .addAll() doesn't re-order the hits we copy from the grouping + // framework. The groups are already in the order they should be. 
+ result.hits().setOrderer(NOP_ORDERER); + result.hits().addAll(getRequestedHits(resultGroups, offset, hits)); + + Long countField = (Long) root.getField(LABEL_COUNT); + long count = countField != null ? countField : 0; + result.setTotalHitCount(count); + + return result; + } + + /** + * Create a hit ordering clause based on the sorting spec. + * + * @param sortingSpec A (single level!) sorting specification + * @return a grouping expression which produces a sortable value + */ + private static List<GroupingExpression> createHitOrderingClause(Sorting sortingSpec) { + List<GroupingExpression> orderingClause = new ArrayList<>(); + for (Sorting.FieldOrder fieldOrder : sortingSpec.fieldOrders()) { + Sorting.Order sortOrder = fieldOrder.getSortOrder(); + switch (sortOrder) { + case ASCENDING: + case UNDEFINED: + // When we want ascending order, the hit with the smallest value should come first (and be surfaced). + orderingClause.add(new MinAggregator(new AttributeValue(fieldOrder.getFieldName()))); + break; + case DESCENDING: + // When we sort in descending order, the hit with the largest value should come first (and be surfaced). + orderingClause.add(new NegFunction(new MaxAggregator(new AttributeValue(fieldOrder.getFieldName())))); + break; + default: + throw new UnsupportedOperationException("Can not handle sort order " + sortOrder + "."); + } + } + return orderingClause; + } + + /** + * Create a group ordering clause based on the sorting spec. + * + * @param sortingSpec A (single level!) 
sorting specification + * @return a grouping expression which produces a sortable value + */ + private static GroupingExpression createGroupOrderingClause(Sorting sortingSpec) { + GroupingExpression groupingClause = null; + for (Sorting.FieldOrder fieldOrder : sortingSpec.fieldOrders()) { + Sorting.Order sortOrder = fieldOrder.getSortOrder(); + switch (sortOrder) { + case ASCENDING: + case UNDEFINED: + groupingClause = new AttributeValue(fieldOrder.getFieldName()); + break; + case DESCENDING: + // To sort descending, just take the negative. This is the most common case + groupingClause = new NegFunction(new AttributeValue(fieldOrder.getFieldName())); + break; + default: + throw new UnsupportedOperationException("Can not handle sort order " + sortOrder + "."); + } + } + return groupingClause; + } + + /** + * Retrieve the actually unique hits from the grouping results. + * + * @param resultGroups the results of the dedup grouping expression. + * @param offset the requested offset. Hits before this are discarded. + * @param hits the requested number of hits. Hits in excess of this are discarded. + * @return A list of the actually requested hits, sorted as by the grouping expression. + */ + private static List<Hit> getRequestedHits(GroupList resultGroups, int offset, int hits) { + List<Hit> receivedHits = getAllHitsFromGroupingResult(resultGroups); + if (receivedHits.size() <= offset) { + return Collections.emptyList(); // There weren't any hits as far out as requested. + } + int lastRequestedHit = Math.min(offset + hits, receivedHits.size()); + return receivedHits.subList(offset, lastRequestedHit); + } + + /** + * Get all the hits returned by the grouping request. This might be more or less than the user requested. + * This method handles the results from two different types of grouping expression, depending on whether + * sorting was used for the query or not. 
+ * + * @param resultGroups The result group of the dedup grouping request + * @return A (correctly sorted) list of all the hits returned by the grouping expression. + */ + private static List<Hit> getAllHitsFromGroupingResult(GroupList resultGroups) { + List<Hit> hits = new ArrayList<>(resultGroups.size()); + for (Hit groupHit : resultGroups) { + Group group = (Group)groupHit; + GroupList sorted = group.getGroupList(LABEL_GROUPS); + if (sorted != null) { + group = (Group)sorted.iterator().next(); + } + for (Hit hit : group.getHitList(LABEL_HITS)) { + hits.add(hit); + } + } + return hits; + } + + static GroupingOperation buildGroupingExpression(String dedupField, int groupingHits, String summaryClass, + Sorting sortSpec) { + if (sortSpec != null) { + return buildGroupingExpressionWithSorting(dedupField, groupingHits, summaryClass, sortSpec); + } else { + return buildGroupingExpressionWithRanking(dedupField, groupingHits, summaryClass); + } + } + + /** + * Create the grouping expression when ranking is used for ordering + * (which is the default for grouping expressions, so ranking is not explicitly mentioned). + * See unit test for examples + */ + private static GroupingOperation buildGroupingExpressionWithRanking(String dedupField, int groupingHits, + String summaryClass) { + return new AllOperation() + .setGroupBy(new AttributeValue(dedupField)) + .addOutput(new CountAggregator().setLabel(LABEL_COUNT)) + .setMax(groupingHits) + .addChild(new EachOperation() + .setMax(1) + .addChild(new EachOperation() + .setLabel(LABEL_HITS) + .addOutput(summaryClass == null ? new SummaryValue() : new SummaryValue(summaryClass)))); + } + + /** + * Create the grouping expression when sorting is used for ordering + * This grouping expression is more complicated and probably quite a bit heavier to execute. 
+ * See unit test for examples + */ + private static GroupingOperation buildGroupingExpressionWithSorting(String dedupField, int groupingHits, + String summaryClass, Sorting sortSpec) { + return new AllOperation() + .setGroupBy(new AttributeValue(dedupField)) + .addOutput(new CountAggregator().setLabel(LABEL_COUNT)) + .setMax(groupingHits) + .addOrderBy(createHitOrderingClause(sortSpec)) + .addChild(new EachOperation() + .addChild(new AllOperation() + .setGroupBy(createGroupOrderingClause(sortSpec)) + .addOrderBy(createHitOrderingClause(sortSpec)) + .setMax(1) + .addChild(new EachOperation() + .setLabel(LABEL_GROUPS) + .addChild(new EachOperation() + .setLabel(LABEL_HITS) + .addOutput(summaryClass == null ? new SummaryValue() : new SummaryValue(summaryClass)))))); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/package-info.java b/container-search/src/main/java/com/yahoo/search/grouping/package-info.java new file mode 100644 index 00000000000..f569115008a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/package-info.java @@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +@PublicApi +package com.yahoo.search.grouping; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/AddFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/AddFunction.java new file mode 100644 index 00000000000..2f321a5854d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/AddFunction.java @@ -0,0 +1,42 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.grouping.request; + +import java.util.List; + +/** + * This class represents an add-function in a {@link GroupingExpression}. It evaluates to a number that equals the + * result of adding the results of all arguments together in the order they were given to the constructor. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class AddFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param arg1 The first compulsory argument, must evaluate to a number. + * @param arg2 The second compulsory argument, must evaluate to a number. + * @param argN The optional arguments, must evaluate to a number. + */ + public AddFunction(GroupingExpression arg1, GroupingExpression arg2, GroupingExpression... argN) { + this(asList(arg1, arg2, argN)); + } + + private AddFunction(List<GroupingExpression> args) { + super("add", args); + } + + /** + * Constructs a new instance of this class from a list of arguments. + * + * @param args The arguments to pass to the constructor. + * @return The created instance. + * @throws IllegalArgumentException Thrown if the number of arguments is less than 2. + */ + public static AddFunction newInstance(List<GroupingExpression> args) { + if (args.size() < 2) { + throw new IllegalArgumentException("Expected 2 or more arguments, got " + args.size() + "."); + } + return new AddFunction(args); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/AggregatorNode.java b/container-search/src/main/java/com/yahoo/search/grouping/request/AggregatorNode.java new file mode 100644 index 00000000000..0df204506c1 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/AggregatorNode.java @@ -0,0 +1,53 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.grouping.request; + +/** + * This class represents an aggregated value in a {@link GroupingExpression}. Because it operates on a list of data, it + * can not be used as a document-level expression (i.e. level 0, see {@link GroupingExpression#resolveLevel(int)}). The + * contained expression is evaluated at the level of the aggregator minus 1. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public abstract class AggregatorNode extends GroupingExpression { + + private final GroupingExpression exp; + + protected AggregatorNode(String image) { + super(image + "()"); + this.exp = null; + } + + protected AggregatorNode(String image, GroupingExpression exp) { + super(image + "(" + exp.toString() + ")"); + this.exp = exp; + } + + /** + * Returns the expression that this node aggregates on. + * + * @return The expression. + */ + public GroupingExpression getExpression() { + return exp; + } + + @Override + public void resolveLevel(int level) { + super.resolveLevel(level); + if (level < 1) { + throw new IllegalArgumentException("Expression '" + this + "' not applicable for " + + GroupingOperation.getLevelDesc(level) + "."); + } + if (exp != null) { + exp.resolveLevel(level - 1); + } + } + + @Override + public void visit(ExpressionVisitor visitor) { + super.visit(visitor); + if (exp != null) { + exp.visit(visitor); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/AllOperation.java b/container-search/src/main/java/com/yahoo/search/grouping/request/AllOperation.java new file mode 100644 index 00000000000..e78be0c1c1a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/AllOperation.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.grouping.request; + +/** + * This is a grouping operation that processes the input list as a whole, as opposed to {@link EachOperation} which + * processes each element of that list separately. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class AllOperation extends GroupingOperation { + + /** + * Constructs a new instance of this class. + */ + public AllOperation() { + super("all"); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/AndFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/AndFunction.java new file mode 100644 index 00000000000..3053153e5a3 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/AndFunction.java @@ -0,0 +1,42 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.List; + +/** + * This class represents an and-function in a {@link GroupingExpression}. It evaluates to a long that equals the result + * of and'ing the results of all arguments together in the order they were given to the constructor. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class AndFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param arg1 The first compulsory argument, must evaluate to a long. + * @param arg2 The second compulsory argument, must evaluate to a long. + * @param argN The optional arguments, must evaluate to a long. + */ + public AndFunction(GroupingExpression arg1, GroupingExpression arg2, GroupingExpression... argN) { + this(asList(arg1, arg2, argN)); + } + + private AndFunction(List<GroupingExpression> args) { + super("and", args); + } + + /** + * Constructs a new instance of this class from a list of arguments. + * + * @param args The arguments to pass to the constructor. 
+ * @return The created instance. + * @throws IllegalArgumentException Thrown if the number of arguments is less than 2. + */ + public static AndFunction newInstance(List<GroupingExpression> args) { + if (args.size() < 2) { + throw new IllegalArgumentException("Expected 2 or more arguments, got " + args.size() + "."); + } + return new AndFunction(args); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/ArrayAtLookup.java b/container-search/src/main/java/com/yahoo/search/grouping/request/ArrayAtLookup.java new file mode 100644 index 00000000000..1e613066bd4 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/ArrayAtLookup.java @@ -0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import com.google.common.annotations.Beta; + +/** + * Represents access of array element in a document attribute in a {@link GroupingExpression}. + * + * The first argument should be the name of an array attribute in the + * input {@link com.yahoo.search.result.Hit}, while the second + * argument is evaluated as an integer and used as the index in that array. + * If the index argument is less than 0 returns the first array element; + * if the index is greater than or equal to size(array) returns the last array element; + * if the array is empty returns 0 (or NaN?). + * @author arnej27959 + */ +@Beta +public class ArrayAtLookup extends DocumentValue { + + private final String attributeName; + private final GroupingExpression arg2; + + /** + * Constructs a new instance of this class. + * + * @param attributeName The attribute name to assign to this. 
+ */ + public ArrayAtLookup(String attributeName, GroupingExpression indexArg) { + super("array.at(" + attributeName + ", " + indexArg + ")"); + this.attributeName = attributeName; + this.arg2 = indexArg; + } + + /** + * Returns the name of the attribute to retrieve from the input hit. + * + * @return The attribute name. + */ + public String getAttributeName() { + return attributeName; + } + + /** + * get the expression to evaluate before indexing + * @return grouping expression argument + */ + public GroupingExpression getIndexArgument() { + return arg2; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/AttributeFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/AttributeFunction.java new file mode 100644 index 00000000000..c16903ddca8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/AttributeFunction.java @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This class represents a document attribute function in a {@link GroupingExpression}. It evaluates to the value of the + * named attribute in the input {@link com.yahoo.search.result.Hit}. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class AttributeFunction extends DocumentValue { + + private final String name; + + /** + * Constructs a new instance of this class. + * + * @param attributeName The attribute name to assign to this. + */ + public AttributeFunction(String attributeName) { + super("attribute(" + attributeName + ")"); + name = attributeName; + } + + /** + * Returns the name of the attribute to retrieve from the input hit. + * + * @return The attribute name. 
+ */ + public String getAttributeName() { + return name; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/AttributeValue.java b/container-search/src/main/java/com/yahoo/search/grouping/request/AttributeValue.java new file mode 100644 index 00000000000..135463bf108 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/AttributeValue.java @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This class represents a document attribute value in a {@link GroupingExpression}. It evaluates to the value of the + * named attribute in the input {@link com.yahoo.search.result.Hit}. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class AttributeValue extends DocumentValue { + + private final String name; + + /** + * Constructs a new instance of this class. + * + * @param attributeName The attribute name to assign to this. + */ + public AttributeValue(String attributeName) { + super(attributeName); + name = attributeName; + } + + /** + * Returns the name of the attribute to retrieve from the input hit. + * + * @return The attribute name. + */ + public String getAttributeName() { + return name; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/AvgAggregator.java b/container-search/src/main/java/com/yahoo/search/grouping/request/AvgAggregator.java new file mode 100644 index 00000000000..749b419488f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/AvgAggregator.java @@ -0,0 +1,20 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This class represents an average-aggregator in a {@link GroupingExpression}. 
It evaluates to the average value that + * the contained expression evaluated to over all the inputs. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class AvgAggregator extends AggregatorNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to aggregate on. + */ + public AvgAggregator(GroupingExpression exp) { + super("avg", exp); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/AvgFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/AvgFunction.java new file mode 100644 index 00000000000..c0474064741 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/AvgFunction.java @@ -0,0 +1,42 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.List; + +/** + * This class represents an avg-function in a {@link GroupingExpression}. It evaluates to a number that equals the + * average of the results of all arguments. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class AvgFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param arg1 The first compulsory argument, must evaluate to a number. + * @param arg2 The second compulsory argument, must evaluate to a number. + * @param argN The optional arguments, must evaluate to a number. + */ + public AvgFunction(GroupingExpression arg1, GroupingExpression arg2, GroupingExpression... argN) { + this(asList(arg1, arg2, argN)); + } + + private AvgFunction(List<GroupingExpression> args) { + super("avg", args); + } + + /** + * Constructs a new instance of this class from a list of arguments. + * + * @param args The arguments to pass to the constructor. + * @return The created instance.
+ * @throws IllegalArgumentException Thrown if the number of arguments is less than 2. + */ + public static AvgFunction newInstance(List<GroupingExpression> args) { + if (args.size() < 2) { + throw new IllegalArgumentException("Expected 2 or more arguments, got " + args.size() + "."); + } + return new AvgFunction(args); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/BooleanValue.java b/container-search/src/main/java/com/yahoo/search/grouping/request/BooleanValue.java new file mode 100644 index 00000000000..c41cfa4c4f2 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/BooleanValue.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This class represents a constant {@link Boolean} value in a {@link GroupingExpression}. + * + * @author <a href="mailto:lulf@yahoo-inc.com">Ulf Lilleengen</a> + */ +public class BooleanValue extends ConstantValue<Boolean> { + + /** + * Constructs a new instance of this class. + * + * @param value The immutable value to assign to this. + */ + public BooleanValue(Boolean value) { + super(value); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/BucketResolver.java b/container-search/src/main/java/com/yahoo/search/grouping/request/BucketResolver.java new file mode 100644 index 00000000000..735347cde87 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/BucketResolver.java @@ -0,0 +1,121 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.LinkedList; +import java.util.List; + +/** + * This is a helper class for resolving buckets to a list of + * {@link GroupingExpression} objects. 
To resolve a list simply + * {@link #push(ConstantValue, boolean)} onto it, before calling + * {@link #resolve(GroupingExpression)} to retrieve the list of corresponding + * grouping expression objects. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class BucketResolver { + + private final List<BucketValue> buckets = new LinkedList<>(); + private ConstantValue<?> prev = null; + private boolean previnclusive = false; + private int idx = 0; + + /** + * Pushes the given expression onto this bucket resolver. Once all buckets have been pushed using this method, call + * {@link #resolve(GroupingExpression)} to retrieve the combined grouping expression. + * + * @param val The expression to push. + * @param inclusive Whether or not the value is inclusive. + * @throws IllegalArgumentException Thrown if the expression is incompatible. + */ + public BucketResolver push(ConstantValue<?> val, boolean inclusive) { + if (prev == null) { + prev = val; + } else if (!(prev instanceof InfiniteValue || val instanceof InfiniteValue) + && !prev.getClass().equals(val.getClass())) { + throw new IllegalArgumentException("Bucket type mismatch, expected '" + prev.getClass().getSimpleName() + + "' got '" + val.getClass().getSimpleName() + "'."); + } else if (prev instanceof InfiniteValue && val instanceof InfiniteValue) { + throw new IllegalArgumentException("Bucket type mismatch, cannot both be infinity."); + } + if ((++idx % 2) == 0) { + ConstantValue<?> begin = previnclusive ? prev : nextValue(prev); + ConstantValue<?> end = inclusive ?
nextValue(val) : val; + if (begin instanceof DoubleValue || end instanceof DoubleValue) { + buckets.add(new DoubleBucket(begin, end)); + } else if (begin instanceof LongValue || end instanceof LongValue) { + buckets.add(new LongBucket(begin, end)); + } else if (begin instanceof StringValue || end instanceof StringValue) { + buckets.add(new StringBucket(begin, end)); + } else if (begin instanceof RawValue || end instanceof RawValue) { + buckets.add(new RawBucket(begin, end)); + } else { + throw new UnsupportedOperationException("Bucket type '" + val.getClass() + "' not supported."); + } + } + prev = val; + previnclusive = inclusive; + return this; + } + + /** + * Resolves and returns the list of grouping expressions that correspond to the previously pushed buckets. + * + * @param exp The expression to assign to the function. + * @return The list corresponding to the pushed buckets. + */ + public PredefinedFunction resolve(GroupingExpression exp) { + if ((idx % 2) == 1) { + throw new IllegalStateException("Missing to-limit of last bucket."); + } + int len = buckets.size(); + if (len == 0) { + throw new IllegalStateException("Expected at least one bucket, got none."); + } + ConstantValue<?> begin = buckets.get(0).getFrom(); + ConstantValue<?> end = buckets.get(0).getTo(); + if (begin instanceof DoubleValue || end instanceof DoubleValue) { + if (len == 1) { + return new DoublePredefined(exp, (DoubleBucket)buckets.get(0)); + } else { + return new DoublePredefined(exp, (DoubleBucket)buckets.get(0), + buckets.subList(1, len).toArray(new DoubleBucket[len - 1])); + } + } else if (begin instanceof LongValue || end instanceof LongValue) { + if (len == 1) { + return new LongPredefined(exp, (LongBucket)buckets.get(0)); + } else { + return new LongPredefined(exp, (LongBucket)buckets.get(0), + buckets.subList(1, len).toArray(new LongBucket[len - 1])); + } + } else if (begin instanceof StringValue || end instanceof StringValue) { + if (len == 1) { + return new 
StringPredefined(exp, (StringBucket)buckets.get(0)); + } else { + return new StringPredefined(exp, (StringBucket)buckets.get(0), + buckets.subList(1, len).toArray(new StringBucket[len - 1])); + } + } else if (begin instanceof RawValue || end instanceof RawValue) { + if (len == 1) { + return new RawPredefined(exp, (RawBucket)buckets.get(0)); + } else { + return new RawPredefined(exp, (RawBucket)buckets.get(0), + buckets.subList(1, len).toArray(new RawBucket[len - 1])); + } + } + throw new UnsupportedOperationException("Bucket type '" + begin.getClass() + "' not supported."); + } + + private ConstantValue<?> nextValue(ConstantValue<?> value) { + if (value instanceof LongValue) { + return LongBucket.nextValue((LongValue)value); + } else if (value instanceof DoubleValue) { + return DoubleBucket.nextValue((DoubleValue)value); + } else if (value instanceof StringValue) { + return StringBucket.nextValue((StringValue)value); + } else if (value instanceof RawValue) { + return RawBucket.nextValue((RawValue)value); + } + return value; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/BucketValue.java b/container-search/src/main/java/com/yahoo/search/grouping/request/BucketValue.java new file mode 100644 index 00000000000..858a44e2fe8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/BucketValue.java @@ -0,0 +1,54 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This class represents a bucket in a {@link PredefinedFunction}. The generic T is the data type of the range values + * 'from' and 'to'. The range is inclusive-from and exclusive-to. All supported data types are represented as subclasses + * of this. 
+ * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class BucketValue extends GroupingExpression implements Comparable<BucketValue> { + + private final ConstantValue<?> from; + private final ConstantValue<?> to; + private final ConstantValueComparator comparator = new ConstantValueComparator(); + + protected BucketValue(ConstantValue<?> inclusiveFrom, ConstantValue<?> exclusiveTo) { + super("bucket[" + asImage(inclusiveFrom) + ", " + asImage(exclusiveTo) + ">"); + if (comparator.compare(exclusiveTo, inclusiveFrom) < 0) { + throw new IllegalArgumentException("Bucket to-value can not be less than from-value."); + } + from = inclusiveFrom; + to = exclusiveTo; + } + + /** + * Returns the inclusive-from value of this bucket. + * + * @return The from-value. + */ + public ConstantValue<?> getFrom() { + return from; + } + + /** + * Returns the exclusive-to value of this bucket. + * + * @return The to-value. + */ + public ConstantValue<?> getTo() { + return to; + } + + @Override + public int compareTo(BucketValue rhs) { + if (comparator.compare(to, rhs.from) <= 0) { + return -1; + } + if (comparator.compare(from, rhs.to) >= 0) { + return 1; + } + return 0; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/CatFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/CatFunction.java new file mode 100644 index 00000000000..9bc276bda92 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/CatFunction.java @@ -0,0 +1,42 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.List; + +/** + * This class represents a cat-function in a {@link GroupingExpression}. It evaluates to a byte array that equals the + * concatenation of the binary result of all arguments in the order they were given to the constructor. 
+ * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class CatFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param arg1 The first compulsory argument. + * @param arg2 The second compulsory argument. + * @param argN The optional arguments. + */ + public CatFunction(GroupingExpression arg1, GroupingExpression arg2, GroupingExpression... argN) { + this(asList(arg1, arg2, argN)); + } + + private CatFunction(List<GroupingExpression> args) { + super("cat", args); + } + + /** + * Constructs a new instance of this class from a list of arguments. + * + * @param args The arguments to pass to the constructor. + * @return The created instance. + * @throws IllegalArgumentException Thrown if the number of arguments is less than 2. + */ + public static CatFunction newInstance(List<GroupingExpression> args) { + if (args.size() < 2) { + throw new IllegalArgumentException("Expected 2 or more arguments, got " + args.size() + "."); + } + return new CatFunction(args); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/ConstantValue.java b/container-search/src/main/java/com/yahoo/search/grouping/request/ConstantValue.java new file mode 100644 index 00000000000..8b8d92b5ae8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/ConstantValue.java @@ -0,0 +1,29 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This class represents a constant value in a {@link GroupingExpression}. Because it does not operate on any input, + * this expression type can be used at any input level (see {@link GroupingExpression#resolveLevel(int)}). All supported + * data types are represented as subclasses of this. 
+ * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +@SuppressWarnings("rawtypes") +public abstract class ConstantValue<T extends Comparable> extends GroupingExpression { + + private final T value; + + protected ConstantValue(T value) { + super(asImage(value)); + this.value = value; + } + + /** + * Returns the constant value of this. + * + * @return The value. + */ + public T getValue() { + return value; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/ConstantValueComparator.java b/container-search/src/main/java/com/yahoo/search/grouping/request/ConstantValueComparator.java new file mode 100644 index 00000000000..e8017bbb796 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/ConstantValueComparator.java @@ -0,0 +1,24 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Comparator; + +/** + * This class compares two constant values, and takes into account that one of + * the arguments may be the very special infinity value. + * + * @author <a href="mailto:lulf@yahoo-inc.com">Ulf Lilleengen</a> + */ +@SuppressWarnings("rawtypes") +public class ConstantValueComparator implements Comparator<ConstantValue> { + @SuppressWarnings("unchecked") + @Override + public int compare(ConstantValue lhs, ConstantValue rhs) { + // Run infinite comparison method if one of the arguments is infinite.
+ if (rhs instanceof InfiniteValue) { + return (-1 * rhs.getValue().compareTo(lhs)); + } + return (lhs.getValue().compareTo(rhs.getValue())); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/CountAggregator.java b/container-search/src/main/java/com/yahoo/search/grouping/request/CountAggregator.java new file mode 100644 index 00000000000..f54d92cdbf5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/CountAggregator.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This class represents a count-aggregator in a {@link GroupingExpression}. It evaluates to the number of elements + * there are in the input. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class CountAggregator extends AggregatorNode { + + /** + * Constructs a new instance of this class. + */ + public CountAggregator() { + super("count"); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/DateFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/DateFunction.java new file mode 100644 index 00000000000..3d416b31d95 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/DateFunction.java @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * This class represents a timestamp-formatter function in a {@link GroupingExpression}. It evaluates to a string of the + * form "YYYY-MM-DD" of the result of the argument. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class DateFunction extends FunctionNode { + + /** + * Constructs a new instance of this class.
+ * + * @param exp The expression to evaluate, must evaluate to a long. + */ + public DateFunction(GroupingExpression exp) { + super("time.date", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/DayOfMonthFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/DayOfMonthFunction.java new file mode 100644 index 00000000000..4ead68cc8f1 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/DayOfMonthFunction.java @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * This class represents a day-of-month timestamp-function in a {@link GroupingExpression}. It evaluates to a long that + * equals the day of month (1-31) of the result of the argument. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class DayOfMonthFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, must evaluate to a long. + */ + public DayOfMonthFunction(GroupingExpression exp) { + super("time.dayofmonth", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/DayOfWeekFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/DayOfWeekFunction.java new file mode 100644 index 00000000000..f91344e2e7b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/DayOfWeekFunction.java @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * This class represents a day-of-week timestamp-function in a {@link GroupingExpression}. 
It evaluates to a long that + * equals the day of week (0 - 6) of the result of the argument, Monday being 0. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class DayOfWeekFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, must evaluate to a long. + */ + public DayOfWeekFunction(GroupingExpression exp) { + super("time.dayofweek", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/DayOfYearFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/DayOfYearFunction.java new file mode 100644 index 00000000000..20313864493 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/DayOfYearFunction.java @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * This class represents a day-of-year timestamp-function in a {@link GroupingExpression}. It evaluates to a long that + * equals the day of year (0-365) of the result of the argument. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class DayOfYearFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, must evaluate to a long. + */ + public DayOfYearFunction(GroupingExpression exp) { + super("time.dayofyear", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/DebugWaitFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/DebugWaitFunction.java new file mode 100644 index 00000000000..c2f26e6b3b0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/DebugWaitFunction.java @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * This class represents a debugwait function in a {@link GroupingExpression}. For each hit evaluated, + * it waits for the time specified as the second argument. The third argument specifies if the wait + * should be a busy-wait or not. The first argument is then evaluated. + * + * @author <a href="mailto:lulf@yahoo-inc.com">Ulf Lilleengen</a> + */ +public class DebugWaitFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param arg1 The first compulsory argument, the expression to proxy. + * @param arg2 The second compulsory argument, must evaluate to a positive number. + * @param arg3 The third compulsory argument, specifying busy wait or not. + */ + public DebugWaitFunction(GroupingExpression arg1, DoubleValue arg2, BooleanValue arg3) { + super("debugwait", Arrays.asList(arg1, arg2, arg3)); + } + + /** + * Returns the time to wait when evaluating this function. + * + * @return the number of seconds to wait. + */ + public double getWaitTime() { + return ((DoubleValue)getArg(1)).getValue(); + } + + /** + * Returns whether or not the debug node should busy-wait. + * + * @return true if busy-wait, false if not. + */ + public boolean getBusyWait() { + return ((BooleanValue)getArg(2)).getValue(); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/DivFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/DivFunction.java new file mode 100644 index 00000000000..9ed263362fa --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/DivFunction.java @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.search.grouping.request; + +import java.util.List; + +/** + * This class represents a div-function in a {@link GroupingExpression}. It evaluates to a number that equals the result + * of dividing the results of all arguments in the order they were given to the constructor (divide first argument by + * second, result by third, ...). + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class DivFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param arg1 The first compulsory argument, must evaluate to a number. + * @param arg2 The second compulsory argument, must evaluate to a number. + * @param argN The optional arguments, must evaluate to a number. + */ + public DivFunction(GroupingExpression arg1, GroupingExpression arg2, GroupingExpression... argN) { + this(asList(arg1, arg2, argN)); + } + + private DivFunction(List<GroupingExpression> args) { + super("div", args); + } + + /** + * Constructs a new instance of this class from a list of arguments. + * + * @param args The arguments to pass to the constructor. + * @return The created instance. + * @throws IllegalArgumentException Thrown if the number of arguments is less than 2. + */ + public static DivFunction newInstance(List<GroupingExpression> args) { + if (args.size() < 2) { + throw new IllegalArgumentException("Expected 2 or more arguments, got " + args.size() + "."); + } + return new DivFunction(args); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/DocIdNsSpecificValue.java b/container-search/src/main/java/com/yahoo/search/grouping/request/DocIdNsSpecificValue.java new file mode 100644 index 00000000000..02c8d66be5d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/DocIdNsSpecificValue.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.grouping.request; + +/** + * This class represents a document id specific value in a {@link GroupingExpression}. It evaluates to the namespace- + * specific value of the document id of the input {@link com.yahoo.search.result.Hit}. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class DocIdNsSpecificValue extends DocumentValue { + + /** + * Constructs a new instance of this class. + */ + public DocIdNsSpecificValue() { + super("docidnsspecific()"); + } +} + diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/DocumentValue.java b/container-search/src/main/java/com/yahoo/search/grouping/request/DocumentValue.java new file mode 100644 index 00000000000..98d5a6fe21f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/DocumentValue.java @@ -0,0 +1,24 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This class represents a document value in a {@link GroupingExpression}. As such, the subclasses of this can only be + * used as document-level expressions (i.e. level 0, see {@link GroupingExpression#resolveLevel(int)}). 
+ * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public abstract class DocumentValue extends GroupingExpression { + + protected DocumentValue(String image) { + super(image); + } + + @Override + public void resolveLevel(int level) { + if (level != 0) { + throw new IllegalArgumentException("Expression '" + this + "' not applicable for " + + GroupingOperation.getLevelDesc(level) + "."); + } + super.resolveLevel(level); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/DoubleBucket.java b/container-search/src/main/java/com/yahoo/search/grouping/request/DoubleBucket.java new file mode 100644 index 00000000000..4e12e96272e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/DoubleBucket.java @@ -0,0 +1,41 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; +import java.text.ChoiceFormat; + +/** + * This class represents a {@link Double} bucket in a {@link PredefinedFunction}. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class DoubleBucket extends BucketValue { + + /** + * Returns the next distinct value. + * + * @param value The base value. + * @return the next value. + */ + public static DoubleValue nextValue(DoubleValue value) { + return (new DoubleValue(ChoiceFormat.nextDouble(value.getValue()))); + } + + /** + * Constructs a new instance of this class. + * + * @param from The from-value to assign to this. + * @param to The to-value to assign to this. + */ + public DoubleBucket(double from, double to) { + super(new DoubleValue(from), new DoubleValue(to)); + } + + /** + * Constructs a new instance of this class. + * + * @param from The from-value to assign to this. + * @param to The to-value to assign to this. 
+ */ + public DoubleBucket(ConstantValue<?> from, ConstantValue<?> to) { + super(from, to); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/DoublePredefined.java b/container-search/src/main/java/com/yahoo/search/grouping/request/DoublePredefined.java new file mode 100644 index 00000000000..59265359715 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/DoublePredefined.java @@ -0,0 +1,48 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.List; + +/** + * This class represents a predefined bucket-function in a {@link GroupingExpression} for expressions that evaluate to a + * double. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class DoublePredefined extends PredefinedFunction { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, must evaluate to a double. + * @param arg1 The compulsory bucket. + * @param argN The optional buckets. + */ + public DoublePredefined(GroupingExpression exp, DoubleBucket arg1, DoubleBucket... argN) { + this(exp, asList(arg1, argN)); + } + + private DoublePredefined(GroupingExpression exp, List<DoubleBucket> args) { + super(exp, args); + } + + @Override + public DoubleBucket getBucket(int i) { + return (DoubleBucket)getArg(i + 1); + } + + /** + * Constructs a new instance of this class from a list of arguments. + * + * @param exp The expression to evaluate, must evaluate to a double. + * @param args The buckets to pass to the constructor. + * @return The created instance. + * @throws IllegalArgumentException Thrown if the list of buckets is empty. 
+ */ + public static DoublePredefined newInstance(GroupingExpression exp, List<DoubleBucket> args) { + if (args.isEmpty()) { + throw new IllegalArgumentException("Expected at least one bucket, got none."); + } + return new DoublePredefined(exp, args); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/DoubleValue.java b/container-search/src/main/java/com/yahoo/search/grouping/request/DoubleValue.java new file mode 100644 index 00000000000..682102533ff --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/DoubleValue.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This class represents a constant {@link Double} value in a {@link GroupingExpression}. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class DoubleValue extends ConstantValue<Double> { + + /** + * Constructs a new instance of this class. + * + * @param value The immutable value to assign to this. + */ + public DoubleValue(double value) { + super(value); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/EachOperation.java b/container-search/src/main/java/com/yahoo/search/grouping/request/EachOperation.java new file mode 100644 index 00000000000..12f6df1f497 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/EachOperation.java @@ -0,0 +1,26 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This is a grouping operation that processes each element of the input list separately, as opposed to {@link + * AllOperation} which processes that list as a whole. 
+ * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class EachOperation extends GroupingOperation { + + /** + * Constructs a new instance of this class. + */ + public EachOperation() { + super("each"); + } + + @Override + public void resolveLevel(int level) { + if (level == 0) { + throw new IllegalArgumentException("Operation '" + this + "' can not operate on " + getLevelDesc(level) + "."); + } + super.resolveLevel(level - 1); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/ExpressionVisitor.java b/container-search/src/main/java/com/yahoo/search/grouping/request/ExpressionVisitor.java new file mode 100644 index 00000000000..ba411ac45ce --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/ExpressionVisitor.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This interface defines the necessary callback to recursively visit all {@link GroupingExpression} objects in a {@link + * GroupingOperation}. It is used by the {@link com.yahoo.search.grouping.GroupingValidator} to ensure that all + * referenced attributes are valid for the cluster being queried. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public interface ExpressionVisitor { + + /** + * This method is called for every {@link GroupingExpression} object in the targeted {@link GroupingOperation}. + * + * @param exp The expression being visited. 
+ */ + public void visitExpression(GroupingExpression exp); +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/FixedWidthFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/FixedWidthFunction.java new file mode 100644 index 00000000000..9ac3870718b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/FixedWidthFunction.java @@ -0,0 +1,34 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * This class represents a fixed-width bucket-function in a {@link GroupingExpression}. It maps the input into the given + * number of buckets by the result of the argument expression. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class FixedWidthFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate. + * @param width The width of each bucket. + */ + public FixedWidthFunction(GroupingExpression exp, Number width) { + super("fixedwidth", Arrays.asList(exp, width instanceof Double ? new DoubleValue(width.doubleValue()) : new LongValue(width.longValue()))); + } + + /** + * Returns the width of each bucket. + * + * @return The bucket width. + */ + public Number getWidth() { + GroupingExpression w = getArg(1); + return (w instanceof LongValue) ? (Number)((LongValue)w).getValue() : (Number)((DoubleValue)w).getValue(); // Number casts keep ?: from promoting a long width to double + } +} + diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/FunctionNode.java b/container-search/src/main/java/com/yahoo/search/grouping/request/FunctionNode.java new file mode 100644 index 00000000000..3003ce69abe --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/FunctionNode.java @@ -0,0 +1,78 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.*; + +/** + * This class represents a function in a {@link GroupingExpression}. Because it operates on other expressions (as opposed + * to {@link AggregatorNode} and {@link DocumentValue} that operate on inputs), this expression type can be used at any + * input level (see {@link GroupingExpression#resolveLevel(int)}). + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public abstract class FunctionNode extends GroupingExpression implements Iterable<GroupingExpression> { + + private final List<GroupingExpression> args = new ArrayList<>(); + + protected FunctionNode(String image, List<GroupingExpression> args) { + super(image + "(" + asString(args) + ")"); + this.args.addAll(args); + } + + /** + * Returns the number of arguments that were given to this function at construction. + * + * @return The argument count. + */ + public int getNumArgs() { + return args.size(); + } + + /** + * Returns the argument at the given index. + * + * @param i The index of the argument to return. + * @return The argument at the given index. + * @throws IndexOutOfBoundsException If the index is out of range. + */ + public GroupingExpression getArg(int i) { + return args.get(i); + } + + @Override + public Iterator<GroupingExpression> iterator() { + return Collections.unmodifiableList(args).iterator(); + } + + @Override + public void resolveLevel(int level) { + super.resolveLevel(level); + for (GroupingExpression arg : args) { + arg.resolveLevel(level); + } + } + + @Override + public void visit(ExpressionVisitor visitor) { + super.visit(visitor); + for (GroupingExpression arg : args) { + arg.visit(visitor); + } + } + + @SuppressWarnings("unchecked") + protected static <T> List<T> asList(T arg1, T... 
argN) { + return asList(Arrays.asList(arg1), Arrays.asList(argN)); + } + + @SuppressWarnings("unchecked") + protected static <T> List<T> asList(T arg1, T arg2, T... argN) { + return asList(Arrays.asList(arg1, arg2), Arrays.asList(argN)); + } + + protected static <T> List<T> asList(List<T> foo, List<T> bar) { + List<T> ret = new LinkedList<>(foo); + ret.addAll(bar); + return ret; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/GroupingExpression.java b/container-search/src/main/java/com/yahoo/search/grouping/request/GroupingExpression.java new file mode 100644 index 00000000000..6015557f81e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/GroupingExpression.java @@ -0,0 +1,100 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import com.yahoo.javacc.UnicodeUtilities; + +import java.util.List; + +/** + * This class represents an expression in a {@link GroupingOperation}. You may manually construct this expression, or + * you may use the {@link com.yahoo.search.grouping.request.parser.GroupingParser} to generate one from a query-string. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public abstract class GroupingExpression extends GroupingNode { + + private Integer level = null; + + protected GroupingExpression(String image) { + super(image); + } + + /** + * Resolves the conceptual level of this expression. This level represents the type of data that is consumed by this + * expression, where level 0 is a single hit, level 1 is a group, level 2 is a list of groups, and so forth. This + * method verifies the input level against the expression type, and recursively resolves the level of all argument + * expressions. + * + * @param level The level of the input data. 
+ * @throws IllegalArgumentException Thrown if the level of this expression could not be resolved. + * @throws IllegalStateException Thrown if type failed to accept the number of arguments provided. + */ + public void resolveLevel(int level) { + if (level < 0) { + throw new IllegalArgumentException("Expression '" + this + "' recurses through a single hit."); + } + this.level = level; + } + + /** + * Returns the conceptual level of this expression. + * + * @return The level. + * @throws IllegalStateException Thrown if the level of this expression has not been resolved. + * @see #resolveLevel(int) + */ + public int getLevel() { + if (level == null) { + throw new IllegalStateException("Level for expression '" + this + "' has not been resolved."); + } + return level; + } + + /** + * Recursively calls {@link ExpressionVisitor#visitExpression(GroupingExpression)} for this expression and all of + * its argument expressions. + * + * @param visitor The visitor to call. + */ + public void visit(ExpressionVisitor visitor) { + visitor.visitExpression(this); + } + + /** + * Returns a string description of the given list of expressions. This is a comma-separated list of the expressions' + * own {@link GroupingExpression#toString()} output. + * + * @param lst The list of expressions to output. + * @return The string description. + */ + public static String asString(List<GroupingExpression> lst) { + StringBuilder ret = new StringBuilder(); + for (int i = 0, len = lst.size(); i < len; ++i) { + ret.append(lst.get(i)); + if (i < len - 1) { + ret.append(", "); + } + } + return ret.toString(); + } + + /** + * Returns a string representation of an object that can be used in the 'image' constructor argument of {@link + * GroupingNode}. This method ensures that strings are quoted, and that all complex characters are escaped. + * + * @param obj The object to output. + * @return The string representation. 
+ */ + public static String asImage(Object obj) { + if (!(obj instanceof String)) { + return obj.toString(); + } + return UnicodeUtilities.quote((String)obj, '"'); + } + + @Override + public GroupingExpression setLabel(String label) { + super.setLabel(label); + return this; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/GroupingNode.java b/container-search/src/main/java/com/yahoo/search/grouping/request/GroupingNode.java new file mode 100644 index 00000000000..b400dfe5737 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/GroupingNode.java @@ -0,0 +1,44 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This is the abstract super class of both {@link GroupingOperation} and {@link GroupingExpression}. All nodes can be + * assigned a {@link String} label which in turn can be used to identify the corresponding result objects. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public abstract class GroupingNode { + + private final String image; + private String label = null; + + protected GroupingNode(String image) { + this.image = image; + } + + /** + * Returns the label assigned to this grouping expression. + * + * @return The label string. + */ + public String getLabel() { + return label; + } + + /** + * Assigns a label to this grouping expression. The label is applied to the results of this expression so that they + * can be identified by the caller when processing the output. + * + * @param str The label to assign to this. + * @return This, to allow chaining. 
+ */ + public GroupingNode setLabel(String str) { + label = str; + return this; + } + + @Override + public String toString() { + return image; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/GroupingOperation.java b/container-search/src/main/java/com/yahoo/search/grouping/request/GroupingOperation.java new file mode 100644 index 00000000000..d49713ba9f2 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/GroupingOperation.java @@ -0,0 +1,582 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import com.yahoo.collections.LazyMap; +import com.yahoo.collections.LazySet; +import com.yahoo.search.grouping.request.parser.GroupingParser; +import com.yahoo.search.grouping.request.parser.GroupingParserInput; +import com.yahoo.search.grouping.request.parser.ParseException; +import com.yahoo.search.grouping.request.parser.TokenMgrError; + +import java.util.*; + +/** + * This class represents a single node in a grouping operation tree. You may manually construct this tree, or you may + * use the {@link #fromString(String)} method to generate one from a query-string. To execute, assign it to a {@link + * com.yahoo.search.grouping.GroupingRequest} using the {@link com.yahoo.search.grouping.GroupingRequest#setRootOperation(GroupingOperation)} + * method. 
+ * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public abstract class GroupingOperation extends GroupingNode { + + private final List<GroupingExpression> orderBy = new ArrayList<>(); + private final List<GroupingExpression> outputs = new ArrayList<>(); + private final List<GroupingOperation> children = new ArrayList<>(); + private final Map<String, GroupingExpression> alias = LazyMap.newHashMap(); + private final Set<String> hints = LazySet.newHashSet(); + + private GroupingExpression groupBy = null; + private GroupingOperation parent = null; + private String where = null; + private boolean forceSinglePass = false; + private double accuracy = 0.95; + private int precision = 0; + private int level = -1; + private int max = -1; + + protected GroupingOperation(String image) { + super(image); + } + + /** + * Registers an alias with this operation. An alias is made available to expressions in both this node and all child + * nodes. + * + * @param id The id of the alias to put. + * @param exp The expression to associate with the id. + * @return This, to allow chaining. + */ + public GroupingOperation putAlias(String id, GroupingExpression exp) { + alias.put(id, exp); + return this; + } + + /** + * Returns the alias associated with the given name. If no alias can be found in this node, this method queries its + * parent grouping node. If the alias still can not be found, this method returns null. + * + * @param id The id of the alias to return. + * @return The expression associated with the id. + */ + public GroupingExpression getAlias(String id) { + if (alias.containsKey(id)) { + return alias.get(id); + } else if (parent != null) { + return parent.getAlias(id); + } else { + return null; + } + } + + /** + * Adds a hint to this. + * + * @param hint The hint to add. + * @return This, to allow chaining. 
+ */ + public GroupingOperation addHint(String hint) { + hints.add(hint); + return this; + } + + /** + * Returns whether or not the given hint has been added to this. + * + * @param hint The hint to check for. + * @return True if the hint has been added. + */ + public boolean containsHint(String hint) { + return hints.contains(hint); + } + + /** + * Returns an immutable view to the hint list of this node. + * + * @return The list. + */ + public Set<String> getHints() { + return Collections.unmodifiableSet(hints); + } + + /** + * Adds a child grouping node to this. This will also set the parent of the child so that it points to this node. + * + * @param op The child node to add. + * @return This, to allow chaining. + */ + public GroupingOperation addChild(GroupingOperation op) { + op.parent = this; + children.add(op); + return this; + } + + /** + * Convenience method to call {@link #addChild(GroupingOperation)} for each element in the given list. + * + * @param lst The list of operations to add. + * @return This, to allow chaining. + */ + public GroupingOperation addChildren(List<GroupingOperation> lst) { + for (GroupingOperation op : lst) { + addChild(op); + } + return this; + } + + /** + * Returns the number of child operations of this. + * + * @return The child count. + */ + public int getNumChildren() { + return children.size(); + } + + /** + * Returns the child operation at the given index. + * + * @param i The index of the child to return. + * @return The child at the given index. + * @throws IndexOutOfBoundsException If the index is out of range. + */ + public GroupingOperation getChild(int i) { + return children.get(i); + } + + /** + * Returns an immutable view to the child list of this node. + * + * @return The list. + */ + public List<GroupingOperation> getChildren() { + return Collections.unmodifiableList(children); + } + + /** + * Assigns an expressions as the group-by clause of this operation. + * + * @param exp The expression to assign to this. 
+ * @return This, to allow chaining. + */ + public GroupingOperation setGroupBy(GroupingExpression exp) { + groupBy = exp; + return this; + } + + /** + * Returns the expression assigned as the group-by clause of this. + * + * @return The expression. + */ + public GroupingExpression getGroupBy() { + return groupBy; + } + + /** + * Returns the conceptual level of this node. + * + * @return The level, or -1 if not resolved. + * @see #resolveLevel(int) + */ + public int getLevel() { + return level; + } + + /** + * Resolves the conceptual level of this operation. This level represents the type of data that is consumed by this + * operation, where level 0 is a single hit, level 1 is a group, level 2 is a list of groups, and so forth. This + * method verifies the input level against the operation type, and recursively resolves the level of all argument + * expressions. + * + * @param level The level of the input data. + * @throws IllegalArgumentException Thrown if a contained expression is invalid for the given level. 
+ */ + public void resolveLevel(int level) { + if (groupBy != null) { + if (level == 0) { + throw new IllegalArgumentException( + "Operation '" + this + "' can not group " + getLevelDesc(level) + "."); + } + groupBy.resolveLevel(level - 1); + ++level; + } + if (hasMax()) { + if (level == 0) { + throw new IllegalArgumentException( + "Operation '" + this + "' can not apply max to " + getLevelDesc(level) + "."); + } + } + this.level = level; + for (GroupingExpression exp : outputs) { + exp.resolveLevel(level); + } + if (!orderBy.isEmpty()) { + if (level == 0) { + throw new IllegalArgumentException( + "Operation '" + this + "' can not order " + getLevelDesc(level) + "."); + } + for (GroupingExpression exp : orderBy) { + exp.resolveLevel(level - 1); + } + } + for (GroupingOperation child : children) { + child.resolveLevel(level); + } + } + + public GroupingOperation setForceSinglePass(boolean forceSinglePass) { + this.forceSinglePass = forceSinglePass; + return this; + } + + public boolean getForceSinglePass() { + return forceSinglePass; + } + + /** + * Assigns the max clause of this. This is the maximum number of groups to return for this operation. + * + * @param max The maximum number of groups to return. + * @return This, to allow chaining. + * @see #setPrecision(int) + */ + public GroupingOperation setMax(int max) { + this.max = max; + return this; + } + + /** + * Returns the max clause of this. + * + * @return The maximum number of groups, or a negative value if no max has been set. + * @see #setMax(int) + */ + public int getMax() { + return max; + } + + /** + * Indicates if the 'max' value has been set. + * + * @return true if max value is set. + */ + public boolean hasMax() { return max >= 0; } + + /** + * Assigns an accuracy value for this. This is a number between 0 and 1 describing the accuracy of the result, which + * again determines the speed of the grouping request. A low value will make sure the grouping operation runs fast, + * at the sacrifice of a (possibly) imprecise result. 
+ * + * @param accuracy The accuracy to assign to this. + * @return This, to allow chaining. + * @throws IllegalArgumentException If the accuracy is outside the allowed value range. + */ + public GroupingOperation setAccuracy(double accuracy) { + if (accuracy > 1.0 || accuracy < 0.0) { + throw new IllegalArgumentException("Illegal accuracy '" + accuracy + "'. Must be between 0 and 1."); + } + this.accuracy = accuracy; + return this; + } + + /** + * Returns the accuracy of this. + * + * @return The accuracy value. + * @see #setAccuracy(double) + */ + public double getAccuracy() { + return accuracy; + } + + /** + * Adds an expression to the order-by clause of this operation. + * + * @param exp The expression to add to this. + * @return This, to allow chaining. + */ + public GroupingOperation addOrderBy(GroupingExpression exp) { + orderBy.add(exp); + return this; + } + + /** + * Convenience method to call {@link #addOrderBy(GroupingExpression)} for each element in the given list. + * + * @param lst The list of expressions to add. + * @return This, to allow chaining. + */ + public GroupingOperation addOrderBy(List<GroupingExpression> lst) { + for (GroupingExpression exp : lst) { + addOrderBy(exp); + } + return this; + } + + /** + * Returns the number of expressions in the order-by clause of this. + * + * @return The expression count. + */ + public int getNumOrderBy() { + return orderBy.size(); + } + + /** + * Returns the order-by expression at the given index. + * + * @param i The index of the expression to return. + * @return The expression at the given index. + * @throws IndexOutOfBoundsException If the index is out of range. + */ + public GroupingExpression getOrderBy(int i) { + return orderBy.get(i); + } + + /** + * Returns an immutable view to the order-by clause of this. + * + * @return The expression list. 
+ */ + public List<GroupingExpression> getOrderBy() { + return Collections.unmodifiableList(orderBy); + } + + /** + * Adds an expression to the output clause of this operation. + * + * @param exp The expressions to add to this. + * @return This, to allow chaining. + */ + public GroupingOperation addOutput(GroupingExpression exp) { + outputs.add(exp); + return this; + } + + /** + * Convenience method to call {@link #addOutput(GroupingExpression)} for each element in the given list. + * + * @param lst The list of expressions to add. + * @return This, to allow chaining. + */ + public GroupingOperation addOutputs(List<GroupingExpression> lst) { + for (GroupingExpression exp : lst) { + addOutput(exp); + } + return this; + } + + /** + * Returns the number of expressions in the output clause of this. + * + * @return The expression count. + */ + public int getNumOutputs() { + return outputs.size(); + } + + /** + * Returns the output expression at the given index. + * + * @param i The index of the expression to return. + * @return The expression at the given index. + * @throws IndexOutOfBoundsException If the index is out of range. + */ + public GroupingExpression getOutput(int i) { + return outputs.get(i); + } + + /** + * Returns an immutable view to the output clause of this. + * + * @return The expression list. + */ + public List<GroupingExpression> getOutputs() { + return Collections.unmodifiableList(outputs); + } + + /** + * Assigns the precision clause of this. This is the number of intermediate groups returned from each search-node + * during expression evaluation to give the dispatch-node more data to consider when selecting the N groups that are + * to be evaluated further. + * + * @param precision The precision to set. + * @return This, to allow chaining. + * @see #setMax(int) + */ + public GroupingOperation setPrecision(int precision) { + this.precision = precision; + return this; + } + + /** + * Returns the precision clause of this. 
+ * + * @return The precision. + */ + public int getPrecision() { + return precision; + } + + /** + * Assigns a string as the where clause of this operation. + * + * @param str The string to assign to this. + * @return This, to allow chaining. + */ + public GroupingOperation setWhere(String str) { + where = str; + return this; + } + + /** + * Returns the where clause assigned to this operation. + * + * @return The where clause. + */ + public String getWhere() { + return where; + } + + /** + * Recursively calls {@link GroupingExpression#visit(ExpressionVisitor)} on all {@link GroupingExpression} objects + * in this operation and in all of its child operations. + * + * @param visitor The visitor to call. + */ + public void visitExpressions(ExpressionVisitor visitor) { + for (GroupingExpression exp : alias.values()) { + exp.visit(visitor); + } + for (GroupingExpression exp : outputs) { + exp.visit(visitor); + } + for (GroupingExpression exp : orderBy) { + exp.visit(visitor); + } + if (groupBy != null) { + groupBy.visit(visitor); + } + for (GroupingOperation op : children) { + op.visitExpressions(visitor); + } + } + + @Override + public GroupingOperation setLabel(String label) { + super.setLabel(label); + return this; + } + + @Override + public String toString() { + StringBuilder ret = new StringBuilder(); + ret.append(super.toString()).append("("); + if (groupBy != null) { + ret.append("group(").append(groupBy).append(") "); + } + for (String hint : hints) { + ret.append("hint(").append(hint).append(") "); + } + if (hasMax()) { + ret.append("max(").append(max).append(") "); + } + if (!orderBy.isEmpty()) { + ret.append("order("); + ret.append(GroupingExpression.asString(orderBy)); + ret.append(") "); + } + if (!outputs.isEmpty()) { + ret.append("output("); + for (int i = 0, len = outputs.size(); i < len; ++i) { + GroupingExpression exp = outputs.get(i); + ret.append(exp); + String label = exp.getLabel(); + if (label != null) { + ret.append(" 
as(").append(label).append(")"); + } + if (i < len - 1) { + ret.append(", "); + } + } + ret.append(") "); + } + if (precision != 0) { + ret.append("precision(").append(precision).append(") "); + } + if (where != null) { + ret.append("where(").append(where).append(") "); + } + for (GroupingOperation child : children) { + ret.append(child).append(" "); + } + int len = ret.length(); + if (ret.charAt(len - 1) == ' ') { + ret.setLength(len - 1); + } + ret.append(")"); + String label = getLabel(); + if (label != null) { + ret.append(" as(").append(label).append(")"); + } + return ret.toString(); + } + + /** + * Returns a description of the given level. This allows for more descriptive errors being passed back to the user. + * + * @param level The level to describe. + * @return A description of the given level. + */ + public static String getLevelDesc(int level) { + if (level <= 0) { + return "single hit"; + } else if (level == 1) { + return "single group"; + } else { + StringBuilder ret = new StringBuilder(); + for (int i = 1; i < level; ++i) { + ret.append("list of "); + } + ret.append("groups"); + return ret.toString(); + } + } + + /** + * Convenience method to call {@link #fromStringAsList(String)} and assert that the list contains exactly one + * grouping operation. + * + * @param str The string to parse. + * @return A grouping operation that corresponds to the string. + * @throws IllegalArgumentException Thrown if the string could not be parsed as a single operation. + */ + public static GroupingOperation fromString(String str) { + List<GroupingOperation> lst = fromStringAsList(str); + if (lst.size() != 1) { + throw new IllegalArgumentException("Expected 1 operation, got " + lst.size() + "."); + } + return lst.get(0); + } + + /** + * Parses the given string as a list of grouping operations. This method never returns null, it either returns a + * list of valid grouping requests or it throws an exception. + * + * @param str The string to parse. 
+ * @return A list of grouping operations that corresponds to the string. + * @throws IllegalArgumentException Thrown if the string could not be parsed. + */ + public static List<GroupingOperation> fromStringAsList(String str) { + if (str == null || str.trim().length() == 0) { + return Collections.emptyList(); + } + GroupingParserInput input = new GroupingParserInput(str); + try { + return new GroupingParser(input).requestList(); + } catch (ParseException | TokenMgrError e) { + throw new IllegalArgumentException(input.formatException(e.getMessage()), e); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/HourOfDayFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/HourOfDayFunction.java new file mode 100644 index 00000000000..5410ada6cf5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/HourOfDayFunction.java @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * This class represents an hour-of-day timestamp-function in a {@link GroupingExpression}. It evaluates to a long that + * equals the hour of day (0-23) of the result of the argument. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class HourOfDayFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, must evaluate to a long. 
+ */ + public HourOfDayFunction(GroupingExpression exp) { + super("time.hourofday", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/Infinite.java b/container-search/src/main/java/com/yahoo/search/grouping/request/Infinite.java new file mode 100644 index 00000000000..dfee7d0e48a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/Infinite.java @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This class represents an Infinite value that may be used as a bucket + * size specifier. + * + * @author <a href="mailto:lulf@yahoo-inc.com">Ulf Lilleengen</a> + */ +@SuppressWarnings("rawtypes") +public class Infinite implements Comparable { + private final boolean negative; + + /** + * Create an Infinite object with positive or negative sign. + * @param negative the signedness. + */ + public Infinite(boolean negative) { + this.negative = negative; + } + + /** + * Override the toString method in order to be re-parseable. + */ + @Override + public String toString() { + return (negative ? "-inf" : "inf"); + } + + /** + * An infinity value is always less than or greater than. + */ + @Override + public int compareTo(Object rhs) { + return (negative ? -1 : 1); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/InfiniteValue.java b/container-search/src/main/java/com/yahoo/search/grouping/request/InfiniteValue.java new file mode 100644 index 00000000000..d20a9eb63f8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/InfiniteValue.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This class represents an infinite value in a {@link GroupingExpression}. 
+ * + * @author <a href="mailto:lulf@yahoo-inc.com">Ulf Lilleengen</a> + */ +public class InfiniteValue extends ConstantValue<Infinite> { + + /** + * Constructs a new instance of this class. + * + * @param value The immutable value to assign to this. + */ + public InfiniteValue(Infinite value) { + super(value); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/InterpolatedLookup.java b/container-search/src/main/java/com/yahoo/search/grouping/request/InterpolatedLookup.java new file mode 100644 index 00000000000..a49ccdddbbc --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/InterpolatedLookup.java @@ -0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import com.google.common.annotations.Beta; + +/** + * This class represents a lookup in a multivalue document + * attribute in a {@link GroupingExpression}. It takes the + * attribute (assumed to contain a sorted array) from the input + * {@link com.yahoo.search.result.Hit} and finds the index that + * the second (lookup) argument expression would have, with linear + * interpolation when the lookup argument is between two array + * element values. + * + * @author arnej27959 + */ +@Beta +public class InterpolatedLookup extends DocumentValue { + + private final String attributeName; + private final GroupingExpression arg2; + + /** + * Constructs a new instance of this class. 
+ * + * @param attributeName The attribute name the lookup should happen in + * @param lookupArg Expression giving a floating-point value for the lookup argument + */ + public InterpolatedLookup(String attributeName, GroupingExpression lookupArg) { + super("interpolatedlookup(" + attributeName + ", " + lookupArg + ")"); + this.attributeName = attributeName; + this.arg2 = lookupArg; + } + + /** + * Get the name of the attribute to be retrieved from the input hit. + * @return The attribute name. + */ + public String getAttributeName() { + return attributeName; + } + + /** + * Get the expression that will be evaluated before lookup. + * @return grouping expression argument + */ + public GroupingExpression getLookupArgument() { + return arg2; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/LongBucket.java b/container-search/src/main/java/com/yahoo/search/grouping/request/LongBucket.java new file mode 100644 index 00000000000..566ca31cb2e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/LongBucket.java @@ -0,0 +1,42 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This class represents a {@link Long} bucket in a {@link PredefinedFunction}. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class LongBucket extends BucketValue { + + /** + * Gives the next distinct long value. + * + * @param value the base value. + * @return the next value. + */ + public static LongValue nextValue(LongValue value) { + long v = value.getValue(); + return new LongValue(v < Long.MAX_VALUE ? v + 1 : v); + } + + /** + * Constructs a new instance of this class. + * + * @param from The from-value to assign to this. + * @param to The to-value to assign to this.
+ */ + public LongBucket(long from, long to) { + super(new LongValue(from), new LongValue(to)); + } + + /** + * Constructs a new instance of this class. + * + * @param from The from-value to assign to this. + * @param to The to-value to assign to this. + */ + @SuppressWarnings("rawtypes") + public LongBucket(ConstantValue from, ConstantValue to) { + super(from, to); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/LongPredefined.java b/container-search/src/main/java/com/yahoo/search/grouping/request/LongPredefined.java new file mode 100644 index 00000000000..486c8a9ddde --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/LongPredefined.java @@ -0,0 +1,48 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.List; + +/** + * This class represents a predefined bucket-function in a {@link GroupingExpression} for expressions that evaluate to a + * long. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class LongPredefined extends PredefinedFunction { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, must evaluate to a long. + * @param arg1 The compulsory bucket. + * @param argN The optional buckets. + */ + public LongPredefined(GroupingExpression exp, LongBucket arg1, LongBucket... argN) { + this(exp, asList(arg1, argN)); + } + + private LongPredefined(GroupingExpression exp, List<LongBucket> args) { + super(exp, args); + } + + @Override + public LongBucket getBucket(int i) { + return (LongBucket)getArg(i + 1); + } + + /** + * Constructs a new instance of this class from a list of arguments. + * + * @param exp The expression to evaluate, must evaluate to a long. + * @param args The buckets to pass to the constructor. + * @return The created instance. 
+ * @throws IllegalArgumentException Thrown if the list of buckets is empty. + */ + public static LongPredefined newInstance(GroupingExpression exp, List<LongBucket> args) { + if (args.isEmpty()) { + throw new IllegalArgumentException("Expected at least one bucket, got none."); + } + return new LongPredefined(exp, args); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/LongValue.java b/container-search/src/main/java/com/yahoo/search/grouping/request/LongValue.java new file mode 100644 index 00000000000..62a0cb01f08 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/LongValue.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This class represents a constant {@link Long} value in a {@link GroupingExpression}. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class LongValue extends ConstantValue<Long> { + + /** + * Constructs a new instance of this class. + * + * @param value The immutable value to assign to this. + */ + public LongValue(long value) { + super(value); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MathACosFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MathACosFunction.java new file mode 100644 index 00000000000..637e0fdf57e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MathACosFunction.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * @author balder + */ +public class MathACosFunction extends FunctionNode { + /** + * Constructs a new instance of this class. 
+ * + * @param exp The expression to evaluate, double value will be requested. + */ + public MathACosFunction(GroupingExpression exp) { + super("math.acos", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MathACosHFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MathACosHFunction.java new file mode 100644 index 00000000000..aa5677d90d4 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MathACosHFunction.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * @author balder + */ +public class MathACosHFunction extends FunctionNode { +/** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, double value will be requested. + */ + public MathACosHFunction(GroupingExpression exp) { + super("math.acosh", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MathASinFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MathASinFunction.java new file mode 100644 index 00000000000..c4b9c7a62d6 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MathASinFunction.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * @author balder + */ +public class MathASinFunction extends FunctionNode { + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, double value will be requested. 
+ */ + public MathASinFunction(GroupingExpression exp) { + super("math.asin", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MathASinHFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MathASinHFunction.java new file mode 100644 index 00000000000..f368aefe88a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MathASinHFunction.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * @author balder + */ +public class MathASinHFunction extends FunctionNode { +/** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, double value will be requested. + */ + public MathASinHFunction(GroupingExpression exp) { + super("math.asinh", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MathATanFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MathATanFunction.java new file mode 100644 index 00000000000..ed9349c86e6 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MathATanFunction.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * @author balder + */ +public class MathATanFunction extends FunctionNode { + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, double value will be requested. 
+ */ + public MathATanFunction(GroupingExpression exp) { + super("math.atan", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MathATanHFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MathATanHFunction.java new file mode 100644 index 00000000000..ebcfd1895fa --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MathATanHFunction.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * @author balder + */ +public class MathATanHFunction extends FunctionNode { +/** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, double value will be requested. + */ + public MathATanHFunction(GroupingExpression exp) { + super("math.atanh", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MathCbrtFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MathCbrtFunction.java new file mode 100644 index 00000000000..78e2c3c9aa5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MathCbrtFunction.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * @author balder + */ +public class MathCbrtFunction extends FunctionNode { +/** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, double value will be requested. 
+ */ + public MathCbrtFunction(GroupingExpression exp) { + super("math.cbrt", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MathCosFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MathCosFunction.java new file mode 100644 index 00000000000..0ab35653607 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MathCosFunction.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * @author balder + */ +public class MathCosFunction extends FunctionNode { + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, double value will be requested. + */ + public MathCosFunction(GroupingExpression exp) { + super("math.cos", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MathCosHFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MathCosHFunction.java new file mode 100644 index 00000000000..f4137c302e8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MathCosHFunction.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * @author balder + */ +public class MathCosHFunction extends FunctionNode { +/** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, double value will be requested. 
+ */ + public MathCosHFunction(GroupingExpression exp) { + super("math.cosh", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MathExpFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MathExpFunction.java new file mode 100644 index 00000000000..4be93d77c41 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MathExpFunction.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * @author balder + */ +public class MathExpFunction extends FunctionNode { + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, double value will be requested. + */ + public MathExpFunction(GroupingExpression exp) { + super("math.exp", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MathFloorFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MathFloorFunction.java new file mode 100644 index 00000000000..f105332e352 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MathFloorFunction.java @@ -0,0 +1,16 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** represents the math.floor(expression) function */ +public class MathFloorFunction extends FunctionNode { + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, double value will be requested. 
+ */ + public MathFloorFunction(GroupingExpression exp) { + super("math.floor", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MathFunctions.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MathFunctions.java new file mode 100644 index 00000000000..5fe5a971be9 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MathFunctions.java @@ -0,0 +1,69 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a> + */ +public abstract class MathFunctions { + /** + * Defines the different types of math functions that are available. + */ + public enum Function { + EXP, // 0 + POW, // 1 + LOG, // 2 + LOG1P, // 3 + LOG10, // 4 + SIN, // 5 + ASIN, // 6 + COS, // 7 + ACOS, // 8 + TAN, // 9 + ATAN, // 10 + SQRT, // 11 + SINH, // 12 + ASINH, // 13 + COSH, // 14 + ACOSH, // 15 + TANH, // 16 + ATANH, // 17 + CBRT, // 18 + HYPOT, // 19 + FLOOR; // 20 + + static Function create(int tid) { + for(Function p : values()) { + if (tid == p.ordinal()) { + return p; + } + } + return null; + } + } + public static FunctionNode newInstance(Function type, GroupingExpression x, GroupingExpression y) { + switch (type) { + case EXP: return new MathExpFunction(x); + case POW: return new MathPowFunction(x, y); + case LOG: return new MathLogFunction(x); + case LOG1P: return new MathLog1pFunction(x); + case LOG10: return new MathLog10Function(x); + case SIN: return new MathSinFunction(x); + case ASIN: return new MathASinFunction(x); + case COS: return new MathCosFunction(x); + case ACOS: return new MathACosFunction(x); + case TAN: return new MathTanFunction(x); + case ATAN: return new MathATanFunction(x); + case SQRT: return new MathSqrtFunction(x); + case SINH: return new MathSinHFunction(x); + case ASINH: return 
new MathASinHFunction(x); + case COSH: return new MathCosHFunction(x); + case ACOSH: return new MathACosHFunction(x); + case TANH: return new MathTanHFunction(x); + case ATANH: return new MathATanHFunction(x); + case CBRT: return new MathCbrtFunction(x); + case HYPOT: return new MathHypotFunction(x, y); + case FLOOR: return new MathFloorFunction(x); + } + return null; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MathHypotFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MathHypotFunction.java new file mode 100644 index 00000000000..777a94f9107 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MathHypotFunction.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * @author balder + */ +public class MathHypotFunction extends FunctionNode { + /** + * Constructs a new instance of this class. + * + * @param x The expression to evaluate for x, double value will be requested. + * @param y The expression to evaluate for y, double value will be requested. + */ + public MathHypotFunction(GroupingExpression x, GroupingExpression y) { + super("math.hypot", Arrays.asList(x, y)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MathLog10Function.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MathLog10Function.java new file mode 100644 index 00000000000..444ea7a7349 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MathLog10Function.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * @author balder + */ +public class MathLog10Function extends FunctionNode { + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, double value will be requested. + */ + public MathLog10Function(GroupingExpression exp) { + super("math.log10", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MathLog1pFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MathLog1pFunction.java new file mode 100644 index 00000000000..3be6c799bf2 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MathLog1pFunction.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * @author balder + */ +public class MathLog1pFunction extends FunctionNode { + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, double value will be requested. + */ + public MathLog1pFunction(GroupingExpression exp) { + super("math.log1p", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MathLogFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MathLogFunction.java new file mode 100644 index 00000000000..4d3b43d45b0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MathLogFunction.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * @author balder + */ +public class MathLogFunction extends FunctionNode { + /** + * Constructs a new instance of this class. 
+ * + * @param exp The expression to evaluate, double value will be requested. + */ + public MathLogFunction(GroupingExpression exp) { + super("math.log", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MathPowFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MathPowFunction.java new file mode 100644 index 00000000000..09a9a28cbb0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MathPowFunction.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * @author balder + */ +public class MathPowFunction extends FunctionNode { + /** + * Constructs a new instance of this class. + * + * @param x The expression to evaluate for base, double value will be requested. + * @param y The expression to evaluate for the exponent, double value will be requested. + */ + public MathPowFunction(GroupingExpression x, GroupingExpression y) { + super("math.pow", Arrays.asList(x,y)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MathResolver.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MathResolver.java new file mode 100644 index 00000000000..9410c6ea347 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MathResolver.java @@ -0,0 +1,121 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.LinkedList; +import java.util.List; +import java.util.Stack; + +/** + * This is a helper class for resolving arithmetic operations over {@link GroupingExpression} objects. 
To resolve an + * operation simply push operator-expression pairs onto it, before calling {@link #resolve()} to retrieve the single + * corresponding grouping expression object. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class MathResolver { + + public enum Type { + + ADD(0, "+"), + SUB(1, "-"), + DIV(2, "/"), + MOD(3, "%"), + MUL(4, "*"); + + private final int pre; + private final String image; + + private Type(int pre, String image) { + this.pre = pre; + this.image = image; + } + } + + private final List<Item> items = new LinkedList<>(); + + /** + * Pushes the given operator-expression pair onto this math resolver. Once all pairs have been pushed using this + * method, call {@link #resolve()} to retrieve the combined grouping expression. + * + * @param type The operator that appears before the expression being pushed. + * @param exp The expression to push. + */ + public void push(Type type, GroupingExpression exp) { + if (items.isEmpty() && type != Type.ADD) { + throw new IllegalArgumentException("First item in an arithmetic operation must be an addition."); + } + items.add(new Item(type, exp)); + } + + /** + * Converts the internal list of operator-expression pairs into a corresponding combined grouping expression. When + * this method returns there is no residue of the conversion, and this object can be reused. + * + * @return The grouping expression corresponding to the pushed arithmetic operations.
+ */ + public GroupingExpression resolve() { + if (items.size() == 1) { + return items.remove(0).exp; // optimize common case + } + Stack<Item> stack = new Stack<>(); + stack.push(items.remove(0)); + while (!items.isEmpty()) { + Item item = items.remove(0); + while (stack.size() > 1 && stack.peek().type.pre >= item.type.pre) { + pop(stack); + } + stack.push(item); + } + while (stack.size() > 1) { + pop(stack); + } + return stack.remove(0).exp; + } + + private void pop(Stack<Item> stack) { + Item rhs = stack.pop(); + Item lhs = stack.peek(); + switch (rhs.type) { + case ADD: + lhs.exp = new AddFunction(lhs.exp, rhs.exp); + break; + case DIV: + lhs.exp = new DivFunction(lhs.exp, rhs.exp); + break; + case MOD: + lhs.exp = new ModFunction(lhs.exp, rhs.exp); + break; + case MUL: + lhs.exp = new MulFunction(lhs.exp, rhs.exp); + break; + case SUB: + lhs.exp = new SubFunction(lhs.exp, rhs.exp); + break; + default: + throw new UnsupportedOperationException("Operator " + rhs.type + " not supported."); + } + } + + @Override + public String toString() { + StringBuilder ret = new StringBuilder(); + for (int i = 0, len = items.size(); i < len; ++i) { + Item item = items.get(i); + if (i != 0) { + ret.append(" ").append(item.type.image).append(" "); + } + ret.append(item.exp.toString()); + } + return ret.toString(); + } + + private static class Item { + final Type type; + GroupingExpression exp; + + Item(Type type, GroupingExpression exp) { + this.type = type; + this.exp = exp; + } + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MathSinFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MathSinFunction.java new file mode 100644 index 00000000000..66612e9d80a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MathSinFunction.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * @author balder + */ +public class MathSinFunction extends FunctionNode { + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, double value will be requested. + */ + public MathSinFunction(GroupingExpression exp) { + super("math.sin", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MathSinHFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MathSinHFunction.java new file mode 100644 index 00000000000..79d260f51a0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MathSinHFunction.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * @author balder + */ +public class MathSinHFunction extends FunctionNode { +/** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, double value will be requested. + */ + public MathSinHFunction(GroupingExpression exp) { + super("math.sinh", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MathSqrtFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MathSqrtFunction.java new file mode 100644 index 00000000000..18c9396dd12 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MathSqrtFunction.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * @author balder + */ +public class MathSqrtFunction extends FunctionNode { + /** + * Constructs a new instance of this class. 
+ * + * @param exp The expression to evaluate, double value will be requested. + */ + public MathSqrtFunction(GroupingExpression exp) { + super("math.sqrt", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MathTanFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MathTanFunction.java new file mode 100644 index 00000000000..67db7a9d834 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MathTanFunction.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * @author balder + */ +public class MathTanFunction extends FunctionNode { + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, double value will be requested. + */ + public MathTanFunction(GroupingExpression exp) { + super("math.tan", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MathTanHFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MathTanHFunction.java new file mode 100644 index 00000000000..e111c1199d7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MathTanHFunction.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * @author balder + */ +public class MathTanHFunction extends FunctionNode { +/** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, double value will be requested. 
+ */ + public MathTanHFunction(GroupingExpression exp) { + super("math.tanh", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MaxAggregator.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MaxAggregator.java new file mode 100644 index 00000000000..93f9e3c068e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MaxAggregator.java @@ -0,0 +1,20 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This class represents an maximum-aggregator in a {@link GroupingExpression}. It evaluates to the maximum value that + * the contained expression evaluated to over all the inputs. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class MaxAggregator extends AggregatorNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to aggregate on. + */ + public MaxAggregator(GroupingExpression exp) { + super("max", exp); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MaxFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MaxFunction.java new file mode 100644 index 00000000000..da80a627c27 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MaxFunction.java @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.List; + +/** + * This class represents a max-function in a {@link GroupingExpression}. It evaluates to a number that equals the + * largest of the results of all arguments. 
+ * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class MaxFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param arg1 The first compulsory argument, must evaluate to a number. + * @param arg2 The second compulsory argument, must evaluate to a number. + * @param argN The optional arguments, must evaluate to a number. + */ + public MaxFunction(GroupingExpression arg1, GroupingExpression arg2, GroupingExpression... argN) { + this(asList(arg1, arg2, argN)); + } + + private MaxFunction(List<GroupingExpression> args) { + super("max", args); + } + + /** + * Constructs a new instance of this class from a list of arguments. + * + * @param args The arguments to pass to the constructor. + * @return The created instance. + * @throws IllegalArgumentException Thrown if the number of arguments is less than 2. + */ + public static MaxFunction newInstance(List<GroupingExpression> args) { + if (args.size() < 2) { + throw new IllegalArgumentException("Expected 2 or more arguments, got " + args.size() + "."); + } + return new MaxFunction(args); + } +} + diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/Md5Function.java b/container-search/src/main/java/com/yahoo/search/grouping/request/Md5Function.java new file mode 100644 index 00000000000..b2bd503c52f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/Md5Function.java @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * This class represents an md5-function in a {@link GroupingExpression}. It evaluates to a long that equals the md5 of + * the result of the argument. 
+ * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class Md5Function extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate. + * @param numBits The number of bits of the md5 to include. + */ + public Md5Function(GroupingExpression exp, int numBits) { + super("md5", Arrays.asList(exp, new LongValue(numBits))); + } + + /** + * Returns the number of bits of the md5 to include in the evaluated result. + * + * @return The bit count. + */ + public int getNumBits() { + return ((LongValue)getArg(1)).getValue().intValue(); + } +} + diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MinAggregator.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MinAggregator.java new file mode 100644 index 00000000000..5bb2f6675c8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MinAggregator.java @@ -0,0 +1,20 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This class represents an minimum-aggregator in a {@link GroupingExpression}. It evaluates to the minimum value that + * the contained expression evaluated to over all the inputs. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class MinAggregator extends AggregatorNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to aggregate on. 
+ */ + public MinAggregator(GroupingExpression exp) { + super("min", exp); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MinFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MinFunction.java new file mode 100644 index 00000000000..f66e23b87c0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MinFunction.java @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.List; + +/** + * This class represents a min-function in a {@link GroupingExpression}. It evaluates to a number that equals the + * smallest of the results of all arguments. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class MinFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param arg1 The first compulsory argument, must evaluate to a number. + * @param arg2 The second compulsory argument, must evaluate to a number. + * @param argN The optional arguments, must evaluate to a number. + */ + public MinFunction(GroupingExpression arg1, GroupingExpression arg2, GroupingExpression... argN) { + this(asList(arg1, arg2, argN)); + } + + private MinFunction(List<GroupingExpression> args) { + super("min", args); + } + + /** + * Constructs a new instance of this class from a list of arguments. + * + * @param args The arguments to pass to the constructor. + * @return The created instance. + * @throws IllegalArgumentException Thrown if the number of arguments is less than 2. 
+ */ + public static MinFunction newInstance(List<GroupingExpression> args) { + if (args.size() < 2) { + throw new IllegalArgumentException("Expected 2 or more arguments, got " + args.size() + "."); + } + return new MinFunction(args); + } +} + diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MinuteOfHourFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MinuteOfHourFunction.java new file mode 100644 index 00000000000..cb4b65f20b8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MinuteOfHourFunction.java @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * This class represents a minute-of-hour timestamp-function in a {@link GroupingExpression}. It evaluates to a long + * that equals the minute of hour (0-59) of the result of the argument. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class MinuteOfHourFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, must evaluate to a long. + */ + public MinuteOfHourFunction(GroupingExpression exp) { + super("time.minuteofhour", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/ModFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/ModFunction.java new file mode 100644 index 00000000000..d3d2502b714 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/ModFunction.java @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.List; + +/** + * This class represents a mod-function in a {@link GroupingExpression}. 
It evaluates to a number that equals the result + * of mod'ing the results of all arguments in the order they were given to the constructor (modulo first argument by + * second, result by third, ...). + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class ModFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param arg1 The first compulsory argument, must evaluate to a number. + * @param arg2 The second compulsory argument, must evaluate to a number. + * @param argN The optional arguments, must evaluate to a number. + */ + public ModFunction(GroupingExpression arg1, GroupingExpression arg2, GroupingExpression... argN) { + this(asList(arg1, arg2, argN)); + } + + private ModFunction(List<GroupingExpression> args) { + super("mod", args); + } + + /** + * Constructs a new instance of this class from a list of arguments. + * + * @param args The arguments to pass to the constructor. + * @return The created instance. + * @throws IllegalArgumentException Thrown if the number of arguments is less than 2. + */ + public static ModFunction newInstance(List<GroupingExpression> args) { + if (args.size() < 2) { + throw new IllegalArgumentException("Expected 2 or more arguments, got " + args.size() + "."); + } + return new ModFunction(args); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MonthOfYearFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MonthOfYearFunction.java new file mode 100644 index 00000000000..25f39892ee1 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MonthOfYearFunction.java @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * This class represents a month-of-year timestamp-function in a {@link GroupingExpression}. 
It evaluates to a long that + * equals the month of year (1-12) of the result of the argument. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class MonthOfYearFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, must evaluate to a long. + */ + public MonthOfYearFunction(GroupingExpression exp) { + super("time.monthofyear", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/MulFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/MulFunction.java new file mode 100644 index 00000000000..d66361888b0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/MulFunction.java @@ -0,0 +1,42 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.List; + +/** + * This class represents a mul-function in a {@link GroupingExpression}. It evaluates to a number that equals the result + * of multiplying the results of all arguments together in the order they were given to the constructor. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class MulFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param arg1 The first compulsory argument, must evaluate to a number. + * @param arg2 The second compulsory argument, must evaluate to a number. + * @param argN The optional arguments, must evaluate to a number. + */ + public MulFunction(GroupingExpression arg1, GroupingExpression arg2, GroupingExpression... argN) { + this(asList(arg1, arg2, argN)); + } + + private MulFunction(List<GroupingExpression> args) { + super("mul", args); + } + + /** + * Constructs a new instance of this class from a list of arguments. 
+ * + * @param args The arguments to pass to the constructor. + * @return The created instance. + * @throws IllegalArgumentException Thrown if the number of arguments is less than 2. + */ + public static MulFunction newInstance(List<GroupingExpression> args) { + if (args.size() < 2) { + throw new IllegalArgumentException("Expected 2 or more arguments, got " + args.size() + "."); + } + return new MulFunction(args); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/NegFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/NegFunction.java new file mode 100644 index 00000000000..7ea2b3a788b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/NegFunction.java @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * This class represents a negate-function in a {@link GroupingExpression}. It evaluates to a number that equals the + * negative of the results of the argument. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class NegFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, must evaluate to a number. + */ + public NegFunction(GroupingExpression exp) { + super("neg", Arrays.asList(exp)); + } +} + diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/NormalizeSubjectFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/NormalizeSubjectFunction.java new file mode 100644 index 00000000000..1eaad713383 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/NormalizeSubjectFunction.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + */ +public class NormalizeSubjectFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, must evaluate to a string. + */ + public NormalizeSubjectFunction(GroupingExpression exp) { + super("normalizesubject", Arrays.asList(exp)); + } +} + diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/NowFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/NowFunction.java new file mode 100644 index 00000000000..f876ee9a1df --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/NowFunction.java @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Collections; + +/** + * This class represents a now-function in a {@link GroupingExpression}. It evaluates to a long that equals the number + * of seconds since midnight, January 1, 1970 UTC. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class NowFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + */ + public NowFunction() { + super("now", Collections.<GroupingExpression>emptyList()); + } +} + diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/OrFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/OrFunction.java new file mode 100644 index 00000000000..0a7ec7ecc06 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/OrFunction.java @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.grouping.request; + +import java.util.List; + +/** + * This class represents an or-function in a {@link GroupingExpression}. It evaluates to a long that equals the result + * of or'ing the results of all arguments together in the order they were given to the constructor. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class OrFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param arg1 The first compulsory argument, must evaluate to a long. + * @param arg2 The second compulsory argument, must evaluate to a long. + * @param argN The optional arguments, must evaluate to a long. + */ + public OrFunction(GroupingExpression arg1, GroupingExpression arg2, GroupingExpression... argN) { + this(asList(arg1, arg2, argN)); + } + + private OrFunction(List<GroupingExpression> args) { + super("or", args); + } + + /** + * Constructs a new instance of this class from a list of arguments. + * + * @param args The arguments to pass to the constructor. + * @return The created instance. + * @throws IllegalArgumentException Thrown if the number of arguments is less than 2. + */ + public static OrFunction newInstance(List<GroupingExpression> args) { + if (args.size() < 2) { + throw new IllegalArgumentException("Expected 2 or more arguments, got " + args.size() + "."); + } + return new OrFunction(args); + } +} + diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/PredefinedFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/PredefinedFunction.java new file mode 100644 index 00000000000..b00ee97452c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/PredefinedFunction.java @@ -0,0 +1,58 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.grouping.request; + +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; + +/** + * This class represents a predefined bucket-function in a {@link GroupingExpression}. It maps the input into one of the + * given buckets by the result of the argument expression. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public abstract class PredefinedFunction extends FunctionNode { + + protected PredefinedFunction(GroupingExpression exp, List<? extends BucketValue> args) { + super("predefined", asList(exp, args)); + Iterator<? extends BucketValue> it = args.iterator(); + BucketValue prev = it.next(); + while (it.hasNext()) { + BucketValue arg = it.next(); + if (prev.compareTo(arg) >= 0) { + throw new IllegalArgumentException("Buckets must be monotonically increasing, got " + prev + + " before " + arg + "."); + } + prev = arg; + } + } + + /** + * Returns the number of buckets to divide the result into. + * + * @return The bucket count. + */ + public int getNumBuckets() { + return getNumArgs() - 1; + } + + /** + * Returns the bucket at the given index. + * + * @param i The index of the bucket to return. + * @return The bucket at the given index. + * @throws IndexOutOfBoundsException If the index is out of range. + */ + public BucketValue getBucket(int i) { + return (BucketValue)getArg(i + 1); + } + + private static + List<GroupingExpression> asList(GroupingExpression exp, List<? extends BucketValue> args) { + List<GroupingExpression> ret = new LinkedList<>(); + ret.add(exp); + ret.addAll(args); + return ret; + } +} + diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/RawBucket.java b/container-search/src/main/java/com/yahoo/search/grouping/request/RawBucket.java new file mode 100644 index 00000000000..d13b8b6ca67 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/RawBucket.java @@ -0,0 +1,40 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This class represents a {@link RawValue} bucket in a {@link PredefinedFunction}. + * + * @author <a href="mailto:lulf@yahoo-inc.com">Ulf Lilleengen</a> + */ +public class RawBucket extends BucketValue { + + /** + * Get the next distinct value. + * + * @param value The base value. + * @return the next value. + */ + public static RawValue nextValue(RawValue value) { + return new RawValue(value.getValue().clone().put((byte)0)); + } + + /** + * Constructs a new instance of this class. + * + * @param from The from-value to assign to this. + * @param to The to-value to assign to this. + */ + public RawBucket(RawBuffer from, RawBuffer to) { + super(new RawValue(from), new RawValue(to)); + } + + /** + * Constructs a new instance of this class. + * + * @param from The from-value to assign to this. + * @param to The to-value to assign to this. + */ + public RawBucket(ConstantValue<?> from, ConstantValue<?> to) { + super(from, to); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/RawBuffer.java b/container-search/src/main/java/com/yahoo/search/grouping/request/RawBuffer.java new file mode 100644 index 00000000000..00b9c899263 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/RawBuffer.java @@ -0,0 +1,123 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.ArrayList; + +/** + * This class represents a buffer of byte values to be used as a backing buffer + * for raw buckets. + * + * @author <a href="mailto:lulf@yahoo-inc.com">Ulf Lilleengen</a> + */ +public class RawBuffer implements Comparable<RawBuffer>, Cloneable { + private final ArrayList<Byte> buffer; + + /** + * Create an empty buffer. 
+ */ + public RawBuffer() { + this.buffer = new ArrayList<>(); + } + + /** + * Create a buffer with initial content. + * + * @param buffer A buffer of values to be assigned this buffer. + */ + public RawBuffer(ArrayList<Byte> buffer) { + this.buffer = buffer; + } + + /** + * Create a buffer with initial content. + * + * @param bytes A buffer of bytes to be assigned this buffer. + */ + public RawBuffer(byte[] bytes) { + buffer = new ArrayList<>(); + put(bytes); + } + + /** + * Insert a byte value into this buffer. + * + * @param value The value to add to the buffer. + * @return Reference to this. + */ + public RawBuffer put(byte value) { + buffer.add(value); + return this; + } + + /** + * Insert an array of byte values into this buffer. + * + * @param values The array to add to the buffer. + * @return Reference to this. + */ + public RawBuffer put(byte[] values) { + for (int i = 0; i < values.length; i++) { + buffer.add(values[i]); + } + return this; + } + + /** + * Create a copy of data in the internal buffer. + * + * @return A copy of the data. + */ + public byte[] getBytes() { + byte[] ret = new byte[buffer.size()]; + for (int i = 0; i < ret.length; i++) { + ret[i] = buffer.get(i); + } + return ret; + } + + @Override + public String toString() { + StringBuilder s = new StringBuilder(); + s.append("{"); + for (int i = 0; i < buffer.size(); i++) { + s.append(buffer.get(i)); + if (i < buffer.size() - 1) { + s.append(","); + } + } + s.append("}"); + return s.toString(); + } + + @Override + public RawBuffer clone() { + return new RawBuffer(new ArrayList<>(buffer)); + } + + @Override + public int compareTo(RawBuffer rhs) { + Byte[] my = buffer.toArray(new Byte[0]); + Byte[] their = rhs.buffer.toArray(new Byte[0]); + for (int i = 0; i < my.length && i < their.length; i++) { + if (my[i] < their[i]) { + return -1; + } else if (my[i] > their[i]) { + return 1; + } + } + return (my.length < their.length ? -1 : (my.length > their.length ? 
1 : 0)); + } + + @Override + public int hashCode() { + return buffer.hashCode(); + } + + @Override + public boolean equals(Object rhs) { + if (rhs instanceof RawBuffer) { + return (compareTo((RawBuffer)rhs) == 0); + } + return false; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/RawPredefined.java b/container-search/src/main/java/com/yahoo/search/grouping/request/RawPredefined.java new file mode 100644 index 00000000000..c2650346231 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/RawPredefined.java @@ -0,0 +1,48 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.List; + +/** + * This class represents a predefined bucket-function in a {@link GroupingExpression} for expressions that evaluate to a + * raw. + * + * @author <a href="mailto:lulf@yahoo-inc.com">Ulf Lilleengen</a> + */ +public class RawPredefined extends PredefinedFunction { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, must evaluate to a string. + * @param arg1 The compulsory bucket. + * @param argN The optional buckets. + */ + public RawPredefined(GroupingExpression exp, RawBucket arg1, RawBucket... argN) { + this(exp, asList(arg1, argN)); + } + + private RawPredefined(GroupingExpression exp, List<RawBucket> args) { + super(exp, args); + } + + @Override + public RawBucket getBucket(int i) { + return (RawBucket)getArg(i + 1); + } + + /** + * Constructs a new instance of this class from a list of arguments. + * + * @param exp The expression to evaluate, must evaluate to a string. + * @param args The buckets to pass to the constructor. + * @return The created instance. + * @throws IllegalArgumentException Thrown if the list of buckets is empty. 
+ */ + public static RawPredefined newInstance(GroupingExpression exp, List<RawBucket> args) { + if (args.isEmpty()) { + throw new IllegalArgumentException("Expected at least one bucket, got none."); + } + return new RawPredefined(exp, args); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/RawValue.java b/container-search/src/main/java/com/yahoo/search/grouping/request/RawValue.java new file mode 100644 index 00000000000..a04944d7897 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/RawValue.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This class represents a raw value in a {@link GroupingExpression}. + * + * @author <a href="mailto:lulf@yahoo-inc.com">Ulf Lilleengen</a> + */ +public class RawValue extends ConstantValue<RawBuffer> { + + /** + * Constructs a new instance of this class. + * + * @param value The immutable value to assign to this. + */ + public RawValue(RawBuffer value) { + super(value); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/RelevanceValue.java b/container-search/src/main/java/com/yahoo/search/grouping/request/RelevanceValue.java new file mode 100644 index 00000000000..8a5d4dc75d1 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/RelevanceValue.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This class represents a document relevance score in a {@link GroupingExpression}. It evaluates to the relevance of + * the input {@link com.yahoo.search.result.Hit}. 
+ * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class RelevanceValue extends DocumentValue { + + /** + * Constructs a new instance of this class. + */ + public RelevanceValue() { + super("relevance()"); + } +} + diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/ReverseFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/ReverseFunction.java new file mode 100644 index 00000000000..274bb20c9f7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/ReverseFunction.java @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * This class represents a reverse-function in a {@link GroupingExpression}. It evaluates to a list that equals the list + * result of the argument, sorted in descending order. + * + * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a> + */ +public class ReverseFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, must evaluate to a list. + */ + public ReverseFunction(GroupingExpression exp) { + super("reverse", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/SecondOfMinuteFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/SecondOfMinuteFunction.java new file mode 100644 index 00000000000..9443f862a16 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/SecondOfMinuteFunction.java @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * This class represents a second-of-minute timestamp-function in a {@link GroupingExpression}. It evaluates to a long + * that equals the second of minute (0-59) of the result of the argument. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class SecondOfMinuteFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, must evaluate to a long. + */ + public SecondOfMinuteFunction(GroupingExpression exp) { + super("time.secondofminute", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/SizeFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/SizeFunction.java new file mode 100644 index 00000000000..d445007a039 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/SizeFunction.java @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * This class represents a size-function in a {@link GroupingExpression}. It evaluates to a number that equals the + * number of elements in the result of the argument (e.g. the number of elements in an array). + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class SizeFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate. 
+ */ + public SizeFunction(GroupingExpression exp) { + super("size", Arrays.asList(exp)); + } +} + diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/SortFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/SortFunction.java new file mode 100644 index 00000000000..2a8845f9847 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/SortFunction.java @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * This class represents a sort-function in a {@link GroupingExpression}. It evaluates to a list that equals the list + * result of the argument, sorted in ascending order. + * + * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a> + */ +public class SortFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, must evaluate to a list. + */ + public SortFunction(GroupingExpression exp) { + super("sort", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/StrCatFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/StrCatFunction.java new file mode 100644 index 00000000000..455f9dee917 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/StrCatFunction.java @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.List; + +/** + * This class represents a strcat-function in a {@link GroupingExpression}. It evaluates to a string that equals the + * concatenation of the string results of all arguments in the order they were given to the constructor. 
+ * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class StrCatFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param arg1 The first compulsory argument, must evaluate to a string. + * @param arg2 The second compulsory argument, must evaluate to a string. + * @param argN The optional arguments, must evaluate to a string. + */ + public StrCatFunction(GroupingExpression arg1, GroupingExpression arg2, GroupingExpression... argN) { + this(asList(arg1, arg2, argN)); + } + + private StrCatFunction(List<GroupingExpression> args) { + super("strcat", args); + } + + /** + * Constructs a new instance of this class from a list of arguments. + * + * @param args The arguments to pass to the constructor. + * @return The created instance. + * @throws IllegalArgumentException Thrown if the number of arguments is less than 2. + */ + public static StrCatFunction newInstance(List<GroupingExpression> args) { + if (args.size() < 2) { + throw new IllegalArgumentException("Expected 2 or more arguments, got " + args.size() + "."); + } + return new StrCatFunction(args); + } +} + diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/StrLenFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/StrLenFunction.java new file mode 100644 index 00000000000..2ef53f53bf2 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/StrLenFunction.java @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * This class represents a strlen-function in a {@link GroupingExpression}. It evaluates to a long that equals the + * number of bytes in the string result of the argument. 
+ * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class StrLenFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, must evaluate to a string. + */ + public StrLenFunction(GroupingExpression exp) { + super("strlen", Arrays.asList(exp)); + } +} + diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/StringBucket.java b/container-search/src/main/java/com/yahoo/search/grouping/request/StringBucket.java new file mode 100644 index 00000000000..34c7b9f526a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/StringBucket.java @@ -0,0 +1,40 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This class represents a {@link String} bucket in a {@link PredefinedFunction}. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class StringBucket extends BucketValue { + + /** + * Get the next distinct value. + * + * @param value The base value. + * @return the next value. + */ + public static StringValue nextValue(StringValue value) { + return new StringValue(value.getValue() + " "); + } + + /** + * Constructs a new instance of this class. + * + * @param from The from-value to assign to this. + * @param to The to-value to assign to this. + */ + public StringBucket(String from, String to) { + super(new StringValue(from), new StringValue(to)); + } + + /** + * Constructs a new instance of this class. + * + * @param from The from-value to assign to this. + * @param to The to-value to assign to this. 
+ */ + public StringBucket(ConstantValue<?> from, ConstantValue<?> to) { + super(from, to); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/StringPredefined.java b/container-search/src/main/java/com/yahoo/search/grouping/request/StringPredefined.java new file mode 100644 index 00000000000..d3a469fdd7e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/StringPredefined.java @@ -0,0 +1,48 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.List; + +/** + * This class represents a predefined bucket-function in a {@link GroupingExpression} for expressions that evaluate to a + * string. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class StringPredefined extends PredefinedFunction { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, must evaluate to a string. + * @param arg1 The compulsory bucket. + * @param argN The optional buckets. + */ + public StringPredefined(GroupingExpression exp, StringBucket arg1, StringBucket... argN) { + this(exp, asList(arg1, argN)); + } + + private StringPredefined(GroupingExpression exp, List<StringBucket> args) { + super(exp, args); + } + + @Override + public StringBucket getBucket(int i) { + return (StringBucket)getArg(i + 1); + } + + /** + * Constructs a new instance of this class from a list of arguments. + * + * @param exp The expression to evaluate, must evaluate to a string. + * @param args The buckets to pass to the constructor. + * @return The created instance. + * @throws IllegalArgumentException Thrown if the list of buckets is empty. 
+ */ + public static StringPredefined newInstance(GroupingExpression exp, List<StringBucket> args) { + if (args.isEmpty()) { + throw new IllegalArgumentException("Expected at least one bucket, got none."); + } + return new StringPredefined(exp, args); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/StringValue.java b/container-search/src/main/java/com/yahoo/search/grouping/request/StringValue.java new file mode 100644 index 00000000000..87e818368d6 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/StringValue.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This class represents a constant {@link String} value in a {@link GroupingExpression}. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class StringValue extends ConstantValue<String> { + + /** + * Constructs a new instance of this class. + * + * @param value The immutable value to assign to this. + */ + public StringValue(String value) { + super(value); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/SubFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/SubFunction.java new file mode 100644 index 00000000000..15e05c50f63 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/SubFunction.java @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.List; + +/** + * This class represents a sub-function in a {@link GroupingExpression}. 
It evaluates to a number that equals the result + * of subtracting the results of all arguments in the order they were given to the constructor (subtract second argument + * from first, third from result, ...). + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class SubFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param arg1 The first compulsory argument, must evaluate to a number. + * @param arg2 The second compulsory argument, must evaluate to a number. + * @param argN The optional arguments, must evaluate to a number. + */ + public SubFunction(GroupingExpression arg1, GroupingExpression arg2, GroupingExpression... argN) { + this(asList(arg1, arg2, argN)); + } + + private SubFunction(List<GroupingExpression> args) { + super("sub", args); + } + + /** + * Constructs a new instance of this class from a list of arguments. + * + * @param args The arguments to pass to the constructor. + * @return The created instance. + * @throws IllegalArgumentException Thrown if the number of arguments is less than 2. + */ + public static SubFunction newInstance(List<GroupingExpression> args) { + if (args.size() < 2) { + throw new IllegalArgumentException("Expected 2 or more arguments, got " + args.size() + "."); + } + return new SubFunction(args); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/SumAggregator.java b/container-search/src/main/java/com/yahoo/search/grouping/request/SumAggregator.java new file mode 100644 index 00000000000..1ace1cfbba2 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/SumAggregator.java @@ -0,0 +1,20 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This class represents a sum-aggregator in a {@link GroupingExpression}. 
It evaluates to the sum of the values that + * the contained expression evaluated to over all the inputs. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class SumAggregator extends AggregatorNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to aggregate on. + */ + public SumAggregator(GroupingExpression exp) { + super("sum", exp); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/SummaryValue.java b/container-search/src/main/java/com/yahoo/search/grouping/request/SummaryValue.java new file mode 100644 index 00000000000..72e4c6662d3 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/SummaryValue.java @@ -0,0 +1,40 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This class represents a document summary in a {@link GroupingExpression}. It evaluates to the summary of the input + * {@link com.yahoo.search.result.Hit} that corresponds to the named summary class. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class SummaryValue extends DocumentValue { + + private final String name; + + /** + * Constructs a new instance of this class, using the default summary class. + */ + public SummaryValue() { + super("summary()"); + name = null; + } + + /** + * Constructs a new instance of this class. + * + * @param summaryName The name of the summary class to assign to this. + */ + public SummaryValue(String summaryName) { + super("summary(" + summaryName + ")"); + name = summaryName; + } + + /** + * Returns the name of the summary class used to retrieve the hit from the search node. + * + * @return The summary name. 
+ */ + public String getSummaryName() { + return name; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/TimeFunctions.java b/container-search/src/main/java/com/yahoo/search/grouping/request/TimeFunctions.java new file mode 100644 index 00000000000..bde1c5831b5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/TimeFunctions.java @@ -0,0 +1,148 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This abstract class is a factory for timestamp functions in a {@link GroupingExpression}. Apart from offering + * per-function factory methods, this class also contains a {@link #newInstance(com.yahoo.search.grouping.request.TimeFunctions.Type, + * GroupingExpression)} method which is useful for runtime construction of grouping requests. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public abstract class TimeFunctions { + + /** + * Defines the different types of timestamp-functions that are available. + */ + public enum Type { + DATE, + DAY_OF_MONTH, + DAY_OF_WEEK, + DAY_OF_YEAR, + HOUR_OF_DAY, + MINUTE_OF_HOUR, + MONTH_OF_YEAR, + SECOND_OF_MINUTE, + YEAR + } + + /** + * Creates a new timestamp-function of the specified type for the given {@link GroupingExpression}. + * + * @param type The type of function to create. + * @param exp The expression to evaluate, must evaluate to a long. + * @return The created function node. 
+ */ + public static FunctionNode newInstance(Type type, GroupingExpression exp) { + switch (type) { + case DATE: + return newDate(exp); + case DAY_OF_MONTH: + return newDayOfMonth(exp); + case DAY_OF_WEEK: + return newDayOfWeek(exp); + case DAY_OF_YEAR: + return newDayOfYear(exp); + case HOUR_OF_DAY: + return newHourOfDay(exp); + case MINUTE_OF_HOUR: + return newMinuteOfHour(exp); + case MONTH_OF_YEAR: + return newMonthOfYear(exp); + case SECOND_OF_MINUTE: + return newSecondOfMinute(exp); + case YEAR: + return newYear(exp); + } + throw new UnsupportedOperationException("Time function '" + type + "' not supported."); + } + + /** + * Creates a new instance of {@link DateFunction} for the given {@link GroupingExpression}. + * + * @param exp The expression to evaluate, must evaluate to a long. + * @return The created function node. + */ + public static DateFunction newDate(GroupingExpression exp) { + return new DateFunction(exp); + } + + /** + * Creates a new instance of {@link DayOfMonthFunction} for the given {@link GroupingExpression}. + * + * @param exp The expression to evaluate, must evaluate to a long. + * @return The created function node. + */ + public static DayOfMonthFunction newDayOfMonth(GroupingExpression exp) { + return new DayOfMonthFunction(exp); + } + + /** + * Creates a new instance of {@link DayOfWeekFunction} for the given {@link GroupingExpression}. + * + * @param exp The expression to evaluate, must evaluate to a long. + * @return The created function node. + */ + public static DayOfWeekFunction newDayOfWeek(GroupingExpression exp) { + return new DayOfWeekFunction(exp); + } + + /** + * Creates a new instance of {@link DayOfYearFunction} for the given {@link GroupingExpression}. + * + * @param exp The expression to evaluate, must evaluate to a long. + * @return The created function node. 
+ */ + public static DayOfYearFunction newDayOfYear(GroupingExpression exp) { + return new DayOfYearFunction(exp); + } + + /** + * Creates a new instance of {@link HourOfDayFunction} for the given {@link GroupingExpression}. + * + * @param exp The expression to evaluate, must evaluate to a long. + * @return The created function node. + */ + public static HourOfDayFunction newHourOfDay(GroupingExpression exp) { + return new HourOfDayFunction(exp); + } + + /** + * Creates a new instance of {@link MinuteOfHourFunction} for the given {@link GroupingExpression}. + * + * @param exp The expression to evaluate, must evaluate to a long. + * @return The created function node. + */ + public static MinuteOfHourFunction newMinuteOfHour(GroupingExpression exp) { + return new MinuteOfHourFunction(exp); + } + + /** + * Creates a new instance of {@link MonthOfYearFunction} for the given {@link GroupingExpression}. + * + * @param exp The expression to evaluate, must evaluate to a long. + * @return The created function node. + */ + public static MonthOfYearFunction newMonthOfYear(GroupingExpression exp) { + return new MonthOfYearFunction(exp); + } + + /** + * Creates a new instance of {@link SecondOfMinuteFunction} for the given {@link GroupingExpression}. + * + * @param exp The expression to evaluate, must evaluate to a long. + * @return The created function node. + */ + public static SecondOfMinuteFunction newSecondOfMinute(GroupingExpression exp) { + return new SecondOfMinuteFunction(exp); + } + + /** + * Creates a new instance of {@link YearFunction} for the given {@link GroupingExpression}. + * + * @param exp The expression to evaluate, must evaluate to a long. + * @return The created function node. 
+ */ + public static YearFunction newYear(GroupingExpression exp) { + return new YearFunction(exp); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/ToDoubleFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/ToDoubleFunction.java new file mode 100644 index 00000000000..8eab2af8691 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/ToDoubleFunction.java @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * This class represents a todouble-function in a {@link GroupingExpression}. It converts the result of the argument to + * a double. If the argument can not be converted, this function returns 0. + * + * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a> + */ +public class ToDoubleFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate. + */ + public ToDoubleFunction(GroupingExpression exp) { + super("todouble", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/ToLongFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/ToLongFunction.java new file mode 100644 index 00000000000..c47a043eea0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/ToLongFunction.java @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * This class represents a tolong-function in a {@link GroupingExpression}. It converts the result of the argument to a + * long. If the argument can not be converted, this function returns 0. 
+ * + * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a> + */ +public class ToLongFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate. + */ + public ToLongFunction(GroupingExpression exp) { + super("tolong", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/ToRawFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/ToRawFunction.java new file mode 100644 index 00000000000..d1ba3afa28c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/ToRawFunction.java @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * This class represents a toraw-function in a {@link GroupingExpression}. It + * converts the result of the argument to a raw type. If the argument can not + * be converted, this function returns null. + * + * @author <a href="mailto:lulf@yahoo-inc.com">Ulf Lilleengen</a> + */ +public class ToRawFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate. + */ + public ToRawFunction(GroupingExpression exp) { + super("toraw", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/ToStringFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/ToStringFunction.java new file mode 100644 index 00000000000..364d9e5064d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/ToStringFunction.java @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * This class represents a tostring-function in a {@link GroupingExpression}. It converts the result of the argument to a + * string. + * + * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a> + */ +public class ToStringFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate. + */ + public ToStringFunction(GroupingExpression exp) { + super("tostring", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/UcaFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/UcaFunction.java new file mode 100644 index 00000000000..2e23f41f139 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/UcaFunction.java @@ -0,0 +1,63 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * This class represents an uca-function in a {@link GroupingExpression}. + * + * @author <a href="mailto:lulf@yahoo-inc.com">Ulf Lilleengen</a> + */ +public class UcaFunction extends FunctionNode { + + private final String locale; + private final String strength; + + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate. + * @param locale The locale to use for sorting. + */ + public UcaFunction(GroupingExpression exp, String locale) { + super("uca", Arrays.asList(exp, new StringValue(locale))); + this.locale = locale; + this.strength = "TERTIARY"; + } + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate. + * @param locale The locale to use for sorting. + * @param strength The strength level to use. 
+ */ + public UcaFunction(GroupingExpression exp, String locale, String strength) { + super("uca", Arrays.asList(exp, new StringValue(locale), new StringValue(strength))); + if (!validStrength(strength)) { + throw new IllegalArgumentException("Not a valid UCA strength: " + strength); + } + this.locale = locale; + this.strength = strength; + } + + private boolean validStrength(String strength) { + return (strength.equals("PRIMARY") || + strength.equals("SECONDARY") || + strength.equals("TERTIARY") || + strength.equals("QUATERNARY") || + strength.equals("IDENTICAL")); + } + + public String getLocale() { + return locale; + } + + public String getStrength() { + return strength; + } +} + + + diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/XorAggregator.java b/container-search/src/main/java/com/yahoo/search/grouping/request/XorAggregator.java new file mode 100644 index 00000000000..be0f092b929 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/XorAggregator.java @@ -0,0 +1,20 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This class represents an xor-aggregator in a {@link GroupingExpression}. It evaluates to the xor of the values that + * the contained expression evaluated to over all the inputs. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class XorAggregator extends AggregatorNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to aggregate on. 
+ */ + public XorAggregator(GroupingExpression exp) { + super("xor", exp); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/XorBitFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/XorBitFunction.java new file mode 100644 index 00000000000..304917bf905 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/XorBitFunction.java @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * This class represents an xor-function in a {@link GroupingExpression}. It evaluates to a long that equals the xor of + * 'width' bits over the binary representation of the result of the argument. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class XorBitFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate. + * @param numBits The number of bits of the expression value to xor. + */ + public XorBitFunction(GroupingExpression exp, int numBits) { + super("xorbit", Arrays.asList(exp, new LongValue(numBits))); + } + + /** + * Returns the number of bits of the expression value to xor. + * + * @return The bit count. + */ + public int getNumBits() { + return ((LongValue)getArg(1)).getValue().intValue(); + } +} + diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/XorFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/XorFunction.java new file mode 100644 index 00000000000..dc47926ea51 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/XorFunction.java @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.grouping.request; + +import java.util.List; + +/** + * This class represents an xor-function in a {@link GroupingExpression}. It evaluates to a long that equals the result + * of xor'ing the results of all arguments together in the order they were given to the constructor. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class XorFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param arg1 The first compulsory argument, must evaluate to a long. + * @param arg2 The second compulsory argument, must evaluate to a long. + * @param argN The optional arguments, must evaluate to a long. + */ + public XorFunction(GroupingExpression arg1, GroupingExpression arg2, GroupingExpression... argN) { + this(asList(arg1, arg2, argN)); + } + + private XorFunction(List<GroupingExpression> args) { + super("xor", args); + } + + /** + * Constructs a new instance of this class from a list of arguments. + * + * @param args The arguments to pass to the constructor. + * @return The created instance. + * @throws IllegalArgumentException Thrown if the number of arguments is less than 2. + */ + public static XorFunction newInstance(List<GroupingExpression> args) { + if (args.size() < 2) { + throw new IllegalArgumentException("Expected 2 or more arguments, got " + args.size() + "."); + } + return new XorFunction(args); + } +} + diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/YearFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/YearFunction.java new file mode 100644 index 00000000000..2115d99140d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/YearFunction.java @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * This class represents a year timestamp-function in a {@link GroupingExpression}. It evaluates to a long that equals + * the full year (e.g. 2010) of the result of the argument. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class YearFunction extends FunctionNode { + + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, must evaluate to a long. + */ + public YearFunction(GroupingExpression exp) { + super("time.year", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/YmumValue.java b/container-search/src/main/java/com/yahoo/search/grouping/request/YmumValue.java new file mode 100644 index 00000000000..5754edd8155 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/YmumValue.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +/** + * This class represents a document checksum in a {@link GroupingExpression}. It evaluates to the YMUM checksum of the + * input {@link com.yahoo.search.result.Hit}. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class YmumValue extends DocumentValue { + + /** + * Constructs a new instance of this class. + */ + public YmumValue() { + super("ymum()"); + } +} + diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/ZCurveXFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/ZCurveXFunction.java new file mode 100644 index 00000000000..b4790b912e7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/ZCurveXFunction.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a> + */ +public class ZCurveXFunction extends FunctionNode { + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, must evaluate to a long or long[]. + */ + public ZCurveXFunction(GroupingExpression exp) { + super("zcurve.x", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/ZCurveYFunction.java b/container-search/src/main/java/com/yahoo/search/grouping/request/ZCurveYFunction.java new file mode 100644 index 00000000000..e9a011f2193 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/ZCurveYFunction.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request; + +import java.util.Arrays; + +/** + * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a> + */ +public class ZCurveYFunction extends FunctionNode { + /** + * Constructs a new instance of this class. + * + * @param exp The expression to evaluate, must evaluate to a long or long[]. + */ + public ZCurveYFunction(GroupingExpression exp) { + super("zcurve.y", Arrays.asList(exp)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/package-info.java b/container-search/src/main/java/com/yahoo/search/grouping/request/package-info.java new file mode 100644 index 00000000000..ff30ef2b939 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/package-info.java @@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+@ExportPackage +@PublicApi +package com.yahoo.search.grouping.request; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/grouping/request/parser/GroupingParserInput.java b/container-search/src/main/java/com/yahoo/search/grouping/request/parser/GroupingParserInput.java new file mode 100644 index 00000000000..e87291fba18 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/request/parser/GroupingParserInput.java @@ -0,0 +1,14 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.request.parser; + +import com.yahoo.javacc.FastCharStream; + +/** + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class GroupingParserInput extends FastCharStream implements CharStream { + + public GroupingParserInput(String input) { + super(input); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/result/AbstractList.java b/container-search/src/main/java/com/yahoo/search/grouping/result/AbstractList.java new file mode 100644 index 00000000000..058c68470c4 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/result/AbstractList.java @@ -0,0 +1,47 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.grouping.result; + +import com.yahoo.collections.LazyMap; +import com.yahoo.search.grouping.Continuation; +import com.yahoo.search.result.HitGroup; + +import java.util.HashMap; +import java.util.Map; + +/** + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public abstract class AbstractList extends HitGroup { + + private final Map<String, Continuation> continuations = LazyMap.newHashMap(); + private final String label; + + /** + * <p>Constructs a new instance of this class.</p> + * + * @param type The type of this list. + * @param label The label of this list. + */ + public AbstractList(String type, String label) { + super(type + ":" + label); + this.label = label; + } + + /** + * <p>Returns the label of this list.</p> + * + * @return The label. + */ + public String getLabel() { + return label; + } + + /** + * <p>Returns the map of all possible {@link Continuation}s of this list.</p> + * + * @return The map of Continuations. + */ + public Map<String, Continuation> continuations() { + return continuations; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/result/BucketGroupId.java b/container-search/src/main/java/com/yahoo/search/grouping/result/BucketGroupId.java new file mode 100644 index 00000000000..1d6dcc6762c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/result/BucketGroupId.java @@ -0,0 +1,60 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.result; + +import static com.yahoo.text.Lowercase.toLowerCase; + +/** + * This abstract class is used in {@link Group} instances where the identifying expression evaluated to a {@link + * com.yahoo.search.grouping.request.BucketValue}. The range is inclusive-from and exclusive-to. 
+ * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public abstract class BucketGroupId<T> extends GroupId { + + private final T from; + private final T to; + + /** + * Constructs a new instance of this class. + * + * @param type The type of this id's value. + * @param from The inclusive-from of the range. + * @param to The exclusive-to of the range. + */ + public BucketGroupId(String type, T from, T to) { + this(type, from, String.valueOf(from), to, String.valueOf(to)); + } + + /** + * Constructs a new instance of this class. + * + * @param type The type of this id's value. + * @param from The inclusive-from of the range. + * @param fromImage The String representation of the <tt>from</tt> argument. + * @param to The exclusive-to of the range. + * @param toImage The String representation of the <tt>to</tt> argument. + */ + public BucketGroupId(String type, T from, String fromImage, T to, String toImage) { + super(type, fromImage, toImage); + this.from = from; + this.to = to; + } + + /** + * Returns the inclusive-from of the value range. + * + * @return The from-value. + */ + public T getFrom() { + return from; + } + + /** + * Returns the exclusive-to of the value range. + * + * @return The to-value. + */ + public T getTo() { + return to; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/result/DoubleBucketId.java b/container-search/src/main/java/com/yahoo/search/grouping/result/DoubleBucketId.java new file mode 100644 index 00000000000..e9f7ffc04c0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/result/DoubleBucketId.java @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.result; + +/** + * This class is used in {@link Group} instances where the identifying expression evaluated to a {@link + * com.yahoo.search.grouping.request.DoubleBucket}. 
+ * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class DoubleBucketId extends BucketGroupId<Double> { + + /** + * Constructs a new instance of this class. + * + * @param from The identifying inclusive-from double. + * @param to The identifying exclusive-to double. + */ + public DoubleBucketId(Double from, Double to) { + super("double_bucket", from, to); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/result/DoubleId.java b/container-search/src/main/java/com/yahoo/search/grouping/result/DoubleId.java new file mode 100644 index 00000000000..c6f0b15feb2 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/result/DoubleId.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.result; + +/** + * This class is used in {@link Group} instances where the identifying expression evaluated to a {@link Double}. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class DoubleId extends ValueGroupId<Double> { + + /** + * Constructs a new instance of this class. + * + * @param value The identifying double. + */ + public DoubleId(Double value) { + super("double", value); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/result/Group.java b/container-search/src/main/java/com/yahoo/search/grouping/result/Group.java new file mode 100644 index 00000000000..ddf8fe6140d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/result/Group.java @@ -0,0 +1,83 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.grouping.result; + +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.HitGroup; +import com.yahoo.search.result.Relevance; + +/** + * This class represents a single group in the grouping result model. A group may contain any number of results (stored + * as fields, use {@link #getField(String)} to access), {@link GroupList} and {@link HitList}. Use the {@link + * com.yahoo.search.grouping.GroupingRequest#getResultGroup(com.yahoo.search.Result)} to retrieve an instance of this. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class Group extends HitGroup { + + private static final long serialVersionUID = 2122928012157537800L; + private final GroupId groupId; + + /** + * Creates a new instance of this class. + * + * @param groupId The id to assign to this group. + * @param rel The relevance of this group. + */ + public Group(GroupId groupId, Relevance rel) { + super(groupId.toString(), rel); + this.groupId = groupId; + } + + /** + * Returns the id of this group. This is a model of the otherwise flattened {@link #getId() hit id}. + * + * @return The group id. + */ + public GroupId getGroupId() { + return groupId; + } + + /** + * Returns the {@link HitList} with the given label. The label is the one given to the {@link + * com.yahoo.search.grouping.request.EachOperation} that generated the list. This method returns null if no such + * list was found. + * + * @param label The label of the list to return. + * @return The requested list, or null. + */ + public HitList getHitList(String label) { + for (Hit hit : this) { + if (!(hit instanceof HitList)) { + continue; + } + HitList lst = (HitList)hit; + if (!label.equals(lst.getLabel())) { + continue; + } + return lst; + } + return null; + } + + /** + * Returns the {@link GroupList} with the given label. The label is the one given to the {@link + * com.yahoo.search.grouping.request.EachOperation} that generated the list. 
This method returns null if no such + * list was found. + * + * @param label The label of the list to return. + * @return The requested list, or null. + */ + public GroupList getGroupList(String label) { + for (Hit hit : this) { + if (!(hit instanceof GroupList)) { + continue; + } + GroupList lst = (GroupList)hit; + if (!label.equals(lst.getLabel())) { + continue; + } + return lst; + } + return null; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/result/GroupId.java b/container-search/src/main/java/com/yahoo/search/grouping/result/GroupId.java new file mode 100644 index 00000000000..a9f5102caea --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/result/GroupId.java @@ -0,0 +1,44 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.result; + +/** + * This abstract class represents the id of a single group in the grouping result model. A subclass corresponding to the + * evaluation result of generating {@link com.yahoo.search.grouping.request.GroupingExpression} is contained in all + * {@link Group} objects. It is used by {@link com.yahoo.search.grouping.GroupingRequest} to identify its root result + * group, and by all client code for identifying groups. + * <p> + * The {@link #toString()} method of this class generates a URI-compatible string on the form + * "group:<typeName>:<subclassSpecific>". + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public abstract class GroupId { + + private final String type; + private final String image; + + protected GroupId(String type, Object... args) { + this.type = type; + + StringBuilder image = new StringBuilder("group:"); + image.append(type); + for (Object arg : args) { + image.append(":").append(arg); + } + this.image = image.toString(); + } + + /** + * Returns the type name of this group id. 
This is the second part of the {@link #toString()} value of this. + * + * @return The type name. + */ + public String getTypeName() { + return type; + } + + @Override + public String toString() { + return image; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/result/GroupList.java b/container-search/src/main/java/com/yahoo/search/grouping/result/GroupList.java new file mode 100644 index 00000000000..ee8d7c33fa7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/result/GroupList.java @@ -0,0 +1,24 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.result; + +import com.yahoo.search.Result; +import com.yahoo.search.grouping.GroupingRequest; + +/** + * This class represents a labeled group list in the grouping result model. It is contained in {@link Group}, and + * contains one or more {@link Group groups} itself, allowing for a hierarchy of grouping results. Use the {@link + * GroupingRequest#getResultGroup(Result)} to retrieve grouping results. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class GroupList extends AbstractList { + + /** + * Constructs a new instance of this class. + * + * @param label The label to assign to this. + */ + public GroupList(String label) { + super("grouplist", label); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/result/HitList.java b/container-search/src/main/java/com/yahoo/search/grouping/result/HitList.java new file mode 100644 index 00000000000..abc87a92ab1 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/result/HitList.java @@ -0,0 +1,25 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.grouping.result; + +import com.yahoo.search.Result; +import com.yahoo.search.grouping.GroupingRequest; +import com.yahoo.search.result.Hit; + +/** + * <p>This class represents a labeled hit list in the grouping result model. It is contained in {@link Group}, and + * contains one or more {@link Hit hits} itself, making this the parent of leaf nodes in the hierarchy of grouping + * results. Use the {@link GroupingRequest#getResultGroup(Result)} to retrieve grouping results.</p> + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class HitList extends AbstractList { + + /** + * <p>Constructs a new instance of this class.</p> + * + * @param label The label to assign to this. + */ + public HitList(String label) { + super("hitlist", label); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/result/HitRenderer.java b/container-search/src/main/java/com/yahoo/search/grouping/result/HitRenderer.java new file mode 100644 index 00000000000..7558af5acb5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/result/HitRenderer.java @@ -0,0 +1,99 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.result; + +import com.yahoo.search.grouping.Continuation; +import com.yahoo.search.result.HitGroup; +import com.yahoo.text.Utf8String; +import com.yahoo.text.XMLWriter; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Map; + +/** + * This is a helper class for rendering grouping results. 
+ * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public abstract class HitRenderer { + + private static final Utf8String ATR_LABEL = new Utf8String("label"); + private static final Utf8String ATR_RELEVANCE = new Utf8String("relevance"); + private static final Utf8String ATR_TYPE = new Utf8String("type"); + private static final Utf8String TAG_BUCKET_FROM = new Utf8String("from"); + private static final Utf8String TAG_BUCKET_TO = new Utf8String("to"); + private static final Utf8String TAG_CONTINUATION = new Utf8String("continuation"); + private static final Utf8String TAG_CONTINUATION_ID = new Utf8String("id"); + private static final Utf8String TAG_GROUP_LIST = new Utf8String("grouplist"); + private static final Utf8String TAG_GROUP = new Utf8String("group"); + private static final Utf8String TAG_GROUP_ID = new Utf8String("id"); + private static final Utf8String TAG_HIT_LIST = new Utf8String("hitlist"); + private static final Utf8String TAG_OUTPUT = new Utf8String("output"); + + /** + * Renders the header for the given grouping hit. If the hit is not a grouping hit, this method does nothing and + * returns false. + * <p>Post-condition if this is a grouping hit: The hit tag is open. + * + * @param hit The hit whose header to render. + * @param writer The writer to render to. + * @return True if the hit was rendered. + * @throws IOException Thrown if there was a problem writing. 
+ */ + public static boolean renderHeader(HitGroup hit, XMLWriter writer) throws IOException { + if (hit instanceof GroupList) { + writer.openTag(TAG_GROUP_LIST).attribute(ATR_LABEL, ((GroupList)hit).getLabel()); + renderContinuations(((GroupList)hit).continuations(), writer); + } else if (hit instanceof Group) { + writer.openTag(TAG_GROUP).attribute(ATR_RELEVANCE, hit.getRelevance().toString()); + renderGroupId(((Group)hit).getGroupId(), writer); + if (hit instanceof RootGroup) { + renderContinuation(Continuation.THIS_PAGE, ((RootGroup)hit).continuation(), writer); + } + for (String label : hit.fieldKeys()) { + writer.openTag(TAG_OUTPUT).attribute(ATR_LABEL, label).content(hit.getField(label), false).closeTag(); + } + } else if (hit instanceof HitList) { + writer.openTag(TAG_HIT_LIST).attribute(ATR_LABEL, ((HitList)hit).getLabel()); + renderContinuations(((HitList)hit).continuations(), writer); + } else { + return false; + } + writer.closeStartTag(); + return true; + } + + private static void renderGroupId(GroupId id, XMLWriter writer) { + writer.openTag(TAG_GROUP_ID).attribute(ATR_TYPE, id.getTypeName()); + if (id instanceof ValueGroupId) { + writer.content(getIdValue((ValueGroupId)id), false); + } else if (id instanceof BucketGroupId) { + BucketGroupId bucketId = (BucketGroupId)id; + writer.openTag(TAG_BUCKET_FROM).content(getBucketFrom(bucketId), false).closeTag(); + writer.openTag(TAG_BUCKET_TO).content(getBucketTo(bucketId), false).closeTag(); + } + writer.closeTag(); + } + + private static Object getIdValue(ValueGroupId id) { + return id instanceof RawId ? Arrays.toString(((RawId)id).getValue()) : id.getValue(); + } + + private static Object getBucketFrom(BucketGroupId id) { + return id instanceof RawBucketId ? Arrays.toString(((RawBucketId)id).getFrom()) : id.getFrom(); + } + + private static Object getBucketTo(BucketGroupId id) { + return id instanceof RawBucketId ? 
Arrays.toString(((RawBucketId)id).getTo()) : id.getTo(); + } + + private static void renderContinuations(Map<String, Continuation> continuations, XMLWriter writer) { + for (Map.Entry<String, Continuation> entry : continuations.entrySet()) { + renderContinuation(entry.getKey(), entry.getValue(), writer); + } + } + + private static void renderContinuation(String id, Continuation continuation, XMLWriter writer) { + writer.openTag(TAG_CONTINUATION).attribute(TAG_CONTINUATION_ID, id).content(continuation, false).closeTag(); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/result/LongBucketId.java b/container-search/src/main/java/com/yahoo/search/grouping/result/LongBucketId.java new file mode 100644 index 00000000000..14ced353b67 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/result/LongBucketId.java @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.result; + +/** + * This class is used in {@link Group} instances where the identifying expression evaluated to a {@link + * com.yahoo.search.grouping.request.LongBucket}. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class LongBucketId extends BucketGroupId<Long> { + + /** + * Constructs a new instance of this class. + * + * @param from The identifying inclusive-from long. + * @param to The identifying exclusive-to long. + */ + public LongBucketId(Long from, Long to) { + super("long_bucket", from, to); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/result/LongId.java b/container-search/src/main/java/com/yahoo/search/grouping/result/LongId.java new file mode 100644 index 00000000000..18d2098a5a1 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/result/LongId.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.search.grouping.result; + +/** + * This class is used in {@link Group} instances where the identifying expression evaluated to a {@link Long}. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class LongId extends ValueGroupId<Long> { + + /** + * Constructs a new instance of this class. + * + * @param value The identifying long. + */ + public LongId(Long value) { + super("long", value); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/result/NullId.java b/container-search/src/main/java/com/yahoo/search/grouping/result/NullId.java new file mode 100644 index 00000000000..a6473837c76 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/result/NullId.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.result; + +/** + * This class is used in {@link Group} instances where the identifying expression evaluated to null. For example, hits that + * fall outside the buckets of a {@link com.yahoo.search.grouping.request.PredefinedFunction} are added to an + * auto-generated group with this id. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class NullId extends GroupId { + + /** + * Constructs a new instance of this class. + */ + public NullId() { + super("null"); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/result/RawBucketId.java b/container-search/src/main/java/com/yahoo/search/grouping/result/RawBucketId.java new file mode 100644 index 00000000000..bb0dae9d6b8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/result/RawBucketId.java @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.grouping.result; + +import java.util.Arrays; + +/** + * This class is used in {@link Group} instances where the identifying + * expression evaluated to a {@link com.yahoo.search.grouping.request.RawBucket}. + * + * @author <a href="mailto:lulf@yahoo-inc.com">Ulf Lilleengen</a> + */ +public class RawBucketId extends BucketGroupId<byte[]> { + + /** + * Constructs a new instance of this class. + * + * @param from The identifying inclusive-from raw buffer. + * @param to The identifying exclusive-to raw buffer. + */ + public RawBucketId(byte[] from, byte[] to) { + super("raw_bucket", from, Arrays.toString(from), to, Arrays.toString(to)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/result/RawId.java b/container-search/src/main/java/com/yahoo/search/grouping/result/RawId.java new file mode 100644 index 00000000000..48e9c6e4523 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/result/RawId.java @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.result; + +import java.util.Arrays; + +/** + * This class is used in {@link Group} instances where the identifying expression evaluated to a {@link Byte} array. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class RawId extends ValueGroupId<byte[]> { + + /** + * Constructs a new instance of this class. + * + * @param value The identifying byte array. 
+ */ + public RawId(byte[] value) { + super("raw", value, Arrays.toString(value)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/result/RootGroup.java b/container-search/src/main/java/com/yahoo/search/grouping/result/RootGroup.java new file mode 100644 index 00000000000..238f9ec68f3 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/result/RootGroup.java @@ -0,0 +1,25 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.result; + +import com.yahoo.search.grouping.Continuation; +import com.yahoo.search.result.Relevance; + +/** + * This class represents the root {@link Group} in the grouping result model. This class adds a {@link Continuation} + * object that can be used to paginate the result. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class RootGroup extends Group { + + private final Continuation continuation; + + public RootGroup(int id, Continuation continuation) { + super(new RootId(id), new Relevance(1.0)); + this.continuation = continuation; + } + + public Continuation continuation() { + return continuation; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/result/RootId.java b/container-search/src/main/java/com/yahoo/search/grouping/result/RootId.java new file mode 100644 index 00000000000..ebf3152646a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/result/RootId.java @@ -0,0 +1,14 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.result; + +/** + * This class is used in {@link RootGroup} instances. 
+ * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class RootId extends GroupId { + + public RootId(int id) { + super("root", id); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/result/StringBucketId.java b/container-search/src/main/java/com/yahoo/search/grouping/result/StringBucketId.java new file mode 100644 index 00000000000..0b4459aa4b6 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/result/StringBucketId.java @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.result; + +/** + * This class is used in {@link Group} instances where the identifying expression evaluated to a {@link + * com.yahoo.search.grouping.request.StringBucket}. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class StringBucketId extends BucketGroupId<String> { + + /** + * Constructs a new instance of this class. + * + * @param from The identifying inclusive-from string. + * @param to The identifying exclusive-to string. + */ + public StringBucketId(String from, String to) { + super("string_bucket", from, to); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/result/StringId.java b/container-search/src/main/java/com/yahoo/search/grouping/result/StringId.java new file mode 100644 index 00000000000..0a82b98af44 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/result/StringId.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.result; + +/** + * This class is used in {@link Group} instances where the identifying expression evaluated to a {@link String}. 
+ * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class StringId extends ValueGroupId<String> { + + /** + * Constructs a new instance of this class. + * + * @param value The identifying string. + */ + public StringId(String value) { + super("string", value); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/result/ValueGroupId.java b/container-search/src/main/java/com/yahoo/search/grouping/result/ValueGroupId.java new file mode 100644 index 00000000000..f6e815b231c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/result/ValueGroupId.java @@ -0,0 +1,45 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.result; + +import static com.yahoo.text.Lowercase.toLowerCase; + +/** + * This abstract class is used in {@link Group} instances where the identifying expression evaluated to a single value. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public abstract class ValueGroupId<T> extends GroupId { + + private final T value; + + /** + * Constructs a new instance of this class. + * + * @param type The type of this id's value. + * @param value The identifying value, imaged with the null-safe {@link String#valueOf(Object)}. + */ + public ValueGroupId(String type, T value) { + this(type, value, String.valueOf(value)); + } + + /** + * Constructs a new instance of this class. + * + * @param type The type of this id's value. + * @param value The identifying value. + * @param valueImage The String representation of the <tt>value</tt> argument. + */ + public ValueGroupId(String type, T value, String valueImage) { + super(type, valueImage); + this.value = value; + } + + /** + * Returns the identifying value. + * + * @return The value. 
+ */ + public T getValue() { + return value; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/result/package-info.java b/container-search/src/main/java/com/yahoo/search/grouping/result/package-info.java new file mode 100644 index 00000000000..6c70f67971d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/result/package-info.java @@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +@PublicApi +package com.yahoo.search.grouping.result; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/grouping/vespa/CompositeContinuation.java b/container-search/src/main/java/com/yahoo/search/grouping/vespa/CompositeContinuation.java new file mode 100644 index 00000000000..e8efce2d0bc --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/vespa/CompositeContinuation.java @@ -0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.grouping.vespa; + +import com.yahoo.search.grouping.Continuation; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +/** + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +class CompositeContinuation extends EncodableContinuation implements Iterable<EncodableContinuation> { + + private final List<EncodableContinuation> children = new ArrayList<>(); + + public CompositeContinuation add(EncodableContinuation child) { + children.add(child); + return this; + } + + @Override + public Iterator<EncodableContinuation> iterator() { + return children.iterator(); + } + + @Override + public int hashCode() { + return children.hashCode(); + } + + @Override + public boolean equals(Object obj) { + return obj instanceof CompositeContinuation && children.equals(((CompositeContinuation)obj).children); + } + + @Override + public void encode(IntegerEncoder out) { + for (EncodableContinuation child : children) { + child.encode(out); + } + } + + public static CompositeContinuation decode(IntegerDecoder from) { + CompositeContinuation ret = new CompositeContinuation(); + while (from.hasNext()) { + ret.add(OffsetContinuation.decode(from)); + } + return ret; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/vespa/ContinuationDecoder.java b/container-search/src/main/java/com/yahoo/search/grouping/vespa/ContinuationDecoder.java new file mode 100644 index 00000000000..a8779be09c2 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/vespa/ContinuationDecoder.java @@ -0,0 +1,14 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.grouping.vespa; + +import com.yahoo.search.grouping.Continuation; + +/** + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class ContinuationDecoder { + + public static Continuation decode(String str) { + return CompositeContinuation.decode(new IntegerDecoder(str)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/vespa/EncodableContinuation.java b/container-search/src/main/java/com/yahoo/search/grouping/vespa/EncodableContinuation.java new file mode 100644 index 00000000000..ca059cbe1fe --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/vespa/EncodableContinuation.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.vespa; + +import com.yahoo.search.grouping.Continuation; + +/** + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +abstract class EncodableContinuation extends Continuation { + + public abstract void encode(IntegerEncoder out); + + @Override + public final String toString() { + IntegerEncoder encoder = new IntegerEncoder(); + encode(encoder); + return encoder.toString(); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/vespa/ExpressionConverter.java b/container-search/src/main/java/com/yahoo/search/grouping/vespa/ExpressionConverter.java new file mode 100644 index 00000000000..9de1c902be1 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/vespa/ExpressionConverter.java @@ -0,0 +1,598 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.grouping.vespa; + +import com.yahoo.search.grouping.request.AddFunction; +import com.yahoo.search.grouping.request.AggregatorNode; +import com.yahoo.search.grouping.request.AndFunction; +import com.yahoo.search.grouping.request.ArrayAtLookup; +import com.yahoo.search.grouping.request.AttributeFunction; +import com.yahoo.search.grouping.request.AttributeValue; +import com.yahoo.search.grouping.request.AvgAggregator; +import com.yahoo.search.grouping.request.BucketValue; +import com.yahoo.search.grouping.request.CatFunction; +import com.yahoo.search.grouping.request.ConstantValue; +import com.yahoo.search.grouping.request.CountAggregator; +import com.yahoo.search.grouping.request.DateFunction; +import com.yahoo.search.grouping.request.DayOfMonthFunction; +import com.yahoo.search.grouping.request.DayOfWeekFunction; +import com.yahoo.search.grouping.request.DayOfYearFunction; +import com.yahoo.search.grouping.request.DebugWaitFunction; +import com.yahoo.search.grouping.request.DivFunction; +import com.yahoo.search.grouping.request.DocIdNsSpecificValue; +import com.yahoo.search.grouping.request.DoubleValue; +import com.yahoo.search.grouping.request.FixedWidthFunction; +import com.yahoo.search.grouping.request.GroupingExpression; +import com.yahoo.search.grouping.request.GroupingOperation; +import com.yahoo.search.grouping.request.HourOfDayFunction; +import com.yahoo.search.grouping.request.InfiniteValue; +import com.yahoo.search.grouping.request.InterpolatedLookup; +import com.yahoo.search.grouping.request.LongValue; +import com.yahoo.search.grouping.request.MathACosFunction; +import com.yahoo.search.grouping.request.MathACosHFunction; +import com.yahoo.search.grouping.request.MathASinFunction; +import com.yahoo.search.grouping.request.MathASinHFunction; +import com.yahoo.search.grouping.request.MathATanFunction; +import com.yahoo.search.grouping.request.MathATanHFunction; +import com.yahoo.search.grouping.request.MathCbrtFunction; +import 
com.yahoo.search.grouping.request.MathCosFunction; +import com.yahoo.search.grouping.request.MathCosHFunction; +import com.yahoo.search.grouping.request.MathExpFunction; +import com.yahoo.search.grouping.request.MathFloorFunction; +import com.yahoo.search.grouping.request.MathHypotFunction; +import com.yahoo.search.grouping.request.MathLog10Function; +import com.yahoo.search.grouping.request.MathLog1pFunction; +import com.yahoo.search.grouping.request.MathLogFunction; +import com.yahoo.search.grouping.request.MathPowFunction; +import com.yahoo.search.grouping.request.MathSinFunction; +import com.yahoo.search.grouping.request.MathSinHFunction; +import com.yahoo.search.grouping.request.MathSqrtFunction; +import com.yahoo.search.grouping.request.MathTanFunction; +import com.yahoo.search.grouping.request.MathTanHFunction; +import com.yahoo.search.grouping.request.MaxAggregator; +import com.yahoo.search.grouping.request.MaxFunction; +import com.yahoo.search.grouping.request.Md5Function; +import com.yahoo.search.grouping.request.MinAggregator; +import com.yahoo.search.grouping.request.MinFunction; +import com.yahoo.search.grouping.request.MinuteOfHourFunction; +import com.yahoo.search.grouping.request.ModFunction; +import com.yahoo.search.grouping.request.MonthOfYearFunction; +import com.yahoo.search.grouping.request.MulFunction; +import com.yahoo.search.grouping.request.NegFunction; +import com.yahoo.search.grouping.request.NormalizeSubjectFunction; +import com.yahoo.search.grouping.request.NowFunction; +import com.yahoo.search.grouping.request.OrFunction; +import com.yahoo.search.grouping.request.PredefinedFunction; +import com.yahoo.search.grouping.request.RawValue; +import com.yahoo.search.grouping.request.RelevanceValue; +import com.yahoo.search.grouping.request.ReverseFunction; +import com.yahoo.search.grouping.request.SecondOfMinuteFunction; +import com.yahoo.search.grouping.request.SizeFunction; +import com.yahoo.search.grouping.request.SortFunction; +import 
com.yahoo.search.grouping.request.StrCatFunction; +import com.yahoo.search.grouping.request.StrLenFunction; +import com.yahoo.search.grouping.request.StringValue; +import com.yahoo.search.grouping.request.SubFunction; +import com.yahoo.search.grouping.request.SumAggregator; +import com.yahoo.search.grouping.request.SummaryValue; +import com.yahoo.search.grouping.request.ToDoubleFunction; +import com.yahoo.search.grouping.request.ToLongFunction; +import com.yahoo.search.grouping.request.ToRawFunction; +import com.yahoo.search.grouping.request.ToStringFunction; +import com.yahoo.search.grouping.request.UcaFunction; +import com.yahoo.search.grouping.request.XorAggregator; +import com.yahoo.search.grouping.request.XorBitFunction; +import com.yahoo.search.grouping.request.XorFunction; +import com.yahoo.search.grouping.request.YearFunction; +import com.yahoo.search.grouping.request.YmumValue; +import com.yahoo.search.grouping.request.ZCurveXFunction; +import com.yahoo.search.grouping.request.ZCurveYFunction; + +import com.yahoo.searchlib.aggregation.AggregationResult; +import com.yahoo.searchlib.aggregation.AverageAggregationResult; +import com.yahoo.searchlib.aggregation.CountAggregationResult; +import com.yahoo.searchlib.aggregation.ExpressionCountAggregationResult; +import com.yahoo.searchlib.aggregation.HitsAggregationResult; +import com.yahoo.searchlib.aggregation.MaxAggregationResult; +import com.yahoo.searchlib.aggregation.MinAggregationResult; +import com.yahoo.searchlib.aggregation.SumAggregationResult; +import com.yahoo.searchlib.aggregation.XorAggregationResult; + +import com.yahoo.searchlib.expression.AddFunctionNode; +import com.yahoo.searchlib.expression.AggregationRefNode; +import com.yahoo.searchlib.expression.AndFunctionNode; +import com.yahoo.searchlib.expression.ArrayAtLookupNode; +import com.yahoo.searchlib.expression.AttributeNode; +import com.yahoo.searchlib.expression.BucketResultNode; +import com.yahoo.searchlib.expression.CatFunctionNode; +import 
com.yahoo.searchlib.expression.ConstantNode; +import com.yahoo.searchlib.expression.DebugWaitFunctionNode; +import com.yahoo.searchlib.expression.DivideFunctionNode; +import com.yahoo.searchlib.expression.ExpressionNode; +import com.yahoo.searchlib.expression.FixedWidthBucketFunctionNode; +import com.yahoo.searchlib.expression.FloatBucketResultNode; +import com.yahoo.searchlib.expression.FloatBucketResultNodeVector; +import com.yahoo.searchlib.expression.FloatResultNode; +import com.yahoo.searchlib.expression.GetDocIdNamespaceSpecificFunctionNode; +import com.yahoo.searchlib.expression.GetYMUMChecksumFunctionNode; +import com.yahoo.searchlib.expression.IntegerBucketResultNode; +import com.yahoo.searchlib.expression.IntegerBucketResultNodeVector; +import com.yahoo.searchlib.expression.IntegerResultNode; +import com.yahoo.searchlib.expression.InterpolatedLookupNode; +import com.yahoo.searchlib.expression.MD5BitFunctionNode; +import com.yahoo.searchlib.expression.MathFunctionNode; +import com.yahoo.searchlib.expression.MaxFunctionNode; +import com.yahoo.searchlib.expression.MinFunctionNode; +import com.yahoo.searchlib.expression.ModuloFunctionNode; +import com.yahoo.searchlib.expression.MultiArgFunctionNode; +import com.yahoo.searchlib.expression.MultiplyFunctionNode; +import com.yahoo.searchlib.expression.NegateFunctionNode; +import com.yahoo.searchlib.expression.NormalizeSubjectFunctionNode; +import com.yahoo.searchlib.expression.NumElemFunctionNode; +import com.yahoo.searchlib.expression.OrFunctionNode; +import com.yahoo.searchlib.expression.RangeBucketPreDefFunctionNode; +import com.yahoo.searchlib.expression.RawBucketResultNode; +import com.yahoo.searchlib.expression.RawBucketResultNodeVector; +import com.yahoo.searchlib.expression.RawResultNode; +import com.yahoo.searchlib.expression.RelevanceNode; +import com.yahoo.searchlib.expression.ResultNodeVector; +import com.yahoo.searchlib.expression.ReverseFunctionNode; +import 
com.yahoo.searchlib.expression.SortFunctionNode; +import com.yahoo.searchlib.expression.StrCatFunctionNode; +import com.yahoo.searchlib.expression.StrLenFunctionNode; +import com.yahoo.searchlib.expression.StringBucketResultNode; +import com.yahoo.searchlib.expression.StringBucketResultNodeVector; +import com.yahoo.searchlib.expression.StringResultNode; +import com.yahoo.searchlib.expression.TimeStampFunctionNode; +import com.yahoo.searchlib.expression.ToFloatFunctionNode; +import com.yahoo.searchlib.expression.ToIntFunctionNode; +import com.yahoo.searchlib.expression.ToRawFunctionNode; +import com.yahoo.searchlib.expression.ToStringFunctionNode; +import com.yahoo.searchlib.expression.UcaFunctionNode; +import com.yahoo.searchlib.expression.XorBitFunctionNode; +import com.yahoo.searchlib.expression.XorFunctionNode; +import com.yahoo.searchlib.expression.ZCurveFunctionNode; + +/** + * This is a helper class for {@link RequestBuilder} that offloads the code to convert {@link GroupingExpression} type + * objects to back-end specific expressions. This is a straightforward one-to-one conversion. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +class ExpressionConverter { + + public static final String DEFAULT_SUMMARY_NAME = ""; + public static final int DEFAULT_TIME_OFFSET = 0; + private String defaultSummaryName = DEFAULT_SUMMARY_NAME; + private int timeOffset = DEFAULT_TIME_OFFSET; + + /** + * Sets the summary name to use when converting {@link SummaryValue} that was created without an explicit name. + * + * @param summaryName The default summary name to use. + * @return This, to allow chaining. + */ + public ExpressionConverter setDefaultSummaryName(String summaryName) { + defaultSummaryName = summaryName; + return this; + } + + /** + * Sets an offset to use for all time-based grouping expressions. + * + * @param millis The offset in milliseconds. + * @return This, to allow chaining. 
+ */ + public ExpressionConverter setTimeOffset(int millis) { + this.timeOffset = millis / 1000; + return this; + } + + /** + * Converts the given ast type grouping expression to a corresponding back-end type aggregation result. + * + * @param exp The expression to convert. + * @return The corresponding back-end result. + * @throws UnsupportedOperationException Thrown if the given expression could not be converted. + */ + public AggregationResult toAggregationResult(GroupingExpression exp) { + int level = exp.getLevel(); + // Is aggregating on list of groups? + if (level > 1) { + /* + * The below aggregator operates on lists of groups in the query language world. + * Internally, it operates on hits (by evaluating the group-by expression for each hit). + * The group-by expression is passed to the aggregator by RequestBuilder. + */ + if (exp instanceof CountAggregator) { + return new ExpressionCountAggregationResult(); + } + throw new UnsupportedOperationException( + "Can not aggregate on " + GroupingOperation.getLevelDesc(level) + "."); + } + if (exp instanceof AvgAggregator) { + return new AverageAggregationResult() + .setExpression(toExpressionNode(((AvgAggregator)exp).getExpression())); + } + if (exp instanceof CountAggregator) { + return new CountAggregationResult() + .setExpression(new ConstantNode(new IntegerResultNode(0))); + } + if (exp instanceof MaxAggregator) { + return new MaxAggregationResult() + .setExpression(toExpressionNode(((MaxAggregator)exp).getExpression())); + } + if (exp instanceof MinAggregator) { + return new MinAggregationResult() + .setExpression(toExpressionNode(((MinAggregator)exp).getExpression())); + } + if (exp instanceof SumAggregator) { + return new SumAggregationResult() + .setExpression(toExpressionNode(((SumAggregator)exp).getExpression())); + } + if (exp instanceof SummaryValue) { + String summaryName = ((SummaryValue)exp).getSummaryName(); + return new HitsAggregationResult() + .setSummaryClass(summaryName != null ? 
summaryName : defaultSummaryName) + .setExpression(new ConstantNode(new IntegerResultNode(0))); + } + if (exp instanceof XorAggregator) { + return new XorAggregationResult() + .setExpression(toExpressionNode(((XorAggregator)exp).getExpression())); + } + throw new UnsupportedOperationException("Can not convert '" + exp + "' to an aggregator."); + } + + /** + * Converts the given ast type grouping expression to a corresponding back-end type expression. + * + * @param exp The expression to convert. + * @return The corresponding back-end expression. + * @throws UnsupportedOperationException Thrown if the given expression could not be converted. + */ + public ExpressionNode toExpressionNode(GroupingExpression exp) { + if (exp instanceof AddFunction) { + return addArguments(new AddFunctionNode(), (AddFunction)exp); + } + if (exp instanceof AggregatorNode) { + return new AggregationRefNode(toAggregationResult(exp)); + } + if (exp instanceof AndFunction) { + return addArguments(new AndFunctionNode(), (AndFunction)exp); + } + if (exp instanceof AttributeValue) { + return new AttributeNode(((AttributeValue)exp).getAttributeName()); + } + if (exp instanceof AttributeFunction) { + return new AttributeNode(((AttributeFunction)exp).getAttributeName()); + } + if (exp instanceof CatFunction) { + return addArguments(new CatFunctionNode(), (CatFunction)exp); + } + if (exp instanceof DebugWaitFunction) { + return new DebugWaitFunctionNode(toExpressionNode(((DebugWaitFunction)exp).getArg(0)), + ((DebugWaitFunction)exp).getWaitTime(), + ((DebugWaitFunction)exp).getBusyWait()); + } + if (exp instanceof DocIdNsSpecificValue) { + return new GetDocIdNamespaceSpecificFunctionNode(); + } + if (exp instanceof DoubleValue) { + return new ConstantNode(new FloatResultNode(((DoubleValue)exp).getValue())); + } + if (exp instanceof DivFunction) { + return addArguments(new DivideFunctionNode(), (DivFunction)exp); + } + if (exp instanceof FixedWidthFunction) { + Number w = 
((FixedWidthFunction)exp).getWidth(); + return new FixedWidthBucketFunctionNode( + w instanceof Double ? new FloatResultNode(w.doubleValue()) : new IntegerResultNode(w.longValue()), + toExpressionNode(((FixedWidthFunction)exp).getArg(0))); + } + if (exp instanceof LongValue) { + return new ConstantNode(new IntegerResultNode(((LongValue)exp).getValue())); + } + if (exp instanceof MaxFunction) { + return addArguments(new MaxFunctionNode(), (MaxFunction)exp); + } + if (exp instanceof Md5Function) { + return new MD5BitFunctionNode().setNumBits(((Md5Function)exp).getNumBits()) + .addArg(toExpressionNode(((Md5Function)exp).getArg(0))); + } + if (exp instanceof UcaFunction) { + UcaFunction uca = (UcaFunction)exp; + return new UcaFunctionNode(toExpressionNode(uca.getArg(0)), uca.getLocale(), uca.getStrength()); + } + if (exp instanceof MinFunction) { + return addArguments(new MinFunctionNode(), (MinFunction)exp); + } + if (exp instanceof ModFunction) { + return addArguments(new ModuloFunctionNode(), (ModFunction)exp); + } + if (exp instanceof MulFunction) { + return addArguments(new MultiplyFunctionNode(), (MulFunction)exp); + } + if (exp instanceof NegFunction) { + return new NegateFunctionNode(toExpressionNode(((NegFunction)exp).getArg(0))); + } + if (exp instanceof NormalizeSubjectFunction) { + return new NormalizeSubjectFunctionNode(toExpressionNode(((NormalizeSubjectFunction)exp).getArg(0))); + } + if (exp instanceof NowFunction) { + return new ConstantNode(new IntegerResultNode(System.currentTimeMillis() / 1000)); + } + if (exp instanceof OrFunction) { + return addArguments(new OrFunctionNode(), (OrFunction)exp); + } + if (exp instanceof PredefinedFunction) { + return new RangeBucketPreDefFunctionNode(toBucketList((PredefinedFunction)exp), + toExpressionNode(((PredefinedFunction)exp).getArg(0))); + } + if (exp instanceof RelevanceValue) { + return new RelevanceNode(); + } + if (exp instanceof ReverseFunction) { + return new 
ReverseFunctionNode(toExpressionNode(((ReverseFunction)exp).getArg(0))); + } + if (exp instanceof SizeFunction) { + return new NumElemFunctionNode(toExpressionNode(((SizeFunction)exp).getArg(0))); + } + if (exp instanceof SortFunction) { + return new SortFunctionNode(toExpressionNode(((SortFunction)exp).getArg(0))); + } + if (exp instanceof ArrayAtLookup) { + ArrayAtLookup aal = (ArrayAtLookup) exp; + return new ArrayAtLookupNode(aal.getAttributeName(), toExpressionNode(aal.getIndexArgument())); + } + if (exp instanceof InterpolatedLookup) { + InterpolatedLookup sarl = (InterpolatedLookup) exp; + return new InterpolatedLookupNode(sarl.getAttributeName(), toExpressionNode(sarl.getLookupArgument())); + } + if (exp instanceof StrCatFunction) { + return addArguments(new StrCatFunctionNode(), (StrCatFunction)exp); + } + if (exp instanceof StringValue) { + return new ConstantNode(new StringResultNode(((StringValue)exp).getValue())); + } + if (exp instanceof StrLenFunction) { + return new StrLenFunctionNode(toExpressionNode(((StrLenFunction)exp).getArg(0))); + } + if (exp instanceof SubFunction) { + return toSubNode((SubFunction)exp); + } + if (exp instanceof ToDoubleFunction) { + return new ToFloatFunctionNode(toExpressionNode(((ToDoubleFunction)exp).getArg(0))); + } + if (exp instanceof ToLongFunction) { + return new ToIntFunctionNode(toExpressionNode(((ToLongFunction)exp).getArg(0))); + } + if (exp instanceof ToRawFunction) { + return new ToRawFunctionNode(toExpressionNode(((ToRawFunction)exp).getArg(0))); + } + if (exp instanceof ToStringFunction) { + return new ToStringFunctionNode(toExpressionNode(((ToStringFunction)exp).getArg(0))); + } + if (exp instanceof DateFunction) { + StrCatFunctionNode ret = new StrCatFunctionNode(); + GroupingExpression arg = ((DateFunction)exp).getArg(0); + ret.addArg(new ToStringFunctionNode(toTime(arg, TimeStampFunctionNode.TimePart.Year))); + ret.addArg(new ConstantNode(new StringResultNode("-"))); + ret.addArg(new 
ToStringFunctionNode(toTime(arg, TimeStampFunctionNode.TimePart.Month))); + ret.addArg(new ConstantNode(new StringResultNode("-"))); + ret.addArg(new ToStringFunctionNode(toTime(arg, TimeStampFunctionNode.TimePart.MonthDay))); + return ret; + } + if (exp instanceof MathSqrtFunction) { + return new MathFunctionNode(toExpressionNode(((MathSqrtFunction)exp).getArg(0)), + MathFunctionNode.Function.SQRT); + } + if (exp instanceof MathCbrtFunction) { + return new MathFunctionNode(toExpressionNode(((MathCbrtFunction)exp).getArg(0)), + MathFunctionNode.Function.CBRT); + } + if (exp instanceof MathLogFunction) { + return new MathFunctionNode(toExpressionNode(((MathLogFunction)exp).getArg(0)), + MathFunctionNode.Function.LOG); + } + if (exp instanceof MathLog1pFunction) { + return new MathFunctionNode(toExpressionNode(((MathLog1pFunction)exp).getArg(0)), + MathFunctionNode.Function.LOG1P); + } + if (exp instanceof MathLog10Function) { + return new MathFunctionNode(toExpressionNode(((MathLog10Function)exp).getArg(0)), + MathFunctionNode.Function.LOG10); + } + if (exp instanceof MathExpFunction) { + return new MathFunctionNode(toExpressionNode(((MathExpFunction)exp).getArg(0)), + MathFunctionNode.Function.EXP); + } + if (exp instanceof MathPowFunction) { + return new MathFunctionNode(toExpressionNode(((MathPowFunction)exp).getArg(0)), + MathFunctionNode.Function.POW) + .addArg(toExpressionNode(((MathPowFunction)exp).getArg(1))); + } + if (exp instanceof MathHypotFunction) { + return new MathFunctionNode(toExpressionNode(((MathHypotFunction)exp).getArg(0)), + MathFunctionNode.Function.HYPOT) + .addArg(toExpressionNode(((MathHypotFunction)exp).getArg(1))); + } + if (exp instanceof MathSinFunction) { + return new MathFunctionNode(toExpressionNode(((MathSinFunction)exp).getArg(0)), + MathFunctionNode.Function.SIN); + } + if (exp instanceof MathASinFunction) { + return new MathFunctionNode(toExpressionNode(((MathASinFunction)exp).getArg(0)), + MathFunctionNode.Function.ASIN); + } + 
if (exp instanceof MathCosFunction) { + return new MathFunctionNode(toExpressionNode(((MathCosFunction)exp).getArg(0)), + MathFunctionNode.Function.COS); + } + if (exp instanceof MathACosFunction) { + return new MathFunctionNode(toExpressionNode(((MathACosFunction)exp).getArg(0)), + MathFunctionNode.Function.ACOS); + } + if (exp instanceof MathTanFunction) { + return new MathFunctionNode(toExpressionNode(((MathTanFunction)exp).getArg(0)), + MathFunctionNode.Function.TAN); + } + if (exp instanceof MathATanFunction) { + return new MathFunctionNode(toExpressionNode(((MathATanFunction)exp).getArg(0)), + MathFunctionNode.Function.ATAN); + } + if (exp instanceof MathSinHFunction) { + return new MathFunctionNode(toExpressionNode(((MathSinHFunction)exp).getArg(0)), + MathFunctionNode.Function.SINH); + } + if (exp instanceof MathASinHFunction) { + return new MathFunctionNode(toExpressionNode(((MathASinHFunction)exp).getArg(0)), + MathFunctionNode.Function.ASINH); + } + if (exp instanceof MathCosHFunction) { + return new MathFunctionNode(toExpressionNode(((MathCosHFunction)exp).getArg(0)), + MathFunctionNode.Function.COSH); + } + if (exp instanceof MathACosHFunction) { + return new MathFunctionNode(toExpressionNode(((MathACosHFunction)exp).getArg(0)), + MathFunctionNode.Function.ACOSH); + } + if (exp instanceof MathTanHFunction) { + return new MathFunctionNode(toExpressionNode(((MathTanHFunction)exp).getArg(0)), + MathFunctionNode.Function.TANH); + } + if (exp instanceof MathATanHFunction) { + return new MathFunctionNode(toExpressionNode(((MathATanHFunction)exp).getArg(0)), + MathFunctionNode.Function.ATANH); + } + if (exp instanceof MathFloorFunction) { + return new MathFunctionNode(toExpressionNode(((MathFloorFunction)exp).getArg(0)), + MathFunctionNode.Function.FLOOR); + } + if (exp instanceof ZCurveXFunction) { + return new ZCurveFunctionNode(toExpressionNode(((ZCurveXFunction)exp).getArg(0)), + ZCurveFunctionNode.Dimension.X); + } + if (exp instanceof ZCurveYFunction) { 
+ return new ZCurveFunctionNode(toExpressionNode(((ZCurveYFunction)exp).getArg(0)), + ZCurveFunctionNode.Dimension.Y); + } + if (exp instanceof DayOfMonthFunction) { + return toTime(((DayOfMonthFunction)exp).getArg(0), TimeStampFunctionNode.TimePart.MonthDay); + } + if (exp instanceof DayOfWeekFunction) { + return toTime(((DayOfWeekFunction)exp).getArg(0), TimeStampFunctionNode.TimePart.WeekDay); + } + if (exp instanceof DayOfYearFunction) { + return toTime(((DayOfYearFunction)exp).getArg(0), TimeStampFunctionNode.TimePart.YearDay); + } + if (exp instanceof HourOfDayFunction) { + return toTime(((HourOfDayFunction)exp).getArg(0), TimeStampFunctionNode.TimePart.Hour); + } + if (exp instanceof MinuteOfHourFunction) { + return toTime(((MinuteOfHourFunction)exp).getArg(0), TimeStampFunctionNode.TimePart.Minute); + } + if (exp instanceof MonthOfYearFunction) { + return toTime(((MonthOfYearFunction)exp).getArg(0), TimeStampFunctionNode.TimePart.Month); + } + if (exp instanceof SecondOfMinuteFunction) { + return toTime(((SecondOfMinuteFunction)exp).getArg(0), TimeStampFunctionNode.TimePart.Second); + } + if (exp instanceof YearFunction) { + return toTime(((YearFunction)exp).getArg(0), TimeStampFunctionNode.TimePart.Year); + } + if (exp instanceof XorFunction) { + return addArguments(new XorFunctionNode(), (XorFunction)exp); + } + if (exp instanceof XorBitFunction) { + return new XorBitFunctionNode().setNumBits(((XorBitFunction)exp).getNumBits()) + .addArg(toExpressionNode(((XorBitFunction)exp).getArg(0))); + } + if (exp instanceof YmumValue) { + return new GetYMUMChecksumFunctionNode(); + } + throw new UnsupportedOperationException("Can not convert '" + exp + "' of class " + exp.getClass().getName() + + " to an expression."); + } + + private TimeStampFunctionNode toTime(GroupingExpression arg, TimeStampFunctionNode.TimePart timePart) { + if (timeOffset == 0) { + return new TimeStampFunctionNode(toExpressionNode(arg), timePart, true); + } + AddFunctionNode exp = new 
AddFunctionNode(); + exp.addArg(toExpressionNode(arg)); + exp.addArg(new ConstantNode(new IntegerResultNode(timeOffset))); + return new TimeStampFunctionNode(exp, timePart, true); + } + + private MultiArgFunctionNode addArguments(MultiArgFunctionNode ret, Iterable<GroupingExpression> lst) { + for (GroupingExpression exp : lst) { + ret.addArg(toExpressionNode(exp)); + } + return ret; + } + + private MultiArgFunctionNode toSubNode(Iterable<GroupingExpression> lst) { + MultiArgFunctionNode ret = new AddFunctionNode(); + int i = 0; + for (GroupingExpression exp : lst) { + ExpressionNode node = toExpressionNode(exp); + if (++i > 1) { + node = new NegateFunctionNode(node); + } + ret.addArg(node); + } + return ret; + } + + private ResultNodeVector toBucketList(PredefinedFunction fnc) { + ResultNodeVector ret = null; + for (int i = 0, len = fnc.getNumBuckets(); i < len; ++i) { + BucketResultNode bucket = toBucket(fnc.getBucket(i)); + if (ret == null) { + if (bucket instanceof FloatBucketResultNode) { + ret = new FloatBucketResultNodeVector(); + } else if (bucket instanceof IntegerBucketResultNode) { + ret = new IntegerBucketResultNodeVector(); + } else if (bucket instanceof RawBucketResultNode) { + ret = new RawBucketResultNodeVector(); + } else { + ret = new StringBucketResultNodeVector(); + } + } + ret.add(bucket); + } + return ret; + } + + private BucketResultNode toBucket(GroupingExpression exp) { + if (!(exp instanceof BucketValue)) { + throw new UnsupportedOperationException("Can not convert '" + exp + "' to a bucket."); + } + ConstantValue<?> begin = ((BucketValue)exp).getFrom(); + ConstantValue<?> end = ((BucketValue)exp).getTo(); + if (begin instanceof DoubleValue || end instanceof DoubleValue) { + return new FloatBucketResultNode( + begin instanceof InfiniteValue ? FloatResultNode.getNegativeInfinity().getFloat() + : Double.valueOf(begin.toString()), + end instanceof InfiniteValue ? 
FloatResultNode.getPositiveInfinity().getFloat() + : Double.valueOf(end.toString())); + } else if (begin instanceof LongValue || end instanceof LongValue) { + return new IntegerBucketResultNode( + begin instanceof InfiniteValue ? IntegerResultNode.getNegativeInfinity().getInteger() + : Long.valueOf(begin.toString()), + end instanceof InfiniteValue ? IntegerResultNode.getPositiveInfinity().getInteger() + : Long.valueOf(end.toString())); + } else if (begin instanceof StringValue || end instanceof StringValue) { + return new StringBucketResultNode( + begin instanceof InfiniteValue ? StringResultNode.getNegativeInfinity() + : new StringResultNode((String)begin.getValue()), + end instanceof InfiniteValue ? StringResultNode.getPositiveInfinity() + : new StringResultNode((String)end.getValue())); + } else { + return new RawBucketResultNode( + begin instanceof InfiniteValue ? RawResultNode.getNegativeInfinity() + : new RawResultNode(((RawValue)begin).getValue().getBytes()), + end instanceof InfiniteValue ? RawResultNode.getPositiveInfinity() + : new RawResultNode(((RawValue)end).getValue().getBytes())); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/vespa/GroupingExecutor.java b/container-search/src/main/java/com/yahoo/search/grouping/vespa/GroupingExecutor.java new file mode 100644 index 00000000000..e5e91f21f5f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/vespa/GroupingExecutor.java @@ -0,0 +1,411 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.grouping.vespa; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.logging.Logger; + +import com.yahoo.component.ComponentId; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.log.LogLevel; +import com.yahoo.prelude.fastsearch.GroupingListHit; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.QueryCanonicalizer; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.grouping.GroupingRequest; +import com.yahoo.search.grouping.GroupingValidator; +import com.yahoo.search.grouping.result.Group; +import com.yahoo.search.grouping.result.RootGroup; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.result.Hit; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.searchlib.aggregation.Grouping; +import com.yahoo.vespa.objects.Identifiable; +import com.yahoo.vespa.objects.ObjectOperation; +import com.yahoo.vespa.objects.ObjectPredicate; + +/** + * Executes the {@link GroupingRequest grouping requests} set up by other searchers. This does the necessary + * transformation from the abstract request to Vespa grouping expressions (using {@link RequestBuilder}), and the + * corresponding transformation of results (using {@link ResultBuilder}). 
+ * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +@After({ GroupingValidator.GROUPING_VALIDATED, + "com.yahoo.search.querytransform.WandSearcher", + "com.yahoo.search.querytransform.BooleanSearcher" }) +@Provides({ GroupingExecutor.COMPONENT_NAME, QueryCanonicalizer.queryCanonicalization } ) +public class GroupingExecutor extends Searcher { + + public final static String COMPONENT_NAME = "GroupingExecutor"; + private final static CompoundName PROP_GROUPINGLIST = newCompoundName("GroupingList"); + private final static Logger log = Logger.getLogger(GroupingExecutor.class.getName()); + + /** + * Constructs a new instance of this searcher without configuration. + * This makes the searcher completely useless for searching purposes, + * and should only be used for testing its logic. + */ + GroupingExecutor() { + } + + /** + * Constructs a new instance of this searcher with the given component id. + * + * @param componentId The identifier to assign to this searcher. + */ + public GroupingExecutor(ComponentId componentId) { + super(componentId); + } + + @Override + public Result search(Query query, Execution execution) { + String error = QueryCanonicalizer.canonicalize(query); + if (error != null) { + return new Result(query, ErrorMessage.createIllegalQuery(error)); + } + query.prepare(); + + // Retrieve grouping requests from query. + List<GroupingRequest> reqList = GroupingRequest.getRequests(query); + if (reqList.isEmpty()) { + return execution.search(query); + } + + // Convert requests to Vespa style grouping. + Map<Integer, Grouping> groupingMap = new HashMap<>(); + List<RequestContext> ctxList = new LinkedList<>(); + for (GroupingRequest grpRequest : reqList) { + ctxList.add(convertRequest(query, grpRequest, groupingMap)); + } + if (groupingMap.isEmpty()) { + return execution.search(query); + } + + // Perform the necessary passes to execute grouping. 
+ Result result = performSearch(query, execution, groupingMap); + + // Convert Vespa style results to hits. + HitConverter hitConverter = new HitConverter(this, query); + for (RequestContext ctx : ctxList) { + RootGroup grp = convertResult(ctx, groupingMap, hitConverter); + ctx.request.setResultGroup(grp); + result.hits().add(grp); + } + return result; + } + + @Override + public void fill(Result result, String summaryClass, Execution execution) { + Map<String, Result> summaryMap = new HashMap<>(); + for (Iterator<Hit> it = result.hits().unorderedDeepIterator(); it.hasNext(); ) { + Hit hit = it.next(); + Object metaData = hit.getSearcherSpecificMetaData(this); + String hitSummary = (metaData instanceof String) ? (String)metaData : summaryClass; + Result summaryResult = summaryMap.get(hitSummary); + if (summaryResult == null) { + summaryResult = new Result(result.getQuery()); + summaryMap.put(hitSummary, summaryResult); + } + summaryResult.hits().add(hit); + } + for (Map.Entry<String, Result> entry : summaryMap.entrySet()) { + Result res = entry.getValue(); + execution.fill(res, entry.getKey()); + ErrorMessage err = res.hits().getError(); + if (err != null) { + result.hits().addError(err); + } + } + Result defaultResult = summaryMap.get(ExpressionConverter.DEFAULT_SUMMARY_NAME); + if (defaultResult != null) { + // the reason we need to do this fix is that the docsum packet protocol uses null summary class name to + // signal that the backend should use its configured default, whereas for grouping it uses the literal + // "default" to signal the same + for (Hit hit : defaultResult.hits()) { + hit.setFilled(null); + } + } + } + + /** + * Converts the given {@link GroupingRequest} into a set of {@link Grouping} objects. The returned object holds the + * context that corresponds to the given request, whereas the created {@link Grouping} objects are written directly + * to the given map. + * + * @param query The query being executed. + * @param req The request to convert. 
+ * @param map The grouping map to write to. + * @return The context required to identify the request results. + */ + private RequestContext convertRequest(Query query, GroupingRequest req, Map<Integer, Grouping> map) { + RequestBuilder builder = new RequestBuilder(req.getRequestId()); + builder.setRootOperation(req.getRootOperation()); + builder.setDefaultSummaryName(query.getPresentation().getSummary()); + builder.setTimeZone(req.getTimeZone()); + builder.addContinuations(req.continuations()); + builder.build(); + + RequestContext ctx = new RequestContext(req, builder.getTransform()); + List<Grouping> grpList = builder.getRequestList(); + for (Grouping grp : grpList) { + int grpId = map.size(); + grp.setId(grpId); + map.put(grpId, grp); + ctx.idList.add(grpId); + } + return ctx; + } + + /** + * Converts the results of the given request context into a single {@link Group}. + * + * @param requestCtx The context that identifies the results to convert. + * @param groupingMap The map of all {@link Grouping} objects available. + * @param hitConverter The converter to use for {@link Hit} conversion. + * @return The corresponding root RootGroup. + */ + private RootGroup convertResult(RequestContext requestCtx, Map<Integer, Grouping> groupingMap, + HitConverter hitConverter) { + ResultBuilder builder = new ResultBuilder(); + builder.setHitConverter(hitConverter); + builder.setTransform(requestCtx.transform); + builder.setRequestId(requestCtx.request.getRequestId()); + for (Integer grpId : requestCtx.idList) { + builder.addGroupingResult(groupingMap.get(grpId)); + } + builder.build(); + return builder.getRoot(); + } + + /** + * Performs the actual search passes to complete all the given {@link Grouping} requests. This method uses the + * grouping map argument as both an input and an output variable, as the contained {@link Grouping} objects are + * updates as results arrive from the back end. + * + * @param query The query to execute. 
+ * @param execution The execution context used to run the queries. + * @param groupingMap The map of grouping requests to perform. + * @return The search result to pass back from this searcher. + */ + private Result performSearch(Query query, Execution execution, Map<Integer, Grouping> groupingMap) { + // Determine how many passes to perform. + int lastPass = 0; + for (Grouping grouping : groupingMap.values()) { + if ( ! grouping.useSinglePass()) { + lastPass = Math.max(lastPass, grouping.getLevels().size()); + } + } + + // Perform multi-pass query to complete all grouping requests. + Item origRoot = query.getModel().getQueryTree().getRoot(); + int prePassErrors = query.errors().size(); + Result ret = null; + Item baseRoot = origRoot; + if (lastPass > 0) { + baseRoot = origRoot.clone(); + } + if (query.isTraceable(3) && query.getGroupingSessionCache()) { + query.trace("Grouping in " + (lastPass + 1) + " passes. SessionId='" + query.getSessionId(true) + "'.", 3); + } + for (int pass = 0; pass <= lastPass; ++pass) { + boolean firstPass = (pass == 0); + List<Grouping> passList = getGroupingListForPassN(groupingMap, pass); + if (passList.isEmpty()) { + throw new RuntimeException("No grouping request for pass " + pass + ", bug!"); + } + if (log.isLoggable(LogLevel.DEBUG)) { + for (Grouping grouping : passList) { + log.log(LogLevel.DEBUG, "Pass(" + pass + "), Grouping(" + grouping.getId() + "): " + grouping); + } + } + Item passRoot; + if (firstPass) { + passRoot = origRoot; // Use original query the first time. + } else if (pass == lastPass) { + passRoot = baseRoot; // Has already been cloned once, use this for last pass. 
+ } else { + // noinspection ConstantConditions + passRoot = baseRoot.clone(); + } + if (query.isTraceable(4) && query.getGroupingSessionCache()) { + query.trace("Grouping with session cache '" + query.getGroupingSessionCache() + "' enabled for pass #" + pass + ".", 4); + } + if (origRoot != passRoot) { + query.getModel().getQueryTree().setRoot(passRoot); + } + setGroupingList(query, passList); + Result passResult = execution.search(query); + if (passResult.hits().getError() != null) { + if (firstPass) { + if (passResult.hits().getErrorHit().errors().size() > prePassErrors || + passResult.hits().getErrorHit().errors().size() == 0) { + return passResult; + } + } else { + return passResult; + } + } + Map<Integer, Grouping> passGroupingMap = mergeGroupingResults(passResult); + mergeGroupingMaps(groupingMap, passGroupingMap); + if (firstPass) { + ret = passResult; + } + } + if (log.isLoggable(LogLevel.DEBUG)) { + for (Grouping grouping : groupingMap.values()) { + log.log(LogLevel.DEBUG, "Result Grouping(" + grouping.getId() + "): " + grouping); + } + } + return ret; + } + + /** + * Merges the content of result into state. This needs to be done in order to conserve the context objects contained + * in the state as they are not part of the serialized object representation. + * + * @param state the current state. + * @param result the results from the current pass. + */ + private void mergeGroupingMaps(Map<Integer, Grouping> state, Map<Integer, Grouping> result) { + for (Grouping grouping : result.values()) { + Grouping old = state.get(grouping.getId()); + if (old != null) { + old.merge(grouping); + // no need to invoke postMerge, as state is empty for + // current level + } else { + log.warning("Got grouping result with unknown id: " + grouping); + } + } + } + + /** + * Returns a list of {@link Grouping} objects that are to be used for the given pass. + * + * @param groupingMap The map of all grouping objects. + * @param pass The pass about to be performed. 
+ * @return A list of grouping objects. + */ + private List<Grouping> getGroupingListForPassN(Map<Integer, Grouping> groupingMap, int pass) { + List<Grouping> ret = new ArrayList<>(); + for (Grouping grouping : groupingMap.values()) { + if (grouping.useSinglePass()) { + if (pass == 0) { + grouping.setFirstLevel(0); + grouping.setLastLevel(grouping.getLevels().size()); + ret.add(grouping); // more levels to go + } + } else { + if (pass <= grouping.getLevels().size()) { + grouping.setFirstLevel(pass); + grouping.setLastLevel(pass); + ret.add(grouping); // more levels to go + } + } + } + return ret; + } + + /** + * Merges the grouping content of the given result object. The first grouping hit found by iterating over the result + * content is kept, and all consecutive matching hits are merged into this. + * + * @param result The result to traverse. + * @return A map of merged grouping objects. + */ + private Map<Integer, Grouping> mergeGroupingResults(Result result) { + Map<Integer, Grouping> ret = new HashMap<>(); + for (Iterator<Hit> i = result.hits().unorderedIterator(); i.hasNext(); ) { + Hit hit = i.next(); + if (hit instanceof GroupingListHit) { + ContextInjector injector = new ContextInjector(hit); + for (Grouping grp : ((GroupingListHit)hit).getGroupingList()) { + grp.select(injector, injector); + Grouping old = ret.get(grp.getId()); + if (old != null) { + old.merge(grp); + } else { + ret.put(grp.getId(), grp); + } + } + i.remove(); + } + } + for (Grouping grouping : ret.values()) { + grouping.postMerge(); + } + return ret; + } + + /** + * Returns the list of {@link Grouping} objects assigned to the given query. If no list has been assigned, this + * method returns an empty list. + * + * @param query The query whose grouping list to return. + * @return The list of assigned grouping objects. 
+ */ + @SuppressWarnings({ "unchecked" }) + public static List<Grouping> getGroupingList(Query query) { + Object obj = query.properties().get(PROP_GROUPINGLIST); + if (!(obj instanceof List)) { + return Collections.emptyList(); + } + return (List<Grouping>)obj; + } + + /** + * Sets the list of {@link Grouping} objects assigned to the given query. This method overwrites any grouping + * objects already assigned to the query. + * + * @param query The query whose grouping list to set. + * @param lst The grouping list to set. + */ + public static void setGroupingList(Query query, List<Grouping> lst) { + query.properties().set(PROP_GROUPINGLIST, lst); + } + + private static CompoundName newCompoundName(String name) { + return new CompoundName(GroupingExecutor.class.getName() + "." + name); + } + + private static class ContextInjector implements ObjectPredicate, ObjectOperation { + + final Object context; + + ContextInjector(Object context) { + this.context = context; + } + + @Override + public boolean check(Object obj) { + return com.yahoo.searchlib.aggregation.Hit.class.isInstance(obj); + } + + @Override + public void execute(Object obj) { + ((com.yahoo.searchlib.aggregation.Hit)obj).setContext(context); + } + } + + private static class RequestContext { + + final List<Integer> idList = new LinkedList<>(); + final GroupingRequest request; + final GroupingTransform transform; + + RequestContext(GroupingRequest request, GroupingTransform transform) { + this.request = request; + this.transform = transform; + } + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/vespa/GroupingTransform.java b/container-search/src/main/java/com/yahoo/search/grouping/vespa/GroupingTransform.java new file mode 100644 index 00000000000..928b0ebd22f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/vespa/GroupingTransform.java @@ -0,0 +1,137 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.search.grouping.vespa; + +import com.yahoo.search.grouping.Continuation; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +/** + * This class contains enough information about how a {@link com.yahoo.search.grouping.request.GroupingOperation} was + * transformed into a list {@link com.yahoo.searchlib.aggregation.Grouping} objects, so that the results of those + * queries can be transformed into something that corresponds to the original request. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +class GroupingTransform { + + private final Map<Integer, Set<Integer>> children = new HashMap<>(); + private final Map<Integer, String> labels = new HashMap<>(); + private final Map<Integer, Integer> maxes = new HashMap<>(); + private final Map<Integer, Integer> offsetByTag = new HashMap<>(); + private final Map<ResultId, Integer> offsetById = new HashMap<>(); + private final Set<ResultId> unstable = new HashSet<>(); + private final int requestId; + + public GroupingTransform(int requestId) { + this.requestId = requestId; + } + + public GroupingTransform addContinuation(Continuation cont) { + if (cont instanceof CompositeContinuation) { + for (Continuation item : ((CompositeContinuation)cont)) { + addContinuation(item); + } + } else if (cont instanceof OffsetContinuation) { + OffsetContinuation offsetCont = (OffsetContinuation)cont; + ResultId id = offsetCont.getResultId(); + if (!id.startsWith(requestId)) { + return this; + } + if (offsetCont.testFlag(OffsetContinuation.FLAG_UNSTABLE)) { + unstable.add(id); + } else { + unstable.remove(id); + } + int tag = offsetCont.getTag(); + int offset = offsetCont.getOffset(); + if (getOffset(tag) < offset) { + offsetByTag.put(tag, offset); + } + offsetById.put(id, offset); + } else { + throw new UnsupportedOperationException(cont.getClass().getName()); + } + return this; + } + + public boolean 
isStable(ResultId resultId) { + return !unstable.contains(resultId); + } + + public int getOffset(int tag) { + return toPosInt(offsetByTag.get(tag)); + } + + public int getOffset(ResultId resultId) { + return toPosInt(offsetById.get(resultId)); + } + + public GroupingTransform putMax(int tag, int max, String type) { + if (maxes.containsKey(tag)) { + throw new IllegalStateException("Can not set max of " + type + " " + tag + " to " + max + + " because it is already set to " + maxes.get(tag) + "."); + } + maxes.put(tag, max); + return this; + } + + public int getMax(int tag) { + return toPosInt(maxes.get(tag)); + } + + public GroupingTransform putLabel(int parentTag, int tag, String label, String type) { + Set<Integer> siblings = children.get(parentTag); + if (siblings == null) { + siblings = new HashSet<>(); + children.put(parentTag, siblings); + } else { + for (Integer sibling : siblings) { + if (label.equals(labels.get(sibling))) { + throw new UnsupportedOperationException("Can not use " + type + " label '" + label + + "' for multiple siblings."); + } + } + } + siblings.add(tag); + if (labels.containsKey(tag)) { + throw new IllegalStateException("Can not set label of " + type + " " + tag + " to '" + label + + "' because it is already set to '" + labels.get(tag) + "'."); + } + labels.put(tag, label); + return this; + } + + public String getLabel(int tag) { + return labels.get(tag); + } + + @Override + public String toString() { + StringBuilder ret = new StringBuilder(); + ret.append("groupingTransform {\n"); + ret.append("\tlabels {\n"); + for (Map.Entry<Integer, String> entry : labels.entrySet()) { + ret.append("\t\t").append(entry.getKey()).append(" : ").append(entry.getValue()).append("\n"); + } + ret.append("\t}\n"); + ret.append("\toffsets {\n"); + for (Map.Entry<Integer, Integer> entry : offsetByTag.entrySet()) { + ret.append("\t\t").append(entry.getKey()).append(" : ").append(entry.getValue()).append("\n"); + } + ret.append("\t}\n"); + ret.append("\tmaxes 
{\n"); + for (Map.Entry<Integer, Integer> entry : maxes.entrySet()) { + ret.append("\t\t").append(entry.getKey()).append(" : ").append(entry.getValue()).append("\n"); + } + ret.append("\t}\n"); + ret.append("}"); + return ret.toString(); + } + + private static int toPosInt(Integer val) { + return val == null ? 0 : Math.max(0, val.intValue()); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/vespa/HitConverter.java b/container-search/src/main/java/com/yahoo/search/grouping/vespa/HitConverter.java new file mode 100644 index 00000000000..81ae100b84f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/vespa/HitConverter.java @@ -0,0 +1,89 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.vespa; + +import com.yahoo.fs4.QueryPacketData; +import com.yahoo.prelude.fastsearch.DocsumDefinitionSet; +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.prelude.fastsearch.GroupingListHit; +import com.yahoo.search.Query; +import com.yahoo.search.Searcher; +import com.yahoo.search.result.Hit; +import com.yahoo.searchlib.aggregation.FS4Hit; +import com.yahoo.searchlib.aggregation.VdsHit; + +/** + * Implementation of the {@link ResultBuilder.HitConverter} interface for {@link GroupingExecutor}. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +class HitConverter implements ResultBuilder.HitConverter { + + private final Searcher searcher; + private final Query query; + + /** + * Creates a new instance of this class. + * + * @param searcher The searcher that owns this converter. + * @param query The query that returned the hits. 
+ */ + public HitConverter(Searcher searcher, Query query) { + this.searcher = searcher; + this.query = query; + } + + @Override + public com.yahoo.search.result.Hit toSearchHit(String summaryClass, com.yahoo.searchlib.aggregation.Hit hit) { + if (hit instanceof FS4Hit) { + return convertFs4Hit(summaryClass, (FS4Hit)hit); + } else if (hit instanceof VdsHit) { + return convertVdsHit(summaryClass, (VdsHit)hit); + } else { + throw new UnsupportedOperationException("Hit type '" + hit.getClass().getName() + "' not supported."); + } + } + + private Hit convertFs4Hit(String summaryClass, FS4Hit grpHit) { + FastHit ret = new FastHit(); + ret.setRelevance(grpHit.getRank()); + ret.setGlobalId(grpHit.getGlobalId()); + ret.setPartId(grpHit.getPath(), 0); + ret.setDistributionKey(grpHit.getDistributionKey()); + ret.setFillable(); + ret.setSearcherSpecificMetaData(searcher, summaryClass); + + Hit ctxHit = (Hit)grpHit.getContext(); + if (ctxHit == null) { + throw new NullPointerException("Hit has no context."); + } + ret.setSource(ctxHit.getSource()); + ret.setSourceNumber(ctxHit.getSourceNumber()); + ret.setQuery(ctxHit.getQuery()); + + if (ctxHit instanceof GroupingListHit) { + // in a live system the ctxHit can only by GroupingListHit, but because the code used Hit prior to version + // 5.10 we need to check to avoid breaking existing unit tests -- both internally and with customers + QueryPacketData queryPacketData = ((GroupingListHit)ctxHit).getQueryPacketData(); + if (queryPacketData != null) { + ret.setQueryPacketData(queryPacketData); + } + } + return ret; + } + + private Hit convertVdsHit(String summaryClass, VdsHit grpHit) { + FastHit ret = new FastHit(); + ret.setRelevance(grpHit.getRank()); + if (grpHit.getSummary().getData().length > 0) { + GroupingListHit ctxHit = (GroupingListHit)grpHit.getContext(); + if (ctxHit == null) { + throw new NullPointerException("Hit has no context."); + } + DocsumDefinitionSet defs = ctxHit.getDocsumDefinitionSet(); + 
defs.lazyDecode(summaryClass, grpHit.getSummary().getData(), ret); + ret.setFilled(summaryClass); + ret.setFilled(query.getPresentation().getSummary()); + } + return ret; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/vespa/IntegerDecoder.java b/container-search/src/main/java/com/yahoo/search/grouping/vespa/IntegerDecoder.java new file mode 100644 index 00000000000..c398fb41db2 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/vespa/IntegerDecoder.java @@ -0,0 +1,39 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.vespa; + +/** + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +class IntegerDecoder { + + private static final int CHAR_MIN = IntegerEncoder.CHARS[0]; + private static final int CHAR_MAX = IntegerEncoder.CHARS[IntegerEncoder.CHARS.length - 1]; + private final String input; + private int pos = 0; + + public IntegerDecoder(String input) { + this.input = input; + } + + public boolean hasNext() { + return pos < input.length(); + } + + public int next() { + int val = 0; + int len = decodeChar(input.charAt(pos++)); + for (int i = 0; i < len; i++) { + val = (val << 4) | decodeChar(input.charAt(pos + i)); + } + pos += len; + return (val >>> 1) ^ (-(val & 0x1)); + } + + private static int decodeChar(char c) { + if (c >= CHAR_MIN && c <= CHAR_MAX) { + return (0xF & (c - CHAR_MIN)); + } else { + throw new NumberFormatException(String.valueOf(c)); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/vespa/IntegerEncoder.java b/container-search/src/main/java/com/yahoo/search/grouping/vespa/IntegerEncoder.java new file mode 100644 index 00000000000..c710905a0c8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/vespa/IntegerEncoder.java @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.search.grouping.vespa; + +/** + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +class IntegerEncoder { + + public static final char[] CHARS = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', + 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P' }; + private final StringBuilder out = new StringBuilder(); + + public void append(int val) { + val = ((val << 1) ^ (val >> 31)); + int cnt = 8; + for (int i = 0; i < 8; ++i) { + if (((val >> (28 - 4 * i)) & 0xF) != 0) { + break; + } + --cnt; + } + out.append(CHARS[cnt]); + for (int i = 8 - cnt; i < 8; ++i) { + out.append(CHARS[(val >> (28 - 4 * i)) & 0xF]); + } + } + + @Override + public String toString() { + return out.toString(); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/vespa/OffsetContinuation.java b/container-search/src/main/java/com/yahoo/search/grouping/vespa/OffsetContinuation.java new file mode 100644 index 00000000000..789be271c5c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/vespa/OffsetContinuation.java @@ -0,0 +1,84 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.search.grouping.vespa;

import java.util.Objects;

/**
 * A continuation that holds the offset and flags of a single result, identified by its {@link ResultId} and tag.
 *
 * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
 */
class OffsetContinuation extends EncodableContinuation {

    public static final int FLAG_UNSTABLE = 1;
    private final ResultId resultId;
    private final int tag;
    private final int offset;
    private final int flags;

    /**
     * Creates a new continuation.
     *
     * @param resultId The id of the result this continuation applies to.
     * @param tag      The tag of the result.
     * @param offset   The offset into the result.
     * @param flags    A bit mask of flags, see {@link #FLAG_UNSTABLE}.
     * @throws NullPointerException If the result id is null.
     */
    public OffsetContinuation(ResultId resultId, int tag, int offset, int flags) {
        this.resultId = Objects.requireNonNull(resultId, "resultId");
        this.tag = tag;
        this.offset = offset;
        this.flags = flags;
    }

    public ResultId getResultId() {
        return resultId;
    }

    public int getTag() {
        return tag;
    }

    public int getOffset() {
        return offset;
    }

    public int getFlags() {
        return flags;
    }

    /** Returns whether any of the bits in the given mask are set in the flags of this. */
    public boolean testFlag(int flag) {
        return (flags & flag) != 0;
    }

    @Override
    public int hashCode() {
        // Covers the same four fields that equals() compares.
        return Objects.hash(resultId, tag, offset, flags);
    }

    @Override
    public boolean equals(Object obj) {
        if (!(obj instanceof OffsetContinuation)) {
            return false;
        }
        OffsetContinuation rhs = (OffsetContinuation)obj;
        return resultId.equals(rhs.resultId) &&
               tag == rhs.tag &&
               offset == rhs.offset &&
               flags == rhs.flags;
    }

    /** Writes the result id followed by tag, offset and flags to the given encoder. */
    @Override
    public void encode(IntegerEncoder out) {
        resultId.encode(out);
        out.append(tag);
        out.append(offset);
        out.append(flags);
    }

    /** Reads a continuation in the field order written by {@link #encode(IntegerEncoder)}. */
    public static OffsetContinuation decode(IntegerDecoder in) {
        ResultId resultId = ResultId.decode(in);
        int tag = in.next();
        int offset = in.next();
        int flags = in.next();
        return new OffsetContinuation(resultId, tag, offset, flags);
    }
}
diff --git a/container-search/src/main/java/com/yahoo/search/grouping/vespa/RequestBuilder.java b/container-search/src/main/java/com/yahoo/search/grouping/vespa/RequestBuilder.java new file mode 100644 index 00000000000..9d47464b1de --- /dev/null +++ 
b/container-search/src/main/java/com/yahoo/search/grouping/vespa/RequestBuilder.java @@ -0,0 +1,397 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.grouping.vespa;

import com.yahoo.search.grouping.Continuation;
import com.yahoo.search.grouping.GroupingRequest;
import com.yahoo.search.grouping.request.EachOperation;
import com.yahoo.search.grouping.request.GroupingExpression;
import com.yahoo.search.grouping.request.GroupingOperation;
import com.yahoo.search.grouping.request.NegFunction;
import com.yahoo.searchlib.aggregation.*;
import com.yahoo.searchlib.expression.ExpressionNode;

import java.util.*;

/**
 * This class implements the necessary logic to build a list of {@link Grouping} objects from an instance of {@link
 * GroupingOperation}. It is used by the {@link GroupingExecutor}.
 *
 * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
 */
class RequestBuilder {

    private static final int LOOKAHEAD = 1;
    private final ExpressionConverter converter = new ExpressionConverter();
    private final List<Grouping> requestList = new ArrayList<>();
    private final GroupingTransform transform;
    private GroupingOperation root;
    private int tag = 0;

    /**
     * Constructs a new instance of this class.
     *
     * @param requestId The id of the corresponding {@link GroupingRequest}.
     */
    public RequestBuilder(int requestId) {
        this.transform = new GroupingTransform(requestId);
    }

    /**
     * Sets the abstract syntax tree of the request whose back-end queries to create.
     *
     * @param root The grouping request to convert.
     * @return This, to allow chaining.
     * @throws NullPointerException If the given operation is null.
     */
    public RequestBuilder setRootOperation(GroupingOperation root) {
        this.root = Objects.requireNonNull(root, "root");
        return this;
    }

    /**
     * Sets the time zone to build the request for. This information is propagated to the time-based grouping
     * expressions so that the produced groups are reasonable for the given zone.
     *
     * @param timeZone The time zone to set.
     * @return This, to allow chaining.
     */
    public RequestBuilder setTimeZone(TimeZone timeZone) {
        converter.setTimeOffset(timeZone != null ? timeZone.getOffset(System.currentTimeMillis())
                                                 : ExpressionConverter.DEFAULT_TIME_OFFSET);
        return this;
    }

    /**
     * Sets the name of the summary class to use if a {@link com.yahoo.search.grouping.request.SummaryValue} has none.
     *
     * @param summaryName The summary class name to set.
     * @return This, to allow chaining.
     */
    public RequestBuilder setDefaultSummaryName(String summaryName) {
        converter.setDefaultSummaryName(summaryName != null ? summaryName
                                                            : ExpressionConverter.DEFAULT_SUMMARY_NAME);
        return this;
    }

    /**
     * Returns the transform that was created when {@link #build()} was called.
     *
     * @return The grouping transform that was built.
     */
    public GroupingTransform getTransform() {
        return transform;
    }

    /**
     * Returns the list of grouping objects that were created when {@link #build()} was called.
     *
     * @return The list of built grouping objects.
     */
    public List<Grouping> getRequestList() {
        return requestList;
    }

    /**
     * Constructs a set of Vespa specific grouping request that corresponds to the parameters given to this builder.
     * This method might fail due to unsupported constructs in the request, in which case an exception is thrown.
     *
     * @throws IllegalStateException         If this method is called more than once.
     * @throws UnsupportedOperationException If the grouping request contains unsupported constructs.
     */
    public void build() {
        if (tag != 0) {
            throw new IllegalStateException("build() has already been called on this builder.");
        }
        root.resolveLevel(1);

        Grouping grouping = new Grouping();
        grouping.getRoot().setTag(++tag);
        grouping.setForceSinglePass(root.getForceSinglePass() || root.containsHint("singlepass"));

        // Depth-first traversal of the request tree using an explicit LIFO stack.
        Deque<BuildFrame> stack = new ArrayDeque<>();
        stack.push(new BuildFrame(grouping, new BuildState(), root));
        while (!stack.isEmpty()) {
            BuildFrame frame = stack.pop();
            processRequestNode(frame);
            List<GroupingOperation> children = frame.astNode.getChildren();
            if (children.isEmpty()) {
                requestList.add(frame.grouping);
            } else {
                // Push in reverse so that the first child is processed first. The first child reuses the parent's
                // grouping and state, all other children work on copies.
                for (int i = children.size(); --i >= 0; ) {
                    Grouping childGrouping = (i == 0) ? frame.grouping : frame.grouping.clone();
                    BuildState childState = (i == 0) ? frame.state : new BuildState(frame.state);
                    BuildFrame child = new BuildFrame(childGrouping, childState, children.get(i));
                    stack.push(child);
                }
            }
        }
        pruneRequests();
    }

    /**
     * Adds the given continuations to the transform of this builder. Null entries are skipped.
     *
     * @param continuations The continuations to add.
     * @return This, to allow chaining.
     */
    public RequestBuilder addContinuations(Iterable<Continuation> continuations) {
        for (Continuation continuation : continuations) {
            if (continuation == null) {
                continue;
            }
            transform.addContinuation(continuation);
        }
        return this;
    }

    /** Processes a single node of the request tree, updating the grouping and state of the given frame. */
    private void processRequestNode(BuildFrame frame) {
        int level = frame.astNode.getLevel();
        if (level > 2) {
            throw new UnsupportedOperationException("Can not operate on " +
                                                    GroupingOperation.getLevelDesc(level) + ".");
        }
        if (frame.astNode instanceof EachOperation) {
            resolveEach(frame);
        } else {
            resolveOutput(frame);
        }
        resolveState(frame);
        injectGroupByToExpressionCountAggregator(frame);
    }

    private void injectGroupByToExpressionCountAggregator(BuildFrame frame) {
        Group group = getLeafGroup(frame);
        // The ExpressionCountAggregationResult uses the group-by expression to simulate aggregation of list of groups.
        group.getAggregationResults().stream()
             .filter(aggr -> aggr instanceof ExpressionCountAggregationResult)
             .forEach(aggr -> aggr.setExpression(frame.state.groupBy.clone()));
    }

    /**
     * Handles an each() node: turns a pending group-by expression into a new grouping level, registers the label
     * of the resulting list, resolves the outputs of the node and applies any pending ordering.
     */
    private void resolveEach(BuildFrame frame) {
        int parentTag = getLeafGroup(frame).getTag();
        if (frame.state.groupBy != null) {
            GroupingLevel grpLevel = new GroupingLevel();
            grpLevel.getGroupPrototype().setTag(++tag);
            grpLevel.setExpression(frame.state.groupBy);
            frame.state.groupBy = null;
            int offset = transform.getOffset(tag);
            if (frame.state.precision != null) {
                grpLevel.setPrecision(frame.state.precision + offset);
                frame.state.precision = null;
            }
            if (frame.state.max != null) {
                transform.putMax(tag, frame.state.max, "group list");
                grpLevel.setMaxGroups(LOOKAHEAD + frame.state.max + offset);
                frame.state.max = null;
            }
            frame.grouping.getLevels().add(grpLevel);
        }
        String label = frame.astNode.getLabel();
        if (label != null) {
            frame.state.label = label;
        }
        if (frame.astNode.getLevel() > 0) {
            transform.putLabel(parentTag, getLeafGroup(frame).getTag(), frame.state.label, "group list");
        }
        resolveOutput(frame);
        if (!frame.state.orderByExp.isEmpty()) {
            GroupingLevel grpLevel = getLeafGroupingLevel(frame);
            for (int i = 0, len = frame.state.orderByExp.size(); i < len; ++i) {
                grpLevel.getGroupPrototype().addOrderBy(frame.state.orderByExp.get(i),
                                                        frame.state.orderByAsc.get(i));
            }
            frame.state.orderByExp.clear();
            frame.state.orderByAsc.clear();
        }
    }

    /** Transfers the group-by, max, order-by, precision and where clauses of the node to the frame. */
    private void resolveState(BuildFrame frame) {
        resolveGroupBy(frame);
        resolveMax(frame);
        resolveOrderBy(frame);
        resolvePrecision(frame);
        resolveWhere(frame);
    }

    private void resolveGroupBy(BuildFrame frame) {
        GroupingExpression exp = frame.astNode.getGroupBy();
        if (exp != null) {
            if (frame.state.groupBy != null) {
                throw new UnsupportedOperationException("Can not group list of groups.");
            }
            frame.state.groupBy = converter.toExpressionNode(exp);
            frame.state.label = exp.toString(); // label for next each()

        } else {
            int level = frame.astNode.getLevel();
            if (level == 0) {
                // no next each()
            } else if (level == 1) {
                frame.state.label = "hits"; // next each() is hitlist
            } else {
                throw new UnsupportedOperationException("Can not create anonymous " +
                                                        GroupingOperation.getLevelDesc(level) + ".");
            }
        }
    }

    private void resolveMax(BuildFrame frame) {
        if (frame.astNode.hasMax()) {
            int max = frame.astNode.getMax();
            if (isRootOperation(frame)) {
                frame.grouping.setTopN(max);
            } else {
                frame.state.max = max;
            }
        }
    }

    private void resolveOrderBy(BuildFrame frame) {
        List<GroupingExpression> lst = frame.astNode.getOrderBy();
        if (lst == null || lst.isEmpty()) {
            return;
        }
        int reqLevel = frame.astNode.getLevel();
        if (reqLevel != 2) {
            throw new UnsupportedOperationException(
                    "Can not order " + GroupingOperation.getLevelDesc(reqLevel) + " content.");
        }
        for (GroupingExpression exp : lst) {
            boolean asc = true;
            if (exp instanceof NegFunction) {
                // A negated expression means descending order on the argument of the negation.
                asc = false;
                exp = ((NegFunction)exp).getArg(0);
            }
            frame.state.orderByExp.add(converter.toExpressionNode(exp));
            frame.state.orderByAsc.add(asc);
        }
    }

    private void resolveOutput(BuildFrame frame) {
        List<GroupingExpression> lst = frame.astNode.getOutputs();
        if (lst == null || lst.isEmpty()) {
            return;
        }
        Group group = getLeafGroup(frame);
        for (GroupingExpression exp : lst) {
            group.addAggregationResult(toAggregationResult(exp, group, frame));
        }
    }

    /**
     * Converts the given output expression into a tagged aggregation result and registers its label (and, for hit
     * lists, its max) in the transform.
     */
    private AggregationResult toAggregationResult(GroupingExpression exp, Group group, BuildFrame frame) {
        AggregationResult result = converter.toAggregationResult(exp);
        result.setTag(++tag);

        String label = exp.getLabel();
        if (result instanceof HitsAggregationResult) {
            if (label != null) {
                throw new UnsupportedOperationException("Can not label expression '" + exp + "'.");
            }
            HitsAggregationResult hits = (HitsAggregationResult)result;
            if (frame.state.max != null) {
                transform.putMax(tag, frame.state.max, "hit list");
                int offset = transform.getOffset(tag);
                hits.setMaxHits(LOOKAHEAD + frame.state.max + offset);
                frame.state.max = null;
            }
            transform.putLabel(group.getTag(), tag, frame.state.label, "hit list");
        } else {
            transform.putLabel(group.getTag(), tag, label != null ? label : exp.toString(), "output");
        }
        return result;
    }

    private void resolvePrecision(BuildFrame frame) {
        int precision = frame.astNode.getPrecision();
        if (precision > 0) {
            frame.state.precision = precision;
        }
    }

    private void resolveWhere(BuildFrame frame) {
        String where = frame.astNode.getWhere();
        if (where != null) {
            if (!isRootOperation(frame)) {
                throw new UnsupportedOperationException("Can not apply 'where' to non-root group.");
            }
            switch (where) {
            case "true":
                frame.grouping.setAll(true);
                break;
            case "$query":
                // ignore
                break;
            default:
                throw new UnsupportedOperationException("Operation 'where' does not support '" + where + "'.");
            }
        }
    }

    private boolean isRootOperation(BuildFrame frame) {
        return frame.astNode == root && frame.state.groupBy == null;
    }

    /** Returns the last grouping level of the given frame, or null if it has no levels. */
    private GroupingLevel getLeafGroupingLevel(BuildFrame frame) {
        if (frame.grouping.getLevels().isEmpty()) {
            return null;
        }
        return frame.grouping.getLevels().get(frame.grouping.getLevels().size() - 1);
    }

    /** Returns the group prototype of the last grouping level, or the root group if there are no levels. */
    private Group getLeafGroup(BuildFrame frame) {
        if (frame.grouping.getLevels().isEmpty()) {
            return frame.grouping.getRoot();
        } else {
            GroupingLevel grpLevel = getLeafGroupingLevel(frame);
            return grpLevel != null ? grpLevel.getGroupPrototype() : null;
        }
    }

    /**
     * Strips trailing grouping levels that have no aggregation results, and removes requests that are left with
     * neither levels nor root aggregation results.
     */
    private void pruneRequests() {
        for (int reqIdx = requestList.size(); --reqIdx >= 0; ) {
            Grouping request = requestList.get(reqIdx);
            List<GroupingLevel> lst = request.getLevels();
            for (int lvlIdx = lst.size(); --lvlIdx >= 0; ) {
                if (!lst.get(lvlIdx).getGroupPrototype().getAggregationResults().isEmpty()) {
                    break;
                }
                lst.remove(lvlIdx);
            }
            if (lst.isEmpty() && request.getRoot().getAggregationResults().isEmpty()) {
                requestList.remove(reqIdx);
            }
        }
    }

    /** One entry of the traversal stack: a request node with the grouping and state to apply it to. */
    private static class BuildFrame {

        final Grouping grouping;
        final BuildState state;
        final GroupingOperation astNode;

        BuildFrame(Grouping grouping, BuildState state, GroupingOperation astNode) {
            this.grouping = grouping;
            this.state = state;
            this.astNode = astNode;
        }
    }

    /** Clauses that have been read from the request but not yet been applied to a grouping level. */
    private static class BuildState {

        final List<ExpressionNode> orderByExp = new ArrayList<>();
        final List<Boolean> orderByAsc = new ArrayList<>();
        ExpressionNode groupBy = null;
        String label = null;
        Integer max = null;
        Integer precision = null;

        BuildState() {
            // empty
        }

        BuildState(BuildState obj) {
            // The order-by expressions are cloned; the group-by expression reference is shared.
            for (ExpressionNode e : obj.orderByExp) {
                orderByExp.add(e.clone());
            }
            orderByAsc.addAll(obj.orderByAsc);
            groupBy = obj.groupBy;
            label = obj.label;
            max = obj.max;
            precision = obj.precision;
        }
    }
}
diff --git a/container-search/src/main/java/com/yahoo/search/grouping/vespa/ResultBuilder.java b/container-search/src/main/java/com/yahoo/search/grouping/vespa/ResultBuilder.java new file mode 100644 index 00000000000..590b531812a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/vespa/ResultBuilder.java @@ -0,0 +1,353 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.search.grouping.vespa; + +import com.yahoo.search.grouping.Continuation; +import com.yahoo.search.grouping.GroupingRequest; +import com.yahoo.search.grouping.result.*; +import com.yahoo.search.grouping.result.Group; +import com.yahoo.search.result.Relevance; +import com.yahoo.searchlib.aggregation.*; +import com.yahoo.searchlib.expression.*; + +import java.util.*; + +/** + * This class implements the necessary logic to build a {@link RootGroup} from a list of {@link Grouping} objects. It is + * used by the {@link GroupingExecutor}. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +class ResultBuilder { + + private final CompositeContinuation continuation = new CompositeContinuation(); + private RootGroup root; + private GroupListBuilder rootBuilder; + private HitConverter hitConverter; + private GroupingTransform transform; + + /** + * Sets the id of the {@link GroupingRequest} that this builder is creating the result for. + * + * @param requestId The id of the corresponding GroupingRequest. + * @return This, to allow chaining. + */ + public ResultBuilder setRequestId(int requestId) { + root = new RootGroup(requestId, continuation); + rootBuilder = new GroupListBuilder(ResultId.valueOf(requestId), 0, true, true); + return this; + } + + /** + * Sets the transform that details how the result should be built. + * + * @param transform The transform to set. + * @return This, to allow chaining. + */ + public ResultBuilder setTransform(GroupingTransform transform) { + this.transform = transform; + return this; + } + + /** + * Sets the converts that details how hits are converted. + * + * @param hitConverter The converter to set. + * @return This, to allow chaining. + */ + public ResultBuilder setHitConverter(HitConverter hitConverter) { + this.hitConverter = hitConverter; + return this; + } + + /** + * Adds a grouping result to this transform. 
This method will recurse through the given object and retrieve all the + * information it needs to produce the desired result when calling {@link #build()}. + * + * @param executionResult The grouping result to process. + */ + public void addGroupingResult(Grouping executionResult) { + executionResult.unifyNull(); + rootBuilder.addGroup(executionResult.getRoot()); + } + + /** + * Returns the root {@link RootGroup} that was created when {@link #build()} was called. + * + * @return The root that was built. + */ + public RootGroup getRoot() { + return root; + } + + /** + * Returns the {@link Continuation} that would recreate the exact same result as this. It is not complete until + * {@link #build()} has been called. + * + * @return The continuation of this result. + */ + public Continuation getContinuation() { + return continuation; + } + + /** + * Constructs the grouping result tree that corresponds to the parameters given to this builder. This method might + * fail due to unsupported constructs in the results, in which case an exception is thrown. + * + * @throws UnsupportedOperationException Thrown if the grouping result contains unsupported constructs. 
+ */ + public void build() { + int numChildren = rootBuilder.childGroups.size(); + if (numChildren != 1) { + throw new UnsupportedOperationException("Expected 1 group, got " + numChildren + "."); + } + rootBuilder.childGroups.get(0).fill(root); + } + + private class GroupBuilder { + + boolean [] results = new boolean[8]; + GroupListBuilder [] childLists = new GroupListBuilder[8]; + int childCount = 0; + final ResultId resultId; + final com.yahoo.searchlib.aggregation.Group group; + final boolean stable; + + GroupBuilder(ResultId resultId, com.yahoo.searchlib.aggregation.Group group, boolean stable) { + this.resultId = resultId; + this.group = group; + this.stable = stable; + } + + Group build(double relevance) { + return fill(new Group(newGroupId(group), new Relevance(relevance))); + } + + Group fill(Group group) { + for (AggregationResult res : this.group.getAggregationResults()) { + int tag = res.getTag(); + if (res instanceof HitsAggregationResult) { + group.add(newHitList(group.size(), tag, (HitsAggregationResult)res)); + } else { + String label = transform.getLabel(res.getTag()); + if (label != null) { + group.setField(label, newResult(res, tag)); + } + } + } + for (GroupListBuilder child : childLists) { + if (child != null) { + group.add(child.build()); + } + } + return group; + } + + GroupListBuilder getOrCreateChildList(int tag, boolean ranked) { + int index = tag + 1; // Add 1 to avoid the dreaded -1 default value. + if (index >= childLists.length) { + childLists = Arrays.copyOf(childLists, tag + 8); + } + GroupListBuilder ret = childLists[index]; + if (ret == null) { + ret = new GroupListBuilder(resultId.newChildId(childCount), tag, stable, ranked); + childLists[index] = ret; + childCount++; + } + return ret; + } + + void merge(com.yahoo.searchlib.aggregation.Group group) { + for (AggregationResult res : group.getAggregationResults()) { + int tag = res.getTag() + 1; // Add 1 due to dreaded -1 initialization as default. 
+ if (tag >= results.length) { + results = Arrays.copyOf(results, tag+8); + } + if ( ! results[tag] ) { + this.group.getAggregationResults().add(res); + results[tag] = true; + } + } + } + + GroupId newGroupId(com.yahoo.searchlib.aggregation.Group execGroup) { + ResultNode res = execGroup.getId(); + if (res instanceof FloatResultNode) { + return new DoubleId(res.getFloat()); + } else if (res instanceof IntegerResultNode) { + return new LongId(res.getInteger()); + } else if (res instanceof NullResultNode) { + return new NullId(); + } else if (res instanceof RawResultNode) { + return new RawId(res.getRaw()); + } else if (res instanceof StringResultNode) { + return new StringId(res.getString()); + } else if (res instanceof FloatBucketResultNode) { + FloatBucketResultNode bucketId = (FloatBucketResultNode)res; + return new DoubleBucketId(bucketId.getFrom(), bucketId.getTo()); + } else if (res instanceof IntegerBucketResultNode) { + IntegerBucketResultNode bucketId = (IntegerBucketResultNode)res; + return new LongBucketId(bucketId.getFrom(), bucketId.getTo()); + } else if (res instanceof StringBucketResultNode) { + StringBucketResultNode bucketId = (StringBucketResultNode)res; + return new StringBucketId(bucketId.getFrom(), bucketId.getTo()); + } else if (res instanceof RawBucketResultNode) { + RawBucketResultNode bucketId = (RawBucketResultNode)res; + return new RawBucketId(bucketId.getFrom(), bucketId.getTo()); + } else { + throw new UnsupportedOperationException(res.getClass().getName()); + } + } + + Object newResult(ExpressionNode execResult, int tag) { + if (execResult instanceof AverageAggregationResult) { + return ((AverageAggregationResult)execResult).getAverage().getNumber(); + } else if (execResult instanceof CountAggregationResult) { + return ((CountAggregationResult)execResult).getCount(); + } else if (execResult instanceof ExpressionCountAggregationResult) { + long count = ((ExpressionCountAggregationResult)execResult).getEstimatedUniqueCount(); + return 
correctExpressionCountEstimate(count, tag); + } else if (execResult instanceof MaxAggregationResult) { + return ((MaxAggregationResult)execResult).getMax().getValue(); + } else if (execResult instanceof MinAggregationResult) { + return ((MinAggregationResult)execResult).getMin().getValue(); + } else if (execResult instanceof SumAggregationResult) { + return ((SumAggregationResult)execResult).getSum().getValue(); + } else if (execResult instanceof XorAggregationResult) { + return ((XorAggregationResult)execResult).getXor(); + } else { + throw new UnsupportedOperationException(execResult.getClass().getName()); + } + } + + private long correctExpressionCountEstimate(long count, int tag) { + int actualGroupCount = group.getChildren().size(); + // Use actual group count if estimate differ. If max is present, only use actual group count if less than max. + // NOTE: If the actual group count is 0, estimate is also 0. + if (actualGroupCount > 0 && count != actualGroupCount) { + if (transform.getMax(tag + 1) == 0 || transform.getMax(tag + 1) > actualGroupCount) { + return actualGroupCount; + } + } + return count; + } + + + HitList newHitList(int listIdx, int tag, HitsAggregationResult execResult) { + HitList hitList = new HitList(transform.getLabel(tag)); + List<Hit> hits = execResult.getHits(); + PageInfo page = new PageInfo(resultId.newChildId(listIdx), tag, stable, hits.size()); + for (int i = page.firstEntry; i < page.lastEntry; ++i) { + hitList.add(hitConverter.toSearchHit(execResult.getSummaryClass(), hits.get(i))); + } + page.putContinuations(hitList.continuations()); + return hitList; + } + } + + private class GroupListBuilder { + + final Map<ResultNode, GroupBuilder> childResultGroups = new HashMap<>(); + final List<GroupBuilder> childGroups = new ArrayList<>(); + final ResultId resultId; + final int tag; + final boolean stable; + final boolean stableChildren; + final boolean ranked; + + GroupListBuilder(ResultId resultId, int tag, boolean stable, boolean ranked) { 
+ this.resultId = resultId; + this.tag = tag; + this.stable = stable; + this.stableChildren = stable && transform.isStable(resultId); + this.ranked = ranked; + } + + GroupList build() { + PageInfo page = new PageInfo(resultId, tag, stable, childGroups.size()); + GroupList groupList = new GroupList(transform.getLabel(tag)); + for (int i = page.firstEntry; i < page.lastEntry; ++i) { + GroupBuilder child = childGroups.get(i); + groupList.add(child.build(ranked ? child.group.getRank() : + (double)(page.lastEntry - i) / (page.lastEntry - page.firstEntry))); + } + page.putContinuations(groupList.continuations()); + return groupList; + } + + void addGroup(com.yahoo.searchlib.aggregation.Group execGroup) { + GroupBuilder groupBuilder = getOrCreateGroup(execGroup); + if (!execGroup.getChildren().isEmpty()) { + boolean ranked = execGroup.getChildren().get(0).isRankedByRelevance(); + execGroup.sortChildrenByRank(); + for (com.yahoo.searchlib.aggregation.Group childGroup : execGroup.getChildren()) { + GroupListBuilder childList = groupBuilder.getOrCreateChildList(childGroup.getTag(), ranked); + childList.addGroup(childGroup); + } + } + } + + GroupBuilder getOrCreateGroup(com.yahoo.searchlib.aggregation.Group execGroup) { + ResultNode res = execGroup.getId(); + GroupBuilder ret = childResultGroups.get(res); + if (ret != null) { + ret.merge(execGroup); + } else { + ret = new GroupBuilder(resultId.newChildId(childResultGroups.size()), execGroup, stableChildren); + childResultGroups.put(res, ret); + childGroups.add(ret); + } + return ret; + } + } + + private class PageInfo { + + final ResultId resultId; + final int tag; + final int max; + final int numEntries; + final int firstEntry; + final int lastEntry; + + PageInfo(ResultId resultId, int tag, boolean stable, int numEntries) { + this.resultId = resultId; + this.tag = tag; + this.numEntries = numEntries; + max = transform.getMax(tag); + if (max > 0) { + firstEntry = stable ? 
transform.getOffset(resultId) : 0; + lastEntry = Math.min(numEntries, firstEntry + max); + } else { + firstEntry = 0; + lastEntry = numEntries; + } + } + + void putContinuations(Map<String, Continuation> out) { + if (max > 0) { + if (firstEntry > 0) { + continuation.add(new OffsetContinuation(resultId, tag, firstEntry, 0)); + + int prevPage = Math.max(0, Math.min(firstEntry, lastEntry) - max); + out.put(Continuation.PREV_PAGE, new OffsetContinuation(resultId, tag, prevPage, + OffsetContinuation.FLAG_UNSTABLE)); + } + if (lastEntry < numEntries) { + out.put(Continuation.NEXT_PAGE, new OffsetContinuation(resultId, tag, lastEntry, + OffsetContinuation.FLAG_UNSTABLE)); + } + } + } + } + + /** + * Defines a helper interface to convert Vespa style grouping hits into corresponding instances of {@link Hit}. It + * is an interface to simplify testing. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ + public interface HitConverter { + + public com.yahoo.search.result.Hit toSearchHit(String summaryClass, com.yahoo.searchlib.aggregation.Hit hit); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/grouping/vespa/ResultId.java b/container-search/src/main/java/com/yahoo/search/grouping/vespa/ResultId.java new file mode 100644 index 00000000000..21026ac7e92 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/grouping/vespa/ResultId.java @@ -0,0 +1,71 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.grouping.vespa; + +import java.util.Arrays; + +/** + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +class ResultId { + + private final int[] indexes; + private final int hashCode; + + private ResultId(int[] indexes) { + this.indexes = indexes; + this.hashCode = Arrays.hashCode(indexes); + } + + public boolean startsWith(int... 
prefix) { + if (prefix.length > indexes.length) { + return false; + } + for (int i = 0; i < prefix.length; ++i) { + if (prefix[i] != indexes[i]) { + return false; + } + } + return true; + } + + public ResultId newChildId(int childIdx) { + int[] arr = Arrays.copyOf(indexes, indexes.length + 1); + arr[indexes.length] = childIdx; + return new ResultId(arr); + } + + @Override + public int hashCode() { + return hashCode; + } + + @Override + public boolean equals(Object obj) { + return obj instanceof ResultId && Arrays.equals(indexes, ((ResultId)obj).indexes); + } + + @Override + public String toString() { + return Arrays.toString(indexes); + } + + public void encode(IntegerEncoder out) { + out.append(indexes.length); + for (int i : indexes) { + out.append(i); + } + } + + public static ResultId decode(IntegerDecoder in) { + int len = in.next(); + int[] arr = new int[len]; + for (int i = 0; i < len; ++i) { + arr[i] = in.next(); + } + return new ResultId(arr); + } + + public static ResultId valueOf(int... indexes) { + return new ResultId(Arrays.copyOf(indexes, indexes.length)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/handler/HttpSearchResponse.java b/container-search/src/main/java/com/yahoo/search/handler/HttpSearchResponse.java new file mode 100644 index 00000000000..f844b5dd940 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/handler/HttpSearchResponse.java @@ -0,0 +1,173 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.handler; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import com.google.common.util.concurrent.ListenableFuture; +import com.yahoo.collections.ListMap; +import com.yahoo.container.jdisc.ExtendedResponse; +import com.yahoo.container.handler.Coverage; +import com.yahoo.container.handler.Timing; +import com.yahoo.container.jdisc.HttpRequest; +import com.yahoo.container.logging.AccessLogEntry; +import com.yahoo.container.logging.HitCounts; +import com.yahoo.jdisc.HeaderFields; +import com.yahoo.jdisc.handler.CompletionHandler; +import com.yahoo.jdisc.handler.ContentChannel; +import com.yahoo.processing.execution.Execution.Trace.LogValue; +import com.yahoo.processing.rendering.AsynchronousSectionedRenderer; +import com.yahoo.processing.rendering.Renderer; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.query.context.QueryContext; + +/** + * Wrap the result of a query as an HTTP response. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class HttpSearchResponse extends ExtendedResponse { + + private final Result result; + private final Query query; + private final Renderer<Result> rendererCopy; + private final Timing timing; + private final HitCounts hitCounts; + + public HttpSearchResponse(int status, Result result, Query query, Renderer renderer) { + super(status); + this.query = query; + this.result = result; + this.rendererCopy = renderer; + + this.timing = SearchResponse.createTiming(query, result); + this.hitCounts = SearchResponse.createHitCounts(query, result); + populateHeaders(headers(), result.getHeaders(false)); + } + + /** + * Copy custom HTTP headers from the search result over to the HTTP + * response. 
+ * + * @param outputHeaders + * the headers which will be sent to a client + * @param searchHeaders + * the headers from the search result, or null + */ + private static void populateHeaders(HeaderFields outputHeaders, + ListMap<String, String> searchHeaders) { + if (searchHeaders == null) { + return; + } + for (Map.Entry<String, List<String>> header : searchHeaders.entrySet()) { + for (String value : header.getValue()) { + outputHeaders.add(header.getKey(), value); + } + } + } + + public ListenableFuture<Boolean> waitableRender(OutputStream stream) throws IOException { + return waitableRender(result, query, rendererCopy, stream); + } + + public static ListenableFuture<Boolean> waitableRender(Result result, + Query query, + Renderer<Result> renderer, + OutputStream stream) throws IOException { + SearchResponse.trimHits(result); + SearchResponse.removeEmptySummaryFeatureFields(result); + return renderer.render(stream, result, query.getModel().getExecution(), query); + + } + + @Override + public void render(OutputStream output, ContentChannel networkChannel, CompletionHandler handler) throws IOException { + if (rendererCopy instanceof AsynchronousSectionedRenderer) { + AsynchronousSectionedRenderer<Result> renderer = (AsynchronousSectionedRenderer<Result>) rendererCopy; + renderer.setNetworkWiring(networkChannel, handler); + } + try { + try { + waitableRender(output); + } finally { + if (!(rendererCopy instanceof AsynchronousSectionedRenderer)) { + output.flush(); + } + } + } finally { + if (networkChannel != null && !(rendererCopy instanceof AsynchronousSectionedRenderer)) { + networkChannel.close(handler); + } + } + } + + @Override + public void populateAccessLogEntry(final AccessLogEntry accessLogEntry) { + super.populateAccessLogEntry(accessLogEntry); + populateAccessLogEntry(accessLogEntry, getHitCounts()); + } + + /* package-private */ + static void populateAccessLogEntry(AccessLogEntry jdiscRequestAccessLogEntry, HitCounts hitCounts) { + // This entry will be 
logged at Jetty level. Here we just populate with tidbits from this context. + + jdiscRequestAccessLogEntry.setHitCounts(hitCounts); + } + + @Override + public String getParsedQuery() { + return query.toString(); + } + + @Override + public Timing getTiming() { + return timing; + } + + @Override + public Coverage getCoverage() { + return result.getCoverage(false); + } + + @Override + public HitCounts getHitCounts() { + return hitCounts; + } + + /** + * Returns MIME type of this response + */ + @Override + public String getContentType() { + return rendererCopy.getMimeType(); + } + + /** + * Returns expected character encoding of this response + */ + @Override + public String getCharacterEncoding() { + String encoding = result.getQuery().getModel().getEncoding(); + return (encoding != null) ? encoding : rendererCopy.getEncoding(); + } + + /** Returns the query wrapped by this */ + public Query getQuery() { return query; } + + /** Returns the result wrapped by this */ + public Result getResult() { return result; } + + @Override + public Iterable<LogValue> getLogValues() { + QueryContext context = query.getContext(false); + return context == null + ? Collections::emptyIterator + : context::logValueIterator; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/handler/SearchHandler.java b/container-search/src/main/java/com/yahoo/search/handler/SearchHandler.java new file mode 100644 index 00000000000..c431fdac638 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/handler/SearchHandler.java @@ -0,0 +1,532 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.handler; + +import com.google.inject.Inject; +import com.yahoo.collections.Tuple2; +import com.yahoo.component.ComponentSpecification; +import com.yahoo.component.chain.Chain; +import com.yahoo.component.chain.ChainsConfigurer; +import com.yahoo.component.chain.model.ChainsModel; +import com.yahoo.component.chain.model.ChainsModelBuilder; +import com.yahoo.component.provider.ComponentRegistry; +import com.yahoo.container.Container; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.container.core.ChainsConfig; +import com.yahoo.container.core.QrTemplatesConfig; +import com.yahoo.container.jdisc.HttpRequest; +import com.yahoo.container.jdisc.HttpResponse; +import com.yahoo.container.jdisc.LoggingRequestHandler; +import com.yahoo.container.jdisc.VespaHeaders; +import com.yahoo.container.logging.AccessLog; +import com.yahoo.container.protect.FreezeDetector; +import com.yahoo.jdisc.Metric; +import com.yahoo.language.Linguistics; +import com.yahoo.log.LogLevel; +import com.yahoo.net.UriTools; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.IndexModel; +import com.yahoo.prelude.VespaSVersionRetriever; +import com.yahoo.prelude.query.QueryException; +import com.yahoo.prelude.query.parser.ParseException; +import com.yahoo.prelude.query.parser.SpecialTokenRegistry; +import com.yahoo.processing.rendering.Renderer; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.yolean.Exceptions; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.config.IndexInfoConfig; +import com.yahoo.search.debug.DebugRpcAdaptor; +import com.yahoo.search.query.profile.QueryProfileRegistry; +import com.yahoo.search.query.profile.compiled.CompiledQueryProfile; +import com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry; +import com.yahoo.search.query.profile.config.QueryProfileConfigurer; +import 
com.yahoo.search.query.profile.config.QueryProfilesConfig; +import com.yahoo.search.query.properties.DefaultProperties; +import com.yahoo.search.rendering.RendererRegistry; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.SearchChainRegistry; +import com.yahoo.search.statistics.ElapsedTime; +import com.yahoo.statistics.Callback; +import com.yahoo.statistics.Handle; +import com.yahoo.statistics.Statistics; +import com.yahoo.statistics.Value; +import com.yahoo.vespa.configdefinition.SpecialtokensConfig; +import edu.umd.cs.findbugs.annotations.NonNull; + +import java.util.concurrent.Executor; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Handles search request. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class SearchHandler extends LoggingRequestHandler { + + private final AtomicInteger requestsInFlight = new AtomicInteger(0); + + // max number of threads for the executor for this handler + private final int maxThreads; + + private static final CompoundName DETAILED_TIMING_LOGGING = new CompoundName("trace.timingDetails"); + + /** Event name for number of connections to the search subsystem */ + private static final String SEARCH_CONNECTIONS = "search_connections"; + + private static Logger log = Logger.getLogger(SearchHandler.class.getName()); + + private Value searchConnections; + + private final SearchChainRegistry searchChainRegistry; + + private final RendererRegistry rendererRegistry; + + private final IndexFacts indexFacts; + + private final SpecialTokenRegistry specialTokens; + + public static final String defaultSearchChainName = "default"; + private static final String fallbackSearchChain = "vespa"; + private static final CompoundName FORCE_TIMESTAMPS = new CompoundName("trace.timestamps");; + + // This 
is a hack to add the RPC adaptors for search only once + // TODO: Figure out the correct life cycle and init of RPC adaptors + static { + Container c = Container.get(); + c.addOptionalRpcAdaptor(new DebugRpcAdaptor()); + } + + private final Linguistics linguistics; + + private final CompiledQueryProfileRegistry queryProfileRegistry; + + private final class MeanConnections implements Callback { + @Override + public void run(final Handle h, final boolean firstTime) { + if (firstTime) { + metric.set(SEARCH_CONNECTIONS, 0.0d, null); + return; + } + Value v = (Value) h; + metric.set(SEARCH_CONNECTIONS, v.getMean(), null); + } + } + + @Inject + public SearchHandler( + final ChainsConfig chainsConfig, + final IndexInfoConfig indexInfo, + final QrSearchersConfig clusters, + final SpecialtokensConfig specialtokens, + final Statistics statistics, + final Linguistics linguistics, + final Metric metric, + final ComponentRegistry<Renderer> renderers, + final Executor executor, + final AccessLog accessLog, + final QueryProfilesConfig queryProfileConfig, + final ComponentRegistry<Searcher> searchers) { + super(executor, accessLog, metric, true); + log.log(LogLevel.DEBUG, "SearchHandler.init " + System.identityHashCode(this)); + searchChainRegistry = new SearchChainRegistry(searchers); + setupSearchChainRegistry(searchers, chainsConfig); + indexFacts = new IndexFacts(new IndexModel(indexInfo, clusters)); + indexFacts.freeze(); + specialTokens = new SpecialTokenRegistry(specialtokens); + rendererRegistry = new RendererRegistry(renderers.allComponents()); + QueryProfileRegistry queryProfileRegistry = QueryProfileConfigurer.createFromConfig(queryProfileConfig); + this.queryProfileRegistry = queryProfileRegistry.compile(); + + this.linguistics = linguistics; + this.maxThreads = examineExecutor(executor); + + searchConnections = new Value(SEARCH_CONNECTIONS, statistics, + new Value.Parameters().setLogRaw(true).setLogMax(true) + .setLogMean(true).setLogMin(true) + 
.setNameExtension(true) + .setCallback(new MeanConnections())); + } + + /** @deprecated use the constructor without deprecated parameters */ + @Deprecated + public SearchHandler( + final ChainsConfig chainsConfig, + final IndexInfoConfig indexInfo, + final QrSearchersConfig clusters, + final SpecialtokensConfig specialTokens, + final QrTemplatesConfig ignored, + final FreezeDetector ignored2, + final Statistics statistics, + final Linguistics linguistics, + final Metric metric, + final ComponentRegistry<Renderer> renderers, + final Executor executor, + final AccessLog accessLog, + final QueryProfilesConfig queryProfileConfig, + final ComponentRegistry<Searcher> searchers) { + this(chainsConfig, indexInfo, clusters, specialTokens, statistics, linguistics, metric, renderers, + executor, accessLog, queryProfileConfig, searchers); + } + + private void setupSearchChainRegistry(final ComponentRegistry<Searcher> searchers, + final ChainsConfig chainsConfig) { + final ChainsModel chainsModel = ChainsModelBuilder.buildFromConfig(chainsConfig); + ChainsConfigurer.prepareChainRegistry(searchChainRegistry, chainsModel, searchers); + searchChainRegistry.freeze(); + } + + private static int examineExecutor(Executor executor) { + if (executor instanceof ThreadPoolExecutor) { + return ((ThreadPoolExecutor) executor).getMaximumPoolSize(); + } + return Integer.MAX_VALUE; // assume unbound + } + + @Override + public final HttpResponse handle(com.yahoo.container.jdisc.HttpRequest request) { + requestsInFlight.incrementAndGet(); + try { + try { + return handleBody(request); + } catch (final QueryException e) { + return (e.getCause() instanceof IllegalArgumentException) + ? 
invalidParameterResponse(request, e) + : illegalQueryResponse(request, e); + } catch (final RuntimeException e) { // Make sure we generate a valid + // XML response even on unexpected + // errors + log.log(Level.WARNING, "Failed handling " + request, e); + return internalServerErrorResponse(request, e); + } + } finally { + requestsInFlight.decrementAndGet(); + } + } + + private int getHttpResponseStatus(com.yahoo.container.jdisc.HttpRequest httpRequest, Result result) { + boolean benchmarkOutput = VespaHeaders.benchmarkOutput(httpRequest); + if (benchmarkOutput) { + return VespaHeaders.getEagerErrorStatus(result.hits().getError(), + SearchResponse.getErrorIterator(result.hits().getErrorHit())); + } else { + return VespaHeaders.getStatus(SearchResponse.isSuccess(result), + result.hits().getError(), + SearchResponse.getErrorIterator(result.hits().getErrorHit())); + } + + } + + @SuppressWarnings("unchecked") + private HttpResponse errorResponse(HttpRequest request, ErrorMessage errorMessage) { + Query query = new Query(); + Result result = new Result(query, errorMessage); + Renderer renderer = getRendererCopy(ComponentSpecification.fromString(request.getProperty("format"))); + + result.getTemplating().setRenderer(renderer); // Pre-Vespa 6 Result.getEncoding() expects this TODO: Remove + + return new HttpSearchResponse(getHttpResponseStatus(request, result), result, query, renderer); + } + + private HttpResponse invalidParameterResponse(HttpRequest request, RuntimeException e) { + return errorResponse(request, ErrorMessage.createInvalidQueryParameter(Exceptions.toMessageString(e))); + } + + private HttpResponse illegalQueryResponse(HttpRequest request, RuntimeException e) { + return errorResponse(request, ErrorMessage.createIllegalQuery(Exceptions.toMessageString(e))); + } + + private HttpResponse internalServerErrorResponse(HttpRequest request, RuntimeException e) { + return errorResponse(request, ErrorMessage.createInternalServerError(Exceptions.toMessageString(e))); 
+ } + + private HttpSearchResponse handleBody(HttpRequest request) { + // Find query profile + String queryProfileName = request.getProperty("queryProfile"); + CompiledQueryProfile queryProfile = queryProfileRegistry.findQueryProfile(queryProfileName); + boolean benchmarkOutput = VespaHeaders.benchmarkOutput(request); + + // Create query + Query query = new Query(request, queryProfile); + + boolean benchmarkCoverage = VespaHeaders.benchmarkCoverage(benchmarkOutput, request.getJDiscRequest().headers()); + if (benchmarkCoverage) { + query.getPresentation().setReportCoverage(true); + } + + // Find and execute search chain if we have a valid query + String invalidReason = query.validate(); + Chain<Searcher> searchChain = null; + String searchChainName = null; + if (invalidReason == null) { + Tuple2<String, Chain<Searcher>> nameAndChain = resolveChain(query.properties().getString(Query.SEARCH_CHAIN)); + searchChainName = nameAndChain.first; + searchChain = nameAndChain.second; + } + + // Create the result + Result result; + if (invalidReason != null) { + result = new Result(query, ErrorMessage.createIllegalQuery(invalidReason)); + } else if (queryProfile == null && queryProfileName != null) { + result = new Result( + query, + ErrorMessage.createIllegalQuery("Could not resolve query profile '" + queryProfileName + "'")); + } else if (searchChain == null) { + result = new Result( + query, + ErrorMessage.createInvalidQueryParameter("No search chain named '" + searchChainName + "' was found")); + } else { + String pathAndQuery = UriTools.rawRequest(request.getUri()); + result = search(pathAndQuery, query, searchChain, searchChainRegistry); + } + + Renderer renderer; + if (result.getTemplating().usesDefaultTemplate()) { + renderer = toRendererCopy(query.getPresentation().getRenderer()); + result.getTemplating().setRenderer(renderer); // pre-Vespa 6 Result.getEncoding() expects this to be set. 
TODO: Remove + } + else { // somebody explicitly assigned a old style template + renderer = perRenderingCopy(result.getTemplating().getRenderer()); + } + + // Transform result to response + HttpSearchResponse response = new HttpSearchResponse(getHttpResponseStatus(request, result), + result, query, renderer); + if (benchmarkOutput) { + VespaHeaders.benchmarkOutput(response.headers(), benchmarkCoverage, response.getTiming(), + response.getHitCounts(), getErrors(result), response.getCoverage()); + } + + return response; + } + + private static int getErrors(Result result) { + return result.hits().getErrorHit() == null ? 0 : 1; + } + + @NonNull + private Renderer<Result> toRendererCopy(ComponentSpecification format) { + Renderer<Result> renderer = rendererRegistry.getRenderer(format); + renderer = perRenderingCopy(renderer); + return renderer; + } + + private Tuple2<String, Chain<Searcher>> resolveChain(String explicitChainName) { + String chainName = explicitChainName; + if (chainName == null) { + chainName = defaultSearchChainName; + } + + Chain<Searcher> searchChain = searchChainRegistry.getChain(chainName); + if (searchChain == null && explicitChainName == null) { // explicit + // search chain + // not found + // should cause + // error + chainName = fallbackSearchChain; + searchChain = searchChainRegistry.getChain(chainName); + } + return new Tuple2<>(chainName, searchChain); + } + + /** Used from container SDK, for internal use only */ + public Result searchAndFill(Query query, Chain<? 
extends Searcher> searchChain, SearchChainRegistry registry) { + Result errorResult = validateQuery(query); + if (errorResult != null) return errorResult; + + Renderer<Result> renderer = rendererRegistry.getRenderer(query.getPresentation().getRenderer()); + + // docsumClass null means "unset", so we set it (it might be null + // here too in which case it will still be "unset" after we + // set it :-) + if (query.getPresentation().getSummary() == null && renderer instanceof com.yahoo.search.rendering.Renderer) + query.getPresentation().setSummary(((com.yahoo.search.rendering.Renderer) renderer).getDefaultSummaryClass()); + + Execution execution = new Execution(searchChain, + new Execution.Context(registry, indexFacts, specialTokens, rendererRegistry, linguistics)); + query.getModel().setExecution(execution); + query.getModel().traceLanguage(); + execution.trace().setForceTimestamps(query.properties().getBoolean(FORCE_TIMESTAMPS, false)); + if (query.properties().getBoolean(DETAILED_TIMING_LOGGING, false)) { + // check and set (instead of set directly) to avoid overwriting stuff from prepareForBreakdownAnalysis() + execution.context().setDetailedDiagnostics(true); + } + Result result = execution.search(query); + + // The former "if (result.getTemplating() == null) result.getTemplating().setRenderer(renderer)" is gone: + // its body ran only when getTemplating() was null, so it could never set a renderer, only throw + // a NullPointerException. + + ensureQuerySet(result, query); + execution.fill(result, result.getQuery().getPresentation().getSummary()); + + traceExecutionTimes(query, result); + traceVespaSVersion(query); + traceRequestAttributes(query); + return result; + } + + /** Traces the JDisc request context at trace level 7; does nothing when the query trace level is lower. */ + private void traceRequestAttributes(Query query) { + int minimumTraceLevel = 7; + if (query.getTraceLevel() >= minimumTraceLevel) { + query.trace("Request attributes: " + query.getHttpRequest().getJDiscRequest().context(), minimumTraceLevel); + } + } + + /** + * For internal use only + */ + public Renderer<Result> getRendererCopy(ComponentSpecification spec) { // TODO: Deprecate this + Renderer<Result> renderer = rendererRegistry.getRenderer(spec); + return 
perRenderingCopy(renderer); + } + + @NonNull + private Renderer<Result> perRenderingCopy(Renderer<Result> renderer) { + Renderer<Result> copy = renderer.clone(); + copy.init(); + return copy; + } + + private void ensureQuerySet(Result result, Query fallbackQuery) { + Query query = result.getQuery(); + if (query == null) { + result.setQuery(fallbackQuery); + } + } + + private Result search(String request, Query query, Chain<Searcher> searchChain, SearchChainRegistry registry) { + if (query.getTraceLevel() >= 2) { + query.trace("Invoking " + searchChain, false, 2); + } + + if (searchConnections != null) { + connectionStatistics(); + } else { + log.log(LogLevel.WARNING, + "searchConnections is a null reference, probably a known race condition during startup.", + new IllegalStateException("searchConnections reference is null.")); + } + try { + return searchAndFill(query, searchChain, registry); + } catch (ParseException e) { + ErrorMessage error = ErrorMessage.createIllegalQuery("Could not parse query [" + request + "]: " + + Exceptions.toMessageString(e)); + log.log(LogLevel.DEBUG, () -> error.getDetailedMessage()); + return new Result(query, error); + } catch (IllegalArgumentException e) { + ErrorMessage error = ErrorMessage.createBadRequest("Invalid search request [" + request + "]: " + + Exceptions.toMessageString(e)); + log.log(LogLevel.DEBUG, () -> error.getDetailedMessage()); + return new Result(query, error); + } catch (LinkageError e) { + // Should have been an Exception in an OSGi world - typical bundle dependency issue problem + ErrorMessage error = ErrorMessage.createErrorInPluginSearcher( + "Error executing " + searchChain + "]: " + Exceptions.toMessageString(e), e); + log(request, query, e); + return new Result(query, error); + } catch (StackOverflowError e) { // Also recoverable + ErrorMessage error = ErrorMessage.createErrorInPluginSearcher( + "Error executing " + searchChain + "]: " + Exceptions.toMessageString(e), e); + log(request, query, e); + 
return new Result(query, error); + } catch (Exception e) { + Result result = new Result(query); + log(request, query, e); + result.hits().setError( + ErrorMessage.createUnspecifiedError("Failed searching: " + Exceptions.toMessageString(e), e)); + return result; + } + } + + private void connectionStatistics() { + int connections = requestsInFlight.intValue(); + searchConnections.put(connections); + if (maxThreads > 3) { + // cast to long to avoid overflows if maxThreads is at no + // log value (maxint) + final long maxThreadsAsLong = maxThreads; + final long connectionsAsLong = connections; + // only log when exactly crossing the limit to avoid + // spamming the log + if (connectionsAsLong < maxThreadsAsLong * 9L / 10L) { + // NOP + } else if (connectionsAsLong == maxThreadsAsLong * 9L / 10L) { + log.log(Level.WARNING, threadConsumptionMessage(connections, maxThreads, "90")); + } else if (connectionsAsLong == maxThreadsAsLong * 95L / 100L) { + log.log(Level.WARNING, threadConsumptionMessage(connections, maxThreads, "95")); + } else if (connectionsAsLong == maxThreadsAsLong) { + log.log(Level.WARNING, threadConsumptionMessage(connections, maxThreads, "100")); + } + } + } + + private String threadConsumptionMessage(int connections, int maxThreads, String percentage) { + return percentage + "% of possible search connections (" + connections + + " of maximum " + maxThreads + ") currently active."; + } + + private void log(String request, Query query, Throwable e) { + // Attempted workaround for missing stack traces + if (e.getStackTrace().length == 0) { + log.log(LogLevel.ERROR, + "Failed executing " + query.toDetailString() + " [" + request + + "], received exception with no context", e); + } else { + log.log(LogLevel.ERROR, + "Failed executing " + query.toDetailString() + " [" + request + "]", e); + } + } + + private Result validateQuery(Query query) { + if (query.getHttpRequest().getProperty(DefaultProperties.MAX_HITS.toString()) != null) + throw new 
RuntimeException(DefaultProperties.MAX_HITS + " must be specified in a query profile."); + + if (query.getHttpRequest().getProperty(DefaultProperties.MAX_OFFSET.toString()) != null) + throw new RuntimeException(DefaultProperties.MAX_OFFSET + " must be specified in a query profile."); + + int maxHits = query.properties().getInteger(DefaultProperties.MAX_HITS); + int maxOffset = query.properties().getInteger(DefaultProperties.MAX_OFFSET); + + if (query.getHits() > maxHits) { + return new Result(query, ErrorMessage.createIllegalQuery(query.getHits() + + " hits requested, configured limit: " + maxHits + ".")); + + } else if (query.getOffset() > maxOffset) { + return new Result(query, + ErrorMessage.createIllegalQuery("Offset of " + query.getOffset() + + " requested, configured limit: " + maxOffset + ".")); + } + return null; + } + + private void traceExecutionTimes(Query query, Result result) { + if (query.getTraceLevel() < 3) return; + + ElapsedTime elapsedTime = result.getElapsedTime(); + long now = System.currentTimeMillis(); + if (elapsedTime.firstFill() != 0) { + query.trace("Query time " + query + ": " + + (elapsedTime.firstFill() - elapsedTime.first()) + " ms", false, 3); + + query.trace("Summary fetch time " + query + ": " + + (now - elapsedTime.firstFill()) + " ms", false, 3); + } else { + query.trace("Total search time " + query + ": " + + (now - elapsedTime.first()) + " ms", false, 3); + } + } + + private void traceVespaSVersion(Query query) { + query.trace("Vespa version: " + VespaSVersionRetriever.getVersion(), false, 4); + } + + public SearchChainRegistry getSearchChainRegistry() { + return searchChainRegistry; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/handler/SearchResponse.java b/container-search/src/main/java/com/yahoo/search/handler/SearchResponse.java new file mode 100644 index 00000000000..b0460ee6597 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/handler/SearchResponse.java @@ -0,0 +1,68 @@ +// 
Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.handler; + +import com.yahoo.container.handler.Timing; +import com.yahoo.container.logging.HitCounts; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.result.ErrorHit; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.result.Hit; + +import java.util.ArrayList; +import java.util.Iterator; + +/** + * Some leftover static methods. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class SearchResponse { + + // Remove (the empty) summary feature field if not requested. + static void removeEmptySummaryFeatureFields(Result result) { + // TODO: Move to some searcher in Vespa backend search chains + if (!result.hits().getQuery().getRanking().getListFeatures()) + for (Iterator<Hit> i = result.hits().unorderedIterator(); i.hasNext();) + i.next().removeField(Hit.RANKFEATURES_FIELD); + } + + static void trimHits(Result result) { + if (result.getConcreteHitCount() > result.hits().getQuery().getHits()) { + result.hits().trim(0, result.hits().getQuery().getHits()); + } + } + + static Iterator<? extends ErrorMessage> getErrorIterator(ErrorHit h) { + if (h == null) { + return new ArrayList<ErrorMessage>(0).iterator(); + } else { + return h.errorIterator(); + } + } + + static boolean isSuccess(Result r) { + if (r.hits().getErrorHit()==null) return true; + for (Hit hit : r.hits()) + if ( ! 
hit.isMeta()) return true; // contains data : success + return false; + } + + @SuppressWarnings("deprecation") + public static Timing createTiming(Query query, Result result) { + return new Timing(result.getElapsedTime().firstFill(), + 0, + result.getElapsedTime().first(), query.getTimeout()); + } + + public static HitCounts createHitCounts(Query query, Result result) { + return new HitCounts(result.getHitCount(), + result.getConcreteHitCount(), + result.getTotalHitCount(), + query.getHits(), + query.getOffset()); + } + +} + + diff --git a/container-search/src/main/java/com/yahoo/search/handler/package-info.java b/container-search/src/main/java/com/yahoo/search/handler/package-info.java new file mode 100644 index 00000000000..fa35495e3f8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/handler/package-info.java @@ -0,0 +1,11 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * The search handler, which handles search request to the Container by translating the Request into a Query, invoking the + * chosen Search Chain to get a Result, which it translates to a Response which is returned to the Container. + */ +@ExportPackage +@PublicApi +package com.yahoo.search.handler; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/intent/model/Intent.java b/container-search/src/main/java/com/yahoo/search/intent/model/Intent.java new file mode 100644 index 00000000000..f9d97e057d1 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/intent/model/Intent.java @@ -0,0 +1,39 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.intent.model; + +/** + * A representation of an intent behind a query. 
Intents have no structure but are just id's of a + * set which is predefined in the application. + * <p> + * Intents are Value Objects. + * <p> + * Intent ids should be human readable, start with lower case and use camel casing + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class Intent { + + private String id; + + public static final Intent Default=new Intent("default"); + + /** Creates an intent from a string id */ + public Intent(String id) { + this.id=id; + } + + /** Returns the id of this intent, never null */ + public String getId() { return id; } + + public @Override int hashCode() { return id.hashCode(); } + + public @Override boolean equals(Object other) { + if (other==this) return true; + if ( ! (other instanceof Intent)) return false; + return this.id.equals(((Intent)other).id); + } + + /** Returns the id of this intent */ + public @Override String toString() { return id; } + +} diff --git a/container-search/src/main/java/com/yahoo/search/intent/model/IntentModel.java b/container-search/src/main/java/com/yahoo/search/intent/model/IntentModel.java new file mode 100644 index 00000000000..915c8fbd1d1 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/intent/model/IntentModel.java @@ -0,0 +1,90 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.intent.model; + +import com.yahoo.search.Query; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.text.interpretation.Interpretation; + +import java.util.*; + +/** + * This is the root node of an intent model. + * The intent model represents the intent analysis of a query. + * This is a probabilistic model - the query may have multiple interpretations with different probability. + * Each interpretation may have multiple + * possible intents, making this a tree. 
+ * + * @author bratseth + */ +public class IntentModel extends ParentNode<InterpretationNode> { + + /** The name of the property carrying the intent model string: intentModel */ + public static final CompoundName intentModelStringName=new CompoundName("intentModel"); + /** The name of the property carrying the intent model object: IntentModel */ + public static final CompoundName intentModelObjectName=new CompoundName("IntentModel"); + + private static final InterpretationNodeComparator inodeComp = new InterpretationNodeComparator(); + + /** Creates an empty intent model */ + public IntentModel() { + } + + /** Creates an intent model from some interpretations */ + public IntentModel(List<Interpretation> interpretations) { + for (Interpretation interpretation : interpretations) + children().add(new InterpretationNode(interpretation)); + sortChildren(); + } + + /** Creates an intent model from some interpretations */ + public IntentModel(Interpretation... interpretations) { + for (Interpretation interpretation : interpretations) + children().add(new InterpretationNode(interpretation)); + sortChildren(); + } + + /** Sort interpretations by descending score order */ + public void sortChildren() { + Collections.sort(children(), inodeComp); + } + + /** + * Returns a flattened list of sources with a normalized appropriateness of each, sorted by + * decreasing appropriateness. + * This is obtained by summing the source appropriateness vectors of each intent node weighted + * by the owning intent and interpretation probabilities. + * Sources with a resulting probability of 0 are stated to be omitted from the returned list; no such filtering is visible in this method (TODO confirm). 
+ */ + public List<SourceNode> getSources() { + Map<Source,SourceNode> sources=new HashMap<>(); + addSources(1.0,sources); + List<SourceNode> sourceList=new ArrayList<>(sources.values()); + Collections.sort(sourceList); + return sourceList; + } + + /** Returns the names of the sources returned from {@link #getSources} for convenience */ + public List<String> getSourceNames() { + List<String> sourceNames=new ArrayList<>(); + for (SourceNode sourceNode : getSources()) + sourceNames.add(sourceNode.getSource().getId()); + return sourceNames; + } + + /** Returns the intent model stored at property key "IntentModel" in this query, or null if none */ + public static IntentModel getFrom(Query query) { + return (IntentModel)query.properties().get(intentModelObjectName); + } + + /** Stores this intent model at property key "IntentModel" in this query */ + public void setTo(Query query) { + query.properties().set(intentModelObjectName,this); + } + + static class InterpretationNodeComparator implements Comparator<InterpretationNode> { + public int compare(InterpretationNode o1, InterpretationNode o2) { + double diff = o2.getScore()-o1.getScore(); + return (diff>0) ? 1 : ( (diff<0)? -1:0 ); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/search/intent/model/IntentNode.java b/container-search/src/main/java/com/yahoo/search/intent/model/IntentNode.java new file mode 100644 index 00000000000..c77c937b760 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/intent/model/IntentNode.java @@ -0,0 +1,29 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.intent.model; + +/** + * An intent in an intent model tree. The intent node score is the <i>probability</i> of this intent + * given the parent interpretation. 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class IntentNode extends ParentNode<SourceNode> { + + private Intent intent; + + public IntentNode(Intent intent,double probabilityScore) { + super(probabilityScore); + this.intent=intent; + } + + /** Returns the intent of this node, this is never null */ + public Intent getIntent() { return intent; } + + public void setIntent(Intent intent) { this.intent=intent; } + + /** Returns intent:probability */ + public @Override String toString() { + return intent + ":" + getScore(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/intent/model/InterpretationNode.java b/container-search/src/main/java/com/yahoo/search/intent/model/InterpretationNode.java new file mode 100644 index 00000000000..51e5d00c563 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/intent/model/InterpretationNode.java @@ -0,0 +1,45 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.intent.model; + +import com.yahoo.text.interpretation.Interpretation; + +/** + * An interpretation which may have multiple intents. The score of this node is the probability of + * the wrapped interpretation. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class InterpretationNode extends ParentNode<IntentNode> { + + private Interpretation interpretation; + + public InterpretationNode(Interpretation interpretation) { + super(0); // Super score is not used + this.interpretation=interpretation; + children().add(new IntentNode(Intent.Default,1.0)); + } + + /** Returns this interpretation. This is never null. 
*/ + public Interpretation getInterpretation() { return interpretation; } + + /** Sets this interpretation */ + public void setInterpretation(Interpretation interpretation) { + this.interpretation=interpretation; + } + + /** Returns the probability of the interpretation of this */ + public @Override double getScore() { + return interpretation.getProbability(); + } + + /** Sets the probability of the interpretation of this */ + public void setScore(double score) { + interpretation.setProbability(score); + } + + /** Returns interpretations toString() */ + public @Override String toString() { + return interpretation.toString(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/intent/model/Node.java b/container-search/src/main/java/com/yahoo/search/intent/model/Node.java new file mode 100644 index 00000000000..ecd3ec712bb --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/intent/model/Node.java @@ -0,0 +1,48 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.intent.model; + +import java.util.Map; + +/** + * A node in the <a href="TODO">intent model tree</a> + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public abstract class Node implements Comparable<Node> { + + /** + * The score, unless getScore/setScore is overridden which is the case with interpretations, + * so DO NOT ACCESS SCORE DIRECTLY, ALWAYS USE GET/SET + */ + private double score; + + public Node(double score) { + this.score=score; + } + + /** Returns the normalized (0-1) score of this node */ + public double getScore() { return score; } + + /** Sets the normalized (0-1) score of this node */ + public void setScore(double score) { this.score=score; } + + /** Increases this score by an increment and returns the new score */ + public double increaseScore(double increment) { + setScore(getScore()+increment); + return getScore(); + } + + public int compareTo(Node other) { + if (this.getScore()<other.getScore()) return 1; + if (this.getScore()>other.getScore()) return -1; + return 0; + } + + /** + * Adds the sources at (and beneath) this node to the given + * sparsely represented source vector, weighted by the score of this node + * times the given weight from the parent path + */ + abstract void addSources(double weight,Map<Source,SourceNode> sources); + +} diff --git a/container-search/src/main/java/com/yahoo/search/intent/model/ParentNode.java b/container-search/src/main/java/com/yahoo/search/intent/model/ParentNode.java new file mode 100644 index 00000000000..357060be93c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/intent/model/ParentNode.java @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.intent.model; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +/** + * A node which is not a leaf in the intent tree + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public abstract class ParentNode<T extends Node> extends Node { + + private List<T> children=new ArrayList<>(); + + public ParentNode() { + super(1.0); + } + + public ParentNode(double score) { + super(score); + } + + /** + * This returns the children of this node in the intent tree. + * This is never null. Children can be added and removed from this list to modify this node. + */ + public List<T> children() { return children; } + + @Override void addSources(double weight,Map<Source,SourceNode> sources) { + for (T child : children) + child.addSources(weight*getScore(),sources); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/intent/model/Source.java b/container-search/src/main/java/com/yahoo/search/intent/model/Source.java new file mode 100644 index 00000000000..937b6ca02e4 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/intent/model/Source.java @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.intent.model; + +/** + * A representation of a source. Sources have no structure but are just id of a + * set which is defined in the application. + * <p> + * Sources are Value Objects. 
+ * <p> + * Source ids should be human readable, start with lower case and use camel casing + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class Source { + + private String id; + + /** Creates a source from a string id */ + public Source(String id) { + this.id=id; + } + + /** Returns the id of this source (expected to be non-null, although the constructor does not check this) */ + public String getId() { return id; } + + public @Override int hashCode() { return id.hashCode(); } + + public @Override boolean equals(Object other) { + if (other==this) return true; + if ( ! (other instanceof Source)) return false; + return this.id.equals(((Source)other).id); + } + + /** Returns the id of this source */ + public @Override String toString() { return id; } + +} diff --git a/container-search/src/main/java/com/yahoo/search/intent/model/SourceNode.java b/container-search/src/main/java/com/yahoo/search/intent/model/SourceNode.java new file mode 100644 index 00000000000..5f63ddbe8d1 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/intent/model/SourceNode.java @@ -0,0 +1,41 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.intent.model; + +import java.util.Map; + +/** + * A source node in an intent model tree. Represents a source with an appropriateness score + * (i.e. the score of a source node is called <i>appropriateness</i>). + * Sources are ordered by decreasing appropriateness. 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class SourceNode extends Node { + + private Source source; + + public SourceNode(Source source,double score) { + super(score); + this.source=source; + } + + /** Sets the source of this node */ + public void setSource(Source source) { this.source=source; } + + /** Returns the source of this node */ + public Source getSource() { return source; } + + @Override void addSources(double weight,Map<Source,SourceNode> sources) { + SourceNode existing=sources.get(source); + if (existing!=null) + existing.increaseScore(weight*getScore()); + else + sources.put(source,new SourceNode(source,weight*getScore())); + } + + /** Returns source:appropriateness */ + public @Override String toString() { + return source + ":" + getScore(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/intent/model/package-info.java b/container-search/src/main/java/com/yahoo/search/intent/model/package-info.java new file mode 100644 index 00000000000..1e3e38208c5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/intent/model/package-info.java @@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +@PublicApi +package com.yahoo.search.intent.model; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/match/DocumentDb.java b/container-search/src/main/java/com/yahoo/search/match/DocumentDb.java new file mode 100644 index 00000000000..f4be6861364 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/match/DocumentDb.java @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.match; + +import com.yahoo.document.Document; +import com.yahoo.document.DocumentOperation; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; + +/** + * A searchable database of documents + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class DocumentDb extends Searcher { + + /** + * Put a document or apply an update to this document db + */ + public void put(DocumentOperation op) { + + } + + /** Remove a document from this document db */ + public void remove(Document document) { + + } + + /** Search this document db */ + @Override + public Result search(Query query, Execution execution) { + Result r = execution.search(query); + return r; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/package-info.java b/container-search/src/main/java/com/yahoo/search/package-info.java new file mode 100644 index 00000000000..96255d9108b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/package-info.java @@ -0,0 +1,11 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * <b>The top level classes of the search container.</b> A Query represents the incoming request, which produces a Result + * by chained execution of a set of Searchers. + */ +@ExportPackage +@PublicApi +package com.yahoo.search; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/PageTemplate.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/PageTemplate.java new file mode 100644 index 00000000000..8c421feae47 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/PageTemplate.java @@ -0,0 +1,82 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
// See LICENSE in the project root.
package com.yahoo.search.pagetemplates;

import com.yahoo.component.ComponentId;
import com.yahoo.component.provider.FreezableComponent;
import com.yahoo.search.pagetemplates.model.PageElement;
import com.yahoo.search.pagetemplates.model.PageTemplateVisitor;
import com.yahoo.search.pagetemplates.model.Section;
import com.yahoo.search.pagetemplates.model.Source;

import java.util.Collections;
import java.util.Set;

/**
 * A page template describes one way of organizing a returned page.
 * It is the root of a recursive structure of page template elements.
 *
 * @author bratseth
 */
public final class PageTemplate extends FreezableComponent implements PageElement {

    /** The root section of this page */
    private Section section = new Section();

    /** The sources mentioned (recursively) in this template. Only assigned when this is frozen, null before that. */
    private Set<Source> sources = null;

    public PageTemplate(ComponentId id) {
        super(id);
    }

    /** Replaces the root section. Only permitted while this is not frozen. */
    public void setSection(Section section) {
        ensureNotFrozen();
        this.section = section;
    }

    /** Returns the root section of this. This is never null. */
    public Section getSection() {
        return section;
    }

    /**
     * Returns an unmodifiable set of all the sources this template <i>may</i> include (depending on choice resolution).
     * If the template allows (somewhere) the "any" source (*), Source.any will be in the set returned.
     * On a frozen template the set computed at freeze time is returned directly; otherwise the
     * structure is traversed on each call.
     */
    public Set<Source> getSources() {
        if ( ! isFrozen()) {
            SourceVisitor collector = new SourceVisitor();
            getSection().accept(collector);
            return Collections.unmodifiableSet(collector.getSources());
        }
        return sources;
    }

    /** Resolves placeholders, freezes the root section and caches the source set before freezing this. */
    @Override
    public void freeze() {
        if ( ! isFrozen()) {
            resolvePlaceholders();
            section.freeze();
            sources = getSources(); // computed while not yet frozen, so this traverses the structure
            super.freeze();
        }
    }

    /** Validates and creates the necessary internal references between placeholders and their resolving choices */
    private void resolvePlaceholders() {
        try {
            PlaceholderMappingVisitor mapper = new PlaceholderMappingVisitor();
            accept(mapper);
            PlaceholderReferenceCreatingVisitor referenceCreator =
                    new PlaceholderReferenceCreatingVisitor(mapper.getMap());
            accept(referenceCreator);
        }
        catch (IllegalArgumentException e) {
            throw new IllegalArgumentException(this + " is invalid", e);
        }
    }

    /** Accepts a visitor to this structure: This is visited first, then the root section */
    @Override
    public void accept(PageTemplateVisitor visitor) {
        visitor.visit(this);
        section.accept(visitor);
    }

    @Override
    public String toString() {
        return "page template '" + getId() + "'";
    }

}
// diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/PageTemplateRegistry.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/PageTemplateRegistry.java
// new file mode 100644
// index 00000000000..ffeec4b5dd1
// --- /dev/null
// +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/PageTemplateRegistry.java
// @@ -0,0 +1,16 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.search.pagetemplates; + +import com.yahoo.component.provider.ComponentRegistry; +import com.yahoo.search.pagetemplates.engine.Resolver; + +/** + * @author bratseth + */ +public class PageTemplateRegistry extends ComponentRegistry<PageTemplate> { + + public void register(PageTemplate pageTemplate) { + super.register(pageTemplate.getId(), pageTemplate); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/PageTemplateSearcher.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/PageTemplateSearcher.java new file mode 100644 index 00000000000..eb928097e2d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/PageTemplateSearcher.java @@ -0,0 +1,234 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.pagetemplates; + +import com.google.inject.Inject; +import com.yahoo.component.ComponentId; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.component.provider.ComponentRegistry; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.intent.model.IntentModel; +import com.yahoo.search.pagetemplates.config.PageTemplateConfigurer; +import com.yahoo.search.pagetemplates.engine.Organizer; +import com.yahoo.search.pagetemplates.engine.Resolution; +import com.yahoo.search.pagetemplates.engine.Resolver; +import com.yahoo.search.pagetemplates.engine.resolvers.DeterministicResolver; +import com.yahoo.search.pagetemplates.engine.resolvers.RandomResolver; +import com.yahoo.search.pagetemplates.engine.resolvers.ResolverRegistry; +import com.yahoo.search.pagetemplates.model.Choice; +import com.yahoo.search.pagetemplates.model.PageElement; +import com.yahoo.search.pagetemplates.model.Source; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.result.ErrorMessage; +import 
com.yahoo.search.searchchain.Execution; + +import java.util.*; + +/** + * Enables page optimization templates. + * This searcher should be placed before federation points in the search chain. + * <p> + * <b>Input query properties:</b> + * <ul> + * <li><code>page.idList</code> - a List<String> of id strings of the page templates this should choose between</li> + * <li><code>page.id</code> - a space-separated string of ids of the page templates this should choose between. + * This property is ignored if <code>page.idList</code> is set</li> + * <li><code>page.resolver</code> the id of the resolver to use to resolve choices. This is either the component id + * of a deployed resolver component, or one of the strings + * <code>native.deterministic</code> (which always pics the last choice) or <code>native.random</code></li> + * </ul> + * + * <b>Output query properties:</b> + * <ul> + * <li><code>page.ListOfPageTemplate</code>A List<PageTemplate> + * containing a list of the page templates used for this query + * </ul> + * + * <p> + * The set of page templates chosen for the query specifies a list of sources to be queries (the page template sources). 
+ * In addition, the query may contain + * <ul> + * <li>a set of sources set explicitly in the Request, a query property or a searcher (the query model sources) + * <li>a set of sources specified in the {@link com.yahoo.search.intent.model.IntentModel} (the intent model sources) + * </ul> + * This searcher combines these sources into a single set in query.model by the following rules: + * <ul> + * <li>If the query model sources is set (not empty), it is not changed + * <li>If the page template sources contains the ANY source AND there is an intent model + * the query model sources is set to the union of the page template sources and the intent model sources + * <li>If the page template sources contains the ANY source AND there is no intent model, + * the query model sources is left empty (causing all sources to be queried) + * <li>Otherwise, the query model sources is set to the page template sources + * </ul> + * + * @author bratseth + */ +@Provides("PageTemplates") +public class PageTemplateSearcher extends Searcher { + + /** The name of the query property containing the resolved candidate page template list */ + public static final CompoundName pagePageTemplateListName=new CompoundName("page.PageTemplateList"); + /** The name of the query property containing a list of candidate pages to consider */ + public static final CompoundName pageIdListName=new CompoundName("page.idList"); + /** The name of the query property containing the page id to use */ + public static final CompoundName pageIdName=new CompoundName("page.id"); + /** The name of the query property containing the resolver id to use */ + public static final CompoundName pageResolverName=new CompoundName("page.resolver"); + + private final ResolverRegistry resolverRegistry; + + private final Organizer organizer = new Organizer(); + + private final PageTemplateRegistry templateRegistry; + + /** Creates this from a configuration. This will be called by the container. 
*/ + @Inject + public PageTemplateSearcher(PageTemplatesConfig pageTemplatesConfig, ComponentRegistry<Resolver> resolverRegistry) { + this(PageTemplateConfigurer.toRegistry(pageTemplatesConfig), resolverRegistry.allComponents()); + } + + /** + * Creates this from an existing page template registry, using only built-in resolvers + * + * @param templateRegistry the page template registry. This will be frozen by this call. + * @param resolvers the resolvers to use, in addition to the default resolvers + */ + public PageTemplateSearcher(PageTemplateRegistry templateRegistry, Resolver... resolvers) { + this(templateRegistry, Arrays.asList(resolvers)); + } + + private PageTemplateSearcher(PageTemplateRegistry templateRegistry, List<Resolver> resolvers) { + this.templateRegistry = templateRegistry; + templateRegistry.freeze(); + this.resolverRegistry = new ResolverRegistry(resolvers); + } + + @Override + public Result search(Query query, Execution execution) { + // Pre execution: Choose template and sources + List<PageElement> pages=selectPageTemplates(query); + if (pages.isEmpty()) return execution.search(query); // Bypass if no page template chosen + addSources(pages,query); + + // Set the page template list for inspection by other searchers + query.properties().set(pagePageTemplateListName, pages); + + // Execute + Result result=execution.search(query); + + // Post execution: Resolve choices and organize the result as dictated by the resolved template + Choice pageTemplateChoice=Choice.createSingletons(pages); + Resolution resolution=selectResolver(query).resolve(pageTemplateChoice,query,result); + organizer.organize(pageTemplateChoice,resolution,result); + return result; + } + + /** + * Returns the list of page templates specified in the query, or the default if none, or the + * empty list if no default, never null. 
+ */ + private List<PageElement> selectPageTemplates(Query query) { + // Determine the list of page template ids + @SuppressWarnings("unchecked") + List<String> pageIds = (List<String>) query.properties().get(pageIdListName); + if (pageIds==null) { + String pageIdString=query.properties().getString(pageIdName,"").trim(); + if (pageIdString.length()>0) + pageIds=Arrays.asList(pageIdString.split(" ")); + } + + // If none set, just return the default or null if none + if (pageIds==null) { + PageElement defaultPage=templateRegistry.getComponent("default"); + return (defaultPage==null ? Collections.<PageElement>emptyList() : Collections.singletonList(defaultPage)); + } + + // Resolve the id list to page templates + List<PageElement> pages=new ArrayList<>(pageIds.size()); + for (String pageId : pageIds) { + PageTemplate page=templateRegistry.getComponent(pageId); + if (page==null) + query.errors().add(ErrorMessage.createInvalidQueryParameter("Could not resolve requested page template '" + + pageId + "'")); + else + pages.add(page); + } + + return pages; + } + + private Resolver selectResolver(Query query) { + String resolverId=query.properties().getString(pageResolverName); + if (resolverId==null) return resolverRegistry.defaultResolver(); + Resolver resolver=resolverRegistry.getComponent(resolverId); + if (resolver==null) throw new IllegalArgumentException("No page template resolver '" + resolverId + "'"); + return resolver; + } + + /** Sets query.getModel().getSources() to the right value and add source parameters specified in templates */ + private void addSources(List<PageElement> pages,Query query) { + // Determine all wanted sources + Set<Source> pageSources=new HashSet<>(); + for (PageElement page : pages) + pageSources.addAll(((PageTemplate)page).getSources()); + + addErrorIfSameSourceMultipleTimes(pages,pageSources,query); + + if (query.getModel().getSources().size() > 0) { + // Add properties if the source list is set explicitly, but do not modify otherwise + 
addParametersForIncludedSources(pageSources,query); + return; + } + + if (pageSources.contains(Source.any)) { + IntentModel intentModel=IntentModel.getFrom(query); + if (intentModel!=null) { + query.getModel().getSources().addAll(intentModel.getSourceNames()); + addPageTemplateSources(pageSources,query); + } + // otherwise leave empty to search all + } + else { // Let the page templates decide + addPageTemplateSources(pageSources,query); + } + } + + private void addPageTemplateSources(Set<Source> pageSources,Query query) { + for (Source pageSource : pageSources) { + if (pageSource==Source.any) continue; + query.getModel().getSources().add(pageSource.getName()); + addParameters(pageSource,query); + } + } + + private void addParametersForIncludedSources(Set<Source> sources,Query query) { + for (Source source : sources) { + if (source.parameters().size()>0 && query.getModel().getSources().contains(source.getName())) + addParameters(source,query); + } + } + + /** Adds parameters specified in the source to the correct namespace in the query */ + private void addParameters(Source source,Query query) { + for (Map.Entry<String,String> parameter : source.parameters().entrySet()) + query.properties().set("source." + source.getName() + "." + parameter.getKey(),parameter.getValue()); + } + + /** + * Currently executing multiple queries to the same source with different parameter sets, + * is not supported. (Same parameter sets in multiple templates is supported, + * and will be just one entry in this set). + */ + private void addErrorIfSameSourceMultipleTimes(List<PageElement> pages,Set<Source> sources,Query query) { + Set<String> sourceNames=new HashSet<>(); + for (Source source : sources) { + if (sourceNames.contains(source.getName())) + query.errors().add(ErrorMessage.createInvalidQueryParameter( + "Querying the same source multiple times with different parameter sets as part of one query " + + "is not supported. 
" + pages + " requests this for source '" + source + "'")); + sourceNames.add(source.getName()); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/PlaceholderMappingVisitor.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/PlaceholderMappingVisitor.java new file mode 100644 index 00000000000..2d61d17ade8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/PlaceholderMappingVisitor.java @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.pagetemplates; + +import com.yahoo.search.pagetemplates.model.*; + +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + * Creates a map from placeholder id to the choice providing its value + * for all placeholder values visited. + * <p> + * This visitor will throw an IllegalArgumentException if the same placeholder id + * is referenced by two choices. 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +class PlaceholderMappingVisitor extends PageTemplateVisitor { + + private Map<String, MapChoice> placeholderIdToChoice=new LinkedHashMap<>(); + + public @Override void visit(MapChoice mapChoice) { + List<String> placeholderIds=mapChoice.placeholderIds(); + for (String placeholderId : placeholderIds) { + MapChoice existingChoice=placeholderIdToChoice.put(placeholderId,mapChoice); + if (existingChoice!=null) + throw new IllegalArgumentException("placeholder id '" + placeholderId + "' is referenced by both " + + mapChoice + " and " + existingChoice + ": Only one reference is allowed"); + } + } + + public Map<String, MapChoice> getMap() { return placeholderIdToChoice; } + +} diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/PlaceholderReferenceCreatingVisitor.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/PlaceholderReferenceCreatingVisitor.java new file mode 100644 index 00000000000..2e22ad7291e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/PlaceholderReferenceCreatingVisitor.java @@ -0,0 +1,30 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.pagetemplates; + +import com.yahoo.search.pagetemplates.model.*; + +import java.util.HashMap; +import java.util.Map; + +/** + * Creates references from all placeholders to the choices which resolves them. + * If a placeholder is encountered which is not resolved by any choice, an IllegalArgumentException is thrown. 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +class PlaceholderReferenceCreatingVisitor extends PageTemplateVisitor { + + private Map<String, MapChoice> placeholderIdToChoice=new HashMap<>(); + + public PlaceholderReferenceCreatingVisitor(Map<String, MapChoice> placeholderIdToChoice) { + this.placeholderIdToChoice=placeholderIdToChoice; + } + + public @Override void visit(Placeholder placeholder) { + MapChoice choice=placeholderIdToChoice.get(placeholder.getId()); + if (choice==null) + throw new IllegalArgumentException(placeholder + " is not referenced by any choice"); + placeholder.setValueContainer(choice); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/SourceVisitor.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/SourceVisitor.java new file mode 100644 index 00000000000..bf2685da56f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/SourceVisitor.java @@ -0,0 +1,27 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.pagetemplates; + +import com.yahoo.search.pagetemplates.model.PageTemplateVisitor; +import com.yahoo.search.pagetemplates.model.Source; + +import java.util.HashSet; +import java.util.Set; + +/** + * Visits a page template object structure and records the sources mentioned. 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +class SourceVisitor extends PageTemplateVisitor { + + private Set<Source> sources=new HashSet<>(); + + @Override + public void visit(Source source) { + sources.add(source); + } + + /** Returns the live list of sources collected by this during visiting */ + public Set<Source> getSources() { return sources; } + +} diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/config/PageTemplateConfigurer.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/config/PageTemplateConfigurer.java new file mode 100644 index 00000000000..5d106a6df8e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/config/PageTemplateConfigurer.java @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.pagetemplates.config; + +import com.yahoo.config.subscription.ConfigSubscriber; +import com.yahoo.search.pagetemplates.PageTemplatesConfig; +import com.yahoo.io.reader.NamedReader; +import com.yahoo.search.pagetemplates.PageTemplateRegistry; + +import java.io.StringReader; +import java.util.ArrayList; +import java.util.List; + +/** + * Provides a static method to convert a page template config into a PageTemplateRegistry. + * In addition, instances of this can be created to subscribe to config and keep an up to date registry reference. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class PageTemplateConfigurer { + + /** + * Creates a new page template registry from the content of a config and returns it. + * The returned registry will <b>not</b> be frozen. This should be done, by calling freeze(), before it is used. 
+ */ + public static PageTemplateRegistry toRegistry(PageTemplatesConfig config) { + List<NamedReader> pageReaders=new ArrayList<>(); + int pageNumber=0; + for (String pageString : config.page()) + pageReaders.add(new NamedReader("page[" + pageNumber++ + "]",new StringReader(pageString))); + return new PageTemplateXMLReader().read(pageReaders,false); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/config/PageTemplateXMLReader.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/config/PageTemplateXMLReader.java new file mode 100644 index 00000000000..46823f30cf2 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/config/PageTemplateXMLReader.java @@ -0,0 +1,355 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.pagetemplates.config; + +import com.yahoo.component.ComponentId; +import com.yahoo.io.reader.NamedReader; +import com.yahoo.search.pagetemplates.PageTemplate; +import com.yahoo.search.pagetemplates.PageTemplateRegistry; +import com.yahoo.search.pagetemplates.model.*; +import com.yahoo.search.query.Sorting; +import com.yahoo.text.XML; +import org.w3c.dom.Element; + +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.*; +import java.util.logging.Logger; + +/** + * Reads all page template XML files from a given directory (or list of readers). + * Instances of this are for single-thread usage only. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class PageTemplateXMLReader { + + private static Logger logger=Logger.getLogger(PageTemplateXMLReader.class.getName()); + + /** The registry being constructed */ + private PageTemplateRegistry registry; + + /** XML elements by page id - available after phase 1. Needed for includes. 
*/ + private Map<ComponentId, Element> pageElementsByPageId=new LinkedHashMap<>(); + + /** + * Reads all page template xml files in a given directory. + * + * @throws RuntimeException if <code>directory</code> is not a readable directory, or if there is some error in the XML + */ + public PageTemplateRegistry read(String directory) { + List<NamedReader> pageReaders=new ArrayList<>(); + try { + File dir=new File(directory); + if ( !dir.isDirectory() ) throw new IllegalArgumentException("Could not read page templates: '" + + directory + "' is not a valid directory."); + + for (File file : sortFiles(dir)) { + if ( ! file.getName().endsWith(".xml")) continue; + pageReaders.add(new NamedReader(file.getName(),new FileReader(file))); + } + + return read(pageReaders,true); + } + catch (IOException e) { + throw new IllegalArgumentException("Could not read page templates from '" + directory + "'",e); + } + finally { + for (NamedReader reader : pageReaders) { + try { reader.close(); } catch (IOException e) { } + } + } + } + + /** + * Reads a single page template file. 
+ * + * @throws RuntimeException if <code>fileName</code> is not a readable file, or if there is some error in the XML + */ + public PageTemplate readFile(String fileName) { + NamedReader pageReader=null; + try { + File file=new File(fileName); + pageReader=new NamedReader(fileName,new FileReader(file)); + String firstName=file.getName().substring(0,file.getName().length()-4); + return read(Collections.singletonList(pageReader),true).getComponent(firstName); + } + catch (IOException e) { + throw new IllegalArgumentException("Could not read the page template '" + fileName + "'",e); + } + finally { + if (pageReader!=null) + try { pageReader.close(); } catch (IOException e) { } + } + } + + private List<File> sortFiles(File dir) { + ArrayList<File> files = new ArrayList<>(); + files.addAll(Arrays.asList(dir.listFiles())); + Collections.sort(files); + return files; + } + + /** + * Reads all page template xml files in a given list of readers. This is called from the Vespa configuration model. + * + * @param validateReaderNames should be set to true if the readers were created by files, not otherwise + * @throws RuntimeException if <code>directory</code> is not a readable directory, or if there is some error in the XML + */ + public PageTemplateRegistry read(List<NamedReader> pageReaders,boolean validateReaderNames) { + // Initialize state + registry=new PageTemplateRegistry(); + + // Phase 1 + pageElementsByPageId=createPages(pageReaders,validateReaderNames); + // Phase 2 + readPages(); + return registry; + } + + private Map<ComponentId,Element> createPages(List<NamedReader> pageReaders,boolean validateReaderNames) { + Map<ComponentId,Element> pageElementsByPageId=new LinkedHashMap<>(); + for (NamedReader reader : pageReaders) { + Element pageElement= XML.getDocument(reader).getDocumentElement(); + if ( ! 
pageElement.getNodeName().equals("page")) { + logger.info("Ignoring '" + reader.getName() + + "': Expected XML root element 'page' but was '" + pageElement.getNodeName() + "'"); + continue; + } + String idString=pageElement.getAttribute("id"); + + if (idString==null || idString.isEmpty()) + throw new IllegalArgumentException("Page template '" + reader.getName() + "' has no 'id' attribute in the root element"); + ComponentId id=new ComponentId(idString); + if (validateReaderNames) + validateFileName(reader.getName(),id,"page template"); + registry.register(new PageTemplate(id)); + pageElementsByPageId.put(id,pageElement); + } + return pageElementsByPageId; + } + + /** Throws an exception if the name is not corresponding to the id */ + private void validateFileName(final String actualName,ComponentId id,String artifactName) { + String expectedCanonicalFileName=id.toFileName(); + String fileName=new File(actualName).getName(); + fileName=stripXmlEnding(fileName); + String canonicalFileName=ComponentId.fromFileName(fileName).toFileName(); + if ( ! 
canonicalFileName.equals(expectedCanonicalFileName)) + throw new IllegalArgumentException("The file name of " + artifactName + " '" + id + + "' must be '" + expectedCanonicalFileName + ".xml' but was '" + actualName + "'"); + } + + private String stripXmlEnding(String fileName) { + if (!fileName.endsWith(".xml")) + throw new IllegalArgumentException("'" + fileName + "' should have a .xml ending"); + else + return fileName.substring(0,fileName.length()-4); + } + + private void readPages() { + for (Map.Entry<ComponentId,Element> pageElement : pageElementsByPageId.entrySet()) { + try { + PageTemplate page=registry.getComponent(pageElement.getValue().getAttribute("id")); + readPageContent(pageElement.getValue(),page); + } + catch (IllegalArgumentException e) { + throw new IllegalArgumentException("Could not read page template '" + pageElement.getKey() + "'",e); + } + } + } + + private void readPageContent(Element pageElement,PageTemplate page) { + if (page.isFrozen()) return; // Already read + Section rootSection=new Section(page.getId().toString()); + readSection(pageElement,rootSection); + page.setSection(rootSection); + page.freeze(); + } + + /** Fills a section with attributes and sub-elements from a "section" or "page" element */ + private Section readSection(Element sectionElement,Section section) { + section.setLayout(Layout.fromString(sectionElement.getAttribute("layout"))); + section.setRegion(sectionElement.getAttribute("region")); + section.setOrder(Sorting.fromString(sectionElement.getAttribute("order"))); + section.setMax(readOptionalNumber(sectionElement,"max")); + section.setMin(readOptionalNumber(sectionElement,"min")); + section.elements().addAll(readSourceAttribute(sectionElement)); + section.elements().addAll(readPageElements(sectionElement)); + return section; + } + + /** Returns all page elements found under the given node */ + private List<PageElement> readPageElements(Element parent) { + List<PageElement> pageElements=new ArrayList<>(); + for 
(Element child : XML.getChildren(parent)) { + if (child.getNodeName().equals("include")) + pageElements.addAll(readInclude(child)); + else + addIfNonNull(readPageElement(child),pageElements); + } + return pageElements; + } + + private void addIfNonNull(PageElement pageElement,List<PageElement> pageElements) { + if (pageElement!=null) + pageElements.add(pageElement); + } + + /** Reads the direct descendant elements of an include */ + private List<PageElement> readInclude(Element element) { + PageTemplate included=registry.getComponent(element.getAttribute("idref")); + if (included==null) + throw new IllegalArgumentException("Could not find page template '" + element.getAttribute("idref")); + readPageContent(pageElementsByPageId.get(included.getId()),included); + return included.getSection().elements(Section.class); + } + + /** Returns the page element corresponding to the given node, never null */ + private PageElement readPageElement(Element child) { + if (child.getNodeName().equals("choice")) + return readChoice(child); + else if (child.getNodeName().equals("source")) + return readSource(child); + else if (child.getNodeName().equals("placeholder")) + return readPlaceholder(child); + else if (child.getNodeName().equals("section")) + return readSection(child,new Section(child.getAttribute("id"))); + else if (child.getNodeName().equals("renderer")) + return readRenderer(child); + else if (child.getNodeName().equals("parameter")) + return null; // read elsewhere + throw new IllegalArgumentException("Unknown node type '" + child.getNodeName() + "'"); + } + + private List<Source> readSourceAttribute(Element sectionElement) { + List<Source> sources=new ArrayList<>(); + String sourceAttributeString=sectionElement.getAttribute("source"); + if (sourceAttributeString!=null) { + for (String sourceName : sourceAttributeString.split(" ")) { + if (sourceName.isEmpty()) continue; + if ("*".equals(sourceName)) + sources.add(Source.any); + else + sources.add(new 
Source(sourceName)); + } + } + return sources; + } + + private Source readSource(Element sourceElement) { + Source source=new Source(sourceElement.getAttribute("name")); + source.setUrl(nullIfEmpty(sourceElement.getAttribute("url"))); + source.renderers().addAll(readPageElements(sourceElement)); + /* + source.renderers().addAll(readRenderers(XML.children(sourceElement,"renderer"))); + readChoices(sourceElement,source); + */ + source.parameters().putAll(readParameters(sourceElement)); + return source; + } + + private String nullIfEmpty(String s) { + if (s==null) return s; + s=s.trim(); + if (s.isEmpty()) return null; + return s; + } + + private Placeholder readPlaceholder(Element placeholderElement) { + return new Placeholder(placeholderElement.getAttribute("id")); + } + + private Renderer readRenderer(Element rendererElement) { + Renderer renderer =new Renderer(rendererElement.getAttribute("name")); + renderer.setRendererFor(nullIfEmpty(rendererElement.getAttribute("for"))); + renderer.parameters().putAll(readParameters(rendererElement)); + return renderer; + } + + private int readOptionalNumber(Element element,String attributeName) { + String attributeValue=element.getAttribute(attributeName); + try { + if (attributeValue.isEmpty()) return -1; + return Integer.parseInt(attributeValue); + } + catch (NumberFormatException e) { // Suppress original exception as it conveys no useful information + throw new IllegalArgumentException("'" + attributeName + "' in " + element + " must be a number, not '" + attributeValue + "'"); + } + } + + private AbstractChoice readChoice(Element choiceElement) { + String method=nullIfEmpty(choiceElement.getAttribute("method")); + if (XML.getChildren(choiceElement,"map").size()>0) + return readMapChoice(choiceElement,method); + else + return readNonMapChoice(choiceElement,method); + } + + private MapChoice readMapChoice(Element choiceElement,String method) { + Element mapElement=XML.getChildren(choiceElement,"map").get(0); + MapChoice 
map=new MapChoice(); + map.setMethod(method); + + map.placeholderIds().addAll(readSpaceSeparatedAttribute("to",mapElement)); + for (Element value : XML.getChildren(mapElement)) { + if ("item".equals(value.getNodeName())) + map.values().add(readPageElements(value)); + else + map.values().add(Collections.singletonList(readPageElement(value))); + } + return map; + } + + private Choice readNonMapChoice(Element choiceElement,String method) { + Choice choice=new Choice(); + choice.setMethod(method); + + for (Element alternative : XML.getChildren(choiceElement)) { + if (alternative.getNodeName().equals("alternative")) // Explicit alternative container + choice.alternatives().add(readPageElements(alternative)); + else if (alternative.getNodeName().equals("include")) // Implicit include + choice.alternatives().add(readInclude(alternative)); + else // Other implicit + choice.alternatives().add(Collections.singletonList(readPageElement(alternative))); + } + return choice; + } + + /* + private void readChoices(Element sourceElement,Source source) { + for (Element choiceElement : XML.children(sourceElement,"choice")) { + for (Element alternative : XML.children(choiceElement)) { + if ("alternative".equals(alternative.getNodeName())) // Explicit alternative container + source.renderer().alternatives().addAll(readRenderers(XML.children(alternative))); + else // Implicit alternative - yes implicit and explicit may be combined + source.renderer().alternatives().addAll(readRenderers(Collections.singletonList(alternative))); + } + } + } + */ + + private Map<String,String> readParameters(Element containingElement) { + List<Element> parameterElements=XML.getChildren(containingElement,"parameter"); + if (parameterElements.size()==0) return Collections.emptyMap(); // Shortcut + + Map<String,String> parameters=new LinkedHashMap<>(); + for (Element parameter : parameterElements) { + String key=parameter.getAttribute("name"); + String value=XML.getValue(parameter); + 
parameters.put(key,value); + } + return parameters; + } + + private List<String> readSpaceSeparatedAttribute(String attributeName, Element containingElement) { + List<String> values=new ArrayList<>(); + String attributeString=nullIfEmpty(containingElement.getAttribute(attributeName)); + if (attributeString!=null) { + for (String value : attributeString.split("\\s+")) // split on whitespace runs so repeated spaces do not produce empty ids + values.add(value); + } + return values; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/config/package-info.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/config/package-info.java new file mode 100644 index 00000000000..40cdfb691ab --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/config/package-info.java @@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +@PublicApi +package com.yahoo.search.pagetemplates.config; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/Organizer.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/Organizer.java new file mode 100644 index 00000000000..00e154d460b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/Organizer.java @@ -0,0 +1,177 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.pagetemplates.engine; + +import com.yahoo.search.Result; +import com.yahoo.search.pagetemplates.PageTemplate; +import com.yahoo.search.pagetemplates.model.*; +import com.yahoo.search.pagetemplates.result.SectionHitGroup; +import com.yahoo.search.query.Sorting; +import com.yahoo.search.result.*; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +/** + * Reorganizes and prunes a result as prescribed by a resolved template. + * This class is multithread safe. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class Organizer { + + /** + * Organizes the given result + * + * @param templateChoice a choice between singleton lists of PageTemplates + * @param resolution the resolution of (at least) the template choice and all choices contained in that template + * @param result the result to organize + */ + public void organize(Choice templateChoice, Resolution resolution, Result result) { + PageTemplate template=(PageTemplate)templateChoice.get(resolution.getResolution(templateChoice)).get(0); + SectionHitGroup sectionGroup =toGroup(template.getSection(),resolution,result); + ErrorHit errors=result.hits().getErrorHit(); + + // transfer state from existing hit + sectionGroup.setQuery(result.hits().getQuery()); + if (errors!=null && errors instanceof DefaultErrorHit) + sectionGroup.add((DefaultErrorHit)errors); + for (Iterator<Map.Entry<String, Object>> it = result.hits().fieldIterator(); it.hasNext(); ) { + Map.Entry<String, Object> field = it.next(); + sectionGroup.setField(field.getKey(), field.getValue()); + } + + result.setHits(sectionGroup); + } + + /** Creates the hit group corresponding to a section, drawing data from the given result */ + private SectionHitGroup toGroup(Section section,Resolution resolution,Result result) { + SectionHitGroup sectionGroup=new SectionHitGroup("section:" + section.getId()); + 
setField("id",section.getId(),sectionGroup); + sectionGroup.setLeaf(section.elements(Section.class).size()==0); + setField("layout",section.getLayout().getName(),sectionGroup); + setField("region",section.getRegion(),sectionGroup); + + List<String> sourceList=new ArrayList<>(); + renderElements(resolution, result, sectionGroup, sourceList, section.elements()); + + // Trim to max + if (section.getMax()>=0) + sectionGroup.trim(0,section.getMax()); + if (sectionGroup.size()>1) + assignOrderer(section,resolution,sourceList,sectionGroup); + + return sectionGroup; + } + + private void renderElements(Resolution resolution, Result result, SectionHitGroup sectionGroup, List<String> sourceList, List<PageElement> elements) { + for (PageElement element : elements) { + if (element instanceof Section) { + sectionGroup.add(toGroup((Section)element,resolution,result)); + } + else if (element instanceof Source) { + addSource(resolution,(Source)element,sectionGroup,result,sourceList); + } + else if (element instanceof Renderer) { + sectionGroup.renderers().add((Renderer)element); + } + else if (element instanceof Choice) { + Choice choice=(Choice)element; + if (choice.isEmpty()) continue; // Ignore + int chosen=resolution.getResolution(choice); + renderElements(resolution, result, sectionGroup, sourceList, choice.alternatives().get(chosen)); + } + else if (element instanceof Placeholder) { + Placeholder placeholder =(Placeholder)element; + List<PageElement> mappedElements= + resolution.getResolution(placeholder.getValueContainer()).get(placeholder.getId()); + renderElements(resolution,result,sectionGroup,sourceList,mappedElements); + } + } + } + + private void setField(String fieldName,Object value,Hit to) { + if (value==null) return; + to.setField(fieldName,value); + } + + private void addSource(Resolution resolution,Source source,SectionHitGroup sectionGroup,Result result,List<String> sourceList) { + renderElements(resolution,result,sectionGroup, sourceList, source.renderers()); + 
/* + for (PageElement element : source.renderers()) { + if (element instanceof Renderer) + if (renderer.isEmpty()) continue; + sectionGroup.renderers().add(renderer.get(resolution.getResolution(renderer))); + } + */ + + if (source.getUrl()==null) + addHitsFromSource(source,sectionGroup,result,sourceList); + else + sectionGroup.sources().add(source); // source to be rendered by the frontend + } + + private void addHitsFromSource(Source source,SectionHitGroup sectionGroup,Result result,List<String> sourceList) { + if (source==Source.any) { // Add any source not added yet + for (Hit hit : result.hits()) { + if ( ! (hit instanceof HitGroup)) continue; + String groupId=hit.getId().stringValue(); + if ( ! groupId.startsWith("source:")) continue; + String sourceName=groupId.substring(7); + if (sourceList.contains(sourceName)) continue; + sectionGroup.addAll(((HitGroup)hit).asList()); + sourceList.add(sourceName); // Add *'ed sources explicitly + } + } + else { + HitGroup sourceGroup=(HitGroup)result.hits().get("source:" + source.getName()); + if (sourceGroup!=null) + sectionGroup.addAll(sourceGroup.asList()); + sourceList.add(source.getName()); // Add even if not found - may be added later + } + } + + private void assignOrderer(Section section,Resolution resolution,List<String> sourceList,HitGroup group) { + if (section.getOrder()==null) { // then sort by relevance, source + group.setOrderer(new HitSortOrderer(new RelevanceComparator(new SourceOrderComparator(sourceList)))); + return; + } + + // replace a source field comparison by one which knows the source list order + // and add default sorting at the end if necessary + Sorting sorting=section.getOrder(); + int rankIndex=-1; + int sourceIndex=-1; + for (int i=0; i<sorting.fieldOrders().size(); i++) { + Sorting.FieldOrder order=sorting.fieldOrders().get(i); + if ("[relevance]".equals(order.getFieldName()) || "[rank]".equals(order.getFieldName())) + rankIndex=i; + else if (order.getFieldName().equals("[source]")) + 
sourceIndex=i; + } + + ChainableComparator comparator; + Sorting beforeSource=null; + Sorting afterSource=null; + if (sourceIndex>=0) { // replace alphabetical sorting on source by sourceList order sorting + if (sourceIndex>0) // sort fields before the source + beforeSource=new Sorting(new ArrayList<>(sorting.fieldOrders().subList(0,sourceIndex))); + if (sorting.fieldOrders().size()>sourceIndex+1) // sort fields after the source (the end index of subList is exclusive) + afterSource=new Sorting(new ArrayList<>(sorting.fieldOrders().subList(sourceIndex+1,sorting.fieldOrders().size()))); + + comparator=new SourceOrderComparator(sourceList, FieldComparator.create(afterSource)); + if (beforeSource!=null) + comparator=new FieldComparator(beforeSource,comparator); + + } + else if (rankIndex>=0) { // add sort by source at the end + comparator=new FieldComparator(sorting,new SourceOrderComparator(sourceList)); + } + else { // add sort by rank,source at the end + comparator=new FieldComparator(sorting,new RelevanceComparator(new SourceOrderComparator(sourceList))); + } + group.setOrderer(new HitSortOrderer(comparator)); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/RelevanceComparator.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/RelevanceComparator.java new file mode 100644 index 00000000000..7489768b5a3 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/RelevanceComparator.java @@ -0,0 +1,29 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.pagetemplates.engine; + +import com.yahoo.search.result.ChainableComparator; +import com.yahoo.search.result.Hit; + +import java.util.Comparator; + +/** + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +class RelevanceComparator extends ChainableComparator { + + /** + * Creates a relevance comparator, with an optional secondary comparator. 
+ * If the secondary is null, the intrinsic hit order is used as secondary. + */ + public RelevanceComparator(Comparator<Hit> secondaryComparator) { + super(secondaryComparator); + } + + public @Override int compare(Hit h1,Hit h2) { + int relevanceComparison=h2.getRelevance().compareTo(h1.getRelevance()); + if (relevanceComparison!=0) return relevanceComparison; + + return super.compare(h1,h2); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/Resolution.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/Resolution.java new file mode 100644 index 00000000000..d67faf805ad --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/Resolution.java @@ -0,0 +1,66 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.pagetemplates.engine; + +import com.yahoo.search.pagetemplates.model.Choice; +import com.yahoo.search.pagetemplates.model.MapChoice; +import com.yahoo.search.pagetemplates.model.PageElement; + +import java.util.IdentityHashMap; +import java.util.List; +import java.util.Map; + +/** + * A resolution of choices within a template. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class Resolution { + + /** A record of choices made as choice → alternative index (id) */ + private Map<Choice,Integer> choiceResolutions=new IdentityHashMap<>(); + + /** A record of map choices made as choice → mapping */ + private Map<MapChoice,Map<String,List<PageElement>>> mapChoiceResolutions= + new IdentityHashMap<>(); + + public void addChoiceResolution(Choice choice,int alternativeIndex) { + choiceResolutions.put(choice,alternativeIndex); + } + + public void addMapChoiceResolution(MapChoice choice, Map<String,List<PageElement>> mapping) { + mapChoiceResolutions.put(choice,mapping); + } + + /** + * Returns the resolution of a choice. 
+ * + * @return the (0-base) index of the choice made. If the given choice has exactly one alternative, + * 0 is always returned (whether or not the choice has been attempted resolved). + * @throws IllegalArgumentException if the choice is empty, or if it has multiple alternatives but have not + * been resolved in this + */ + public int getResolution(Choice choice) { + if (choice.alternatives().size()==1) return 0; + if (choice.isEmpty()) throw new IllegalArgumentException("Cannot return a resolution of empty " + choice); + Integer resolution=choiceResolutions.get(choice); + if (resolution==null) throw new IllegalArgumentException(this + " has no resolution of " + choice); + return resolution; + } + + /** + * Returns the resolution of a map choice. + * + * @return the chosen mapping - entries from placeholder id to the values to use at the location of that placeholder + * @throws IllegalArgumentException if this choice has not been resolved in this + */ + public Map<String,List<PageElement>> getResolution(MapChoice choice) { + Map<String,List<PageElement>> resolution=mapChoiceResolutions.get(choice); + if (resolution==null) throw new IllegalArgumentException(this + " has no resolution of " + choice); + return resolution; + } + + public @Override String toString() { + return "a resolution of " + choiceResolutions.size() + " choices"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/Resolver.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/Resolver.java new file mode 100644 index 00000000000..4972b0e4689 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/Resolver.java @@ -0,0 +1,114 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.pagetemplates.engine; + +import com.yahoo.component.AbstractComponent; +import com.yahoo.component.ComponentId; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.pagetemplates.PageTemplate; +import com.yahoo.search.pagetemplates.model.Choice; +import com.yahoo.search.pagetemplates.model.MapChoice; +import com.yahoo.search.pagetemplates.model.PageTemplateVisitor; + +/** + * Superclass of page template choice resolvers. + * <p> + * Subclasses override one of the two resolve methods to either resolve each choice individually + * or look at all choices at once. + * <p> + * All subclasses of this must be multithread safe. I.e. multiple calls may be made + * to resolve at the same time from different threads. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public abstract class Resolver extends AbstractComponent { + + public Resolver(String id) { + super(new ComponentId(id)); + } + + public Resolver(ComponentId id) { + super(id); + } + + protected Resolver() {} + + /** + * Override this to resolve choices. Before returning, this method <i>must</i> resolve the given choice + * between a set of page templates <i>and</i> all choices found recursively within the <i>chosen</i> + * page template. It is permissible but not required to add solutions also to choices present within those + * templates which are not chosen. + * <p> + * This default implementation creates a Resolution and calls + * <code>resolve(choice/mapChoice,query,result,resolution)</code> first on the given page template choice, then + * on each choice found in that template. This provides a simple API to resolvers which make each choice + * independently. 
+ * + * @param pageTemplate the choice of page templates to resolve - a choice containing singleton lists of PageTemplate elements + * @param query the query, from which information useful for correct resolution can be found + * @param result the result, from which further information useful for correct resolution can be found + * @return the resolution of the choices contained in the given page template + */ + public Resolution resolve(Choice pageTemplate, Query query, Result result) { + Resolution resolution=new Resolution(); + resolve(pageTemplate,query,result,resolution); + PageTemplate chosenPageTemplate=(PageTemplate)pageTemplate.get(resolution.getResolution(pageTemplate)).get(0); + ChoiceResolverVisitor choiceResolverVisitor=new ChoiceResolverVisitor(query,result,resolution); + chosenPageTemplate.accept(choiceResolverVisitor); + return choiceResolverVisitor.getResolution(); + } + + /** + * Override this to resolve <i>each</i> choice independently. + * This default implementation does nothing. + * + * @param choice the choice to resolve + * @param query the query for which this should be resolved, typically used to extract features + * @param result the result for which this should be resolved, typically used to extract features + * @param resolution the set of resolutions made so far, to which this should be added: + * <code>resolution.addChoiceResolution(choice,chosenAlternativeIndex)</code> + */ + public void resolve(Choice choice,Query query,Result result,Resolution resolution) { + } + + /** + * Override this to resolve <i>each</i> map choice independently. + * This default implementation does nothing. 
+ * + * @param choice the choice to resolve + * @param query the query for which this should be resolved, typically used to extract features + * @param result the result for which this should be resolved, typically used to extract features + * @param resolution the set of resolutions made so far, to which this should be added: + * <code>resolution.addMapChoiceResolution(choice,chosenMapping)</code> + */ + public void resolve(MapChoice choice,Query query,Result result,Resolution resolution) { + } + + private class ChoiceResolverVisitor extends PageTemplateVisitor { + + private Resolution resolution; + + private Query query; + + private Result result; + + public ChoiceResolverVisitor(Query query,Result result,Resolution resolution) { + this.query=query; + this.result=result; + this.resolution=resolution; + } + + public @Override void visit(Choice choice) { + if (choice.alternatives().size()<2) return; // No choice... + resolve(choice,query,result,resolution); + } + + public @Override void visit(MapChoice choice) { + resolve(choice,query,result,resolution); + } + + public Resolution getResolution() { return resolution; } + + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/SourceOrderComparator.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/SourceOrderComparator.java new file mode 100644 index 00000000000..b4cd01f0c36 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/SourceOrderComparator.java @@ -0,0 +1,57 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.pagetemplates.engine; + +import com.yahoo.search.result.ChainableComparator; +import com.yahoo.search.result.Hit; + +import java.util.Comparator; +import java.util.List; + +/** + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +class SourceOrderComparator extends ChainableComparator { + + private final List<String> sourceOrder; + + /** + * Creates a source order comparator, with no secondary + * + * @param sourceOrder the sort order of list names. This list gets owned by this and must not be modified + */ + public SourceOrderComparator(List<String> sourceOrder) { + this(sourceOrder,null); + } + + /** + * Creates a source order comparator, with an optional secondary comparator. + * + * @param sourceOrder the sort order of list names. This list gets owned by this and must not be modified + * @param secondaryComparator the comparator to use as secondary, or null to use the intrinsic hit order + */ + public SourceOrderComparator(List<String> sourceOrder,Comparator<Hit> secondaryComparator) { + super(secondaryComparator); + this.sourceOrder=sourceOrder; + } + + public @Override int compare(Hit h1,Hit h2) { + int primaryOrder=sourceOrderCompare(h1,h2); + if (primaryOrder!=0) return primaryOrder; + + return super.compare(h1,h2); + } + + private int sourceOrderCompare(Hit h1,Hit h2) { + String h1Source=h1.getSource(); + String h2Source=h2.getSource(); + + if (h1Source==null && h2Source==null) return 0; + if (h1Source==null) return 1; // No source -> last + if (h2Source==null) return -1; // No source -> last + + if (h1Source.equals(h2Source)) return 0; + + return sourceOrder.indexOf(h1Source)-sourceOrder.indexOf(h2Source); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/package-info.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/package-info.java new file mode 100644 index 00000000000..6628156cb33 --- /dev/null +++ 
b/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/package-info.java @@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +@PublicApi +package com.yahoo.search.pagetemplates.engine; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/resolvers/DeterministicResolver.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/resolvers/DeterministicResolver.java new file mode 100644 index 00000000000..32ed54a6775 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/resolvers/DeterministicResolver.java @@ -0,0 +1,56 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.pagetemplates.engine.resolvers; + +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.pagetemplates.engine.Resolution; +import com.yahoo.search.pagetemplates.engine.Resolver; +import com.yahoo.search.pagetemplates.model.Choice; +import com.yahoo.search.pagetemplates.model.MapChoice; +import com.yahoo.search.pagetemplates.model.PageElement; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * A resolver which + * <ul> + * <li>Always chooses the <i>last</i> alternative of any Choice + * <li>Always maps values to placeholders in the order they are listed in the map definition of any MapChoice + * </ul> + * This is useful for testing. 
+ * <p> + * The id of this is <code>native.deterministic</code> + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class DeterministicResolver extends Resolver { + public static final String nativeId = "native.deterministic"; + + public DeterministicResolver() {} + + protected DeterministicResolver(String id) { + super(id); + } + + /** Chooses the last alternative of any choice */ + @Override + public void resolve(Choice choice, Query query, Result result, Resolution resolution) { + resolution.addChoiceResolution(choice,choice.alternatives().size()-1); + } + + /** Chooses a mapping which is always by the literal order given in the source template */ + @Override + public void resolve(MapChoice choice,Query query,Result result,Resolution resolution) { + Map<String, List<PageElement>> mapping=new HashMap<>(); + // Map 1-1 by order + List<String> placeholderIds=choice.placeholderIds(); + List<List<PageElement>> valueList=choice.values(); + int i=0; + for (String placeholderId : placeholderIds) + mapping.put(placeholderId,valueList.get(i++)); + resolution.addMapChoiceResolution(choice,mapping); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/resolvers/RandomResolver.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/resolvers/RandomResolver.java new file mode 100644 index 00000000000..5f06c66795d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/resolvers/RandomResolver.java @@ -0,0 +1,50 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.pagetemplates.engine.resolvers; + +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.pagetemplates.engine.Resolution; +import com.yahoo.search.pagetemplates.engine.Resolver; +import com.yahoo.search.pagetemplates.model.Choice; +import com.yahoo.search.pagetemplates.model.MapChoice; +import com.yahoo.search.pagetemplates.model.PageElement; + +import java.util.*; + +/** + * A resolver which makes all choices at random. + * The id of this is <code>native.random</code>. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class RandomResolver extends Resolver { + + public static final String nativeId = "native.random"; + + private Random random = new Random(System.currentTimeMillis()); // Use of this is multithread safe + + public RandomResolver() {} + + protected RandomResolver(String id) { + super(id); + } + + /** Chooses one of the alternatives of the given choice at random and records its index in the resolution */ + @Override + public void resolve(Choice choice, Query query, Result result, Resolution resolution) { + resolution.addChoiceResolution(choice,random.nextInt(choice.alternatives().size())); + } + + /** Chooses a random mapping: each placeholder id gets a value drawn at random, without replacement, from the values of the map choice */ + @Override + public void resolve(MapChoice choice,Query query,Result result,Resolution resolution) { + Map<String, List<PageElement>> mapping=new HashMap<>(); + // Draw a random element from the value list on each iteration and assign it to a placeholder + List<String> placeholderIds=choice.placeholderIds(); + List<List<PageElement>> valueList=new ArrayList<>(choice.values()); + for (String placeholderId : placeholderIds) + mapping.put(placeholderId,valueList.remove(random.nextInt(valueList.size()))); + resolution.addMapChoiceResolution(choice,mapping); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/resolvers/ResolverRegistry.java 
b/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/resolvers/ResolverRegistry.java new file mode 100644 index 00000000000..0bbbec655bd --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/resolvers/ResolverRegistry.java @@ -0,0 +1,59 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.pagetemplates.engine.resolvers; + +import com.google.inject.Inject; +import com.yahoo.component.AbstractComponent; +import com.yahoo.component.ComponentId; +import com.yahoo.component.provider.ComponentRegistry; +import com.yahoo.search.pagetemplates.engine.Resolver; + +import java.util.List; +import java.util.logging.Logger; + +/** + * A registry of available resolver components + * + * @author bratseth + */ +public class ResolverRegistry extends ComponentRegistry<Resolver> { + + private final Resolver defaultResolver; + + public ResolverRegistry(List<Resolver> resolvers) { + addBuiltInResolvers(); + for (Resolver component : resolvers) + registerResolver(component); + defaultResolver = decideDefaultResolver(); + freeze(); + } + + private void addBuiltInResolvers() { + registerResolver(createNativeDeterministicResolver()); + registerResolver(createNativeRandomResolver()); + } + + private Resolver decideDefaultResolver() { + Resolver defaultResolver = getComponent("default"); + if (defaultResolver != null) return defaultResolver; + return getComponent("native.random"); + } + + private Resolver createNativeRandomResolver() { + RandomResolver resolver = new RandomResolver(); + resolver.initId(ComponentId.fromString(RandomResolver.nativeId)); + return resolver; + } + + private DeterministicResolver createNativeDeterministicResolver() { + DeterministicResolver resolver = new DeterministicResolver(); + resolver.initId(ComponentId.fromString(DeterministicResolver.nativeId)); + return resolver; + } + + private void registerResolver(Resolver 
resolver) { + super.register(resolver.getId(), resolver); + } + + public Resolver defaultResolver() { return defaultResolver; } + +} diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/resolvers/package-info.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/resolvers/package-info.java new file mode 100644 index 00000000000..c1e3f218480 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/engine/resolvers/package-info.java @@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +@PublicApi +package com.yahoo.search.pagetemplates.engine.resolvers; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/model/AbstractChoice.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/model/AbstractChoice.java new file mode 100644 index 00000000000..069598b2e02 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/model/AbstractChoice.java @@ -0,0 +1,31 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.pagetemplates.model; + +import com.yahoo.component.provider.FreezableClass; + +/** + * Abstract superclass of various kinds of choices. 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public abstract class AbstractChoice extends FreezableClass implements PageElement { + + private String method; + + /** + * Returns the choice method to use - a string interpreted by the resolver in use, + * or null to use any available method + */ + public String getMethod() { return method; } + + public void setMethod(String method) { + ensureNotFrozen(); + this.method=method; + } + + // TODO: is this really choices between classes in general, or e.g. subclasses of Section? + /** Returns true if this choice is (partially or completely) a choice between the given type */ + @SuppressWarnings("rawtypes") + public abstract boolean isChoiceBetween(Class pageTemplateModelClass); + +} diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/model/Choice.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/model/Choice.java new file mode 100644 index 00000000000..a1932012236 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/model/Choice.java @@ -0,0 +1,114 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.pagetemplates.model; + +import java.util.*; + +/** + * A choice between some alternative lists of page elements. 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public final class Choice extends AbstractChoice { + + private List<List<PageElement>> alternatives=new ArrayList<>(3); + + /** Creates an empty choice */ + public Choice() { } + + /** Creates a choice having a single alternative having a single page element */ + public static Choice createSingleton(PageElement singletonAlternative) { + Choice choice=new Choice(); + choice.alternatives().add(createSingletonList(singletonAlternative)); + return choice; + } + + /** Creates a choice in which each alternative consists of a single element */ + public static Choice createSingletons(List<PageElement> alternatives) { + Choice choice=new Choice(); + for (PageElement alternative : alternatives) + choice.alternatives().add(createSingletonList(alternative)); + return choice; + } + + private static List<PageElement> createSingletonList(PageElement member) { + List<PageElement> list=new ArrayList<>(); + list.add(member); + return list; + } + + /** + * Creates a choice between some alternatives. This method takes a copy of the given lists. + */ + public Choice(List<List<PageElement>> alternatives) { + for (List<PageElement> alternative : alternatives) + this.alternatives.add(new ArrayList<>(alternative)); + } + + /** + * Returns the alternatives of this as a live reference to the alternatives of this. + * The list and elements may be modified unless this is frozen. This is never null. 
+ */ + public List<List<PageElement>> alternatives() { return alternatives; } + + /** Convenience shorthand of <code>return alternatives().get(index)</code> */ + public List<PageElement> get(int index) { + return alternatives.get(index); + } + + /** Convenience shorthand for <code>if (alternative!=null) alternatives().add(alternative)</code> */ + public void add(List<PageElement> alternative) { + if (alternative!=null) + alternatives.add(new ArrayList<>(alternative)); + } + + /** Returns true only if there are no alternatives in this */ + public boolean isEmpty() { return alternatives.size()==0; } + + /** Answers true if this is either a choice between the given class, or between Lists of the given class */ + @SuppressWarnings({ "rawtypes", "unchecked" }) + @Override + public boolean isChoiceBetween(Class pageTemplateModelElementClass) { + List firstNonEmpty=null; + for (List<PageElement> value : alternatives) { + if (pageTemplateModelElementClass.isAssignableFrom(value.getClass())) return true; + if (value instanceof List) { + List listValue=(List)value; + if (listValue.size()>0) + firstNonEmpty=listValue; + } + } + if (firstNonEmpty==null) return false; + return (pageTemplateModelElementClass.isAssignableFrom(firstNonEmpty.get(0).getClass())); + } + + @Override + public void freeze() { + if (isFrozen()) return; + super.freeze(); + for (ListIterator<List<PageElement>> i=alternatives.listIterator(); i.hasNext(); ) { + List<PageElement> alternative=i.next(); + for (PageElement alternativeElement : alternative) + alternativeElement.freeze(); + i.set(Collections.unmodifiableList(alternative)); + } + alternatives= Collections.unmodifiableList(alternatives); + } + + /** Accepts a visitor to this structure */ + @Override + public void accept(PageTemplateVisitor visitor) { + visitor.visit(this); + for (List<PageElement> alternative : alternatives) { + for (PageElement alternativeElement : alternative) + alternativeElement.accept(visitor); + } + } + + @Override + public 
/**
 * The layout of a section
 *
 * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
 */
// This is not made an enum, to allow the value set to be extendible.
// It is not explicitly made immutable
// to enable adding of internal state later (esp. parameters).
// If this becomes mutable, the creation scheme must be changed
// such that each fromString returns a unique instance, and
// the name must become a (immutable) type.
public class Layout {

    /** The built in "column" layout */
    public static final Layout column = new Layout("column");

    /** The built in "row" layout */
    public static final Layout row = new Layout("row");

    /** The name identifying this layout. Equality and hashing are defined by this alone. */
    private final String name;

    /** Creates a layout having the given name */
    public Layout(String name) {
        this.name = name;
    }

    /** Returns the name of this layout */
    public String getName() { return name; }

    @Override
    public int hashCode() { return name.hashCode(); }

    /** Two layouts are equal if they have equal names */
    @Override
    public boolean equals(Object o) {
        if (o == this) return true;
        if ( ! (o instanceof Layout)) return false;
        Layout other = (Layout)o;
        return this.name.equals(other.name);
    }

    /**
     * Returns a layout having the given string as name.
     * The shared built-in instances are returned for "column" and "row",
     * any other name produces a new instance.
     *
     * @param layout the layout name, may be null or empty
     * @return the layout, or null if the given string is null or empty
     */
    public static Layout fromString(String layout) {
        // Honor the documented contract: previously null caused a NullPointerException
        // and the empty string produced a layout with an empty name
        if (layout == null || layout.isEmpty()) return null;
        if (layout.equals("column")) return column;
        if (layout.equals("row")) return row;
        return new Layout(layout);
    }

    @Override
    public String toString() { return "layout '" + name + "'"; }

}
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class MapChoice extends AbstractChoice { + + private List<String> placeholderIds=new ArrayList<>(); + + private List<List<PageElement>> values=new ArrayList<>(); + + @SuppressWarnings({ "rawtypes", "unchecked" }) + @Override + public boolean isChoiceBetween(Class pageTemplateModelElementClass) { + List<PageElement> firstNonEmpty=null; + for (List<PageElement> value : values) + if (value.size()>0) + firstNonEmpty=value; + if (firstNonEmpty==null) return false; + return (pageTemplateModelElementClass.isAssignableFrom(firstNonEmpty.get(0).getClass())); + } + + /** + * Returns the placeholder ids (the "to" of the mapping) of this as a live reference which can be modified unless + * this is frozen. + */ + public List<String> placeholderIds() { return placeholderIds; } + + /** + * Returns the values (the "from" of the mapping) of this as a live reference which can be modified unless + * this is frozen. Note that each single choice of values within this is also a list of values. This is + * the inner list. 
+ */ + public List<List<PageElement>> values() { return values; } + + @Override + public void freeze() { + if (isFrozen()) return; + super.freeze(); + placeholderIds=Collections.unmodifiableList(placeholderIds); + values=Collections.unmodifiableList(values); + } + + /** Accepts a visitor to this structure */ + public @Override void accept(PageTemplateVisitor visitor) { + visitor.visit(this); + for (List<PageElement> valueEntry : values) + for (PageElement value : valueEntry) + value.accept(visitor); + } + + @Override + public String toString() { + return "mapping to placeholders " + placeholderIds; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/model/PageElement.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/model/PageElement.java new file mode 100644 index 00000000000..fba58f069ec --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/model/PageElement.java @@ -0,0 +1,16 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.pagetemplates.model; + +import com.yahoo.component.provider.Freezable; + +/** + * Implemented by all page template model classes + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public interface PageElement extends Freezable { + + /** Accepts a visitor to this structure */ + public void accept(PageTemplateVisitor visitor); + +} diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/model/PageTemplateVisitor.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/model/PageTemplateVisitor.java new file mode 100644 index 00000000000..d7ebd3d1169 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/model/PageTemplateVisitor.java @@ -0,0 +1,41 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.pagetemplates.model; + +import com.yahoo.search.pagetemplates.PageTemplate; + +/** + * Superclass of visitors over the page template object structure + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class PageTemplateVisitor { + + /** Called each time a page template is encountered. This default implementation does nothing */ + public void visit(PageTemplate pageTemplate) { + } + + /** Called each time a source or source placeholder is encountered. This default implementation does nothing */ + public void visit(Source source) { + } + + /** Called each time a section or section placeholder is encountered. This default implementation does nothing */ + public void visit(Section section) { + } + + /** Called each time a renderer is encountered. This default implementation does nothing */ + public void visit(Renderer renderer) { + } + + /** Called each time a choice is encountered. This default implementation does nothing */ + public void visit(Choice choice) { + } + + /** Called each time a map choice is encountered. This default implementation does nothing */ + public void visit(MapChoice choice) { + } + + /** Called each time a placeholder is encountered. This default implementation does nothing */ + public void visit(Placeholder placeholder) { + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/model/Placeholder.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/model/Placeholder.java new file mode 100644 index 00000000000..cf7a85fc779 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/model/Placeholder.java @@ -0,0 +1,49 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.pagetemplates.model; + +/** + * A source placeholder is replaced with a list of source instances at evaluation time. 
+ * Source placeholders may not have any content themselves - attempting to call any setter on this + * results in a IllegalStateException. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class Placeholder implements PageElement { + + private String id; + + private MapChoice valueContainer=null; + + /** Creates a source placeholder with an id. */ + public Placeholder(String id) { + this.id=id; + } + + public String getId() { return id; } + + /** Returns the element which contains the value(s) of this placeholder. Never null. */ + public MapChoice getValueContainer() { return valueContainer; } + + public void setValueContainer(MapChoice valueContainer) { this.valueContainer=valueContainer; } + + public @Override void freeze() {} + + /** Accepts a visitor to this structure */ + public @Override void accept(PageTemplateVisitor visitor) { + visitor.visit(this); + } + + public @Override String toString() { + return "source placeholder '" + id + "'"; + } + + /** + * This method always returns false, is a Placeholder always is mutable. + * (freeze() is a NOOP.) + */ + @Override + public boolean isFrozen() { + return false; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/model/Renderer.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/model/Renderer.java new file mode 100644 index 00000000000..4564ceeef3c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/model/Renderer.java @@ -0,0 +1,88 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.pagetemplates.model; + +import com.yahoo.component.provider.FreezableClass; +import com.yahoo.protect.Validator; + +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * A description of a way to present data items from a source. 
+ * All data items has a default renderer. This can be overridden or parametrized by + * an explicit renderer. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public final class Renderer extends FreezableClass implements PageElement { + + private String name; + + private String rendererFor; + + private Map<String,String> parameters =new LinkedHashMap<>(); + + public Renderer(String name) { + setName(name); + } + + /** + * Returns the name of this renderer (never null). + * The name should be recognized by the system receiving results for rendering + */ + public String getName() { return name; } + + public final void setName(String name) { + ensureNotFrozen(); + Validator.ensureNotNull("renderer name",name); + this.name=name; + } + + /** + * Returns the name of the kind of data this is a renderer for. + * This is used to allow frontends to dispatch the right data items (hits) to + * the right renderer in the case where the data consists of a heterogeneous list. + * <p> + * This is null if this is a renderer for a whole section, or if this is a renderer + * for all kinds of data from a particular source <i>and</i> this is not frozen. + * <p> + * Otherwise, it is either the name of the source this is the renderer for, + * <i>or</i> the renderer for all data items having this name as a <i>type</i>. + * <p> + * This, a (frontend) dispatcher of data to renderers should for each data item: + * <ul> + * <li>use the renderer having the same name as any <code>type</code> name set of the data item + * <li>if no such renderer, use the renderer having <code>rendererFor</code> equal to the data items <code>source</code> + * <li>if no such renderer, use a default renderer + * </ul> + */ + public String getRendererFor() { return rendererFor; } + + public void setRendererFor(String rendererFor) { + ensureNotFrozen(); + this.rendererFor=rendererFor; + } + + /** + * Returns the parameters of this renderer as a live reference (never null). 
+ * The parameters will be passed to the renderer with each result + */ + public Map<String,String> parameters() { return parameters; } + + public @Override void freeze() { + if (isFrozen()) return; + super.freeze(); + parameters = Collections.unmodifiableMap(parameters); + } + + /** Accepts a visitor to this structure */ + public @Override void accept(PageTemplateVisitor visitor) { + visitor.visit(this); + } + public @Override String toString() { + return "renderer '" + name + "'"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/model/Section.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/model/Section.java new file mode 100644 index 00000000000..0a980419853 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/model/Section.java @@ -0,0 +1,177 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.pagetemplates.model; + +import com.yahoo.component.provider.FreezableClass; +import com.yahoo.search.query.Sorting; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * An element of a page template corresponding to a physical area of the layout of the final physical page. + * Pages are freezable - once frozen calling a setter will cause an IllegalStateException, and returned + * live collection references are unmodifiable + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class Section extends FreezableClass implements PageElement { + + private final String id; + + private Layout layout=Layout.column; + + private String region; + + /** The elements of this - sources, subsections etc. 
and/or choices of the same */ + private List<PageElement> elements=new ArrayList<>(); + + /** Filtered versions of elements pre-calculated at freeze time */ + private List<PageElement> sections, sources, renderers; + + private int max=-1; + + private int min=-1; + + private Sorting order=null; + + private static AtomicInteger nextId=new AtomicInteger(); + + public Section() { + this(null); + } + + /** Creates a section with an id (or null if no id) */ + public Section(String id) { + if (id==null || id.isEmpty()) + this.id=String.valueOf("section_" + nextId.incrementAndGet()); + else + this.id=id; + } + + /** Returns a unique id of this section within the page. Used for referencing and identification. Never null. */ + public String getId() { return id; } + + /** + * Returns the layout identifier describing the kind of layout which should be used by the rendering engine to + * lay out the content of this section. This is never null. Default: "column". + */ + public Layout getLayout() { return layout; } + + /** Sets the layout. If the layout is set to null it will become Layout.column */ + public void setLayout(Layout layout) { + ensureNotFrozen(); + if (layout==null) layout=Layout.column; + this.layout=layout; + } + + /** + * Returns the identifier telling the layout of the containing section where this section should be placed. + * Permissible values, and whether this is mandatory is determined by the particular layout identifier of the parent. + * May be null if a placement is not required by the containing layout, or if this is the top-level section. + * This is null by default. + */ + public String getRegion() { return region; } + + public void setRegion(String region) { + ensureNotFrozen(); + this.region=region; + } + + /** + * Returns the elements of this - sources, subsections and presentations and/or choices of these, + * as a live reference which can be modified to change the content of this (unless this is frozen). 
+ * <p> + * All elements are kept in a single list to allow multiple elements of each type to be nested within separate + * choices, and to maintain the internal order of elements of various types, which is sometimes significant. + * To extract a certain kind of elements (say, sources), the element list must be traversed to collect + * all source elements as well as all choices of sources. + * <p> + * This list is never null but may be empty. + */ + public List<PageElement> elements() { return elements; } + + /** + * Convenience method which returns the elements <b>and choices</b> of the given type in elements as a + * read-only list. Not that as this returns both concrete elements and choices betwen them, + * the list element cannot be case to the given class - this must be used in conjunction + * with a resolve which contains the resolution to the choices. + * + * @param pageTemplateModelElementClass type to returns elements and choices of, a subtype of PageElement + */ + public List<PageElement> elements(@SuppressWarnings("rawtypes") Class pageTemplateModelElementClass) { + if (isFrozen()) { // Use precalculated lists + if (pageTemplateModelElementClass==Section.class) + return sections; + else if (pageTemplateModelElementClass==Source.class) + return sources; + else if (pageTemplateModelElementClass==Renderer.class) + return renderers; + } + return createElementList(pageTemplateModelElementClass); + } + + @SuppressWarnings("unchecked") + private List<PageElement> createElementList(@SuppressWarnings("rawtypes") Class pageTemplateModelElementClass) { + List<PageElement> filteredElements=new ArrayList<>(); + for (PageElement element : elements) { + if (pageTemplateModelElementClass.isAssignableFrom(element.getClass())) + filteredElements.add(element); + else if (element instanceof AbstractChoice) + if (((AbstractChoice)element).isChoiceBetween(pageTemplateModelElementClass)) + filteredElements.add(element); + } + return 
Collections.unmodifiableList(filteredElements); + } + + /** Returns the choice of ways to sort immediate children in this, or empty meaning sort by default order (relevance) */ + public Sorting getOrder() { return order; } + + public void setOrder(Sorting order) { + ensureNotFrozen(); + this.order=order; + } + + /** Returns max number of (immediate) elements/sections permissible within this, -1 means unrestricted. Default: -1. */ + public int getMax() { return max; } + + public void setMax(int max) { + ensureNotFrozen(); + this.max=max; + } + + /** Returns min number of (immediate) elements/sections desired within this, -1 means unrestricted. Default: -1. */ + public int getMin() { return min; } + + public void setMin(int min) { + ensureNotFrozen(); + this.min=min; + } + + public @Override void freeze() { + if (isFrozen()) return; + + for (PageElement element : elements) + element.freeze(); + elements=Collections.unmodifiableList(elements); + sections=createElementList(Section.class); + sources=createElementList(Source.class); + renderers=createElementList(Renderer.class); + + super.freeze(); + } + + /** Accepts a visitor to this structure */ + public @Override void accept(PageTemplateVisitor visitor) { + visitor.visit(this); + for (PageElement element : elements) + element.accept(visitor); + } + + public @Override String toString() { + if (id==null || id.isEmpty()) return "a section"; + return "section '" + id + "'"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/model/Source.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/model/Source.java new file mode 100644 index 00000000000..91c403eae84 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/model/Source.java @@ -0,0 +1,137 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.pagetemplates.model; + +import com.yahoo.component.provider.FreezableClass; +import com.yahoo.protect.Validator; + +import java.util.*; + +/** + * A source mentioned in a page template. + * <p> + * Two sources are equal if they have the same name and parameters. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class Source extends FreezableClass implements PageElement { + + /** The "any" source - used to mark that any source is acceptable here */ + public static final Source any=new Source("*",true); + + /** The obligatory name of a source */ + private String name; + + private List<PageElement> renderers =new ArrayList<>(); + + private Map<String,String> parameters =new LinkedHashMap<>(); + + private String url; + + /** The precalculated hashCode of this object, or 0 if this is not frozen */ + private int hashCode=0; + + public Source(String name) { + this(name,false); + } + + /** Creates a source and optionally immediately freezes it */ + private Source(String name,boolean freeze) { + setName(name); + if (freeze) + freeze(); + } + + /** Returns the name of this source (never null) */ + public String getName() { return name; } + + public final void setName(String name) { + ensureNotFrozen(); + Validator.ensureNotNull("Source name",name); + this.name=name; + } + + /** Returns the url of this source or null if none */ + public String getUrl() { return url; } + + /** + * Sets the url of this source. If a source has an url (i.e this returns non-null), the content of + * the url is <i>not</i> fetched - fetching is left to the frontend by exposing this url in the result. + */ + public void setUrl(String url) { + ensureNotFrozen(); + this.url=url; + } + + /** + * Returns the renderers or choices of renderers to apply on individual items of this source + * <p> + * If this contains multiple renderers/choices, they are to be used on different types of hits returned by this source. 
+ */ + public List<PageElement> renderers() { return renderers; } + + /** + * Returns the parameters of this source as a live reference (never null). + * The parameters will be passed to the provider getting source data. + */ + public Map<String,String> parameters() { return parameters; } + + public @Override void freeze() { + if (isFrozen()) return; + for (PageElement element : renderers) { + if (element instanceof Renderer) { + assignRendererForIfNotSet((Renderer)element); + } + else if (element instanceof Choice) { + for (List<PageElement> renderersAlternative : ((Choice)element).alternatives()) { + for (PageElement rendererElement : renderersAlternative) { + Renderer renderer=(Renderer)rendererElement; + if (renderer.getRendererFor()==null) + renderer.setRendererFor(name); + } + } + } + element.freeze(); + } + parameters = Collections.unmodifiableMap(parameters); + hashCode=hashCode(); + super.freeze(); + } + + private void assignRendererForIfNotSet(Renderer renderer) { + if (renderer.getRendererFor()==null) + renderer.setRendererFor(name); + } + + /** Accepts a visitor to this structure */ + public @Override void accept(PageTemplateVisitor visitor) { + visitor.visit(this); + for (PageElement renderer : renderers) + renderer.accept(visitor); + } + + public @Override int hashCode() { + if (isFrozen()) return hashCode; + int hashCode=name.hashCode(); + int i=0; + for (Map.Entry<String,String> parameter : parameters.entrySet()) + hashCode+=i*17*parameter.getKey().hashCode()+i*31*parameter.getValue().hashCode(); + return hashCode; + } + + public @Override boolean equals(Object other) { + if (other==this) return true; + if (! (other instanceof Source)) return false; + Source otherSource=(Source)other; + if (! this.name.equals(otherSource.name)) return false; + if (this.parameters.size() != otherSource.parameters.size()) return false; + for (Map.Entry<String,String> thisParameter : this.parameters.entrySet()) + if ( ! 
thisParameter.getValue().equals(otherSource.parameters.get(thisParameter.getKey()))) + return false; + return true; + } + + public @Override String toString() { + return "source '" + name + "'"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/model/package-info.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/model/package-info.java new file mode 100644 index 00000000000..22a004d7555 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/model/package-info.java @@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +@PublicApi +package com.yahoo.search.pagetemplates.model; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/package-info.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/package-info.java new file mode 100644 index 00000000000..0368351a6dc --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/package-info.java @@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +@PublicApi +package com.yahoo.search.pagetemplates; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/result/SectionHitGroup.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/result/SectionHitGroup.java new file mode 100644 index 00000000000..00f6c6350fc --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/result/SectionHitGroup.java @@ -0,0 +1,52 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.pagetemplates.result; + +import com.yahoo.search.pagetemplates.model.Renderer; +import com.yahoo.search.pagetemplates.model.Source; +import com.yahoo.search.result.HitGroup; + +import java.util.ArrayList; +import java.util.List; + +/** + * A hit group corresponding to a section - contains some additional information + * in proper getters and setters which is used during rendering. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class SectionHitGroup extends HitGroup { + + private static final long serialVersionUID = -9048845836777953538L; + private List<Source> sources=new ArrayList<>(0); + private List<Renderer> renderers=new ArrayList<>(0); + private final String displayId; + + private boolean leaf=false; + + public SectionHitGroup(String id) { + super(id); + if (id.startsWith("section:section_")) + displayId=null; // Don't display section ids when not named explicitly + else + displayId=id; + types().add("section"); + } + + @Override + public String getDisplayId() { return displayId; } + + /** + * Returns the live, modifiable list of sources which are not fetched by the framework but should + * instead be included in the result + */ + public List<Source> sources() { return sources; } + + /** Returns the live, modifiable list of renderers in this section */ + public List<Renderer> renderers() { return renderers; } + + /** Returns whether this is a leaf section containing no subsections */ + public boolean isLeaf() { return leaf; } + + public void setLeaf(boolean leaf) { this.leaf=leaf; } + +} diff --git a/container-search/src/main/java/com/yahoo/search/pagetemplates/result/package-info.java b/container-search/src/main/java/com/yahoo/search/pagetemplates/result/package-info.java new file mode 100644 index 00000000000..7d006aad551 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/pagetemplates/result/package-info.java @@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +@PublicApi +package com.yahoo.search.pagetemplates.result; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/query/Model.java b/container-search/src/main/java/com/yahoo/search/query/Model.java new file mode 100644 index 00000000000..588580dda4d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/Model.java @@ -0,0 +1,521 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query; + +import com.yahoo.language.Language; +import com.yahoo.language.Linguistics; +import com.yahoo.language.LocaleFactory; +import com.yahoo.prelude.query.CompositeItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.TaggableItem; +import com.yahoo.prelude.query.textualrepresentation.TextualQueryRepresentation; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.Query; +import com.yahoo.search.query.parser.Parsable; +import com.yahoo.search.query.parser.Parser; +import com.yahoo.search.query.parser.ParserEnvironment; +import com.yahoo.search.query.parser.ParserFactory; +import com.yahoo.search.query.profile.types.FieldDescription; +import com.yahoo.search.query.profile.types.QueryProfileType; +import com.yahoo.search.searchchain.Execution; + +import java.util.*; + +import static com.yahoo.text.Lowercase.toLowerCase; + +/** + * The parameters defining the recall of a query. 
 *
 * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a>
 * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
 */
public class Model implements Cloneable {

    /** The type representing the property arguments consumed by this */
    private static final QueryProfileType argumentType;
    /** The name of {@link #argumentType}, used as the key when looking up a Model in query properties */
    private static final CompoundName argumentTypeName;

    public static final String MODEL = "model";
    public static final String PROGRAM = "program";
    public static final String QUERY_STRING = "queryString";
    public static final String TYPE = "type";
    public static final String FILTER = "filter";
    public static final String DEFAULT_INDEX = "defaultIndex";
    public static final String LANGUAGE = "language";
    public static final String ENCODING = "encoding";
    public static final String SOURCES = "sources";
    public static final String SEARCH_PATH = "searchPath";
    public static final String RESTRICT = "restrict";

    // Builds the strict, builtin argument type: one string field per constant above
    // (third FieldDescription argument: space-separated names, presumably aliases - confirm),
    // then freezes it.
    static {
        argumentType =new QueryProfileType(MODEL);
        argumentType.setStrict(true);
        argumentType.setBuiltin(true);
        //argumentType.addField(new FieldDescription(PROGRAM, "string", "yql")); // TODO: Custom type
        argumentType.addField(new FieldDescription(QUERY_STRING, "string", "query"));
        argumentType.addField(new FieldDescription(TYPE, "string", "type"));
        argumentType.addField(new FieldDescription(FILTER, "string","filter"));
        argumentType.addField(new FieldDescription(DEFAULT_INDEX, "string", "default-index def-idx defidx"));
        argumentType.addField(new FieldDescription(LANGUAGE, "string", "language lang"));
        argumentType.addField(new FieldDescription(ENCODING, "string", "encoding"));
        argumentType.addField(new FieldDescription(SOURCES, "string", "sources search"));
        argumentType.addField(new FieldDescription(SEARCH_PATH, "string", "searchpath"));
        argumentType.addField(new FieldDescription(RESTRICT, "string", "restrict"));
        argumentType.freeze();
        argumentTypeName=new CompoundName(argumentType.getId().getName());
    }

    /** Returns the (frozen) query profile type describing the arguments consumed by this */
    public static QueryProfileType getArgumentType() { return argumentType; }

    /** The name of the query property used for generating hit count estimate queries. */
    public static final CompoundName ESTIMATE = new CompoundName("hitcountestimate");

    private String encoding = null;
    private String queryString = "";
    private String filter = null;
    private Language language = null;
    private Locale locale = null;
    private QueryTree queryTree = null; // The actual query. This is lazily created from the program
    private String defaultIndex = null;
    private Query.Type type = Query.Type.ALL;
    private Query parent;
    private Set<String> sources=new LinkedHashSet<>();
    private Set<String> restrict=new LinkedHashSet<>();
    private String searchPath;
    private String documentDbName = null;
    // Placeholder execution with an all-null context, replaced through setExecution
    private Execution execution=new Execution(new Execution.Context(null, null, null, null, null));

    /**
     * Creates a model owned by the given query.
     *
     * @param query the owning query, must not be null
     * @throws NullPointerException if query is null
     */
    public Model(Query query) {
        setParent(query);
    }

    /**
     * Creates a trace message of language detection results in this Model
     * instance's parent query. Do note this will give bogus results if the
     * Execution instance is not set correctly. This is done automatically
     * inside {@link Execution#search(Query)}. If tracing the same place as
     * creating the query instance, {@link #setExecution(Execution)} has to be
     * invoked first with the same Execution instance the query is intended to
     * be run by.
     * <p>
     * Nothing is traced unless the trace level of the parent query is at least 2.
     */
    public void traceLanguage() {
        if (getParent().getTraceLevel()<2) return;
        if (language != null) {
            getParent().trace("Language " + getLanguage() + " specified directly as a parameter", false, 2);
        }
        else {
            Language l = getParsingLanguage();
            // Don't include the query, it will trigger query parsing
            getParent().trace("Detected language: " + l, false, 2);
            getParent().trace("Language " + l + " determined by " +
                              (Language.fromEncoding(encoding) != Language.UNKNOWN ? "query encoding" :
                               "the characters in the terms") + ".", false, 2);
        }
    }

    /**
     * Gets the language to use for parsing. If this is explicitly set, that language is returned. Otherwise
     * it is derived from the encoding, and if that is unknown, it is guessed from the query string by the
     * language detector of the execution context (when linguistics are available there).
     * If this does not yield an actual language, English is returned as the default.
     *
     * @return the language determined, never null
     */
    public Language getParsingLanguage() {
        Language language = getLanguage();
        if (language != null) {
            return language;
        }
        language = Language.fromEncoding(encoding);
        if (language != Language.UNKNOWN) {
            return language;
        }
        Linguistics linguistics = execution.context().getLinguistics();
        if (linguistics != null) {
            language = linguistics.getDetector().detect(queryString, null).getLanguage();
        }
        if (language != Language.UNKNOWN) {
            return language;
        }
        return Language.ENGLISH;
    }

    /** Returns the explicitly set parsing language of this query model, or null if none */
    public Language getLanguage() { return language; }

    /** Explicitly sets the language to be used during parsing */
    public void setLanguage(Language language) { this.language = language; }

    /**
     * <p>Explicitly sets the language to be used during parsing. The argument is first normalized by replacing
     * underscores with hyphens (to support locale strings being used as RFC 5646 language tags), and then forwarded to
     * {@link #setLocale(String)} so that the Locale information of the tag is preserved.</p>
     *
     * @param language The language string to parse.
     * @see #getLanguage()
     * @see #setLocale(String)
     */
    public void setLanguage(String language) {
        setLocale(language.replace("_", "-"));
    }

    /**
     * <p>Returns the explicitly set parsing locale of this query model, or null if none.</p>
     *
     * @return The locale of this.
     * @see #setLocale(Locale)
     */
    public Locale getLocale() {
        return locale;
    }

    /**
     * <p>Explicitly sets the locale to be used during parsing. This method also calls {@link #setLanguage(Language)}
     * with the corresponding {@link Language} instance.</p>
     *
     * @param locale The locale to set.
     * @see #getLocale()
     * @see #setLanguage(Language)
     */
    public void setLocale(Locale locale) {
        this.locale = locale;
        setLanguage(Language.fromLocale(locale));
    }

    /**
     * <p>Explicitly sets the locale to be used during parsing. This creates a Locale instance from the given language
     * tag, and passes that to {@link #setLocale(Locale)}.</p>
     *
     * @param languageTag The language tag to parse.
     * @see #setLocale(Locale)
     */
    public void setLocale(String languageTag) {
        setLocale(LocaleFactory.fromLanguageTag(languageTag));
    }

    /** Returns the encoding used in the query as a lowercase string */
    public String getEncoding() { return encoding; }

    /** Sets the encoding which was used in the received query string. The value is stored lowercased. */
    public void setEncoding(String encoding) {
        this.encoding = toLowerCase(encoding);
    }

    /** Sets the path describing which backend nodes to forward the search to. */
    public void setSearchPath(String searchPath) { this.searchPath = searchPath; }

    /** Returns the search path set by {@link #setSearchPath(String)}, or null if none is set */
    public String getSearchPath() { return searchPath; }

    /**
     * Set the query from a string. This will not be parsed into a query tree until that tree is accessed.
     * Note that setting this will clear the current query tree. Usually, this should <i>not</i> be modified -
     * changes to the query should be implemented as modifications on the query tree structure.
     * <p>
     * Passing null causes this to be set to an empty string.
     */
    public void setQueryString(String queryString) {
        if (queryString==null) queryString="";
        this.queryString = queryString;
        queryTree=null; // Cause parsing of the new query string next time the tree is accessed
    }

    /**
     * Returns the query string which caused the original query tree of this model to come about.
     * Note that changes to the query tree are <b>not</b> reflected in this query string.
     *
     * @return the original (or reassigned) query string - never null
     */
    public String getQueryString() { return queryString; }

    /**
     * Returns the query as an object structure.
     * This causes parsing of the query string if it has changed since this was last called
     * (i.e query parsing is lazy). When parsing happens and the trace level of the parent query
     * is at least 2, the parsed query is traced.
     */
    public QueryTree getQueryTree() {
        if (queryTree == null) {
            Parser parser = ParserFactory.newInstance(type, ParserEnvironment.fromExecutionContext(execution.context()));
            queryTree = parser.parse(Parsable.fromQueryModel(this));
            if (parent.getTraceLevel() >= 2) {
                parent.trace("Query parsed to: " + parent.yqlRepresentation(), 2);
            }
        }
        return queryTree;
    }

    /**
     * Returns the filter string set for this query.
     * The filter is included in the query tree at the time the query tree is parsed
     */
    public String getFilter() { return filter; }

    /**
     * Sets the filter string set for this query.
     * The filter is included in the query tree at the time the query tree is parsed.
     * Setting this does <i>not</i> cause the query to be reparsed.
     */
    public void setFilter(String filter) { this.filter = filter; }

    /**
     * Returns the default index for this query.
     * The default index is taken into account at the time the query tree is parsed.
     */
    public String getDefaultIndex() { return defaultIndex; }

    /**
     * Sets the default index for this query.
     * The default index is taken into account at the time the query tree is parsed.
     * Setting this does <i>not</i> cause the query to be reparsed.
     */
    public void setDefaultIndex(String defaultIndex) { this.defaultIndex = defaultIndex; }

    /**
     * Returns the query type of this query.
     * The type is taken into account at the time the query tree is parsed.
     */
    public Query.Type getType() { return type; }

    /**
     * Sets the query type of this query.
     * The type is taken into account at the time the query tree is parsed.
     * Setting this does <i>not</i> cause the query to be reparsed.
     */
    public void setType(Query.Type type) { this.type = type; }

    /**
     * Sets the query type of this query from its string form, resolved by {@link Query.Type#getType(String)}.
     * The type is taken into account at the time the query tree is parsed.
     * Setting this does <i>not</i> cause the query to be reparsed.
     */
    public void setType(String typeString) { this.type = Query.Type.getType(typeString); }

    /**
     * Compares encoding, language, search path, sources, restrict, default index and type, and then the query:
     * if neither model has a parsed query tree the query strings and filters are compared (so that comparing
     * does not trigger parsing), otherwise the query trees of both are compared, parsing where needed.
     */
    public boolean equals(Object o) {
        if ( ! (o instanceof Model)) return false;

        Model other = (Model) o;
        if ( ! (
                QueryHelper.equals(other.encoding, this.encoding) &&
                QueryHelper.equals(other.language, this.language) &&
                QueryHelper.equals(other.searchPath, this.searchPath) &&
                QueryHelper.equals(other.sources, this.sources) &&
                QueryHelper.equals(other.restrict, this.restrict) &&
                QueryHelper.equals(other.defaultIndex, this.defaultIndex) &&
                QueryHelper.equals(other.type, this.type) ))
            return false;

        if (other.queryTree == null && this.queryTree == null) // don't cause query parsing
            return QueryHelper.equals(other.queryString, this.queryString) &&
                   QueryHelper.equals(other.filter, this.filter);
        else // make sure we compare a parsed variant of both
            return QueryHelper.equals(other.getQueryTree(), this.getQueryTree());
    }

    /** Note: this calls {@link #getQueryTree()}, so computing the hash parses the query if not already parsed. */
    @Override
    public int hashCode() {
        return getClass().hashCode() +
               QueryHelper.combineHash(encoding,filter,language,getQueryTree(),sources,restrict,defaultIndex,type,searchPath);
    }


    /**
     * Returns a copy of this having its own query tree (if parsed), sources and restrict sets.
     * All other fields, including the parent query and the execution, are shared with this.
     */
    public Object clone() {
        try {
            Model clone = (Model) super.clone();
            if (queryTree != null)
                clone.queryTree = this.queryTree.clone();
            if (sources !=null)
                clone.sources = new LinkedHashSet<>(this.sources);
            if (restrict !=null)
                clone.restrict = new LinkedHashSet<>(this.restrict);
            return clone;
        }
        catch (CloneNotSupportedException e) {
            throw new RuntimeException("Someone inserted a noncloneable superclass",e);
        }
    }

    /** Returns a clone of this which has the given query as its parent */
    public Model cloneFor(Query q) {
        Model model = (Model) this.clone();
        model.setParent(q);
        return model;
    }

    /** Returns the query owning this, never null */
    public Query getParent() { return parent; }

    /**
     * Assigns the query owning this
     *
     * @throws NullPointerException if parent is null
     */
    public void setParent(Query parent) {
        if (parent==null) throw new NullPointerException("A query models owner cannot be null");
        this.parent = parent;
    }

    /** Sets the set of sources this query will search from a comma-separated string of source names */
    public void setSources(String sourceString) {
        setFromString(sourceString,sources);
    }

    /**
     * Returns the set of sources this query will search.
     * This set can be modified to change the set of sources. If all sources are to be searched, this returns
     * an empty set
     *
     * @return the set of sources to search, never null
     */
    public Set<String> getSources() { return sources; }

    /**
     * Sets the set of types (document type or search definition names) this query will search from a
     * comma-separated string of type names. This is useful to narrow a search to just a subset of the types available
     * from a source
     */
    public void setRestrict(String restrictString) {
        setFromString(restrictString,restrict);
    }

    /**
     * Returns the set of types this query will search.
     * This set can be modified to change the set of types. If all types are to be searched, this returns
     * an empty set.
     *
     * @return the set of types to search, never null
     */
    public Set<String> getRestrict() { return restrict; }

    /** Sets the execution working on this. For internal use. */
    public void setExecution(Execution execution) {
        if (execution==this.execution) return;

        // If not already coupled, bind the trace of the new execution into the existing execution trace
        if (execution.trace().traceNode().isRoot()
            && execution.trace().traceNode() != this.execution.trace().traceNode().root()) {
            this.execution.trace().traceNode().add(execution.trace().traceNode());
        }

        this.execution = execution;
    }

    /** Sets the document database this will search - a document type */
    public void setDocumentDb(String documentDbName) {
        this.documentDbName = documentDbName;
    }

    /** Returns the name of the document db this should search, or null if not set. */
    public String getDocumentDb() { return documentDbName; }

    /** Returns the Execution working on this, or a null execution if none. For internal use. */
    public Execution getExecution() { return execution; }

    /**
     * Replaces the content of the given set by the trimmed, comma-separated items of the given string.
     * Note that an empty string results in a set holding a single empty string, as that is what
     * String.split returns for it, and that a null string causes a NullPointerException.
     */
    private void setFromString(String string,Set<String> set) {
        set.clear();
        for (String item : string.split(","))
            set.add(item.trim());
    }

    /** Returns the Model stored in the properties of the given query under the name of the model argument type */
    public static Model getFrom(Query q) {
        return (Model)q.properties().get(argumentTypeName);
    }

    /** Returns a description of this. This reads the query tree field directly and so never triggers parsing. */
    public @Override String toString() {
        return "query representation [queryTree: " + queryTree + ", filter: " + filter + "]";
    }

    /** Prepares this for binary serialization. For internal use. */
    public void prepare(Ranking ranking) {
        prepareRankFeaturesFromModel(ranking);
    }

    /**
     * Assigns unique ids to the taggable items of the query tree (parsing it if needed) and
     * adds label, connectivity and significance rank properties for them to the given ranking.
     */
    private void prepareRankFeaturesFromModel(Ranking ranking) {
        Item root = getQueryTree().getRoot();
        if (root != null) {
            List<Item> tagged = setUniqueIDs(root);
            addLabels(tagged, ranking);
            addConnectivityRankProperties(tagged, ranking);
            addSignificances(tagged, ranking);
        }
    }

    /** Numbers all taggable items below root from 1 in collection order, and returns them */
    private List<Item> setUniqueIDs(Item root) {
        List<Item> items = new ArrayList<>();
        collectTaggableItems(root, items);
        int id = 1;
        for (Item i : items) {
            TaggableItem t = (TaggableItem) i;
            t.setUniqueID(id++);
        }
        return items;
    }

    /** Adds a "vespa.label.[label].id" property holding the unique id of each labeled item */
    private void addLabels(List<Item> candidates, Ranking ranking) {
        for (Item candidate : candidates) {
            String label = candidate.getLabel();
            if (label != null) {
                String name = "vespa.label." + label + ".id";
                TaggableItem t = (TaggableItem) candidate;
                ranking.getProperties().put(name, String.valueOf(t.getUniqueID()));
            }
        }
    }

    /**
     * Adds "vespa.term.[id].connexity" properties for each item connected to another item in the given list
     * (the very same instance, not just an equal one).
     */
    private void addConnectivityRankProperties(List<Item> connectedItems, Ranking ranking) {
        for (Item link : connectedItems) {
            TaggableItem t = (TaggableItem) link;
            Item connectedTo = t.getConnectedItem();
            if (connectedTo != null && strictContains(connectedTo, connectedItems)) {
                TaggableItem t2 = (TaggableItem) connectedTo;
                String name = "vespa.term." + t.getUniqueID() + ".connexity";
                // NOTE(review): both values are put under the same key, the id of the connected item first and
                // then the connectivity. This presumably relies on rank properties keeping multiple values
                // per key rather than overwriting - confirm against RankProperties.put
                ranking.getProperties().put(name, String.valueOf(t2.getUniqueID()));
                ranking.getProperties().put(name, String.valueOf(t.getConnectivity()));
            }
        }
    }

    /** Adds a "vespa.term.[id].significance" property for each item having an explicit significance */
    private void addSignificances(List<Item> candidates, Ranking ranking) {
        for (Item candidate : candidates) {
            TaggableItem t = (TaggableItem) candidate;
            if ( ! t.hasExplicitSignificance()) continue;
            String name = "vespa.term." + t.getUniqueID() + ".significance";
            ranking.getProperties().put(name, String.valueOf(t.getSignificance()));
        }
    }

    /** Adds the taggable items at or below root to the given list, not descending into taggable items */
    private void collectTaggableItems(Item root, List<Item> terms) {
        if (root == null) return;

        if (root instanceof TaggableItem) {
            // This is tested before descending, as phrases are viewed
            // as leaf nodes in the ranking code in the backend
            terms.add(root);
        } else if (root instanceof CompositeItem) {
            CompositeItem c = (CompositeItem) root;
            for (Iterator<Item> i = c.getItemIterator(); i.hasNext();) {
                collectTaggableItems(i.next(), terms);
            }
        } else {} // nop
    }

    /** Returns whether the haystack contains the needle instance itself (identity, not equals) */
    private boolean strictContains(Object needle, Collection<?> haystack) {
        for (Object pin : haystack)
            if (pin == needle) return true;
        return false;
    }


    /**
     * Set the YTrace header value to use when transmitting this model to a
     * search backend (of some kind). This does nothing.
     *
     * @param next string representation of header value
     * @deprecated not in use, this is a no-op
     */
    @Deprecated
    public void setYTraceHeaderToNext(String next) { }

    /**
     * Get the YTrace header value to use when transmitting this model to a
     * search backend (of some kind). This always returns null.
     *
     * @deprecated not in use, this always returns null
     */
    @Deprecated
    public String getYTraceHeaderToNext() {
        return null;
    }

}
diff --git a/container-search/src/main/java/com/yahoo/search/query/ParameterParser.java b/container-search/src/main/java/com/yahoo/search/query/ParameterParser.java new file mode 100644 index 00000000000..a27e1bfde55 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/ParameterParser.java @@ -0,0 +1,88 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query;

import static com.yahoo.container.util.Util.quote;

/**
 * Wrapper class to avoid code duplication of common parsing requirements.
 *
 * @author <a href="steinar@yahoo-inc.com">Steinar Knutsen</a>
 */
public class ParameterParser {

    /**
     * Tries to return the given object as a Long number of milliseconds. If it is a Number, treat it
     * as a number of seconds, i.e. get a Long representation and multiply by
     * 1000. Note that this drops any fraction <i>before</i> scaling, so a Double of 0.5 becomes 0.
     * If it has a String representation, try to parse this as a floating
     * point number, followed by an optional unit (seconds with an optional SI prefix;
     * a couple of valid examples are "s" and "ms". Only a very small subset of
     * SI prefixes are supported). If no unit is given, seconds are assumed.
     *
     * @param value
     *            some representation of a number of seconds
     * @param defaultValue
     *            returned if value is null
     * @return value as a number of milliseconds
     * @throws IllegalArgumentException
     *             if value is not a Number instance and its String
     *             representation cannot be parsed as a number followed
     *             optionally by time unit
     */
    public static Long asMilliSeconds(Object value, Long defaultValue) {
        if (value == null) {
            return defaultValue;
        }
        if (value instanceof Number) {
            Number n = (Number) value;
            return Long.valueOf(n.longValue() * 1000L);
        }
        return parseTime(value.toString());
    }

    /**
     * Parses a trimmed "number followed by optional unit" string into milliseconds.
     * Any RuntimeException from parsing, including NumberFormatException, is rethrown
     * wrapped in an IllegalArgumentException naming the input.
     */
    private static Long parseTime(String time) throws NumberFormatException {

        time = time.trim();
        try {
            int unitOffset = findUnitOffset(time);
            double measure = Double.valueOf(time.substring(0, unitOffset));
            double multiplier = parseUnit(time.substring(unitOffset));
            return Long.valueOf((long) (measure * multiplier));
        } catch (RuntimeException e) {
            throw new IllegalArgumentException("Error parsing " + quote(time), e);
        }
    }

    /**
     * Returns the index of the first character which is neither a digit nor '.',
     * i.e. where the unit starts. Signs and exponents are not accepted as part of the number.
     *
     * @throws NumberFormatException if the string does not start with a digit or '.'
     */
    private static int findUnitOffset(String time) {
        int unitOffset = 0;
        while (unitOffset < time.length()) {
            char c = time.charAt(unitOffset);
            if (c == '.' || (c >= '0' && c <= '9')) {
                unitOffset += 1;
            } else {
                break;
            }
        }
        if (unitOffset == 0) {
            throw new NumberFormatException("Invalid number " + quote(time));
        }
        return unitOffset;
    }

    /**
     * Returns the factor converting a measure in the given unit to milliseconds.
     * Note that an empty or unrecognized unit is silently treated as seconds.
     */
    private static double parseUnit(String unit) {
        unit = unit.trim();
        final double multiplier;
        if ("ks".equals(unit)) {
            multiplier = 1e6d;
        } else if ("s".equals(unit)) {
            multiplier = 1000.0d;
        } else if ("ms".equals(unit)) {
            multiplier = 1.0d;
        } else if ("\u00B5s".equals(unit)) {
            // microseconds
            multiplier = 1e-3d;
        } else {
            multiplier = 1000.0d;
        }
        return multiplier;
    }
}
diff --git a/container-search/src/main/java/com/yahoo/search/query/Presentation.java b/container-search/src/main/java/com/yahoo/search/query/Presentation.java new file mode 100644 index 00000000000..466ddf88299 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/Presentation.java @@ -0,0 +1,211 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.search.query; + +import com.google.common.base.Splitter; +import com.yahoo.collections.LazySet; +import com.yahoo.component.ComponentSpecification; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.prelude.query.*; +import com.yahoo.search.Query; +import com.yahoo.search.query.profile.types.FieldDescription; +import com.yahoo.search.query.profile.types.QueryProfileType; +import com.yahoo.search.rendering.RendererRegistry; +import edu.umd.cs.findbugs.annotations.NonNull; +import edu.umd.cs.findbugs.annotations.Nullable; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + + +/** + * Parameters deciding how the result of a query should be presented + * + * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> + */ +public class Presentation implements Cloneable { + + /** The type representing the property arguments consumed by this */ + private static QueryProfileType argumentType; + + public static final String PRESENTATION = "presentation"; + public static final String BOLDING = "bolding"; + public static final String TIMING = "timing"; + public static final String SUMMARY = "summary"; + public static final String REPORT_COVERAGE = "reportCoverage"; + public static final String SUMMARY_FIELDS = "summaryFields"; + + /** The (short) name of the parameter holding the name of the return format to use */ + public static final String FORMAT = "format"; + + static { + argumentType=new QueryProfileType(PRESENTATION); + argumentType.setStrict(true); + argumentType.setBuiltin(true); + argumentType.addField(new FieldDescription(BOLDING, "boolean", "bolding")); + argumentType.addField(new FieldDescription(TIMING, "boolean", "timing")); + argumentType.addField(new FieldDescription(SUMMARY, "string", "summary")); + argumentType.addField(new FieldDescription(REPORT_COVERAGE, "string", "reportcoverage")); + argumentType.addField(new FieldDescription(FORMAT, "string", "format template")); + 
argumentType.addField(new FieldDescription(SUMMARY_FIELDS, "string", "summaryFields")); + argumentType.freeze(); + } + public static QueryProfileType getArgumentType() { return argumentType; } + + /** How the result should be highlighted */ + private Highlight highlight= null; + + /** The terms to highlight in the result (only used by BoldingSearcher, may be removed later). */ + private List<IndexedItem> boldingData = null; + + /** Whether or not to do highlighting */ + private boolean bolding = true; + + /** The summary class to be shown */ + private String summary = null; + + /** Whether coverage information (how much of the indices was searched should be included in the result */ + private boolean reportCoverage=false; + + /** The name of the renderer to use for rendering the hits. */ + private ComponentSpecification format = RendererRegistry.defaultRendererId.toSpecification(); + + /** Whether optional timing data should be rendered */ + private boolean timing = false; + + /** Set of explicitly requested summary fields, instead of summary classes */ + @NonNull + private Set<String> summaryFields = LazySet.newHashSet(); + + private static final Splitter COMMA_SPLITTER = Splitter.on(',').omitEmptyStrings().trimResults(); + + public Presentation(Query parent) { } + + /** Returns how terms in this result should be highlighted, or null if not set */ + public Highlight getHighlight() { return highlight; } + + /** Sets how terms in this result should be highlighted. 
Set to null to turn highlighting off */ + public void setHighlight(Highlight highlight) { this.highlight = highlight; } + + /** Returns the name of the summary class to be used to present hits from this query, or null if not set */ + public String getSummary() { return summary; } + + /** Sets the name of the summary class to be used to present hits from this query */ + public void setSummary(String summary) { this.summary = summary; } + + /** Returns whether matching query terms should be bolded in the result. Default is true. */ + public boolean getBolding() { return bolding; } + + /** Sets whether matching query terms should be bolded in the result */ + public void setBolding(boolean bolding) { this.bolding = bolding; } + + /** Returns whether coverage information should be returned in the result, if available. Default is false */ + public boolean getReportCoverage() { return reportCoverage; } + + /** Sets whether coverage information should be returned in the result, if available */ + public void setReportCoverage(boolean reportCoverage) { this.reportCoverage=reportCoverage; } + + /** Get the name of the format desired for result rendering. */ + @NonNull + public ComponentSpecification getRenderer() { return format; } + + /** Set the desired format for result rendering. If null, use the default renderer. */ + public void setRenderer(@Nullable ComponentSpecification format) { + this.format = (format != null) ? format : RendererRegistry.defaultRendererId.toSpecification(); + } + + /** + * Get the name of the format desired for result rendering. + */ + @NonNull + public String getFormat() { return format.getName(); } + + /** + * Set the desired format for result rendering. If null, use the default renderer. 
+ */ + public void setFormat(@Nullable String format) { + setRenderer(ComponentSpecification.fromString(format)); + } + + @Override + public Object clone() { + try { + Presentation clone = (Presentation)super.clone(); + if (boldingData != null) + clone.boldingData = new ArrayList<>(boldingData); + + if (highlight != null) + clone.highlight = highlight.clone(); + + if (summaryFields != null) { + clone.summaryFields = LazySet.newHashSet(); + clone.summaryFields.addAll(this.summaryFields); + } + + return clone; + } + catch (CloneNotSupportedException e) { + throw new RuntimeException("Someone inserted a noncloneable superclass",e); + } + } + + @Override + public boolean equals(Object o) { + if (o == null || !(o instanceof Presentation)) return false; + Presentation p = (Presentation) o; + return QueryHelper.equals(bolding,p.bolding) && QueryHelper.equals(summary,p.summary); + } + + @Override + public int hashCode() { + return QueryHelper.combineHash(bolding, summary); + } + + /** + * @return whether to add optional timing data to the rendered result + */ + public boolean getTiming() { + return timing; + } + + public void setTiming(boolean timing) { + this.timing = timing; + } + + /** + * Return the set of explicitly requested fields. Returns an empty set if no + * fields are specified outside of summary classes. The returned set is + * mutable and fields may be added or removed before passing on the query. + * + * @return the set of names of requested fields, never null + */ + @NonNull + public Set<String> getSummaryFields() { + return summaryFields; + } + + /** Prepares this for binary serialization. For internal use - see {@link Query#prepare} */ + public void prepare() { + if (highlight != null) + highlight.prepare(); + } + + /** + * Parse the given string as a comma delimited set of field names and + * overwrite the set of summary fields. Whitespace will be trimmed. 
If you + * want to add or remove fields programmatically, use + * {@link #getSummaryFields()} and modify the returned set. + * + * @param asString + * the summary fields requested, e.g. "price,author,title" + */ + public void setSummaryFields(String asString) { + summaryFields.clear(); + for (String field : COMMA_SPLITTER.split(asString)) { + summaryFields.add(field); + } + + } + +} + diff --git a/container-search/src/main/java/com/yahoo/search/query/Properties.java b/container-search/src/main/java/com/yahoo/search/query/Properties.java new file mode 100644 index 00000000000..df3d120c337 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/Properties.java @@ -0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query; + +import com.yahoo.search.Query; + +/** + * Object properties keyed by name which can be looked up using default values and + * with conversion to various primitive wrapper types. + * <p> + * Multiple property implementations can be chained to provide unified access to properties + * backed by multiple sources as a Chain of Responsibility. + * <p> + * For better performance, prefer CompoundName argument constants over Strings. + * <p> + * Properties can be cloned. Cloning a properties instance returns a new instance + * which chains new instances of all chained instances. The content within each instance + * is cloned to the extent determined appropriate by that implementation. + * <p> + * This base class simply passes all access on to the next in chain. + * + * @author bratseth + */ +public abstract class Properties extends com.yahoo.processing.request.Properties { + + @Override + public Properties chained() { return (Properties)super.chained(); } + + @Override + public Properties clone() { + return (Properties)super.clone(); + } + + /** The query owning this property object. 
+ * Only guaranteed to work if this instance is accessible as query.properties() + */ + public Query getParentQuery() { + if (chained() == null) { + throw new RuntimeException("getParentQuery should only be called on a properties instance accessible as query.properties()"); + } else { + return chained().getParentQuery(); + } + } + + /** + * Invoked during deep cloning of the parent query. + */ + public void setParentQuery(Query query) { + if (chained() != null) + chained().setParentQuery(query); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/query/QueryHelper.java b/container-search/src/main/java/com/yahoo/search/query/QueryHelper.java new file mode 100644 index 00000000000..d4b6f257c11 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/QueryHelper.java @@ -0,0 +1,27 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query; + +/** + * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> + */ +class QueryHelper { + + /** Compares two objects which may be null */ + public static boolean equals(Object a,Object b) { + if (a == null) return b == null; + return a.equals(b); + } + + /** + * Helper method that finds the hashcode for a group of objects. + * Inspired by java.util.List + */ + public static int combineHash(Object... objs) { + int hash = 1; + for (Object o:objs) { + hash = 31*hash + (o == null ? 0 : o.hashCode()); + } + return hash; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/QueryTree.java b/container-search/src/main/java/com/yahoo/search/query/QueryTree.java new file mode 100644 index 00000000000..3a501853388 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/QueryTree.java @@ -0,0 +1,159 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.query; + +import com.yahoo.prelude.query.*; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +/** + * The root node of a query tree. This is always present above the actual semantic root to ease query manipulation, + * especially replacing the actual semantic root, but does not have any search semantics on its own. + * + * <p>To ease recursive manipulation of the query tree, this is a composite having one child, which is the actual root. + * <ul> + * <li>Setting the root item (at position 0, either directly or though the iterator of this, works as expected. + * Setting at any other position is disallowed. + * <li>Removing the root is allowed and causes this to be a null query. + * <li>Adding an item is only allowed if this is currently a null query (having no root) + * </ul> + * + * <p>This is also the home of accessor methods which eases querying into and manipulation of the query tree.</p> + * + * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> + */ +public class QueryTree extends CompositeItem { + + public QueryTree(Item root) { + setRoot(root); + } + + public void setIndexName(String index) { + if (getRoot() != null) + getRoot().setIndexName(index); + } + + public ItemType getItemType() { + throw new RuntimeException("Packet type access attempted. " + + "A query tree has no packet code. This is probably a misbehaving searcher."); + } + + public String getName() { return "ROOT"; } + + public int encode(ByteBuffer buffer) { + if (getRoot() == null) return 0; + return getRoot().encode(buffer); + } + + //Lets not pollute toString() by adding "ROOT" + protected void appendHeadingString(StringBuilder sb) { + } + + /** Returns the query root. This is null if this is a null query. 
*/ + public Item getRoot() { + if (getItemCount()==0) return null; + return getItem(0); + } + + public final void setRoot(Item root) { + if (root==this) throw new IllegalArgumentException("Cannot make a root point at itself"); + if (root == null) throw new IllegalArgumentException("Root must not be null, use NullItem instead."); + if (root instanceof QueryTree) throw new IllegalArgumentException("Do not use a new QueryTree instance as a root."); + if (this.getItemCount()==0) // initializing + super.addItem(root); + else + setItem(0,root); // replacing + } + + @Override + public boolean equals(Object o) { + if( !(o instanceof QueryTree)) return false; + return super.equals(o); + } + + /** Returns a deep copy of this */ + @Override + public QueryTree clone() { + QueryTree clone = (QueryTree) super.clone(); + fixClonedConnectivityReferences(clone); + return clone; + } + + private void fixClonedConnectivityReferences(QueryTree clone) { + // TODO! + } + + @Override + public void addItem(Item item) { + if (getItemCount()==0) + super.addItem(item); + else + throw new RuntimeException("Programming error: Cannot add multiple roots"); + } + + @Override + public void addItem(int index, Item item) { + if (getItemCount()==0 && index==0) + super.addItem(index,item); + else + throw new RuntimeException("Programming error: Cannot add multiple roots, have '" + getRoot() + "'"); + } + + /** Returns true if this represents the null query */ + public boolean isEmpty() { + return getRoot() instanceof NullItem; + } + + // -------------- Facade + + /** Modifies this query to become the current query AND the given item */ + // TODO: Make sure this is complete, unit test and make it public + private void and(Item item) { + if (isEmpty()) { + setRoot(item); + } + else if (getRoot() instanceof NotItem && item instanceof NotItem) { + throw new IllegalArgumentException("Can't AND two NOTs"); // TODO: Complete + } + else if (getRoot() instanceof NotItem){ + NotItem notItem = (NotItem)getRoot(); 
+ notItem.addPositiveItem(item); + } + else if (item instanceof NotItem){ + NotItem notItem = (NotItem)item; + notItem.addPositiveItem(getRoot()); + setRoot(notItem); + } + else { + AndItem andItem = new AndItem(); + andItem.addItem(getRoot()); + andItem.addItem(item); + setRoot(andItem); + } + } + + /** Returns a flattened list of all positive query terms under the given item */ + public static List<IndexedItem> getPositiveTerms(Item item) { + List<IndexedItem> items = new ArrayList<>(); + getPositiveTerms(item,items); + return items; + } + + private static void getPositiveTerms(Item item, List<IndexedItem> terms) { + if (item instanceof NotItem) { + getPositiveTerms(((NotItem) item).getPositiveItem(), terms); + } else if (item instanceof PhraseItem) { + PhraseItem pItem = (PhraseItem)item; + terms.add(pItem); + } else if (item instanceof CompositeItem) { + for (Iterator<Item> i = ((CompositeItem) item).getItemIterator(); i.hasNext();) { + getPositiveTerms(i.next(), terms); + } + } else if (item instanceof TermItem) { + terms.add((TermItem)item); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/Ranking.java b/container-search/src/main/java/com/yahoo/search/query/Ranking.java new file mode 100644 index 00000000000..e543589f74d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/Ranking.java @@ -0,0 +1,246 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.query; + +import com.yahoo.processing.request.CompoundName; +import com.yahoo.prelude.Freshness; +import com.yahoo.prelude.Location; +import com.yahoo.search.Query; +import com.yahoo.search.query.profile.types.FieldDescription; +import com.yahoo.search.query.profile.types.QueryProfileType; +import com.yahoo.search.query.ranking.MatchPhase; +import com.yahoo.search.query.ranking.RankFeatures; +import com.yahoo.search.query.ranking.RankProperties; +import com.yahoo.search.result.ErrorMessage; + +/** + * The ranking (hit ordering) settings of a query + * + * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> + * @author bratseth + */ +public class Ranking implements Cloneable { + + /** An alias for listing features */ + public static final com.yahoo.processing.request.CompoundName RANKFEATURES = + new com.yahoo.processing.request.CompoundName("rankfeatures"); + + /** The type representing the property arguments consumed by this */ + private static final QueryProfileType argumentType; + private static final CompoundName argumentTypeName; + + public static final String RANKING = "ranking"; + public static final String LOCATION = "location"; + public static final String PROFILE = "profile"; + public static final String SORTING = "sorting"; + public static final String LIST_FEATURES = "listFeatures"; + public static final String FRESHNESS = "freshness"; + public static final String QUERYCACHE = "queryCache"; + public static final String MATCH_PHASE = "matchPhase"; + public static final String DIVERSITY = "diversity"; + public static final String FEATURES = "features"; + public static final String PROPERTIES = "properties"; + + static { + argumentType =new QueryProfileType(RANKING); + argumentType.setStrict(true); + argumentType.setBuiltin(true); + argumentType.addField(new FieldDescription(LOCATION, "string", "location")); + argumentType.addField(new FieldDescription(PROFILE, "string", "ranking")); + argumentType.addField(new 
FieldDescription(SORTING, "string", "sorting sortspec")); + argumentType.addField(new FieldDescription(LIST_FEATURES, "string", RANKFEATURES.toString())); + argumentType.addField(new FieldDescription(FRESHNESS, "string", "datetime")); + argumentType.addField(new FieldDescription(QUERYCACHE, "string")); + argumentType.addField(new FieldDescription(MATCH_PHASE, "query-profile", "matchPhase")); + argumentType.addField(new FieldDescription(FEATURES, "query-profile", "rankfeature")); + argumentType.addField(new FieldDescription(PROPERTIES, "query-profile", "rankproperty")); + argumentType.freeze(); + argumentTypeName=new CompoundName(argumentType.getId().getName()); + } + public static QueryProfileType getArgumentType() { return argumentType; } + + private Query parent; + + /** The location of the query is used for distance ranking */ + private Location location = null; + + /** The name of the rank profile to use */ + private String profile = null; + + /** How the query should be sorted */ + private Sorting sorting = null; + + /** Set to true to include the value of "all" rank features in the result */ + private boolean listFeatures = false; + + private Freshness freshness; + + private boolean queryCache = false; + + private RankProperties rankProperties = new RankProperties(); + + private RankFeatures rankFeatures = new RankFeatures(); + + private MatchPhase matchPhase = new MatchPhase(); + + public Ranking(Query parent) { + this.parent = parent; + } + + /** + * Returns whether a rank profile has been explicitly set. + * + * This is only used in serializing the packet properly to FS4. 
+ */ + public boolean hasRankProfile() { + return profile != null; + } + + /** Get the freshness search parameters associated with this query */ + public Freshness getFreshness() { + return freshness; + } + + /** Set the freshness search parameters for this query */ + public void setFreshness(String dateTime) { + try { + Freshness freshness = new Freshness(dateTime); + setFreshness(freshness); + } catch (NumberFormatException e) { + parent.errors().add(ErrorMessage.createInvalidQueryParameter("Datetime reference could not be converted from '" + + dateTime + "' to long")); + } + } + + public void setFreshness(Freshness freshness) { + this.freshness = freshness; + } + + /** + * Returns whether feature caching is turned on in the backed. + * Feature caching allows us to avoid sending the query during document summary retrieval + * and recalculate feature scores, it is typically beneficial to turn it on if + * fan-out is low or queries are large. + * <p> + * Default is false (off). + */ + public void setQueryCache(boolean queryCache) { this.queryCache = queryCache; } + + public boolean getQueryCache() { return queryCache; } + + /** Returns the location of this query, or null if none */ + public Location getLocation() { return location; } + + public void setLocation(Location location) { this.location = location; } + + /** Sets the location from a string, see {@link Location} for syntax */ + public void setLocation(String str) { this.location = new Location(str); } + + /** Returns the name of the rank profile to be used. Returns "default" if nothing is set. */ + public String getProfile() { return profile == null ? "default" : profile; } + + /** Sets the name of the rank profile to use. This cannot be set to null. 
*/ + public void setProfile(String profile) { + if (profile==null) throw new NullPointerException("The ranking profile cannot be set to null"); + this.profile = profile; + } + + /** + * Returns the rank features of this, an empty container (never null) if none are set. + * The returned object can be modified directly to change the rank properties of this. + */ + public RankFeatures getFeatures() { + return rankFeatures; + } + + /** + * Returns the rank properties of this, an empty container (never null) if none are set. + * The returned object can be modified directly to change the rank properties of this. + */ + public RankProperties getProperties() { + return rankProperties; + } + + /** Set whether rank features should be included with the result of this query */ + public void setListFeatures(boolean listFeatures) { this.listFeatures = listFeatures; } + + /** Returns whether rank features should be dumped with the result of this query, default false */ + public boolean getListFeatures() { return listFeatures; } + + /** Returns the match phase rank settings of this. This is never null. */ + public MatchPhase getMatchPhase() { return matchPhase; } + + @Override + public Object clone() { + try { + Ranking clone = (Ranking) super.clone(); + + if (sorting != null) clone.sorting = this.sorting.clone(); + + clone.rankProperties = this.rankProperties.clone(); + clone.rankFeatures = this.rankFeatures.clone(); + clone.matchPhase = this.matchPhase.clone(); + return clone; + } + catch (CloneNotSupportedException e) { + throw new RuntimeException("Someone inserted a noncloneable superclass",e); + } + } + + @Override + public boolean equals(Object o) { + if (o == this) return true; + if( ! (o instanceof Ranking)) return false; + + Ranking other = (Ranking) o; + + if ( ! QueryHelper.equals(rankProperties, other.rankProperties)) return false; + if ( ! QueryHelper.equals(rankFeatures, other.rankFeatures)) return false; + if ( ! 
QueryHelper.equals(freshness, other.freshness)) return false; + if ( ! QueryHelper.equals(this.sorting, other.sorting)) return false; + if ( ! QueryHelper.equals(this.location, other.location)) return false; + if ( ! QueryHelper.equals(this.profile, other.profile)) return false; + return true; + } + + @Override + public int hashCode() { + int hash = 0; + hash += 11 * rankFeatures.hashCode(); + hash += 13 * rankProperties.hashCode(); + hash += 17 * matchPhase.hashCode(); + return Ranking.class.hashCode() + QueryHelper.combineHash(sorting,location,profile,hash); + } + + /** Returns the sorting spec of this query, or null if none is set */ + public Sorting getSorting() { return sorting; } + + /** Sets how this query should be sorted. Set to null to turn off explicit sorting. */ + public void setSorting(Sorting sorting) { this.sorting = sorting; } + + /** Sets sorting from a string. See {@link Sorting} on syntax */ + public void setSorting(String sortingString) { + if (sortingString==null) + setSorting((Sorting)null); + else + setSorting(new Sorting(sortingString)); + } + + public static Ranking getFrom(Query q) { + return (Ranking) q.properties().get(argumentTypeName); + } + + public void prepare() { + rankFeatures.prepare(rankProperties); + matchPhase.prepare(rankProperties); + prepareNow(freshness); + } + + private void prepareNow(Freshness freshness) { + if (freshness == null) return; + // TODO: See what freshness is doing with the internal props and simplify + if (rankProperties.get("vespa.now") == null || rankProperties.get("vespa.now").isEmpty()) { + rankProperties.put("vespa.now", "" + freshness.getRefTime()); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/SessionId.java b/container-search/src/main/java/com/yahoo/search/query/SessionId.java new file mode 100644 index 00000000000..7f8ca6385e1 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/SessionId.java @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query; + +import com.yahoo.container.Server; +import com.yahoo.text.Utf8String; + +import java.util.concurrent.atomic.AtomicLong; + +/** + * A query id which is unique across this cluster - consisting of + * container runtime id + timestamp + serial. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class SessionId { + + private static final String serverId = Server.get().getServerDiscriminator(); + private static final AtomicLong sequenceCounter = new AtomicLong(); + + private final Utf8String id; + + private SessionId(String serverId, long timestamp, long sequence) { + this.id = new Utf8String(serverId + "." + timestamp + "." + sequence); + } + + public Utf8String asUtf8String() { return id; } + + /** + * Creates a session id which is unique across the cluster this runtime is a member of each time this is called. + * Calling this causes synchronization. + */ + public static SessionId next() { + return new SessionId(serverId, System.currentTimeMillis(), sequenceCounter.getAndIncrement()); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/Sorting.java b/container-search/src/main/java/com/yahoo/search/query/Sorting.java new file mode 100644 index 00000000000..3af9bc34940 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/Sorting.java @@ -0,0 +1,407 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.query; + +import com.ibm.icu.text.Collator; +import com.ibm.icu.util.ULocale; +import com.yahoo.text.Utf8; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Pattern; + + +/** + * Specifies how a query is sorted by a list of fields with a sort order + * + * @author Arne Bergene Fossaa + */ +public class Sorting implements Cloneable { + + public static final String STRENGTH_IDENTICAL = "identical"; + public static final String STRENGTH_QUATERNARY = "quaternary"; + public static final String STRENGTH_TERTIARY = "tertiary"; + public static final String STRENGTH_SECONDARY = "secondary"; + public static final String STRENGTH_PRIMARY = "primary"; + public static final String UCA = "uca"; + public static final String RAW = "raw"; + public static final String LOWERCASE = "lowercase"; + + private final List<FieldOrder> fieldOrders = new ArrayList<>(2); + + /** Creates an empty sort spec */ + public Sorting() { } + + public Sorting(List<FieldOrder> fieldOrders) { + this.fieldOrders.addAll(fieldOrders); + } + + /** Creates a sort spec from a string */ + public Sorting(String sortSpec) { + setSpec(sortSpec); + } + + /** + * Creates a new sorting from the given string and returns it, or returns null if the argument does not contain + * any sorting criteria (e.g it is null or the empty string) + */ + public static Sorting fromString(String sortSpec) { + if (sortSpec==null) return null; + if ("".equals(sortSpec)) return null; + return new Sorting(sortSpec); + } + + private void setSpec(String rawSortSpec) { + String[] vectors = rawSortSpec.split(" "); + + for (String sortString:vectors) { + // A sortspec element must be at least two characters long, + // a sorting order and an attribute vector name + if (sortString.length() < 1) { + continue; + } + char orderMarker = sortString.charAt(0); + int funcAttrStart = 0; + if ((orderMarker == '+') || (orderMarker == '-')) { + funcAttrStart = 1; + } + 
AttributeSorter sorter = null; + int startPar = sortString.indexOf('(',funcAttrStart); + int endPar = sortString.lastIndexOf(')'); + if ((startPar > 0) && (endPar > startPar)) { + String funcName = sortString.substring(funcAttrStart, startPar); + if (LOWERCASE.equalsIgnoreCase(funcName)) { + sorter = new LowerCaseSorter(sortString.substring(startPar+1, endPar)); + } else if (RAW.equalsIgnoreCase(funcName)) { + sorter = new RawSorter(sortString.substring(startPar+1, endPar)); + } else if (UCA.equalsIgnoreCase(funcName)) { + int commaPos = sortString.indexOf(',', startPar+1); + if ((startPar+1 < commaPos) && (commaPos < endPar)) { + int commaopt = sortString.indexOf(',', commaPos + 1); + UcaSorter.Strength strength = UcaSorter.Strength.UNDEFINED; + if (commaopt > 0) { + String s = sortString.substring(commaopt+1, endPar); + if (STRENGTH_PRIMARY.equalsIgnoreCase(s)) { + strength = UcaSorter.Strength.PRIMARY; + } else if (STRENGTH_SECONDARY.equalsIgnoreCase(s)) { + strength = UcaSorter.Strength.SECONDARY; + } else if (STRENGTH_TERTIARY.equalsIgnoreCase(s)) { + strength = UcaSorter.Strength.TERTIARY; + } else if (STRENGTH_QUATERNARY.equalsIgnoreCase(s)) { + strength = UcaSorter.Strength.QUATERNARY; + } else if (STRENGTH_IDENTICAL.equalsIgnoreCase(s)) { + strength = UcaSorter.Strength.IDENTICAL; + } else { + throw new IllegalArgumentException("Unknown collation strength: '" + s + "'"); + } + sorter = new UcaSorter(sortString.substring(startPar+1, commaPos), sortString.substring(commaPos+1, commaopt), strength); + } else { + sorter = new UcaSorter(sortString.substring(startPar+1, commaPos), sortString.substring(commaPos+1, endPar), strength); + } + } else { + sorter = new UcaSorter(sortString.substring(startPar+1, endPar)); + } + } else { + if (funcName.isEmpty()) { + throw new IllegalArgumentException("No sort function specified"); + } else { + throw new IllegalArgumentException("Unknown sort function '" + funcName + "'"); + } + } + } else { + sorter = new 
AttributeSorter(sortString.substring(funcAttrStart)); + } + Order order = Order.UNDEFINED; + if (funcAttrStart != 0) { + // Override in sortspec + order = (orderMarker == '+') ? Order.ASCENDING : Order.DESCENDING; + } + fieldOrders.add(new FieldOrder(sorter, order)); + } + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + + String space = ""; + for (FieldOrder spec : fieldOrders) { + sb.append(space); + if (spec.getSortOrder() == Order.DESCENDING) { + sb.append("-"); + } else { + sb.append("+"); + } + sb.append(spec.getFieldName()); + space = " "; + } + return sb.toString(); + } + + + public enum Order {ASCENDING,DESCENDING,UNDEFINED} + + /** + * Returns the field orders of this sort specification as list. This is never null but can be empty. + * This list can be modified to change this sort spec. + */ + public List<FieldOrder> fieldOrders() { return fieldOrders; } + + public Sorting clone() { + return new Sorting(this.fieldOrders); + } + + public static class AttributeSorter implements Cloneable { + private static final Pattern legalAttributeName = Pattern.compile("[\\[]*[a-zA-Z_][\\.a-zA-Z0-9_-]*[\\]]*"); + + private String fieldName; + public AttributeSorter(String fieldName) { + if (legalAttributeName.matcher(fieldName).matches()) { + this.fieldName = fieldName; + } else { + throw new IllegalArgumentException("Illegal attribute name '" + fieldName + "' for sorting. 
Requires '" + legalAttributeName.pattern() + "'"); + } + } + public String getName() { return fieldName; } + public void setName(String fieldName) { this.fieldName = fieldName; } + @Override + public String toString() { return fieldName; } + @Override + public int hashCode() { return fieldName.hashCode(); } + @Override + public boolean equals(Object other) { + if (!(other instanceof AttributeSorter)) { + return false; + } + return ((AttributeSorter) other).fieldName.equals(fieldName); + } + @Override + public AttributeSorter clone() { + try { + return (AttributeSorter)super.clone(); + } + catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + + } + @SuppressWarnings({ "rawtypes", "unchecked" }) + public int compare(Comparable a, Comparable b) { + return a.compareTo(b); + } + + } + public static class RawSorter extends AttributeSorter + { + public RawSorter(String fieldName) { super(fieldName); } + @Override + public boolean equals(Object other) { + if (!(other instanceof RawSorter)) { + return false; + } + return super.equals(other); + } + } + public static class LowerCaseSorter extends AttributeSorter + { + public LowerCaseSorter(String fieldName) { super(fieldName); } + @Override + public String toString() { return "lowercase(" + getName() + ')'; } + @Override + public int hashCode() { return 1 + 3*super.hashCode(); } + @Override + public boolean equals(Object other) { + if (!(other instanceof LowerCaseSorter)) { + return false; + } + return super.equals(other); + } + @SuppressWarnings({ "rawtypes", "unchecked" }) + public int compare(Comparable a, Comparable b) { + if ((a instanceof String) && (b instanceof String)) { + return ((String)a).compareToIgnoreCase((String) b); + } + return a.compareTo(b); + } + } + public static class UcaSorter extends AttributeSorter + { + public enum Strength { PRIMARY, SECONDARY, TERTIARY, QUATERNARY, IDENTICAL, UNDEFINED }; + private String locale = null; + private Strength strength = Strength.UNDEFINED; + 
private Collator collator; + public UcaSorter(String fieldName, String locale, Strength strength) { super(fieldName); setLocale(locale, strength); } + public UcaSorter(String fieldName) { super(fieldName); } + static private int strength2Collator(Strength strength) { + switch (strength) { + case PRIMARY: return Collator.PRIMARY; + case SECONDARY: return Collator.SECONDARY; + case TERTIARY: return Collator.TERTIARY; + case QUATERNARY: return Collator.QUATERNARY; + case IDENTICAL: return Collator.IDENTICAL; + case UNDEFINED: return Collator.PRIMARY; + } + return Collator.PRIMARY; + } + public void setLocale(String locale, Strength strength) { + this.locale = locale; + this.strength = strength; + ULocale uloc; + try { + uloc = new ULocale(locale); + } catch (Throwable e) { + throw new RuntimeException("ULocale("+locale+") failed with exception " + e.toString()); + } + try { + collator = Collator.getInstance(uloc); + if (collator == null) { + throw new RuntimeException("No collator available for: " + locale); + } + } catch (Throwable e) { + throw new RuntimeException("Collator.getInstance(ULocale("+locale+")) failed with exception " + e.toString()); + } + collator.setStrength(strength2Collator(strength)); + // collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); + } + public String getLocale() { return locale; } + public Strength getStrength() { return strength; } + public Collator getCollator() { return collator; } + public String getDecomposition() { return (collator.getDecomposition() == Collator.CANONICAL_DECOMPOSITION) ? "CANONICAL_DECOMPOSITION" : "NO_DECOMPOSITION"; } + @Override + public String toString() { return "uca(" + getName() + ',' + locale + ',' + ((strength != Strength.UNDEFINED) ? 
strength.toString() : "PRIMARY") + ')'; } + @Override + public int hashCode() { return 1 + 3*locale.hashCode() + 5*strength.hashCode() + 7*super.hashCode(); } + @Override + public boolean equals(Object other) { + if (!(other instanceof UcaSorter)) { + return false; + } + return super.equals(other) && locale.equals(((UcaSorter)other).locale) && (strength == ((UcaSorter)other).strength); + } + public UcaSorter clone() { + UcaSorter clone = (UcaSorter)super.clone(); + if (locale != null) { + clone.setLocale(locale, strength); + } + return clone; + } + @SuppressWarnings({ "rawtypes", "unchecked" }) + public int compare(Comparable a, Comparable b) { + if ((a instanceof String) && (b instanceof String)) { + return collator.compare((String)a, (String) b); + } + return a.compareTo(b); + } + } + /** + * An attribute (field) and how it should be sorted + */ + public static class FieldOrder implements Cloneable { + + private AttributeSorter fieldSorter; + private Order sortOrder; + + /** + * Creates an attribute vector + * + * @param fieldSorter the sorter of this attribute + * @param sortOrder whether to sort this ascending or descending + */ + public FieldOrder(AttributeSorter fieldSorter, Order sortOrder) { + this.fieldSorter = fieldSorter; + this.sortOrder = sortOrder; + } + + /** + * Returns the name of this attribute + */ + public String getFieldName() { + return fieldSorter.getName(); + } + + /** + * Returns the sorter of this attribute + */ + public AttributeSorter getSorter() { return fieldSorter; } + public void setSorter(AttributeSorter sorter) { fieldSorter = sorter; } + + /** + * Returns the sorting order of this attribute + */ + public Order getSortOrder() { + return sortOrder; + } + + /** + * Decide if sortorder is ascending or not. + */ + public void setAscending(boolean asc) { + sortOrder = asc ? 
Order.ASCENDING : Order.DESCENDING; + } + + @Override + public String toString() { + return sortOrder.toString() + ":" + fieldSorter.toString(); + } + + @Override + public int hashCode() { + return sortOrder.hashCode() + 17 * fieldSorter.hashCode(); + } + @Override + public boolean equals(Object other) { + if (!(other instanceof FieldOrder)) { + return false; + } + FieldOrder otherAttr = (FieldOrder) other; + + return otherAttr.sortOrder.equals(sortOrder) + && otherAttr.fieldSorter.equals(fieldSorter); + } + @Override + public FieldOrder clone() { + return new FieldOrder(fieldSorter.clone(), sortOrder); + } + } + + @Override + public int hashCode() { + return fieldOrders.hashCode(); + } + + @Override + public boolean equals(Object o) { + if (o == this) return true; + if( ! (o instanceof Sorting)) return false; + + Sorting ss = (Sorting) o; + return fieldOrders.equals(ss.fieldOrders); + } + + public int encode(ByteBuffer buffer) { + int usedBytes = 0; + byte[] nameBuffer; + buffer.position(); + byte space = '.'; + for (FieldOrder fieldOrder : fieldOrders) { + if (space == ' ') { + buffer.put(space); + usedBytes++; + } + if (fieldOrder.getSortOrder() == Order.ASCENDING) { + buffer.put((byte) '+'); + } else { + buffer.put((byte) '-'); + } + usedBytes++; + nameBuffer = Utf8.toBytes(fieldOrder.getSorter().toString()); + buffer.put(nameBuffer); + usedBytes += nameBuffer.length; + // If this isn't the last element, append a separating space + //if (i + 1 < sortSpec.size()) { + space = ' '; + } + return usedBytes; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/context/QueryContext.java b/container-search/src/main/java/com/yahoo/search/query/context/QueryContext.java new file mode 100644 index 00000000000..e59f8589903 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/context/QueryContext.java @@ -0,0 +1,112 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.search.query.context; + +import com.yahoo.processing.execution.Execution; +import com.yahoo.search.Query; +import com.yahoo.search.rendering.DefaultRenderer; +import com.yahoo.text.XMLWriter; +import com.yahoo.yolean.trace.TraceNode; + +import java.io.Writer; +import java.util.Iterator; + + +/** + * A proxy to the Execution.trace() which exists for legacy reasons. + * Calls to this is forwarded to owningQuery.getModel().getExecution().trace(). + * + * @since 4.2 + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class QueryContext implements Cloneable { + + public static final String ID = "context"; + private Query owner; + + public QueryContext(int ignored,Query owner) { + this.owner=owner; + } + + //---------------- Public API --------------------------------------------------------------------------------- + + /** Adds a context message to this context */ + public void trace(String message, int traceLevel) { + owner.getModel().getExecution().trace().trace(message,traceLevel); + } + + /** + * Adds a key-value which will be logged to the access log for this query (by doing toString() on the value + * Multiple values may be set to the same key. A value cannot be removed once set. + */ + public void logValue(String key,Object value) { + owner.getModel().getExecution().trace().logValue(key, value.toString()); + } + + /** Returns the values to be written to the access log for this */ + public Iterator<Execution.Trace.LogValue> logValueIterator() { + return owner.getModel().getExecution().trace().logValueIterator(); + } + + /** + * Adds a property key-value to this context. 
+ * If the same name is set multiple times, the behavior is thus: + * <ul> + * <li>Within a single context (thread/query clone), the last value set is used</li> + * <li>Across multiple traces, the <i>last</i> value from the <i>last</i> deepest nested thread/clone is used. + * In the case of multiple threads writing the value concurrently to their clone, it is of course undefined + * which one will be used.</li> + * </ul> + * + * @param name the name of the property + * @param value the value of the property, or null to set this property to null + */ + public void setProperty(String name,Object value) { + owner.getModel().getExecution().trace().setProperty(name,value); + } + + /** + * Returns a property set anywhere in this context. + * Note that even though this call is itself "thread robust", the object values returned + * may in some scenarios not be written behind a synchronization barrier, so when accessing + * objects which are not inherently thread safe, synchronization should be considered. + * <p> + * Note that this method have a time complexity which is proportional to + * the number of cloned/created queries times the average number of properties in each. 
+ */ + public Object getProperty(String name) { + return owner.getModel().getExecution().trace().getProperty(name); + } + + /** Returns a short string description of this (includes the first few messages only, and no newlines) */ + @Override + public String toString() { + return owner.getModel().getExecution().trace().toString(); + } + + public boolean render(Writer writer) throws java.io.IOException { + if (owner.getTraceLevel()!=0) { + XMLWriter xmlWriter=XMLWriter.from(writer); + xmlWriter.openTag("meta").attribute("type",ID); + TraceNode traceRoot=owner.getModel().getExecution().trace().traceNode().root(); + traceRoot.accept(new DefaultRenderer.RenderingVisitor(xmlWriter,owner.getStartTime())); + xmlWriter.closeTag(); + } + return true; + } + + public QueryContext cloneFor(Query cloneOwner) { + try { + QueryContext clone=(QueryContext)super.clone(); + clone.owner=cloneOwner; + return clone; + } + catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + } + + /** Returns the execution trace this delegates to */ + public Execution.Trace getTrace() { return owner.getModel().getExecution().trace(); } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/context/package-info.java b/container-search/src/main/java/com/yahoo/search/query/context/package-info.java new file mode 100644 index 00000000000..c19e5abedd0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/context/package-info.java @@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
@ExportPackage
@PublicApi
package com.yahoo.search.query.context;

import com.yahoo.api.annotations.PublicApi;
import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/container-search/src/main/java/com/yahoo/search/query/package-info.java b/container-search/src/main/java/com/yahoo/search/query/package-info.java new file mode 100644 index 00000000000..2384169c52b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/package-info.java @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
/**
 * The search query model
 */
@ExportPackage
@PublicApi
package com.yahoo.search.query;

import com.yahoo.api.annotations.PublicApi;
import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/container-search/src/main/java/com/yahoo/search/query/parser/Parsable.java b/container-search/src/main/java/com/yahoo/search/query/parser/Parsable.java new file mode 100644 index 00000000000..92601a5464d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/parser/Parsable.java @@ -0,0 +1,112 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.parser;

import com.yahoo.language.Language;
import com.yahoo.search.query.Model;

import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

/**
 * <p>This class encapsulates all the parameters required to call {@link Parser#parse(Parsable)}. Because all set-
 * methods return a reference to self, you can write very compact calls to the parser:</p>
 *
 * <pre>
 * parser.parse(new Parsable()
 *                  .setQuery("foo")
 *                  .setFilter("bar")
 *                  .setDefaultIndexName("default")
 *                  .setLanguage(Language.ENGLISH))
 * </pre>
 *
 * <p>In case you are parsing the content of a {@link Model}, you can use the {@link #fromQueryModel(Model)} factory for
 * convenience.</p>
 *
 * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
 * @since 5.1.4
 */
public final class Parsable {

    // Despite the "List" suffix both of these are sets: adding the same name twice has no effect
    private final Set<String> sourceList = new HashSet<>();
    private final Set<String> restrictList = new HashSet<>();
    // The remaining fields are null until set
    private String query;
    private String filter;
    private String defaultIndexName;
    private Language language;

    /** Returns the query string, or null if not set */
    public String getQuery() {
        return query;
    }

    /** Sets the query string and returns this for chaining */
    public Parsable setQuery(String query) {
        this.query = query;
        return this;
    }

    /** Returns the filter string, or null if not set */
    public String getFilter() {
        return filter;
    }

    /** Sets the filter string and returns this for chaining */
    public Parsable setFilter(String filter) {
        this.filter = filter;
        return this;
    }

    /** Returns the default index name, or null if not set */
    public String getDefaultIndexName() {
        return defaultIndexName;
    }

    /** Sets the default index name and returns this for chaining */
    public Parsable setDefaultIndexName(String defaultIndexName) {
        this.defaultIndexName = defaultIndexName;
        return this;
    }

    /** Returns the language, or null if not set */
    public Language getLanguage() {
        return language;
    }

    /** Sets the language and returns this for chaining */
    public Parsable setLanguage(Language language) {
        this.language = language;
        return this;
    }

    /** Returns the live (modifiable) set of source names of this, never null */
    public Set<String> getSources() {
        return sourceList;
    }

    /** Adds a source name and returns this for chaining */
    public Parsable addSource(String sourceName) {
        sourceList.add(sourceName);
        return this;
    }

    /** Adds all the given source names and returns this for chaining */
    public Parsable addSources(Collection<String> sourceNames) {
        sourceList.addAll(sourceNames);
        return this;
    }

    /** Returns the live (modifiable) set of restrict names of this, never null */
    public Set<String> getRestrict() {
        return restrictList;
    }

    /** Adds a restrict name and returns this for chaining */
    public Parsable addRestrict(String restrictName) {
        restrictList.add(restrictName);
        return this;
    }

    /** Adds all the given restrict names and returns this for chaining */
    public Parsable addRestricts(Collection<String> restrictNames) {
        restrictList.addAll(restrictNames);
        return this;
    }

    /**
     * Creates a Parsable from the query string, filter, parsing language, default index,
     * sources and restrict settings of the given model.
     */
    public static Parsable fromQueryModel(Model model) {
        return new Parsable()
                .setQuery(model.getQueryString())
                .setFilter(model.getFilter())
                .setLanguage(model.getParsingLanguage())
                .setDefaultIndexName(model.getDefaultIndex())
                .addSources(model.getSources())
                .addRestricts(model.getRestrict());
    }

}
diff --git a/container-search/src/main/java/com/yahoo/search/query/parser/Parser.java b/container-search/src/main/java/com/yahoo/search/query/parser/Parser.java new file mode 100644 index 00000000000..3822b9b67d8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/parser/Parser.java @@ -0,0 +1,24 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.parser;

import com.yahoo.search.query.QueryTree;

/**
 * Defines the interface of a query parser. To construct an instance of this class, use the {@link ParserFactory}.
 *
 * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
 */
public interface Parser {

    /**
     * Parses the given {@link Parsable}, and returns a corresponding
     * {@link QueryTree}. If parsing fails without an exception, the contained
     * root will be an instance of {@link com.yahoo.prelude.query.NullItem}.
     *
     * @param query
     *            the Parsable to parse
     * @return the parsed QueryTree, never null
     */
    QueryTree parse(Parsable query);

}
diff --git a/container-search/src/main/java/com/yahoo/search/query/parser/ParserEnvironment.java b/container-search/src/main/java/com/yahoo/search/query/parser/ParserEnvironment.java new file mode 100644 index 00000000000..b00afa27bf6 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/parser/ParserEnvironment.java @@ -0,0 +1,76 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.search.query.parser; + +import com.yahoo.language.Linguistics; +import com.yahoo.language.simple.SimpleLinguistics; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.parser.SpecialTokenRegistry; +import com.yahoo.prelude.query.parser.SpecialTokens; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; + +/** + * This class encapsulates the environment of a {@link Parser}. In case you are creating a parser from within a + * {@link Searcher}, you can use the {@link #fromExecutionContext(Execution.Context)} factory for convenience. + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + * @since 5.1.4 + */ +public final class ParserEnvironment { + + private IndexFacts indexFacts = new IndexFacts(); + private Linguistics linguistics = new SimpleLinguistics(); + private SpecialTokens specialTokens = new SpecialTokens(); + + public IndexFacts getIndexFacts() { + return indexFacts; + } + + public ParserEnvironment setIndexFacts(IndexFacts indexFacts) { + this.indexFacts = indexFacts; + return this; + } + + public Linguistics getLinguistics() { + return linguistics; + } + + public ParserEnvironment setLinguistics(Linguistics linguistics) { + this.linguistics = linguistics; + return this; + } + + public SpecialTokens getSpecialTokens() { + return specialTokens; + } + + public ParserEnvironment setSpecialTokens(SpecialTokens specialTokens) { + this.specialTokens = specialTokens; + return this; + } + + public static ParserEnvironment fromExecutionContext(Execution.Context context) { + ParserEnvironment env = new ParserEnvironment(); + if (context == null) { + return env; + } + if (context.getIndexFacts() != null) { + env.setIndexFacts(context.getIndexFacts()); + } + if (context.getLinguistics() != null) { + env.setLinguistics(context.getLinguistics()); + } + SpecialTokenRegistry registry = context.getTokenRegistry(); + if (registry != null) { + 
env.setSpecialTokens(registry.getSpecialTokens("default")); + } + return env; + } + + public static ParserEnvironment fromParserEnvironment(ParserEnvironment environment) { + return new ParserEnvironment() + .setIndexFacts(environment.indexFacts) + .setLinguistics(environment.linguistics) + .setSpecialTokens(environment.specialTokens); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/query/parser/ParserFactory.java b/container-search/src/main/java/com/yahoo/search/query/parser/ParserFactory.java new file mode 100644 index 00000000000..e0a3338fec2 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/parser/ParserFactory.java @@ -0,0 +1,48 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.parser; + +import com.yahoo.prelude.query.parser.*; +import com.yahoo.search.Query; +import com.yahoo.search.yql.YqlParser; + +/** + * <p>Implements a factory for {@link Parser}.</p> + * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + * @since 5.1.4 + */ +public final class ParserFactory { + + private ParserFactory() { + // hide + } + + /** + * Creates a {@link Parser} appropriate for the given <tt>Query.Type</tt>, providing the Parser with access to + * the {@link ParserEnvironment} given. 
+ * + * @param type the query type for which to create a Parser + * @param environment the environment settings to attach to the Parser + * @return the created Parser + */ + public static Parser newInstance(Query.Type type, ParserEnvironment environment) { + switch (type) { + case ALL: + return new AllParser(environment); + case ANY: + return new AnyParser(environment); + case PHRASE: + return new PhraseParser(environment); + case ADVANCED: + return new AdvancedParser(environment); + case WEB: + return new WebParser(environment); + case PROGRAMMATIC: + return new ProgrammaticParser(); + case YQL: + return new YqlParser(environment); + default: + throw new UnsupportedOperationException(type.toString()); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/search/query/parser/package-info.java b/container-search/src/main/java/com/yahoo/search/query/parser/package-info.java new file mode 100644 index 00000000000..ddae3e83ddb --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/parser/package-info.java @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * Provides access to parsing query strings into queries + */ +@ExportPackage +@PublicApi +package com.yahoo.search.query.parser; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/AllReferencesQueryProfileVisitor.java b/container-search/src/main/java/com/yahoo/search/query/profile/AllReferencesQueryProfileVisitor.java new file mode 100644 index 00000000000..393aba2b002 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/AllReferencesQueryProfileVisitor.java @@ -0,0 +1,40 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.query.profile; + +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.query.profile.types.FieldDescription; +import com.yahoo.search.query.profile.types.QueryProfileFieldType; +import com.yahoo.search.query.profile.types.QueryProfileType; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +/** + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +final class AllReferencesQueryProfileVisitor extends PrefixQueryProfileVisitor { + + /** A map of query profile types */ + private Set<CompoundName> references = new HashSet<>(); + + public AllReferencesQueryProfileVisitor(CompoundName prefix) { + super(prefix); + } + + @Override + public void onValue(String name, Object value, DimensionBinding binding, QueryProfile owner) {} + + @Override + public void onQueryProfileInsidePrefix(QueryProfile profile, DimensionBinding binding, QueryProfile owner) { + references.add(currentPrefix); + } + + /** Returns the values resulting from this visiting */ + public Set<CompoundName> getResult() { return references; } + + /** Returns false - we are not done until we have seen all */ + public boolean isDone() { return false; } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/AllTypesQueryProfileVisitor.java b/container-search/src/main/java/com/yahoo/search/query/profile/AllTypesQueryProfileVisitor.java new file mode 100644 index 00000000000..fb9638a958b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/AllTypesQueryProfileVisitor.java @@ -0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.query.profile; + +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.query.profile.types.FieldDescription; +import com.yahoo.search.query.profile.types.QueryProfileFieldType; +import com.yahoo.search.query.profile.types.QueryProfileType; + +import java.util.HashMap; +import java.util.Map; + +/** + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +final class AllTypesQueryProfileVisitor extends PrefixQueryProfileVisitor { + + /** A map of query profile types */ + private Map<CompoundName, QueryProfileType> types = new HashMap<>(); + + public AllTypesQueryProfileVisitor(CompoundName prefix) { + super(prefix); + } + + @Override + public void onValue(String name, Object value, DimensionBinding binding, QueryProfile owner) {} + + + @Override + public void onQueryProfileInsidePrefix(QueryProfile profile, DimensionBinding binding, QueryProfile owner) { + if (profile.getType() != null) + addReachableTypes(currentPrefix, profile.getType()); + } + + private void addReachableTypes(CompoundName name, QueryProfileType type) { + types.put(name, type); + for (FieldDescription fieldDescription : type.fields().values()) { + if ( ! 
(fieldDescription.getType() instanceof QueryProfileFieldType)) continue; + QueryProfileFieldType fieldType = (QueryProfileFieldType)fieldDescription.getType(); + if (fieldType.getQueryProfileType() !=null) { + addReachableTypes(name.append(fieldDescription.getName()), fieldType.getQueryProfileType()); + } + } + } + + /** Returns the values resulting from this visiting */ + public Map<CompoundName, QueryProfileType> getResult() { return types; } + + /** Returns false - we are not done until we have seen all */ + public boolean isDone() { return false; } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/AllUnoverridableQueryProfileVisitor.java b/container-search/src/main/java/com/yahoo/search/query/profile/AllUnoverridableQueryProfileVisitor.java new file mode 100644 index 00000000000..65c3480272e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/AllUnoverridableQueryProfileVisitor.java @@ -0,0 +1,45 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.query.profile; + +import com.yahoo.processing.request.CompoundName; + +import java.util.HashSet; +import java.util.Set; + +/** + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +final class AllUnoverridableQueryProfileVisitor extends PrefixQueryProfileVisitor { + + /** A map of query profile types */ + private Set<CompoundName> unoverridables = new HashSet<>(); + + public AllUnoverridableQueryProfileVisitor(CompoundName prefix) { + super(prefix); + } + + @Override + public void onValue(String name, Object value, DimensionBinding binding, QueryProfile owner) { + addUnoverridable(name, currentPrefix.append(name), binding, owner); + } + + @Override + public void onQueryProfileInsidePrefix(QueryProfile profile, DimensionBinding binding, QueryProfile owner) { + addUnoverridable(currentPrefix.last(), currentPrefix, binding, owner); + } + + private void addUnoverridable(String localName, CompoundName fullName, DimensionBinding binding, QueryProfile owner) { + if (owner == null) return; + + Boolean isOverridable = owner.isLocalOverridable(localName, binding); + if (isOverridable != null && ! isOverridable) + unoverridables.add(fullName); + } + + /** Returns the values resulting from this visiting */ + public Set<CompoundName> getResult() { return unoverridables; } + + /** Returns false - we are not done until we have seen all */ + public boolean isDone() { return false; } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/AllValuesQueryProfileVisitor.java b/container-search/src/main/java/com/yahoo/search/query/profile/AllValuesQueryProfileVisitor.java new file mode 100644 index 00000000000..bef5b00c51b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/AllValuesQueryProfileVisitor.java @@ -0,0 +1,44 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.query.profile; + +import com.yahoo.processing.request.CompoundName; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +/** + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +final class AllValuesQueryProfileVisitor extends PrefixQueryProfileVisitor { + + private Map<String,Object> values=new HashMap<>(); + + /* Lists all values starting at prefix */ + public AllValuesQueryProfileVisitor(CompoundName prefix) { + super(prefix); + } + + public @Override void onValue(String localName, Object value, DimensionBinding binding, QueryProfile owner) { + putValue(localName, value, values); + } + + public @Override void onQueryProfileInsidePrefix(QueryProfile profile, DimensionBinding binding, QueryProfile owner) { + putValue("", profile.getValue(), values); + } + + private final void putValue(String key, Object value, Map<String, Object> values) { + if (value == null) return; + CompoundName fullName = currentPrefix.append(key); + if (fullName.isEmpty()) return; // Avoid putting a non-leaf (subtree) root in the list + if (values.containsKey(fullName.toString())) return; // The first value encountered has priority + values.put(fullName.toString(), value); + } + + /** Returns the values resulting from this visiting */ + public Map<String, Object> getResult() { return values; } + + /** Returns false - we are not done until we have seen all */ + public boolean isDone() { return false; } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/BackedOverridableQueryProfile.java b/container-search/src/main/java/com/yahoo/search/query/profile/BackedOverridableQueryProfile.java new file mode 100644 index 00000000000..71b27c6da63 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/BackedOverridableQueryProfile.java @@ -0,0 +1,139 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.query.profile; + +import com.yahoo.processing.request.CompoundName; +import com.yahoo.protect.Validator; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * <p>A wrapper of a query profile where overrides to the values in the referenced + * profile can be set.</p> + * + * <p>This is used to allow configured overrides (in a particular referencing profile) of a referenced query profile. + * + * <p>Properties which are defined as not overridable in the type (if any) of the referenced query profile + * cannot be set.</p> + * + * @author bratseth + */ +public class BackedOverridableQueryProfile extends OverridableQueryProfile implements Cloneable { + + /** The backing read only query profile, or null if this is not backed */ + private QueryProfile backingProfile; + + /** + * Creates an overridable profile from the given backing profile. The backing profile will never be + * written to. + * + * @param backingProfile the backing profile, which is assumed read only, never null + */ + public BackedOverridableQueryProfile(QueryProfile backingProfile) { + Validator.ensureNotNull("An overridable query profile must be backed by a real query profile",backingProfile); + setType(backingProfile.getType()); + this.backingProfile=backingProfile; + } + + @Override + public synchronized void freeze() { + super.freeze(); + backingProfile.freeze(); + } + + @Override + protected Object localLookup(String localName, DimensionBinding dimensionBinding) { + Object valueInThis=super.localLookup(localName,dimensionBinding); + if (valueInThis!=null) return valueInThis; + return backingProfile.localLookup(localName,dimensionBinding); + } + + protected Boolean isLocalInstanceOverridable(String localName) { + Boolean valueInThis=super.isLocalInstanceOverridable(localName); + if (valueInThis!=null) return valueInThis; + return backingProfile.isLocalInstanceOverridable(localName); + } + + @Override + protected QueryProfile 
createSubProfile(String name,DimensionBinding dimensionBinding) { + Object backing=backingProfile.lookup(new CompoundName(name),true,dimensionBinding.createFor(backingProfile.getDimensions())); + if (backing!=null && backing instanceof QueryProfile) + return new BackedOverridableQueryProfile((QueryProfile)backing); + else + return new OverridableQueryProfile(); // Nothing is set in this branch, so nothing to override, but need override checking + } + + /** Returns a clone of this which can be independently overridden, but which refers to the same backing profile */ + @Override + public BackedOverridableQueryProfile clone() { + BackedOverridableQueryProfile clone=(BackedOverridableQueryProfile)super.clone(); + return clone; + } + + /** Returns the query profile backing this */ + public QueryProfile getBacking() { return backingProfile; } + + @Override + public void addInherited(QueryProfile inherited) { + backingProfile.addInherited(inherited); + } + + void addInheritedHere(QueryProfile inherited) { + super.addInherited(inherited); + } + + @Override + protected void visitVariants(boolean allowContent,QueryProfileVisitor visitor,DimensionBinding dimensionBinding) { + super.visitVariants(allowContent, visitor, dimensionBinding); + if (visitor.isDone()) return; + backingProfile.visitVariants(allowContent, visitor, dimensionBinding); + } + + @Override + protected void visitInherited(boolean allowContent,QueryProfileVisitor visitor,DimensionBinding dimensionBinding, QueryProfile owner) { + super.visitInherited(allowContent,visitor,dimensionBinding, owner); + if (visitor.isDone()) return; + backingProfile.visitInherited(allowContent,visitor,dimensionBinding,owner); + } + + /** Returns a value from the content of this: The value in this, or the value from the backing if not set in this */ + protected Object getContent(String localKey) { + Object value=super.getContent(localKey); + if (value!=null) return value; + return backingProfile.getContent(localKey); + } + + /** + * 
Returns all the content from this: + * All the values in this, and all values in the backing where an overriding value is not set in this + */ + @Override + protected Map<String,Object> getContent() { + Map<String,Object> thisContent=super.getContent(); + Map<String,Object> backingContent=backingProfile.getContent(); + if (thisContent.isEmpty()) return backingContent; // Shortcut + if (backingContent.isEmpty()) return thisContent; // Shortcut + Map<String,Object> content=new HashMap<>(backingContent); + content.putAll(thisContent); + return content; + } + + @Override + public String toString() { + return "overridable wrapper of " + backingProfile.toString(); + } + + @Override + public boolean isExplicit() { + return backingProfile.isExplicit(); + } + + @Override + public List<String> getDimensions() { + List<String> dimensions=super.getDimensions(); + if (dimensions!=null) return dimensions; + return backingProfile.getDimensions(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/CopyOnWriteContent.java b/container-search/src/main/java/com/yahoo/search/query/profile/CopyOnWriteContent.java new file mode 100644 index 00000000000..3c02677b676 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/CopyOnWriteContent.java @@ -0,0 +1,159 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.profile; + +import com.yahoo.component.provider.FreezableClass; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +/** + * A HashMap wrapper which can be cloned without copying the wrapped map. + * Copying of the map is deferred until there is a write access to the wrapped map. + * This may be frozen, at which point no further modifications are allowed. + * Note that <b>until</b> this is cloned, the internal map may be both read and written. 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class CopyOnWriteContent extends FreezableClass implements Cloneable { + + // TODO: Now that we used CompiledQueryProfiles at runtime we can remove this + + // Possible states: + // WRITABLE: The map can be freely modified - it is only used by this + // -> !isFrozen() && (map!=null || unmodifiableMap==null) + // COPYONWRITE: The map is referred by at least one clone - further modification must cause a copy + // -> !isFrozen() && (map==null && unmodifiableMap!=null) + // FROZEN: No further changes are allowed to the state of this, ever + // -> isFrozen() + + // Possible start states: + // WRITABLE: When created using the public constructor + // COPYONWRITE: When created by cloning + + // Possible state transitions: + // WRITABLE->COPYONWRITE: When this is cloned + // COPYONWRITE->WRITABLE: When a clone is written to + // (COPYONWRITE,WRITABLE)->FROZEN: When a profile is frozen + + /** The modifiable content of this. 
Null if this is empty or if this is not in the WRITABLE state */ + private Map<String,Object> map=null; + /** + * If map is non-null this is either null (not instantiated yet) or an unmodifiable wrapper of map, + * if map is null this is either null (this is empty) or a reference to the map of the content this was cloned from + */ + private Map<String,Object> unmodifiableMap =null; + + /** Create a WRITABLE, empty instance */ + public CopyOnWriteContent() { + } + + /** Create a COPYONWRITE instance with some initial state */ + private static CopyOnWriteContent createInCopyOnWriteState(Map<String,Object> unmodifiableMap) { + CopyOnWriteContent content=new CopyOnWriteContent(); + content.unmodifiableMap = unmodifiableMap; + return content; + } + + /** Create a WRITABLE instance with some initial state */ + private static CopyOnWriteContent createInWritableState(Map<String,Object> map) { + CopyOnWriteContent content=new CopyOnWriteContent(); + content.map = map; + return content; + } + + @Override + public void freeze() { + // Freeze this + if (unmodifiableMap==null) + unmodifiableMap= map!=null ? 
Collections.unmodifiableMap(map) : Collections.<String, Object>emptyMap(); + map=null; // just to keep the states simpler + + // Freeze content + for (Map.Entry<String,Object> entry : unmodifiableMap.entrySet()) { + if (entry.getValue() instanceof QueryProfile) + ((QueryProfile)entry.getValue()).freeze(); + } + super.freeze(); + } + + private boolean isEmpty() { + return (map==null || map.isEmpty()) && (unmodifiableMap ==null || unmodifiableMap.isEmpty()); + } + + private boolean isWritable() { + return !isFrozen() && (map!=null || unmodifiableMap==null); + } + + @Override + public CopyOnWriteContent clone() { + if (isEmpty()) return new CopyOnWriteContent(); // No referencing is necessary in this case + if (isDeepUnmodifiable(unmodifiableMap())) { + // Create an instance pointing to this and put both in the COPYONWRITE state + unmodifiableMap(); // Make sure we have an unmodifiable reference to the map below + map=null; // Put this into the COPYONWRITE state (unless it is already frozen, in which case this is a noop) + return createInCopyOnWriteState(unmodifiableMap()); + } + else { + // This contains query profiles, don't try to defer copying + return createInWritableState(deepClone(map)); + } + } + + private boolean isDeepUnmodifiable(Map<String,Object> map) { + for (Object value : map.values()) + if (value instanceof QueryProfile && !((QueryProfile)value).isFrozen()) return false; + return true; // all other values are primitives + } + + /** Deep clones a map - this handles all value types which can be found in a query profile */ + static Map<String,Object> deepClone(Map<String,Object> map) { + if (map==null) return null; + Map<String,Object> mapClone=new HashMap<>(map.size()); + for (Map.Entry<String,Object> entry : map.entrySet()) + mapClone.put(entry.getKey(),QueryProfile.cloneIfNecessary(entry.getValue())); + return mapClone; + } + + + //------- Content access ------------------------------------------------------- + + public Map<String,Object> 
unmodifiableMap() { + if (isEmpty()) return Collections.emptyMap(); + if (map==null) // in COPYONWRITE or FROZEN state + return unmodifiableMap; + // In WRITABLE state: Create unmodifiable wrapper if necessary and return it + if (unmodifiableMap==null) + unmodifiableMap=Collections.unmodifiableMap(map); + return unmodifiableMap; + } + + public Object get(String key) { + if (map!=null) return map.get(key); + if (unmodifiableMap!=null) return unmodifiableMap.get(key); + return null; + } + + public void put(String key,Object value) { + ensureNotFrozen(); + copyIfNotWritable(); + if (map==null) + map=new HashMap<>(); + map.put(key,value); + } + + public void remove(String key) { + ensureNotFrozen(); + copyIfNotWritable(); + if (map!=null) + map.remove(key); + } + + private void copyIfNotWritable() { + if (isWritable()) return; + // move from COPYONWRITE to WRITABLE state + map=new HashMap<>(unmodifiableMap); // deep clone is not necessary as this map is shallowly modifiable + unmodifiableMap=null; // will be created as needed + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/DimensionBinding.java b/container-search/src/main/java/com/yahoo/search/query/profile/DimensionBinding.java new file mode 100644 index 00000000000..9adacee74af --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/DimensionBinding.java @@ -0,0 +1,223 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.query.profile; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * An immutable, binding of a list of dimensions to dimension values + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class DimensionBinding { + + /** The dimensions of this */ + private List<String> dimensions=null; + + /** The values matching those dimensions */ + private DimensionValues values; + + /** The binding from those dimensions to values, and possibly other values */ + private Map<String,String> context; + + public static final DimensionBinding nullBinding = + new DimensionBinding(Collections.<String>unmodifiableList(Collections.<String>emptyList()), DimensionValues.empty, null); + + public static final DimensionBinding invalidBinding = + new DimensionBinding(Collections.<String>unmodifiableList(Collections.<String>emptyList()), DimensionValues.empty, null); + + /** Whether the value array contains only nulls */ + private boolean containsAllNulls; + + /** Creates a binding from a variant and a context. Any of the arguments may be null. */ + public static DimensionBinding createFrom(List<String> dimensions, Map<String,String> context) { + if (dimensions==null || dimensions.size()==0) { + if (context==null) return nullBinding; + if (dimensions==null) return new DimensionBinding(null,DimensionValues.empty,context); // Null, but must preserve context + } + + return new DimensionBinding(dimensions,extractDimensionValues(dimensions,context),context); + } + + /** Creates a binding from a variant and a context. Any of the arguments may be null. 
*/ + public static DimensionBinding createFrom(List<String> dimensions, DimensionValues dimensionValues) { + if (dimensionValues==null || dimensionValues==DimensionValues.empty) return nullBinding; + if (dimensions==null) return new DimensionBinding(null,dimensionValues,null); // Null, but preserve raw material for creating a context later (in createFor) + + return new DimensionBinding(dimensions,dimensionValues,null); + } + + /** Returns a binding for a (possibly) new set of variants. Variants may be null, but not bindings */ + public DimensionBinding createFor(List<String> newDimensions) { + if (newDimensions==null) return this; // Note: Not necessarily null - if no new variants then keep the existing binding + // if (this.context==null && values.length==0) return nullBinding; // No data from which to create a non-null binding + if (this.dimensions==newDimensions) return this; // Avoid creating a new object if the dimensions are the same + + Map<String,String> context=this.context; + if (context==null) + context=this.values.asContext(this.dimensions !=null ? this.dimensions : newDimensions); + return new DimensionBinding(newDimensions,extractDimensionValues(newDimensions,context),context); + } + + /** + * Creates a dimension binding. The dimensions list given should be unmodifiable. + * The array will not be modified. The context is needed in order to convert this binding to another + * given another set of variant dimensions. + */ + private DimensionBinding(List<String> dimensions, DimensionValues values, Map<String,String> context) { + this.dimensions=dimensions; + this.values=values; + this.context = context; + containsAllNulls=values.isEmpty(); + } + + /** Returns a read-only list of the dimensions of this. 
This value is undefined if this isNull() */ + public List<String> getDimensions() { return dimensions; } + + /** Returns a context created from the dimensions and values of this */ + public Map<String,String> getContext() { + if (context !=null) return context; + context =values.asContext(dimensions); + return context; + } + + /** + * Returns the values for the dimensions of this. This value is undefined if this isEmpty() + * This array is always of the same length as the + * length of the dimension list - missing elements are represented as nulls. + * This is never null but may be empty. + */ + public DimensionValues getValues() { return values; } + + /** Returns true only if this binding is null (contains no values for its dimensions (if any) */ + public boolean isNull() { return dimensions==null || containsAllNulls; } + + /** + * Returns an array of the dimension values corresponding to the dimensions of this from the given context, + * in the corresponding order. The array is always of the same length as the number of dimensions. + * Dimensions which are not set in this context get a null value. + */ + private static DimensionValues extractDimensionValues(List<String> dimensions,Map<String,String> context) { + String[] dimensionValues=new String[dimensions.size()]; + if (context==null || context.size()==0) return DimensionValues.createFrom(dimensionValues); + for (int i=0; i<dimensions.size(); i++) + dimensionValues[i]=context.get(dimensions.get(i)); + return DimensionValues.createFrom(dimensionValues); + } + + /** + * Combines this binding with another if compatible. 
+ * Two bindings are incompatible if + * <ul> + * <li>They contain a different value for the same key, or</li> + * <li>They contain the same pair of dimensions in a different order</li> + * </ul> + * + * @return the combined binding, or the special invalidBinding if these two bindings are incompatible + */ + public DimensionBinding combineWith(DimensionBinding binding) { + List<String> combinedDimensions = combineDimensions(getDimensions(), binding.getDimensions()); + if (combinedDimensions == null) return invalidBinding; + + // not runtime, so assume we don't need to preserve values outside the dimensions + Map<String, String> combinedValues = combineValues(getContext(), binding.getContext()); + if (combinedValues == null) return invalidBinding; + + return DimensionBinding.createFrom(combinedDimensions, combinedValues); + } + + /** + * Returns a combined list of dimensions from two separate lists, + * or null if they are incompatible. + * This is to combine two lists to one such that the partial order in both is preserved + * (or return null if impossible). + */ + private List<String> combineDimensions(List<String> d1, List<String> d2) { + List<String> combined = new ArrayList<>(); + int d1Index = 0, d2Index=0; + while (d1Index < d1.size() && d2Index < d2.size()) { + if (d1.get(d1Index).equals(d2.get(d2Index))) { // agreement on next element + combined.add(d1.get(d1Index)); + d1Index++; + d2Index++; + } + else if ( ! d2.contains(d1.get(d1Index))) { // next in d1 is independent from d2 + combined.add(d1.get(d1Index++)); + } + else if ( ! 
d1.contains(d2.get(d2Index))) { // next in d2 is independent from d1 + combined.add(d2.get(d2Index++)); + } + else { + return null; // no independent and no agreement + } + } + if (d1Index < d1.size()) + combined.addAll(d1.subList(d1Index, d1.size())); + else if (d2Index < d2.size()) + combined.addAll(d2.subList(d2Index, d2.size())); + + return combined; + } + + /** + * Returns a combined map of dimension values from two separate maps, + * or null if they are incompatible. + */ + private Map<String, String> combineValues(Map<String, String> m1, Map<String, String> m2) { + Map<String, String> combinedValues = new HashMap<>(m1); + for (Map.Entry<String, String> m2Entry : m2.entrySet()) { + if (m2Entry.getValue() == null) continue; + String m1Value = m1.get(m2Entry.getKey()); + if (m1Value != null && ! m1Value.equals(m2Entry.getValue())) + return null; // conflicting values of a key + combinedValues.put(m2Entry.getKey(), m2Entry.getValue()); + } + return combinedValues; + } + + private boolean intersects(List<String> l1, List<String> l2) { + for (String l1Item : l1) + if (l2.contains(l1Item)) + return true; + return false; + } + + /** + * Returns true if <code>this == invalidBinding</code> + */ + public boolean isInvalid() { return this == invalidBinding; } + + @Override + public String toString() { + if (isInvalid()) return "Invalid DimensionBinding"; + if (dimensions==null) return "DimensionBinding []"; + StringBuilder b=new StringBuilder("DimensionBinding ["); + for (int i=0; i<dimensions.size(); i++) { + b.append(dimensions.get(i)).append("=").append(values.get(i)); + if (i<dimensions.size()-1) + b.append(", "); + } + b.append("]"); + return b.toString(); + } + + /** Two bindings are equal if they contain the same dimensions and the same non-null values */ + @Override + public boolean equals(Object o) { + if (o==this) return true; + if (! (o instanceof DimensionBinding)) return false; + DimensionBinding other = (DimensionBinding)o; + if ( ! 
this.dimensions.equals(other.dimensions)) return false; + if ( ! this.values.equals(other.values)) return false; + return true; + } + + @Override + public int hashCode() { + return dimensions.hashCode() + 17 * values.hashCode(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/DimensionValues.java b/container-search/src/main/java/com/yahoo/search/query/profile/DimensionValues.java new file mode 100644 index 00000000000..10435c4c6b5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/DimensionValues.java @@ -0,0 +1,140 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.profile; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * An immutable set of dimension values. + * Note that this may contain more or fewer values than needed given a set of dimensions. + * Any missing values are treated as null. + */ +public class DimensionValues implements Comparable<DimensionValues> { + + private final String[] values; + + public static final DimensionValues empty=new DimensionValues(new String[] {}); + + public static DimensionValues createFrom(String[] values) { + if (values==null || values.length==0 || containsAllNulls(values)) return empty; + return new DimensionValues(values); + } + + /** + * Creates a set of dimension values from the given array, which is copied, + * so later changes to the input array do not affect this. + * + * @param values the dimension values, never null. This need not be normalized to the right size. + * The input array is copied by this. + */ + private DimensionValues(String[] values) { + if (values==null) throw new NullPointerException("Dimension values cannot be null"); + this.values=Arrays.copyOf(values,values.length); + } + + /** Returns true if this has the same value as givenValues at every position where this has a value. 
*/ + public boolean matches(DimensionValues givenValues) { + for (int i=0; i<this.size() || i<givenValues.size() ; i++) + if ( ! matches(this.get(i),givenValues.get(i))) + return false; + return true; + } + + private final boolean matches(String conditionString,String checkString) { + if (conditionString==null) return true; + return conditionString.equals(checkString); + } + + /** + * Implements the sort order of this which is based on specificity + * where dimensions to the left are more significant: + * -1 is returned if this is more specific than other, + * 1 is returned if other is more specific than this, + * 0 is returned if none is more specific than the other. + * <p> + * <b>Note:</b> This ordering is not consistent with equals - it returns 0 when the same dimensions + * are <i>set</i>, regardless of what they are set <i>to</i>. + */ + @Override + public int compareTo(DimensionValues other) { + for (int i=0; i<this.size() || i<other.size(); i++) { + if (get(i)!=null && other.get(i)==null) + return -1; + if (get(i)==null && other.get(i)!=null) + return 1; + } + return 0; + } + + /** Helper method which uses compareTo to return whether this is most specific */ + public boolean isMoreSpecificThan(DimensionValues other) { + return this.compareTo(other)<0; + } + + @Override + public boolean equals(Object o) { + if (this==o) return true; + if ( ! (o instanceof DimensionValues)) return false; + DimensionValues other=(DimensionValues)o; + for (int i=0; i<this.size() || i<other.size(); i++) { + if (get(i)==null) { + if (other.get(i)!=null) return false; + } + else { + if ( ! 
get(i).equals(other.get(i))) return false; + } + } + return true; + } + + @Override + public int hashCode() { + int hashCode = 0; + int i = 0; + for (String value : values) { + i++; + if (value != null) + hashCode += value.hashCode() * i; + } + return hashCode; + } + + @Override + public String toString() { return Arrays.toString(values); } + + public boolean isEmpty() { + return this==empty; + } + + private static boolean containsAllNulls(String[] values) { + for (String value : values) + if (value!=null) return false; + return true; + } + + public Map<String,String> asContext(List<String> dimensions) { + Map<String,String> context=new HashMap<>(); + if (dimensions==null) return context; + for (int i=0; i<dimensions.size(); i++) { + context.put(dimensions.get(i),get(i)); + } + return context; + } + + /** Returns the string at the given index, <b>or null if it has no value at this index.</b> */ + public String get(int index) { + if (index>=values.length) return null; + return values[index]; + } + + /** Returns the number of values in this (some of which may be null) */ + public int size() { return values.length; } + + /** Returns copy of the values in this in an array */ + public String[] getValues() { + return Arrays.copyOf(values,values.length); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/DumpTool.java b/container-search/src/main/java/com/yahoo/search/query/profile/DumpTool.java new file mode 100644 index 00000000000..b9d631cdd10 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/DumpTool.java @@ -0,0 +1,89 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.query.profile; + +import java.io.File; +import java.util.Map; + +import com.yahoo.yolean.Exceptions; +import com.yahoo.search.Query; +import com.yahoo.search.query.profile.config.QueryProfileXMLReader; + +/** + * A standalone tool for dumping query profile properties + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class DumpTool { + + /** Creates and returns a dump from some parameters */ + public String resolveAndDump(String... args) { + if (args.length==0 || args[0].startsWith("-")) { + StringBuilder result=new StringBuilder(); + result.append("Dumps all resolved query profile properties for a set of dimension values\n"); + result.append("USAGE: dump [query-profile] [dir]? [parameters]?\n"); + result.append(" and [query-profile] is the name of the query profile to dump the values of\n"); + result.append(" and [dir] is a path to an application package or query profile directory. Default: current dir\n"); + result.append(" and [parameters] is the http request encoded dimension keys used during resolving. 
Default: none\n"); + result.append("Examples:\n"); + result.append(" dump default\n"); + result.append(" - dumps the 'default' profile non-variant values in the current dir\n"); + result.append(" dump default x=x1&y=y1\n"); + result.append(" - dumps the 'default' profile resolved with dimensions values x=x1 and y=y1 in the current dir\n"); + result.append(" dump default myapppackage\n"); + result.append(" - dumps the 'default' profile non-variant values in myapppackage/search/query-profiles\n"); + result.append(" dump default dev/myprofiles x=x1&y=y1\n"); + result.append(" - dumps the 'default' profile resolved with dimensions values x=x1 and y=y1 in dev/myprofiles\n"); + return result.toString(); + } + + // Find what the arguments means + if (args.length>=3) { + return dump(args[0],args[1],args[2]); + } + else if (args.length==2) { + if (args[1].indexOf("=")>=0) + return dump(args[0],"",args[1]); + else + return dump(args[0],args[1],""); + } + else { // args.length=1 + return dump(args[0],"",""); + } + } + + private String dump(String profileName,String dir,String parameters) { + // Import profiles + if (dir.isEmpty()) + dir="."; + File dirInAppPackage=new File(dir,"search/query-profiles"); + if (dirInAppPackage.exists()) + dir=dirInAppPackage.getPath(); + QueryProfileXMLReader reader = new QueryProfileXMLReader(); + QueryProfileRegistry registry = reader.read(dir); + registry.freeze(); + + // Dump (through query to get wiring & parameter parsing done easily) + Query query = new Query("?" + parameters, registry.compile().findQueryProfile(profileName)); + Map<String,Object> properties=query.properties().listProperties(); + + // Create result + StringBuilder b=new StringBuilder(); + for (Map.Entry<String,Object> property : properties.entrySet()) { + b.append(property.getKey()); + b.append("="); + b.append(property.getValue().toString()); + b.append("\n"); + } + return b.toString(); + } + + public static void main(String... 
args) { + try { + System.out.print(new DumpTool().resolveAndDump(args)); + } + catch (Exception e) { + System.err.println(Exceptions.toMessageString(e)); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/FieldDescriptionQueryProfileVisitor.java b/container-search/src/main/java/com/yahoo/search/query/profile/FieldDescriptionQueryProfileVisitor.java new file mode 100644 index 00000000000..73c0fcd2cb1 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/FieldDescriptionQueryProfileVisitor.java @@ -0,0 +1,70 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.profile; + +import com.yahoo.search.query.profile.types.FieldDescription; + +import java.util.List; + +/** + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +final class FieldDescriptionQueryProfileVisitor extends QueryProfileVisitor { + + /** The result, or null if none */ + private FieldDescription result = null; + + private final List<String> name; + + private int nameIndex=-1; + + private boolean enteringContent=false; + + public FieldDescriptionQueryProfileVisitor(List<String> name) { + this.name=name; + } + + @Override + public String getLocalKey() { + return name.get(nameIndex); + } + + @Override + public boolean enter(String name) { + if (nameIndex+2<this.name.size()) { + nameIndex++; + enteringContent=true; + } + else { + enteringContent=false; + } + return enteringContent; + } + + @Override + public void leave(String name) { + nameIndex--; + } + + @Override + public void onValue(String name,Object value, DimensionBinding binding, QueryProfile owner) { + } + + @Override + public void onQueryProfile(QueryProfile profile, DimensionBinding binding, QueryProfile owner) { + if (enteringContent) return; // not at leaf query profile + if (profile.getType() == null) return; + result = 
profile.getType().getField(name.get(name.size()-1)); + } + + @Override + public boolean isDone() { + return result != null; + } + + public FieldDescription result() { return result; } + + @Override + public String toString() { + return "a query profile type visitor (hash " + hashCode() + ") with current value " + result; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/ModelObjectMap.java b/container-search/src/main/java/com/yahoo/search/query/profile/ModelObjectMap.java new file mode 100644 index 00000000000..242c551f876 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/ModelObjectMap.java @@ -0,0 +1,26 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.profile; + +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.query.profile.types.FieldType; +import com.yahoo.search.query.properties.PropertyMap; + +/** + * A map which stores all types which cannot be stored in a query profile, + * that is, rich model objects. + * <p> + * This map will deep copy not only the model object map, but also each + * cloneable member in the map. + * + * @author bratseth + */ +public class ModelObjectMap extends PropertyMap { + + /** Returns true if the value is null, or if the class of the value is not acceptable as a query profile value */ + @Override + protected boolean shouldSet(CompoundName name,Object value) { + if (value==null) return true; + return FieldType.fromClass(value.getClass())==null; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/OverridableQueryProfile.java b/container-search/src/main/java/com/yahoo/search/query/profile/OverridableQueryProfile.java new file mode 100644 index 00000000000..5d0bffa1ea8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/OverridableQueryProfile.java @@ -0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.profile; + +import com.yahoo.component.ComponentId; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.query.profile.types.QueryProfileType; + +/** + * A regular query profile which knows it is storing overrides (not configured profiles) + * and that implements override legality checking. + * + * @author bratseth + */ +public class OverridableQueryProfile extends QueryProfile { + + private static final String simpleClassName = OverridableQueryProfile.class.getSimpleName(); + + /** Creates an unbacked overridable query profile */ + protected OverridableQueryProfile() { + super(ComponentId.createAnonymousComponentId(simpleClassName)); + } + + @Override + protected Object checkAndConvertAssignment(String localName, Object inputValue, QueryProfileRegistry registry) { + Object value=super.checkAndConvertAssignment(localName, inputValue, registry); + if (value!=null && value.getClass() == QueryProfile.class) { // We are assigning a query profile - make it overridable + return new BackedOverridableQueryProfile((QueryProfile)value); + } + return value; + } + + @Override + protected QueryProfile createSubProfile(String name,DimensionBinding binding) { + return new OverridableQueryProfile(); // Nothing is set in this branch, so nothing to override, but need override checking + } + + /** Returns a clone of this which can be independently overridden */ + @Override + public OverridableQueryProfile clone() { + if (isFrozen()) return this; + OverridableQueryProfile clone=(OverridableQueryProfile)super.clone(); + clone.initId(ComponentId.createAnonymousComponentId(simpleClassName)); + return clone; + } + + @Override + public String toString() { + return "an overridable query profile with no backing"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/PrefixQueryProfileVisitor.java 
b/container-search/src/main/java/com/yahoo/search/query/profile/PrefixQueryProfileVisitor.java new file mode 100644 index 00000000000..2a22d58d8b7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/PrefixQueryProfileVisitor.java @@ -0,0 +1,63 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.profile; + +import com.yahoo.processing.request.CompoundName; + +/** + * A query profile visitor which keeps track of name prefixes and can skip values outside a given prefix + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +abstract class PrefixQueryProfileVisitor extends QueryProfileVisitor { + + /** Only call onValue/onQueryProfile for nodes having this prefix */ + private final CompoundName prefix; + + /** The current prefix, relative to prefix. */ + protected CompoundName currentPrefix = CompoundName.empty; + + private int prefixComponentIndex = -1; + + public PrefixQueryProfileVisitor(CompoundName prefix) { + if (prefix == null) + prefix = CompoundName.empty; + this.prefix = prefix; + } + + @Override + public final void onQueryProfile(QueryProfile profile, DimensionBinding binding, QueryProfile owner) { + if (prefixComponentIndex < prefix.size()) return; // Not in the prefix yet + onQueryProfileInsidePrefix(profile, binding, owner); + } + + protected abstract void onQueryProfileInsidePrefix(QueryProfile profile, DimensionBinding binding, QueryProfile owner); + + @Override + public final boolean enter(String name) { + prefixComponentIndex++; + if (prefixComponentIndex-1 < prefix.size()) return true; // we're in the given prefix, which should not be included in the name + currentPrefix = currentPrefix.append(name); + return true; + } + + @Override + public final void leave(String name) { + prefixComponentIndex--; + if (prefixComponentIndex < prefix.size()) return; // we're in the given prefix, which should not be 
included in the name + if ( ! name.isEmpty() && ! currentPrefix.isEmpty()) + currentPrefix = currentPrefix.first(currentPrefix.size() - 1); + } + + /** + * Returns the correct prefix component if we are still going down the prefix path, + * or null to get all if we are inside the prefix + */ + @Override + public String getLocalKey() { + if (prefixComponentIndex < prefix.size()) + return prefix.get(prefixComponentIndex); + else + return null; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/QueryProfile.java b/container-search/src/main/java/com/yahoo/search/query/profile/QueryProfile.java new file mode 100644 index 00000000000..55210717305 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/QueryProfile.java @@ -0,0 +1,835 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.profile; + +import com.google.common.collect.ImmutableList; +import com.yahoo.component.ComponentId; +import com.yahoo.component.provider.FreezableSimpleComponent; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.processing.request.Properties; +import com.yahoo.search.query.profile.compiled.CompiledQueryProfile; +import com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry; +import com.yahoo.search.query.profile.types.FieldDescription; +import com.yahoo.search.query.profile.types.QueryProfileFieldType; +import com.yahoo.search.query.profile.types.QueryProfileType; + +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * A query profile is a data container with an id and a class (type). More precisely, it contains + * <ul> + * <li>An id, on the form name:version, where the version is optional, and follows the same rules as for other search container components. 
+ * <li>A class id referring to the class defining this profile (see Query Profile Classes below) + * <li>A (possibly empty) list of ids of inherited query profiles + * <li>A (possibly empty) list of declarative predicates over search request parameters which defines when this query profile is applicable (see Query Profile Selection below) + * <li>The data content, which consists of + * <ul> + * <li>named values + * <li>named references to other profiles + * </ul> + * </ul> + * + * This serves the purpose of an intermediate format between configuration and runtime structures - the runtime + * structure used is QueryProfileProperties. + * + * @author bratseth + */ +public class QueryProfile extends FreezableSimpleComponent implements Cloneable { + + /** Defines the permissible content of this, or null if any content is permissible */ + private QueryProfileType type=null; + + /** The value at this query profile - allows non-fields to have values, e.g. a=value1, a.b=value2 */ + private Object value=null; + + /** The variants of this, or null if none */ + private QueryProfileVariants variants=null; + + /** The resolved variant dimensions of this, or null if none or not resolved yet (is resolved at freeze) */ + private List<String> resolvedDimensions=null; + + /** The query profiles inherited by this, or null if none */ + private List<QueryProfile> inherited=null; + + /** The content of this profile. The values may be primitives, substitutable strings or other query profiles */ + private CopyOnWriteContent content=new CopyOnWriteContent(); + + /** + * Field override settings: fieldName→OverrideValue. These override the override + * setting in the type (if any) of this field. If there are no query profile level settings, this is null. + */ + private Map<String,Boolean> overridable=null; + + /** + * Creates a new query profile from an id. + * The query profile can be modified freely (but not accessed) until it is {@link #freeze frozen}. 
+ * At that point it becomes readable but unmodifiable, which it stays until it goes out of reference. + */ + public QueryProfile(ComponentId id) { + super(id); + if ( ! id.isAnonymous()) + validateName(id.getName()); + } + + /** Convenience shorthand for new QueryProfile(new ComponentId(idString)) */ + public QueryProfile(String idString) { + this(new ComponentId(idString)); + } + + // ----------------- Public API ------------------------------------------------------------------------------- + + // ----------------- Setters and getters + + /** Returns the type of this or null if it has no type */ + public QueryProfileType getType() { return type; } + + /** Sets the type of this, or set to null to not use any type checking in this profile */ + public void setType(QueryProfileType type) { this.type=type; } + + /** Returns the virtual variants of this, or null if none */ + public QueryProfileVariants getVariants() { return variants; } + + /** + * Returns the list of profiles inherited by this. + * Note that order matters for inherited profiles - variables are resolved depth first in the order found in + * the inherited list. This always returns an unmodifiable list - use addInherited to add. + */ + public List<QueryProfile> inherited() { + if (isFrozen()) return inherited; // Frozen profiles always have an unmodifiable, non-null list + if (inherited==null) return Collections.emptyList(); + return Collections.unmodifiableList(inherited); + } + + /** Adds a profile to the end of the inherited list of this. Throws an exception if this is frozen. */ + public void addInherited(QueryProfile profile) { + addInherited(profile,(DimensionValues)null); + } + + public final void addInherited(QueryProfile profile,String[] dimensionValues) { + addInherited(profile,DimensionValues.createFrom(dimensionValues)); + } + + /** Adds a profile to the end of the inherited list of this for the given variant. Throws an exception if this is frozen. 
*/ + public void addInherited(QueryProfile profile, DimensionValues dimensionValues) { + ensureNotFrozen(); + + DimensionBinding dimensionBinding=DimensionBinding.createFrom(getDimensions(),dimensionValues); + if (dimensionBinding.isNull()) { + if (inherited==null) + inherited=new ArrayList<>(); + inherited.add(profile); + } + else { + if (variants==null) + variants=new QueryProfileVariants(dimensionBinding.getDimensions(), this); + variants.inherit(profile,dimensionBinding.getValues()); + } + } + + /** + * Returns the content fields declared in this (i.e. not including those inherited) as a read-only map. + * @throws IllegalStateException if this is frozen + */ + public Map<String,Object> declaredContent() { + ensureNotFrozen(); + return content.unmodifiableMap(); + } + + /** + * Returns whether the given field is declared explicitly as overridable or not in this or any <i>nested</i> profiles + * (i.e. not including overridable settings <i>inherited</i> and from <i>types</i>). + * + * @param name the (possibly dotted) name of the field to check + * @param context the context in which the name is resolved, or null if none + * @return true/false if this is declared overridable/not overridable in this instance, null if it is not + * given any value in <i>this</i> profile instance + * @throws IllegalStateException if this is frozen + */ + public Boolean isDeclaredOverridable(String name, Map<String,String> context) { + return isDeclaredOverridable(new CompoundName(name),DimensionBinding.createFrom(getDimensions(),context)); + } + + /** Sets the dimensions over which this may vary. Note: This will erase any currently defined variants */ + public void setDimensions(String[] dimensions) { + ensureNotFrozen(); + variants=new QueryProfileVariants(dimensions, this); + } + + /** Returns the value set at this node, to allow non-leafs to have values. Returns null if none. 
*/ + public Object getValue() { return value; } + + public void setValue(Object value) { + ensureNotFrozen(); + this.value=value; + } + + /** Returns the variant dimensions to be used in this - an unmodifiable list of dimension names, or null if none */ + public List<String> getDimensions() { + if (isFrozen()) return resolvedDimensions; + if (variants!=null) return variants.getDimensions(); + if (inherited==null) return null; + for (QueryProfile inheritedProfile : inherited) { + List<String> inheritedDimensions=inheritedProfile.getDimensions(); + if (inheritedDimensions!=null) return inheritedDimensions; + } + return null; + } + + // ----------------- Query profile facade API + + /** + * Sets the overridability of a field in this profile, + * this overrides the corresponding setting in the type (if any) + */ + public final void setOverridable(String fieldName, boolean overridable, Map<String,String> context) { + setOverridable(new CompoundName(fieldName), overridable,DimensionBinding.createFrom(getDimensions(), context)); + } + + /** + * Returns all objects that start with the given prefix path using no context. Use "" to list all. + * <p> + * For example, if this contains {a.d => "a.d-value", a.e => "a.e-value", b.d => "b.d-value"}, then calling listValues("a") + * will return {"d" => "a.d-value","e" => "a.e-value"} + */ + public final Map<String, Object> listValues(String prefix) { return listValues(new CompoundName(prefix)); } + + /** + * Returns all objects that start with the given prefix path using no context. Use "" to list all. + * <p> + * For example, if this contains {a.d => "a.d-value", a.e => "a.e-value", b.d => "b.d-value"}, then calling listValues("a") + * will return {"d" => "a.d-value","e" => "a.e-value"} + */ + public final Map<String, Object> listValues(CompoundName prefix) { return listValues(prefix, null); } + + /** + * Returns all objects that start with the given prefix path. Use "" to list all. 
+ * <p> + * For example, if this contains {a.d => "a.d-value", a.e => "a.e-value", b.d => "b.d-value"}, then calling listValues("a") + * will return {"d" => "a.d-value","e" => "a.e-value"} + */ + public final Map<String, Object> listValues(String prefix, Map<String,String> context) { + return listValues(new CompoundName(prefix), context); + } + + /** + * Returns all objects that start with the given prefix path. Use "" to list all. + * <p> + * For example, if this contains {a.d => "a.d-value", a.e => "a.e-value", b.d => "b.d-value"}, then calling listValues("a") + * will return {"d" => "a.d-value","e" => "a.e-value"} + */ + public final Map<String, Object> listValues(CompoundName prefix, Map<String,String> context) { + return listValues(prefix, context, null); + } + + /** + * Returns all objects that start with the given path prefix; if substitution is non-null, SubstituteString values are replaced by their substituted value. Use "" to list all. + * <p> + * For example, if this contains {a.d => "a.d-value", a.e => "a.e-value", b.d => "b.d-value"}, then calling listValues("a") + * will return {"d" => "a.d-value","e" => "a.e-value"} + */ + public Map<String, Object> listValues(CompoundName prefix, Map<String, String> context, Properties substitution) { + DimensionBinding dimensionBinding=DimensionBinding.createFrom(getDimensions(),context); + + AllValuesQueryProfileVisitor visitor=new AllValuesQueryProfileVisitor(prefix); + accept(visitor,dimensionBinding, null); + Map<String,Object> values=visitor.getResult(); + + if (substitution==null) return values; + for (Map.Entry<String,Object> entry : values.entrySet()) { + if (entry.getValue().getClass()==String.class) continue; // Shortcut + if (entry.getValue() instanceof SubstituteString) + entry.setValue(((SubstituteString)entry.getValue()).substitute(context,substitution)); + } + return values; + } + + /** + * Lists types reachable from this, indexed by the prefix having that type. 
+ * If this is itself typed, this' type will be included with an empty prefix + */ + Map<CompoundName, QueryProfileType> listTypes(CompoundName prefix, Map<String, String> context) { + DimensionBinding dimensionBinding = DimensionBinding.createFrom(getDimensions(), context); + AllTypesQueryProfileVisitor visitor = new AllTypesQueryProfileVisitor(prefix); + accept(visitor, dimensionBinding, null); + return visitor.getResult(); + } + + /** + * Lists references reachable from this. + */ + Set<CompoundName> listReferences(CompoundName prefix, Map<String, String> context) { + DimensionBinding dimensionBinding=DimensionBinding.createFrom(getDimensions(),context); + AllReferencesQueryProfileVisitor visitor=new AllReferencesQueryProfileVisitor(prefix); + accept(visitor,dimensionBinding,null); + return visitor.getResult(); + } + + /** + * Lists every entry (value or reference) reachable from this which is not overridable + */ + Set<CompoundName> listUnoverridable(CompoundName prefix, Map<String, String> context) { + DimensionBinding dimensionBinding = DimensionBinding.createFrom(getDimensions(),context); + AllUnoverridableQueryProfileVisitor visitor = new AllUnoverridableQueryProfileVisitor(prefix); + accept(visitor, dimensionBinding, null); + return visitor.getResult(); + } + + /** + * Returns a value from this query profile by resolving the given name: + * <ul> + * <li>The name up to the first dot is the value looked up in the value of this profile + * <li>The rest of the name (if any) is used as the name to look up in the referenced query profile + * </ul> + * + * If this name does not resolve <i>completely</i> into a value in this or any inherited profile, null is returned. 
+ */ + public final Object get(String name) { return get(name,(Map<String,String>)null); } + + /** Returns a value from this using the given property context for resolution and using this for substitution */ + public final Object get(String name, Map<String,String> context) { + return get(name,context,null); + } + + /** Returns a value from this using the given dimensions for resolution */ + public final Object get(String name, String[] dimensionValues) { + return get(name,dimensionValues,null); + } + + public final Object get(String name, String[] dimensionValues, Properties substitution) { + return get(name,DimensionValues.createFrom(dimensionValues),substitution); + } + + /** Returns a value from this using the given dimensions for resolution */ + public final Object get(String name, DimensionValues dimensionValues, Properties substitution) { + return get(name,DimensionBinding.createFrom(getDimensions(),dimensionValues),substitution); + } + + public final Object get(String name, Map<String,String> context, Properties substitution) { + return get(name,DimensionBinding.createFrom(getDimensions(),context),substitution); + } + + public final Object get(CompoundName name, Map<String,String> context, Properties substitution) { + return get(name,DimensionBinding.createFrom(getDimensions(),context),substitution); + } + + final Object get(String name, DimensionBinding binding,Properties substitution) { + return get(new CompoundName(name),binding,substitution); + } + + final Object get(CompoundName name, DimensionBinding binding, Properties substitution) { + Object node=get(name,binding); + if (node!=null && node.getClass()==String.class) return node; // Shortcut + if (node instanceof SubstituteString) return ((SubstituteString)node).substitute(binding.getContext(),substitution); + return node; + } + + final Object get(CompoundName name,DimensionBinding dimensionBinding) { + return lookup(name,false,dimensionBinding); + } + + /** + * Returns the node at the position 
prescribed by the given name (without doing substitutions) - + * a primitive value, a substitutable string, a query profile, or null if not found. + */ + public final Object lookup(String name, Map<String,String> context) { + return lookup(new CompoundName(name),true,DimensionBinding.createFrom(getDimensions(),context)); + } + + /** Sets a value in this or any nested profile using null as context */ + public final void set(String name, Object value, QueryProfileRegistry registry) { + set(name,value,(Map<String,String>)null, registry); + } + + /** + * Sets a value in this or any nested profile. Any missing structure needed to set this will be created. + * If this value is already set, this will overwrite the previous value. + * + * @param name the name of the field, possibly a dotted name which will cause setting of a variable in a subprofile + * @param value the value to assign to the name, a primitive wrapper, string or a query profile + * @param context the context used to resolve where this value should be set, or null if none + * @throws IllegalArgumentException if the given name is illegal given the types of this or any nested query profile + * @throws IllegalStateException if this query profile is frozen + */ + public final void set(CompoundName name,Object value,Map<String,String> context, QueryProfileRegistry registry) { + set(name, value, DimensionBinding.createFrom(getDimensions(), context), registry); + } + + public final void set(String name,Object value,Map<String,String> context, QueryProfileRegistry registry) { + set(new CompoundName(name), value, DimensionBinding.createFrom(getDimensions(), context), registry); + } + + public final void set(String name,Object value,String[] dimensionValues, QueryProfileRegistry registry) { + set(name,value,DimensionValues.createFrom(dimensionValues), registry); + } + + /** + * Sets a value in this or any nested profile. Any missing structure needed to set this will be created. 
+ * If this value is already set, this will overwrite the previous value. + * + * @param name the name of the field, possibly a dotted name which will cause setting of a variable in a subprofile + * @param value the value to assign to the name, a primitive wrapper, string or a query profile + * @param dimensionValues the dimension values - will be matched by order to the dimensions set in this - if this is + * shorter or longer than the number of dimensions it will be adjusted as needed + * @param registry the registry used to resolve query profile references. If null is passed query profile references + * will cause an exception + * @throws IllegalArgumentException if the given name is illegal given the types of this or any nested query profile + * @throws IllegalStateException if this query profile is frozen + */ + public final void set(String name,Object value,DimensionValues dimensionValues, QueryProfileRegistry registry) { + set(new CompoundName(name), value, DimensionBinding.createFrom(getDimensions(), dimensionValues), registry); + } + + // ----------------- Misc + + public boolean isExplicit() { + return !getId().isAnonymous(); + } + + /** + * Switches this from write-only to read-only mode. + * This profile can never be modified again after this method returns. + * Calling this on an already frozen profile has no effect. + * <p> + * Calling this will also freeze any profiles inherited and referenced by this. + */ + // TODO: Remove/simplify as query profiles are not used at query time + public synchronized void freeze() { + if (isFrozen()) return; + + resolvedDimensions=getDimensions(); + + if (variants !=null) + variants.freeze(); + + if (inherited!=null) { + for (QueryProfile inheritedProfile : inherited) + inheritedProfile.freeze(); + } + + content.freeze(); + + inherited= inherited==null ? 
ImmutableList.of() : ImmutableList.copyOf(inherited); + + super.freeze(); + } + + @Override + public String toString() { + return "query profile '" + getId() + "'" + (type!=null ? " of type '" + type.getId() + "'" : ""); + } + + /** + * Returns a clone of this. The clone will not be frozen and will contain copied inherited and content collections + * pointing to the same values as this. + */ + @Override + public QueryProfile clone() { + if (isFrozen()) return this; + QueryProfile clone=(QueryProfile)super.clone(); + if (variants !=null) + clone.variants = variants.clone(); + if (inherited!=null) + clone.inherited=new ArrayList<>(inherited); + + if (this.content!=null) + clone.content=content.clone(); + + return clone; + } + + /** + * Clones a value of a type which may appear in a query profile if cloning is necessary (i.e if it is + * not immutable). Returns the input type otherwise. + */ + static Object cloneIfNecessary(Object object) { + if (object instanceof QueryProfile) return ((QueryProfile)object).clone(); + return object; // Other types are immutable + } + + /** Throws IllegalArgumentException if the given string is not a valid query profile name */ + public static void validateName(String name) { + Matcher nameMatcher=namePattern.matcher(name); + if ( ! 
nameMatcher.matches()) + throw new IllegalArgumentException("Illegal name '" + name + "'"); + } + + // ----------------- For subclass use -------------------------------------------------------------------- + + /** Override this to intercept all writes to this profile (or any nested profiles) */ + protected void set(CompoundName name, Object value, DimensionBinding binding, QueryProfileRegistry registry) { + try { + setNode(name, value, null, binding, registry); + } + catch (IllegalArgumentException e) { + throw new IllegalArgumentException("Could not set '" + name + "' to '" + value + "'",e); + } + } + + /** Returns this value, or its corresponding substitution string if it contains substitutions */ + protected Object convertToSubstitutionString(Object value) { + if (value==null) return value; + if (value.getClass()!=String.class) return value; + SubstituteString substituteString=SubstituteString.create((String)value); + if (substituteString==null) return value; + return substituteString; + } + + /** Returns the field description of this field, or null if it is not typed */ + protected FieldDescription getFieldDescription(CompoundName name, DimensionBinding binding) { + FieldDescriptionQueryProfileVisitor visitor=new FieldDescriptionQueryProfileVisitor(name.asList()); + accept(visitor, binding,null); + return visitor.result(); + } + + /** + * Returns true if this value is definitely overridable in this (set and not unoverridable), + * false if it is declared unoverridable (in instance or type), and null if this profile has no + * opinion on the matter because the value is not set in this. 
+ */ + Boolean isLocalOverridable(String localName,DimensionBinding binding) { + if (localLookup(localName, binding)==null) return null; // Not set + Boolean isLocalInstanceOverridable=isLocalInstanceOverridable(localName); + if (isLocalInstanceOverridable!=null) + return isLocalInstanceOverridable.booleanValue(); + if (type!=null) return type.isOverridable(localName); + return true; + } + + protected Boolean isLocalInstanceOverridable(String localName) { + if (overridable==null) return null; + return overridable.get(localName); + } + + protected Object lookup(CompoundName name,boolean allowQueryProfileResult, DimensionBinding dimensionBinding) { + SingleValueQueryProfileVisitor visitor=new SingleValueQueryProfileVisitor(name.asList(),allowQueryProfileResult); + accept(visitor,dimensionBinding,null); + return visitor.getResult(); + } + + protected final void accept(QueryProfileVisitor visitor,DimensionBinding dimensionBinding, QueryProfile owner) { + acceptAndEnter("", visitor, dimensionBinding, owner); + } + + void acceptAndEnter(String key, QueryProfileVisitor visitor,DimensionBinding dimensionBinding, QueryProfile owner) { + boolean allowContent=visitor.enter(key); + accept(allowContent, visitor, dimensionBinding, owner); + if (allowContent) + visitor.leave(key); + } + + /** + * Visit the profiles and values referenced from this in order of decreasing precedence + * + * @param allowContent whether content in this should be visited + * @param visitor the visitor + * @param dimensionBinding the dimension binding to use + */ + final void accept(boolean allowContent,QueryProfileVisitor visitor, DimensionBinding dimensionBinding, QueryProfile owner) { + visitor.onQueryProfile(this, dimensionBinding, owner); + if (visitor.isDone()) return; + + visitVariants(allowContent,visitor,dimensionBinding); + if (visitor.isDone()) return; + + if (allowContent) { + visitContent(visitor,dimensionBinding); + if (visitor.isDone()) return; + } + + if (visitor.visitInherited()) + 
visitInherited(allowContent, visitor, dimensionBinding, owner); + } + + protected void visitVariants(boolean allowContent,QueryProfileVisitor visitor,DimensionBinding dimensionBinding) { + if (getVariants()!=null) + getVariants().accept(allowContent, getType(), visitor, dimensionBinding); + } + + protected void visitInherited(boolean allowContent,QueryProfileVisitor visitor,DimensionBinding dimensionBinding, QueryProfile owner) { + if (inherited==null) return; + for (QueryProfile inheritedProfile : inherited) { + inheritedProfile.accept(allowContent,visitor,dimensionBinding.createFor(inheritedProfile.getDimensions()), owner); + if (visitor.isDone()) return; + } + } + + private void visitContent(QueryProfileVisitor visitor,DimensionBinding dimensionBinding) { + String contentKey=visitor.getLocalKey(); + + // Visit this' content + if (contentKey!=null) { // Get only the content of the current key + if (type!=null) + contentKey=type.unalias(contentKey); + visitor.acceptValue(contentKey, getContent(contentKey), dimensionBinding, this); + } + else { // get all content in this + for (Map.Entry<String,Object> entry : getContent().entrySet()) { + visitor.acceptValue(entry.getKey(), entry.getValue(), dimensionBinding, this); + if (visitor.isDone()) return; + } + } + } + + /** Returns a value from the content of this, or null if not present */ + protected Object getContent(String key) { + return content.get(key); + } + + /** Returns all the content from this as an unmodifiable map */ + protected Map<String,Object> getContent() { + return content.unmodifiableMap(); + } + + /** Sets the value of a node in <i>this</i> profile - the local name given must not be nested (contain dots) */ + protected QueryProfile setLocalNode(String localName, Object value,QueryProfileType parentType, + DimensionBinding dimensionBinding, QueryProfileRegistry registry) { + if (parentType!=null && type==null && !isFrozen()) + type=parentType; + + value=checkAndConvertAssignment(localName, value, 
registry); + localPut(localName,value,dimensionBinding); + return this; + } + + /** + * Combines an existing and a new value for a query property key. + * Return the new object to add to the state of the owning profile (/variant), or null if no new value needs to + * be added (usually because the new value was added to the existing). + */ + static Object combineValues(Object newValue, Object existingValue) { + if (newValue instanceof QueryProfile) { + QueryProfile newProfile=(QueryProfile)newValue; + if ( existingValue==null || ! (existingValue instanceof QueryProfile)) { + if (!isModifiable(newProfile)) + newProfile=new BackedOverridableQueryProfile(newProfile); // Make the query profile reference overridable + newProfile.value=existingValue; + return newProfile; + } + + // if both are profiles: + return combineProfiles(newProfile,(QueryProfile)existingValue); + } + else { + if (existingValue instanceof QueryProfile) { // we need to set a non-leaf value on a query profile + QueryProfile existingProfile=(QueryProfile)existingValue; + if (isModifiable(existingProfile)) { + existingProfile.setValue(newValue); + return null; + } + else { + QueryProfile existingOverridable = new BackedOverridableQueryProfile((QueryProfile)existingValue); + existingOverridable.setValue(newValue); + return existingOverridable; + } + } + else { + return newValue; + } + } + } + + private static QueryProfile combineProfiles(QueryProfile newProfile,QueryProfile existingProfile) { + QueryProfile returnValue=null; + QueryProfile existingModifiable; + + // Ensure the existing profile is modifiable + if (existingProfile.getClass()==QueryProfile.class) { + existingModifiable = new BackedOverridableQueryProfile(existingProfile); + returnValue=existingModifiable; + } + else { // is an overridable wrapper + existingModifiable=existingProfile; // May be used as-is + } + + // Make the existing profile inherit the new one + if (existingModifiable instanceof BackedOverridableQueryProfile) + 
((BackedOverridableQueryProfile)existingModifiable).addInheritedHere(newProfile); + else + existingModifiable.addInherited(newProfile); + + // Remove content from the existing which the new one does not allow overrides of + if (existingModifiable.content!=null) { + for (String key : existingModifiable.content.unmodifiableMap().keySet()) { + if ( ! newProfile.isLocalOverridable(key, null)) { + existingModifiable.content.remove(key); + } + } + } + + return returnValue; + } + + /** Returns whether the given profile may be modified from this profile */ + private static boolean isModifiable(QueryProfile profile) { + if (profile.isFrozen()) return false; + if ( ! profile.isExplicit()) return true; // Implicitly defined from this - ok to modify then + if (! (profile instanceof BackedOverridableQueryProfile)) return false; + return true; + } + + /** + * Converts to the type of the receiving field, if possible and necessary. + * + * @return the value to be assigned: the original or a converted value + * @throws IllegalArgumentException if the assignment is illegal + */ + protected Object checkAndConvertAssignment(String localName, Object value, QueryProfileRegistry registry) { + if (type==null) return value; // no type checking + + FieldDescription fieldDescription=type.getField(localName); + if (fieldDescription==null) { + if (type.isStrict()) + throw new IllegalArgumentException("'" + localName + "' is not declared in " + type + ", and the type is strict"); + return value; + } + + if (registry == null && (fieldDescription.getType() instanceof QueryProfileFieldType)) + throw new IllegalArgumentException("A registry was not passed: Query profile references is not supported"); + Object convertedValue = fieldDescription.getType().convertFrom(value, registry); + if (convertedValue == null) + throw new IllegalArgumentException("'" + value + "' is not a " + fieldDescription.getType().toInstanceDescription()); + return convertedValue; + } + + /** + * Looks up all inherited 
profiles and adds any that matches this name. + * This default implementation returns an empty profile. + */ + protected QueryProfile createSubProfile(String name,DimensionBinding dimensionBinding) { + QueryProfile queryProfile = new QueryProfile(ComponentId.createAnonymousComponentId(name)); + return queryProfile; + } + + /** Do a variant-aware content lookup in this */ + protected Object localLookup(String name, DimensionBinding dimensionBinding) { + Object node=null; + if ( variants!=null && !dimensionBinding.isNull()) + node=variants.get(name,type,true,dimensionBinding); + if (node==null) + node=content==null ? null : content.get(name); + return node; + } + + // ----------------- Private ---------------------------------------------------------------------------------- + + private Boolean isDeclaredOverridable(CompoundName name,DimensionBinding dimensionBinding) { + QueryProfile parent= lookupParentExact(name, true, dimensionBinding); + if (parent.overridable==null) return null; + return parent.overridable.get(name.last()); + } + + /** + * Sets the overridability of a field in this profile, + * this overrides the corresponding setting in the type (if any) + */ + private void setOverridable(CompoundName fieldName,boolean overridable,DimensionBinding dimensionBinding) { + QueryProfile parent= lookupParentExact(fieldName, true, dimensionBinding); + if (parent.overridable==null) + parent.overridable=new HashMap<>(); + parent.overridable.put(fieldName.last(),overridable); + } + + /** Sets a value to a (possibly non-local) node. 
The parent query profile holding the value is returned */ + private void setNode(CompoundName name, Object value, QueryProfileType parentType, + DimensionBinding dimensionBinding, QueryProfileRegistry registry) { + ensureNotFrozen(); + if (name.isCompound()) { + QueryProfile parent= getQueryProfileExact(name.first(), true, dimensionBinding); + parent.setNode(name.rest(), value,parentType, dimensionBinding.createFor(parent.getDimensions()), registry); + } + else { + setLocalNode(name.toString(), value,parentType, dimensionBinding, registry); + } + } + + /** + * Looks up and, if necessary, creates, the query profile which should hold the given local name portion of the + * given name. If the name contains no dots, this is returned. + * + * @param name the name of the variable to lookup the parent of + * @param create whether or not to create the parent if it is not present + * @return the parent, or null if not present and created is false + */ + private QueryProfile lookupParentExact(CompoundName name, boolean create, DimensionBinding dimensionBinding) { + CompoundName rest=name.rest(); + if (rest.isEmpty()) return this; + + QueryProfile topmostParent= getQueryProfileExact(name.first(), create, dimensionBinding); + if (topmostParent==null) return null; + return topmostParent.lookupParentExact(rest, create, dimensionBinding.createFor(topmostParent.getDimensions())); + } + + /** + * Returns a query profile from this by name + * + * @param localName the local name of the profile in this, this is never a compound + * @param create whether the profile should be created if missing + * @return the created profile, or null if not present, and create is false + */ + private QueryProfile getQueryProfileExact(String localName, boolean create, DimensionBinding dimensionBinding) { + Object node=localExactLookup(localName, dimensionBinding); + if (node!=null && node instanceof QueryProfile) { + return (QueryProfile)node; + } + if (!create) return null; + + QueryProfile 
queryProfile=createSubProfile(localName,dimensionBinding); + if (type!=null) { + Class<?> legalClass=type.getValueClass(localName); + if (legalClass==null || ! legalClass.isInstance(queryProfile)) + throw new RuntimeException("'" + localName + "' is not a legal query profile reference name in " + this); + queryProfile.setType(type.getType(localName)); + } + localPut(localName,queryProfile,dimensionBinding); + return queryProfile; + } + + /** Do a variant-aware content lookup in this - without looking in any wrapped content. But by matching variant bindings exactly only */ + private Object localExactLookup(String name,DimensionBinding dimensionBinding) { + if (dimensionBinding.isNull()) return content==null ? null : content.get(name); + if (variants==null) return null; + QueryProfileVariant variant=variants.getVariant(dimensionBinding.getValues(),false); + if (variant==null) return null; + return variant.values().get(name); + } + + /** Sets a value directly in this query profile (unless frozen) */ + private void localPut(String localName,Object value,DimensionBinding dimensionBinding) { + ensureNotFrozen(); + + if (type!=null) + localName=type.unalias(localName); + + validateName(localName); + value=convertToSubstitutionString(value); + + if (dimensionBinding.isNull()) { + Object combinedValue; + if (value instanceof QueryProfile) + combinedValue = combineValues(value,content==null ? 
null : content.get(localName)); + else + combinedValue = combineValues(value, localLookup(localName, dimensionBinding)); + + if (combinedValue!=null) + content.put(localName,combinedValue); + } + else { + if (variants==null) + variants=new QueryProfileVariants(dimensionBinding.getDimensions(), this); + variants.set(localName,dimensionBinding.getValues(),value); + } + } + + private static final Pattern namePattern=Pattern.compile("[$a-zA-Z_/][-$a-zA-Z0-9_/()]*"); + + /** + * Returns a compiled version of this which produces faster lookup times + * + * @param registry the registry this will be added to by the caller, or null if none + */ + public CompiledQueryProfile compile(CompiledQueryProfileRegistry registry) { + return QueryProfileCompiler.compile(this, registry); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/QueryProfileCompiler.java b/container-search/src/main/java/com/yahoo/search/query/profile/QueryProfileCompiler.java new file mode 100644 index 00000000000..795c7655dfb --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/QueryProfileCompiler.java @@ -0,0 +1,140 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.profile; + +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.query.profile.compiled.CompiledQueryProfile; +import com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry; +import com.yahoo.search.query.profile.compiled.DimensionalMap; +import com.yahoo.search.query.profile.types.QueryProfileType; + +import java.util.Collection; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Compile a set of query profiles into compiled profiles. 
+ * + * @author bratseth + */ +public class QueryProfileCompiler { + + private static final Logger log = Logger.getLogger(QueryProfileCompiler.class.getName()); + + public static CompiledQueryProfileRegistry compile(QueryProfileRegistry input) { + CompiledQueryProfileRegistry output = new CompiledQueryProfileRegistry(input.getTypeRegistry()); + for (QueryProfile inputProfile : input.allComponents()) { + output.register(compile(inputProfile, output)); + } + return output; + } + + public static CompiledQueryProfile compile(QueryProfile in, CompiledQueryProfileRegistry registry) { + DimensionalMap.Builder<CompoundName, Object> values = new DimensionalMap.Builder<>(); + DimensionalMap.Builder<CompoundName, QueryProfileType> types = new DimensionalMap.Builder<>(); + DimensionalMap.Builder<CompoundName, Object> references = new DimensionalMap.Builder<>(); + DimensionalMap.Builder<CompoundName, Object> unoverridables = new DimensionalMap.Builder<>(); + + // Resolve values for each existing variant and combine into a single data structure + Set<DimensionBindingForPath> variants = new HashSet<>(); + collectVariants(CompoundName.empty, in, DimensionBinding.nullBinding, variants); + variants.add(new DimensionBindingForPath(DimensionBinding.nullBinding, CompoundName.empty)); // if this contains no variants + if (log.isLoggable(Level.FINE)) + log.fine("Compiling " + in.toString() + " having " + variants.size() + " variants"); + int i = 0; + for (DimensionBindingForPath variant : variants) { + if (log.isLoggable(Level.FINER)) + log.finer(" Compiling variant " + i++ + ": " + variant); + for (Map.Entry<String, Object> entry : in.listValues(variant.path(), variant.binding().getContext(), null).entrySet()) + values.put(variant.path().append(entry.getKey()), variant.binding(), entry.getValue()); + for (Map.Entry<CompoundName, QueryProfileType> entry : in.listTypes(variant.path(), variant.binding().getContext()).entrySet()) + types.put(variant.path().append(entry.getKey()), 
variant.binding(), entry.getValue()); + for (CompoundName reference : in.listReferences(variant.path(), variant.binding().getContext())) + references.put(variant.path().append(reference), variant.binding(), Boolean.TRUE); // Used as a set; value is ignored + for (CompoundName name : in.listUnoverridable(variant.path(), variant.binding().getContext())) + unoverridables.put(variant.path().append(name), variant.binding(), Boolean.TRUE); // Used as a set; value is ignored + } + + return new CompiledQueryProfile(in.getId(), in.getType(), + values.build(), types.build(), references.build(), unoverridables.build(), + registry); + } + + /** + * Returns all the unique combinations of dimension values which have values set reachable from this profile. + * + * @param profile the profile we are collecting the variants of + * @param currentVariant the variant we must have to arrive at this point in the query profile graph + * @param allVariants the set of all variants accumulated so far + */ + private static void collectVariants(CompoundName path, QueryProfile profile, DimensionBinding currentVariant, Set<DimensionBindingForPath> allVariants) { + for (QueryProfile inheritedProfile : profile.inherited()) + collectVariants(path, inheritedProfile, currentVariant, allVariants); + + collectVariantsFromValues(path, profile.getContent(), currentVariant, allVariants); + + collectVariantsInThis(path, profile, currentVariant, allVariants); + if (profile instanceof BackedOverridableQueryProfile) + collectVariantsInThis(path, ((BackedOverridableQueryProfile) profile).getBacking(), currentVariant, allVariants); + } + + private static void collectVariantsInThis(CompoundName path, QueryProfile profile, DimensionBinding currentVariant, Set<DimensionBindingForPath> allVariants) { + QueryProfileVariants profileVariants = profile.getVariants(); + if (profileVariants != null) { + for (QueryProfileVariant variant : profile.getVariants().getVariants()) { + DimensionBinding combinedVariant = + 
DimensionBinding.createFrom(profile.getDimensions(), variant.getDimensionValues()).combineWith(currentVariant); + if (combinedVariant.isInvalid()) continue; // values at this point in the graph are unreachable + collectVariantsFromValues(path, variant.values(), combinedVariant, allVariants); + for (QueryProfile variantInheritedProfile : variant.inherited()) + collectVariants(path, variantInheritedProfile, combinedVariant, allVariants); + } + } + } + + private static void collectVariantsFromValues(CompoundName path, Map<String, Object> values, DimensionBinding currentVariant, Set<DimensionBindingForPath> allVariants) { + if ( ! values.isEmpty()) + allVariants.add(new DimensionBindingForPath(currentVariant, path)); // there are actual values for this variant + + for (Map.Entry<String, Object> entry : values.entrySet()) { + if (entry.getValue() instanceof QueryProfile) + collectVariants(path.append(entry.getKey()), (QueryProfile)entry.getValue(), currentVariant, allVariants); + } + } + + private static class DimensionBindingForPath { + + private final DimensionBinding binding; + private final CompoundName path; + + public DimensionBindingForPath(DimensionBinding binding, CompoundName path) { + this.binding = binding; + this.path = path; + } + + public DimensionBinding binding() { return binding; } + public CompoundName path() { return path; } + + @Override + public boolean equals(Object o) { + if ( o == this ) return true; + if ( ! 
(o instanceof DimensionBindingForPath)) return false; + DimensionBindingForPath other = (DimensionBindingForPath)o; + return other.binding.equals(this.binding) && other.path.equals(this.path); + } + + @Override + public int hashCode() { + return binding.hashCode() + 17*path.hashCode(); + } + + @Override + public String toString() { + return binding + " for path " + path; + } + + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/QueryProfileProperties.java b/container-search/src/main/java/com/yahoo/search/query/profile/QueryProfileProperties.java new file mode 100644 index 00000000000..2432cb2ab33 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/QueryProfileProperties.java @@ -0,0 +1,258 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.profile; + +import com.yahoo.collections.Pair; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.processing.request.properties.PropertyMap; +import com.yahoo.protect.Validator; +import com.yahoo.search.query.Properties; +import com.yahoo.search.query.profile.compiled.CompiledQueryProfile; +import com.yahoo.search.query.profile.compiled.DimensionalValue; +import com.yahoo.search.query.profile.types.FieldDescription; +import com.yahoo.search.query.profile.types.QueryProfileType; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Properties backed by a query profile. + * This has the scope of one query and is not multithread safe. 
+ * + * @author bratseth + */ +public class QueryProfileProperties extends Properties { + + private final CompiledQueryProfile profile; + + // Note: The priority order is: values has precedence over references + + /** Values which has been overridden at runtime, or null if none */ + private Map<CompoundName, Object> values = null; + /** Query profile references which has been overridden at runtime, or null if none. Earlier values has precedence */ + private List<Pair<CompoundName, CompiledQueryProfile>> references = null; + + /** Creates an instance from a profile, throws an exception if the given profile is null */ + public QueryProfileProperties(CompiledQueryProfile profile) { + Validator.ensureNotNull("The profile wrapped by this cannot be null", profile); + this.profile = profile; + } + + /** Returns the query profile backing this, or null if none */ + public CompiledQueryProfile getQueryProfile() { return profile; } + + /** Gets a value from the query profile, or from the nested profile if the value is null */ + @Override + public Object get(CompoundName name, Map<String,String> context, + com.yahoo.processing.request.Properties substitution) { + name = unalias(name, context); + Object value = null; + if (values != null) + value = values.get(name); + if (value == null) { + Pair<CompoundName, CompiledQueryProfile> reference = findReference(name); + if (reference != null) + return reference.getSecond().get(name.rest(reference.getFirst().size()), context, substitution); // yes; even if null + } + + if (value == null) + value = profile.get(name, context, substitution); + if (value == null) + value = super.get(name, context, substitution); + return value; + } + + /** + * Sets a value in this query profile + * + * @throws IllegalArgumentException if this property cannot be set in the wrapped query profile + */ + @Override + public void set(CompoundName name, Object value, Map<String,String> context) { + // TODO: Refactor + try { + name = unalias(name, context); + + 
if (context == null) + context = Collections.emptyMap(); + + if ( ! profile.isOverridable(name, context)) return; + + // Check runtime references + Pair<CompoundName, CompiledQueryProfile> runtimeReference = findReference(name); + if (runtimeReference != null && ! runtimeReference.getSecond().isOverridable(name.rest(runtimeReference.getFirst().size()), context)) + return; + + // Check types + if ( ! profile.getTypes().isEmpty()) { + for (int i = 0; i<name.size(); i++) { + QueryProfileType type = profile.getType(name.first(i), context); + if (type == null) continue; + String localName = name.get(i); + FieldDescription fieldDescription = type.getField(localName); + if (fieldDescription == null && type.isStrict()) + throw new IllegalArgumentException("'" + localName + "' is not declared in " + type + ", and the type is strict"); + + // TODO: In addition to strictness, check legality along the way + + if (i == name.size()-1 && fieldDescription != null) { // at the end of the path, check the assignment type + value = fieldDescription.getType().convertFrom(value, profile.getRegistry()); + if (value == null) + throw new IllegalArgumentException("'" + value + "' is not a " + fieldDescription.getType().toInstanceDescription()); + } + } + } + + if (value instanceof String && value.toString().startsWith("ref:")) { + if (profile.getRegistry() == null) + throw new IllegalArgumentException("Runtime query profile references does not work when the " + + "QueryProfileProperties are constructed without a registry"); + String queryProfileId = value.toString().substring(4); + value = profile.getRegistry().findQueryProfile(queryProfileId); + if (value == null) + throw new IllegalArgumentException("Query profile '" + queryProfileId + "' is not found"); + } + + if (value instanceof CompiledQueryProfile) { // this will be due to one of the two clauses above + if (references == null) + references = new ArrayList<>(); + references.add(0, new Pair<>(name, (CompiledQueryProfile)value)); // 
references set later has precedence - put first + } + else { + if (values == null) + values = new HashMap<>(); + values.put(name, value); + } + } + catch (IllegalArgumentException e) { + throw new IllegalArgumentException("Could not set '" + name + "' to '" + value + "': " + e.getMessage()); // TODO: Nest instead + } + } + + @Override + public Map<String, Object> listProperties(CompoundName path, Map<String,String> context, + com.yahoo.processing.request.Properties substitution) { + path = unalias(path, context); + if (context == null) context = Collections.emptyMap(); + + Map<String, Object> properties = profile.listValues(path, context, substitution); + + properties.putAll(super.listProperties(path, context, substitution)); + + if (references != null) { + for (Pair<CompoundName, CompiledQueryProfile> refEntry : references) { + if ( ! refEntry.getFirst().hasPrefix(path.first(Math.min(refEntry.getFirst().size(), path.size())))) continue; + + CompoundName pathInReference; + CompoundName prefixToReferenceKeys; + if (refEntry.getFirst().size() > path.size()) { + pathInReference = CompoundName.empty; + prefixToReferenceKeys = refEntry.getFirst().rest(path.size()); + } + else { + pathInReference = path.rest(refEntry.getFirst().size()); + prefixToReferenceKeys = CompoundName.empty; + } + for (Map.Entry<String, Object> valueEntry : refEntry.getSecond().listValues(pathInReference, context, substitution).entrySet()) { + properties.put(prefixToReferenceKeys.append(new CompoundName(valueEntry.getKey())).toString(), valueEntry.getValue()); + } + } + + } + + if (values != null) { + for (Map.Entry<CompoundName, Object> entry : values.entrySet()) { + if (entry.getKey().hasPrefix(path)) + properties.put(entry.getKey().rest(path.size()).toString(), entry.getValue()); + } + } + + return properties; + } + + public boolean isComplete(StringBuilder firstMissingName, Map<String,String> context) { + // Are all types reachable from this complete? + if ( ! 
reachableTypesAreComplete(CompoundName.empty, profile, firstMissingName, context)) + return false; + + // Are all runtime references in this complete? + if (references == null) return true; + for (Pair<CompoundName, CompiledQueryProfile> reference : references) { + if ( ! reachableTypesAreComplete(reference.getFirst(), reference.getSecond(), firstMissingName, context)) + return false; + } + + return true; + } + + private boolean reachableTypesAreComplete(CompoundName prefix, CompiledQueryProfile profile, StringBuilder firstMissingName, Map<String,String> context) { + for (Map.Entry<CompoundName, DimensionalValue<QueryProfileType>> typeEntry : profile.getTypes().entrySet()) { + QueryProfileType type = typeEntry.getValue().get(context); + if (type == null) continue; + if ( ! typeIsComplete(prefix.append(typeEntry.getKey()), type, firstMissingName, context)) + return false; + } + return true; + } + + private boolean typeIsComplete(CompoundName prefix, QueryProfileType type, StringBuilder firstMissingName, Map<String,String> context) { + if (type == null) return true; + for (FieldDescription field : type.fields().values()) { + if ( ! 
field.isMandatory()) continue; + + CompoundName fieldName = prefix.append(field.getName()); + if ( get(fieldName, null) != null) continue; + if ( hasReference(fieldName)) continue; + + if (profile.getReferences().get(fieldName, context) != null) continue; + + if (firstMissingName != null) + firstMissingName.append(fieldName); + return false; + } + return true; + } + + private boolean hasReference(CompoundName name) { + if (references == null) return false; + for (Pair<CompoundName, CompiledQueryProfile> reference : references) + if (reference.getFirst().equals(name)) + return true; + return false; + } + + private Pair<CompoundName, CompiledQueryProfile> findReference(CompoundName name) { + if (references == null) return null; + for (Pair<CompoundName, CompiledQueryProfile> entry : references) { + if (name.hasPrefix(entry.getFirst())) return entry; + } + return null; + } + + CompoundName unalias(CompoundName name, Map<String,String> context) { + if (profile.getTypes().isEmpty()) return name; + + CompoundName unaliasedName = name; + for (int i = 0; i<name.size(); i++) { + QueryProfileType type = profile.getType(name.first(i), context); + if (type == null) continue; + if (type.aliases() == null) continue; // TODO: Make never null + if (type.aliases().isEmpty()) continue; + String localName = name.get(i); + String unaliasedLocalName = type.unalias(localName); + unaliasedName = unaliasedName.set(i, unaliasedLocalName); + } + return unaliasedName; + } + + @Override + public QueryProfileProperties clone() { + QueryProfileProperties clone = (QueryProfileProperties)super.clone(); + if (this.values != null) + clone.values = PropertyMap.cloneMap(this.values); + return clone; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/QueryProfileRegistry.java b/container-search/src/main/java/com/yahoo/search/query/profile/QueryProfileRegistry.java new file mode 100644 index 00000000000..a4bca752d18 --- /dev/null +++ 
b/container-search/src/main/java/com/yahoo/search/query/profile/QueryProfileRegistry.java @@ -0,0 +1,89 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.profile;

import com.yahoo.component.ComponentSpecification;
import com.yahoo.component.provider.ComponentRegistry;
import com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry;
import com.yahoo.search.query.profile.types.QueryProfileType;
import com.yahoo.search.query.profile.types.QueryProfileTypeRegistry;

/**
 * A registry of query profiles, which also owns the registry of the query profile types they use.
 *
 * @author bratseth
 */
public class QueryProfileRegistry extends ComponentRegistry<QueryProfile> {

    private QueryProfileTypeRegistry queryProfileTypeRegistry = new QueryProfileTypeRegistry();

    /** The current default instance of this registry */
    private static QueryProfileRegistry instance = new QueryProfileRegistry();

    /** Registers the given profile under its own id */
    public void register(QueryProfile profile) {
        super.register(profile.getId(), profile);
    }

    /** Looks up a query profile type by name in the attached type registry. Returns null if it is not found */
    public QueryProfileType getType(String type) {
        return queryProfileTypeRegistry.getComponent(type);
    }

    /** Returns the type registry attached to this */
    public QueryProfileTypeRegistry getTypeRegistry() { return queryProfileTypeRegistry; }

    /**
     * <p>Returns a query profile for the given request string, or null if a suitable one is not found.</p>
     *
     * The request string must be a valid {@link com.yahoo.component.ComponentId} or null.
     *
     * <p>
     * If the string is null, the profile named "default" is returned, or null if that does not exist.
     *
     * <p>
     * The version part (if any) is matched using the usual component version matching rules.
     * If the name part matches a query profile name perfectly, that profile is returned.
     * If not, and the name is a slash-separated path, the profile with the longest matching left sub-path
     * which has a type which allows path matching is used. If there is no such profile, null is returned.
     */
    public QueryProfile findQueryProfile(String idString) {
        if (idString == null)
            return getComponent("default");

        QueryProfile exactMatch = getComponent(new ComponentSpecification(idString));
        if (exactMatch != null)
            return exactMatch;
        return findPathParentQueryProfile(new ComponentSpecification(idString));
    }

    /**
     * Walks from the given id towards the root of its slash-separated path, one path element at a time,
     * and returns the first profile found which has a type allowing path matching, or null if none
     */
    private QueryProfile findPathParentQueryProfile(ComponentSpecification id) {
        ComponentSpecification current = id;
        while (true) {
            // Try the name with "/" appended - should have the same semantics with path matching
            QueryProfile slashed = getComponent(new ComponentSpecification(current.getName() + "/", current.getVersionSpecification()));
            if (matchesAsPath(slashed))
                return slashed;

            // Extract the parent (if any)
            int lastSlash = current.getName().lastIndexOf("/");
            if (lastSlash < 1) return null;
            String parentName = current.getName().substring(0, lastSlash);
            if (parentName.equals("")) return null;

            ComponentSpecification parentId = new ComponentSpecification(parentName, current.getVersionSpecification());
            QueryProfile parent = getComponent(parentId);
            if (matchesAsPath(parent))
                return parent;

            current = parentId; // continue the search from the parent path
        }
    }

    /** Returns whether the given profile exists and has a type which allows matching it as a path */
    private static boolean matchesAsPath(QueryProfile candidate) {
        return candidate != null && candidate.getType() != null && candidate.getType().getMatchAsPath();
    }

    /**
     * Freezes the type registry and all the query profiles registered in this.
     * Does nothing if this is already frozen.
     */
    // NOTE(review): this does not call super.freeze(), so whether isFrozen() can ever become true
    // here depends on ComponentRegistry - confirm that this is intended
    @Override
    public void freeze() {
        if (isFrozen()) return;
        queryProfileTypeRegistry.freeze();
        for (QueryProfile registeredProfile : allComponents())
            registeredProfile.freeze();
    }

    /** Compiles the content of this by delegating to QueryProfileCompiler */
    public CompiledQueryProfileRegistry compile() { return QueryProfileCompiler.compile(this); }

}
diff --git 
a/container-search/src/main/java/com/yahoo/search/query/profile/QueryProfileVariant.java b/container-search/src/main/java/com/yahoo/search/query/profile/QueryProfileVariant.java new file mode 100644 index 00000000000..42ea4a96d8f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/QueryProfileVariant.java @@ -0,0 +1,157 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.profile;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.yahoo.search.query.profile.types.QueryProfileType;

import java.util.*;

/**
 * A variant of a query profile: The values and inherited profiles which apply
 * for one particular set of dimension values.
 *
 * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
*/
public class QueryProfileVariant implements Cloneable, Comparable<QueryProfileVariant> {

    /** The profiles inherited by this variant, or null if none has been added */
    private List<QueryProfile> inherited=null;

    private DimensionValues dimensionValues;

    /** The values of this variant by local name, or null if none has been set */
    private Map<String,Object> values;

    private boolean frozen=false;

    /** The query profile this is a variant of */
    private QueryProfile owner;

    public QueryProfileVariant(DimensionValues dimensionValues, QueryProfile owner) {
        this.dimensionValues=dimensionValues;
        this.owner = owner;
    }

    public DimensionValues getDimensionValues() { return dimensionValues; }

    /**
     * Returns the live reference to the values of this. This may be modified
     * if this is not frozen.
     */
    public Map<String,Object> values() {
        if (values==null) {
            if (frozen)
                return Collections.emptyMap();
            else
                values=new HashMap<>();
        }
        return values;
    }

    /**
     * Returns the live reference to the inherited profiles of this. This may be modified
     * if this is not frozen.
     */
    public List<QueryProfile> inherited() {
        if (inherited==null) {
            if (frozen)
                return Collections.emptyList();
            else
                inherited=new ArrayList<>();
        }
        return inherited;
    }

    /**
     * Sets a value in this variant. If a value is already set for the key, the two are combined by
     * QueryProfile.combineValues, and the existing value is kept if that returns null.
     */
    public void set(String key, Object newValue) {
        if (values==null)
            values=new HashMap<>();

        Object oldValue = values.get(key);

        if (oldValue == null) {
            values.put(key, newValue);
        } else {
            Object combinedOrNull = QueryProfile.combineValues(newValue, oldValue);
            if (combinedOrNull != null) {
                values.put(key, combinedOrNull);
            }
        }
    }

    /** Adds a profile to the end of the list of profiles inherited by this variant */
    public void inherit(QueryProfile profile) {
        if (inherited==null)
            inherited=new ArrayList<>(1);
        inherited.add(profile);
    }

    /**
     * Implements the sort order of this which is based on specificity
     * where dimensions to the left are more significant.
     * <p>
     * <b>Note:</b> This ordering is not consistent with equals - it returns 0 when the same dimensions
     * are <i>set</i>, regardless of what they are set <i>to</i>.
     */
    public @Override int compareTo(QueryProfileVariant other) {
        return this.dimensionValues.compareTo(other.dimensionValues);
    }

    public boolean matches(DimensionValues givenDimensionValues) {
        return this.dimensionValues.matches(givenDimensionValues);
    }

    /**
     * Accepts a visitor to the values of this, followed by the profiles inherited by this.
     *
     * @param allowContent whether the values of this should be visited, if false only inherited profiles are visited
     * @param type the type used to unalias the local key of the visitor, or null if there is no type
     * @param visitor the visitor to accept
     * @param dimensionBinding the dimension binding passed on to the visitor
     */
    public void accept(boolean allowContent,QueryProfileType type,QueryProfileVisitor visitor, DimensionBinding dimensionBinding) {
        // Visit this
        if (allowContent) {
            String key=visitor.getLocalKey();
            if (key!=null) {
                // unalias returns the name to use rather than changing its argument, so the result must
                // be assigned for aliases to have any effect on the lookup below
                if (type!=null)
                    key=type.unalias(key);

                visitor.acceptValue(key, values().get(key), dimensionBinding, owner);
                if (visitor.isDone()) return;
            }
            else {
                for (Map.Entry<String,Object> entry : values().entrySet()) {
                    visitor.acceptValue(entry.getKey(), entry.getValue(), dimensionBinding, owner);
                    if (visitor.isDone()) return;
                }
            }
        }

        // Visit inherited
        for (QueryProfile profile : inherited()) {
            if (visitor.visitInherited()) {
                profile.accept(allowContent,visitor,dimensionBinding.createFor(profile.getDimensions()), owner);
            }
            if (visitor.isDone()) return;
        }
    }

    /** Replaces the inherited list and the values of this by immutable copies and marks this as frozen */
    public void freeze() {
        if (frozen) return;
        if (inherited != null)
            inherited = ImmutableList.copyOf(inherited);
        if (values != null)
            values = ImmutableMap.copyOf(values);
        frozen=true;
    }

    /** Returns this if it is frozen, and otherwise a copy having its own inherited list and values */
    public QueryProfileVariant clone() {
        if (frozen) return this;
        try {
            QueryProfileVariant clone=(QueryProfileVariant)super.clone();
            if (this.inherited!=null)
                clone.inherited=new ArrayList<>(this.inherited); // TODO: Deep clone is more correct, but probably does not matter in practice

            clone.values=CopyOnWriteContent.deepClone(this.values);

            return clone;
        }
        catch (CloneNotSupportedException e) {
            throw new RuntimeException(e);
        }
    }

    public @Override String toString() {
        return "query profile variant for " + dimensionValues;
    }

}
diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/QueryProfileVariants.java b/container-search/src/main/java/com/yahoo/search/query/profile/QueryProfileVariants.java new file mode 100644 index 00000000000..fde851bdc75 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/QueryProfileVariants.java @@ -0,0 +1,486 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.profile; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.yahoo.component.provider.Freezable; +import com.yahoo.search.query.profile.types.QueryProfileType; + +import java.util.*; + +/** + * This class represent a set of query profiles virtually - rather + * than storing and instantiating each profile this structure represents explicitly only + * the values set in the various virtual profiles. The set of virtual profiles are defined by a set of + * <i>dimensions</i>. 
Values may be set for any point in this multi-dimensional space, and may also be set for + * any regular hyper-region by setting values for any point in certain of these dimensions. + * The set of virtual profiles defined by this consists of all the combinations of dimension points for + * which one or more values is set in this, as well as any possible less specified regions. + * <p> + * A set of virtual profiles are always owned by a single profile, which is also their parent + * in the inheritance hierarchy. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class QueryProfileVariants implements Freezable, Cloneable { + + private boolean frozen=false; + + /** Properties indexed by name, to support fast lookup of single values */ + private Map<String,FieldValues> fieldValuesByName=new HashMap<>(); + + /** The inherited profiles for various dimensions settings - a set of fieldvalues of List<QueryProfile> */ + private FieldValues inheritedProfiles=new FieldValues(); + + /** + * Field and inherited profiles sorted by specificity used for all-value visiting. + * This is the same as how the source data looks (apart from the sorting). + */ + private List<QueryProfileVariant> variants=new ArrayList<>(); + + /** + * The names of the dimensions (which are possible properties in the context given on lookup) of this. + * Order matters - more specific values to the left in this list are more significant than more specific values + * to the right + */ + private List<String> dimensions; + + /** The query profile this variants of */ + private QueryProfile owner; + + /** + * Creates a set of virtual query profiles which may return varying values over the set of dimensions given. + * Each dimension is a name for which a key-value may be supplied in the context properties + * on lookup time to influence the value returned. 
+ */ + public QueryProfileVariants(String[] dimensions, QueryProfile owner) { + this(Arrays.asList(dimensions), owner); + } + + /** + * Creates a set of virtual query profiles which may return varying values over the set of dimensions given. + * Each dimension is a name for which a key-value may be supplied in the context properties + * on lookup time to influence the value returned. + * + * @param dimensions the dimension names this may vary over. The list gets owned by this, so it must not be further + * modified from outside). This will not modify the list. + */ + public QueryProfileVariants(List<String> dimensions, QueryProfile owner) { + // Note: This is not made unmodifiable (here or in freeze) because we depend on map identity comparisons of this + // list (in dimensionBinding) for performance reasons. + this.dimensions = dimensions; + this.owner = owner; + } + + /** Irreversibly prevents any further modifications to this */ + public void freeze() { + if (frozen) return; + for (FieldValues fieldValues : fieldValuesByName.values()) + fieldValues.freeze(); + fieldValuesByName = ImmutableMap.copyOf(fieldValuesByName); + inheritedProfiles.freeze(); + + Collections.sort(variants); + for (QueryProfileVariant variant : variants) + variant.freeze(); + variants = ImmutableList.copyOf(variants); + + frozen=true; + } + + @Override + public boolean isFrozen() { + return frozen; + } + + /** Visits the most specific match to the dimension binding of each variable (or the one named by the visitor) */ + void accept(boolean allowContent,QueryProfileType type,QueryProfileVisitor visitor,DimensionBinding dimensionBinding) { + String contentName=null; + if (allowContent) + contentName=visitor.getLocalKey(); + + if (contentName!=null) { + if (type!=null) + contentName=type.unalias(contentName); + acceptSingleValue(contentName,allowContent,visitor,dimensionBinding); // Special cased for performance + } + else { + acceptAllValues(allowContent,visitor,type,dimensionBinding); + } + 
} + + // PERF: 90% + void acceptSingleValue(String name,boolean allowContent,QueryProfileVisitor visitor,DimensionBinding dimensionBinding) { + FieldValues fieldValues=fieldValuesByName.get(name); + if (fieldValues==null || !allowContent) + fieldValues=new FieldValues(); + + fieldValues.sort(); + inheritedProfiles.sort(); + + int inheritedIndex=0; + int fieldIndex=0; + // Go through both the fields and the inherited profiles at the same time and try the single must specific pick + // from either of the lists at each step + while(fieldIndex<fieldValues.size() || inheritedIndex<inheritedProfiles.size()) { // PERF: 8% - fieldValues.size() + // Get the next most specific from field and inherited + FieldValue fieldValue=fieldValues.getIfExists(fieldIndex); // PERF: 11% - getIfExists + FieldValue inheritedProfileValue=inheritedProfiles.getIfExists(inheritedIndex); // PERF: 11% - getIfExists + + // Try the most specific first, then the other + if (inheritedProfileValue==null || (fieldValue!=null && fieldValue.compareTo(inheritedProfileValue)<=0)) { // Field is most specific, or both are equally specific + if (fieldValue.matches(dimensionBinding.getValues())) { // PERF: 42% - matches, together with the other matches + visitor.acceptValue(name, fieldValue.getValue(), dimensionBinding, owner); + } + if (visitor.isDone()) return; + fieldIndex++; + } + else if (inheritedProfileValue!=null) { // Inherited is most specific at this point + if (inheritedProfileValue.matches(dimensionBinding.getValues())) { // PERF: 42% - matches, together with the other matches + @SuppressWarnings("unchecked") + List<QueryProfile> inheritedProfileList=(List<QueryProfile>)inheritedProfileValue.getValue(); + for (QueryProfile inheritedProfile : inheritedProfileList) { + if (visitor.visitInherited()) { + inheritedProfile.accept(allowContent,visitor,dimensionBinding.createFor(inheritedProfile.getDimensions()), owner); + } + if (visitor.isDone()) return; + } + } + inheritedIndex++; + } + if 
(visitor.isDone()) return; + } + } + + void acceptAllValues(boolean allowContent,QueryProfileVisitor visitor, QueryProfileType type,DimensionBinding dimensionBinding) { + if (!frozen) + Collections.sort(variants); + for (QueryProfileVariant variant : variants) { + if (variant.matches(dimensionBinding.getValues())) + variant.accept(allowContent,type,visitor,dimensionBinding); + if (visitor.isDone()) return; + } + } + + /** + * Returns the most specific matching value of a name for a given set of <b>canonical</b> dimension values. + * + * @param name the name to return the best matching value of + * @param dimensionBinding the dimension bindings to use in this + */ + public Object get(String name, QueryProfileType type, boolean allowQueryProfileResult, DimensionBinding dimensionBinding) { + SingleValueQueryProfileVisitor visitor=new SingleValueQueryProfileVisitor(Collections.singletonList(name),allowQueryProfileResult); + visitor.enter(""); + accept(true,type,visitor,dimensionBinding); + visitor.leave(""); + return visitor.getResult(); + } + + /** Inherits a particular profile in a variant of this */ + public void inherit(QueryProfile profile,DimensionValues dimensionValues) { + ensureNotFrozen(); + + // Update variant + getVariant(dimensionValues,true).inherit(profile); + + // Update per-variable optimized structure + @SuppressWarnings("unchecked") + List<QueryProfile> inheritedAtDimensionValues=(List<QueryProfile>)inheritedProfiles.getExact(dimensionValues); + if (inheritedAtDimensionValues==null) { + inheritedAtDimensionValues=new ArrayList<>(); + inheritedProfiles.put(dimensionValues,inheritedAtDimensionValues); + } + inheritedAtDimensionValues.add(profile); + } + + /** + * Sets a value to this + * + * @param fieldName the name of the field to set. This cannot be a compound (dotted) name + * @param binding the dimension values for which this value applies. + * The dimensions must be canonicalized, and ownership is transferred to this. 
+ * @param value the value to set + */ + /** + * Sets a value to this + * + * @param fieldName the name of the field to set. This cannot be a compound (dotted) name + * @param dimensionValues the dimension values for which this value applies + * @param value the value to set + */ + public void set(String fieldName,DimensionValues dimensionValues,Object value) { + ensureNotFrozen(); + + // Update variant + getVariant(dimensionValues,true).set(fieldName,value); + + // Update per-variable optimized structure + FieldValues fieldValues=fieldValuesByName.get(fieldName); + if (fieldValues==null) { + fieldValues=new FieldValues(); + fieldValuesByName.put(fieldName,fieldValues); + } + + Object combinedValue=QueryProfile.combineValues(value,fieldValues.getExact(dimensionValues)); + if (combinedValue!=null) + fieldValues.put(dimensionValues,combinedValue); + } + + /** + * Returns the dimensions over which the virtual profiles in this may return different values. + * Each dimension is a name for which a key-value may be supplied in the context properties + * on lookup time to influence the value returned. + * The dimensions may not be modified - the returned list is always read only. + */ + // Note: A performance optimization in DimensionBinding depends on the identity of the list returned from this + public List<String> getDimensions() { return dimensions; } + + /** Returns the map of field values of this indexed by field name. */ + public Map<String,FieldValues> getFieldValues() { return fieldValuesByName; } + + /** Returns the profiles inherited from various variants of this */ + public FieldValues getInherited() { return inheritedProfiles; } + + /** + * Returns all the variants of this, sorted by specificity. This is content as declared. + * The returned list is always unmodifiable. 
+ */ + public List<QueryProfileVariant> getVariants() { + if (frozen) return variants; // Already unmodifiable + return Collections.unmodifiableList(variants); + } + + public QueryProfileVariants clone() { + try { + if (frozen) return this; + QueryProfileVariants clone=(QueryProfileVariants)super.clone(); + clone.inheritedProfiles=inheritedProfiles.clone(); + + clone.variants=new ArrayList<>(); + for (QueryProfileVariant variant : variants) + clone.variants.add(variant.clone()); + + clone.fieldValuesByName=new HashMap<>(); + for (Map.Entry<String,FieldValues> entry : fieldValuesByName.entrySet()) + clone.fieldValuesByName.put(entry.getKey(),entry.getValue().clone(entry.getKey(),clone.variants)); + + return clone; + } + catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + } + + /** Throws an IllegalStateException if this is frozen */ + protected void ensureNotFrozen() { + if (frozen) + throw new IllegalStateException(this + " is frozen and cannot be modified"); + } + + /** + * Returns the query profile variant having exactly the given dimensions, and creates it if create is set and + * it is missing + * + * @param dimensionValues the dimension values + * @param create whether or not to create the variant if missing + * @return the profile variant, or null if not found and create is false + */ + public QueryProfileVariant getVariant(DimensionValues dimensionValues,boolean create) { + for (QueryProfileVariant profileVariant : variants) + if (profileVariant.getDimensionValues().equals(dimensionValues)) + return profileVariant; + + // Not found + if (!create) return null; + QueryProfileVariant variant=new QueryProfileVariant(dimensionValues, owner); + variants.add(variant); + return variant; + } + + public static class FieldValues implements Freezable, Cloneable { + + private List<FieldValue> resolutionList=null; + + private boolean frozen=false; + + @Override + public void freeze() { + if (frozen) return; + sort(); + if (resolutionList != null) 
+ resolutionList = ImmutableList.copyOf(resolutionList); + frozen = true; + } + + @Override + public boolean isFrozen() { + return frozen; + } + + public void put(DimensionValues dimensionValues,Object value) { + ensureNotFrozen(); + if (resolutionList==null) resolutionList=new ArrayList<>(); + FieldValue fieldValue=getExactFieldValue(dimensionValues); + if (fieldValue!=null) // Replace + fieldValue.setValue(value); + else + resolutionList.add(new FieldValue(dimensionValues,value)); + } + + /** Returns the value having exactly the given dimensions, or null if none */ + public Object getExact(DimensionValues dimensionValues) { + FieldValue value=getExactFieldValue(dimensionValues); + if (value==null) return null; + return value.getValue(); + } + + /** Returns the field value having exactly the given dimensions, or null if none */ + private FieldValue getExactFieldValue(DimensionValues dimensionValues) { + for (FieldValue fieldValue : asList()) + if (fieldValue.getDimensionValues().equals(dimensionValues)) + return fieldValue; + return null; + } + + /** Returns the field values (values for various dimensions) for this field as a read-only list (never null) */ + public List<FieldValue> asList() { + if (resolutionList==null) return Collections.emptyList(); + return resolutionList; + } + + public FieldValue getIfExists(int index) { + if (index>=size()) return null; + return resolutionList.get(index); + } + + public void sort() { + if (frozen) return ; // sorted already + if (resolutionList!=null) + Collections.sort(resolutionList); + } + + /** Same as asList().size() */ + public int size() { + if (resolutionList==null) return 0; + return resolutionList.size(); + } + + /** Throws an IllegalStateException if this is frozen */ + protected void ensureNotFrozen() { + if (frozen) + throw new IllegalStateException(this + " is frozen and cannot be modified"); + } + + /** Clone by filling in values from the given variants */ + public FieldValues clone(String 
fieldName,List<QueryProfileVariant> clonedVariants) { + try { + if (frozen) return this; + FieldValues clone=(FieldValues)super.clone(); + + if (resolutionList!=null) { + clone.resolutionList=new ArrayList<>(resolutionList.size()); + for (FieldValue value : resolutionList) + clone.resolutionList.add(value.clone(fieldName,clonedVariants)); + } + + return clone; + } + catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + } + + public @Override FieldValues clone() { + try { + if (frozen) return this; + FieldValues clone=(FieldValues)super.clone(); + + if (resolutionList!=null) { + clone.resolutionList=new ArrayList<>(resolutionList.size()); + for (FieldValue value : resolutionList) + clone.resolutionList.add(value.clone()); + } + + return clone; + } + catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + } + + } + + public static class FieldValue implements Comparable<FieldValue>, Cloneable { + + private DimensionValues dimensionValues; + private Object value; + + public FieldValue(DimensionValues dimensionValues,Object value) { + this.dimensionValues=dimensionValues; + this.value=value; + } + + /** + * Returns the dimension values for which this value should be used. + * The dimension array is always of the exact size of the dimensions specified by the owning QueryProfileVariants, + * and the values appear in the order defined. "Wildcard" dimensions are represented by a null. 
+ */ + public DimensionValues getDimensionValues() { return dimensionValues; } + + /** Returns the value to use for this set of dimension values */ + public Object getValue() { return value; } + + /** Sets the value to use for this set of dimension values */ + public void setValue(Object value) { this.value=value; } + + public boolean matches(DimensionValues givenDimensionValues) { + return dimensionValues.matches(givenDimensionValues); + } + + /** + * Implements the sort order of this which is based on specificity + * where dimensions to the left are more significant. + * <p> + * <b>Note:</b> This ordering is not consistent with equals - it returns 0 when the same dimensions + * are <i>set</i>, regardless of what they are set <i>to</i>. + */ + public @Override int compareTo(FieldValue other) { + return this.dimensionValues.compareTo(other.dimensionValues); + } + + /** Clone by filling in the value from the given variants */ + public FieldValue clone(String fieldName,List<QueryProfileVariant> clonedVariants) { + try { + FieldValue clone=(FieldValue)super.clone(); + if (this.value instanceof QueryProfile) + clone.value=lookupInVariants(fieldName,dimensionValues,clonedVariants); + // Otherwise the value is immutable, so keep it as-is + return clone; + } + catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + } + + public FieldValue clone() { + try { + FieldValue clone=(FieldValue)super.clone(); + clone.value=QueryProfile.cloneIfNecessary(this.value); + return clone; + } + catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + } + + private Object lookupInVariants(String fieldName,DimensionValues dimensionValues,List<QueryProfileVariant> variants) { + for (QueryProfileVariant variant : variants) { + if ( ! 
variant.getDimensionValues().equals(dimensionValues)) continue; + return variant.values().get(fieldName); + } + return null; + } + + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/QueryProfileVisitor.java b/container-search/src/main/java/com/yahoo/search/query/profile/QueryProfileVisitor.java new file mode 100644 index 00000000000..8cb6bf34021 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/QueryProfileVisitor.java @@ -0,0 +1,87 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.profile; + +/** + * Instances of this is used to visit nodes in a graph of query profiles + * + * <code> + * Visitor are called in the following sequence on each query profile: + * enter=enter(referenceName); + * onQueryProfile(this) + * if (enter) { + * getLocalKey() + * ...calls on nested content found in variants, this and inherited, in that order + * leave(referenceName) + * } + * + * The first enter call will be on the root node, which has an empt reference name. + * </code> + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +abstract class QueryProfileVisitor { + + /** + * Called when a new <b>nested</b> profile in the graph is entered. + * This default implementation does nothing but returning true. + * If the node is entered (if true is returned from this), a corresponding {@link #leave(String)} call will happen + * later. + * + * @param name the name this profile is nested as, or the empty string if we are entering the root profile + * @return whether we should visit the content of this node or not + */ + public boolean enter(String name) { return true; } + + /** + * Called when the last {@link #enter(String) entered} nested profile is left. + * That is: One leave call is made for each enter call which returns true, + * but due to nesting those calls are not necessarily alternating. 
+ * This default implementation does nothing. + */ + public void leave(String name) { } + + /** + * Called when a value (not a query profile) is encountered. + * + * @param localName the local name of this value (the full name, if needed, must be reconstructed + * by the information given by the history of {@link #enter(String)} and {@link #leave(String)} calls + * @param value the value + * @param binding the binding this holds for + * @param owner the query profile having this value, or null only when profile is the root profile + */ + public abstract void onValue(String localName, Object value, DimensionBinding binding, QueryProfile owner); + + /** + * Called when a query profile is encountered. + * + * @param profile the query profile reference encountered + * @param binding the binding this holds for + * @param owner the profile making this reference, or null only when profile is the root profile + */ + public abstract void onQueryProfile(QueryProfile profile, DimensionBinding binding, QueryProfile owner); + + /** Returns whether this visitor is done visiting what it needed to visit at this point */ + public abstract boolean isDone(); + + /** Returns whether we should, at this point, visit inherited profiles. 
This default implementation returns true */ + public boolean visitInherited() { return true; } + + /** + * Returns the current local key which should be visited in the last {@link #enter(String) entered} sub-profile + * (or in the top level profile if none is entered), or null to visit all content + */ + public abstract String getLocalKey(); + + /** Calls onValue or onQueryProfile on this and visits the content if it's a profile */ + final void acceptValue(String key, Object value, DimensionBinding dimensionBinding, QueryProfile owner) { + if (value==null) return; + if (value instanceof QueryProfile) { + QueryProfile queryProfileValue=(QueryProfile)value; + queryProfileValue.acceptAndEnter(key, this, dimensionBinding.createFor(queryProfileValue.getDimensions()), owner); + } + else { + onValue(key, value, dimensionBinding, owner); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/SingleValueQueryProfileVisitor.java b/container-search/src/main/java/com/yahoo/search/query/profile/SingleValueQueryProfileVisitor.java new file mode 100644 index 00000000000..6d5d1b0686a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/SingleValueQueryProfileVisitor.java @@ -0,0 +1,76 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.profile; + +import java.util.List; + +/** + * Visitor which stores the first non-query-profile value encountered, + * or the first query profile encountered at a stop where we do not have any name components left which can be used to + * visit further subprofiles. Hence this may be used both to get the highest prioritized primitive + * value, or query profile, whichever is encountered first which matches the name. 
+ * <p> + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +final class SingleValueQueryProfileVisitor extends QueryProfileVisitor { + + /** The value found, or null if none */ + private Object value=null; + + private final List<String> name; + + private int nameIndex=-1; + + private final boolean allowQueryProfileResult; + + private boolean enteringContent=true; + + public SingleValueQueryProfileVisitor(List<String> name,boolean allowQueryProfileResult) { + this.name=name; + this.allowQueryProfileResult=allowQueryProfileResult; + } + + public @Override String getLocalKey() { + return name.get(nameIndex); + } + + public @Override boolean enter(String name) { + if (nameIndex+1<this.name.size()) { + nameIndex++; + enteringContent=true; + } + else { + enteringContent=false; + } + return enteringContent; + } + + public @Override void leave(String name) { + nameIndex--; + } + + public @Override void onValue(String key,Object value, DimensionBinding binding, QueryProfile owner) { + if (nameIndex==name.size()-1) + this.value=value; + } + + public @Override void onQueryProfile(QueryProfile profile,DimensionBinding binding, QueryProfile owner) { + if (enteringContent) return; // still waiting for content + if (allowQueryProfileResult) + this.value = profile; + else + this.value = profile.getValue(); + } + + public @Override boolean isDone() { + return value!=null; + } + + /** Returns the value found during visiting, or null if none */ + public Object getResult() { return value; } + + public @Override String toString() { + return "a single value visitor (hash " + hashCode() + ") with current value " + value; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/SubstituteString.java b/container-search/src/main/java/com/yahoo/search/query/profile/SubstituteString.java new file mode 100644 index 00000000000..59401592378 --- /dev/null +++ 
b/container-search/src/main/java/com/yahoo/search/query/profile/SubstituteString.java @@ -0,0 +1,127 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.profile; + +import com.yahoo.processing.request.Properties; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +/** + * A string which contains one or more elements of the form %{name}, + * where these occurrences are to be replaced by a query profile lookup on name. + * <p> + * This objects does the analysis on creation and provides a (reasonably) fast method of + * performing the actual substitution (at lookup time). + * <p> + * This is a value object. Lookups in this are thread safe. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class SubstituteString { + + private final List<Component> components; + private final String stringValue; + + /** + * Returns a new SubstituteString if the given string contains substitutions, null otherwise. 
+ */ + public static SubstituteString create(String value) { + int lastEnd=0; + int start=value.indexOf("%{"); + if (start<0) return null; // Shortcut + List<Component> components=new ArrayList<>(); + while (start>=0) { + int end=value.indexOf("}",start+2); + if (end<0) + throw new IllegalArgumentException("Unterminated value substitution '" + value.substring(start) + "'"); + String propertyName=value.substring(start+2,end); + if (propertyName.indexOf("%{")>=0) + throw new IllegalArgumentException("Unterminated value substitution '" + value.substring(start) + "'"); + components.add(new StringComponent(value.substring(lastEnd,start))); + components.add(new PropertyComponent(propertyName)); + lastEnd=end+1; + start=value.indexOf("%{",lastEnd); + } + components.add(new StringComponent(value.substring(lastEnd,value.length()))); + return new SubstituteString(components, value); + } + + private SubstituteString(List<Component> components, String stringValue) { + this.components = components; + this.stringValue = stringValue; + } + + /** + * Perform the substitution in this, by looking up in the given query profile, + * and returns the resulting string + */ + public String substitute(Map<String,String> context,Properties substitution) { + StringBuilder b=new StringBuilder(); + for (Component component : components) + b.append(component.getValue(context,substitution)); + return b.toString(); + } + + @Override + public int hashCode() { + return stringValue.hashCode(); + } + + @Override + public boolean equals(Object other) { + if (other == this) return true; + if ( ! 
(other instanceof SubstituteString)) return false; + return this.stringValue.equals(((SubstituteString)other).stringValue); + } + + /** Returns this string in original (unsubstituted) form */ + public @Override String toString() { + return stringValue; + } + + private abstract static class Component { + + protected abstract String getValue(Map<String,String> context,Properties substitution); + + } + + private final static class StringComponent extends Component { + + private final String value; + + public StringComponent(String value) { + this.value=value; + } + + public @Override String getValue(Map<String,String> context,Properties substitution) { + return value; + } + + public @Override String toString() { + return value; + } + + } + + private final static class PropertyComponent extends Component { + + private final String propertyName; + + public PropertyComponent(String propertyName) { + this.propertyName=propertyName; + } + + public @Override String getValue(Map<String,String> context,Properties substitution) { + Object value=substitution.get(propertyName,context,substitution); + if (value==null) return ""; + return String.valueOf(value); + } + + public @Override String toString() { + return "%{" + propertyName + "}"; + } + + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/compiled/Binding.java b/container-search/src/main/java/com/yahoo/search/query/profile/compiled/Binding.java new file mode 100644 index 00000000000..a440365ceba --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/compiled/Binding.java @@ -0,0 +1,128 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.query.profile.compiled; + +import com.yahoo.search.query.profile.DimensionBinding; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +/** + * An immutable binding of a set of dimensions to values. + * This binding is minimal in that it only includes dimensions which actually have values. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class Binding implements Comparable<Binding> { + + private static final int maxDimensions = 31; + + /** + * A higher number means this is more general. This accounts for both the number and position of the bindings + * in the dimensional space, such that bindings in earlier dimensions are matched before bindings in + * later dimensions + */ + private final int generality; + + /** The dimensions of this. Unenforced invariant: Content never changes. */ + private final String[] dimensions; + + /** The values of those dimensions. Unenforced invariant: Content never changes. */ + private final String[] dimensionValues; + + private final int hashCode; + + @SuppressWarnings("unchecked") + public static final Binding nullBinding= new Binding(Integer.MAX_VALUE, Collections.<String,String>emptyMap()); + + public static Binding createFrom(DimensionBinding dimensionBinding) { + if (dimensionBinding.getDimensions().size() > maxDimensions) + throw new IllegalArgumentException("More than 31 dimensions is not supported"); + + int generality = 0; + Map<String, String> context = new HashMap<>(); + if (dimensionBinding.getDimensions() == null || dimensionBinding.getDimensions().isEmpty()) { // TODO: Just have this return the nullBinding + generality = Integer.MAX_VALUE; + } + else { + for (int i = 0; i <= maxDimensions; i++) { + String value = i < dimensionBinding.getDimensions().size() ? 
dimensionBinding.getValues().get(i) : null; + if (value == null) + generality += Math.pow(2, maxDimensions - i-1); + else + context.put(dimensionBinding.getDimensions().get(i), value); + } + } + return new Binding(generality, context); + } + + private Binding(int generality, Map<String, String> binding) { + this.generality = generality; + + // Map -> arrays to limit memory consumption and speed up evaluation + dimensions = new String[binding.size()]; + dimensionValues = new String[binding.size()]; + + int i = 0; + int bindingHash = 0; + for (Map.Entry<String,String> entry : binding.entrySet()) { + dimensions[i] = entry.getKey(); + dimensionValues[i] = entry.getValue(); + bindingHash += i * entry.getKey().hashCode() + 11 * i * entry.getValue().hashCode(); + i++; + } + this.hashCode = bindingHash; + } + + /** Returns true only if this binding is null (contains no values for its dimensions (if any) */ + public boolean isNull() { return dimensions.length == 0; } + + @Override + public String toString() { + StringBuilder b = new StringBuilder("Binding["); + for (int i = 0; i < dimensions.length; i++) + b.append(dimensions[i]).append("=").append(dimensionValues[i]).append(","); + if (dimensions.length > 0) + b.setLength(b.length()-1); + b.append("] (generality " + generality + ")"); + return b.toString(); + } + + /** Returns whether the given binding has exactly the same values as this */ + @Override + public boolean equals(Object o) { + if (o == this) return true; + if (! (o instanceof Binding)) return false; + Binding other = (Binding)o; + return Arrays.equals(this.dimensions, other.dimensions) + && Arrays.equals(this.dimensionValues, other.dimensionValues); + } + + @Override + public int hashCode() { return hashCode; } + + /** + * Returns true if all the dimension values in this have the same values + * in the given context. + */ + public boolean matches(Map<String,String> context) { + for (int i = 0; i < dimensions.length; i++) { + if ( ! 
dimensionValues[i].equals(context.get(dimensions[i]))) return false; + } + return true; + } + + /** + * Implements a partial ordering where more specific bindings come before less specific ones, + * taking both the number of bindings and their positions into account (earlier dimensions + * take precedence over later ones. + * <p> + * The order is not well defined for bindings in different dimensional spaces. + */ + @Override + public int compareTo(Binding other) { + return Integer.compare(this.generality, other.generality); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/compiled/CompiledQueryProfile.java b/container-search/src/main/java/com/yahoo/search/query/profile/compiled/CompiledQueryProfile.java new file mode 100644 index 00000000000..a4056ee55a2 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/compiled/CompiledQueryProfile.java @@ -0,0 +1,183 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.profile.compiled; + +import com.yahoo.component.AbstractComponent; +import com.yahoo.component.ComponentId; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.processing.request.Properties; +import com.yahoo.search.query.profile.QueryProfileProperties; +import com.yahoo.search.query.profile.SubstituteString; +import com.yahoo.search.query.profile.types.QueryProfileType; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * A query profile in a state where it is optimized for fast lookups. 
+ * + * @author bratseth + */ +public class CompiledQueryProfile extends AbstractComponent implements Cloneable { + + private static final Pattern namePattern=Pattern.compile("[$a-zA-Z_/][-$a-zA-Z0-9_/()]*"); + + private final CompiledQueryProfileRegistry registry; + + /** The type of this, or null if none */ + private final QueryProfileType type; + + /** The values of this */ + private final DimensionalMap<CompoundName, Object> entries; + + /** Keys which have a type in this */ + private final DimensionalMap<CompoundName, QueryProfileType> types; + + /** Keys which are (typed or untyped) references to other query profiles in this. Used as a set. */ + private final DimensionalMap<CompoundName, Object> references; + + /** Values which are not overridable in this. Used as a set. */ + private final DimensionalMap<CompoundName, Object> unoverridables; + + /** + * Creates a new query profile from an id. + */ + public CompiledQueryProfile(ComponentId id, QueryProfileType type, + DimensionalMap<CompoundName, Object> entries, + DimensionalMap<CompoundName, QueryProfileType> types, + DimensionalMap<CompoundName, Object> references, + DimensionalMap<CompoundName, Object> unoverridables, + CompiledQueryProfileRegistry registry) { + super(id); + this.registry = registry; + if (type != null) + type.freeze(); + this.type = type; + this.entries = entries; + this.types = types; + this.references = references; + this.unoverridables = unoverridables; + if ( ! 
id.isAnonymous()) + validateName(id.getName()); + } + + // ----------------- Public API ------------------------------------------------------------------------------- + + /** Returns the registry this belongs to, or null if none (in which case runtime profile reference assignment won't work) */ + public CompiledQueryProfileRegistry getRegistry() { return registry; } + + /** Returns the type of this or null if it has no type */ + // TODO: Move into below + public QueryProfileType getType() { return type; } + + /** + * Returns whether or not the given field name can be overridden at runtime. + * Attempts to override values which cannot be overridden will not fail but be ignored. + * Default: true. + * + * @param name the name of the field to check + * @param context the context in which to check, or null if none + */ + public final boolean isOverridable(CompoundName name, Map<String, String> context) { + return unoverridables.get(name, context) == null; + } + + /** Returns the type of a given prefix reachable from this profile, or null if none */ + public final QueryProfileType getType(CompoundName name, Map<String, String> context) { + return types.get(name, context); + } + + /** Returns the types reachable from this, or an empty map (never null) if none */ + public DimensionalMap<CompoundName, QueryProfileType> getTypes() { return types; } + + /** Returns the references reachable from this, or an empty map (never null) if none */ + public DimensionalMap<CompoundName, Object> getReferences() { return references; } + + /** + * Return all objects that start with the given prefix path using no context. Use "" to list all. 
+ * <p> + * For example, if {a.d => "a.d-value" ,a.e => "a.e-value", b.d => "b.d-value", then calling listValues("a") + * will return {"d" => "a.d-value","e" => "a.e-value"} + */ + public final Map<String, Object> listValues(final CompoundName prefix) { return listValues(prefix, Collections.<String,String>emptyMap()); } + public final Map<String, Object> listValues(final String prefix) { return listValues(new CompoundName(prefix)); } + /** + * Return all objects that start with the given prefix path. Use "" to list all. + * <p> + * For example, if {a.d => "a.d-value" ,a.e => "a.e-value", b.d => "b.d-value", then calling listValues("a") + * will return {"d" => "a.d-value","e" => "a.e-value"} + */ + public final Map<String, Object> listValues(final String prefix,Map<String,String> context) { + return listValues(new CompoundName(prefix), context); + } + /** + * Return all objects that start with the given prefix path. Use "" to list all. + * <p> + * For example, if {a.d => "a.d-value" ,a.e => "a.e-value", b.d => "b.d-value", then calling listValues("a") + * will return {"d" => "a.d-value","e" => "a.e-value"} + */ + public final Map<String, Object> listValues(final CompoundName prefix,Map<String,String> context) { + return listValues(prefix, context, null); + } + /** + * Adds all objects that start with the given path prefix to the given value map. Use "" to list all. + * <p> + * For example, if {a.d => "a.d-value" ,a.e => "a.e-value", b.d => "b.d-value", then calling listValues("a") + * will return {"d" => "a.d-value","e" => "a.e-value"} + */ + public Map<String, Object> listValues(CompoundName prefix, Map<String,String> context, Properties substitution) { + Map<String, Object> values = new HashMap<>(); + for (Map.Entry<CompoundName, DimensionalValue<Object>> entry : entries.entrySet()) { + if ( entry.getKey().size() <= prefix.size()) continue; + if ( ! 
entry.getKey().hasPrefix(prefix)) continue; + + Object value = entry.getValue().get(context); + if (value == null) continue; + + value = substitute(value, context, substitution); + CompoundName suffixName = entry.getKey().rest(prefix.size()); + values.put(suffixName.toString(), value); + } + return values; + } + + public final Object get(String name) { + return get(name, Collections.<String,String>emptyMap()); + } + public final Object get(String name, Map<String,String> context) { + return get(name, context, new QueryProfileProperties(this)); + } + public final Object get(String name, Map<String,String> context, Properties substitution) { + return get(new CompoundName(name), context, substitution); + } + public final Object get(CompoundName name, Map<String, String> context, Properties substitution) { + return substitute(entries.get(name, context), context, substitution); + } + + private Object substitute(Object value, Map<String,String> context, Properties substitution) { + if (value == null) return value; + if (substitution == null) return value; + if (value.getClass() != SubstituteString.class) return value; + return ((SubstituteString)value).substitute(context, substitution); + } + + /** Throws IllegalArgumentException if the given string is not a valid query profile name */ + private static void validateName(String name) { + Matcher nameMatcher=namePattern.matcher(name); + if ( ! nameMatcher.matches()) + throw new IllegalArgumentException("Illegal name '" + name + "'"); + } + + @Override + public CompiledQueryProfile clone() { + return this; // immutable + } + + @Override + public String toString() { + return "query profile '" + getId() + "'" + (type!=null ? 
" of type '" + type.getId() + "'" : ""); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/compiled/CompiledQueryProfileRegistry.java b/container-search/src/main/java/com/yahoo/search/query/profile/compiled/CompiledQueryProfileRegistry.java new file mode 100644 index 00000000000..91a81888267 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/compiled/CompiledQueryProfileRegistry.java @@ -0,0 +1,76 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.profile.compiled; + +import com.yahoo.component.ComponentSpecification; +import com.yahoo.component.provider.ComponentRegistry; +import com.yahoo.search.query.profile.types.QueryProfileType; +import com.yahoo.search.query.profile.types.QueryProfileTypeRegistry; + +/** + * A set of compiled query profiles. + * + * @author bratseth + */ +public class CompiledQueryProfileRegistry extends ComponentRegistry<CompiledQueryProfile> { + + private final QueryProfileTypeRegistry typeRegistry; + + /** Creates a compiled query profile registry with no types */ + public CompiledQueryProfileRegistry() { + this(QueryProfileTypeRegistry.emptyFrozen()); + } + + public CompiledQueryProfileRegistry(QueryProfileTypeRegistry typeRegistry) { + this.typeRegistry = typeRegistry; + } + + /** Registers a type by its id */ + public void register(CompiledQueryProfile profile) { + super.register(profile.getId(), profile); + } + + public QueryProfileTypeRegistry getTypeRegistry() { return typeRegistry; } + + /** + * <p>Returns a query profile for the given request string, or null if a suitable one is not found.</p> + * + * The request string must be a valid {@link com.yahoo.component.ComponentId} or null.<br> + * If the string is null, the profile named "default" is returned, or null if that does not exists. 
+ * + * <p> + * The version part (if any) is matched used the usual component version patching rules. + * If the name part matches a query profile name perfectly, that profile is returned. + * If not, and the name is a slash-separated path, the profile with the longest matching left sub-path + * which has a type which allows path matching is used. If there is no such profile, null is returned. + */ + public CompiledQueryProfile findQueryProfile(String idString) { + if (idString==null || idString.isEmpty()) return getComponent("default"); + ComponentSpecification id=new ComponentSpecification(idString); + CompiledQueryProfile profile=getComponent(id); + if (profile!=null) return profile; + + return findPathParentQueryProfile(new ComponentSpecification(idString)); + } + + private CompiledQueryProfile findPathParentQueryProfile(ComponentSpecification id) { + // Try the name with "/" appended - should have the same semantics with path matching + CompiledQueryProfile slashedProfile=getComponent(new ComponentSpecification(id.getName() + "/",id.getVersionSpecification())); + if (slashedProfile!=null && slashedProfile.getType()!=null && slashedProfile.getType().getMatchAsPath()) + return slashedProfile; + + // Extract the parent (if any) + int slashIndex=id.getName().lastIndexOf("/"); + if (slashIndex<1) return null; + String parentName=id.getName().substring(0,slashIndex); + if (parentName.equals("")) return null; + + ComponentSpecification parentId=new ComponentSpecification(parentName,id.getVersionSpecification()); + + CompiledQueryProfile pathParentProfile=getComponent(parentId); + + if (pathParentProfile!=null && pathParentProfile.getType()!=null && pathParentProfile.getType().getMatchAsPath()) + return pathParentProfile; + return findPathParentQueryProfile(parentId); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/compiled/DimensionalMap.java 
b/container-search/src/main/java/com/yahoo/search/query/profile/compiled/DimensionalMap.java new file mode 100644 index 00000000000..b82939fa4ac --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/compiled/DimensionalMap.java @@ -0,0 +1,68 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.profile.compiled; + +import com.google.common.collect.ImmutableMap; +import com.yahoo.search.query.profile.DimensionBinding; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +/** + * A map which may return different values depending on the values given in a context + * supplied with the key on all operations. + * <p> + * Dimensional maps are immutable and created through a DimensionalMap.Builder + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class DimensionalMap<KEY, VALUE> { + + private final Map<KEY, DimensionalValue<VALUE>> values; + + private DimensionalMap(Map<KEY, DimensionalValue<VALUE>> values) { + this.values = ImmutableMap.copyOf(values); + } + + /** Returns the value for this key matching a context, or null if none */ + public VALUE get(KEY key, Map<String, String> context) { + DimensionalValue<VALUE> variants = values.get(key); + if (variants == null) return null; + return variants.get(context); + } + + /** Returns the set of dimensional entries across all contexts. */ + public Set<Map.Entry<KEY, DimensionalValue<VALUE>>> entrySet() { + return values.entrySet(); + } + + /** Returns true if this is empty for all contexts. */ + public boolean isEmpty() { + return values.isEmpty(); + } + + public static class Builder<KEY, VALUE> { + + private Map<KEY, DimensionalValue.Builder<VALUE>> entries = new HashMap<>(); + + // TODO: DimensionBinding -> Binding? 
+ public void put(KEY key, DimensionBinding binding, VALUE value) { + DimensionalValue.Builder<VALUE> entry = entries.get(key); + if (entry == null) { + entry = new DimensionalValue.Builder<>(); + entries.put(key, entry); + } + entry.add(value, binding); + } + + public DimensionalMap<KEY, VALUE> build() { + Map<KEY, DimensionalValue<VALUE>> map = new HashMap<>(); + for (Map.Entry<KEY, DimensionalValue.Builder<VALUE>> entry : entries.entrySet()) { + map.put(entry.getKey(), entry.getValue().build()); + } + return new DimensionalMap<>(map); + } + + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/compiled/DimensionalValue.java b/container-search/src/main/java/com/yahoo/search/query/profile/compiled/DimensionalValue.java new file mode 100644 index 00000000000..0112928ada6 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/compiled/DimensionalValue.java @@ -0,0 +1,159 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.profile.compiled; + +import com.yahoo.search.query.profile.DimensionBinding; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Contains the values a given key in a DimensionalMap may take for different dimensional contexts. 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class DimensionalValue<VALUE> { + + private final List<Value<VALUE>> values; + + /** Create a set of variants which is a single value regardless of dimensions */ + public DimensionalValue(Value<VALUE> value) { + this.values = Collections.singletonList(value); + } + + public DimensionalValue(List<Value<VALUE>> valueVariants) { + if (valueVariants.size() == 1) { // special cased for efficiency + this.values = Collections.singletonList(valueVariants.get(0)); + } + else { + this.values = new ArrayList<>(valueVariants); + Collections.sort(this.values); + } + } + + /** Returns the value matching this context, or null if none */ + public VALUE get(Map<String, String> context) { + if (context == null) + context = Collections.emptyMap(); + for (Value<VALUE> value : values) { + if (value.matches(context)) + return value.value(); + } + return null; + } + + public boolean isEmpty() { return values.isEmpty(); } + + @Override + public String toString() { + return values.toString(); + } + + public static class Builder<VALUE> { + + /** The minimal set of variants needed to capture all values at this key */ + private Map<VALUE, Value.Builder<VALUE>> buildableVariants = new HashMap<>(); + + public void add(VALUE value, DimensionBinding variantBinding) { + // Note: We know we can index by the value because its possible types are constrained + // to what query profiles allow: String, primitives and query profiles + Value.Builder variant = buildableVariants.get(value); + if (variant == null) { + variant = new Value.Builder<>(value); + buildableVariants.put(value, variant); + } + variant.addVariant(variantBinding); + } + + public DimensionalValue<VALUE> build() { + List<Value> variants = new ArrayList<>(); + for (Value.Builder buildableVariant : buildableVariants.values()) { + variants.addAll(buildableVariant.build()); + } + return new DimensionalValue(variants); + } + + } + + /** A value for a 
particular binding */ + private static class Value<VALUE> implements Comparable<Value> { + + private VALUE value = null; + + /** The minimal binding this holds for */ + private Binding binding = null; + + public Value(VALUE value, Binding binding) { + this.value = value; + this.binding = binding; + } + + /** Returns the value at this entry or null if none */ + public VALUE value() { return value; } + + /** Returns the binding that must match for this to be a valid entry, or Binding.nullBinding if none */ + public Binding binding() { + if (binding == null) return Binding.nullBinding; + return binding; + } + + public boolean matches(Map<String, String> context) { + return binding.matches(context); + } + + @Override + public int compareTo(Value other) { + return this.binding.compareTo(other.binding); + } + + @Override + public String toString() { + return " value '" + value + "' for " + binding; + } + + /** + * A single value with the minimal set of dimension combinations it holds for. + */ + private static class Builder<VALUE> { + + private final VALUE value; + + /** + * The set of bindings this value is for. + * Some of these are more general versions of others. + * We need to keep both to allow interleaving a different value with medium generality. 
+ */ + private Set<DimensionBinding> variants = new HashSet<>(); + + public Builder(VALUE value) { + this.value = value; + } + + /** Add a binding this holds for */ + public void addVariant(DimensionBinding binding) { + variants.add(binding); + } + + /** Build a separate value object for each dimension combination which has this value */ + public List<Value<VALUE>> build() { + // Shortcut for efficiency of the normal case + if (variants.size()==1) + return Collections.singletonList(new Value<>(value, Binding.createFrom(variants.iterator().next()))); + + List<Value<VALUE>> values = new ArrayList<>(variants.size()); + for (DimensionBinding variant : variants) + values.add(new Value<>(value, Binding.createFrom(variant))); + return values; + } + + public Object value() { + return value; + } + + } + } +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/config/QueryProfileConfigurer.java b/container-search/src/main/java/com/yahoo/search/query/profile/config/QueryProfileConfigurer.java new file mode 100644 index 00000000000..5770665e3a1 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/config/QueryProfileConfigurer.java @@ -0,0 +1,227 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.query.profile.config; + +import com.yahoo.component.ComponentId; +import com.yahoo.component.ComponentSpecification; +import com.yahoo.config.subscription.ConfigSubscriber; +import com.yahoo.search.query.profile.DimensionValues; +import com.yahoo.search.query.profile.QueryProfile; +import com.yahoo.search.query.profile.QueryProfileRegistry; +import com.yahoo.search.query.profile.types.FieldDescription; +import com.yahoo.search.query.profile.types.FieldType; +import com.yahoo.search.query.profile.types.QueryProfileType; +import com.yahoo.search.query.profile.types.QueryProfileTypeRegistry; +import com.yahoo.text.BooleanParser; + +import java.util.HashSet; +import java.util.Set; + +/** + * @author bratseth + */ +public class QueryProfileConfigurer implements ConfigSubscriber.SingleSubscriber<QueryProfilesConfig> { + + private final ConfigSubscriber subscriber = new ConfigSubscriber(); + + private volatile QueryProfileRegistry currentRegistry; + + public QueryProfileConfigurer(String configId) { + subscriber.subscribe(this, QueryProfilesConfig.class, configId); + } + + /** Returns the registry created by the last occurring call to configure */ + public QueryProfileRegistry getCurrentRegistry() { return currentRegistry; } + + private void setCurrentRegistry(QueryProfileRegistry registry) { + this.currentRegistry=registry; + } + + public void configure(QueryProfilesConfig config) { + QueryProfileRegistry registry = createFromConfig(config); + setCurrentRegistry(registry); + } + + public static QueryProfileRegistry createFromConfig(QueryProfilesConfig config) { + QueryProfileRegistry registry=new QueryProfileRegistry(); + + // Pass 1: Create all profiles and profile types + for (QueryProfilesConfig.Queryprofiletype profileTypeConfig : config.queryprofiletype()) { + createProfileType(profileTypeConfig,registry.getTypeRegistry()); + } + for (QueryProfilesConfig.Queryprofile profileConfig : config.queryprofile()) { + 
createProfile(profileConfig,registry); + } + + // Pass 2: Resolve references and add content + for (QueryProfilesConfig.Queryprofiletype profileTypeConfig : config.queryprofiletype()) { + fillProfileType(profileTypeConfig,registry.getTypeRegistry()); + } + + // To ensure topological sorting, using DPS. This will _NOT_ detect cycles (but it will not fail if they + // exist either) + Set<ComponentId> filled = new HashSet<>(); + for (QueryProfilesConfig.Queryprofile profileConfig : config.queryprofile()) { + fillProfile(profileConfig, config, registry, filled); + } + + registry.freeze(); + return registry; + } + + /** Stop subscribing from this configurer */ + public void shutdown() { + subscriber.close(); + } + + private static void createProfile(QueryProfilesConfig.Queryprofile config,QueryProfileRegistry registry) { + QueryProfile profile=new QueryProfile(config.id()); + try { + String typeId=config.type(); + if (typeId!=null && !typeId.isEmpty()) + profile.setType(registry.getType(typeId)); + + if (config.dimensions().size()>0) { + String[] dimensions=new String[config.dimensions().size()]; + for (int i=0; i<config.dimensions().size(); i++) + dimensions[i]=config.dimensions().get(i); + profile.setDimensions(dimensions); + } + + registry.register(profile); + } + catch (IllegalArgumentException e) { + throw new IllegalArgumentException("Invalid " + profile,e); + } + } + + private static void createProfileType(QueryProfilesConfig.Queryprofiletype config, QueryProfileTypeRegistry registry) { + QueryProfileType type=new QueryProfileType(config.id()); + type.setStrict(config.strict()); + type.setMatchAsPath(config.matchaspath()); + registry.register(type); + } + + private static void fillProfile(QueryProfilesConfig.Queryprofile config, + QueryProfilesConfig queryProfilesConfig, + QueryProfileRegistry registry, + Set<ComponentId> filled) { + QueryProfile profile=registry.getComponent(new ComponentSpecification(config.id()).toId()); + if (filled.contains(profile.getId())) 
return; + filled.add(profile.getId()); + try { + for (String inheritedId : config.inherit()) { + QueryProfile inherited=registry.getComponent(inheritedId); + if (inherited==null) + throw new IllegalArgumentException("Inherited query profile '" + inheritedId + "' in " + profile + " was not found"); + fillProfile(inherited, queryProfilesConfig, registry, filled); + profile.addInherited(inherited); + } + + for (QueryProfilesConfig.Queryprofile.Reference referenceConfig : config.reference()) { + QueryProfile referenced=registry.getComponent(referenceConfig.value()); + if (referenced==null) + throw new IllegalArgumentException("Query profile '" + referenceConfig.value() + "' referenced as '" + + referenceConfig.name() + "' in " + profile + " was not found"); + profile.set(referenceConfig.name(),referenced, registry); + if (referenceConfig.overridable()!=null && !referenceConfig.overridable().isEmpty()) + profile.setOverridable(referenceConfig.name(),BooleanParser.parseBoolean(referenceConfig.overridable()),null); + } + + for (QueryProfilesConfig.Queryprofile.Property propertyConfig : config.property()) { + profile.set(propertyConfig.name(),propertyConfig.value(), registry); + if (propertyConfig.overridable()!=null && !propertyConfig.overridable().isEmpty()) + profile.setOverridable(propertyConfig.name(),BooleanParser.parseBoolean(propertyConfig.overridable()),null); + } + + for (QueryProfilesConfig.Queryprofile.Queryprofilevariant variantConfig : config.queryprofilevariant()) { + String[] forDimensionValueArray=new String[variantConfig.fordimensionvalues().size()]; + for (int i=0; i<variantConfig.fordimensionvalues().size(); i++) { + forDimensionValueArray[i]=variantConfig.fordimensionvalues().get(i).trim(); + if ("*".equals(forDimensionValueArray[i])) + forDimensionValueArray[i]=null; + } + DimensionValues forDimensionValues=DimensionValues.createFrom(forDimensionValueArray); + + for (String inheritedId : variantConfig.inherit()) { + QueryProfile 
inherited=registry.getComponent(inheritedId); + if (inherited==null) + throw new IllegalArgumentException("Inherited query profile '" + inheritedId + "' in " + profile + + " for '" + forDimensionValues + "' was not found"); + fillProfile(inherited, queryProfilesConfig, registry, filled); + profile.addInherited(inherited, forDimensionValues); + } + + for (QueryProfilesConfig.Queryprofile.Queryprofilevariant.Reference referenceConfig : variantConfig.reference()) { + QueryProfile referenced=registry.getComponent(referenceConfig.value()); + if (referenced==null) + throw new IllegalArgumentException("Query profile '" + referenceConfig.value() + "' referenced as '" + + referenceConfig.name() + "' in " + profile + " for '" + forDimensionValues + "' was not found"); + profile.set(referenceConfig.name(), referenced, forDimensionValues, registry); + } + + for (QueryProfilesConfig.Queryprofile.Queryprofilevariant.Property propertyConfig : variantConfig.property()) { + profile.set(propertyConfig.name(), propertyConfig.value(), forDimensionValues, registry); + } + + } + + } + catch (IllegalArgumentException e) { + throw new IllegalArgumentException("Invalid " + profile,e); + } + } + + /** Fill a given profile by locating its config */ + private static void fillProfile(QueryProfile inherited, + QueryProfilesConfig queryProfilesConfig, + QueryProfileRegistry registry, + Set<ComponentId> visited) { + for (QueryProfilesConfig.Queryprofile inheritedConfig : queryProfilesConfig.queryprofile()) { + if (inherited.getId().stringValue().equals(inheritedConfig.id())) { + fillProfile(inheritedConfig, queryProfilesConfig, registry, visited); + } + } + } + + private static void fillProfileType(QueryProfilesConfig.Queryprofiletype config,QueryProfileTypeRegistry registry) { + QueryProfileType type=registry.getComponent(new ComponentSpecification(config.id()).toId()); + try { + + for (String inheritedId : config.inherit()) { + QueryProfileType inherited=registry.getComponent(inheritedId); + if 
(inherited==null) + throw new IllegalArgumentException("Inherited query profile type '" + inheritedId + "' in " + type + " was not found"); + else + type.inherited().add(inherited); + + } + + for (QueryProfilesConfig.Queryprofiletype.Field fieldConfig : config.field()) + instantiateFieldDescription(fieldConfig,type,registry); + } + catch (IllegalArgumentException e) { + throw new IllegalArgumentException("Invalid " + type,e); + } + } + + private static void instantiateFieldDescription(QueryProfilesConfig.Queryprofiletype.Field fieldConfig, + QueryProfileType type, + QueryProfileTypeRegistry registry) { + try { + FieldType fieldType=FieldType.fromString(fieldConfig.type(),registry); + FieldDescription field=new FieldDescription( + fieldConfig.name(), + fieldType, + fieldConfig.alias(), + fieldConfig.mandatory(), + fieldConfig.overridable() + ); + type.addField(field, registry); + } + catch (IllegalArgumentException e) { + throw new IllegalArgumentException("Invalid field '" + fieldConfig.name() + "' in " + type,e); + } + } + + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/config/QueryProfileXMLReader.java b/container-search/src/main/java/com/yahoo/search/query/profile/config/QueryProfileXMLReader.java new file mode 100644 index 00000000000..97e3fb90dc9 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/config/QueryProfileXMLReader.java @@ -0,0 +1,366 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.query.profile.config; + +import com.yahoo.component.ComponentId; +import com.yahoo.component.ComponentSpecification; +import com.yahoo.io.reader.NamedReader; +import com.yahoo.search.query.profile.DimensionValues; +import com.yahoo.search.query.profile.QueryProfile; +import com.yahoo.search.query.profile.QueryProfileRegistry; +import com.yahoo.search.query.profile.types.FieldDescription; +import com.yahoo.search.query.profile.types.FieldType; +import com.yahoo.search.query.profile.types.QueryProfileType; +import com.yahoo.search.query.profile.types.QueryProfileTypeRegistry; +import com.yahoo.text.XML; +import org.w3c.dom.Element; + +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.logging.Logger; + +/** + * A class which imports query profiles and types from XML files + * + * @author bratseth + */ +public class QueryProfileXMLReader { + + private static Logger logger=Logger.getLogger(QueryProfileXMLReader.class.getName()); + + /** + * Reads all query profile xml files in a given directory, + * and all type xml files from the immediate subdirectory "types/" (if any) + * + * @throws RuntimeException if <code>directory</code> is not a readable directory, or if there is some error in the XML + */ + public QueryProfileRegistry read(String directory) { + List<NamedReader> queryProfileReaders=new ArrayList<>(); + List<NamedReader> queryProfileTypeReaders=new ArrayList<>(); + try { + File dir=new File(directory); + if ( !dir.isDirectory() ) throw new IllegalArgumentException("Could not read query profiles: '" + + directory + "' is not a valid directory."); + + for (File file : sortFiles(dir)) { + if ( ! 
file.getName().endsWith(".xml")) continue; + queryProfileReaders.add(new NamedReader(file.getName(),new FileReader(file))); + } + File typeDir=new File(dir,"types"); + if (typeDir.isDirectory()) { + for (File file : sortFiles(typeDir)) { + if ( ! file.getName().endsWith(".xml")) continue; + queryProfileTypeReaders.add(new NamedReader(file.getName(),new FileReader(file))); + } + } + + return read(queryProfileTypeReaders,queryProfileReaders); + } + catch (IOException e) { + throw new IllegalArgumentException("Could not read query profiles from '" + directory + "'",e); + } + finally { + closeAll(queryProfileReaders); + closeAll(queryProfileTypeReaders); + } + } + + private List<File> sortFiles(File dir) { + ArrayList<File> files = new ArrayList<>(); + files.addAll(Arrays.asList(dir.listFiles())); + Collections.sort(files); + return files; + } + + private void closeAll(List<NamedReader> readers) { + for (NamedReader reader : readers) { + try { reader.close(); } catch (IOException e) { } + } + } + + /** + * Read the XML file readers into a registry. This does not close the readers. + * This method is used directly from the admin system. 
+ */ + public QueryProfileRegistry read(List<NamedReader> queryProfileTypeReaders,List<NamedReader> queryProfileReaders) { + QueryProfileRegistry registry=new QueryProfileRegistry(); + + // Phase 1 + List<Element> queryProfileTypeElements=createQueryProfileTypes(queryProfileTypeReaders,registry.getTypeRegistry()); + List<Element> queryProfileElements=createQueryProfiles(queryProfileReaders,registry); + + // Phase 2 + fillQueryProfileTypes(queryProfileTypeElements,registry.getTypeRegistry()); + fillQueryProfiles(queryProfileElements,registry); + return registry; + } + + public List<Element> createQueryProfileTypes(List<NamedReader> queryProfileTypeReaders, QueryProfileTypeRegistry registry) { + List<Element> queryProfileTypeElements=new ArrayList<>(queryProfileTypeReaders.size()); + for (NamedReader reader : queryProfileTypeReaders) { + Element root=XML.getDocument(reader).getDocumentElement(); + if ( ! root.getNodeName().equals("query-profile-type")) { + logger.info("Ignoring '" + reader.getName() + + "': Expected XML root element 'query-profile-type' but was '" + root.getNodeName() + "'"); + continue; + } + + String idString=root.getAttribute("id"); + if (idString==null || idString.equals("")) + throw new IllegalArgumentException("'" + reader.getName() + "' has no 'id' attribute in the root element"); + ComponentId id=new ComponentId(idString); + validateFileNameToId(reader.getName(),id,"query profile type"); + QueryProfileType type=new QueryProfileType(id); + type.setMatchAsPath(XML.getChild(root,"match") != null); + type.setStrict(XML.getChild(root,"strict") != null); + registry.register(type); + queryProfileTypeElements.add(root); + } + return queryProfileTypeElements; + } + + public List<Element> createQueryProfiles(List<NamedReader> queryProfileReaders, QueryProfileRegistry registry) { + List<Element> queryProfileElements=new ArrayList<>(queryProfileReaders.size()); + for (NamedReader reader : queryProfileReaders) { + Element 
root=XML.getDocument(reader).getDocumentElement(); + if ( ! root.getNodeName().equals("query-profile")) { + logger.info("Ignoring '" + reader.getName() + + "': Expected XML root element 'query-profile' but was '" + root.getNodeName() + "'"); + continue; + } + + String idString=root.getAttribute("id"); + if (idString==null || idString.equals("")) + throw new IllegalArgumentException("Query profile '" + reader.getName() + "' has no 'id' attribute in the root element"); + ComponentId id=new ComponentId(idString); + validateFileNameToId(reader.getName(),id,"query profile"); + + QueryProfile queryProfile=new QueryProfile(id); + String typeId=root.getAttribute("type"); + if (typeId!=null && ! typeId.equals("")) { + QueryProfileType type=registry.getType(typeId); + if (type==null) + throw new IllegalArgumentException("Query profile '" + reader.getName() + "': Type id '" + typeId + "' can not be resolved"); + queryProfile.setType(type); + } + + Element dimensions=XML.getChild(root,"dimensions"); + if (dimensions!=null) + queryProfile.setDimensions(toArray(XML.getValue(dimensions))); + + registry.register(queryProfile); + queryProfileElements.add(root); + } + return queryProfileElements; + } + + /** Throws an exception if the name is not corresponding to the id */ + private void validateFileNameToId(final String actualName,ComponentId id,String artifactName) { + String expectedCanonicalFileName=id.toFileName(); + String expectedAlternativeFileName=id.stringValue().replace(":","-").replace("/","_"); // legacy + String fileName=new File(actualName).getName(); + fileName=stripXmlEnding(fileName); + String canonicalFileName=ComponentId.fromFileName(fileName).toFileName(); + if ( ! canonicalFileName.equals(expectedCanonicalFileName) && ! 
canonicalFileName.equals(expectedAlternativeFileName)) + throw new IllegalArgumentException("The file name of " + artifactName + " '" + id + + "' must be '" + expectedCanonicalFileName + ".xml' but was '" + actualName + "'"); + } + + private String stripXmlEnding(String fileName) { + if (!fileName.endsWith(".xml")) + throw new IllegalArgumentException("'" + fileName + "' should have a .xml ending"); + else + return fileName.substring(0,fileName.length()-4); + } + + private String[] toArray(String csv) { + String[] array=csv.split(","); + for (int i=0; i<array.length; i++) + array[i]=array[i].trim(); + return array; + } + + public void fillQueryProfileTypes(List<Element> queryProfileTypeElements, QueryProfileTypeRegistry registry) { + for (Element element : queryProfileTypeElements) { + QueryProfileType type=registry.getComponent(new ComponentSpecification(element.getAttribute("id")).toId()); + try { + readInheritedTypes(element,type,registry); + readFieldDefinitions(element,type,registry); + } + catch (RuntimeException e) { + throw new IllegalArgumentException("Error reading " + type,e); + } + } + } + + private void readInheritedTypes(Element element,QueryProfileType type,QueryProfileTypeRegistry registry) { + String inheritedString=element.getAttribute("inherits"); + if (inheritedString==null || inheritedString.equals("")) return; + for (String inheritedId : inheritedString.split(" ")) { + inheritedId=inheritedId.trim(); + if (inheritedId.equals("")) continue; + QueryProfileType inheritedType=registry.getComponent(inheritedId); + if (inheritedType==null) throw new IllegalArgumentException("Could not resolve inherited query profile type '" + inheritedId); + type.inherited().add(inheritedType); + } + } + + private void readFieldDefinitions(Element element,QueryProfileType type,QueryProfileTypeRegistry registry) { + for (Element field : XML.getChildren(element,"field")) { + String name=field.getAttribute("name"); + if (name==null || name.equals("")) throw new 
IllegalArgumentException("A field has no 'name' attribute"); + try { + String fieldTypeName=field.getAttribute("type"); + if (fieldTypeName==null) throw new IllegalArgumentException("Field '" + field + "' has no 'type' attribute"); + FieldType fieldType=FieldType.fromString(fieldTypeName,registry); + type.addField(new FieldDescription(name,fieldType,field.getAttribute("alias"), + getBooleanAttribute("mandatory",false,field),getBooleanAttribute("overridable",true,field)), registry); + } + catch(RuntimeException e) { + throw new IllegalArgumentException("Invalid field '" + name + "'",e); + } + } + } + + public void fillQueryProfiles(List<Element> queryProfileElements, QueryProfileRegistry registry) { + for (Element element : queryProfileElements) { + // Lookup by exact id + QueryProfile profile=registry.getComponent(new ComponentSpecification(element.getAttribute("id")).toId()); + try { + readInherited(element,profile,registry,null,profile.toString()); + readFields(element,profile,registry,null,profile.toString()); + readVariants(element,profile,registry); + } + catch (RuntimeException e) { + throw new IllegalArgumentException("Error reading " + profile,e); + } + } + } + + private void readInherited(Element element,QueryProfile profile,QueryProfileRegistry registry,DimensionValues dimensionValues,String sourceDescription) { + String inheritedString=element.getAttribute("inherits"); + if (inheritedString==null || inheritedString.equals("")) return; + for (String inheritedId : inheritedString.split(" ")) { + inheritedId=inheritedId.trim(); + if (inheritedId.equals("")) continue; + QueryProfile inheritedProfile=registry.getComponent(inheritedId); + if (inheritedProfile==null) throw new IllegalArgumentException("Could not resolve inherited query profile '" + inheritedId + "' in " + sourceDescription); + profile.addInherited(inheritedProfile,dimensionValues); + } + } + + private void readFields(Element element,QueryProfile profile,QueryProfileRegistry 
registry,DimensionValues dimensionValues,String sourceDescription) { + List<KeyValue> references=new ArrayList<>(); + List<KeyValue> properties=new ArrayList<>(); + for (Element field : XML.getChildren(element,"field")) { + String name=field.getAttribute("name"); + if (name==null || name.equals("")) throw new IllegalArgumentException("A field in " + sourceDescription + " has no 'name' attribute"); + try { + Boolean overridable=getBooleanAttribute("overridable",null,field); + if (overridable!=null) + profile.setOverridable(name,overridable,null); + + Object fieldValue=readFieldValue(field,name,sourceDescription,registry); + if (fieldValue instanceof QueryProfile) + references.add(new KeyValue(name,fieldValue)); + else + properties.add(new KeyValue(name,fieldValue)); + } + catch (IllegalArgumentException e) { + throw new IllegalArgumentException("Invalid field '" + name + "' in " + sourceDescription,e); + } + } + // Must set references before properties + for (KeyValue keyValue : references) + profile.set(keyValue.getKey() ,keyValue.getValue(), dimensionValues, registry); + for (KeyValue keyValue : properties) + profile.set(keyValue.getKey(), keyValue.getValue(), dimensionValues, registry); + + } + + private Object readFieldValue(Element field,String name,String targetDescription,QueryProfileRegistry registry) { + Element ref=XML.getChild(field,"ref"); + if (ref!=null) { + String referencedName=XML.getValue(ref); + QueryProfile referenced=registry.getComponent(referencedName); + if (referenced==null) + throw new IllegalArgumentException("Could not find query profile '" + referencedName + "' referenced as '" + + name + "' in " + targetDescription); + return referenced; + } + else { + return XML.getValue(field); + } + } + + private void readVariants(Element element,QueryProfile profile,QueryProfileRegistry registry) { + for (Element queryProfileVariantElement : XML.getChildren(element,"query-profile")) { // A "virtual" query profile contained inside another + 
List<String> dimensions=profile.getDimensions(); + if (dimensions==null) + throw new IllegalArgumentException("Cannot create a query profile variant in " + profile + + ", as it has not declared any variable dimensions"); + String dimensionString=queryProfileVariantElement.getAttribute("for"); + String[] dimensionValueArray=makeStarsNull(toArray(dimensionString)); + if (dimensions.size()<dimensionValueArray.length) + throw new IllegalArgumentException("Cannot create a query profile variant for '" + dimensionString + + "' as only " + dimensions.size() + " dimensions has been defined"); + DimensionValues dimensionValues=DimensionValues.createFrom(dimensionValueArray); + + String description="variant '" + dimensionString + "' in " + profile.toString(); + readInherited(queryProfileVariantElement,profile,registry,dimensionValues,description); + readFields(queryProfileVariantElement,profile,registry,dimensionValues,description); + } + } + + private String[] makeStarsNull(String[] strings) { + for (int i=0; i<strings.length; i++) + if (strings[i].equals("*")) + strings[i]=null; + return strings; + } + + /** + * Returns true if the string is "true".<br> + * Returns false if the string is "false".<br> + * Returns <code>default</code> if the string is null or empty (this parameter may be null)<br> + * @throws IllegalArgumentException if the string has any other value + */ + private Boolean asBoolean(String s,Boolean defaultValue) { + if (s==null) return defaultValue; + if (s.isEmpty()) return defaultValue; + if ("true".equals(s)) return true; + if ("false".equals(s)) return false; + throw new IllegalArgumentException("Expected 'true' or 'false' but was'" + s + "'"); + } + + /** Returns the given attribute as a boolean, using the semantics of {@link #asBoolean} */ + private Boolean getBooleanAttribute(String attributeName,Boolean defaultValue,Element from) { + try { + return asBoolean(from.getAttribute(attributeName),defaultValue); + } + catch (IllegalArgumentException e) { + 
throw new IllegalArgumentException("Attribute '" + attributeName,e); + } + } + + private static class KeyValue { + + private String key; + private Object value; + + public KeyValue(String key,Object value) { + this.key=key; + this.value=value; + } + + public String getKey() { return key; } + + public Object getValue() { return value; } + + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/config/package-info.java b/container-search/src/main/java/com/yahoo/search/query/profile/config/package-info.java new file mode 100644 index 00000000000..8ea4e887661 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/config/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.search.query.profile.config; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/package-info.java b/container-search/src/main/java/com/yahoo/search/query/profile/package-info.java new file mode 100644 index 00000000000..df3f4ac45ab --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/package-info.java @@ -0,0 +1,12 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * Query Profiles provide nested sets of named (and optionally typed) key-values which can be referenced in a Query + * to proviode initial values of Query properties. Values in nested query profiles can be looked up from + * the query properties by dotting the names. Query profiles supports inheritance to allow variations + * for, e.g different buckets, client types, markets etc. 
*/ +@ExportPackage +@PublicApi +package com.yahoo.search.query.profile; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/types/FieldDescription.java b/container-search/src/main/java/com/yahoo/search/query/profile/types/FieldDescription.java new file mode 100644 index 00000000000..c522ec04023 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/types/FieldDescription.java @@ -0,0 +1,148 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.profile.types; + +import com.google.common.collect.ImmutableList; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.query.profile.QueryProfile; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** + * A field description of a query profile type. Immutable. + * Field descriptions can be sorted by name. 
+ * + * @author bratseth + */ +public class FieldDescription implements Comparable<FieldDescription> { + + private final CompoundName name; + private final FieldType type; + private final List<String> aliases; + + /** If true, this value must be provided either in the query profile or in the search request */ + private final boolean mandatory; + + /** If true, assignments to this value from outside will be ignored */ + private final boolean overridable; + + public FieldDescription(String name, FieldType type) { + this(name,type,false); + } + + public FieldDescription(String name, String type) { + this(name,FieldType.fromString(type,null)); + } + + public FieldDescription(String name, FieldType type, boolean mandatory) { + this(name, type, mandatory, true); + } + + public FieldDescription(String name, String type, String aliases) { + this(name,type,aliases,false,true); + } + + public FieldDescription(String name, FieldType type, String aliases) { + this(name, type, aliases, false, true); + } + + /** + * Creates a field description + * + * @param name the name of the field + * @param typeString the type of the field represented as a string - see {@link com.yahoo.search.query.profile.types.FieldType} + * @param aliases a space-separated list of alias names of this field name. Aliases are not following dotted + * (meaning they are global, not that they cannot contain dots) and are case insensitive. Null is permissible + * if there are no aliases + * @param mandatory whether it is mandatory to provide a value for this field. default: false + * @param overridable whether this can be overridden when first set in a profile. 
Default: true + */ + public FieldDescription(String name, String typeString, String aliases, boolean mandatory, boolean overridable) { + this(name,FieldType.fromString(typeString,null),aliases,mandatory,overridable); + } + + public FieldDescription(String name, FieldType type, boolean mandatory, boolean overridable) { + this(name, type, null, mandatory, overridable); + } + + public FieldDescription(String name, FieldType type, String aliases, boolean mandatory, boolean overridable) { + this(new CompoundName(name), type, aliases, mandatory, overridable); + } + + /** + * Creates a field description from a list where the aliases are represented as a comma-separated string + */ + public FieldDescription(CompoundName name, FieldType type, String aliases, boolean mandatory, boolean overridable) { + this(name, type, toList(aliases), mandatory, overridable); + } + + /** + * Creates a field description + * + * @param name the name of the field + * @param type the type of the field represented as a string - see {@link com.yahoo.search.query.profile.types.FieldType} + * @param aliases a list of aliases, never null. Aliases are not following dotted + * (meaning they are global, not that they cannot contain dots) and are case insensitive. + * @param mandatory whether it is mandatory to provide a value for this field. default: false + * @param overridable whether this can be overridden when first set in a profile. Default: true + */ + public FieldDescription(CompoundName name, FieldType type, List<String> aliases, boolean mandatory, boolean overridable) { + if (name.isEmpty()) + throw new IllegalArgumentException("Illegal name ''"); + for (String nameComponent : name.asList()) + QueryProfile.validateName(nameComponent); + this.name = name; + this.type = type; + + // Forbidden until we can figure out the right semantics + if (name.isCompound() && ! 
aliases.isEmpty()) throw new IllegalArgumentException("Aliases is not allowed with compound names"); + + this.aliases = ImmutableList.copyOf(aliases); + this.mandatory = mandatory; + this.overridable = overridable; + } + + private static List<String> toList(String string) { + if (string == null || string.isEmpty()) return ImmutableList.of(); + return ImmutableList.copyOf(Arrays.asList(string.split(" "))); + } + + /** Returns the full name of this as a string */ + public String getName() { return name.toString(); } + + /** Returns the full name of this as a compound name */ + public CompoundName getCompoundName() { return name; } + + public FieldType getType() { return type; } + + /** Returns a unmodifiable list of the aliases of this. An empty list (never null) if there are none. */ + public List<String> getAliases() { return aliases; } + + /** Returns whether this field must be provided in the query profile or the search definition. Default: false */ + public boolean isMandatory() { return mandatory; } + + /** Returns false if overrides to values for this field from the outside should be ignored. Default: true */ + public boolean isOverridable() { return overridable; } + + public int compareTo(FieldDescription other) { + return name.toString().compareTo(other.name.toString()); + } + + /** Returns a copy of this with the name set to the argument name */ + public FieldDescription withName(CompoundName name) { + return new FieldDescription(name, type, aliases, mandatory, overridable); + } + + /** Returns a copy of this with the type set to the argument type */ + public FieldDescription withType(FieldType type) { + return new FieldDescription(name, type, aliases, mandatory, overridable); + } + + @Override + public String toString() { + return "field '" + name + "' type " + type.stringValue() + "" + + (mandatory?" (mandatory)":"") + (!overridable?" 
(not overridable)":""); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/types/FieldType.java b/container-search/src/main/java/com/yahoo/search/query/profile/types/FieldType.java new file mode 100644 index 00000000000..abe3c4425ae --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/types/FieldType.java @@ -0,0 +1,94 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.profile.types; + +import com.yahoo.search.query.profile.QueryProfile; +import com.yahoo.search.query.profile.QueryProfileRegistry; +import com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry; +import com.yahoo.search.yql.YqlQuery; +import com.yahoo.tensor.Tensor; + +import java.util.Optional; + +/** + * Superclass of query type field types. + * Field types are immutable. + * + * @author bratseth + */ +@SuppressWarnings("rawtypes") +public abstract class FieldType { + + public static final PrimitiveFieldType stringType = new PrimitiveFieldType(String.class); + public static final PrimitiveFieldType integerType = new PrimitiveFieldType(Integer.class); + public static final PrimitiveFieldType longType = new PrimitiveFieldType(Long.class); + public static final PrimitiveFieldType floatType = new PrimitiveFieldType(Float.class); + public static final PrimitiveFieldType doubleType = new PrimitiveFieldType(Double.class); + public static final PrimitiveFieldType booleanType = new PrimitiveFieldType(Boolean.class); + public static final TensorFieldType genericTensorType = new TensorFieldType(Optional.empty()); + public static final QueryFieldType queryType = new QueryFieldType(); + public static final QueryProfileFieldType genericQueryProfileType = new QueryProfileFieldType(); + + /** Returns the class of instance values of this field type */ + public abstract Class getValueClass(); + + /** Returns a string representation of this type 
which can be converted back to a type class by {@link #fromString} */ + public abstract String stringValue(); + + public abstract String toString(); + + /** Returns a string describing possible instances of this type, suitable for user error messages */ + public abstract String toInstanceDescription(); + + /** Converts the given type to an instance of this type, if possible. Returns null if not possible. */ + public abstract Object convertFrom(Object o, QueryProfileRegistry registry); + + /** Converts the given type to an instance of this type, if possible. Returns null if not possible. */ + public abstract Object convertFrom(Object o, CompiledQueryProfileRegistry registry); + + /** + * Returns the field type for a given string name. + * + * @param typeString a type string - a primitive name, "query-profile" or "query-profile:profile-name" + * @param registry the registry in which query profile references are resolved when the last form above is used, + * or null in which case that form cannot be used + * @throws IllegalArgumentException if the string does not resolve to a type + */ + public static FieldType fromString(String typeString, QueryProfileTypeRegistry registry) { + if ("string".equals(typeString)) + return stringType; + if ("integer".equals(typeString)) + return integerType; + if ("long".equals(typeString)) + return longType; + if ("float".equals(typeString)) + return floatType; + if ("double".equals(typeString)) + return doubleType; + if ("boolean".equals(typeString)) + return booleanType; + if ("query".equals(typeString)) + return queryType; + if (typeString.startsWith("tensor")) + return TensorFieldType.fromTypeString(typeString); + if ("query-profile".equals(typeString)) + return genericQueryProfileType; + if (typeString.startsWith("query-profile:")) + return QueryProfileFieldType.fromString(typeString.substring("query-profile:".length()),registry); + throw new IllegalArgumentException("Unknown type '" + typeString + "'"); + } + + /** Returns the 
field type from a value class, or null if there is no type for it */ + public static FieldType fromClass(Class clazz) { + if (clazz == String.class) return stringType; + if (clazz == Integer.class) return integerType; + if (clazz == Long.class) return longType; + if (clazz == Float.class) return floatType; + if (clazz == Double.class) return doubleType; + if (clazz == Boolean.class) return booleanType; + if (clazz == Tensor.class) return genericTensorType; + if (clazz == YqlQuery.class) return queryType; + if (clazz == QueryProfile.class) return genericQueryProfileType; + return null; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/types/PrimitiveFieldType.java b/container-search/src/main/java/com/yahoo/search/query/profile/types/PrimitiveFieldType.java new file mode 100644 index 00000000000..76b3f78ac2f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/types/PrimitiveFieldType.java @@ -0,0 +1,86 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.profile.types; + +import com.yahoo.search.query.profile.QueryProfileRegistry; +import com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry; + +import static com.yahoo.text.Lowercase.toLowerCase; + +/** + * Represents a query field type which is a primitive - String, Integer, Float, Double or Long. 
+ * + * @author bratseth + */ +@SuppressWarnings("rawtypes") +public class PrimitiveFieldType extends FieldType { + + private Class primitiveClass; + + PrimitiveFieldType(Class primitiveClass) { + this.primitiveClass=primitiveClass; + } + + public @Override Class getValueClass() { return primitiveClass; } + + public @Override String stringValue() { + return toLowerCase(primitiveClass.getSimpleName()); + } + + public @Override String toString() { return "field type " + stringValue(); } + + public @Override String toInstanceDescription() { + return toLowerCase(primitiveClass.getSimpleName()); + } + + @Override + public Object convertFrom(Object object, CompiledQueryProfileRegistry registry) { + return convertFrom(object, (QueryProfileRegistry)null); + } + + public @Override Object convertFrom(Object object, QueryProfileRegistry registry) { + if (primitiveClass == object.getClass()) return object; + + if (object.getClass() == String.class) return convertFromString((String)object); + if (object instanceof Number) return convertFromNumber((Number)object); + + return null; + } + + private Object convertFromString(String string) { + try { + if (primitiveClass==Integer.class) return Integer.valueOf(string); + if (primitiveClass==Double.class) return Double.valueOf(string); + if (primitiveClass==Float.class) return Float.valueOf(string); + if (primitiveClass==Long.class) return Long.valueOf(string); + if (primitiveClass==Boolean.class) return Boolean.valueOf(string); + } + catch (NumberFormatException e) { + return null; // Handled in caller + } + throw new RuntimeException("Programming error"); + } + + private Object convertFromNumber(Number number) { + if (primitiveClass==Integer.class) return number.intValue(); + if (primitiveClass==Double.class) return number.doubleValue(); + if (primitiveClass==Float.class) return number.floatValue(); + if (primitiveClass==Long.class) return number.longValue(); + if (primitiveClass==String.class) return String.valueOf(number); + throw 
new RuntimeException("Programming error: Input type is " + number.getClass() + + " primitiveClass is " + primitiveClass); + } + + @Override + public int hashCode() { + return primitiveClass.hashCode(); + } + + @Override + public boolean equals(Object o) { + if (o == this) return true; + if ( ! (o instanceof PrimitiveFieldType)) return false; + PrimitiveFieldType other = (PrimitiveFieldType)o; + return other.primitiveClass.equals(this.primitiveClass); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/types/QueryFieldType.java b/container-search/src/main/java/com/yahoo/search/query/profile/types/QueryFieldType.java new file mode 100644 index 00000000000..a0982fdf0f6 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/types/QueryFieldType.java @@ -0,0 +1,41 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.profile.types; + +import com.yahoo.search.query.profile.QueryProfileRegistry; +import com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry; +import com.yahoo.search.yql.YqlQuery; +import com.yahoo.tensor.MapTensor; +import com.yahoo.tensor.Tensor; + +/** + * A YQL query template field type in a query profile + * + * @author bratseth + */ +public class QueryFieldType extends FieldType { + + @Override + public Class getValueClass() { return YqlQuery.class; } + + @Override + public String stringValue() { return "query"; } + + @Override + public String toString() { return "field type " + stringValue(); } + + @Override + public String toInstanceDescription() { return "a YQL query template"; } + + @Override + public Object convertFrom(Object o, QueryProfileRegistry registry) { + if (o instanceof YqlQuery) return o; + if (o instanceof String) return YqlQuery.from((String)o); + return null; + } + + @Override + public Object convertFrom(Object o, CompiledQueryProfileRegistry registry) { + return 
convertFrom(o, (QueryProfileRegistry)null); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/types/QueryProfileFieldType.java b/container-search/src/main/java/com/yahoo/search/query/profile/types/QueryProfileFieldType.java new file mode 100644 index 00000000000..df52e78c6ef --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/types/QueryProfileFieldType.java @@ -0,0 +1,100 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.profile.types; + +import com.yahoo.search.query.profile.QueryProfile; +import com.yahoo.search.query.profile.QueryProfileRegistry; +import com.yahoo.search.query.profile.compiled.CompiledQueryProfile; +import com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry; + +/** + * Represents a query profile field type which is a reference to a query profile. + * The reference may optionally specify the type of the referred query profile. 
+ * + * @author bratseth + */ +public class QueryProfileFieldType extends FieldType { + + private final QueryProfileType type; + + public static QueryProfileFieldType fromString(String queryProfileName, QueryProfileTypeRegistry registry) { + if (queryProfileName==null || queryProfileName.equals("")) + return new QueryProfileFieldType(null); + + if (registry==null) + throw new IllegalArgumentException("Can not resolve query profile type '" + queryProfileName + + "' because no registry is provided"); + QueryProfileType queryProfileType=registry.getComponent(queryProfileName); + if (queryProfileType==null) + throw new IllegalArgumentException("Could not resolve query profile type '" + queryProfileName + "'"); + return new QueryProfileFieldType(registry.getComponent(queryProfileName)); + } + + public QueryProfileFieldType() { this(null); } + + public QueryProfileFieldType(QueryProfileType type) { + this.type = type; + } + + /** Returns the query profile type of this, or null if any type works */ + public QueryProfileType getQueryProfileType() { return type; } + + public @Override Class<?> getValueClass() { return QueryProfile.class; } + + public @Override String stringValue() { + return "query-profile" + (type!=null ? ":" + type.getId().getName() : ""); + } + + public @Override String toString() { + return "field type " + stringValue(); + } + + public @Override String toInstanceDescription() { + return "reference to a query profile" + (type!=null ? " of type '" + type.getId().getName() + "'" : ""); + } + + @Override + public CompiledQueryProfile convertFrom(Object object, CompiledQueryProfileRegistry registry) { + String profileId = object.toString(); + if (profileId.startsWith("ref:")) + profileId = profileId.substring("ref:".length()); + CompiledQueryProfile profile = registry.getComponent(profileId); + if (profile == null) return null; + if (type != null && ! 
type.equals(profile.getType())) return null; + return profile; + } + + @Override + public QueryProfile convertFrom(Object object, QueryProfileRegistry registry) { + QueryProfile profile; + if (object instanceof String) + profile = registry.getComponent((String)object); + else if (object instanceof QueryProfile) + profile = (QueryProfile)object; + else + return null; + + // Verify its type as well + if (type!=null && type!=profile.getType()) return null; + return profile; + } + + @Override + public int hashCode() { + if (type == null) return 17; + return type.hashCode(); + } + + @Override + public boolean equals(Object o) { + if (o == this) return true; + if ( ! (o instanceof QueryProfileFieldType)) return false; + QueryProfileFieldType other = (QueryProfileFieldType)o; + return equals(this.type.getId(), other.type.getId()); + } + + private boolean equals(Object o1, Object o2) { + if (o1 == null) return o2 == null; + return o1.equals(o2); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/types/QueryProfileType.java b/container-search/src/main/java/com/yahoo/search/query/profile/types/QueryProfileType.java new file mode 100644 index 00000000000..ecf60f8723d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/types/QueryProfileType.java @@ -0,0 +1,355 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.query.profile.types; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.yahoo.component.ComponentId; +import com.yahoo.component.provider.FreezableSimpleComponent; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.query.profile.QueryProfile; + +import java.util.*; + +import static com.yahoo.text.Lowercase.toLowerCase; + +/** + * Defines a kind of query profiles + * + * @author bratseth + */ +public class QueryProfileType extends FreezableSimpleComponent { + + /** The fields of this query profile type */ + private Map<String, FieldDescription> fields = new HashMap<>(); + + /** The query profile types this inherits */ + private List<QueryProfileType> inherited = new ArrayList<>(); + + /** If this is true, keys which are not declared in this type cannot be set in instances */ + private boolean strict = false; + + /** True if the name of instances of this profile should be matched as path names, see QueryProfileRegistry */ + private boolean matchAsPath = false; + + private boolean builtin = false; + + /** Aliases *from* any strings *to* field names. Aliases are case insensitive */ + private Map<String, String> aliases = null; + + public QueryProfileType(String idString) { + this(new ComponentId(idString)); + } + + public QueryProfileType(ComponentId id) { + super(id); + QueryProfile.validateName(id.getName()); + } + + private QueryProfileType(ComponentId id, Map<String, FieldDescription> fields, List<QueryProfileType> inherited, + boolean strict, boolean matchAsPath, boolean builtin, Map<String,String> aliases) { + super(id); + this.fields = new HashMap<>(fields); + this.inherited = new ArrayList<>(inherited); + this.strict = strict; + this.matchAsPath = matchAsPath; + this.builtin = builtin; + this.aliases = aliases == null ? 
null : new HashMap<>(aliases); + } + + /** Return this is it is not frozen, returns a modifiable deeply unfrozen copy otherwise */ + public QueryProfileType unfrozen() { + if ( ! isFrozen()) return this; + + // Unfreeze inherited query profile references + List<QueryProfileType> unfrozenInherited = new ArrayList<>(); + for (QueryProfileType inheritedType : inherited) { + unfrozenInherited.add(inheritedType.unfrozen()); + } + + // Unfreeze nested query profile references + Map<String, FieldDescription> unfrozenFields = new HashMap<>(); + for (Map.Entry<String, FieldDescription> field : fields.entrySet()) { + FieldDescription unfrozenFieldValue = field.getValue(); + if (field.getValue().getType() instanceof QueryProfileFieldType) { + QueryProfileFieldType queryProfileFieldType = (QueryProfileFieldType)field.getValue().getType(); + if (queryProfileFieldType.getQueryProfileType() != null) { + QueryProfileFieldType unfrozenType = + new QueryProfileFieldType(queryProfileFieldType.getQueryProfileType().unfrozen()); + unfrozenFieldValue = field.getValue().withType(unfrozenType); + } + } + unfrozenFields.put(field.getKey(), unfrozenFieldValue); + } + + return new QueryProfileType(getId(), unfrozenFields, unfrozenInherited, strict, matchAsPath, builtin, aliases); + } + + /** Mark this type as built into the system. Do not use */ + public void setBuiltin(boolean builtin) { this.builtin=builtin; } + + /** Returns whether this type is built into the system */ + public boolean isBuiltin() { return builtin; } + + /** + * Returns the query profile types inherited from this (never null). + * If this profile type is not frozen, this list can be modified to change the set of inherited types. + * If it is frozen, the returned list is immutable. + */ + public List<QueryProfileType> inherited() { return inherited; } + + /** + * Returns the fields declared in this (i.e not including those inherited) as an immutable map. 
+ * + * @throws IllegalStateException if this is frozen + */ + public Map<String,FieldDescription> declaredFields() { + ensureNotFrozen(); + return Collections.unmodifiableMap(fields); + } + + /** + * Returns true if <i>this</i> is declared strict. + * @throws IllegalStateException if this is frozen + */ + public boolean isDeclaredStrict() { + ensureNotFrozen(); + return strict; + } + + /** + * Returns true if <i>this</i> is declared as match as path. + * @throws IllegalStateException if this is frozen + */ + public boolean getDeclaredMatchAsPath() { + ensureNotFrozen(); + return matchAsPath; + } + + /** Set whether nondeclared fields are permissible. Throws an exception if this is frozen. */ + public void setStrict(boolean strict) { + ensureNotFrozen(); + this.strict=strict; + } + + /** Returns whether field not declared in this type is permissible in instances. Default is false: Additional values are allowed */ + public boolean isStrict() { + if (isFrozen()) return strict; + + // Check if any of this or an inherited is true + if (strict) return true; + for (QueryProfileType inheritedType : inherited) + if (inheritedType.isStrict()) return true; + return false; + } + + /** Returns whether instances of this should be matched as path names. Throws if this is frozen. */ + public void setMatchAsPath(boolean matchAsPath) { + ensureNotFrozen(); + this.matchAsPath=matchAsPath; + } + + /** Returns whether instances of this should be matched as path names. Default is false: Use exact name matching. 
*/ + public boolean getMatchAsPath() { + if (isFrozen()) return matchAsPath; + + // Check if any of this or an inherited is true + if (matchAsPath) return true; + for (QueryProfileType inheritedType : inherited) + if (inheritedType.getMatchAsPath()) return true; + return false; + } + + public void freeze() { + if (isFrozen()) return; + // Flatten the inheritance hierarchy into this to facilitate faster lookup + for (QueryProfileType inheritedType : inherited) { + for (FieldDescription field : inheritedType.fields().values()) + if ( ! fields.containsKey(field.getName())) + fields.put(field.getName(),field); + } + fields = ImmutableMap.copyOf(fields); + inherited = ImmutableList.copyOf(inherited); + strict = isStrict(); + matchAsPath = getMatchAsPath(); + super.freeze(); + } + + /** + * Returns whether the given field name is overridable in this type. + * Default: true (so all non-declared fields returns true) + */ + public boolean isOverridable(String fieldName) { + FieldDescription field=getField(fieldName); + if (field==null) return true; + return field.isOverridable(); + } + + /** + * Returns the permissible class for the value of the given name in this type + * + * @return the permissible class for a value, <code>Object</code> if all types are legal, + * null if no types are legal (i.e if the name is not legal) + */ + public Class<?> getValueClass(String name) { + FieldDescription fieldDescription=getField(name); + if (fieldDescription==null) { + if (strict) + return null; // Undefined -> Not legal + else + return Object.class; // Undefined -> Anything is legal + } + return fieldDescription.getType().getValueClass(); + } + + /** Returns the type of the given query profile type declared as a field in this */ + public QueryProfileType getType(String localName) { + FieldDescription fieldDescription=getField(localName); + if (fieldDescription ==null) return null; + if ( ! 
(fieldDescription.getType() instanceof QueryProfileFieldType)) return null; + return ((QueryProfileFieldType) fieldDescription.getType()).getQueryProfileType(); + } + + /** + * Returns the description of the field with the given name in this type or an inherited type + * (depth first left to right search). Returns null if the field is not defined in this or an inherited profile. + */ + public FieldDescription getField(String name) { + FieldDescription field=fields.get(name); + if ( field!=null ) return field; + + if ( isFrozen() ) return null; // Inherited are collapsed into this + + for (QueryProfileType inheritedType : this.inherited() ) { + field=inheritedType.getField(name); + if (field!=null) return field; + } + + return null; + } + + /** + * Removes a field from this (not from any inherited profile) + * + * @return the removed field or null if none + * @throws IllegalStateException if this is frozen + */ + public FieldDescription removeField(String fieldName) { + ensureNotFrozen(); + return fields.remove(fieldName); + } + + /** + * Adds a field to this, without associating with a type registry; field descriptions with compound + * is not be supported. + * + * @throws IllegalStateException if this is frozen + */ + public void addField(FieldDescription fieldDescription) { + // Compound names translates to new types, which must be added to a supplied registry + if (fieldDescription.getCompoundName().isCompound()) + throw new IllegalArgumentException("Adding compound names is only legal when supplying a registry"); + addField(fieldDescription, null); + } + + /** + * Adds a field to this + * + * @throws IllegalStateException if this is frozen + */ + public void addField(FieldDescription fieldDescription, QueryProfileTypeRegistry registry) { + CompoundName name = fieldDescription.getCompoundName(); + if (name.isCompound()) { + // Add (/to) a query profile type containing the rest of the name. 
+ // (we do not need the field description settings for intermediate query profile types + // as the leaf entry will enforce them) + QueryProfileType type = getOrCreateQueryProfileType(name.first(), registry); + type.addField(fieldDescription.withName(name.rest()), registry); + } + else { + ensureNotFrozen(); + fields.put(fieldDescription.getName(), fieldDescription); + } + + for (String alias : fieldDescription.getAliases()) + addAlias(alias, fieldDescription.getName()); + } + + private QueryProfileType getOrCreateQueryProfileType(String name, QueryProfileTypeRegistry registry) { + FieldDescription fieldDescription = getField(name); + if (fieldDescription != null) { + if ( ! ( fieldDescription.getType() instanceof QueryProfileFieldType)) + throw new IllegalArgumentException("Cannot use name '" + name + "' as a prefix because it is " + + "already a " + fieldDescription.getType()); + QueryProfileFieldType fieldType = (QueryProfileFieldType) fieldDescription.getType(); + QueryProfileType type = fieldType.getQueryProfileType(); + if (type == null) { // an as-yet untyped reference; add type + type = new QueryProfileType(name); + registry.register(type.getId(), type); + fields.put(name, fieldDescription.withType(new QueryProfileFieldType(type))); + } + return type; + } + else { + QueryProfileType type = new QueryProfileType(name); + registry.register(type.getId(), type); + fields.put(name, new FieldDescription(name, new QueryProfileFieldType(type))); + return type; + } + } + + private void addAlias(String alias,String field) { + ensureNotFrozen(); + if (aliases==null) + aliases=new HashMap<>(); + aliases.put(toLowerCase(alias),field); + } + + /** Returns all the fields of this profile type and all types it inherits as a read-only map */ + public Map<String,FieldDescription> fields() { + if (isFrozen()) return fields; + if (inherited().size()==0) return Collections.unmodifiableMap(fields); + + // Collapse inherited + Map<String,FieldDescription> allFields=new 
HashMap<>(fields); + for (QueryProfileType inheritedType : inherited) + allFields.putAll(inheritedType.fields()); + return Collections.unmodifiableMap(allFields); + } + + /** + * Returns the alias to field mapping of this type as a read-only map. This is never null. + * Note that all keys are lower-cased because aliases are case-insensitive + */ + public Map<String,String> aliases() { + if (isFrozen()) return aliases; + if (aliases == null) return Collections.emptyMap(); + return Collections.unmodifiableMap(aliases); + } + + /** Returns the field name of an alias or field name */ + public String unalias(String aliasOrField) { + if (aliases==null || aliases.isEmpty()) return aliasOrField; + String field=aliases.get(toLowerCase(aliasOrField)); + if (field!=null) return field; + return aliasOrField; + } + + @Override + public int hashCode() { + return getId().hashCode(); + } + + /** Two types are equal if they have the same id */ + @Override + public boolean equals(Object o) { + if (o == this) return true; + if ( ! (o instanceof QueryProfileType)) return false; + QueryProfileType other = (QueryProfileType)o; + return other.getId().equals(this.getId()); + } + + public String toString() { + return "query profile type '" + getId() + "'"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/types/QueryProfileTypeRegistry.java b/container-search/src/main/java/com/yahoo/search/query/profile/types/QueryProfileTypeRegistry.java new file mode 100644 index 00000000000..3f64caa7ab1 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/types/QueryProfileTypeRegistry.java @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.query.profile.types; + +import com.yahoo.component.provider.ComponentRegistry; +import com.yahoo.search.Query; +import com.yahoo.search.query.profile.QueryProfileRegistry; + +/** + * A registry of query profile types + * + * @author bratseth + */ +public class QueryProfileTypeRegistry extends ComponentRegistry<QueryProfileType> { + + public QueryProfileTypeRegistry() { + Query.addNativeQueryProfileTypesTo(this); + } + + /** Register this type by its id */ + public void register(QueryProfileType type) { + super.register(type.getId(), type); + } + + @Override + public void freeze() { + if (isFrozen()) return; + for (QueryProfileType queryProfileType : allComponents()) + queryProfileType.freeze(); + } + + public static QueryProfileTypeRegistry emptyFrozen() { + QueryProfileTypeRegistry registry = new QueryProfileTypeRegistry(); + registry.freeze(); + return registry; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/types/TensorFieldType.java b/container-search/src/main/java/com/yahoo/search/query/profile/types/TensorFieldType.java new file mode 100644 index 00000000000..747cf73acb3 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/profile/types/TensorFieldType.java @@ -0,0 +1,59 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.query.profile.types; + +import com.yahoo.search.query.profile.QueryProfileRegistry; +import com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry; +import com.yahoo.tensor.MapTensor; +import com.yahoo.tensor.Tensor; +import com.yahoo.tensor.TensorType; + +import java.util.Optional; + +/** + * A tensor field type in a query profile + * + * @author bratseth + */ +public class TensorFieldType extends FieldType { + + private final Optional<TensorType> type; + + /** Creates a tensor field type with optional information about the kind of tensor this will hold */ + public TensorFieldType(Optional<TensorType> type) { + this.type = type; + } + + /** Returns information about the type of tensor this will hold, or empty to allow any kind of tensor */ + public Optional<TensorType> type() { return type; } + + @Override + public Class getValueClass() { return Tensor.class; } + + @Override + public String stringValue() { return "tensor"; } + + @Override + public String toString() { return "field type " + stringValue(); } + + @Override + public String toInstanceDescription() { return "a tensor"; } + + @Override + public Object convertFrom(Object o, QueryProfileRegistry registry) { + if (o instanceof Tensor) return o; + if (o instanceof String) return MapTensor.from((String)o); + return null; + } + + @Override + public Object convertFrom(Object o, CompiledQueryProfileRegistry registry) { + return convertFrom(o, (QueryProfileRegistry)null); + } + + public static TensorFieldType fromTypeString(String s) { + if (s.equals("tensor")) return genericTensorType; + return new TensorFieldType(Optional.of(TensorType.fromSpec(s))); + } + + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/types/package-info.java b/container-search/src/main/java/com/yahoo/search/query/profile/types/package-info.java new file mode 100644 index 00000000000..1f9fa7a1fb4 --- /dev/null +++ 
b/container-search/src/main/java/com/yahoo/search/query/profile/types/package-info.java @@ -0,0 +1,11 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * Query profile types defines the set of fields a query profile may, can or must have. Query profile + * types may be inherited in a type hierarchy. + */ +@ExportPackage +@PublicApi +package com.yahoo.search.query.profile.types; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/query/properties/DefaultProperties.java b/container-search/src/main/java/com/yahoo/search/query/properties/DefaultProperties.java new file mode 100644 index 00000000000..01c861b879e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/properties/DefaultProperties.java @@ -0,0 +1,40 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.properties; + +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.query.Properties; +import com.yahoo.search.query.profile.types.FieldDescription; +import com.yahoo.search.query.profile.types.QueryProfileType; + +import java.util.Map; + +/** + * Default values for properties that are meant to be customized in query profiles. 
+ * @author tonytv + */ +public final class DefaultProperties extends Properties { + public static final CompoundName MAX_OFFSET = new CompoundName("maxOffset"); + public static final CompoundName MAX_HITS = new CompoundName("maxHits"); + + + public static final QueryProfileType argumentType = new QueryProfileType("DefaultProperties"); + static { + argumentType.setBuiltin(true); + + argumentType.addField(new FieldDescription(MAX_OFFSET.toString(), "integer")); + argumentType.addField(new FieldDescription(MAX_HITS.toString(), "integer")); + + argumentType.freeze(); + } + + @Override + public Object get(CompoundName name, Map<String, String> context, com.yahoo.processing.request.Properties substitution) { + if (MAX_OFFSET.equals(name)) { + return 1000; + } else if (MAX_HITS.equals(name)) { + return 400; + } else { + return super.get(name, context, substitution); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/search/query/properties/PropertyAliases.java b/container-search/src/main/java/com/yahoo/search/query/properties/PropertyAliases.java new file mode 100644 index 00000000000..cc2c08c5504 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/properties/PropertyAliases.java @@ -0,0 +1,58 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.properties; + +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.query.Properties; + +import java.util.Map; + +/** + * A properties implementation which translates the incoming name to its standard name + * if it is a registered alias. + * <p> + * Aliases are case insensitive. One standard name may have multiple aliases. + * <p> + * This is multithread safe or not depending on the status of the passed map of aliases. + * Cloning will not deep copy the set of aliases. 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class PropertyAliases extends Properties { + + /** A map from aliases to standard names */ + private final Map<String,CompoundName> aliases; + + /** + * Creates an instance with a set of aliases. The given aliases will be used directly by this class. + * To make this class immutable and thread safe, relinquish ownership of the parameter map. + */ + public PropertyAliases(Map<String,CompoundName> aliases) { + this.aliases=aliases; + } + + /** + * Returns the standard name for an alias, or the given name if it is not a registered alias + * + * @param nameOrAlias the name to check if is an alias + * @return the real name if an alias or the input name itself + */ + protected CompoundName unalias(CompoundName nameOrAlias) { + CompoundName properName = aliases.get(nameOrAlias.getLowerCasedName()); + return (properName != null) ? properName : nameOrAlias; + } + + public @Override Map<String, Object> listProperties(CompoundName property,Map<String,String> context, + com.yahoo.processing.request.Properties substitution) { + return super.listProperties(unalias(property),context,substitution); + } + + public @Override Object get(CompoundName name,Map<String,String> context, + com.yahoo.processing.request.Properties substitution) { + return super.get(unalias(name),context,substitution); + } + + public @Override void set(CompoundName name,Object value,Map<String,String> context) { + super.set(unalias(name),value,context); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/properties/PropertyMap.java b/container-search/src/main/java/com/yahoo/search/query/properties/PropertyMap.java new file mode 100644 index 00000000000..820c4fc8ea3 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/properties/PropertyMap.java @@ -0,0 +1,132 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.query.properties; + +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.query.Properties; +import com.yahoo.search.result.Hit; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.Map; +import java.util.logging.Logger; + +/** + * A Map backing of Properties. + * <p> + * When this is cloned it will deep copy not only the model object map, but also each + * clonable member inside the map. + * <p> + * Subclassing is supported, a hook can be implemented to provide conditional inclusion in the map. + * By default - all properties are accepted, so set is never propagated. + * <p> + * This class is not multithread safe. + * + * @author bratseth + */ +public class PropertyMap extends Properties { + + private static Logger log=Logger.getLogger(PropertyMap.class.getName()); + + /** The properties of this */ + private Map<CompoundName, Object> properties = new LinkedHashMap<>(); + + public void set(CompoundName name, Object value, Map<String,String> context) { + if (shouldSet(name, value)) + properties.put(name, value); + else + super.set(name, value, context); + } + + /** + * Return true if this value should be set in this map, false if the set should be propagated instead + * This default implementation always returns true. + */ + protected boolean shouldSet(CompoundName name,Object value) { return true; } + + public @Override Object get(CompoundName name, Map<String,String> context, + com.yahoo.processing.request.Properties substitution) { + if ( ! properties.containsKey(name)) return super.get(name,context,substitution); + return properties.get(name); + } + + /** + * Returns a direct reference to the map containing the properties set in this instance. 
+ */ + public Map<CompoundName, Object> propertyMap() { + return properties; + } + + public @Override PropertyMap clone() { + PropertyMap clone = (PropertyMap)super.clone(); + clone.properties = new HashMap<>(); + for (Map.Entry<CompoundName, Object> entry : this.properties.entrySet()) { + Object cloneValue = clone(entry.getValue()); + if (cloneValue == null) + cloneValue = entry.getValue(); // Shallow copy objects which does not support cloning + clone.properties.put(entry.getKey(), cloneValue); + } + return clone; + } + + /** Clones this object if it is clonable, and the clone is public. Returns null if not */ + public static Object clone(Object object) { + if (object==null) return null; + if (! ( object instanceof Cloneable) ) return null; + if (object instanceof Object[]) + return arrayClone((Object[])object); + else + return objectClone(object); + } + + private static Object arrayClone(Object[] object) { + Object[] arrayClone= Arrays.copyOf(object, object.length); + // deep clone + for (int i=0; i<arrayClone.length; i++) { + Object elementClone=clone(arrayClone[i]); + if (elementClone!=null) + arrayClone[i]=elementClone; + } + return arrayClone; + } + + private static Object objectClone(Object object) { + if (object instanceof Hit) { + return ((Hit) object).clone(); + } else if (object instanceof LinkedList) { + return ((LinkedList) object).clone(); + } + try { + Method cloneMethod=object.getClass().getMethod("clone"); + return cloneMethod.invoke(object); + } + catch (NoSuchMethodException e) { + log.warning("'" + object + "' is Cloneable, but has no clone method - will use the same instance in all requests"); + return null; + } + catch (IllegalAccessException e) { + log.warning("'" + object + "' is Cloneable, but clone method cannot be accessed - will use the same instance in all requests"); + return null; + } + catch (InvocationTargetException e) { + throw new RuntimeException("Exception cloning '" + object + "'",e); + } + } + + @Override + public 
Map<String, Object> listProperties(CompoundName path, Map<String, String> context, com.yahoo.processing.request.Properties substitution) { + Map<String, Object> map = super.listProperties(path, context, substitution); + + for (Map.Entry<CompoundName, Object> entry : properties.entrySet()) { + if ( ! entry.getKey().hasPrefix(path)) continue; + CompoundName propertyName = entry.getKey().rest(path.size()); + if (propertyName.isEmpty()) continue; + map.put(propertyName.toString(), entry.getValue()); + } + return map; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/properties/QueryProperties.java b/container-search/src/main/java/com/yahoo/search/query/properties/QueryProperties.java new file mode 100644 index 00000000000..cd4e02dc768 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/properties/QueryProperties.java @@ -0,0 +1,296 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.properties; + +import com.yahoo.component.ComponentId; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.Query; +import com.yahoo.search.query.*; +import com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry; +import com.yahoo.search.query.profile.types.FieldDescription; +import com.yahoo.search.query.profile.types.QueryProfileType; +import com.yahoo.search.query.profile.types.QueryProfileTypeRegistry; +import com.yahoo.search.query.ranking.Diversity; +import com.yahoo.search.query.ranking.MatchPhase; +import com.yahoo.tensor.Tensor; + +import java.util.Map; + +/** + * Maps between the query model and text properties. + * This can be done simpler by using reflection but the performance penalty was not worth it, + * especially since we should be conservative in adding things to the query model. 
+ * + * @author bratseth + */ +public class QueryProperties extends Properties { + + private static final String MODEL_PREFIX = Model.MODEL + "."; + private static final String RANKING_PREFIX = Ranking.RANKING + "."; + private static final String PRESENTATION_PREFIX = Presentation.PRESENTATION + "."; + + public static final CompoundName[] PER_SOURCE_QUERY_PROPERTIES = new CompoundName[] { + new CompoundName(MODEL_PREFIX + Model.QUERY_STRING), + new CompoundName(MODEL_PREFIX + Model.TYPE), + new CompoundName(MODEL_PREFIX + Model.FILTER), + new CompoundName(MODEL_PREFIX + Model.DEFAULT_INDEX), + new CompoundName(MODEL_PREFIX + Model.LANGUAGE), + new CompoundName(MODEL_PREFIX + Model.ENCODING), + new CompoundName(MODEL_PREFIX + Model.SOURCES), + new CompoundName(MODEL_PREFIX + Model.SEARCH_PATH), + new CompoundName(MODEL_PREFIX + Model.RESTRICT), + new CompoundName(RANKING_PREFIX + Ranking.LOCATION), + new CompoundName(RANKING_PREFIX + Ranking.PROFILE), + new CompoundName(RANKING_PREFIX + Ranking.SORTING), + new CompoundName(RANKING_PREFIX + Ranking.FRESHNESS), + new CompoundName(RANKING_PREFIX + Ranking.QUERYCACHE), + new CompoundName(RANKING_PREFIX + Ranking.LIST_FEATURES), + new CompoundName(PRESENTATION_PREFIX + Presentation.BOLDING), + new CompoundName(PRESENTATION_PREFIX + Presentation.SUMMARY), + new CompoundName(PRESENTATION_PREFIX + Presentation.REPORT_COVERAGE), + new CompoundName(PRESENTATION_PREFIX + Presentation.FORMAT), + new CompoundName(PRESENTATION_PREFIX + Presentation.SUMMARY_FIELDS), + Query.HITS, + Query.OFFSET, + Query.TRACE_LEVEL, + Query.TIMEOUT, + Query.NO_CACHE, + Query.GROUPING_SESSION_CACHE }; + + private Query query; + private final CompiledQueryProfileRegistry profileRegistry; + + public QueryProperties(Query query, CompiledQueryProfileRegistry profileRegistry) { + this.query = query; + this.profileRegistry = profileRegistry; + } + + public void setParentQuery(Query query) { + this.query=query; + super.setParentQuery(query); + } + + 
@SuppressWarnings("deprecation") + @Override + public Object get(final CompoundName key, Map<String,String> context, + com.yahoo.processing.request.Properties substitution) { + if (key.size()==2 && key.first().equals(Model.MODEL)) { + if (key.last().equals(Model.QUERY_STRING)) return query.getModel().getQueryString(); + if (key.last().equals(Model.TYPE)) return query.getModel().getType(); + if (key.last().equals(Model.FILTER)) return query.getModel().getFilter(); + if (key.last().equals(Model.DEFAULT_INDEX)) return query.getModel().getDefaultIndex(); + if (key.last().equals(Model.LANGUAGE)) return query.getModel().getLanguage(); + if (key.last().equals(Model.ENCODING)) return query.getModel().getEncoding(); + if (key.last().equals(Model.SOURCES)) return query.getModel().getSources(); + if (key.last().equals(Model.SEARCH_PATH)) return query.getModel().getSearchPath(); + if (key.last().equals(Model.RESTRICT)) return query.getModel().getRestrict(); + } + else if (key.first().equals(Ranking.RANKING)) { + if (key.size()==2) { + if (key.last().equals(Ranking.LOCATION)) return query.getRanking().getLocation(); + if (key.last().equals(Ranking.PROFILE)) return query.getRanking().getProfile(); + if (key.last().equals(Ranking.SORTING)) return query.getRanking().getSorting(); + if (key.last().equals(Ranking.FRESHNESS)) return query.getRanking().getFreshness(); + if (key.last().equals(Ranking.QUERYCACHE)) return query.getRanking().getQueryCache(); + if (key.last().equals(Ranking.LIST_FEATURES)) return query.getRanking().getListFeatures(); + } + else if (key.size()>=3 && key.get(1).equals(Ranking.MATCH_PHASE)) { + if (key.size() == 3) { + MatchPhase matchPhase = query.getRanking().getMatchPhase(); + if (key.last().equals(MatchPhase.ATTRIBUTE)) return matchPhase.getAttribute(); + if (key.last().equals(MatchPhase.ASCENDING)) return matchPhase.getAscending(); + if (key.last().equals(MatchPhase.MAX_HITS)) return matchPhase.getMaxHits(); + if 
(key.last().equals(MatchPhase.MAX_FILTER_COVERAGE)) return matchPhase.getMaxFilterCoverage(); + } else if (key.size() >= 4 && key.get(2).equals(Ranking.DIVERSITY)) { + Diversity diversity = query.getRanking().getMatchPhase().getDiversity(); + if (key.size() == 4) { + if (key.last().equals(Diversity.ATTRIBUTE)) return diversity.getAttribute(); + if (key.last().equals(Diversity.MINGROUPS)) return diversity.getMinGroups(); + } else if ((key.size() == 5) && key.get(3).equals(Diversity.CUTOFF)) { + if (key.last().equals(Diversity.FACTOR)) return diversity.getCutoffFactor(); + if (key.last().equals(Diversity.STRATEGY)) return diversity.getCutoffStrategy(); + } + } + } + else if (key.size()>2) { + // pass the portion after "ranking.features/properties" down + if (key.get(1).equals(Ranking.FEATURES)) return query.getRanking().getFeatures().getObject(key.rest().rest().toString()); + if (key.get(1).equals(Ranking.PROPERTIES)) return query.getRanking().getProperties().get(key.rest().rest().toString()); + } + } + else if (key.size()==2 && key.first().equals(Presentation.PRESENTATION)) { + if (key.last().equals(Presentation.BOLDING)) return query.getPresentation().getBolding(); + if (key.last().equals(Presentation.SUMMARY)) return query.getPresentation().getSummary(); + if (key.last().equals(Presentation.REPORT_COVERAGE)) return query.getPresentation().getReportCoverage(); + if (key.last().equals(Presentation.FORMAT)) return query.getPresentation().getFormat(); + if (key.last().equals(Presentation.TIMING)) return query.getPresentation().getTiming(); + if (key.last().equals(Presentation.SUMMARY_FIELDS)) return query.getPresentation().getSummaryFields(); + } + else if (key.first().equals("rankfeature") || key.first().equals("featureoverride")) { // featureoverride is deprecated + return query.getRanking().getFeatures().getObject(key.rest().toString()); + } else if (key.first().equals("rankproperty")) { + return query.getRanking().getProperties().get(key.rest().toString()); + } 
else if (key.size()==1) { + if (key.equals(Query.HITS)) return query.getHits(); + if (key.equals(Query.OFFSET)) return query.getOffset(); + if (key.equals(Query.TRACE_LEVEL)) return query.getTraceLevel(); + if (key.equals(Query.TIMEOUT)) return query.getTimeout(); + if (key.equals(Query.NO_CACHE)) return query.getNoCache(); + if (key.equals(Query.GROUPING_SESSION_CACHE)) return query.getGroupingSessionCache(); + if (key.toString().equals(Model.MODEL)) return query.getModel(); + if (key.toString().equals(Ranking.RANKING)) return query.getRanking(); + if (key.toString().equals(Presentation.PRESENTATION)) return query.getPresentation(); + } + return super.get(key,context,substitution); + } + + @SuppressWarnings("deprecation") + @Override + public void set(final CompoundName key,Object value,Map<String,String> context) { + // Note: The defaults here are never used + try { + if (key.size()==2 && key.first().equals(Model.MODEL)) { + if (key.last().equals(Model.QUERY_STRING)) + query.getModel().setQueryString(asString(value, "")); + else if (key.last().equals(Model.TYPE)) + query.getModel().setType(asString(value, "ANY")); + else if (key.last().equals(Model.FILTER)) + query.getModel().setFilter(asString(value, "")); + else if (key.last().equals(Model.DEFAULT_INDEX)) + query.getModel().setDefaultIndex(asString(value, "")); + else if (key.last().equals(Model.LANGUAGE)) + query.getModel().setLanguage(asString(value, "")); + else if (key.last().equals(Model.ENCODING)) + query.getModel().setEncoding(asString(value,"")); + else if (key.last().equals(Model.SEARCH_PATH)) + query.getModel().setSearchPath(asString(value,"")); + else if (key.last().equals(Model.SOURCES)) + query.getModel().setSources(asString(value,"")); + else if (key.last().equals(Model.RESTRICT)) + query.getModel().setRestrict(asString(value,"")); + else + throwIllegalParameter(key.last(),Model.MODEL); + } + else if (key.first().equals(Ranking.RANKING)) { + if (key.size()==2) { + if 
(key.last().equals(Ranking.LOCATION)) + query.getRanking().setLocation(asString(value,"")); + else if (key.last().equals(Ranking.PROFILE)) + query.getRanking().setProfile(asString(value,"")); + else if (key.last().equals(Ranking.SORTING)) + query.getRanking().setSorting(asString(value,"")); + else if (key.last().equals(Ranking.FRESHNESS)) + query.getRanking().setFreshness(asString(value, "")); + else if (key.last().equals(Ranking.QUERYCACHE)) + query.getRanking().setQueryCache(asBoolean(value, false)); + else if (key.last().equals(Ranking.LIST_FEATURES)) + query.getRanking().setListFeatures(asBoolean(value,false)); + } + else if (key.size()>=3 && key.get(1).equals(Ranking.MATCH_PHASE)) { + if (key.size() == 3) { + MatchPhase matchPhase = query.getRanking().getMatchPhase(); + if (key.last().equals(MatchPhase.ATTRIBUTE)) { + matchPhase.setAttribute(asString(value, null)); + } else if (key.last().equals(MatchPhase.ASCENDING)) { + matchPhase.setAscending(asBoolean(value, false)); + } else if (key.last().equals(MatchPhase.MAX_HITS)) { + matchPhase.setMaxHits(asLong(value, null)); + } else if (key.last().equals(MatchPhase.MAX_FILTER_COVERAGE)) { + matchPhase.setMaxFilterCoverage(asDouble(value, 1.0)); + } + } else if (key.size() > 3 && key.get(2).equals(Ranking.DIVERSITY)) { + Diversity diversity = query.getRanking().getMatchPhase().getDiversity(); + if (key.last().equals(Diversity.ATTRIBUTE)) { + diversity.setAttribute(asString(value, null)); + } else if (key.last().equals(Diversity.MINGROUPS)) { + diversity.setMinGroups(asLong(value, null)); + } else if ((key.size() > 4) && key.get(3).equals(Diversity.CUTOFF)) { + if (key.last().equals(Diversity.FACTOR)) { + diversity.setCutoffFactor(asDouble(value, 10.0)); + } else if (key.last().equals(Diversity.STRATEGY)) { + diversity.setCutoffStrategy(asString(value, "loose")); + } + } + } + } + else if (key.size()>2) { + String restKey = key.rest().rest().toString(); + if (key.get(1).equals(Ranking.FEATURES)) + 
setRankingFeature(query, restKey, toSpecifiedType(restKey, value, profileRegistry.getTypeRegistry().getComponent("features"))); + else if (key.get(1).equals(Ranking.PROPERTIES)) + query.getRanking().getProperties().put(restKey, toSpecifiedType(restKey, value, profileRegistry.getTypeRegistry().getComponent("properties"))); + else + throwIllegalParameter(key.rest().toString(),Ranking.RANKING); + } + } + else if (key.size()==2 && key.first().equals(Presentation.PRESENTATION)) { + if (key.last().equals(Presentation.BOLDING)) + query.getPresentation().setBolding(asBoolean(value, true)); + else if (key.last().equals(Presentation.SUMMARY)) + query.getPresentation().setSummary(asString(value, "")); + else if (key.last().equals(Presentation.REPORT_COVERAGE)) + query.getPresentation().setReportCoverage(asBoolean(value,true)); + else if (key.last().equals(Presentation.FORMAT)) + query.getPresentation().setFormat(asString(value,"")); + else if (key.last().equals(Presentation.TIMING)) + query.getPresentation().setTiming(asBoolean(value, true)); + else if (key.last().equals(Presentation.SUMMARY_FIELDS)) + query.getPresentation().setSummaryFields(asString(value,"")); + else + throwIllegalParameter(key.last(), Presentation.PRESENTATION); + } + else if (key.first().equals("rankfeature") || key.first().equals("featureoverride") ) { // featureoverride is deprecated + setRankingFeature(query, key.rest().toString(), toSpecifiedType(key.rest().toString(), value, profileRegistry.getTypeRegistry().getComponent("features"))); + } else if (key.first().equals("rankproperty")) { + query.getRanking().getProperties().put(key.rest().toString(), toSpecifiedType(key.rest().toString(), value, profileRegistry.getTypeRegistry().getComponent("properties"))); + } else if (key.size()==1) { + if (key.equals(Query.HITS)) + query.setHits(asInteger(value,10)); + else if (key.equals(Query.OFFSET)) + query.setOffset(asInteger(value,0)); + else if (key.equals(Query.TRACE_LEVEL)) + 
query.setTraceLevel(asInteger(value,0)); + else if (key.equals(Query.TIMEOUT)) + query.setTimeout(value.toString()); + else if (key.equals(Query.NO_CACHE)) + query.setNoCache(asBoolean(value,false)); + else if (key.equals(Query.GROUPING_SESSION_CACHE)) + query.setGroupingSessionCache(asBoolean(value, false)); + else + super.set(key,value,context); + } + else + super.set(key,value,context); + } + catch (Exception e) { // Make sure error messages are informative. This should be moved out of this properties implementation + if (e.getMessage().startsWith("Could not set")) + throw e; + else + throw new IllegalArgumentException("Could not set '" + key + "' to '" + value + "'", e); + } + } + + private void setRankingFeature(Query query, String key, Object value) { + if (value instanceof Tensor) + query.getRanking().getFeatures().put(key, (Tensor)value); + else + query.getRanking().getFeatures().put(key, asString(value, "")); + } + + private Object toSpecifiedType(String key, Object value, QueryProfileType type) { + if ( ! ( value instanceof String)) return value; // already typed + if (type == null) return value; // no type info -> keep as string + FieldDescription field = type.getField(key); + if (field == null) return value; // ditto + return field.getType().convertFrom(value, profileRegistry); + } + + private void throwIllegalParameter(String key,String namespace) { + throw new IllegalArgumentException("'" + key + "' is not a valid property in '" + namespace + + "'. 
See the search api for valid keys starting by '" + namespace + "'."); + } + + @Override + public final Query getParentQuery() { + return query; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/query/properties/QueryPropertyAliases.java b/container-search/src/main/java/com/yahoo/search/query/properties/QueryPropertyAliases.java new file mode 100644 index 00000000000..15544e8ff4c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/properties/QueryPropertyAliases.java @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.properties; + +import com.yahoo.processing.request.CompoundName; + +import java.util.Map; + +/** + * Property aliases which contains some hardcoded unaliasing of prefixes of + * rankfeature and rankproperty maps. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class QueryPropertyAliases extends PropertyAliases { + + /** + * Creates an instance with a set of aliases. The given aliases will be used directly by this class. + * To make this class immutable and thread safe, relinquish ownership of the parameter map. 
+ */ + public QueryPropertyAliases(Map<String,CompoundName> aliases) { + super(aliases); + } + + @Override + protected CompoundName unalias(CompoundName nameOrAlias) { + if (nameOrAlias.first().equalsIgnoreCase("rankfeature")) + return nameOrAlias.rest().prepend("ranking", "features"); + else if (nameOrAlias.first().equalsIgnoreCase("rankproperty")) + return nameOrAlias.rest().prepend("ranking", "properties"); + return super.unalias(nameOrAlias); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/properties/RequestContextProperties.java b/container-search/src/main/java/com/yahoo/search/query/properties/RequestContextProperties.java new file mode 100644 index 00000000000..c97f4daf6d4 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/properties/RequestContextProperties.java @@ -0,0 +1,41 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.properties; + +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.query.Properties; + +import java.util.Map; + +/** + * Turns get(name) into get(name,request) using the request given at construction time. + * This is used to allow the query's request to be supplied to all property requests + * without forcing users of the query.properties() to supply this explicitly. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class RequestContextProperties extends Properties { + + private final Map<String,String> requestMap; + + public RequestContextProperties(Map<String, String> properties) { + this.requestMap=properties; + } + + @Override + public Object get(CompoundName name,Map<String,String> context, + com.yahoo.processing.request.Properties substitution) { + return super.get(name,context==null ? 
requestMap : context,substitution); + } + + @Override + public void set(CompoundName name,Object value,Map<String,String> context) { + super.set(name,value,context==null ? requestMap : context); + } + + @Override + public Map<String, Object> listProperties(CompoundName path,Map<String,String> context, + com.yahoo.processing.request.Properties substitution) { + return super.listProperties(path,context==null ? requestMap : context,substitution); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/properties/SubProperties.java b/container-search/src/main/java/com/yahoo/search/query/properties/SubProperties.java new file mode 100644 index 00000000000..7f5c2ec2558 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/properties/SubProperties.java @@ -0,0 +1,67 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.properties; + +import com.yahoo.processing.request.CompoundName; +import com.yahoo.processing.request.Properties; + +import java.util.Map; + +/** + * A wrapper around a chain of property objects that prefixes all gets/sets with a given path + * + * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> + */ +public class SubProperties extends com.yahoo.search.query.Properties { + + final private CompoundName pathPrefix; + final private Properties parent; + + public SubProperties(String pathPrefix, Properties properties) { + this(new CompoundName(pathPrefix),properties); + } + + public SubProperties(CompoundName pathPrefix, Properties properties) { + this.pathPrefix = pathPrefix; + this.parent = properties; + } + + @Override + public Object get(CompoundName key, Map<String,String> context, + com.yahoo.processing.request.Properties substitution) { + if(key == null) return null; + Object result = parent.get(getPathPrefix() + "." 
+ key,context,substitution); + if(result == null) { + return super.get(key,context,substitution); + } else { + return result; + } + } + + @Override + public void set(CompoundName key, Object obj, Map<String,String> context) { + if(key == null) return; + parent.set(getPathPrefix() + "." + key, obj, context); + } + + @Override + public Map<String, Object> listProperties(CompoundName path,Map<String,String> context, + com.yahoo.processing.request.Properties substitution) { + Map<String, Object> map = super.listProperties(path,context,substitution); + if(path.isEmpty()) { + map.putAll(parent.listProperties(getPathPrefix(),context,substitution)); + } else { + map.putAll(parent.listProperties(getPathPrefix() + "." + path,context,substitution)); + } + return map; + } + + public CompoundName getPathPrefixCompound() { + return pathPrefix; + } + + /** Returns getPatchPrefixCompound.toString() */ + public String getPathPrefix() { + return getPathPrefixCompound().toString(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/properties/package-info.java b/container-search/src/main/java/com/yahoo/search/query/properties/package-info.java new file mode 100644 index 00000000000..047a5494e53 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/properties/package-info.java @@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +@PublicApi +package com.yahoo.search.query.properties; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/query/ranking/Diversity.java b/container-search/src/main/java/com/yahoo/search/query/ranking/Diversity.java new file mode 100644 index 00000000000..b1865ad9d75 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/ranking/Diversity.java @@ -0,0 +1,127 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.ranking; + +import com.yahoo.search.query.Ranking; +import com.yahoo.search.query.profile.types.FieldDescription; +import com.yahoo.search.query.profile.types.QueryProfileType; + +import java.util.Objects; + +/** + * <p>The diversity settings during match phase of a query. + * These are the same settings for diversity during match phase that can be set in a rank profile + * and is used for achieving guaranteed diversity at the cost of slightly higher cost as more hits must be + * considered compared to plain match-phase.</p> + * + * <p>You specify an additional attribute to be the diversifier and also min diversity needed.</p> + * + * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a> + */ +public class Diversity implements Cloneable { + + /** The type representing the property arguments consumed by this */ + private static final QueryProfileType argumentType; + + public static final String ATTRIBUTE = "attribute"; + public static final String MINGROUPS = "minGroups"; + public static final String CUTOFF = "cutoff"; + public static final String FACTOR = "factor"; + public static final String STRATEGY = "strategy"; + + + static { + argumentType =new QueryProfileType(Ranking.DIVERSITY); + argumentType.setStrict(true); + argumentType.setBuiltin(true); + argumentType.addField(new FieldDescription(ATTRIBUTE, "string")); + argumentType.addField(new FieldDescription(MINGROUPS, "long")); + argumentType.freeze(); + } + public static QueryProfileType getArgumentType() { return argumentType; } + + public enum CutoffStrategy {loose, strict}; + private String attribute = null; + private Long minGroups = null; + private Double cutoffFactor = null; + private CutoffStrategy cutoffStrategy= null; + + /** + * Sets the attribute field which will be used to guarantee diversity. + * Set to null (default) to disable diversification. 
+ * <p> + * If this is set, make sure to also set the maxGroups value. + * <p> + * This attribute must be singlevalue. + */ + public void setAttribute(String attribute) { this.attribute = attribute; } + + /** Returns the attribute to use for diversity, or null if none */ + public String getAttribute() { return attribute; } + + /** + * Sets the max hits to aim for producing in the match phase. + * This must be set if an attribute value is set. + * It should be set to a reasonable fraction of the total documents on each partition. + */ + public void setMinGroups(long minGroups) { this.minGroups = minGroups; } + + /** Returns the max hits to aim for producing in the match phase on each content node, or null if not set */ + public Long getMinGroups() { return minGroups; } + + public void setCutoffFactor(double cutoffFactor) { this.cutoffFactor = cutoffFactor; } + public Double getCutoffFactor() { return cutoffFactor; } + public void setCutoffStrategy(String cutoffStrategy) { this.cutoffStrategy = CutoffStrategy.valueOf(cutoffStrategy); } + public CutoffStrategy getCutoffStrategy() { return cutoffStrategy; } + + /** Internal operation - DO NOT USE */ + public void prepare(RankProperties rankProperties) { + if (attribute == null && minGroups == null) return; + + if (attribute != null && !attribute.isEmpty()) { + rankProperties.put("vespa.matchphase.diversity.attribute", attribute); + } + if (minGroups != null) { + rankProperties.put("vespa.matchphase.diversity.mingroups", String.valueOf(minGroups)); + } + if (cutoffFactor != null) { + rankProperties.put("vespa.matchphase.diversity.cutoff.factor", String.valueOf(cutoffFactor)); + } + if (cutoffStrategy != null) { + rankProperties.put("vespa.matchphase.diversity.cutoff.strategy", cutoffStrategy); + } + } + + @Override + public Diversity clone() { + try { + return (Diversity)super.clone(); + } + catch (CloneNotSupportedException e) { + throw new RuntimeException("Won't happen", e); + } + } + + @Override + public int 
hashCode() { + int hash = 0; + if (attribute != null) hash += 11 * attribute.hashCode(); + if (minGroups != null) hash += 13 * minGroups.hashCode(); + if (cutoffFactor != null) hash += 17 * cutoffFactor.hashCode(); + if (cutoffStrategy != null) hash += 19 * cutoffStrategy.hashCode(); + return hash; + } + + @Override + public boolean equals(Object o) { + if (o == this) return true; + if ( ! (o instanceof Diversity)) return false; + + Diversity other = (Diversity)o; + if ( ! Objects.equals(this.attribute, other.attribute)) return false; + if ( ! Objects.equals(this.minGroups, other.minGroups)) return false; + if ( ! Objects.equals(this.cutoffFactor, other.cutoffFactor)) return false; + if ( ! Objects.equals(this.cutoffStrategy, other.cutoffStrategy)) return false; + return true; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/ranking/MatchPhase.java b/container-search/src/main/java/com/yahoo/search/query/ranking/MatchPhase.java new file mode 100644 index 00000000000..ba25ddbe7e6 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/ranking/MatchPhase.java @@ -0,0 +1,153 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.ranking; + +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.query.Ranking; +import com.yahoo.search.query.profile.types.FieldDescription; +import com.yahoo.search.query.profile.types.QueryProfileType; + +import java.util.Objects; + +/** + * The match phase ranking settings of this query. + * These are the same settings for match phase that can be set in a rank profile + * and is used for achieving reasonable query behavior given a query which causes too many matches: + * The engine will fall back to retrieving the best values according to the attribute given here + * during matching. 
+ * <p> + * For this feature to work well, the order given by the attribute should correlate reasonably with the order + * of results produced if full evaluation is performed. + * + * @author bratseth + */ +public class MatchPhase implements Cloneable { + + /** The type representing the property arguments consumed by this */ + private static final QueryProfileType argumentType; + + public static final String ATTRIBUTE = "attribute"; + public static final String ASCENDING = "ascending"; + public static final String MAX_HITS = "maxHits"; + public static final String MAX_FILTER_COVERAGE = "maxFilterCoverage"; + + static { + argumentType =new QueryProfileType(Ranking.MATCH_PHASE); + argumentType.setStrict(true); + argumentType.setBuiltin(true); + argumentType.addField(new FieldDescription(ATTRIBUTE, "string")); + argumentType.addField(new FieldDescription(ASCENDING, "boolean")); + argumentType.addField(new FieldDescription(MAX_HITS, "long")); + argumentType.addField(new FieldDescription(MAX_FILTER_COVERAGE, "double")); + argumentType.addField(new FieldDescription(Ranking.DIVERSITY, "query-profile", "diversity")); + argumentType.freeze(); + } + public static QueryProfileType getArgumentType() { return argumentType; } + + private String attribute = null; + private boolean ascending = false; + private Long maxHits = null; + private Double maxFilterCoverage = 1.0; + private Diversity diversity = new Diversity(); + + /** + * Sets the attribute field which will be used to decide the best matches after it has been determined + * during matching that this query is going to cause too many matches. + * Set to null (default) to disable degradation. + * <p> + * If this is set, make sure to also set the maxHits value. + * Otherwise, the attribute setting is ignored. + * <p> + * This attribute should have fast-search turned on. 
+ */ + public void setAttribute(String attribute) { this.attribute = attribute; } + + /** Returns the attribute to use for degradation, or null if none */ + public String getAttribute() { return attribute; } + + /** + * Set to true to sort by the attribute in ascending order when this is in use during the match phase, + * false (default) to use descending order. + */ + public void setAscending(boolean ascending) { this.ascending = ascending; } + + /** + * Returns true if the attribute is sorted in ascending order during the match phase when this takes effect, + * false (default) if it is sorted in descending order. + */ + public boolean getAscending() { return ascending; } + + /** + * Sets the max hits to aim for producing in the match phase. + * This must be set if an attribute value is set. + * It should be set to a reasonable fraction of the total documents on each partition. + */ + public void setMaxHits(long maxHits) { this.maxHits = maxHits; } + + public void setMaxFilterCoverage(double maxFilterCoverage) { + if ((maxFilterCoverage < 0.0) || (maxFilterCoverage > 1.0)) { + throw new IllegalArgumentException("maxFilterCoverage must be in the range [0.0, 1.0]. 
It is " + maxFilterCoverage); + } + this.maxFilterCoverage = maxFilterCoverage; + } + + /** Returns the max hits to aim for producing in the match phase on each content node, or null if not set */ + public Long getMaxHits() { return maxHits; } + + public Double getMaxFilterCoverage() { return maxFilterCoverage; } + + public Diversity getDiversity() { return diversity; } + + public void setDiversity(Diversity diversity) { + this.diversity = diversity; + } + + /** Internal operation - DO NOT USE */ + public void prepare(RankProperties rankProperties) { + if (attribute == null || maxHits == null) return; + + rankProperties.put("vespa.matchphase.degradation.attribute", attribute); + if (ascending) { // backend default is descending + rankProperties.put("vespa.matchphase.degradation.ascendingorder", "true"); + } + rankProperties.put("vespa.matchphase.degradation.maxhits", String.valueOf(maxHits)); + rankProperties.put("vespa.matchphase.degradation.maxfiltercoverage", String.valueOf(maxFilterCoverage)); + diversity.prepare(rankProperties); + } + + @Override + public int hashCode() { + int hash = 0; + hash += 13 * Boolean.hashCode(ascending); + hash += 19 * diversity.hashCode(); + if (attribute != null) hash += 11 * attribute.hashCode(); + if (maxHits != null) hash += 17 * maxHits.hashCode(); + hash += 23 * maxFilterCoverage.hashCode(); + return hash; + } + + @Override + public boolean equals(Object o) { + if (o == this) return true; + if ( ! (o instanceof MatchPhase)) return false; + + MatchPhase other = (MatchPhase)o; + if ( this.ascending != other.ascending) return false; + if ( ! Objects.equals(this.attribute, other.attribute)) return false; + if ( ! Objects.equals(this.maxHits, other.maxHits)) return false; + if ( ! Objects.equals(this.diversity, other.diversity)) return false; + if ( ! 
Objects.equals(this.maxFilterCoverage, other.maxFilterCoverage)) return false; + return true; + } + + @Override + public MatchPhase clone() { + try { + MatchPhase clone = (MatchPhase)super.clone(); + clone.diversity = diversity.clone(); + return clone; + } + catch (CloneNotSupportedException e) { + throw new RuntimeException("Won't happen", e); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/ranking/RankFeatures.java b/container-search/src/main/java/com/yahoo/search/query/ranking/RankFeatures.java new file mode 100644 index 00000000000..1bcd548882c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/ranking/RankFeatures.java @@ -0,0 +1,130 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.ranking; + +import com.yahoo.fs4.MapEncoder; +import com.yahoo.tensor.Tensor; +import com.yahoo.text.JSON; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +/** + * Contains the rank features of a query. + * + * @author bratseth + */ +public class RankFeatures implements Cloneable { + + private final Map<String, Object> features; + + public RankFeatures() { + this(new LinkedHashMap<>()); + } + + private RankFeatures(Map<String, Object> features) { + this.features = features; + } + + /** Sets a rank feature by full name to a value */ + public void put(String name, String value) { + features.put(name, value); + } + + /** Sets a tensor rank feature */ + public void put(String name, Tensor value) { + features.put(name, value); + } + + /** Returns a rank feature as a string by full name or null if not set */ + public String get(String name) { + Object value = features.get(name); + if (value == null) return null; + return value.toString(); + } + + /** Returns this value as whatever type it was stored as. 
Returns null if the value is not set. */ + public Object getObject(String name) { + return features.get(name); + } + + /** + * Returns a tensor rank feature, or empty if there is no value with this name. + * + * @throws IllegalArgumentException if the value is set but is not a tensor + */ + public Optional<Tensor> getTensor(String name) { + Object feature = features.get(name); + if (feature == null) return Optional.empty(); + if (feature instanceof Tensor) return Optional.of((Tensor)feature); + throw new IllegalArgumentException("Expected a tensor value of '" + name + "' but has " + feature); + } + + /** + * Returns the map holding the features of this. + * This map may be modified to change the rank features of the query. + */ + public Map<String, Object> asMap() { return features; } + + public boolean isEmpty() { + return features.isEmpty(); + } + + /** + * Prepares this for encoding, not for external use. See encode on Query for details. + * <p> + * If the query feature is found in the rank feature set, + * remove all these entries and insert them into the rank property set instead. + * We want to hide from the user that the query feature value is sent down as a rank property + * and picked up by the query feature executor in the backend. 
+ */ + public void prepare(RankProperties rankProperties) { + if (isEmpty()) return; + + List<String> featuresToRemove = new ArrayList<>(); + List<String> propertiesToInsert = new ArrayList<>(); + for (String key : features.keySet()) { + if (key.startsWith("query(") && key.endsWith(")")) { + featuresToRemove.add(key); + propertiesToInsert.add(key.substring("query(".length(), key.length() - 1)); + } else if (key.startsWith("$")) { + featuresToRemove.add(key); + propertiesToInsert.add(key.substring(1)); + } + } + for (int i = 0; i < featuresToRemove.size(); ++i) { + rankProperties.put(propertiesToInsert.get(i), features.remove(featuresToRemove.get(i))); + } + } + + public int encode(ByteBuffer buffer) { + return MapEncoder.encodeMap("feature", features, buffer); + } + + @Override + public boolean equals(Object other) { + if (other == this) return true; + if ( ! (other instanceof RankFeatures)) return false; + + return this.features.equals(((RankFeatures)other).features); + } + + @Override + public int hashCode() { + return features.hashCode(); + } + + @Override + public RankFeatures clone() { + return new RankFeatures(new LinkedHashMap<>(features)); + } + + @Override + public String toString() { + return JSON.encode(features); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/ranking/RankProperties.java b/container-search/src/main/java/com/yahoo/search/query/ranking/RankProperties.java new file mode 100644 index 00000000000..eccb8bac2d4 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/ranking/RankProperties.java @@ -0,0 +1,114 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.query.ranking; + +import com.yahoo.fs4.GetDocSumsPacket; +import com.yahoo.fs4.MapEncoder; +import com.yahoo.text.JSON; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + * Contains the rank properties of a query. + * This is a multimap: Multiple properties may be set for the same key. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class RankProperties implements Cloneable { + + private Map<String, List<Object>> properties = new LinkedHashMap<>(); + + public RankProperties() { + this(new LinkedHashMap<>()); + } + + private RankProperties(Map<String, List<Object>> properties) { + this.properties = properties; + } + + public void put(String name, String value) { + put(name, (Object)value); + } + + /** Adds a value to the rank property with the given full name, keeping any values already set for it */ + public void put(String name, Object value) { + List<Object> list = properties.get(name); + if (list == null) { + list = new ArrayList<>(); + properties.put(name, list); + } + list.add(value); + } + + /** + * Returns a read-only list of the rank property values set for the given full name. + * If this is not set, null is returned. If this is explicitly set to + * have no values, an empty list is returned. + */ + public List<String> get(String name) { + List<Object> values = properties.get(name); + if (values == null) return null; + if (values.isEmpty()) return Collections.<String>emptyList(); + + // Compatibility ... 
+ List<String> stringValues = new ArrayList<>(values.size()); + for (Object value : values) + stringValues.add(value.toString()); + return Collections.unmodifiableList(stringValues); + } + + /** Removes all rank properties for a given name */ + public void remove(String name) { + properties.remove(name); + } + + public boolean isEmpty() { + return properties.isEmpty(); + } + + /** Returns a modifiable map of the properties of this */ + public Map<String, List<Object>> asMap() { return properties; } + + /** Encodes this in a binary internal representation and returns the number of property maps encoded (0 or 1) */ + public int encode(ByteBuffer buffer, boolean encodeQueryData) { + if (encodeQueryData) { + return MapEncoder.encodeObjectMultiMap("rank", properties, buffer); + } + else { + List<Object> sessionId = properties.get(GetDocSumsPacket.sessionIdKey); + if (sessionId == null) return 0; + return MapEncoder.encodeSingleValue("rank", GetDocSumsPacket.sessionIdKey, sessionId.get(0), buffer); + } + } + + @Override + public boolean equals(Object other) { + if (other == this) return true; + if ( ! 
(other instanceof RankProperties)) return false; + + return this.properties.equals(((RankProperties)other).properties); + } + + @Override + public int hashCode() { + return properties.hashCode(); + } + + @Override + public RankProperties clone() { + Map<String, List<Object>> clone = new LinkedHashMap<>(); + for (Map.Entry<String, List<Object>> entry : properties.entrySet()) + clone.put(entry.getKey(), new ArrayList<>(entry.getValue())); + return new RankProperties(clone); + } + + @Override + public String toString() { + return JSON.encode(properties); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/query/ranking/package-info.java b/container-search/src/main/java/com/yahoo/search/query/ranking/package-info.java new file mode 100644 index 00000000000..f254b327f96 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/ranking/package-info.java @@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +@PublicApi +package com.yahoo.search.query.ranking; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/query/rewrite/QueryRewriteSearcher.java b/container-search/src/main/java/com/yahoo/search/query/rewrite/QueryRewriteSearcher.java new file mode 100644 index 00000000000..bb76c1006f2 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/rewrite/QueryRewriteSearcher.java @@ -0,0 +1,423 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.query.rewrite; + +import com.google.inject.Inject; +import com.yahoo.search.*; +import com.yahoo.config.*; +import com.yahoo.search.query.rewrite.RewritesConfig.FsaDict; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.fsa.FSA; +import com.yahoo.filedistribution.fileacquirer.FileAcquirer; +import com.yahoo.component.ComponentId; + +import java.io.*; +import java.util.*; +import java.util.concurrent.TimeUnit; +import java.util.logging.Logger; + +/** + * <p>A template class for all rewriters</p> + * + * <p>All rewriters extending this class would need to implement the + * rewrite method which contains the rewriter's main logic, + * getSkipRewriterIfRewritten method which indicates whether this + * rewriter should be skipped if the query has been rewritten, + * getRewriterName method which returns the name of the rewriter used + * in query profile, configure method which contains any instance + * creation time configuration besides the default FSA loading, and + * getDefaultFSAs method which returns the pairs of default dictionary name + * and filename.</p> + * + * <p>Common rewrite features are in RewriterFeatures.java. + * Common rewriter utils are in RewriterUtils.java.</p> + * + * @author Karen Sze Wing Lee + */ +public abstract class QueryRewriteSearcher extends Searcher { + + // Indicates whether the rewriter is properly initialized + private boolean isOk = false; + + protected final Logger logger = Logger.getLogger(QueryRewriteSearcher.class.getName()); + + // HashMap which stores the rewriter dicts + // It has the following format: + // HashMap<String(e.g. dictionary name, etc), + // Object(e.g. FSA, etc)>> + protected HashMap<String, Object> rewriterDicts = new HashMap<>(); + + /** + * Constructor for this rewriter. 
+ * Prepare the data needed by the rewriter + * @param id Component ID (see vespa's search container doc for more detail) + * @param fileAcquirer Required param for retrieving file type config + * (see vespa's search container doc for more detail) + * @param config Config from vespa-services.xml (see vespa's search + * container doc for more detail) + */ + @Inject + protected QueryRewriteSearcher(ComponentId id, + FileAcquirer fileAcquirer, + RewritesConfig config) { + super(id); + RewriterUtils.log(logger, "In QueryRewriteSearcher(ComponentId id, " + + "FileAcquirer fileAcquirer, " + + "RewritesConfig config)"); + isOk = loadFSADicts(fileAcquirer, config, null); + isOk = isOk && configure(fileAcquirer, config, null); + if(isOk) { + RewriterUtils.log(logger, "Rewriter is configured properly"); + } else { + RewriterUtils.log(logger, "Rewriter is not configured properly"); + } + } + + /** + * Constructor for unit test. + * Prepare the data needed by the rewriter + * @param config Config from vespa-services.xml (see vespa's search + * container doc for more detail) + * @param fileList pairs of file name and file handler for unit tests + */ + protected QueryRewriteSearcher(RewritesConfig config, + HashMap<String, File> fileList) { + RewriterUtils.log(logger, "In QueryRewriteSearcher(RewritesConfig config, " + + "HashMap<String, File> fileList)"); + isOk = loadFSADicts(null, config, fileList); + isOk = isOk && configure(null, config, fileList); + if(isOk) { + RewriterUtils.log(logger, "Rewriter is configured properly"); + } else { + RewriterUtils.log(logger, "Rewriter is not configured properly"); + } + } + + /** + * Empty constructor. 
+ * Do nothing at instance creation time + */ + protected QueryRewriteSearcher(ComponentId id) { + super(id); + RewriterUtils.log(logger, "In QueryRewriteSearcher(Component id)"); + RewriterUtils.log(logger, "Configuring rewriter: " + getRewriterName()); + isOk = true; + RewriterUtils.log(logger, "Rewriter is configured properly"); + } + + /** + * Empty constructor for unit test. + * Do nothing at instance creation time + */ + protected QueryRewriteSearcher() { + RewriterUtils.log(logger, "In QueryRewriteSearcher()"); + RewriterUtils.log(logger, "Configuring rewriter: " + getRewriterName()); + isOk = true; + RewriterUtils.log(logger, "Rewriter is configured properly"); + } + + /** + * Load the dicts specified in vespa-services.xml + * + * @param fileAcquirer Required param for retrieving file type config + * (see vespa's search container doc for more detail) + * @param config Config from vespa-services.xml (see vespa's search + * container doc for more detail) + * @param fileList pairs of file name and file handler for unit tests + * @return boolean true if loaded successfully, false otherwise + */ + private boolean loadFSADicts(FileAcquirer fileAcquirer, + RewritesConfig config, + HashMap<String, File> fileList) + throws RuntimeException { + + // Check if getRewriterName method is properly implemented + String rewriterName = getRewriterName(); + if(rewriterName==null) { + RewriterUtils.error(logger, "Rewriter required method is not properly implemented: "); + return false; + } + + RewriterUtils.log(logger, "Configuring rewriter: " + rewriterName); + + // Check if there's no config need to be loaded + if(config==null || (fileAcquirer==null && fileList==null)) { + RewriterUtils.log(logger, "No FSA dictionary file need to be loaded"); + return true; + } + + // Check if config contains the FSADict param + if(config.fsaDict()==null) { + RewriterUtils.error(logger, "FSADict is not properly set in config"); + return false; + } + + RewriterUtils.log(logger, "Loading 
rewriter dictionaries"); + + // Retrieve FSA names and paths + ListIterator<FsaDict> fsaList = config.fsaDict().listIterator(); + + // Load default dictionaries if no user dictionaries is configured + if(!fsaList.hasNext()) { + RewriterUtils.log(logger, "Loading default dictionaries"); + HashMap<String, String> defaultFSAs = getDefaultFSAs(); + + if(defaultFSAs==null) { + RewriterUtils.log(logger, "No default FSA dictionary is configured"); + return true; + } + Iterator<Map.Entry<String, String>> defaultFSAList = defaultFSAs.entrySet().iterator(); + while(defaultFSAList.hasNext()) { + try{ + Map.Entry<String, String> currFSA = defaultFSAList.next(); + String fsaName = currFSA.getKey(); + String fsaPath = currFSA.getValue(); + + RewriterUtils.log(logger, + "FSA file location for " + fsaName + ": " + fsaPath); + + // Load FSA + FSA fsa = RewriterUtils.loadFSA(RewriterConstants.DEFAULT_DICT_DIR + fsaPath, null); + + // Store FSA into dictionary map + rewriterDicts.put(fsaName, fsa); + } catch (IOException e) { + RewriterUtils.error(logger, "Error loading FSA dictionary: " + + e.getMessage()); + return false; + } + } + } else { + // Load user configured dictionaries + while(fsaList.hasNext()) { + try{ + FsaDict currFSA = fsaList.next(); + // fsaName and fsaPath are not null + // or else vespa config server would not have been + // able to start up + String fsaName = currFSA.name(); + FileReference fsaPath = currFSA.path(); + + RewriterUtils.log(logger, + "FSA file location for " + fsaName + ": " + fsaPath); + + // Retrieve FSA File handler + File fsaFile = null; + if(fileAcquirer!=null) { + fsaFile = fileAcquirer.waitFor(fsaPath, 5, TimeUnit.MINUTES); + } else if(fileList!=null) { + fsaFile = fileList.get(fsaName); + } + + if(fsaFile==null) { + RewriterUtils.error(logger, "Error loading FSA dictionary file handler"); + return false; + } + + // Load FSA + FSA fsa = RewriterUtils.loadFSA(fsaFile, null); + + // Store FSA into dictionary map + rewriterDicts.put(fsaName, 
fsa); + } catch (InterruptedException e1) { + RewriterUtils.error(logger, "Error loading FSA dictionary file handler: " + + e1.getMessage()); + return false; + } catch (IOException e2) { + RewriterUtils.error(logger, "Error loading FSA dictionary: " + + e2.getMessage()); + return false; + } + } + } + RewriterUtils.log(logger, "Successfully loaded rewriter dictionaries"); + return true; + } + + /** + * Perform instance creation time configuration besides the + * default FSA loading + * + * @param fileAcquirer Required param for retrieving file type config + * (see vespa's search container doc for more detail) + * @param config Config from vespa-services.xml (see vespa's search + * container doc for more detail) + * @param fileList pairs of file name and file handler for unit tests + * @return boolean true if loaded successfully, false otherwise + */ + public abstract boolean configure(FileAcquirer fileAcquirer, + RewritesConfig config, + HashMap<String, File> fileList) + throws RuntimeException; + + /** + * Perform main rewrite logics for this searcher<br> + * - Skip to next rewriter if query is previously + * rewritten and getSkipRewriterIfRewritten() is + * true for this rewriter<br> + * - Execute rewriter's main rewrite logic<br> + * - Pass to the next rewriter the query to be used + * for dictionary retrieval<br> + */ + public @Override Result search(Query query, Execution execution) { + RewriterUtils.log(logger, query, "Executing " + getRewriterName()); + + // Check if rewriter is properly initialized + if(!isOk) { + RewriterUtils.error(logger, query, "Rewriter is not properly initialized"); + return execution.search(query); + } + + RewriterUtils.log(logger, query, "Original query: " + query.toDetailString()); + + // Retrieve metadata passed by previous rewriter + HashMap<String, Object> rewriteMeta = RewriterUtils.getRewriteMeta(query); + + // This key would be updated by each rewriter to specify + // the key to be used for dict retrieval in next + // rewriter 
downstream. This controls whether the + // next rewriter should use the rewritten query or the + // original query for dict retrieval. e.g. rewriters + // following misspell rewriter should use the rewritten + // query by misspell rewriter for dict retrieval + String prevDictKey = (String)rewriteMeta.get(RewriterConstants.DICT_KEY); + + // Whether the query has been rewritten + Boolean prevRewritten = (Boolean)rewriteMeta.get(RewriterConstants.REWRITTEN); + + // Check if rewriter should be skipped if the query + // has been rewritten + if(prevRewritten && getSkipRewriterIfRewritten()) { + RewriterUtils.log(logger, query, "Skipping rewriter since the " + + "query has been rewritten"); + return execution.search(query); + } + + // Store rewriter result + HashMap<String, Object> rewriterResult = null; + Query originalQueryObj = query.clone(); + + try { + // Execute rewriter's main rewrite logic + rewriterResult = rewrite(query, prevDictKey); + + } catch (RuntimeException e) { + RewriterUtils.error(logger, originalQueryObj, "Error executing this rewriter, " + + "skipping to next rewriter: " + e.getMessage()); + return execution.search(originalQueryObj); + } + + // Check if rewriter result is set properly + if(rewriterResult==null) { + RewriterUtils.error(logger, originalQueryObj, "Rewriter result are not set properly, " + + "skipping to next rewriter"); + return execution.search(originalQueryObj); + } + + // Retrieve results from rewriter + Boolean rewritten = (Boolean)rewriterResult.get(RewriterConstants.REWRITTEN); + String dictKey = (String)rewriterResult.get(RewriterConstants.DICT_KEY); + + if(rewritten==null || dictKey==null) { + RewriterUtils.error(logger, originalQueryObj, "Rewriter result are not set properly, " + + "skipping to next rewriter"); + return execution.search(originalQueryObj); + } + + // Retrieve results from rewriter + rewriteMeta.put(RewriterConstants.REWRITTEN, (rewritten || prevRewritten)); + rewriteMeta.put(RewriterConstants.DICT_KEY, dictKey); 
+ + // Pass metadata to the next rewriter + RewriterUtils.setRewriteMeta(query, rewriteMeta); + + RewriterUtils.log(logger, query, "Final query: " + query.toDetailString()); + + return execution.search(query); + } + + /** + * Perform the main rewrite logic + * + * @param query Query object from searcher + * @param dictKey the key passed from previous rewriter + * to be treated as "original query from user" + * For example, if previous is misspell rewriter, + * it would pass the corrected query as the + * "original query from user". For other rewriters which + * add variants, abbr, etc to the query, the original + * query should be passed as a key. This rewriter could + * still choose to ignore this key. This key + * is not the rewritten query itself. For example, + * if original query is (willl smith) and the + * rewritten query is (willl smith) OR (will smith) + * the key to be passed could be (will smith) + * @return HashMap which contains the key value pairs:<br> + * - whether this query has been rewritten by this + * rewriter<br> + * key: rewritten<br> + * value: true or false<br> + * - the key to be treated as "original query from user" in next + * rewriter downstream, for example, misspell rewriter + * would pass the corrected query as the "original query from + * user" to the next rewriter. For other rewriters which + * add variants, abbr, etc to the query, the original + * query should be passed as a key. This key is not necessarily + * consumed by the next rewriter. 
The next rewriter + * can still choose to ignore this key.<br> + * key: newDictKey<br> + * value: new dict key<br> + */ + protected abstract HashMap<String, Object> rewrite(Query query, + String dictKey) throws RuntimeException; + + /** + * Check whether rewriter should be skipped if + * the query has been rewritten by other rewriter + * + * @return boolean Whether rewriter should be skipped + */ + protected abstract boolean getSkipRewriterIfRewritten(); + + /** + * Retrieve rewriter name + * It should match the name used in query profile + * + * @return Name of the rewriter + */ + public abstract String getRewriterName(); + + /** + * Get default FSA dictionary names + * + * @return Pair of FSA dictionary name and filename + */ + public abstract HashMap<String, String> getDefaultFSAs(); + + /** + * Get config parameter value set in query profile + * + * @param query Query object from the searcher + * @param paramName parameter to be retrieved + * @return parameter value or null if not found + */ + protected String getQPConfig(Query query, + String paramName) { + return RewriterUtils.getQPConfig(query, getRewriterName(), paramName); + } + + /** + * Retrieve rewrite from FSA given the original query + * + * @param query Query object from searcher + * @param dictName FSA dictionary name + * @param key The original query used to retrieve rewrite + * from the dictionary + * @return String The retrieved rewrites, null if query + * doesn't exist + */ + protected String getRewriteFromFSA(Query query, + String dictName, + String key) throws RuntimeException { + return RewriterUtils.getRewriteFromFSA(query, rewriterDicts, dictName, key); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/query/rewrite/RewriterConstants.java b/container-search/src/main/java/com/yahoo/search/query/rewrite/RewriterConstants.java new file mode 100644 index 00000000000..45ce08de9d5 --- /dev/null +++ 
b/container-search/src/main/java/com/yahoo/search/query/rewrite/RewriterConstants.java @@ -0,0 +1,55 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.rewrite; + +import com.yahoo.processing.request.CompoundName; +import com.yahoo.vespa.defaults.Defaults; + +/** + * Contains common constant strings used by rewriters + * + * @author Karen Sze Wing Lee + */ +public class RewriterConstants { + + /** Config flag for addUnitToOriginalQuery */ + public static final String ORIGINAL_AS_UNIT = "OriginalAsUnit"; + + /** Config flag for addUnitEquivToOriginalQuery */ + public static final String ORIGINAL_AS_UNIT_EQUIV = "OriginalAsUnitEquiv"; + + /** Config flag for addRewritesAsEquiv(false) */ + public static final String REWRITES_AS_EQUIV = "RewritesAsEquiv"; + + /** Config flag for addRewritesAsEquiv(true) */ + public static final String REWRITES_AS_UNIT_EQUIV = "RewritesAsUnitEquiv"; + + /** Config flag for addExpansions */ + public static final String PARTIAL_PHRASE_MATCH = "PartialPhraseMatch"; + + /** Config flag for max number of rewrites added per rewriter */ + public static final String MAX_REWRITES = "MaxRewrites"; + + /** Config flag for considering QSS Rewrite in spell correction */ + public static final String QSS_RW = "QSSRewrite"; + + /** Config flag for considering QSS Suggest in spell correction */ + public static final String QSS_SUGG = "QSSSuggest"; + + /** Config flag for expansion index name */ + public static final String EXPANSION_INDEX = "ExpansionIndex"; + + /** Name for market chain retrieval from user param */ + public static final String REWRITER_CHAIN = "QRWChain"; + + /** Name for rewrite metadata retrieval from query properties */ + public static final CompoundName REWRITE_META = new CompoundName("RewriteMeta"); + + /** Name for rewritten field retrieval from query properties */ + public static final String REWRITTEN = "Rewritten"; + + /** Name 
for new dictionary key field retrieval from query properties */ + public static final String DICT_KEY = "DictKey"; + + /** Default dictionaries dir */ + public static final String DEFAULT_DICT_DIR = Defaults.getDefaults().vespaHome() + "share/qrw_data/"; +} diff --git a/container-search/src/main/java/com/yahoo/search/query/rewrite/RewriterFeatures.java b/container-search/src/main/java/com/yahoo/search/query/rewrite/RewriterFeatures.java new file mode 100644 index 00000000000..0a5110dbd7e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/rewrite/RewriterFeatures.java @@ -0,0 +1,651 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.rewrite; + +import java.util.*; +import java.util.logging.Logger; + +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.parser.CustomParser; +import com.yahoo.search.*; +import com.yahoo.search.query.*; +import com.yahoo.prelude.query.*; +import com.yahoo.prelude.querytransform.PhraseMatcher; +import com.yahoo.prelude.querytransform.PhraseMatcher.Phrase; +import com.yahoo.search.query.parser.ParserEnvironment; +import com.yahoo.search.query.parser.ParserFactory; + +/** + * Contains commonly used rewriter features + * + * @author Karen Sze Wing Lee + */ +public class RewriterFeatures { + + private static final Logger logger = Logger.getLogger(RewriterFeatures.class.getName()); + + /** + * <p>Add proximity boosting to original query by modifying + * the query tree directly</p> + * e.g. 
original Query Tree: (AND aa bb)<br> + * if keepOriginalQuery: true<br> + * new Query tree: (OR (AND aa bb) "aa bb")<br> + * if keepOriginalQuery: false<br> + * new Query Tree: "aa bb"<br><br> + * + * original Query Tree: (OR (AND aa bb) (AND cc dd))<br> + * boostingQuery: cc dd<br> + * if keepOriginalQuery: true<br> + * new Query Tree: (OR (AND aa bb) (AND cc dd) "cc dd")<br> + * if keepOriginalQuery: false<br> + * new Query Tree: (OR (AND aa bb) "cc dd") <br> + * + * @param query Query object from searcher + * @param boostingQuery query to be boosted + * @param keepOriginalQuery whether to keep original unboosted query as equiv + * @return Modified Query object, return original query object + * on error + */ + public static Query addUnitToOriginalQuery(Query query, String boostingQuery, + boolean keepOriginalQuery) + throws RuntimeException { + RewriterUtils.log(logger, query, "Adding proximity boosting to [" + boostingQuery + "]"); + + Model queryModel = query.getModel(); + QueryTree qTree = queryModel.getQueryTree(); + Item oldRoot = qTree.getRoot(); + + if (oldRoot == null) { + RewriterUtils.error(logger, query, "Error retrieving query tree root"); + throw new RuntimeException("Error retrieving query tree root"); + } + + // Convert original query to query tree item + Item origQueryItem = convertStringToQTree(query, boostingQuery); + + // Boost proximity by phrasing the original query + // query tree structure: (AND aa bb) + if (oldRoot instanceof AndItem && + oldRoot.equals(origQueryItem)) { + PhraseItem phrase = convertAndToPhrase((AndItem)oldRoot); + + if(!keepOriginalQuery) { + qTree.setRoot(phrase); + } else { + OrItem newRoot = new OrItem(); + newRoot.addItem(oldRoot); + newRoot.addItem(phrase); + qTree.setRoot(newRoot); + queryModel.setType(Query.Type.ADVANCED); //set type=adv + } + RewriterUtils.log(logger, query, "Added proximity boosting successfully"); + return query; + + // query tree structure: (OR (AND aa bb) (AND cc dd)) + } else if (oldRoot 
instanceof OrItem && + ((OrItem)oldRoot).getItemIndex(origQueryItem)!=-1 && + origQueryItem instanceof AndItem) { + + // Remove original unboosted query + if(!keepOriginalQuery) + ((OrItem)oldRoot).removeItem(origQueryItem); + + // Check if the tree already contained the phrase item + PhraseItem pI = convertAndToPhrase((AndItem)origQueryItem); + if(((OrItem)oldRoot).getItemIndex(pI)==-1) { + ((OrItem)oldRoot).addItem(convertAndToPhrase((AndItem)origQueryItem)); + RewriterUtils.log(logger, query, "Added proximity boosting successfully"); + return query; + } + } + RewriterUtils.log(logger, query, "No proximity boosting added"); + return query; + } + + /** + * <p>Add query expansion to the query tree</p> + * e.g. origQuery: aa bb<br> + * matchingStr: aa bb<br> + * rewrite: cc dd, ee ff<br> + * if addUnitToRewrites: false<br> + * new query tree: (OR (AND aa bb) (AND cc dd) (AND ee ff))<br> + * if addUnitToRewrites: true<br> + * new query tree: (OR (AND aa bb) "cc dd" "ee ff") <br> + * + * @param query Query object from searcher + * @param matchingStr string used to retrieve the rewrite + * @param rewrites The rewrite string retrieved from + * dictionary + * @param addUnitToRewrites Whether to add unit to rewrites + * @param maxNumRewrites Max number of rewrites to be added, + * 0 if no limit + * @return Modified Query object, return original query object + * on error + */ + public static Query addRewritesAsEquiv(Query query, String matchingStr, + String rewrites, + boolean addUnitToRewrites, + int maxNumRewrites) throws RuntimeException { + String normalizedQuery = RewriterUtils.getNormalizedOriginalQuery(query); + + RewriterUtils.log(logger, query, + "Adding rewrites [" + rewrites + + "] to the query [" + normalizedQuery + "]"); + if (rewrites.equalsIgnoreCase(normalizedQuery) || rewrites.equalsIgnoreCase("n/a")) { + RewriterUtils.log(logger, query, "No rewrite added"); + return query; + } + + Model queryModel = query.getModel(); + QueryTree qTree = 
queryModel.getQueryTree(); + Item oldRoot = qTree.getRoot(); + + if (oldRoot == null) { + RewriterUtils.error(logger, query, "Error retrieving query tree root"); + throw new RuntimeException("Error retrieving query tree root"); + } + + StringTokenizer rewrite_list = new StringTokenizer(rewrites, "\t"); + Item rI = null; + + // Convert matching string to query tree item + Item matchingStrItem = convertStringToQTree(query, matchingStr); + PhraseItem matchingStrPhraseItem = null; + if(matchingStrItem instanceof AndItem) { + matchingStrPhraseItem = convertAndToPhrase(((AndItem)matchingStrItem)); + } + + // Add rewrites as OR item to the query tree + // Only should rewrite in this case: + // - origQuery: (OR (AND aa bb) (AND cc dd)) + // - matchingStr: (AND aa bb) + // Or in this case: + // - origQuery: (AND aa bb) + // - matching Str: (AND aa bb) + // Should not rewrite in this case: + // - origQuery: (OR (AND cc (OR dd (AND aa bb)) ee) + // - matchingStr: (AND aa bb) + // - for this case, should use getNonOverlappingMatches instead + OrItem newRoot; + if(oldRoot instanceof OrItem) { + if(((OrItem)oldRoot).getItemIndex(matchingStrItem)==-1) { + RewriterUtils.log(logger, query, "Whole query matching is used, skipping rewrite"); + return query; + } + newRoot = (OrItem)oldRoot; + } else if(oldRoot.equals(matchingStrItem) || oldRoot.equals(matchingStrPhraseItem)) { + newRoot = new OrItem(); + newRoot.addItem(oldRoot); + } else { + RewriterUtils.log(logger, query, "Whole query matching is used, skipping rewrite"); + return query; + } + int numRewrites = 0; + while(rewrite_list.hasMoreTokens() && + (maxNumRewrites==0 || numRewrites < maxNumRewrites)) { + rI = convertStringToQTree(query, rewrite_list.nextToken()); + if(addUnitToRewrites && rI instanceof AndItem) { + rI = convertAndToPhrase((AndItem)rI); + } + if(newRoot.getItemIndex(rI)==-1) { + newRoot.addItem(rI); + numRewrites++; + } else { + RewriterUtils.log(logger, query, "Rewrite already exist, skipping"); + } + } + 
qTree.setRoot(newRoot); + queryModel.setType(Query.Type.ADVANCED); //set type=adv + RewriterUtils.log(logger, query, "Added rewrite successfully"); + + return query; + } + + /** + * <p>Retrieve the longest, from left to right non overlapping full + * phrase substrings in query based on FSA dictionary</p> + * + * e.g. query: ((modern AND new AND york AND city AND travel) OR travel) AND + * ((sunny AND travel AND agency) OR nyc)<br> + * dictionary: <br> + * mny\tmodern new york<br> + * mo\tmodern<br> + * modern\tn/a<br> + * modern\tnew york\tn/a<br> + * new york\tn/a<br> + * new york city\tn/a<br> + * new york city travel\tn/a<br> + * new york company\tn/a<br> + * ny\tnew york<br> + * nyc\tnew york city\tnew york company<br> + * nyct\tnew york city travel<br> + * ta\ttravel agency<br> + * travel agency\tn/a<br> + * return: nyc + * @param phraseMatcher PhraseMatcher object loaded with FSA dict + * @param query Query object from the searcher + * @return Matching phrases + */ + public static Set<PhraseMatcher.Phrase> getNonOverlappingFullPhraseMatches(PhraseMatcher phraseMatcher, + Query query) + throws RuntimeException { + RewriterUtils.log(logger, query, "Retrieving longest non-overlapping full phrase matches"); + if(phraseMatcher==null) + return null; + + Item root = query.getModel().getQueryTree().getRoot(); + List<PhraseMatcher.Phrase> matches = phraseMatcher.matchPhrases(root); + if (matches==null || matches.isEmpty()) + return null; + + Set<PhraseMatcher.Phrase> resultMatches = new HashSet<>(); + ListIterator<Phrase> matchesIter = matches.listIterator(); + + // Iterate through all matches + while(matchesIter.hasNext()) { + PhraseMatcher.Phrase phrase = matchesIter.next(); + RewriterUtils.log(logger, query, "Working on phrase: " + phrase); + CompositeItem currOwner = phrase.getOwner(); + + // Check if this is full phrase + // If phrase is not an AND item, only keep those that are single word + // in order to eliminate cases such as (new RANK york) from being 
treated + // as match if only new york but not new or york is in the dictionary + if((currOwner!=null && + ((phrase.isComplete() && currOwner instanceof AndItem) || + (phrase.getLength()==1 && currOwner instanceof OrItem) || + (phrase.getLength()==1 && currOwner instanceof RankItem && phrase.getStartIndex()==0))) || + (currOwner==null && phrase.getLength()==1)) { + resultMatches.add(phrase); + RewriterUtils.log(logger, query, "Keeping phrase: " + phrase); + } + } + + RewriterUtils.log(logger, query, "Successfully Retrieved longest non-overlapping full phrase matches"); + return resultMatches; + } + + + /** + * <p>Retrieve the longest, from left to right non overlapping partial + * phrase substrings in query based on FSA dictionary</p> + * + * e.g. query: ((modern AND new AND york AND city AND travel) OR travel) AND + * ((sunny AND travel AND agency) OR nyc)<br> + * dictionary: <br> + * mny\tmodern new york<br> + * mo\tmodern<br> + * modern\tn/a<br> + * modern new york\tn/a<br> + * new york\tn/a<br> + * new york city\tn/a<br> + * new york city travel\tn/a<br> + * new york company\tn/a<br> + * ny\tnew york<br> + * nyc\tnew york city\tnew york company<br> + * nyct\tnew york city travel<br> + * ta\ttravel agency<br> + * travel agency\tn/a<br> + * return: <br> + * modern<br> + * new york city travel<br> + * travel agency<br> + * nyc<br> + * @param phraseMatcher PhraseMatcher object loaded with FSA dict + * @param query Query object from the searcher + * @return Matching phrases + */ + public static Set<PhraseMatcher.Phrase> getNonOverlappingPartialPhraseMatches(PhraseMatcher phraseMatcher, + Query query) + throws RuntimeException { + RewriterUtils.log(logger, query, "Retrieving longest non-overlapping partial phrase matches"); + if(phraseMatcher==null) + return null; + + Item root = query.getModel().getQueryTree().getRoot(); + List<PhraseMatcher.Phrase> matches = phraseMatcher.matchPhrases(root); + if (matches==null || matches.isEmpty()) + return null; + + 
Set<PhraseMatcher.Phrase> resultMatches = new HashSet<>(); + ArrayList<PhraseMatcher.Phrase> phrasesInSubTree = new ArrayList<>(); + CompositeItem prevOwner = null; + ListIterator<PhraseMatcher.Phrase> matchesIter = matches.listIterator(); + + // Iterate through all matches + while(matchesIter.hasNext()) { + PhraseMatcher.Phrase phrase = matchesIter.next(); + RewriterUtils.log(logger, query, "Working on phrase: " + phrase); + CompositeItem currOwner = phrase.getOwner(); + + // Check if previous is AND item and this phrase is in a different item + // If so, work on the previous set to eliminate overlapping matches + if(!phrasesInSubTree.isEmpty() && currOwner!=null && + prevOwner!=null && !currOwner.equals(prevOwner)) { + RewriterUtils.log(logger, query, "Previous phrase is in different AND item"); + List<PhraseMatcher.Phrase> subTreeMatches + = getNonOverlappingMatchesInAndItem(phrasesInSubTree, query); + if(subTreeMatches==null) { + RewriterUtils.error(logger, query, "Error retrieving matches from subtree"); + throw new RuntimeException("Error retrieving matches from subtree"); + } + resultMatches.addAll(subTreeMatches); + phrasesInSubTree.clear(); + } + + // Check if this is an AND item + if(currOwner!=null && currOwner instanceof AndItem) { + phrasesInSubTree.add(phrase); + // If phrase is not an AND item, only keep those that are single word + // in order to eliminate cases such as (new RANK york) from being treated + // as match if only new york but not new or york is in the dictionary + } else if (phrase.getLength()==1 && + !(currOwner!=null && currOwner instanceof RankItem && phrase.getStartIndex()!=0)) { + resultMatches.add(phrase); + } + + prevOwner = currOwner; + } + + // Check if last item is AND item + // If so, work on the previous set to elimate overlapping matches + if(!phrasesInSubTree.isEmpty()) { + RewriterUtils.log(logger, query, "Last phrase is in AND item"); + List<PhraseMatcher.Phrase> subTreeMatches + = 
getNonOverlappingMatchesInAndItem(phrasesInSubTree, query); + if(subTreeMatches==null) { + RewriterUtils.error(logger, query, "Error retrieving matches from subtree"); + throw new RuntimeException("Error retrieving matches from subtree"); + } + resultMatches.addAll(subTreeMatches); + } + RewriterUtils.log(logger, query, "Successfully Retrieved longest non-overlapping partial phrase matches"); + return resultMatches; + } + + /** + * <p>Retrieve the longest, from left to right non overlapping substrings in + * AndItem based on FSA dictionary</p> + * + * e.g. subtree: (modern AND new AND york AND city AND travel)<br> + * dictionary:<br> + * mny\tmodern new york<br> + * mo\tmodern<br> + * modern\tn/a<br> + * modern new york\tn/a<br> + * new york\tn/a<br> + * new york city\tn/a<br> + * new york city travel\tn/a<br> + * new york company\tn/a<br> + * ny\tnew york<br> + * nyc\tnew york city\tnew york company<br> + * nyct\tnew york city travel<br> + * allMatches:<br> + * modern<br> + * modern new york<br> + * new york<br> + * new york city<br> + * new york city travel<br> + * return: <br> + * modern<br> + * new york city travel<br> + * @param allMatches All matches within the subtree + * @param query Query object from the searcher + * @return Matching phrases + */ + public static List<PhraseMatcher.Phrase> getNonOverlappingMatchesInAndItem( + List<PhraseMatcher.Phrase> allMatches, + Query query) + throws RuntimeException { + RewriterUtils.log(logger, query, "Retrieving longest non-overlapping matches in subtree"); + + if (allMatches==null || allMatches.isEmpty()) + return null; + + if(allMatches.size()==1) { + RewriterUtils.log(logger, query, "Only one match in subtree"); + return allMatches; + } + + // Phrase are sorted based on length, if both have the + // same length, the lefter one ranks higher + RewriterUtils.log(logger, query, "Sorting the phrases"); + PhraseLength phraseLength = new PhraseLength(); + Collections.sort(allMatches, phraseLength); + + // Create a bitset 
with length equal to the number of + // items in the subtree + int numWords = allMatches.get(0).getOwner().getItemCount(); + BitSet matchPos = new BitSet(numWords); + + // Removing matches that are overlapping with previously selected ones + RewriterUtils.log(logger, query, "Removing matches that are overlapping " + + "with previously selected ones"); + ListIterator<Phrase> allMatchesIter = allMatches.listIterator(); + while(allMatchesIter.hasNext()) { + PhraseMatcher.Phrase currMatch = allMatchesIter.next(); + PhraseMatcher.Phrase.MatchIterator matchIter = currMatch.itemIterator(); + if(matchIter.hasNext() && matchIter.next().isFilter()) { + RewriterUtils.log(logger, query, "Removing filter item" + currMatch); + allMatchesIter.remove(); + continue; + } + + BitSet currMatchPos = new BitSet(numWords); + currMatchPos.set(currMatch.getStartIndex(), + currMatch.getLength()+currMatch.getStartIndex()); + if(currMatchPos.intersects(matchPos)) { + RewriterUtils.log(logger, query, "Removing " + currMatch); + allMatchesIter.remove(); + } else { + RewriterUtils.log(logger, query, "Keeping " + currMatch); + matchPos.or(currMatchPos); + } + } + return allMatches; + } + + /** + * <p>Add Expansions to the matching phrases</p> + * + * e.g. 
Query: nyc travel agency<br> + * matching phrase: nyc\tnew york city\tnew york company + * travel agency\tn/a<br> + * if expandIndex is not null and removeOriginal is true<br> + * New Query: ((new york city) OR ([expandIndex]:new york city) + * OR (new york company) OR + * ([expandIndex]:new york company)) AND + * ((travel agency) OR ([expandIndex]:travel agency))<br> + * if expandIndex is null and removeOriginal is true<br> + * New Query: ((new york city) OR (new york company)) AND + * travel agency<br> + * if expandIndex is null and removeOriginal is false<br> + * New Query: (nyc OR (new york city) OR (new york company)) AND + * travel agency<br> + * + * @param query Query object from searcher + * @param matches Set of longest non-overlapping matches + * @param expandIndex Name of expansion index or null if + * default index + * @param maxNumRewrites Max number of rewrites to be added, + * 0 if no limit + * @param removeOriginal Whether to remove the original matching phrase + * @param addUnitToRewrites Whether to add rewrite as phrase + */ + public static Query addExpansions(Query query, Set<PhraseMatcher.Phrase> matches, + String expandIndex, int maxNumRewrites, + boolean removeOriginal, boolean addUnitToRewrites) + throws RuntimeException { + + if(matches==null) { + RewriterUtils.log(logger, query, "No expansions to be added"); + return query; + } + + RewriterUtils.log(logger, query, "Adding expansions to matching phrases"); + Model queryModel = query.getModel(); + QueryTree qTree = queryModel.getQueryTree(); + Iterator<Phrase> matchesIter = matches.iterator(); + CompositeItem parent = null; + + // Iterate through all matches + while(matchesIter.hasNext()) { + PhraseMatcher.Phrase match = matchesIter.next(); + RewriterUtils.log(logger, query, "Working on phrase: " + match); + + // Retrieve expansion phrases + String expansionStr = match.getData(); + if(expansionStr.equalsIgnoreCase("n/a") && expandIndex==null) { + continue; + } + StringTokenizer expansions = 
new StringTokenizer(expansionStr,"\t"); + + // Create this structure for all expansions of this match + // (OR (AND expandsion1) indexName:expansion1 + // (AND expansion2) indexName:expansion2..) + OrItem expansionGrp = new OrItem(); + int numRewrites = 0; + String matchStr = convertMatchToString(match); + while(expansions.hasMoreTokens() && + (maxNumRewrites==0 || numRewrites < maxNumRewrites)) { + String expansion = expansions.nextToken(); + RewriterUtils.log(logger, query, "Working on expansion: " + expansion); + if(expansion.equalsIgnoreCase("n/a")) { + expansion = matchStr; + } + // (AND expansion) or "expansion" + Item expansionItem = convertStringToQTree(query, expansion); + if(addUnitToRewrites && expansionItem instanceof AndItem) { + expansionItem = convertAndToPhrase((AndItem)expansionItem); + } + expansionGrp.addItem(expansionItem); + + if(expandIndex!=null) { + // indexName:expansion + WordItem expansionIndexItem = new WordItem(expansion, expandIndex); + expansionGrp.addItem(expansionIndexItem); + } + numRewrites++; + RewriterUtils.log(logger, query, "Adding expansion: " + expansion); + } + + if(!removeOriginal) { + //(AND original) + Item matchItem = convertStringToQTree(query, matchStr); + if(expansionGrp.getItemIndex(matchItem)==-1) { + expansionGrp.addItem(matchItem); + } + } + + parent = match.getOwner(); + int matchIndex = match.getStartIndex(); + if(parent!=null) { + // Remove matching phrase from original query + for(int i=0; i<match.getLength(); i++) { + parent.removeItem(matchIndex); + } + // Adding back expansions + parent.addItem(matchIndex, expansionGrp); + } else { + RewriterUtils.log(logger, query, "Single root item"); + // If there's no parent, i.e. single root item + qTree.setRoot(expansionGrp); + break; + } + } + + // Not root single item + if(parent!=null) { + // Cleaning up the query after rewrite to remove redundant tags + // e.g. 
(AND (OR (AND a b) c)) => (OR (AND a b) c) + String cleanupError = QueryCanonicalizer.canonicalize(qTree); + if(cleanupError!=null) { + RewriterUtils.error(logger, query, "Error canonicalizing query tree"); + throw new RuntimeException("Error canonicalizing query tree"); + } + } + queryModel.setType(Query.Type.ADVANCED); //set type=adv + RewriterUtils.log(logger, query, "Successfully added expansions to matching phrases"); + return query; + } + + /** + * Convert Match to String + * + * @param phrase Match from PhraseMatcher + * @return String format of the phrase + */ + public static String convertMatchToString(PhraseMatcher.Phrase phrase) { + StringBuilder buffer = new StringBuilder(); + for (Iterator<Item> i = phrase.itemIterator(); i.hasNext();) { + buffer.append(i.next().toString()); + if (i.hasNext()) { + buffer.append(" "); + } + } + return buffer.toString(); + } + + /** + * Convert String to query tree + * + * @param stringToParse The string to be converted to a + * query tree + * @param query Query object from searcher + * @return Item The resulting query tree + */ + static Item convertStringToQTree(Query query, String stringToParse) { + RewriterUtils.log(logger, query, "Converting string [" + stringToParse + "] to query tree"); + if(stringToParse==null) { + return new NullItem(); + } + Model model = query.getModel(); + CustomParser parser = (CustomParser) ParserFactory.newInstance(model.getType(), + ParserEnvironment.fromExecutionContext(query.getModel().getExecution().context())); + IndexFacts indexFacts = new IndexFacts(); + Item item = parser.parse(stringToParse, null, model.getParsingLanguage(), + indexFacts.newSession(model.getSources(), model.getRestrict()), + model.getDefaultIndex()); + RewriterUtils.log(logger, query, "Converted string: [" + item.toString() + "]"); + return item; + } + + /** + * Convert AndItem to PhraseItem<br> + * + * e.g. 
(AND a b) to "a b" + * @param andItem query tree to be converted + * @return converted PhraseItem + */ + private static PhraseItem convertAndToPhrase(AndItem andItem) { + PhraseItem result = new PhraseItem(); + Iterator<Item> subItems = andItem.getItemIterator(); + while(subItems.hasNext()) { + Item curr = (subItems.next()); + if(curr instanceof IntItem) { + WordItem numItem = new WordItem(((IntItem)curr).stringValue()); + result.addItem(numItem); + } else { + result.addItem(curr); + } + } + return result; + } + + /** + * Class for comparing phrase. + * A phrase is larger if its length is longer. + * If both phrases are of the same length, the lefter one + * is considered larger + */ + private static class PhraseLength implements Comparator<PhraseMatcher.Phrase> { + public int compare(PhraseMatcher.Phrase phrase1, PhraseMatcher.Phrase phrase2) { + if((phrase2.getLength()>phrase1.getLength()) || + (phrase2.getLength()==phrase1.getLength() && + phrase2.getStartIndex()<=phrase1.getStartIndex())) { + return 1; + } else { + return -1; + } + } + } +} diff --git a/container-search/src/main/java/com/yahoo/search/query/rewrite/RewriterUtils.java b/container-search/src/main/java/com/yahoo/search/query/rewrite/RewriterUtils.java new file mode 100644 index 00000000000..26ead8de5e5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/rewrite/RewriterUtils.java @@ -0,0 +1,334 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.query.rewrite; + +import com.yahoo.fsa.FSA; +import com.yahoo.log.LogLevel; +import com.yahoo.search.Query; +import com.yahoo.search.intent.model.IntentModel; +import com.yahoo.search.intent.model.InterpretationNode; +import com.yahoo.text.interpretation.Annotations; +import com.yahoo.text.interpretation.Modification; + +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.logging.Logger; + +import static com.yahoo.language.LinguisticsCase.toLowerCase; + +/** + * Contains common utilities used by rewriters + * + * @author Karen Sze Wing Lee + */ +public class RewriterUtils { + + private static final Logger utilsLogger = Logger.getLogger(RewriterUtils.class.getName()); + + // Tracelevel for debug log of this rewriter + private static final int TRACELEVEL = 3; + + /** + * Load FSA from file + * + * @param file FSA dictionary file object + * @param query Query object from the searcher, could be null if not available + * @return FSA The FSA object for the input file path + */ + public static FSA loadFSA(File file, Query query) throws IOException { + log(utilsLogger, query, "Loading FSA file"); + String filePath = null; + + try { + filePath = file.getAbsolutePath(); + } catch (SecurityException e1) { + error(utilsLogger, query, "No read access for the FSA file"); + throw new IOException("No read access for the FSA file"); + } + + FSA fsa = loadFSA(filePath, query); + + return fsa; + } + + /** + * Load FSA from file + * + * @param filename FSA dictionary file path + * @param query Query object from the searcher, could be null if not available + * @return FSA The FSA object for the input file path + */ + public static FSA loadFSA(String filename, Query query) throws IOException { + log(utilsLogger, query, "Loading FSA file from: " + filename); + + if(!new File(filename).exists()) { + error(utilsLogger, query, "File does not exist : " + filename); + throw new IOException("File does not exist : " + 
filename); + } + + FSA fsa; + try { + fsa = new FSA(filename); + } catch (RuntimeException e) { + error(utilsLogger, query, "Invalid FSA file"); + throw new IOException("Invalid FSA file"); + } + + if (!fsa.isOk()) { + error(utilsLogger, query, "Unable to load FSA file from : " + filename); + throw new IOException("Not able to load FSA file from : " + filename); + } + log(utilsLogger, query, "Loaded FSA successfully from file : " + filename); + return fsa; + } + + /** + * Retrieve rewrite from FSA given the original query + * + * @param query Query object from searcher + * @param dictName FSA dictionary name + * @param rewriterDicts list of rewriter dictionaries + * It has the following format: + * HashMap<dictionary name, FSA> + * @param key The original query used to retrieve rewrite + * from the dictionary + * @return String The retrieved rewrites, null if query + * doesn't exist + */ + public static String getRewriteFromFSA(Query query, + HashMap<String, Object> rewriterDicts, + String dictName, + String key) throws RuntimeException { + if(rewriterDicts==null) { + error(utilsLogger, query, "HashMap containing rewriter dicts is null"); + throw new RuntimeException("HashMap containing rewriter dicts is null"); + } + + FSA fsa = (FSA)rewriterDicts.get(dictName); + + if(fsa==null) { + error(utilsLogger, query, "Error retrieving FSA dictionary: " + dictName); + throw new RuntimeException("Error retrieving FSA dictionary: " + dictName); + } + + String result = null; + result = fsa.lookup(key); + log(utilsLogger, query, "Retrieved rewrite: " + result); + + return result; + } + + /** + * Get config parameter value set in query profile + * + * @param query Query object from the searcher + * @param rewriterName Name of the rewriter + * @param paramName parameter to be retrieved + * @return parameter value or null if not found + */ + public static String getQPConfig(Query query, + String rewriterName, + String paramName) { + log(utilsLogger, query, "Retrieving config 
parameter value of: " + + rewriterName + "." + paramName); + + return getUserParam(query, rewriterName + "." + paramName); + } + + /** + * Get rewriter chain value + * + * @param query Query object from the searcher + * @return parameter value or null if not found + */ + public static String getRewriterChain(Query query) { + log(utilsLogger, query, "Retrieving rewriter chain value: " + + RewriterConstants.REWRITER_CHAIN); + + return getUserParam(query, RewriterConstants.REWRITER_CHAIN); + } + + /** + * Get user param value + * + * @param query Query object from the searcher + * @param paramName parameter to be retrieved + * + * @return parameter value or null if not found + */ + public static String getUserParam(Query query, String paramName) { + log(utilsLogger, query, "Retrieving user param value: " + paramName); + + if(paramName==null) { + error(utilsLogger, query, "Parameter name is null"); + return null; + } + + String paramValue = null; + paramValue = query.properties().getString(paramName); + log(utilsLogger, query, "Param value retrieved is: " + paramValue); + + return paramValue; + } + + /** + * Retrieve metadata passed by previous rewriter + * from query properties + * Initialize values if this is the first rewriter + * + * @param query Query object from the searcher + * @return hashmap containing the metadata + */ + public static HashMap<String, Object> getRewriteMeta(Query query) { + log(utilsLogger, query, "Retrieving metadata passed by previous rewriter"); + + @SuppressWarnings("unchecked") + HashMap<String, Object> rewriteMeta = (HashMap<String, Object>) query + .properties().get(RewriterConstants.REWRITE_META); + + if(rewriteMeta==null) { + log(utilsLogger, query, "No metadata available from previous rewriter"); + rewriteMeta = new HashMap<>(); + rewriteMeta.put(RewriterConstants.REWRITTEN, false); + rewriteMeta.put(RewriterConstants.DICT_KEY, getNormalizedOriginalQuery(query)); + } else { + if((Boolean)rewriteMeta.get(RewriterConstants.REWRITTEN)) 
{ + log(utilsLogger, query, "Query has been rewritten by previous rewriters"); + } else { + log(utilsLogger, query, "Query has not been rewritten by previous rewriters"); + } + log(utilsLogger, query, "Dict key passed by previous rewriter: " + + rewriteMeta.get(RewriterConstants.DICT_KEY)); + } + + return rewriteMeta; + } + + /** + * Pass metadata to the next rewriter through query properties + * + * @param query Query object from the searcher + * @param metadata HashMap containing the metadata + */ + public static void setRewriteMeta(Query query, HashMap<String, Object> metadata) { + log(utilsLogger, query, "Passing metadata to the next rewriter"); + + query.properties().set(RewriterConstants.REWRITE_META, metadata); + log(utilsLogger, query, "Successfully passed metadata to the next rewriter"); + } + + + /** + * Retrieve spell corrected query with highest score from QLAS + * + * @param query Query object from the searcher + * @param qss_rw Whether to consider qss_rw modification + * @param qss_sugg Whether ot consider qss_sugg modification + * @return Spell corrected query or null if not found + */ + public static String getSpellCorrected(Query query, + boolean qss_rw, + boolean qss_sugg) + throws RuntimeException { + log(utilsLogger, query, "Retrieving spell corrected query"); + + // Retrieve Intent Model + IntentModel intentModel = IntentModel.getFrom(query); + if(intentModel==null) { + error(utilsLogger, query, "Unable to retrieve intent model"); + throw new RuntimeException("Not able to retrieve intent model"); + } + + double max_score = 0; + String spellCorrected = null; + + // Iterate through all interpretations to get a spell corrected + // query with highest score + for (InterpretationNode interpretationNode : intentModel.children()) { + Modification modification = interpretationNode.getInterpretation() + .getModification(); + Annotations annotations = modification.getAnnotation(); + Double score = annotations.getDouble("score"); + + // Check if it's 
higher than the max score + if(score!=null && score>max_score) { + Boolean isQSSRewrite = annotations.getBoolean("qss_rw"); + Boolean isQSSSuggest = annotations.getBoolean("qss_sugg"); + + // Check if it's qss_rw or qss_sugg + if((qss_rw && isQSSRewrite!=null && isQSSRewrite) || + (qss_sugg && isQSSSuggest!=null && isQSSSuggest)) { + max_score = score; + spellCorrected = modification.getText(); + } + } + } + + if(spellCorrected!=null) { + log(utilsLogger, query, "Successfully retrieved spell corrected query: " + + spellCorrected); + } else { + log(utilsLogger, query, "No spell corrected query is retrieved"); + } + + return spellCorrected; + } + + /** + * Retrieve normalized original query from query object + * + * @param query Query object from searcher + * @return normalized query + */ + public static String getNormalizedOriginalQuery(Query query) { + return toLowerCase(query.getModel().getQueryString()).trim(); + } + + /** + * Log message + * + * @param logger Logger used for this msg + * @param msg Log message + */ + public static void log(Logger logger, String msg) { + logger.log(LogLevel.DEBUG, logger.getName() + ": " + msg); + } + + /** + * Log message + * + * @param logger Logger used for this msg + * @param query Query object from searcher + * @param msg Log message + */ + public static void log(Logger logger, Query query, String msg) { + if(query!=null) { + query.trace(logger.getName() + ": " + msg, true, TRACELEVEL); + } + logger.log(LogLevel.DEBUG, logger.getName() + ": " + msg); + } + + /** + * Print error message + * + * @param logger Logger used for this msg + * @param msg Error message + */ + public static void error(Logger logger, String msg) { + logger.severe(logger.getName() + ": " + msg); + } + + /** + * Print error message + * + * @param logger Logger used for this msg + * @param query Query object from searcher + * @param msg Error message + */ + public static void error(Logger logger, Query query, String msg) { + if(query!=null) { + 
query.trace(logger.getName() + ": " + msg, true, TRACELEVEL); + } + logger.severe(logger.getName() + ": " + msg); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/query/rewrite/SearchChainDispatcherSearcher.java b/container-search/src/main/java/com/yahoo/search/query/rewrite/SearchChainDispatcherSearcher.java new file mode 100644 index 00000000000..589696c4e77 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/rewrite/SearchChainDispatcherSearcher.java @@ -0,0 +1,74 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.rewrite; + +import com.yahoo.component.chain.Chain; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.search.*; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.component.ComponentId; + +import java.util.logging.Logger; + +/** + * Execute rewriter search chain specified by the user. + * It's intended to be used for executing rewriter search chains + * for different markets. 
+ * + * @author Karen Sze Wing Lee + */ +@Provides("SearchChainDispatcher") +@After("QLAS") +public class SearchChainDispatcherSearcher extends Searcher { + + protected final Logger logger = Logger.getLogger(SearchChainDispatcherSearcher.class.getName()); + + /** + * Constructor for this searcher + * @param id Component ID (see vespa's search container doc for more detail) + */ + public SearchChainDispatcherSearcher(ComponentId id) { + super(id); + } + + /** + * Constructor for unit test + */ + public SearchChainDispatcherSearcher() { + } + + /** + * Execute another search chain specified by the user<br> + * - Retrieve search chain specified by the user through + * param<br> + * - Execute specified search chain if exist + */ + public @Override Result search(Query query, Execution execution) { + RewriterUtils.log(logger, query, "Entering SearchChainDispatcherSearcher"); + + // Retrieve search chain specified by user through REWRITER_CHAIN + String rewriterChain = RewriterUtils.getRewriterChain(query); + + // Skipping to next searcher if no rewriter chain is specified + if(rewriterChain==null || rewriterChain.equals("")) { + RewriterUtils.log(logger, query, "No rewriter chain is specified, " + + "skipping to the next searcher"); + return execution.search(query); + } + + // Execute rewriter search chain + RewriterUtils.log(logger, query, "Redirecting to chain " + rewriterChain); + Chain<Searcher> myChain = execution.searchChainRegistry().getChain(rewriterChain); + if(myChain==null) { + RewriterUtils.log(logger, query, "Invalid search chain specified, " + + "skipping to the next searcher"); + return execution.search(query); + } + new Execution(myChain, execution.context()).search(query); + RewriterUtils.log(logger, query, "Finish executing search chain " + rewriterChain); + + // Continue down the chain ignoring the result from REWRITER_CHAIN + // since the rewriters only modify the query but not the result + return execution.search(query); + } +} diff --git 
a/container-search/src/main/java/com/yahoo/search/query/rewrite/package-info.java b/container-search/src/main/java/com/yahoo/search/query/rewrite/package-info.java new file mode 100644 index 00000000000..c435ed45623 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/rewrite/package-info.java @@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +@PublicApi +package com.yahoo.search.query.rewrite; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/query/rewrite/rewriters/GenericExpansionRewriter.java b/container-search/src/main/java/com/yahoo/search/query/rewrite/rewriters/GenericExpansionRewriter.java new file mode 100644 index 00000000000..3d57675c4ab --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/rewrite/rewriters/GenericExpansionRewriter.java @@ -0,0 +1,213 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.query.rewrite.rewriters; + +import java.io.*; +import java.util.*; +import java.util.logging.Logger; + +import com.google.inject.Inject; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.fsa.FSA; +import com.yahoo.search.query.rewrite.*; +import com.yahoo.search.*; +import com.yahoo.component.ComponentId; +import com.yahoo.filedistribution.fileacquirer.FileAcquirer; +import com.yahoo.search.query.rewrite.RewritesConfig; +import com.yahoo.prelude.querytransform.PhraseMatcher; + +/** + * This rewriter would add rewrites to entities (e.g abbreviation, synonym, etc)<br> + * to boost precision + * - FSA dict: [normalized original query]\t[rewrite 1]\t[rewrite 2]\t[etc]<br> + * - Features:<br> + * RewritesAsUnitEquiv flag: add proximity boosted rewrites<br> + * PartialPhraseMatch flag: whether to match whole phrase or partial phrase<br> + * MaxRewrites flag: the maximum number of rewrites to be added<br> + * + * @author Karen Sze Wing Lee + */ +@Provides("GenericExpansionRewriter") +public class GenericExpansionRewriter extends QueryRewriteSearcher { + + // Flag for skipping this rewriter if the query has been rewritten + private final boolean SKIP_REWRITER_IF_REWRITTEN = false; + + // Name of the rewriter + public static final String REWRITER_NAME = "GenericExpansionRewriter"; + + // Generic expansion dictionary name + public static final String GENERIC_EXPAND_DICT = "GenericExpansion"; + + // Default generic expansion dictionary file name + public static final String GENERIC_EXPAND_DICT_FILENAME = "GenericExpansionRewriter.fsa"; + + // PhraseMatcher created from FSA dict + private PhraseMatcher phraseMatcher; + + private Logger logger; + + + /** + * Constructor for GenericExpansionRewriter. 
+ * Load configs using default format + */ + @Inject + public GenericExpansionRewriter(ComponentId id, + FileAcquirer fileAcquirer, + RewritesConfig config) { + super(id, fileAcquirer, config); + } + + /** + * Constructor for GenericExpansionRewriter unit test. + * Load configs using default format + */ + public GenericExpansionRewriter(RewritesConfig config, + HashMap<String, File> fileList) { + super(config, fileList); + } + + /** + * Instance creation time config loading besides FSA. + * Create PhraseMatcher from FSA dict + */ + public boolean configure(FileAcquirer fileAcquirer, + RewritesConfig config, + HashMap<String, File> fileList) { + logger = Logger.getLogger(GenericExpansionRewriter.class.getName()); + FSA fsa = (FSA)rewriterDicts.get(GENERIC_EXPAND_DICT); + if(fsa==null) { + RewriterUtils.error(logger, "Error retrieving FSA dictionary: " + + GENERIC_EXPAND_DICT); + return false; + } + // Create Phrase Matcher + RewriterUtils.log(logger, "Creating PhraseMatcher"); + try { + phraseMatcher = new PhraseMatcher(fsa, false); + } catch (IllegalArgumentException e) { + RewriterUtils.error(logger, "Error creating phrase matcher"); + return false; + } + + // Match single word as well + phraseMatcher.setMatchSingleItems(true); + + // Return all matches instead of only the longest match + phraseMatcher.setMatchAll(true); + + return true; + } + + /** + * Main logic of rewriter<br> + * - Retrieve rewrites from FSA dict<br> + * - rewrite query using features that are enabled by user + */ + public HashMap<String, Object> rewrite(Query query, + String dictKey) throws RuntimeException { + + Boolean rewritten = false; + + // Pass the original dict key to the next rewriter + HashMap<String, Object> result = new HashMap<>(); + result.put(RewriterConstants.REWRITTEN, rewritten); + result.put(RewriterConstants.DICT_KEY, dictKey); + + RewriterUtils.log(logger, query, + "In GenericExpansionRewriter, query used for dict retrieval=[" + dictKey + "]"); + + // Retrieve flags for 
choosing between whole query match + // or partial query match + String partialPhraseMatch = getQPConfig(query, RewriterConstants.PARTIAL_PHRASE_MATCH); + + if(partialPhraseMatch==null) { + RewriterUtils.error(logger, query, "Required param " + RewriterConstants.PARTIAL_PHRASE_MATCH + + " is not set, skipping rewriter"); + throw new RuntimeException("Required param " + RewriterConstants.PARTIAL_PHRASE_MATCH + + " is not set, skipping rewriter"); + } + + // Retrieve max number of rewrites allowed + int maxNumRewrites = 0; + String maxNumRewritesStr = getQPConfig(query, RewriterConstants.MAX_REWRITES); + if(maxNumRewritesStr!=null) { + maxNumRewrites = Integer.parseInt(maxNumRewritesStr); + RewriterUtils.log(logger, query, + "Limiting max number of rewrites to: " + maxNumRewrites); + } else { + RewriterUtils.log(logger, query, "No limit on number of rewrites"); + } + + // Retrieve flags for choosing whether to add + // the rewrites as phrase, default to false + String rewritesAsUnitEquiv = getQPConfig(query, RewriterConstants.REWRITES_AS_UNIT_EQUIV); + if(rewritesAsUnitEquiv==null) { + rewritesAsUnitEquiv = "false"; + } + + Set<PhraseMatcher.Phrase> matches; + + // Partial Phrase Matching + if(partialPhraseMatch.equalsIgnoreCase("true")) { + RewriterUtils.log(logger, query, "Partial phrase matching"); + + // Retrieve longest non overlapping matches + matches = RewriterFeatures.getNonOverlappingPartialPhraseMatches(phraseMatcher, query); + + // Full Phrase Matching if set to anything else + } else { + RewriterUtils.log(logger, query, "Full phrase matching"); + + // Retrieve longest non overlapping matches + matches = RewriterFeatures.getNonOverlappingFullPhraseMatches(phraseMatcher, query); + } + + if(matches==null) { + return result; + } + + // Add expansions to the query + query = RewriterFeatures.addExpansions(query, matches, null, maxNumRewrites, false, + rewritesAsUnitEquiv.equalsIgnoreCase("true")); + + rewritten = true; + + RewriterUtils.log(logger, query, 
"GenericExpansionRewriter final query: " + query.toDetailString()); + + result.put(RewriterConstants.REWRITTEN, rewritten); + + return result; + } + + /** + * Get the flag which specifies whether this rewriter + * should be skipped if the query has been rewritten + * + * @return true if rewriter should be skipped, false + * otherwise + */ + public boolean getSkipRewriterIfRewritten() { + return SKIP_REWRITER_IF_REWRITTEN; + } + + /** + * Get the name of the rewriter + * + * @return Name of the rewriter + */ + public String getRewriterName() { + return REWRITER_NAME; + } + + /** + * Get default FSA dictionary names + * + * @return Pair of FSA dictionary name and filename + */ + public HashMap<String, String> getDefaultFSAs() { + HashMap<String, String> defaultDicts = new HashMap<>(); + defaultDicts.put(GENERIC_EXPAND_DICT, GENERIC_EXPAND_DICT_FILENAME); + return defaultDicts; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/query/rewrite/rewriters/MisspellRewriter.java b/container-search/src/main/java/com/yahoo/search/query/rewrite/rewriters/MisspellRewriter.java new file mode 100644 index 00000000000..a1b46926cbd --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/rewrite/rewriters/MisspellRewriter.java @@ -0,0 +1,151 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.query.rewrite.rewriters; + +import java.io.*; +import java.util.*; +import java.util.logging.Logger; + +import com.google.inject.Inject; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.search.query.rewrite.*; +import com.yahoo.search.*; +import com.yahoo.component.ComponentId; +import com.yahoo.filedistribution.fileacquirer.FileAcquirer; +import com.yahoo.search.query.rewrite.RewritesConfig; + +/** + * This rewriter would retrieve spell corrected query from QLAS and + * add it to the original query tree as equiv<br> + * - Features:<br> + * RewritesAsEquiv flag: add rewrites to original query as equiv + * + * @author Karen Sze Wing Lee + */ +@After("QLAS") +@Provides("MisspellRewriter") +public class MisspellRewriter extends QueryRewriteSearcher { + + // Flag for skipping this rewriter if the query has been rewritten + private final boolean SKIP_REWRITER_IF_REWRITTEN = false; + + // Name of the rewriter + public static final String REWRITER_NAME = "MisspellRewriter"; + + private Logger logger = Logger.getLogger(MisspellRewriter.class.getName()); + + /** + * Constructor for MisspellRewriter + */ + @Inject + public MisspellRewriter(ComponentId id) { + super(id); + } + + /** + * Constructor for MisspellRewriter unit test + */ + public MisspellRewriter() { + super(); + } + + /** + * Instance creation time config loading besides FSA. 
+ * Empty for this rewriter + */ + public boolean configure(FileAcquirer fileAcquirer, + RewritesConfig config, + HashMap<String, File> fileList) { + return true; + } + + /** + * Main logic of rewriter<br> + * - Retrieve spell corrected query from QLAS<br> + * - Add spell corrected query as equiv + */ + public HashMap<String, Object> rewrite(Query query, + String dictKey) throws RuntimeException { + + Boolean rewritten = false; + + HashMap<String, Object> result = new HashMap<>(); + result.put(RewriterConstants.REWRITTEN, rewritten); + result.put(RewriterConstants.DICT_KEY, dictKey); + + RewriterUtils.log(logger, query, + "In MisspellRewriter"); + + // Retrieve flags for enabling the features + String qssRw = getQPConfig(query, RewriterConstants.QSS_RW); + String qssSugg = getQPConfig(query, RewriterConstants.QSS_SUGG); + + boolean isQSSRw = false; + boolean isQSSSugg = false; + + if(qssRw!=null) { + isQSSRw = qssRw.equalsIgnoreCase("true"); + } + if(qssSugg!=null) { + isQSSSugg = qssSugg.equalsIgnoreCase("true"); + } + + // Rewrite is not enabled + if(!isQSSRw && !isQSSSugg) { + return result; + } + + // Retrieve spell corrected query from QLAS + String rewrites = RewriterUtils.getSpellCorrected(query, isQSSRw, isQSSSugg); + + // No rewrites + if(rewrites==null) { + RewriterUtils.log(logger, query, "No rewrite is retrieved"); + return result; + } else { + RewriterUtils.log(logger, query, "Retrieved spell corrected query: " + + rewrites); + } + + // Adding rewrite to the query tree + query = RewriterFeatures.addRewritesAsEquiv(query, dictKey, rewrites, false, 0); + + rewritten = true; + RewriterUtils.log(logger, query, "MisspellRewriter final query: " + + query.toDetailString()); + + result.put(RewriterConstants.REWRITTEN, rewritten); + result.put(RewriterConstants.DICT_KEY, rewrites); + + return result; + } + + /** + * Get the flag which specifies whether this rewriter + * should be skipped if the query has been rewritten + * + * @return true if rewriter should be 
skipped, false + * otherwise + */ + public boolean getSkipRewriterIfRewritten() { + return SKIP_REWRITER_IF_REWRITTEN; + } + + /** + * Get the name of the rewriter + * + * @return Name of the rewriter + */ + public String getRewriterName() { + return REWRITER_NAME; + } + + /** + * Get default FSA dictionary names + * + * @return Pair of FSA dictionary name and filename + */ + public HashMap<String, String> getDefaultFSAs() { + return null; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/query/rewrite/rewriters/NameRewriter.java b/container-search/src/main/java/com/yahoo/search/query/rewrite/rewriters/NameRewriter.java new file mode 100644 index 00000000000..5ecf7893c63 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/rewrite/rewriters/NameRewriter.java @@ -0,0 +1,194 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.rewrite.rewriters; + +import java.io.*; +import java.util.*; +import java.util.logging.Logger; + +import com.google.inject.Inject; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.search.query.rewrite.*; +import com.yahoo.search.*; +import com.yahoo.component.ComponentId; +import com.yahoo.filedistribution.fileacquirer.FileAcquirer; +import com.yahoo.search.query.rewrite.RewritesConfig; + +/** + * This rewriter would add rewrites to name entities to boost precision<br> + * - FSA dict: [normalized original query]\t[rewrite 1]\t[rewrite 2]\t[etc]<br> + * - Features:<br> + * OriginalAsUnit flag: add proximity boosting to original query<br> + * RewritesAsUnitEquiv flag: add proximity boosted rewrites to original query<br> + * RewritesAsEquiv flag: add rewrites to original query<br> + * + * @author Karen Sze Wing Lee + */ +@Provides("NameRewriter") +public class NameRewriter extends QueryRewriteSearcher { + + // Flag for skipping this rewriter if the query has been rewritten + private final 
boolean SKIP_REWRITER_IF_REWRITTEN = false; + + // Name of the rewriter + public static final String REWRITER_NAME = "NameRewriter"; + + // Name entity expansion dictionary name + public static final String NAME_ENTITY_EXPAND_DICT = "NameEntityExpansion"; + + // Default Name entity expansion dictionary file name + public static final String NAME_ENTITY_EXPAND_DICT_FILENAME = "NameRewriter.fsa"; + + private Logger logger; + + /** + * Constructor for NameRewriter<br> + * Load configs using default format + */ + @Inject + public NameRewriter(ComponentId id, + FileAcquirer fileAcquirer, + RewritesConfig config) { + super(id, fileAcquirer, config); + } + + /** + * Constructor for NameRewriter unit test<br> + * Load configs using default format + */ + public NameRewriter(RewritesConfig config, + HashMap<String, File> fileList) { + super(config, fileList); + } + + /** + * Instance creation time config loading besides FSA<br> + * Empty for this rewriter + */ + public boolean configure(FileAcquirer fileAcquirer, + RewritesConfig config, + HashMap<String, File> fileList) { + logger = Logger.getLogger(NameRewriter.class.getName()); + return true; + } + + /** + * Main logic of rewriter<br> + * - Retrieve rewrites from FSA dict<br> + * - rewrite query using features that are enabled by user + */ + public HashMap<String, Object> rewrite(Query query, + String dictKey) throws RuntimeException { + + Boolean rewritten = false; + + // Pass the original dict key to the next rewriter + HashMap<String, Object> result = new HashMap<>(); + result.put(RewriterConstants.REWRITTEN, rewritten); + result.put(RewriterConstants.DICT_KEY, dictKey); + + RewriterUtils.log(logger, query, + "In NameRewriter, query used for dict retrieval=[" + dictKey + "]"); + + // Retrieve rewrite from FSA dict using normalized query + String rewrites = super.getRewriteFromFSA(query, NAME_ENTITY_EXPAND_DICT, dictKey); + RewriterUtils.log(logger, query, "Retrieved rewrites: " + rewrites); + + // No rewrites + 
if(rewrites==null) { + RewriterUtils.log(logger, query, "No rewrite is retrieved"); + return result; + } + + // Retrieve max number of rewrites allowed + int maxNumRewrites = 0; + String maxNumRewritesStr = getQPConfig(query, RewriterConstants.MAX_REWRITES); + if(maxNumRewritesStr!=null) { + maxNumRewrites = Integer.parseInt(maxNumRewritesStr); + RewriterUtils.log(logger, query, + "Limiting max number of rewrites to: " + maxNumRewrites); + } else { + RewriterUtils.log(logger, query, "No limit on number of rewrites"); + } + + // Retrieve flags for enabling the features + String originalAsUnit = getQPConfig(query, RewriterConstants.ORIGINAL_AS_UNIT); + String originalAsUnitEquiv = getQPConfig(query, RewriterConstants.ORIGINAL_AS_UNIT_EQUIV); + String rewritesAsUnitEquiv = getQPConfig(query, RewriterConstants.REWRITES_AS_UNIT_EQUIV); + String rewritesAsEquiv = getQPConfig(query, RewriterConstants.REWRITES_AS_EQUIV); + + // Add proximity boosting to original query and keeping + // the original query if it's enabled + if(originalAsUnitEquiv!=null && originalAsUnitEquiv.equalsIgnoreCase("true")) { + RewriterUtils.log(logger, query, "OriginalAsUnitEquiv is enabled"); + query = RewriterFeatures.addUnitToOriginalQuery(query, dictKey, true); + RewriterUtils.log(logger, query, + "Query after OriginalAsUnitEquiv: " + query.toDetailString()); + rewritten = true; + + // Add proximity boosting to original query + // if it's enabled + } else if(originalAsUnit!=null && originalAsUnit.equalsIgnoreCase("true")) { + RewriterUtils.log(logger, query, "OriginalAsUnit is enabled"); + query = RewriterFeatures.addUnitToOriginalQuery(query, dictKey, false); + RewriterUtils.log(logger, query, + "Query after OriginalAsUnit: " + query.toDetailString()); + rewritten = true; + } + + // Add rewrites as unit equiv if it's enabled + if(rewritesAsUnitEquiv!=null && rewritesAsUnitEquiv.equalsIgnoreCase("true")) { + RewriterUtils.log(logger, query, "RewritesAsUnitEquiv is enabled"); + //query = 
RewriterFeatures.addRewritesAsEquiv(query, dictKey, rewrites, true, maxNumRewrites); + query = RewriterFeatures.addRewritesAsEquiv(query, dictKey, rewrites, true, maxNumRewrites); + RewriterUtils.log(logger, query, + "Query after RewritesAsUnitEquiv: " + query.toDetailString()); + rewritten = true; + + // Add rewrites as equiv if it's enabled + } else if(rewritesAsEquiv!=null && rewritesAsEquiv.equalsIgnoreCase("true")) { + RewriterUtils.log(logger, query, "RewritesAsEquiv is enabled"); + //query = RewriterFeatures.addRewritesAsEquiv(query, dictKey, rewrites, false, maxNumRewrites); + query = RewriterFeatures.addRewritesAsEquiv(query, dictKey, rewrites, false, maxNumRewrites); + RewriterUtils.log(logger, query, + "Query after RewritesAsEquiv: " + query.toDetailString()); + rewritten = true; + } + + RewriterUtils.log(logger, query, "NameRewriter final query: " + query.toDetailString()); + + result.put(RewriterConstants.REWRITTEN, rewritten); + + return result; + } + + /** + * Get the flag which specifies whether this rewriter 
+ * should be skipped if the query has been rewritten + * + * @return true if rewriter should be skipped, false + * otherwise + */ + public boolean getSkipRewriterIfRewritten() { + return SKIP_REWRITER_IF_REWRITTEN; + } + + /** + * Get the name of the rewriter + * + * @return Name of the rewriter + */ + public String getRewriterName() { + return REWRITER_NAME; + } + + /** + * Get default FSA dictionary names + * + * @return Pair of FSA dictionary name and filename + */ + public HashMap<String, String> getDefaultFSAs() { + HashMap<String, String> defaultDicts = new HashMap<>(); + defaultDicts.put(NAME_ENTITY_EXPAND_DICT, NAME_ENTITY_EXPAND_DICT_FILENAME); + return defaultDicts; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/query/rewrite/rewriters/package-info.java b/container-search/src/main/java/com/yahoo/search/query/rewrite/rewriters/package-info.java new file mode 100644 index 00000000000..bfbb73f661e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/rewrite/rewriters/package-info.java @@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +@PublicApi +package com.yahoo.search.query.rewrite.rewriters; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/query/textserialize/TextSerialize.java b/container-search/src/main/java/com/yahoo/search/query/textserialize/TextSerialize.java new file mode 100644 index 00000000000..bac9f2af237 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/textserialize/TextSerialize.java @@ -0,0 +1,41 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.query.textserialize; + +import com.yahoo.prelude.query.Item; +import com.yahoo.search.query.textserialize.item.ItemContext; +import com.yahoo.search.query.textserialize.item.ItemFormHandler; +import com.yahoo.search.query.textserialize.parser.ParseException; +import com.yahoo.search.query.textserialize.parser.Parser; +import com.yahoo.search.query.textserialize.parser.TokenMgrError; +import com.yahoo.search.query.textserialize.serializer.QueryTreeSerializer; + +import java.io.StringReader; + +/** + * @author tonytv + * Facade + * Allows serializing/deserializing a query to the programmatic format. + */ +public class TextSerialize { + public static Item parse(String serializedQuery) { + try { + ItemContext context = new ItemContext(); + Object result = new Parser(new StringReader(serializedQuery.replace("'", "\"")), new ItemFormHandler(), context).start(); + context.connectItems(); + + if (!(result instanceof Item)) { + throw new RuntimeException("The serialized query '" + serializedQuery + "' did not evaluate to an Item" + + "(type = " + result.getClass() + ")"); + } + return (Item) result; + } catch (ParseException e) { + throw new RuntimeException(e); + } catch (TokenMgrError e) { + throw new RuntimeException(e); + } + } + + public static String serialize(Item item) { + return new QueryTreeSerializer().serialize(item); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/query/textserialize/item/AndNotRestConverter.java b/container-search/src/main/java/com/yahoo/search/query/textserialize/item/AndNotRestConverter.java new file mode 100644 index 00000000000..c4e54ca748d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/textserialize/item/AndNotRestConverter.java @@ -0,0 +1,54 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.query.textserialize.item; + +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.NotItem; + +import java.util.List; + +import static com.yahoo.search.query.textserialize.item.ListUtil.butFirst; +import static com.yahoo.search.query.textserialize.item.ListUtil.first; + +/** + * @author tonytv + */ +public class AndNotRestConverter extends CompositeConverter<NotItem> { + static final String andNotRest = "AND-NOT-REST"; + + public AndNotRestConverter() { + super(NotItem.class); + } + + @Override + protected void addChildren(NotItem item, ItemArguments arguments, ItemContext context) { + if (firstIsNull(arguments.children)) { + addNegativeItems(item, arguments.children); + } else { + addItems(item, arguments.children); + } + } + + private void addNegativeItems(NotItem notItem, List<Object> children) { + for (Object child: butFirst(children)) { + TypeCheck.ensureInstanceOf(child, Item.class); + notItem.addNegativeItem((Item) child); + } + } + + private void addItems(NotItem notItem, List<Object> children) { + for (Object child : children) { + TypeCheck.ensureInstanceOf(child, Item.class); + notItem.addItem((Item) child); + } + } + + + private boolean firstIsNull(List<Object> children) { + return !children.isEmpty() && first(children) == null; + } + + @Override + protected String getFormName(Item item) { + return andNotRest; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/query/textserialize/item/CompositeConverter.java b/container-search/src/main/java/com/yahoo/search/query/textserialize/item/CompositeConverter.java new file mode 100644 index 00000000000..7f7c5e48d0a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/textserialize/item/CompositeConverter.java @@ -0,0 +1,66 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.query.textserialize.item; + +import com.yahoo.prelude.query.CompositeItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.search.query.textserialize.serializer.DispatchForm; +import com.yahoo.search.query.textserialize.serializer.ItemIdMapper; + +import java.util.ListIterator; + +/** + * @author tonytv + */ +public class CompositeConverter<T extends CompositeItem> implements ItemFormConverter { + private final Class<T> itemClass; + + public CompositeConverter(Class<T> itemClass) { + this.itemClass = itemClass; + } + + @Override + public Object formToItem(String name, ItemArguments arguments, ItemContext itemContext) { + T item = newInstance(); + addChildren(item, arguments, itemContext); + return item; + } + + protected void addChildren(T item, ItemArguments arguments, ItemContext itemContext) { + for (Object child : arguments.children) { + item.addItem(asItem(child)); + } + ItemInitializer.initialize(item, arguments, itemContext); + } + + private static Item asItem(Object child) { + if (!(child instanceof Item) && child != null) { + throw new RuntimeException("Expected query item, but got '" + child.toString() + + "' [" + child.getClass().getName() + "]"); + } + return (Item) child; + } + + private T newInstance() { + try { + return itemClass.newInstance(); + } catch (InstantiationException | IllegalAccessException e) { + throw new RuntimeException(e); + } + } + + @Override + public DispatchForm itemToForm(Item item, ItemIdMapper itemIdMapper) { + CompositeItem compositeItem = (CompositeItem) item; + + DispatchForm form = new DispatchForm(getFormName(item)); + for (ListIterator<Item> i = compositeItem.getItemIterator(); i.hasNext() ;) { + form.addChild(i.next()); + } + ItemInitializer.initializeForm(form, item, itemIdMapper); + return form; + } + + protected String getFormName(Item item) { + return item.getItemType().name(); + } +} diff --git 
a/container-search/src/main/java/com/yahoo/search/query/textserialize/item/ExactStringConverter.java b/container-search/src/main/java/com/yahoo/search/query/textserialize/item/ExactStringConverter.java new file mode 100644 index 00000000000..4b68ecfe5a9 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/textserialize/item/ExactStringConverter.java @@ -0,0 +1,15 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query.textserialize.item; + +import com.yahoo.prelude.query.ExactstringItem; + +/** + * @author balder + */ +// TODO: balder to fix javadoc +public class ExactStringConverter extends WordConverter { + @Override + ExactstringItem newTermItem(String word) { + return new ExactstringItem(word); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/query/textserialize/item/IntConverter.java b/container-search/src/main/java/com/yahoo/search/query/textserialize/item/IntConverter.java new file mode 100644 index 00000000000..43b96d17773 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/textserialize/item/IntConverter.java @@ -0,0 +1,20 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.search.query.textserialize.item;

import com.yahoo.prelude.query.IntItem;
import com.yahoo.prelude.query.TermItem;

/**
 * Term converter for {@link IntItem}s.
 *
 * @author tonytv
 */
public class IntConverter extends TermConverter {

    /** Creates an {@link IntItem} from the textual value. */
    @Override
    IntItem newTermItem(String word) {
        IntItem numberItem = new IntItem(word);
        return numberItem;
    }

    /** Returns the number of the item, which must be an {@link IntItem}. */
    @Override
    protected String getValue(TermItem item) {
        IntItem numberItem = (IntItem) item;
        return numberItem.getNumber();
    }
}

// File: container-search/src/main/java/com/yahoo/search/query/textserialize/item/ItemArguments.java
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.textserialize.item;

import java.util.Collections;
import java.util.List;
import java.util.Map;

import static com.yahoo.search.query.textserialize.item.ListUtil.firstInstanceOf;

/**
 * The arguments of a form, split into an optional leading property map and the remaining children.
 *
 * @author tonytv
 */
public class ItemArguments {

    /** The leading map argument, or an empty map when the first argument is not a map. */
    public final Map<?, ?> properties;

    /** The arguments following the property map, or all arguments when there is no property map. */
    public final List<Object> children;

    public ItemArguments(List<Object> arguments) {
        boolean startsWithProperties = firstInstanceOf(arguments, Map.class);
        if (!startsWithProperties) {
            properties = Collections.emptyMap();
            children = arguments;
            return;
        }
        properties = (Map<?, ?>) ListUtil.first(arguments);
        children = ListUtil.rest(arguments);
    }
}

// File: container-search/src/main/java/com/yahoo/search/query/textserialize/item/ItemContext.java
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license.
// See LICENSE in the project root.
package com.yahoo.search.query.textserialize.item;

import com.yahoo.prelude.query.Item;
import com.yahoo.prelude.query.TaggableItem;

import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Map;

/**
 * Collects item ids and requested connectivity while forms are converted to items,
 * so that the connections can be established afterwards by {@link #connectItems()}.
 *
 * @author tonytv
 */
public class ItemContext {

    /**
     * A requested connection: the id of the item to connect to, and the connection strength.
     * Static, since it never uses the enclosing context instance.
     */
    private static final class Connectivity {

        final String id;
        final double strength;

        Connectivity(String id, double strength) {
            this.id = id;
            this.strength = strength;
        }
    }

    private final Map<String, Item> itemById = new HashMap<>();

    // Keyed by identity: two distinct items are distinct keys regardless of their equals().
    private final Map<TaggableItem, Connectivity> connectivityByItem = new IdentityHashMap<>();

    /** Registers the item under the given id, replacing any item previously registered with that id. */
    public void setItemId(String id, Item item) {
        itemById.put(id, item);
    }

    /**
     * Records that the item should be connected to the item with the given id.
     *
     * @param strength the connectivity strength; unboxed here, so null causes a NullPointerException
     */
    public void setConnectivity(TaggableItem item, String id, Double strength) {
        connectivityByItem.put(item, new Connectivity(id, strength));
    }

    /**
     * Applies all recorded connections.
     *
     * @throws IllegalArgumentException if a connection refers to an id no item was registered with
     */
    public void connectItems() {
        for (Map.Entry<TaggableItem, Connectivity> entry : connectivityByItem.entrySet()) {
            Connectivity connectivity = entry.getValue();
            entry.getKey().setConnectivity(getItem(connectivity.id), connectivity.strength);
        }
    }

    private Item getItem(String id) {
        Item item = itemById.get(id);
        if (item == null)
            throw new IllegalArgumentException("No item with id '" + id + "'.");
        return item;
    }
}

// File: container-search/src/main/java/com/yahoo/search/query/textserialize/item/ItemExecutorRegistry.java
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.textserialize.item;

import com.yahoo.prelude.query.AndItem;
import com.yahoo.prelude.query.CompositeItem;
import com.yahoo.prelude.query.EquivItem;
import com.yahoo.prelude.query.Item;
import com.yahoo.prelude.query.NearItem;
import com.yahoo.prelude.query.ONearItem;
import com.yahoo.prelude.query.OrItem;
import com.yahoo.prelude.query.PhraseItem;
import com.yahoo.prelude.query.RankItem;

import java.util.HashMap;
import java.util.Map;

/**
 * Static lookup table from form name to the converter handling that form.
 * The table is filled once, in the static initializer.
 *
 * @author tonytv
 */
public class ItemExecutorRegistry {

    private static final Map<String, ItemFormConverter> executorsByName = new HashMap<>();

    static {
        // Plain composites
        register(Item.ItemType.AND, compositeConverterFor(AndItem.class));
        register(Item.ItemType.OR, compositeConverterFor(OrItem.class));
        register(Item.ItemType.RANK, compositeConverterFor(RankItem.class));
        register(Item.ItemType.PHRASE, compositeConverterFor(PhraseItem.class));
        register(Item.ItemType.EQUIV, compositeConverterFor(EquivItem.class));

        // Registered under its own form name rather than an item type name
        register(AndNotRestConverter.andNotRest, new AndNotRestConverter());

        // Composites carrying a distance
        register(Item.ItemType.NEAR, new NearConverter(NearItem.class));
        register(Item.ItemType.ONEAR, new NearConverter(ONearItem.class));

        // Terms
        register(Item.ItemType.WORD, new WordConverter());
        register(Item.ItemType.INT, new IntConverter());
        register(Item.ItemType.PREFIX, new PrefixConverter());
        register(Item.ItemType.SUBSTRING, new SubStringConverter());
        register(Item.ItemType.EXACT, new ExactStringConverter());
        register(Item.ItemType.SUFFIX, new SuffixConverter());
    }

    private static <T extends CompositeItem> ItemFormConverter compositeConverterFor(Class<T> itemClass) {
        return new CompositeConverter<>(itemClass);
    }

    /** Registers the converter under the string form of the item type. */
    private static void register(Item.ItemType type, ItemFormConverter converter) {
        register(type.toString(), converter);
    }

    private static void register(String formName, ItemFormConverter converter) {
        executorsByName.put(formName, converter);
    }

    /**
     * Returns the converter registered under the given name.
     *
     * @throws RuntimeException if no converter is registered under that name
     */
    public static ItemFormConverter getByName(String name) {
        ItemFormConverter converter = executorsByName.get(name);
        if (converter == null) {
            throw new RuntimeException("No item type named '" + name + "'.");
        }
        return converter;
    }

    /**
     * Returns the converter for the given item type. NOT items are looked up under the
     * and-not-rest form name; all other types under the name of the type.
     *
     * @throws RuntimeException if no converter is registered for the type
     */
    public static ItemFormConverter getByType(Item.ItemType itemType) {
        if (itemType == Item.ItemType.NOT) {
            return getByName(AndNotRestConverter.andNotRest);
        }
        return getByName(itemType.name());
    }
}

// File: container-search/src/main/java/com/yahoo/search/query/textserialize/item/ItemFormConverter.java
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.textserialize.item;

import com.yahoo.prelude.query.Item;
import com.yahoo.search.query.textserialize.serializer.DispatchForm;
import com.yahoo.search.query.textserialize.serializer.ItemIdMapper;

/**
 * Two-way conversion between a named form with arguments and a query item.
 *
 * @author tonytv
 */
public interface ItemFormConverter {

    /** Produces the object represented by the form with the given name and arguments. */
    Object formToItem(String name, ItemArguments arguments, ItemContext context);

    /** Produces the form representing the given item. */
    DispatchForm itemToForm(Item item, ItemIdMapper itemIdMapper);
}

// File: container-search/src/main/java/com/yahoo/search/query/textserialize/item/ItemFormHandler.java
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license.
// See LICENSE in the project root.
package com.yahoo.search.query.textserialize.item;

import com.yahoo.search.query.textserialize.parser.DispatchFormHandler;

import java.util.List;

/**
 * Dispatches a parsed form to the converter registered under the form name.
 *
 * @author tonytv
 */
public class ItemFormHandler implements DispatchFormHandler {

    /**
     * Looks up the converter by name and lets it build the item.
     *
     * @param dispatchContext must be an {@link ItemContext}
     */
    @Override
    public Object dispatch(String name, List<Object> arguments, Object dispatchContext) {
        ItemFormConverter converter = ItemExecutorRegistry.getByName(name);
        ItemArguments itemArguments = new ItemArguments(arguments);
        ItemContext itemContext = (ItemContext) dispatchContext;
        return converter.formToItem(name, itemArguments, itemContext);
    }
}

// File: container-search/src/main/java/com/yahoo/search/query/textserialize/item/ItemInitializer.java
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.textserialize.item;

import com.yahoo.prelude.query.IndexedItem;
import com.yahoo.prelude.query.Item;
import com.yahoo.prelude.query.TaggableItem;
import com.yahoo.search.query.textserialize.serializer.DispatchForm;
import com.yahoo.search.query.textserialize.serializer.ItemIdMapper;

import java.util.Arrays;
import java.util.List;
import java.util.Map;

/**
 * Transfers the common item properties (id, weight, connectivity, significance, unique id, index)
 * from form arguments to items, and from items to forms.
 *
 * @author tonytv
 */
public class ItemInitializer {

    // Property names; each is used both when reading arguments and when writing forms.
    private static final String connectivityProperty = "connectivity";
    private static final String idProperty = "id";
    private static final String indexProperty = "index";
    private static final String significanceProperty = "significance";
    private static final String uniqueIdProperty = "uniqueId";
    private static final String weightProperty = "weight";

    /**
     * Applies the properties in the arguments to the item. Properties which are absent are left alone.
     * The id and connectivity are not applied directly but recorded in the item context.
     */
    public static void initialize(Item item, ItemArguments arguments, ItemContext itemContext) {
        storeIdInContext(item, arguments.properties, itemContext);

        Object weight = arguments.properties.get(weightProperty);
        if (weight != null) {
            TypeCheck.ensureInstanceOf(weight, Number.class);
            item.setWeight(((Number)weight).intValue());
        }

        if (item instanceof TaggableItem) {
            initializeTaggableItem((TaggableItem)item, arguments, itemContext);
        }

        if (item instanceof IndexedItem) {
            initializeIndexedItem((IndexedItem)item, arguments);
        }
    }

    /** Registers the item in the context under its id property, if one is given. */
    private static void storeIdInContext(Item item, Map<?, ?> properties, ItemContext itemContext) {
        Object id = properties.get(idProperty);
        if (id != null) {
            TypeCheck.ensureInstanceOf(id, String.class);
            itemContext.setItemId((String) id, item);
        }
    }

    private static void initializeTaggableItem(TaggableItem item, ItemArguments arguments, ItemContext itemContext) {
        Object connectivity = arguments.properties.get(connectivityProperty);
        if (connectivity != null) {
            storeConnectivityInContext(item, connectivity, itemContext);
        }

        Object significance = arguments.properties.get(significanceProperty);
        if (significance != null) {
            TypeCheck.ensureInstanceOf(significance, Number.class);
            item.setSignificance(((Number)significance).doubleValue());
        }

        Object uniqueId = arguments.properties.get(uniqueIdProperty);
        if (uniqueId != null) {
            TypeCheck.ensureInstanceOf(uniqueId, Number.class);
            item.setUniqueID(((Number)uniqueId).intValue());
        }
    }

    private static void initializeIndexedItem(IndexedItem indexedItem, ItemArguments arguments) {
        Object index = arguments.properties.get(indexProperty);
        if (index != null) {
            TypeCheck.ensureInstanceOf(index, String.class);
            indexedItem.setIndexName((String) index);
        }
    }

    /**
     * Records the connectivity in the context. The value must be a two-element list:
     * the id (a string) of the item to connect to, followed by the strength (a number).
     *
     * @throws IllegalArgumentException if the list does not have exactly two elements
     */
    private static void storeConnectivityInContext(TaggableItem item, Object connectivity, ItemContext itemContext) {
        TypeCheck.ensureInstanceOf(connectivity, List.class);
        List<?> connectivityList = (List<?>) connectivity;
        if (connectivityList.size() != 2) {
            throw new IllegalArgumentException("Expected two elements for connectivity, got " + connectivityList.size());
        }

        Object id = connectivityList.get(0);
        Object strength = connectivityList.get(1);

        TypeCheck.ensureInstanceOf(id, String.class);
        TypeCheck.ensureInstanceOf(strength, Number.class);

        itemContext.setConnectivity(item, (String)id, ((Number)strength).doubleValue());
    }

    /**
     * Sets the properties of the form from the item. Only properties deviating from the defaults are set:
     * the weight when it is not the default weight, and the id only when another item connects to this item.
     */
    public static void initializeForm(DispatchForm form, Item item, ItemIdMapper itemIdMapper) {
        if (item.getWeight() != Item.DEFAULT_WEIGHT) {
            form.setProperty(weightProperty, item.getWeight());
        }

        if (item instanceof IndexedItem) {
            initializeIndexedForm(form, (IndexedItem) item);
        }
        if (item instanceof TaggableItem) {
            initializeTaggableForm(form, (TaggableItem) item, itemIdMapper);
        }
        initializeFormWithIdIfConnected(form, item, itemIdMapper);
    }

    private static void initializeFormWithIdIfConnected(DispatchForm form, Item item, ItemIdMapper itemIdMapper) {
        if (item.hasConnectivityBackLink()) {
            form.setProperty(idProperty, itemIdMapper.getId(item));
        }
    }

    @SuppressWarnings("unchecked")
    private static void initializeTaggableForm(DispatchForm form, TaggableItem taggableItem, ItemIdMapper itemIdMapper) {
        Item connectedItem = taggableItem.getConnectedItem();
        if (connectedItem != null) {
            // Same [id, strength] shape as read back by storeConnectivityInContext
            form.setProperty(connectivityProperty,
                             Arrays.asList(itemIdMapper.getId(connectedItem), taggableItem.getConnectivity()));
        }

        if (taggableItem.hasExplicitSignificance()) {
            form.setProperty(significanceProperty, taggableItem.getSignificance());
        }

        if (taggableItem.hasUniqueID()) {
            form.setProperty(uniqueIdProperty, taggableItem.getUniqueID());
        }
    }

    private static void initializeIndexedForm(DispatchForm form, IndexedItem item) {
        String index = item.getIndexName();
        if (!index.isEmpty()) {
            form.setProperty(indexProperty, index);
        }
    }

}

// File: container-search/src/main/java/com/yahoo/search/query/textserialize/item/ListUtil.java
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.textserialize.item;

import java.util.Collection;
import java.util.Iterator;
import java.util.List;

/**
 * Helpers for splitting collections into their first element and the remainder.
 *
 * @author tonytv
 */
public class ListUtil {

    /**
     * Returns a view of the list without its first element.
     * An empty list yields an empty view instead of an exception.
     */
    public static <T> List<T> rest(List<T> list) {
        int from = Math.min(1, list.size());
        return list.subList(from, list.size());
    }

    /**
     * Returns the first element in iteration order.
     *
     * @throws java.util.NoSuchElementException if the collection is empty
     */
    public static <T> T first(Collection<T> collection) {
        return collection.iterator().next();
    }

    /** Returns true if the collection is non-empty and its first element is an instance of the given class. */
    public static boolean firstInstanceOf(Collection<?> collection, Class<?> c) {
        return !collection.isEmpty() && c.isInstance(first(collection));
    }

    /** Same as {@link #rest(List)}. */
    public static <T> List<T> butFirst(List<T> list) {
        return rest(list);
    }

    /**
     * Returns an iterable over all elements but the first, in iteration order.
     * Each call to {@code iterator()} starts a fresh traversal of the collection.
     * An empty collection yields an empty iteration instead of an exception.
     */
    public static <T> Iterable<T> butFirst(final Collection<T> collection) {
        return () -> {
            Iterator<T> remaining = collection.iterator();
            if (remaining.hasNext()) {
                remaining.next();
            }
            return remaining;
        };
    }
}

// File: container-search/src/main/java/com/yahoo/search/query/textserialize/item/NearConverter.java
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.textserialize.item;

import com.yahoo.prelude.query.Item;
import com.yahoo.prelude.query.NearItem;
import com.yahoo.search.query.textserialize.serializer.DispatchForm;
import com.yahoo.search.query.textserialize.serializer.ItemIdMapper;

/**
 * Composite converter for {@link NearItem}s, which in addition to the common properties carry a distance.
 *
 * @author tonytv
 */
@SuppressWarnings("rawtypes")
public class NearConverter extends CompositeConverter {

    private static final String distanceProperty = "distance";

    @SuppressWarnings("unchecked")
    public NearConverter(Class<? extends NearItem> nearItemClass) {
        super(nearItemClass);
    }

    /** Builds the item as a composite, then applies the distance property if present. */
    @Override
    public Object formToItem(String name, ItemArguments arguments, ItemContext itemContext) {
        NearItem nearItem = (NearItem) super.formToItem(name, arguments, itemContext);
        setDistance(nearItem, arguments);
        return nearItem;
    }

    /** Sets the distance from the arguments, which must be an integral number when given. */
    private void setDistance(NearItem nearItem, ItemArguments arguments) {
        Object distance = arguments.properties.get(distanceProperty);
        if (distance != null) {
            TypeCheck.ensureInteger(distance);
            nearItem.setDistance(((Number)distance).intValue());
        }
    }

    /** Builds the form as for a composite, and always adds the distance property. */
    @Override
    public DispatchForm itemToForm(Item item, ItemIdMapper itemIdMapper) {
        DispatchForm dispatchForm = super.itemToForm(item, itemIdMapper);

        NearItem nearItem = (NearItem)item;
        dispatchForm.setProperty(distanceProperty, nearItem.getDistance());
        return dispatchForm;
    }
}

// File: container-search/src/main/java/com/yahoo/search/query/textserialize/item/PrefixConverter.java
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.textserialize.item;

import com.yahoo.prelude.query.PrefixItem;

/**
 * A word converter whose term factory produces {@link PrefixItem}s.
 *
 * @author tonytv
 */
public class PrefixConverter extends WordConverter {

    /** Wraps the given word in a new {@link PrefixItem}. */
    @Override
    PrefixItem newTermItem(String word) {
        PrefixItem prefixItem = new PrefixItem(word);
        return prefixItem;
    }
}

// File: container-search/src/main/java/com/yahoo/search/query/textserialize/item/SubStringConverter.java
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.textserialize.item;

import com.yahoo.prelude.query.SubstringItem;

/**
 * A word converter whose term factory produces {@link SubstringItem}s.
 *
 * @author tonytv
 */
public class SubStringConverter extends WordConverter {

    /** Wraps the given word in a new {@link SubstringItem}. */
    @Override
    SubstringItem newTermItem(String word) {
        SubstringItem substringItem = new SubstringItem(word);
        return substringItem;
    }
}

// File: container-search/src/main/java/com/yahoo/search/query/textserialize/item/SuffixConverter.java
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.textserialize.item;

import com.yahoo.prelude.query.SuffixItem;

/**
 * A word converter whose term factory produces {@link SuffixItem}s.
 *
 * @author tonytv
 */
public class SuffixConverter extends WordConverter {

    /** Wraps the given word in a new {@link SuffixItem}. */
    @Override
    SuffixItem newTermItem(String word) {
        return new SuffixItem(word);
    }
}

// File: container-search/src/main/java/com/yahoo/search/query/textserialize/item/TermConverter.java
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.textserialize.item;

import com.yahoo.prelude.query.Item;
import com.yahoo.prelude.query.TermItem;
import com.yahoo.search.query.textserialize.serializer.DispatchForm;
import com.yahoo.search.query.textserialize.serializer.ItemIdMapper;

/**
 * Base class of converters for term items: forms with exactly one child, the term value as a string.
 *
 * @author tonytv
 */
public abstract class TermConverter implements ItemFormConverter {

    /**
     * Creates a term item from the single string child of the form and initializes it from the properties.
     *
     * @throws IllegalArgumentException if the form does not have exactly one child
     * @throws RuntimeException if the child is not a string
     */
    @Override
    public Object formToItem(String name, ItemArguments arguments, ItemContext context) {
        ensureOnlyOneChild(arguments);
        String word = getWord(arguments);

        TermItem item = newTermItem(word);
        ItemInitializer.initialize(item, arguments, context);
        return item;
    }

    /** Creates the concrete term item holding the given value. */
    abstract TermItem newTermItem(String word);

    private void ensureOnlyOneChild(ItemArguments arguments) {
        if (arguments.children.size() != 1) {
            throw new IllegalArgumentException("Expected exactly one argument, got '" +
                                               arguments.children.toString() + "'");
        }
    }

    private String getWord(ItemArguments arguments) {
        Object word = arguments.children.get(0);

        if (!(word instanceof String)) {
            // A null child must produce this error too, not a NullPointerException from getClass()
            String typeName = (word == null) ? "null" : word.getClass().getName();
            throw new RuntimeException("Expected string, got '" + word + "' [" + typeName + "].");
        }
        return (String)word;
    }

    /**
     * Builds a form named after the item type, with the common properties set
     * and the term value as the only child.
     */
    @Override
    public DispatchForm itemToForm(Item item, ItemIdMapper itemIdMapper) {
        TermItem termItem = (TermItem)item;

        DispatchForm form = new DispatchForm(termItem.getItemType().name());
        ItemInitializer.initializeForm(form, item, itemIdMapper);
        form.addChild(getValue(termItem));
        return form;
    }

    /** Returns the value of the term item in the textual form used as the child of its form. */
    protected abstract String getValue(TermItem item);
}

// File: container-search/src/main/java/com/yahoo/search/query/textserialize/item/TypeCheck.java
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.textserialize.item;

import com.yahoo.protect.Validator;

/**
 * Type checks of values read from forms.
 *
 * @author tonytv
 */
public class TypeCheck {

    /**
     * Checks the object against the class using {@link Validator#ensureInstanceOf}, with a description
     * naming the expected class and the simple name of the actual class ("null" for a null object).
     */
    public static void ensureInstanceOf(Object object, Class<?> c) {
        // Building the description must not itself fail on null; the verdict is left to the validator.
        String actual = (object == null) ? "null" : object.getClass().getSimpleName();
        Validator.ensureInstanceOf(expectationString(c.getName(), actual), object, c);
    }

    /**
     * Checks that the value is a number whose value is exactly representable as an int.
     *
     * @throws IllegalArgumentException if the int value of the number differs from its double value
     */
    public static void ensureInteger(Object value) {
        ensureInstanceOf(value, Number.class);
        Number number = (Number)value;

        int intValue = number.intValue();
        if (intValue != number.doubleValue())
            throw new IllegalArgumentException("Invalid integer '" + number + "'");
    }

    private static String expectationString(String expected, String got) {
        return "Expected " + expected + ", but got " + got;
    }
}

// File: container-search/src/main/java/com/yahoo/search/query/textserialize/item/WordConverter.java
// Copyright 2016 Yahoo Inc.
// Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.textserialize.item;

import com.yahoo.prelude.query.TermItem;
import com.yahoo.prelude.query.WordItem;

/**
 * Term converter for {@link WordItem}s.
 *
 * @author tonytv
 */
public class WordConverter extends TermConverter {

    /** Wraps the given word in a new {@link WordItem}. */
    @Override
    WordItem newTermItem(String word) {
        WordItem wordItem = new WordItem(word);
        return wordItem;
    }

    /** Returns the word of the item, which must be a {@link WordItem}. */
    @Override
    protected String getValue(TermItem item) {
        WordItem wordItem = (WordItem) item;
        return wordItem.getWord();
    }
}

// File: container-search/src/main/java/com/yahoo/search/query/textserialize/package-info.java
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
@ExportPackage
@PublicApi
package com.yahoo.search.query.textserialize;

import com.yahoo.api.annotations.PublicApi;
import com.yahoo.osgi.annotation.ExportPackage;

// File: container-search/src/main/java/com/yahoo/search/query/textserialize/parser/.gitignore
// (ignore patterns, one per line)
//   /TokenMgrError.java
//   /Token.java
//   /SimpleCharStream.java
//   /ParserTokenManager.java
//   /ParserConstants.java
//   /ParseException.java
//   /Parser.java
// File: container-search/src/main/java/com/yahoo/search/query/textserialize/parser/DispatchFormHandler.java
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.textserialize.parser;

import java.util.List;

/**
 * Callback receiving a form name with its arguments, and returning the object the form stands for.
 *
 * @author tonytv
 */
public interface DispatchFormHandler {

    /**
     * @param name the name of the form
     * @param arguments the arguments of the form
     * @param dispatchContext a context object passed along to the handler
     */
    Object dispatch(String name, List<Object> arguments, Object dispatchContext);
}

// File: container-search/src/main/java/com/yahoo/search/query/textserialize/serializer/DispatchForm.java
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.textserialize.serializer;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * A named form with properties and children, serializable to text as
 * {@code (name properties child child ...)}.
 *
 * @author tonytv
 */
public class DispatchForm {

    private final String name;

    /** The properties of this form, in insertion order. */
    public final Map<Object, Object> properties = new LinkedHashMap<>();

    /** The children of this form, in the order added. */
    public final List<Object> children = new ArrayList<>();

    public DispatchForm(String name) {
        this.name = name;
    }

    /** Sets a property, replacing any value previously set for the key. */
    public void setProperty(Object key, Object value) {
        properties.put(key, value);
    }

    /** Appends a child. */
    public void addChild(Object child) {
        children.add(child);
    }

    /**
     * Serializes this form: the name, then the property map unless it is empty, then each child,
     * separated by single spaces and enclosed in parentheses.
     * Only public for the purpose of testing.
     */
    public String serialize(ItemIdMapper itemIdMapper) {
        StringBuilder text = new StringBuilder();
        text.append('(').append(name);

        if (!properties.isEmpty()) {
            text.append(' ').append(Serializer.serializeMap(properties, itemIdMapper));
        }
        for (Object child : children) {
            text.append(' ').append(Serializer.serialize(child, itemIdMapper));
        }

        return text.append(')').toString();
    }
}

// File: container-search/src/main/java/com/yahoo/search/query/textserialize/serializer/ItemIdMapper.java
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.textserialize.serializer;

import com.yahoo.prelude.query.Item;

import java.util.IdentityHashMap;
import java.util.Map;

/**
 * Hands out string ids to items. The same item instance always gets the same id from a given mapper;
 * ids are built from the item name and a counter that starts at zero.
 *
 * @author tonytv
 */
public class ItemIdMapper {

    // Keyed by identity: two distinct item instances never share an id.
    private final Map<Item, String> idByItem = new IdentityHashMap<>();
    private int idCounter = 0;

    /** Returns the id of the item, assigning a new one the first time the item is seen. */
    public String getId(Item item) {
        String known = idByItem.get(item);
        if (known != null) {
            return known;
        }
        String created = item.getName() + "_" + (idCounter++);
        idByItem.put(item, created);
        return created;
    }
}

// File: container-search/src/main/java/com/yahoo/search/query/textserialize/serializer/QueryTreeSerializer.java
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.textserialize.serializer;

import com.yahoo.prelude.query.Item;

/**
 * Serializes a query tree to its text form.
 *
 * @author tonytv
 */
public class QueryTreeSerializer {

    /** Serializes the tree rooted at the given item, using a fresh id mapper for this call. */
    public String serialize(Item root) {
        // Same conversion as for any nested item; shared with Serializer rather than repeated here.
        return Serializer.serializeItem(root, new ItemIdMapper());
    }
}

// File: container-search/src/main/java/com/yahoo/search/query/textserialize/serializer/Serializer.java
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.textserialize.serializer;

import com.yahoo.prelude.query.Item;
import com.yahoo.search.query.textserialize.item.ItemExecutorRegistry;

import java.util.List;
import java.util.Map;

import static com.yahoo.search.query.textserialize.item.ListUtil.butFirst;
import static com.yahoo.search.query.textserialize.item.ListUtil.first;

/**
 * Text serialization of the values which may occur in a form:
 * forms, items, strings, numbers, maps and lists.
 *
 * @author tonytv
 */
class Serializer {

    /**
     * Serializes a value according to its type.
     *
     * @throws IllegalArgumentException if the value is null or of a type which cannot be serialized
     */
    static String serialize(Object child, ItemIdMapper itemIdMapper) {
        if (child == null) {
            // Previously this surfaced as a NullPointerException from the getClass() call below.
            throw new IllegalArgumentException("Can't serialize null");
        }

        if (child instanceof DispatchForm) {
            return ((DispatchForm) child).serialize(itemIdMapper);
        } else if (child instanceof Item) {
            return serializeItem((Item) child, itemIdMapper);
        } else if (child instanceof String) {
            return serializeString((String) child);
        } else if (child instanceof Number) {
            return child.toString();
        } else if (child instanceof Map) {
            return serializeMap((Map<?, ?>)child, itemIdMapper);
        } else if (child instanceof List) {
            return serializeList((List<?>)child, itemIdMapper);
        } else {
            throw new IllegalArgumentException("Can't serialize type " + child.getClass());
        }
    }

    /** Quotes the string, escaping backslashes first and then double quotes. */
    private static String serializeString(String string) {
        return '"' + string.replace("\\", "\\\\").replace("\"", "\\\"") + '"';
    }

    /** Serializes the list as {@code [e1, e2, ...]}. */
    static String serializeList(List<?> list, ItemIdMapper itemIdMapper) {
        StringBuilder builder = new StringBuilder();
        builder.append('[');

        if (!list.isEmpty()) {
            builder.append(serialize(first(list), itemIdMapper));

            for (Object element : butFirst(list)) {
                builder.append(", ").append(serialize(element, itemIdMapper));
            }
        }

        builder.append(']');
        return builder.toString();
    }

    /** Serializes the map as <code>{k1 v1, k2 v2, ...}</code>, in the iteration order of the map. */
    static String serializeMap(Map<?, ?> map, ItemIdMapper itemIdMapper) {
        StringBuilder builder = new StringBuilder();
        builder.append("{");

        if (!map.isEmpty()) {
            serializeEntry(builder, first(map.entrySet()), itemIdMapper);
            for (Map.Entry<?, ?> entry : butFirst(map.entrySet())) {
                builder.append(", ");
                serializeEntry(builder, entry, itemIdMapper);
            }
        }

        builder.append('}');
        return builder.toString();
    }

    /** Appends the serialized key and value of the entry, separated by a space. */
    static void serializeEntry(StringBuilder builder, Map.Entry<?, ?> entry, ItemIdMapper itemIdMapper) {
        builder.append(serialize(entry.getKey(), itemIdMapper)).append(' ').
                append(serialize(entry.getValue(), itemIdMapper));
    }

    /** Converts the item to a form using the converter registered for its type, and serializes the form. */
    static String serializeItem(Item item, ItemIdMapper itemIdMapper) {
        return ItemExecutorRegistry.getByType(item.getItemType()).itemToForm(item, itemIdMapper).serialize(itemIdMapper);
    }
}

// File: container-search/src/main/java/com/yahoo/search/querytransform/AllLowercasingSearcher.java
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.search.querytransform; + +import java.util.Collection; + +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.WordItem; + +/** + * Transform all terms in the incoming query tree and highlight terms to lower + * case. This searcher is a compatibility layer for customers needing to use + * FSAs created for pre-5.1 systems. + * + * <p> + * Add this searcher to your search chain before any searcher running + * case-dependent automata with only lowercased contents, query transformers + * assuming lowercased input etc. Refer to the Vespa documentation on search + * chains and search chain ordering. + * </p> + * + * @since 5.1.3. + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class AllLowercasingSearcher extends LowercasingSearcher { + + @Override + public boolean shouldLowercase(WordItem word, IndexFacts.Session settings) { + return true; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/querytransform/BooleanAttributeParser.java b/container-search/src/main/java/com/yahoo/search/querytransform/BooleanAttributeParser.java new file mode 100644 index 00000000000..902de89c94e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/querytransform/BooleanAttributeParser.java @@ -0,0 +1,170 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.querytransform; + +import com.yahoo.text.PositionedString; +import com.yahoo.text.SimpleMapParser; + +import java.math.BigInteger; + +/** + * Parses an attribute string on the format <code>{attribute:value, ...}</code> + * where <code>value</code>' is either a single value or a list of values + * <code>[value1,value2,...]</code>, and each of the values can have an optional + * bitmap specified <code>value:bitmap</code>. 
<code>bitmap</code> can be either + * a 64-bit hex number <code>0x1234</code> or a list of bits <code>[0, 2, 43, + * 22, ...]</code>. + * + * @author <a href="mailto:magnarn@yahoo-inc.com">Magnar Nedland</a> + * @since 5.1.15 + */ +abstract class BooleanAttributeParser extends SimpleMapParser { + private boolean isMap = true; + + @Override + public void parse(String s) { + if (s == null || s.length() == 0) return; + super.parse(s); + if (string().position() != string().string().length()) { + throw new IllegalArgumentException("Expected end of string " + string().at()); + } + } + + // Value ends at ',' or '}' for map, and at ',' or ']' for list. + @Override + protected int findEndOfValue() { + if (isMap) { + return findNextButSkipLists(new char[]{',','}'}, string().string(), string().position()); + } + return findNextButSkipLists(new char[]{',',']'}, string().string(), string().position()); + } + + @Override + protected void handleKeyValue(String attribute, String value) { + // string() will point to the start of value. + if (string().peek('[') && isMap) { + // begin parsing MultiValueQueryTerm + isMap = false; + parseMultiValue(attribute); + isMap = true; + } else { + handleAttribute(attribute, value); + } + } + + /** + * Parses a list of values for a given attribute. When calling this + * function, string() must point to the start of the list. + */ + private void parseMultiValue(String attribute) { + // string() will point to the start of value. + string().consume('['); + while (!string().peek(']')) { + string().consumeSpaces(); + consumeValue(attribute); + string().consumeOptional(','); + string().consumeSpaces(); + } + } + + /** + * Handles one attribute, possibly with a subquery bitmap. + * @param attribute Attribute name + * @param value Either value, or value:bitmap, where bitmap is either a 64-bit hex number or a list of bits. 
+ */ + private void handleAttribute(String attribute, String value) { + int pos = value.indexOf(':'); + if (pos != -1) { + parseBitmap(attribute, value.substring(0, pos), value.substring(pos + 1)); + } else { + addAttribute(attribute, value); + } + } + + // Parses a bitmap string that's either a list of bits or a hex number. + private void parseBitmap(String attribute, String value, String bitmap) { + if (bitmap.charAt(0) == '[') { + parseBitmapList(attribute, value, bitmap); + } else { + parseBitmapHex(attribute, value, bitmap); + } + } + + /** + * Adds attributes with the specified bitmap to normalizer. + * @param attribute Attribute to add + * @param value Value of attribute + * @param bitmap Bitmap as a hex number, with a '0x' prefix. + */ + private void parseBitmapHex(String attribute, String value, String bitmap) { + PositionedString s = new PositionedString(bitmap); + s.consume('0'); + s.consume('x'); + addAttribute(attribute, value, new BigInteger(s.substring().trim(),16)); + } + + /** + * Adds attributes with the specified bitmap to normalizer. + * @param attribute Attribute to add + * @param value Value of attribute + * @param bitmap Bitmap as a list of bits, e.g. 
'[0, 3, 45]' + */ + private void parseBitmapList(String attribute, String value, String bitmap) { + PositionedString s = new PositionedString(bitmap); + s.consume('['); + BigInteger mask = BigInteger.ZERO; + while (!s.peek(']')) { + s.consumeSpaces(); + int pos = findNextButSkipLists(new char[]{',',']'}, s.string(), s.position()); + if (pos == -1) { + break; + } + int subqueryIndex = Integer.parseUnsignedInt(s.substring(pos).trim()); + if (subqueryIndex > 63 || subqueryIndex < 0) { + throw new IllegalArgumentException("Subquery index must be in the range 0-63"); + } + mask = mask.or(BigInteger.ONE.shiftLeft(subqueryIndex)); + s.setPosition(pos); + s.consumeOptional(','); + s.consumeSpaces(); + } + addAttribute(attribute, value, mask); + } + + /** + * Add an attribute without a subquery mask + * @param attribute name of attribute + * @param value value of attribute + */ + protected abstract void addAttribute(String attribute, String value); + + /** + * Add an attribute with a subquery mask + * @param attribute name of attribute + * @param value value of attribute + * @param subqueryMask subquery mask for attribute (64-bit) + */ + protected abstract void addAttribute(String attribute, String value, BigInteger subqueryMask); + + /** + * Finds next index of a set of chars, but skips past any lists ("[...]"). + * @param chars Characters to find. Note that '[' should not be in this list. + * @param s String to search + * @param position position in s to start at. + * @return position of first char from "chars" that does not appear within brackets. 
+ */ + private static int findNextButSkipLists(char[] chars, String s, int position) { + for (; position<s.length(); position++) { + if (s.charAt(position)=='[') { + position = findNextButSkipLists(new char[]{']'}, s, position + 1); + if (position<0) return -1; + } else { + for (char c : chars) { + if (s.charAt(position)==c) + return position; + } + } + } + return -1; + } +} + diff --git a/container-search/src/main/java/com/yahoo/search/querytransform/BooleanSearcher.java b/container-search/src/main/java/com/yahoo/search/querytransform/BooleanSearcher.java new file mode 100644 index 00000000000..1fd394acd54 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/querytransform/BooleanSearcher.java @@ -0,0 +1,113 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.querytransform; + +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.prelude.query.PredicateQueryItem; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.grouping.request.parser.TokenMgrError; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; + +import java.math.BigInteger; + +import static com.yahoo.prelude.querytransform.NormalizingSearcher.ACCENT_REMOVAL; +import static com.yahoo.prelude.querytransform.StemmingSearcher.STEMMING; +import static com.yahoo.yolean.Exceptions.toMessageString; + +/** + * Searcher that builds a PredicateItem from the &boolean properties and inserts it into a query. 
+ * @author <a href="mailto:magnarn@yahoo-inc.com">Magnar Nedland</a> + */ +@After({ STEMMING, ACCENT_REMOVAL }) +@Provides(BooleanSearcher.PREDICATE) +public class BooleanSearcher extends Searcher { + private static final CompoundName FIELD = new CompoundName("boolean.field"); + private static final CompoundName ATTRIBUTES = new CompoundName("boolean.attributes"); + private static final CompoundName RANGE_ATTRIBUTES = new CompoundName("boolean.rangeAttributes"); + public static final String PREDICATE = "predicate"; + + @Override + public Result search(Query query, Execution execution) { + String fieldName = query.properties().getString(FIELD); + if (fieldName != null) { + return search(query, execution, fieldName); + } else { + if (query.isTraceable(5)) { + query.trace("BooleanSearcher: Nothing added to query", false, 5); + } + } + return execution.search(query); + } + + private Result search(Query query, Execution execution, String fieldName) { + String attributes = query.properties().getString(ATTRIBUTES); + String rangeAttributes = query.properties().getString(RANGE_ATTRIBUTES); + if (query.isTraceable(5)) { + query.trace("BooleanSearcher: fieldName(" + fieldName + "), attributes(" + attributes + + "), rangeAttributes(" + rangeAttributes + ")", false, 5); + } + + if (attributes != null || rangeAttributes != null) { + try { + addPredicateTerm(query, fieldName, attributes, rangeAttributes); + if (query.isTraceable(4)) { + query.trace("BooleanSearcher: Added boolean operator", true, 4); + } + } catch (TokenMgrError e) { + return new Result(query, ErrorMessage.createInvalidQueryParameter(toMessageString(e))); + } + } else { + if (query.isTraceable(5)) { + query.trace("BooleanSearcher: Nothing added to query", false, 5); + } + } + return execution.search(query); + } + + // Adds a boolean term ANDed to the query, based on the supplied properties. 
+ private void addPredicateTerm(Query query, String fieldName, String attributes, String rangeAttributes) { + PredicateQueryItem item = new PredicateQueryItem(); + item.setIndexName(fieldName); + new PredicateValueAttributeParser(item).parse(attributes); + new PredicateRangeAttributeParser(item).parse(rangeAttributes); + QueryTreeUtil.andQueryItemWithRoot(query, item); + } + + static public class PredicateValueAttributeParser extends BooleanAttributeParser { + private PredicateQueryItem item; + public PredicateValueAttributeParser(PredicateQueryItem item) { + this.item = item; + } + + @Override + protected void addAttribute(String attribute, String value) { + item.addFeature(attribute, value); + } + + @Override + protected void addAttribute(String attribute, String value, BigInteger subQueryMask) { + item.addFeature(attribute, value, subQueryMask.longValue()); + } + } + + static private class PredicateRangeAttributeParser extends BooleanAttributeParser { + private PredicateQueryItem item; + public PredicateRangeAttributeParser(PredicateQueryItem item) { + this.item = item; + } + + @Override + protected void addAttribute(String attribute, String value) { + item.addRangeFeature(attribute, Long.parseLong(value)); + } + + @Override + protected void addAttribute(String attribute, String value, BigInteger subQueryMask) { + item.addRangeFeature(attribute, Long.parseLong(value), subQueryMask.longValue()); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/search/querytransform/DefaultPositionSearcher.java b/container-search/src/main/java/com/yahoo/search/querytransform/DefaultPositionSearcher.java new file mode 100644 index 00000000000..c2d462a17e4 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/querytransform/DefaultPositionSearcher.java @@ -0,0 +1,47 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.querytransform; + +import static com.yahoo.prelude.searcher.PosSearcher.POSITION_PARSING; + +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.Location; +import com.yahoo.search.Query; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +import java.util.List; +import java.util.Set; + +/** + * If default position has not been set, it will be set here. + * + * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a> + */ +@After({PhaseNames.RAW_QUERY, POSITION_PARSING}) +@Before(PhaseNames.TRANSFORMED_QUERY) +public class DefaultPositionSearcher extends Searcher { + + @Override + public com.yahoo.search.Result search(Query query, Execution execution) { + Location location = query.getRanking().getLocation(); + if (location != null && (location.getAttribute() == null)) { + IndexFacts facts = execution.context().getIndexFacts(); + List<String> search = facts.newSession(query.getModel().getSources(), query.getModel().getRestrict()).documentTypes(); + + for (String sd : search) { + String defaultPosition = facts.getDefaultPosition(sd); + if (defaultPosition != null) { + location.setAttribute(defaultPosition); + } + } + if (location.getAttribute() == null) { + location.setAttribute(facts.getDefaultPosition(null)); + } + } + return execution.search(query); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/querytransform/LegacyCombinator.java b/container-search/src/main/java/com/yahoo/search/querytransform/LegacyCombinator.java new file mode 100644 index 00000000000..41af5736da7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/querytransform/LegacyCombinator.java @@ -0,0 +1,365 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.search.querytransform;

import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

import com.yahoo.component.chain.dependencies.Before;
import com.yahoo.language.Language;
import com.yahoo.log.LogLevel;
import com.yahoo.prelude.Index;
import com.yahoo.prelude.IndexFacts;
import com.yahoo.prelude.query.AndItem;
import com.yahoo.prelude.query.CompositeItem;
import com.yahoo.prelude.query.IndexedItem;
import com.yahoo.prelude.query.Item;
import com.yahoo.prelude.query.NotItem;
import com.yahoo.prelude.query.NullItem;
import com.yahoo.prelude.query.RankItem;
import com.yahoo.prelude.query.parser.CustomParser;
import com.yahoo.search.Query;
import com.yahoo.search.Result;
import com.yahoo.search.Searcher;
import com.yahoo.yolean.Exceptions;
import com.yahoo.search.query.Properties;
import com.yahoo.search.query.QueryTree;
import com.yahoo.search.query.parser.ParserEnvironment;
import com.yahoo.search.query.parser.ParserFactory;
import com.yahoo.search.result.ErrorMessage;
import com.yahoo.search.searchchain.Execution;

/**
 * Compatibility layer to implement the old multi part query syntax, along with
 * the features of QueryCombinator. Do <b>not</b> use both QueryCombinator and
 * LegacyCombinator in a single search.
 *
 * <p>
 * A searcher which grabs query parameters of the form
 * "defidx.(identifier)=(index name)" and "query.(identifier)=(user query)",
 * parses them and adds them as AND items to the query root.
 *
 * <p>
 * If the given default index does not exist in the search definition, the query
 * part will be parsed with the settings of the default index set to "".
 *
 * <p>
 * If any of the following arguments exist, they will be used:
 *
 * <p>
 * query.(identifier)=query string<br>
 * query.(identifier).operator={"req", "rank", "not"}, where "req" is default<br>
 * query.(identifier).defidx=default index<br>
 * query.(identifier).type={"all", "any", "phrase", "adv", "web"} where "all" is
 * default
 *
 * <p>
 * If both defidx.(identifier) and any of
 * query.(identifier).{operator,defidx,type} is present in the query, an
 * InvalidQueryParameter error will be added, and the query will be passed
 * through untransformed.
 *
 * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
 */
@Before({"transformedQuery", "com.yahoo.prelude.querytransform.StemmingSearcher"})
public class LegacyCombinator extends Searcher {

    private static final String TYPESUFFIX = ".type";
    private static final String OPERATORSUFFIX = ".operator";
    private static final String DEFIDXSUFFIX = ".defidx";
    private static final String DEFIDXPREFIX = "defidx.";
    private static final String QUERYPREFIX = "query.";

    /** How a query part is combined with the rest of the query. */
    private enum Combinator {

        REQUIRED("req"), PREFERRED("rank"), EXCLUDED("not");

        private final String parameterValue;

        Combinator(String parameterValue) {
            this.parameterValue = parameterValue;
        }

        /** Returns the combinator with the given parameter value, or REQUIRED if there is none (including null). */
        static Combinator getCombinator(String name) {
            for (Combinator c : values()) {
                if (c.parameterValue.equals(name)) {
                    return c;
                }
            }
            return REQUIRED;
        }
    }

    /** One "query.(identifier)" parameter with its settings. Identity is decided by the identifier alone. */
    private static class QueryPart {

        final String query;
        final String defaultIndex;
        final Combinator operator;
        final String identifier;
        final Query.Type syntax;

        /**
         * @param identifier the (identifier) part of the parameter name
         * @param defaultIndex the value of defidx.(identifier), or null
         * @param oldIndex the value of query.(identifier).defidx, or null
         * @param operator the value of query.(identifier).operator, or null
         * @param query the query string
         * @param syntax the value of query.(identifier).type, or null
         * @throws IllegalArgumentException if defaultIndex is combined with oldIndex, operator or syntax
         */
        QueryPart(String identifier, String defaultIndex, String oldIndex,
                  String operator, String query, String syntax) {
            validateArguments(identifier, defaultIndex, oldIndex, operator, syntax);
            this.query = query;
            this.defaultIndex = (defaultIndex != null) ? defaultIndex : oldIndex;
            this.operator = Combinator.getCombinator(operator);
            this.identifier = identifier;
            this.syntax = Query.Type.getType(syntax);
        }

        private static void validateArguments(String identifier, String defaultIndex,
                                              String oldIndex, String operator, String syntax) {
            if (defaultIndex == null) {
                return;
            }
            if (oldIndex != null) {
                throw new IllegalArgumentException(createErrorMessage(identifier, DEFIDXSUFFIX));
            }
            if (operator != null) {
                throw new IllegalArgumentException(createErrorMessage(identifier, OPERATORSUFFIX));
            }
            if (syntax != null) {
                throw new IllegalArgumentException(createErrorMessage(identifier, TYPESUFFIX));
            }
        }

        private static String createErrorMessage(String identifier, String legacyArgument) {
            return "Cannot set both " + DEFIDXPREFIX + identifier + " and "
                   + QUERYPREFIX + identifier + legacyArgument + ".";
        }

        @Override
        public int hashCode() {
            return Objects.hash(identifier);
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj)
                return true;
            if (obj == null)
                return false;
            if (getClass() != obj.getClass())
                return false;
            return Objects.equals(identifier, ((QueryPart) obj).identifier);
        }

        @Override
        public String toString() {
            return "QueryPart(" + identifier + ", " + defaultIndex + ", "
                   + operator + ", " + syntax + ")";
        }
    }

    /**
     * Finds the query parts in the query properties and adds them to the query tree:
     * first all required parts, then all rank parts, then all excluded parts.
     * On invalid parameters, or if an excluded part is found when the tree has no positive
     * terms, an error is added to the query before it is passed on.
     */
    @Override
    public Result search(Query query, Execution execution) {
        Set<QueryPart> pieces;
        IndexFacts indexFacts = execution.context().getIndexFacts();
        try {
            pieces = findQuerySnippets(query.properties());
        } catch (IllegalArgumentException e) {
            query.errors().add(ErrorMessage.createInvalidQueryParameter("LegacyCombinator got invalid parameters: "
                                                                        + e.getMessage()));
            return execution.search(query);
        }
        if (pieces.size() == 0) {
            return execution.search(query);
        }
        IndexFacts.Session session = indexFacts.newSession(query);
        Language language = query.getModel().getParsingLanguage();
        addParts(Combinator.REQUIRED, language, query, pieces, session, execution.context());
        addParts(Combinator.PREFERRED, language, query, pieces, session, execution.context());
        try {
            addParts(Combinator.EXCLUDED, language, query, pieces, session, execution.context());
        } catch (IllegalArgumentException e) {
            query.errors().add(ErrorMessage.createInvalidQueryParameter("LegacyCombinator found only excluding terms, no including."));
            return execution.search(query);
        }
        query.trace("Adding extra query parts.", true, 2);
        return execution.search(query);
    }

    /**
     * Parses every part having the given operator and adds the result to the query tree
     * in the way that operator prescribes. Parts which fail to parse are skipped.
     */
    private void addParts(Combinator operator, Language language, Query query, Iterable<QueryPart> pieces,
                          IndexFacts.Session session, Execution.Context context) {
        for (QueryPart part : pieces) {
            if (part.operator != operator) continue;

            String defaultIndex = defaultIndex(session, part);
            Item item = parse(language, query, part, defaultIndex, context);
            if (item == null) continue;

            setDefaultIndex(part, defaultIndex, item);
            QueryTree queryTree = query.getModel().getQueryTree();
            switch (operator) {
                case REQUIRED:
                    addAndItem(queryTree, item);
                    break;
                case PREFERRED:
                    addRankItem(queryTree, item);
                    break;
                case EXCLUDED:
                    addNotItem(queryTree, item);
                    break;
            }
        }
    }

    /**
     * Adds the item as a negative item of the root.
     *
     * @throws IllegalArgumentException if the tree has no root to exclude from
     */
    private static void addNotItem(QueryTree queryTree, Item item) {
        Item root = queryTree.getRoot();
        // JavaDoc claims I can get null, code gives NullItem... well, well, well...
        if (root instanceof NullItem || root == null) {
            // errr... no positive branch at all?
            throw new IllegalArgumentException("No positive terms for query.");
        } else if (root.getClass() == NotItem.class) {
            ((NotItem) root).addNegativeItem(item);
        } else {
            NotItem newRoot = new NotItem();
            newRoot.addPositiveItem(root);
            newRoot.addNegativeItem(item);
            queryTree.setRoot(newRoot);
        }
    }

    /** Adds the item as a rank item of the root, wrapping the root in a RankItem if necessary. */
    private static void addRankItem(QueryTree queryTree, Item item) {
        Item root = queryTree.getRoot();
        // JavaDoc claims I can get null, code gives NullItem... well, well, well...
        if (root instanceof NullItem || root == null) {
            // if no clear recall terms, just set the rank term as recall
            queryTree.setRoot(item);
        } else if (root.getClass() == RankItem.class) {
            ((RankItem) root).addItem(item);
        } else {
            RankItem newRoot = new RankItem();
            newRoot.addItem(root);
            newRoot.addItem(item);
            queryTree.setRoot(newRoot);
        }
    }

    /** Adds the item to the root AndItem, wrapping the root in an AndItem if necessary. */
    private static void addAndItem(QueryTree queryTree, Item item) {
        Item root = queryTree.getRoot();
        // JavaDoc claims I can get null, code gives NullItem... well, well, well...
        if (root instanceof NullItem || root == null) {
            queryTree.setRoot(item);
        } else if (root.getClass() == AndItem.class) {
            ((AndItem) root).addItem(item);
        } else {
            AndItem newRoot = new AndItem();
            newRoot.addItem(root);
            newRoot.addItem(item);
            queryTree.setRoot(newRoot);
        }
    }

    /**
     * When the part was parsed without a default index (because its index is unknown),
     * assigns the part's index name to all parsed items which have an empty index name.
     */
    private void setDefaultIndex(QueryPart part, String defaultIndex, Item item) {
        if (defaultIndex == null) {
            assignDefaultIndex(item, part.defaultIndex);
        }
    }

    /**
     * Parses the query part using the parser for its syntax.
     *
     * @return the parsed item, or null if the parser threw an exception (which is traced and logged)
     */
    private Item parse(Language language, Query query, QueryPart part, String defaultIndex, Execution.Context context) {
        Item item = null;
        try {
            CustomParser parser = (CustomParser) ParserFactory.newInstance(
                    part.syntax, ParserEnvironment.fromExecutionContext(context));
            item = parser.parse(part.query, null, language, query.getModel().getSources(),
                                context.getIndexFacts(), defaultIndex);
        } catch (RuntimeException e) {
            String err = Exceptions.toMessageString(e);
            query.trace("Query parser threw an exception: " + err, true, 1);
            getLogger().log(LogLevel.WARNING,
                            "Query parser threw exception in searcher LegacyCombinator for "
                            + query.getHttpRequest().toString() + ", query part " + part.query + ": " + err);
        }
        return item;
    }

    /** Returns the default index of the part, or null if the index facts resolve it to the null index. */
    private String defaultIndex(IndexFacts.Session indexFacts, QueryPart part) {
        if (indexFacts.getIndex(part.defaultIndex) == Index.nullIndex) {
            return null;
        }
        return part.defaultIndex;
    }

    /** Recursively sets the index name of all indexed items which have the empty string as index name. */
    private static void assignDefaultIndex(Item item, String defaultIndex) {
        if (item instanceof IndexedItem) {
            IndexedItem indexed = (IndexedItem) item;

            if ("".equals(indexed.getIndexName())) {
                indexed.setIndexName(defaultIndex);
            }
        } else if (item instanceof CompositeItem) {
            Iterator<Item> items = ((CompositeItem) item).getItemIterator();
            while (items.hasNext()) {
                assignDefaultIndex(items.next(), defaultIndex);
            }
        }
    }

    /**
     * Collects one QueryPart for each property named "query.(identifier)",
     * where the identifier contains no dots.
     *
     * @throws IllegalArgumentException if a part combines defidx.(identifier) with the newer parameters
     */
    private static Set<QueryPart> findQuerySnippets(Properties properties) {
        Set<QueryPart> pieces = new HashSet<>();
        for (Map.Entry<String, Object> k : properties.listProperties().entrySet()) {
            String key = k.getKey();
            if (!key.startsWith(QUERYPREFIX)) {
                continue;
            }
            String name = key.substring(QUERYPREFIX.length());
            if (hasDots(name)) {
                continue; // a setting of a part (e.g. query.x.type), not a part itself
            }
            String index = properties.getString(DEFIDXPREFIX + name);
            String oldIndex = properties.getString(QUERYPREFIX + name + DEFIDXSUFFIX);
            String operator = properties.getString(QUERYPREFIX + name + OPERATORSUFFIX);
            String type = properties.getString(QUERYPREFIX + name + TYPESUFFIX);
            pieces.add(new QueryPart(name, index, oldIndex, operator, k.getValue().toString(), type));
        }
        return pieces;
    }

    private static boolean hasDots(String name) {
        return name.indexOf('.') != -1;
    }

}
diff --git a/container-search/src/main/java/com/yahoo/search/querytransform/LowercasingSearcher.java b/container-search/src/main/java/com/yahoo/search/querytransform/LowercasingSearcher.java new file mode 100644 index 00000000000..d3916c4bfe1 --- /dev/null +++
b/container-search/src/main/java/com/yahoo/search/querytransform/LowercasingSearcher.java @@ -0,0 +1,140 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.querytransform;

import com.yahoo.prelude.IndexFacts;
import com.yahoo.prelude.query.*;
import com.yahoo.search.Query;
import com.yahoo.search.Result;
import com.yahoo.search.Searcher;
import com.yahoo.search.searchchain.Execution;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static com.yahoo.language.LinguisticsCase.toLowerCase;

/**
 * Walks the query tree and the highlight terms and lowercases terms;
 * subclasses decide which terms to lowercase.
 *
 * @since 5.1.3
 * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
 */
public abstract class LowercasingSearcher extends Searcher {

    /** Whether string tokens of weighted sets are lowercased as well. */
    private final boolean transformWeightedSets;

    public LowercasingSearcher() {
        this(new LowercasingConfig(new LowercasingConfig.Builder()));
    }

    public LowercasingSearcher(LowercasingConfig cfg) {
        this.transformWeightedSets = cfg.transform_weighted_sets();
    }

    /** Lowercases the query tree, then the highlight items, and passes the query on. */
    @Override
    public Result search(Query query, Execution execution) {
        IndexFacts.Session session = execution.context().getIndexFacts().newSession(query);
        traverse(query.getModel().getQueryTree(), session);
        traverseHighlight(query.getPresentation().getHighlight(), session);
        query.trace("Lowercasing", true, 2);
        return execution.search(query);
    }

    private void traverseHighlight(Highlight highlight, IndexFacts.Session indexFacts) {
        if (highlight != null) {
            for (AndItem highlightRoot : highlight.getHighlightItems().values()) {
                traverse(highlightRoot, indexFacts);
            }
        }
    }

    /** Recursively visits the children of the given composite and lowercases those which qualify. */
    private void traverse(CompositeItem base, IndexFacts.Session indexFacts) {
        Iterator<Item> children = base.getItemIterator();
        while (children.hasNext()) {
            Item child = children.next();
            if (child instanceof WordItem) {
                lowerCase((WordItem) child, indexFacts);
            } else if (child instanceof CompositeItem) {
                traverse((CompositeItem) child, indexFacts);
            } else if (child instanceof WeightedSetItem) {
                if (transformWeightedSets) {
                    lowerCase((WeightedSetItem) child, indexFacts);
                }
            } else if (child instanceof WordAlternativesItem) {
                lowerCase((WordAlternativesItem) child, indexFacts);
            }
        }
    }

    private void lowerCase(WordItem word, IndexFacts.Session indexFacts) {
        if ( ! shouldLowercase(word, indexFacts)) return;

        word.setWord(toLowerCase(word.getWord()));
        word.setLowercased(true);
    }

    /** A weighted set token which changes when lowercased: the new token, the token it replaces and its weight. */
    private static final class WeightedSetToken {

        final String token;
        final String originalToken;
        final int weight;

        WeightedSetToken(String token, String originalToken, int weight) {
            this.token = token;
            this.originalToken = originalToken;
            this.weight = weight;
        }
    }

    /** Asks shouldLowercase about an empty word in the given index, for items which are not WordItems. */
    private boolean syntheticLowerCaseCheck(String indexName, IndexFacts.Session indexFacts, boolean isFromQuery) {
        return shouldLowercase(new WordItem("", indexName, isFromQuery), indexFacts);
    }

    private void lowerCase(WeightedSetItem set, IndexFacts.Session indexFacts) {
        if ( ! syntheticLowerCaseCheck(set.getIndexName(), indexFacts, true)) return;

        // First pass: collect the string tokens which are changed by lowercasing
        List<WeightedSetToken> changed = new ArrayList<>(set.getNumTokens());
        Iterator<Map.Entry<Object, Integer>> tokens = set.getTokens();
        while (tokens.hasNext()) {
            Map.Entry<Object, Integer> entry = tokens.next();
            if ( ! (entry.getKey() instanceof String)) continue;

            String original = (String) entry.getKey();
            String lowercased = toLowerCase(original);
            if (original.equals(lowercased)) continue;

            changed.add(new WeightedSetToken(lowercased, original, entry.getValue().intValue()));
        }

        // has to do it in two passes on cause of the "interesting" API in
        // weighted set, and remove before put on cause of the semantics of
        // addInternal as well as changed values...
        for (WeightedSetToken replacement : changed) {
            set.removeToken(replacement.originalToken);
            set.addToken(replacement.token, replacement.weight);
        }
    }

    /** Adds a lowercased version of each alternative, with its exactness multiplied by 0.7. */
    private void lowerCase(WordAlternativesItem alternatives, IndexFacts.Session indexFacts) {
        boolean lowercase = syntheticLowerCaseCheck(alternatives.getIndexName(), indexFacts, alternatives.isFromQuery());
        if ( ! lowercase) return;

        for (WordAlternativesItem.Alternative alternative : alternatives.getAlternatives()) {
            String lowercased = toLowerCase(alternative.word);
            alternatives.addTerm(lowercased, alternative.exactness * .7d);
        }
    }

    /**
     * Override this to control whether a given term should be lowercased.
     *
     * @param word a WordItem or subclass thereof which is a candidate for lowercasing
     * @return whether to convert the term to lower case
     */
    public abstract boolean shouldLowercase(WordItem word, IndexFacts.Session indexFacts);

}
diff --git a/container-search/src/main/java/com/yahoo/search/querytransform/NGramSearcher.java b/container-search/src/main/java/com/yahoo/search/querytransform/NGramSearcher.java new file mode 100644 index 00000000000..c487182c65d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/querytransform/NGramSearcher.java @@ -0,0 +1,285 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.search.querytransform; + +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.language.Linguistics; +import com.yahoo.language.process.CharacterClasses; +import com.yahoo.language.process.GramSplitter; +import com.yahoo.prelude.Index; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.hitfield.AnnotateStringFieldPart; +import com.yahoo.prelude.hitfield.JSONString; +import com.yahoo.prelude.hitfield.XMLString; +import com.yahoo.prelude.query.*; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.result.Hit; +import com.yahoo.search.searchchain.Execution; + +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +import static com.yahoo.prelude.searcher.JuniperSearcher.JUNIPER_TAG_REPLACING; +import static com.yahoo.language.LinguisticsCase.toLowerCase; + +/** + * Handles NGram indexes by splitting query terms to them into grams and combining summary field values + * from such fields into the original text. + * <p> + * This declares it must be placed after Juniper searchers because it assumes Juniper token separators + * (which are returned on bolding) are not replaced by highlight tags when this is run (and "after" means + * "before" from the point of view of the result). + * + * @author bratseth + */ +@After(JUNIPER_TAG_REPLACING) +public class NGramSearcher extends Searcher { + + private final GramSplitter gramSplitter; + + private final CharacterClasses characterClasses; + + public NGramSearcher(Linguistics linguistics) { + gramSplitter= linguistics.getGramSplitter(); + characterClasses= linguistics.getCharacterClasses(); + } + + @Override + public Result search(Query query, Execution execution) { + IndexFacts indexFacts = execution.context().getIndexFacts(); + if ( ! 
indexFacts.hasNGramIndices()) return execution.search(query); // shortcut + + IndexFacts.Session session = indexFacts.newSession(query); + boolean rewritten = rewriteToNGramMatching(query.getModel().getQueryTree().getRoot(), 0, session, query); + if (rewritten) + query.trace("Rewritten to n-gram matching",true,2); + + Result result=execution.search(query); + recombineNGrams(result.hits().deepIterator(), session); + return result; + } + + @Override + public void fill(Result result, String summaryClass, Execution execution) { + execution.fill(result, summaryClass); + IndexFacts indexFacts = execution.context().getIndexFacts(); + if (indexFacts.hasNGramIndices()) + recombineNGrams(result.hits().deepIterator(), indexFacts.newSession(result.getQuery())); + } + + private boolean rewriteToNGramMatching(Item item, int indexInParent, IndexFacts.Session indexFacts, Query query) { + boolean rewritten = false; + if (item instanceof SegmentItem) { // handle CJK segmented terms which should be grams instead + SegmentItem segments = (SegmentItem)item; + Index index = indexFacts.getIndex(segments.getIndexName()); + if (index.isNGram()) { + Item grams = splitToGrams(segments, toLowerCase(segments.getRawWord()), index.getGramSize(), query); + replaceItemByGrams(item, grams, indexInParent); + rewritten = true; + } + } + else if (item instanceof CompositeItem) { + CompositeItem composite = (CompositeItem)item; + for (int i=0; i<composite.getItemCount(); i++) + rewritten = rewriteToNGramMatching(composite.getItem(i), i, indexFacts, query) || rewritten; + } + else if (item instanceof TermItem) { + TermItem term = (TermItem)item; + Index index = indexFacts.getIndex(term.getIndexName()); + if (index.isNGram()) { + Item grams = splitToGrams(term,term.stringValue(), index.getGramSize(), query); + replaceItemByGrams(item, grams, indexInParent); + rewritten = true; + } + } + return rewritten; + } + + /** + * Splits the given item into n-grams and adds them as a CompositeItem containing 
WordItems searching the + * index of the input term. If the result is a single gram, that single WordItem is returned rather than the AndItem + * + * @param term the term to split, must be an item which implement the IndexedItem and BlockItem "mixins" + * @param text the text of the item, just stringValue() if the item is a TermItem + * @param gramSize the gram size to split to + * @param query the query in which this rewriting is done + * @return the root of the query subtree produced by this, containing the split items + */ + protected Item splitToGrams(Item term, String text, int gramSize, Query query) { + CompositeItem and = createGramRoot(query); + String index = ((HasIndexItem)term).getIndexName(); + Substring origin = ((BlockItem)term).getOrigin(); + for (Iterator<GramSplitter.Gram> i = getGramSplitter().split(text,gramSize); i.hasNext(); ) { + GramSplitter.Gram gram = i.next(); + WordItem gramWord = new WordItem(gram.extractFrom(text), index, false, origin); + gramWord.setWeight(term.getWeight()); + gramWord.setProtected(true); + and.addItem(gramWord); + } + return and.getItemCount()==1 ? and.getItem(0) : and; // return the AndItem, or just the single gram if not multiple + } + + /** + * Returns the (thread-safe) object to use to split the query text into grams. + */ + protected final GramSplitter getGramSplitter() { return gramSplitter; } + + /** + * Creates the root of the query subtree which will contain the grams to match, + * called by {@link #splitToGrams}. This hook is provided to make it easy to create a subclass which + * matches grams using a different composite item, e.g an OrItem. 
+ * <p> + * This default implementation return new AndItem(); + * + * @param query the input query, to make it possible to return a different composite item type + * depending on the query content + * @return the composite item to add the gram items to in {@link #splitToGrams} + */ + protected CompositeItem createGramRoot(Query query) { + return new AndItem(); + } + + private void replaceItemByGrams(Item item, Item grams, int indexInParent) { + if (!(grams instanceof CompositeItem) || !(item.getParent() instanceof PhraseItem)) { // usually, simply replace + item.getParent().setItem(indexInParent, grams); + } + else { // but if the parent is a phrase, we cannot add the AND to it, so add each gram to the phrase + PhraseItem phraseParent = (PhraseItem)item.getParent(); + phraseParent.removeItem(indexInParent); + int addedTerms = 0; + for (Iterator<Item> i = ((CompositeItem)grams).getItemIterator(); i.hasNext(); ) { + phraseParent.addItem(indexInParent+(addedTerms++),i.next()); + } + } + } + + private void recombineNGrams(Iterator<Hit> hits, IndexFacts.Session session) { + while (hits.hasNext()) { + Hit hit = hits.next(); + if (hit.isMeta()) continue; + Object sddocname = hit.getField(Hit.SDDOCNAME_FIELD); + if (sddocname == null) return; + for (String fieldName : hit.fieldKeys()) { + Index index = session.getIndex(fieldName, sddocname.toString()); + if (index.isNGram() && (index.getHighlightSummary() || index.getDynamicSummary())) { + hit.setField(fieldName, recombineNGramsField(hit.getField(fieldName), index.getGramSize())); + } + } + } + } + + private Object recombineNGramsField(Object fieldValue,int gramSize) { + String recombined=recombineNGrams(fieldValue.toString(),gramSize); + if (fieldValue instanceof JSONString) + return new JSONString(recombined); + else if (fieldValue instanceof XMLString) + return new XMLString(recombined); + else + return recombined; + } + + /** + * Converts grams to the original string. 
+ * + * Example (gram size 3): <code>blulue rededs</code> becomes <code>blue reds</code> + */ + private String recombineNGrams(final String string,final int gramSize) { + StringBuilder b=new StringBuilder(); + int consecutiveWordChars=0; + boolean inBolding=false; + MatchTokenStrippingCharacterIterator characters=new MatchTokenStrippingCharacterIterator(string); + while (characters.hasNext()) { + char c=characters.next(); + boolean atBoldingSeparator = (c=='\u001f'); + + if (atBoldingSeparator && characters.peek()=='\u001f') { + characters.next(); + } + else if ( ! characterClasses.isLetterOrDigit(c)) { + if (atBoldingSeparator) + inBolding=!inBolding; + if ( ! (atBoldingSeparator && nextIsLetterOrDigit(characters))) + consecutiveWordChars=0; + if (inBolding && atBoldingSeparator && areWordCharactersBackwards(gramSize-1,b)) { + // we are going to skip characters from a gram, so move bolding start earlier + b.insert(b.length()-(gramSize-1),c); + } + else { + b.append(c); + } + } + else { + consecutiveWordChars++; + if (consecutiveWordChars<gramSize || (consecutiveWordChars % gramSize)==0) + b.append(c); + } + } + return b.toString(); + } + + private boolean areWordCharactersBackwards(int count,StringBuilder b) { + for (int i=0; i<count; i++) { + int checkIndex=b.length()-1-i; + if (checkIndex<0) return false; + if ( ! 
characterClasses.isLetterOrDigit(b.charAt(checkIndex))) return false; + } + return true; + } + + private boolean nextIsLetterOrDigit(MatchTokenStrippingCharacterIterator characters) { + return characterClasses.isLetterOrDigit(characters.peek()); + } + + /** + * A string wrapper which skips match token forms marked up Juniper style, such that + * \uFFF9originalToken\uFFFAtoken\uFFFB is returned as originalToken + */ + private static class MatchTokenStrippingCharacterIterator { + + private final String s; + private int current =0; + + public MatchTokenStrippingCharacterIterator(String s) { + this.s=s; + } + + public boolean hasNext() { + skipMarkup(); + return current <s.length(); + } + + public char next() { + skipMarkup(); + return s.charAt(current++); + } + + /** Returns the next character without moving to it. Returns \uFFFF if there is no next */ + public char peek() { + skipMarkup(); + if (s.length()< current +1) + return '\uFFFF'; + else + return s.charAt(current); + } + + private void skipMarkup() { + if (current>=s.length()) return; + char c=s.charAt(current); + if (c== AnnotateStringFieldPart.RAW_ANNOTATE_BEGIN_CHAR) { // skip it + current++; + } + else if (c==AnnotateStringFieldPart.RAW_ANNOTATE_SEPARATOR_CHAR) { // skip to RAW_ANNOTATE_END_CHAR + do { + current++; + } while (current<s.length() && s.charAt(current)!=AnnotateStringFieldPart.RAW_ANNOTATE_END_CHAR); + current++; // also skip the RAW_ANNOTATE_END_CHAR + skipMarkup(); // skip any immediately following markup + } + } + + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/querytransform/QueryCombinator.java b/container-search/src/main/java/com/yahoo/search/querytransform/QueryCombinator.java new file mode 100644 index 00000000000..3a209a58f4a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/querytransform/QueryCombinator.java @@ -0,0 +1,155 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.querytransform; + +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import com.yahoo.component.ComponentId; +import com.yahoo.language.Language; +import com.yahoo.log.LogLevel; +import com.yahoo.prelude.Index; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.parser.CustomParser; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.query.Properties; +import com.yahoo.search.query.QueryTree; +import com.yahoo.prelude.query.AndItem; +import com.yahoo.prelude.query.CompositeItem; +import com.yahoo.prelude.query.IndexedItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.NullItem; +import com.yahoo.yolean.Exceptions; +import com.yahoo.search.query.parser.ParserEnvironment; +import com.yahoo.search.query.parser.ParserFactory; +import com.yahoo.search.searchchain.Execution; + +/** + * <p>A searcher which grabs query parameters of the form "defidx.(identifier)=(index name)" and + * "query.(identifier)=(user query)", * parses them and adds them as AND items to the query root.</p> + * + * <p>If the given default index does not exist in the search definition, the query part will be parsed with the + * settings of the default index set to the "".</p> + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class QueryCombinator extends Searcher { + private static final String QUERYPREFIX = "query."; + + private static class QueryPart { + final String query; + final String defaultIndex; + + QueryPart(String query, String defaultIndex) { + this.query = query; + this.defaultIndex = defaultIndex; + } + } + + public QueryCombinator(ComponentId id) { + super(id); + } + + @Override + public Result search(Query query, Execution execution) { + Set<QueryPart> pieces = findQuerySnippets(query.properties()); + if (pieces.size() == 0) { + 
return execution.search(query); + } + addAndItems(query, pieces, execution.context()); + query.trace("Adding extra query parts.", true, 2); + return execution.search(query); + } + + private void addAndItems(Query query, Iterable<QueryPart> pieces, Execution.Context context) { + IndexFacts indexFacts = context.getIndexFacts(); + IndexFacts.Session session = indexFacts.newSession(query); + Set<String> usedSources = new HashSet<>(session.documentTypes()); + Language language = query.getModel().getParsingLanguage(); + for (QueryPart part : pieces) { + String defaultIndex; + Item item = null; + Index index = session.getIndex(part.defaultIndex); + if (index == Index.nullIndex) { + defaultIndex = null; + } else { + defaultIndex = part.defaultIndex; + } + try { + CustomParser parser = (CustomParser)ParserFactory.newInstance(query.getModel().getType(), + ParserEnvironment.fromExecutionContext(context)); + item = parser.parse(part.query, null, language, usedSources, indexFacts, defaultIndex); + } catch (RuntimeException e) { + String err = Exceptions.toMessageString(e); + query.trace("Query parser threw an exception: " + err, true, 1); + getLogger().log(LogLevel.WARNING, + "Query parser threw exception searcher QueryCombinator for " + + query.getHttpRequest().toString() + ", query part " + part.query + ": " + err); + } + if (item == null) { + continue; + } + if (defaultIndex == null) { + assignDefaultIndex(item, part.defaultIndex); + } + addAndItem(query.getModel().getQueryTree(), item); + } + } + + private static void addAndItem(QueryTree queryTree, Item item) { + Item root = queryTree.getRoot(); + // JavaDoc claims I can get null, code gives NullItem... well, well, well... 
+ if (root instanceof NullItem || root == null) { + queryTree.setRoot(item); + } else if (root.getClass() == AndItem.class) { + ((AndItem) root).addItem(item); + } else { + AndItem newRoot = new AndItem(); + newRoot.addItem(root); + newRoot.addItem(item); + queryTree.setRoot(newRoot); + } + } + + private static void assignDefaultIndex(Item item, String defaultIndex) { + if (item instanceof IndexedItem) { + IndexedItem indexName = (IndexedItem) item; + + if ("".equals(indexName.getIndexName())) { + indexName.setIndexName(defaultIndex); + } + } else if (item instanceof CompositeItem) { + Iterator<Item> items = ((CompositeItem) item).getItemIterator(); + while (items.hasNext()) { + Item i = items.next(); + assignDefaultIndex(i, defaultIndex); + } + } + } + + private static Set<QueryPart> findQuerySnippets(Properties properties) { + Set<QueryPart> pieces = new HashSet<>(); + for (Map.Entry<String, Object> k : properties.listProperties().entrySet()) { + String key = k.getKey(); + if (!key.startsWith(QUERYPREFIX)) { + continue; + } + String name = key.substring(QUERYPREFIX.length()); + if (hasDots(name)) { + continue; + } + String index = properties.getString("defidx." + name); + pieces.add(new QueryPart(k.getValue().toString(), index)); + } + return pieces; + } + + private static boolean hasDots(String name) { + int index = name.indexOf('.', 0); + return index != -1; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/querytransform/QueryTreeUtil.java b/container-search/src/main/java/com/yahoo/search/querytransform/QueryTreeUtil.java new file mode 100644 index 00000000000..fb5373d59ea --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/querytransform/QueryTreeUtil.java @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.querytransform; + +import com.yahoo.prelude.query.AndItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.search.Query; +import com.yahoo.search.query.QueryTree; + +/** + * Utility class for manipulating a QueryTree. + * + * @author <a href="mailto:geirst@yahoo-inc.com">Geir Storli</a> + */ +public class QueryTreeUtil { + + static public void andQueryItemWithRoot(Query query, Item item) { + andQueryItemWithRoot(query.getModel().getQueryTree(), item); + } + + static public void andQueryItemWithRoot(QueryTree tree, Item item) { + if (tree.isEmpty()) { + tree.setRoot(item); + } else { + Item oldRoot = tree.getRoot(); + if (oldRoot.getClass() == AndItem.class) { + ((AndItem) oldRoot).addItem(item); + } else { + AndItem newRoot = new AndItem(); + newRoot.addItem(oldRoot); + newRoot.addItem(item); + tree.setRoot(newRoot); + } + } + } +} diff --git a/container-search/src/main/java/com/yahoo/search/querytransform/RangeQueryOptimizer.java b/container-search/src/main/java/com/yahoo/search/querytransform/RangeQueryOptimizer.java new file mode 100644 index 00000000000..65832d99461 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/querytransform/RangeQueryOptimizer.java @@ -0,0 +1,212 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.querytransform; + +import com.yahoo.prelude.query.Limit; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.AndItem; +import com.yahoo.prelude.query.CompositeItem; +import com.yahoo.prelude.query.FalseItem; +import com.yahoo.prelude.query.IntItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.QueryCanonicalizer; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; +import com.yahoo.yolean.chain.After; +import com.yahoo.yolean.chain.Before; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Optional; + +/** + * Finds and optimizes ranges in queries: + * For single value attributes c1 $lt; x AND x > c2 becomes x IN <c1; c2>. + * The query cost saving from this has been shown to be 2 orders of magnitude in real cases. + * + * @author bratseth + */ +@Before(QueryCanonicalizer.queryCanonicalization) +@After(PhaseNames.TRANSFORMED_QUERY) +public class RangeQueryOptimizer extends Searcher { + + @Override + public Result search(Query query, Execution execution) { + if (execution.context().getIndexFacts() == null) return execution.search(query); // this is a test query + + boolean optimized = recursiveOptimize(query.getModel().getQueryTree(), execution.context().getIndexFacts().newSession(query)); + if (optimized) + query.trace("Optimized query ranges", true, 2); + return execution.search(query); + } + + /** Recursively performs the range optimization on this query tree and returns whether at least one optimization was done */ + private boolean recursiveOptimize(Item item, IndexFacts.Session indexFacts) { + if ( ! 
(item instanceof CompositeItem)) return false; + + boolean optimized = false; + for (Iterator<Item> i = ((CompositeItem) item).getItemIterator(); i.hasNext(); ) + optimized |= recursiveOptimize(i.next(), indexFacts); + + if (item instanceof AndItem) + optimized |= optimizeAnd((AndItem)item, indexFacts); + return optimized; + } + + private boolean optimizeAnd(AndItem and, IndexFacts.Session indexFacts) { + // Find consolidated ranges by collecting a list of compatible ranges + List<FieldRange> fieldRanges = null; + for (Iterator<Item> i = and.getItemIterator(); i.hasNext(); ) { + Item item = i.next(); + if ( ! (item instanceof IntItem)) continue; + IntItem intItem = (IntItem)item; + if (intItem.getHitLimit() != 0) continue; // each such op gets a different partial set: Cannot be optimized + if (intItem.getFromLimit().equals(intItem.getToLimit())) continue; // don't optimize searches for single numbers + if (indexFacts.getIndex(intItem.getIndexName()).isMultivalue()) continue; // May match different values in each range + + if (fieldRanges == null) fieldRanges = new ArrayList<>(); + Optional<FieldRange> compatibleRange = findCompatibleRange(intItem, fieldRanges); + if (compatibleRange.isPresent()) + compatibleRange.get().addRange(intItem); + else + fieldRanges.add(new FieldRange(intItem)); + i.remove(); + } + + // Add consolidated ranges + if (fieldRanges == null) return false; + + boolean optimized = false; + for (FieldRange fieldRange : fieldRanges) { + and.addItem(fieldRange.toItem()); + optimized |= fieldRange.isOptimization(); + } + return optimized; + } + + private Optional<FieldRange> findCompatibleRange(IntItem item, List<FieldRange> fieldRanges) { + for (FieldRange fieldRange : fieldRanges) { + if (fieldRange.isCompatibleWith(item)) + return Optional.of(fieldRange); + } + return Optional.empty(); + } + + /** Represents the ranges searched in a single field */ + private static final class FieldRange { + + private Range range = new Range(new 
Limit(Double.NEGATIVE_INFINITY, false), new Limit(Double.POSITIVE_INFINITY, false)); + private int sourceRangeCount = 0; + + // IntItem fields which must be preserved in the produced item. + // This is an unfortunate coupling and ideally we should delegate this (creation, compatibility) + // to the Item classes + private final String indexName; + private final Item.ItemCreator creator; + private final boolean ranked; + private final int weight; + + public FieldRange(IntItem item) { + this.indexName = item.getIndexName(); + this.creator = item.getCreator(); + this.ranked = item.isRanked(); + this.weight = item.getWeight(); + addRange(item); + } + + public String getIndexName() { return indexName; } + + public boolean isCompatibleWith(IntItem item) { + if ( ! indexName.equals(item.getIndexName())) return false; + if (creator != item.getCreator()) return false; + if (ranked != item.isRanked()) return false; + if (weight != item.getWeight()) return false; + return true; + } + + /** Adds a range for this field */ + public void addRange(IntItem item) { + range = range.intersection(new Range(item)); + sourceRangeCount++; + } + + public Item toItem() { + Item item = range.toItem(indexName); + item.setCreator(creator); + item.setRanked(ranked); + item.setWeight(weight); + return item; + } + + /** Returns whether this range is actually an optimization over what was in the source query */ + public boolean isOptimization() { return sourceRangeCount > 1; } + + } + + /** An immutable numerical range */ + private static class Range { + + private final Limit from; + private final Limit to; + + private static final Range empty = new EmptyRange(); + + public Range(Limit from, Limit to) { + this.from = from; + this.to = to; + } + + public Range(IntItem range) { + from = range.getFromLimit(); + to = range.getToLimit(); + } + + /** Returns true if these two ranges overlap */ + public boolean overlaps(Range other) { + if (other.from.isSmallerOrEqualTo(this.to) && 
other.to.isLargerOrEqualTo(this.from)) return true; + if (other.to.isLargerOrEqualTo(this.from) && other.from.isSmallerOrEqualTo(this.to)) return true; + return false; + } + + /** + * Returns the intersection of this and the given range. + * If the ranges does not overlap, an empty range is returned. + */ + public Range intersection(Range other) { + if ( ! overlaps(other)) return empty; + return new Range(from.max(other.from), to.min(other.to)); + } + + public Item toItem(String fieldName) { + return IntItem.from(fieldName, from, to, 0); + } + + @Override + public String toString() { return "[" + from + ";" + to + "]"; } + + } + + private static class EmptyRange extends Range { + + public EmptyRange() { + super(new Limit(0, false), new Limit(0, false)); // the to and from of an empty range is never used. + } + + @Override + public boolean overlaps(Range other) { return false; } + + @Override + public Range intersection(Range other) { return this; } + + @Override + public Item toItem(String fieldName) { return new FalseItem(); } + + @Override + public String toString() { return "(empty)"; } + + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/querytransform/SortingDegrader.java b/container-search/src/main/java/com/yahoo/search/querytransform/SortingDegrader.java new file mode 100644 index 00000000000..5886014deed --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/querytransform/SortingDegrader.java @@ -0,0 +1,105 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.querytransform; + +import com.yahoo.prelude.Index; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.QueryCanonicalizer; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.grouping.GroupingQueryParser; +import com.yahoo.search.grouping.GroupingRequest; +import com.yahoo.search.query.Sorting; +import com.yahoo.search.query.properties.DefaultProperties; +import com.yahoo.search.query.ranking.MatchPhase; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.yolean.chain.After; +import com.yahoo.yolean.chain.Before; + +import java.util.List; +import java.util.Set; + +/** + * If the query is eligible, specify that the query should degrade if it causes too many hits + * to avoid excessively expensive queries. + * <p> + * Queries are eligible if they do sorting, don't do grouping, and the first sort criteria is a fast-search attribute. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ + +// This writes fields to query.getRanking which are moved to rank.properties during query.prepare() +// Query.prepare is done at the same time as canonicalization (by GroupingExecutor), so use that constraint. +// (we're not adding another constraint at this point because all this preparation and encoding business +// should be fixed when we move to Slime for serialization. - Jon, in the spring of the year of 2014) +@Before(QueryCanonicalizer.queryCanonicalization) + +// We are checking if there is a grouping expression, not if there is a raw grouping instruction property, +// so we must run after the property is transferred to a grouping expression +@After(GroupingQueryParser.SELECT_PARAMETER_PARSING) + +public class SortingDegrader extends Searcher { + + /** Set this to false in query.properties to turn off degrading. 
Default: on */ + // (this is not called ranking.sorting.degrading because it should not be part of the query object model + public static final CompoundName DEGRADING = new CompoundName("sorting.degrading"); + + public static final CompoundName PAGINATION = new CompoundName("to_be_removed_pagination"); + + @Override + public Result search(Query query, Execution execution) { + if (shouldBeDegraded(query, execution.context().getIndexFacts().newSession(query))) + setDegradation(query); + return execution.search(query); + } + + private boolean shouldBeDegraded(Query query, IndexFacts.Session indexFacts) { + if (query.getRanking().getSorting() == null) return false; + if (query.getRanking().getSorting().fieldOrders().isEmpty()) return false; + if ( ! GroupingRequest.getRequests(query).isEmpty()) return false; + if ( ! query.properties().getBoolean(DEGRADING, true)) return false; + + Index index = indexFacts.getIndex(query.getRanking().getSorting().fieldOrders().get(0).getFieldName()); + if (index == null) return false; + if ( ! index.isFastSearch()) return false; + if ( ! index.isNumerical()) return false; + + return true; + } + + private void setDegradation(Query query) { + Sorting.FieldOrder primarySort = query.getRanking().getSorting().fieldOrders().get(0); // ensured above + MatchPhase matchPhase = query.getRanking().getMatchPhase(); + + matchPhase.setAttribute(primarySort.getFieldName()); + matchPhase.setAscending(primarySort.getSortOrder() == Sorting.Order.ASCENDING); + if (matchPhase.getMaxHits() == null) + matchPhase.setMaxHits(decideDefaultMaxHits(query)); + } + + /** + * Look at a "reasonable" number of this by default. We don't want to set this too low because it impacts + * the totalHits value returned. + * <p> + * If maxhits/offset is set high, use that as the default instead because it means somebody will want to be able to + * get lots of hits. 
We could use hits+offset instead of maxhits+maxoffset but that would destroy pagination + * with large values because totalHits is wrong. + * <p> + * If we ever get around to estimate totalhits we can rethink this. + */ + private long decideDefaultMaxHits(Query query) { + int maxHits; + int maxOffset; + if (query.properties().getBoolean(PAGINATION, true)) { + maxHits = query.properties().getInteger(DefaultProperties.MAX_HITS); + maxOffset = query.properties().getInteger(DefaultProperties.MAX_OFFSET); + } else { + maxHits = query.getHits(); + maxOffset = query.getOffset(); + } + return maxHits + maxOffset; + } + +} + diff --git a/container-search/src/main/java/com/yahoo/search/querytransform/VespaLowercasingSearcher.java b/container-search/src/main/java/com/yahoo/search/querytransform/VespaLowercasingSearcher.java new file mode 100644 index 00000000000..2e8e0861656 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/querytransform/VespaLowercasingSearcher.java @@ -0,0 +1,39 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.querytransform; + +import static com.yahoo.prelude.querytransform.NormalizingSearcher.ACCENT_REMOVAL; +import static com.yahoo.prelude.querytransform.StemmingSearcher.STEMMING; + +import java.util.Collection; + +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.prelude.Index; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.WordItem; + +/** + * Transform terms in query tree to lower case based on Vespa index settings. 
+ * + * @since 5.1.3 + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@After({ STEMMING, ACCENT_REMOVAL }) +@Provides(VespaLowercasingSearcher.LOWERCASING) +public class VespaLowercasingSearcher extends LowercasingSearcher { + + public static final String LOWERCASING = "LowerCasing"; + + public VespaLowercasingSearcher(LowercasingConfig cfg) { + super(cfg); + } + + @Override + public boolean shouldLowercase(WordItem word, IndexFacts.Session indexFacts) { + if (word.isLowercased()) return false; + + Index index = indexFacts.getIndex(word.getIndexName()); + return index.isLowercase() || index.isAttribute(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/querytransform/WandSearcher.java b/container-search/src/main/java/com/yahoo/search/querytransform/WandSearcher.java new file mode 100644 index 00000000000..6120a7aee30 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/querytransform/WandSearcher.java @@ -0,0 +1,206 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.querytransform; + +import com.yahoo.prelude.Index; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.*; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.text.MapParser; + +import java.util.LinkedHashMap; +import java.util.Map; + +import static com.yahoo.container.protect.Error.UNSPECIFIED; +import com.yahoo.yolean.Exceptions; + +/** + * Searcher that will create a Vespa WAND item from a list of tokens with weights. + * IndexFacts is used to determine which WAND to create. 
+ * + * @since 5.1.11 + * @author <a href="mailto:geirst@yahoo-inc.com">Geir Storli</a> + * @author bratseth + */ +public class WandSearcher extends Searcher { + + /** + * Enum used to represent which "wand" this searcher should produce. + */ + private enum WandType { + VESPA("vespa"), + OR("or"), + PARALLEL("parallel"), + DOT_PRODUCT("dotProduct"); + + private final String type; + + WandType(String type) { + this.type = type; + } + + public static WandType create(String type) { + for (WandType enumType : WandType.values()) { + if (enumType.type.equals(type)) { + return enumType; + } + } + return WandType.VESPA; + } + } + + /** + * Class to resolve the inputs used by this searcher. + */ + private static class InputResolver { + + private static final CompoundName WAND_FIELD = new CompoundName("wand.field"); + private static final CompoundName WAND_TOKENS = new CompoundName("wand.tokens"); + private static final CompoundName WAND_HEAP_SIZE = new CompoundName("wand.heapSize"); + private static final CompoundName WAND_TYPE = new CompoundName("wand.type"); + private static final CompoundName WAND_SCORE_THRESHOLD = new CompoundName("wand.scoreThreshold"); + private static final CompoundName WAND_THRESHOLD_BOOST_FACTOR = new CompoundName("wand.thresholdBoostFactor"); + private final String fieldName; + private final WandType wandType; + private final Map<String, Integer> tokens; + private final int heapSize; + private final double scoreThreshold; + private final double thresholdBoostFactor; + + public InputResolver(Query query, Execution execution) { + fieldName = query.properties().getString(WAND_FIELD); + if (fieldName != null) { + String tokens = query.properties().getString(WAND_TOKENS); + if (tokens != null) { + wandType = resolveWandType(execution.context().getIndexFacts().newSession(query), query); + this.tokens = new IntegerMapParser().parse(tokens, new LinkedHashMap<>()); + heapSize = resolveHeapSize(query); + scoreThreshold = resolveScoreThreshold(query); + 
thresholdBoostFactor = resolveThresholdBoostFactor(query); + return; + } + } + wandType = null; + tokens = null; + heapSize = 0; + scoreThreshold = 0; + thresholdBoostFactor = 1; + } + + private WandType resolveWandType(IndexFacts.Session indexFacts, Query query) { + Index index = indexFacts.getIndex(fieldName); + if (index.isNull()) { + throw new IllegalArgumentException("Field '" + fieldName + "' was not found in " + indexFacts); + } else { + return WandType.create(query.properties().getString(WAND_TYPE, "vespa")); + } + } + + private int resolveHeapSize(Query query) { + String defaultHeapSize = "100"; + return Integer.valueOf(query.properties().getString(WAND_HEAP_SIZE, defaultHeapSize)); + } + + private double resolveScoreThreshold(Query query) { + return Double.valueOf(query.properties().getString(WAND_SCORE_THRESHOLD, "0")); + } + + private double resolveThresholdBoostFactor(Query query) { + return Double.valueOf(query.properties().getString(WAND_THRESHOLD_BOOST_FACTOR, "1")); + } + + public boolean hasValidData() { + return tokens != null && !tokens.isEmpty(); + } + + public String getFieldName() { + return fieldName; + } + + public Map<String, Integer> getTokens() { + return tokens; + } + + public WandType getWandType() { + return wandType; + } + + public Integer getHeapSize() { + return heapSize; + } + + public Double getScoreThreshold() { + return scoreThreshold; + } + + public Double getThresholdBoostFactor() { + return thresholdBoostFactor; + } + } + + @Override + public Result search(Query query, Execution execution) { + try { + InputResolver inputs = new InputResolver(query, execution); + if ( ! 
inputs.hasValidData()) return execution.search(query); + + QueryTreeUtil.andQueryItemWithRoot(query, createWandQueryItem(inputs)); + query.trace("WandSearcher: Added WAND operator", true, 4); + return execution.search(query); + } + catch (IllegalArgumentException e) { + return new Result(query,ErrorMessage.createInvalidQueryParameter(Exceptions.toMessageString(e))); + } + } + + private Item createWandQueryItem(InputResolver inputs) { + if (inputs.getWandType().equals(WandType.VESPA)) { + return populate(new WeakAndItem(inputs.getHeapSize()), inputs.getFieldName(), inputs.getTokens()); + } else if (inputs.getWandType().equals(WandType.OR)) { + return populate(new OrItem(), inputs.getFieldName(), inputs.getTokens()); + } else if (inputs.getWandType().equals(WandType.PARALLEL)) { + return populate(new WandItem(inputs.getFieldName(), inputs.getHeapSize()), + inputs.getScoreThreshold(), inputs.getThresholdBoostFactor(), inputs.getTokens()); + } else if (inputs.getWandType().equals(WandType.DOT_PRODUCT)) { + return populate(new DotProductItem(inputs.getFieldName()), inputs.getTokens()); + } + throw new IllegalArgumentException("Unknown type '" + inputs.getWandType() + "'"); + } + + private CompositeItem populate(CompositeItem parent, String fieldName, Map<String,Integer> tokens) { + for (Map.Entry<String,Integer> entry : tokens.entrySet()) { + WordItem wordItem = new WordItem(entry.getKey(), fieldName); + wordItem.setWeight(entry.getValue()); + wordItem.setStemmed(true); + wordItem.setNormalizable(false); + parent.addItem(wordItem); + } + return parent; + } + + private WeightedSetItem populate(WeightedSetItem item, Map<String,Integer> tokens) { + for (Map.Entry<String,Integer> entry : tokens.entrySet()) { + item.addToken(entry.getKey(), entry.getValue()); + } + return item; + } + + private WandItem populate(WandItem item, double scoreThreshold, double thresholdBoostFactor, Map<String,Integer> tokens) { + populate(item, tokens); + item.setScoreThreshold(scoreThreshold); + 
item.setThresholdBoostFactor(thresholdBoostFactor); + return item; + } + + private static class IntegerMapParser extends MapParser<Integer> { + @Override + protected Integer parseValue(String s) { + return Integer.parseInt(s); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/querytransform/package-info.java b/container-search/src/main/java/com/yahoo/search/querytransform/package-info.java new file mode 100644 index 00000000000..34e59301fca --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/querytransform/package-info.java @@ -0,0 +1,9 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * Vespa search platform query transformation infrastructure. Not a public + * API. + */ +@ExportPackage +package com.yahoo.search.querytransform; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/querytransform/parser/.gitignore b/container-search/src/main/java/com/yahoo/search/querytransform/parser/.gitignore new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/querytransform/parser/.gitignore diff --git a/container-search/src/main/java/com/yahoo/search/rendering/DefaultRenderer.java b/container-search/src/main/java/com/yahoo/search/rendering/DefaultRenderer.java new file mode 100644 index 00000000000..de817d95393 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/rendering/DefaultRenderer.java @@ -0,0 +1,450 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.rendering; + +import com.yahoo.concurrent.CopyOnWriteHashMap; +import com.yahoo.io.ByteWriter; +import com.yahoo.net.URI; +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.prelude.fastsearch.GroupingListHit; +import com.yahoo.prelude.templates.UserTemplate; +import com.yahoo.processing.rendering.AsynchronousSectionedRenderer; +import com.yahoo.processing.response.Data; +import com.yahoo.processing.response.DataList; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.grouping.result.HitRenderer; +import com.yahoo.search.query.context.QueryContext; +import com.yahoo.search.result.*; +import com.yahoo.text.Utf8String; +import com.yahoo.text.XMLWriter; +import com.yahoo.yolean.trace.TraceNode; +import com.yahoo.yolean.trace.TraceVisitor; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.io.Writer; +import java.nio.charset.Charset; +import java.nio.charset.CharsetEncoder; +import java.util.Iterator; +import java.util.Map; + +// TODO: Rename to XmlRenderer and make this a deprecated empty subclass. + +/** + * XML rendering of search results. This is NOT the default (but it once was). 
+ * + * @author tonytv + */ +@SuppressWarnings({ "rawtypes", "deprecation" }) +public final class DefaultRenderer extends AsynchronousSectionedRenderer<Result> { + + public static final String DEFAULT_MIMETYPE = "text/xml"; + public static final String DEFAULT_ENCODING = "utf-8"; + + private static final Utf8String RESULT = new Utf8String("result"); + private static final Utf8String GROUP = new Utf8String("group"); + private static final Utf8String ID = new Utf8String("id"); + private static final Utf8String FIELD = new Utf8String("field"); + private static final Utf8String HIT = new Utf8String("hit"); + private static final Utf8String ERROR = new Utf8String("error"); + private static final Utf8String TOTAL_HIT_COUNT = new Utf8String("total-hit-count"); + private static final Utf8String QUERY_TIME = new Utf8String("querytime"); + private static final Utf8String SUMMARY_FETCH_TIME = new Utf8String("summaryfetchtime"); + private static final Utf8String SEARCH_TIME = new Utf8String("searchtime"); + private static final Utf8String NAME = new Utf8String("name"); + private static final Utf8String CODE = new Utf8String("code"); + private static final Utf8String COVERAGE_DOCS = new Utf8String("coverage-docs"); + private static final Utf8String COVERAGE_NODES = new Utf8String("coverage-nodes"); + private static final Utf8String COVERAGE_FULL = new Utf8String("coverage-full"); + private static final Utf8String COVERAGE = new Utf8String("coverage"); + private static final Utf8String RESULTS_FULL = new Utf8String("results-full"); + private static final Utf8String RESULTS = new Utf8String("results"); + private static final Utf8String TYPE = new Utf8String("type"); + private static final Utf8String RELEVANCY = new Utf8String("relevancy"); + private static final Utf8String SOURCE = new Utf8String("source"); + + + // this is shared between umpteen threads by design + private final CopyOnWriteHashMap<String, Utf8String> fieldNameMap = new CopyOnWriteHashMap<>(); + + private boolean 
utf8Output = false; + + private XMLWriter writer; + + @Override + public void init() { + super.init(); + utf8Output = false; + writer = null; + } + + @Override + public String getEncoding() { + + if (getResult() == null + || getResult().getQuery() == null + || getResult().getQuery().getModel().getEncoding() == null) { + return DEFAULT_ENCODING; + } else { + return getResult().getQuery().getModel().getEncoding(); + } + } + + @Override + public String getMimeType() { + return DEFAULT_MIMETYPE; + } + + private XMLWriter wrapWriter(Writer writer) { + return XMLWriter.from(writer, 10, -1); + } + + private void header(XMLWriter writer, Result result) throws IOException { + // TODO: move setting this to Result + utf8Output = "utf-8".equalsIgnoreCase(getRequestedEncoding(result.getQuery())); + writer.xmlHeader(getRequestedEncoding(result.getQuery())); + writer.openTag(RESULT).attribute(TOTAL_HIT_COUNT, String.valueOf(result.getTotalHitCount())); + if (result.getQuery().getPresentation().getReportCoverage()) { + renderCoverageAttributes(result.getCoverage(false), writer); + } + renderTime(writer, result); + writer.closeStartTag(); + } + + private void renderTime(XMLWriter writer, Result result) { + if (!result.getQuery().getPresentation().getTiming()) { + return; + } + + final String threeDecimals = "%.3f"; + final double milli = .001d; + final long now = System.currentTimeMillis(); + final long searchTime = now - result.getElapsedTime().first(); + final double searchSeconds = ((double) searchTime) * milli; + + if (result.getElapsedTime().firstFill() != 0L) { + final long queryTime = result.getElapsedTime().weightedSearchTime(); + final long summaryFetchTime = result.getElapsedTime().weightedFillTime(); + final double querySeconds = ((double) queryTime) * milli; + final double summarySeconds = ((double) summaryFetchTime) * milli; + writer.attribute(QUERY_TIME, String.format(threeDecimals, querySeconds)); + writer.attribute(SUMMARY_FETCH_TIME, String.format(threeDecimals, 
summarySeconds)); + } + writer.attribute(SEARCH_TIME, String.format(threeDecimals, searchSeconds)); + } + + protected static void renderCoverageAttributes(Coverage coverage, XMLWriter writer) throws IOException { + if (coverage == null) return; + writer.attribute(COVERAGE_DOCS,coverage.getDocs()); + writer.attribute(COVERAGE_NODES,coverage.getNodes()); + writer.attribute(COVERAGE_FULL,coverage.getFull()); + writer.attribute(COVERAGE,coverage.getResultPercentage()); + writer.attribute(RESULTS_FULL,coverage.getFullResultSets()); + writer.attribute(RESULTS,coverage.getResultSets()); + } + + + public void error(XMLWriter writer, Result result) throws IOException { + ErrorMessage error = result.hits().getError(); + writer.openTag(ERROR).attribute(CODE,error.getCode()).content(error.getMessage(),false).closeTag(); + } + + + @SuppressWarnings("UnusedParameters") + protected void emptyResult(XMLWriter writer, Result result) throws IOException {} + + @SuppressWarnings("UnusedParameters") + public void queryContext(XMLWriter writer, QueryContext queryContext, Query owner) throws IOException { + if (owner.getTraceLevel()!=0) { + XMLWriter xmlWriter=XMLWriter.from(writer); + xmlWriter.openTag("meta").attribute("type", QueryContext.ID); + TraceNode traceRoot = owner.getModel().getExecution().trace().traceNode().root(); + traceRoot.accept(new RenderingVisitor(xmlWriter, owner.getStartTime())); + xmlWriter.closeTag(); + } + } + + + private void renderSingularHit(XMLWriter writer, Hit hit) throws IOException { + writer.openTag(HIT); + renderHitAttributes(writer, hit); + writer.closeStartTag(); + renderHitFields(writer, hit); + } + + private void renderHitFields(XMLWriter writer, Hit hit) throws IOException { + renderSyntheticRelevanceField(writer, hit); + for (Iterator<Map.Entry<String, Object>> it = hit.fieldIterator(); it.hasNext(); ) { + renderField(writer, hit, it); + } + } + + private void renderField(XMLWriter writer, Hit hit, Iterator<Map.Entry<String, Object>> it) throws 
IOException { + Map.Entry<String, Object> entry = it.next(); + boolean isProbablyNotDecoded = false; + if (hit instanceof FastHit) { + FastHit f = (FastHit) hit; + isProbablyNotDecoded = f.fieldIsNotDecoded(entry.getKey()); + } + renderGenericFieldPossiblyNotDecoded(writer, hit, entry, isProbablyNotDecoded); + } + + private void renderGenericFieldPossiblyNotDecoded(XMLWriter writer, Hit hit, Map.Entry<String, Object> entry, boolean probablyNotDecoded) throws IOException { + String fieldName = entry.getKey(); + + // skip depending on hit type + if (fieldName.startsWith("$")) return; // Don't render fields that start with $ // TODO: Move to should render + + writeOpenFieldElement(writer, fieldName); + renderFieldContentPossiblyNotDecoded(writer, hit, probablyNotDecoded, fieldName); + writeCloseFieldElement(writer); + } + + private void renderFieldContentPossiblyNotDecoded(XMLWriter writer, Hit hit, boolean probablyNotDecoded, String fieldName) throws IOException { + boolean dumpedRaw = false; + if (probablyNotDecoded && (hit instanceof FastHit)) { + writer.closeStartTag(); + if ((writer.getWriter() instanceof ByteWriter) && utf8Output) { + dumpedRaw = UserTemplate.dumpBytes((ByteWriter) writer.getWriter(), (FastHit) hit, fieldName); + } + if (dumpedRaw) { + writer.content("", false); // let the xml writer note that this tag had content + } + } + if (!dumpedRaw) { + String xmlval = hit.getFieldXML(fieldName); + if (xmlval == null) { + xmlval = "(null)"; + } + writer.escapedContent(xmlval, false); + } + } + + private void renderSyntheticRelevanceField(XMLWriter writer, Hit hit) throws IOException { + final String relevancyFieldName = "relevancy"; + final Relevance relevance = hit.getRelevance(); + + // skip depending on hit type + if (relevance != null) { + renderSimpleField(writer, relevancyFieldName, relevance); + } + } + + private void renderSimpleField(XMLWriter writer, String relevancyFieldName, Relevance relevance) throws IOException { + 
writeOpenFieldElement(writer, relevancyFieldName); + writer.content(relevance.toString(), false); + writeCloseFieldElement(writer); + } + + private void writeCloseFieldElement(XMLWriter writer) throws IOException { + writer.closeTag(); + } + + private void writeOpenFieldElement(XMLWriter writer, String relevancyFieldName) throws IOException { + Utf8String utf8 = fieldNameMap.get(relevancyFieldName); + if (utf8 == null) { + utf8 = new Utf8String(relevancyFieldName); + fieldNameMap.put(relevancyFieldName, utf8); + } + writer.openTag(FIELD).attribute(NAME, utf8); + writer.closeStartTag(); + } + + private void renderHitAttributes(XMLWriter writer, Hit hit) throws IOException { + writer.attribute(TYPE, hit.getTypeString()); + if (hit.getRelevance() != null) { + writer.attribute(RELEVANCY, hit.getRelevance().toString()); +} + writer.attribute(SOURCE, hit.getSource()); + } + + private void renderHitGroup(XMLWriter writer, HitGroup hit) throws IOException { + if (HitRenderer.renderHeader(hit, writer)) { + // empty + } else if (hit.types().contains("grouphit")) { + // TODO Keep this? 
+ renderHitGroupOfTypeGroupHit(writer, hit); + } else { + renderGroup(writer, hit); + } + } + + private void renderGroup(XMLWriter writer, HitGroup hit) throws IOException { + writer.openTag(GROUP); + renderHitAttributes(writer, hit); + writer.closeStartTag(); + } + + private void renderHitGroupOfTypeGroupHit(XMLWriter writer, HitGroup hit) throws IOException { + writer.openTag(HIT); + renderHitAttributes(writer, hit); + renderId(writer, hit); + writer.closeStartTag(); + } + + private void renderId(XMLWriter writer, HitGroup hit) throws IOException { + URI uri = hit.getId(); + if (uri != null) { + writer.openTag(ID).content(uri.stringValue(),false).closeTag(); + } + } + + private boolean simpleRenderHit(XMLWriter writer, Hit hit) throws IOException { + if (hit instanceof DefaultErrorHit) { + return simpleRenderDefaultErrorHit(writer, (DefaultErrorHit) hit); + } else if (hit instanceof GroupingListHit) { + return true; + } else { + return false; + } + } + + public static boolean simpleRenderDefaultErrorHit(XMLWriter writer, ErrorHit defaultErrorHit) throws IOException { + writer.openTag("errordetails"); + for (Iterator i = defaultErrorHit.errorIterator(); i.hasNext();) { + ErrorMessage error = (ErrorMessage) i.next(); + renderMessageDefaultErrorHit(writer, error); + } + writer.closeTag(); + return true; + } + + public static void renderMessageDefaultErrorHit(XMLWriter writer, ErrorMessage error) throws IOException { + writer.openTag("error"); + writer.attribute("source", error.getSource()); + writer.attribute("error", error.getMessage()); + writer.attribute("code", Integer.toString(error.getCode())); + writer.content(error.getDetailedMessage(), false); + if (error.getCause()!=null) { + writer.openTag("cause"); + writer.content("\n", true); + StringWriter stackTrace=new StringWriter(); + error.getCause().printStackTrace(new PrintWriter(stackTrace)); + writer.content(stackTrace.toString(), true); + writer.closeTag(); + } + writer.closeTag(); + } + + public static 
final class RenderingVisitor extends TraceVisitor { + + private static final String tag = "p"; + private final XMLWriter writer; + private long baseTime; + + public RenderingVisitor(XMLWriter writer,long baseTime) { + this.writer=writer; + this.baseTime=baseTime; + } + + @Override + public void entering(TraceNode node) { + if (node.isRoot()) return; + writer.openTag(tag); + } + + @Override + public void leaving(TraceNode node) { + if (node.isRoot()) return; + writer.closeTag(); + } + + @Override + public void visit(TraceNode node) { + if (node.isRoot()) return; + if (node.payload()==null) return; + + writer.openTag(tag); + if (node.timestamp()!=0) + writer.content(node.timestamp()-baseTime,false).content(" ms: ", false); + writer.content(node.payload().toString(),false); + writer.closeTag(); + } + + } + + private Result getResult() { + Result r; + try { + r = (Result) getResponse(); + } catch (ClassCastException e) { + throw new IllegalArgumentException( + "DefaultRenderer attempted used outside a search context, got a " + + getResponse().getClass().getName()); + } + return r; + } + + @Override + public void beginResponse(OutputStream stream) throws IOException { + Charset cs = Charset.forName(getRequestedEncoding(getResult().getQuery())); + CharsetEncoder encoder = cs.newEncoder(); + writer = wrapWriter(new ByteWriter(stream, encoder)); + + header(writer, getResult()); + if (getResult().hits().getError() != null || getResult().hits().getQuery().errors().size() > 0) { + error(writer, getResult()); + } + + if (getResult().getConcreteHitCount() == 0) { + emptyResult(writer, getResult()); + } + + if (getResult().getContext(false) != null) { + queryContext(writer, getResult().getContext(false), getResult().getQuery()); + } + + } + + /** Returns the encoding of the query, or the encoding given by the template if none is set */ + public final String getRequestedEncoding(Query query) { + String encoding = query.getModel().getEncoding(); + if (encoding != null) return 
encoding; + return getEncoding(); + } + + @Override + public void beginList(DataList<?> list) + throws IOException { + if (getRecursionLevel() == 1) { + return; + } + HitGroup hit = (HitGroup) list; + boolean renderedSimple = simpleRenderHit(writer, hit); + + if (renderedSimple) { + return; + } + renderHitGroup(writer, hit); + } + + @Override + public void data(Data data) throws IOException { + Hit hit = (Hit) data; + boolean renderedSimple = simpleRenderHit(writer, hit); + + if (renderedSimple) { + return; + } + renderSingularHit(writer, hit); + writer.closeTag(); + } + + @Override + public void endList(DataList<?> list) + throws IOException { + if (getRecursionLevel() == 1) { + return; + } + writer.closeTag(); + } + + @Override + public void endResponse() throws IOException { + writer.closeTag(); + writer.close(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/rendering/JsonRenderer.java b/container-search/src/main/java/com/yahoo/search/rendering/JsonRenderer.java new file mode 100644 index 00000000000..94fe5dd446d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/rendering/JsonRenderer.java @@ -0,0 +1,790 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.rendering; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.util.ArrayDeque; +import java.util.Arrays; +import java.util.Collections; +import java.util.Deque; +import java.util.Map; +import java.util.Set; +import java.util.function.LongSupplier; + +import org.json.JSONArray; +import org.json.JSONObject; + +import com.fasterxml.jackson.core.JsonEncoding; +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonGenerationException; +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.TreeNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Preconditions; +import com.yahoo.data.JsonProducer; +import com.yahoo.data.access.Inspectable; +import com.yahoo.data.access.simple.JsonRender; +import com.yahoo.document.datatypes.FieldValue; +import com.yahoo.document.datatypes.StringFieldValue; +import com.yahoo.document.json.JsonWriter; +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.processing.Response; +import com.yahoo.processing.execution.Execution.Trace; +import com.yahoo.processing.rendering.AsynchronousSectionedRenderer; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.processing.response.Data; +import com.yahoo.processing.response.DataList; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.grouping.Continuation; +import com.yahoo.search.grouping.result.AbstractList; +import com.yahoo.search.grouping.result.BucketGroupId; +import com.yahoo.search.grouping.result.Group; +import com.yahoo.search.grouping.result.GroupId; +import com.yahoo.search.grouping.result.RawBucketId; +import com.yahoo.search.grouping.result.RawId; +import com.yahoo.search.grouping.result.RootGroup; +import com.yahoo.search.grouping.result.ValueGroupId; +import 
com.yahoo.search.result.Coverage; +import com.yahoo.search.result.DefaultErrorHit; +import com.yahoo.search.result.ErrorHit; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.HitGroup; +import com.yahoo.search.result.NanNumber; +import com.yahoo.yolean.trace.TraceNode; +import com.yahoo.yolean.trace.TraceVisitor; + +/** + * JSON renderer for search results. + * + * @author Steinar Knutsen + */ +// NOTE: The JSON format is a public API. If new elements are added be sure to update the reference doc. +public class JsonRenderer extends AsynchronousSectionedRenderer<Result> { + + private static final CompoundName DEBUG_RENDERING_KEY = new CompoundName("renderer.json.debug"); + + private enum RenderDecision { + YES, NO, DO_NOT_KNOW; + + boolean booleanValue() { + switch (this) { + case YES: + return true; + case NO: + return false; + default: + throw new IllegalStateException(); + } + } + }; + + // if this must be optimized, simply use com.fasterxml.jackson.core.SerializableString + private static final String BUCKET_LIMITS = "limits"; + private static final String BUCKET_TO = "to"; + private static final String BUCKET_FROM = "from"; + private static final String CHILDREN = "children"; + private static final String CONTINUATION = "continuation"; + private static final String COVERAGE = "coverage"; + private static final String COVERAGE_COVERAGE = "coverage"; + private static final String COVERAGE_DOCUMENTS = "documents"; + private static final String COVERAGE_FULL = "full"; + private static final String COVERAGE_NODES = "nodes"; + private static final String COVERAGE_RESULTS = "results"; + private static final String COVERAGE_RESULTS_FULL = "resultsFull"; + private static final String ERRORS = "errors"; + private static final String ERROR_CODE = "code"; + private static final String ERROR_MESSAGE = "message"; + private static final String ERROR_SOURCE = "source"; + private static final String 
ERROR_STACK_TRACE = "stackTrace"; + private static final String ERROR_SUMMARY = "summary"; + private static final String FIELDS = "fields"; + private static final String ID = "id"; + private static final String LABEL = "label"; + private static final String RELEVANCE = "relevance"; + private static final String ROOT = "root"; + private static final String SOURCE = "source"; + private static final String TOTAL_COUNT = "totalCount"; + private static final String TRACE = "trace"; + private static final String TRACE_CHILDREN = "children"; + private static final String TRACE_MESSAGE = "message"; + private static final String TRACE_TIMESTAMP = "timestamp"; + private static final String TIMING = "timing"; + private static final String QUERY_TIME = "querytime"; + private static final String SUMMARY_FETCH_TIME = "summaryfetchtime"; + private static final String SEARCH_TIME = "searchtime"; + private static final String TYPES = "types"; + private static final String GROUPING_VALUE = "value"; + private static final String VESPA_HIDDEN_FIELD_PREFIX = "$"; + + private final JsonFactory generatorFactory; + + private JsonGenerator generator; + private Deque<Integer> renderedChildren; + private boolean debugRendering; + private LongSupplier timeSource; + + private class TraceRenderer extends TraceVisitor { + private final long basetime; + private boolean hasFieldName = false; + int emittedChildNesting = 0; + int currentChildNesting = 0; + private boolean insideOpenObject = false; + + TraceRenderer(long basetime) { + this.basetime = basetime; + } + + @Override + public void entering(TraceNode node) { + ++currentChildNesting; + } + + @Override + public void leaving(TraceNode node) { + conditionalEndObject(); + if (currentChildNesting == emittedChildNesting) { + try { + generator.writeEndArray(); + generator.writeEndObject(); + } catch (IOException e) { + throw new TraceRenderWrapper(e); + } + --emittedChildNesting; + } + --currentChildNesting; + } + + @Override + public void 
visit(TraceNode node) { + try { + doVisit(node.timestamp(), node.payload(), node.children().iterator().hasNext()); + } catch (IOException e) { + throw new TraceRenderWrapper(e); + } + } + + private void doVisit(final long timestamp, final Object payload, final boolean hasChildren) + throws IOException, JsonGenerationException { + boolean dirty = false; + if (timestamp != 0L) { + header(); + generator.writeStartObject(); + generator.writeNumberField(TRACE_TIMESTAMP, timestamp - basetime); + dirty = true; + } + if (payload != null) { + if (!dirty) { + header(); + generator.writeStartObject(); + } + generator.writeStringField(TRACE_MESSAGE, payload.toString()); + dirty = true; + } + if (dirty) { + if (!hasChildren) { + generator.writeEndObject(); + } else { + setInsideOpenObject(true); + } + } + } + + private void header() { + fieldName(); + for (int i = 0; i < (currentChildNesting - emittedChildNesting); ++i) { + startChildArray(); + } + emittedChildNesting = currentChildNesting; + } + + private void startChildArray() { + try { + conditionalStartObject(); + generator.writeArrayFieldStart(TRACE_CHILDREN); + } catch (IOException e) { + throw new TraceRenderWrapper(e); + } + } + + private void conditionalStartObject() throws IOException, JsonGenerationException { + if (!isInsideOpenObject()) { + generator.writeStartObject(); + } else { + setInsideOpenObject(false); + } + } + + private void conditionalEndObject() { + if (isInsideOpenObject()) { + // This triggers if we were inside a data node with payload and + // subnodes, but none of the subnodes contained data + try { + generator.writeEndObject(); + setInsideOpenObject(false); + } catch (IOException e) { + throw new TraceRenderWrapper(e); + } + } + } + + private void fieldName() { + if (hasFieldName) { + return; + } + + try { + generator.writeFieldName(TRACE); + } catch (IOException e) { + throw new TraceRenderWrapper(e); + } + hasFieldName = true; + } + + boolean isInsideOpenObject() { + return insideOpenObject; + } 
+ + void setInsideOpenObject(boolean insideOpenObject) { + this.insideOpenObject = insideOpenObject; + } + } + + private static final class TraceRenderWrapper extends RuntimeException { + + /** + * Should never be serialized, but this is still needed. + */ + private static final long serialVersionUID = 2L; + + TraceRenderWrapper(IOException wrapped) { + super(wrapped); + } + + } + + public JsonRenderer() { + generatorFactory = new JsonFactory(); + generatorFactory.setCodec(createJsonCodec()); + } + + /** + * Create the codec used for rendering instances of {@link TreeNode}. This + * method will be invoked when creating the first renderer instance, but not + * for each fresh clone used by individual results. + * + * @return an object mapper for the internal JsonFactory + */ + protected static ObjectMapper createJsonCodec() { + return new ObjectMapper(); + } + + @Override + public void init() { + super.init(); + generator = null; + renderedChildren = null; + debugRendering = false; + timeSource = () -> System.currentTimeMillis(); + } + + @Override + public void beginResponse(OutputStream stream) throws IOException { + generator = generatorFactory.createGenerator(stream, JsonEncoding.UTF8); + renderedChildren = new ArrayDeque<>(); + debugRendering = getDebugRendering(getResult().getQuery()); + generator.writeStartObject(); + renderTrace(getExecution().trace()); + renderTiming(); + generator.writeFieldName(ROOT); + } + + private void renderTiming() throws IOException { + if (!getResult().getQuery().getPresentation().getTiming()) { + return; + } + + final double milli = .001d; + final long now = timeSource.getAsLong(); + final long searchTime = now - getResult().getElapsedTime().first(); + final double searchSeconds = searchTime * milli; + + generator.writeObjectFieldStart(TIMING); + if (getResult().getElapsedTime().firstFill() != 0L) { + final long queryTime = getResult().getElapsedTime().weightedSearchTime(); + final long summaryFetchTime = 
getResult().getElapsedTime().weightedFillTime(); + final double querySeconds = queryTime * milli; + final double summarySeconds = summaryFetchTime * milli; + generator.writeNumberField(QUERY_TIME, querySeconds); + generator.writeNumberField(SUMMARY_FETCH_TIME, summarySeconds); + } + + generator.writeNumberField(SEARCH_TIME, searchSeconds); + generator.writeEndObject(); + } + + private boolean getDebugRendering(Query q) { + return q == null ? false : q.properties().getBoolean(DEBUG_RENDERING_KEY, false); + } + + private void renderTrace(Trace trace) throws JsonGenerationException, IOException { + if (!trace.traceNode().children().iterator().hasNext()) { + return; + } + try { + long basetime = trace.traceNode().timestamp(); + if (basetime == 0L) { + basetime = getResult().getElapsedTime().first(); + } + trace.accept(new TraceRenderer(basetime)); + } catch (TraceRenderWrapper e) { + throw new IOException(e); + } + } + + @Override + public void beginList(DataList<?> list) throws IOException { + Preconditions.checkArgument(list instanceof HitGroup, + "Expected subclass of com.yahoo.search.result.HitGroup, got %s.", + list.getClass()); + moreChildren(); + + renderHitGroupHead((HitGroup) list); + } + + protected void moreChildren() throws IOException, JsonGenerationException { + if (!renderedChildren.isEmpty()) { + childrenArray(); + } + renderedChildren.push(0); + } + + private void childrenArray() throws IOException, JsonGenerationException { + if (renderedChildren.peek() == 0) { + generator.writeArrayFieldStart(CHILDREN); + } + renderedChildren.push(renderedChildren.pop() + 1); + } + + private void lessChildren() throws IOException, JsonGenerationException { + int lastRenderedChildren = renderedChildren.pop(); + if (lastRenderedChildren > 0) { + generator.writeEndArray(); + } + } + + private void renderHitGroupHead(HitGroup hitGroup) throws JsonGenerationException, IOException { + final ErrorHit errorHit = hitGroup.getErrorHit(); + + generator.writeStartObject(); + 
renderHitContents(hitGroup); + if (getRecursionLevel() == 1) { + renderCoverage(); + } + if (errorHit != null) { + renderErrors(errorHit.errors()); + } + + // the framework will invoke begin methods as needed from here + } + + private void renderErrors(Set<ErrorMessage> errors) throws JsonGenerationException, IOException { + if (errors.isEmpty()) { + return; + } + generator.writeArrayFieldStart(ERRORS); + for (ErrorMessage e : errors) { + String summary = e.getMessage(); + String source = e.getSource(); + Throwable cause = e.getCause(); + String message = e.getDetailedMessage(); + generator.writeStartObject(); + generator.writeNumberField(ERROR_CODE, e.getCode()); + generator.writeStringField(ERROR_SUMMARY, summary); + if (source != null) { + generator.writeStringField(ERROR_SOURCE, source); + } + if (message != null) { + generator.writeStringField(ERROR_MESSAGE, message); + } + if (cause != null && cause.getStackTrace().length > 0) { + StringWriter s = new StringWriter(); + PrintWriter p = new PrintWriter(s); + cause.printStackTrace(p); + p.close(); + generator.writeStringField(ERROR_STACK_TRACE, s.toString()); + } + generator.writeEndObject(); + } + generator.writeEndArray(); + + + } + + private void renderCoverage() throws JsonGenerationException, IOException { + Coverage c = getResult().getCoverage(false); + if (c == null) { + return; + } + generator.writeObjectFieldStart(COVERAGE); + generator.writeNumberField(COVERAGE_COVERAGE, c.getResultPercentage()); + generator.writeNumberField(COVERAGE_DOCUMENTS, c.getDocs()); + generator.writeBooleanField(COVERAGE_FULL, c.getFull()); + generator.writeNumberField(COVERAGE_NODES, c.getNodes()); + generator.writeNumberField(COVERAGE_RESULTS, c.getResultSets()); + generator.writeNumberField(COVERAGE_RESULTS_FULL, c.getFullResultSets()); + generator.writeEndObject(); + } + + private void renderHit(Hit hit) throws JsonGenerationException, IOException { + if (!shouldRender(hit)) { + return; + } + + childrenArray(); + 
generator.writeStartObject(); + renderHitContents(hit); + generator.writeEndObject(); + } + + private boolean shouldRender(Hit hit) { + if (hit instanceof DefaultErrorHit) { + return false; + } + + return true; + } + + private boolean fieldsStart(boolean hasFieldsField) throws JsonGenerationException, IOException { + if (hasFieldsField) { + return true; + } + generator.writeObjectFieldStart(FIELDS); + return true; + } + + private void fieldsEnd(boolean hasFieldsField) throws JsonGenerationException, IOException { + if (!hasFieldsField) { + return; + } + generator.writeEndObject(); + } + + private void renderHitContents(Hit hit) throws JsonGenerationException, IOException { + String id = hit.getDisplayId(); + Set<String> types = hit.types(); + String source = hit.getSource(); + + if (id != null) { + generator.writeStringField(ID, id); + } + generator.writeNumberField(RELEVANCE, hit.getRelevance().getScore()); + if (types.size() > 0) { + generator.writeArrayFieldStart(TYPES); + for (String t : types) { + generator.writeString(t); + } + generator.writeEndArray(); + } + if (source != null) { + generator.writeStringField(SOURCE, hit.getSource()); + } + renderSpecialCasesForGrouping(hit); + + renderAllFields(hit); + } + + private void renderAllFields(Hit hit) throws JsonGenerationException, + IOException { + boolean hasFieldsField = false; + + hasFieldsField |= renderTotalHitCount(hit, hasFieldsField); + hasFieldsField |= renderStandardFields(hit, hasFieldsField); + fieldsEnd(hasFieldsField); + } + + private boolean renderStandardFields(Hit hit, boolean initialHasFieldsField) + throws JsonGenerationException, IOException { + boolean hasFieldsField = initialHasFieldsField; + for (String fieldName : hit.fieldKeys()) { + if (!shouldRender(fieldName, hit)) continue; + + // We can't look at the size of fieldKeys() and know whether we need + // the fields object, as all fields may be hidden. 
+ hasFieldsField |= fieldsStart(hasFieldsField); + renderField(fieldName, hit); + } + return hasFieldsField; + } + + private boolean shouldRender(String fieldName, Hit hit) { + if (debugRendering) { + return true; + } + if (fieldName.startsWith(VESPA_HIDDEN_FIELD_PREFIX)) { + return false; + } + + RenderDecision r = lazyRenderAwareCheck(fieldName, hit); + if (r != RenderDecision.DO_NOT_KNOW) { + return r.booleanValue(); + } + + // this will trigger field decoding, so it is important the lazy decoding magic is done first + Object field = hit.getField(fieldName); + + if (field instanceof CharSequence && ((CharSequence) field).length() == 0) { + return false; + } + if (field instanceof StringFieldValue && ((StringFieldValue) field).getString().isEmpty()) { + // StringFieldValue cannot hold a null, so checking length directly is OK + return false; + } + if (field instanceof NanNumber) { + return false; + } + + return true; + } + + private RenderDecision lazyRenderAwareCheck(String fieldName, Hit hit) { + if (!(hit instanceof FastHit)) return RenderDecision.DO_NOT_KNOW; + + FastHit asFastHit = (FastHit) hit; + if (asFastHit.fieldIsNotDecoded(fieldName)) { + FastHit.RawField r = asFastHit.fetchFieldAsUtf8(fieldName); + if (r != null) { + byte[] utf8 = r.getUtf8(); + if (utf8.length == 0) { + return RenderDecision.NO; + } else { + return RenderDecision.YES; + } + } + } + return RenderDecision.DO_NOT_KNOW; + } + + private void renderSpecialCasesForGrouping(Hit hit) + throws JsonGenerationException, IOException { + if (hit instanceof AbstractList) { + renderGroupingListSyntheticFields((AbstractList) hit); + } else if (hit instanceof Group) { + renderGroupingGroupSyntheticFields(hit); + } + } + + private void renderGroupingGroupSyntheticFields(Hit hit) + throws JsonGenerationException, IOException { + renderGroupMetadata(((Group) hit).getGroupId()); + if (hit instanceof RootGroup) { + renderContinuations(Collections.singletonMap( + Continuation.THIS_PAGE, ((RootGroup) 
hit).continuation())); + } + } + + private void renderGroupingListSyntheticFields(AbstractList a) + throws JsonGenerationException, IOException { + writeGroupingLabel(a); + renderContinuations(a.continuations()); + } + + private void writeGroupingLabel(AbstractList a) + throws JsonGenerationException, IOException { + generator.writeStringField(LABEL, a.getLabel()); + } + + private void renderContinuations(Map<String, Continuation> continuations) + throws JsonGenerationException, IOException { + if (continuations.isEmpty()) { + return; + } + generator.writeObjectFieldStart(CONTINUATION); + for (Map.Entry<String, Continuation> e : continuations.entrySet()) { + generator.writeStringField(e.getKey(), e.getValue().toString()); + } + generator.writeEndObject(); + } + + private void renderGroupMetadata(GroupId id) throws JsonGenerationException, + IOException { + if (!(id instanceof ValueGroupId || id instanceof BucketGroupId)) { + return; + } + + if (id instanceof ValueGroupId) { + final ValueGroupId<?> valueId = (ValueGroupId<?>) id; + generator.writeStringField(GROUPING_VALUE, getIdValue(valueId)); + } else if (id instanceof BucketGroupId) { + final BucketGroupId<?> bucketId = (BucketGroupId<?>) id; + generator.writeObjectFieldStart(BUCKET_LIMITS); + generator.writeStringField(BUCKET_FROM, getBucketFrom(bucketId)); + generator.writeStringField(BUCKET_TO, getBucketTo(bucketId)); + generator.writeEndObject(); + } + } + + private static String getIdValue(ValueGroupId<?> id) { + return (id instanceof RawId ? Arrays.toString(((RawId) id).getValue()) + : id.getValue()).toString(); + } + + private static String getBucketFrom(BucketGroupId<?> id) { + return (id instanceof RawBucketId ? Arrays.toString(((RawBucketId) id) + .getFrom()) : id.getFrom()).toString(); + } + + private static String getBucketTo(BucketGroupId<?> id) { + return (id instanceof RawBucketId ? 
Arrays.toString(((RawBucketId) id) + .getTo()) : id.getTo()).toString(); + } + + private boolean renderTotalHitCount(Hit hit, boolean hasFieldsField) + throws JsonGenerationException, IOException { + if (getRecursionLevel() == 1 && hit instanceof HitGroup) { + fieldsStart(hasFieldsField); + generator.writeNumberField(TOTAL_COUNT, getResult() + .getTotalHitCount()); + return true; + } else { + return false; + } + } + + private void renderField(String fieldName, Hit hit) throws JsonGenerationException, IOException { + generator.writeFieldName(fieldName); + if (!tryDirectRendering(fieldName, hit)) { + renderFieldContents(hit.getField(fieldName)); + } + } + + private void renderFieldContents(Object field) throws JsonGenerationException, IOException { + if (field == null) { + generator.writeNull(); + } else if (field instanceof Number) { + renderNumberField((Number) field); + } else if (field instanceof TreeNode) { + generator.writeTree((TreeNode) field); + } else if (field instanceof JsonProducer) { + generator.writeRawValue(((JsonProducer) field).toJson()); + } else if (field instanceof Inspectable) { + StringBuilder intermediate = new StringBuilder(); + JsonRender.render((Inspectable) field, intermediate, true); + generator.writeRawValue(intermediate.toString()); + } else if (field instanceof StringFieldValue) { + // This needs special casing as JsonWriter hides empty strings now + generator.writeString(((StringFieldValue) field).getString()); + } else if (field instanceof FieldValue) { + // the null below is the field which has already been written + ((FieldValue) field).serialize(null, new JsonWriter(generator)); + } else if (field instanceof JSONArray || field instanceof JSONObject) { + // org.json returns null if the object would not result in + // syntactically correct JSON + String s = field.toString(); + if (s == null) { + generator.writeNull(); + } else { + generator.writeRawValue(s); + } + } else { + generator.writeString(field.toString()); + } + } + + 
private void renderNumberField(Number field) throws JsonGenerationException, IOException { + if (field instanceof Integer) { + generator.writeNumber(field.intValue()); + } else if (field instanceof Float) { + generator.writeNumber(field.floatValue()); + } else if (field instanceof Double) { + generator.writeNumber(field.doubleValue()); + } else if (field instanceof Long) { + generator.writeNumber(field.longValue()); + } else if (field instanceof Byte || field instanceof Short) { + generator.writeNumber(field.intValue()); + } else if (field instanceof BigInteger) { + generator.writeNumber((BigInteger) field); + } else if (field instanceof BigDecimal) { + generator.writeNumber((BigDecimal) field); + } else { + generator.writeNumber(field.doubleValue()); + } + } + + /** + * Really a private method, but package access for testability. + */ + boolean tryDirectRendering(String fieldName, Hit hit) + throws IOException, JsonGenerationException { + boolean renderedAsUtf8 = false; + if (hit instanceof FastHit) { + FastHit f = (FastHit) hit; + if (f.fieldIsNotDecoded(fieldName)) { + FastHit.RawField r = f.fetchFieldAsUtf8(fieldName); + if (r != null) { + byte[] utf8 = r.getUtf8(); + + generator.writeUTF8String(utf8, 0, utf8.length); + renderedAsUtf8 = true; + } + } + } + return renderedAsUtf8; + } + + @Override + public void data(Data data) throws IOException { + Preconditions.checkArgument(data instanceof Hit, + "Expected subclass of com.yahoo.search.result.Hit, got %s.", + data.getClass()); + renderHit((Hit) data); + } + + @Override + public void endList(DataList<?> list) throws IOException { + lessChildren(); + generator.writeEndObject(); + } + + @Override + public void endResponse() throws IOException { + generator.close(); + } + + @Override + public String getEncoding() { + return "utf-8"; + } + + @Override + public String getMimeType() { + return "application/json"; + } + + private Result getResult() { + Response r = getResponse(); + Preconditions.checkArgument(r 
instanceof Result, + "JsonRenderer can only render instances of com.yahoo.search.Result, got instance of %s.", + r.getClass()); + return (Result) r; + } + + /** + * Only for testing. Never to be used in any other context. + */ + void setGenerator(JsonGenerator generator) { + this.generator = generator; + } + + /** + * Only for testing. Never to be used in any other context. + */ + void setTimeSource(LongSupplier timeSource) { + this.timeSource = timeSource; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/rendering/Renderer.java b/container-search/src/main/java/com/yahoo/search/rendering/Renderer.java new file mode 100644 index 00000000000..92e3bb15d06 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/rendering/Renderer.java @@ -0,0 +1,96 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.rendering; + +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.google.common.util.concurrent.ListenableFuture; +import com.google.common.util.concurrent.SettableFuture; +import com.yahoo.io.ByteWriter; +import com.yahoo.processing.Request; +import com.yahoo.processing.execution.Execution; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.Writer; +import java.nio.charset.Charset; +import java.nio.charset.CharsetEncoder; + +/** + * Renders a search result to a writer synchronously - the result is completely rendered when the render method returns.. 
+ * The renderers are cloned just before rendering, + * and must therefore obey the following contract: + * + * <ol> + * <li>At construction time, only final members shall be initialized, and these + * must refer to immutable data only.</li> + * <li>State mutated during rendering shall be initialized in the init method.</li> + * </ol> + * + * @author tonytv + */ +abstract public class Renderer extends com.yahoo.processing.rendering.Renderer<Result> { + + /** + * Renders synchronously and returns when rendering is complete. + * + * @return a future which is always completed to true + */ + @Override + public final ListenableFuture<Boolean> render(OutputStream stream, Result response, Execution execution, Request request) { + Writer writer = null; + try { + writer = createWriter(stream,response); + render(writer, response); + } + catch (IOException e) { + throw new RuntimeException(e); + } + finally { + if (writer !=null) + try { writer.close(); } catch (IOException e2) {}; + } + SettableFuture<Boolean> completed=SettableFuture.create(); + completed.set(true); + return completed; + } + + /** + * Renders the result to the writer. + */ + protected abstract void render(Writer writer, Result result) throws IOException; + + private Writer createWriter(OutputStream stream,Result result) { + Charset cs = Charset.forName(getCharacterEncoding(result)); + CharsetEncoder encoder = cs.newEncoder(); + return new ByteWriter(stream, encoder); + } + + public String getCharacterEncoding(Result result) { + String encoding = result.getQuery().getModel().getEncoding(); + return (encoding != null) ? encoding : getEncoding(); + } + + /** + * @return The summary class to fill the hits with if no summary class was + * specified in the query presentation. 
+ */ + public String getDefaultSummaryClass() { + return null; + } + + /** Returns the encoding of the query, or the encoding given by the template if none is set */ + public final String getRequestedEncoding(Query query) { + String encoding = query.getModel().getEncoding(); + if (encoding != null) return encoding; + return getEncoding(); + } + + /** + * Used to create a separate instance for each result to render. + */ + @Override + public Renderer clone() { + return (Renderer) super.clone(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/rendering/RendererRegistry.java b/container-search/src/main/java/com/yahoo/search/rendering/RendererRegistry.java new file mode 100644 index 00000000000..b60c58fd90f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/rendering/RendererRegistry.java @@ -0,0 +1,103 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.rendering; + +import com.yahoo.component.ComponentId; +import com.yahoo.component.ComponentSpecification; +import com.yahoo.component.provider.ComponentRegistry; +import com.yahoo.prelude.templates.PageTemplateSet; +import com.yahoo.prelude.templates.SearchRendererAdaptor; +import com.yahoo.prelude.templates.TiledTemplateSet; +import com.yahoo.prelude.templates.UserTemplate; +import com.yahoo.processing.rendering.Renderer; +import com.yahoo.search.Result; + +import java.util.Collection; +import java.util.Collections; + +/** + * Holds all configured and built-in renderers. + * This registry is always frozen. 
+ * + * @author bratseth + */ +public final class RendererRegistry extends ComponentRegistry<com.yahoo.processing.rendering.Renderer<Result>> { + + public static final ComponentId xmlRendererId = ComponentId.fromString("DefaultRenderer"); + public static final ComponentId jsonRendererId = ComponentId.fromString("JsonRenderer"); + public static final ComponentId defaultRendererId = jsonRendererId; + + /** Creates a registry containing the built-in renderers only */ + public RendererRegistry() { + this(Collections.emptyList()); + } + + /** Creates a registry of the given renderers plus the built-in ones */ + public RendererRegistry(Collection<Renderer> renderers) { + // add json renderer + Renderer jsonRenderer = new JsonRenderer(); + jsonRenderer.initId(RendererRegistry.jsonRendererId); + register(jsonRenderer.getId(), jsonRenderer); + + // Add xml renderer + Renderer xmlRenderer = new DefaultRenderer(); + xmlRenderer.initId(xmlRendererId); + register(xmlRenderer.getId(), xmlRenderer); + + // add application renderers + for (Renderer renderer : renderers) + register(renderer.getId(), renderer); + + // add legacy "templates" converted to renderers + addTemplateSet(new TiledTemplateSet()); + addTemplateSet(new PageTemplateSet()); + + freeze(); + } + + @SuppressWarnings({"deprecation", "unchecked"}) + private void addTemplateSet(UserTemplate<?> templateSet) { + Renderer renderer = new SearchRendererAdaptor(templateSet); + ComponentId rendererId = new ComponentId(templateSet.getName()); + renderer.initId(rendererId); + register(rendererId, renderer); + } + + /** + * Returns the default JSON renderer + * + * @return the default built-in result renderer + */ + public com.yahoo.processing.rendering.Renderer<Result> getDefaultRenderer() { + return getComponent(jsonRendererId); + } + + /** + * Returns the requested renderer. + * + * @param format the id or format alias of the renderer to return. 
If null is passed the default renderer + * is returned + * @throws IllegalArgumentException if the renderer cannot be resolved + */ + public com.yahoo.processing.rendering.Renderer<Result> getRenderer(ComponentSpecification format) { + if (format == null || format.stringValue().equals("default")) return getDefaultRenderer(); + if (format.stringValue().equals("json")) return getComponent(jsonRendererId); + if (format.stringValue().equals("xml")) return getComponent(xmlRendererId); + + com.yahoo.processing.rendering.Renderer<Result> renderer = getComponent(format); + if (renderer == null) + throw new IllegalArgumentException("No renderer with id or alias '" + format + "'. " + + "Available renderers are: [" + rendererNames() + "]."); + return renderer; + } + + private String rendererNames() { + StringBuilder r = new StringBuilder(); + for (Renderer<Result> c : allComponents()) { + if (r.length() > 0) + r.append(", "); + r.append(c.getId().stringValue()); + } + return r.toString(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/rendering/SectionedRenderer.java b/container-search/src/main/java/com/yahoo/search/rendering/SectionedRenderer.java new file mode 100644 index 00000000000..98978b76277 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/rendering/SectionedRenderer.java @@ -0,0 +1,220 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.rendering; + +import com.yahoo.search.Result; +import com.yahoo.search.grouping.result.Group; +import com.yahoo.search.grouping.result.GroupList; +import com.yahoo.search.grouping.result.HitList; +import com.yahoo.search.query.context.QueryContext; +import com.yahoo.search.result.ErrorHit; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.HitGroup; + +import java.io.IOException; +import java.io.Writer; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + + +/** + * Renders each part of a result to a writer. + * The renderers are cloned just before rendering, + * and must therefore obey the following contract: + * <ol> + * <li>At construction time, only final members shall be initialized, + * and these must refer to immutable data only. + * <li>State mutated during rendering shall be initialized in the init method. + * </ol> + * + * @author tonytv + */ +abstract public class SectionedRenderer<WRITER> extends Renderer { + /** + * Wraps the Writer instance. + * The result is given as a parameter to all the callback methods. + * Must be overridden if the generic parameter WRITER != java.io.Writer. + */ + @SuppressWarnings("unchecked") + public WRITER wrapWriter(Writer writer) { + return (WRITER)writer; + } + + /** + * Called at the start of rendering. + */ + abstract public void beginResult(WRITER writer, Result result) throws IOException; + + /** + * Called at the end of rendering. + */ + abstract public void endResult(WRITER writer, Result result) throws IOException; + + /** + * Called if there are errors in the result. + */ + abstract public void error(WRITER writer, Collection<ErrorMessage> errorMessages) throws IOException; + + /** + * Called if there are no hits in the result. 
+ */ + abstract public void emptyResult(WRITER writer, Result result) throws IOException; + + /** + * Called if there is a non-null query context for the query of the result. + */ + abstract public void queryContext(WRITER writer, QueryContext queryContext) throws IOException; + + /** + * Called when a HitGroup is encountered. After all its children have been provided + * to methods of this class, endHitGroup is called. + */ + abstract public void beginHitGroup(WRITER writer, HitGroup hitGroup) throws IOException; + + /** + * Called after all the children of the HitGroup have been provided to methods of this class. + * See beginHitGroup. + */ + abstract public void endHitGroup(WRITER writer, HitGroup hitGroup) throws IOException; + + /** + * Called when a Hit is encountered. + */ + abstract public void hit(WRITER writer, Hit hit) throws IOException; + + /** + * Called when an errorHit is encountered. + * Forwards to hit() per default. + */ + public void errorHit(WRITER writer, ErrorHit errorHit) throws IOException { + hit(writer, (Hit)errorHit); + } + + /* Begin Grouping */ + + /** + * Same as beginHitGroup, but for Group(grouping api). + * Forwards to beginHitGroup() per default. + */ + public void beginGroup(WRITER writer, Group group) throws IOException { + beginHitGroup(writer, group); + } + + /** + * Same as endHitGroup, but for Group(grouping api). + * Forwards to endHitGroup() per default. + */ + public void endGroup(WRITER writer, Group group) throws IOException { + endHitGroup(writer, group); + } + + /** + * Same as beginHitGroup, but for GroupList(grouping api). + * Forwards to beginHitGroup() per default. + */ + public void beginGroupList(WRITER writer, GroupList groupList) throws IOException { + beginHitGroup(writer, groupList); + } + + /** + * Same as endHitGroup, but for GroupList(grouping api). + * Forwards to endHitGroup() per default. 
+ */ + public void endGroupList(WRITER writer, GroupList groupList) throws IOException { + endHitGroup(writer, groupList); + } + + /** + * Same as beginHitGroup, but for HitList(grouping api). + * Forwards to beginHitGroup() per default. + */ + public void beginHitList(WRITER writer, HitList hitList) throws IOException { + beginHitGroup(writer, hitList); + } + + /** + * Same as endHitGroup, but for HitList(grouping api). + * Forwards to endHitGroup() per default. + */ + public void endHitList(WRITER writer, HitList hitList) throws IOException { + endHitGroup(writer, hitList); + } + /* End Grouping */ + + /** + * Picks apart the result and feeds it to the other methods. + */ + @Override + public final void render(Writer writer, Result result) throws IOException { + WRITER wrappedWriter = wrapWriter(writer); + + beginResult(wrappedWriter, result); + renderResultContent(wrappedWriter, result); + endResult(wrappedWriter, result); + } + + private void renderResultContent(WRITER writer, Result result) throws IOException { + if (result.hits().getError() != null || result.hits().getQuery().errors().size() > 0) { + error(writer, asUnmodifiableSearchErrorList(result.hits().getQuery().errors(), result.hits().getError())); + } + + if (result.getConcreteHitCount() == 0) { + emptyResult(writer, result); + } + + if (result.getContext(false) != null) { + queryContext(writer, result.getContext(false)); + } + + renderHitGroup(writer, result.hits()); + } + + private Collection<ErrorMessage> asUnmodifiableSearchErrorList(List<com.yahoo.processing.request.ErrorMessage> queryErrors,ErrorMessage resultError) { + if (queryErrors.size() == 0) + return Collections.singletonList(resultError); + List<ErrorMessage> searchErrors = new ArrayList<>(queryErrors.size() + (resultError != null ? 
1 :0) ); + for (int i=0; i<queryErrors.size(); i++) + searchErrors.add(ErrorMessage.from(queryErrors.get(i))); + if (resultError != null) + searchErrors.add(resultError); + return Collections.unmodifiableCollection(searchErrors); + } + + private void renderHitGroup(WRITER writer, HitGroup hitGroup) throws IOException { + if (hitGroup instanceof GroupList) { + beginGroupList(writer, (GroupList) hitGroup); + renderHitGroupContent(writer, hitGroup); + endGroupList(writer, (GroupList) hitGroup); + } else if (hitGroup instanceof HitList) { + beginHitList(writer, (HitList) hitGroup); + renderHitGroupContent(writer, hitGroup); + endHitList(writer, (HitList) hitGroup); + } else if (hitGroup instanceof Group) { + beginGroup(writer, (Group) hitGroup); + renderHitGroupContent(writer, hitGroup); + endGroup(writer, (Group) hitGroup); + } else { + beginHitGroup(writer, hitGroup); + renderHitGroupContent(writer, hitGroup); + endHitGroup(writer, hitGroup); + } + } + + private void renderHitGroupContent(WRITER writer, HitGroup hitGroup) throws IOException { + for (Hit hit : hitGroup.asList()) { + renderHit(writer, hit); + } + } + + private void renderHit(WRITER writer, Hit hit) throws IOException { + if (hit instanceof HitGroup) { + renderHitGroup(writer, (HitGroup) hit); + } else if (hit instanceof ErrorHit) { + errorHit(writer, (ErrorHit) hit); + } else { + hit(writer, hit); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/search/rendering/SyncDefaultRenderer.java b/container-search/src/main/java/com/yahoo/search/rendering/SyncDefaultRenderer.java new file mode 100644 index 00000000000..d3039925013 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/rendering/SyncDefaultRenderer.java @@ -0,0 +1,471 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.rendering; + +import com.yahoo.concurrent.CopyOnWriteHashMap; +import com.yahoo.io.ByteWriter; +import com.yahoo.log.LogLevel; +import com.yahoo.net.URI; +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.prelude.fastsearch.GroupingListHit; +import com.yahoo.prelude.templates.Context; +import com.yahoo.prelude.templates.DefaultTemplateSet; +import com.yahoo.prelude.templates.MapContext; +import com.yahoo.prelude.templates.UserTemplate; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.grouping.result.HitRenderer; +import com.yahoo.search.query.context.QueryContext; +import com.yahoo.search.result.*; +import com.yahoo.text.Utf8String; +import com.yahoo.text.XMLWriter; +import com.yahoo.yolean.trace.TraceNode; +import com.yahoo.yolean.trace.TraceVisitor; + +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.io.Writer; +import java.util.Iterator; +import java.util.Map; +import java.util.logging.Logger; + +/** + * @author tonytv + */ +@SuppressWarnings({ "rawtypes", "deprecation" }) +public final class SyncDefaultRenderer extends Renderer { + + private static final Logger log = Logger.getLogger(SyncDefaultRenderer.class.getName()); + + public static final String DEFAULT_MIMETYPE = "text/xml"; + public static final String DEFAULT_ENCODING = "utf-8"; + + + private static final Utf8String RESULT = new Utf8String("result"); + private static final Utf8String GROUP = new Utf8String("group"); + private static final Utf8String ID = new Utf8String("id"); + private static final Utf8String FIELD = new Utf8String("field"); + private static final Utf8String HIT = new Utf8String("hit"); + private static final Utf8String ERROR = new Utf8String("error"); + private static final Utf8String TOTAL_HIT_COUNT = new Utf8String("total-hit-count"); + private static final Utf8String QUERY_TIME = new Utf8String("querytime"); + private static final Utf8String 
SUMMARY_FETCH_TIME = new Utf8String("summaryfetchtime"); + private static final Utf8String SEARCH_TIME = new Utf8String("searchtime"); + private static final Utf8String NAME = new Utf8String("name"); + private static final Utf8String CODE = new Utf8String("code"); + private static final Utf8String COVERAGE_DOCS = new Utf8String("coverage-docs"); + private static final Utf8String COVERAGE_NODES = new Utf8String("coverage-nodes"); + private static final Utf8String COVERAGE_FULL = new Utf8String("coverage-full"); + private static final Utf8String COVERAGE = new Utf8String("coverage"); + private static final Utf8String RESULTS_FULL = new Utf8String("results-full"); + private static final Utf8String RESULTS = new Utf8String("results"); + private static final Utf8String TYPE = new Utf8String("type"); + private static final Utf8String RELEVANCY = new Utf8String("relevancy"); + private static final Utf8String SOURCE = new Utf8String("source"); + + + //Per instance members, must be created at rendering time, not construction time due to cloning. 
+ private Context context; + + private final DefaultTemplateSet defaultTemplate = new DefaultTemplateSet(); + + private final CopyOnWriteHashMap<String, Utf8String> fieldNameMap = new CopyOnWriteHashMap<>(); + + @Override + public void init() { + super.init(); + context = new MapContext(); + } + + @Override + public String getEncoding() { + return DEFAULT_ENCODING; + } + + @Override + public String getMimeType() { + return DEFAULT_MIMETYPE; + } + + @Override + public String getDefaultSummaryClass() { + return null; + } + + private XMLWriter wrapWriter(Writer writer) { + return XMLWriter.from(writer, 10, -1); + } + + /** + * Renders this result + */ + public void render(Writer writer, Result result) throws IOException { + XMLWriter xmlWriter = wrapWriter(writer); + + context.put("context", context); + context.put("result", result); + context.setBoldOpenTag(defaultTemplate.getBoldOpenTag()); + context.setBoldCloseTag(defaultTemplate.getBoldCloseTag()); + context.setSeparatorTag(defaultTemplate.getSeparatorTag()); + + try { + header(xmlWriter, result); + } catch (Exception e) { + handleException(e); + } + + if (result.hits().getError() != null || result.hits().getQuery().errors().size() > 0) { + error(xmlWriter, result); + } + + if (result.getConcreteHitCount() == 0) { + emptyResult(xmlWriter, result); + } + + if (result.getContext(false) != null) { + queryContext(xmlWriter, result.getContext(false), result.getQuery()); + } + + renderHitGroup(xmlWriter, result.hits(), result.hits().getQuery().getOffset() + 1); + + endResult(xmlWriter, result); + } + + private void header(XMLWriter writer, Result result) throws IOException { + // TODO: move setting this to Result + context.setUtf8Output("utf-8".equalsIgnoreCase(getRequestedEncoding(result.getQuery()))); + writer.xmlHeader(getRequestedEncoding(result.getQuery())); + writer.openTag(RESULT).attribute(TOTAL_HIT_COUNT,String.valueOf(result.getTotalHitCount())); + if (result.getQuery().getPresentation().getReportCoverage()) 
{ + renderCoverageAttributes(result.getCoverage(false), writer); + } + renderTime(writer, result); + writer.closeStartTag(); + } + + private void renderTime(XMLWriter writer, Result result) { + if (!result.getQuery().getPresentation().getTiming()) { + return; + } + + final String threeDecimals = "%.3f"; + final double milli = .001d; + final long now = System.currentTimeMillis(); + final long searchTime = now - result.getQuery().getStartTime(); + final double searchSeconds = ((double) searchTime) * milli; + + if (result.getElapsedTime().firstFill() != 0L) { + final long queryTime = result.getElapsedTime().firstFill() - result.getQuery().getStartTime(); + final long summaryFetchTime = now - result.getElapsedTime().firstFill(); + final double querySeconds = ((double) queryTime) * milli; + final double summarySeconds = ((double) summaryFetchTime) * milli; + writer.attribute(QUERY_TIME, String.format(threeDecimals, querySeconds)); + writer.attribute(SUMMARY_FETCH_TIME, String.format(threeDecimals, summarySeconds)); + } + writer.attribute(SEARCH_TIME, String.format(threeDecimals, searchSeconds)); + } + + protected static void renderCoverageAttributes(Coverage coverage, XMLWriter writer) throws IOException { + if (coverage == null) return; + writer.attribute(COVERAGE_DOCS,coverage.getDocs()); + writer.attribute(COVERAGE_NODES,coverage.getNodes()); + writer.attribute(COVERAGE_FULL,coverage.getFull()); + writer.attribute(COVERAGE,coverage.getResultPercentage()); + writer.attribute(RESULTS_FULL,coverage.getFullResultSets()); + writer.attribute(RESULTS,coverage.getResultSets()); + } + + public void endResult(XMLWriter writer, Result result) throws IOException { + try { + writer.closeTag(); + } catch (Exception e) { + handleException(e); + } + } + + public void error(XMLWriter writer, Result result) throws IOException { + try { + ErrorMessage error = result.hits().getError(); + writer.openTag(ERROR).attribute(CODE,error.getCode()).content(error.getMessage(),false).closeTag(); 
+ } catch (Exception e) { + handleException(e); + } + } + + + protected void emptyResult(XMLWriter writer, Result result) throws IOException {} + + public void queryContext(XMLWriter writer, QueryContext queryContext, Query owner) throws IOException { + try { + if (owner.getTraceLevel()!=0) { + XMLWriter xmlWriter=XMLWriter.from(writer); + xmlWriter.openTag("meta").attribute("type", QueryContext.ID); + TraceNode traceRoot = owner.getModel().getExecution().trace().traceNode().root(); + traceRoot.accept(new RenderingVisitor(xmlWriter, owner.getStartTime())); + xmlWriter.closeTag(); + } + } catch (Exception e) { + handleException(e); + } + } + + private void renderHitGroup(XMLWriter writer, HitGroup hitGroup, int hitnumber) + throws IOException { + for (Hit hit : hitGroup.asList()) { + renderHit(writer, hit, hitnumber); + if (!hit.isAuxiliary()) + hitnumber++; + } + } + + + /** + * Renders this hit as xml. The default implementation will call the simpleRender() + * hook. If it returns true, nothing more is done, otherwise the + * given template set will be used for rendering. 
+ * + * + * @param writer the XMLWriter to append this hit to + * @throws java.io.IOException if rendering fails + */ + public void renderHit(XMLWriter writer, Hit hit, int hitno) throws IOException { + renderRegularHit(writer, hit, hitno); + } + + private void renderRegularHit(XMLWriter writer, Hit hit, int hitno) throws IOException { + boolean renderedSimple = simpleRenderHit(writer, hit); + + if (renderedSimple) { + return; + } + + try { + if (hit instanceof HitGroup) { + renderHitGroup(writer, (HitGroup) hit); + } else { + renderSingularHit(writer, hit); + } + } catch (Exception e) { + handleException(e); + } + + if (hit instanceof HitGroup) + renderHitGroup(writer, (HitGroup) hit, hitno); + + try { + writer.closeTag(); + } catch (Exception e) { + handleException(e); + } + } + + private void renderSingularHit(XMLWriter writer, Hit hit) throws IOException { + writer.openTag(HIT); + renderHitAttributes(writer, hit); + writer.closeStartTag(); + renderHitFields(writer, hit); + } + + private void renderHitFields(XMLWriter writer, Hit hit) throws IOException { + renderSyntheticRelevanceField(writer, hit); + for (Iterator<Map.Entry<String, Object>> it = hit.fieldIterator(); it.hasNext(); ) { + renderField(writer, hit, it); + } + } + + private void renderField(XMLWriter writer, Hit hit, Iterator<Map.Entry<String, Object>> it) throws IOException { + Map.Entry<String, Object> entry = it.next(); + boolean isProbablyNotDecoded = false; + if (hit instanceof FastHit) { + FastHit f = (FastHit) hit; + isProbablyNotDecoded = f.fieldIsNotDecoded(entry.getKey()); + } + renderGenericFieldPossiblyNotDecoded(writer, hit, entry, isProbablyNotDecoded); + } + + private void renderGenericFieldPossiblyNotDecoded(XMLWriter writer, Hit hit, Map.Entry<String, Object> entry, boolean probablyNotDecoded) throws IOException { + String fieldName = entry.getKey(); + + if (!shouldRenderField(hit, fieldName)) return; + if (fieldName.startsWith("$")) return; // Don't render fields that start with $ 
// TODO: Move to should render + + writeOpenFieldElement(writer, fieldName); + renderFieldContentPossiblyNotDecoded(writer, hit, probablyNotDecoded, fieldName); + writeCloseFieldElement(writer); + } + + private void renderFieldContentPossiblyNotDecoded(XMLWriter writer, Hit hit, boolean probablyNotDecoded, String fieldName) throws IOException { + boolean dumpedRaw = false; + if (probablyNotDecoded && (hit instanceof FastHit)) { + writer.closeStartTag(); + if ((writer.getWriter() instanceof ByteWriter) && context.isUtf8Output()) { + dumpedRaw = UserTemplate.dumpBytes((ByteWriter) writer.getWriter(), (FastHit) hit, fieldName); + } + if (dumpedRaw) { + writer.content("", false); // let the xml writer note that this tag had content + } + } + if (!dumpedRaw) { + String xmlval = hit.getFieldXML(fieldName); + if (xmlval == null) { + xmlval = "(null)"; + } + writer.escapedContent(xmlval, false); + } + } + + private void renderSyntheticRelevanceField(XMLWriter writer, Hit hit) throws IOException { + final String relevancyFieldName = "relevancy"; + final Relevance relevance = hit.getRelevance(); + + if (shouldRenderField(hit, relevancyFieldName) && relevance != null) { + renderSimpleField(writer, relevancyFieldName, relevance); + } + } + + private void renderSimpleField(XMLWriter writer, String relevancyFieldName, Relevance relevance) throws IOException { + writeOpenFieldElement(writer, relevancyFieldName); + writer.content(relevance.toString(), false); + writeCloseFieldElement(writer); + } + + private void writeCloseFieldElement(XMLWriter writer) throws IOException { + writer.closeTag(); + } + + private void writeOpenFieldElement(XMLWriter writer, String relevancyFieldName) throws IOException { + Utf8String utf8 = fieldNameMap.get(relevancyFieldName); + if (utf8 == null) { + utf8 = new Utf8String(relevancyFieldName); + fieldNameMap.put(relevancyFieldName, utf8); + } + writer.openTag(FIELD).attribute(NAME, utf8); + writer.closeStartTag(); + } + + private boolean 
shouldRenderField(Hit hit, String relevancyFieldName) { + // skip depending on hit type + return true; + } + + private void renderHitAttributes(XMLWriter writer, Hit hit) throws IOException { + writer.attribute(TYPE, hit.getTypeString()); + if (hit.getRelevance() != null) { + writer.attribute(RELEVANCY, hit.getRelevance().toString()); +} + writer.attribute(SOURCE, hit.getSource()); + } + + private void renderHitGroup(XMLWriter writer, HitGroup hit) throws IOException { + if (HitRenderer.renderHeader((HitGroup) hit, writer)) { + // empty + } else if (((HitGroup) hit).types().contains("grouphit")) { + // TODO Keep this? + renderHitGroupOfTypeGroupHit(writer, hit); + } else { + renderGroup(writer, hit); + } + } + + private void renderGroup(XMLWriter writer, HitGroup hit) throws IOException { + writer.openTag(GROUP); + renderHitAttributes(writer, (HitGroup) hit); + writer.closeStartTag(); + } + + private void renderHitGroupOfTypeGroupHit(XMLWriter writer, HitGroup hit) throws IOException { + writer.openTag(HIT); + renderHitAttributes(writer, (HitGroup) hit); + renderId(writer, hit); + writer.closeStartTag(); + } + + private void renderId(XMLWriter writer, HitGroup hit) throws IOException { + URI uri = hit.getId(); + if (uri != null) { + writer.openTag(ID).content(uri.stringValue(),false).closeTag(); + } + } + + private boolean simpleRenderHit(XMLWriter writer, Hit hit) throws IOException { + if (hit instanceof DefaultErrorHit) { + return simpleRenderDefaultErrorHit(writer, (DefaultErrorHit) hit); + } else if (hit instanceof GroupingListHit) { + return true; + } else { + return false; + } + } + + public static boolean simpleRenderDefaultErrorHit(XMLWriter writer, ErrorHit defaultErrorHit) throws IOException { + writer.openTag("errordetails"); + for (Iterator i = defaultErrorHit.errorIterator(); i.hasNext();) { + ErrorMessage error = (ErrorMessage) i.next(); + renderMessageDefaultErrorHit(writer, error); + } + writer.closeTag(); + return true; + } + + public static void 
renderMessageDefaultErrorHit(XMLWriter writer, ErrorMessage error) throws IOException { + writer.openTag("error"); + writer.attribute("source", error.getSource()); + writer.attribute("error", error.getMessage()); + writer.attribute("code", Integer.toString(error.getCode())); + writer.content(error.getDetailedMessage(), false); + if (error.getCause()!=null) { + writer.openTag("cause"); + writer.content("\n", true); + StringWriter stackTrace=new StringWriter(); + error.getCause().printStackTrace(new PrintWriter(stackTrace)); + writer.content(stackTrace.toString(), true); + writer.closeTag(); + } + writer.closeTag(); + } + + private void handleException(Exception e) throws IOException { + if (e instanceof IOException) { + throw (IOException) e; + } else { + log.log(LogLevel.WARNING, "Exception thrown when rendering the result:", e); + } + } + + public static final class RenderingVisitor extends TraceVisitor { + + private static final String tag = "p"; + private final XMLWriter writer; + private long baseTime; + + public RenderingVisitor(XMLWriter writer,long baseTime) { + this.writer=writer; + this.baseTime=baseTime; + } + + @Override + public void entering(TraceNode node) { + if (node.isRoot()) return; + writer.openTag(tag); + } + + @Override + public void leaving(TraceNode node) { + if (node.isRoot()) return; + writer.closeTag(); + } + + @Override + public void visit(TraceNode node) { + if (node.isRoot()) return; + if (node.payload()==null) return; + + writer.openTag(tag); + if (node.timestamp()!=0) + writer.content(node.timestamp()-baseTime,false).content(" ms: ", false); + writer.content(node.payload().toString(),false); + writer.closeTag(); + } + + } +} diff --git a/container-search/src/main/java/com/yahoo/search/rendering/package-info.java b/container-search/src/main/java/com/yahoo/search/rendering/package-info.java new file mode 100644 index 00000000000..7411055e015 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/rendering/package-info.java 
@@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +@PublicApi +package com.yahoo.search.rendering; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/result/ChainableComparator.java b/container-search/src/main/java/com/yahoo/search/result/ChainableComparator.java new file mode 100644 index 00000000000..0750618de67 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/result/ChainableComparator.java @@ -0,0 +1,34 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.result; + +import java.util.Comparator; + +/** + * Superclass of hit comparators which delegates comparisons of hits which are + * equal according to this comparator, to a secondary comparator. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public abstract class ChainableComparator implements Comparator<Hit> { + + private final Comparator<Hit> secondaryComparator; + + /** Creates this comparator, given a secondary comparator, or null if there is no secondary */ + public ChainableComparator(Comparator<Hit> secondaryComparator) { + this.secondaryComparator=secondaryComparator; + } + + /** Returns the comparator to use to compare hits which are equal according to this, or null if none */ + public Comparator<Hit> getSecondaryComparator() { return secondaryComparator; } + + /** + * Returns the comparison from the secondary comparator, or 0 if the secondary is null. + * When overriding this in the subclass, always <code>return super.compare(first,second)</code> + * at the end of the subclass' implementation. 
+ */ + public int compare(Hit first,Hit second) { + if (secondaryComparator==null) return 0; + return secondaryComparator.compare(first,second); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/result/Coverage.java b/container-search/src/main/java/com/yahoo/search/result/Coverage.java new file mode 100644 index 00000000000..7d1e737bfb8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/result/Coverage.java @@ -0,0 +1,24 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.result; + +/** + * The coverage report for a result set. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author balder + */ +public class Coverage extends com.yahoo.container.handler.Coverage { + + public Coverage(long docs, long active) { + super(docs, active, 0, 1); + } + + public Coverage(long docs, int nodes, boolean full) { + this(docs, nodes, full, 1); + } + + public Coverage(long docs, int nodes, boolean full, int resultSets) { + super(docs, nodes, full, resultSets); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/result/DeepHitIterator.java b/container-search/src/main/java/com/yahoo/search/result/DeepHitIterator.java new file mode 100644 index 00000000000..a62a9c66e79 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/result/DeepHitIterator.java @@ -0,0 +1,85 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.result; + +import java.util.*; + +/** + * An iterator for the forest of hits in a result. 
+ * + * @author havardpe + */ +public class DeepHitIterator implements Iterator<Hit> { + + private final boolean ordered; + private List<Iterator<Hit>> stack; + private boolean canRemove = false; + private Iterator<Hit> it = null; + private Hit next = null; + + + /** + * Create a deep hit iterator based on the given hit iterator. + * + * @param it The hits iterator to traverse. + * @param ordered Whether or not the hits should be ordered. + */ + public DeepHitIterator(Iterator<Hit> it, boolean ordered) { + this.ordered = ordered; + this.it = it; + } + + @Override + public boolean hasNext() { + canRemove = false; + return getNext(); + } + + @Override + public Hit next() throws NoSuchElementException { + if (next == null && !getNext()) { + throw new NoSuchElementException(); + } + Hit ret = next; + next = null; + canRemove = true; + return ret; + } + + @Override + public void remove() throws UnsupportedOperationException, IllegalStateException { + if (!canRemove) { + throw new IllegalStateException("Can not remove() an element after calling hasNext()."); + } + it.remove(); + } + + private boolean getNext() { + if (next != null) { + return true; + } + + if (stack == null) { + stack = new ArrayList<>(); + } + while (true) { + if (it.hasNext()) { + Hit hit = it.next(); + if (hit instanceof HitGroup) { + stack.add(it); + if (ordered) { + it = ((HitGroup)hit).iterator(); + } else { + it = ((HitGroup)hit).unorderedIterator(); + } + } else { + next = hit; + return true; + } + } else if (!stack.isEmpty()) { + it = stack.remove(stack.size()-1); + } else { + return false; + } + } + } +} diff --git a/container-search/src/main/java/com/yahoo/search/result/DefaultErrorHit.java b/container-search/src/main/java/com/yahoo/search/result/DefaultErrorHit.java new file mode 100644 index 00000000000..79b8d55bb07 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/result/DefaultErrorHit.java @@ -0,0 +1,135 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.result; + +import com.yahoo.collections.ArraySet; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +/** + * A hit which holds information on error conditions in a result. + * An error hit maintains a main error - the main error of the result. + * + * @author bratseth + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class DefaultErrorHit extends Hit implements ErrorHit, Cloneable { + + /** + * A list of unique error messages, where the first is considered the "main" + * error. It should always contain at least one error. + */ + private List<ErrorMessage> errors = new ArrayList<>(); + + /** + * Creates an error hit with a main error + * + * @param source the name of the source or backend of this hit + * @param error an initial main error to add to this hit, cannot be null + */ + public DefaultErrorHit(String source, ErrorMessage error) { + super("error:" + source, new Relevance(Double.POSITIVE_INFINITY), source); + addError(error); + } + + public void setSource(String source) { + super.setSource(source); + for (Iterator<ErrorMessage> i = errorIterator(); i.hasNext();) { + ErrorMessage error = i.next(); + + if (error.getSource() == null) { + error.setSource(source); + } + } + } + + /** + * Returns the main error of this result, never null. + * + * @deprecated since 5.18, use {@link #errors()} + */ + @Override + public ErrorMessage getMainError() { + return errors.get(0); + } + + /** + * Insert the new "main" error at head of list, remove from the list if it + * already exists elsewhere. + */ + private void removeAndAddAtHead(ErrorMessage mainError) { + errors.remove(mainError); // avoid error duplication + errors.add(0, mainError); + } + + /** + * This is basically a way of making a list simulate a set. 
+ */ + private void removeAndAdd(ErrorMessage error) { + errors.remove(error); + errors.add(error); + } + + /** + * Adds an error to this. This may change the main error + * and/or the list of detailed errors + */ + public void addError(ErrorMessage error) { + if (error.getSource() == null) { + error.setSource(getSource()); + } + removeAndAdd(error); + } + + + /** Add all errors from another error hit to this */ + public void addErrors(ErrorHit errorHit) { + for (Iterator<? extends ErrorMessage> i = errorHit.errorIterator(); i.hasNext();) { + addError(i.next()); + } + } + + /** + * Returns an iterator over all the errors of this error hit, including the main error (which is first). + * The iterator is modifiable. + */ + public Iterator<ErrorMessage> errorIterator() { + return errors.iterator(); + } + + /** Returns a set containing all the errors of this, copied from the internal list */ + public Set<ErrorMessage> errors() { + Set<ErrorMessage> s = new ArraySet<>(errors.size()); + s.addAll(errors); + return s; + } + + public String toString() { + return "Error: " + errors.get(0).toString(); + } + + /** Returns true - this is a meta hit containing information on other hits */ + public boolean isMeta() { + return true; + } + + /** + * Returns true if all errors in this have the given code + */ + public boolean hasOnlyErrorCode(int code) { + for (ErrorMessage error : errors) { + if (error.getCode() != code) + return false; + } + return true; + } + + public DefaultErrorHit clone() { + DefaultErrorHit clone = (DefaultErrorHit) super.clone(); + + clone.errors = new ArrayList<>(this.errors); + return clone; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/result/ErrorHit.java b/container-search/src/main/java/com/yahoo/search/result/ErrorHit.java new file mode 100644 index 00000000000..a3b79d98e65 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/result/ErrorHit.java @@ -0,0 +1,47 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.search.result; + +import java.util.Iterator; +import java.util.Set; + +/** + * A hit which holds information on error conditions in a result. + * An error hit maintains a main error - the main error of the result. + * + * @author bratseth + */ +public interface ErrorHit extends Cloneable { + + void setSource(String source); + + /** Returns the main error of this result, never null */ + @Deprecated // use: errors().iterator().next() + ErrorMessage getMainError(); + + /** + * Adds an error to this. This may change the main error + * and/or the list of detailed errors + */ + void addError(ErrorMessage error); + + /** Add all errors from another error hit to this */ + void addErrors(ErrorHit errorHit); + + /** + * Returns all the detail errors of this error hit, including the main error + */ + Iterator<? extends ErrorMessage> errorIterator(); + + /** Returns a read-only set containing all the errors of this, including the main error */ + Set<ErrorMessage> errors(); + + /** Returns true - this is a meta hit containing information on other hits */ + boolean isMeta(); + + /** Returns true if main error is the given error code or if main error + is general error 8 and all suberrors are the given error code */ + boolean hasOnlyErrorCode(int code); + + Object clone(); + +} diff --git a/container-search/src/main/java/com/yahoo/search/result/ErrorMessage.java b/container-search/src/main/java/com/yahoo/search/result/ErrorMessage.java new file mode 100644 index 00000000000..0a0ef731836 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/result/ErrorMessage.java @@ -0,0 +1,210 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.result; + +import com.yahoo.container.protect.Error; + +import static com.yahoo.container.protect.Error.*; + + +/** + * An error message with a code. Use create methods to create messages. 
+ * The identity of an error message is determined by its values. + * + * @author bratseth + */ +public class ErrorMessage extends com.yahoo.processing.request.ErrorMessage { + + public static final int NULL_QUERY = Error.NULL_QUERY.code; + + /** The source producing this error, not always set */ + private String source = null; + + public ErrorMessage(int code, String message) { + super(code,message); + } + + /** + * Creates an application specific error message with an application specific code. + * If the error results from an exception a message which includes information from all nested (cause) exceptions + * can be generated using com.yahoo.protect.Exceptions.toMessageString(exception). + */ + public ErrorMessage(int code, String message, String detailedMessage) { + super(code,message, detailedMessage); + } + + /** + * Creates an application specific error message with an application specific code and a stack trace. + * This should only be used when there is useful information in the cause, i.e when the exception + * is not expected. Applications rarely need to handle unexpected exceptions as this is done by the framework. + */ + public ErrorMessage(int code, String message, String detailedMessage, Throwable cause) { + super(code, message, detailedMessage, cause); + } + + /** Creates an error message indicating that some backend service is unreachable */ + public static ErrorMessage createNoBackendsInService(String detailedMessage) { + return new ErrorMessage(NO_BACKENDS_IN_SERVICE.code, "No backends in service. 
Try later", detailedMessage); + } + + /** Creates an error message indicating that an attempt was made to evaluate a null query */ + public static ErrorMessage createNullQuery(String detailedMessage) { + return new ErrorMessage(NULL_QUERY, "Null query", detailedMessage); + } + + /** Creates an error message indicating that the request is too large */ + public static ErrorMessage createRequestTooLarge(String detailedMessage) { + return new ErrorMessage(REQUEST_TOO_LARGE.code, "Request too large", detailedMessage); + } + + /** Creates an error message indicating that an attempt was made to evaluate an illegal query. */ + public static ErrorMessage createIllegalQuery(String detailedMessage) { + return new ErrorMessage(ILLEGAL_QUERY.code, "Illegal query", detailedMessage); + } + + /** Creates an error message indicating that an invalid request parameter was received. */ + public static ErrorMessage createInvalidQueryParameter(String detailedMessage) { + return new ErrorMessage(INVALID_QUERY_PARAMETER.code, "Invalid query parameter", detailedMessage); + } + + /** Creates an error message indicating that an invalid request parameter was received. */ + public static ErrorMessage createInvalidQueryParameter(String detailedMessage, Throwable cause) { + return new ErrorMessage(INVALID_QUERY_PARAMETER.code, "Invalid query parameter", detailedMessage, cause); + } + + /** Creates a generic message used when there is no information available on the category of the error. */ + public static ErrorMessage createUnspecifiedError(String detailedMessage) { + return new ErrorMessage(UNSPECIFIED.code, "Unspecified error", detailedMessage); + } + + /** Creates a generic message used when there is no information available on the category of the error. */ + public static ErrorMessage createUnspecifiedError(String detailedMessage, Throwable cause) { + return new ErrorMessage(UNSPECIFIED.code, "Unspecified error", detailedMessage, cause); + } + + /** Creates a general error from an application component. 
*/ + public static ErrorMessage createErrorInPluginSearcher(String detailedMessage) { + return new ErrorMessage(ERROR_IN_PLUGIN.code, "Error in plugin Searcher", detailedMessage); + } + + /** Creates a general error from an application component. */ + public static ErrorMessage createErrorInPluginSearcher(String detailedMessage, Throwable cause) { + return new ErrorMessage(ERROR_IN_PLUGIN.code, "Error in plugin Searcher", detailedMessage, cause); + } + + /** Creates an error indicating that an invalid query transformation was attempted. */ + public static ErrorMessage createInvalidQueryTransformation(String detailedMessage) { + return new ErrorMessage(INVALID_QUERY_TRANSFORMATION.code, "Invalid query transformation",detailedMessage); + } + + /** Creates an error indicating that the server is misconfigured */ + public static ErrorMessage createServerIsMisconfigured(String detailedMessage) { + return new ErrorMessage(SERVER_IS_MISCONFIGURED.code, "Service is misconfigured", detailedMessage); + } + + /** Creates an error indicating that there was a general error communicating with a backend service. */ + public static ErrorMessage createBackendCommunicationError(String detailedMessage) { + return new ErrorMessage(BACKEND_COMMUNICATION_ERROR.code, "Backend communication error", detailedMessage); + } + + /** Creates an error indicating that a node could not be pinged. */ + public static ErrorMessage createNoAnswerWhenPingingNode(String detailedMessage) { + return new ErrorMessage(NO_ANSWER_WHEN_PINGING_NODE.code, "No answer when pinging node", detailedMessage); + } + + public static final int timeoutCode = Error.TIMEOUT.code; + /** Creates an error indicating that a request to a backend timed out. 
*/ + public static ErrorMessage createTimeout(String detailedMessage) { + return new ErrorMessage(timeoutCode, "Timed out",detailedMessage); + } + + public static final int emptyDocsumsCode = Error.EMPTY_DOCUMENTS.code; + /** Creates an error indicating that a request to a backend returned empty document content data. */ + public static ErrorMessage createEmptyDocsums(String detailedMessage) { + return new ErrorMessage(emptyDocsumsCode, "Empty document summaries",detailedMessage); + } + + /** + * Creates an error indicating that the requestor is not authorized to perform the requested operation. + * If this error is present, a HTTP layer will return 401. + */ + public static ErrorMessage createUnauthorized(String detailedMessage) { + return new ErrorMessage(UNAUTHORIZED.code, "Client not authenticated.", detailedMessage); + } + + /** + * Creates an error indicating that a forbidden operation was requested. + * If this error is present, a HTTP layer will return 403. + */ + public static ErrorMessage createForbidden(String detailedMessage) { + return new ErrorMessage(FORBIDDEN.code, "Forbidden.", detailedMessage); + } + + /** + * Creates an error indicating that the requested resource was not found. + * If this error is present, a HTTP layer will return 404. + */ + public static ErrorMessage createNotFound(String detailedMessage) { + return new ErrorMessage(NOT_FOUND.code, "Resource not found.", detailedMessage); + } + + /** + * Creates an error analog to HTTP bad request. If this error is present, a + * HTTP layer will return 400. + */ + public static ErrorMessage createBadRequest(String detailedMessage) { + return new ErrorMessage(BAD_REQUEST.code, "Bad request.", detailedMessage); + } + + /** + * Creates an error analog to HTTP internal server error. If this error is present, a + * HTTP layer will return 500. 
+ */ + public static ErrorMessage createInternalServerError(String detailedMessage) { + return new ErrorMessage(INTERNAL_SERVER_ERROR.code, "Internal server error.", detailedMessage); + } + + /** Sets the source producing this error */ + public void setSource(String source) { this.source = source; } + + /** Returns the source producing this error, or null if no source is specified */ + public String getSource() { return source; } + + @Override + public int hashCode() { + return super.hashCode() + (source == null ? 0 : 31 * source.hashCode()); + } + + @Override + public boolean equals(Object o) { + if (!super.equals(o)) return false; + + ErrorMessage other = (ErrorMessage) o; + if (this.source != null) { + if (!this.source.equals(other.source)) return false; + } else { + if (other.source != null) return false; + } + + return true; + } + + @Override + public String toString() { + return (source==null ? "" : "Source '" + source + "': ") + super.toString(); + } + + @Override + public ErrorMessage clone() { + return (ErrorMessage)super.clone(); + } + + /** + * Returns the given error message as this type. If it already is, this is a cast of the given instance. + * Otherwise this creates a new instance having the same payload as the given instance. + */ + public static ErrorMessage from(com.yahoo.processing.request.ErrorMessage error) { + if (error instanceof ErrorMessage) return (ErrorMessage)error; + return new ErrorMessage(error.getCode(),error.getMessage(),error.getDetailedMessage(),error.getCause()); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/result/FeatureData.java b/container-search/src/main/java/com/yahoo/search/result/FeatureData.java new file mode 100644 index 00000000000..5c57d21b455 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/result/FeatureData.java @@ -0,0 +1,44 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.result; + +import com.yahoo.data.access.Inspector; +import com.yahoo.data.access.Inspectable; +import com.yahoo.data.access.Type; +import com.yahoo.data.JsonProducer; +import com.yahoo.data.access.simple.JsonRender; + +/** + * A wrapper for structured data representing feature values. + */ +public class FeatureData implements Inspectable, JsonProducer { + + private final Inspector value; + + public FeatureData(Inspector value) { + this.value = value; + } + + @Override + public Inspector inspect() { + return value; + } + + public String toString() { + if (value.type() == Type.EMPTY) { + return ""; + } else { + return toJson(); + } + } + + @Override + public String toJson() { + return writeJson(new StringBuilder()).toString(); + } + + @Override + public StringBuilder writeJson(StringBuilder target) { + return JsonRender.render(value, target, true); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/result/FieldComparator.java b/container-search/src/main/java/com/yahoo/search/result/FieldComparator.java new file mode 100644 index 00000000000..77f6db18745 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/result/FieldComparator.java @@ -0,0 +1,106 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.result; + +import com.yahoo.search.query.Sorting; + +import java.util.Comparator; + +/** + * Comparator used for ordering hits using the field values and a sorting specification. + * <p> + * <b>Note:</b> this comparator imposes orderings that are inconsistent with equals. 
+ * <p> + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +// Is tested in HitSortSpecOrdererTestCase +public class FieldComparator extends ChainableComparator { + + /** The definition of sorting order */ + private Sorting sorting; + + /** Creates a field comparator using a sort order and having no chained comparator */ + public FieldComparator(Sorting sorting) { + this(sorting,null); + } + + /** Creates a field comparator using a sort order with a chained comparator */ + public FieldComparator(Sorting sorting,Comparator<Hit> secondaryComparator) { + super(secondaryComparator); + this.sorting = sorting; + } + + /** Creates a comparator given a sorting, or returns null if the given sorting is null */ + public static FieldComparator create(Sorting sorting) { + if (sorting==null) return null; + return new FieldComparator(sorting); + } + + /** + * Compares hits based on a sorting specification and values + * stored in hit fields.0 + * <p> + * When one of the hits has the requested property and the other + * has not, the the hit containing the property precedes the one + * that does not. + * <p> + * There is no locale based sorting here, as the backend does + * not do that either. + * + * @return -1, 0, 1 if first should be sorted before, equal to + * or after second + */ + @Override + public int compare(Hit first, Hit second) { + for (Sorting.FieldOrder fieldOrder : sorting.fieldOrders() ) { + String fieldName = fieldOrder.getFieldName(); + Object a = getField(first,fieldName); + Object b = getField(second,fieldName); + + // If either of the values are null, don't touch the ordering + // This is to avoid problems if the sorting is called before the + // result is filled. 
+ if ((a == null) || (b == null)) return 0; + + int x = compareValues(a, b, fieldOrder.getSorter()); + if (x != 0) { + if (fieldOrder.getSortOrder() == Sorting.Order.DESCENDING) + x *= -1; + return x; + } + } + return super.compare(first,second); + } + + public Object getField(Hit hit,String key) { + if ("[relevance]".equals(key)) return hit.getRelevance(); + if ("[rank]".equals(key)) return hit.getRelevance(); + if ("[source]".equals(key)) return hit.getSource(); + return hit.getField(key); + } + + @SuppressWarnings("rawtypes") + private int compareValues(Object first, Object second, Sorting.AttributeSorter s) { + if (first.getClass().isInstance(second) + && first instanceof Comparable) { + // We now know: + // second is of a type which is a subclass of first's type + // They both implement Comparable + return s.compare((Comparable)first, (Comparable)second); + } else { + return s.compare(first.toString(), second.toString()); + } + } + + public String toString() { + StringBuilder b = new StringBuilder(); + b.append("FieldComparator:"); + if (sorting == null) { + b.append(" null"); + } else { + b.append(sorting.toString()); + } + return b.toString(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/result/Hit.java b/container-search/src/main/java/com/yahoo/search/result/Hit.java new file mode 100644 index 00000000000..2cf1dba7efd --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/result/Hit.java @@ -0,0 +1,787 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.result; + +import com.yahoo.collections.ArraySet; +import com.yahoo.component.provider.ListenableFreezableClass; +import com.yahoo.net.URI; +import com.yahoo.prelude.hitfield.HitField; +import com.yahoo.prelude.hitfield.JSONString; +import com.yahoo.prelude.hitfield.XMLString; +import com.yahoo.processing.Request; +import com.yahoo.processing.response.Data; +import com.yahoo.search.Query; +import com.yahoo.search.Searcher; +import com.yahoo.text.XML; + +import java.util.*; + +/** + * <p>A search hit. The identifier of the hit is the uri + * (the uri is immutable once set). + * If two hits have the same uri they are equal per definition. + * Hits are naturally ordered by decreasing relevance. + * Note that this definition of equals and natural ordering is inconsistent.</p> + * + * <p>Hits may be of the <i>meta</i> type, meaning that they contain some information + * about the query or result which does not represent a particular piece of matched + * content. Meta hits are not counted in the hit count of the result, and should + * usually never be filtered out.</p> + * + * <p>Some hit sources may produce hits which are not <i>filled</i>. A non-filled + * hit may miss some or all of its property values. To fill those, + * {@link com.yahoo.search.Searcher#fill fill} must be called on the search chain by the searcher + * which requires those properties. This mechanism allows initial filtering to be + * done of a lightweight version of the hits, which is cheaper if a significant + * number of hits are filtered out.</p> + * + * @author bratseth + */ +public class Hit extends ListenableFreezableClass implements Data, Comparable<Hit>, Cloneable { + + private static final String DOCUMENT_ID = "documentid"; + + /** A collection of string keyed object properties. */ + private Map<String,Object> fields = null; + private Map<String,Object> unmodifiableFieldMap = null; + + /** Meta data describing how a given searcher should treat this hit. 
*/ + // TODO: The case for this is to allow multiple levels of federation searcher routing. + // Replace this by a cleaner specific solution to that problem. + private Map<Searcher, Object> searcherSpecificMetaData; + + /** The id of this hit */ + private URI id; + + /** The types of this hit */ + private Set<String> types = new ArraySet<>(2); + + /** The relevance of this hit */ + private Relevance relevance; + + /** Says whether this hit is cached or not */ + private boolean cached = false; + + /** + * The summary classes for which this hit is filled. If this set + * is 'null', it means that this hit is unfillable, which is + * equivalent to a hit where all summary classes have already + * been filled, or a hit where further filling will + * yield no extra information, if you prefer to look at it that + * way. + */ + private Set<String> filled = null; + private Set<String> unmodifiableFilled = null; + + /** The name of the source creating this hit */ + private String source = null; + + /** + * Add number, assigned when adding the hit to a result, + * used to order equal relevant hit by add order + */ + private int addNumber = -1; + private int sourceNumber; + + /** The query which produced this hit. Used for multi phase searching */ + private Query query; + + /** + * Set to true for hits which does not contain content, + * but which contains meta information about the query or result + */ + private boolean meta=false; + + /** If this is true, then this hit will not be counted as a concrete hit */ + private boolean auxiliary=false; + + /** + * The hit field used to store rank features. 
TODO: Remove + */ + public static final String RANKFEATURES_FIELD = "rankfeatures"; + public static final String SDDOCNAME_FIELD = "sddocname"; + + private Map<String,Object> getFieldMap() { + if (fields == null) { + fields = new LinkedHashMap<>(16); + } + return fields; + } + + private Map<String,Object> getUnmodifiableFieldMap() { + if (unmodifiableFieldMap == null) { + if (fields == null) { + return Collections.emptyMap(); + } else { + unmodifiableFieldMap = Collections.unmodifiableMap(fields); + } + } + return unmodifiableFieldMap; + } + + public static String stripCharacter(char strip, String toStripFrom) { + StringBuilder builder = null; + + int lastBadChar = 0; + for (int i = 0; i < toStripFrom.length(); i++) { + if (toStripFrom.charAt(i) == strip) { + if (builder == null) { + builder = new StringBuilder(toStripFrom.length()); + } + + builder.append(toStripFrom, lastBadChar, i); + lastBadChar = i + 1; + } + } + + if (builder == null) { + return toStripFrom; + } else { + if (lastBadChar < toStripFrom.length()) { + builder.append(toStripFrom, lastBadChar, toStripFrom.length()); + } + + return builder.toString(); + } + } + + /** Creates an (invalid) empty hit. Id and relevance must be set before handoff */ + protected Hit() {} + + /** + * Creates a minimal valid hit having relevance 1000 + * + * @param id the URI of a hit. This should be unique for this hit (but not for this + * <i>object instance</i> of course). For hit types refering to resources, + * this will be the full, valid url of the resource, for self-contained hits + * it is simply any unique string identification + */ + public Hit(String id) { + this(id, 1); + } + + /** + * Creates a minimal valid hit having relevance 1 + * + * @param id the URI of a hit. This should be unique for this hit (but not for this + * <i>object instance</i> of course). 
For hit types referring to resources, + * this will be the full, valid url of the resource, for self-contained hits + * it is simply any unique string identification + * @param query the query having this as a hit + */ + public Hit(String id, Query query) { + this(id, 1, query); + } + + /** + * Creates a minimal valid hit. + * + * @param id the URI of a hit. This should be unique for this hit (but not for this + * <i>object instance</i> of course). For hit types referring to resources, + * this will be the full, valid url of the resource, for self-contained hits + * it is simply any unique string identification + * @param relevance a relevance measure, preferably normalized between 0 and 1 + * @throws IllegalArgumentException if the given relevance is not between 0 and 1 + */ + public Hit(String id, double relevance) { + this(id,new Relevance(relevance)); + } + + /** + * Creates a minimal valid hit. + * + * @param id the URI of a hit. This should be unique for this hit (but not for this + * <i>object instance</i> of course). For hit types referring to resources, + * this will be the full, valid url of the resource, for self-contained hits + * it is simply any unique string identification + * @param relevance a relevance measure, preferably normalized between 0 and 1 + * @param query the query having this as a hit + * @throws IllegalArgumentException if the given relevance is not between 0 and 1 + */ + public Hit(String id, double relevance, Query query) { + this(id,new Relevance(relevance),query); + } + + /** + * Creates a minimal valid hit. + * + * @param id the URI of a hit. This should be unique for this hit (but not for this + * <i>object instance</i> of course). 
For hit types refering to resources, + * this will be the full, valid url of the resource, for self-contained hits + * it is simply any unique string identification + * @param relevance the relevance of this hit + * @throws IllegalArgumentException if the given relevance is not between 0 and 1000 + */ + public Hit(String id, Relevance relevance) { + this(id, relevance, (String)null); + } + + /** + * Creates a minimal valid hit. + * + * @param id the URI of a hit. This should be unique for this hit (but not for this + * <i>object instance</i> of course). For hit types refering to resources, + * this will be the full, valid url of the resource, for self-contained hits + * it is simply any unique string identification + * @param relevance the relevance of this hit + * @param query the query having this as a hit + * @throws IllegalArgumentException if the given relevance is not between 0 and 1000 + */ + public Hit(String id, Relevance relevance, Query query) { + this(id, relevance,null, query); + } + + /** + * Creates a hit. + * + * @param id the URI of a hit. This should be unique for this hit (but not for this + * <i>object instance</i> of course). For hit types refering to resources, + * this will be the full, valid url of the resource, for self-contained hits + * it is simply any unique string identification + * @param relevance a relevance measure, preferably normalized between 0 and 1 + * @param source the name of the source of this hit, or null if no source is being specified + * @throws IllegalArgumentException if the given relevance is not between 0 and 1000 + */ + public Hit(String id, double relevance, String source) { + this(id, new Relevance(relevance), source, null); + } + + /** + * Creates a hit. + * + * @param id the URI of a hit. This should be unique for this hit (but not for this + * <i>object instance</i> of course). 
For hit types refering to resources, + * this will be the full, valid url of the resource, for self-contained hits + * it is simply any unique string identification + * @param relevance a relevance measure, preferably normalized between 0 and 1 + * @param source the name of the source of this hit, or null if no source is being specified + * @param query the query having this as a hit + * @throws IllegalArgumentException if the given relevance is not between 0 and 1000 + */ + public Hit(String id, double relevance, String source, Query query) { + this(id, new Relevance(relevance), source); + } + + /** + * Creates a hit. + * + * @param id the URI of a hit. This should be unique for this hit (but not for this + * <i>object instance</i> of course). For hit types refering to resources, + * this will be the full, valid url of the resource, for self-contained hits + * it is simply any unique string identification + * @param relevance the relevance of this hit + * @param source the name of the source of this hit + * @throws IllegalArgumentException if the given relevance is not between 0 and 1000 + */ + public Hit(String id, Relevance relevance, String source) { + this(id, relevance, source, null); + } + + /** + * Creates a hit. + * + * @param id the URI of a hit. This should be unique for this hit (but not for this + * <i>object instance</i> of course). 
For hit types refering to resources, + * this will be the full, valid url of the resource, for self-contained hits + * it is simply any unique string identification + * @param relevance the relevance of this hit + * @param source the name of the source of this hit + * @param query the query having this as a hit + * @throws IllegalArgumentException if the given relevance is not between 0 and 1000 + */ + public Hit(String id, Relevance relevance, String source, Query query) { + this.id=new URI(id); + this.relevance = relevance; + this.source=source; + this.query = query; + } + + /** Calls setId(new URI(id)) */ + public void setId(String id) { + if (this.id!=null) throw new IllegalStateException("Attempt to change id of " + this + " to " + id); + if (id==null) throw new NullPointerException("Attempt to assign id of " + this + " to null"); + assignId(new URI(id)); + } + + + /** + * Initializes the id of this hit. + * + * @throws NullPointerException if the uri is null + * @throws IllegalStateException if the uri of this hit is already set + */ + public void setId(URI id) { + if (this.id!=null) throw new IllegalStateException("Attempt to change id of " + this + " to " + id); + assignId(id); + } + + /** + * Assigns a new or changed id to this hit. + * As this is protected, reassigning isn't legal for Hits by default, however, subclasses may allow it + * using this method. + */ + protected final void assignId(URI id) { + if (id==null) throw new NullPointerException("Attempt to assign id of " + this + " to null"); + this.id=id; + } + + /** Returns the hit id */ + public URI getId() { return id; } + + /** + * Returns the id to display, or null to not display (render) the id. + * This is useful to avoid displaying ids when they are not assigned explicitly + * but are just generated values for internal use. 
+ * This default implementation returns {@link #getId()}.toString() + */ + public String getDisplayId() { + String id = null; + + Object idField = getField(DOCUMENT_ID); + if (idField != null) { + id = idField.toString(); + } + if (id == null) { + id = getId() == null ? null : getId().toString(); + } + return id; + } + + /** + * Sets the relevance of this hit + * + * @param relevance the relevance of this hit + */ + public void setRelevance(Relevance relevance) { + if (relevance==null) throw new NullPointerException("Cannot assign null as relevance"); + this.relevance = relevance; + } + + /** Does setRelevance(new Relevance(relevance) */ + public void setRelevance(double relevance) { + setRelevance(new Relevance(relevance)); + } + + + /** Returns the relevance of this hit */ + public Relevance getRelevance() { return relevance; } + + /** Sets whether this hit is returned from a cache. Default is false */ + public void setCached(boolean cached) { this.cached = cached; } + + /** Returns whether this hit was added to this result from a cache or not */ + public boolean isCached() { return cached; } + + /** + * Tag this hit as fillable. This means that additional properties + * for this hit may be obtained by fetching document + * summaries. This also enables tracking of which summary classes + * have been used for filling so far. Invoking this method + * multiple times is allowed and will have no addition + * effect. Note that a fillable hit may not be made unfillable. + **/ + public void setFillable() { + if (filled == null) { + filled = Collections.emptySet(); + unmodifiableFilled = filled; + } + } + + /** + * Register that this hit has been filled with properties using + * the given summary class. Note that this method will implicitly + * tag this hit as fillable if it is currently not. 
+ * + * @param summaryClass summary class used for filling + **/ + public void setFilled(String summaryClass) { + if (filled == null || filled.size() == 0) { + filled = Collections.singleton(summaryClass); + unmodifiableFilled = filled; + } else if (filled.size() == 1) { + filled = new HashSet<>(filled); + unmodifiableFilled = Collections.unmodifiableSet(filled); + + filled.add(summaryClass); + } else { + filled.add(summaryClass); + } + } + + public boolean isFillable() { + return filled != null; + } + + /** + * Returns the set of summary classes for which this hit is + * filled as an unmodifiable set. If this set is 'null', it means that this hit is + * unfillable, which is equivalent with a hit where all summary + * classes have already been used for filling, or a hit where + * further filling will yield no extra information, if you prefer + * to look at it that way. + * + * Note that you might need to overload isFilled if you overload this one. + **/ + public Set<String> getFilled() { + return unmodifiableFilled; + } + + /** + * Returns whether this hit has been filled with the properties + * contained in the given summary class. Note that this method + * will also return true if this hit is not fillable. + */ + public boolean isFilled(String summaryClass) { + return (filled == null) || filled.contains(summaryClass); + } + + /** Sets the name of the source creating this hit */ + public void setSource(String source) { this.source = source; } + + /** Returns the name of the source creating this hit */ + public String getSource() { return source; } + + /** Returns the fields of this as a read-only map. This is more costly than the preferred iterator(), as + * it uses Collections.unmodifiableMap() + * @return An readonly map of the fields + **/ + //TODO Should it be deprecated ? 
+ public final Map<String,Object> fields() { return getUnmodifiableFieldMap(); } + + /** + * Fields + * @return An iterator for traversing the fields + * @since 5.1.3 + */ + public final Iterator<Map.Entry<String,Object>> fieldIterator() { return getFieldMap().entrySet().iterator(); } + + /** Returns a field value */ + public Object getField(String value) { return fields != null ? fields.get(value) : null; } + + /** + * Generate a HitField from a field if the field exists. Does the + * same as getField() in earlier versions. + * + * @since 3.0 + */ + public HitField buildHitField(String key) { + return buildHitField(key, false); + } + + /** + * Generate a HitField from a field if the field exists. Does the + * same as getField() in earlier versions. + * + * @since 3.0 + */ + public HitField buildHitField(String key, boolean forceNoPreTokenize) { + return buildHitField(key, forceNoPreTokenize, false); + } + + public HitField buildHitField(String key, boolean forceNoPreTokenize, boolean forceStringHandling) { + Object o = getField(key); + if (o == null) { + return null; + } + + if (o instanceof HitField) { + return (HitField) o; + } + + HitField h; + if (forceNoPreTokenize) { + if (o instanceof XMLString && !forceStringHandling) { + h = new HitField(key, (XMLString) o, false); + } else { + h = new HitField(key, o.toString(), false); + } + } else { + if (o instanceof XMLString && !forceStringHandling) { + h = new HitField(key, (XMLString) o); + } else { + h = new HitField(key, o.toString()); + } + } + h.setOriginal(o); + getFieldMap().put(key, h); + return h; + } + + /** + * Sets the value of a field + * + * @return the previous value, or null if none + */ + public Object setField(String key, Object value) { + return getFieldMap().put(key, value); + } + + /** Returns the types of this as a modifiable set. 
Modifications to this set are directly reflected in this hit */ + public Set<String> types() { return types; } + + /** + * Returns all types of this hit as a space-separated string + * + * @return all the types of this hit on the form "type1 type2 type3" + * (in no particular order). An empty string (never null) if + * no types are added + */ + public String getTypeString() { + StringBuilder buffer = new StringBuilder(types.size() * 7); + + for (Iterator<String> i = types.iterator(); i.hasNext();) { + buffer.append(i.next()); + if (i.hasNext()) + buffer.append(" "); + } + return buffer.toString(); + } + + /** + * Returns true if the argument is a hit having the same uri as this + */ + public boolean equals(Object object) { + if (!(object instanceof Hit)) { + return false; + } + return getId().equals(((Hit) object).getId()); + } + + /** + * Returns the hashCode of this hit, which is the hashcode of its uri. + */ + public int hashCode() { + if (getId() == null) + throw new IllegalStateException("Id has not been set."); + + return getId().hashCode(); + } + + /** Compares this hit to another hit */ + public int compareTo(Hit other) { + // higher relevance is better + int result = other.getRelevance().compareTo(getRelevance()); + if (result != 0) { + return result; + } + // lower addnumber is better + result = this.getAddNumber() - other.getAddNumber(); + if (result != 0) { + return result; + } + + // if all else fails, compare URIs (alphabetically) + if (this.getId() == null && other.getId() == null) { + return 0; + } else if (other.getId() == null) { + return -1; + } else if (this.getId() == null) { + return 1; + } else { + return this.getId().compareTo(other.getId()); + } + } + + /** + * Returns the add number, assigned when adding the hit to a Result. + * + * Used to order equal relevant hit by add order. -1 if this hit + * has never been added to a result. 
+ */ + public int getAddNumber() { return addNumber; } + + /** + * Sets the add number, assigned when adding the hit to a Result, + * used to order equal relevant hit by add order + */ + public void setAddNumber(int addNumber) { this.addNumber = addNumber; } + + /** + * Returns whether this is a concrete hit, containing content of the requested + * kind, or a meta hit containing information on the collection of hits, + * the query, the service and so on. This default implementation return false. + */ + public boolean isMeta() { return meta; } + + public void setMeta(boolean meta) { this.meta=meta; } + + /** + * Auxiliary hits are not counted towards the concrete number of hits to satisfy in the users request. + * Any kind of meta hit is auxiliary, but hits containing concrete results can also be auxiliary, + * for example ads in a service which does not primarily serve ads, or groups in a hierarchical organization. + * + * @return true if the auxiliary value is true, or if this is a meta hit + */ + public boolean isAuxiliary() { + return isMeta() || auxiliary; + } + + public void setAuxiliary(boolean auxiliary) { this.auxiliary=auxiliary; } + + /** Removes all fields from this */ + public void clearFields() { + getFieldMap().clear(); + } + + /** Removes a field from this */ + public Object removeField(String field) { + return getFieldMap().remove(field); + } + + /** + * Returns the keys of the fields of this hit as a modifiable view. + * This follows the rules of key sets returned from maps: Key removals are reflected + * in the map, add and addAll is not supported. + */ + public Set<String> fieldKeys() { + return getFieldMap().keySet(); + } + + /** + * Changes the key under which a value is found. This is useful because it allows keys to be changed + * without accessing the value (which may be lazily created). 
+ */ + public void changeFieldKey(String oldKey,String newKey) { + Map<String,Object> fieldMap = getFieldMap(); + Object value=fieldMap.remove(oldKey); + fieldMap.put(newKey,value); + } + + /** + * Returns a string describing this hit + */ + public String toString() { + return "hit " + getId() + " (relevance " + getRelevance() + ")"; + } + + public Hit clone() { + Hit hit = (Hit) super.clone(); + + hit.fields = fields != null ? new LinkedHashMap<>(fields) : null; + hit.unmodifiableFieldMap = null; + hit.types = new LinkedHashSet<>(types); + if (filled != null) { + hit.setFilledInternal(new HashSet<>(filled)); + } + + return hit; + } + + public int getSourceNumber() { return sourceNumber; } + + public void setSourceNumber(int number) { this.sourceNumber = number; } + + /** Returns the query which produced this hit, or null if not known */ + public Query getQuery() { return query; } + + public Request request() { return query; } + + // TODO: rethink hit tagging + // hit group -> need option to retag + // hit -> should only set query once + public final void setQuery(Query query) { + if (this.query == null || this instanceof HitGroup) { + this.query = query; + } + } + + // TODO: Deprecate + /** + * Returns a field of this hit XML escaped and without token + * delimiters. + * + * @return a field of this hit, or null if the property is not set + */ + public String getFieldXML(String key) { + Object p = getField(key); + + if (p == null) { + return null; + } else if (p instanceof HitField) { + HitField hf = (HitField) p; + + return hf.quotedContent(false); + } else if (p instanceof StructuredData) { + return p.toString(); + } else if (p instanceof XMLString || p instanceof JSONString) { + return p.toString(); + } else { + return XML.xmlEscape(p.toString(), false, '\u001f'); + } + } + + // TODO: Move out? 
If not, delegate here from subclass + /** + * @return a field without bolding markup + */ + public String getUnboldedField(String key, boolean escape) { + Object p = getField(key); + + if (p == null) { + return null; + } else if (p instanceof HitField) { + return ((HitField) p).bareContent(escape, false); + } else if (p instanceof StructuredData) { + return p.toString(); + } else if (p instanceof XMLString || p instanceof JSONString) { + return p.toString(); + } else if (escape) { + return XML.xmlEscape(p.toString(), false, '\u001f'); + } else { + return stripCharacter('\u001F', p.toString()); + } + } + + /** + * set meta data describing how a given searcher should treat this hit. + * It is currently recommended that the invoker == searcher. + * <b>Internal. Do not use!</b> + */ + public void setSearcherSpecificMetaData(Searcher searcher, Object data) { + if (searcherSpecificMetaData == null) { + searcherSpecificMetaData = Collections.singletonMap(searcher, data); + } else { + if (searcherSpecificMetaData.size() == 1) { + Object tmp = searcherSpecificMetaData.get(searcher); + if (tmp != null) { + searcherSpecificMetaData = Collections.singletonMap(searcher, data); + } else { + searcherSpecificMetaData = new TreeMap<>(searcherSpecificMetaData); + searcherSpecificMetaData.put(searcher, data); + } + } else { + searcherSpecificMetaData.put(searcher, data); + } + } + } + + /** + * get meta data describing how a given searcher should treat this hit. + * It is currently recommended that the invoker == searcher + * <b>Internal. Do not use!</b> + */ + public Object getSearcherSpecificMetaData(Searcher searcher) { + return searcherSpecificMetaData != null ? searcherSpecificMetaData.get(searcher) : null; + } + + /** + * For vespa internal use only. + * This is only for the ones specially interested. It will replace the backing + * for filled. 
+ * @param filled the backing set + */ + protected final void setFilledInternal(Set<String> filled) { + this.filled = filled; + unmodifiableFilled = (filled != null) ? Collections.unmodifiableSet(filled) : null; + } + + /** + * For vespa internal use only. + * Gives access to the modifiable backing set of filled summaries. + * This set might be unmodifiable if the size is less than or equal to 1 + * @return the set of filled summaries. + */ + protected final Set<String> getFilledInternal() { + return filled; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/result/HitGroup.java b/container-search/src/main/java/com/yahoo/search/result/HitGroup.java new file mode 100644 index 00000000000..e58c3dc847e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/result/HitGroup.java @@ -0,0 +1,898 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.result; + +import com.google.common.base.Predicate; +import com.google.common.collect.Iterables; +import com.google.common.util.concurrent.ListenableFuture; +import com.yahoo.collections.ListenableArrayList; +import com.yahoo.net.URI; +import com.yahoo.processing.response.ArrayDataList; +import com.yahoo.processing.response.DataList; +import com.yahoo.processing.response.DefaultIncomingData; +import com.yahoo.processing.response.IncomingData; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +import static com.yahoo.collections.CollectionUtil.first; + +/** + * <p>A group of ordered hits. Since hitGroup is itself a kind of Hit, + * this can compose hierarchies of grouped hits.</p> + * + * <p>Group hits has a relevancy just as other hits - they can be ordered + * between each other and in comparison to other hits. 
+ * + * <p>Note that a group is by default a meta hit, but it can also contain its own content + * in addition to subgroup content, in which case it should be set to non-meta.</p> + * + * @author bratseth + */ +public class HitGroup extends Hit implements DataList<Hit>, Cloneable, Iterable<Hit> { + + // This does its own book-keeping of its various state variables + // (see methods towards the end). For state variables which are recursive + // (depending on the state of hits in subgroups), the strategy is to do + // book-keeping on only this immediate level, but not do recursive calls to + // find the true recursive state when queried. This is sort of a middle ground + // between handling the complexity of recursive state book-keeping and the + // query cost of not doing any book-keeping. + // There is also a method, analyse which recursively updates the recursive + // state of the group and all subgroups. This should be called if the hits + // may have changed their own state in a way that may impact the recursive + // state of this. 
+ + private ListenableArrayList<Hit> hits = new ListenableArrayList<>(16); + + transient private List<Hit> unmodifiableHits = Collections.unmodifiableList(hits); + + /** Whether or not the hits are sorted */ + private boolean hitsSorted = true; + + /** Whether or not deletion of hits breaks the sorted ordering */ + private boolean deletionBreaksOrdering = false; + + /** Whether the hits should be sorted (again) */ + private boolean orderedHits = false; + + /** The current number of concrete (non-meta) hits in the result */ + private int concreteHitCount = 0; + + /** The class used to determine the ordering of the hits of this */ + transient private HitOrderer hitOrderer = null; + + /** Accounting the number of subgroups to allow some early returns when the number is 0 */ + private int subgroupCount=0; + + /** + * The number of hits not cached at this level, not counting hits in subgroups or + * any nested hitgroups themselves + */ + private int notCachedCount=0; + + /** + * A direct reference to the errors of this result, or null if there are no errors. + * The error hit will also be listed in the set of this of this result + */ + private ErrorHit errorHit = null; + + private final ListenableFuture<DataList<Hit>> completedFuture; + + private final IncomingData<Hit> incomingHits; + + /** Creates an invalid group of hits. Id must be set before handoff. 
*/ + public HitGroup() { + incomingHits = new IncomingData.NullIncomingData<>(this); + setRelevance(new Relevance(1)); + setMeta(true); /* a group is a meta hit by default */ + completedFuture = new IncomingData.NullIncomingData.ImmediateFuture<>(this); + } + + /** + * Creates a hit group with max relevancy (1) + * + * @param id the id of this hit - any string, it is convenient to make this unique in the result containing this + */ + public HitGroup(String id) { + this(id,new Relevance(1)); + } + + /** + * Creates a hit group with the given relevance value, wrapped in a new Relevance + * + * @param id the id of this hit - any string, it is convenient to make this unique in the result containing this + * @param relevance the relevance of this group of hits, preferably a number between 0 and 1 + */ + public HitGroup(String id,double relevance) { + this(id,new Relevance(relevance)); + } + + /** + * Creates a hit group which is marked as a meta hit, using a NullIncomingData as its incoming buffer + * and an ImmediateFuture as its completion future. + * + * @param id the id of this hit - any string, it is convenient to make this unique in the result containing this + * @param relevance the relevancy of this group of hits + */ + public HitGroup(String id, Relevance relevance) { + super(id, relevance); + this.incomingHits = new IncomingData.NullIncomingData<>(this); + setMeta(true); + completedFuture = new IncomingData.NullIncomingData.ImmediateFuture<>(this); + } + + /** + * Creates a hit group which is marked as a meta hit, using the given incoming buffer + * and a DrainOnGetFuture as its completion future. This is the constructor used by createAsync. + * + * @param id the id of this hit - any string, it is convenient to make this unique in the result containing this + * @param relevance the relevancy of this group of hits + * @param incomingHits the incoming buffer to which new hits can be added asynchronously + */ + protected HitGroup(String id, Relevance relevance, IncomingData<Hit> incomingHits) { + super(id, relevance); + this.incomingHits = incomingHits; + setMeta(true); + completedFuture = new ArrayDataList.DrainOnGetFuture<>(this); + } + + /** + * Creates a HitGroup which contains data which arrives in the future. 
+ * + * @param id the id of this + * @return a HitGroup which is incomplete and which has an {@link #incoming} where new hits can be added later + */ + public static HitGroup createAsync(String id) { + DefaultIncomingData<Hit> incomingData = new DefaultIncomingData<>(); + HitGroup hitGroup = new HitGroup(id, new Relevance(1), incomingData); + incomingData.assignOwner(hitGroup); + return hitGroup; + } + + /** Calls setId(new URI(id)) */ + @Override + public void setId(String id) { + setId(new URI(id)); + } + + /** + * Assign an id to this hit. + * For HitGroups, this is a legal call also when an id is already set, + * i.e hit groups allows their ids to be reassigned. + * This is to allow hit groups to be inserted in new structures with an id reflecting their + * role/placement in the structure. + * + * @param id the new or initial iof of this hit + */ + @Override + public void setId(URI id) { + super.assignId(id); + } + + /** + * Turn off internal resorting of hits. + * + * @param ordered set to true to tell this group that the hits set in it is already correctly ordered and should + * never be resorted. Set to false to use the default lazy resorting by hit ordering. + */ + public void setOrdered(boolean ordered) { this.orderedHits = ordered; } + + /** + * Returns the number of hits available immediately in this group + * (counting a subgroup as one hit). + */ + public int size() { + return hits.size(); + } + + /** + * <p>Returns the number of concrete hits contained in this group + * and all subgroups. 
This should equal the + * requested hits count if the query has that many matches.</p> + */ + public int getConcreteSize() { + if (subgroupCount<1) return concreteHitCount; + int recursiveConcreteCount=concreteHitCount; + for (Hit hit : hits) { + if (hit instanceof HitGroup) + recursiveConcreteCount+=((HitGroup)hit).getConcreteSize(); + } + return recursiveConcreteCount; + } + + /** + * <p>Returns the number of concrete hits contained in <i>this</i> group, + * without counting hits in subgroups. + */ + public int getConcreteSizeShallow() { return concreteHitCount; } + + /** + * Returns the number of HitGroups present immediately in this list of hits. + */ + public int getSubgroupCount() { return subgroupCount; } + + /** + * Adds a hit to this group. + * If the given hit is an ErrorHit and this group already have an error hit, + * the errors in the given hit are merged into the errors of this. + * + * @return the resulting hit - this is usually the input hit, but if an error hit was added, + * and there was already an error hit present, that hit, containing the merged information + * is returned + */ + @Override + public Hit add(Hit hit) { + if (hit.isMeta() && hit instanceof ErrorHit) { + boolean add = mergeErrors((ErrorHit) hit); + if (!add) return (Hit)errorHit; + } + handleNewHit(hit); + hits.add(hit); + return hit; + } + + /** + * Adds a list of hits to this group, the same + */ + public void addAll(List<Hit> hits) { + for (Hit hit : hits) + add(hit); + } + + /** + * Returns the hit at the given (0-base) index in this group of hit + * (without searching any subgroups). 
+ * + * @param index the index into this list + * @throws IndexOutOfBoundsException if there is no hit at the given index + */ + public Hit get(int index) { + updateHits(); + ensureSorted(); + return hits.get(index); + } + + /** Same as {@link #get(String,int)} */ + public Hit get(String id) { + return get(id,-1); + } + + public Hit get(String id, int depth) { + return get(new URI(id), depth); + } + + /** + * Returns the hit with the given id, or null if there is no hit with this id + * in this group or any subgroup. + * This method is o(min(number of nested hits in this result,depth)). + * + * @param id the id of the hit to return from this or any nested group + * @param depth the max depth to recurse into nested groups: -1: Recurse infinitely deep, 0: Only look at hits in + * the list of this group, 1: Look at hits in this group, and the hits of any immediate nested HitGroups, + * etc. + * @return The hit, or null if not found. + */ + public Hit get(URI id, int depth) { + updateHits(); + for (Iterator<Hit> i = unorderedIterator(); i.hasNext();) { + Hit hit = i.next(); + URI hitUri = hit.getId(); + + if (hitUri != null && hitUri.equals(id)) { + return hit; + } + + if (hit instanceof HitGroup && depth!=0) { + Hit found=((HitGroup)hit).get(id,depth-1); + if (found!=null) return found; + } + } + return null; + } + + /** + * Inserts the given hit at the specified index in this group. + */ + public void set(int index, Hit hit) { + updateHits(); + if (hit instanceof ErrorHit) { // Merge instead + add(hit); + return; + } + + handleNewHit(hit); + Hit oldHit = hits.set(index, hit); + + if (oldHit!=null) + handleRemovedHit(oldHit); + } + + /** + * Adds a hit to this group in the specified index, + * all existing hits on this index and higher will have their index + * increased by one. + * <b>Note:</b> If the group was sorted, it will still be considered sorted + * after this call. 
+ */ + public void add(int index, Hit hit) { + if (hit instanceof ErrorHit) { // Merge instead + add(hit); + return; + } + + boolean wasSorted = hitsSorted; + handleNewHit(hit); + hits.add(index, hit); + hitsSorted = wasSorted; + } + + /** + * Removes a hit from this group or any subgroup + * + * @param uriString the uri of the hit to remove + * @return the hit to remove, or null if the hit was not present + */ + public Hit remove(String uriString) { + return remove(new URI(uriString)); + } + + /** + * Removes a hit from this group or any subgroup. + * + * @param uri The uri of the hit to remove. + * @return The hit removed, or null if not found. + */ + public Hit remove(URI uri) { + for (Iterator<Hit> it = hits.iterator(); it.hasNext(); ) { + Hit hit = it.next(); + if (uri.equals(hit.getId())) { + it.remove(); + handleRemovedHit(hit); + return hit; + } + if (hit instanceof HitGroup) { + Hit removed = ((HitGroup)hit).remove(uri); + if (removed != null) { + return removed; + } + } + } + return null; + } + + /** + * Removes a hit from this group (not considering the hits of any subgroup) + * + * @param index the position of the hit to remove + * @return the hit removed + * @throws IndexOutOfBoundsException if there is no hit at the given position + */ + public Hit remove(int index) { + updateHits(); + Hit hit = hits.remove(index); + handleRemovedHit(hit); + + return hit; + } + + /** Sets the main error of this result. Prefer addError to add some error information. 
*/ + public void setError(ErrorMessage error) { + if (errorHit == null) + add((Hit)createErrorHit(error)); + else + errorHit.addError(error); + } + + /** Adds an error to this result */ + public void addError(ErrorMessage error) { + if (errorHit == null) + add((Hit)createErrorHit(error)); + else + errorHit.addError(error); + } + + /** + * Returns the error hit containing all error information, + * or null if no error has occurred + */ + public ErrorHit getErrorHit() { + getError(); // Make sure the error hit is updated + return errorHit; + } + + /** + * Returns the first error in this result, + * or null if no searcher has produced an error AND the query doesn't contain an error + */ + public ErrorMessage getError() { + // See updateHits if this method is changed + if (errorHit != null) { + return errorHit.errors().iterator().next(); + } + + if (getQuery() != null && getQuery().errors().size() != 0) { + updateHits(); + } // Pull them over + + if (errorHit == null) { + return null; + } + + return errorHit.errors().iterator().next(); + } + + /** + * Handles the addition of a new error hit, whether or not we already have one + * + * @return true if this shouls also be added to the list of hits of this reslt + */ + private boolean mergeErrors(ErrorHit newHit) { + if (errorHit == null) { + errorHit = newHit; + return true; + } else { + errorHit.addErrors(newHit); + return false; + } + } + + /** + * Must be called before the list of hits, or anything dependent on the list of hits, is removed. 
+ * Merges errors from the query if there is one set for this group + */ + private void updateHits() { + if (getQuery()==null) return; + + if (getQuery().errors().size() == 0) return; + + if (errorHit == null) // Creates an error hit where the first error is "main" + add((Hit)createErrorHit(toSearchError(getQuery().errors().get(0)))); + + // Add the rest of the errors + for (int i=1; i<getQuery().errors().size(); i++) + errorHit.addError(toSearchError(getQuery().errors().get(i))); + getQuery().errors().clear(); // TODO: Really clear them from here? + } + + protected ErrorHit createErrorHit(ErrorMessage errorMessage) { + return new DefaultErrorHit(getSource(), errorMessage); + } + + /** Compatibility */ + private ErrorMessage toSearchError(com.yahoo.processing.request.ErrorMessage error) { + if (error instanceof ErrorMessage) return (ErrorMessage)error; + else return new ErrorMessage(error.getCode(),error.getMessage(),error.getDetailedMessage(),error.getCause()); + } + + /** + * Remove the first <code>offset</code> <i>concrete</i> hits in this group, + * and hits beyond <code>offset+numHits</code> + */ + public void trim(int offset, int numHits) { + updateHits(); + ensureSorted(); + + int highBound = numHits + offset; // Largest offset +1 + + int currentIndex = -1; + + for (Iterator<Hit> i = hits.iterator(); i.hasNext();) { + Hit hit = i.next(); + + if (hit.isAuxiliary()) continue; + + currentIndex++; + if (currentIndex < offset || currentIndex >= highBound) { + i.remove(); + handleRemovedHit(hit); + } + } + } + + /** + * Returns an iterator of the hits in this group. + * <p> + * This iterator is modifiable - removals will take effect in this group of hits. + */ + public Iterator<Hit> iterator() { + updateHits(); + ensureSorted(); + return new HitIterator(this, hits); + } + + /** + * Returns an iterator that does depth-first traversal of leaf hits of this group. Calling this method has the + * side-effect of sorting the internal list of hits. 
+ * + * @return A modifiable iterator. + */ + public Iterator<Hit> deepIterator() { + return new DeepHitIterator(iterator(), true); + } + + /** + * Returns an iterator that does depth-first traversal of leaf hits of this group, in a potentially unsorted order. + * As opposed to {@link #deepIterator()}, this method has no side-effect. + * + * @return A modifiable iterator. + */ + public Iterator<Hit> unorderedDeepIterator() { + return new DeepHitIterator(unorderedIterator(), false); + } + + /** Returns a read only list view of the hits in this */ + public List<Hit> asList() { + updateHits(); + ensureSorted(); + return unmodifiableHits; + } + + /** + * Returns a read only list view of the hits in this which is potentially unsorted. + * Using this over getHits is potentially faster when a sorted view is not needed. + */ + public List<Hit> asUnorderedHits() { + updateHits(); + return unmodifiableHits; + } + + /** + * Returns an iterator of the hits in this group in a potentially unsorted order. + * <p> + * Using this over getPreludeHitIterator is potentially faster when a sorted view is not needed. + * <p> + * This iterator is modifiable - removals will take effect in this group of hits. + */ + public Iterator<Hit> unorderedIterator() { + updateHits(); + return new HitIterator(this, hits); + } + + /** + * Force hit sorting now. 
+ * This is not normally useful because a group will stay sorted automatically, + * but it is in the case where + * the hits have changed their internal state in a way that should change ordering + */ + public void sort() { + if (hitOrderer == null) { + Collections.sort(hits); + hitsSorted = true; + } else { + // This may or may not lead to a sorted result set, but + // it's a best effort + hitOrderer.order(hits); + if (likelyHitsHaveCorrectValueForSortFields()) { + hitsSorted = true; + } + } + } + + private boolean likelyHitsHaveCorrectValueForSortFields() { + if (hitOrderer == null) { + return true; + } else { + Set<String> filledFields = getFilled(); + return filledFields == null || !filledFields.isEmpty(); + } + } + + /** + * <p>Sets the hit orderer for this group.</p> + * + * @param hitOrderer the new hit orderer, or null to use default relevancy ordering + */ + public void setOrderer(HitOrderer hitOrderer) { + this.hitOrderer = hitOrderer; + if (hits.size() > 1) { + hitsSorted = false; + } + } + + /** + * Explicitly set whether the hits in this group are correctly sorted at this moment. + * If the contained hits are modified directly in a way that + * may break ordering, you should call setSorted(false). + */ + public void setSorted(boolean sorted) { + this.hitsSorted = sorted; + } + + + /** Returns the orderer used by this group, or null if the default relevancy order is used */ + public HitOrderer getOrderer() { + return hitOrderer; + } + + public void setDeletionBreaksOrdering(boolean flag) { deletionBreaksOrdering = flag; } + + public boolean getDeletionBreaksOrdering() { return deletionBreaksOrdering; } + + /** Called before hit lists or positions are used */ + private void ensureSorted() { + if ( ! orderedHits && ! 
hitsSorted && likelyHitsHaveCorrectValueForSortFields()) { + sort(); + } + } + + /** + * Returns true if all the hits recursively contained in this + * is cached + */ + public @Override boolean isCached() { + if (notCachedCount<1) return true; + if (subgroupCount<1) return false; // No need to check below + + // Else check recursively + for (Hit hit : hits) { + if (hit instanceof HitGroup) { + if (hit.isCached()) return true; + } + } + return false; + } + + /** + * Returns whether all hits in this result have been filled with + * the properties contained in the given summary class. Note that + * this method will also return true if no hits in this result are + * fillable. + */ + public boolean isFilled(String summaryClass) { + Set<String> filled = getFilled(); + return (filled == null || filled.contains(summaryClass)); + } + + + /** + * Sets sorting information to be the same as for the provided hitGroup. + * The contained hits should already be sorted in the order specified by + * the hitGroup given as argument. + */ + public void copyOrdering(HitGroup hitGroup) { + setOrderer(hitGroup.getOrderer()); + setDeletionBreaksOrdering(hitGroup.getDeletionBreaksOrdering()); + setOrdered(hitGroup.orderedHits); + } + + // -------------- State bookkeeping + + /** Ensures result invariants. Must be called when a hit is added to this result. */ + private void handleNewHit(Hit hit) { + if (!hit.isAuxiliary()) + concreteHitCount++; + + if (hit.getAddNumber() < 0) { + hit.setAddNumber(size()); + } + + hitsSorted = false; + Set<String> hitFilled = hit.getFilled(); + + if (hitFilled != null) { + Set<String> filled = getFilledInternal(); + if (filled == null) { + if (hitFilled.isEmpty()) { + filled = null; + } else if (hitFilled.size() == 1) { + filled = Collections.singleton(hitFilled.iterator().next()); + } else { + filled = new HashSet<>(hitFilled); + } + setFilledInternal(filled); + } else { + if (filled.size() == 1) { + if ( ! 
hitFilled.contains(filled.iterator().next())) { + filled = null; // No intersection + setFilledInternal(filled); + } + } else { + filled.retainAll(hitFilled); + } + } + } + + if (hit instanceof HitGroup) { + subgroupCount++; + } + if (!hit.isCached()) { + notCachedCount++; + } + } + + // Filled is not kept in sync at removal + private void handleRemovedHit(Hit hit) { + if (!hit.isAuxiliary()) { + concreteHitCount--; + if (!hit.isCached()) + notCachedCount--; + } + else if (hit instanceof HitGroup) { + subgroupCount--; + } + + if (deletionBreaksOrdering) { + hitsSorted = false; + } + } + + private void analyzeHit(Hit hit) { + if (hit instanceof HitGroup) { + ((HitGroup)hit).analyze(); + } + if (!hit.isAuxiliary()) + concreteHitCount++; + + if (!hit.isCached()) + notCachedCount++; + } + + /** + * Update concreteHitCount, cached and filled by iterating trough the hits of this result. + * Recursively also update all subgroups. + */ + public void analyze() { + concreteHitCount=0; + setFilledInternal(null); + notCachedCount=0; + Set<String> filled = getFilledInternal(); + + Iterator<Hit> i = unorderedIterator(); + while (filled == null && i.hasNext()) { + Hit hit = i.next(); + analyzeHit(hit); + Set<String> hitFilled = hit.getFilled(); + if (hitFilled != null) { + filled = (hitFilled.size() == 1) + ? Collections.singleton(hitFilled.iterator().next()) + : hitFilled.isEmpty() ? null : new HashSet<>(hitFilled); + setFilledInternal(filled); + } + } + String singleKey = null; + if (filled != null && filled.size() == 1) { + singleKey = filled.iterator().next(); + } + + + for (; i.hasNext();) { + Hit hit = i.next(); + analyzeHit(hit); + + if (filled != null) { + Set<String> hitFilled = hit.getFilled(); + if (hitFilled == null) { + // Intentionally empty. Strange semantic, null -> matches everything + } else if (hitFilled.isEmpty()) { + filled = null; // No intersection + setFilledInternal(filled); + } else { + if (filled.size() == 1) { + if ( ! 
hitFilled.contains(singleKey)) { + filled = null; // No intersection + setFilledInternal(filled); + singleKey = null; + } + } else { + filled.retainAll(hitFilled); + if (filled.size() == 1) { + singleKey = filled.iterator().next(); + } + } + } + } + } + } + + public HitGroup clone() { + HitGroup hitGroupClone = (HitGroup) super.clone(); + hitGroupClone.hits = new ListenableArrayList<>(this.hits.size()); + hitGroupClone.unmodifiableHits = Collections.unmodifiableList(hitGroupClone.hits); + for (Iterator<Hit> i = this.hits.iterator(); i.hasNext();) { + Hit hitClone = i.next().clone(); + hitGroupClone.hits.add(hitClone); + } + if (this.errorHit!=null) { // Find the cloned error and assign it + for (Hit hit : hitGroupClone.asList()) { + if (hit instanceof ErrorHit) + hitGroupClone.errorHit=(ErrorHit)hit; + } + } + + if (this.getFilledInternal()!=null) { + hitGroupClone.setFilledInternal(new HashSet<>(this.getFilledInternal())); + } + + return hitGroupClone; + } + + @Override + public void setFillable() {} + + /** Ignored as this should always be derived from the content hits */ + @Override + public void setFilled(String summaryClass) {} + + @Override + public boolean isFillable() { + return fillableHits().iterator().hasNext(); + } + + @Override + public Set<String> getFilled() { + Iterator<Hit> hitIterator = hits.iterator(); + Set<String> firstSummaryNames = getSummaryNamesNextFilledHit(hitIterator); + if (firstSummaryNames == null || firstSummaryNames.isEmpty()) + return firstSummaryNames; + + Set<String> intersection = firstSummaryNames; + while (true) { + Set<String> summaryNames = getSummaryNamesNextFilledHit(hitIterator); + if (summaryNames == null) + break; + + if (intersection.size() == 1) + return getFilledSingle(first(intersection), hitIterator); + + + boolean notInSet = false; + if (intersection == firstSummaryNames) { + if (intersection.size() == summaryNames.size()) { + for(String s : summaryNames) { + if ( ! 
intersection.contains(s)) { + intersection = new HashSet<>(firstSummaryNames); + notInSet = true; + break; + } + } + } + } + if (notInSet) { + intersection.retainAll(summaryNames); + } + + } + + return intersection; + } + + private Set<String> getSummaryNamesNextFilledHit(Iterator<Hit> hitIterator) { + while (hitIterator.hasNext()) { + Set<String> filled = hitIterator.next().getFilled(); + if (filled != null) + return filled; + } + return null; + } + + private Set<String> getFilledSingle(String summaryName, Iterator<Hit> hitIterator) { + while (true) { + Set<String> summaryNames = getSummaryNamesNextFilledHit(hitIterator); + if (summaryNames == null) { + return Collections.singleton(summaryName); + } else if (!summaryNames.contains(summaryName)) { + return Collections.emptySet(); + } + } + } + + private Iterable<Hit> fillableHits() { + Predicate<Hit> isFillable = hit -> hit.isFillable(); + + return Iterables.filter(hits, isFillable); + } + + /** Returns the incoming hit buffer to which new hits can be added to this asynchronous, if supported by the instance */ + @Override + public IncomingData<Hit> incoming() { return incomingHits; } + + @Override + public ListenableFuture<DataList<Hit>> complete() { return completedFuture; } + + @Override + public void addDataListener(Runnable runnable) { + hits.addListener(runnable); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/result/HitGroupsLastComparator.java b/container-search/src/main/java/com/yahoo/search/result/HitGroupsLastComparator.java new file mode 100644 index 00000000000..0fe73a5afb5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/result/HitGroupsLastComparator.java @@ -0,0 +1,34 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.result; + +import java.util.Comparator; + +/** + * Ensures that HitGroups are placed last in the result. 
+ * + * @author tonytv + */ +public class HitGroupsLastComparator extends ChainableComparator { + + public HitGroupsLastComparator(Comparator<Hit> secondaryComparator) { + super(secondaryComparator); + } + + @Override + public int compare(Hit left, Hit right) { + if (isHitGroup(left) ^ isHitGroup(right)) { + return isHitGroup(left) ? 1 : -1; + } else { + return super.compare(left, right); + } + } + + private boolean isHitGroup(Hit hit) { + return hit instanceof HitGroup; + } + + @Override + public String toString() { + return getSecondaryComparator().toString(); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/result/HitIterator.java b/container-search/src/main/java/com/yahoo/search/result/HitIterator.java new file mode 100644 index 00000000000..adf642a28ec --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/result/HitIterator.java @@ -0,0 +1,66 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.result; + +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; + +import com.yahoo.search.Result; + + +/** + * An iterator for the list of hits in a result. This iterator supports the remove operation. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class HitIterator implements Iterator<Hit> { + + /** The index into the list of hits */ + private int index = -1; + + /** The list of hits to iterate over */ + private List<Hit> hits = null; + + /** The result the hits belong to */ + private HitGroup hitGroup = null; + + /** Whether the iterator is in a state where remove is OK */ + private boolean canRemove = false; + + public HitIterator(HitGroup hitGroup, List<Hit> hits) { + this.hitGroup = hitGroup; + this.hits = hits; + } + + public HitIterator(Result result, List<Hit> hits) { + this.hitGroup = result.hits(); + this.hits = hits; + } + + public boolean hasNext() { + if (hits.size() > (index + 1)) { + return true; + } else { + return false; + } + } + + public Hit next() throws NoSuchElementException { + if (hits.size() <= (index + 1)) { + throw new NoSuchElementException(); + } else { + canRemove = true; + return hits.get(++index); + } + } + + public void remove() throws IllegalStateException { + if (!canRemove) { + throw new IllegalStateException(); + } + hitGroup.remove(index); + index--; + canRemove = false; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/result/HitOrderer.java b/container-search/src/main/java/com/yahoo/search/result/HitOrderer.java new file mode 100644 index 00000000000..5982a93d86a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/result/HitOrderer.java @@ -0,0 +1,30 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.result; + +import java.util.Comparator; +import java.util.List; + +/** + * A class capable of ordering a list of hits + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ + +public abstract class HitOrderer { + + /** Orders the given list of hits */ + public abstract void order(List<Hit> hits); + + /** + * Returns the Comparator that this HitOrderer uses internally to + * sort hits. Returns null if no Comparator is used. + * <p> + * This default implementation returns null. + * + * @return the Comparator used to order hits, or null + */ + public Comparator<Hit> getComparator() { + return null; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/result/HitSortOrderer.java b/container-search/src/main/java/com/yahoo/search/result/HitSortOrderer.java new file mode 100644 index 00000000000..c532aba99d8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/result/HitSortOrderer.java @@ -0,0 +1,50 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.result; + +import com.yahoo.search.query.Sorting; + +import java.util.Collections; +import java.util.Comparator; +import java.util.List; + +/** + * A hit orderer which can be assigned to a HitGroup to keep that group's + * hit sorted in accordance with the sorting specification given when this is created. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class HitSortOrderer extends HitOrderer { + + private final Comparator<Hit> fieldComparator; + + /** Create a sort order from a sorting */ + public HitSortOrderer(Sorting sorting) { + fieldComparator = + new MetaHitsFirstComparator( + new HitGroupsLastComparator( + new FieldComparator(sorting))); + } + + /** + * Create a sort order from a comparator. + * This will be appended to the standard comparators used by this. 
+ */ + public HitSortOrderer(Comparator<Hit> comparator) { + fieldComparator = new MetaHitsFirstComparator(new HitGroupsLastComparator(comparator)); + } + + /** + * Orders the given list of hits according to the sorting given at construction + * + * Meta hits are sorted before concrete hits, but have no internal + * ordering. The sorting is stable. + */ + public void order(List<Hit> hits) { + Collections.sort(hits, fieldComparator); + } + + public Comparator<Hit> getComparator() { + return fieldComparator; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/result/MetaHitsFirstComparator.java b/container-search/src/main/java/com/yahoo/search/result/MetaHitsFirstComparator.java new file mode 100644 index 00000000000..900f47da6e4 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/result/MetaHitsFirstComparator.java @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.result; + +import java.util.Comparator; + +/** + * Ensures that meta hits are sorted before normal hits. All meta hits are + * considered equal. 
+ * + * @author tonytv + */ +public class MetaHitsFirstComparator extends ChainableComparator { + + public MetaHitsFirstComparator(Comparator<Hit> secondaryComparator) { + super(secondaryComparator); + } + + @Override + public int compare(Hit left, Hit right) { + if (left.isMeta() && right.isMeta()) { + return 0; + } else if (left.isMeta()) { + return -1; + } else if (right.isMeta()) { + return 1; + } else { + return super.compare(left, right); + } + } + + @Override + public String toString() { + return getSecondaryComparator().toString(); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/result/NanNumber.java b/container-search/src/main/java/com/yahoo/search/result/NanNumber.java new file mode 100644 index 00000000000..385be70cd4c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/result/NanNumber.java @@ -0,0 +1,41 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.result; + +/** + * A class representing unset or undefined numeric values. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@SuppressWarnings("serial") +public final class NanNumber extends Number { + public static final NanNumber NaN = new NanNumber(); + + private NanNumber() { + } + + @Override + public double doubleValue() { + return Double.NaN; + } + + @Override + public float floatValue() { + return Float.NaN; + } + + @Override + public int intValue() { + return 0; + } + + @Override + public long longValue() { + return 0L; + } + + @Override + public String toString() { + return ""; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/result/Relevance.java b/container-search/src/main/java/com/yahoo/search/result/Relevance.java new file mode 100644 index 00000000000..df79b64585e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/result/Relevance.java @@ -0,0 +1,86 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.result; + +import com.yahoo.text.DoubleFormatter; + +/** + * A relevance double value. These values should always be normalized between 0 and 1 (where 1 means perfect), + * however, this is not enforced. + * <p> + * Sources may create subclasses of this to include additional information or functionality. + * + * @author bratseth + */ +public class Relevance implements Comparable<Relevance> { + + private static final long serialVersionUID = 4536685722731661704L; + + /** The relevancy score. */ + private double score; + + /** + * Construct a relevancy object with an initial value. + * This initial value should ideally be a normalized value + * between 0 and 1, but that is not enforced. + * + * @param score the inital value (rank score) + */ + public Relevance(double score) { + this.score=score; + } + + /** + * Set score value to this value. This should ideally be a + * normalized value between 0 and 1, but that is not enforced. + * NaN is also a legal value, for elements where it makes no sense to assign a particular value. 
+ */ + public void setScore(double score) { this.score = score; } + + /** + * Returns the relevancy score of this, preferably a normalized value + * between 0 and 1 but this is not guaranteed by this framework + */ + public double getScore() { return score; } + + /** + * Returns the score value as a string + */ + public @Override String toString() { + return DoubleFormatter.stringValue(score); + } + + /** Compares relevancy values with */ + public int compareTo(Relevance other) { + double thisScore = getScore(); + double otherScore = other.getScore(); + if (Double.isNaN(thisScore)) { + if (Double.isNaN(otherScore)) { + return 0; + } else { + return -1; + } + } else if (Double.isNaN(otherScore)) { + return 1; + } else { + return Double.compare(thisScore, otherScore); + } + } + + /** Compares relevancy values */ + public @Override boolean equals(Object object) { + if (object==this) return true; + + if (!(object instanceof Relevance)) { return false; } + + Relevance other = (Relevance) object; + return getScore() == other.getScore(); + } + + /** Returns a hash from the relevancy value */ + public @Override int hashCode() { + double hash=getScore()*335451367; // A largish prime + if (hash>-1 && hash<1) hash=1/hash; + return (int) hash; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/result/StructuredData.java b/container-search/src/main/java/com/yahoo/search/result/StructuredData.java new file mode 100644 index 00000000000..c49f8a04b97 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/result/StructuredData.java @@ -0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.result; + +import com.yahoo.data.access.Inspector; +import com.yahoo.data.access.Inspectable; +import com.yahoo.data.access.simple.JsonRender; +import com.yahoo.data.JsonProducer; +import com.yahoo.data.XmlProducer; +import com.yahoo.prelude.hitfield.XmlRenderer; + +/** + * A wrapper for structured data representing feature values. + */ +public class StructuredData implements Inspectable, JsonProducer, XmlProducer { + + private final Inspector value; + + public StructuredData(Inspector value) { + this.value = value; + } + + @Override + public Inspector inspect() { + return value; + } + + public String toString() { + return toXML(); + } + + @Override + public String toXML() { + return writeXML(new StringBuilder()).toString(); + } + + @Override + public StringBuilder writeXML(StringBuilder target) { + return XmlRenderer.render(target, value); + } + + @Override + public String toJson() { + return writeJson(new StringBuilder()).toString(); + } + + @Override + public StringBuilder writeJson(StringBuilder target) { + return JsonRender.render(value, target, true); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/result/Templating.java b/container-search/src/main/java/com/yahoo/search/result/Templating.java new file mode 100644 index 00000000000..61dd38aaf93 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/result/Templating.java @@ -0,0 +1,210 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.result; + +import java.util.Map; + +import com.yahoo.container.jdisc.HttpRequest; +import com.yahoo.prelude.templates.SearchRendererAdaptor; +import com.yahoo.prelude.templates.TemplateSet; +import com.yahoo.prelude.templates.UserTemplate; +import com.yahoo.processing.rendering.Renderer; +import com.yahoo.search.Result; +import com.yahoo.search.query.Presentation; + +/** + * Helper methods and data store for result attributes geared towards result + * rendering and presentation. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class Templating { + + private final Result result; + private Renderer<Result> renderer; + + public Templating(Result result) { + super(); + this.result = result; + } + + /** + * Returns The first hit presented in the result as an index into the global + * list of all hits generated by the user query. + */ + public int getFirstHitNo() { + return result.getQuery().getOffset() + 1; + } + + /** + * Returns the first hit of the next result page, 0 if there aren't any more + * hits available + */ + public long getNextFirstHitNo() { + if (result.getQuery().getHits() > result.getConcreteHitCount()) { + return 0; + } + + return Math.min(getLastHitNo() + 1, result.getTotalHitCount()); + } + + /** + * Returns the first hit of the next result page, 0 if there aren't any more + * hits available + */ + public long getNextLastHitNo() { + if (result.getQuery().getHits() > result.getConcreteHitCount()) { + return 0; + } + + return Math.min(getLastHitNo() + result.getConcreteHitCount(), result.getTotalHitCount()); + } + + /** + * Returns the number of the last result of the current hit page. 
+ */ + public int getLastHitNo() { + return getFirstHitNo() + result.getConcreteHitCount() - 1; + } + + /** + * The first hit presented on the previous result page as an index into the + * global list of all hits generated by the user query + */ + public int getPrevFirstHitNo() { + return Math.max(getFirstHitNo() - result.getQuery().getHits(), 1); + } + + /** + * The last hit presented on the previous result page as an index into the + * global list of all hits generated by the user query + */ + public int getPrevLastHitNo() { + return Math.max(getFirstHitNo() - 1, 0); + } + + /** + * An URL that may be used to obtain the next result page. + */ + public String getNextResultURL() { + HttpRequest request = result.getQuery().getHttpRequest(); + StringBuilder nextURL = new StringBuilder(); + + nextURL.append(getPath(request)).append("?"); + parametersExceptOffset(request, nextURL); + + int offset = getLastHitNo(); + + nextURL.append("&").append("offset=").append(Integer.toString(offset)); + return nextURL.toString(); + } + + /** + * An URL that may be used to obtain the previous result page. 
+ */ + public String getPreviousResultURL() { + HttpRequest request = result.getQuery().getHttpRequest(); + StringBuilder prevURL = new StringBuilder(); + + prevURL.append(getPath(request)).append("?"); + parametersExceptOffset(request, prevURL); + int offset = getPrevFirstHitNo() - 1; + prevURL.append("&").append("offset=").append(Integer.toString(offset)); + return prevURL.toString(); + } + + public String getCurrentResultURL() { + HttpRequest request = result.getQuery().getHttpRequest(); + StringBuilder thisURL = new StringBuilder(); + + thisURL.append(getPath(request)).append("?"); + parameters(request, thisURL); + return thisURL.toString(); + } + + private String getPath(HttpRequest request) { + String path = request.getUri().getPath(); + if (path == null) { + path = ""; + } + return path; + } + + private void parametersExceptOffset(HttpRequest request, StringBuilder nextURL) { + int startLength = nextURL.length(); + for (Map.Entry<String, String> property : request.propertyMap().entrySet()) { + if (property.getKey().equals("offset")) continue; + + if (nextURL.length() > startLength) + nextURL.append("&"); + nextURL.append(property.getKey()).append("=").append(property.getValue()); + } + } + + private void parameters(HttpRequest request, StringBuilder nextURL) { + int startLength = nextURL.length(); + for (Map.Entry<String, String> property : request.propertyMap().entrySet()) { + if (nextURL.length() > startLength) + nextURL.append("&"); + nextURL.append(property.getKey()).append("=").append(property.getValue()); + } + } + + /** + * Returns the templates which will render the result. This is never null. + * If default rendering is used, it is a TemplateSet containing no + * templates. 
+ */ + @SuppressWarnings("rawtypes") + public UserTemplate getTemplates() { + if (renderer == null) { + return TemplateSet.getDefault(); + } else if (renderer instanceof SearchRendererAdaptor) { + return ((SearchRendererAdaptor) renderer).getAdaptee(); + } else { + throw new RuntimeException( + "Please use getTemplate() instead of getTemplates() when using the new template api."); + } + } + + /** + * Sets the template set which should render this result set + * + * @param templates + * the templates which should render this result, or null to + * use the default xml rendering + */ + @SuppressWarnings("deprecation") + public void setTemplates(@SuppressWarnings("rawtypes") UserTemplate templates) { + if (templates == null) { + setTemplates(TemplateSet.getDefault()); + } else { + setRenderer(new SearchRendererAdaptor(templates)); + } + } + + /** + * @deprecated since 5.1.21, use {@link Presentation#getRenderer()} + */ + @Deprecated // OK Do not remove on Vespa 6. Remove when we move everything having to do with templates + public Renderer<Result> getRenderer() { + return renderer; + } + + /** + * @deprecated since 5.1.21, use {@link Presentation#setRenderer(com.yahoo.component.ComponentSpecification)} + */ + @Deprecated // OK Do not remove on Vespa 6. Remove when we move everything having to do with templates + public void setRenderer(Renderer<Result> renderer) { + this.renderer = renderer; + } + + /** + * For internal use only. 
+ */ + public boolean usesDefaultTemplate() { + return renderer == null || + (renderer instanceof SearchRendererAdaptor && + ((SearchRendererAdaptor) renderer).getAdaptee().isDefaultTemplateSet()); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/result/package-info.java b/container-search/src/main/java/com/yahoo/search/result/package-info.java new file mode 100644 index 00000000000..aa93d0fdeab --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/result/package-info.java @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * The content of a Result produced in response to a Query. + */ +@ExportPackage +@PublicApi +package com.yahoo.search.result; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/AsyncExecution.java b/container-search/src/main/java/com/yahoo/search/searchchain/AsyncExecution.java new file mode 100644 index 00000000000..e1794a73a93 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchchain/AsyncExecution.java @@ -0,0 +1,204 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.searchchain; + +import com.yahoo.component.chain.Chain; +import com.yahoo.concurrent.ThreadFactoryFactory; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.*; + +/** + * Provides asynchronous execution of searchchains. + * + * <p> + * AsyncExecution is implemented as an asynchronous wrapper around Execution + * that returns Future. 
+ * </p> + * + * This is used in the following way + * + * <pre> + * Execution execution = new Execution(searchChain, context); + * AsyncExecution asyncExecution = new AsyncExecution(execution); + * Future<Result> future = asyncExecution.search(query) + * try { + * result = future.get(timeout, TimeUnit.milliseconds); + * } catch(TimeoutException e) { + * // Handle timeout + * } + * </pre> + * + * <p> + * Note that the query is not a thread safe object and cannot be shared between + * multiple concurrent executions - a clone() must be made, or a new query + * created for each AsyncExecution instance. + * </p> + * + * @see com.yahoo.search.searchchain.Execution + * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> + */ +public class AsyncExecution { + + private static final ThreadFactory threadFactory = ThreadFactoryFactory.getThreadFactory("search"); + + private static final Executor executorMain = createExecutor(); + + private static Executor createExecutor() { + ThreadPoolExecutor executor = new ThreadPoolExecutor(100, Integer.MAX_VALUE, 1L, TimeUnit.SECONDS, + new SynchronousQueue<>(false), threadFactory); + // Prestart needed, if not all threads will be created by the fist N tasks and hence they might also + // get the dreaded thread locals initialized even if they will never run. + // That counters what we we want to achieve with the Q that will prefer thread locality. + executor.prestartAllCoreThreads(); + return executor; + } + + /** The execution this executes */ + private final Execution execution; + + /** + * Creates an async execution. + * + * @param chain the chain to execute + * @param execution the execution holding the context of this + */ + public AsyncExecution(Chain<? extends Searcher> chain, Execution execution) { + this(execution.context(), chain); + } + + /** + * Creates an async execution. + * + * @param chain the chain to execute + * @param context the the context of this + */ + public AsyncExecution(Chain<? 
extends Searcher> chain, Execution.Context context) { + this(context, chain); + } + + /** + * <p> + * Creates an async execution from an existing execution. This async + * execution will execute the chain from the given execution, <i>starting + * from the next searcher in that chain.</i> This is handy to execute + * multiple queries to the rest of the chain in parallel. If the Execution + * is freshly instantiated, the search will obviously start from the first + * searcher. + * </p> + * + * <p> + * The state of the given execution is read on construction of this and not + * used later - the argument execution can be reused for other purposes. + * </p> + * + * @param execution the execution from which the state of this is created + * + * @see Execution#Execution(Chain, com.yahoo.search.searchchain.Execution.Context) + * @see #AsyncExecution(Chain, Execution) + */ + public AsyncExecution(Execution execution) { + this.execution = new Execution(execution); + } + + private AsyncExecution(Execution.Context context, Chain<? extends Searcher> chain) { + this.execution = new Execution(chain, context); + } + + /** + * Does an async search, note that the query argument cannot simultaneously + * be used to execute any other searches, a clone() must be made of the + * query for each async execution if the same query is to be used in more + * than one. 
+ * + * @see com.yahoo.search.searchchain.Execution + */ + public FutureResult search(final Query query) { + return getFutureResult(() -> execution.search(query), query); + } + + public FutureResult searchAndFill(final Query query) { + return getFutureResult(() -> { + Result result = execution.search(query); + execution.fill(result, query.getPresentation().getSummary()); + return result; + }, query); + } + + private static Executor getExecutor() { + return executorMain; + } + + /** + * The future of this functions returns the original Result + * + * @see com.yahoo.search.searchchain.Execution + */ + public FutureResult fill(final Result result, final String summaryClass) { + return getFutureResult(() -> { + execution.fill(result, summaryClass); + return result; + }, result.getQuery()); + + } + + private static <T> Future<T> getFuture(Callable<T> callable) { + final FutureTask<T> future = new FutureTask<>(callable); + getExecutor().execute(future); + return future; + } + + private static Future<Void> runTask(Runnable runnable) { + return getFuture(() -> { + runnable.run(); + return null; + }); + } + + private FutureResult getFutureResult(Callable<Result> callable, Query query) { + FutureResult future = new FutureResult(callable, execution, query); + getExecutor().execute(future); + return future; + } + + /* + * Waits for all futures until the given timeout. If a FutureResult isn't + * done when the timeout expires, it will be cancelled, and it will return a + * result. All unfinished Futures will be cancelled. 
+ * + * @return the list of results in the same order as returned from the task + * collection + */ + public static List<Result> waitForAll(Collection<FutureResult> tasks, long timeoutMs) { + + // Copy the list in case it is modified while we are waiting + final List<FutureResult> workingTasks = new ArrayList<>(tasks); + try { + runTask(() -> { + for (FutureResult task : workingTasks) + task.get(); + }).get(timeoutMs, TimeUnit.MILLISECONDS); + }catch (TimeoutException | InterruptedException | ExecutionException e) { + // Handle timeouts below + } + + final List<Result> results = new ArrayList<>(tasks.size()); + for (FutureResult atask : workingTasks) { + Result result; + if (atask.isDone() && !atask.isCancelled()) { + result = atask.get(); // Since isDone() = true, this won't + // block. + } else { // Not done and no errors thrown + result = new Result(atask.getQuery(), + atask.createTimeoutError()); + } + results.add(result); + } + return results; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/Execution.java b/container-search/src/main/java/com/yahoo/search/searchchain/Execution.java new file mode 100644 index 00000000000..a888ad9b59e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchchain/Execution.java @@ -0,0 +1,672 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.searchchain; + +import com.yahoo.component.chain.Chain; +import com.yahoo.language.Linguistics; +import com.yahoo.log.LogLevel; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.Ping; +import com.yahoo.prelude.Pong; +import com.yahoo.prelude.query.parser.SpecialTokenRegistry; +import com.yahoo.processing.Processor; +import com.yahoo.processing.Request; +import com.yahoo.processing.Response; +import com.yahoo.protect.Validator; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.cluster.PingableSearcher; +import com.yahoo.search.rendering.RendererRegistry; +import com.yahoo.search.statistics.TimeTracker; + +import java.util.logging.Logger; + +/** + * <p>An execution of a search chain. This keeps track of the call state for an execution (in the calling thread) + * of the searchers of a search chain.</p> + * + * <p>To execute a search chain, simply do + * <pre> + * Result result = new Execution(mySearchChain, execution.context()).search(query) + * </pre> + * + * + * <p>See also {@link AsyncExecution}, which performs an execution in a separate thread than the caller.</p> + * + * <p>Execution instances should not be reused for multiple separate executions.</p> + * + * @author bratseth + */ +public class Execution extends com.yahoo.processing.execution.Execution { + + public static final String ATTRIBUTEPREFETCH = "attributeprefetch"; + + /** + * The execution context is the search chain's current view of the indexes, + * search chain registrys, etc. Searcher instances may set values here to + * change the behavior of the rest of the search chain. + * <p> + * The Context class simply carries a set of objects which define the + * environment for the search. <b>Important:</b> All objects available through context need to + * be either truly immutable or support the freeze pattern. 
+ * <p> + * If you are implementing a searcher where you need to create a new Context + * instance to create an Execution, you should use the context from the + * execution the searcher was invoked from. You can also copy + * (Context.shallowCopy()) the incoming context if it is necessary to do + * more. In other words, a minimal example would be:<br> + * new Execution(searchChain, execution.context()) + */ + public static final class Context { + + /** + * Whether the search should perform detailed diagnostics. + */ + private boolean detailedDiagnostics = false; + + /** + * Whether the container was considered to be in a breakdown state when + * this query started. + */ + private boolean breakdown = false; + + /** + * The search chain registry current when this execution was created, or + * when the registry was first accessed, or null if it was not set on + * creation or has been accessed yet. No setter method is intentional. + */ + private SearchChainRegistry searchChainRegistry = null; + + private IndexFacts indexFacts = null; + + /** + * The current set of special tokens. + */ + private SpecialTokenRegistry tokenRegistry = null; + + /** + * The current template registry. + */ + private RendererRegistry rendererRegistry = null; + + /** + * The current linguistics. + */ + private Linguistics linguistics = null; + + /** Always set if this context belongs to an execution, never set if it does not. */ + private final Execution owner; + + // Please don't add more constructors to the public interface of Context + // unless the constructor is reasonably safe for an inexperienced user + // in a production setting. Since queries blow up in a spectacular + // fashion if Context is in a bad state, the Context() constructor is + // package private. 
+ + /** Create a context used to carry state into another context */ + Context() { this.owner=null; } + + /** Create a context which belongs to an execution */ + Context(Execution owner) { this.owner=owner; } + + /** + * Creates a context from arguments, all of which may be null, though + * this can be risky. If you are doing this outside a test, it is + * usually better to do something like execution.context().shallowCopy() + * instead, and then set the fields you need to change. It is also safe + * to use the context from the incoming execution directly. In other + * words, a plug-in writer should practically never construct a Context + * instance directly. + * <p> + * This context is never attached to an execution but is used to carry state into + * another context. + */ + public Context(SearchChainRegistry searchChainRegistry, IndexFacts indexFacts, + SpecialTokenRegistry tokenRegistry, RendererRegistry rendererRegistry, Linguistics linguistics) + { + owner=null; + // The next time something is added here, compose into wrapper objects. Many arguments... + + // Four methods need to be updated when adding something: + // fill(Context), populateFrom(Context), equals(Context) and, + // obviously, the most complete constructor. + this.searchChainRegistry = searchChainRegistry; + this.indexFacts = indexFacts; + this.tokenRegistry = tokenRegistry; + this.rendererRegistry = rendererRegistry; + this.linguistics = linguistics; + } + + /** Creates a context stub with no information. This is for unit testing. */ + public static Context createContextStub() { + return new Context(null, null, null, null, null); + } + + /** + * Create a Context instance where only the index related settings are + * initialized. This is for unit testing. 
+ */ + public static Context createContextStub(IndexFacts indexFacts) { + return new Context(null, indexFacts, null, null, null); + } + + /** + * Create a Context instance where only the search chain registry and index facts are + * initialized. This is for unit testing. + */ + public static Context createContextStub(SearchChainRegistry searchChainRegistry, IndexFacts indexFacts) { + return new Context(searchChainRegistry, indexFacts, null, null, null); + } + + /** + * Create a Context instance where only the search chain registry, index facts and linguistics are + * initialized. This is for unit testing. + */ + public static Context createContextStub(SearchChainRegistry searchChainRegistry, IndexFacts indexFacts, Linguistics linguistics) { + return new Context(searchChainRegistry, indexFacts, null, null, linguistics); + } + + /** + * Populate missing values in this from the given context. + * Values which are non-null in this will not be overwritten. + * + * @param sourceContext the context from which to get the parameters + */ + public void populateFrom(Context sourceContext) { + // breakdown and detailedDiagnostics has no unset state, so they are always copied + detailedDiagnostics = sourceContext.detailedDiagnostics; + breakdown = sourceContext.breakdown; + if (indexFacts == null) { + indexFacts = sourceContext.indexFacts; + } + if (tokenRegistry == null) { + tokenRegistry = sourceContext.tokenRegistry; + } + if (searchChainRegistry == null) { + searchChainRegistry = sourceContext.searchChainRegistry; + } + if (rendererRegistry == null) { + rendererRegistry = sourceContext.rendererRegistry; + } + if (linguistics == null) { + linguistics = sourceContext.linguistics; + } + } + + /** + * The brutal version of populateFrom(). 
+ * + * @param other a Context instance this will copy all state from + */ + void fill(Context other) { + searchChainRegistry = other.searchChainRegistry; + indexFacts = other.indexFacts; + tokenRegistry = other.tokenRegistry; + rendererRegistry = other.rendererRegistry; + detailedDiagnostics = other.detailedDiagnostics; + breakdown = other.breakdown; + linguistics = other.linguistics; + } + + public boolean equals(Context other) { + // equals() needs to be cheap, that's yet another reason we can only + // allow immutables and frozen objects in the context + return other.indexFacts == indexFacts + && other.rendererRegistry == rendererRegistry + && other.tokenRegistry == tokenRegistry + && other.searchChainRegistry == searchChainRegistry + && other.detailedDiagnostics == detailedDiagnostics + && other.breakdown == breakdown + && other.linguistics == linguistics; + } + + @Override + public boolean equals(Object other) { + if (other == null) { + return false; + } + if (other.getClass() != Context.class) { + return false; + } else { + return equals((Context) other); + } + } + + /** + * Standard shallow copy, the new instance will carry the same + * references as this. + * + * @return a new instance which is a shallow copy + */ + public Context shallowCopy() { + Context c = new Context(); + c.fill(this); + return c; + } + + /** + * This is used when building the Context stack. If Context has been + * changed since last time, build a new object. Otherwise simply return + * the previous snapshot. + * + * @param previous another Context instance to compare with + * @return a copy of this, or previous + */ + Context copyIfChanged(Context previous) { + if (equals(previous)) { + return previous; + } else { + return shallowCopy(); + } + } + + /** + * Returns information about the indexes specified by the search definitions + * used in this system, or null if not know. 
+ */ + // TODO: Make a null default instance + public IndexFacts getIndexFacts() { + return indexFacts; + } + + /** + * Use this to override index settings for the searchers below + * a given searcher, the easiest way to do this is to wrap the incoming + * IndexFacts instance in a subclass. E.g. + * execution.context().setIndexFacts(new WrapperClass(execution.context().getIndexFacts())). + * + * @param indexFacts + * an instance to override the following searcher's view of + * the indexes. + */ + public void setIndexFacts(IndexFacts indexFacts) { + this.indexFacts = indexFacts; + } + + /** + * Returns the search chain registry to use with this execution. This is + * a snapshot taken at creation of this execution, use + * Context.shallowCopy() to get a correctly instantiated Context if + * making a custom Context instance. + */ + public SearchChainRegistry searchChainRegistry() { + return searchChainRegistry; + } + + /** + * Returns the template registry to use with this execution. This is + * a snapshot taken at creation of this execution. + */ + public RendererRegistry rendererRegistry() { + return rendererRegistry; + } + + /** + * @return the current set of special strings for the query tokenizer + */ + public SpecialTokenRegistry getTokenRegistry() { + return tokenRegistry; + } + + /** + * Wrapping the incoming special token registry and then setting the + * wrapper as the token registry, can be used for changing the set of + * special tokens used by succeeding searchers. E.g. + * execution.context().setTokenRegistry(new WrapperClass(execution.context().getTokenRegistry())). 
+ * + * @param tokenRegistry a new registry for overriding behavior of following searchers + */ + public void setTokenRegistry(SpecialTokenRegistry tokenRegistry) { + this.tokenRegistry = tokenRegistry; + } + + public void setDetailedDiagnostics(boolean breakdown) { + this.detailedDiagnostics = breakdown; + } + + /** + * The container has some internal diagnostics mechanisms which may be + * costly, and therefore not active by default. Any general diagnostic + * mechanism which should not be active be default, may inspect that + * state here. If breakdown is assumed, a certain percentage of queries + * will have this set automatically. + * + * @return whether components exposing different level of diagnostics + * should go for the most detailed level + */ + public boolean getDetailedDiagnostics() { + return detailedDiagnostics; + } + + /** + * If too many queries time out, the search handler will assume the + * system is in a breakdown state. This state is propagated here. + * + * @return whether the system is assumed to be in a breakdown state + */ + public boolean getBreakdown() { + return breakdown; + } + + public void setBreakdown(boolean breakdown) { + this.breakdown = breakdown; + } + + /** + * Returns the {@link Linguistics} object assigned to this Context. This object provides access to all the + * linguistic-related APIs, and comes pre-configured with the Execution given. + * + * @return The current Linguistics. + */ + public Linguistics getLinguistics() { + return linguistics; + } + + public void setLinguistics(Linguistics linguistics) { + this.linguistics = linguistics; + } + + /** Creates a child trace if this has an owner, or a root trace otherwise */ + private Trace createChildTrace() { + return owner!=null ? owner.trace().createChild() : Trace.createRoot(0); + } + + /** Creates a child environment if this has an owner, or a root environment otherwise */ + private Environment createChildEnvironment() { + return owner!=null ? 
owner.environment().nested() : Execution.Environment.<Searcher>createEmpty(); + } + + } + + /** + * The index of where in the chain this Execution has its initial entry point. + * This is needed because executions can be started from the middle of other executions. + */ + private final int entryIndex; + + /** Time spent in each state of filling, searching or pinging. */ + private final TimeTracker timer; + + /** A searcher's view of state external to the search chain. */ + // Note that the context plays the same role as the Environment of the super.Execution + // (although complicated by the need for stack-like behavior on changes). + // We might want to unify those at some point. + private final Context context = new Context(this); + + /** + * Array for hiding context changes done in search by searcher following + * another. + */ + private final Context[] contextCache; + + private static final Logger log = Logger.getLogger(Execution.class.getName()); + + /** + * <p> + * Creates an execution from another. This execution will start at the + * <b>current next searcher</b> in the given execution, rather than at the + * start. + * </p> + * + * <p> + * The relevant state of the given execution is copied before this method + * returns - the argument execution can then be reused for any other + * purpose. + * </p> + */ + public Execution(Execution execution) { + this(execution.chain(), execution.context, execution.nextIndex()); + } + + /** Creates an which executes nothing */ + public Execution(Context context) { + this(new Chain<>(), context); + } + + /** + * The usually best way of creating a new execution for a search chain. This + * is the one suitable for a production environment. 
It is safe to use the
     * incoming context from the search directly:
     *
     * <pre>
     * public Result search(Query query, Execution execution) {
     *     SearchChain searchChain = fancyChainSelectionRoutine(query);
     *     if (searchChain != null) {
     *         return new Execution(searchChain, execution.context()).search(query);
     *     } else {
     *         return execution.search(query);
     *     }
     * }
     * </pre>
     *
     * @param searchChain
     *            the search chain to execute
     * @param context
     *            the execution context from which this is populated (the given
     *            context is not changed nor retained by this), or null to not
     *            populate from a context
     * @throws IllegalArgumentException
     *             if searchChain is null
     */
    // NOTE(review): no null check of searchChain or context is visible in the constructors of this
    // class - whether null is rejected with IllegalArgumentException (or accepted at all, for context)
    // depends on the superclass constructor; confirm
    public Execution(Chain<? extends Searcher> searchChain, Context context) {
        this(searchChain, context, 0);
    }

    /** Creates an execution from a single searcher */
    public Execution(Searcher searcher, Context context) {
        this(new Chain<>(searcher), context, 0);
    }

    /**
     * Creates a new execution of a chain, starting at the given index. All other
     * constructors delegate to this.
     *
     * @param searchChain
     *            the search chain to execute
     * @param context
     *            execution context for the search: Its state is copied into the context
     *            of this, and the trace and environment of this are created from it
     * @param searcherIndex
     *            index of the first searcher to invoke, see
     *            Execution(Execution)
     */
    @SuppressWarnings("unchecked")
    private Execution(Chain<? extends Processor> searchChain,Context context, int searcherIndex) {
        // Create a new Execution which is placed in the context of the execution of the given Context if any
        // "if any" because a context may, or may not, belong to an execution.
        // This is decided at the creation time of the Context - Context instances which do not belong
        // to an execution plays the role of data carriers between executions.
        super(searchChain,searcherIndex,context.createChildTrace(),context.createChildEnvironment());
        this.context.fill(context);
        // One slot per component in the chain, used as a stack of context snapshots by push/popContext
        contextCache = new Context[searchChain.components().size()];
        entryIndex=searcherIndex;
        timer = new TimeTracker(searchChain, searcherIndex);
    }

    /** Does return search(((Query)request) */
    @Override
    public final Response process(Request request) {
        return search((Query)request);
    }

    /** Calls search on the next searcher in this chain. If there is no next, an empty result is returned. */
    public Result search(Query query) {
        timer.sampleSearch(nextIndex(), context.getDetailedDiagnostics());

        // Transfer state between query and execution as the execution constructors does not do that completely
        query.getModel().setExecution(this);
        trace().setTraceLevel(query.getTraceLevel());

        return (Result)super.process(query);
    }

    /**
     * Called before a processor is invoked: In addition to what the superclass does, this traces the id
     * and dependencies of the processor when the trace level of the query is at least 6.
     */
    @Override
    protected void onInvoking(Request request, Processor processor) {
        super.onInvoking(request,processor);
        final int traceDependencies = 6;
        Query query = (Query) request;
        if (query.getTraceLevel() >= traceDependencies) {
            query.trace(new StringBuilder().append(processor.getId())
                                .append(" ").append(processor.getDependencies().toString())
                                .toString(), traceDependencies);
        }
    }

    /**
     * The default response returned from this kind of execution when there are not further processors
     * - an empty Result
     */
    @Override
    protected Response defaultResponse(Request request) {
        return new Result((Query)request);
    }

    /**
     * Fill hit properties with values from all in-memory attributes.
     * This can be done with good performance on many more hits than
     * those for which fill is called with the final summary class, so
     * if filtering can be done using only in-memory attribute data,
     * this method should be preferred over {@link #fill} to get that data for filtering.
     * <p>
     * Calling this on already filled results has no cost.
     *
     * @param result the result to fill
     */
    @SuppressWarnings("deprecation")
    public void fillAttributes(Result result) {
        fill(result, ATTRIBUTEPREFETCH);
    }

    /**
     * Fill hit properties with data using the default summary
     * class, possibly overridden with the 'summary' request parameter.
     * <p>
     * Fill <b>must</b> be called before any property (accessed by
     * getProperty/getField) is accessed on the hit. It should be done
     * as late as possible for performance reasons.
     * <p>
     * Calling this on already filled results has no cost.
     *
     * @param result the result to fill
     */
    public void fill(Result result) {
        fill(result, result.getQuery().getPresentation().getSummary());
    }

    /** Calls fill on the next searcher in this chain. If there is no next, nothing is done. */
    public void fill(Result result,String summaryClass) {
        timer.sampleFill(nextIndex(), context.getDetailedDiagnostics());
        Searcher next = (Searcher)next(); // TODO: Allow but skip processors which are not searchers
        if (next==null) return;

        try {
            // Step into the next searcher (this also pushes a context snapshot) ...
            nextProcessor();
            next.ensureFilled(result, summaryClass, this);
        }
        finally {
            // ... and always step back out, restoring the context, even if the searcher throws
            previousProcessor();
            timer.sampleFillReturn(nextIndex(), context.getDetailedDiagnostics(), result);
        }
    }

    /** Calls ping on the next search in this chain. If there is no next, a Pong is created and returned. */
    public Pong ping(Ping ping) {
        // return this reference, not directly. It's needed for adding time data
        Pong annotationReference = null;

        timer.samplePing(nextIndex(), context.getDetailedDiagnostics());
        Searcher next = (Searcher)next(); // TODO: Allow but skip processors which are not searchers
        if (next==null) {
            annotationReference = new Pong();
            return annotationReference;
        }

        try {
            nextProcessor();
            annotationReference = invokePing(ping, next);
            return annotationReference;
        }
        finally {
            // Runs also when invokePing throws, in which case annotationReference is still null
            previousProcessor();
            timer.samplePingReturn(nextIndex(), context.getDetailedDiagnostics(), annotationReference);
        }
    }

    /** Called when a processor returns: In addition to what the superclass does, this samples the search return time */
    @Override
    protected void onReturning(Request request, Processor processor,Response response) {
        super.onReturning(request, processor, response);
        timer.sampleSearchReturn(nextIndex(), context.getDetailedDiagnostics(), (Result)response);
    }

    /** Moves back to the previous processor and then restores the context snapshot stored for that index */
    @Override
    protected void previousProcessor() {
        super.previousProcessor();
        popContext();
    }

    /** Stores a context snapshot for the current index and then moves on to the next processor */
    @Override
    protected void nextProcessor() {
        pushContext();
        super.nextProcessor();
    }

    /** Overwrites the context with the snapshot stored at the current index and clears that slot */
    private void popContext() {
        context.fill(contextCache[nextIndex()]);
        contextCache[nextIndex()] = null;
    }

    /**
     * Stores a snapshot of the context at the current index. At the entry index a fresh copy is always
     * made. At later indexes the snapshot of the preceding index is reused if the context is unchanged.
     */
    private void pushContext() {
        final Context contextToPush;
        // Do note: Never put this.context in the cache. It would be totally
        // meaningless, since it's a final.
        if (nextIndex() == entryIndex) {
            contextToPush = context.shallowCopy();
        } else {
            contextToPush = context.copyIfChanged(contextCache[nextIndex() - 1]);
        }
        contextCache[nextIndex()] = contextToPush;
    }

    /**
     * Pings the given searcher if it is a PingableSearcher, otherwise passes the ping on
     * to the rest of the chain through {@link #ping(Ping)}.
     */
    private Pong invokePing(Ping ping, Searcher next) {
        Pong annotationReference;
        if (next instanceof PingableSearcher) {
            annotationReference = ((PingableSearcher) next).ping(ping, this);
        } else {
            annotationReference = ping(ping);
        }
        return annotationReference;
    }

    /**
     * Returns the search chain registry to use with this execution. This is a
     * snapshot taken at creation of this execution if available.
+ */ + public SearchChainRegistry searchChainRegistry() { + return context.searchChainRegistry(); + } + + /** + * Returns the context of this execution, which contains various objects + * which are looked up through a memory barrier at the point this is created + * and which is guaranteed to be frozen during the execution of this query. + * <p> + * Note that the context itself can be changed. Such changes will be visible + * to downstream searchers, but not after returning from the modifying + * searcher. In other words, a change in the context will not be visible to + * the preceding searchers when the result is returned from the searcher + * which modified the context. + */ + public Context context() { + return context; + } + + /** + * @return the TimeTracker instance associated with this Execution + */ + public TimeTracker timer() { + return timer; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/ForkingSearcher.java b/container-search/src/main/java/com/yahoo/search/searchchain/ForkingSearcher.java new file mode 100644 index 00000000000..cae1ba36e6c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchchain/ForkingSearcher.java @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.searchchain; + +import com.yahoo.component.ComponentId; +import com.yahoo.component.chain.Chain; +import com.yahoo.search.Searcher; + +import java.util.Collection; + +/** + * Searchers which invokes other search chains should override this. + * + * @author bratseth + */ +public abstract class ForkingSearcher extends Searcher { + + public ForkingSearcher() {} + + /** A search chain with a comment about when it is used. 
*/ + public static class CommentedSearchChain { + public final String comment; + public final Chain<Searcher> searchChain; + + public CommentedSearchChain(String comment, Chain<Searcher> searchChain) { + this.comment = comment; + this.searchChain = searchChain; + } + } + + /** Returns which searchers this searcher may forward to, for debugging and tracing */ + public abstract Collection<CommentedSearchChain> getSearchChainsForwarded(SearchChainRegistry registry); + +} diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/FutureResult.java b/container-search/src/main/java/com/yahoo/search/searchchain/FutureResult.java new file mode 100644 index 00000000000..877252f07e6 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchchain/FutureResult.java @@ -0,0 +1,86 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.searchchain; + +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.FutureTask; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.logging.Level; +import java.util.logging.Logger; + +import com.yahoo.yolean.Exceptions; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.result.ErrorMessage; + +/** + * Extends a {@code FutureTask<Result>}, with some added error handling + */ +public class FutureResult extends FutureTask<Result> { + + private final Query query; + + /** Only used for generating messages */ + private final Execution execution; + + private final static Logger log = Logger.getLogger(FutureResult.class.getName()); + + FutureResult(Callable<Result> callable, Execution execution, final Query query) { + super(callable); + this.query = query; + this.execution = execution; + } + + @Override + public Result get() { + Result result; + try { + result = super.get(); + } + catch 
(InterruptedException e) { + result = new Result(getQuery(), ErrorMessage.createUnspecifiedError( + "'" + execution + "' was interrupted while executing: " + Exceptions.toMessageString(e))); + } + catch (ExecutionException e) { + log.log(Level.WARNING,"Exception on executing " + execution + " for " + query,e); + result = new Result(getQuery(), ErrorMessage.createErrorInPluginSearcher( + "Error in '" + execution + "': " + Exceptions.toMessageString(e), + e.getCause())); + } + return result; + } + + @Override + public Result get(long timeout, TimeUnit timeunit) { + Result result; + try { + result = super.get(timeout, timeunit); + } + catch (InterruptedException e) { + result = new Result(getQuery(), ErrorMessage.createUnspecifiedError( + "'" + execution + "' was interrupted while executing: " + Exceptions.toMessageString(e))); + } + catch (ExecutionException e) { + log.log(Level.WARNING,"Exception on executing " + execution + " for " + query, e); + result = new Result(getQuery(), ErrorMessage.createErrorInPluginSearcher( + "Error in '" + execution + "': " + Exceptions.toMessageString(e), + e.getCause())); + } + catch (TimeoutException e) { + result = new Result(getQuery(), createTimeoutError()); + } + return result; + } + + /** Returns the query used in this execution, never null */ + public Query getQuery() { + return query; + } + + ErrorMessage createTimeoutError() { + return ErrorMessage.createTimeout( + "Error executing '" + execution + "': " + " Chain timed out."); + + } +} diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/PhaseNames.java b/container-search/src/main/java/com/yahoo/search/searchchain/PhaseNames.java new file mode 100644 index 00000000000..96bef503e0e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchchain/PhaseNames.java @@ -0,0 +1,46 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.searchchain; + +/** + * Helper class for ordering searchers. Searchers may use these names in their + * {@literal @}Before and {@literal @}After annotations, though in general + * a searcher should depend on some explicit functionality, not these + * checkpoints. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public final class PhaseNames { + private PhaseNames() { + } + + /** + * A checkpoint where the query is not yet transformed in any way. RAW_QUERY + * is the first checkpoint not provided by some searcher. + */ + public static final String RAW_QUERY = "rawQuery"; + + /** + * A checkpoint where as many query transformers as practically possible have + * been run. TRANSFORMED_QUERY is the first checkpoint after RAW_QUERY. + */ + public static final String TRANSFORMED_QUERY = "transformedQuery"; + + /** + * A checkpoint where results from different backends have been flattened + * into a single result. BLENDED_RESULT is the first checkpoint after + * TRANSFORMED_QUERY. + */ + public static final String BLENDED_RESULT = "blendedResult"; + + /** + * A checkpoint where data from different backends are not yet merged. + * UNBLENDED_RESULT is the first checkpoint after BLENDED_RESULT. + */ + public static final String UNBLENDED_RESULT = "unblendedResult"; + + /** + * The last checkpoint in a search chain not provided by any searcher. + */ + public static final String BACKEND = "backend"; + +} diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/SearchChain.java b/container-search/src/main/java/com/yahoo/search/searchchain/SearchChain.java new file mode 100644 index 00000000000..457604f7ce8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchchain/SearchChain.java @@ -0,0 +1,85 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.searchchain; + +import com.yahoo.component.ComponentId; +import com.yahoo.component.chain.Chain; +import com.yahoo.component.chain.Phase; +import com.yahoo.search.Searcher; + +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +/** + * A named collection of searchers. + * <p> + * The searchers may have dependencies which define an ordering + * of the searchers of this chain. + * <p> + * Search chains may inherit the searchers of other chains and modify + * the inherited set of searchers. + * <p> + * Search chains may be versioned. The version and name string combined + * is a unique identifier of a search chain. + * <p> + * A search chain cannot be modified once constructed. + * + * @author bratseth + */ +public class SearchChain extends Chain<Searcher> { + + public SearchChain(ComponentId id) { + this(id, null, null); + } + + public SearchChain(ComponentId id, Searcher... searchers) { + this(id, Arrays.asList(searchers)); + } + + public SearchChain(ComponentId id, Collection<Searcher> searchers) { + this(id, searchers, null); + } + + /** + * Creates a search chain. + * <p> + * This search chain makes a copy of the given lists before returning and does not modify the argument lists. + * <p> + * The total set of searchers included in this chain will be + * <ul> + * <li>The searchers given in <code>searchers</code>. + * <li>Plus all searchers returned by {@link #searchers} on all search chains in <code>inherited</code>. + * If a searcher with a given name is present in the <code>searchers</code> list in any version, that + * version will be used, and a searcher with that name will never be included from an inherited search chain. + * If the same searcher exists in multiple inherited chains, the highest version will be used. + * <li>Minus all searchers, of any version, whose name exists in the <code>excluded</code> list. 
+ * </ul> + * + * @param id the id of this search chain + * @param searchers the searchers of this chain, or null if none + * @param phases the phases of this chain + */ + public SearchChain(ComponentId id, Collection<Searcher> searchers, Collection<Phase> phases) { + super(id, searchers, phases); + } + + /** For internal use only! */ + public SearchChain(Chain<Searcher> chain) { + super(chain.getId(), chain.components()); + } + + /** + * Returns an unmodifiable list of the searchers this search chain executes, in resolved execution order. + * This includes all inherited (and not excluded) searchers. + */ + public List<Searcher> searchers() { + return components(); + } + + @Override + public String toString() { + StringBuilder b = new StringBuilder("search "); + b.append(super.toString()); + return b.toString(); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/SearchChainRegistry.java b/container-search/src/main/java/com/yahoo/search/searchchain/SearchChainRegistry.java new file mode 100644 index 00000000000..9513394bc9f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchchain/SearchChainRegistry.java @@ -0,0 +1,109 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.searchchain; + +import com.yahoo.component.AbstractComponent; +import com.yahoo.component.ComponentId; +import com.yahoo.component.ComponentSpecification; +import com.yahoo.component.chain.Chain; +import com.yahoo.component.provider.ComponentRegistry; +import com.yahoo.processing.execution.chain.ChainRegistry; +import com.yahoo.search.Searcher; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * Contains a reference to all currently known search chains. + * Searchers can be fetched from this from multiple threads. 
+ * <p> + * A registry can exist in two states: + * <ul> + * <li>not frozen - in this state it can be edited freely by calling {@link #register} + * <li>frozen - in this state any attempt at modification throws an IllegalStateException + * </ul> + * Registries start in the first state, move to the second on calling freeze and stay in that + * state for the rest of their lifetime. + * + * @author bratseth + */ +public class SearchChainRegistry extends ChainRegistry<Searcher> { + + private final SearcherRegistry searcherRegistry; + + @Override + public void freeze() { + super.freeze(); + getSearcherRegistry().freeze(); + } + + public SearchChainRegistry() { + searcherRegistry = new SearcherRegistry(); + searcherRegistry.freeze(); + } + + public SearchChainRegistry(ComponentRegistry<? extends AbstractComponent> allComponentRegistry) { + this.searcherRegistry = setupSearcherRegistry(allComponentRegistry); + } + + public void register(Chain<Searcher> component) { + super.register(component.getId(), component); + } + + public Chain<Searcher> unregister(Chain<Searcher> component) { + return super.unregister(component.getId()); + } + + private SearcherRegistry setupSearcherRegistry(ComponentRegistry<? 
extends AbstractComponent> allComponents) { + SearcherRegistry registry = new SearcherRegistry(); + for (AbstractComponent component : allComponents.allComponents()) { + if (component instanceof Searcher) { + registry.register((Searcher) component); + } + } + //just freeze this right away + registry.freeze(); + return registry; + } + + public SearcherRegistry getSearcherRegistry() { + return searcherRegistry; + } + + @Override + public SearchChain getComponent(ComponentId id) { + Chain<Searcher> chain = super.getComponent(id); + return asSearchChain(chain); + } + + @Override + public SearchChain getComponent(ComponentSpecification specification) { + return asSearchChain(super.getComponent(specification)); + } + + public final Chain<Searcher> getChain(String componentSpecification) { + return super.getComponent(new ComponentSpecification(componentSpecification)); + } + + public final Chain<Searcher> getChain(ComponentId id) { + return super.getComponent(id); + } + + + @Override + public SearchChain getComponent(String componentSpecification) { + return getComponent(new ComponentSpecification(componentSpecification)); + } + + private SearchChain asSearchChain(Chain<Searcher> chain) { + if (chain == null) { + return null; + } else if (chain instanceof SearchChain) { + return (SearchChain) chain; + } else { + return new SearchChain(chain); + } + } + + +} diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/SearcherRegistry.java b/container-search/src/main/java/com/yahoo/search/searchchain/SearcherRegistry.java new file mode 100644 index 00000000000..d1a4c1743d6 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchchain/SearcherRegistry.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.searchchain; +import com.yahoo.component.provider.ComponentRegistry; +import com.yahoo.search.Searcher; +import com.yahoo.search.pagetemplates.engine.Resolver; + +/** + * A registry of searchers. This is instantiated and recycled in the context of an owning search chain registry. + * This class exists for legacy purposes only, to preserve the public API for retrieving searchers from Vespa 4.2. + * + * @author bratseth + */ +public class SearcherRegistry extends ComponentRegistry<Searcher> { + + public void register(Searcher searcher) { + super.register(searcher.getId(), searcher); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/example/ExampleSearcher.java b/container-search/src/main/java/com/yahoo/search/searchchain/example/ExampleSearcher.java new file mode 100644 index 00000000000..06a4096dc68 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchchain/example/ExampleSearcher.java @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.searchchain.example; + +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.result.Hit; +import com.yahoo.search.searchchain.Execution; + +/** + * An example searcher which adds a hit + * + * @author bratseth + */ +public class ExampleSearcher extends Searcher { + + public @Override Result search(Query query,Execution execution) { + Result result=execution.search(query); + result.hits().add(new Hit("example",1.0,"examplesearcher")); + return result; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/model/VespaSearchers.java b/container-search/src/main/java/com/yahoo/search/searchchain/model/VespaSearchers.java new file mode 100644 index 00000000000..1a3790e1012 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchchain/model/VespaSearchers.java @@ -0,0 +1,84 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.searchchain.model; + +import com.yahoo.container.bundle.BundleInstantiationSpecification; +import com.yahoo.component.ComponentId; +import com.yahoo.component.ComponentSpecification; +import com.yahoo.component.chain.dependencies.Dependencies; +import com.yahoo.component.chain.model.ChainedComponentModel; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.model.federation.FederationSearcherModel; +import com.yahoo.search.searchchain.model.federation.FederationSearcherModel.TargetSpec; +import org.apache.commons.collections.CollectionUtils; + +import java.util.*; + + +/** + * Defines the searcher models used in the vespa and native search chains, except for federation. 
+ * + * @author tonytv + */ +@SuppressWarnings({"rawtypes", "deprecation", "unchecked"}) +public class VespaSearchers { + public static final Collection<ChainedComponentModel> vespaSearcherModels = + toSearcherModels( + com.yahoo.prelude.querytransform.IndexCombinatorSearcher.class, + //com.yahoo.prelude.querytransform.LocalitySearcher.class, + com.yahoo.prelude.querytransform.PhrasingSearcher.class, + com.yahoo.prelude.searcher.FieldCollapsingSearcher.class, + com.yahoo.search.yql.MinimalQueryInserter.class, + com.yahoo.search.yql.FieldFilter.class, + com.yahoo.prelude.searcher.JuniperSearcher.class, + com.yahoo.prelude.searcher.BlendingSearcher.class, + com.yahoo.prelude.searcher.PosSearcher.class, + com.yahoo.prelude.semantics.SemanticSearcher.class, + com.yahoo.search.grouping.GroupingQueryParser.class); + + + public static final Collection<ChainedComponentModel> nativeSearcherModels; + + static { + nativeSearcherModels = new LinkedHashSet<>(); + nativeSearcherModels.add(federationSearcherModel()); + nativeSearcherModels.addAll(toSearcherModels(com.yahoo.prelude.statistics.StatisticsSearcher.class)); + + //ensure that searchers in the native search chain are not overridden by searchers in the vespa search chain, + //and that all component ids in each chain are unique. 
+ assert(allComponentIdsDifferent(vespaSearcherModels, nativeSearcherModels)); + } + + private static boolean allComponentIdsDifferent(Collection<ChainedComponentModel> vespaSearcherModels, + Collection<ChainedComponentModel> nativeSearcherModels) { + Set<ComponentId> componentIds = new LinkedHashSet<>(); + return + allAdded(vespaSearcherModels, componentIds) && + allAdded(nativeSearcherModels, componentIds); + + } + + private static FederationSearcherModel federationSearcherModel() { + return new FederationSearcherModel(new ComponentSpecification("federation"), + Dependencies.emptyDependencies(), + Collections.<TargetSpec>emptyList(), true); + } + + private static boolean allAdded(Collection<ChainedComponentModel> searcherModels, Set<ComponentId> componentIds) { + for (ChainedComponentModel model : searcherModels) { + if (!componentIds.add(model.getComponentId())) + return false; + } + return true; + } + + private static Collection<ChainedComponentModel> toSearcherModels(Class<? extends Searcher>... searchers) { + List<ChainedComponentModel> searcherModels = new ArrayList<>(); + for (Class c : searchers) { + searcherModels.add( + new ChainedComponentModel( + BundleInstantiationSpecification.getInternalSearcherSpecificationFromStrings(c.getName(), null), + Dependencies.emptyDependencies())); + } + return searcherModels; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/FederationOptions.java b/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/FederationOptions.java new file mode 100644 index 00000000000..ec6bf9661c6 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/FederationOptions.java @@ -0,0 +1,123 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.searchchain.model.federation; + +import net.jcip.annotations.Immutable; + +/** + * Options for controlling federation to a single source. + * + * @author tonytv + */ +@Immutable +public class FederationOptions implements Cloneable { + + private final Boolean optional; + private final Integer timeoutInMilliseconds; + private final Integer requestTimeoutInMilliseconds; + private final Boolean useByDefault; + + /** + * Creates a request with no separate requestTimeoutInMilliseconds + */ + public FederationOptions(Boolean optional, Integer timeoutInMilliseconds, Boolean useByDefault) { + this(optional, timeoutInMilliseconds, null, useByDefault); + } + + /** + * Creates a fully specified set of options + * + * @param optional whether this should be optional + * @param timeoutInMilliseconds the max time to wait for a result from this source, or null to not specify a limit + * @param requestTimeoutInMilliseconds the max time to allow this request to live, or null to make this the same as + * timeoutInMilliseconds. 
Setting this higher than timeoutInMilliseconds is + * useful to use queries to populate the cache of slow sources + * @param useByDefault whether this should be invoked by default + */ + public FederationOptions(Boolean optional, Integer timeoutInMilliseconds, Integer requestTimeoutInMilliseconds, Boolean useByDefault) { + this.optional = optional; + this.timeoutInMilliseconds = timeoutInMilliseconds; + this.requestTimeoutInMilliseconds = requestTimeoutInMilliseconds; + this.useByDefault = useByDefault; + } + + /** Creates a set of default options: Mandatory, no timeout restriction and not used by default */ + public FederationOptions() { + this(null, null, null, null); + } + + /** Returns a set of options which are the same as this but with optional set to the given value */ + public FederationOptions setOptional(Boolean newOptional) { + return new FederationOptions(newOptional, timeoutInMilliseconds, requestTimeoutInMilliseconds, useByDefault); + } + + /** Returns a set of options which are the same as this but with timeout set to the given value */ + public FederationOptions setTimeoutInMilliseconds(Integer newTimeoutInMilliseconds) { + return new FederationOptions(optional, newTimeoutInMilliseconds, requestTimeoutInMilliseconds, useByDefault); + } + + /** Returns a set of options which are the same as this but with request timeout set to the given value */ + public FederationOptions setRequestTimeoutInMilliseconds(Integer newRequestTimeoutInMilliseconds) { + return new FederationOptions(optional, timeoutInMilliseconds, newRequestTimeoutInMilliseconds, useByDefault); + } + + /** Returns a set of options which are the same as this but with default set to the given value */ + public FederationOptions setUseByDefault(Boolean newUseByDefault) { + return new FederationOptions(optional, timeoutInMilliseconds, requestTimeoutInMilliseconds, newUseByDefault); + } + + public boolean getOptional() { + return (optional != null) ? 
optional : false; + } + + /** Returns the amount of time we should wait for this target, or -1 to use default */ + public int getTimeoutInMilliseconds() { + return (timeoutInMilliseconds != null) ? timeoutInMilliseconds : -1; + } + + /** Returns the amount of time we should allow this target execution to run, or -1 to use default */ + public int getRequestTimeoutInMilliseconds() { + return (requestTimeoutInMilliseconds != null) ? requestTimeoutInMilliseconds : -1; + } + + public long getSearchChainExecutionTimeoutInMilliseconds(long queryTimeout) { + return getTimeoutInMilliseconds() >= 0 ? + getTimeoutInMilliseconds() : + queryTimeout; + } + + public boolean getUseByDefault() { + return useByDefault != null ? useByDefault : false; + } + + public FederationOptions inherit(FederationOptions parent) { + return new FederationOptions( + inherit(optional, parent.optional), + inherit(timeoutInMilliseconds, parent.timeoutInMilliseconds), + inherit(requestTimeoutInMilliseconds, parent.requestTimeoutInMilliseconds), + inherit(useByDefault, parent.useByDefault)); + } + + private static <T> T inherit(T child, T parent) { + return (child != null) ? 
child : parent; + } + + @Override + public boolean equals(Object other) { + return (other instanceof FederationOptions) && + equals((FederationOptions) other); + } + + public boolean equals(FederationOptions other) { + return getOptional() == other.getOptional() && + getTimeoutInMilliseconds() == other.getTimeoutInMilliseconds(); + } + + @Override + public String toString() { + return "FederationOptions{" + + "optional=" + optional + + ", timeoutInMilliseconds=" + timeoutInMilliseconds + + ", useByDefault=" + useByDefault + + '}'; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/FederationSearcherModel.java b/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/FederationSearcherModel.java new file mode 100644 index 00000000000..99293cb611b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/FederationSearcherModel.java @@ -0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.searchchain.model.federation; + +import java.util.List; + +import com.google.common.collect.ImmutableList; +import com.yahoo.container.bundle.BundleInstantiationSpecification; +import net.jcip.annotations.Immutable; + +import com.yahoo.component.ComponentSpecification; +import com.yahoo.component.chain.dependencies.Dependencies; +import com.yahoo.component.chain.model.ChainedComponentModel; +import com.yahoo.search.federation.FederationSearcher; + +/** + * Specifies how a federation searcher is to be set up. + * + * @author tonytv + */ +@Immutable +public class FederationSearcherModel extends ChainedComponentModel { + + /** + * Specifies one or more search chains that can be addressed + * as a single source. 
+ */ + public static class TargetSpec { + public final ComponentSpecification sourceSpec; + public final FederationOptions federationOptions; + + public TargetSpec(ComponentSpecification sourceSpec, FederationOptions federationOptions) { + this.sourceSpec = sourceSpec; + this.federationOptions = federationOptions; + } + } + + private static ComponentSpecification federationSearcherComponentSpecification = + new ComponentSpecification(FederationSearcher.class.getName()); + + public final List<TargetSpec> targets; + public final boolean inheritDefaultSources; + + public FederationSearcherModel(ComponentSpecification componentId, Dependencies dependencies, + List<TargetSpec> targets, boolean inheritDefaultSources) { + super(BundleInstantiationSpecification.getInternalSearcherSpecification(componentId, federationSearcherComponentSpecification), + dependencies); + this.inheritDefaultSources = inheritDefaultSources; + this.targets = ImmutableList.copyOf(targets); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/HttpProviderSpec.java b/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/HttpProviderSpec.java new file mode 100644 index 00000000000..33bdb54b00e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/HttpProviderSpec.java @@ -0,0 +1,121 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.searchchain.model.federation; + +import com.yahoo.container.bundle.BundleInstantiationSpecification; +import net.jcip.annotations.Immutable; + +import com.yahoo.search.federation.http.HTTPProviderSearcher; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * Specifies how a http provider is to be set up. 
+ * + * @author tonytv + */ +@Immutable +public class HttpProviderSpec { + public enum Type { + vespa(com.yahoo.search.federation.vespa.VespaSearcher.class); + + Type(Class<? extends HTTPProviderSearcher> searcherClass) { + className = searcherClass.getName(); + } + + final String className; + } + + // The default connection parameter values come from the config definition + public static class ConnectionParameters { + public final Double readTimeout; + public final Double connectionTimeout; + public final Double connectionPoolTimeout; + public final Integer retries; + + public ConnectionParameters(Double readTimeout, Double connectionTimeout, + Double connectionPoolTimeout, Integer retries) { + this.readTimeout = readTimeout; + this.connectionTimeout = connectionTimeout; + this.connectionPoolTimeout = connectionPoolTimeout; + this.retries = retries; + } + } + + public static class Node { + public final String host; + public final int port; + + public Node(String host, int port) { + this.host = host; + this.port = port; + } + + @Override + public String toString() { + return "Node{" + + "host='" + host + '\'' + + ", port=" + port + + '}'; + } + } + + public final ConnectionParameters connectionParameters; + + public final Integer cacheSizeMB; + + public final String path; + public final List<Node> nodes; + public final String ycaApplicationId; + public final Integer ycaCertificateTtl; + public final Integer ycaRetryWait; + public final Node ycaProxy; + + //TODO:remove this + public final double cacheWeight; + + + public static BundleInstantiationSpecification toBundleInstantiationSpecification(Type type) { + return BundleInstantiationSpecification.getInternalSearcherSpecificationFromStrings(type.className, null); + } + + public static boolean includesType(String typeString) { + for (Type type : Type.values()) { + if (type.name().equals(typeString)) { + return true; + } + } + return false; + } + + public HttpProviderSpec(Double cacheWeight, + String path, + 
List<Node> nodes, + String ycaApplicationId, + Integer ycaCertificateTtl, + Integer ycaRetryWait, + Node ycaProxy, + Integer cacheSizeMB, + ConnectionParameters connectionParameters) { + + final double defaultCacheWeight = 1.0d; + this.cacheWeight = (cacheWeight != null) ? cacheWeight : defaultCacheWeight; + + this.path = path; + this.nodes = unmodifiable(nodes); + this.ycaApplicationId = ycaApplicationId; + this.ycaProxy = ycaProxy; + this.ycaCertificateTtl = ycaCertificateTtl; + this.ycaRetryWait = ycaRetryWait; + this.cacheSizeMB = cacheSizeMB; + + this.connectionParameters = connectionParameters; + } + + private List<HttpProviderSpec.Node> unmodifiable(List<HttpProviderSpec.Node> nodes) { + return nodes == null ? + Collections.<HttpProviderSpec.Node>emptyList() : + Collections.unmodifiableList(new ArrayList<>(nodes)); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java b/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java new file mode 100644 index 00000000000..c8847507039 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java @@ -0,0 +1,79 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.searchchain.model.federation; + +import com.google.common.collect.ImmutableList; +import com.yahoo.container.bundle.BundleInstantiationSpecification; +import com.yahoo.component.chain.dependencies.Dependencies; +import com.yahoo.component.chain.model.ChainedComponentModel; +import com.yahoo.search.Searcher; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import net.jcip.annotations.Immutable; + +/** + * Specifies how a local provider is to be set up. 
+ * + * @author tonytv + */ +@Immutable +public class LocalProviderSpec { + @SuppressWarnings("unchecked") + public static final Collection<ChainedComponentModel> searcherModels = + toSearcherModels( + com.yahoo.prelude.querytransform.CJKSearcher.class, + com.yahoo.search.querytransform.NGramSearcher.class, + com.yahoo.prelude.querytransform.LiteralBoostSearcher.class, + com.yahoo.prelude.querytransform.NormalizingSearcher.class, + com.yahoo.prelude.querytransform.StemmingSearcher.class, + com.yahoo.search.querytransform.VespaLowercasingSearcher.class, + com.yahoo.search.querytransform.DefaultPositionSearcher.class, + com.yahoo.search.querytransform.RangeQueryOptimizer.class, + com.yahoo.search.querytransform.SortingDegrader.class, + com.yahoo.prelude.searcher.ValidateSortingSearcher.class, + com.yahoo.prelude.cluster.ClusterSearcher.class, + com.yahoo.search.grouping.GroupingValidator.class, + com.yahoo.search.grouping.vespa.GroupingExecutor.class, + com.yahoo.prelude.querytransform.RecallSearcher.class, + com.yahoo.search.querytransform.WandSearcher.class, + com.yahoo.search.querytransform.BooleanSearcher.class, + com.yahoo.prelude.searcher.ValidatePredicateSearcher.class, + com.yahoo.search.searchers.ValidateMatchPhaseSearcher.class, + com.yahoo.search.yql.FieldFiller.class, + com.yahoo.search.searchers.InputCheckingSearcher.class); + + public final String clusterName; + + //TODO: make this final + public Integer cacheSize; + + public LocalProviderSpec(String clusterName, Integer cacheSize) { + this.clusterName = clusterName; + this.cacheSize = cacheSize; + + if (clusterName == null) + throw new IllegalArgumentException("Missing cluster name."); + } + + public static boolean includesType(String type) { + return "local".equals(type); + } + + @SafeVarargs + private static final Collection<ChainedComponentModel> toSearcherModels(Class<? extends Searcher>... searchers) { + List<ChainedComponentModel> searcherModels = new ArrayList<>(); + + for (Class<? 
extends Searcher> c : searchers) { + searcherModels.add( + new ChainedComponentModel( + BundleInstantiationSpecification.getInternalSearcherSpecificationFromStrings( + c.getName(), + null), + Dependencies.emptyDependencies())); + } + + return ImmutableList.copyOf(searcherModels); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/package-info.java b/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/package-info.java new file mode 100644 index 00000000000..9642d389661 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.search.searchchain.model.federation; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/model/package-info.java b/container-search/src/main/java/com/yahoo/search/searchchain/model/package-info.java new file mode 100644 index 00000000000..9219eb36094 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchchain/model/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.search.searchchain.model; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/package-info.java b/container-search/src/main/java/com/yahoo/search/searchchain/package-info.java new file mode 100644 index 00000000000..0b1ec05abef --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchchain/package-info.java @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+/** + * Classes for composition of searchers into search chains, which are executed to produce Results for Queries. + */ +@ExportPackage +@PublicApi +package com.yahoo.search.searchchain; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/testutil/DocumentSourceSearcher.java b/container-search/src/main/java/com/yahoo/search/searchchain/testutil/DocumentSourceSearcher.java new file mode 100644 index 00000000000..a5b9c58f084 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchchain/testutil/DocumentSourceSearcher.java @@ -0,0 +1,190 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.searchchain.testutil; + + +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; +import java.util.Map; +import java.util.HashMap; +import java.util.List; + +import com.yahoo.net.URI; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.result.Hit; +import com.yahoo.search.searchchain.Execution; + +/** + * <p>Implements a document source. You pass in a query and a Result + * set. When this Searcher is called with that query it will return + * that result set.</p> + * + * <p>This supports multi-phase search.</p> + * + * <p>To avoid having to add type information for the fields, a quick hack is used to + * support testing of attribute prefetching. 
+ * Any field in the configured hits which has a name starting by attribute + * will be returned when attribute prefetch filling is requested.</p> + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class DocumentSourceSearcher extends Searcher { + + // using null as name in the API would just be a horrid headache + public static final String DEFAULT_SUMMARY_CLASS = "default"; + + // TODO: update tests to explicitly set hits, so that the default results can be removed entirely. + private Result defaultFilledResult; + + private Map<Query, Result> completelyFilledResults = new HashMap<>(); + private Map<Query, Result> unFilledResults = new HashMap<>(); + private Map<String, Set<String>> summaryClasses = new HashMap<>(); + + private int queryCount; + + public DocumentSourceSearcher() { + addDefaultResults(); + } + + /** + * Adds a result which can be searched for and filled. + * Summary fields starting by "a" are attributes, others are not. + * + * @return true when replacing an existing <query, result> pair. 
+ */ + public boolean addResult(Query query, Result fullResult) { + Result emptyResult = new Result(query.clone()); + emptyResult.setTotalHitCount(fullResult.getTotalHitCount()); + for (Hit fullHit : fullResult.hits().asList()) { + Hit emptyHit = fullHit.clone(); + emptyHit.clearFields(); + emptyHit.setFillable(); + emptyHit.setRelevance(fullHit.getRelevance()); + + emptyResult.hits().add(emptyHit); + } + unFilledResults.put(getQueryKeyClone(query), emptyResult); + + if (completelyFilledResults.put(getQueryKeyClone(query), fullResult.clone()) != null) { + // TODO: throw exception if the key exists from before, change the method to void + return true; + } + return false; + } + + public void addSummaryClass(String name, Set<String> fields) { + summaryClasses.put(name,fields); + } + + public void addSummaryClassByCopy(String name, Collection<String> fields) { + addSummaryClass(name, new HashSet<>(fields)); + } + + private void addDefaultResults() { + Query q = new Query("?query=default"); + Result r = new Result(q); + // These four used to assign collapseId 1,2,3,4 - re-add that if needed + r.hits().add(new Hit("http://default-1.html", 0)); + r.hits().add(new Hit("http://default-2.html", 0)); + r.hits().add(new Hit("http://default-3.html", 0)); + r.hits().add(new Hit("http://default-4.html", 0)); + defaultFilledResult = r; + addResult(q, r); + } + + public @Override Result search(Query query, Execution execution) { + queryCount++; + Result r; + r = unFilledResults.get(getQueryKeyClone(query)); + if (r == null) { + r = defaultFilledResult.clone(); + } else { + r = r.clone(); + } + + r.setQuery(query); + r.hits().trim(query.getOffset(), query.getHits()); + return r; + } + + /** + * Returns a query clone which has offset and hits set to null. 
This is used by access to + * the maps using the query as key to achieve lookup independent of offset/hits value + */ + private Query getQueryKeyClone(Query query) { + Query key=query.clone(); + key.setWindow(0,0); + return key; + } + + public @Override void fill(Result result, String summaryClass, Execution execution) { + Result filledResult; + filledResult = completelyFilledResults.get(getQueryKeyClone(result.getQuery())); + + if (filledResult == null) { + filledResult = defaultFilledResult; + } + fillHits(filledResult,result,summaryClass); + } + + private void fillHits(Result filledHits, Result hitsToFill, String summaryClass) { + Set<String> fieldsToFill = summaryClasses.get(summaryClass); + + if (fieldsToFill == null ) { + fieldsToFill = summaryClasses.get(DEFAULT_SUMMARY_CLASS); + } + + for (Hit hitToFill : hitsToFill.hits()) { + Hit filledHit = getMatchingFilledHit(hitToFill.getId(), filledHits); + + if (filledHit != null) { + if (fieldsToFill != null) { + copyFieldValuesThatExist(filledHit,hitToFill,fieldsToFill); + } else { + // TODO: remove this block and update fieldsToFill above to throw an exception if no appropriate summary class is found + for (Map.Entry<String,Object> propertyEntry : filledHit.fields().entrySet()) { + hitToFill.setField(propertyEntry.getKey(), + propertyEntry.getValue()); + } + } + hitToFill.setFilled(summaryClass == null ? 
DEFAULT_SUMMARY_CLASS : summaryClass); + } + } + hitsToFill.analyzeHits(); + } + + private Hit getMatchingFilledHit(URI hitToFillId, Result filledHits) { + Hit filledHit = null; + + for ( Hit filledHitCandidate : filledHits.hits()) { + if ( hitToFillId == null ? filledHitCandidate.getId() == null : hitToFillId.equals(filledHitCandidate.getId()) ) { // match ids by value, not by reference identity + filledHit = filledHitCandidate; + break; + } + } + return filledHit; + } + + private void copyFieldValuesThatExist(Hit filledHit, Hit hitToFill, Set<String> fieldsToFill) { + for (String fieldToFill : fieldsToFill ) { + if ( filledHit.getField(fieldToFill) != null ) { + hitToFill.setField(fieldToFill, filledHit.getField(fieldToFill)); + } + } + } + + /** + * Returns the number of queries made to this searcher since the last + * reset. For testing - not reliable if multiple threads make + * queries simultaneously + */ + public int getQueryCount() { + return queryCount; + } + + public void resetQueryCount() { + queryCount=0; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/searchers/CacheControlSearcher.java b/container-search/src/main/java/com/yahoo/search/searchers/CacheControlSearcher.java new file mode 100644 index 00000000000..064e38d91fc --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchers/CacheControlSearcher.java @@ -0,0 +1,75 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.searchers; + +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.searchchain.Execution; + +/** + * Searcher that sets cache control HTTP headers in response based on query/GET parameters to + * control caching done by proxy/caches such as YSquid and YTS: + * <ul> + * <li>max-age=XXX - set with &cachecontrol.maxage parameter + * <li>stale-while-revalidate=YYY - set with &cachecontrol.staleage + * <li>no-cache - if Vespa &noCache or &cachecontrol.nocache parameter is set to true + * </ul> + * + * <p>This is controlled through the three query parameters <code>cachecontrol.maxage</code>, + * <code>cachecontrol.staleage</code> and <code>cachecontrol.nocache</code>, with the obvious meanings.</p> + * + * Example: + * <ul> + * <li>Request: "?query=foo&cachecontrol.maxage=60&cachecontrol.staleage=3600" + * <li>Response HTTP header: "Cache-Control: max-age=60, stale-while-revalidate=3600" + * </ul> + * + * Further documentation on use of Cache-Control headers: + * http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.9 + * + * @author Frode Lundgren + */ +public class CacheControlSearcher extends Searcher { + + private static final CompoundName cachecontrolNocache=new CompoundName("cachecontrol.nocache"); + private static final CompoundName cachecontrolMaxage=new CompoundName("cachecontrol.maxage"); + private static final CompoundName cachecontrolStaleage=new CompoundName("cachecontrol.staleage"); + + public static final String CACHE_CONTROL_HEADER = "Cache-Control"; + + @Override + public Result search(Query query, Execution execution) { + query.trace("CacheControlSearcher: Running version $Revision$", false, 6); + Result result = execution.search(query); + query = result.getQuery(); + + if (result.getHeaders(true) == null) { + query.trace("CacheControlSearcher: No HTTP header map available - skipping searcher.", 
false, 5); + return result; + } + + // If you specify no-cache, no further cache control headers make sense + if (query.properties().getBoolean(cachecontrolNocache, false) || query.getNoCache()) { + result.getHeaders(true).put(CACHE_CONTROL_HEADER, "no-cache"); + query.trace("CacheControlSearcher: Added no-cache header", false, 4); + return result; + } + + // Handle max-age header + int maxage = query.properties().getInteger(cachecontrolMaxage, -1); + if (maxage > 0) { + result.getHeaders(true).put(CACHE_CONTROL_HEADER, "max-age=" + maxage); + query.trace("CacheControlSearcher: Set max-age header to " + maxage, false, 4); + } + + // Handle stale-while-revalidate header + int staleage = query.properties().getInteger(cachecontrolStaleage, -1); + if (staleage > 0) { + result.getHeaders(true).put(CACHE_CONTROL_HEADER, "stale-while-revalidate=" + staleage); + query.trace("CacheControlSearcher: Set stale-while-revalidate header to " + staleage, false, 4); + } + + return result; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/searchers/ConnectionControlSearcher.java b/container-search/src/main/java/com/yahoo/search/searchers/ConnectionControlSearcher.java new file mode 100644 index 00000000000..cdbf864f7fd --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchers/ConnectionControlSearcher.java @@ -0,0 +1,119 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.searchers; + +import com.yahoo.container.jdisc.HttpRequest; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; + +import java.util.concurrent.TimeUnit; +import java.util.function.LongSupplier; + +/** + * Searcher which can enforce HTTP connection close based on query properties. 
+ * + * <p> + * This searcher informs the client to close a persistent HTTP connection if the + * connection is older than the configured max lifetime. This is done by adding + * the "Connection" HTTP header with the value "Close" to the result. + * </p> + * + * <p> + * The searcher reads the query property "connectioncontrol.maxlifetime", which + * is an integer number of seconds, to get the value for maximum connection + * lifetime. Setting it to zero will enforce connection close independently of + * the age of the connection. Typical usage would be as follows: + * </p> + * + * <ol> + * <li>Add the ConnectionControlSearcher to the default search chain of your + * application. (It has no special ordering considerations.)</li> + * + * <li>For the default query profile of your application, set a reasonable value + * for "connectioncontrol.maxlifetime". The definition of reasonable will be + * highly application dependent, but it should always be less than the grace + * period when taking the container out of production traffic.</li> + * + * <li>Deploy application. The container will now inform clients to close + * connections/reconnect within the configured time limit. + * </ol> + * + * @author frodelu + * @author Steinar Knutsen + */ +public class ConnectionControlSearcher extends Searcher { + + private final String simpleName = this.getClass().getSimpleName(); + + private final LongSupplier clock; + + private static final CompoundName KEEPALIVE_MAXLIFETIMESECONDS = new CompoundName("connectioncontrol.maxlifetime"); + private static final String HTTP_CONNECTION_HEADER_NAME = "Connection"; + private static final String HTTP_CONNECTION_CLOSE_ARGUMENT = "Close"; + + public ConnectionControlSearcher() { + this(() -> System.currentTimeMillis()); + } + + private ConnectionControlSearcher(LongSupplier clock) { + this.clock = clock; + } + + /** + * Create a searcher instance suitable for unit tests. 
+ * + * @param clock a simulated or real clock behaving similarly to System.currentTimeMillis() + * @return a fully initialised instance + */ + public static ConnectionControlSearcher createTestInstance(LongSupplier clock) { + return new ConnectionControlSearcher(clock); + } + + @Override + public Result search(Query query, Execution execution) { + Result result = execution.search(query); + + query.trace(false, 3, simpleName, " updating headers."); + keepAliveProcessing(query, result); + return result; + } + + /** + * If the HTTP connection has been alive for too long, set the header + * "Connection: Close" to tell the client to close the connection after this + * request. + */ + private void keepAliveProcessing(Query query, Result result) { + int maxLifetimeSeconds = query.properties().getInteger(KEEPALIVE_MAXLIFETIMESECONDS, -1); + + if (maxLifetimeSeconds < 0) { + return; + } else if (maxLifetimeSeconds == 0) { + result.getHeaders(true).put(HTTP_CONNECTION_HEADER_NAME, HTTP_CONNECTION_CLOSE_ARGUMENT); + query.trace(false, 5, simpleName, ": Max HTTP connection lifetime set to 0; adding \"", HTTP_CONNECTION_HEADER_NAME, + ": ", HTTP_CONNECTION_CLOSE_ARGUMENT, "\" header"); + } else { + setCloseIfLifetimeExceeded(query, result, maxLifetimeSeconds); + } + } + + private void setCloseIfLifetimeExceeded(Query query, Result result, int maxLifetimeSeconds) { + final HttpRequest httpRequest = query.getHttpRequest(); + if (httpRequest == null) { + query.trace(false, 5, simpleName, " got max lifetime = ", maxLifetimeSeconds, + ", but got no JDisc request. 
Setting no header."); + return; + } + + final long connectedAtMillis = httpRequest.getJDiscRequest().getConnectedAt(TimeUnit.MILLISECONDS); + final long maxLifeTimeMillis = maxLifetimeSeconds * 1000L; + if (connectedAtMillis + maxLifeTimeMillis < clock.getAsLong()) { + result.getHeaders(true).put(HTTP_CONNECTION_HEADER_NAME, HTTP_CONNECTION_CLOSE_ARGUMENT); + query.trace(false, 5, simpleName, ": Max HTTP connection lifetime (", maxLifetimeSeconds, ") exceeded; adding \"", + HTTP_CONNECTION_HEADER_NAME, ": ", HTTP_CONNECTION_CLOSE_ARGUMENT, "\" header"); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/searchers/InputCheckingSearcher.java b/container-search/src/main/java/com/yahoo/search/searchers/InputCheckingSearcher.java new file mode 100644 index 00000000000..d99cb72f5a3 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchers/InputCheckingSearcher.java @@ -0,0 +1,191 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.searchers; + +import java.nio.ByteBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CodingErrorAction; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.ListIterator; +import java.util.Map; +import java.util.logging.Logger; + +import com.yahoo.log.LogLevel; +import com.yahoo.metrics.simple.Counter; +import com.yahoo.metrics.simple.MetricReceiver; +import com.yahoo.prelude.query.CompositeItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.PhraseItem; +import com.yahoo.prelude.query.TermItem; +import com.yahoo.prelude.query.WordItem; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; + +/** + * Check whether the query tree seems to be "well formed". In other words, run heuristics against + * the input data to see whether the query should sent to the search backend. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class InputCheckingSearcher extends Searcher { + + private final Counter utfRejections; + private final Counter repeatedConsecutiveTermsInPhraseRejections; + private final Counter repeatedTermsInPhraseRejections; + private static final Logger log = Logger.getLogger(InputCheckingSearcher.class.getName()); + private final int MAX_REPEATED_CONSECUTIVE_TERMS_IN_PHRASE = 5; + private final int MAX_REPEATED_TERMS_IN_PHRASE=10; + + public InputCheckingSearcher(MetricReceiver metrics) { + utfRejections = metrics.declareCounter("double_encoded_utf8_rejections"); + repeatedTermsInPhraseRejections = metrics.declareCounter("repeated_terms_in_phrase_rejections"); + repeatedConsecutiveTermsInPhraseRejections = metrics.declareCounter("repeated_consecutive_terms_in_phrase_rejections"); + } + + @Override + public Result search(Query query, Execution execution) { + try { + checkQuery(query); + } catch (IllegalArgumentException e) { + if (log.isLoggable(LogLevel.DEBUG)) { + log.log(LogLevel.DEBUG, "Rejected query \"" + query.toString() + "\" on cause of: " + e.getMessage()); + } + return new Result(query, ErrorMessage.createIllegalQuery(e.getMessage())); + } + return execution.search(query); + } + + private void checkQuery(Query query) { + doubleEncodedUtf8(query); + checkPhrases(query.getModel().getQueryTree().getRoot()); + // add new heuristics here + } + + private void checkPhrases(Item queryItem) { + if (queryItem instanceof PhraseItem) { + PhraseItem phrase = (PhraseItem) queryItem; + repeatedConsecutiveTermsInPhraseCheck(phrase); + repeatedTermsInPhraseCheck(phrase); + } else if (queryItem instanceof CompositeItem) { + CompositeItem asComposite = (CompositeItem) queryItem; + for (ListIterator<Item> i = asComposite.getItemIterator(); i.hasNext();) { + checkPhrases(i.next()); + } + } + } + + private void repeatedConsecutiveTermsInPhraseCheck(PhraseItem phrase) { + if (phrase.getItemCount() > 
MAX_REPEATED_CONSECUTIVE_TERMS_IN_PHRASE) { + String prev = null; + int repeatedCount = 0; + for (int i = 0; i < phrase.getItemCount(); ++i) { + Item item = phrase.getItem(i); + if (item instanceof TermItem) { + TermItem term = (TermItem) item; + String current = term.getIndexedString(); + if (prev != null) { + if (prev.equals(current)) { + repeatedCount++; + if (repeatedCount >= MAX_REPEATED_CONSECUTIVE_TERMS_IN_PHRASE) { + repeatedConsecutiveTermsInPhraseRejections.add(); + throw new IllegalArgumentException("More than " + MAX_REPEATED_CONSECUTIVE_TERMS_IN_PHRASE + + " ocurrences of term '" + current + "' in a row detected in phrase : " + phrase.toString()); + } + } else { + repeatedCount = 0; + } + } + prev = current; + } else { + prev = null; + repeatedCount = 0; + } + } + } + } + private static final class Count { + private int v; + Count(int initial) { v = initial; } + void inc() { v++; } + int get() { return v; } + } + private void repeatedTermsInPhraseCheck(PhraseItem phrase) { + if (phrase.getItemCount() > MAX_REPEATED_TERMS_IN_PHRASE) { + Map<String, Count> repeatedCount = new HashMap<>(); + for (int i = 0; i < phrase.getItemCount(); ++i) { + Item item = phrase.getItem(i); + if (item instanceof TermItem) { + TermItem term = (TermItem) item; + String current = term.getIndexedString(); + Count count = repeatedCount.get(current); + if (count != null) { + if (count.get() >= MAX_REPEATED_TERMS_IN_PHRASE) { + repeatedTermsInPhraseRejections.add(); + throw new IllegalArgumentException("Phrase contains more than " + MAX_REPEATED_TERMS_IN_PHRASE + + " occurrences of term '" + current + "' in phrase : " + phrase.toString()); + } + count.inc(); + } else { + repeatedCount.put(current, new Count(1)); + } + } + } + } + } + + + private void doubleEncodedUtf8(Query query) { + int singleCharacterTerms = countSingleCharacterUserTerms(query.getModel().getQueryTree()); + if (singleCharacterTerms <= 4) { + return; + } + String userInput = query.getModel().getQueryString(); + 
ByteBuffer asOctets = ByteBuffer.allocate(userInput.length()); + boolean asciiOnly = true; + for (int i = 0; i < userInput.length(); ++i) { + char c = userInput.charAt(i); + if (c > 255) { + return; // not double (or more) encoded + } + if (c > 127) { + asciiOnly = false; + } + asOctets.put((byte) c); + } + if (asciiOnly) { + return; + } + asOctets.flip(); + CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder().onMalformedInput(CodingErrorAction.REPORT) + .onUnmappableCharacter(CodingErrorAction.REPORT); + // OK, unmappable character is sort of theoretical, but added to be explicit + try { + decoder.decode(asOctets); + } catch (CharacterCodingException e) { + return; + } + utfRejections.add(); + throw new IllegalArgumentException("The user input has been determined to be double encoded UTF-8." + + " Please investigate whether this is a false positive."); + } + + private int countSingleCharacterUserTerms(Item queryItem) { + if (queryItem instanceof CompositeItem) { + int sum = 0; + CompositeItem asComposite = (CompositeItem) queryItem; + for (ListIterator<Item> i = asComposite.getItemIterator(); i.hasNext();) { + sum += countSingleCharacterUserTerms(i.next()); + } + return sum; + } else if (queryItem instanceof WordItem) { + WordItem word = (WordItem) queryItem; + return (word.isFromQuery() && word.stringValue().length() == 1) ? 1 : 0; + } else { + return 0; + } + } +} diff --git a/container-search/src/main/java/com/yahoo/search/searchers/RateLimitingSearcher.java b/container-search/src/main/java/com/yahoo/search/searchers/RateLimitingSearcher.java new file mode 100755 index 00000000000..95cec1d0960 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchers/RateLimitingSearcher.java @@ -0,0 +1,219 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.searchers; + +import com.google.inject.Inject; +import com.yahoo.cloud.config.ClusterInfoConfig; +import com.yahoo.jdisc.Metric; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.config.RateLimitingConfig; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.yolean.chain.Provides; + +import java.time.Clock; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.ThreadLocalRandom; + +/** + * A simple rate limiter. + * <p> + * This takes these query parameter arguments: + * <ul> + * <li>rate.id - (String) the id of the client from rate limiting perspective + * <li>rate.cost - (Double) the cost Double of this query. This is read after executing the query and hence can be set + * by downstream searchers inspecting the result to allow differencing the cost of various queries. Default is 1. + * <li>rate.quota - (Double) the cost per second a particular id is allowed to consume in this system. + * <li>rate.idDimension - (String) the name of the rate-id dimension used when logging metrics. + * If this is not specified, the metric will be logged without dimensions. + * <li>rate.dryRun - (Boolean) emit metrics on rejected requests but don't actually reject them + * </ul> + * <p> + * Whenever quota is exceeded for an id this searcher will reject queries from that id by + * returning a result containing a status 429 error. + * <p> + * If rate.id or rate.quota is not set in Query.properties this searcher will do nothing. + * <p> + * Metrics: This will emit the count metric requestsOverQuota with the dimension [rate.idDimension=rate.id] + * counting rejected requests. 
+ * <p> + * Ordering: This searcher Provides rateLimiting + * + * @author bratseth + */ +@Provides(RateLimitingSearcher.RATE_LIMITING) +public class RateLimitingSearcher extends Searcher { + + /** Constant containing the name this Provides - "rateLimiting", for ordering constraints */ + public static final String RATE_LIMITING = "rateLimiting"; + + public static final CompoundName idKey = new CompoundName("rate.id"); + public static final CompoundName costKey = new CompoundName("rate.cost"); + public static final CompoundName quotaKey = new CompoundName("rate.quota"); + public static final CompoundName idDimensionKey = new CompoundName("rate.idDimension"); + public static final CompoundName dryRunKey = new CompoundName("rate.dryRun"); + + private static final String requestsOverQuotaMetricName = "requestsOverQuota"; + + /** Used to divide quota by nodes. Assumption: All nodes get the same share of traffic. */ + private final int nodeCount; + + /** Shared capacity across all threads. Each thread will ask for more capacity from here when they run out. */ + private final AvailableCapacity availableCapacity; + + /** Capacity already allocated to this thread */ + private final ThreadLocal<Map<String, Double>> allocatedCapacity = new ThreadLocal<>(); + + /** For emitting metrics */ + private final Metric metric; + + /** + * How much capacity to allocate to a thread each time it runs out. + * A higher value means less contention and less accuracy. 
+ */ + private final double capacityIncrement; + + /** How often to check for new capacity if we have run out */ + private final double recheckForCapacityProbability; + + @Inject + public RateLimitingSearcher(RateLimitingConfig rateLimitingConfig, ClusterInfoConfig clusterInfoConfig, Metric metric) { + this(rateLimitingConfig, clusterInfoConfig, metric, Clock.systemUTC()); + } + + /** For testing - allows injection of a timer to avoid depending on the system clock */ + public RateLimitingSearcher(RateLimitingConfig rateLimitingConfig, ClusterInfoConfig clusterInfoConfig, Metric metric, Clock clock) { + this.capacityIncrement = rateLimitingConfig.capacityIncrement(); + this.recheckForCapacityProbability = rateLimitingConfig.recheckForCapacityProbability(); + this.availableCapacity = new AvailableCapacity(rateLimitingConfig.maxAvailableCapacity(), clock); + + this.nodeCount = clusterInfoConfig.nodeCount(); + + this.metric = metric; + } + + @Override + public Result search(Query query, Execution execution) { + String id = query.properties().getString(idKey); + Double rate = query.properties().getDouble(quotaKey); + if (id == null || rate == null) { + query.trace(false, 6, "Skipping rate limiting check. Need both " + idKey + " and " + quotaKey + " set"); + return execution.search(query); + } + + rate = rate / nodeCount; + + if (allocatedCapacity.get() == null) // new thread + allocatedCapacity.set(new HashMap<>()); + if (allocatedCapacity.get().get(id) == null) // new id in this thread + requestCapacity(id, rate); + + // Check if there is capacity available. Cannot check for exact cost as it may be computed after execution + // no capacity means we're over rate. Only recheck occasionally to limit synchronization. 
+ if (getAllocatedCapacity(id) <= 0 && ThreadLocalRandom.current().nextDouble() < recheckForCapacityProbability) { + requestCapacity(id, rate); + } + + if (rate==0 || getAllocatedCapacity(id) <= 0) { // we are still over rate: reject + metric.add(requestsOverQuotaMetricName, 1, createContext(query.properties().getString(idDimensionKey, ""), id)); + if ( ! query.properties().getBoolean(dryRunKey, false)) + return new Result(query, new ErrorMessage(429, "Too many requests", "Allowed rate: " + rate + "/s")); + } + + Result result = execution.search(query); + addAllocatedCapacity(id, - query.properties().getDouble(costKey, 1.0)); + + if (getAllocatedCapacity(id) <= 0) // make sure we ask for more with 100% probability when first running out + requestCapacity(id, rate); + + return result; + } + + private Metric.Context createContext(String dimensionName, String dimensionValue) { + if (dimensionName.isEmpty()) + return metric.createContext(Collections.emptyMap()); + return metric.createContext(Collections.singletonMap(dimensionName, dimensionValue)); + } + + private double getAllocatedCapacity(String id) { + Double value = allocatedCapacity.get().get(id); + if (value == null) return 0; + return value; + } + + private void addAllocatedCapacity(String id, double newCapacity) { + Double capacity = allocatedCapacity.get().get(id); + if (capacity != null) + newCapacity += capacity; + allocatedCapacity.get().put(id, newCapacity); + } + + private void requestCapacity(String id, double rate) { + double minimumRequested = Math.max(0, -getAllocatedCapacity(id)); // If we are below, make sure we reach 0 + double preferredRequested = Math.max(capacityIncrement, -getAllocatedCapacity(id)); + addAllocatedCapacity(id, availableCapacity.request(id, minimumRequested, preferredRequested, rate)); + } + + /** + * This keeps track of the current "capacity" (total cost) available to each client (rate id) + * across all threads. 
Capacity is supplied at the rate per second given by the clients quota. + * When all the capacity is spent, no further capacity will be handed out, leading to request rejection. + * Capacity has a max value it will never exceed to avoid clients saving capacity for future overspending. + */ + private static class AvailableCapacity { + + private final double maxAvailableCapacity; + private final Clock clock; + + private final Map<String, CapacityAllocation> available = new HashMap<>(); + + public AvailableCapacity(double maxAvailableCapacity, Clock clock) { + this.maxAvailableCapacity = maxAvailableCapacity; + this.clock = clock; + } + + /** Returns an amount of capacity between 0 and the requested amount based on availability for this id */ + public synchronized double request(String id, double minimumRequested, double preferredRequested, double rate) { + CapacityAllocation allocation = available.get(id); + if (allocation == null) { + allocation = new CapacityAllocation(rate, clock); + available.put(id, allocation); + } + return allocation.request(minimumRequested, preferredRequested, rate, maxAvailableCapacity); + } + + } + + private static class CapacityAllocation { + + private double capacity; + private final Clock clock; + private long lastAllocatedTime; + + public CapacityAllocation(double initialCapacity, Clock clock) { + this.capacity = initialCapacity; + this.clock = clock; + lastAllocatedTime = clock.millis(); + } + + public double request(double minimumRequested, double preferredRequested, double rate, double maxAvailableCapacity) { + if ( preferredRequested > capacity) { // attempt to allocate more + // rate is per second so we get rate/1000 per millisecond + long currentTime = clock.millis(); + capacity += Math.min(maxAvailableCapacity, rate/1000d * (Math.max(0, currentTime - lastAllocatedTime))); + lastAllocatedTime = currentTime; + } + double grantedCapacity = Math.min(capacity/10, preferredRequested); // /10 to avoid stealing all capacity when low + 
if (grantedCapacity < minimumRequested) + grantedCapacity = Math.min(minimumRequested, capacity); + capacity = capacity - grantedCapacity; + return grantedCapacity; + } + + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/searchers/ValidateMatchPhaseSearcher.java b/container-search/src/main/java/com/yahoo/search/searchers/ValidateMatchPhaseSearcher.java new file mode 100644 index 00000000000..ff00c8edb9b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchers/ValidateMatchPhaseSearcher.java @@ -0,0 +1,69 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.searchers; + +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.vespa.config.search.AttributesConfig; + +import java.util.HashSet; +import java.util.Set; + +/** + * Validates that the attribute given as match-phase override is actually a valid numeric attribute + * with fast-search enabled. + * Created by balder on 1/21/15. 
+ */ +public class ValidateMatchPhaseSearcher extends Searcher { + private Set<String> validMatchPhaseAttributes = new HashSet<>(); + private Set<String> validDiversityAttributes = new HashSet<>(); + public ValidateMatchPhaseSearcher(AttributesConfig attributesConfig) { + for (AttributesConfig.Attribute a : attributesConfig.attribute()) { + if (a.fastsearch() && + (a.collectiontype() == AttributesConfig.Attribute.Collectiontype.SINGLE) && + isNumeric(a.datatype())) + { + validMatchPhaseAttributes.add(a.name()); + } + } + for (AttributesConfig.Attribute a : attributesConfig.attribute()) { + if ((a.collectiontype() == AttributesConfig.Attribute.Collectiontype.SINGLE) && + ((a.datatype() == AttributesConfig.Attribute.Datatype.STRING) || isNumeric(a.datatype()))) + { + validDiversityAttributes.add(a.name()); + } + } + } + private boolean isNumeric(AttributesConfig.Attribute.Datatype.Enum dt) { + return dt == AttributesConfig.Attribute.Datatype.DOUBLE || + dt == AttributesConfig.Attribute.Datatype.FLOAT || + dt == AttributesConfig.Attribute.Datatype.INT8 || + dt == AttributesConfig.Attribute.Datatype.INT16 || + dt == AttributesConfig.Attribute.Datatype.INT32 || + dt == AttributesConfig.Attribute.Datatype.INT64; + } + @Override + public Result search(Query query, Execution execution) { + ErrorMessage e = validate(query); + return (e != null) + ? new Result(query, e) + : execution.search(query); + } + + private ErrorMessage validate(Query query) { + String attribute = query.getRanking().getMatchPhase().getAttribute(); + if ( attribute != null && ! validMatchPhaseAttributes.contains(attribute) ) { + return ErrorMessage.createInvalidQueryParameter("The attribute '" + attribute + "' is not available for match-phase. " + + "It must be a single value numeric attribute with fast-search."); + } + attribute = query.getRanking().getMatchPhase().getDiversity().getAttribute(); + if (attribute != null && ! 
validDiversityAttributes.contains(attribute)) { + return ErrorMessage.createInvalidQueryParameter("The attribute '" + attribute + "' is not available for match-phase diversification. " + + "It must be a single value numeric or string attribute."); + } + return null; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/searchers/package-info.java b/container-search/src/main/java/com/yahoo/search/searchers/package-info.java new file mode 100644 index 00000000000..78f1e5940a6 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchers/package-info.java @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * Various useful searchers + */ +@ExportPackage +@PublicApi +package com.yahoo.search.searchers; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/statistics/ElapsedTime.java b/container-search/src/main/java/com/yahoo/search/statistics/ElapsedTime.java new file mode 100644 index 00000000000..8cf159f5ad8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/statistics/ElapsedTime.java @@ -0,0 +1,235 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.statistics; + +import com.yahoo.collections.TinyIdentitySet; +import com.yahoo.search.statistics.TimeTracker.Activity; +import com.yahoo.search.statistics.TimeTracker.SearcherTimer; + +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; + +import static com.yahoo.search.statistics.TimeTracker.Activity.*; + +/** + * Basically a collection of TimeTracker instances. + * + * <p>This class may need a lot of restructuring as actual + * needs are mapped out. 
+ * + * @author <a href="steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class ElapsedTime { + + // An identity set is used to make it safe to do multiple merges. This may happen if + // user calls Result.mergeWith() and Result.mergeWithAfterFill() on the same result + // with the same result as an argument too. This is slightly pathological, but better + // safe than sorry. It also covers in SearchHandler where the same Execution instance + // is used for search and fill. + /** A map used as a set to store the time track of all the Execution instances for a Result */ + private Set<TimeTracker> tracks = new TinyIdentitySet<>(8); + + public void add(TimeTracker track) { + tracks.add(track); + } + + private long fetcher(Activity toFetch, TimeTracker fetchFrom) { + switch (toFetch) { + case SEARCH: + return fetchFrom.searchTime(); + case FILL: + return fetchFrom.fillTime(); + case PING: + return fetchFrom.pingTime(); + default: + return 0L; + } + + } + + /** + * Give an estimate on how much of the time tracked by this + * instance was used fetching document contents. This will + * by definition be smaller than last() - first(). 
+ */ + public long weightedFillTime() { + return weightedTime(FILL); + } + + private long weightedTime(Activity kind) { + long total = 0L; + long elapsed = 0L; + long first = Long.MAX_VALUE; + long last = 0L; + + if (tracks.isEmpty()) { + return 0L; + } + for (TimeTracker track : tracks) { + total += track.totalTime(); + elapsed += fetcher(kind, track); + last = Math.max(last, track.last()); + first = Math.min(first, track.first()); + } + if (total == 0L) { + return 0L; + } else { + return ((last - first) * elapsed) / total; + } + } + + private long absoluteTime(Activity kind) { + long elapsed = 0L; + + if (tracks.isEmpty()) { + return 0L; + } + for (TimeTracker track : tracks) { + elapsed += fetcher(kind, track); + } + return elapsed; + } + + /** + * Total amount of time spent in all threads for this Execution while + * fetching document contents, or preparing to fetch them. + */ + public long fillTime() { + return absoluteTime(FILL); + } + + /** + * Total amount of time spent for this ElapsedTime instance. + */ + public long totalTime() { + long total = 0L; + for (TimeTracker track : tracks) { + total += track.totalTime(); + } + return total; + } + + /** + * Give a relative estimate on how much of the time tracked by this + * instance was used searching. This will + * by definition be smaller than last() - first(). + */ + public long weightedSearchTime() { + return weightedTime(SEARCH); + } + + /** + * Total amount of time spent in all threads for this Execution while + * searching or waiting for (a) backend(s) doing (a) search(es). + */ + public long searchTime() { + return absoluteTime(SEARCH); + } + + /** + * Total amount of time spent in all threads for this Execution while + * pinging, or preparing to ping, a backend. + */ + public long pingTime() { + return absoluteTime(PING); + } + + /** + * Give a relative estimate on how much of the time tracked by this + * instance was used pinging backends. This will + * by definition be smaller than last() - first(). 
+ */ + public long weightedPingTime() { + return weightedTime(PING); + } + + /** + * Time stamp of start of the first event registered. + */ + public long first() { + long first = Long.MAX_VALUE; + for (TimeTracker track : tracks) { + first = Math.min(first, track.first()); + } + return first; + } + + /** + * Time stamp of the end the last event registered. + */ + public long last() { + long last = 0L; + for (TimeTracker track : tracks) { + last = Math.max(last, track.last()); + } + return last; + } + + public void merge(ElapsedTime other) { + for (TimeTracker t : other.tracks) { + add(t); + } + } + + /** + * The time of the start of the first document fill requested. + */ + public long firstFill() { + long first = Long.MAX_VALUE; + if (tracks.isEmpty()) { + return 0L; + } + for (TimeTracker t : tracks) { + long candidate = t.firstFill(); + if (candidate == 0L) { + continue; + } + first = Math.min(first, t.firstFill()); + } + return first; + } + + /* + * Tell whether time use per searcher is available. 
+ */ + public boolean hasDetailedData() { + for (TimeTracker t : tracks) { + if (t.searcherTracking() != null) { + return true; + } + } + return false; + } + + public String detailedReport() { + Map<String, TimeTracker.SearcherTimer> raw = new LinkedHashMap<>(); + StringBuilder report = new StringBuilder(); + int preLen; + report.append("Time use per searcher: "); + for (TimeTracker t : tracks) { + if (t.searcherTracking() == null) { + continue; + } + SearcherTimer[] searchers = t.searcherTracking(); + for (SearcherTimer s : searchers) { + SearcherTimer sum; + if (raw.containsKey(s.getName())) { + sum = raw.get(s.getName()); + } else { + sum = new SearcherTimer(s.getName()); + raw.put(s.getName(), sum); + } + sum.merge(s); + } + } + preLen = report.length(); + for (TimeTracker.SearcherTimer value : raw.values()) { + if (report.length() > preLen) { + report.append(",\n "); + } + report.append(value.toString()); + } + report.append("."); + return report.toString(); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/statistics/PeakQpsSearcher.java b/container-search/src/main/java/com/yahoo/search/statistics/PeakQpsSearcher.java new file mode 100644 index 00000000000..e6056659c55 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/statistics/PeakQpsSearcher.java @@ -0,0 +1,237 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.statistics; + +import com.yahoo.collections.Tuple2; +import com.yahoo.concurrent.ThreadLocalDirectory; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.result.Hit; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.statistics.Callback; +import com.yahoo.statistics.Handle; +import com.yahoo.statistics.Statistics; +import com.yahoo.statistics.Value; + +import java.util.*; + +/** + * Aggregate peak qps and expose through meta hits and/or log events. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class PeakQpsSearcher extends Searcher { + private final ThreadLocalDirectory<Deque<QueryRatePerSecond>, Long> directory; + private final Value qpsStatistics; + private final CompoundName propertyName; + private final boolean useMetaHit; + + /** + * Meta hit which carries the peak qps and mean qps since the last time this + * data was requested. The URI is always "meta:qps". The data is stored as + * Number subclasses in the fields named by the fields PEAK_QPS and MEAN_QPS + * in the QpsHit class. + */ + public static class QpsHit extends Hit { + /** + * Machine generated version ID for serialization. + */ + private static final long serialVersionUID = 1042868845398233889L; + + /** + * The name of the field containing mean QPS since the last measurement. + */ + public static final String MEAN_QPS = "mean_qps"; + + /** + * The name of the field containing peak QPS since the last measurement. 
+ */ + public static final String PEAK_QPS = "peak_qps"; + public static final String SCHEME = "meta"; + + public QpsHit(Integer peakQps, Double meanQps) { + super(SCHEME + ":qps"); + setField(PEAK_QPS, peakQps); + setField(MEAN_QPS, meanQps); + } + + public boolean isMeta() { + return true; + } + + @Override + public String toString() { + return "QPS hit: Peak QPS " + getField(PEAK_QPS) + ", mean QPS " + getField(MEAN_QPS) + "."; + } + } + + static class QueryRatePerSecond { + long when; + int howMany; + + QueryRatePerSecond(long when) { + this.when = when; + this.howMany = 0; + } + + void add(int x) { + howMany += x; + } + + void increment() { + howMany += 1; + } + + @Override + public String toString() { + return "QueryRatePerSecond(" + when + ": " + howMany + ")"; + } + } + + static class QueryRate implements + ThreadLocalDirectory.Updater<Deque<QueryRatePerSecond>, Long> { + @Override + public Deque<QueryRatePerSecond> update( + Deque<QueryRatePerSecond> current, Long when) { + QueryRatePerSecond last = current.peekLast(); + if (last == null || last.when != when) { + last = new QueryRatePerSecond(when); + current.addLast(last); + } + last.increment(); + return current; + } + + @Override + public Deque<QueryRatePerSecond> createGenerationInstance( + Deque<QueryRatePerSecond> previous) { + if (previous == null) { + return new ArrayDeque<>(); + } else { + return new ArrayDeque<>(previous.size()); + } + } + } + + private class Fetcher implements Callback { + @Override + public void run(Handle h, boolean firstRun) { + List<Deque<QueryRatePerSecond>> data = directory.fetch(); + List<QueryRatePerSecond> chewed = merge(data); + for (QueryRatePerSecond qps : chewed) { + qpsStatistics.put((double) qps.howMany); + } + } + } + + public PeakQpsSearcher(MeasureQpsConfig config, Statistics manager) { + directory = createDirectory(); + MeasureQpsConfig.Outputmethod.Enum method = config.outputmethod(); + if (method == MeasureQpsConfig.Outputmethod.METAHIT) { + useMetaHit = 
true; + propertyName = new CompoundName(config.queryproperty()); + qpsStatistics = null; + } else if (method == MeasureQpsConfig.Outputmethod.STATISTICS) { + String event = config.eventname(); + if (event == null || event.isEmpty()) { + event = getId().getName(); + event = event.replace('.', '_'); + } + qpsStatistics = new Value(event, manager, new Value.Parameters() + .setAppendChar('_').setLogMax(true).setLogMean(true) + .setLogMin(false).setLogRaw(false).setNameExtension(true) + .setCallback(new Fetcher())); + useMetaHit = false; + propertyName = null; + } else { + throw new IllegalArgumentException( + "Config definition out of sync with implementation." + + " No way to create output for method " + method + "."); + } + } + + static ThreadLocalDirectory<Deque<QueryRatePerSecond>, Long> createDirectory() { + return new ThreadLocalDirectory<>(new QueryRate()); + } + + static List<QueryRatePerSecond> merge(List<Deque<QueryRatePerSecond>> measurements) { + List<QueryRatePerSecond> rates = new ArrayList<>(); + while (measurements.size() > 0) { + Deque<Deque<QueryRatePerSecond>> consumeFrom + = new ArrayDeque<>(measurements.size()); + long current = Long.MAX_VALUE; + for (ListIterator<Deque<QueryRatePerSecond>> i = measurements.listIterator(); i.hasNext();) { + Deque<QueryRatePerSecond> deck = i.next(); + if (deck.size() == 0) { + i.remove(); + continue; + } + QueryRatePerSecond threadData = deck.peekFirst(); + if (threadData.when < current) { + consumeFrom.clear(); + current = threadData.when; + consumeFrom.add(deck); + } else if (threadData.when == current) { + consumeFrom.add(deck); + } + } + if (consumeFrom.size() > 0) { + rates.add(consume(consumeFrom)); + } + } + return rates; + } + + private static QueryRatePerSecond consume(Deque<Deque<QueryRatePerSecond>> consumeFrom) { + Deque<QueryRatePerSecond> valueQueue = consumeFrom.pop(); + QueryRatePerSecond value = valueQueue.pop(); + QueryRatePerSecond thisSecond = new QueryRatePerSecond(value.when); + 
thisSecond.add(value.howMany); + while (consumeFrom.size() > 0) { + valueQueue = consumeFrom.pop(); + value = valueQueue.pop(); + thisSecond.add(value.howMany); + } + return thisSecond; + + } + + @Override + public Result search(Query query, Execution execution) { + Result r; + long when = query.getStartTime() / 1000L; + Hit meta = null; + directory.update(when); + if (useMetaHit) { + if (query.properties().getBoolean(propertyName, false)) { + List<QueryRatePerSecond> l = merge(directory.fetch()); + Tuple2<Integer, Double> maxAndMean = maxAndMean(l); + meta = new QpsHit(maxAndMean.first, maxAndMean.second); + } + } + r = execution.search(query); + if (meta != null) { + r.hits().add(meta); + } + return r; + } + + private Tuple2<Integer, Double> maxAndMean(List<QueryRatePerSecond> l) { + int max = Integer.MIN_VALUE; + double sum = 0.0d; + if (l.size() == 0) { + return new Tuple2<>(Integer.valueOf(0), + Double.valueOf(0.0)); + } + for (QueryRatePerSecond qps : l) { + sum += (double) qps.howMany; + if (qps.howMany > max) { + max = qps.howMany; + } + } + return new Tuple2<>(Integer.valueOf(max), + Double.valueOf(sum / (double) l.size())); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/statistics/TimeTracker.java b/container-search/src/main/java/com/yahoo/search/statistics/TimeTracker.java new file mode 100644 index 00000000000..6d23701b06a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/statistics/TimeTracker.java @@ -0,0 +1,390 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.statistics; + +import java.util.ArrayList; +import java.util.EnumMap; +import java.util.List; +import java.util.Map; + +import com.yahoo.component.chain.Chain; +import com.yahoo.prelude.Pong; +import com.yahoo.processing.Processor; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; + +/** + * A container for storing time stamps throughout the + * lifetime of an Execution instance. + * + * <p>Check state both when entering and exiting, to allow for arbitrary + * new queries anywhere inside a search chain. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public final class TimeTracker { + + public enum Activity { + PING, + SEARCH, + FILL; + } + + static class SearcherTimer { + // Searcher ID + private final String name; + // Time spent transforming query/producing result + private final EnumMap<Activity, Long> invoking = new EnumMap<>(Activity.class); + // Time spent transforming result + private final EnumMap<Activity, Long> returning = new EnumMap<>(Activity.class); + + SearcherTimer(String name) { + this.name = name; + } + + private void activityRepr(StringBuilder buffer, int preLen, + Map.Entry<Activity, Long> m) { + if (buffer.length() != preLen) { + buffer.append(", "); + } + buffer.append(m.getKey()).append(": ").append(m.getValue()) + .append(" ms"); + } + + void addInvoking(Activity activity, long time) { + Long storedTillNow = invoking.get(activity); + long tillNow = getTime(storedTillNow); + invoking.put(activity, Long.valueOf(tillNow + time)); + } + + void addReturning(Activity activity, long time) { + Long storedTillNow = returning.get(activity); + long tillNow = getTime(storedTillNow); + returning.put(activity, Long.valueOf(tillNow + time)); + } + + Long getInvoking(Activity activity) { + return invoking.get(activity); + } + + String getName() { + return name; + } + + Long getReturning(Activity activity) { + return returning.get(activity); + } + + private long getTime(Long 
storedTillNow) { + long tillNow; + if (storedTillNow == null) { + tillNow = 0L; + } else { + tillNow = storedTillNow.longValue(); + } + return tillNow; + } + + public void merge(SearcherTimer other) { + for (Map.Entry<Activity, Long> invokingEntry : other.invoking.entrySet()) { + addInvoking(invokingEntry.getKey(), invokingEntry.getValue()); + } + for (Map.Entry<Activity, Long> returningEntry : other.returning.entrySet()) { + addReturning(returningEntry.getKey(), returningEntry.getValue()); + } + } + + public String toString() { + StringBuilder buffer = new StringBuilder(); + int preLen; + buffer.append(name).append("(").append("QueryProcessing("); + preLen = buffer.length(); + for (Map.Entry<Activity, Long> m : invoking.entrySet()) { + activityRepr(buffer, preLen, m); + } + buffer.append("), ResultProcessing("); + preLen = buffer.length(); + for (Map.Entry<Activity, Long> m : returning.entrySet()) { + activityRepr(buffer, preLen, m); + } + buffer.append("))"); + return buffer.toString(); + } + } + + static class State { + public final long start; + public final Activity activity; + + State(long start, Activity activity) { + super(); + this.start = start; + this.activity = activity; + } + } + + static class Tag { + public final long start; + public final long end; + public final Activity activity; + + Tag(long start, long end, Activity activity) { + super(); + this.start = start; + this.end = end; + this.activity = activity; + } + } + + static class TimeSource { + long now() { + return System.currentTimeMillis(); + } + } + + private State state = null; + private List<Tag> tags = new ArrayList<>(); + + private SearcherTimer[] searcherTracking = null; + private final Chain<? extends Processor> searchChain; + // whether the previous state was invoking or returning + private boolean invoking = true; + private long last = 0L; + private final int entryIndex; + TimeSource timeSource = new TimeSource(); + + public TimeTracker(Chain<? 
extends Searcher> searchChain) { + this(searchChain, 0); + } + + public TimeTracker(Chain<? extends Processor> searchChain, int entryIndex) { + this.searchChain = searchChain; + this.entryIndex = entryIndex; + } + + private void concludeState(long now) { + if (state == null) { + return; + } + + tags.add(new Tag(state.start, now, state.activity)); + state = null; + } + + private void concludeStateOnExit(long now) { + if (now != 0L) { + concludeState(now); + } else { + concludeState(getNow()); + } + } + + private long detailedMeasurements(int searcherIndex, boolean calledAsInvoking) { + long now = getNow(); + if (searcherTracking == null) { + initBreakdown(); + } + SearcherTimer timeSpentIn = getPreviouslyRunSearcher(searcherIndex, calledAsInvoking); + long spent = now - last; + if (timeSpentIn != null && last != 0L) { + if (invoking) { + timeSpentIn.addInvoking(getActivity(), spent); + } else { + timeSpentIn.addReturning(getActivity(), spent); + } + } + last = now; + if (searcherIndex >= searcherTracking.length) { + // We are now outside the search chain and will go back up with the + // default result. 
+ invoking = false; + } else { + invoking = calledAsInvoking; + } + return now; + } + + private void enteringState(int searcherIndex, boolean detailed, final Activity activity) { + long now = 0L; + if (detailed) { + now = detailedMeasurements(searcherIndex, true); + } + if (isNewState(activity)) { + if (now == 0L) { + now = getNow(); + } + concludeState(now); + initNewState(now, activity); + } else { + return; + } + } + + private long fetchTime(Activity filter, Tag container) { + if (filter == container.activity) { + return container.end - container.start; + } else { + return 0L; + } + } + + public long fillTime() { + return typedSum(Activity.FILL); + } + + public long first() { + if (tags.isEmpty()) { + return 0L; + } else { + return tags.get(0).start; + } + } + + public long firstFill() { + for (Tag t : tags) { + if (t.activity == Activity.FILL) { + return t.start; + } + } + return 0L; + } + + private Activity getActivity() { + if (state == null) { + throw new IllegalStateException("Trying to measure an interval having only one point."); + } + return state.activity; + } + + private long getNow() { + return timeSource.now(); + } + + private SearcherTimer getPreviouslyRunSearcher(int searcherIndex, boolean calledAsInvoking) { + if (calledAsInvoking) { + searcherIndex -= 1; + if (searcherIndex < entryIndex) { + return null; + } else { + return searcherTracking[searcherIndex]; + } + } else { + return searcherTracking[searcherIndex]; + } + } + + private void initBreakdown() { + if (searcherTracking != null) { + throw new IllegalStateException("initBreakdown invoked" + + " when measurement structures are already initialized."); + } + List<? 
extends Processor> searchers = searchChain.components(); + searcherTracking = new SearcherTimer[searchers.size()]; + for (int i = 0; i < searcherTracking.length; ++i) { + searcherTracking[i] = new SearcherTimer(searchers.get(i).getId().stringValue()); + } + } + + private void initNewState(long now, Activity activity) { + state = new State(now, activity); + } + + void injectTimeSource(TimeSource source) { + this.timeSource = source; + } + + private boolean isNewState(Activity callPath) { + if (state == null) { + return true; + } else if (callPath == state.activity) { + return false; + } else { + return true; + } + } + + public long last() { + if (tags.isEmpty()) { + return 0L; + } else { + return tags.get(tags.size() - 1).end; + } + } + + public long pingTime() { + return typedSum(Activity.PING); + } + + private long returnfromState(int searcherIndex, boolean detailed) { + if (detailed) { + return detailedMeasurements(searcherIndex, false); + } else { + return 0L; + } + } + + public void sampleFill(int searcherIndex, boolean detailed) { + enteringState(searcherIndex, detailed, Activity.FILL); + } + + public void sampleFillReturn(int searcherIndex, boolean detailed, Result annotationReference) { + ElapsedTime elapsed = getElapsedTime(annotationReference); + sampleReturn(searcherIndex, detailed, elapsed); + } + + public void samplePing(int searcherIndex, boolean detailed) { + enteringState(searcherIndex, detailed, Activity.PING); + } + + public void samplePingReturn(int searcherIndex, boolean detailed, Pong annotationReference) { + ElapsedTime elapsed = getElapsedTime(annotationReference); + sampleReturn(searcherIndex, detailed, elapsed); + } + + public void sampleSearch(int searcherIndex, boolean detailed) { + enteringState(searcherIndex, detailed, Activity.SEARCH); + } + + public void sampleSearchReturn(int searcherIndex, boolean detailed, Result annotationReference) { + ElapsedTime elapsed = getElapsedTime(annotationReference); + sampleReturn(searcherIndex, 
detailed, elapsed); + } + + private void sampleReturn(int searcherIndex, boolean detailed, ElapsedTime elapsed) { + long now = returnfromState(searcherIndex, detailed); + if (searcherIndex == entryIndex) { + concludeStateOnExit(now); + if (elapsed != null) { + elapsed.add(this); + } + } + } + + private ElapsedTime getElapsedTime(Result r) { + return r == null ? null : r.getElapsedTime(); + } + + private ElapsedTime getElapsedTime(Pong p) { + return p == null ? null : p.getElapsedTime(); + } + + SearcherTimer[] searcherTracking() { + return searcherTracking; + } + + public long searchTime() { + return typedSum(Activity.SEARCH); + } + + public long totalTime() { + return last() - first(); + } + + private long typedSum(Activity activity) { + long sum = 0L; + for (Tag tag : tags) { + sum += fetchTime(activity, tag); + } + return sum; + } +} + diff --git a/container-search/src/main/java/com/yahoo/search/statistics/TimingSearcher.java b/container-search/src/main/java/com/yahoo/search/statistics/TimingSearcher.java new file mode 100644 index 00000000000..0b16c87df07 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/statistics/TimingSearcher.java @@ -0,0 +1,144 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.statistics; + +import com.yahoo.component.ComponentId; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.search.statistics.TimingSearcherConfig.Timer; +import com.yahoo.prelude.Ping; +import com.yahoo.prelude.Pong; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.cluster.PingableSearcher; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.statistics.TimeTracker.Activity; +import com.yahoo.statistics.Statistics; +import com.yahoo.statistics.Value; + + +/** + * A searcher which is intended to be useful as a general probe for + * measuring time consumption a search chain. 
+ * + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@Before("rawQuery") +public class TimingSearcher extends PingableSearcher { + private Value measurements; + private final boolean measurePing; + private final boolean measureSearch; + private final boolean measureFill; + private static final Parameters defaultParameters = new Parameters(null, Activity.SEARCH); + + public static class Parameters { + final String eventName; + final Activity pathToSample; + + public Parameters(String eventName, Activity pathToSample) { + super(); + this.eventName = eventName; + this.pathToSample = pathToSample; + } + } + + TimingSearcher(ComponentId id, Parameters setUp, Statistics manager) { + super(id); + if (setUp == null) { + setUp = defaultParameters; + } + String eventName = setUp.eventName; + if (eventName == null || "".equals(eventName)) { + eventName = id.getName(); + } + measurements = new Value(eventName, manager, new Value.Parameters() + .setNameExtension(true).setLogMax(true).setLogMin(true) + .setLogMean(true).setLogSum(true).setLogInsertions(true) + .setAppendChar('_')); + + measurePing = setUp.pathToSample == Activity.PING; + measureSearch = setUp.pathToSample == Activity.SEARCH; + measureFill = setUp.pathToSample == Activity.FILL; + } + + public TimingSearcher(ComponentId id, TimingSearcherConfig config, Statistics manager) { + this(id, buildParameters(config, id.getName()), manager); + } + + private static Parameters buildParameters( + TimingSearcherConfig config, String searcherName) { + for (int i = 0; i < config.timer().size(); ++i) { + Timer t = config.timer(i); + if (t.name().equals(searcherName)) { + return buildParameters(t); + } + } + return null; + } + + private static Parameters buildParameters(Timer t) { + Activity m; + Timer.Measure.Enum toSample = t.measure(); + if (toSample == Timer.Measure.FILL) { + m = Activity.FILL; + } else if (toSample == Timer.Measure.PING) { + m = Activity.PING; + } else { + m = Activity.SEARCH; 
+ } + return new Parameters(t.eventname(), m); + } + + private long preMeasure(boolean doIt) { + if (doIt) { + return System.currentTimeMillis(); + } else { + return 0L; + } + } + + private void postMeasure(boolean doIt, long start) { + if (doIt) { + long elapsed = System.currentTimeMillis() - start; + measurements.put(elapsed); + } + } + + @Override + public void fill(Result result, String summaryClass, Execution execution) { + long start = preMeasure(measureFill); + super.fill(result, summaryClass, execution); + postMeasure(measureFill, start); + } + + @Override + public Pong ping(Ping ping, Execution execution) { + long start = preMeasure(measurePing); + Pong pong = execution.ping(ping); + postMeasure(measurePing, start); + return pong; + } + + @Override + public Result search(Query query, Execution execution) { + long start = preMeasure(measureSearch); + Result result = execution.search(query); + postMeasure(measureSearch, start); + return result; + } + + /** + * This method is only included for testing. + */ + public void setMeasurements(Value measurements) { + this.measurements = measurements; + } + + @Override + public void deconstruct() { + // avoid dangling, duplicate loggers + measurements.cancel(); + super.deconstruct(); + } + + +} diff --git a/container-search/src/main/java/com/yahoo/search/statistics/package-info.java b/container-search/src/main/java/com/yahoo/search/statistics/package-info.java new file mode 100644 index 00000000000..04626fa913e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/statistics/package-info.java @@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+@ExportPackage +@PublicApi +package com.yahoo.search.statistics; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/search/template/.gitignore b/container-search/src/main/java/com/yahoo/search/template/.gitignore new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/template/.gitignore diff --git a/container-search/src/main/java/com/yahoo/search/yql/ArgumentsTypeChecker.java b/container-search/src/main/java/com/yahoo/search/yql/ArgumentsTypeChecker.java new file mode 100644 index 00000000000..c297bf80cac --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/ArgumentsTypeChecker.java @@ -0,0 +1,30 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.base.Preconditions; + +import java.util.List; + +final class ArgumentsTypeChecker { + + private final Operator target; + private final List<OperatorTypeChecker> checkers; + + public ArgumentsTypeChecker(Operator target, List<OperatorTypeChecker> checkers) { + this.target = target; + this.checkers = checkers; + } + + public void check(Object... 
args) { + if (args == null) { + Preconditions.checkArgument(checkers.size() == 0, "Operator %s argument count mismatch: expected %s got 0", target, checkers.size()); + return; + } else { + Preconditions.checkArgument(args.length == checkers.size(), "Operator %s argument count mismatch: expected: %s got %s", target, checkers.size(), args.length); + } + for (int i = 0; i < checkers.size(); ++i) { + checkers.get(i).check(args[i]); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/CaseInsensitiveFileStream.java b/container-search/src/main/java/com/yahoo/search/yql/CaseInsensitiveFileStream.java new file mode 100644 index 00000000000..33e684357af --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/CaseInsensitiveFileStream.java @@ -0,0 +1,38 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import org.antlr.v4.runtime.ANTLRFileStream; +import org.antlr.v4.runtime.CharStream; + +import java.io.IOException; + +/** + * Enable ANTLR to do case insensitive comparisons when reading from files without throwing away the case in the token. 
+ */ + +class CaseInsensitiveFileStream extends ANTLRFileStream { + + public CaseInsensitiveFileStream(String fileName) throws IOException { + super(fileName); + } + + public CaseInsensitiveFileStream(String fileName, String encoding) throws IOException { + super(fileName, encoding); + } + + @Override + public int LA(int i) { + if (i == 0) { + return 0; + } + if (i < 0) { + i++; // e.g., translate LA(-1) to use offset 0 + } + + if ((p + i - 1) >= n) { + return CharStream.EOF; + } + return Character.toLowerCase(data[p + i - 1]); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/CaseInsensitiveInputStream.java b/container-search/src/main/java/com/yahoo/search/yql/CaseInsensitiveInputStream.java new file mode 100644 index 00000000000..e15fe04bb39 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/CaseInsensitiveInputStream.java @@ -0,0 +1,50 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import org.antlr.v4.runtime.ANTLRInputStream; +import org.antlr.v4.runtime.CharStream; + +import java.io.IOException; +import java.io.InputStream; + +/** + * Enable ANTLR to do case insensitive comparisons when reading from files without throwing away the case in the token. 
+ */ +class CaseInsensitiveInputStream extends ANTLRInputStream { + + public CaseInsensitiveInputStream() { + super(); + } + + public CaseInsensitiveInputStream(InputStream input) throws IOException { + super(input); + } + + public CaseInsensitiveInputStream(InputStream input, int size) throws IOException { + super(input, size); + } + + public CaseInsensitiveInputStream(char[] data, int numberOfActualCharsInArray) throws IOException { + super(data, numberOfActualCharsInArray); + } + + public CaseInsensitiveInputStream(String input) throws IOException { + super(input); + } + + @Override + public int LA(int i) { + if (i == 0) { + return 0; + } + if (i < 0) { + i++; // e.g., translate LA(-1) to use offset 0 + } + + if ((p + i - 1) >= n) { + return CharStream.EOF; + } + return Character.toLowerCase(data[p + i - 1]); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/ExpressionOperator.java b/container-search/src/main/java/com/yahoo/search/yql/ExpressionOperator.java new file mode 100644 index 00000000000..e9fe52d33e7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/ExpressionOperator.java @@ -0,0 +1,84 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.base.Predicate; + +/** + * Operators on expressions. 
+ */ +enum ExpressionOperator implements Operator { + + AND(TypeCheckers.EXPRS), + OR(TypeCheckers.EXPRS), + EQ(ExpressionOperator.class, ExpressionOperator.class), + NEQ(ExpressionOperator.class, ExpressionOperator.class), + LT(ExpressionOperator.class, ExpressionOperator.class), + GT(ExpressionOperator.class, ExpressionOperator.class), + LTEQ(ExpressionOperator.class, ExpressionOperator.class), + GTEQ(ExpressionOperator.class, ExpressionOperator.class), + + IN(ExpressionOperator.class, ExpressionOperator.class), + IN_QUERY(ExpressionOperator.class, SequenceOperator.class), + NOT_IN(ExpressionOperator.class, ExpressionOperator.class), + NOT_IN_QUERY(ExpressionOperator.class, SequenceOperator.class), + + LIKE(ExpressionOperator.class, ExpressionOperator.class), + NOT_LIKE(ExpressionOperator.class, ExpressionOperator.class), + + IS_NULL(ExpressionOperator.class), + IS_NOT_NULL(ExpressionOperator.class), + MATCHES(ExpressionOperator.class, ExpressionOperator.class), + NOT_MATCHES(ExpressionOperator.class, ExpressionOperator.class), + CONTAINS(ExpressionOperator.class, ExpressionOperator.class), + + ADD(ExpressionOperator.class, ExpressionOperator.class), + SUB(ExpressionOperator.class, ExpressionOperator.class), + MULT(ExpressionOperator.class, ExpressionOperator.class), + DIV(ExpressionOperator.class, ExpressionOperator.class), + MOD(ExpressionOperator.class, ExpressionOperator.class), + + NEGATE(ExpressionOperator.class), + NOT(ExpressionOperator.class), + + MAP(TypeCheckers.LIST_OF_STRING, TypeCheckers.EXPRS), + + ARRAY(TypeCheckers.EXPRS), + + INDEX(ExpressionOperator.class, ExpressionOperator.class), + PROPREF(ExpressionOperator.class, String.class), + + CALL(TypeCheckers.LIST_OF_STRING, TypeCheckers.EXPRS), + + VARREF(String.class), + + LITERAL(TypeCheckers.LITERAL_TYPES), + + READ_RECORD(String.class), + READ_FIELD(String.class, String.class), + READ_MODULE(TypeCheckers.LIST_OF_STRING), + + VESPA_GROUPING(String.class), + + NULL(); + + private final 
ArgumentsTypeChecker checker; + + + private ExpressionOperator(Object... types) { + checker = TypeCheckers.make(this, types); + } + + + @Override + public void checkArguments(Object... args) { + checker.check(args); + } + + public static Predicate<OperatorNode<? extends Operator>> IS = new Predicate<OperatorNode<? extends Operator>>() { + @Override + public boolean apply(OperatorNode<? extends Operator> input) { + return input.getOperator() instanceof ExpressionOperator; + } + }; + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/FieldFiller.java b/container-search/src/main/java/com/yahoo/search/yql/FieldFiller.java new file mode 100644 index 00000000000..f6e8ee1f27a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/FieldFiller.java @@ -0,0 +1,156 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import com.google.common.annotations.Beta; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.prelude.fastsearch.DocumentdbInfoConfig; +import com.yahoo.prelude.fastsearch.DocumentdbInfoConfig.Documentdb; +import com.yahoo.prelude.fastsearch.DocumentdbInfoConfig.Documentdb.Summaryclass; +import com.yahoo.prelude.fastsearch.DocumentdbInfoConfig.Documentdb.Summaryclass.Fields; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.query.Presentation; +import com.yahoo.search.searchchain.Execution; + +import edu.umd.cs.findbugs.annotations.NonNull; + +/** + * Ensure the fields specified in {@link Presentation#getSummaryFields()} are + * available after filling phase. 
+ * + * @author <a href="mailto:stiankri@yahoo-inc.com">Stian Kristoffersen</a> + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@Beta +@After(MinimalQueryInserter.EXTERNAL_YQL) +public class FieldFiller extends Searcher { + + private final Set<String> intersectionOfAttributes; + private final SummaryIntersections summaryDb = new SummaryIntersections(); + public static final CompoundName FIELD_FILLER_DISABLE = new CompoundName( + "FieldFiller.disable"); + + private static class SummaryIntersections { + private final Map<String, Map<String, Set<String>>> db = new HashMap<>(); + + void add(String dbName, Summaryclass summary) { + Map<String, Set<String>> docType = getOrCreateDocType(dbName); + Set<String> fields = new HashSet<>(summary.fields().size()); + for (Fields f : summary.fields()) { + fields.add(f.name()); + } + docType.put(summary.name(), fields); + } + + @NonNull + private Map<String, Set<String>> getOrCreateDocType(String dbName) { + Map<String, Set<String>> docType = db.get(dbName); + if (docType == null) { + docType = new HashMap<>(); + db.put(dbName, docType); + } + return docType; + } + + boolean hasAll(Set<String> requested, String summaryName, Set<String> restrict) { + Set<String> explicitRestriction; + Set<String> intersection = null; + + if (restrict.isEmpty()) { + explicitRestriction = db.keySet(); + } else { + explicitRestriction = restrict; + } + + for (String docType : explicitRestriction) { + Map<String, Set<String>> summaries = db.get(docType); + Set<String> summary; + + if (summaries == null) { + continue; + } + summary = summaries.get(summaryName); + if (summary == null) { + intersection = null; + break; + } + if (intersection == null) { + intersection = new HashSet<>(summary.size()); + intersection.addAll(summary); + } else { + intersection.retainAll(summary); + } + } + return intersection == null ? 
false : intersection + .containsAll(requested); + } + } + + public FieldFiller(DocumentdbInfoConfig config) { + intersectionOfAttributes = new HashSet<>(); + boolean first = true; + + for (Documentdb db : config.documentdb()) { + for (Summaryclass summary : db.summaryclass()) { + Set<String> attributes = null; + if (Execution.ATTRIBUTEPREFETCH.equals(summary.name())) { + attributes = new HashSet<>(summary.fields().size()); + for (Fields f : summary.fields()) { + attributes.add(f.name()); + } + if (first) { + first = false; + intersectionOfAttributes.addAll(attributes); + } else { + intersectionOfAttributes.retainAll(attributes); + } + } + // yes, we store attribute prefetch here as well, this is in + // case we get a query where we have a restrict parameter which + // makes filling with attribute prefetch possible even though it + // wouldn't have been possible without restricting the set of + // doctypes + summaryDb.add(db.name(), summary); + } + } + } + + @Override + public Result search(Query query, Execution execution) { + return execution.search(query); + } + + @Override + public void fill(Result result, String summaryClass, Execution execution) { + execution.fill(result, summaryClass); + + final Set<String> summaryFields = result.getQuery().getPresentation() + .getSummaryFields(); + + if (summaryFields.isEmpty() + || summaryClass == null + || result.getQuery().properties() + .getBoolean(FIELD_FILLER_DISABLE)) { + return; + } + + if (intersectionOfAttributes.containsAll(summaryFields)) { + if (!Execution.ATTRIBUTEPREFETCH.equals(summaryClass)) { + execution.fill(result, Execution.ATTRIBUTEPREFETCH); + } + } else { + // Yes, summaryClass may be Execution.ATTRIBUTEPREFETCH here + if (!summaryDb.hasAll(summaryFields, summaryClass, result + .getQuery().getModel().getRestrict())) { + execution.fill(result, null); + } + } + } +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/FieldFilter.java 
b/container-search/src/main/java/com/yahoo/search/yql/FieldFilter.java new file mode 100644 index 00000000000..b44fdadd17b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/FieldFilter.java @@ -0,0 +1,64 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import java.util.Iterator; +import java.util.Map.Entry; +import java.util.Set; + +import com.google.common.annotations.Beta; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.result.Hit; +import com.yahoo.search.searchchain.Execution; + +/** + * Remove fields which are not explicitly requested, if any field is explicitly + * requested. Disable using FieldFilter.disable=true in request. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@Beta +@After(MinimalQueryInserter.EXTERNAL_YQL) +@Before("com.yahoo.search.yql.FieldFiller") +public class FieldFilter extends Searcher { + + public static final CompoundName FIELD_FILTER_DISABLE = new CompoundName("FieldFilter.disable"); + + @Override + public Result search(Query query, Execution execution) { + Result result = execution.search(query); + filter(result); + return result; + } + + @Override + public void fill(Result result, String summaryClass, Execution execution) { + execution.fill(result, summaryClass); + filter(result); + } + + private void filter(Result result) { + Set<String> requestedFields; + + if (result.getQuery().properties().getBoolean(FIELD_FILTER_DISABLE)) return; + if (result.getQuery().getPresentation().getSummaryFields().isEmpty()) return; + + requestedFields = result.getQuery().getPresentation().getSummaryFields(); + for (Iterator<Hit> i = result.hits().unorderedDeepIterator(); i.hasNext();) { + Hit h = i.next(); + if (h.isMeta()) continue; + for (Iterator<Entry<String, Object>> fields = h.fieldIterator(); fields.hasNext();) { + Entry<String, Object> field = fields.next(); + if ( ! requestedFields.contains(field.getKey())) + fields.remove(); + } + + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/JavaListTypeChecker.java b/container-search/src/main/java/com/yahoo/search/yql/JavaListTypeChecker.java new file mode 100644 index 00000000000..86e2cbf01ff --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/JavaListTypeChecker.java @@ -0,0 +1,29 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.yql; + +import com.google.common.base.Preconditions; + +import java.util.List; + +class JavaListTypeChecker extends OperatorTypeChecker { + + private final Class<?> elementType; + + public JavaListTypeChecker(Operator parent, int idx, Class<?> elementType) { + super(parent, idx); + this.elementType = elementType; + } + + @Override + public void check(Object argument) { + Preconditions.checkNotNull(argument, "Argument %s of %s must not be null", idx, parent); + Preconditions.checkArgument(argument instanceof List, "Argument %s of %s must be a List<%s>", idx, parent, elementType.getName(), argument.getClass().getName()); + List<?> lst = (List<?>) argument; + for (Object elt : lst) { + Preconditions.checkNotNull(elt, "Argument %s of %s List elements may not be null", idx, parent); + Preconditions.checkArgument(elementType.isInstance(elt), "Argument %s of %s List elements must be %s (is %s)", idx, parent, elementType.getName(), elt.getClass().getName()); + } + } + +} + diff --git a/container-search/src/main/java/com/yahoo/search/yql/JavaTypeChecker.java b/container-search/src/main/java/com/yahoo/search/yql/JavaTypeChecker.java new file mode 100644 index 00000000000..bf91474c19b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/JavaTypeChecker.java @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.yql; + +import com.google.common.base.Preconditions; + +class JavaTypeChecker extends OperatorTypeChecker { + + private final Class<?> type; + + public JavaTypeChecker(Operator parent, int idx, Class<?> type) { + super(parent, idx); + this.type = type; + } + + @Override + public void check(Object argument) { + Preconditions.checkNotNull(argument, "Argument %s of %s must not be null", idx, parent); + Preconditions.checkArgument(type.isInstance(argument), "Argument %s of %s must be %s (is: %s).", idx, parent, type.getName(), argument.getClass().getName()); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/JavaUnionTypeChecker.java b/container-search/src/main/java/com/yahoo/search/yql/JavaUnionTypeChecker.java new file mode 100644 index 00000000000..a94027a9bd2 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/JavaUnionTypeChecker.java @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; + +import java.util.Set; + +public class JavaUnionTypeChecker extends OperatorTypeChecker { + + private final Set<Class<?>> types; + + public JavaUnionTypeChecker(Operator parent, int idx, Set<Class<?>> types) { + super(parent, idx); + this.types = types; + } + + public JavaUnionTypeChecker(Operator parent, int idx, Class<?>... 
types) { + super(parent, idx); + this.types = ImmutableSet.copyOf(types); + } + + @Override + public void check(Object argument) { + Preconditions.checkNotNull(argument, "Argument %s of %s must not be null", idx, parent); + for (Class<?> candidate : types) { + if (candidate.isInstance(argument)) { + return; + } + } + Preconditions.checkArgument(false, "Argument %s of %s must be %s (is: %s).", idx, parent, Joiner.on("|").join(types), argument.getClass()); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/Location.java b/container-search/src/main/java/com/yahoo/search/yql/Location.java new file mode 100644 index 00000000000..a304ed75536 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/Location.java @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +/** + * A pointer to a location in a YQL source program. + */ +final class Location { + + private final String programName; + private final int lineNumber; + private final int characterOffset; + + public Location(String programName, int lineNumber, int characterOffset) { + this.programName = programName; + this.lineNumber = lineNumber; + this.characterOffset = characterOffset; + } + + + public int getLineNumber() { + return lineNumber; + } + + public int getCharacterOffset() { + return characterOffset; + } + + @Override + public String toString() { + if (programName != null) { + return programName + ":L" + lineNumber + ":" + characterOffset; + } else { + return "L" + lineNumber + ":" + characterOffset; + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/MinimalQueryInserter.java b/container-search/src/main/java/com/yahoo/search/yql/MinimalQueryInserter.java new file mode 100644 index 00000000000..d710754e887 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/MinimalQueryInserter.java @@ -0,0 +1,98 @@ +// Copyright 
2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.annotations.Beta; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.grouping.GroupingRequest; +import com.yahoo.search.query.QueryTree; +import com.yahoo.search.query.parser.Parsable; +import com.yahoo.search.query.parser.ParserEnvironment; +import com.yahoo.search.query.parser.ParserFactory; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; +import com.yahoo.yolean.chain.After; +import com.yahoo.yolean.chain.Before; +import com.yahoo.yolean.chain.Provides; + +/** + * Minimal combinator for YQL+ syntax and heuristically parsed user queries. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @since 5.1.28 + */ +@Beta +@Provides(MinimalQueryInserter.EXTERNAL_YQL) +@Before(PhaseNames.TRANSFORMED_QUERY) +@After("com.yahoo.prelude.statistics.StatisticsSearcher") +public class MinimalQueryInserter extends Searcher { + public static final String EXTERNAL_YQL = "ExternalYql"; + + public static final CompoundName YQL = new CompoundName("yql"); + + private static final CompoundName MAX_HITS = new CompoundName("maxHits"); + private static final CompoundName MAX_OFFSET = new CompoundName("maxOffset"); + + public MinimalQueryInserter() { + } + + @Override + public Result search(Query query, Execution execution) { + if (query.properties().get(YQL) == null) { + return execution.search(query); + } + ParserEnvironment env = ParserEnvironment.fromExecutionContext(execution.context()); + YqlParser parser = (YqlParser) ParserFactory.newInstance(Query.Type.YQL, env); + parser.setQueryParser(false); + parser.setUserQuery(query); + QueryTree newTree; + try { + newTree = 
parser.parse(Parsable.fromQueryModel(query.getModel()) + .setQuery(query.properties().getString(YQL))); + } catch (RuntimeException e) { + return new Result(query, ErrorMessage.createInvalidQueryParameter( + "Could not instantiate query from YQL+", e)); + } + if (parser.getOffset() != null) { + final int maxHits = query.properties().getInteger(MAX_HITS); + final int maxOffset = query.properties().getInteger(MAX_OFFSET); + if (parser.getOffset() > maxOffset) { + return new Result(query, ErrorMessage.createInvalidQueryParameter("Requested offset " + parser.getOffset() + + ", but the max offset allowed is " + maxOffset + ".")); + } + if (parser.getHits() > maxHits) { + return new Result(query, ErrorMessage.createInvalidQueryParameter("Requested " + parser.getHits() + + " hits returned, but max hits allowed is " + maxHits + ".")); + + } + } + query.getModel().getQueryTree().setRoot(newTree.getRoot()); + query.getPresentation().getSummaryFields().addAll(parser.getYqlSummaryFields()); + for (VespaGroupingStep step : parser.getGroupingSteps()) { + GroupingRequest.newInstance(query) + .setRootOperation(step.getOperation()) + .continuations().addAll(step.continuations()); + } + if (parser.getYqlSources().size() == 0) { + query.getModel().getSources().clear(); + } else { + query.getModel().getSources().addAll(parser.getYqlSources()); + } + if (parser.getOffset() != null) { + query.setOffset(parser.getOffset()); + query.setHits(parser.getHits()); + } + if (parser.getTimeout() != null) { + query.setTimeout(parser.getTimeout().longValue()); + } + if (parser.getSorting() != null) { + query.getRanking().setSorting(parser.getSorting()); + } + query.trace("YQL+ query parsed", true, 2); + return execution.search(query); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/NodeTypeChecker.java b/container-search/src/main/java/com/yahoo/search/yql/NodeTypeChecker.java new file mode 100644 index 00000000000..c407689e107 --- /dev/null +++ 
b/container-search/src/main/java/com/yahoo/search/yql/NodeTypeChecker.java @@ -0,0 +1,34 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import java.util.Set; + +/** + * Check that an argument is an OperatorNode of a particular operator set. + */ +class NodeTypeChecker extends OperatorTypeChecker { + + private final Class<? extends Operator> operatorType; + private final Set<? extends Operator> operators; + + public NodeTypeChecker(Operator parent, int idx, Class<? extends Operator> operatorType, Set<? extends Operator> operators) { + super(parent, idx); + this.operatorType = operatorType; + this.operators = operators; + } + + @Override + public void check(Object argument) { + Preconditions.checkNotNull(argument, "Argument %s of %s must not be null", idx, parent); + Preconditions.checkArgument(argument instanceof OperatorNode, "Argument %s of %s must be an OperatorNode<%s> (is %s).", idx, parent, operatorType.getName(), argument.getClass()); + OperatorNode<?> node = (OperatorNode<?>) argument; + Operator op = node.getOperator(); + Preconditions.checkArgument(operatorType.isInstance(op), "Argument %s of %s must be an OperatorNode<%s> (is: %s).", idx, parent, operatorType.getName(), op.getClass()); + if (!operators.isEmpty()) { + Preconditions.checkArgument(operators.contains(op), "Argument %s of %s must be %s (is %s).", idx, parent, Joiner.on("|").join(operators), op); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/NullItemException.java b/container-search/src/main/java/com/yahoo/search/yql/NullItemException.java new file mode 100644 index 00000000000..c50f22ff711 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/NullItemException.java @@ -0,0 +1,14 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +/** + * Used to communicate a NullItem has been encountered in the query tree. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@SuppressWarnings("serial") +public class NullItemException extends RuntimeException { + public NullItemException(String message) { + super(message); + } +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/Operator.java b/container-search/src/main/java/com/yahoo/search/yql/Operator.java new file mode 100644 index 00000000000..f5c0f9fb56d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/Operator.java @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +interface Operator { + + String name(); + + void checkArguments(Object... args); + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/OperatorNode.java b/container-search/src/main/java/com/yahoo/search/yql/OperatorNode.java new file mode 100644 index 00000000000..d1b65ee258b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/OperatorNode.java @@ -0,0 +1,261 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.base.Function; +import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + +import javax.annotation.Nullable; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +/** + * Represents a use of an operator against concrete arguments. The types of arguments depend on the operator. + * <p> + * The extension point of this scheme is the Operator rather than new types of Nodes. 
+ * <p> + * Operators SHOULD take a fixed number of arguments -- wrap variable argument counts in Lists. + */ +final class OperatorNode<T extends Operator> { + + public static <T extends Operator> OperatorNode<T> create(T operator, Object... args) { + operator.checkArguments(args == null ? EMPTY_ARGS : args); + return new OperatorNode<T>(operator, args); + } + + public static <T extends Operator> OperatorNode<T> create(Location loc, T operator, Object... args) { + operator.checkArguments(args == null ? EMPTY_ARGS : args); + return new OperatorNode<T>(loc, operator, args); + } + + public static <T extends Operator> OperatorNode<T> create(Location loc, Map<String, Object> annotations, T operator, Object... args) { + operator.checkArguments(args == null ? EMPTY_ARGS : args); + return new OperatorNode<T>(loc, annotations, operator, args); + } + + private static final Object[] EMPTY_ARGS = new Object[0]; + + private final Location location; + private final T operator; + private Map<String, Object> annotations = ImmutableMap.of(); + private final Object[] args; + + private OperatorNode(T operator, Object... args) { + this.location = null; + this.operator = operator; + if (args == null) { + this.args = EMPTY_ARGS; + } else { + this.args = args; + } + } + + private OperatorNode(Location loc, T operator, Object... args) { + this.location = loc; + this.operator = operator; + if (args == null) { + this.args = EMPTY_ARGS; + } else { + this.args = args; + } + } + + private OperatorNode(Location loc, Map<String, Object> annotations, T operator, Object... 
args) { + this.location = loc; + this.operator = operator; + this.annotations = ImmutableMap.copyOf(annotations); + if (args == null) { + this.args = EMPTY_ARGS; + } else { + this.args = args; + } + } + + public T getOperator() { + return operator; + } + + public Object[] getArguments() { + // this is only called by a test right now, but ImmutableList.copyOf won't tolerate null elements + if (args.length == 0) { + return args; + } + Object[] copy = new Object[args.length]; + System.arraycopy(args, 0, copy, 0, args.length); + return copy; + } + + public <T> T getArgument(int i) { + return (T) args[i]; + } + + public <T> T getArgument(int i, Class<T> clazz) { + return clazz.cast(getArgument(i)); + } + + public Location getLocation() { + return location; + } + + public Object getAnnotation(String name) { + return annotations.get(name); + } + + public OperatorNode<T> putAnnotation(String name, Object value) { + if (annotations.isEmpty()) { + annotations = Maps.newLinkedHashMap(); + } else if (annotations instanceof ImmutableMap) { + annotations = Maps.newLinkedHashMap(annotations); + } + annotations.put(name, value); + return this; + } + + public Map<String, Object> getAnnotations() { + // TODO: this should be a read-only view? 
+ return ImmutableMap.copyOf(annotations); + } + + public OperatorNode<T> transform(Function<Object, Object> argumentTransform) { + if (args.length == 0) { + // nothing to transform, so no change is possible + return this; + } + Object[] newArgs = new Object[args.length]; + boolean changed = false; + for (int i = 0; i < args.length; ++i) { + Object target = args[i]; + if (target instanceof List) { + List<Object> newList = Lists.newArrayListWithExpectedSize(((List) target).size()); + for (Object val : (List) target) { + newList.add(argumentTransform.apply(val)); + } + newArgs[i] = newList; + // this will always 'change' the tree, maybe fix later + } else { + newArgs[i] = argumentTransform.apply(args[i]); + } + changed = changed || newArgs[i] != args[i]; + } + if (changed) { + return new OperatorNode<>(location, annotations, operator, newArgs); + } + return this; + } + + public void visit(OperatorVisitor visitor) { + if (visitor.enter(this)) { + for (Object target : args) { + if (target instanceof List) { + for (Object val : (List) target) { + if (val instanceof OperatorNode) { + ((OperatorNode) val).visit(visitor); + } + } + } else if (target instanceof OperatorNode) { + ((OperatorNode) target).visit(visitor); + + } + } + } + visitor.exit(this); + } + + // we are aware only of types used in our logical operator trees -- OperatorNode, List, and constant values + private static final Function<Object, Object> COPY = new Function<Object, Object>() { + @Nullable + @Override + public Object apply(@Nullable Object input) { + if (input instanceof List) { + List<Object> newList = Lists.newArrayListWithExpectedSize(((List) input).size()); + for (Object val : (List) input) { + newList.add(COPY.apply(val)); + } + return newList; + } else if (input instanceof OperatorNode) { + return ((OperatorNode) input).copy(); + } else if (input instanceof String || input instanceof Number || input instanceof Boolean) { + return input; + } else { + // this may be annoying but COPY not 
understanding how to COPY and quietly reusing + // when it may not be immutable could be dangerous + throw new IllegalArgumentException("Unexpected value type in OperatorNode tree: " + input); + } + } + }; + + public OperatorNode<T> copy() { + Object[] newArgs = new Object[args.length]; + for (int i = 0; i < args.length; ++i) { + newArgs[i] = COPY.apply(args[i]); + } + return new OperatorNode<>(location, ImmutableMap.copyOf(annotations), operator, newArgs); + } + + public void toString(StringBuilder output) { + output.append("(") + .append(operator.name()); + if(location != null) { + output.append(" L") + .append(location.getCharacterOffset()) + .append(":") + .append(location.getLineNumber()); + } + if(annotations != null && !annotations.isEmpty()) { + output.append(" {"); + Joiner.on(", ").withKeyValueSeparator("=") + .appendTo(output, annotations); + output.append("}"); + } + boolean first = true; + for(Object arg : args) { + if(!first) { + output.append(","); + } + first = false; + output.append(" "); + if(arg instanceof OperatorNode) { + ((OperatorNode) arg).toString(output); + } else if(arg instanceof Iterable) { + output.append("["); + Joiner.on(", ").appendTo(output, (Iterable)arg); + output.append("]"); + } else { + output.append(arg.toString()); + } + } + output.append(")"); + } + + public String toString() { + StringBuilder output = new StringBuilder(); + toString(output); + return output.toString(); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + OperatorNode that = (OperatorNode) o; + + if (!annotations.equals(that.annotations)) return false; + // Probably incorrect - comparing Object[] arrays with Arrays.equals + if (!Arrays.equals(args, that.args)) return false; + if (!operator.equals(that.operator)) return false; + + return true; + } + + @Override + public int hashCode() { + int result = operator.hashCode(); + result = 31 * result + 
annotations.hashCode(); + result = 31 * result + Arrays.hashCode(args); + return result; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/OperatorNodeListTypeChecker.java b/container-search/src/main/java/com/yahoo/search/yql/OperatorNodeListTypeChecker.java new file mode 100644 index 00000000000..d0c98fb3d11 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/OperatorNodeListTypeChecker.java @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; + +import java.util.List; +import java.util.Set; + +class OperatorNodeListTypeChecker extends OperatorTypeChecker { + + private final Class<? extends Operator> operatorType; + private final Set<? extends Operator> operators; + + public OperatorNodeListTypeChecker(Operator parent, int idx, Class<? extends Operator> operatorType, Set<? 
extends Operator> operators) { + super(parent, idx); + this.operatorType = operatorType; + this.operators = operators; + } + + @Override + public void check(Object argument) { + Preconditions.checkNotNull(argument, "Argument %s of %s must not be null", idx, parent); + Preconditions.checkArgument(argument instanceof List, "Argument %s of %s must be a List<OperatorNode<%s>>", idx, parent, operatorType.getName(), argument.getClass()); + List<OperatorNode<?>> lst = (List<OperatorNode<?>>) argument; + for (OperatorNode<?> node : lst) { + Operator op = node.getOperator(); + Preconditions.checkArgument(operatorType.isInstance(op), "Argument %s of %s must contain only OperatorNode<%s> (is: %s).", idx, parent, operatorType.getName(), op.getClass()); + if (!operators.isEmpty()) { + Preconditions.checkArgument(operators.contains(op), "Argument %s of %s must contain only %s (is %s).", idx, parent, Joiner.on("|").join(operators), op); + } + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/OperatorTypeChecker.java b/container-search/src/main/java/com/yahoo/search/yql/OperatorTypeChecker.java new file mode 100644 index 00000000000..8266f414fa7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/OperatorTypeChecker.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +/** + * Check the type of a single argument. 
+ */ +abstract class OperatorTypeChecker { + + protected final Operator parent; + protected final int idx; + + protected OperatorTypeChecker(Operator parent, int idx) { + this.parent = parent; + this.idx = idx; + } + + public abstract void check(Object argument); + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/OperatorVisitor.java b/container-search/src/main/java/com/yahoo/search/yql/OperatorVisitor.java new file mode 100644 index 00000000000..73c3612c1c9 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/OperatorVisitor.java @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +interface OperatorVisitor { + + <T extends Operator> boolean enter(OperatorNode<T> node); + + <T extends Operator> void exit(OperatorNode<T> node); + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/ParserBase.java b/container-search/src/main/java/com/yahoo/search/yql/ParserBase.java new file mode 100644 index 00000000000..af3418919e8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/ParserBase.java @@ -0,0 +1,38 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.collect.Sets; + +import org.antlr.v4.runtime.Parser; +import org.antlr.v4.runtime.TokenStream; +import org.antlr.v4.runtime.tree.ParseTree; + +import java.util.Set; + +/** + * Provides semantic helper functions to Parser. 
+ */ +abstract class ParserBase extends Parser { + + private static String arrayRuleName = "array"; + public ParserBase(TokenStream input) { + super(input); + } + + private Set<String> arrayParameters = Sets.newHashSet(); + + public void registerParameter(String name, String typeName) { + if (typeName.equals(arrayRuleName)) { + arrayParameters.add(name); + } + } + + public boolean isArrayParameter(ParseTree nameNode) { + String name = nameNode.getText(); + if (name.startsWith("@")) { + name = name.substring(1); + } + return name != null && arrayParameters.contains(name); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/ProgramCompileException.java b/container-search/src/main/java/com/yahoo/search/yql/ProgramCompileException.java new file mode 100644 index 00000000000..592bd690d56 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/ProgramCompileException.java @@ -0,0 +1,38 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +class ProgramCompileException extends RuntimeException { + + private Location sourceLocation; + + public ProgramCompileException(String message) { + super(message); + } + + public ProgramCompileException(String message, Object... args) { + super(formatMessage(message, args)); + } + + private static String formatMessage(String message, Object... args) { + return args == null ? message : String.format(message, args); + } + + public ProgramCompileException(String message, Throwable cause) { + super(message, cause); + } + + public ProgramCompileException(Throwable cause) { + super(cause); + } + + public ProgramCompileException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } + + + public ProgramCompileException(Location sourceLocation, String message, Object... 
args) { + super(String.format("%s %s", sourceLocation != null ? sourceLocation : "", args == null ? message : String.format(message, args))); + this.sourceLocation = sourceLocation; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/ProgramParser.java b/container-search/src/main/java/com/yahoo/search/yql/ProgramParser.java new file mode 100644 index 00000000000..a8d1bc43a4c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/ProgramParser.java @@ -0,0 +1,1549 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import com.yahoo.search.yql.yqlplusParser.AnnotationContext; +import com.yahoo.search.yql.yqlplusParser.AnnotateExpressionContext; +import com.yahoo.search.yql.yqlplusParser.ArgumentContext; +import com.yahoo.search.yql.yqlplusParser.ArgumentsContext; +import com.yahoo.search.yql.yqlplusParser.ArrayLiteralContext; +import com.yahoo.search.yql.yqlplusParser.ArrayTypeContext; +import com.yahoo.search.yql.yqlplusParser.Call_sourceContext; +import com.yahoo.search.yql.yqlplusParser.ConstantArrayContext; +import com.yahoo.search.yql.yqlplusParser.ConstantExpressionContext; +import com.yahoo.search.yql.yqlplusParser.ConstantMapExpressionContext; +import com.yahoo.search.yql.yqlplusParser.ConstantPropertyNameAndValueContext; +import com.yahoo.search.yql.yqlplusParser.Delete_statementContext; +import com.yahoo.search.yql.yqlplusParser.DereferencedExpressionContext; +import com.yahoo.search.yql.yqlplusParser.EqualityExpressionContext; +import 
com.yahoo.search.yql.yqlplusParser.ExpressionContext; +import com.yahoo.search.yql.yqlplusParser.FallbackContext; +import com.yahoo.search.yql.yqlplusParser.Field_defContext; +import com.yahoo.search.yql.yqlplusParser.Field_names_specContext; +import com.yahoo.search.yql.yqlplusParser.Field_values_group_specContext; +import com.yahoo.search.yql.yqlplusParser.Field_values_specContext; +import com.yahoo.search.yql.yqlplusParser.IdentContext; +import com.yahoo.search.yql.yqlplusParser.Import_listContext; +import com.yahoo.search.yql.yqlplusParser.Import_statementContext; +import com.yahoo.search.yql.yqlplusParser.InNotInTargetContext; +import com.yahoo.search.yql.yqlplusParser.Insert_sourceContext; +import com.yahoo.search.yql.yqlplusParser.Insert_statementContext; +import com.yahoo.search.yql.yqlplusParser.Insert_valuesContext; +import com.yahoo.search.yql.yqlplusParser.JoinExpressionContext; +import com.yahoo.search.yql.yqlplusParser.Join_exprContext; +import com.yahoo.search.yql.yqlplusParser.LimitContext; +import com.yahoo.search.yql.yqlplusParser.Literal_elementContext; +import com.yahoo.search.yql.yqlplusParser.Literal_listContext; +import com.yahoo.search.yql.yqlplusParser.LogicalANDExpressionContext; +import com.yahoo.search.yql.yqlplusParser.LogicalORExpressionContext; +import com.yahoo.search.yql.yqlplusParser.MapExpressionContext; +import com.yahoo.search.yql.yqlplusParser.MapTypeContext; +import com.yahoo.search.yql.yqlplusParser.Merge_componentContext; +import com.yahoo.search.yql.yqlplusParser.Merge_statementContext; +import com.yahoo.search.yql.yqlplusParser.ModuleIdContext; +import com.yahoo.search.yql.yqlplusParser.ModuleNameContext; +import com.yahoo.search.yql.yqlplusParser.MultiplicativeExpressionContext; +import com.yahoo.search.yql.yqlplusParser.Namespaced_nameContext; +import com.yahoo.search.yql.yqlplusParser.Next_statementContext; +import com.yahoo.search.yql.yqlplusParser.OffsetContext; +import 
com.yahoo.search.yql.yqlplusParser.OrderbyContext; +import com.yahoo.search.yql.yqlplusParser.Orderby_fieldContext; +import com.yahoo.search.yql.yqlplusParser.Output_specContext; +import com.yahoo.search.yql.yqlplusParser.Paged_clauseContext; +import com.yahoo.search.yql.yqlplusParser.ParamsContext; +import com.yahoo.search.yql.yqlplusParser.Pipeline_stepContext; +import com.yahoo.search.yql.yqlplusParser.Procedure_argumentContext; +import com.yahoo.search.yql.yqlplusParser.Program_arglistContext; +import com.yahoo.search.yql.yqlplusParser.Project_specContext; +import com.yahoo.search.yql.yqlplusParser.ProgramContext; +import com.yahoo.search.yql.yqlplusParser.PropertyNameAndValueContext; +import com.yahoo.search.yql.yqlplusParser.Query_statementContext; +import com.yahoo.search.yql.yqlplusParser.RelationalExpressionContext; +import com.yahoo.search.yql.yqlplusParser.RelationalOpContext; +import com.yahoo.search.yql.yqlplusParser.Returning_specContext; +import com.yahoo.search.yql.yqlplusParser.Scalar_literalContext; +import com.yahoo.search.yql.yqlplusParser.Select_source_joinContext; +import com.yahoo.search.yql.yqlplusParser.Select_source_multiContext; +import com.yahoo.search.yql.yqlplusParser.Select_statementContext; +import com.yahoo.search.yql.yqlplusParser.Selectvar_statementContext; +import com.yahoo.search.yql.yqlplusParser.Sequence_sourceContext; +import com.yahoo.search.yql.yqlplusParser.Source_listContext; +import com.yahoo.search.yql.yqlplusParser.Source_specContext; +import com.yahoo.search.yql.yqlplusParser.Source_statementContext; +import com.yahoo.search.yql.yqlplusParser.StatementContext; +import com.yahoo.search.yql.yqlplusParser.TimeoutContext; +import com.yahoo.search.yql.yqlplusParser.TypenameContext; +import com.yahoo.search.yql.yqlplusParser.UnaryExpressionContext; +import com.yahoo.search.yql.yqlplusParser.Update_statementContext; +import com.yahoo.search.yql.yqlplusParser.Update_valuesContext; +import 
com.yahoo.search.yql.yqlplusParser.ViewContext; +import com.yahoo.search.yql.yqlplusParser.WhereContext; + +import org.antlr.v4.runtime.BaseErrorListener; +import org.antlr.v4.runtime.CharStream; +import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.ParserRuleContext; +import org.antlr.v4.runtime.RecognitionException; +import org.antlr.v4.runtime.Recognizer; +import org.antlr.v4.runtime.Token; +import org.antlr.v4.runtime.TokenStream; +import org.antlr.v4.runtime.atn.PredictionMode; +import org.antlr.v4.runtime.misc.NotNull; +import org.antlr.v4.runtime.misc.Nullable; +import org.antlr.v4.runtime.tree.ParseTree; +import org.antlr.v4.runtime.tree.RuleNode; +import org.antlr.v4.runtime.tree.TerminalNode; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Translate the ANTLR grammar into the logical representation. + */ +final class ProgramParser { + + public yqlplusParser prepareParser(String programName, InputStream input) throws IOException { + return prepareParser(programName, new CaseInsensitiveInputStream(input)); + } + + public yqlplusParser prepareParser(String programName, String input) throws IOException { + return prepareParser(programName, new CaseInsensitiveInputStream(input)); + } + + public yqlplusParser prepareParser(File file) throws IOException { + return prepareParser(file.getAbsoluteFile().toString(), new CaseInsensitiveFileStream(file.getAbsolutePath())); + } + + + private yqlplusParser prepareParser(final String programName, CharStream input) { + yqlplusLexer lex = new yqlplusLexer(input); + lex.addErrorListener(new BaseErrorListener() { + @Override + public void syntaxError(@NotNull Recognizer<?, ?> recognizer, + @Nullable Object offendingSymbol, + int line, + int charPositionInLine, + @NotNull String msg, + @Nullable RecognitionException e) + { + throw new ProgramCompileException(new 
Location(programName, line, charPositionInLine), msg); + } + + }); + TokenStream tokens = new CommonTokenStream(lex); + yqlplusParser parser = new yqlplusParser(tokens); + parser.addErrorListener(new BaseErrorListener() { + @Override + public void syntaxError(@NotNull Recognizer<?, ?> recognizer, + @Nullable Object offendingSymbol, + int line, + int charPositionInLine, + @NotNull String msg, + @Nullable RecognitionException e) + { + throw new ProgramCompileException(new Location(programName, line, charPositionInLine), msg); + } + + }); + parser.getInterpreter().setPredictionMode(PredictionMode.SLL); + return parser; + } + + private ProgramContext parseProgram(yqlplusParser parser) throws RecognitionException { + try { + return parser.program(); + } catch (RecognitionException e) { + //Retry parsing using full LL mode + parser.reset(); + parser.getInterpreter().setPredictionMode(PredictionMode.LL); + return parser.program(); + } + } + + public OperatorNode<StatementOperator> parse(String programName, InputStream program) throws IOException, RecognitionException { + yqlplusParser parser = prepareParser(programName, program); + return convertProgram(parseProgram(parser), parser, programName); + } + + public OperatorNode<StatementOperator> parse(String programName, String program) throws IOException, RecognitionException { + yqlplusParser parser = prepareParser(programName, program); + return convertProgram(parseProgram(parser), parser, programName); + } + + public OperatorNode<StatementOperator> parse(File input) throws IOException, RecognitionException { + yqlplusParser parser = prepareParser(input); + return convertProgram(parseProgram(parser), parser, input.getAbsoluteFile().toString()); + } + + public OperatorNode<ExpressionOperator> parseExpression(String input) throws IOException, RecognitionException { + return convertExpr(prepareParser("<expression>", input).expression(false).getRuleContext(), new Scope()); + } + + public OperatorNode<ExpressionOperator> 
parseExpression(String input, Set<String> visibleAliases) throws IOException, RecognitionException { + Scope scope = new Scope(); + final Location loc = new Location("<expression>", -1, -1); + for (String alias : visibleAliases) { + scope.defineDataSource(loc, alias); + } + return convertExpr(prepareParser("<expression>", input).expression(false).getRuleContext(), scope); + } + + private Location toLocation(Scope scope, ParseTree node) { + Token start; + if (node instanceof ParserRuleContext) { + start = ((ParserRuleContext)node).start; + } else if (node instanceof TerminalNode) { + start = ((TerminalNode)node).getSymbol(); + } else { + throw new ProgramCompileException("Location is not available for type " + node.getClass()); + } + Location location = new Location(scope != null? scope.programName: "<string>", start.getLine(), start.getCharPositionInLine()); + return location; + } + + private List<String> readName(Namespaced_nameContext node) { + List<String> path = Lists.newArrayList(); + for (ParseTree elt:node.children) { + if (!(getParseTreeIndex(elt) == yqlplusParser.DOT)) { + path.add(elt.getText()); + } + } + return path; + } + + static class Binding { + private final List<String> binding; + + Binding(String moduleName, String exportName) { + this.binding = ImmutableList.of(moduleName, exportName); + } + + Binding(String moduleName) { + this.binding = ImmutableList.of(moduleName); + } + + Binding(List<String> binding) { + this.binding = binding; + } + + public List<String> toPath() { + return binding; + } + + public List<String> toPathWith(List<String> rest) { + return ImmutableList.copyOf(Iterables.concat(toPath(), rest)); + } + } + + static class Scope { + final Scope root; + final Scope parent; + Set<String> cursors = ImmutableSet.of(); + Set<String> variables = ImmutableSet.of(); + Set<String> views = Sets.newHashSet(); + Map<String, Binding> bindings = Maps.newHashMap(); + final yqlplusParser parser; + final String programName; + + Scope() { + 
this.parser = null; + this.programName = null; + this.root = this; + this.parent = null; + } + + Scope(yqlplusParser parser, String programName) { + this.parser = parser; + this.programName = programName; + this.root = this; + this.parent = null; + } + + Scope(Scope root, Scope parent) { + this.root = root; + this.parent = parent; + this.parser = parent.parser; + this.programName = parent.programName; + } + + public yqlplusParser getParser() { + return parser; + } + + public String getProgramName() { + return programName; + } + + public Set<String> getCursors() { + return cursors; + } + + + boolean isBound(String name) { + // bindings live only in the 'root' node + return root.bindings.containsKey(name); + } + + public Binding getBinding(String name) { + return root.bindings.get(name); + } + + public List<String> resolvePath(List<String> path) { + if (path.size() < 1 || !isBound(path.get(0))) { + return path; + } else { + return getBinding(path.get(0)).toPathWith(path.subList(1, path.size())); + } + } + + boolean isCursor(String name) { + return cursors.contains(name) || (parent != null && parent.isCursor(name)); + } + + boolean isVariable(String name) { + return variables.contains(name) || (parent != null && parent.isVariable(name)); + } + + public void bindModule(Location loc, List<String> binding, String symbolName) { + if (isBound(symbolName)) { + throw new ProgramCompileException(loc, "Name '%s' is already used.", symbolName); + } + root.bindings.put(symbolName, new Binding(binding)); + } + + public void bindModuleSymbol(Location loc, List<String> moduleName, String exportName, String symbolName) { + ImmutableList.Builder<String> builder = ImmutableList.builder(); + builder.addAll(moduleName); + builder.add(exportName); + bindModule(loc, builder.build(), symbolName); + } + + public void defineDataSource(Location loc, String name) { + if (isCursor(name)) { + throw new ProgramCompileException(loc, "Alias '%s' is already used.", name); + } + if 
(cursors.isEmpty()) { + cursors = Sets.newHashSet(); + } + cursors.add(name); + } + + public void defineVariable(Location loc, String name) { + if (isVariable(name)) { + throw new ProgramCompileException(loc, "Variable/argument '%s' is already used.", name); + } + if (variables.isEmpty()) { + variables = Sets.newHashSet(); + } + variables.add(name); + + } + + public void defineView(Location loc, String text) { + if (this != root) { + throw new IllegalStateException("Views MUST be defined in 'root' scope only"); + } + if (views.contains(text)) { + throw new ProgramCompileException(loc, "View '%s' already defined", text); + } + views.add(text); + } + + Scope child() { + return new Scope(root, this); + } + + Scope getRoot() { + return root; + } + } + + private OperatorNode<SequenceOperator> convertSelectOrInsertOrUpdateOrDelete(ParseTree node, Scope scopeParent) { + + Preconditions.checkArgument(node instanceof Select_statementContext || node instanceof Insert_statementContext || + node instanceof Update_statementContext || node instanceof Delete_statementContext); + + // SELECT^ select_field_spec select_source where? orderby? limit? offset? timeout? fallback? + // select is the only place to define where/orderby/limit/offset and joins + Scope scope = scopeParent.child(); + ProjectionBuilder proj = null; + OperatorNode<SequenceOperator> source = null; + OperatorNode<ExpressionOperator> filter = null; + List<OperatorNode<SortOperator>> orderby = null; + OperatorNode<ExpressionOperator> offset = null; + OperatorNode<ExpressionOperator> limit = null; + OperatorNode<ExpressionOperator> timeout = null; + OperatorNode<SequenceOperator> fallback = null; + OperatorNode<SequenceOperator> insertValues = null; + OperatorNode<ExpressionOperator> updateValues = null; + + ParseTree sourceNode; + + if (node instanceof Select_statementContext ) { + sourceNode = node.getChild(2) != null ? 
node.getChild(2).getChild(0):null; + } else { + sourceNode = node.getChild(1); + } + + if (sourceNode != null) { + switch (getParseTreeIndex(sourceNode)) { + // ALL_SOURCE and MULTI_SOURCE are how FROM SOURCES + // *|source_name,... are parsed + // They can't be used directly with the JOIN syntax at this time + case yqlplusParser.RULE_select_source_all: { + Location location = toLocation(scope, sourceNode.getChild(2)); + source = OperatorNode.create(location, SequenceOperator.ALL); + source.putAnnotation("alias", "row"); + scope.defineDataSource(location, "row"); + } + break; + case yqlplusParser.RULE_select_source_multi: + Source_listContext multiSourceContext = ((Select_source_multiContext) sourceNode).source_list(); + source = readMultiSource(scope, multiSourceContext); + source.putAnnotation("alias", "row"); + scope.defineDataSource(toLocation(scope, multiSourceContext), "row"); + break; + case yqlplusParser.RULE_select_source_join: + source = convertSource((ParserRuleContext) sourceNode.getChild(1), scope); + List<Join_exprContext> joinContexts = ((Select_source_joinContext)sourceNode).join_expr(); + for (Join_exprContext joinContext:joinContexts) { + source = convertJoin(joinContext, source, scope); + } + break; + case yqlplusParser.RULE_insert_source: + Insert_sourceContext insertSourceContext = (Insert_sourceContext) sourceNode; + source = convertSource((ParserRuleContext)insertSourceContext.getChild(1), scope); + break; + case yqlplusParser.RULE_delete_source: + source = convertSource((ParserRuleContext)sourceNode.getChild(1), scope); + break; + case yqlplusParser.RULE_update_source: + source = convertSource((ParserRuleContext)sourceNode.getChild(0), scope); + break; + } + } else { + source = OperatorNode.create(SequenceOperator.EMPTY); + } + + for (int i = 1; i < node.getChildCount(); ++i) { + ParseTree child = node.getChild(i); + switch (getParseTreeIndex(child)) { + case yqlplusParser.RULE_select_field_spec: + if (getParseTreeIndex(child.getChild(0)) == 
yqlplusParser.RULE_project_spec) { + proj = readProjection(((Project_specContext) child.getChild(0)).field_def(), scope); + } + break; + case yqlplusParser.RULE_returning_spec: + proj = readProjection(((Returning_specContext) child).select_field_spec().project_spec().field_def(), scope); + break; + case yqlplusParser.RULE_where: + filter = convertExpr(((WhereContext) child).expression(), scope); + break; + case yqlplusParser.RULE_orderby: + // OrderbyContext orderby() + List<Orderby_fieldContext> orderFieds = ((OrderbyContext) child) + .orderby_fields().orderby_field(); + orderby = Lists.newArrayListWithExpectedSize(orderFieds.size()); + for (int j = 0; j < orderFieds.size(); ++j) { + orderby.add(convertSortKey(orderFieds.get(j), scope)); + } + break; + case yqlplusParser.RULE_limit: + limit = convertExpr(((LimitContext) child).fixed_or_parameter(), scope); + break; + case yqlplusParser.RULE_offset: + offset = convertExpr(((OffsetContext) child).fixed_or_parameter(), scope); + break; + case yqlplusParser.RULE_timeout: + timeout = convertExpr(((TimeoutContext) child).fixed_or_parameter(), scope); + break; + case yqlplusParser.RULE_fallback: + fallback = convertQuery(((FallbackContext) child).select_statement(), scope); + break; + case yqlplusParser.RULE_insert_values: + if (child.getChild(0) instanceof yqlplusParser.Query_statementContext) { + insertValues = convertQuery(child.getChild(0).getChild(0), scope); + } else { + insertValues = readBatchValues(((Insert_valuesContext) child).field_names_spec(), ((Insert_valuesContext)child).field_values_group_spec(), scope); + } + break; + case yqlplusParser.RULE_update_values: + if (getParseTreeIndex(child.getChild(0)) == yqlplusParser.RULE_field_def) { + updateValues = readValues(((Update_valuesContext)child).field_def(), scope); + } else { + updateValues = readValues((Field_names_specContext)child.getChild(0), (Field_values_specContext)child.getChild(2), scope); + } + break; + } + } + // now assemble the logical plan + 
OperatorNode<SequenceOperator> result = source; + // filter + if (filter != null) { + result = OperatorNode.create(SequenceOperator.FILTER, result, filter); + } + // insert values + if (insertValues != null) { + result = OperatorNode.create(SequenceOperator.INSERT, result, insertValues); + } + // update + if (updateValues != null) { + if (filter != null) { + result = OperatorNode.create(SequenceOperator.UPDATE, source, updateValues, filter); + } else { + result = OperatorNode.create(SequenceOperator.UPDATE_ALL, source, updateValues); + } + } + // delete + if (getParseTreeIndex(node) == yqlplusParser.RULE_delete_statement) { + if (filter != null) { + result = OperatorNode.create(SequenceOperator.DELETE, source, filter); + } else { + result = OperatorNode.create(SequenceOperator.DELETE_ALL, source); + } + } + // then sort (or project and sort) + boolean projectBeforeSort = false; + if (orderby != null) { + if (proj != null) { + for (OperatorNode<SortOperator> sortKey : orderby) { + OperatorNode<ExpressionOperator> sortExpression = sortKey.getArgument(0); + List<OperatorNode<ExpressionOperator>> sortReadFields = getReadFieldExpressions(sortExpression); + for (OperatorNode<ExpressionOperator> sortReadField : sortReadFields) { + String sortKeyField = sortReadField.getArgument(1); + if (proj.isAlias(sortKeyField)) { + // TODO: Add support for "mixed" case + projectBeforeSort = true; + break; + } + } + } + } + if (projectBeforeSort) { + result = OperatorNode.create(SequenceOperator.SORT, proj.make(result), orderby); + } else { + result = OperatorNode.create(SequenceOperator.SORT, result, orderby); + } + } + // then offset/limit (must be done after sorting!) 
+ if (offset != null && limit != null) { + result = OperatorNode.create(SequenceOperator.SLICE, result, offset, limit); + } else if (offset != null) { + result = OperatorNode.create(SequenceOperator.OFFSET, result, offset); + } else if (limit != null) { + result = OperatorNode.create(SequenceOperator.LIMIT, result, limit); + } + // finally, project (if not already) + if (proj != null && !projectBeforeSort) { + result = proj.make(result); + } + if (timeout != null) { + result = OperatorNode.create(SequenceOperator.TIMEOUT, result, timeout); + } + // if there's a fallback, emit a fallback node + if (fallback != null) { + result = OperatorNode.create(SequenceOperator.FALLBACK, result, fallback); + } + return result; + } + + private OperatorNode<ExpressionOperator> readValues(List<Field_defContext> fieldDefs, Scope scope) { + List<String> fieldNames; + List<OperatorNode<ExpressionOperator>> fieldValues; + int numPairs = fieldDefs.size(); + fieldNames = Lists.newArrayListWithExpectedSize(numPairs); + fieldValues = Lists.newArrayListWithExpectedSize(numPairs); + for (int j = 0; j < numPairs; j++) { + ParseTree startNode = fieldDefs.get(j); + while(startNode.getChildCount() < 3) { + startNode = startNode.getChild(0); + } + fieldNames.add((String) convertExpr(startNode.getChild(0), scope).getArgument(1)); + fieldValues.add(convertExpr(startNode.getChild(2), scope)); + } + return OperatorNode.create(ExpressionOperator.MAP, fieldNames, fieldValues); + } + + private OperatorNode<SequenceOperator> readMultiSource(Scope scope, Source_listContext multiSource) { + List<List<String>> sourceNameList = Lists.newArrayList(); + List<Namespaced_nameContext> nameSpaces = multiSource.namespaced_name(); + for(Namespaced_nameContext node : nameSpaces) { + List<String> name = readName(node); + sourceNameList.add(name); + } + return OperatorNode.create(toLocation(scope, multiSource), SequenceOperator.MULTISOURCE, sourceNameList); + } +// pipeline_step +// : namespaced_name arguments[false]? 
+// ; + private OperatorNode<SequenceOperator> convertPipe(Query_statementContext queryStatementContext, List<Pipeline_stepContext> nodes, Scope scope) { + OperatorNode<SequenceOperator> result = convertQuery(queryStatementContext.getChild(0), scope.getRoot()); + for (Pipeline_stepContext step:nodes) { + if (getParseTreeIndex(step.getChild(0)) == yqlplusParser.RULE_vespa_grouping) { + result = OperatorNode.create(SequenceOperator.PIPE, result, ImmutableList.<String>of(), + ImmutableList.of(convertExpr(step.getChild(0), scope))); + } else { + List<String> name = readName(step.namespaced_name()); + List<OperatorNode<ExpressionOperator>> args = ImmutableList.of(); + //LPAREN (argument[$in_select] (COMMA argument[$in_select])*) RPAREN + if (step.getChildCount() > 1) { + ArgumentsContext arguments = step.arguments(); + if (arguments.getChildCount() > 2) { + List<ArgumentContext> argumentContextList = arguments.argument(); + args = Lists.newArrayListWithExpectedSize(argumentContextList.size()); + for (ArgumentContext argumentContext: argumentContextList) { + args.add(convertExpr(argumentContext.expression(), scope.getRoot())); + + } + } + } + result = OperatorNode.create(SequenceOperator.PIPE, result, scope.resolvePath(name), args); + } + } + return result; + } + + private OperatorNode<SequenceOperator> convertMerge(List<Merge_componentContext> mergeComponentList, Scope scope) { + Preconditions.checkArgument(mergeComponentList != null); + List<OperatorNode<SequenceOperator>> sources = Lists.newArrayListWithExpectedSize(mergeComponentList.size()); + for (Merge_componentContext mergeComponent:mergeComponentList) { + Select_statementContext selectContext = mergeComponent.select_statement(); + Source_statementContext sourceContext = mergeComponent.source_statement(); + if (selectContext != null) { + sources.add(convertQuery(selectContext, scope.getRoot())); + } else { + sources.add(convertQuery(sourceContext, scope.getRoot())); + } + } + return 
OperatorNode.create(SequenceOperator.MERGE, sources); + } + + private OperatorNode<SequenceOperator> convertQuery(ParseTree node, Scope scope) { + if (node instanceof Select_statementContext + || node instanceof Insert_statementContext + || node instanceof Update_statementContext + || node instanceof Delete_statementContext) { + return convertSelectOrInsertOrUpdateOrDelete(node, scope.getRoot()); + } else if (node instanceof Source_statementContext) { //for pipe + Source_statementContext sourceStatementContext = (Source_statementContext)node; + return convertPipe(sourceStatementContext.query_statement(), sourceStatementContext.pipeline_step(), scope); + } else if (node instanceof Merge_statementContext) { + return convertMerge(((Merge_statementContext)node).merge_component(), scope); + } else { + throw new IllegalArgumentException("Unexpected argument type to convertQueryStatement: " + node.toStringTree()); + } + + } + + private OperatorNode<SequenceOperator> convertJoin(Join_exprContext node, OperatorNode<SequenceOperator> left, Scope scope) { + Source_specContext sourceSpec = node.source_spec(); + OperatorNode<SequenceOperator> right = convertSource(sourceSpec, scope); + JoinExpressionContext joinContext = node.joinExpression(); + OperatorNode<ExpressionOperator> joinExpression = readBinOp(ExpressionOperator.valueOf("EQ"), joinContext.getChild(0), joinContext.getChild(2), scope); + if (joinExpression.getOperator() != ExpressionOperator.EQ) { + throw new ProgramCompileException(joinExpression.getLocation(), "Unexpected join expression type: %s (expected EQ)", joinExpression.getOperator()); + } + return OperatorNode.create(toLocation(scope, sourceSpec), node.join_spec().LEFT() != null ? 
SequenceOperator.LEFT_JOIN : SequenceOperator.JOIN, left, right, joinExpression); + } + + private String assignAlias(String alias, ParserRuleContext node, Scope scope) { + if (alias == null) { + alias = "source"; + } + + if (node != null && node instanceof yqlplusParser.Alias_defContext) { + //alias_def : (AS? ID); + ParseTree idChild = node; + if (node.getChildCount() > 1) { + idChild = node.getChild(1); + } + alias = idChild.getText(); + if (scope.isCursor(alias)) { + throw new ProgramCompileException(toLocation(scope, idChild), "Source alias '%s' is already used", alias); + } + scope.defineDataSource(toLocation(scope, idChild), alias); + return alias; + } else { + String candidate = alias; + int c = 0; + while (scope.isCursor(candidate)) { + candidate = alias + (++c); + } + scope.defineDataSource(null, candidate); + return alias; + } + } + + private OperatorNode<SequenceOperator> convertSource(ParserRuleContext sourceSpecNode, Scope scope) { + + // DataSources + String alias; + OperatorNode<SequenceOperator> result; + ParserRuleContext dataSourceNode = sourceSpecNode; + ParserRuleContext aliasContext = null; + //data_source + //: call_source + //| LPAREN source_statement RPAREN + //| sequence_source + //; + if (sourceSpecNode instanceof Source_specContext) { + dataSourceNode = (ParserRuleContext)sourceSpecNode.getChild(0); + if (sourceSpecNode.getChildCount() == 2) { + aliasContext = (ParserRuleContext)sourceSpecNode.getChild(1); + } + if (dataSourceNode.getChild(0) instanceof Call_sourceContext || + dataSourceNode.getChild(0) instanceof Sequence_sourceContext) { + dataSourceNode = (ParserRuleContext)dataSourceNode.getChild(0); + } else { //source_statement + dataSourceNode = (ParserRuleContext)dataSourceNode.getChild(1); + } + } + switch (getParseTreeIndex(dataSourceNode)) { + case yqlplusParser.RULE_write_data_source: + case yqlplusParser.RULE_call_source: { + List<String> names = 
readName((Namespaced_nameContext)dataSourceNode.getChild(Namespaced_nameContext.class, 0)); + alias = assignAlias(names.get(names.size() - 1), aliasContext, scope); + List<OperatorNode<ExpressionOperator>> arguments = ImmutableList.of(); + ArgumentsContext argumentsContext = dataSourceNode.getRuleContext(ArgumentsContext.class,0); + if ( argumentsContext != null) { + List<ArgumentContext> argumentContexts = argumentsContext.argument(); + arguments = Lists.newArrayListWithExpectedSize(argumentContexts.size()); + for (ArgumentContext argumentContext:argumentContexts) { + arguments.add(convertExpr(argumentContext, scope)); + } + } + if (names.size() == 1 && scope.isVariable(names.get(0))) { + String ident = names.get(0); + if (arguments.size() > 0) { + throw new ProgramCompileException(toLocation(scope, argumentsContext), "Invalid call-with-arguments on local source '%s'", ident); + } + result = OperatorNode.create(toLocation(scope, dataSourceNode), SequenceOperator.EVALUATE, OperatorNode.create(toLocation(scope, dataSourceNode), ExpressionOperator.VARREF, ident)); + } else { + result = OperatorNode.create(toLocation(scope, dataSourceNode), SequenceOperator.SCAN, scope.resolvePath(names), arguments); + } + break; + } + case yqlplusParser.RULE_sequence_source: { + IdentContext identContext = dataSourceNode.getRuleContext(IdentContext.class,0); + String ident = identContext.getText(); + if (!scope.isVariable(ident)) { + throw new ProgramCompileException(toLocation(scope, identContext), "Unknown variable reference '%s'", ident); + } + alias = assignAlias(ident, aliasContext, scope); + result = OperatorNode.create(toLocation(scope, dataSourceNode), SequenceOperator.EVALUATE, OperatorNode.create(toLocation(scope, dataSourceNode), ExpressionOperator.VARREF, ident)); + break; + } + case yqlplusParser.RULE_source_statement: { + alias = assignAlias(null, dataSourceNode, scope); + result = convertQuery(dataSourceNode, scope); + break; + } + default: + throw new 
IllegalArgumentException("Unexpected argument type to convertSource: " + dataSourceNode.getText()); + } + result.putAnnotation("alias", alias); + return result; + } + + private OperatorNode<TypeOperator> decodeType(Scope scope, TypenameContext type) { + + TypeOperator op; + ParseTree typeNode = type.getChild(0); + switch (getParseTreeIndex(typeNode)) { + case yqlplusParser.TYPE_BOOLEAN: + op = TypeOperator.BOOLEAN; + break; + case yqlplusParser.TYPE_BYTE: + op = TypeOperator.BYTE; + break; + case yqlplusParser.TYPE_DOUBLE: + op = TypeOperator.DOUBLE; + break; + case yqlplusParser.TYPE_INT16: + op = TypeOperator.INT16; + break; + case yqlplusParser.TYPE_INT32: + op = TypeOperator.INT32; + break; + case yqlplusParser.TYPE_INT64: + op = TypeOperator.INT64; + break; + case yqlplusParser.TYPE_STRING: + op = TypeOperator.STRING; + break; + case yqlplusParser.TYPE_TIMESTAMP: + op = TypeOperator.TIMESTAMP; + break; + case yqlplusParser.RULE_arrayType: + return OperatorNode.create(toLocation(scope, typeNode), TypeOperator.ARRAY, decodeType(scope, ((ArrayTypeContext)typeNode).getChild(TypenameContext.class, 0))); + case yqlplusParser.RULE_mapType: + return OperatorNode.create(toLocation(scope, typeNode), TypeOperator.MAP, decodeType(scope, ((MapTypeContext)typeNode).getChild(TypenameContext.class, 0))); + default: + throw new ProgramCompileException("Unknown type " + typeNode.getText()); + } + return OperatorNode.create(toLocation(scope, typeNode), op); + } + + private List<String> createBindingName(ParseTree node) { + if (node instanceof ModuleNameContext) { + if (((ModuleNameContext)node).namespaced_name() != null) { + return readName(((ModuleNameContext)node).namespaced_name()); + } else if (((ModuleNameContext)node).literalString() != null) { + return ImmutableList.of(((ModuleNameContext)node).literalString().STRING().getText()); + } + } else if (node instanceof ModuleIdContext) { + return ImmutableList.of(node.getText()); + } + throw new ProgramCompileException("Wrong 
context"); + } + + private OperatorNode<StatementOperator> convertProgram( + ParserRuleContext program, yqlplusParser parser, String programName) { + Scope scope = new Scope(parser, programName); + List<OperatorNode<StatementOperator>> stmts = Lists.newArrayList(); + int output = 0; + for (ParseTree node : program.children) { + if (!(node instanceof ParserRuleContext)) { + continue; + } + ParserRuleContext ruleContext = (ParserRuleContext) node; + switch (ruleContext.getRuleIndex()) { + case yqlplusParser.RULE_params: { + // ^(ARGUMENT ident typeref expression?) + ParamsContext paramsContext = (ParamsContext) ruleContext; + Program_arglistContext program_arglistContext = paramsContext.program_arglist(); + if (program_arglistContext != null) { + List<Procedure_argumentContext> argList = program_arglistContext.procedure_argument(); + for (Procedure_argumentContext procedureArgumentContext : argList) { + String name = procedureArgumentContext.ident().getText(); + OperatorNode<TypeOperator> type = decodeType(scope, procedureArgumentContext.getChild(TypenameContext.class, 0)); + OperatorNode<ExpressionOperator> defaultValue = OperatorNode.create(ExpressionOperator.NULL); + if (procedureArgumentContext.expression() != null) { + defaultValue = convertExpr(procedureArgumentContext.expression(), scope); + } + scope.defineVariable(toLocation(scope, procedureArgumentContext), name); + stmts.add(OperatorNode.create(StatementOperator.ARGUMENT, name, type, defaultValue)); + } + } + break; + } + case yqlplusParser.RULE_import_statement: { + Import_statementContext importContext = (Import_statementContext) ruleContext; + if (null == importContext.import_list()) { + List<String> name = createBindingName(node.getChild(1)); + String target; + Location location = toLocation(scope, node.getChild(1)); + if (node.getChildCount() == 2) { + target = name.get(0); + } else if (node.getChildCount() == 4) { + target = node.getChild(3).getText(); + } else { + throw new 
ProgramCompileException("Unknown node count for IMPORT: " + node.toStringTree()); + } + scope.bindModule(location, name, target); + } else { + // | FROM moduleName IMPORT import_list -> ^(IMPORT_FROM + // moduleName import_list+) + Import_listContext importListContext = importContext.import_list(); + List<String> name = createBindingName(importContext.moduleName()); + Location location = toLocation(scope, importContext.moduleName()); + List<ModuleIdContext> moduleIds = importListContext.moduleId(); + List<String> symbols = Lists.newArrayListWithExpectedSize(moduleIds.size()); + for (ModuleIdContext cnode : moduleIds) { + symbols.add(cnode.ID().getText()); + } + for (String sym : symbols) { + scope.bindModuleSymbol(location, name, sym, sym); + } + } + break; + } + + // DDL + case yqlplusParser.RULE_ddl: + ruleContext = (ParserRuleContext)ruleContext.getChild(0); + case yqlplusParser.RULE_view: { + // view and projection expansion now has to be done by the + // execution engine + // since views/projections, in order to be useful, have to + // support being used from outside the same program + ViewContext viewContext = (ViewContext) ruleContext; + Location loc = toLocation(scope, viewContext); + scope.getRoot().defineView(loc, viewContext.ID().getText()); + stmts.add(OperatorNode.create(loc, StatementOperator.DEFINE_VIEW, viewContext.ID().getText(), convertQuery(viewContext.source_statement(), scope.getRoot()))); + break; + } + case yqlplusParser.RULE_statement: { + // ^(STATEMENT_QUERY source_statement paged_clause? + // output_spec?) 
+ StatementContext statementContext = (StatementContext) ruleContext; + switch (getParseTreeIndex(ruleContext.getChild(0))) { + case yqlplusParser.RULE_selectvar_statement: { + // ^(STATEMENT_SELECTVAR ident source_statement) + Selectvar_statementContext selectVarContext = (Selectvar_statementContext) ruleContext.getChild(0); + String variable = selectVarContext.ident().getText(); + OperatorNode<SequenceOperator> query = convertQuery(selectVarContext.source_statement(), scope); + Location location = toLocation(scope, selectVarContext.ident()); + scope.defineVariable(location, variable); + stmts.add(OperatorNode.create(location, StatementOperator.EXECUTE, query, variable)); + break; + } + case yqlplusParser.RULE_next_statement: { + // NEXT^ literalString OUTPUT! AS! ident + Next_statementContext nextStateContext = (Next_statementContext) ruleContext.getChild(0); + String continuationValue = StringUnescaper.unquote(nextStateContext.literalString().getText()); + String variable = nextStateContext.ident().getText(); + Location location = toLocation(scope, node); + OperatorNode<SequenceOperator> next = OperatorNode.create(location, SequenceOperator.NEXT, continuationValue); + stmts.add(OperatorNode.create(location, StatementOperator.EXECUTE, next, variable)); + stmts.add(OperatorNode.create(location, StatementOperator.OUTPUT, variable)); + scope.defineVariable(location, variable); + break; + } + case yqlplusParser.RULE_output_statement: + Source_statementContext source_statement = statementContext.output_statement().source_statement(); + OperatorNode<SequenceOperator> query; + if (source_statement.getChildCount() == 1) { + query = convertQuery( source_statement.query_statement().getChild(0), scope); + } else { + query = convertQuery(source_statement, scope); + } + String variable = "result" + (++output); + boolean isCountVariable = false; + OperatorNode<ExpressionOperator> pageSize = null; + ParseTree outputStatement = node.getChild(0); + Location location = 
toLocation(scope, outputStatement); + for (int i = 1; i < outputStatement.getChildCount(); ++i) { + ParseTree child = outputStatement.getChild(i); + switch (getParseTreeIndex(child)) { + case yqlplusParser.RULE_paged_clause: + Paged_clauseContext pagedContext = (Paged_clauseContext) child; + pageSize = convertExpr(pagedContext.fixed_or_parameter(), scope); + break; + case yqlplusParser.RULE_output_spec: + Output_specContext outputSpecContext = (Output_specContext) child; + variable = outputSpecContext.ident().getText(); + if (outputSpecContext.COUNT() != null) { + isCountVariable = true; + } + break; + default: + throw new ProgramCompileException( "Unknown statement attribute: " + child.toStringTree()); + } + } + scope.defineVariable(location, variable); + if (pageSize != null) { + query = OperatorNode.create(SequenceOperator.PAGE, query, pageSize); + } + stmts.add(OperatorNode.create(location, StatementOperator.EXECUTE, query, variable)); + stmts.add(OperatorNode.create(location, isCountVariable ? 
StatementOperator.COUNT:StatementOperator.OUTPUT, variable)); + } + break; + } + default: + throw new ProgramCompileException("Unknown program element: " + node.getText()); + } + } + // traverse the tree, find all of the namespaced calls not covered by + // imports so we can + // define "implicit" import statements for them (to make engine + // implementation easier) + return OperatorNode.create(StatementOperator.PROGRAM, stmts); + } + + private OperatorNode<SortOperator> convertSortKey(Orderby_fieldContext node, Scope scope) { + TerminalNode descDef = node.DESC(); + OperatorNode<ExpressionOperator> exprNode = convertExpr(node.expression(), scope); + if (descDef != null ) { + return OperatorNode.create(toLocation(scope, descDef), SortOperator.DESC, exprNode); + } else { + return OperatorNode.create(toLocation(scope, node), SortOperator.ASC, exprNode); + } + } + + private ProjectionBuilder readProjection(List<Field_defContext> fieldDefs, Scope scope) { + if (null == fieldDefs) + throw new ProgramCompileException("Null fieldDefs"); + ProjectionBuilder proj = new ProjectionBuilder(); + for (Field_defContext rulenode : fieldDefs) { + // FIELD + // expression alias_def? 
+ OperatorNode<ExpressionOperator> expr = convertExpr((ExpressionContext)rulenode.getChild(0), scope); + + String aliasName = null; + if (rulenode.getChildCount() > 1) { + // ^(ALIAS ID) + aliasName = rulenode.alias_def().ID().getText(); + } + proj.addField(aliasName, expr); + // no grammar for the other rule types at this time + } + return proj; + } + + public static int getParseTreeIndex(ParseTree parseTree) { + if (parseTree instanceof TerminalNode) { + return ((TerminalNode)parseTree).getSymbol().getType(); + } else { + return ((RuleNode)parseTree).getRuleContext().getRuleIndex(); + } + } + + public OperatorNode<ExpressionOperator> convertExpr(ParseTree parseTree, + Scope scope) { + switch (getParseTreeIndex(parseTree)) { + case yqlplusParser.RULE_vespa_grouping: { + ParseTree firstChild = parseTree.getChild(0); + if (getParseTreeIndex(firstChild) == yqlplusParser.RULE_annotation) { + ParseTree secondChild = parseTree.getChild(1); + OperatorNode<ExpressionOperator> annotation = convertExpr(((AnnotationContext) firstChild) + .constantMapExpression(), scope); + OperatorNode<ExpressionOperator> expr = OperatorNode.create(toLocation(scope, secondChild), + ExpressionOperator.VESPA_GROUPING, secondChild.getText()); + List<String> names = (List<String>) annotation.getArgument(0); + List<OperatorNode<ExpressionOperator>> annotates = (List<OperatorNode<ExpressionOperator>>) annotation + .getArgument(1); + for (int i = 0; i < names.size(); ++i) { + expr.putAnnotation(names.get(i), readConstantExpression(annotates.get(i))); + } + return expr; + } else { + return OperatorNode.create(toLocation(scope, firstChild), ExpressionOperator.VESPA_GROUPING, + firstChild.getText()); + } + } + case yqlplusParser.RULE_nullOperator: + return OperatorNode.create(ExpressionOperator.NULL); + case yqlplusParser.RULE_argument: + return convertExpr(parseTree.getChild(0), scope); + case yqlplusParser.RULE_fixed_or_parameter: { + ParseTree firstChild = parseTree.getChild(0); + if 
(getParseTreeIndex(firstChild) == yqlplusParser.INT) { + return OperatorNode.create(toLocation(scope, firstChild), ExpressionOperator.LITERAL, new Integer(firstChild.getText())); + } else { + return convertExpr(firstChild, scope); + } + } + case yqlplusParser.RULE_constantMapExpression: { + List<ConstantPropertyNameAndValueContext> propertyList = ((ConstantMapExpressionContext) parseTree).constantPropertyNameAndValue(); + List<String> names = Lists.newArrayListWithExpectedSize(propertyList.size()); + List<OperatorNode<ExpressionOperator>> exprs = Lists.newArrayListWithExpectedSize(propertyList.size()); + for (ConstantPropertyNameAndValueContext child : propertyList) { + // : propertyName ':' expression[$expression::namespace] -> + // ^(PROPERTY propertyName expression) + names.add(StringUnescaper.unquote(child.getChild(0).getText())); + exprs.add(convertExpr(child.getChild(2), scope)); + } + return OperatorNode.create(toLocation(scope, parseTree),ExpressionOperator.MAP, names, exprs); + } + case yqlplusParser.RULE_mapExpression: { + List<PropertyNameAndValueContext> propertyList = ((MapExpressionContext)parseTree).propertyNameAndValue(); + List<String> names = Lists.newArrayListWithExpectedSize(propertyList.size()); + List<OperatorNode<ExpressionOperator>> exprs = Lists.newArrayListWithCapacity(propertyList.size()); + for (PropertyNameAndValueContext child : propertyList) { + // : propertyName ':' expression[$expression::namespace] -> + // ^(PROPERTY propertyName expression) + names.add(StringUnescaper.unquote(child.getChild(0).getText())); + exprs.add(convertExpr(child.getChild(2), scope)); + } + return OperatorNode.create(toLocation(scope, parseTree),ExpressionOperator.MAP, names, exprs); + } + case yqlplusParser.RULE_constantArray: { + List<ConstantExpressionContext> expressionList = ((ConstantArrayContext)parseTree).constantExpression(); + List<OperatorNode<ExpressionOperator>> values = Lists.newArrayListWithExpectedSize(expressionList.size()); + for 
(ConstantExpressionContext expr : expressionList) { + values.add(convertExpr(expr, scope)); + } + return OperatorNode.create(toLocation(scope, expressionList.isEmpty()? parseTree:expressionList.get(0)), ExpressionOperator.ARRAY, values); + } + case yqlplusParser.RULE_arrayLiteral: { + List<ExpressionContext> expressionList = ((ArrayLiteralContext) parseTree).expression(); + List<OperatorNode<ExpressionOperator>> values = Lists.newArrayListWithExpectedSize(expressionList.size()); + for (ExpressionContext expr : expressionList) { + values.add(convertExpr(expr, scope)); + } + return OperatorNode.create(toLocation(scope, expressionList.isEmpty()? parseTree:expressionList.get(0)), ExpressionOperator.ARRAY, values); + } + //dereferencedExpression: primaryExpression(indexref[in_select]| propertyref)* + case yqlplusParser.RULE_dereferencedExpression: { + DereferencedExpressionContext dereferencedExpression = (DereferencedExpressionContext) parseTree; + Iterator<ParseTree> it = dereferencedExpression.children.iterator(); + OperatorNode<ExpressionOperator> result = convertExpr(it.next(), scope); + while (it.hasNext()) { + ParseTree defTree = it.next(); + if (getParseTreeIndex(defTree) == yqlplusParser.RULE_propertyref) { + //DOT nm=ID + result = OperatorNode.create(toLocation(scope, parseTree), ExpressionOperator.PROPREF, result, defTree.getChild(1).getText()); + } else { + //indexref + result = OperatorNode.create(toLocation(scope, parseTree), ExpressionOperator.INDEX, result, convertExpr(defTree.getChild(1), scope)); + } + } + return result; + } + case yqlplusParser.RULE_primaryExpression: { + // ^(CALL namespaced_name arguments) + ParseTree firstChild = parseTree.getChild(0); + switch (getParseTreeIndex(firstChild)) { + case yqlplusParser.RULE_fieldref: { + return convertExpr(firstChild, scope); + } + case yqlplusParser.RULE_callExpresion: { + List<ArgumentContext> args = ((ArgumentsContext) firstChild.getChild(1)).argument(); + List<OperatorNode<ExpressionOperator>> 
arguments = Lists.newArrayListWithExpectedSize(args.size()); + for (ArgumentContext argContext : args) { + arguments.add(convertExpr(argContext.expression(),scope)); + } + return OperatorNode.create(toLocation(scope, parseTree), ExpressionOperator.CALL, scope.resolvePath(readName((Namespaced_nameContext) firstChild.getChild(0))), arguments); + } + // TODO add processing this is not implemented in V3 + // case yqlplusParser.APPLY: + + case yqlplusParser.RULE_parameter: + // external variable reference + return OperatorNode.create(toLocation(scope, firstChild), ExpressionOperator.VARREF, firstChild.getChild(1).getText()); + case yqlplusParser.RULE_scalar_literal: + case yqlplusParser.RULE_arrayLiteral: + case yqlplusParser.RULE_mapExpression: + return convertExpr(firstChild, scope); + case yqlplusParser.LPAREN: + return convertExpr(parseTree.getChild(1), scope); + } + } + // TODO: Temporarily disable CAST - think through how types are named + // case yqlplusParser.CAST: { + // + // return new Cast() + // } + // return new CastExpression(payload); + case yqlplusParser.RULE_parameter: { + // external variable reference + ParserRuleContext parameterContext = (ParserRuleContext) parseTree; + IdentContext identContext = parameterContext.getRuleContext(IdentContext.class, 0); + return OperatorNode.create(toLocation(scope, identContext), ExpressionOperator.VARREF, identContext.getText()); + } + case yqlplusParser.RULE_annotateExpression: { + //annotation logicalORExpression + AnnotationContext annotateExpressionContext = ((AnnotateExpressionContext)parseTree).annotation(); + OperatorNode<ExpressionOperator> annotation = convertExpr(annotateExpressionContext.constantMapExpression(), scope); + OperatorNode<ExpressionOperator> expr = convertExpr(parseTree.getChild(1), scope); + List<String> names = (List<String>) annotation.getArgument(0); + List<OperatorNode<ExpressionOperator>> annotates = (List<OperatorNode<ExpressionOperator>>) annotation.getArgument(1); + for (int i = 0; 
i < names.size(); ++i) { + expr.putAnnotation(names.get(i), readConstantExpression(annotates.get(i))); + } + return expr; + } + case yqlplusParser.RULE_expression: { + return convertExpr(parseTree.getChild(0), scope); + } + case yqlplusParser.RULE_logicalANDExpression: + LogicalANDExpressionContext andExpressionContext = (LogicalANDExpressionContext) parseTree; + return readConjOp(ExpressionOperator.AND, andExpressionContext.equalityExpression(), scope); + case yqlplusParser.RULE_logicalORExpression: { + int childCount = parseTree.getChildCount(); + LogicalORExpressionContext logicalORExpressionContext = (LogicalORExpressionContext) parseTree; + if (childCount > 1) { + return readConjOrOp(ExpressionOperator.OR, logicalORExpressionContext, scope); + } else { + List<EqualityExpressionContext> equalityExpressionList = ((LogicalANDExpressionContext) parseTree.getChild(0)).equalityExpression(); + if (equalityExpressionList.size() > 1) { + return readConjOp(ExpressionOperator.AND, equalityExpressionList, scope); + } else { + return convertExpr(equalityExpressionList.get(0), scope); + } + } + } + case yqlplusParser.RULE_equalityExpression: { + EqualityExpressionContext equalityExpression = (EqualityExpressionContext) parseTree; + RelationalExpressionContext relationalExpressionContext = equalityExpression.relationalExpression(0); + OperatorNode<ExpressionOperator> expr = convertExpr(relationalExpressionContext, scope); + InNotInTargetContext inNotInTarget = equalityExpression.inNotInTarget(); + int childCount = equalityExpression.getChildCount(); + if (childCount == 1) { + return expr; + } + if (inNotInTarget != null) { + Literal_listContext literalListContext = inNotInTarget.literal_list(); + boolean isIN = equalityExpression.IN() != null; + if (literalListContext == null) { + Select_statementContext selectStatementContext = inNotInTarget.select_statement(); + OperatorNode<SequenceOperator> query = convertQuery(selectStatementContext, scope); + return 
OperatorNode.create(expr.getLocation(),isIN ? ExpressionOperator.IN_QUERY: ExpressionOperator.NOT_IN_QUERY, expr, query); + } else { + // we need to identify the type of the target; if it's a + // scalar we need to wrap it in a CREATE_ARRAY + // if it's already a CREATE ARRAY then it's fine, otherwise + // we need to know the variable type + // return readBinOp(node.getType() == yqlplusParser.IN ? + // ExpressionOperator.IN : ExpressionOperator.NOT_IN, node, + // scope); + return readBinOp(isIN ? ExpressionOperator.IN: ExpressionOperator.NOT_IN, equalityExpression.getChild(0), literalListContext, scope); + } + + } else { + ParseTree firstChild = equalityExpression.getChild(1); + if (equalityExpression.getChildCount() == 2) { + switch (getParseTreeIndex(firstChild)) { + case yqlplusParser.IS_NULL: + return readUnOp(ExpressionOperator.IS_NULL, relationalExpressionContext, scope); + case yqlplusParser.IS_NOT_NULL: + return readUnOp(ExpressionOperator.IS_NOT_NULL, relationalExpressionContext, scope); + } + } else { + switch (getParseTreeIndex(firstChild.getChild(0))) { + case yqlplusParser.EQ: + return readBinOp(ExpressionOperator.EQ, equalityExpression.getChild(0), equalityExpression.getChild(2), scope); + case yqlplusParser.NEQ: + return readBinOp(ExpressionOperator.NEQ, equalityExpression.getChild(0), equalityExpression.getChild(2), scope); + case yqlplusParser.LIKE: + return readBinOp(ExpressionOperator.LIKE, equalityExpression.getChild(0), equalityExpression.getChild(2), scope); + case yqlplusParser.NOTLIKE: + return readBinOp(ExpressionOperator.NOT_LIKE, equalityExpression.getChild(0), equalityExpression.getChild(2), scope); + case yqlplusParser.MATCHES: + return readBinOp(ExpressionOperator.MATCHES, equalityExpression.getChild(0), equalityExpression.getChild(2), scope); + case yqlplusParser.NOTMATCHES: + return readBinOp(ExpressionOperator.NOT_MATCHES, equalityExpression.getChild(0), equalityExpression.getChild(2), scope); + case yqlplusParser.CONTAINS: + return 
readBinOp(ExpressionOperator.CONTAINS, equalityExpression.getChild(0), equalityExpression.getChild(2), scope); + } + } + + } + break; + } + case yqlplusParser.RULE_relationalExpression: { + RelationalExpressionContext relationalExpressionContext = (RelationalExpressionContext) parseTree; + RelationalOpContext opContext = relationalExpressionContext.relationalOp(); + if (opContext != null) { + switch (getParseTreeIndex(relationalExpressionContext.relationalOp().getChild(0))) { + case yqlplusParser.LT: + return readBinOp(ExpressionOperator.LT, parseTree, scope); + case yqlplusParser.LTEQ: + return readBinOp(ExpressionOperator.LTEQ, parseTree, scope); + case yqlplusParser.GT: + return readBinOp(ExpressionOperator.GT, parseTree, scope); + case yqlplusParser.GTEQ: + return readBinOp(ExpressionOperator.GTEQ, parseTree, scope); + } + } else { + return convertExpr(relationalExpressionContext.additiveExpression(0), scope); + } + } + break; + case yqlplusParser.RULE_additiveExpression: + case yqlplusParser.RULE_multiplicativeExpression: { + if (parseTree.getChildCount() > 1) { + String opStr = parseTree.getChild(1).getText(); + switch (opStr) { + case "+": + return readBinOp(ExpressionOperator.ADD, parseTree, scope); + case "-": + return readBinOp(ExpressionOperator.SUB, parseTree, scope); + case "/": + return readBinOp(ExpressionOperator.DIV, parseTree, scope); + case "*": + return readBinOp(ExpressionOperator.MULT, parseTree, scope); + case "%": + return readBinOp(ExpressionOperator.MOD, parseTree, scope); + default: + if (parseTree.getChild(0) instanceof UnaryExpressionContext) { + return convertExpr(parseTree.getChild(0), scope); + } else { + throw new ProgramCompileException(toLocation(scope, parseTree), "Unknown expression type: " + parseTree.toStringTree()); + } + } + } else { + if (parseTree.getChild(0) instanceof UnaryExpressionContext) { + return convertExpr(parseTree.getChild(0), scope); + } else if (parseTree.getChild(0) instanceof 
MultiplicativeExpressionContext) { + return convertExpr(parseTree.getChild(0), scope); + } else { + throw new ProgramCompileException(toLocation(scope, parseTree), "Unknown expression type: " + parseTree.getText()); + } + } + } + case yqlplusParser.RULE_unaryExpression: { + if (1 == parseTree.getChildCount()) { + return convertExpr(parseTree.getChild(0), scope); + } else if (2 == parseTree.getChildCount()) { + if ("-".equals(parseTree.getChild(0).getText())) { + return readUnOp(ExpressionOperator.NEGATE, parseTree, scope); + } else if ("!".equals(parseTree.getChild(0).getText())) { + return readUnOp(ExpressionOperator.NOT, parseTree, scope); + } + throw new ProgramCompileException(toLocation(scope, parseTree),"Unknown unary operator " + parseTree.getText()); + } else { + throw new ProgramCompileException(toLocation(scope, parseTree),"Unknown child count " + parseTree.getChildCount() + " of " + parseTree.getText()); + } + } + case yqlplusParser.RULE_fieldref: + case yqlplusParser.RULE_joinDereferencedExpression: { + // all in-scope data sources should be defined in scope + // the 'first' field in a namespaced reference must be: + // - a field name if (and only if) there is exactly one data source + // in scope OR + // - an alias name, which will be followed by a field name + // ^(FIELDREF<FieldReference>[$expression::namespace] + // namespaced_name) + List<String> path = readName((Namespaced_nameContext) parseTree.getChild(0)); + Location loc = toLocation(scope, parseTree.getChild(0)); + String alias = path.get(0); + OperatorNode<ExpressionOperator> result = null; + int start = 0; + if (scope.isCursor(alias)) { + if (path.size() > 1) { + result = OperatorNode.create(loc, ExpressionOperator.READ_FIELD, alias, path.get(1)); + start = 2; + } else { + result = OperatorNode.create(loc, ExpressionOperator.READ_RECORD, alias); + start = 1; + } + } else if (scope.isBound(alias)) { + return OperatorNode.create(loc, ExpressionOperator.READ_MODULE, 
scope.getBinding(alias).toPathWith(path.subList(1, path.size()))); + } else if (scope.getCursors().size() == 1) { + alias = scope.getCursors().iterator().next(); + result = OperatorNode.create(loc, ExpressionOperator.READ_FIELD, alias, path.get(0)); + start = 1; + } else { + // ah ha, we can't end up with a 'loose' UDF call because it + // won't be a module or known alias + // so we need not support implicit imports for constants used in + // UDFs + throw new ProgramCompileException(loc, "Unknown field or alias '%s'", alias); + } + for (int idx = start; idx < path.size(); ++idx) { + result = OperatorNode.create(loc, ExpressionOperator.PROPREF, result, path.get(idx)); + } + return result; + } + case yqlplusParser.RULE_scalar_literal: + return OperatorNode.create(toLocation(scope, parseTree), ExpressionOperator.LITERAL, convertLiteral((Scalar_literalContext) parseTree)); + case yqlplusParser.RULE_insert_values: + return readValues((Insert_valuesContext) parseTree, scope); + case yqlplusParser.RULE_constantExpression: + return convertExpr(parseTree.getChild(0), scope); + case yqlplusParser.RULE_literal_list: + if (getParseTreeIndex(parseTree.getChild(1)) == yqlplusParser.RULE_array_parameter) { + return convertExpr(parseTree.getChild(1), scope); + } else { + List<Literal_elementContext> elements = ((Literal_listContext) parseTree).literal_element(); + ParseTree firldElement = elements.get(0).getChild(0); + if (elements.size() == 1 && scope.getParser().isArrayParameter(firldElement)) { + return convertExpr(firldElement, scope); + } else { + List<OperatorNode<ExpressionOperator>> values = Lists.newArrayListWithExpectedSize(elements.size()); + for (Literal_elementContext child : elements) { + values.add(convertExpr(child.getChild(0), scope)); + } + return OperatorNode.create(toLocation(scope, elements.get(0)),ExpressionOperator.ARRAY, values); + } + } + } + throw new ProgramCompileException(toLocation(scope, parseTree), + "Unknown expression type: " + 
parseTree.getText()); + } + + public Object convertLiteral(Scalar_literalContext literal) { + int parseTreeIndex = getParseTreeIndex(literal.getChild(0)); + String text = literal.getChild(0).getText(); + switch(parseTreeIndex) { + case yqlplusParser.INT: + return new Integer(text); + case yqlplusParser.FLOAT: + return new Double(text); + case yqlplusParser.STRING: + return StringUnescaper.unquote(text); + case yqlplusParser.TRUE: + case yqlplusParser.FALSE: + return new Boolean(text); + case yqlplusParser.LONG_INT: + return Long.parseLong(text.substring(0, text.length()-1)); + default: + throw new ProgramCompileException("Unknow literal type " + text); + } + } + + private Object readConstantExpression(OperatorNode<ExpressionOperator> node) { + switch (node.getOperator()) { + case LITERAL: + return node.getArgument(0); + case MAP: { + ImmutableMap.Builder<String, Object> map = ImmutableMap.builder(); + List<String> names = (List<String>) node.getArgument(0); + List<OperatorNode<ExpressionOperator>> exprs = (List<OperatorNode<ExpressionOperator>>) node.getArgument(1); + for (int i = 0; i < names.size(); ++i) { + map.put(names.get(i), readConstantExpression(exprs.get(i))); + } + return map.build(); + } + case ARRAY: { + List<OperatorNode<ExpressionOperator>> exprs = (List<OperatorNode<ExpressionOperator>>) node.getArgument(0); + ImmutableList.Builder<Object> lst = ImmutableList.builder(); + for (OperatorNode<ExpressionOperator> expr : exprs) { + lst.add(readConstantExpression(expr)); + } + return lst.build(); + } + default: + throw new ProgramCompileException(node.getLocation(), "Internal error: Unknown constant expression type: " + node.getOperator()); + } + } + + private OperatorNode<ExpressionOperator> readBinOp(ExpressionOperator op, ParseTree node, Scope scope) { + assert node.getChildCount() == 3; + return OperatorNode.create(op, convertExpr(node.getChild(0), scope), convertExpr(node.getChild(2), scope)); + } + + private OperatorNode<ExpressionOperator> 
readBinOp(ExpressionOperator op, ParseTree operand1, ParseTree operand2, Scope scope) { + return OperatorNode.create(op, convertExpr(operand1, scope), convertExpr(operand2, scope)); + } + + private OperatorNode<ExpressionOperator> readConjOp(ExpressionOperator op, List<EqualityExpressionContext> nodes, Scope scope) { + List<OperatorNode<ExpressionOperator>> arguments = Lists.newArrayListWithExpectedSize(nodes.size()); + for (ParseTree child : nodes) { + arguments.add(convertExpr(child, scope)); + } + return OperatorNode.create(op, arguments); + } + + private OperatorNode<ExpressionOperator> readConjOrOp(ExpressionOperator op, LogicalORExpressionContext node, Scope scope) { + List<LogicalANDExpressionContext> andExpressionList = node.logicalANDExpression(); + List<OperatorNode<ExpressionOperator>> arguments = Lists.newArrayListWithExpectedSize(andExpressionList.size()); + for (LogicalANDExpressionContext child : andExpressionList) { + List<EqualityExpressionContext> equalities = child.equalityExpression(); + if (equalities.size() == 1) { + arguments.add(convertExpr(equalities.get(0), scope)); + } else { + List<OperatorNode<ExpressionOperator>> andArguments = Lists.newArrayListWithExpectedSize(equalities.size()); + for (EqualityExpressionContext subTreeChild:equalities) { + andArguments.add(convertExpr(subTreeChild, scope)); + } + arguments.add(OperatorNode.create(ExpressionOperator.AND, andArguments)); + } + + } + return OperatorNode.create(op, arguments); + } + + // (IS_NULL | IS_NOT_NULL) + // unaryExpression + private OperatorNode<ExpressionOperator> readUnOp(ExpressionOperator op, ParseTree node, Scope scope) { + assert (node instanceof TerminalNode) || (node.getChildCount() == 1) || (node instanceof UnaryExpressionContext); + if (node instanceof TerminalNode) { + return OperatorNode.create(op, convertExpr(node, scope)); + } else if (node.getChildCount() == 1) { + return OperatorNode.create(op, convertExpr(node.getChild(0), scope)); + } else { + return 
OperatorNode.create(op, convertExpr(node.getChild(1), scope)); + } + } + + private OperatorNode<ExpressionOperator> readValues(Field_names_specContext nameDefs, Field_values_specContext values, Scope scope) { + List<Field_defContext> fieldDefs = nameDefs.field_def(); + List<ExpressionContext> valueDefs = values.expression(); + assert fieldDefs.size() == valueDefs.size(); + List<String> fieldNames; + List<OperatorNode<ExpressionOperator>> fieldValues; + int numPairs = fieldDefs.size(); + fieldNames = Lists.newArrayListWithExpectedSize(numPairs); + fieldValues = Lists.newArrayListWithExpectedSize(numPairs); + for (int i = 0; i < numPairs; i++) { + fieldNames.add((String) convertExpr(fieldDefs.get(i).expression(), scope).getArgument(1)); + fieldValues.add(convertExpr(valueDefs.get(i), scope)); + } + return OperatorNode.create(ExpressionOperator.MAP, fieldNames, fieldValues); + } + + private OperatorNode<ExpressionOperator> readValues(ParserRuleContext node, Scope scope) { + List<String> fieldNames; + List<OperatorNode<ExpressionOperator>> fieldValues; + if (node.getRuleIndex() == yqlplusParser.RULE_field_def) { + Field_defContext fieldDefContext = (Field_defContext)node; + //TODO double check + fieldNames = Lists.newArrayListWithExpectedSize(node.getChildCount()); + fieldValues = Lists.newArrayListWithExpectedSize(node.getChildCount()); + for (int i = 0; i < node.getChildCount(); i++) { + fieldNames.add((String) convertExpr(node.getChild(i).getChild(0).getChild(0), scope).getArgument(1)); + fieldValues.add(convertExpr(node.getChild(i).getChild(0).getChild(1), scope)); + } + } else { + assert node.getChildCount() % 2 == 0; + int numPairs = node.getChildCount() / 2; + fieldNames = Lists.newArrayListWithExpectedSize(numPairs); + fieldValues = Lists.newArrayListWithExpectedSize(numPairs); + for (int i = 0; i < numPairs; i++) { + fieldNames.add((String) convertExpr(node.getChild(i).getChild(0), scope).getArgument(1)); + fieldValues.add(convertExpr(node.getChild(numPairs + 
i), scope)); + } + } + return OperatorNode.create(ExpressionOperator.MAP, fieldNames, fieldValues); + } + + /* + * Converts node list + * + * a_name, b_name, c_name, a_value_1, b_value_1, c_value_1, a_value_2, b_value_2, c_value2, a_value_3, b_value_3, c_value_3 + * + * into corresponding constant sequence: + * + * [ { a_name : a_value_1, b_name : b_value_1, c_name : c_value_1 }, ... ] + * + */ + private OperatorNode<SequenceOperator> readBatchValues(Field_names_specContext nameDefs, List<Field_values_group_specContext> valueGroups, Scope scope) { + List<Field_defContext> nameContexts = nameDefs.field_def(); + List<String> fieldNames = Lists.newArrayList(); + for (Field_defContext nameContext:nameContexts) { + fieldNames.add((String) convertExpr(nameContext.getChild(0), scope).getArgument(1)); + } + List<OperatorNode> records = Lists.newArrayList(); + for (Field_values_group_specContext valueGorup:valueGroups) { + List<ExpressionContext> expressionList = valueGorup.expression(); + List<OperatorNode<ExpressionOperator>> fieldValues = Lists.newArrayListWithExpectedSize(expressionList.size()); + for (ExpressionContext expressionContext:expressionList) { + fieldValues.add(convertExpr(expressionContext, scope)); + } + records.add(OperatorNode.create(ExpressionOperator.MAP, fieldNames, fieldValues)); + } + // Return constant sequence of records with the given name/values + return OperatorNode.create(SequenceOperator.EVALUATE, OperatorNode.create(ExpressionOperator.ARRAY, records)); + } + + /* + * Scans the given node for READ_FIELD expressions. 
+ * + * TODO: Search recursively and consider additional operators + * + * @param in the node to scan + * @return list of READ_FIELD expressions + */ + private List<OperatorNode<ExpressionOperator>> getReadFieldExpressions(OperatorNode<ExpressionOperator> in) { + List<OperatorNode<ExpressionOperator>> readFieldList = Lists.newArrayList(); + switch (in.getOperator()) { + case READ_FIELD: + readFieldList.add(in); + break; + case CALL: + List<OperatorNode<ExpressionOperator>> callArgs = in.getArgument(1); + for (OperatorNode<ExpressionOperator> callArg : callArgs) { + if (callArg.getOperator() == ExpressionOperator.READ_FIELD) { + readFieldList.add(callArg); + } + } + break; + } + return readFieldList; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/ProjectOperator.java b/container-search/src/main/java/com/yahoo/search/yql/ProjectOperator.java new file mode 100644 index 00000000000..16ecc4c4077 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/ProjectOperator.java @@ -0,0 +1,34 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.base.Predicate; + +/** + * Represents a projection command which affects the output record. + */ +enum ProjectOperator implements Operator { + + FIELD(ExpressionOperator.class, String.class), // FIELD expr name + RECORD(ExpressionOperator.class, String.class), // RECORD expr name + MERGE_RECORD(String.class); // MERGE_RECORD name (alias of record to merge) + + private final ArgumentsTypeChecker checker; + + public static Predicate<OperatorNode<? extends Operator>> IS = new Predicate<OperatorNode<? extends Operator>>() { + @Override + public boolean apply(OperatorNode<? extends Operator> input) { + return input.getOperator() instanceof ProjectOperator; + } + }; + + private ProjectOperator(Object... 
types) { + checker = TypeCheckers.make(this, types); + } + + + @Override + public void checkArguments(Object... args) { + checker.check(args); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/ProjectionBuilder.java b/container-search/src/main/java/com/yahoo/search/yql/ProjectionBuilder.java new file mode 100644 index 00000000000..109d1cd654b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/ProjectionBuilder.java @@ -0,0 +1,73 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import java.util.Map; +import java.util.Set; + +class ProjectionBuilder { + + private Map<String, OperatorNode<ExpressionOperator>> fields = Maps.newLinkedHashMap(); + private Set<String> aliasNames = Sets.newHashSet(); + + public void addField(String name, OperatorNode<ExpressionOperator> expr) { + String aliasName = name; + if (name == null) { + name = assignName(expr); + } + if (fields.containsKey(name)) { + throw new ProgramCompileException(expr.getLocation(), "Field alias '%s' already defined", name); + } + fields.put(name, expr); + if (aliasName != null) { + // Store use + aliasNames.add(aliasName); + } + } + + public boolean isAlias(String name) { + return aliasNames.contains(name); + } + + private String assignName(OperatorNode<ExpressionOperator> expr) { + String baseName = "expr"; + switch (expr.getOperator()) { + case PROPREF: + baseName = (String) expr.getArgument(1); + break; + case READ_RECORD: + baseName = (String) expr.getArgument(0); + break; + case READ_FIELD: + baseName = (String) expr.getArgument(1); + break; + case VARREF: + baseName = (String) expr.getArgument(0); + break; + // fall through, leaving baseName alone + } + int c = 0; + String candidate = baseName; + while 
(fields.containsKey(candidate)) { + candidate = baseName + (++c); + } + return candidate; + } + + public OperatorNode<SequenceOperator> make(OperatorNode<SequenceOperator> target) { + ImmutableList.Builder<OperatorNode<ProjectOperator>> lst = ImmutableList.builder(); + for (Map.Entry<String, OperatorNode<ExpressionOperator>> e : fields.entrySet()) { + if (e.getKey().startsWith("*")) { + lst.add(OperatorNode.create(ProjectOperator.MERGE_RECORD, e.getValue().getArgument(0))); + } else if (e.getValue().getOperator() == ExpressionOperator.READ_RECORD) { + lst.add(OperatorNode.create(ProjectOperator.RECORD, e.getValue(), e.getKey())); + } else { + lst.add(OperatorNode.create(ProjectOperator.FIELD, e.getValue(), e.getKey())); + } + } + return OperatorNode.create(SequenceOperator.PROJECT, target, lst.build()); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/SequenceOperator.java b/container-search/src/main/java/com/yahoo/search/yql/SequenceOperator.java new file mode 100644 index 00000000000..65d1e039e10 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/SequenceOperator.java @@ -0,0 +1,68 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.base.Predicate; +import com.google.inject.TypeLiteral; + +import java.util.List; + +/** + * Logical sequence operators represent a logical description of a "source" (query against data stores + pipes), representing + * a source_expression in the grammar. 
+ */ +enum SequenceOperator implements Operator { + + SCAN(TypeCheckers.LIST_OF_STRING, TypeCheckers.EXPRS), // scan a named data source (with optional arguments) + /** + * INSERT(target-sequence, input-records) + */ + INSERT(SequenceOperator.class, SequenceOperator.class), + UPDATE(SequenceOperator.class, ExpressionOperator.MAP, ExpressionOperator.class), + UPDATE_ALL(SequenceOperator.class, ExpressionOperator.MAP), + DELETE(SequenceOperator.class, ExpressionOperator.class), + DELETE_ALL(SequenceOperator.class), + EMPTY(), // emits a single, empty row + // evaluate the given expression and use the result as a sequence + EVALUATE(ExpressionOperator.class), + NEXT(String.class), + + PROJECT(SequenceOperator.class, new TypeLiteral<List<OperatorNode<ProjectOperator>>>() { + }), // transform a sequence into a new schema + FILTER(SequenceOperator.class, ExpressionOperator.class), // filter a sequence by an expression + SORT(SequenceOperator.class, new TypeLiteral<List<OperatorNode<SortOperator>>>() { + }), // sort a sequence + PIPE(SequenceOperator.class, TypeCheckers.LIST_OF_STRING, TypeCheckers.EXPRS), // pipe from one source through a named transformation + LIMIT(SequenceOperator.class, ExpressionOperator.class), + OFFSET(SequenceOperator.class, ExpressionOperator.class), + SLICE(SequenceOperator.class, ExpressionOperator.class, ExpressionOperator.class), + MERGE(TypeCheckers.SEQUENCES), + JOIN(SequenceOperator.class, SequenceOperator.class, ExpressionOperator.class), // combine two (or more, in the case of MERGE) sequences to produce a new sequence + LEFT_JOIN(SequenceOperator.class, SequenceOperator.class, ExpressionOperator.class), + + FALLBACK(SequenceOperator.class, SequenceOperator.class), + + TIMEOUT(SequenceOperator.class, ExpressionOperator.class), + PAGE(SequenceOperator.class, ExpressionOperator.class), + ALL(), + MULTISOURCE(TypeCheckers.LIST_OF_LIST_OF_STRING); + + private final ArgumentsTypeChecker checker; + + public static Predicate<OperatorNode<? 
extends Operator>> IS = new Predicate<OperatorNode<? extends Operator>>() { + @Override + public boolean apply(OperatorNode<? extends Operator> input) { + return input.getOperator() instanceof SequenceOperator; + } + }; + + private SequenceOperator(Object... types) { + checker = TypeCheckers.make(this, types); + } + + + @Override + public void checkArguments(Object... args) { + checker.check(args); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/SortOperator.java b/container-search/src/main/java/com/yahoo/search/yql/SortOperator.java new file mode 100644 index 00000000000..db03f787524 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/SortOperator.java @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.base.Predicate; + +/** + * Represents a sort argument. ORDER BY foo; → (ASC foo) + */ +enum SortOperator implements Operator { + + ASC(ExpressionOperator.class), + DESC(ExpressionOperator.class); + + private final ArgumentsTypeChecker checker; + + public static Predicate<OperatorNode<? extends Operator>> IS = new Predicate<OperatorNode<? extends Operator>>() { + @Override + public boolean apply(OperatorNode<? extends Operator> input) { + return input.getOperator() instanceof SortOperator; + } + }; + + private SortOperator(Object... types) { + checker = TypeCheckers.make(this, types); + } + + + @Override + public void checkArguments(Object... args) { + checker.check(args); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/StatementOperator.java b/container-search/src/main/java/com/yahoo/search/yql/StatementOperator.java new file mode 100644 index 00000000000..f25212e1098 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/StatementOperator.java @@ -0,0 +1,41 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.base.Predicate; +import com.google.inject.TypeLiteral; + +import java.util.List; + +/** + * Represents program statements. + */ +enum StatementOperator implements Operator { + + PROGRAM(new TypeLiteral<List<OperatorNode<StatementOperator>>>() { + }), + ARGUMENT(String.class, TypeOperator.class, ExpressionOperator.class), + DEFINE_VIEW(String.class, SequenceOperator.class), + EXECUTE(SequenceOperator.class, String.class), + OUTPUT(String.class), + COUNT(String.class); + + private final ArgumentsTypeChecker checker; + + public static Predicate<OperatorNode<? extends Operator>> IS = new Predicate<OperatorNode<? extends Operator>>() { + @Override + public boolean apply(OperatorNode<? extends Operator> input) { + return input.getOperator() instanceof StatementOperator; + } + }; + + private StatementOperator(Object... types) { + checker = TypeCheckers.make(this, types); + } + + + @Override + public void checkArguments(Object... args) { + checker.check(args); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/StringUnescaper.java b/container-search/src/main/java/com/yahoo/search/yql/StringUnescaper.java new file mode 100644 index 00000000000..76d81429ab3 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/StringUnescaper.java @@ -0,0 +1,123 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.yql; + +class StringUnescaper { + + private static boolean lookaheadOctal(String v, int point) { + return point < v.length() && "01234567".indexOf(v.charAt(point)) != -1; + } + + public static String unquote(String token) { + if (null == token || !(token.startsWith("'") && token.endsWith("'") || token.startsWith("\"") && token.endsWith("\""))) { + return token; + } + // remove quotes from around string and unescape it + String value = token.substring(1, token.length() - 1); + // first quickly check to see if \ is present -- if not then there's no escaping and we're done + int idx = value.indexOf('\\'); + if (idx == -1) { + return value; + } + // the output string will be no bigger than the input string, since escapes add characters + StringBuilder result = new StringBuilder(value.length()); + int start = 0; + while (idx != -1) { + result.append(value.subSequence(start, idx)); + start = idx + 1; + switch (value.charAt(start)) { + case 'b': + result.append('\b'); + ++start; + break; + case 't': + result.append('\t'); + ++start; + break; + case 'n': + result.append('\n'); + ++start; + break; + case 'f': + result.append('\f'); + ++start; + break; + case 'r': + result.append('\r'); + ++start; + break; + case '"': + result.append('"'); + ++start; + break; + case '\'': + result.append('\''); + ++start; + break; + case '\\': + result.append('\\'); + ++start; + break; + case '/': + result.append('/'); + ++start; + break; + case 'u': + // hex hex hex hex + ++start; + result.append((char) Integer.parseInt(value.substring(start, start + 4), 16)); + start += 4; + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + // octal escape + // 1, 2, or 3 bytes + // peek ahead + if (lookaheadOctal(value, start + 1) && lookaheadOctal(value, start + 2)) { + result.append((char) Integer.parseInt(value.substring(start, start + 3), 8)); + start += 3; + } else if (lookaheadOctal(value, start + 1)) { + 
result.append((char) Integer.parseInt(value.substring(start, start + 2), 8)); + start += 2; + } else { + result.append((char) Integer.parseInt(value.substring(start, start + 1), 8)); + start += 1; + } + break; + default: + // the lexer should be ensuring there are no malformed escapes here, so we'll just blow up + throw new IllegalArgumentException("Unknown escape sequence in token: " + token); + } + idx = value.indexOf('\\', start); + } + result.append(value.subSequence(start, value.length())); + return result.toString(); + } + + public static String escape(String value) { + int idx = value.indexOf('\''); + if (idx == -1) { + return "\'" + value + "\'"; + + } + StringBuilder result = new StringBuilder(value.length() + 5); + result.append("'"); + // right now we only escape ' on output, so every search must look for the next ' (never for a backslash) + int start = 0; + while (idx != -1) { + result.append(value.subSequence(start, idx)); + start = idx + 1; + result.append("\\'"); + idx = value.indexOf('\'', start); + } + result.append(value.subSequence(start, value.length())); + result.append("'"); + return result.toString(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/TypeCheckers.java b/container-search/src/main/java/com/yahoo/search/yql/TypeCheckers.java new file mode 100644 index 00000000000..32aca6d5708 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/TypeCheckers.java @@ -0,0 +1,108 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.yql; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import com.google.inject.TypeLiteral; + +import java.lang.reflect.ParameterizedType; +import java.util.EnumSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +final class TypeCheckers { + + public static final TypeLiteral<List<String>> LIST_OF_STRING = new TypeLiteral<List<String>>() { + }; + public static final TypeLiteral<List<List<String>>> LIST_OF_LIST_OF_STRING = new TypeLiteral<List<List<String>>>() { + }; + public static final TypeLiteral<List<OperatorNode<SequenceOperator>>> SEQUENCES = new TypeLiteral<List<OperatorNode<SequenceOperator>>>() { + }; + public static final TypeLiteral<List<OperatorNode<ExpressionOperator>>> EXPRS = new TypeLiteral<List<OperatorNode<ExpressionOperator>>>() { + }; + public static final TypeLiteral<List<List<OperatorNode<ExpressionOperator>>>> LIST_OF_EXPRS = new TypeLiteral<List<List<OperatorNode<ExpressionOperator>>>>() { + }; + public static final ImmutableSet<Class<?>> LITERAL_TYPES = ImmutableSet.<Class<?>>builder() + .add(String.class) + .add(Integer.class) + .add(Double.class) + .add(Boolean.class) + .add(Float.class) + .add(Byte.class) + .add(Long.class) + .add(List.class) + .add(Map.class) + .build(); + + private TypeCheckers() { + } + + public static ArgumentsTypeChecker make(Operator target, Object... 
types) { + // Class<?> extends Operator -> NodeTypeChecker + if (types == null) { + types = new Object[0]; + } + List<OperatorTypeChecker> checkers = Lists.newArrayListWithCapacity(types.length); + for (int i = 0; i < types.length; ++i) { + checkers.add(createChecker(target, i, types[i])); + } + return new ArgumentsTypeChecker(target, checkers); + } + + // this is festooned with instance checkes before all the casting + @SuppressWarnings("unchecked") + private static OperatorTypeChecker createChecker(Operator parent, int idx, Object value) { + if (value instanceof TypeLiteral) { + TypeLiteral<?> lit = (TypeLiteral<?>) value; + Class<?> raw = lit.getRawType(); + if (List.class.isAssignableFrom(raw)) { + Preconditions.checkArgument(lit.getType() instanceof ParameterizedType, "TypeLiteral without a ParameterizedType for List"); + ParameterizedType type = (ParameterizedType) lit.getType(); + TypeLiteral<?> arg = TypeLiteral.get(type.getActualTypeArguments()[0]); + if (OperatorNode.class.isAssignableFrom(arg.getRawType())) { + Preconditions.checkArgument(arg.getType() instanceof ParameterizedType, "Type spec must be List<OperatorNode<?>>"); + Class<? extends Operator> optype = (Class<? extends Operator>) TypeLiteral.get(((ParameterizedType) arg.getType()).getActualTypeArguments()[0]).getRawType(); + return new OperatorNodeListTypeChecker(parent, idx, optype, ImmutableSet.<Operator>of()); + } else { + return new JavaListTypeChecker(parent, idx, arg.getRawType()); + } + } + throw new IllegalArgumentException("don't know how to handle TypeLiteral " + value); + } + if (value instanceof Class) { + Class<?> clazz = (Class<?>) value; + if (Operator.class.isAssignableFrom(clazz)) { + return new NodeTypeChecker(parent, idx, (Class<? extends Operator>) clazz, ImmutableSet.<Operator>of()); + } else { + return new JavaTypeChecker(parent, idx, clazz); + } + } else if (value instanceof Operator) { + Operator operator = (Operator) value; + Class<? 
extends Operator> clazz = operator.getClass(); + Set<? extends Operator> allowed; + if (Enum.class.isInstance(value)) { + Class<? extends Enum> enumClazz = (Class<? extends Enum>) clazz; + allowed = (Set<? extends Operator>) EnumSet.of(enumClazz.cast(value)); + } else { + allowed = ImmutableSet.of(operator); + } + return new NodeTypeChecker(parent, idx, clazz, allowed); + } else if (value instanceof EnumSet) { + EnumSet<?> v = (EnumSet<?>) value; + Enum elt = Iterables.get(v, 0); + if (elt instanceof Operator) { + Class<? extends Operator> opclass = (Class<? extends Operator>) elt.getClass(); + Set<? extends Operator> allowed = (Set<? extends Operator>) v; + return new NodeTypeChecker(parent, idx, opclass, allowed); + } + } else if (value instanceof Set) { + // Set<Class<?>> + return new JavaUnionTypeChecker(parent, idx, (Set<Class<?>>) value); + } + throw new IllegalArgumentException("I don't know how to create a checker from " + value); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/TypeOperator.java b/container-search/src/main/java/com/yahoo/search/yql/TypeOperator.java new file mode 100644 index 00000000000..01b1f88cc5e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/TypeOperator.java @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.base.Predicate; + +enum TypeOperator implements Operator { + + BYTE, + INT16, + INT32, + INT64, + STRING, + DOUBLE, + TIMESTAMP, + BOOLEAN, + ARRAY(TypeOperator.class), + MAP(TypeOperator.class); + + private final ArgumentsTypeChecker checker; + + public static Predicate<OperatorNode<? extends Operator>> IS = new Predicate<OperatorNode<? extends Operator>>() { + @Override + public boolean apply(OperatorNode<? extends Operator> input) { + return input.getOperator() instanceof TypeOperator; + } + }; + + TypeOperator(Object... 
types) { + checker = TypeCheckers.make(this, types); + } + + @Override + public void checkArguments(Object... args) { + checker.check(args); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/VespaGroupingStep.java b/container-search/src/main/java/com/yahoo/search/yql/VespaGroupingStep.java new file mode 100644 index 00000000000..520728dc231 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/VespaGroupingStep.java @@ -0,0 +1,29 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.yahoo.search.grouping.Continuation; +import com.yahoo.search.grouping.request.GroupingOperation; + +import java.util.ArrayList; +import java.util.List; + +/** + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen Hult</a> + */ +public class VespaGroupingStep { + + private final GroupingOperation operation; + private final List<Continuation> continuations = new ArrayList<>(); + + public VespaGroupingStep(GroupingOperation operation) { + this.operation = operation; + } + + public GroupingOperation getOperation() { + return operation; + } + + public List<Continuation> continuations() { + return continuations; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java b/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java new file mode 100644 index 00000000000..397225a087c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java @@ -0,0 +1,1381 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.yql; + +import static com.yahoo.search.yql.YqlParser.ACCENT_DROP; +import static com.yahoo.search.yql.YqlParser.ALTERNATIVES; +import static com.yahoo.search.yql.YqlParser.AND_SEGMENTING; +import static com.yahoo.search.yql.YqlParser.BOUNDS; +import static com.yahoo.search.yql.YqlParser.BOUNDS_LEFT_OPEN; +import static com.yahoo.search.yql.YqlParser.BOUNDS_OPEN; +import static com.yahoo.search.yql.YqlParser.BOUNDS_RIGHT_OPEN; +import static com.yahoo.search.yql.YqlParser.CONNECTION_ID; +import static com.yahoo.search.yql.YqlParser.CONNECTION_WEIGHT; +import static com.yahoo.search.yql.YqlParser.CONNECTIVITY; +import static com.yahoo.search.yql.YqlParser.DISTANCE; +import static com.yahoo.search.yql.YqlParser.DOT_PRODUCT; +import static com.yahoo.search.yql.YqlParser.EQUIV; +import static com.yahoo.search.yql.YqlParser.FILTER; +import static com.yahoo.search.yql.YqlParser.HIT_LIMIT; +import static com.yahoo.search.yql.YqlParser.IMPLICIT_TRANSFORMS; +import static com.yahoo.search.yql.YqlParser.LABEL; +import static com.yahoo.search.yql.YqlParser.NEAR; +import static com.yahoo.search.yql.YqlParser.NORMALIZE_CASE; +import static com.yahoo.search.yql.YqlParser.ONEAR; +import static com.yahoo.search.yql.YqlParser.ORIGIN; +import static com.yahoo.search.yql.YqlParser.ORIGIN_LENGTH; +import static com.yahoo.search.yql.YqlParser.ORIGIN_OFFSET; +import static com.yahoo.search.yql.YqlParser.ORIGIN_ORIGINAL; +import static com.yahoo.search.yql.YqlParser.PHRASE; +import static com.yahoo.search.yql.YqlParser.PREFIX; +import static com.yahoo.search.yql.YqlParser.RANGE; +import static com.yahoo.search.yql.YqlParser.RANK; +import static com.yahoo.search.yql.YqlParser.RANKED; +import static com.yahoo.search.yql.YqlParser.SCORE_THRESHOLD; +import static com.yahoo.search.yql.YqlParser.SIGNIFICANCE; +import static com.yahoo.search.yql.YqlParser.STEM; +import static com.yahoo.search.yql.YqlParser.SUBSTRING; +import static com.yahoo.search.yql.YqlParser.SUFFIX; 
+import static com.yahoo.search.yql.YqlParser.TARGET_NUM_HITS; +import static com.yahoo.search.yql.YqlParser.THRESHOLD_BOOST_FACTOR; +import static com.yahoo.search.yql.YqlParser.UNIQUE_ID; +import static com.yahoo.search.yql.YqlParser.USE_POSITION_DATA; +import static com.yahoo.search.yql.YqlParser.WAND; +import static com.yahoo.search.yql.YqlParser.WEAK_AND; +import static com.yahoo.search.yql.YqlParser.WEIGHT; +import static com.yahoo.search.yql.YqlParser.WEIGHTED_SET; + +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.Deque; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.ListIterator; +import java.util.Map; +import java.util.Map.Entry; + +import com.google.common.collect.ImmutableMap; +import com.yahoo.prelude.query.AndItem; +import com.yahoo.prelude.query.AndSegmentItem; +import com.yahoo.prelude.query.DotProductItem; +import com.yahoo.prelude.query.EquivItem; +import com.yahoo.prelude.query.IndexedItem; +import com.yahoo.prelude.query.IntItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.MarkerWordItem; +import com.yahoo.prelude.query.NearItem; +import com.yahoo.prelude.query.NotItem; +import com.yahoo.prelude.query.NullItem; +import com.yahoo.prelude.query.ONearItem; +import com.yahoo.prelude.query.OrItem; +import com.yahoo.prelude.query.PhraseItem; +import com.yahoo.prelude.query.PhraseSegmentItem; +import com.yahoo.prelude.query.PredicateQueryItem; +import com.yahoo.prelude.query.PrefixItem; +import com.yahoo.prelude.query.RangeItem; +import com.yahoo.prelude.query.RankItem; +import com.yahoo.prelude.query.RegExpItem; +import com.yahoo.prelude.query.SegmentingRule; +import com.yahoo.prelude.query.Substring; +import com.yahoo.prelude.query.SubstringItem; +import com.yahoo.prelude.query.SuffixItem; +import com.yahoo.prelude.query.TaggableItem; +import 
com.yahoo.prelude.query.ToolBox; +import com.yahoo.prelude.query.ToolBox.QueryVisitor; +import com.yahoo.prelude.query.WandItem; +import com.yahoo.prelude.query.WeakAndItem; +import com.yahoo.prelude.query.WeightedSetItem; +import com.yahoo.prelude.query.WordAlternativesItem; +import com.yahoo.prelude.query.WordItem; +import com.yahoo.search.Query; +import com.yahoo.search.grouping.Continuation; +import com.yahoo.search.grouping.GroupingRequest; + +import edu.umd.cs.findbugs.annotations.NonNull; + +/** + * Serialize Vespa query trees to YQL+ strings. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class VespaSerializer { + // TODO refactor, too much copy/paste + + private static class AndSegmentSerializer extends Serializer { + private static void serializeWords(StringBuilder destination, + AndSegmentItem segment) { + for (int i = 0; i < segment.getItemCount(); ++i) { + if (i > 0) { + destination.append(", "); + } + Item current = segment.getItem(i); + if (current instanceof WordItem) { + destination.append('"'); + escape(((WordItem) current).getIndexedString(), destination) + .append('"'); + } else { + throw new IllegalArgumentException( + "Serializing of " + + current.getClass().getSimpleName() + + " in segment AND expressions not implemented, please report this as a bug."); + } + } + } + + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + return serialize(destination, item, true); + } + + static boolean serialize(StringBuilder destination, Item item, + boolean includeField) { + AndSegmentItem phrase = (AndSegmentItem) item; + Substring origin = phrase.getOrigin(); + String image; + int offset; + int length; + + if (origin == null) { + image = phrase.getRawWord(); + offset = 0; + length = image.length(); + } else { + image = origin.getSuperstring(); + offset = origin.start; + length = origin.end - origin.start; + } + + if 
(includeField) { + destination.append(normalizeIndexName(phrase.getIndexName())) + .append(" contains "); + } + destination.append("([{"); + serializeOrigin(destination, image, offset, length); + destination.append(", \"").append(AND_SEGMENTING) + .append("\": true"); + destination.append("}]"); + destination.append(PHRASE).append('('); + serializeWords(destination, phrase); + destination.append("))"); + return false; + } + } + + private static class AndSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + destination.append(')'); + } + + @Override + String separator(Deque<SerializerWrapper> state) { + return " AND "; + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + destination.append("("); + return true; + } + } + + private static class DotProductSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + serializeWeightedSetContents(destination, DOT_PRODUCT, + (WeightedSetItem) item); + return false; + } + + } + + private static class EquivSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + EquivItem e = (EquivItem) item; + String annotations = leafAnnotations(e); + destination.append(getIndexName(e.getItem(0))).append(" contains "); + if (annotations.length() > 0) { + destination.append("([{").append(annotations).append("}]"); + } + destination.append(EQUIV).append('('); + int initLen = destination.length(); + for (Iterator<Item> i = e.getItemIterator(); i.hasNext();) { + Item x = i.next(); + if (destination.length() > initLen) { + destination.append(", "); + } + if (x instanceof PhraseItem) { + PhraseSerializer.serialize(destination, x, false); + } else { + destination.append('"'); + escape(((IndexedItem) x).getIndexedString(), destination); + 
destination.append('"'); + } + } + if (annotations.length() > 0) { + destination.append(')'); + } + destination.append(')'); + return false; + } + + } + + private static class NearSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + NearItem n = (NearItem) item; + String annotations = nearAnnotations(n); + + destination.append(getIndexName(n.getItem(0))).append(" contains "); + if (annotations.length() > 0) { + destination.append('(').append(annotations); + } + destination.append(NEAR).append('('); + int initLen = destination.length(); + for (ListIterator<Item> i = n.getItemIterator(); i.hasNext();) { + WordItem close = (WordItem) i.next(); + if (destination.length() > initLen) { + destination.append(", "); + } + destination.append('"'); + escape(close.getIndexedString(), destination).append('"'); + } + destination.append(')'); + if (annotations.length() > 0) { + destination.append(')'); + } + return false; + } + + static String nearAnnotations(NearItem n) { + if (n.getDistance() != NearItem.defaultDistance) { + return "[{\"" + DISTANCE + "\": " + n.getDistance() + "}]"; + } else { + return ""; + } + } + + } + + private static class NotSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + destination.append(')'); + } + + @Override + String separator(Deque<SerializerWrapper> state) { + if (state.peekFirst().subItems == 1) { + return ") AND !("; + } else { + return " OR "; + } + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + destination.append("("); + return true; + } + } + + private static class NullSerializer extends Serializer { + + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + throw new NullItemException( + "NullItem encountered in query tree." 
+ + " This is usually a symptom of an invalid query or an error" + + " in a query transformer."); + } + } + + private static class NumberSerializer extends Serializer { + + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + IntItem intItem = (IntItem) item; + if (intItem.getFromLimit().number() + .equals(intItem.getToLimit().number())) { + destination.append(normalizeIndexName(intItem.getIndexName())) + .append(" = "); + annotatedNumberImage(intItem, intItem.getFromLimit().number() + .toString(), destination); + } else if (intItem.getFromLimit().isInfinite()) { + destination.append(normalizeIndexName(intItem.getIndexName())); + destination.append(intItem.getToLimit().isInclusive() ? " <= " + : " < "); + annotatedNumberImage(intItem, intItem.getToLimit().number() + .toString(), destination); + } else if (intItem.getToLimit().isInfinite()) { + destination.append(normalizeIndexName(intItem.getIndexName())); + destination + .append(intItem.getFromLimit().isInclusive() ? 
" >= " + : " > "); + annotatedNumberImage(intItem, intItem.getFromLimit().number() + .toString(), destination); + } else { + serializeAsRange(destination, intItem); + } + return false; + } + + private void serializeAsRange(StringBuilder destination, IntItem intItem) { + String annotations = leafAnnotations(intItem); + boolean leftOpen = !intItem.getFromLimit().isInclusive(); + boolean rightOpen = !intItem.getToLimit().isInclusive(); + String boundsAnnotation = ""; + int initLen; + + if (leftOpen && rightOpen) { + boundsAnnotation = "\"" + BOUNDS + "\": " + "\"" + BOUNDS_OPEN + + "\""; + } else if (leftOpen) { + boundsAnnotation = "\"" + BOUNDS + "\": " + "\"" + + BOUNDS_LEFT_OPEN + "\""; + } else if (rightOpen) { + boundsAnnotation = "\"" + BOUNDS + "\": " + "\"" + + BOUNDS_RIGHT_OPEN + "\""; + } + if (annotations.length() > 0 || boundsAnnotation.length() > 0) { + destination.append("[{"); + } + initLen = destination.length(); + if (annotations.length() > 0) { + + destination.append(annotations); + } + comma(destination, initLen); + if (boundsAnnotation.length() > 0) { + destination.append(boundsAnnotation); + } + if (initLen != annotations.length()) { + destination.append("}]"); + } + destination.append(RANGE).append('(') + .append(normalizeIndexName(intItem.getIndexName())) + .append(", ").append(intItem.getFromLimit().number()) + .append(", ").append(intItem.getToLimit().number()) + .append(")"); + } + + private void annotatedNumberImage(IntItem item, String rawNumber, + StringBuilder image) { + String annotations = leafAnnotations(item); + + if (annotations.length() > 0) { + image.append("([{").append(annotations).append("}]"); + } + if ('-' == rawNumber.charAt(0)) { + image.append('('); + } + image.append(rawNumber); + appendLongIfNecessary(rawNumber, image); + if ('-' == rawNumber.charAt(0)) { + image.append(')'); + } + if (annotations.length() > 0) { + image.append(')'); + } + } + + private void appendLongIfNecessary(String rawNumber, StringBuilder image) { 
+ // floating point + if (rawNumber.indexOf('.') >= 0) { + return; + } + try { + long l = Long.parseLong(rawNumber); + if (l < Integer.MIN_VALUE || l > Integer.MAX_VALUE) { + image.append('L'); + } + } catch (NumberFormatException e) { + // somebody has managed to init an IntItem containing noise, + // just give up + return; + } + } + } + + private static class RegExpSerializer extends Serializer { + + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + RegExpItem regexp = (RegExpItem) item; + + String annotations = leafAnnotations(regexp); + destination.append(normalizeIndexName(regexp.getIndexName())).append( + " matches "); + annotatedTerm(destination, regexp, annotations); + return false; + } + } + + private static class ONearSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + NearItem n = (NearItem) item; + String annotations = NearSerializer.nearAnnotations(n); + + destination.append(getIndexName(n.getItem(0))).append(" contains "); + if (annotations.length() > 0) { + destination.append('(').append(annotations); + } + destination.append(ONEAR).append('('); + int initLen = destination.length(); + for (ListIterator<Item> i = n.getItemIterator(); i.hasNext();) { + WordItem close = (WordItem) i.next(); + if (destination.length() > initLen) { + destination.append(", "); + } + destination.append('"'); + escape(close.getIndexedString(), destination).append('"'); + } + destination.append(')'); + if (annotations.length() > 0) { + destination.append(')'); + } + return false; + } + + } + + private static class OrSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + destination.append(')'); + } + + @Override + String separator(Deque<SerializerWrapper> state) { + return " OR "; + } + + @Override + boolean 
serialize(StringBuilder destination, Item item) { + destination.append("("); + return true; + } + } + + private static class PhraseSegmentSerializer extends Serializer { + + private static void serializeWords(StringBuilder destination, + PhraseSegmentItem segment) { + for (int i = 0; i < segment.getItemCount(); ++i) { + if (i > 0) { + destination.append(", "); + } + Item current = segment.getItem(i); + if (current instanceof WordItem) { + destination.append('"'); + escape(((WordItem) current).getIndexedString(), destination) + .append('"'); + } else { + throw new IllegalArgumentException( + "Serializing of " + + current.getClass().getSimpleName() + + " in phrases not implemented, please report this as a bug."); + } + } + } + + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + return serialize(destination, item, true); + } + + static boolean serialize(StringBuilder destination, Item item, + boolean includeField) { + PhraseSegmentItem phrase = (PhraseSegmentItem) item; + Substring origin = phrase.getOrigin(); + String image; + int offset; + int length; + + if (includeField) { + destination.append(normalizeIndexName(phrase.getIndexName())) + .append(" contains "); + } + if (origin == null) { + image = phrase.getRawWord(); + offset = 0; + length = image.length(); + } else { + image = origin.getSuperstring(); + offset = origin.start; + length = origin.end - origin.start; + } + + destination.append("([{"); + serializeOrigin(destination, image, offset, length); + String annotations = leafAnnotations(phrase); + if (annotations.length() > 0) { + destination.append(", ").append(annotations); + } + if (phrase.getSegmentingRule() == SegmentingRule.BOOLEAN_AND) { + destination.append(", ").append('"').append(AND_SEGMENTING) + .append("\": true"); + } + destination.append("}]"); + destination.append(PHRASE).append('('); + serializeWords(destination, phrase); + destination.append("))"); 
+ return false; + } + } + + private static class PhraseSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + return serialize(destination, item, true); + } + + static boolean serialize(StringBuilder destination, Item item, + boolean includeField) { + + PhraseItem phrase = (PhraseItem) item; + String annotations = leafAnnotations(phrase); + + if (includeField) { + destination.append(normalizeIndexName(phrase.getIndexName())) + .append(" contains "); + + } + if (annotations.length() > 0) { + destination.append("([{").append(annotations).append("}]"); + } + + destination.append(PHRASE).append('('); + for (int i = 0; i < phrase.getItemCount(); ++i) { + if (i > 0) { + destination.append(", "); + } + Item current = phrase.getItem(i); + if (current instanceof WordItem) { + WordSerializer.serializeWordWithoutIndex(destination, + current); + } else if (current instanceof PhraseSegmentItem) { + PhraseSegmentSerializer.serialize(destination, current, + false); + } else if (current instanceof WordAlternativesItem) { + WordAlternativesSerializer.serialize(destination, (WordAlternativesItem) current, false); + } else { + throw new IllegalArgumentException( + "Serializing of " + + current.getClass().getSimpleName() + + " in phrases not implemented, please report this as a bug."); + } + } + destination.append(')'); + if (annotations.length() > 0) { + destination.append(')'); + } + return false; + } + + } + + private static class PredicateQuerySerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + PredicateQueryItem pItem = (PredicateQueryItem) item; + destination.append("predicate(").append(pItem.getIndexName()) + .append(','); + appendFeatures(destination, pItem.getFeatures()); + destination.append(','); + appendFeatures(destination, 
pItem.getRangeFeatures()); + destination.append(')'); + return false; + } + + private void appendFeatures(StringBuilder destination, + Collection<? extends PredicateQueryItem.EntryBase> features) { + if (features.isEmpty()) { + destination.append('0'); // Workaround for empty maps. + return; + } + destination.append('{'); + boolean first = true; + for (PredicateQueryItem.EntryBase entry : features) { + if (!first) { + destination.append(','); + } + if (entry.getSubQueryBitmap() != PredicateQueryItem.ALL_SUB_QUERIES) { + destination.append("\"0x").append( + Long.toHexString(entry.getSubQueryBitmap())); + destination.append("\":{"); + appendKeyValue(destination, entry); + destination.append('}'); + } else { + appendKeyValue(destination, entry); + } + first = false; + } + destination.append('}'); + } + + private void appendKeyValue(StringBuilder destination, + PredicateQueryItem.EntryBase entry) { + destination.append('"'); + escape(entry.getKey(), destination); + destination.append("\":"); + if (entry instanceof PredicateQueryItem.Entry) { + destination.append('"'); + escape(((PredicateQueryItem.Entry) entry).getValue(), + destination); + destination.append('"'); + } else { + destination.append(((PredicateQueryItem.RangeEntry) entry) + .getValue()); + destination.append('L'); + } + } + + } + + private static class RangeSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + RangeItem range = (RangeItem) item; + String annotations = leafAnnotations(range); + if (annotations.length() > 0) { + destination.append("[{").append(annotations).append("}]"); + } + destination.append(RANGE).append('(') + .append(normalizeIndexName(range.getIndexName())) + .append(", "); + appendNumberImage(destination, range.getFrom()); // TODO: Serialize + // inclusive/exclusive + destination.append(", "); + appendNumberImage(destination, range.getTo()); + 
destination.append(')'); + return false; + } + + private void appendNumberImage(StringBuilder destination, Number number) { + destination.append(number.toString()); + if (number instanceof Long) { + destination.append('L'); + } + } + } + + private static class RankSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + destination.append(')'); + } + + @Override + String separator(Deque<SerializerWrapper> state) { + return ", "; + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + destination.append(RANK).append('('); + return true; + + } + + } + + private static class WordAlternativesSerializer extends Serializer { + + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + return serialize(destination, (WordAlternativesItem) item, true); + } + + static boolean serialize(StringBuilder destination, WordAlternativesItem alternatives, boolean includeField) { + String annotations = leafAnnotations(alternatives); + Substring origin = alternatives.getOrigin(); + boolean isFromQuery = alternatives.isFromQuery(); + boolean needsAnnotations = annotations.length() > 0 || origin != null || !isFromQuery; + + if (includeField) { + destination.append(normalizeIndexName(alternatives.getIndexName())).append(" contains "); + } + + if (needsAnnotations) { + destination.append("([{"); + int initLen = destination.length(); + + if (origin != null) { + String image = origin.getSuperstring(); + int offset = origin.start; + int length = origin.end - origin.start; + serializeOrigin(destination, image, offset, length); + } + if (!isFromQuery) { + comma(destination, initLen); + destination.append('"').append(IMPLICIT_TRANSFORMS).append("\": false"); + } + if (annotations.length() > 0) { + comma(destination, initLen); + destination.append(annotations); + } + + destination.append("}]"); + } + + 
destination.append(ALTERNATIVES).append("({"); + int initLen = destination.length(); + List<WordAlternativesItem.Alternative> sortedAlternatives = new ArrayList<>(alternatives.getAlternatives()); + // ensure most precise forms first + Collections.sort(sortedAlternatives, (x, y) -> Double.compare(y.exactness, x.exactness)); + for (WordAlternativesItem.Alternative alternative : sortedAlternatives) { + comma(destination, initLen); + destination.append('"'); + escape(alternative.word, destination); + destination.append("\": ").append(Double.toString(alternative.exactness)); + } + destination.append("})"); + if (needsAnnotations) { + destination.append(')'); + } + return false; + } + } + + private static abstract class Serializer { + abstract void onExit(StringBuilder destination, Item item); + + String separator(Deque<SerializerWrapper> state) { + throw new UnsupportedOperationException( + "Having several items for this query operator serializer, " + + this.getClass().getSimpleName() + + ", not yet implemented."); + } + + abstract boolean serialize(StringBuilder destination, Item item); + } + + private static final class SerializerWrapper { + int subItems; + final Serializer type; + final Item item; + + SerializerWrapper(Serializer type, Item item) { + subItems = 0; + this.type = type; + this.item = item; + } + + } + + private static final class TokenComparator implements + Comparator<Entry<Object, Integer>> { + + @SuppressWarnings({ "rawtypes", "unchecked" }) + @Override + public int compare(Entry<Object, Integer> o1, Entry<Object, Integer> o2) { + Comparable c1 = (Comparable) o1.getKey(); + Comparable c2 = (Comparable) o2.getKey(); + return c1.compareTo(c2); + } + } + + private static class VespaVisitor extends QueryVisitor { + + final StringBuilder destination; + final Deque<SerializerWrapper> state = new ArrayDeque<>(); + + VespaVisitor(StringBuilder destination) { + this.destination = destination; + } + + @Override + public void onExit() { + SerializerWrapper w = 
state.removeFirst(); + w.type.onExit(destination, w.item); + w = state.peekFirst(); + if (w != null) { + w.subItems += 1; + } + } + + @Override + public boolean visit(Item item) { + Serializer doIt = dispatch.get(item.getClass()); + + if (doIt == null) { + throw new IllegalArgumentException(item.getClass() + + " not supported for YQL+ marshalling."); + } + + if (state.peekFirst() != null && state.peekFirst().subItems > 0) { + destination.append(state.peekFirst().type.separator(state)); + } + state.addFirst(new SerializerWrapper(doIt, item)); + return doIt.serialize(destination, item); + + } + } + + private static class WandSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + serializeWeightedSetContents(destination, WAND, + (WeightedSetItem) item, + specificAnnotations((WandItem) item)); + return false; + } + + private String specificAnnotations(WandItem w) { + StringBuilder annotations = new StringBuilder(); + int targetNumHits = w.getTargetNumHits(); + double scoreThreshold = w.getScoreThreshold(); + double thresholdBoostFactor = w.getThresholdBoostFactor(); + if (targetNumHits != 10) { + annotations.append('"').append(TARGET_NUM_HITS).append("\": ") + .append(targetNumHits); + } + if (scoreThreshold != 0) { + comma(annotations, 0); + annotations.append('"').append(SCORE_THRESHOLD).append("\": ") + .append(scoreThreshold); + } + if (thresholdBoostFactor != 1) { + comma(annotations, 0); + annotations.append('"').append(THRESHOLD_BOOST_FACTOR) + .append("\": ").append(thresholdBoostFactor); + } + return annotations.toString(); + } + + } + + private static class WeakAndSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + destination.append(')'); + if (needsAnnotationBlock((WeakAndItem) item)) { + destination.append(')'); + } + } + + @Override + String separator(Deque<SerializerWrapper> state) { + 
return ", "; + } + + private boolean needsAnnotationBlock(WeakAndItem item) { + return nonDefaultScoreThreshold(item) || nonDefaultTargetNumHits(item); + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + WeakAndItem w = (WeakAndItem) item; + if (needsAnnotationBlock(w)) { + destination.append("([{"); + } + int lengthBeforeAnnotations = destination.length(); + if (nonDefaultTargetNumHits(w)) { + destination.append('"').append(TARGET_NUM_HITS).append("\": ").append(w.getN()); + } + if (nonDefaultScoreThreshold(w)) { + comma(destination, lengthBeforeAnnotations); + destination.append('"').append(SCORE_THRESHOLD).append("\": ").append(w.getScoreThreshold()); + } + if (needsAnnotationBlock(w)) { + destination.append("}]"); + } + destination.append(WEAK_AND).append('('); + return true; + } + + private boolean nonDefaultScoreThreshold(WeakAndItem w) { + return w.getScoreThreshold() > 0; + } + + private boolean nonDefaultTargetNumHits(WeakAndItem w) { + return w.getN() != WeakAndItem.defaultN; + } + } + + private static class WeightedSetSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + serializeWeightedSetContents(destination, WEIGHTED_SET, + (WeightedSetItem) item); + return false; + } + + } + + private static class WordSerializer extends Serializer { + + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + WordItem w = (WordItem) item; + StringBuilder wordAnnotations = getAllAnnotations(w); + + destination.append(normalizeIndexName(w.getIndexName())).append( + " contains "); + VespaSerializer.annotatedTerm(destination, w, wordAnnotations.toString()); + return false; + } + + static void serializeWordWithoutIndex(StringBuilder destination, + Item item) { + WordItem w = (WordItem) item; + StringBuilder wordAnnotations = 
getAllAnnotations(w); + + VespaSerializer.annotatedTerm(destination, w, wordAnnotations.toString()); + } + + private static StringBuilder getAllAnnotations(WordItem w) { + StringBuilder wordAnnotations = new StringBuilder( + WordSerializer.wordAnnotations(w)); + String leafAnnotations = leafAnnotations(w); + + if (leafAnnotations.length() > 0) { + comma(wordAnnotations, 0); + wordAnnotations.append(leafAnnotations(w)); + } + return wordAnnotations; + } + + private static String wordAnnotations(WordItem item) { + Substring origin = item.getOrigin(); + boolean usePositionData = item.usePositionData(); + boolean stemmed = item.isStemmed(); + boolean lowercased = item.isLowercased(); + boolean accentDrop = item.isNormalizable(); + SegmentingRule andSegmenting = item.getSegmentingRule(); + boolean isFromQuery = item.isFromQuery(); + StringBuilder annotation = new StringBuilder(); + boolean prefix = item instanceof PrefixItem; + boolean suffix = item instanceof SuffixItem; + boolean substring = item instanceof SubstringItem; + int initLen = annotation.length(); + String image; + int offset; + int length; + + if (origin == null) { + image = item.getRawWord(); + offset = 0; + length = image.length(); + } else { + image = origin.getSuperstring(); + offset = origin.start; + length = origin.end - origin.start; + } + + if (!image.substring(offset, offset + length).equals( + item.getIndexedString())) { + VespaSerializer.serializeOrigin(annotation, image, offset, + length); + } + if (usePositionData != true) { + VespaSerializer.comma(annotation, initLen); + annotation.append('"').append(USE_POSITION_DATA) + .append("\": false"); + } + if (stemmed == true) { + VespaSerializer.comma(annotation, initLen); + annotation.append('"').append(STEM).append("\": false"); + } + if (lowercased == true) { + VespaSerializer.comma(annotation, initLen); + annotation.append('"').append(NORMALIZE_CASE) + .append("\": false"); + } + if (accentDrop == false) { + VespaSerializer.comma(annotation, 
initLen); + annotation.append('"').append(ACCENT_DROP).append("\": false"); + } + if (andSegmenting == SegmentingRule.BOOLEAN_AND) { + VespaSerializer.comma(annotation, initLen); + annotation.append('"').append(AND_SEGMENTING) + .append("\": true"); + } + if (!isFromQuery) { + VespaSerializer.comma(annotation, initLen); + annotation.append('"').append(IMPLICIT_TRANSFORMS) + .append("\": false"); + } + if (prefix) { + VespaSerializer.comma(annotation, initLen); + annotation.append('"').append(PREFIX).append("\": true"); + } + if (suffix) { + VespaSerializer.comma(annotation, initLen); + annotation.append('"').append(SUFFIX).append("\": true"); + } + if (substring) { + VespaSerializer.comma(annotation, initLen); + annotation.append('"').append(SUBSTRING).append("\": true"); + } + return annotation.toString(); + } + + } + + private static final char[] DIGITS = new char[] { '0', '1', '2', '3', '4', + '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; + + private static final Map<Class<?>, Serializer> dispatch; + + private static final Comparator<? 
super Entry<Object, Integer>> tokenComparator = new TokenComparator(); + + static { + Map<Class<?>, Serializer> dispatchBuilder = new HashMap<>(); + dispatchBuilder.put(AndItem.class, new AndSerializer()); + dispatchBuilder.put(AndSegmentItem.class, new AndSegmentSerializer()); + dispatchBuilder.put(DotProductItem.class, new DotProductSerializer()); + dispatchBuilder.put(EquivItem.class, new EquivSerializer()); + dispatchBuilder.put(IntItem.class, new NumberSerializer()); + dispatchBuilder.put(MarkerWordItem.class, new WordSerializer()); // gotcha + dispatchBuilder.put(NearItem.class, new NearSerializer()); + dispatchBuilder.put(NotItem.class, new NotSerializer()); + dispatchBuilder.put(NullItem.class, new NullSerializer()); + dispatchBuilder.put(ONearItem.class, new ONearSerializer()); + dispatchBuilder.put(OrItem.class, new OrSerializer()); + dispatchBuilder.put(PhraseItem.class, new PhraseSerializer()); + dispatchBuilder.put(PhraseSegmentItem.class, new PhraseSegmentSerializer()); + dispatchBuilder.put(PredicateQueryItem.class, + new PredicateQuerySerializer()); + dispatchBuilder.put(PrefixItem.class, new WordSerializer()); // gotcha + dispatchBuilder.put(WordAlternativesItem.class, new WordAlternativesSerializer()); + dispatchBuilder.put(RangeItem.class, new RangeSerializer()); + dispatchBuilder.put(RankItem.class, new RankSerializer()); + dispatchBuilder.put(SubstringItem.class, new WordSerializer()); // gotcha + dispatchBuilder.put(SuffixItem.class, new WordSerializer()); // gotcha + dispatchBuilder.put(WandItem.class, new WandSerializer()); + dispatchBuilder.put(WeakAndItem.class, new WeakAndSerializer()); + dispatchBuilder.put(WeightedSetItem.class, new WeightedSetSerializer()); + dispatchBuilder.put(WordItem.class, new WordSerializer()); + dispatchBuilder.put(RegExpItem.class, new RegExpSerializer()); + dispatch = ImmutableMap.copyOf(dispatchBuilder); + } + + /** + * Do YQL+ escaping, which is basically the same as for JSON, of the + * incoming string to 
the "quoted" buffer. The buffer returned is the same + * as the one given in the "quoted" parameter. + * + * @param in a string to escape + * @param escaped the target buffer for escaped data + * @return the same buffer as given in the "quoted" parameter + */ + private static StringBuilder escape(String in, StringBuilder escaped) { + for (char c : in.toCharArray()) { + switch (c) { + case ('\b'): + escaped.append("\\b"); + break; + case ('\t'): + escaped.append("\\t"); + break; + case ('\n'): + escaped.append("\\n"); + break; + case ('\f'): + escaped.append("\\f"); + break; + case ('\r'): + escaped.append("\\r"); + break; + case ('"'): + escaped.append("\\\""); + break; + case ('\''): + escaped.append("\\'"); + break; + case ('\\'): + escaped.append("\\\\"); + break; + case ('/'): + escaped.append("\\/"); + break; + default: + if (c < 32 || c >= 127) { + escaped.append("\\u").append(fourDigitHexString(c)); + } else { + escaped.append(c); + } + } + } + return escaped; + } + + private static char[] fourDigitHexString(char c) { + char[] hex = new char[4]; + int in = ((c) & 0xFFFF); + for (int i = 3; i >= 0; --i) { + hex[i] = DIGITS[in & 0xF]; + in >>>= 4; + } + return hex; + } + + static String getIndexName(Item item) { + if (!(item instanceof IndexedItem)) + throw new IllegalArgumentException("Expected IndexedItem, got " + item.getClass()); + return normalizeIndexName(((IndexedItem) item).getIndexName()); + } + + public static String serialize(Query query) { + StringBuilder out = new StringBuilder(); + serialize(query.getModel().getQueryTree().getRoot(), out); + for (GroupingRequest request : GroupingRequest.getRequests(query)) { + out.append(" | "); + serialize(request, out); + } + return out.toString(); + } + + private static void serialize(GroupingRequest request, StringBuilder out) { + Iterator<Continuation> it = request.continuations().iterator(); + if (it.hasNext()) { + out.append("[{ 'continuations':["); + while (it.hasNext()) { + 
out.append('\'').append(it.next()).append('\''); + if (it.hasNext()) { + out.append(", "); + } + } + out.append("] }]"); + } + out.append(request.getRootOperation()); + } + + private static void serialize(Item item, StringBuilder out) { + VespaVisitor visitor = new VespaVisitor(out); + ToolBox.visit(visitor, item); + } + + static String serialize(Item item) { + StringBuilder out = new StringBuilder(); + serialize(item, out); + return out.toString(); + } + + private static void serializeWeightedSetContents(StringBuilder destination, + String opName, WeightedSetItem weightedSet) { + serializeWeightedSetContents(destination, opName, weightedSet, ""); + } + + private static void serializeWeightedSetContents( + StringBuilder destination, + String opName, WeightedSetItem weightedSet, + String optionalAnnotations) { + addAnnotations(destination, weightedSet, optionalAnnotations); + destination.append(opName).append('(') + .append(normalizeIndexName(weightedSet.getIndexName())) + .append(", {"); + int initLen = destination.length(); + List<Entry<Object, Integer>> tokens = new ArrayList<>( + weightedSet.getNumTokens()); + for (Iterator<Entry<Object, Integer>> i = weightedSet.getTokens(); i + .hasNext();) { + tokens.add(i.next()); + } + Collections.sort(tokens, tokenComparator); + for (Entry<Object, Integer> entry : tokens) { + comma(destination, initLen); + destination.append('"'); + escape(entry.getKey().toString(), destination); + destination.append("\": ").append(entry.getValue().toString()); + } + destination.append("})"); + } + + private static void addAnnotations( + StringBuilder destination, + WeightedSetItem weightedSet, String optionalAnnotations) { + int preAnnotationValueLen; + int incomingLen = destination.length(); + String annotations = leafAnnotations(weightedSet); + + if (optionalAnnotations.length() > 0 || annotations.length() > 0) { + destination.append("[{"); + } + preAnnotationValueLen = destination.length(); + if (annotations.length() > 0) { + 
destination.append(annotations); + } + if (optionalAnnotations.length() > 0) { + comma(destination, preAnnotationValueLen); + destination.append(optionalAnnotations); + } + if (destination.length() > incomingLen) { + destination.append("}]"); + } + } + + private static void comma(StringBuilder annotation, int initLen) { + if (annotation.length() > initLen) { + annotation.append(", "); + } + } + + private static String leafAnnotations(TaggableItem item) { + // TODO there is no usable API for the general annotations map in the + // Item instances + StringBuilder annotation = new StringBuilder(); + int initLen = annotation.length(); + { + int uniqueId = item.getUniqueID(); + double connectivity = item.getConnectivity(); + TaggableItem connectedTo = (TaggableItem) item.getConnectedItem(); + double significance = item.getSignificance(); + if (connectedTo != null && connectedTo.getUniqueID() != 0) { + annotation.append('"').append(CONNECTIVITY).append("\": {\"") + .append(CONNECTION_ID).append("\": ") + .append(connectedTo.getUniqueID()).append(", \"") + .append(CONNECTION_WEIGHT).append("\": ") + .append(connectivity).append("}"); + } + if (item.hasExplicitSignificance()) { + comma(annotation, initLen); + annotation.append('"').append(SIGNIFICANCE).append("\": ") + .append(significance); + } + if (uniqueId != 0) { + comma(annotation, initLen); + annotation.append('"').append(UNIQUE_ID).append("\": ") + .append(uniqueId); + } + } + { + Item leaf = (Item) item; + boolean filter = leaf.isFilter(); + boolean isRanked = leaf.isRanked(); + String label = leaf.getLabel(); + int weight = leaf.getWeight(); + + if (filter == true) { + comma(annotation, initLen); + annotation.append("\"").append(FILTER).append("\": true"); + } + if (isRanked == false) { + comma(annotation, initLen); + annotation.append("\"").append(RANKED).append("\": false"); + } + if (label != null) { + comma(annotation, initLen); + annotation.append("\"").append(LABEL).append("\": \""); + escape(label, 
annotation); + annotation.append("\""); + } + if (weight != 100) { + comma(annotation, initLen); + annotation.append('"').append(WEIGHT).append("\": ") + .append(weight); + } + } + if (item instanceof IntItem) { + int hitLimit = ((IntItem) item).getHitLimit(); + if (hitLimit != 0) { + comma(annotation, initLen); + annotation.append('"').append(HIT_LIMIT).append("\": ") + .append(hitLimit); + } + } + return annotation.toString(); + } + + private static void serializeOrigin(StringBuilder destination, + String image, int offset, int length) { + destination.append('"').append(ORIGIN).append("\": {\"") + .append(ORIGIN_ORIGINAL).append("\": \""); + escape(image, destination); + destination.append("\", \"").append(ORIGIN_OFFSET).append("\": ") + .append(offset).append(", \"").append(ORIGIN_LENGTH) + .append("\": ").append(length).append("}"); + } + + private static String normalizeIndexName(@NonNull String indexName) { + if (indexName.length() == 0) { + return "default"; + } else { + return indexName; + } + } + + private static void annotatedTerm(StringBuilder destination, IndexedItem w, String annotations) { + if (annotations.length() > 0) { + destination.append("([{").append(annotations).append("}]"); + } + destination.append('"'); + escape(w.getIndexedString(), destination).append('"'); + if (annotations.length() > 0) { + destination.append(')'); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java new file mode 100644 index 00000000000..a7cc06c95f7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java @@ -0,0 +1,1894 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.yql; + +import java.math.BigInteger; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Deque; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.StringTokenizer; + +import com.google.common.annotations.Beta; +import com.google.common.base.Preconditions; +import com.yahoo.collections.LazyMap; +import com.yahoo.collections.LazySet; +import com.yahoo.collections.Tuple2; +import com.yahoo.component.Version; +import com.yahoo.language.Language; +import com.yahoo.language.Linguistics; +import com.yahoo.language.process.Normalizer; +import com.yahoo.language.process.Segmenter; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.AndItem; +import com.yahoo.prelude.query.AndSegmentItem; +import com.yahoo.prelude.query.CompositeItem; +import com.yahoo.prelude.query.DotProductItem; +import com.yahoo.prelude.query.EquivItem; +import com.yahoo.prelude.query.IntItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.Limit; +import com.yahoo.prelude.query.NearItem; +import com.yahoo.prelude.query.NotItem; +import com.yahoo.prelude.query.NullItem; +import com.yahoo.prelude.query.ONearItem; +import com.yahoo.prelude.query.OrItem; +import com.yahoo.prelude.query.PhraseItem; +import com.yahoo.prelude.query.PhraseSegmentItem; +import com.yahoo.prelude.query.PredicateQueryItem; +import com.yahoo.prelude.query.PrefixItem; +import com.yahoo.prelude.query.RangeItem; +import com.yahoo.prelude.query.RankItem; +import com.yahoo.prelude.query.RegExpItem; +import com.yahoo.prelude.query.SegmentItem; +import com.yahoo.prelude.query.SegmentingRule; +import com.yahoo.prelude.query.Substring; +import com.yahoo.prelude.query.SubstringItem; +import com.yahoo.prelude.query.SuffixItem; +import com.yahoo.prelude.query.TaggableItem; +import com.yahoo.prelude.query.ToolBox; +import 
com.yahoo.prelude.query.ToolBox.QueryVisitor; +import com.yahoo.prelude.query.WandItem; +import com.yahoo.prelude.query.WeakAndItem; +import com.yahoo.prelude.query.WeightedSetItem; +import com.yahoo.prelude.query.WordAlternativesItem; +import com.yahoo.prelude.query.WordItem; +import com.yahoo.search.Query; +import com.yahoo.search.grouping.Continuation; +import com.yahoo.search.grouping.request.GroupingOperation; +import com.yahoo.search.query.QueryTree; +import com.yahoo.search.query.Sorting; +import com.yahoo.search.query.Sorting.AttributeSorter; +import com.yahoo.search.query.Sorting.FieldOrder; +import com.yahoo.search.query.Sorting.LowerCaseSorter; +import com.yahoo.search.query.Sorting.Order; +import com.yahoo.search.query.Sorting.RawSorter; +import com.yahoo.search.query.Sorting.UcaSorter; +import com.yahoo.search.query.parser.Parsable; +import com.yahoo.search.query.parser.Parser; +import com.yahoo.search.query.parser.ParserEnvironment; +import com.yahoo.search.query.parser.ParserFactory; + +import edu.umd.cs.findbugs.annotations.NonNull; + +/** + * The YQL query language. + * + * <p> + * This class <em>must</em> be kept in lockstep with {@link VespaSerializer}. + * Adding anything here will usually require a corresponding addition in + * VespaSerializer. 
+ * </p> + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author <a href="mailto:stiankri@yahoo-inc.com">Stian Kristoffersen</a> + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +@Beta +public class YqlParser implements Parser { + + private static final String DESCENDING_HITS_ORDER = "descending"; + private static final String ASCENDING_HITS_ORDER = "ascending"; + + private enum SegmentWhen { + NEVER, POSSIBLY, ALWAYS; + } + + private static final Integer DEFAULT_HITS = 10; + private static final Integer DEFAULT_OFFSET = 0; + private static final Integer DEFAULT_TARGET_NUM_HITS = 10; + private static final String ACCENT_DROP_DESCRIPTION = "setting for whether to remove accents if field implies it"; + private static final String ANNOTATIONS = "annotations"; + private static final String FILTER_DESCRIPTION = "term filter setting"; + private static final String IMPLICIT_TRANSFORMS_DESCRIPTION = "setting for whether built-in query transformers should touch the term"; + private static final String NFKC = "nfkc"; + private static final String NORMALIZE_CASE_DESCRIPTION = "setting for whether to do case normalization if field implies it"; + private static final String ORIGIN_DESCRIPTION = "string origin for a term"; + private static final String RANKED_DESCRIPTION = "setting for whether to use term for ranking"; + private static final String SEGMENTER_BACKEND = "backend"; + private static final String SEGMENTER = "segmenter"; + private static final String SEGMENTER_VERSION = "version"; + private static final String STEM_DESCRIPTION = "setting for whether to use stem if field implies it"; + private static final String USE_POSITION_DATA_DESCRIPTION = "setting for whether to use position data for ranking this item"; + private static final String USER_INPUT_ALLOW_EMPTY = "allowEmpty"; + private static final String USER_INPUT_DEFAULT_INDEX = "defaultIndex"; + private static final String USER_INPUT_GRAMMAR = "grammar"; + 
private static final String USER_INPUT_LANGUAGE = "language"; + private static final String USER_INPUT_RAW = "raw"; + private static final String USER_INPUT_SEGMENT = "segment"; + private static final String USER_INPUT = "userInput"; + private static final String USER_QUERY = "userQuery"; + private static final String NON_EMPTY = "nonEmpty"; + + public static final String SORTING_FUNCTION = "function"; + public static final String SORTING_LOCALE = "locale"; + public static final String SORTING_STRENGTH = "strength"; + + static final String ACCENT_DROP = "accentDrop"; + static final String ALTERNATIVES = "alternatives"; + static final String AND_SEGMENTING = "andSegmenting"; + static final String BOUNDS = "bounds"; + static final String BOUNDS_LEFT_OPEN = "leftOpen"; + static final String BOUNDS_OPEN = "open"; + static final String BOUNDS_RIGHT_OPEN = "rightOpen"; + static final String CONNECTION_ID = "id"; + static final String CONNECTION_WEIGHT = "weight"; + static final String CONNECTIVITY = "connectivity"; + static final String DISTANCE = "distance"; + static final String DOT_PRODUCT = "dotProduct"; + static final String EQUIV = "equiv"; + static final String FILTER = "filter"; + static final String HIT_LIMIT = "hitLimit"; + static final String IMPLICIT_TRANSFORMS = "implicitTransforms"; + static final String LABEL = "label"; + static final String NEAR = "near"; + static final String NORMALIZE_CASE = "normalizeCase"; + static final String ONEAR = "onear"; + static final String ORIGIN_LENGTH = "length"; + static final String ORIGIN_OFFSET = "offset"; + static final String ORIGIN = "origin"; + static final String ORIGIN_ORIGINAL = "original"; + static final String PHRASE = "phrase"; + static final String PREDICATE = "predicate"; + static final String PREFIX = "prefix"; + static final String RANGE = "range"; + static final String RANKED = "ranked"; + static final String RANK = "rank"; + static final String SCORE_THRESHOLD = "scoreThreshold"; + static final String 
SIGNIFICANCE = "significance"; + static final String STEM = "stem"; + static final String SUBSTRING = "substring"; + static final String SUFFIX = "suffix"; + static final String TARGET_NUM_HITS = "targetNumHits"; + static final String THRESHOLD_BOOST_FACTOR = "thresholdBoostFactor"; + static final String UNIQUE_ID = "id"; + static final String USE_POSITION_DATA = "usePositionData"; + static final String WAND = "wand"; + static final String WEAK_AND = "weakAnd"; + static final String WEIGHTED_SET = "weightedSet"; + static final String WEIGHT = "weight"; + + private final IndexFacts indexFacts; + private final List<ConnectedItem> connectedItems = new ArrayList<>(); + private final List<VespaGroupingStep> groupingSteps = new ArrayList<>(); + private final Map<Integer, TaggableItem> identifiedItems = LazyMap.newHashMap(); + private final Normalizer normalizer; + private final Segmenter segmenter; + private final Set<String> yqlSources = LazySet.newHashSet(); + private final Set<String> yqlSummaryFields = LazySet.newHashSet(); + private final String localSegmenterBackend; + private final Version localSegmenterVersion; + private Integer hits; + private Integer offset; + private Integer timeout; + private Query userQuery; + private Parsable currentlyParsing; + private IndexFacts.Session indexFactsSession; + private Set<String> docTypes; + private Sorting sorting; + private String segmenterBackend; + private Version segmenterVersion; + private boolean queryParser = true; + private boolean resegment = false; + private final Deque<OperatorNode<?>> annotationStack = new ArrayDeque<>(); + private final ParserEnvironment environment; + + private static final QueryVisitor noEmptyTerms = new QueryVisitor() { + + @Override + public boolean visit(Item item) { + if (item instanceof NullItem) { + throw new IllegalArgumentException("Got NullItem inside nonEmpty()."); + } else if (item instanceof WordItem) { + if (((WordItem) item).getIndexedString().isEmpty()) { + throw new 
IllegalArgumentException("Searching for empty string inside nonEmpty()"); + } + } else if (item instanceof CompositeItem) { + if (((CompositeItem) item).getItemCount() == 0) { + throw new IllegalArgumentException("Empty composite operator (" + item.getName() + ") inside nonEmpty()"); + } + } + return true; + } + + @Override + public void onExit() { + // NOP + } + }; + + public YqlParser(ParserEnvironment environment) { + indexFacts = environment.getIndexFacts(); + normalizer = environment.getLinguistics().getNormalizer(); + segmenter = environment.getLinguistics().getSegmenter(); + this.environment = environment; + + Tuple2<String, Version> version = environment.getLinguistics().getVersion(Linguistics.Component.SEGMENTER); + localSegmenterBackend = version.first; + localSegmenterVersion = version.second; + } + + @NonNull + @Override + public QueryTree parse(Parsable query) { + indexFactsSession = indexFacts.newSession(query.getSources(), query.getRestrict()); + connectedItems.clear(); + groupingSteps.clear(); + identifiedItems.clear(); + yqlSources.clear(); + yqlSummaryFields.clear(); + annotationStack.clear(); + hits = null; + offset = null; + timeout = null; + // userQuery set prior to calling this + currentlyParsing = query; + docTypes = null; + sorting = null; + segmenterBackend = null; + segmenterVersion = null; + // queryParser set prior to calling this + resegment = false; + return buildTree(fetchFilterPart()); + } + + private void joinDocTypesFromUserQueryAndYql() { + List<String> allSourceNames = new ArrayList<>(currentlyParsing.getSources().size() + yqlSources.size()); + if ( ! 
yqlSources.isEmpty()) { + allSourceNames.addAll(currentlyParsing.getSources()); + allSourceNames.addAll(yqlSources); + } else { + // no sources == all sources in Vespa + } + indexFactsSession = indexFacts.newSession(allSourceNames, currentlyParsing.getRestrict()); + docTypes = new HashSet<>(indexFactsSession.documentTypes()); + } + + @NonNull + private QueryTree buildTree(OperatorNode<?> filterPart) { + Preconditions.checkArgument(filterPart.getArguments().length == 2, + "Expected 2 arguments to filter, got %s.", + filterPart.getArguments().length); + populateYqlSources(filterPart.<OperatorNode<?>> getArgument(0)); + final OperatorNode<ExpressionOperator> filterExpression = filterPart + .getArgument(1); + populateLinguisticsAnnotations(filterExpression); + Item root = convertExpression(filterExpression); + connectItems(); + userQuery = null; + return new QueryTree(root); + } + + private void populateLinguisticsAnnotations( + OperatorNode<ExpressionOperator> filterExpression) { + Map<?, ?> segmenter = getAnnotation(filterExpression, SEGMENTER, + Map.class, null, "segmenter engine and version"); + if (segmenter == null) { + segmenterVersion = null; + segmenterBackend = null; + resegment = false; + } else { + segmenterBackend = getMapValue(SEGMENTER, segmenter, + SEGMENTER_BACKEND, String.class); + try { + segmenterVersion = new Version(getMapValue(SEGMENTER, + segmenter, SEGMENTER_VERSION, String.class)); + } catch (RuntimeException e) { + segmenterVersion = null; + } + if (localSegmenterBackend.equals(segmenterBackend) + && localSegmenterVersion.equals(segmenterVersion)) { + resegment = false; + } else { + resegment = true; + } + } + } + + private void populateYqlSources(OperatorNode<?> filterArgs) { + yqlSources.clear(); + if (filterArgs.getOperator() == SequenceOperator.SCAN) { + for (String source : filterArgs.<List<String>> getArgument(0)) { + yqlSources.add(source); + } + } else if (filterArgs.getOperator() == SequenceOperator.ALL) { + // yqlSources has already 
been cleared + } else if (filterArgs.getOperator() == SequenceOperator.MULTISOURCE) { + for (List<String> source : filterArgs.<List<List<String>>> getArgument(0)) { + yqlSources.add(source.get(0)); + } + } else { + throw newUnexpectedArgumentException(filterArgs.getOperator(), + SequenceOperator.SCAN, SequenceOperator.ALL, + SequenceOperator.MULTISOURCE); + } + joinDocTypesFromUserQueryAndYql(); + } + + private void populateYqlSummaryFields( + List<OperatorNode<ProjectOperator>> fields) { + yqlSummaryFields.clear(); + for (OperatorNode<ProjectOperator> field : fields) { + assertHasOperator(field, ProjectOperator.FIELD); + yqlSummaryFields.add(field.getArgument(1, String.class)); + } + } + + private void connectItems() { + for (ConnectedItem entry : connectedItems) { + TaggableItem to = identifiedItems.get(entry.toId); + Preconditions + .checkNotNull(to, + "Item '%s' was specified to connect to item with ID %s, which does not " + + "exist in the query.", entry.fromItem, + entry.toId); + entry.fromItem.setConnectivity((Item) to, entry.weight); + } + } + + @NonNull + private Item convertExpression(OperatorNode<ExpressionOperator> ast) { + try { + annotationStack.addFirst(ast); + switch (ast.getOperator()) { + case AND: + return buildAnd(ast); + case OR: + return buildOr(ast); + case EQ: + return buildEquals(ast); + case LT: + return buildLessThan(ast); + case GT: + return buildGreaterThan(ast); + case LTEQ: + return buildLessThanOrEquals(ast); + case GTEQ: + return buildGreaterThanOrEquals(ast); + case CONTAINS: + return buildTermSearch(ast); + case MATCHES: + return buildRegExpSearch(ast); + case CALL: + return buildFunctionCall(ast); + default: + throw newUnexpectedArgumentException(ast.getOperator(), + ExpressionOperator.AND, ExpressionOperator.CALL, + ExpressionOperator.CONTAINS, ExpressionOperator.EQ, + ExpressionOperator.GT, ExpressionOperator.GTEQ, + ExpressionOperator.LT, ExpressionOperator.LTEQ, + ExpressionOperator.OR); + } + } finally { + 
annotationStack.removeFirst(); + } + } + + @NonNull + private Item buildFunctionCall(OperatorNode<ExpressionOperator> ast) { + List<String> names = ast.getArgument(0); + Preconditions.checkArgument(names.size() == 1, + "Expected 1 name, got %s.", names.size()); + switch (names.get(0)) { + case USER_QUERY: + return fetchUserQuery(); + case RANGE: + return buildRange(ast); + case WAND: + return buildWand(ast); + case WEIGHTED_SET: + return buildWeightedSet(ast); + case DOT_PRODUCT: + return buildDotProduct(ast); + case PREDICATE: + return buildPredicate(ast); + case RANK: + return buildRank(ast); + case WEAK_AND: + return buildWeakAnd(ast); + case USER_INPUT: + return buildUserInput(ast); + case NON_EMPTY: + return ensureNonEmpty(ast); + default: + throw newUnexpectedArgumentException(names.get(0), DOT_PRODUCT, + RANGE, RANK, USER_QUERY, WAND, WEAK_AND, WEIGHTED_SET, + PREDICATE, USER_INPUT, NON_EMPTY); + } + } + + private Item ensureNonEmpty(OperatorNode<ExpressionOperator> ast) { + List<OperatorNode<ExpressionOperator>> args = ast.getArgument(1); + Preconditions.checkArgument(args.size() == 1, + "Expected 1 arguments, got %s.", args.size()); + Item item = convertExpression(args.get(0)); + ToolBox.visit(noEmptyTerms, item); + return item; + } + + @NonNull + private Item buildWeightedSet(OperatorNode<ExpressionOperator> ast) { + List<OperatorNode<ExpressionOperator>> args = ast.getArgument(1); + Preconditions.checkArgument(args.size() == 2, + "Expected 2 arguments, got %s.", args.size()); + + return fillWeightedSet(ast, args.get(1), new WeightedSetItem( + getIndex(args.get(0)))); + } + + @NonNull + private Item buildDotProduct(OperatorNode<ExpressionOperator> ast) { + List<OperatorNode<ExpressionOperator>> args = ast.getArgument(1); + Preconditions.checkArgument(args.size() == 2, + "Expected 2 arguments, got %s.", args.size()); + + return fillWeightedSet(ast, args.get(1), new DotProductItem( + getIndex(args.get(0)))); + } + + @NonNull + private Item 
buildPredicate(OperatorNode<ExpressionOperator> ast) { + List<OperatorNode<ExpressionOperator>> args = ast.getArgument(1); + Preconditions.checkArgument(args.size() == 3, + "Expected 3 arguments, got %s.", args.size()); + + final PredicateQueryItem item = new PredicateQueryItem(); + item.setIndexName(getIndex(args.get(0))); + + addFeatures(args.get(1), + (key, value, subqueryBitmap) -> item.addFeature(key, (String) value, subqueryBitmap), PredicateQueryItem.ALL_SUB_QUERIES); + addFeatures(args.get(2), (key, value, subqueryBitmap) -> { + if (value instanceof Long) { + item.addRangeFeature(key, (Long) value, subqueryBitmap); + } else { + item.addRangeFeature(key, (Integer) value, subqueryBitmap); + } + }, PredicateQueryItem.ALL_SUB_QUERIES); + return leafStyleSettings(ast, item); + } + + interface AddFeature { + public void addFeature(String key, Object value, long subqueryBitmap); + } + + private void addFeatures(OperatorNode<ExpressionOperator> map, + AddFeature item, long subqueryBitmap) { + if (map.getOperator() != ExpressionOperator.MAP) { + return; + } + assertHasOperator(map, ExpressionOperator.MAP); + List<String> keys = map.getArgument(0); + List<OperatorNode<ExpressionOperator>> values = map.getArgument(1); + for (int i = 0; i < keys.size(); ++i) { + String key = keys.get(i); + OperatorNode<ExpressionOperator> value = values.get(i); + if (value.getOperator() == ExpressionOperator.ARRAY) { + List<OperatorNode<ExpressionOperator>> multiValues = value + .getArgument(0); + for (OperatorNode<ExpressionOperator> multiValue : multiValues) { + assertHasOperator(multiValue, ExpressionOperator.LITERAL); + item.addFeature(key, multiValue.getArgument(0), subqueryBitmap); + } + } else if (value.getOperator() == ExpressionOperator.LITERAL) { + item.addFeature(key, value.getArgument(0), subqueryBitmap); + } else { + assertHasOperator(value, ExpressionOperator.MAP); // Subquery syntax + Preconditions.checkArgument(key.indexOf("0x") == 0 || key.indexOf("[") == 0); + if 
(key.indexOf("0x") == 0) { + String subqueryString = key.substring(2); + if (subqueryString.length() > 16) { + throw new NumberFormatException( + "Too long subquery string: " + key); + } + long currentSubqueryBitmap = new BigInteger(subqueryString, 16).longValue(); + addFeatures(value, item, currentSubqueryBitmap); + } else { + StringTokenizer bits = new StringTokenizer(key.substring(1, key.length() - 1), ","); + long currentSubqueryBitmap = 0; + while (bits.hasMoreTokens()) { + int bit = Integer.parseInt(bits.nextToken().trim()); + currentSubqueryBitmap |= 1L << bit; + } + addFeatures(value, item, currentSubqueryBitmap); + } + } + } + } + + @NonNull + private Item buildWand(OperatorNode<ExpressionOperator> ast) { + List<OperatorNode<ExpressionOperator>> args = ast.getArgument(1); + Preconditions.checkArgument(args.size() == 2, "Expected 2 arguments, got %s.", args.size()); + + WandItem out = new WandItem(getIndex(args.get(0)), getAnnotation(ast, + TARGET_NUM_HITS, Integer.class, DEFAULT_TARGET_NUM_HITS, + "desired number of hits to accumulate in wand")); + Double scoreThreshold = getAnnotation(ast, SCORE_THRESHOLD, + Double.class, null, "min score for hit inclusion"); + if (scoreThreshold != null) { + out.setScoreThreshold(scoreThreshold); + } + Double thresholdBoostFactor = getAnnotation(ast, + THRESHOLD_BOOST_FACTOR, Double.class, null, + "boost factor used to boost threshold before comparing against upper bound score"); + if (thresholdBoostFactor != null) { + out.setThresholdBoostFactor(thresholdBoostFactor); + } + return fillWeightedSet(ast, args.get(1), out); + } + + @NonNull + private WeightedSetItem fillWeightedSet(OperatorNode<ExpressionOperator> ast, + OperatorNode<ExpressionOperator> arg, + @NonNull WeightedSetItem out) { + addItems(arg, out); + return leafStyleSettings(ast, out); + } + + @NonNull + private Item instantiatePhraseItem(String field, OperatorNode<ExpressionOperator> ast) { + assertHasFunctionName(ast, PHRASE); + + if (getAnnotation(ast, 
ORIGIN, Map.class, null, ORIGIN_DESCRIPTION, false) != null) { + return instantiatePhraseSegmentItem(field, ast, false); + } + + PhraseItem phrase = new PhraseItem(); + phrase.setIndexName(field); + for (OperatorNode<ExpressionOperator> word : ast.<List<OperatorNode<ExpressionOperator>>> getArgument(1)) { + if (word.getOperator() == ExpressionOperator.CALL) { + List<String> names = word.getArgument(0); + switch (names.get(0)) { + case PHRASE: + if (getAnnotation(word, ORIGIN, Map.class, null, ORIGIN_DESCRIPTION, false) == null) { + phrase.addItem(instantiatePhraseItem(field, word)); + } else { + phrase.addItem(instantiatePhraseSegmentItem(field, word, true)); + } + break; + case ALTERNATIVES: + phrase.addItem(instantiateWordAlternativesItem(field, word)); + break; + default: + throw new IllegalArgumentException("Expected phrase or word alternatives, got " + names.get(0)); + } + } else { + phrase.addItem(instantiateWordItem(field, word, phrase.getClass())); + } + } + return leafStyleSettings(ast, phrase); + } + + @NonNull + private Item instantiatePhraseSegmentItem(String field, OperatorNode<ExpressionOperator> ast, boolean forcePhrase) { + Substring origin = getOrigin(ast); + Boolean stem = getAnnotation(ast, STEM, Boolean.class, Boolean.TRUE, STEM_DESCRIPTION); + Boolean andSegmenting = getAnnotation(ast, AND_SEGMENTING, Boolean.class, Boolean.FALSE, + "setting for whether to force using AND for segments on and off"); + SegmentItem phrase; + List<String> words = null; + + if (forcePhrase || !andSegmenting) { + phrase = new PhraseSegmentItem(origin.getValue(), origin.getValue(), true, !stem, origin); + } else { + phrase = new AndSegmentItem(origin.getValue(), true, !stem); + } + phrase.setIndexName(field); + + if (resegment + && getAnnotation(ast, IMPLICIT_TRANSFORMS, Boolean.class, Boolean.TRUE, IMPLICIT_TRANSFORMS_DESCRIPTION)) { + words = segmenter.segment(origin.getValue(), currentlyParsing.getLanguage()); + } + + if (words != null && words.size() > 0) { + for 
(String word : words) { + phrase.addItem(new WordItem(word, field, true)); + } + } else { + for (OperatorNode<ExpressionOperator> word : ast.<List<OperatorNode<ExpressionOperator>>> getArgument(1)) { + phrase.addItem(instantiateWordItem(field, word, phrase.getClass(), SegmentWhen.NEVER)); + } + } + if (phrase instanceof TaggableItem) { + leafStyleSettings(ast, (TaggableItem) phrase); + } + phrase.lock(); + return phrase; + } + + @NonNull + private Item instantiateNearItem(String field, OperatorNode<ExpressionOperator> ast) { + assertHasFunctionName(ast, NEAR); + + NearItem near = new NearItem(); + near.setIndexName(field); + for (OperatorNode<ExpressionOperator> word : ast.<List<OperatorNode<ExpressionOperator>>> getArgument(1)) { + near.addItem(instantiateWordItem(field, word, near.getClass())); + } + Integer distance = getAnnotation(ast, DISTANCE, Integer.class, null, "term distance for NEAR operator"); + if (distance != null) { + near.setDistance(distance); + } + return near; + } + + @NonNull + private Item instantiateONearItem(String field, OperatorNode<ExpressionOperator> ast) { + assertHasFunctionName(ast, ONEAR); + + NearItem onear = new ONearItem(); + onear.setIndexName(field); + for (OperatorNode<ExpressionOperator> word : ast.<List<OperatorNode<ExpressionOperator>>> getArgument(1)) { + onear.addItem(instantiateWordItem(field, word, onear.getClass())); + } + Integer distance = getAnnotation(ast, DISTANCE, Integer.class, null, "term distance for ONEAR operator"); + if (distance != null) { + onear.setDistance(distance); + } + return onear; + } + + @NonNull + private Item fetchUserQuery() { + Preconditions.checkState(!queryParser, + "Tried inserting user query into itself."); + Preconditions.checkState(userQuery != null, + "User query must be set before trying to build complete query " + + "tree including user query."); + return userQuery.getModel().getQueryTree().getRoot(); + } + + @NonNull + private Item buildUserInput(OperatorNode<ExpressionOperator> ast) 
{ + + String grammar = getAnnotation(ast, USER_INPUT_GRAMMAR, String.class, + Query.Type.ALL.toString(), "grammar for handling user input"); + String defaultIndex = getAnnotation(ast, USER_INPUT_DEFAULT_INDEX, + String.class, "default", "default index for user input terms"); + Boolean allowEmpty = getAnnotation(ast, USER_INPUT_ALLOW_EMPTY, Boolean.class, + Boolean.FALSE, "flag for allowing NullItem to be returned"); + String wordData; + List<OperatorNode<ExpressionOperator>> args = ast.getArgument(1); + + // TODO add support for default arguments if property results in nothing + wordData = getStringContents(args.get(0)); + if (allowEmpty.booleanValue() && (wordData == null || wordData.isEmpty())) { + return new NullItem(); + } + String languageTag = getAnnotation(ast, USER_INPUT_LANGUAGE, + String.class, "en", + "language setting for segmenting user input parameter"); + Language language = Language.fromLanguageTag(languageTag); + Item item; + if (USER_INPUT_RAW.equals(grammar)) { + item = instantiateWordItem(defaultIndex, wordData, ast, null, SegmentWhen.NEVER, + language); + } else if (USER_INPUT_SEGMENT.equals(grammar)) { + item = instantiateWordItem(defaultIndex, wordData, ast, null, + SegmentWhen.ALWAYS, language); + } else { + item = parseUserInput(grammar, defaultIndex, wordData, language, allowEmpty.booleanValue()); + propagateUserInputAnnotations(ast, item); + } + return item; + } + + private String getStringContents( + OperatorNode<ExpressionOperator> propertySniffer) { + String wordData; + + switch (propertySniffer.getOperator()) { + case LITERAL: + wordData = propertySniffer.getArgument(0, String.class); + break; + case VARREF: + Preconditions + .checkState(userQuery != null, + "properties must be available when trying to fetch user input"); + wordData = userQuery.properties().getString( + propertySniffer.getArgument(0, String.class)); + break; + default: + throw newUnexpectedArgumentException(propertySniffer.getOperator(), + ExpressionOperator.LITERAL, 
ExpressionOperator.VARREF); + } + return wordData; + } + + private class AnnotationPropagator extends QueryVisitor { + private final Boolean isRanked; + private final Boolean filter; + private final Boolean stem; + private final Boolean normalizeCase; + private final Boolean accentDrop; + private final Boolean usePositionData; + + public AnnotationPropagator(OperatorNode<ExpressionOperator> ast) { + isRanked = getAnnotation(ast, RANKED, Boolean.class, null, + RANKED_DESCRIPTION); + filter = getAnnotation(ast, FILTER, Boolean.class, null, + FILTER_DESCRIPTION); + stem = getAnnotation(ast, STEM, Boolean.class, null, + STEM_DESCRIPTION); + normalizeCase = getAnnotation(ast, NORMALIZE_CASE, Boolean.class, + Boolean.TRUE, NORMALIZE_CASE_DESCRIPTION); + accentDrop = getAnnotation(ast, ACCENT_DROP, Boolean.class, null, + ACCENT_DROP_DESCRIPTION); + usePositionData = getAnnotation(ast, USE_POSITION_DATA, + Boolean.class, null, USE_POSITION_DATA_DESCRIPTION); + } + + @Override + public boolean visit(Item item) { + if (item instanceof WordItem) { + WordItem w = (WordItem) item; + if (usePositionData != null) { + w.setPositionData(usePositionData); + } + if (stem != null) { + w.setStemmed(!stem); + } + if (normalizeCase != null) { + w.setLowercased(!normalizeCase); + } + if (accentDrop != null) { + w.setNormalizable(accentDrop); + } + } + if (item instanceof TaggableItem) { + if (isRanked != null) { + item.setRanked(isRanked); + } + if (filter != null) { + item.setFilter(filter); + } + } + return true; + } + + @Override + public void onExit() { + // intentionally left blank + } + } + + private void propagateUserInputAnnotations( + OperatorNode<ExpressionOperator> ast, Item item) { + ToolBox.visit(new AnnotationPropagator(ast), item); + + } + + @NonNull + private Item parseUserInput(String grammar, String defaultIndex, String wordData, + Language language, boolean allowNullItem) { + Item item; + Query.Type parseAs = Query.Type.getType(grammar); + Parser parser = 
ParserFactory.newInstance(parseAs, environment); + // perhaps not use already resolved doctypes, but respect source and + // restrict + item = parser.parse( + new Parsable().setQuery(wordData).addSources(docTypes) + .setLanguage(language) + .setDefaultIndexName(defaultIndex)).getRoot(); + // the null check should be unnecessary, but is there to avoid having to + // suppress null warnings + if (!allowNullItem && (item == null || item instanceof NullItem)) { + throw new IllegalArgumentException("Parsing \"" + wordData + + "\" only resulted in NullItem."); + } + return item; + } + + @NonNull + private OperatorNode<?> fetchFilterPart() { + ProgramParser parser = new ProgramParser(); + OperatorNode<?> ast; + try { + ast = parser.parse("query", currentlyParsing.getQuery()); + } catch (Exception e) { + throw new IllegalArgumentException(e); + } + assertHasOperator(ast, StatementOperator.PROGRAM); + Preconditions.checkArgument(ast.getArguments().length == 1, + "Expected only a single argument to the root node, got %s.", + ast.getArguments().length); + // TODO: should we check size of first argument as well? 
+ ast = ast.<List<OperatorNode<?>>> getArgument(0).get(0); + assertHasOperator(ast, StatementOperator.EXECUTE); + + ast = ast.getArgument(0); + ast = fetchTimeout(ast); + ast = fetchPipe(ast); + ast = fetchSummaryFields(ast); + ast = fetchOffsetAndHits(ast); + ast = fetchSorting(ast); + assertHasOperator(ast, SequenceOperator.FILTER); + return ast; + } + + @SuppressWarnings("unchecked") + private OperatorNode<?> fetchPipe(OperatorNode<?> toScan) { + OperatorNode<?> ast = toScan; + while (ast.getOperator() == SequenceOperator.PIPE) { + OperatorNode<ExpressionOperator> groupingAst = ast + .<List<OperatorNode<ExpressionOperator>>> getArgument(2) + .get(0); + GroupingOperation groupingOperation = GroupingOperation + .fromString(groupingAst.<String> getArgument(0)); + VespaGroupingStep groupingStep = new VespaGroupingStep( + groupingOperation); + List<String> continuations = getAnnotation(groupingAst, + "continuations", List.class, Collections.emptyList(), + "grouping continuations"); + for (String continuation : continuations) { + groupingStep.continuations().add( + Continuation.fromString(continuation)); + } + groupingSteps.add(groupingStep); + ast = ast.getArgument(0); + } + Collections.reverse(groupingSteps); + return ast; + } + + @NonNull + private OperatorNode<?> fetchSorting(OperatorNode<?> ast) { + if (ast.getOperator() != SequenceOperator.SORT) { + return ast; + } + List<FieldOrder> sortingInit = new ArrayList<>(); + List<OperatorNode<?>> sortArguments = ast.getArgument(1); + for (OperatorNode<?> op : sortArguments) { + final OperatorNode<ExpressionOperator> fieldNode = op + .<OperatorNode<ExpressionOperator>> getArgument(0); + String field = fetchFieldRead(fieldNode); + String locale = getAnnotation(fieldNode, SORTING_LOCALE, + String.class, null, "locale used by sorting function"); + String function = getAnnotation(fieldNode, SORTING_FUNCTION, + String.class, null, + "sorting function for the specified attribute"); + String strength = getAnnotation(fieldNode, 
SORTING_STRENGTH, + String.class, null, "strength for sorting function"); + AttributeSorter sorter; + if (function == null) { + sorter = new AttributeSorter(field); + } else if (Sorting.LOWERCASE.equals(function)) { + sorter = new LowerCaseSorter(field); + } else if (Sorting.RAW.equals(function)) { + sorter = new RawSorter(field); + } else if (Sorting.UCA.equals(function)) { + if (locale != null) { + UcaSorter.Strength ucaStrength = UcaSorter.Strength.UNDEFINED; + if (strength != null) { + if (Sorting.STRENGTH_PRIMARY.equalsIgnoreCase(strength)) { + ucaStrength = UcaSorter.Strength.PRIMARY; + } else if (Sorting.STRENGTH_SECONDARY + .equalsIgnoreCase(strength)) { + ucaStrength = UcaSorter.Strength.SECONDARY; + } else if (Sorting.STRENGTH_TERTIARY + .equalsIgnoreCase(strength)) { + ucaStrength = UcaSorter.Strength.TERTIARY; + } else if (Sorting.STRENGTH_QUATERNARY + .equalsIgnoreCase(strength)) { + ucaStrength = UcaSorter.Strength.QUATERNARY; + } else if (Sorting.STRENGTH_IDENTICAL + .equalsIgnoreCase(strength)) { + ucaStrength = UcaSorter.Strength.IDENTICAL; + } else { + throw newUnexpectedArgumentException(function, + Sorting.STRENGTH_PRIMARY, + Sorting.STRENGTH_SECONDARY, + Sorting.STRENGTH_TERTIARY, + Sorting.STRENGTH_QUATERNARY, + Sorting.STRENGTH_IDENTICAL); + } + sorter = new UcaSorter(field, locale, ucaStrength); + } else { + sorter = new UcaSorter(field, locale, ucaStrength); + } + } else { + sorter = new UcaSorter(field); + } + } else { + throw newUnexpectedArgumentException(function, "lowercase", + "raw", "uca"); + } + switch ((SortOperator) op.getOperator()) { + case ASC: + sortingInit.add(new FieldOrder(sorter, Order.ASCENDING)); + break; + case DESC: + sortingInit.add(new FieldOrder(sorter, Order.DESCENDING)); + break; + default: + throw newUnexpectedArgumentException(op.getOperator(), + SortOperator.ASC, SortOperator.DESC); + } + } + sorting = new Sorting(sortingInit); + return ast.getArgument(0); + } + + @NonNull + private OperatorNode<?> 
fetchOffsetAndHits(OperatorNode<?> ast) { + if (ast.getOperator() == SequenceOperator.OFFSET) { + offset = ast.<OperatorNode<?>> getArgument(1) + .<Integer> getArgument(0); + hits = DEFAULT_HITS; + return ast.getArgument(0); + } + if (ast.getOperator() == SequenceOperator.SLICE) { + offset = ast.<OperatorNode<?>> getArgument(1) + .<Integer> getArgument(0); + hits = ast.<OperatorNode<?>> getArgument(2) + .<Integer> getArgument(0) - offset; + return ast.getArgument(0); + } + if (ast.getOperator() == SequenceOperator.LIMIT) { + hits = ast.<OperatorNode<?>> getArgument(1) + .<Integer> getArgument(0); + offset = DEFAULT_OFFSET; + return ast.getArgument(0); + } + return ast; + } + + @NonNull + private OperatorNode<?> fetchSummaryFields(OperatorNode<?> ast) { + if (ast.getOperator() != SequenceOperator.PROJECT) { + return ast; + } + Preconditions.checkArgument(ast.getArguments().length == 2, + "Expected 2 arguments to PROJECT, got %s.", + ast.getArguments().length); + populateYqlSummaryFields(ast + .<List<OperatorNode<ProjectOperator>>> getArgument(1)); + return ast.getArgument(0); + } + + private OperatorNode<?> fetchTimeout(OperatorNode<?> ast) { + if (ast.getOperator() != SequenceOperator.TIMEOUT) { + return ast; + } + timeout = ast.<OperatorNode<?>> getArgument(1).<Integer> getArgument(0); + return ast.getArgument(0); + } + + @NonNull + private static String fetchFieldRead(OperatorNode<ExpressionOperator> ast) { + assertHasOperator(ast, ExpressionOperator.READ_FIELD); + return ast.getArgument(1); + } + + @NonNull + private IntItem buildGreaterThanOrEquals( + OperatorNode<ExpressionOperator> ast) { + IntItem number; + if (isIndexOnLeftHandSide(ast)) { + number = new IntItem("[" + fetchConditionWord(ast) + ";]", + fetchConditionIndex(ast)); + number = leafStyleSettings(ast.getArgument(1, OperatorNode.class), + number); + } else { + number = new IntItem("[;" + fetchConditionWord(ast) + "]", + fetchConditionIndex(ast)); + number = leafStyleSettings(ast.getArgument(0, 
OperatorNode.class), + number); + } + return number; + } + + @NonNull + private IntItem buildLessThanOrEquals(OperatorNode<ExpressionOperator> ast) { + IntItem number; + if (isIndexOnLeftHandSide(ast)) { + number = new IntItem("[;" + fetchConditionWord(ast) + "]", + fetchConditionIndex(ast)); + number = leafStyleSettings(ast.getArgument(1, OperatorNode.class), + number); + } else { + number = new IntItem("[" + fetchConditionWord(ast) + ";]", + fetchConditionIndex(ast)); + number = leafStyleSettings(ast.getArgument(0, OperatorNode.class), + number); + } + return number; + } + + @NonNull + private IntItem buildGreaterThan(OperatorNode<ExpressionOperator> ast) { + IntItem number; + if (isIndexOnLeftHandSide(ast)) { + number = new IntItem(">" + fetchConditionWord(ast), + fetchConditionIndex(ast)); + number = leafStyleSettings(ast.getArgument(1, OperatorNode.class), + number); + } else { + number = new IntItem("<" + fetchConditionWord(ast), + fetchConditionIndex(ast)); + number = leafStyleSettings(ast.getArgument(0, OperatorNode.class), + number); + } + return number; + } + + @NonNull + private IntItem buildLessThan(OperatorNode<ExpressionOperator> ast) { + IntItem number; + if (isIndexOnLeftHandSide(ast)) { + number = new IntItem("<" + fetchConditionWord(ast), + fetchConditionIndex(ast)); + number = leafStyleSettings(ast.getArgument(1, OperatorNode.class), + number); + } else { + number = new IntItem(">" + fetchConditionWord(ast), + fetchConditionIndex(ast)); + number = leafStyleSettings(ast.getArgument(0, OperatorNode.class), + number); + } + return number; + } + + @NonNull + private IntItem buildEquals(OperatorNode<ExpressionOperator> ast) { + IntItem number = new IntItem(fetchConditionWord(ast), + fetchConditionIndex(ast)); + if (isIndexOnLeftHandSide(ast)) { + number = leafStyleSettings(ast.getArgument(1, OperatorNode.class), + number); + } else { + number = leafStyleSettings(ast.getArgument(0, OperatorNode.class), + number); + } + return number; + } + + @NonNull + 
private String fetchConditionIndex(OperatorNode<ExpressionOperator> ast) { + OperatorNode<ExpressionOperator> lhs = ast.getArgument(0); + OperatorNode<ExpressionOperator> rhs = ast.getArgument(1); + if (lhs.getOperator() == ExpressionOperator.LITERAL + || lhs.getOperator() == ExpressionOperator.NEGATE) { + assertHasOperator(rhs, ExpressionOperator.READ_FIELD); + return getIndex(rhs); + } + if (rhs.getOperator() == ExpressionOperator.LITERAL + || rhs.getOperator() == ExpressionOperator.NEGATE) { + assertHasOperator(lhs, ExpressionOperator.READ_FIELD); + return getIndex(lhs); + } + throw new IllegalArgumentException( + "Expected LITERAL and READ_FIELD, got " + lhs.getOperator() + + " and " + rhs.getOperator() + "."); + } + + private static String getNumberAsString(OperatorNode<ExpressionOperator> ast) { + String negative = ""; + OperatorNode<ExpressionOperator> currentAst = ast; + if (currentAst.getOperator() == ExpressionOperator.NEGATE) { + negative = "-"; + currentAst = currentAst.getArgument(0); + } + assertHasOperator(currentAst, ExpressionOperator.LITERAL); + return negative + currentAst.getArgument(0).toString(); + } + + @NonNull + private static String fetchConditionWord( + OperatorNode<ExpressionOperator> ast) { + OperatorNode<ExpressionOperator> lhs = ast.getArgument(0); + OperatorNode<ExpressionOperator> rhs = ast.getArgument(1); + if (lhs.getOperator() == ExpressionOperator.LITERAL + || lhs.getOperator() == ExpressionOperator.NEGATE) { + assertHasOperator(rhs, ExpressionOperator.READ_FIELD); + return getNumberAsString(lhs); + } + if (rhs.getOperator() == ExpressionOperator.LITERAL + || rhs.getOperator() == ExpressionOperator.NEGATE) { + assertHasOperator(lhs, ExpressionOperator.READ_FIELD); + return getNumberAsString(rhs); + } + throw new IllegalArgumentException( + "Expected LITERAL/NEGATE and READ_FIELD, got " + + lhs.getOperator() + " and " + rhs.getOperator() + "."); + } + + private static boolean isIndexOnLeftHandSide( + 
OperatorNode<ExpressionOperator> ast) { + return ast.getArgument(0, OperatorNode.class).getOperator() == ExpressionOperator.READ_FIELD; + } + + @NonNull + private CompositeItem buildAnd(OperatorNode<ExpressionOperator> ast) { + AndItem andItem = new AndItem(); + NotItem notItem = new NotItem(); + convertVarArgsAnd(ast, 0, andItem, notItem); + Preconditions + .checkArgument(andItem.getItemCount() > 0, + "Vespa does not support AND with no logically positive branches."); + if (notItem.getItemCount() == 0) { + return andItem; + } + if (andItem.getItemCount() == 1) { + notItem.setPositiveItem(andItem.getItem(0)); + } else { + notItem.setPositiveItem(andItem); + } + return notItem; + } + + @NonNull + private CompositeItem buildOr(OperatorNode<ExpressionOperator> spec) { + return convertVarArgs(spec, 0, new OrItem()); + } + + @NonNull + private CompositeItem buildWeakAnd(OperatorNode<ExpressionOperator> spec) { + WeakAndItem weakAnd = new WeakAndItem(); + Integer targetNumHits = getAnnotation(spec, TARGET_NUM_HITS, + Integer.class, null, "desired minimum hits to produce"); + if (targetNumHits != null) { + weakAnd.setN(targetNumHits); + } + Integer scoreThreshold = getAnnotation(spec, SCORE_THRESHOLD, + Integer.class, null, "min dot product score for hit inclusion"); + if (scoreThreshold != null) { + weakAnd.setScoreThreshold(scoreThreshold); + } + return convertVarArgs(spec, 1, weakAnd); + } + + @NonNull + private CompositeItem buildRank(OperatorNode<ExpressionOperator> spec) { + return convertVarArgs(spec, 1, new RankItem()); + } + + @NonNull + private CompositeItem convertVarArgs(OperatorNode<ExpressionOperator> ast, + int argIdx, @NonNull + CompositeItem out) { + Iterable<OperatorNode<ExpressionOperator>> args = ast + .getArgument(argIdx); + for (OperatorNode<ExpressionOperator> arg : args) { + assertHasOperator(arg, ExpressionOperator.class); + out.addItem(convertExpression(arg)); + } + return out; + } + + private void 
convertVarArgsAnd(OperatorNode<ExpressionOperator> ast, + int argIdx, AndItem outAnd, NotItem outNot) { + Iterable<OperatorNode<ExpressionOperator>> args = ast + .getArgument(argIdx); + for (OperatorNode<ExpressionOperator> arg : args) { + assertHasOperator(arg, ExpressionOperator.class); + if (arg.getOperator() == ExpressionOperator.NOT) { + OperatorNode<ExpressionOperator> exp = arg.getArgument(0); + assertHasOperator(exp, ExpressionOperator.class); + outNot.addNegativeItem(convertExpression(exp)); + } else { + outAnd.addItem(convertExpression(arg)); + } + } + } + + @NonNull + private Item buildTermSearch(OperatorNode<ExpressionOperator> ast) { + assertHasOperator(ast, ExpressionOperator.CONTAINS); + return instantiateLeafItem( + getIndex(ast.<OperatorNode<ExpressionOperator>> getArgument(0)), + ast.<OperatorNode<ExpressionOperator>> getArgument(1)); + } + + @NonNull + private Item buildRegExpSearch(OperatorNode<ExpressionOperator> ast) { + assertHasOperator(ast, ExpressionOperator.MATCHES); + String field = getIndex(ast.<OperatorNode<ExpressionOperator>> getArgument(0)); + OperatorNode<ExpressionOperator> ast1 = ast.<OperatorNode<ExpressionOperator>> getArgument(1); + String wordData = getStringContents(ast1); + RegExpItem regExp = new RegExpItem(field, true, wordData); + return leafStyleSettings(ast1, regExp); + } + + + @NonNull + private Item buildRange(OperatorNode<ExpressionOperator> spec) { + assertHasOperator(spec, ExpressionOperator.CALL); + assertHasFunctionName(spec, RANGE); + + IntItem range = instantiateRangeItem( + spec.<List<OperatorNode<ExpressionOperator>>> getArgument(1), + spec); + return leafStyleSettings(spec, range); + } + + private static Number negate(Number x) { + if (x.getClass() == Integer.class) { + int x1 = x.intValue(); + return Integer.valueOf(-x1); + } else if (x.getClass() == Long.class) { + long x1 = x.longValue(); + return Long.valueOf(-x1); + } else if (x.getClass() == Float.class) { + float x1 = x.floatValue(); + return 
Float.valueOf(-x1); + } else if (x.getClass() == Double.class) { + double x1 = x.doubleValue(); + return Double.valueOf(-x1); + } else { + throw newUnexpectedArgumentException(x.getClass(), Integer.class, + Long.class, Float.class, Double.class); + } + } + + @NonNull + private IntItem instantiateRangeItem( + List<OperatorNode<ExpressionOperator>> args, + OperatorNode<ExpressionOperator> spec) { + Preconditions.checkArgument(args.size() == 3, + "Expected 3 arguments, got %s.", args.size()); + + Number lowerArg = getBound(args.get(1)); + Number upperArg = getBound(args.get(2)); + String bounds = getAnnotation(spec, BOUNDS, String.class, null, + "whether bounds should be open or closed"); + // TODO: add support for implicit transforms + if (bounds == null) { + return new RangeItem(lowerArg, upperArg, getIndex(args.get(0))); + } else { + Limit from; + Limit to; + if (BOUNDS_OPEN.equals(bounds)) { + from = new Limit(lowerArg, false); + to = new Limit(upperArg, false); + } else if (BOUNDS_LEFT_OPEN.equals(bounds)) { + from = new Limit(lowerArg, false); + to = new Limit(upperArg, true); + } else if (BOUNDS_RIGHT_OPEN.equals(bounds)) { + from = new Limit(lowerArg, true); + to = new Limit(upperArg, false); + } else { + throw newUnexpectedArgumentException(bounds, BOUNDS_OPEN, + BOUNDS_LEFT_OPEN, BOUNDS_RIGHT_OPEN); + } + return new IntItem(from, to, getIndex(args.get(0))); + } + } + + private Number getBound(OperatorNode<ExpressionOperator> bound) { + Number boundValue; + OperatorNode<ExpressionOperator> currentBound = bound; + boolean negate = false; + if (currentBound.getOperator() == ExpressionOperator.NEGATE) { + currentBound = currentBound.getArgument(0); + negate = true; + } + assertHasOperator(currentBound, ExpressionOperator.LITERAL); + boundValue = currentBound.getArgument(0, Number.class); + if (negate) { + boundValue = negate(boundValue); + } + return boundValue; + } + + @NonNull + private Item instantiateLeafItem(String field, + OperatorNode<ExpressionOperator> 
ast) { + switch (ast.getOperator()) { + case LITERAL: + case VARREF: + return instantiateWordItem(field, ast, null); + case CALL: + return instantiateCompositeLeaf(field, ast); + default: + throw newUnexpectedArgumentException(ast.getOperator().name(), + ExpressionOperator.CALL, ExpressionOperator.LITERAL); + } + } + + @NonNull + private Item instantiateCompositeLeaf(String field, + OperatorNode<ExpressionOperator> ast) { + List<String> names = ast.getArgument(0); + Preconditions.checkArgument(names.size() == 1, + "Expected 1 name, got %s.", names.size()); + switch (names.get(0)) { + case PHRASE: + return instantiatePhraseItem(field, ast); + case NEAR: + return instantiateNearItem(field, ast); + case ONEAR: + return instantiateONearItem(field, ast); + case EQUIV: + return instantiateEquivItem(field, ast); + case ALTERNATIVES: + return instantiateWordAlternativesItem(field, ast); + default: + throw newUnexpectedArgumentException(names.get(0), EQUIV, NEAR, + ONEAR, PHRASE); + } + } + + private Item instantiateWordAlternativesItem(String field, OperatorNode<ExpressionOperator> ast) { + List<OperatorNode<ExpressionOperator>> args = ast.getArgument(1); + Preconditions.checkArgument(args.size() >= 1, "Expected 1 or more arguments, got %s.", args.size()); + Preconditions.checkArgument(args.get(0).getOperator() == ExpressionOperator.MAP, "Expected MAP, got %s.", args.get(0) + .getOperator()); + + List<WordAlternativesItem.Alternative> terms = new ArrayList<>(); + List<String> keys = args.get(0).getArgument(0); + List<OperatorNode<ExpressionOperator>> values = args.get(0).getArgument(1); + for (int i = 0; i < keys.size(); ++i) { + String term = keys.get(i); + double exactness; + OperatorNode<ExpressionOperator> value = values.get(i); + switch (value.getOperator()) { + case LITERAL: + exactness = value.getArgument(0, Double.class); + break; + default: + throw newUnexpectedArgumentException(value.getOperator(), ExpressionOperator.LITERAL); + } + terms.add(new 
WordAlternativesItem.Alternative(term, exactness)); + } + Substring origin = getOrigin(ast); + final Boolean isFromQuery = getAnnotation(ast, IMPLICIT_TRANSFORMS, Boolean.class, Boolean.TRUE, + IMPLICIT_TRANSFORMS_DESCRIPTION); + return leafStyleSettings(ast, new WordAlternativesItem(field, isFromQuery, origin, terms)); + } + + @NonNull + private Item instantiateEquivItem(String field, + OperatorNode<ExpressionOperator> ast) { + List<OperatorNode<ExpressionOperator>> args = ast.getArgument(1); + Preconditions.checkArgument(args.size() >= 2, + "Expected 2 or more arguments, got %s.", args.size()); + + EquivItem equiv = new EquivItem(); + equiv.setIndexName(field); + for (OperatorNode<ExpressionOperator> arg : args) { + switch (arg.getOperator()) { + case LITERAL: + equiv.addItem(instantiateWordItem(field, arg, equiv.getClass())); + break; + case CALL: + assertHasFunctionName(arg, PHRASE); + equiv.addItem(instantiatePhraseItem(field, arg)); + break; + default: + throw newUnexpectedArgumentException(arg.getOperator(), + ExpressionOperator.CALL, ExpressionOperator.LITERAL); + } + } + return leafStyleSettings(ast, equiv); + } + + @NonNull + private Item instantiateWordItem(String field, + OperatorNode<ExpressionOperator> ast, Class<?> parent) { + return instantiateWordItem(field, ast, parent, SegmentWhen.POSSIBLY); + } + + @NonNull + private Item instantiateWordItem(String field, + OperatorNode<ExpressionOperator> ast, Class<?> parent, + SegmentWhen segmentPolicy) { + String wordData = getStringContents(ast); + return instantiateWordItem(field, wordData, ast, parent, + segmentPolicy, null); + } + + @NonNull + private Item instantiateWordItem(String field, + String rawWord, + OperatorNode<ExpressionOperator> ast, Class<?> parent, + SegmentWhen segmentPolicy, Language language) { + String wordData = rawWord; + if (getAnnotation(ast, NFKC, Boolean.class, Boolean.TRUE, + "setting for whether to NFKC normalize input data")) { + wordData = normalizer.normalize(wordData); + } 
+ boolean fromQuery = getAnnotation(ast, IMPLICIT_TRANSFORMS, + Boolean.class, Boolean.TRUE, IMPLICIT_TRANSFORMS_DESCRIPTION); + boolean prefixMatch = getAnnotation(ast, PREFIX, Boolean.class, + Boolean.FALSE, + "setting for whether to use prefix match of input data"); + boolean suffixMatch = getAnnotation(ast, SUFFIX, Boolean.class, + Boolean.FALSE, + "setting for whether to use suffix match of input data"); + boolean substrMatch = getAnnotation(ast, SUBSTRING, Boolean.class, + Boolean.FALSE, + "setting for whether to use substring match of input data"); + Preconditions.checkArgument((prefixMatch ? 1 : 0) + + (substrMatch ? 1 : 0) + (suffixMatch ? 1 : 0) < 2, + "Only one of prefix, substring and suffix can be set."); + @NonNull + final TaggableItem wordItem; + + if (prefixMatch) { + wordItem = new PrefixItem(wordData, fromQuery); + } else if (suffixMatch) { + wordItem = new SuffixItem(wordData, fromQuery); + } else if (substrMatch) { + wordItem = new SubstringItem(wordData, fromQuery); + } else { + switch (segmentPolicy) { + case NEVER: + wordItem = new WordItem(wordData, fromQuery); + break; + case POSSIBLY: + if (shouldResegmentWord(field, fromQuery)) { + wordItem = resegment(field, ast, wordData, fromQuery, + parent, language); + } else { + wordItem = new WordItem(wordData, fromQuery); + } + break; + case ALWAYS: + wordItem = resegment(field, ast, wordData, fromQuery, parent, + language); + break; + default: + throw new IllegalArgumentException( + "Unexpected segmenting rule: " + segmentPolicy); + } + } + if (wordItem instanceof WordItem) { + prepareWord(field, ast, fromQuery, (WordItem) wordItem); + } + return (Item) leafStyleSettings(ast, wordItem); + } + + @SuppressWarnings({"deprecation"}) + private boolean shouldResegmentWord(String field, boolean fromQuery) { + return resegment && fromQuery && ! 
indexFactsSession.getIndex(field).isAttribute(); + } + + @NonNull + private TaggableItem resegment(String field, + OperatorNode<ExpressionOperator> ast, String wordData, + boolean fromQuery, Class<?> parent, Language language) { + final TaggableItem wordItem; + String toSegment = wordData; + final Substring s = getOrigin(ast); + final Language usedLanguage = language == null ? currentlyParsing.getLanguage() : language; + if (s != null) { + toSegment = s.getValue(); + } + List<String> words = segmenter.segment(toSegment, + usedLanguage); + if (words.size() == 0) { + wordItem = new WordItem(wordData, fromQuery); + } else if (words.size() == 1 || !phraseArgumentSupported(parent)) { + wordItem = new WordItem(words.get(0), fromQuery); + } else { + wordItem = new PhraseSegmentItem(toSegment, fromQuery, false); + ((PhraseSegmentItem) wordItem).setIndexName(field); + for (String w : words) { + WordItem segment = new WordItem(w, fromQuery); + prepareWord(field, ast, fromQuery, segment); + ((PhraseSegmentItem) wordItem).addItem(segment); + } + ((PhraseSegmentItem) wordItem).lock(); + } + return wordItem; + } + + private boolean phraseArgumentSupported(Class<?> parent) { + if (parent == null) { + return true; + } else if (parent == PhraseItem.class) { + // not supported in backend, but the container flattens the + // arguments itself + return true; + } else if (parent == EquivItem.class) { + return true; + } else { + return false; + } + } + + private void prepareWord(String field, + OperatorNode<ExpressionOperator> ast, boolean fromQuery, + WordItem wordItem) { + wordItem.setIndexName(field); + wordStyleSettings(ast, wordItem); + if (shouldResegmentWord(field, fromQuery)) { + // force re-stemming, new case normalization, etc + wordItem.setStemmed(false); + wordItem.setLowercased(false); + wordItem.setNormalizable(true); + } + } + + @NonNull + private <T extends TaggableItem> T leafStyleSettings(OperatorNode<?> ast, + @NonNull + T out) { + { + Map<?, ?> connectivity = 
getAnnotation(ast, CONNECTIVITY, + Map.class, null, "connectivity settings"); + if (connectivity != null) { + connectedItems.add(new ConnectedItem(out, getMapValue( + CONNECTIVITY, connectivity, CONNECTION_ID, + Integer.class), getMapValue(CONNECTIVITY, connectivity, + CONNECTION_WEIGHT, Number.class).doubleValue())); + } + Number significance = getAnnotation(ast, SIGNIFICANCE, + Number.class, null, "term significance"); + if (significance != null) { + out.setSignificance(significance.doubleValue()); + } + Integer uniqueId = getAnnotation(ast, UNIQUE_ID, Integer.class, + null, "term ID", false); + if (uniqueId != null) { + out.setUniqueID(uniqueId); + identifiedItems.put(uniqueId, out); + } + } + { + Item leaf = (Item) out; + Map<?, ?> itemAnnotations = getAnnotation(ast, ANNOTATIONS, + Map.class, Collections.emptyMap(), "item annotation map"); + for (Map.Entry<?, ?> entry : itemAnnotations.entrySet()) { + Preconditions.checkArgument(entry.getKey() instanceof String, + "Expected String annotation key, got %s.", entry + .getKey().getClass()); + Preconditions.checkArgument(entry.getValue() instanceof String, + "Expected String annotation value, got %s.", entry + .getValue().getClass()); + leaf.addAnnotation((String) entry.getKey(), entry.getValue()); + } + Boolean filter = getAnnotation(ast, FILTER, Boolean.class, null, + FILTER_DESCRIPTION); + if (filter != null) { + leaf.setFilter(filter); + } + Boolean isRanked = getAnnotation(ast, RANKED, Boolean.class, null, + RANKED_DESCRIPTION); + if (isRanked != null) { + leaf.setRanked(isRanked); + } + String label = getAnnotation(ast, LABEL, String.class, null, + "item label"); + if (label != null) { + leaf.setLabel(label); + } + Integer weight = getAnnotation(ast, WEIGHT, Integer.class, null, + "term weight for ranking"); + if (weight != null) { + leaf.setWeight(weight); + } + } + if (out instanceof IntItem) { + IntItem number = (IntItem) out; + Integer hitLimit = getCappedRangeSearchParameter(ast); + if (hitLimit != null) 
{ + number.setHitLimit(hitLimit.intValue()); + } + } + + return out; + } + + private Integer getCappedRangeSearchParameter(OperatorNode<?> ast) { + Integer hitLimit = getAnnotation(ast, HIT_LIMIT, Integer.class, null, "hit limit"); + + if (hitLimit != null) { + Boolean ascending = getAnnotation(ast, ASCENDING_HITS_ORDER, Boolean.class, null, + "ascending population ordering for capped range search"); + Boolean descending = getAnnotation(ast, DESCENDING_HITS_ORDER, Boolean.class, null, + "descending population ordering for capped range search"); + Preconditions.checkArgument(ascending == null || descending == null, + "Settings for both ascending and descending ordering set, only one of these expected."); + if (Boolean.TRUE.equals(descending) || Boolean.FALSE.equals(ascending)) { + hitLimit = Integer.valueOf(hitLimit.intValue() * -1); + } + } + return hitLimit; + } + + @Beta + public boolean isQueryParser() { + return queryParser; + } + + @Beta + public void setQueryParser(boolean queryParser) { + this.queryParser = queryParser; + } + + @Beta + public void setUserQuery(@NonNull Query userQuery) { + this.userQuery = userQuery; + } + + @Beta + public Set<String> getYqlSummaryFields() { + return yqlSummaryFields; + } + + @Beta + public List<VespaGroupingStep> getGroupingSteps() { + return groupingSteps; + } + + /** + * Give the offset expected from the latest parsed query if anything is + * explicitly specified. + * + * @return an Integer instance or null + */ + public Integer getOffset() { + return offset; + } + + /** + * Give the number of hits expected from the latest parsed query if anything + * is explicitly specified. + * + * @return an Integer instance or null + */ + public Integer getHits() { + return hits; + } + + /** + * The timeout specified in the YQL+ query last parsed. + * + * @return an Integer instance or null + */ + public Integer getTimeout() { + return timeout; + } + + /** + * The sorting specified in the YQL+ query last parsed. 
+ * + * @return a Sorting instance or null + */ + public Sorting getSorting() { + return sorting; + } + + Set<String> getDocTypes() { + return docTypes; + } + + Set<String> getYqlSources() { + return yqlSources; + } + + private static void assertHasOperator(OperatorNode<?> ast, + Class<? extends Operator> expectedOperatorClass) { + Preconditions.checkArgument( + expectedOperatorClass.isInstance(ast.getOperator()), + "Expected operator class %s, got %s.", + expectedOperatorClass.getName(), ast.getOperator().getClass() + .getName()); + } + + private static void assertHasOperator(OperatorNode<?> ast, + Operator expectedOperator) { + Preconditions.checkArgument(ast.getOperator() == expectedOperator, + "Expected operator %s, got %s.", expectedOperator, + ast.getOperator()); + } + + private static void assertHasFunctionName(OperatorNode<?> ast, + String expectedFunctionName) { + List<String> names = ast.getArgument(0); + Preconditions.checkArgument(expectedFunctionName.equals(names.get(0)), + "Expected function '%s', got '%s'.", expectedFunctionName, + names.get(0)); + } + + private static void addItems(OperatorNode<ExpressionOperator> ast, + WeightedSetItem out) { + switch (ast.getOperator()) { + case MAP: + addStringItems(ast, out); + break; + case ARRAY: + addLongItems(ast, out); + break; + default: + throw newUnexpectedArgumentException(ast.getOperator(), + ExpressionOperator.ARRAY, ExpressionOperator.MAP); + } + } + + private static void addStringItems(OperatorNode<ExpressionOperator> ast, + WeightedSetItem out) { + List<String> keys = ast.getArgument(0); + List<OperatorNode<ExpressionOperator>> values = ast.getArgument(1); + for (int i = 0; i < keys.size(); ++i) { + OperatorNode<ExpressionOperator> tokenWeight = values.get(i); + assertHasOperator(tokenWeight, ExpressionOperator.LITERAL); + out.addToken(keys.get(i), tokenWeight.getArgument(0, Integer.class)); + } + } + + private static void addLongItems(OperatorNode<ExpressionOperator> ast, + WeightedSetItem out) { 
+ List<OperatorNode<ExpressionOperator>> values = ast.getArgument(0); + for (OperatorNode<ExpressionOperator> value : values) { + assertHasOperator(value, ExpressionOperator.ARRAY); + List<OperatorNode<ExpressionOperator>> args = value.getArgument(0); + Preconditions.checkArgument(args.size() == 2, + "Expected item and weight, got %s.", args); + + OperatorNode<ExpressionOperator> tokenValueNode = args.get(0); + assertHasOperator(tokenValueNode, ExpressionOperator.LITERAL); + Number tokenValue = tokenValueNode.getArgument(0, Number.class); + Preconditions.checkArgument(tokenValue instanceof Integer + || tokenValue instanceof Long, + "Expected Integer or Long, got %s.", tokenValue.getClass() + .getName()); + + OperatorNode<ExpressionOperator> tokenWeightNode = args.get(1); + assertHasOperator(tokenWeightNode, ExpressionOperator.LITERAL); + Integer tokenWeight = tokenWeightNode.getArgument(0, Integer.class); + + out.addToken(tokenValue.longValue(), tokenWeight); + } + } + + private void wordStyleSettings(OperatorNode<ExpressionOperator> ast, + WordItem out) { + Substring origin = getOrigin(ast); + if (origin != null) { + out.setOrigin(origin); + } + Boolean usePositionData = getAnnotation(ast, USE_POSITION_DATA, + Boolean.class, null, + USE_POSITION_DATA_DESCRIPTION); + if (usePositionData != null) { + out.setPositionData(usePositionData); + } + Boolean stem = getAnnotation(ast, STEM, Boolean.class, null, + STEM_DESCRIPTION); + if (stem != null) { + out.setStemmed(!stem); + } + Boolean normalizeCase = getAnnotation(ast, NORMALIZE_CASE, + Boolean.class, null, + NORMALIZE_CASE_DESCRIPTION); + if (normalizeCase != null) { + out.setLowercased(!normalizeCase); + } + Boolean accentDrop = getAnnotation(ast, ACCENT_DROP, Boolean.class, + null, + ACCENT_DROP_DESCRIPTION); + if (accentDrop != null) { + out.setNormalizable(accentDrop); + } + Boolean andSegmenting = getAnnotation(ast, AND_SEGMENTING, + Boolean.class, null, + "setting for whether to force using AND for segments on 
and off"); + if (andSegmenting != null) { + if (andSegmenting) { + out.setSegmentingRule(SegmentingRule.BOOLEAN_AND); + } else { + out.setSegmentingRule(SegmentingRule.PHRASE); + } + } + } + + @NonNull + private String getIndex(OperatorNode<ExpressionOperator> operatorNode) { + String index = fetchFieldRead(operatorNode); + Preconditions.checkArgument(indexFactsSession.isIndex(index), "Field '%s' does not exist.", index); + return indexFactsSession.getCanonicName(index); + } + + private Substring getOrigin(OperatorNode<ExpressionOperator> ast) { + Map<?, ?> origin = getAnnotation(ast, ORIGIN, Map.class, null, + ORIGIN_DESCRIPTION); + if (origin == null) { + return null; + } + String original = getMapValue(ORIGIN, origin, ORIGIN_ORIGINAL, + String.class); + int offset = getMapValue(ORIGIN, origin, ORIGIN_OFFSET, Integer.class); + int length = getMapValue(ORIGIN, origin, ORIGIN_LENGTH, Integer.class); + return new Substring(offset, length + offset, original); + } + + private static <T> T getMapValue(String mapName, Map<?, ?> map, String key, + Class<T> expectedValueClass) { + Object value = map.get(key); + Preconditions.checkArgument(value != null, + "Map annotation '%s' must contain an entry with key '%s'.", + mapName, key); + assert value != null; + Preconditions.checkArgument(expectedValueClass.isInstance(value), + "Expected %s for entry '%s' in map annotation '%s', got %s.", + expectedValueClass.getName(), key, mapName, value.getClass() + .getName()); + return expectedValueClass.cast(value); + } + + private <T> T getAnnotation(OperatorNode<?> ast, String key, + Class<T> expectedClass, T defaultValue, String description) { + return getAnnotation(ast, key, expectedClass, defaultValue, + description, true); + } + + private <T> T getAnnotation(OperatorNode<?> ast, String key, + Class<T> expectedClass, T defaultValue, String description, boolean considerParents) { + Object value = ast.getAnnotation(key); + for (Iterator<OperatorNode<?>> i = annotationStack.iterator(); 
value == null + && considerParents && i.hasNext();) { + value = i.next().getAnnotation(key); + } + if (value == null) { + return defaultValue; + } + Preconditions.checkArgument(expectedClass.isInstance(value), + "Expected %s for annotation '%s' (%s), got %s.", expectedClass + .getName(), key, description, value.getClass() + .getName()); + return expectedClass.cast(value); + } + + private static IllegalArgumentException newUnexpectedArgumentException( + Object actual, Object... expected) { + StringBuilder out = new StringBuilder("Expected "); + for (int i = 0, len = expected.length; i < len; ++i) { + out.append(expected[i]); + if (i < len - 2) { + out.append(", "); + } else if (i < len - 1) { + out.append(" or "); + } + } + out.append(", got ").append(actual).append("."); + return new IllegalArgumentException(out.toString()); + } + + String getSegmenterBackend() { + return segmenterBackend; + } + + Version getSegmenterVersion() { + return segmenterVersion; + } + + private static final class ConnectedItem { + + final double weight; + final int toId; + final TaggableItem fromItem; + + ConnectedItem(TaggableItem fromItem, int toId, double weight) { + this.weight = weight; + this.toId = toId; + this.fromItem = fromItem; + } + } +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/YqlQuery.java b/container-search/src/main/java/com/yahoo/search/yql/YqlQuery.java new file mode 100644 index 00000000000..27c27b88d24 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/YqlQuery.java @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +/** + * A Yql query. These usually contains variables, which allows the yql query to be parsed once at configuration + * time and turned into fully specified queries at request time without reparsing. 
// TODO: Eventually we need to define the set of classes available here

/**
 * The class (kind) of an annotation, identified by a plain string name.
 *
 * Two instances are equal when their names are equal. The name may be null,
 * in which case the instance only equals other instances with a null name.
 */
public class AnnotationClass {

    /** The name of this annotation class; never reassigned after construction. */
    private final String clazz;

    /**
     * Creates an annotation class with the given name.
     *
     * @param clazz the name of the class, may be null
     */
    public AnnotationClass(String clazz) {
        this.clazz = clazz;
    }

    /** Returns the name this annotation class was created with, possibly null. */
    public String getClazz() {
        return clazz;
    }

    /**
     * Compares by name.
     *
     * The null check is made on this instance's name before it is dereferenced,
     * so comparing a null-named instance with a named one returns false instead
     * of throwing a NullPointerException.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof AnnotationClass)) {
            return false;
        }
        AnnotationClass other = (AnnotationClass) o;
        return clazz == null ? other.clazz == null : clazz.equals(other.clazz);
    }

    /** Returns the hash code of the name, or 0 when the name is null (consistent with equals). */
    @Override
    public int hashCode() {
        return clazz == null ? 0 : clazz.hashCode();
    }

    /** Returns the name of this class, so that instances print readably when used as map keys. */
    @Override
    public String toString() {
        return String.valueOf(clazz);
    }

}
+ */ + public Double getDouble(String key) { + Object o = getMap().get(key); + if(o instanceof Number) { + return ((Number)o).doubleValue(); + } else if(o == null) { + return null; + } else { + return Double.parseDouble(o.toString()); + } + } + + /** + * Helper function to get a String from the Annotation. This function will simply call <code>toString()</code> on the + * object saved in the Annotation or return null if the object is null; + */ + public String getString(String key) { + Object o = getMap().get(key); + if(o == null) { + return null; + } else { + return o.toString(); + } + } + + /** + * Helper function to get a Double annotation. + * <p> + * This function first checks if the Object in a map is a <code>Number</code>, and intValue() is called on it. + * If it is not, then Double.parseDouble() is called on the string representation of the object. If the string + * is not parseable as a double, a NumberFormatException is thrown. + */ + public Integer getInteger(String key) { + Object o = getMap().get(key); + if(o == null) { + return null; + } else if(o instanceof Number) { + return ((Number)o).intValue(); + } else { + return Integer.parseInt(o.toString()); + } + } + + /** + * Helper function to get a Boolean annotation. + */ + public Boolean getBoolean(String key) { + Object o = getMap().get(key); + if(o == null || !(o instanceof Boolean)) { + return null; + } else { + return (Boolean) o; + } + } + + +} diff --git a/container-search/src/main/java/com/yahoo/text/interpretation/Interpretation.java b/container-search/src/main/java/com/yahoo/text/interpretation/Interpretation.java new file mode 100644 index 00000000000..d80ff80f172 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/text/interpretation/Interpretation.java @@ -0,0 +1,404 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.text.interpretation; + +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * + * An interpretation of a text. + * + * This class it the main class to use when when querying and modifying annotations for a text. + * + * The interpretation consists of a tree of annotations, with the nodes in tree being Spans. An annotation + * is defined by its annotationClass ("person"), and by a key/value map of + * parameters for that annotationClass (if the person is an actor or other notable person). + * + * This class is the main class for querying and setting annotations, where modifying the span tree + * is not needed. + * + * @see Span + * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> + */ +public class Interpretation { + + private Modification modification; + private double probability; + private Span rootSpan; + + public final static AnnotationClass INTERPRETATION_CLASS = new AnnotationClass("interpretation"); + + + /** + * Creates a new interpretation and a new modification from the text, + * with the probability set to the default value(0.0). + + */ + public Interpretation(String text) { + this(text,0.0); + } + + /** + * Creates a new interpretation and a new modification from the text, with the given probability. + */ + public Interpretation(String text, double probabilty) { + this(new Modification(text),probabilty); + } + + + /** + * Creates a new interpretation based on the modification, with the probability set to the default value(0.0). + */ + public Interpretation(Modification modification) { + this(modification,0.0); + } + + /** + * Creates an interpretation based on the modification given. 
+ */ + public Interpretation(Modification modification,double probability) { + this.modification = modification; + rootSpan = new Span(modification); + setProbability(probability); + } + + + public Modification getModification() { + return modification; + } + + + /** + * The probability that this interpretation is correct. + * @return a value between 0.0 and 1.0 that gives the probability that this interpretation is correct + */ + public double getProbability() { + return probability; + } + + /** + * Sets he probability that this interpretation is the correct. The value is not normalized, + * meaning that it can have a value larger than 1.0. + * + * The value is used when sorting interpretations. + */ + public void setProbability(double probability) { + if (probability < 0) { + probability = 0.0; + } else if (probability > 1.0) { + probability = 1.0; + } + this.probability = probability; + + } + + /** Returns the root of the tree representation of the interpretation */ + public Span root() { return rootSpan; } + + + // Wrapper methods for Span + + /** + * Return the annotation with the given annotationclass (and create it if necessary). + * @param annotationClass The class of the annotation + * + */ + public Annotations annotate(String annotationClass) { + return annotate(new AnnotationClass(annotationClass)); + } + + /** + * Return the annotation with the given annotationclass (and create it if necessary). + * @param annotationClass The class of the annotation + * + */ + public Annotations annotate(AnnotationClass annotationClass) { + return rootSpan.annotate(annotationClass); + } + + /** + * Sets a key/value pair for an annotation. If an annotation of the class does not + * exist, a new is created. 
+ * + * A shortcut for annotate(annotationClass).put(key,value) + * @param annotationClass class of the annotation + * @param key key of the property to set on the annotation + * @param value value of the property to set on the annotation + */ + public void annotate(String annotationClass, String key, Object value) { + annotate(new AnnotationClass(annotationClass)).put(key,value); + } + + /** + * Sets a key/value pair for an annotation. If an annotation of the class does not + * exist, a new is created. + * + * A shortcut for annotate(annotationClass).put(key,value) + * @param annotationClass class of the annotation + * @param key key of the property to set on the annotation + * @param value value of the property to set on the annotation + */ + public void annotate(AnnotationClass annotationClass, String key, Object value) { + annotate(annotationClass).put(key,value); + } + + /** + * Returns the annotation with the given annotationClass (and create it if necessary). + * @param from start of the substring + * @param to end of the substring + * @param annotationClass class of the annotation + */ + public Annotations annotate(int from, int to, String annotationClass) { + return annotate(from,to,new AnnotationClass(annotationClass)); + } + + /** + * Returns the annotation with the given annotationClass (and create it if necessary). + * @param from start of the substring + * @param to end of the substring + * @param annotationClass class of the annotation + */ + public Annotations annotate(int from, int to, AnnotationClass annotationClass) { + return rootSpan.annotate(from,to,annotationClass); + } + + /** + * Sets a key/value pair for an annotation of a substring. If an annotation of the class + * does not exist, a new is created. 
+ * + * A shortcut for annotate(from, to, annotationClass, key, value + * @param from start of the substring + * @param to end of the substring + * @param annotationClass class of the annotation + * @param key key of property to set on annotation + * @param value value of property to set on annotation + */ + public void annotate(int from, int to, String annotationClass, String key, Object value) { + annotate(from, to,new AnnotationClass(annotationClass)).put(key, value); + } + + /** + * Sets a key/value pair for an annotation of a substring. If an annotation of the class + * does not exist, a new is created. + * + * A shortcut for annotate(from, to, annotationClass, key, value + * @param from start of the substring + * @param to end of the substring + * @param annotationClass class of the annotation + * @param key key of property to set on annotation + * @param value value of property to set on annotation + */ + public void annotate(int from, int to, AnnotationClass annotationClass, String key, Object value) { + annotate(from, to, annotationClass).put(key, value); + } + + /** + * Gets all annotations mentioned in the query. This will also return all subannotations, even those that + * override their parents + */ + public Map<AnnotationClass,List<Annotations>> getAll() { + return rootSpan.getAllAnnotations(); + } + + /** + * Returns a list of all annotations of the given class that exists in the text. This will also return + * all subannotations, even those that override their parents. + * If there are none, an empty list is returned, never null. The returned list should not be modified. + */ + public List<Annotations> getAll(String annotationClass) { + return getAll(new AnnotationClass(annotationClass)); + } + + /** + * Returns a list of all annotations of the given class that exists in the text. This will also return + * all subannotations, even those that override their parent. + * If there are none, an empty list is returned, never null. 
The returned list should not be modified. + */ + public List<Annotations> getAll(AnnotationClass annotationClass) { + // TODO: This implementation is very inefficient because it unnecessarily collects for all classes + Map<AnnotationClass,List<Annotations>> all = getAll(); + if(all.containsKey(annotationClass)){ + return all.get(annotationClass); + } else { + return Collections.emptyList(); + } + } + + /** + * Returns the annotation marked with the annotationClass. + * + * This is different from annotate(annotationClass) because a new annotation + * will not be created if it does not exist. + * + * @param annotationClass class of the annotation + * @return an annotation with the given class, null if it does not exists + */ + public Annotations get(String annotationClass) { + return get(new AnnotationClass(annotationClass)); + } + + /** + * Returns the annotation marked with the annotationClass. + * + * This is different from annotate(annotationClass) because a new annotation + * will not be created if it does not exist. + * + * @param annotationClass class of the annotation + * @return an annotation with the given class, null if it does not exists + */ + public Annotations get(AnnotationClass annotationClass) { + return rootSpan.getAnnotation(annotationClass); + } + + /** + * Gets the value of a property set on an annotation. + * If the annotation or the key/value pair does not exists, null + * is returned + */ + public Object get(String annotationClass,String key) { + return get(new AnnotationClass(annotationClass),key); + } + + /** + * Gets the value of a property set on an annotation. 
+ * If the annotation or the key/value pair does not exists, null + * is returned + */ + public Object get(AnnotationClass annotationClass,String key) { + Annotations annotations = get(annotationClass); + if(annotations != null) { + return annotations.get(key); + } else { + return null; + } + } + + /** + * Equivalent to <code>get(from,to,new AnnotationClass(annotationClass))</code> + */ + public Annotations get(int from, int to, String annotationClass) { + return get(from,to,new AnnotationClass(annotationClass)); + } + + /** + * Gets an annotation that is set on a substring. + * + * This function first tries to find an annotation of annotationClass that + * describe the range (from,to). If that does not exist, it tries to find the smallest range + * which both contain (from,to) and has an annotation of annotationClass. + * If that does not exist, null is returned. + * + * For example, if these annotations has been set for the text "new york city": + * i.annotate(0,3,"token") //new + * i.annotate(4,8,"token") //york + * i.annotate(9,13,"city") //tokem + * i.annotate(0,8,"city") //new york + * i.annotate(0,13,"city") //new york city + * + * then: + * + * i.get(0,3,"token") //returns "token" - annotation for"new" + * i.get(0,3,"city") //returns "city" - annotation for "new york" + * i.get(9,13,"city") //returns "city" - annotation for "new york city" + * + * @param from start of the substring + * @param to end of the substring + * @param annotationClass class of the annotation + * @return the anno + */ + public Annotations get(int from, int to, AnnotationClass annotationClass ) { + return rootSpan.getAnnotation(from,to,annotationClass); + } + + /** + * Get the value of a property set on a substring annotation. + * + * If the annotation or the key/value pair does not exists, null + * is returned. 
+ * + */ + public Object get(int from,int to,String annotationClass,String key) { + Annotations annotations = get(from,to,annotationClass); + if(annotations != null) { + return annotations.get(key); + } else { + return null; + } + } + + /** + * Gets all the annotationclasses that describes the text. + + */ + public Set<AnnotationClass> getClasses() { + return rootSpan.getClasses(); + } + + /** + * Gets all annotationclasses that describe a substring + */ + public Set<AnnotationClass> getClasses(int from,int to) { + return rootSpan.getClasses(from,to); + } + + + /** + * Gets the lowermost spans (usually the spans marked with token). + */ + public List<Span> getTokens() { + return rootSpan.getTokens(); + } + + /** + * Returns all spans that consists of the term given. If no span with that term exists, + * the empty list is returned. + */ + public List<Span> getTermSpans(String term) { + return rootSpan.getTermSpans(term); + } + + public @Override String toString() { + StringBuilder sb = new StringBuilder(); + Map<AnnotationClass, List<Annotations>> annotations = getAll(); + Iterator<Map.Entry<AnnotationClass,List<Annotations>>> mapIterator = annotations.entrySet().iterator(); + while (mapIterator.hasNext()) { + Map.Entry<AnnotationClass, List<Annotations>> entry = mapIterator.next(); + Iterator<Annotations> annoIterator = entry.getValue().iterator(); + sb.append(entry.getKey()).append(" : ["); + + while (annoIterator.hasNext()) { + Annotations annotation = annoIterator.next(); + sb.append("\"").append(annotation.getSubString()).append("\""); + dumpAnnotation(sb, annotation); + if(annoIterator.hasNext()) { + sb.append(","); + } + } + sb.append("]"); + if(mapIterator.hasNext()) { + sb.append(", "); + } + } + sb.append(")"); + return sb.toString(); + } + + private void dumpAnnotation(StringBuilder sb, Annotations annotations) { + if (annotations.getMap().size() > 0) { + sb.append(" : {"); + Iterator<Map.Entry<String,Object>> valueIterator = 
annotations.getMap().entrySet().iterator(); + while(valueIterator.hasNext()) { + Map.Entry<String,Object> value = valueIterator.next(); + sb.append(value.getKey()).append(" : ").append(value.getValue()); + if(valueIterator.hasNext()) { + sb.append(", "); + } + } + sb.append("}"); + + } + } +} diff --git a/container-search/src/main/java/com/yahoo/text/interpretation/Modification.java b/container-search/src/main/java/com/yahoo/text/interpretation/Modification.java new file mode 100644 index 00000000000..ab92b6de961 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/text/interpretation/Modification.java @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.text.interpretation; + +import java.util.HashMap; + +/** + * A modification of a text. + * + * This class represents a possible rewrite of an original text. Reasons for rewrite may be due to possible + * spelling errors in the text or to query expansion. 
+ * + * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> + */ +public class Modification extends HashMap<String,Object>{ + + /** + * + */ + private static final long serialVersionUID = -8522335044460396296L; + + + public final static AnnotationClass MODIFICATION_CLASS = new AnnotationClass("modification"); + + + private String text; + private Annotations annotations; + + public Modification(String text) { + this.text = text; + Span span = new Span(this); + this.annotations = span.annotate(MODIFICATION_CLASS); + } + + public String getText() { + return text; + } + + public Annotations getAnnotation() { + return annotations; + } + +} + diff --git a/container-search/src/main/java/com/yahoo/text/interpretation/Span.java b/container-search/src/main/java/com/yahoo/text/interpretation/Span.java new file mode 100644 index 00000000000..39457a0fc99 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/text/interpretation/Span.java @@ -0,0 +1,349 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.text.interpretation; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + + +/** + * Span is a description of a part of a text, modeled as a tree. + * + * A span is defined by the range (from,to) and by a set of annotations on that range. It also contains a set + * of child nodes that all have the restriction + * <code>child.from >= parent.from && child.to <= parent.to && (child.to-child.from) < (parent.to-parent.from)</code> + * This means that all spans on a text can be modeled as a tree, where all child spans are guaranteed to be contained + * inside its parent span. + * <p> + * A span will usually be used indirectly through Interpretation. 
+ * + * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> + */ +public class Span { + + private final Modification modification; + private List<Span> subSpans = null; //Lazy because of a large number of leaf nodes + private final Map<AnnotationClass, Annotations> annotations = new HashMap<>(); + private Span parent; //Yes, this _should_ be final, but might be changed when adding an annotation + private final int from; + private final int to; + + + /** + * Creates a new root span based on the modfication + */ + Span(final Modification modification) { + this.modification = modification; + this.parent = null; + this.from = 0; + this.to = modification.getText().length(); + } + + //This constructor is private to ensure that all child spans for a span is contained inside it. + private Span(int from, int to, Span parent) { + this.parent = parent; + this.modification = parent.modification; + this.from = from; + this.to = to; + } + + + + /** + * Returns the text that this spans is + */ + public String getText() { + return modification.getText().substring(from, to); + } + + + public String toString() { + return "SPAN: " + getText(); + } + + + public Annotations annotate(AnnotationClass clazz) { + Annotations annotations = this.annotations.get(clazz); + if (!this.annotations.containsKey(clazz)) { + annotations = new Annotations(this); + this.annotations.put(clazz, annotations); + } + return annotations; + } + + /** + * This will either create or get the annotation of the class annotation + */ + public Annotations annotate(int from, int to, AnnotationClass clazz) { + return addAnnotation(from, to, clazz); + } + + + /** + * Returns all annotations that are contained in either this subspan or in any of its subannotations + */ + public Map<AnnotationClass, List<Annotations>> getAllAnnotations() { + Map<AnnotationClass, List<Annotations>> result = new HashMap<>(); + getAllAnnotations(result); + return result; + } + + /** + * Returns all spans, either this or 
any of the spans that are inherits this span that match the given term + */ + public List<Span> getTermSpans(String term) { + List<Span> spans = new ArrayList<>(); + getTermSpans(term, spans); + return spans; + } + + /** + * Returns the annotations with a specific class for the area defined by this span + * <p> + * + * This function will query its parent to find any annotation that is set for an area that this span is contained + * in. If there are conflicts (several annotations defined with the same annotation class), the annotation + * that is defined for the smallest area (furthest down in the tree), is used. + */ + public Annotations getAnnotation(AnnotationClass clazz) { + return getAnnotation(from, to, clazz); + } + + /** + * Returns the annotations with a specific class for the area defined by (from,to). + * + * This function will query its parent to find any annotation that is set for an area that this span is contained + * in. If there are conflicts (several annotations defined with the same annotation class), the annotation + * that is defined for the smallest area (furthest down in the tree), is used. + * + * @throws RuntimeException if (from,to) is not contained in the span + */ + public Annotations getAnnotation(int from, int to, AnnotationClass clazz) { + if(from < this.from || to > this.to) { + throw new RuntimeException("Trying to get a range that is outside this span"); + } + if (this.parent != null) { + return parent.getAnnotation(from, to, clazz); + } else { + return getBestAnnotation(from, to, clazz ); + + } + } + + /** + * Returns all AnnotationClasses that are defined for this span and any of its superspans. + */ + public Set<AnnotationClass> getClasses() { + return getClasses(from, to); + + } + + /** + * Returns all AnnotationClasses that are defined for the range (from,to). 
+ * + * @throws RuntimeException if (from,to) is not contained in the span + */ + public Set<AnnotationClass> getClasses(int from, int to) { + if(from < this.from || to > this.to) { + throw new RuntimeException("Trying to get a range that is outside this span"); + } + if (this.parent != null) { + return parent.getClasses(from, to); + } else { + HashSet<AnnotationClass> classes = new HashSet<>(); + getAnnotationClasses(from, to, classes); + return classes; + } + } + + + + /** + * Returns an unmodifiable list of all spans below this span that is a leaf node + */ + public List<Span> getTokens() { + List<Span> spans = new ArrayList<>(); + getTokens(spans); + return Collections.unmodifiableList(spans); + } + + /** + * Returns true if this class + */ + public boolean hasClass(AnnotationClass clazz) { + return getClasses().contains(clazz); + } + + /** + * Returns all spans that are directly childrens of this span. If the span is a leaf, the empty + * list will be returned. The list is unmodifable. + */ + public List<Span> getSubSpans() { + return subSpans == null ? + Collections.<Span>emptyList() : + Collections.unmodifiableList(subSpans); + } + + /** hack */ + public int getFrom() { return from; } + /** hack */ + public int getTo() { return to; } + + //Needed by addAnnotation + private List<Span> getRemovableSubSpan() { + return subSpans == null ? + Collections.<Span>emptyList() : + subSpans; + } + + + private void addSubSpan(Span span) { + if(subSpans == null) { + subSpans = new ArrayList<>(); + } + subSpans.add(span); + } + + + /* + * How this works: + * + * First we check if any excisting subannotation can contain this annotation. If so, we leave it to them to add + * the new annotation. + * + * Then we check if the new annotation intersects any of the excisting annotations. That is illegal to do + * + * We then add all subannotations that are strictly contained in the new annotation to the new annotation. 
+ */ + private Annotations addAnnotation(int from, int to, AnnotationClass clazz) { + if (equalsRange(from, to)) { + //We simply add everything from the new span to this + if (annotations.containsKey(clazz)) { + return annotations.get(clazz); + } else { + Annotations nAnnotations = new Annotations(this); + annotations.put(clazz,nAnnotations); + return nAnnotations; + } + } + + //We then check if any of the children intersects + for (Span subSpan : getSubSpans()) { + if (subSpan.intersects(from, to)) { + throw new RuntimeException("Trying to add span that intersects already excisting span"); + } else if (subSpan.contains(from, to)) { + return subSpan.addAnnotation(from, to, clazz); + } + } + + //We now know that we have to add the new span to this span + Span span = new Span(from, to, this); + Annotations nAnnotations = new Annotations(span); + span.annotations.put(clazz,nAnnotations); + addSubSpan(span); + + + //We then add any subannotation that is inside the span + Iterator<Span> subIterator = getRemovableSubSpan().iterator(); + + while (subIterator.hasNext()) { + Span subSpan = subIterator.next(); + if (subSpan.contains(from, to)) { + return subSpan.addAnnotation(from, to, clazz); + } else if (subSpan.isInside(from, to)) { + //Overtake the subannotation + subSpan.parent = span; + span.addSubSpan(subSpan); + subIterator.remove(); + } + } + return nAnnotations; + } + + + private boolean contains(int from, int to) { + return this.from <= from && this.to >= to; + } + + private boolean isInside(int from, int to) { + return this.from >= from && this.to <= to; + } + + + private boolean intersects(int from, int to) { + return (this.from < from && this.to > from && this.to < to) + || (this.from < to && this.to > to && this.from > from); + + + } + + private boolean equalsRange(int from, int to) { + return this.from == from && this.to == to; + } + + private void getAllAnnotations(Map<AnnotationClass, List<Annotations>> results) { + for(Map.Entry<AnnotationClass, 
Annotations> entry : annotations.entrySet()) { + List<Annotations> anList = results.get(entry.getKey()); + if (anList == null) { + anList = new ArrayList<>(); + results.put(entry.getKey(), anList); + } + anList.add(entry.getValue()); + } + + for(Span subSpan : getSubSpans()) { + subSpan.getAllAnnotations(results); + } + } + + private void getTermSpans(String term, List<Span> spans) { + if(term.equalsIgnoreCase(this.getText())) { + spans.add(this); + } + for(Span subSpan : getSubSpans()) { + subSpan.getTermSpans(term, spans); + } + } + + + private void getAnnotationClasses(int from, int to, Set<AnnotationClass> classes) { + if (!contains(from, to)) { + return; + } + classes.addAll(annotations.keySet()); + for (Span subSpan : getSubSpans()) { + subSpan.getAnnotationClasses(from, to, classes); + } + } + + private void getTokens(List<Span> spans) { + if (getSubSpans().size() == 0) { + spans.add(this); + } else { + for (Span subSpan : getSubSpans()) { + subSpan.getTokens(spans); + } + + } + } + + private Annotations getBestAnnotation(int from, int to, AnnotationClass clazz) { + if (!contains(from, to)) { + return null; + } + //First yourself, then the subs + Annotations annotations = this.annotations.get(clazz); + for (Span subSpan : getSubSpans()) { + Annotations subAnnotations = subSpan.getBestAnnotation(from, to, clazz); + if (subAnnotations != null) { + annotations = subAnnotations; + } + } + return annotations; + } +} diff --git a/container-search/src/main/java/com/yahoo/text/interpretation/package-info.java b/container-search/src/main/java/com/yahoo/text/interpretation/package-info.java new file mode 100644 index 00000000000..902dc58d551 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/text/interpretation/package-info.java @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+/** + * Models a set of hierarchical annotations (typically produced by QLAS) of a natural language string. + */ +@ExportPackage +@PublicApi +package com.yahoo.text.interpretation; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/.gitignore b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/.gitignore new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/.gitignore diff --git a/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/ListMerger.java b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/ListMerger.java new file mode 100644 index 00000000000..423b2c66965 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/ListMerger.java @@ -0,0 +1,104 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.streamingvisitors; + +import java.util.List; +import java.util.ListIterator; +import java.util.ArrayList; + +/** + * A list merger that merges two sorted lists. + * + * @author <a href="mailto:ulf@yahoo-inc.com">Ulf Carlin</a> + */ +public class ListMerger { + public static <T extends Comparable<? 
super T>> void mergeLinkedLists(List<T> to, List<T> from, int maxEntryCount) { + int entryCount = 0; + ListIterator<T> i = to.listIterator(); + while (!from.isEmpty()) { + T fromElement = from.remove(0); + while (i.hasNext()) { + T toElement = i.next(); + if (toElement.compareTo(fromElement) > 0) { + i.previous(); + break; + } else { + entryCount++; + if (entryCount >= maxEntryCount) { + break; + } + } + } + if (entryCount >= maxEntryCount) { + break; + } + i.add(fromElement); + entryCount++; + if (entryCount >= maxEntryCount) { + break; + } + } + while (i.hasNext()) { + i.next(); + i.remove(); + } + } + + public static <T extends Comparable<? super T>> List<T> mergeIntoArrayList(List<T> l1, List<T> l2, int maxEntryCount) { + List<T> mergedList = new ArrayList<>(); + ListIterator<T> i1 = l1.listIterator(); + ListIterator<T> i2 = l2.listIterator(); + + T e1 = null; + if (i1.hasNext()) { + e1 = i1.next(); + } + T e2 = null; + if (i2.hasNext()) { + e2 = i2.next(); + } + + while (e1 != null && e2 != null && mergedList.size() < maxEntryCount) { + if (e1.compareTo(e2) <= 0) { + mergedList.add(e1); + if (i1.hasNext()) { + e1 = i1.next(); + } else { + e1 = null; + } + } else { + mergedList.add(e2); + if (i2.hasNext()) { + e2 = i2.next(); + } else { + e2 = null; + } + } + } + + if (e2 == null) { + while (e1 != null && mergedList.size() < maxEntryCount) { + mergedList.add(e1); + if (i1.hasNext()) { + e1 = i1.next(); + } else { + e1 = null; + } + } + } else if (e1 == null) { + while (e2 != null && mergedList.size() < maxEntryCount) { + mergedList.add(e2); + if (i2.hasNext()) { + e2 = i2.next(); + } else { + e2 = null; + } + } + } + + return mergedList; + } + + public static <T extends Comparable<? 
super T>> List<T> mergeIntoArrayList(List<T> l1, List<T> l2) { + return mergeIntoArrayList(l1, l2, Integer.MAX_VALUE); + } +} diff --git a/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/MetricsSearcher.java b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/MetricsSearcher.java new file mode 100644 index 00000000000..3d795674039 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/MetricsSearcher.java @@ -0,0 +1,111 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.streamingvisitors; + +import com.yahoo.log.event.Event; +import com.yahoo.search.query.context.QueryContext; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.vdslib.VisitorStatistics; + +import java.util.Map; +import java.util.TreeMap; +import java.util.logging.Logger; + +import static com.yahoo.vespa.streamingvisitors.VdsStreamingSearcher.STREAMING_STATISTICS; + +/** + * Generates mail-specific query metrics. 
+ */ +public class MetricsSearcher extends Searcher { + + private static final CompoundName metricsearcherId=new CompoundName("metricsearcher.id"); + private static final CompoundName streamingLoadtype=new CompoundName("streaming.loadtype"); + + private static final Logger log = Logger.getLogger(MetricsSearcher.class.getName()); + + static class Stats { + long latency = 0; + int count = 0; + int ok = 0; + int failed = 0; + long dataStreamed = 0; + long documentsStreamed = 0; + } + + Map<String, Stats> statMap = new TreeMap<>(); + private long lastMetricLog = 0; + + @Override + public Result search(Query query, Execution execution) { + long timeMs = System.currentTimeMillis(); + + /** Backwards compatibility - convert metricsearcher.id to streaming.loadtype */ + String metricName = query.properties().getString(metricsearcherId); + if (metricName != null) { + query.properties().set(streamingLoadtype, metricName); + } + + Result result = execution.search(query); + + long latency = System.currentTimeMillis() - timeMs; + + metricName = query.properties().getString(streamingLoadtype); + if (metricName == null) { + return result; + } + + synchronized(this) { + Stats stats = statMap.get(metricName); + + if (stats == null) { + stats = new Stats(); + statMap.put(metricName, stats); + } + + stats.count++; + stats.latency += latency; + + if (result.hits().getError() != null && + !result.hits().getErrorHit().hasOnlyErrorCode(ErrorMessage.NULL_QUERY) && + !result.hits().getErrorHit().hasOnlyErrorCode(3)) { + stats.failed++; + } else { + stats.ok++; + } + + VisitorStatistics visitorstats = null; + final QueryContext queryContext = query.getContext(false); + if (queryContext != null) { + visitorstats = (VisitorStatistics)queryContext.getProperty(STREAMING_STATISTICS); + } + if (visitorstats != null) { + stats.dataStreamed += visitorstats.getBytesVisited(); + stats.documentsStreamed += visitorstats.getDocumentsVisited(); + } else { + log.fine("No visitor statistics set in query! 
- don't use metrics searcher without streaming search"); + } + + if ((timeMs - lastMetricLog) > 60000) { + for (Map.Entry<String, Stats> entry : statMap.entrySet()) { + stats = entry.getValue(); + Event.value(entry.getKey() + "_latency", stats.count > 0 ? (double)stats.latency / (double)stats.count : 0); + Event.value(entry.getKey() + "_ok", stats.ok); + Event.value(entry.getKey() + "_failed", stats.failed); + Event.value(entry.getKey() + "_bytesstreamed", stats.dataStreamed); + Event.value(entry.getKey() + "_documentsstreamed", stats.documentsStreamed); + + stats.latency = 0; + stats.count = 0; + } + + lastMetricLog = timeMs; + } + } + + return result; + } +} diff --git a/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/VdsStreamingSearcher.java b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/VdsStreamingSearcher.java new file mode 100644 index 00000000000..b95a4269cf7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/VdsStreamingSearcher.java @@ -0,0 +1,299 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.streamingvisitors; + +import java.io.IOException; +import java.math.BigInteger; +import java.util.List; +import java.util.Map; +import java.util.logging.Logger; + +import com.yahoo.document.DocumentId; +import com.yahoo.document.idstring.IdString; +import com.yahoo.document.select.parser.ParseException; +import com.yahoo.document.select.parser.TokenMgrError; +import com.yahoo.fs4.DocsumPacket; +import com.yahoo.fs4.Packet; +import com.yahoo.fs4.QueryPacket; +import com.yahoo.log.LogLevel; +import com.yahoo.messagebus.routing.Route; +import com.yahoo.prelude.Ping; +import com.yahoo.prelude.Pong; +import com.yahoo.prelude.fastsearch.CacheKey; +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.prelude.fastsearch.GroupingListHit; +import com.yahoo.prelude.fastsearch.TimeoutException; +import com.yahoo.prelude.fastsearch.VespaBackEndSearcher; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.result.Relevance; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.searchlib.aggregation.Grouping; +import com.yahoo.vdslib.DocumentSummary; +import com.yahoo.vdslib.SearchResult; + +/** + * The searcher which forwards queries to storage nodes using visiting. + * The searcher is a visitor client responsible for starting search + * visitors in storage and collecting and merging the results. 
+ * + * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a> + * @author <a href="mailto:ulf@yahoo-inc.com">Ulf Carlin</a> + */ +@SuppressWarnings("deprecation") +public class VdsStreamingSearcher extends VespaBackEndSearcher { + + private static final CompoundName streamingUserid=new CompoundName("streaming.userid"); + private static final CompoundName streamingGroupname=new CompoundName("streaming.groupname"); + private static final CompoundName streamingSelection=new CompoundName("streaming.selection"); + + public static final String STREAMING_STATISTICS = "streaming.statistics"; + private VisitorFactory visitorFactory; + private static final Logger log = Logger.getLogger(VdsStreamingSearcher.class.getName()); + private Route route; + /** The configId used to access the searchcluster. */ + private String searchClusterConfigId = null; + /** The route to the storage cluster. */ + private String storageClusterRouteSpec = null; + + String getSearchClusterConfigId() { return searchClusterConfigId; } + String getStorageClusterRouteSpec() { return storageClusterRouteSpec; } + public final void setSearchClusterConfigId(String clusterName) { + this.searchClusterConfigId = clusterName; + } + + public final void setStorageClusterRouteSpec(String storageClusterRouteSpec) { + this.storageClusterRouteSpec = storageClusterRouteSpec; + } + + private static class VdsVisitorFactory implements VisitorFactory { + @Override + public Visitor createVisitor(Query query, String searchCluster, Route route) { + return new VdsVisitor(query, searchCluster, route); + } + } + + public VdsStreamingSearcher() { + visitorFactory = new VdsVisitorFactory(); + } + + public VdsStreamingSearcher(VisitorFactory visitorFactory) { + this.visitorFactory = visitorFactory; + } + + @Override + protected void doPartialFill(Result result, String summaryClass) { + } + + @Override + public Result doSearch2(Query query, QueryPacket queryPacket, CacheKey cacheKey, Execution execution) { + // TODO 
refactor this method into smaller methods, it's hard to see the actual code + lazyTrace(query, 7, "Routing to storage cluster ", getStorageClusterRouteSpec()); + + if (route == null) { + route = Route.parse(getStorageClusterRouteSpec()); + } + lazyTrace(query, 8, "Route is ", route); + + lazyTrace(query, 7, "doSearch2(): query docsum class=", + query.getPresentation().getSummary(), ", default docsum class=", + getDefaultDocsumClass()); + + if (query.getPresentation().getSummary() == null) { + lazyTrace(query, 6, + "doSearch2(): No summary class specified in query, using default: ", + getDefaultDocsumClass()); + query.getPresentation().setSummary(getDefaultDocsumClass()); + } else { + lazyTrace(query, 6, + "doSearch2(): Summary class has been specified in query: ", + query.getPresentation().getSummary()); + } + + lazyTrace(query, 8, "doSearch2(): rank properties=", query.getRanking()); + lazyTrace(query, 8, "doSearch2(): sort specification=", query + .getRanking().getSorting() == null ? null : query.getRanking() + .getSorting().fieldOrders()); + + int documentSelectionQueryParameterCount = 0; + if (query.properties().getString(streamingUserid) != null) documentSelectionQueryParameterCount++; + if (query.properties().getString(streamingGroupname) != null) documentSelectionQueryParameterCount++; + if (query.properties().getString(streamingSelection) != null) documentSelectionQueryParameterCount++; + if (documentSelectionQueryParameterCount != 1) { + return new Result(query, ErrorMessage.createBackendCommunicationError("Streaming search needs one and " + + "only one of these query parameters to be set: streaming.userid, streaming.groupname, " + + "streaming.selection")); + } + query.trace("Routing to search cluster " + getSearchClusterConfigId(), 4); + Visitor visitor = visitorFactory.createVisitor(query, getSearchClusterConfigId(), route); + try { + visitor.doSearch(); + } catch (ParseException e) { + return new Result(query, 
ErrorMessage.createBackendCommunicationError( + "Failed to parse document selection string: " + e.getMessage() + "'.")); + } catch (TokenMgrError e) { + return new Result(query, ErrorMessage.createBackendCommunicationError( + "Failed to tokenize document selection string: " + e.getMessage() + "'.")); + } catch (TimeoutException e) { + return new Result(query, ErrorMessage.createTimeout(e.getMessage())); + } catch (InterruptedException|IllegalArgumentException e) { + return new Result(query, ErrorMessage.createBackendCommunicationError(e.getMessage())); + } + + lazyTrace(query, 8, "offset=", query.getOffset(), ", hits=", query.getHits()); + + Result result = new Result(query); + List<SearchResult.Hit> hits = visitor.getHits(); // Sorted on rank + Map<String, DocumentSummary.Summary> summaryMap = visitor.getSummaryMap(); + + lazyTrace(query, 7, "total hit count = ", visitor.getTotalHitCount(), + ", returned hit count = ", hits.size(), ", summary count = ", + summaryMap.size()); + + result.setTotalHitCount(visitor.getTotalHitCount()); + query.trace(visitor.getStatistics().toString(), false, 2); + query.getContext(true).setProperty(STREAMING_STATISTICS, visitor.getStatistics()); + + Packet[] summaryPackets = new Packet [hits.size()]; + + int index = 0; + boolean skippedEarlierResult = false; + for (SearchResult.Hit hit : hits) { + if (!verifyDocId(hit.getDocId(), query, skippedEarlierResult)) { + skippedEarlierResult = true; + continue; + } + FastHit fastHit = buildSummaryHit(query, hit); + result.hits().add(fastHit); + + DocumentSummary.Summary summary = summaryMap.get(hit.getDocId()); + if (summary != null) { + DocsumPacket dp = new DocsumPacket(summary.getSummary()); + //log.log(LogLevel.SPAM, "DocsumPacket: " + dp); + summaryPackets[index] = dp; + } else { + return new Result(query, ErrorMessage.createBackendCommunicationError( + "Did not find summary for hit with document id " + hit.getDocId())); + } + + index++; + } + if 
(result.isFilled(query.getPresentation().getSummary())) { + lazyTrace(query, 8, "Result is filled for summary class ", query.getPresentation().getSummary()); + } else { + lazyTrace(query, 8, "Result is not filled for summary class ", query.getPresentation().getSummary()); + } + + List<Grouping> groupingList = visitor.getGroupings(); + lazyTrace(query, 8, "Grouping list=", groupingList); + if ( ! groupingList.isEmpty() ) { + GroupingListHit groupHit = new GroupingListHit(groupingList, getDocsumDefinitionSet(query)); + result.hits().add(groupHit); + } + + int skippedHits; + try { + skippedHits = fillHits(result, 0, summaryPackets, query.getPresentation().getSummary()); + } catch (IOException e) { + return new Result(query, ErrorMessage.createBackendCommunicationError( + "Error filling hits with summary fields")); + } + + if (skippedHits==0) { + query.trace("All hits have been filled",4); // TODO: cache results or result.analyzeHits(); ? + } else { + lazyTrace(query, 8, "Skipping some hits for query: ", result.getQuery()); + } + + lazyTrace(query, 8, "Returning result ", result); + + if ( skippedHits>0 ) { + getLogger().info("skipping " + skippedHits + " hits for query: " + result.getQuery()); + result.hits().addError(com.yahoo.search.result.ErrorMessage.createTimeout("Missing hit summary data for " + skippedHits + " hits")); + } + + return result; + } + + private FastHit buildSummaryHit(Query query, SearchResult.Hit hit) { + FastHit fastHit = new FastHit(); + fastHit.setQuery(query); + fastHit.setSource("VdsStreamingSearcher"); + fastHit.setId(hit.getDocId()); + // TODO: remove seField("uri", ...), just a helper for Velocity templates + fastHit.setField("uri", hit.getDocId()); + fastHit.types().add("summary"); + + fastHit.setRelevance(new Relevance(hit.getRank())); + + fastHit.setFillable(); + return fastHit; + } + + private static void lazyTrace(Query query, int level, Object... 
args) { + if (query.isTraceable(level)) { + StringBuilder s = new StringBuilder(); + for (Object arg : args) { + s.append(arg); + } + query.trace(s.toString(), level); + } + } + + static boolean verifyDocId(String id, Query query, boolean skippedEarlierResult) { + String expUserId = query.properties().getString(streamingUserid); + String expGroupName = query.properties().getString(streamingGroupname); + + LogLevel logLevel = LogLevel.ERROR; + if (skippedEarlierResult) { + logLevel = LogLevel.DEBUG; + } + + DocumentId docId; + try { + docId = new DocumentId(id); + } catch (IllegalArgumentException iae) { + log.log(logLevel, "Bad result for " + query + ": " + iae.getMessage()); + return false; + } + + if (expUserId != null) { + long userId; + + if (docId.getScheme().hasNumber()) { + userId = docId.getScheme().getNumber(); + } else { + log.log(logLevel, "Got result with wrong scheme (expected " + IdString.Scheme.userdoc + + " or " + IdString.Scheme.orderdoc + ") in document ID (" + id + ") for " + query); + return false; + } + if (new BigInteger(expUserId).longValue() != userId) { + log.log(logLevel, "Got result with wrong user ID (expected " + expUserId + ") in document ID (" + + id + ") for " + query); + return false; + } + } else if (expGroupName != null) { + String groupName; + + if (docId.getScheme().hasGroup()) { + groupName = docId.getScheme().getGroup(); + } else { + log.log(logLevel, "Got result with wrong scheme (expected " + IdString.Scheme.groupdoc + + " or " + IdString.Scheme.orderdoc + ") in document ID (" + id + ") for " + query); + return false; + } + if (!expGroupName.equals(groupName)) { + log.log(logLevel, "Got result with wrong group name (expected " + expGroupName + ") in document ID (" + + id + ") for " + query); + return false; + } + } + return true; + } + + public Pong ping(Ping ping, Execution execution) { + // TODO add a real pong + return new Pong(); + } +} diff --git 
a/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/VdsVisitor.java b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/VdsVisitor.java new file mode 100644 index 00000000000..3fb8173e6ec --- /dev/null +++ b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/VdsVisitor.java @@ -0,0 +1,434 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.streamingvisitors; + +import com.yahoo.document.select.OrderingSpecification; +import com.yahoo.document.select.parser.ParseException; +import com.yahoo.documentapi.*; +import com.yahoo.documentapi.messagebus.MessageBusDocumentAccess; +import com.yahoo.documentapi.messagebus.MessageBusParams; +import com.yahoo.documentapi.messagebus.loadtypes.LoadType; +import com.yahoo.documentapi.messagebus.loadtypes.LoadTypeSet; +import com.yahoo.documentapi.messagebus.protocol.DocumentProtocol; +import com.yahoo.documentapi.messagebus.protocol.DocumentSummaryMessage; +import com.yahoo.documentapi.messagebus.protocol.QueryResultMessage; +import com.yahoo.documentapi.messagebus.protocol.SearchResultMessage; +import com.yahoo.io.GrowableByteBuffer; +import com.yahoo.log.LogLevel; +import com.yahoo.messagebus.Message; +import com.yahoo.messagebus.routing.Route; +import com.yahoo.search.Query; +import com.yahoo.prelude.fastsearch.TimeoutException; +import com.yahoo.search.grouping.vespa.GroupingExecutor; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.query.Model; +import com.yahoo.search.query.Presentation; +import com.yahoo.search.query.Ranking; +import com.yahoo.searchlib.aggregation.Grouping; +import com.yahoo.vdslib.DocumentSummary; +import com.yahoo.vdslib.SearchResult; +import com.yahoo.vdslib.VisitorStatistics; +import com.yahoo.vespa.objects.BufferSerializer; + +import java.lang.IllegalArgumentException; +import java.lang.Integer; +import java.lang.InterruptedException; +import 
java.lang.Object; +import java.lang.RuntimeException; +import java.lang.String; +import java.nio.ByteBuffer; +import java.util.*; +import java.util.concurrent.ConcurrentHashMap; +import java.util.logging.Logger; + +/** + * A visitor data handler that performs a query in VDS with the + * searchvisitor visitor plugin. It collects and merges hits (sorted + * descending on rank), summaries (sorted on document id), and + * groupings. The resulting data can be fetched when the query has + * completed. + * + * @author <a href="mailto:ulf@yahoo-inc.com">Ulf Carlin</a> + */ +class VdsVisitor extends VisitorDataHandler implements Visitor { + + private static final CompoundName streamingUserid=new CompoundName("streaming.userid"); + private static final CompoundName streamingGroupname=new CompoundName("streaming.groupname"); + private static final CompoundName streamingSelection=new CompoundName("streaming.selection"); + private static final CompoundName streamingHeadersonly=new CompoundName("streaming.headersonly"); + private static final CompoundName streamingFromtimestamp=new CompoundName("streaming.fromtimestamp"); + private static final CompoundName streamingTotimestamp=new CompoundName("streaming.totimestamp"); + private static final CompoundName streamingLoadtype=new CompoundName("streaming.loadtype"); + private static final CompoundName streamingPriority=new CompoundName("streaming.priority"); + private static final CompoundName streamingOrdering=new CompoundName("streaming.ordering"); + private static final CompoundName streamingMaxbucketspervisitor=new CompoundName("streaming.maxbucketspervisitor"); + + private static final Logger log = Logger.getLogger(VdsVisitor.class.getName()); + private final VisitorParameters params = new VisitorParameters(""); + private List<SearchResult.Hit> hits = new ArrayList<>(); + private int totalHitCount = 0; + + private final Map<String, DocumentSummary.Summary> summaryMap = new HashMap<>(); + private final Map<Integer, Grouping> 
groupingMap = new ConcurrentHashMap<>(); + private Query query = null; + private VisitorSessionFactory visitorSessionFactory; + + static int getOrdering(String ordering) { + if (ordering.equals("+")) { + return OrderingSpecification.ASCENDING; + } else if (ordering.equals("-")) { + return OrderingSpecification.DESCENDING; + } else { + throw new RuntimeException("Ordering must be on the format {+/-}"); + } + } + + public static interface VisitorSessionFactory { + public VisitorSession createVisitorSession(VisitorParameters params) throws ParseException; + public LoadTypeSet getLoadTypeSet(); + } + + private static class MessageBusVisitorSessionFactory implements VisitorSessionFactory { + private static final LoadTypeSet loadTypes = new LoadTypeSet("client"); + private static final DocumentAccess access = new MessageBusDocumentAccess(new MessageBusParams(loadTypes)); + + @Override + public VisitorSession createVisitorSession(VisitorParameters params) throws ParseException { + return access.createVisitorSession(params); + } + + @Override + public LoadTypeSet getLoadTypeSet() { + return loadTypes; + } + } + + public VdsVisitor(Query query, String searchCluster, Route route) { + this.query = query; + visitorSessionFactory = new MessageBusVisitorSessionFactory(); + setVisitorParameters(searchCluster, route); + } + + public VdsVisitor(Query query, String searchCluster, Route route, VisitorSessionFactory visitorSessionFactory) { + this.query = query; + this.visitorSessionFactory = visitorSessionFactory; + setVisitorParameters(searchCluster, route); + } + + private void setVisitorParameters(String searchCluster, Route route) { + if (query.properties().getString(streamingUserid) != null) { + params.setDocumentSelection("id.user==" + query.properties().getString(streamingUserid)); + } else if (query.properties().getString(streamingGroupname) != null) { + params.setDocumentSelection("id.group==\"" + query.properties().getString(streamingGroupname) + "\""); + } else if 
(query.properties().getString(streamingSelection) != null) { + params.setDocumentSelection(query.properties().getString(streamingSelection)); + } + params.setTimeoutMs(query.getTimeout()); + params.setVisitorLibrary("searchvisitor"); + params.setLocalDataHandler(this); + params.setVisitHeadersOnly(query.properties().getBoolean(streamingHeadersonly)); + if (query.properties().getDouble(streamingFromtimestamp) != null) { + params.setFromTimestamp(query.properties().getDouble(streamingFromtimestamp).longValue()); + } + if (query.properties().getDouble(streamingTotimestamp) != null) { + params.setToTimestamp(query.properties().getDouble(streamingTotimestamp).longValue()); + } + params.visitInconsistentBuckets(true); + params.setPriority(DocumentProtocol.Priority.VERY_HIGH); + + if (query.properties().getString(streamingLoadtype) != null) { + LoadType loadType = visitorSessionFactory.getLoadTypeSet().getNameMap().get(query.properties().getString(streamingLoadtype)); + if (loadType != null) { + params.setLoadType(loadType); + params.setPriority(loadType.getPriority()); + } + } + + if (query.properties().getString(streamingPriority) != null) { + params.setPriority(DocumentProtocol.getPriorityByName( + query.properties().getString(streamingPriority))); + } + + params.setMaxPending(Integer.MAX_VALUE); + params.setMaxBucketsPerVisitor(Integer.MAX_VALUE); + params.setTraceLevel(query.getTraceLevel()); + + String ordering = query.properties().getString(streamingOrdering); + if (ordering != null) { + params.setVisitorOrdering(getOrdering(ordering)); + params.setMaxFirstPassHits(query.getOffset() + query.getHits()); + params.setMaxBucketsPerVisitor(1); + params.setDynamicallyIncreaseMaxBucketsPerVisitor(true); + } + + String maxbuckets = query.properties().getString(streamingMaxbucketspervisitor); + if (maxbuckets != null) { + params.setMaxBucketsPerVisitor(Integer.parseInt(maxbuckets)); + } + + EncodedData ed = new EncodedData(); + encodeQueryData(query, 0, ed); + 
params.setLibraryParameter("query", ed.getEncodedData()); + params.setLibraryParameter("querystackcount", String.valueOf(ed.getReturned())); + params.setLibraryParameter("searchcluster", searchCluster.getBytes()); + if (query.getPresentation().getSummary() != null) { + params.setLibraryParameter("summaryclass", query.getPresentation().getSummary()); + } else { + params.setLibraryParameter("summaryclass", "default"); + } + params.setLibraryParameter("summarycount", String.valueOf(query.getOffset() + query.getHits())); + params.setLibraryParameter("rankprofile", query.getRanking().getProfile()); + params.setLibraryParameter("queryflags", String.valueOf(getQueryFlags(query))); + + ByteBuffer buf = ByteBuffer.allocate(1024); + + if (query.getRanking().getLocation() != null) { + buf.clear(); + query.getRanking().getLocation().encode(buf); + buf.flip(); + byte[] af = new byte [buf.remaining()]; + buf.get(af); + params.setLibraryParameter("location", af); + } + + if (query.hasEncodableProperties()) { + encodeQueryData(query, 1, ed); + params.setLibraryParameter("rankproperties", ed.getEncodedData()); + } + + List<Grouping> groupingList = GroupingExecutor.getGroupingList(query); + if (groupingList.size() > 0){ + BufferSerializer gbuf = new BufferSerializer(new GrowableByteBuffer()); + gbuf.putInt(null, groupingList.size()); + for(Grouping g: groupingList){ + g.serialize(gbuf); + } + gbuf.flip(); + byte [] blob = gbuf.getBytes(null, gbuf.getBuf().limit()); + params.setLibraryParameter("aggregation", blob); + } + + if (query.getRanking().getSorting() != null) { + encodeQueryData(query, 3, ed); + params.setLibraryParameter("sort", ed.getEncodedData()); + } + + params.setRoute(route); + } + + static int getQueryFlags(Query query) { + int flags = 0; + + boolean requestCoverage=true; // Always request coverage information + + flags |= 0; // was collapse + flags |= query.properties().getBoolean(Model.ESTIMATE) ? 
0x00000080 : 0; + flags |= (query.getRanking().getFreshness() != null) ? 0x00002000 : 0; + flags |= requestCoverage ? 0x00008000 : 0; + flags |= query.getNoCache() ? 0x00010000 : 0; + flags |= 0x00020000; // was PARALLEL + flags |= query.properties().getBoolean(Ranking.RANKFEATURES,false) ? 0x00040000 : 0; + + return flags; + } + + private static class EncodedData { + private Object returned; + private byte[] encoded; + + public void setReturned(Object o){ + this.returned = o; + } + public Object getReturned(){ + return returned; + } + public void setEncodedData(byte[] data){ + encoded = data; + } + public byte[] getEncodedData(){ + return encoded; + } + } + + private static void encodeQueryData(Query query, int code, EncodedData ed){ + ByteBuffer buf = ByteBuffer.allocate(1024); + while (true) { + try { + switch(code){ + case 0: + ed.setReturned(query.getModel().getQueryTree().getRoot().encode(buf)); + break; + case 1: + ed.setReturned(query.encodeAsProperties(buf, true)); + break; + case 2: + throw new IllegalArgumentException("old aggregation no longer exists!"); + case 3: + if (query.getRanking().getSorting() != null) + ed.setReturned(query.getRanking().getSorting().encode(buf)); + else + ed.setReturned(0); + break; + } + buf.flip(); + break; + } catch (java.nio.BufferOverflowException e) { + int size = buf.limit(); + buf = ByteBuffer.allocate(size*2); + } + } + byte [] bb = new byte [buf.remaining()]; + buf.get(bb); + ed.setEncodedData(bb); + } + + @Override + public void doSearch() throws InterruptedException, ParseException, TimeoutException { + VisitorSession session = visitorSessionFactory.createVisitorSession(params); + try { + if ( !session.waitUntilDone(query.getTimeout())) { + log.log(LogLevel.DEBUG, "Visitor returned from waitUntilDone without being completed for " + query + " with selection " + params.getDocumentSelection()); + session.abort(); + throw new TimeoutException("Query timed out in " + VdsStreamingSearcher.class.getName()); + } + } finally 
{ + session.destroy(); + } + + query.trace(session.getTrace().toString(), false, query.getTraceLevel()); + + if (params.getControlHandler().getResult().code == VisitorControlHandler.CompletionCode.SUCCESS) { + if (log.isLoggable(LogLevel.DEBUG)) { + log.log(LogLevel.DEBUG, "VdsVisitor completed successfully for " + query + " with selection " + params.getDocumentSelection()); + } + } else { + throw new IllegalArgumentException("Query failed: " // TODO: Is it necessary to use a runtime exception? + + params.getControlHandler().getResult().code + ": " + + params.getControlHandler().getResult().message); + } + } + + @Override + public VisitorStatistics getStatistics() { + return params.getControlHandler().getVisitorStatistics(); + } + + @Override + public void onMessage(Message m, AckToken token) { + if (m instanceof QueryResultMessage) { + QueryResultMessage qm = (QueryResultMessage)m; + onQueryResult(qm.getResult(), qm.getSummary()); + } else if (m instanceof SearchResultMessage) { + onSearchResult(((SearchResultMessage) m).getResult()); + } else if (m instanceof DocumentSummaryMessage) { + DocumentSummaryMessage dsm = (DocumentSummaryMessage)m; + onDocumentSummary(dsm.getResult()); + } else { + throw new UnsupportedOperationException("Received unsupported message " + m + ". 
VdsVisitor can only accept query result, search result, and documentsummary messages."); + } + ack(token); + } + + public void onQueryResult(SearchResult sr, DocumentSummary summary) { + handleSearchResult(sr); + handleSummary(summary); + } + + public void onSearchResult(SearchResult sr) { + if (log.isLoggable(LogLevel.SPAM)) { + log.log(LogLevel.SPAM, "Got SearchResult for query with selection " + params.getDocumentSelection()); + } + handleSearchResult(sr); + } + + private void handleSearchResult(SearchResult sr) { + final int hitCountTotal = sr.getTotalHitCount(); + final int hitCount = sr.getHitCount(); + if (log.isLoggable(LogLevel.DEBUG)) { + log.log(LogLevel.DEBUG, "Got SearchResult with " + hitCountTotal + " in total and " + hitCount + " hits in real for query with selection " + params.getDocumentSelection()); + } + + List<SearchResult.Hit> newHits = new ArrayList<>(hitCount); + for (int i = 0; i < hitCount; i++) { + SearchResult.Hit hit = sr.getHit(i); + newHits.add(hit); + } + synchronized (this) { + totalHitCount += hitCountTotal; + hits = ListMerger.mergeIntoArrayList(hits, newHits, query.getOffset() + query.getHits()); + } + + Map<Integer, byte []> newGroupingMap = sr.getGroupingList(); + mergeGroupingMaps(newGroupingMap); + } + + private void mergeGroupingMaps(Map<Integer, byte []> newGroupingMap) { + if (log.isLoggable(LogLevel.SPAM)) { + log.log(LogLevel.SPAM, "mergeGroupingMaps: newGroupingMap = " + newGroupingMap); + } + for(Integer key : newGroupingMap.keySet()) { + byte [] value = newGroupingMap.get(key); + + Grouping newGrouping = new Grouping(); + if (log.isLoggable(LogLevel.SPAM)) { + log.log(LogLevel.SPAM, "Received group with key " + key + " and size " + value.length); + } + BufferSerializer buf = new BufferSerializer( new GrowableByteBuffer(ByteBuffer.wrap(value)) ); + newGrouping.deserialize(buf); + if (buf.getBuf().hasRemaining()) { + throw new IllegalArgumentException("Failed deserializing grouping. There are still data left. 
Position = " + buf.position() + ", limit = " + buf.getBuf().limit()); + } + + synchronized (groupingMap) { + if (groupingMap.containsKey(key)) { + Grouping grouping = groupingMap.get(key); + grouping.merge(newGrouping); + } else { + groupingMap.put(key, newGrouping); + } + } + } + } + + public void onDocumentSummary(DocumentSummary ds) { + if (log.isLoggable(LogLevel.SPAM)) { + log.log(LogLevel.SPAM, "Got DocumentSummary for query with selection " + params.getDocumentSelection()); + } + handleSummary(ds); + } + + private void handleSummary(DocumentSummary ds) { + int summaryCount = ds.getSummaryCount(); + if (log.isLoggable(LogLevel.DEBUG)) { + log.log(LogLevel.DEBUG, "Got DocumentSummary with " + summaryCount + " summaries for query with selection " + params.getDocumentSelection()); + } + synchronized (summaryMap) { + for (int i = 0; i < summaryCount; i++) { + DocumentSummary.Summary summary = ds.getSummary(i); + summaryMap.put(summary.getDocId(), summary); + } + } + } + + @Override + final public List<SearchResult.Hit> getHits() { + int fromIndex = Math.min(hits.size(), query.getOffset()); + int toIndex = Math.min(hits.size(), query.getOffset() + query.getHits()); + return hits.subList(fromIndex, toIndex); + } + + @Override + final public Map<String, DocumentSummary.Summary> getSummaryMap() { return summaryMap; } + + @Override + final public int getTotalHitCount() { return totalHitCount; } + + @Override + final public List<Grouping> getGroupings() { + Collection<Grouping> groupings = groupingMap.values(); + for (Grouping g : groupings) { + g.postMerge(); + } + Grouping[] array = groupings.toArray(new Grouping[groupings.size()]); + return Arrays.asList(array); + } +} diff --git a/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/Visitor.java b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/Visitor.java new file mode 100644 index 00000000000..954f7b47f3f --- /dev/null +++ 
b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/Visitor.java @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.streamingvisitors; + +import com.yahoo.document.select.parser.ParseException; +import com.yahoo.prelude.fastsearch.TimeoutException; +import com.yahoo.searchlib.aggregation.Grouping; +import com.yahoo.vdslib.DocumentSummary; +import com.yahoo.vdslib.SearchResult; +import com.yahoo.vdslib.VisitorStatistics; + +import java.util.List; +import java.util.Map; + +/** + * Visitor for performing searches and accessing results. + * + * @author <a href="mailto:ulf@yahoo-inc.com">Ulf Carlin</a> + */ +interface Visitor { + + void doSearch() throws InterruptedException, ParseException, TimeoutException; + + VisitorStatistics getStatistics(); + + List<SearchResult.Hit> getHits(); + + Map<String, DocumentSummary.Summary> getSummaryMap(); + + int getTotalHitCount(); + + List<Grouping> getGroupings(); +} diff --git a/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/VisitorFactory.java b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/VisitorFactory.java new file mode 100644 index 00000000000..711b9381e4e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/VisitorFactory.java @@ -0,0 +1,14 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.streamingvisitors; + +import com.yahoo.messagebus.routing.Route; +import com.yahoo.search.Query; + +/** + * A factory that creates Visitors. 
+ * + * @author <a href="mailto:ulf@yahoo-inc.com">Ulf Carlin</a> + */ +interface VisitorFactory { + public Visitor createVisitor(Query query, String searchCluster, Route route); +} diff --git a/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/test/.gitignore b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/test/.gitignore new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/test/.gitignore |