aboutsummaryrefslogtreecommitdiffstats
path: root/container-search/src/main/java/com/yahoo/prelude/query/WeightedSetItem.java
diff options
context:
space:
mode:
Diffstat (limited to 'container-search/src/main/java/com/yahoo/prelude/query/WeightedSetItem.java')
-rw-r--r--container-search/src/main/java/com/yahoo/prelude/query/WeightedSetItem.java175
1 files changed, 175 insertions, 0 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/WeightedSetItem.java b/container-search/src/main/java/com/yahoo/prelude/query/WeightedSetItem.java
new file mode 100644
index 00000000000..eb6737ba9d8
--- /dev/null
+++ b/container-search/src/main/java/com/yahoo/prelude/query/WeightedSetItem.java
@@ -0,0 +1,175 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude.query;
+
+import com.yahoo.collections.CopyOnWriteHashMap;
+import com.yahoo.compress.IntegerCompressor;
+import com.yahoo.prelude.query.textualrepresentation.Discloser;
+
+import edu.umd.cs.findbugs.annotations.NonNull;
+
+import java.nio.ByteBuffer;
+import java.util.Iterator;
+import java.util.Map;
+
+/**
+ * A term which contains a weighted set.
+ *
+ * When using a weighted set to search a field, all tokens present in
+ * the searched field will be reverse matched against the weighted
+ * set. This means that using a weighted set to search a single-value
+ * attribute field will have similar semantics to using a normal term
+ * to search a weighted set field. The low-level matching information
+ * resulting from matching a document with a weighted set term will
+ * contain the weights of all the matched tokens in descending
+ * order. Each matched weight will be represented as a standard
+ * occurrence on position 0 in element 0.
+ *
+ */
+public class WeightedSetItem extends SimpleTaggableItem {
+
+ @NonNull
+ private String indexName = "";
+
+ private CopyOnWriteHashMap<Object,Integer> set = new CopyOnWriteHashMap<>(1000);
+
+ /** Creates an empty weighted set; note you must provide an index name up front */
+ public WeightedSetItem(String indexName) {
+ if (indexName == null) {
+ this.indexName = "";
+ } else {
+ this.indexName = indexName;
+ }
+ }
+
+ public Integer addToken(long value, int weight) {
+ return addInternal(value, weight);
+ }
+ /**
+ * Add weighted token.
+ * If token is already in the set, the maximum weight is kept.
+ * NOTE: The weight must be 1 or more; negative values (and zero) are not allowed.
+ * @return weight of added token (might be old value, if kept)
+ */
+ public Integer addToken(String token, int weight) {
+ if (token == null) throw new IllegalArgumentException("token must be a string");
+ return addInternal(token, weight);
+ }
+ private Integer addInternal(Object token, int weight) {
+ Integer newWeight = weight;
+ Integer oldWeight = set.put(token, newWeight);
+ if (oldWeight != null && oldWeight > newWeight) {
+ set.put(token, oldWeight);
+ return oldWeight;
+ }
+ return newWeight;
+ }
+
+ /**
+ * Add token with weight 1.
+ */
+ public Integer addToken(String token) {
+ return addToken(token, 1);
+ }
+
+ public Integer getTokenWeight(String token) {
+ return set.get(token);
+ }
+
+ public Integer removeToken(String token) {
+ return set.remove(token);
+ }
+
+ public int getNumTokens() {
+ return set.size();
+ }
+
+ public Iterator<Map.Entry<Object,Integer>> getTokens() {
+ return set.entrySet().iterator();
+ }
+
+ @Override
+ public void setIndexName(String index) {
+ if (index == null) {
+ this.indexName = "";
+ } else {
+ this.indexName = index;
+ }
+ }
+
+ @NonNull
+ public String getIndexName() {
+ return indexName;
+ }
+
+ @Override
+ public ItemType getItemType() {
+ return ItemType.WEIGHTEDSET;
+ }
+
+ @Override
+ public String getName() {
+ return getItemType().name();
+ }
+
+ // for tracing - random text format
+ @Override
+ protected void appendBodyString(StringBuilder buffer) {
+ buffer.append(indexName);
+ buffer.append("{");
+ for (Map.Entry<Object, Integer> entry : set.entrySet()) {
+ buffer.append("[");
+ buffer.append(entry.getValue());
+ buffer.append("]:\"");
+ buffer.append(entry.getKey());
+ buffer.append("\",");
+ }
+ buffer.deleteCharAt(buffer.length() - 1); // remove extra ","
+ buffer.append("}");
+ }
+
+ @Override
+ public void disclose(Discloser discloser) {
+ super.disclose(discloser);
+ discloser.addProperty("index", indexName);
+ for (Map.Entry<Object, Integer> entry : set.entrySet()) {
+ WordItem subitem = new WordItem(entry.getKey().toString(), indexName);
+ subitem.setWeight(entry.getValue());
+ discloser.addChild(subitem);
+ }
+ }
+
+ @Override
+ public int encode(ByteBuffer buffer) {
+ encodeThis(buffer);
+ int itemCount = 1;
+ for (Map.Entry<Object, Integer> entry : set.entrySet()) {
+ Object key = entry.getKey();
+ if (key instanceof Long) {
+ new PureWeightedInteger((Long)key, entry.getValue()).encode(buffer);
+ } else {
+ new PureWeightedString(key.toString(), entry.getValue()).encode(buffer);
+ }
+ itemCount++;
+ }
+ return itemCount;
+ }
+
+ @Override
+ protected void encodeThis(ByteBuffer buffer) {
+ super.encodeThis(buffer);
+ IntegerCompressor.putCompressedPositiveNumber(set.size(), buffer);
+ putString(indexName, buffer);
+ }
+
+ @Override
+ public int getTermCount() {
+ return 1; // this is just one (big) term
+ }
+
+ @Override
+ public WeightedSetItem clone() {
+ WeightedSetItem clone = (WeightedSetItem)super.clone();
+ clone.set = this.set.clone();
+ return clone;
+ }
+}