diff options
Diffstat (limited to 'container-search/src/main/java/com/yahoo/prelude/query/WeightedSetItem.java')
-rw-r--r-- | container-search/src/main/java/com/yahoo/prelude/query/WeightedSetItem.java | 175 |
1 files changed, 175 insertions, 0 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/WeightedSetItem.java b/container-search/src/main/java/com/yahoo/prelude/query/WeightedSetItem.java new file mode 100644 index 00000000000..eb6737ba9d8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/WeightedSetItem.java @@ -0,0 +1,175 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import com.yahoo.collections.CopyOnWriteHashMap; +import com.yahoo.compress.IntegerCompressor; +import com.yahoo.prelude.query.textualrepresentation.Discloser; + +import edu.umd.cs.findbugs.annotations.NonNull; + +import java.nio.ByteBuffer; +import java.util.Iterator; +import java.util.Map; + +/** + * A term which contains a weighted set. + * + * When using a weighted set to search a field, all tokens present in + * the searched field will be reverse matched against the weighted + * set. This means that using a weighted set to search a single-value + * attribute field will have similar semantics to using a normal term + * to search a weighted set field. The low-level matching information + * resulting from matching a document with a weighted set term will + * contain the weights of all the matched tokens in descending + * order. Each matched weight will be represented as a standard + * occurrence on position 0 in element 0. + * + */ +public class WeightedSetItem extends SimpleTaggableItem { + + @NonNull + private String indexName = ""; + + private CopyOnWriteHashMap<Object,Integer> set = new CopyOnWriteHashMap<>(1000); + + /** Creates an empty weighted set; note you must provide an index name up front */ + public WeightedSetItem(String indexName) { + if (indexName == null) { + this.indexName = ""; + } else { + this.indexName = indexName; + } + } + + public Integer addToken(long value, int weight) { + return addInternal(value, weight); + } + /** + * Add weighted token. + * If token is already in the set, the maximum weight is kept. + * NOTE: The weight must be 1 or more; negative values (and zero) are not allowed. + * @return weight of added token (might be old value, if kept) + */ + public Integer addToken(String token, int weight) { + if (token == null) throw new IllegalArgumentException("token must be a string"); + return addInternal(token, weight); + } + private Integer addInternal(Object token, int weight) { + Integer newWeight = weight; + Integer oldWeight = set.put(token, newWeight); + if (oldWeight != null && oldWeight > newWeight) { + set.put(token, oldWeight); + return oldWeight; + } + return newWeight; + } + + /** + * Add token with weight 1. + */ + public Integer addToken(String token) { + return addToken(token, 1); + } + + public Integer getTokenWeight(String token) { + return set.get(token); + } + + public Integer removeToken(String token) { + return set.remove(token); + } + + public int getNumTokens() { + return set.size(); + } + + public Iterator<Map.Entry<Object,Integer>> getTokens() { + return set.entrySet().iterator(); + } + + @Override + public void setIndexName(String index) { + if (index == null) { + this.indexName = ""; + } else { + this.indexName = index; + } + } + + @NonNull + public String getIndexName() { + return indexName; + } + + @Override + public ItemType getItemType() { + return ItemType.WEIGHTEDSET; + } + + @Override + public String getName() { + return getItemType().name(); + } + + // for tracing - random text format + @Override + protected void appendBodyString(StringBuilder buffer) { + buffer.append(indexName); + buffer.append("{"); + for (Map.Entry<Object, Integer> entry : set.entrySet()) { + buffer.append("["); + buffer.append(entry.getValue()); + buffer.append("]:\""); + buffer.append(entry.getKey()); + buffer.append("\","); + } + buffer.deleteCharAt(buffer.length() - 1); // remove extra "," + buffer.append("}"); + } + + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("index", indexName); + for (Map.Entry<Object, Integer> entry : set.entrySet()) { + WordItem subitem = new WordItem(entry.getKey().toString(), indexName); + subitem.setWeight(entry.getValue()); + discloser.addChild(subitem); + } + } + + @Override + public int encode(ByteBuffer buffer) { + encodeThis(buffer); + int itemCount = 1; + for (Map.Entry<Object, Integer> entry : set.entrySet()) { + Object key = entry.getKey(); + if (key instanceof Long) { + new PureWeightedInteger((Long)key, entry.getValue()).encode(buffer); + } else { + new PureWeightedString(key.toString(), entry.getValue()).encode(buffer); + } + itemCount++; + } + return itemCount; + } + + @Override + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + IntegerCompressor.putCompressedPositiveNumber(set.size(), buffer); + putString(indexName, buffer); + } + + @Override + public int getTermCount() { + return 1; // this is just one (big) term + } + + @Override + public WeightedSetItem clone() { + WeightedSetItem clone = (WeightedSetItem)super.clone(); + clone.set = this.set.clone(); + return clone; + } +} |