summaryrefslogtreecommitdiffstats
path: root/predicate-search/src/main/java/com/yahoo/search/predicate/index/conjunction/ConjunctionIndexBuilder.java
diff options
context:
space:
mode:
Diffstat (limited to 'predicate-search/src/main/java/com/yahoo/search/predicate/index/conjunction/ConjunctionIndexBuilder.java')
-rw-r--r--predicate-search/src/main/java/com/yahoo/search/predicate/index/conjunction/ConjunctionIndexBuilder.java120
1 files changed, 120 insertions, 0 deletions
diff --git a/predicate-search/src/main/java/com/yahoo/search/predicate/index/conjunction/ConjunctionIndexBuilder.java b/predicate-search/src/main/java/com/yahoo/search/predicate/index/conjunction/ConjunctionIndexBuilder.java
new file mode 100644
index 00000000000..d1086eaca23
--- /dev/null
+++ b/predicate-search/src/main/java/com/yahoo/search/predicate/index/conjunction/ConjunctionIndexBuilder.java
@@ -0,0 +1,120 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.search.predicate.index.conjunction;
+
+import com.google.common.primitives.Ints;
+import com.google.common.primitives.Longs;
+import com.gs.collections.api.map.primitive.IntObjectMap;
+import com.gs.collections.impl.map.mutable.primitive.IntObjectHashMap;
+import com.gs.collections.impl.map.mutable.primitive.LongObjectHashMap;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+
+/**
+ * A builder for {@link ConjunctionIndex}.
+ *
+ * @author bjorncs
+ */
+public class ConjunctionIndexBuilder {
+ // A map from K value to FeatureIndex
+ private final HashMap<Integer, FeatureIndexBuilder> kIndexBuilder = new HashMap<>();
+ private final List<Integer> zListBuilder = new ArrayList<>();
+ // Unique ids / mapping from internal to external id. LinkedHashSet as the insertion order is crucial.
+ private final Set<Long> seenIds = new LinkedHashSet<>();
+ private int idCounter = 0;
+ private int conjunctionsSeen = 0;
+
+ private static class FeatureIndexBuilder {
+ // Maps a feature id to conjunction id
+ private final Map<Long, Set<Integer>> map = new HashMap<>();
+
+ public void insert(long featureId, int conjunctionId) {
+ map.computeIfAbsent(featureId, k -> new TreeSet<>()).add(conjunctionId);
+ }
+ }
+
+ public void indexConjunction(IndexableFeatureConjunction c) {
+ ++conjunctionsSeen;
+ long externalId = c.id;
+ if (seenIds.contains(externalId)) return;
+
+ seenIds.add(externalId);
+ int internalId = generateInternalId();
+ FeatureIndexBuilder featureIndexBuilder = kIndexBuilder.computeIfAbsent(c.k, (k) -> new FeatureIndexBuilder());
+ c.features.forEach(f -> featureIndexBuilder.insert(f, internalId));
+ c.negatedFeatures.forEach(f -> featureIndexBuilder.insert(f, internalId & ~1));
+ if (c.k == 0) {
+ zListBuilder.add(internalId);
+ }
+ }
+
+ private int generateInternalId() {
+ return ((idCounter++) << 1) | 1;
+ }
+
+ public ConjunctionIndex build() {
+ int[] zList = Ints.toArray(zListBuilder);
+ IntObjectMap<ConjunctionIndex.FeatureIndex> kIndex = buildKIndex(kIndexBuilder);
+ long[] idMapping = Longs.toArray(seenIds);
+ return new ConjunctionIndex(kIndex, zList, idMapping);
+ }
+
+ /**
+ * @return The number of unique features in index.
+ */
+ public long calculateFeatureCount() {
+ return kIndexBuilder.values().stream()
+ .map(index -> index.map.keySet())
+ .reduce(
+ new HashSet<>(),
+ (acc, keySet) -> {
+ keySet.forEach(acc::add);
+ return acc;
+ }, (acc1, acc2) -> {
+ acc1.addAll(acc2);
+ return acc1;
+ })
+ .size();
+ }
+
+ /**
+ * @return The number of unique conjunctions indexed.
+ */
+ public long getUniqueConjunctionCount() {
+ return seenIds.size();
+ }
+
+ public int getZListSize() {
+ return zListBuilder.size();
+ }
+
+ public int getConjunctionsSeen() {
+ return conjunctionsSeen;
+ }
+
+ private static IntObjectMap<ConjunctionIndex.FeatureIndex> buildKIndex(HashMap<Integer, FeatureIndexBuilder> kIndexBuilder) {
+ IntObjectHashMap<ConjunctionIndex.FeatureIndex> map = new IntObjectHashMap<>();
+ for (Map.Entry<Integer, FeatureIndexBuilder> entry : kIndexBuilder.entrySet()) {
+ map.put(entry.getKey(), buildFeatureIndex(entry.getValue()));
+ }
+ map.compact();
+ return map;
+ }
+
+ private static ConjunctionIndex.FeatureIndex buildFeatureIndex(FeatureIndexBuilder featureIndexBuilder) {
+ LongObjectHashMap<int[]> map = new LongObjectHashMap<>();
+ for (Map.Entry<Long, Set<Integer>> featureEntry : featureIndexBuilder.map.entrySet()) {
+ int[] conjunctionIds = Ints.toArray(featureEntry.getValue());
+ map.put(featureEntry.getKey(), conjunctionIds);
+ }
+ map.compact();
+ return new ConjunctionIndex.FeatureIndex(map);
+ }
+
+}