// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.search.predicate.index.conjunction; import com.google.common.primitives.Ints; import com.google.common.primitives.Longs; import com.gs.collections.api.map.primitive.IntObjectMap; import com.gs.collections.impl.map.mutable.primitive.IntObjectHashMap; import com.gs.collections.impl.map.mutable.primitive.LongObjectHashMap; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; /** * A builder for {@link ConjunctionIndex}. * * @author bjorncs */ public class ConjunctionIndexBuilder { // A map from K value to FeatureIndex private final HashMap kIndexBuilder = new HashMap<>(); private final List zListBuilder = new ArrayList<>(); // Unique ids / mapping from internal to external id. LinkedHashSet as the insertion order is crucial. private final Set seenIds = new LinkedHashSet<>(); private int idCounter = 0; private int conjunctionsSeen = 0; private static class FeatureIndexBuilder { // Maps a feature id to conjunction id private final Map> map = new HashMap<>(); public void insert(long featureId, int conjunctionId) { map.computeIfAbsent(featureId, k -> new TreeSet<>()).add(conjunctionId); } } public void indexConjunction(IndexableFeatureConjunction c) { ++conjunctionsSeen; long externalId = c.id; if (seenIds.contains(externalId)) return; seenIds.add(externalId); int internalId = generateInternalId(); FeatureIndexBuilder featureIndexBuilder = kIndexBuilder.computeIfAbsent(c.k, (k) -> new FeatureIndexBuilder()); c.features.forEach(f -> featureIndexBuilder.insert(f, internalId)); c.negatedFeatures.forEach(f -> featureIndexBuilder.insert(f, internalId & ~1)); if (c.k == 0) { zListBuilder.add(internalId); } } private int generateInternalId() { return ((idCounter++) << 1) | 1; } public ConjunctionIndex build() { int[] zList = Ints.toArray(zListBuilder); IntObjectMap kIndex = buildKIndex(kIndexBuilder); long[] idMapping = Longs.toArray(seenIds); return new ConjunctionIndex(kIndex, zList, idMapping); } /** * @return The number of unique features in index. */ public long calculateFeatureCount() { return kIndexBuilder.values().stream() .map(index -> index.map.keySet()) .reduce( new HashSet<>(), (acc, keySet) -> { keySet.forEach(acc::add); return acc; }, (acc1, acc2) -> { acc1.addAll(acc2); return acc1; }) .size(); } /** * @return The number of unique conjunctions indexed. */ public long getUniqueConjunctionCount() { return seenIds.size(); } public int getZListSize() { return zListBuilder.size(); } public int getConjunctionsSeen() { return conjunctionsSeen; } private static IntObjectMap buildKIndex(HashMap kIndexBuilder) { IntObjectHashMap map = new IntObjectHashMap<>(); for (Map.Entry entry : kIndexBuilder.entrySet()) { map.put(entry.getKey(), buildFeatureIndex(entry.getValue())); } map.compact(); return map; } private static ConjunctionIndex.FeatureIndex buildFeatureIndex(FeatureIndexBuilder featureIndexBuilder) { LongObjectHashMap map = new LongObjectHashMap<>(); for (Map.Entry> featureEntry : featureIndexBuilder.map.entrySet()) { int[] conjunctionIds = Ints.toArray(featureEntry.getValue()); map.put(featureEntry.getKey(), conjunctionIds); } map.compact(); return new ConjunctionIndex.FeatureIndex(map); } }