1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
|
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.predicate.index.conjunction;
import com.google.common.primitives.Ints;
import com.google.common.primitives.Longs;
import com.gs.collections.api.map.primitive.IntObjectMap;
import com.gs.collections.impl.map.mutable.primitive.IntObjectHashMap;
import com.gs.collections.impl.map.mutable.primitive.LongObjectHashMap;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
/**
* A builder for {@link ConjunctionIndex}.
*
* @author bjorncs
*/
public class ConjunctionIndexBuilder {
// A map from K value to FeatureIndex
private final HashMap<Integer, FeatureIndexBuilder> kIndexBuilder = new HashMap<>();
private final List<Integer> zListBuilder = new ArrayList<>();
// Unique ids / mapping from internal to external id. LinkedHashSet as the insertion order is crucial.
private final Set<Long> seenIds = new LinkedHashSet<>();
private int idCounter = 0;
private int conjunctionsSeen = 0;
private static class FeatureIndexBuilder {
// Maps a feature id to conjunction id
private final Map<Long, Set<Integer>> map = new HashMap<>();
public void insert(long featureId, int conjunctionId) {
map.computeIfAbsent(featureId, k -> new TreeSet<>()).add(conjunctionId);
}
}
public void indexConjunction(IndexableFeatureConjunction c) {
++conjunctionsSeen;
long externalId = c.id;
if (seenIds.contains(externalId)) return;
seenIds.add(externalId);
int internalId = generateInternalId();
FeatureIndexBuilder featureIndexBuilder = kIndexBuilder.computeIfAbsent(c.k, (k) -> new FeatureIndexBuilder());
c.features.forEach(f -> featureIndexBuilder.insert(f, internalId));
c.negatedFeatures.forEach(f -> featureIndexBuilder.insert(f, internalId & ~1));
if (c.k == 0) {
zListBuilder.add(internalId);
}
}
private int generateInternalId() {
return ((idCounter++) << 1) | 1;
}
public ConjunctionIndex build() {
int[] zList = Ints.toArray(zListBuilder);
IntObjectMap<ConjunctionIndex.FeatureIndex> kIndex = buildKIndex(kIndexBuilder);
long[] idMapping = Longs.toArray(seenIds);
return new ConjunctionIndex(kIndex, zList, idMapping);
}
/**
* @return The number of unique features in index.
*/
public long calculateFeatureCount() {
return kIndexBuilder.values().stream()
.map(index -> index.map.keySet())
.reduce(
new HashSet<>(),
(acc, keySet) -> {
keySet.forEach(acc::add);
return acc;
}, (acc1, acc2) -> {
acc1.addAll(acc2);
return acc1;
})
.size();
}
/**
* @return The number of unique conjunctions indexed.
*/
public long getUniqueConjunctionCount() {
return seenIds.size();
}
public int getZListSize() {
return zListBuilder.size();
}
public int getConjunctionsSeen() {
return conjunctionsSeen;
}
private static IntObjectMap<ConjunctionIndex.FeatureIndex> buildKIndex(HashMap<Integer, FeatureIndexBuilder> kIndexBuilder) {
IntObjectHashMap<ConjunctionIndex.FeatureIndex> map = new IntObjectHashMap<>();
for (Map.Entry<Integer, FeatureIndexBuilder> entry : kIndexBuilder.entrySet()) {
map.put(entry.getKey(), buildFeatureIndex(entry.getValue()));
}
map.compact();
return map;
}
private static ConjunctionIndex.FeatureIndex buildFeatureIndex(FeatureIndexBuilder featureIndexBuilder) {
LongObjectHashMap<int[]> map = new LongObjectHashMap<>();
for (Map.Entry<Long, Set<Integer>> featureEntry : featureIndexBuilder.map.entrySet()) {
int[] conjunctionIds = Ints.toArray(featureEntry.getValue());
map.put(featureEntry.getKey(), conjunctionIds);
}
map.compact();
return new ConjunctionIndex.FeatureIndex(map);
}
}
|