1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
|
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.predicate.index;
import com.google.common.primitives.Ints;
import org.eclipse.collections.impl.map.mutable.primitive.ObjectIntHashMap;
import org.junit.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
/**
 * Unit tests for {@link CachedPostingListCounter}, exercising both the plain counting path
 * and the counter obtained from {@code rebuildCache()} after usage has been registered.
 *
 * <p>Posting lists are Mockito mocks that only stub {@code getDocIds()}.
 *
 * @author bjorncs
 */
public class CachedPostingListCounterTest {

    /** Counting on a fresh counter (no usage registered, no rebuild) yields the per-document totals. */
    @Test
    public void require_that_docids_are_counted_correctly() {
        final int nDocuments = 4;
        CachedPostingListCounter counter = new CachedPostingListCounter(nDocuments);
        List<PostingList> postingLists = list(
                postingList(0, 1, 2, 3),
                postingList(1, 2),
                postingList(1, 3),
                postingList(3));
        byte[] counts = new byte[nDocuments];

        counter.countPostingListsPerDocument(postingLists, counts);

        // doc 0 occurs in 1 list, doc 1 in 3, doc 2 in 2, doc 3 in 3.
        assertArrayEquals(new byte[]{1, 3, 2, 3}, counts);
    }

    /**
     * After a rebuild, the posting lists with the most doc ids are expected to get the lowest
     * indexes in the posting list mapping, and the bit vector is expected to reflect that order.
     */
    @Test
    public void require_that_most_costly_posting_lists_are_first_in_bit_vector() {
        final int nDocuments = 5;
        CachedPostingListCounter counter = new CachedPostingListCounter(nDocuments);

        PostingList p1 = postingList(1, 2, 4);
        PostingList p2 = postingList(0, 1, 2, 3, 4);
        PostingList p3 = postingList(1, 2, 3, 4);
        PostingList p4 = postingList(3, 4);

        List<PostingList> registered = new ArrayList<>(list(p1, p2, p3, p4));
        // Pad with 100 additional single-document posting lists.
        for (int remaining = 100; remaining > 0; remaining--) {
            registered.add(postingList(0));
        }
        counter.registerUsage(registered);

        CachedPostingListCounter rebuilt = counter.rebuildCache();

        // Expected index order: largest doc id array first (p2: 5, p3: 4, p1: 3, p4: 2).
        ObjectIntHashMap<int[]> mapping = rebuilt.getPostingListMapping();
        PostingList[] expectedOrder = {p2, p3, p1, p4};
        for (int index = 0; index < expectedOrder.length; index++) {
            assertEquals(index, mapping.getIfAbsent(expectedOrder[index].getDocIds(), -1));
        }

        // Expected low four bits per document: bit i is set when the posting list
        // mapped to index i contains that document.
        int[] expectedLowBits = {0b0001, 0b0111, 0b0111, 0b1011, 0b1111};
        int[] bitVector = rebuilt.getBitVector();
        for (int docId = 0; docId < expectedLowBits.length; docId++) {
            assertEquals(expectedLowBits[docId], bitVector[docId] & 0b1111);
        }
    }

    /**
     * A rebuilt counter must produce the same totals as an uncached one, both for the full
     * registered set and for a subset of it.
     */
    @Test
    public void require_that_cached_docids_are_counted_correctly() {
        final int nDocuments = 4;
        CachedPostingListCounter counter = new CachedPostingListCounter(nDocuments);

        PostingList p1 = postingList(0, 1, 2, 3);
        PostingList p2 = postingList(1, 2);
        PostingList p3 = postingList(1, 3);
        PostingList p4 = postingList(3);
        List<PostingList> all = list(p1, p2, p3, p4);

        counter.registerUsage(all);
        CachedPostingListCounter rebuilt = counter.rebuildCache();

        byte[] counts = new byte[nDocuments];
        rebuilt.countPostingListsPerDocument(all, counts);
        assertArrayEquals(new byte[]{1, 3, 2, 3}, counts);

        // The same output array is reused; the expected values require the earlier
        // totals to be replaced rather than accumulated.
        List<PostingList> subset = list(p1, p2);
        rebuilt.countPostingListsPerDocument(subset, counts);
        assertArrayEquals(new byte[]{1, 2, 2, 1}, counts);
    }

    /**
     * Registers 100 single-document posting lists per document and checks the totals from a
     * rebuilt counter, first for all lists and then for only the lists containing document 0.
     */
    @Test
    public void require_that_cache_rebuilding_behaves_correctly_for_large_amount_of_posting_lists() {
        final int nDocuments = 4;
        final int nPostingLists = 100 * nDocuments;
        CachedPostingListCounter counter = new CachedPostingListCounter(nDocuments);

        // Posting list i contains exactly one document: i % nDocuments.
        List<PostingList> postingLists = new ArrayList<>(nPostingLists);
        for (int i = 0; i < nPostingLists; i++) {
            postingLists.add(postingList(i % nDocuments));
        }

        counter.registerUsage(postingLists);
        CachedPostingListCounter rebuilt = counter.rebuildCache();

        byte[] counts = new byte[nDocuments];
        rebuilt.countPostingListsPerDocument(postingLists, counts);
        assertArrayEquals(new byte[]{100, 100, 100, 100}, counts);

        // Every nDocuments-th posting list (indexes 0, 4, 8, ...) holds document 0.
        List<PostingList> doc0PostingLists = new ArrayList<>();
        for (int k = 0; k < 100; k++) {
            doc0PostingLists.add(postingLists.get(k * nDocuments));
        }
        rebuilt.countPostingListsPerDocument(doc0PostingLists, counts);
        assertArrayEquals(new byte[]{100, 0, 0, 0}, counts);
    }

    /** Wraps the given posting lists in a fixed-size list view. */
    private static List<PostingList> list(PostingList... postingLists) {
        return Arrays.asList(postingLists);
    }

    /**
     * Creates a mocked {@link PostingList} whose {@code getDocIds()} is stubbed to return a
     * single {@code int[]} holding the given doc ids.
     */
    private static PostingList postingList(Integer... docIds) {
        int[] ids = Ints.toArray(Arrays.asList(docIds));
        PostingList mocked = mock(PostingList.class);
        when(mocked.getDocIds()).thenReturn(ids);
        return mocked;
    }
}
|