aboutsummaryrefslogtreecommitdiffstats
path: root/predicate-search/src/test/java/com/yahoo/search/predicate/index/CachedPostingListCounterTest.java
blob: 68f692e311981402ca10f20b325caeb087f7ec40 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.predicate.index;

import com.google.common.primitives.Ints;
import org.eclipse.collections.impl.map.mutable.primitive.ObjectIntHashMap;
import org.junit.jupiter.api.Test;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

/**
 * Unit tests for {@link CachedPostingListCounter}. Covers counting posting lists per document
 * on a fresh counter, the ordering of posting lists in a rebuilt cache, and counting through
 * a rebuilt cache.
 *
 * @author bjorncs
 */
public class CachedPostingListCounterTest {

    /** Counts posting lists per document on a counter that has no cache built. */
    @Test
    void require_that_docids_are_counted_correctly() {
        int nDocuments = 4;
        byte[] nPostingListsPerDocument = new byte[nDocuments];
        CachedPostingListCounter counter = new CachedPostingListCounter(nDocuments);
        List<PostingList> postingLists = list(
                postingList(0, 1, 2, 3),
                postingList(1, 2),
                postingList(1, 3),
                postingList(3));

        counter.countPostingListsPerDocument(postingLists, nPostingListsPerDocument);

        // Doc 0 occurs in 1 list, doc 1 in 3, doc 2 in 2 and doc 3 in 3.
        assertArrayEquals(new byte[]{1, 3, 2, 3}, nPostingListsPerDocument);
    }

    /** Verifies the index assigned to each posting list, and the resulting bit vector, after a rebuild. */
    @Test
    void require_that_most_costly_posting_lists_are_first_in_bit_vector() {
        int nDocuments = 5;
        CachedPostingListCounter counter = new CachedPostingListCounter(nDocuments);
        PostingList p1 = postingList(1, 2, 4);
        PostingList p2 = postingList(0, 1, 2, 3, 4);
        PostingList p3 = postingList(1, 2, 3, 4);
        PostingList p4 = postingList(3, 4);
        List<PostingList> usedPostingLists = new ArrayList<>(list(p1, p2, p3, p4));
        // NOTE(review): presumably pads the usage data with many cheap single-document lists
        // so that the ordering by cost is actually exercised - confirm against the counter.
        int nFillerPostingLists = 100;
        for (int i = 0; i < nFillerPostingLists; i++) {
            usedPostingLists.add(postingList(0));
        }

        counter.registerUsage(usedPostingLists);
        CachedPostingListCounter rebuiltCounter = counter.rebuildCache();

        // Expected indexes are consistent with descending number of doc ids:
        // p2 (5 ids), p3 (4 ids), p1 (3 ids), p4 (2 ids).
        ObjectIntHashMap<int[]> mapping = rebuiltCounter.getPostingListMapping();
        assertEquals(0, mapping.getIfAbsent(p2.getDocIds(), -1));
        assertEquals(1, mapping.getIfAbsent(p3.getDocIds(), -1));
        assertEquals(2, mapping.getIfAbsent(p1.getDocIds(), -1));
        assertEquals(3, mapping.getIfAbsent(p4.getDocIds(), -1));

        // With the mapping above, bit i of bitVector[docId] is expected to be set when the
        // posting list mapped to index i contains docId. Only the 4 lowest bits are inspected.
        int lowFourBits = 0b1111;
        int[] bitVector = rebuiltCounter.getBitVector();
        assertEquals(0b0001, bitVector[0] & lowFourBits); // p2
        assertEquals(0b0111, bitVector[1] & lowFourBits); // p2, p3, p1
        assertEquals(0b0111, bitVector[2] & lowFourBits); // p2, p3, p1
        assertEquals(0b1011, bitVector[3] & lowFourBits); // p2, p3, p4
        assertEquals(0b1111, bitVector[4] & lowFourBits); // p2, p3, p1, p4
    }

    /** Counts through a rebuilt cache, first with all registered lists and then with a subset. */
    @Test
    void require_that_cached_docids_are_counted_correctly() {
        int nDocuments = 4;
        byte[] nPostingListsPerDocument = new byte[nDocuments];
        CachedPostingListCounter counter = new CachedPostingListCounter(nDocuments);
        PostingList p1 = postingList(0, 1, 2, 3);
        PostingList p2 = postingList(1, 2);
        PostingList p3 = postingList(1, 3);
        PostingList p4 = postingList(3);
        List<PostingList> postingLists = list(p1, p2, p3, p4);

        counter.registerUsage(postingLists);
        CachedPostingListCounter rebuiltCounter = counter.rebuildCache();

        rebuiltCounter.countPostingListsPerDocument(postingLists, nPostingListsPerDocument);
        assertArrayEquals(new byte[]{1, 3, 2, 3}, nPostingListsPerDocument);

        // The same output array is reused on purpose: the expected values below are the counts
        // for p1 and p2 alone, i.e. nothing may be carried over from the previous call.
        rebuiltCounter.countPostingListsPerDocument(list(p1, p2), nPostingListsPerDocument);
        assertArrayEquals(new byte[]{1, 2, 2, 1}, nPostingListsPerDocument);
    }

    /** Rebuilds the cache from 400 single-document posting lists and counts through it. */
    @Test
    void require_that_cache_rebuilding_behaves_correctly_for_large_amount_of_posting_lists() {
        int nDocuments = 4;
        int nPostingListsPerDoc = 100;
        int nPostingLists = nPostingListsPerDoc * nDocuments;
        byte[] nPostingListsPerDocument = new byte[nDocuments];
        CachedPostingListCounter counter = new CachedPostingListCounter(nDocuments);
        // Posting list i contains only document (i % nDocuments), giving 100 lists per document.
        List<PostingList> postingLists = new ArrayList<>(nPostingLists);
        for (int i = 0; i < nPostingLists; i++) {
            postingLists.add(postingList(i % nDocuments));
        }

        counter.registerUsage(postingLists);
        CachedPostingListCounter rebuiltCounter = counter.rebuildCache();

        rebuiltCounter.countPostingListsPerDocument(postingLists, nPostingListsPerDocument);
        assertArrayEquals(new byte[]{100, 100, 100, 100}, nPostingListsPerDocument);

        // Every nDocuments-th list, starting at index 0, is one of the lists for document 0.
        List<PostingList> doc0PostingLists = new ArrayList<>(nPostingListsPerDoc);
        for (int i = 0; i < nPostingLists; i += nDocuments) {
            doc0PostingLists.add(postingLists.get(i));
        }
        // Output array reused: the other documents are expected to drop back to zero.
        rebuiltCounter.countPostingListsPerDocument(doc0PostingLists, nPostingListsPerDocument);
        assertArrayEquals(new byte[]{100, 0, 0, 0}, nPostingListsPerDocument);
    }

    /** Returns the given posting lists as a fixed-size list, in argument order. */
    private static List<PostingList> list(PostingList... postingLists) {
        return Arrays.asList(postingLists);
    }

    /**
     * Creates a mocked {@link PostingList} whose {@code getDocIds()} returns the given doc ids.
     * Takes primitive {@code int}s so the ids are not boxed and unboxed again on the way in.
     * The stub hands back the same array instance on every call, which the mapping lookups
     * by {@code getDocIds()} in the bit-vector test depend on.
     *
     * @param docIds the document ids the mocked posting list should report
     * @return a mock returning {@code docIds} from {@code getDocIds()}
     */
    private static PostingList postingList(int... docIds) {
        PostingList postingList = mock(PostingList.class);
        when(postingList.getDocIds()).thenReturn(docIds);
        return postingList;
    }

}