diff options
Diffstat (limited to 'predicate-search/src/test/java/com/yahoo/search/predicate/index')
11 files changed, 1578 insertions, 0 deletions
diff --git a/predicate-search/src/test/java/com/yahoo/search/predicate/index/BoundsPostingListTest.java b/predicate-search/src/test/java/com/yahoo/search/predicate/index/BoundsPostingListTest.java new file mode 100644 index 00000000000..06782e3603a --- /dev/null +++ b/predicate-search/src/test/java/com/yahoo/search/predicate/index/BoundsPostingListTest.java @@ -0,0 +1,84 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.predicate.index; + +import com.google.common.primitives.Ints; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; + +import static java.util.stream.Collectors.toList; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +/** + * This test lies in com.yahoo.search.predicate to get access to some methods of PredicateIndex. + * + * @author <a href="mailto:magnarn@yahoo-inc.com">Magnar Nedland</a> + */ +public class BoundsPostingListTest { + + @Test + public void requireThatPostingListChecksBounds() { + PredicateIntervalStore.Builder builder = new PredicateIntervalStore.Builder(); + List<Integer> docIds = new ArrayList<>(); + List<Integer> dataRefs = new ArrayList<>(); + for (int id = 1; id < 100; ++id) { + List<IntervalWithBounds> boundsList = new ArrayList<>(); + for (int i = 0; i <= id; ++i) { + int bounds; + if (id < 30) { + bounds = 0x80000000 | i; // diff >= i + } else if (id < 60) { + bounds = 0x40000000 | i; // diff < i + } else { + bounds = (i << 16) | (i + 10); // i < diff < i + 10 + } + boundsList.add(new IntervalWithBounds((i + 1) << 16 | 0xffff, bounds)); + } + docIds.add(id); + dataRefs.add(builder.insert(boundsList.stream().flatMap(IntervalWithBounds::stream).collect(toList()))); + } + + PredicateIntervalStore store = builder.build(); + BoundsPostingList postingList = new BoundsPostingList( + store, Ints.toArray(docIds), Ints.toArray(dataRefs), 0xffffffffffffffffL, 5); + assertEquals(-1, postingList.getDocId()); + assertEquals(0, postingList.getInterval()); + assertEquals(0xffffffffffffffffL, postingList.getSubquery()); + + checkNext(postingList, 0, 1, 2); // [0..] .. [1..] + checkNext(postingList, 1, 2, 3); // [0..] .. [2..] + checkNext(postingList, 10, 11, 6); // [0..] .. [5..] + checkNext(postingList, 20, 21, 6); + + checkNext(postingList, 30, 31, 26); // [..5] .. [..30] + checkNext(postingList, 50, 51, 46); + + checkNext(postingList, 60, 61, 6); // [0..10] .. [5..15] + + postingList = new BoundsPostingList(store, Ints.toArray(docIds), Ints.toArray(dataRefs), 0xffffffffffffffffL, 40); + checkNext(postingList, 0, 1, 2); + checkNext(postingList, 20, 21, 22); + + checkNext(postingList, 30, 31, 0); // skip ahead to match + checkNext(postingList, 32, 33, 0); // skip ahead to match + checkNext(postingList, 33, 34, 0); // skip ahead to match + checkNext(postingList, 40, 41, 1); + checkNext(postingList, 50, 51, 11); // [..40] .. [..50] + + checkNext(postingList, 60, 61, 10); // [31..40] .. [40..49] + } + + private void checkNext(BoundsPostingList postingList, int movePast, int docId, int intervalCount) { + assertTrue("Unable to move past " + movePast, postingList.nextDocument(movePast)); + assertEquals(intervalCount > 0, postingList.prepareIntervals()); + assertEquals(docId, postingList.getDocId()); + for (int i = 0; i < intervalCount - 1; ++i) { + assertTrue("Too few intervals, expected " + intervalCount, postingList.nextInterval()); + } + assertFalse("Too many intervals, expected " + intervalCount, postingList.nextInterval()); + } + +} diff --git a/predicate-search/src/test/java/com/yahoo/search/predicate/index/CachedPostingListCounterTest.java b/predicate-search/src/test/java/com/yahoo/search/predicate/index/CachedPostingListCounterTest.java new file mode 100644 index 00000000000..d1b8dd01039 --- /dev/null +++ b/predicate-search/src/test/java/com/yahoo/search/predicate/index/CachedPostingListCounterTest.java @@ -0,0 +1,116 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.predicate.index; + +import com.gs.collections.impl.map.mutable.primitive.ObjectIntHashMap; +import org.apache.commons.lang.ArrayUtils; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +/** + * @author bjorncs + */ +public class CachedPostingListCounterTest { + + @Test + public void require_that_docids_are_counted_correctly() { + int nDocuments = 4; + byte[] nPostingListsPerDocument = new byte[nDocuments]; + CachedPostingListCounter c = new CachedPostingListCounter(nDocuments); + c.countPostingListsPerDocument( + list( + postingList(0, 1, 2, 3), + postingList(1, 2), + postingList(1, 3), + postingList(3)), + nPostingListsPerDocument); + assertArrayEquals(new byte[]{1, 3, 2, 3}, nPostingListsPerDocument); + } + + @Test + public void require_that_most_costly_posting_lists_are_first_in_bit_vector() { + int nDocuments = 5; + CachedPostingListCounter c = new CachedPostingListCounter(nDocuments); + List<PostingList> list = new ArrayList<>(); + PostingList p1 = postingList(1, 2, 4); + PostingList p2 = postingList(0, 1, 2, 3, 4); + PostingList p3 = postingList(1, 2, 3, 4); + PostingList p4 = postingList(3, 4); + list.add(p1); list.add(p2); list.add(p3); list.add(p4); + for (int i = 0; i < 100; i++) { + list.add(postingList(0)); + } + c.registerUsage(list); + CachedPostingListCounter newC = c.rebuildCache(); + ObjectIntHashMap<int[]> mapping = newC.getPostingListMapping(); + assertEquals(0, mapping.getIfAbsent(p2.getDocIds(), -1)); + assertEquals(1, mapping.getIfAbsent(p3.getDocIds(), -1)); + assertEquals(2, mapping.getIfAbsent(p1.getDocIds(), -1)); + assertEquals(3, mapping.getIfAbsent(p4.getDocIds(), -1)); + + int[] bitVector = newC.getBitVector(); + assertEquals(0b0001, bitVector[0] & 0b1111); + assertEquals(0b0111, bitVector[1] & 0b1111); + assertEquals(0b0111, bitVector[2] & 0b1111); + assertEquals(0b1011, bitVector[3] & 0b1111); + assertEquals(0b1111, bitVector[4] & 0b1111); + } + + @Test + public void require_that_cached_docids_are_counted_correctly() { + int nDocuments = 4; + byte[] nPostingListsPerDocument = new byte[nDocuments]; + CachedPostingListCounter c = new CachedPostingListCounter(nDocuments); + PostingList p1 = postingList(0, 1, 2, 3); + PostingList p2 = postingList(1, 2); + PostingList p3 = postingList(1, 3); + PostingList p4 = postingList(3); + List<PostingList> postingLists = list(p1, p2, p3, p4); + c.registerUsage(postingLists); + CachedPostingListCounter newC = c.rebuildCache(); + newC.countPostingListsPerDocument(postingLists, nPostingListsPerDocument); + assertArrayEquals(new byte[]{1, 3, 2, 3}, nPostingListsPerDocument); + newC.countPostingListsPerDocument(list(p1, p2), nPostingListsPerDocument); + assertArrayEquals(new byte[]{1, 2, 2, 1}, nPostingListsPerDocument); + } + + @Test + public void require_that_cache_rebuilding_behaves_correctly_for_large_amount_of_posting_lists() { + int nDocuments = 4; + byte[] nPostingListsPerDocument = new byte[nDocuments]; + CachedPostingListCounter c = new CachedPostingListCounter(nDocuments); + List<PostingList> postingLists = new ArrayList<>(100 * nDocuments); + for (int i = 0; i < 100 * nDocuments; i++) { + postingLists.add(postingList(i % nDocuments)); + } + c.registerUsage(postingLists); + CachedPostingListCounter newC = c.rebuildCache(); + newC.countPostingListsPerDocument(postingLists, nPostingListsPerDocument); + assertArrayEquals(new byte[]{100, 100, 100, 100}, nPostingListsPerDocument); + + List<PostingList> doc0PostingLists = new ArrayList<>(); + for (int i = 0; i < 100 * nDocuments; i += nDocuments) { + doc0PostingLists.add(postingLists.get(i)); + } + newC.countPostingListsPerDocument(doc0PostingLists, nPostingListsPerDocument); + assertArrayEquals(new byte[]{100, 0, 0, 0}, nPostingListsPerDocument); + } + + private static List<PostingList> list(PostingList... postingLists) { + return Arrays.asList(postingLists); + } + + private static PostingList postingList(Integer... docIds) { + PostingList postingList = mock(PostingList.class); + when(postingList.getDocIds()).thenReturn(ArrayUtils.toPrimitive(docIds)); + return postingList; + } + +} diff --git a/predicate-search/src/test/java/com/yahoo/search/predicate/index/IntervalPostingListTest.java b/predicate-search/src/test/java/com/yahoo/search/predicate/index/IntervalPostingListTest.java new file mode 100644 index 00000000000..41f4ba55750 --- /dev/null +++ b/predicate-search/src/test/java/com/yahoo/search/predicate/index/IntervalPostingListTest.java @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.predicate.index; + +import com.yahoo.search.predicate.SubqueryBitmap; +import org.junit.Test; + +import java.util.Arrays; + +import static junit.framework.TestCase.assertFalse; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class IntervalPostingListTest { + @Test + public void requireThatPostingListCanIterate() { + PredicateIntervalStore.Builder builder = new PredicateIntervalStore.Builder(); + int ref1 = builder.insert(Arrays.asList(0x1ffff)); + int ref2 = builder.insert(Arrays.asList(0x1ffff)); + int ref3 = builder.insert(Arrays.asList(0x10001, 0x2ffff)); + IntervalPostingList postingList = new IntervalPostingList( + builder.build(), new int[]{2, 4, 6}, new int[] {ref1, ref2, ref3}, SubqueryBitmap.ALL_SUBQUERIES); + assertEquals(-1, postingList.getDocId()); + assertEquals(0, postingList.getInterval()); + assertEquals(0xffffffffffffffffL, postingList.getSubquery()); + + assertTrue(postingList.nextDocument(0)); + assertTrue(postingList.prepareIntervals()); + assertEquals(2, postingList.getDocId()); + assertEquals(0x1ffff, postingList.getInterval()); + assertFalse(postingList.nextInterval()); + + assertTrue(postingList.nextDocument(4)); + assertTrue(postingList.prepareIntervals()); + assertEquals(6, postingList.getDocId()); + assertEquals(0x10001, postingList.getInterval()); + assertTrue(postingList.nextInterval()); + assertEquals(0x2ffff, postingList.getInterval()); + assertFalse(postingList.nextInterval()); + + assertFalse(postingList.nextDocument(8)); + } + +} diff --git a/predicate-search/src/test/java/com/yahoo/search/predicate/index/PredicateIntervalStoreTest.java b/predicate-search/src/test/java/com/yahoo/search/predicate/index/PredicateIntervalStoreTest.java new file mode 100644 index 00000000000..1f2fd390cb5 --- /dev/null +++ b/predicate-search/src/test/java/com/yahoo/search/predicate/index/PredicateIntervalStoreTest.java @@ -0,0 +1,82 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.predicate.index; + +import com.google.common.primitives.Ints; +import org.junit.Test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import static com.yahoo.search.predicate.serialization.SerializationTestHelper.assertSerializationDeserializationMatches; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +/** + * @author bjorncs + */ +public class PredicateIntervalStoreTest { + + @Test(expected = IllegalArgumentException.class) + public void requireThatEmptyIntervalListThrows() { + PredicateIntervalStore.Builder builder = new PredicateIntervalStore.Builder(); + builder.insert(new ArrayList<>()); + } + + @Test + public void requireThatSingleIntervalCanBeInserted() { + testInsertAndRetrieve(0x0001ffff); + } + + @Test + public void requireThatMultiIntervalEntriesCanBeInserted() { + testInsertAndRetrieve(0x00010001, 0x00020002, 0x0003ffff); + testInsertAndRetrieve(0x00010001, 0x00020002, 0x00030003, 0x00040004, 0x00050005, 0x00060006, + 0x00070007, 0x00080008, 0x00090009, 0x000a000a); + } + + @Test + public void requireThatDifferentSizeIntervalArraysCanBeInserted() { + PredicateIntervalStore.Builder builder = new PredicateIntervalStore.Builder(); + int intervals1[] = new int[] {0x00010001, 0x00020002}; + int intervals2[] = new int[] {0x00010001, 0x00020002, 0x00030003}; + assertEquals(0, builder.insert(Ints.asList(intervals1))); + assertEquals(1, builder.insert(Ints.asList(intervals2))); + } + + @Test + public void requireThatSerializationAndDeserializationRetainIntervals() throws IOException { + PredicateIntervalStore.Builder builder = new PredicateIntervalStore.Builder(); + builder.insert(Arrays.asList(0x00010001, 0x00020002)); + builder.insert(Arrays.asList(0x00010001, 0x00020002, 0x00030003)); + builder.insert(Arrays.asList(0x0fffffff, 0x00020002, 0x00030003)); + PredicateIntervalStore store = builder.build(); + assertSerializationDeserializationMatches( + store, PredicateIntervalStore::writeToOutputStream, PredicateIntervalStore::fromInputStream); + } + + @Test + public void requireThatEqualIntervalListsReturnsSameReference() { + PredicateIntervalStore.Builder builder = new PredicateIntervalStore.Builder(); + List<Integer> intervals1 = Arrays.asList(0x00010001, 0x00020002); + List<Integer> intervals2 = Arrays.asList(0x00010001, 0x00020002); + int ref1 = builder.insert(intervals1); + int ref2 = builder.insert(intervals2); + PredicateIntervalStore store = builder.build(); + int[] a1 = store.get(ref1); + int[] a2 = store.get(ref2); + assertTrue(a1 == a2); + } + + private static void testInsertAndRetrieve(int... intervals) { + PredicateIntervalStore.Builder builder = new PredicateIntervalStore.Builder(); + int ref = builder.insert(Ints.asList(intervals)); + PredicateIntervalStore store = builder.build(); + + int retrieved[] = store.get(ref); + assertArrayEquals(intervals, retrieved); + } + +} diff --git a/predicate-search/src/test/java/com/yahoo/search/predicate/index/PredicateRangeTermExpanderTest.java b/predicate-search/src/test/java/com/yahoo/search/predicate/index/PredicateRangeTermExpanderTest.java new file mode 100644 index 00000000000..8eacf126bd7 --- /dev/null +++ b/predicate-search/src/test/java/com/yahoo/search/predicate/index/PredicateRangeTermExpanderTest.java @@ -0,0 +1,354 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.predicate.index; + +import com.yahoo.document.predicate.PredicateHash; +import org.junit.Test; + +import java.util.Arrays; +import java.util.Iterator; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.fail; + +/** + * @author <a href="mailto:magnarn@yahoo-inc.com">Magnar Nedland</a> + */ +public class PredicateRangeTermExpanderTest { + @Test + public void requireThatSmallRangeIsExpanded() { + PredicateRangeTermExpander expander = new PredicateRangeTermExpander(10); + Iterator<String> expectedLabels = Arrays.asList( + "key=40-49", + "key=0-99", + "key=0-999", + "key=0-9999", + "key=0-99999", + "key=0-999999", + "key=0-9999999", + "key=0-99999999", + "key=0-999999999", + "key=0-9999999999", + "key=0-99999999999", + "key=0-999999999999", + "key=0-9999999999999", + "key=0-99999999999999", + "key=0-999999999999999", + "key=0-9999999999999999", + "key=0-99999999999999999", + "key=0-999999999999999999").iterator(); + expander.expand("key", 42, range -> assertEquals(PredicateHash.hash64(expectedLabels.next()), range), + (edge, value) -> { assertEquals(PredicateHash.hash64("key=40"), edge); assertEquals(2, value); }); + assertFalse(expectedLabels.hasNext()); + } + + @Test + public void requireThatLargeRangeIsExpanded() { + PredicateRangeTermExpander expander = new PredicateRangeTermExpander(10); + Iterator<String> expectedLabels = Arrays.asList( + "key=123456789012345670-123456789012345679", + "key=123456789012345600-123456789012345699", + "key=123456789012345000-123456789012345999", + "key=123456789012340000-123456789012349999", + "key=123456789012300000-123456789012399999", + "key=123456789012000000-123456789012999999", + "key=123456789010000000-123456789019999999", + "key=123456789000000000-123456789099999999", + "key=123456789000000000-123456789999999999", + "key=123456780000000000-123456789999999999", + "key=123456700000000000-123456799999999999", + "key=123456000000000000-123456999999999999", + "key=123450000000000000-123459999999999999", + "key=123400000000000000-123499999999999999", + "key=123000000000000000-123999999999999999", + "key=120000000000000000-129999999999999999", + "key=100000000000000000-199999999999999999", + "key=0-999999999999999999").iterator(); + expander.expand("key", 123456789012345678L, range -> assertEquals(PredicateHash.hash64(expectedLabels.next()), range), + (edge, value) -> { assertEquals(PredicateHash.hash64("key=123456789012345670"), edge); assertEquals(8, value); }); + assertFalse(expectedLabels.hasNext()); + } + + @Test + public void requireThatMaxRangeIsExpanded() { + PredicateRangeTermExpander expander = new PredicateRangeTermExpander(10); + expander.expand("key", 9223372036854775807L, range -> fail(), + (edge, value) -> { + assertEquals(PredicateHash.hash64("key=9223372036854775800"), edge); + assertEquals(7, value); + }); + } + + @Test + public void requireThatSmallNegativeRangeIsExpanded() { + PredicateRangeTermExpander expander = new PredicateRangeTermExpander(10); + Iterator<String> expectedLabels = Arrays.asList( + "key=-49-40", + "key=-99-0", + "key=-999-0", + "key=-9999-0", + "key=-99999-0", + "key=-999999-0", + "key=-9999999-0", + "key=-99999999-0", + "key=-999999999-0", + "key=-9999999999-0", + "key=-99999999999-0", + "key=-999999999999-0", + "key=-9999999999999-0", + "key=-99999999999999-0", + "key=-999999999999999-0", + "key=-9999999999999999-0", + "key=-99999999999999999-0", + "key=-999999999999999999-0").iterator(); + expander.expand("key", -42, range -> assertEquals(PredicateHash.hash64(expectedLabels.next()), range), + (edge, value) -> { assertEquals(PredicateHash.hash64("key=-40"), edge); assertEquals(2, value); }); + assertFalse(expectedLabels.hasNext()); + } + + @Test + public void requireThatMinRangeIsExpanded() { + PredicateRangeTermExpander expander = new PredicateRangeTermExpander(10); + expander.expand("key", -9223372036854775808L, range -> fail(), + (edge, value) -> { + assertEquals(PredicateHash.hash64("key=-9223372036854775800"), edge); + assertEquals(8, value); + }); + } + + @Test + public void requireThatMinRangeMinus9IsExpanded() { + PredicateRangeTermExpander expander = new PredicateRangeTermExpander(10); + Iterator<String> expectedLabels = Arrays.asList( + "key=-9223372036854775799-9223372036854775790", + "key=-9223372036854775799-9223372036854775700").iterator(); + expander.expand("key", -9223372036854775799L, range -> assertEquals(PredicateHash.hash64(expectedLabels.next()), range), + (edge, value) -> { assertEquals(PredicateHash.hash64("key=-9223372036854775790"), edge); assertEquals(9, value); }); + assertFalse(expectedLabels.hasNext()); + } + + @Test + public void requireThatMinRangeIsExpandedWithArity8() { + PredicateRangeTermExpander expander = new PredicateRangeTermExpander(8); + expander.expand("key", -9223372036854775808L, range -> fail(), + (edge, value) -> { + assertEquals(PredicateHash.hash64("key=-9223372036854775808"), edge); + assertEquals(0, value); + }); + } + + @Test + public void requireThatSmallRangeIsExpandedInArity2() { + PredicateRangeTermExpander expander = new PredicateRangeTermExpander(2); + Iterator<String> expectedLabels = Arrays.asList( + "key=42-43", + "key=40-43", + "key=40-47", + "key=32-47", + "key=32-63", + "key=0-63", + "key=0-127", + "key=0-255", + "key=0-511", + "key=0-1023", + "key=0-2047", + "key=0-4095", + "key=0-8191", + "key=0-16383", + "key=0-32767", + "key=0-65535", + "key=0-131071", + "key=0-262143", + "key=0-524287", + "key=0-1048575", + "key=0-2097151", + "key=0-4194303", + "key=0-8388607", + "key=0-16777215", + "key=0-33554431", + "key=0-67108863", + "key=0-134217727", + "key=0-268435455", + "key=0-536870911", + "key=0-1073741823", + "key=0-2147483647", + "key=0-4294967295", + "key=0-8589934591", + "key=0-17179869183", + "key=0-34359738367", + "key=0-68719476735", + "key=0-137438953471", + "key=0-274877906943", + "key=0-549755813887", + "key=0-1099511627775", + "key=0-2199023255551", + "key=0-4398046511103", + "key=0-8796093022207", + "key=0-17592186044415", + "key=0-35184372088831", + "key=0-70368744177663", + "key=0-140737488355327", + "key=0-281474976710655", + "key=0-562949953421311", + "key=0-1125899906842623", + "key=0-2251799813685247", + "key=0-4503599627370495", + "key=0-9007199254740991", + "key=0-18014398509481983", + "key=0-36028797018963967", + "key=0-72057594037927935", + "key=0-144115188075855871", + "key=0-288230376151711743", + "key=0-576460752303423487", + "key=0-1152921504606846975", + "key=0-2305843009213693951", + "key=0-4611686018427387903", + "key=0-9223372036854775807").iterator(); + expander.expand("key", 42, range -> assertEquals(PredicateHash.hash64(expectedLabels.next()), range), + (edge, value) -> { assertEquals(PredicateHash.hash64("key=42"), edge); assertEquals(0, value); }); + assertFalse(expectedLabels.hasNext()); + } + + @Test + public void requireThatSmallNegativeRangeIsExpandedInArity2() { + PredicateRangeTermExpander expander = new PredicateRangeTermExpander(2); + Iterator<String> expectedLabels = Arrays.asList( + "key=-43-42", + "key=-43-40", + "key=-47-40", + "key=-47-32", + "key=-63-32", + "key=-63-0", + "key=-127-0", + "key=-255-0", + "key=-511-0", + "key=-1023-0", + "key=-2047-0", + "key=-4095-0", + "key=-8191-0", + "key=-16383-0", + "key=-32767-0", + "key=-65535-0", + "key=-131071-0", + "key=-262143-0", + "key=-524287-0", + "key=-1048575-0", + "key=-2097151-0", + "key=-4194303-0", + "key=-8388607-0", + "key=-16777215-0", + "key=-33554431-0", + "key=-67108863-0", + "key=-134217727-0", + "key=-268435455-0", + "key=-536870911-0", + "key=-1073741823-0", + "key=-2147483647-0", + "key=-4294967295-0", + "key=-8589934591-0", + "key=-17179869183-0", + "key=-34359738367-0", + "key=-68719476735-0", + "key=-137438953471-0", + "key=-274877906943-0", + "key=-549755813887-0", + "key=-1099511627775-0", + "key=-2199023255551-0", + "key=-4398046511103-0", + "key=-8796093022207-0", + "key=-17592186044415-0", + "key=-35184372088831-0", + "key=-70368744177663-0", + "key=-140737488355327-0", + "key=-281474976710655-0", + "key=-562949953421311-0", + "key=-1125899906842623-0", + "key=-2251799813685247-0", + "key=-4503599627370495-0", + "key=-9007199254740991-0", + "key=-18014398509481983-0", + "key=-36028797018963967-0", + "key=-72057594037927935-0", + "key=-144115188075855871-0", + "key=-288230376151711743-0", + "key=-576460752303423487-0", + "key=-1152921504606846975-0", + "key=-2305843009213693951-0", + "key=-4611686018427387903-0", + "key=-9223372036854775807-0").iterator(); + expander.expand("key", -42, range -> assertEquals(PredicateHash.hash64(expectedLabels.next()), range), + (edge, value) -> { assertEquals(PredicateHash.hash64("key=-42"), edge); assertEquals(0, value); }); + assertFalse(expectedLabels.hasNext()); + } + + @Test + public void requireThatUpperBoundIsUsed() { + PredicateRangeTermExpander expander = new PredicateRangeTermExpander(10, -99, 9999); + Iterator<String> expectedLabels = Arrays.asList( + "key=40-49", + "key=0-99", + "key=0-999", + "key=0-9999").iterator(); + expander.expand("key", 42, range -> assertEquals(PredicateHash.hash64(expectedLabels.next()), range), + (edge, value) -> { assertEquals(PredicateHash.hash64("key=40"), edge); assertEquals(2, value); }); + assertFalse(expectedLabels.hasNext()); + } + + @Test + public void requireThatLowerBoundIsUsed() { + PredicateRangeTermExpander expander = new PredicateRangeTermExpander(10, -9999, 99); + Iterator<String> expectedLabels = Arrays.asList( + "key=-49-40", + "key=-99-0", + "key=-999-0", + "key=-9999-0").iterator(); + expander.expand("key", -42, range -> assertEquals(PredicateHash.hash64(expectedLabels.next()), range), + (edge, value) -> { assertEquals(PredicateHash.hash64("key=-40"), edge); assertEquals(2, value); }); + assertFalse(expectedLabels.hasNext()); + } + + @Test + public void requireThatSearchesOutsideBoundsGenerateNoLabels() { + PredicateRangeTermExpander expander = new PredicateRangeTermExpander(10, 0, 200); + expander.expand("key", -10, x -> fail(), (x,y) -> fail()); + expander.expand("key", 210, x -> fail(), (x, y) -> fail()); + } + + @Test + public void requireThatUpperAndLowerBoundGreaterThan0Works() { + PredicateRangeTermExpander expander = new PredicateRangeTermExpander(10, 100, 9999); + Iterator<String> expectedLabels = Arrays.asList( + "key=140-149", + "key=100-199", + "key=0-999", + "key=0-9999").iterator(); + expander.expand("key", 142, range -> assertEquals(PredicateHash.hash64(expectedLabels.next()), range), + (edge, value) -> { assertEquals(PredicateHash.hash64("key=140"), edge); assertEquals(2, value); }); + assertFalse(expectedLabels.hasNext()); + } + + @Test + public void requireThatSearchCloseToUnevenUpperBoundIsSensible() { + PredicateRangeTermExpander expander = new PredicateRangeTermExpander(10, -99, 1234); + Iterator<String> expectedLabels = Arrays.asList( + "key=40-49", + "key=0-99", + "key=0-999", + "key=0-9999").iterator(); + expander.expand("key", 42, range -> assertEquals(PredicateHash.hash64(expectedLabels.next()), range), + (edge, value) -> { assertEquals(PredicateHash.hash64("key=40"), edge); assertEquals(2, value); }); + assertFalse(expectedLabels.hasNext()); + } + + @Test + public void requireThatSearchCloseToMaxUnevenUpperBoundIsSensible() { + PredicateRangeTermExpander expander = new PredicateRangeTermExpander(10, 0, 9223372036854771234L); + Iterator<String> expectedLabels = Arrays.asList( + "key=9223372036854770000-9223372036854770009", + "key=9223372036854770000-9223372036854770099", + "key=9223372036854770000-9223372036854770999").iterator(); + expander.expand("key", 9223372036854770000L, range -> assertEquals(PredicateHash.hash64(expectedLabels.next()), range), + (edge, value) -> { assertEquals(PredicateHash.hash64("key=9223372036854770000"), edge); assertEquals(0, value); }); + assertFalse(expectedLabels.hasNext()); + } +} diff --git a/predicate-search/src/test/java/com/yahoo/search/predicate/index/PredicateSearchTest.java b/predicate-search/src/test/java/com/yahoo/search/predicate/index/PredicateSearchTest.java new file mode 100644 index 00000000000..64a44ff3680 --- /dev/null +++ b/predicate-search/src/test/java/com/yahoo/search/predicate/index/PredicateSearchTest.java @@ -0,0 +1,305 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.predicate.index; + +import com.yahoo.search.predicate.Hit; +import com.yahoo.search.predicate.SubqueryBitmap; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import static java.util.stream.Collectors.toList; +import static org.junit.Assert.assertEquals; + +/** + * @author <a href="mailto:magnarn@yahoo-inc.com">Magnar Nedland</a> + * @author bjorncs + */ +public class PredicateSearchTest { + + @Test + public void requireThatNoStreamsReturnNoResults() { + PredicateSearch search = new PredicateSearch(new ArrayList<>(), new byte[0], new byte[0], new short[0], 1); + assertEquals(0, search.stream().count()); + } + + @Test + public void requireThatSingleStreamFiltersOnConstructedCompleteIntervals() { + PredicateSearch search = createPredicateSearch( + new byte[]{1, 1, 1}, + postingList( + SubqueryBitmap.ALL_SUBQUERIES, + entry(0, 0x000100ff), + entry(1, 0x00010001, 0x000200ff), + entry(2, 0x00010042))); + assertEquals(Arrays.asList(new Hit(0), new Hit(1)).toString(), search.stream().collect(toList()).toString()); + } + + @Test + public void requireThatMinFeatureIsUsedToPruneResults() { + PredicateSearch search = createPredicateSearch( + new byte[]{3, 1}, + postingList( + SubqueryBitmap.ALL_SUBQUERIES, + entry(0, 0x000100ff), + entry(1, 0x000100ff))); + assertEquals(Arrays.asList(new Hit(1)).toString(), search.stream().collect(toList()).toString()); + } + + @Test + public void requireThatAHighKCanYieldResults() { + PredicateSearch search = createPredicateSearch( + new byte[]{2}, + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(0, 0x00010001)), + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(0, 0x000200ff))); + assertEquals(Arrays.asList(new Hit(0)).toString(), search.stream().collect(toList()).toString()); + } + + @Test + public void requireThatPostingListsAreSortedAfterAdvancing() { + PredicateSearch search = createPredicateSearch( + new byte[] {2, 1, 1, 1}, + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(0, 0x000100ff), + entry(3, 0x000100ff)), + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(1, 0x000100ff), + entry(2, 0x000100ff))); + assertEquals(Arrays.asList(new Hit(1), new Hit(2), new Hit(3)).toString(), search.stream().collect(toList()).toString()); + } + + @Test + public void requireThatEmptyPostingListsWork() { + PredicateSearch search = createPredicateSearch( + new byte[0], + postingList(SubqueryBitmap.ALL_SUBQUERIES)); + assertEquals(Arrays.asList().toString(), search.stream().collect(toList()).toString()); + } + + @Test + public void requireThatShorterPostingListEndingIsOk() { + PredicateSearch search = createPredicateSearch( + new byte[]{1, 1, 1}, + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(0, 0x000100ff), + entry(1, 0x000100ff)), + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(2, 0x000100ff))); + assertEquals(Arrays.asList(new Hit(0), new Hit(1), new Hit(2)).toString(), search.stream().collect(toList()).toString()); + } + + @Test + public void requireThatSortingWorksForManyPostingLists() { + PredicateSearch search = createPredicateSearch( + new byte[]{1, 5, 2, 2}, + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(0, 0x000100ff), + entry(1, 0x000100ff)), + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(1, 0x000100ff), + entry(2, 0x000100ff)), + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(1, 0x000100ff), + entry(3, 0x000100ff)), + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(1, 0x000100ff), + entry(2, 0x000100ff)), + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(1, 0x000100ff), + entry(3, 0x000100ff))); + assertEquals( + Arrays.asList(new Hit(0), new Hit(1), new Hit(2), new Hit(3)).toString(), + search.stream().collect(toList()).toString()); + } + + @Test + public void requireThatInsufficientIntervalCoveragePreventsMatch() { + PredicateSearch search = createPredicateSearch( + new byte[]{1, 1}, + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(0, 0x00010001), + entry(1, 0x000200ff))); + assertEquals(Arrays.asList().toString(), search.stream().collect(toList()).toString()); + } + + @Test + public void requireThatIntervalsAreSorted() { + PredicateSearch search = createPredicateSearch( + new byte[]{1}, + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(0, 0x00010001)), + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(0, 0x000300ff)), + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(0, 0x00020002))); + assertEquals(Arrays.asList(new Hit(0)).toString(), search.stream().collect(toList()).toString()); + } + + @Test + public void requireThatThereCanBeManyIntervals() { + PredicateSearch search = createPredicateSearch( + new byte[]{1}, + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(0, 0x00010001, 0x00020002, 0x00030003, 0x000100ff, 0x00040004, 0x00050005, 0x00060006))); + assertEquals(Arrays.asList(new Hit(0)).toString(), search.stream().collect(toList()).toString()); + } + + @Test + public void requireThatNotIsSupported_NoMatch() { + PredicateSearch search = createPredicateSearch( + new byte[]{1}, + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(0, 0x00010001)), + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(0, 0x00010000, 0x00ff0001))); + assertEquals(Arrays.asList().toString(), search.stream().collect(toList()).toString()); + } + + @Test + public void requireThatNotIsSupported_Match() { + PredicateSearch search = createPredicateSearch( + new byte[]{1}, + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(0, 0x00010000, 0x00ff0001))); + assertEquals(Arrays.asList(new Hit(0)).toString(), search.stream().collect(toList()).toString()); + } + + @Test + public void requireThatNotIsSupported_NoMatchBecauseOfPreviousTerm() { + PredicateSearch search = createPredicateSearch( + new byte[]{1}, + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(0, 0x00020001, 0x00ff0001))); + assertEquals(Arrays.asList().toString(), search.stream().collect(toList()).toString()); + } + + @Test + public void requireThatIntervalSortingWorksAsUnsigned() { + PredicateSearch search = createPredicateSearch( + new byte[]{1}, + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(0, 0x00010001)), + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(0, 0x00fe0001, 0x00ff00fe))); + assertEquals(Arrays.asList(new Hit(0)).toString(), search.stream().collect(toList()).toString()); + } + + @Test + public void requireThatMatchCanRequireMultiplePostingLists() { + PredicateSearch search = createPredicateSearch( + new byte[]{6}, + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(0, 0x00010001)), + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(0, 0x0002000b, 0x00030003)), + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(0, 0x00040003)), + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(0, 0x00050004)), + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(0, 0x00010008, 0x00060006)), + postingList(SubqueryBitmap.ALL_SUBQUERIES, + entry(0, 0x00020002, 0x000700ff))); + assertEquals(Arrays.asList(new Hit(0)).toString(), search.stream().collect(toList()).toString()); + } + + private static PredicateSearch createPredicateSearch(byte[] minFeatures, PostingList... postingLists) { + byte[] nPostingListsForDocument = new byte[minFeatures.length]; + short[] intervalEnds = new short[minFeatures.length]; + Arrays.fill(intervalEnds, (short) 0xFF); + List<PostingList> list = Arrays.asList(postingLists); + for (PostingList postingList : postingLists) { + for (int id : postingList.getDocIds()) { + nPostingListsForDocument[id]++; + } + } + return new PredicateSearch(list, nPostingListsForDocument, minFeatures, intervalEnds, 0xFF); + } + + private static class SimplePostingList implements PostingList { + private final long subquery; + private final Entry[] entries; + private int[] currentIntervals; + private int currentIntervalIndex; + private int currentDocId; + private int currentIndex; + + public SimplePostingList(long subquery, Entry... entries) { + this.subquery = subquery; + this.entries = entries; + this.currentIndex = 0; + this.currentDocId = -1; + } + + @Override + public boolean nextDocument(int docId) { + while (currentIndex < entries.length && entries[currentIndex].docId <= docId) { + ++currentIndex; + } + if (currentIndex == entries.length) { + return false; + } + Entry entry = entries[currentIndex]; + currentDocId = entry.docId; + currentIntervals = entry.intervals; + currentIntervalIndex = 0; + return true; + } + + @Override + public boolean prepareIntervals() { + return true; + } + + @Override + public boolean nextInterval() { + return ++currentIntervalIndex < currentIntervals.length; + } + + @Override + public int getDocId() { + return currentDocId; + } + + @Override + public int size() { + return entries.length; + } + + @Override + public int getInterval() { + return currentIntervals[currentIntervalIndex]; + } + + @Override + public long getSubquery() { + return subquery; + } + + @Override + public int[] getDocIds() { + return Arrays.stream(entries).mapToInt(e -> e.docId).toArray(); + } + + public static class Entry { + public final int docId; + public final int[] intervals; + + public Entry(int docId, int... intervals) { + this.docId = docId; + this.intervals = intervals; + } + } + } + + private static SimplePostingList postingList(long subquery, SimplePostingList.Entry... entries) { + return new SimplePostingList(subquery, entries); + } + + private static SimplePostingList.Entry entry(int docId, int... intervals) { + return new SimplePostingList.Entry(docId, intervals); + } +} diff --git a/predicate-search/src/test/java/com/yahoo/search/predicate/index/SimpleIndexTest.java b/predicate-search/src/test/java/com/yahoo/search/predicate/index/SimpleIndexTest.java new file mode 100644 index 00000000000..3f6b803c33a --- /dev/null +++ b/predicate-search/src/test/java/com/yahoo/search/predicate/index/SimpleIndexTest.java @@ -0,0 +1,65 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.predicate.index; + +import org.junit.Test; + +import java.io.IOException; + +import static com.yahoo.search.predicate.serialization.SerializationTestHelper.assertSerializationDeserializationMatches; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +/** + * @author <a href="mailto:magnarn@yahoo-inc.com">Magnar Nedland</a> + * @author bjorncs + */ +public class SimpleIndexTest { + + private static final long KEY = 0x12345L; + private static final int DOC_ID = 42; + + @Test + public void requireThatValuesCanBeInserted() { + SimpleIndex.Builder builder = new SimpleIndex.Builder(); + builder.insert(KEY, new Posting(DOC_ID, 10)); + SimpleIndex index = builder.build(); + SimpleIndex.Entry e = index.getPostingList(KEY); + assertNotNull(e); + assertEquals(1, e.docIds.length); + + builder = new SimpleIndex.Builder(); + builder.insert(KEY, new Posting(DOC_ID, 10)); + builder.insert(KEY, new Posting(DOC_ID + 1, 20)); + index = builder.build(); + e = index.getPostingList(KEY); + assertEquals(2, e.docIds.length); + assertEquals(10, e.dataRefs[0]); + assertEquals(20, e.dataRefs[1]); + } + + @Test + public void requireThatEntriesAreSortedOnId() { + SimpleIndex.Builder builder = new SimpleIndex.Builder(); + builder.insert(KEY, new Posting(DOC_ID, 10)); + builder.insert(KEY, new Posting(DOC_ID - 1, 20)); // Out of order + builder.insert(KEY, new Posting(DOC_ID + 1, 30)); + SimpleIndex index = builder.build(); + SimpleIndex.Entry entry = index.getPostingList(KEY); + assertEquals(3, entry.docIds.length); + assertEquals(DOC_ID - 1, entry.docIds[0]); + assertEquals(DOC_ID, entry.docIds[1]); + assertEquals(DOC_ID + 1, entry.docIds[2]); + } + + @Test + public void requireThatSerializationAndDeserializationRetainDictionary() throws IOException { + SimpleIndex.Builder builder = new SimpleIndex.Builder(); + builder.insert(KEY, new Posting(DOC_ID, 10)); + builder.insert(KEY, new Posting(DOC_ID + 1, 20)); + builder.insert(KEY, new Posting(DOC_ID + 2, 30)); + builder.insert(KEY + 0xFFFFFF, new Posting(DOC_ID, 100)); + builder.insert(KEY + 0xFFFFFF, new Posting(DOC_ID + 1, 200)); + SimpleIndex index = builder.build(); + assertSerializationDeserializationMatches(index, SimpleIndex::writeToOutputStream, SimpleIndex::fromInputStream); + } +} diff --git a/predicate-search/src/test/java/com/yahoo/search/predicate/index/ZeroConstraintPostingListTest.java b/predicate-search/src/test/java/com/yahoo/search/predicate/index/ZeroConstraintPostingListTest.java new file mode 100644 index 00000000000..652441b796a --- /dev/null +++ b/predicate-search/src/test/java/com/yahoo/search/predicate/index/ZeroConstraintPostingListTest.java @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.predicate.index; + +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +/** + * @author <a href="mailto:magnarn@yahoo-inc.com">Magnar Nedland</a> + */ +public class ZeroConstraintPostingListTest { + + @Test + public void requireThatPostingListCanIterate() { + ZeroConstraintPostingList postingList = + new ZeroConstraintPostingList(new int[] {2, 4, 6, 8}); + assertEquals(-1, postingList.getDocId()); + assertEquals(Interval.fromBoundaries(1, Interval.ZERO_CONSTRAINT_RANGE), postingList.getInterval()); + assertEquals(0xffffffffffffffffL, postingList.getSubquery()); + + assertTrue(postingList.nextDocument(0)); + assertEquals(2, postingList.getDocId()); + assertTrue(postingList.prepareIntervals()); + assertFalse(postingList.nextInterval()); + + assertTrue(postingList.nextDocument(7)); + assertEquals(8, postingList.getDocId()); + + assertTrue(postingList.nextDocument(7)); + assertEquals(8, postingList.getDocId()); + + assertFalse(postingList.nextDocument(8)); + } +} diff --git a/predicate-search/src/test/java/com/yahoo/search/predicate/index/ZstarCompressedPostingListTest.java b/predicate-search/src/test/java/com/yahoo/search/predicate/index/ZstarCompressedPostingListTest.java new file mode 100644 index 00000000000..3eb389757e3 --- /dev/null +++ b/predicate-search/src/test/java/com/yahoo/search/predicate/index/ZstarCompressedPostingListTest.java @@ -0,0 +1,62 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.predicate.index; + +import org.junit.Test; + +import java.util.Arrays; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +/** + * @author <a href="mailto:magnarn@yahoo-inc.com">Magnar Nedland</a> + */ +public class ZstarCompressedPostingListTest { + @Test + public void requireThatPostingListCanIterate() { + PredicateIntervalStore.Builder builder = new PredicateIntervalStore.Builder(); + int ref1 = builder.insert(Arrays.asList(0x10000)); + int ref2 = builder.insert(Arrays.asList(0x10000, 0x0ffff)); + int ref3 = builder.insert(Arrays.asList(0x10000, 0x00003, 0x40003, 0x60005)); + ZstarCompressedPostingList postingList = new ZstarCompressedPostingList( + builder.build(), new int[]{2, 4, 6}, new int[]{ref1, ref2, ref3}); + assertEquals(-1, postingList.getDocId()); + assertEquals(0, postingList.getInterval()); + assertEquals(0xffffffffffffffffL, postingList.getSubquery()); + + assertTrue(postingList.nextDocument(0)); + assertTrue(postingList.prepareIntervals()); + assertEquals(2, postingList.getDocId()); + assertEquals(0x10000, postingList.getInterval()); + assertTrue(postingList.nextInterval()); + assertEquals(0x20001, postingList.getInterval()); + assertFalse(postingList.nextInterval()); + + assertTrue(postingList.nextDocument(2)); + assertTrue(postingList.prepareIntervals()); + assertEquals(4, postingList.getDocId()); + assertEquals(0x00010000, postingList.getInterval()); + assertTrue(postingList.nextInterval()); + assertEquals(0xffff0001, postingList.getInterval()); + assertFalse(postingList.nextInterval()); + + assertTrue(postingList.nextDocument(4)); + assertTrue(postingList.prepareIntervals()); + assertEquals(6, postingList.getDocId()); + assertEquals(0x10000, postingList.getInterval()); + assertTrue(postingList.nextInterval()); + assertEquals(0x30001, postingList.getInterval()); + assertTrue(postingList.nextInterval()); + assertEquals(0x40003, postingList.getInterval()); + assertTrue(postingList.nextInterval()); + assertEquals(0x50004, postingList.getInterval()); + assertTrue(postingList.nextInterval()); + assertEquals(0x60005, postingList.getInterval()); + assertTrue(postingList.nextInterval()); + assertEquals(0x70006, postingList.getInterval()); + assertFalse(postingList.nextInterval()); + + assertFalse(postingList.nextDocument(6)); + } +} diff --git a/predicate-search/src/test/java/com/yahoo/search/predicate/index/conjunction/ConjunctionIdIteratorTest.java b/predicate-search/src/test/java/com/yahoo/search/predicate/index/conjunction/ConjunctionIdIteratorTest.java new file mode 100644 index 00000000000..d324faec50a --- /dev/null +++ b/predicate-search/src/test/java/com/yahoo/search/predicate/index/conjunction/ConjunctionIdIteratorTest.java @@ -0,0 +1,56 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.predicate.index.conjunction; + +import com.yahoo.search.predicate.SubqueryBitmap; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +/** + * @author bjorncs + */ +public class ConjunctionIdIteratorTest { + + @SuppressWarnings("PointlessBitwiseExpression") + @Test + public void require_that_next_returns_skips_to_correct_value() { + // NOTE: LST bit represents the conjunction sign: 0 => negative, 1 => positive. + int[] conjunctionIds = new int[]{ + 0 | 1, + 2 | 0, + 4 | 0, + 6 | 1, + 8 | 1, + 10 | 0}; + + ConjunctionIdIterator postingList = + new ConjunctionIdIterator(SubqueryBitmap.ALL_SUBQUERIES, conjunctionIds); + + assertEquals(1, postingList.getConjunctionId()); + assertEquals(1, postingList.getConjunctionId()); // Should not change. + + assertTrue(postingList.next(2)); + assertEquals(2, postingList.getConjunctionId()); + assertTrue(postingList.next(0)); // Should not change current conjunction id + assertEquals(2, postingList.getConjunctionId()); + + assertTrue(postingList.next(6 | 1)); // Should skip past id 4 + assertEquals(7, postingList.getConjunctionId()); + + assertTrue(postingList.next(8)); // Should skip to 9 + assertEquals(9, postingList.getConjunctionId()); + + assertTrue(postingList.next(10 | 1)); + assertEquals(10, postingList.getConjunctionId()); + + assertFalse(postingList.next(12)); // End of posting list + } + + @Test + public void require_that_subquery_is_correct() { + ConjunctionIdIterator iterator = new ConjunctionIdIterator(0b1111, new int[]{1}); + assertEquals(0b1111, iterator.getSubqueryBitmap()); + } +} diff --git a/predicate-search/src/test/java/com/yahoo/search/predicate/index/conjunction/ConjunctionIndexTest.java b/predicate-search/src/test/java/com/yahoo/search/predicate/index/conjunction/ConjunctionIndexTest.java new file mode 100644 index 00000000000..70fd8b4b6f5 --- /dev/null +++ b/predicate-search/src/test/java/com/yahoo/search/predicate/index/conjunction/ConjunctionIndexTest.java @@ -0,0 +1,375 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.predicate.index.conjunction; + +import com.yahoo.document.predicate.FeatureConjunction; +import com.yahoo.document.predicate.Predicate; +import com.yahoo.search.predicate.PredicateQuery; +import org.junit.Test; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import static com.yahoo.document.predicate.Predicates.feature; +import static com.yahoo.document.predicate.Predicates.not; +import static com.yahoo.search.predicate.serialization.SerializationTestHelper.assertSerializationDeserializationMatches; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class ConjunctionIndexTest { + + @Test + public void require_that_single_conjunction_can_be_indexed() { + ConjunctionIndexBuilder builder = new ConjunctionIndexBuilder(); + builder.indexConjunction(indexableConj(conj(feature("a").inSet("1"), feature("b").inSet("2")))); + assertEquals(2, builder.calculateFeatureCount()); + assertEquals(1, builder.getUniqueConjunctionCount()); + } + + @Test + public void require_that_large_conjunction_can_be_indexed() { + ConjunctionIndexBuilder builder = new ConjunctionIndexBuilder(); + builder.indexConjunction(indexableConj( + conj( + feature("a").inSet("1"), + feature("b").inSet("1"), + feature("c").inSet("1")))); + assertEquals(3, builder.calculateFeatureCount()); + assertEquals(1, builder.getUniqueConjunctionCount()); + } + + @Test + public void require_that_multiple_conjunctions_can_be_indexed() { + ConjunctionIndexBuilder builder = new ConjunctionIndexBuilder(); + builder.indexConjunction(indexableConj( + conj( + feature("a").inSet("1"), + feature("b").inSet("3")))); + builder.indexConjunction(indexableConj( + conj( + feature("a").inSet("1"), + feature("b").inSet("3")))); // Duplicate + builder.indexConjunction(indexableConj( + conj( + feature("a").inSet("1"), + feature("b").inSet("2"), + feature("c").inSet("3")))); + builder.indexConjunction(indexableConj( + conj( + feature("a").inSet("1"), + feature("b").inSet("2"), + feature("c").inSet("3")))); // Duplicate + builder.indexConjunction(indexableConj( + conj( + feature("d").inSet("1"), + feature("e").inSet("5")))); + assertEquals(6, builder.calculateFeatureCount()); + assertEquals(3, builder.getUniqueConjunctionCount()); + } + + @Test + public void require_that_search_for_simple_conjunctions_work() { + ConjunctionIndexBuilder builder = new ConjunctionIndexBuilder(); + + IndexableFeatureConjunction c1 = indexableConj( + conj( + feature("a").inSet("1"), + feature("b").inSet("2"))); + IndexableFeatureConjunction c2 = indexableConj( + conj( + feature("a").inSet("1"), + feature("b").inSet("2"), + feature("c").inSet("3"))); + IndexableFeatureConjunction c3 = indexableConj( + conj( + feature("a").inSet("1"), + feature("b").inSet("5"))); + + builder.indexConjunction(c1); + builder.indexConjunction(c2); + builder.indexConjunction(c3); + ConjunctionIndex index = builder.build(); + ConjunctionIndex.Searcher searcher = index.searcher(); + + PredicateQuery query = new PredicateQuery(); + query.addFeature("a", "1"); + query.addFeature("b", "2"); + assertHitsEquals(searcher.search(query), c1); + query.addFeature("c", "3"); + assertHitsEquals(searcher.search(query), c1, c2); + query.addFeature("b", "5"); + assertHitsEquals(searcher.search(query), c1, c2, c3); + } + + + @Test + public void require_that_conjunction_with_not_is_indexed() { + ConjunctionIndexBuilder builder = new ConjunctionIndexBuilder(); + builder.indexConjunction(indexableConj( + conj( + not(feature("a").inSet("1")), + not(feature("b").inSet("1"))))); + builder.indexConjunction(indexableConj( + conj( + feature("a").inSet("1"), + not(feature("b").inSet("1"))))); + assertEquals(2, builder.calculateFeatureCount()); + assertEquals(2, builder.getUniqueConjunctionCount()); + assertEquals(1, builder.getZListSize()); + } + + @Test + public void require_that_not_works_when_k_is_0() { + ConjunctionIndexBuilder builder = new ConjunctionIndexBuilder(); + IndexableFeatureConjunction c1 = indexableConj( + conj( + not(feature("a").inSet("1")), + not(feature("b").inSet("1")))); + IndexableFeatureConjunction c2 = indexableConj( + conj( + not(feature("a").inSet("1")), + not(feature("b").inSet("1")), + not(feature("c").inSet("1")))); + IndexableFeatureConjunction c3 = indexableConj( + conj( + not(feature("a").inSet("1")), + not(feature("b").inSet("1")), + not(feature("c").inSet("1")), + not(feature("d").inSet("1")))); + IndexableFeatureConjunction c4 = indexableConj( + conj( + feature("a").inSet("1"), + feature("b").inSet("1"))); + builder.indexConjunction(c1); + builder.indexConjunction(c2); + builder.indexConjunction(c3); + builder.indexConjunction(c4); + ConjunctionIndex index = builder.build(); + ConjunctionIndex.Searcher searcher = index.searcher(); + + PredicateQuery query = new PredicateQuery(); + assertHitsEquals(searcher.search(query), c1, c2, c3); + query.addFeature("a", "1"); + query.addFeature("b", "1"); + assertHitsEquals(searcher.search(query), c4); + query.addFeature("c", "1"); + assertHitsEquals(searcher.search(query), c4); + query.addFeature("d", "1"); + assertHitsEquals(searcher.search(query), c4); + } + + @Test + public void require_that_not_works_when_k_is_1() { + ConjunctionIndexBuilder builder = new ConjunctionIndexBuilder(); + IndexableFeatureConjunction c1 = indexableConj( + conj( + feature("a").inSet("1"), + not(feature("b").inSet("1")))); + IndexableFeatureConjunction c2 = indexableConj( + conj( + feature("a").inSet("1"), + not(feature("b").inSet("1")), + not(feature("c").inSet("1")))); + IndexableFeatureConjunction c3 = indexableConj( + conj( + feature("a").inSet("1"), + not(feature("b").inSet("1")), + not(feature("c").inSet("1")), + not(feature("d").inSet("1")))); + builder.indexConjunction(c1); + builder.indexConjunction(c2); + builder.indexConjunction(c3); + ConjunctionIndex index = builder.build(); + ConjunctionIndex.Searcher searcher = index.searcher(); + + PredicateQuery query = new PredicateQuery(); + assertTrue(searcher.search(query).isEmpty()); + query.addFeature("a", "1"); + assertHitsEquals(searcher.search(query), c1, c2, c3); + query.addFeature("b", "1"); + assertTrue(searcher.search(query).isEmpty()); + query.addFeature("c", "1"); + assertTrue(searcher.search(query).isEmpty()); + query.addFeature("d", "1"); + assertTrue(searcher.search(query).isEmpty()); + } + + @Test + public void require_that_not_works_when_k_is_2() { + ConjunctionIndexBuilder builder = new ConjunctionIndexBuilder(); + IndexableFeatureConjunction c1 = indexableConj( + conj( + feature("a").inSet("1"), + feature("b").inSet("1"), + not(feature("c").inSet("1")))); + IndexableFeatureConjunction c2 = indexableConj( + conj( + feature("a").inSet("1"), + feature("b").inSet("1"), + not(feature("c").inSet("1")), + not(feature("d").inSet("1")))); + IndexableFeatureConjunction c3 = indexableConj( + conj( + feature("a").inSet("1"), + feature("b").inSet("1"), + not(feature("c").inSet("1")), + not(feature("d").inSet("1")), + not(feature("e").inSet("1")))); + builder.indexConjunction(c1); + builder.indexConjunction(c2); + builder.indexConjunction(c3); + ConjunctionIndex index = builder.build(); + ConjunctionIndex.Searcher searcher = index.searcher(); + + PredicateQuery query = new PredicateQuery(); + query.addFeature("a", "1"); + query.addFeature("b", "1"); + assertHitsEquals(searcher.search(query), c1, c2, c3); + query.addFeature("c", "1"); + assertTrue(searcher.search(query).isEmpty()); + query.addFeature("d", "1"); + assertTrue(searcher.search(query).isEmpty()); + query.addFeature("e", "1"); + assertTrue(searcher.search(query).isEmpty()); + } + + @Test + public void require_that_multi_term_queries_are_supported() { + ConjunctionIndexBuilder builder = new ConjunctionIndexBuilder(); + IndexableFeatureConjunction c1 = indexableConj( + conj( + feature("a").inSet("1"), + feature("b").inSet("3"))); + builder.indexConjunction(c1); + ConjunctionIndex index = builder.build(); + ConjunctionIndex.Searcher searcher = index.searcher(); + + PredicateQuery query = new PredicateQuery(); + query.addFeature("a", "1"); + query.addFeature("a", "2"); + assertTrue(searcher.search(query).isEmpty()); + query.addFeature("b", "3"); + assertHitsEquals(searcher.search(query), c1); + } + + @Test + public void require_that_subqueries_are_supported() { + ConjunctionIndexBuilder builder = new ConjunctionIndexBuilder(); + IndexableFeatureConjunction c1 = indexableConj( + conj( + feature("a").inSet("1"), + feature("b").inSet("3"), + not(feature("c").inSet("4")))); + IndexableFeatureConjunction c2 = indexableConj( + conj( + feature("a").inSet("1"), + feature("b").inSet("3"))); + IndexableFeatureConjunction c3 = indexableConj( + conj( + feature("a").inSet("2"), + feature("b").inSet("3"))); + IndexableFeatureConjunction c4 = indexableConj( + conj( + feature("e").inSet("5"), + feature("f").inSet("6")) + ); + builder.indexConjunction(c1); + builder.indexConjunction(c2); + builder.indexConjunction(c3); + builder.indexConjunction(c4); + ConjunctionIndex index = builder.build(); + ConjunctionIndex.Searcher searcher = index.searcher(); + + PredicateQuery query = new PredicateQuery(); + + //subquery 0: a=2 and b=3 + //subquery 1: a=1 and b=3 + //subquery 2: a=1 and b=3 + query.addFeature("a", "1", 0b110); + query.addFeature("a", "2", 0b001); + query.addFeature("b", "3", 0b111); + List<ConjunctionHit> expectedHits = matchingConjunctionList( + new ConjunctionHit(c1.id, 0b110), + new ConjunctionHit(c2.id, 0b110), + new ConjunctionHit(c3.id, 0b001) + ); + + List<ConjunctionHit> hits = searcher.search(query); + assertHitsEquals(expectedHits, hits); + + //subquery 0: a=2 and b=3 and c=4 + //subquery 1: a=1 and b=3 + //subquery 2: a=1 and b=3 and c=4 + query.addFeature("c", "4", 0b101); + expectedHits = matchingConjunctionList( + new ConjunctionHit(c1.id, 0b010), + new ConjunctionHit(c2.id, 0b110), + new ConjunctionHit(c3.id, 0b001) + ); + hits = searcher.search(query); + assertHitsEquals(expectedHits, hits); + + // subquery 0: a=2 and e=5 + // subquery 1: b=3 and f=6 + PredicateQuery query2 = new PredicateQuery(); + query2.addFeature("a", "2", 0b01); + query2.addFeature("b", "3", 0b10); + query2.addFeature("e", "5", 0b01); + query2.addFeature("f", "6", 0b10); + expectedHits = matchingConjunctionList( + new ConjunctionHit(c1.id, 0b010), + new ConjunctionHit(c2.id, 0b110), + new ConjunctionHit(c3.id, 0b001) + ); + hits = searcher.search(query); + assertHitsEquals(expectedHits, hits); + } + + @Test + public void require_that_serialization_and_deserialization_retain_data() throws IOException { + ConjunctionIndexBuilder builder = new ConjunctionIndexBuilder(); + builder.indexConjunction(indexableConj( + conj( + not(feature("a").inSet("1")), + not(feature("b").inSet("3")), + not(feature("c").inSet("4"))))); + builder.indexConjunction(indexableConj( + conj( + feature("d").inSet("5"), + feature("e").inSet("6")))); + ConjunctionIndex index = builder.build(); + assertSerializationDeserializationMatches( + index, ConjunctionIndex::writeToOutputStream, ConjunctionIndex::fromInputStream); + } + + private static List<ConjunctionHit> matchingConjunctionList(ConjunctionHit... conjunctionHits) { + return Arrays.asList(conjunctionHits); + } + + private static void assertHitsEquals(List<ConjunctionHit> hits, IndexableFeatureConjunction... conjunctions) { + Arrays.sort(conjunctions, (c1, c2) -> Long.compareUnsigned(c1.id, c2.id)); + Collections.sort(hits); + assertEquals(conjunctions.length, hits.size()); + for (int i = 0; i < hits.size(); i++) { + assertEquals(conjunctions[i].id, hits.get(i).conjunctionId); + } + } + + private static void assertHitsEquals(List<ConjunctionHit> expectedHits, List<ConjunctionHit> hits) { + Collections.sort(expectedHits); + Collections.sort(hits); + assertArrayEquals( + expectedHits.toArray(new ConjunctionHit[expectedHits.size()]), + hits.toArray(new ConjunctionHit[expectedHits.size()])); + } + + private static FeatureConjunction conj(Predicate... operands) { + return new FeatureConjunction(Arrays.asList(operands)); + } + + private static IndexableFeatureConjunction indexableConj(FeatureConjunction conjunction) { + return new IndexableFeatureConjunction(conjunction); + } +} |