diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
commit | 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch) | |
tree | 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /searchlib/src/tests/bytecomplens |
Publish
Diffstat (limited to 'searchlib/src/tests/bytecomplens')
-rw-r--r-- | searchlib/src/tests/bytecomplens/.gitignore | 5 | ||||
-rw-r--r-- | searchlib/src/tests/bytecomplens/CMakeLists.txt | 8 | ||||
-rw-r--r-- | searchlib/src/tests/bytecomplens/DESC | 1 | ||||
-rw-r--r-- | searchlib/src/tests/bytecomplens/FILES | 1 | ||||
-rw-r--r-- | searchlib/src/tests/bytecomplens/bytecomp.cpp | 102 | ||||
-rw-r--r-- | searchlib/src/tests/bytecomplens/example.txt | 122 | ||||
-rw-r--r-- | searchlib/src/tests/bytecomplens/tblprint.cpp | 357 |
7 files changed, 596 insertions, 0 deletions
diff --git a/searchlib/src/tests/bytecomplens/.gitignore b/searchlib/src/tests/bytecomplens/.gitignore new file mode 100644 index 00000000000..afe9bff02f6 --- /dev/null +++ b/searchlib/src/tests/bytecomplens/.gitignore @@ -0,0 +1,5 @@ +*.So +.depend* +Makefile +bytecomp_test +searchlib_bytecomp_test_app diff --git a/searchlib/src/tests/bytecomplens/CMakeLists.txt b/searchlib/src/tests/bytecomplens/CMakeLists.txt new file mode 100644 index 00000000000..188c3fccbdf --- /dev/null +++ b/searchlib/src/tests/bytecomplens/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_bytecomp_test_app + SOURCES + bytecomp.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_bytecomp_test_app NO_VALGRIND COMMAND searchlib_bytecomp_test_app) diff --git a/searchlib/src/tests/bytecomplens/DESC b/searchlib/src/tests/bytecomplens/DESC new file mode 100644 index 00000000000..e40e528ddea --- /dev/null +++ b/searchlib/src/tests/bytecomplens/DESC @@ -0,0 +1 @@ +Test of search::ByteCompressedLengths class. Look at bytecomp.cpp for details. diff --git a/searchlib/src/tests/bytecomplens/FILES b/searchlib/src/tests/bytecomplens/FILES new file mode 100644 index 00000000000..c44e7f254f8 --- /dev/null +++ b/searchlib/src/tests/bytecomplens/FILES @@ -0,0 +1 @@ +bytecomplens.cpp diff --git a/searchlib/src/tests/bytecomplens/bytecomp.cpp b/searchlib/src/tests/bytecomplens/bytecomp.cpp new file mode 100644 index 00000000000..63aa2da15f6 --- /dev/null +++ b/searchlib/src/tests/bytecomplens/bytecomp.cpp @@ -0,0 +1,102 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <memory> +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("bytecomplens_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/util/random.h> +#include <vespa/searchlib/docstore/bytecomplens.h> + + +class Test : public vespalib::TestApp { +private: + void testRandomLengths(); + +public: + int Main() { + TEST_INIT("bytecomplens_test"); + testRandomLengths(); TEST_FLUSH(); + TEST_DONE(); + } +}; + +TEST_APPHOOK(Test); + + +void +Test::testRandomLengths() +{ + vespalib::RandomGen rndgen(0x07031969); + +#define TBLSIZ 0xc00000 + + auto lentable = std::unique_ptr<uint32_t[]>(new uint32_t[TBLSIZ]); + auto offtable = std::unique_ptr<uint64_t[]>(new uint64_t[TBLSIZ]); + + uint64_t offset = 16; + + for (int i = 0; i < TBLSIZ; i++) { + int sel = rndgen.nextInt32(); + int val = rndgen.nextInt32(); + switch (sel & 0x7) { + case 0: + val &= 0x7F; + break; + case 1: + val &= 0xFF; + break; + case 3: + val &= 0x1FFF; + break; + case 4: + val &= 0x3FFF; + break; + case 5: + val &= 0x7FFF; + break; + case 6: + val &= 0xFFFF; + break; + case 7: + default: + val &= 0xFFFFF; + break; + } + offtable[i] = offset; + lentable[i] = val; + offset += val; + } + + LOG(info, "made %d random offsets", TBLSIZ); + + search::ByteCompressedLengths foo; + + LOG(info, "empty BCL using %9ld bytes memory", foo.memoryUsed()); + + foo.addOffsetTable(TBLSIZ/4, offtable.get()); + foo.addOffsetTable(TBLSIZ/4, offtable.get() + 1*(TBLSIZ/4)); + + LOG(info, "half BCL using %9ld bytes memory", foo.memoryUsed()); + + search::ByteCompressedLengths bar; + foo.swap(bar); + bar.addOffsetTable(TBLSIZ/4, offtable.get() + 2*(TBLSIZ/4)); + bar.addOffsetTable(TBLSIZ/4, offtable.get() + 3*(TBLSIZ/4)); + foo.swap(bar); + + LOG(info, "full BCL using %9ld bytes memory", foo.memoryUsed()); + + LOG(info, "constructed %d byte compressed lengths", TBLSIZ-1); + + for (int i = 0; i < TBLSIZ-1; i++) { + search::ByteCompressedLengths::OffLen offlen; + offlen = foo.getOffLen(i); + + if ((i % 1000000) == 0) { + LOG(info, "data blob [%d] length %ld offset %ld", i, offlen.length, offlen.offset); + } + EXPECT_EQUAL(lentable[i], offlen.length); + EXPECT_EQUAL(offtable[i], offlen.offset); + } +} + diff --git a/searchlib/src/tests/bytecomplens/example.txt b/searchlib/src/tests/bytecomplens/example.txt new file mode 100644 index 00000000000..6dc3df0118a --- /dev/null +++ b/searchlib/src/tests/bytecomplens/example.txt @@ -0,0 +1,122 @@ +offset length BCN val L0 len/off skipL1 skipL2 skipL3 + +976 18707 [ 93 92 01 ] 3/0 976/0/0/0 +19683 11527 [ 87 5A ] 2/3 +31210 3926 [ D6 1E ] 2/5 +35136 2 [ 02 ] 1/7 +35138 6060 [ AC 2F ] 2/8 34162/8 +41198 649445 [ E5 D1 27 ] 3/10 +690643 2866 [ B2 16 ] 2/13 +693509 824767 [ BF AB 32 ] 3/15 +1518276 499173 [ E5 BB 1E ] 3/18 1483138/10 +2017449 20455 [ E7 9F 01 ] 3/21 +2037904 11 [ 0B ] 1/24 +2037915 19207 [ 87 96 01 ] 3/25 +2057122 6355 [ D3 31 ] 2/28 538846/10 +2063477 3422 [ DE 1A ] 2/30 +2066899 10683 [ BB 53 ] 2/32 +2077582 7360 [ C0 39 ] 2/34 +2084942 17969 [ B1 8C 01 ] 3/36 2083966/36/12 +2102911 6114 [ E2 2F ] 2/39 +2109025 31741 [ FD F7 01 ] 3/41 +2140766 581588 [ D4 BF 23 ] 3/44 +2722354 5341 [ DD 29 ] 2/47 637412/11 +2727695 13774 [ CE 6B ] 2/49 +2741469 717809 [ F1 E7 2B ] 3/51 +3459278 815406 [ AE E2 31 ] 3/54 +4274684 89 [ 59 ] 1/57 1552330/10 +4274773 4545 [ C1 23 ] 2/58 +4279318 803868 [ 9C 88 31 ] 3/60 +5083186 12865 [ C1 64 ] 2/63 +5096051 75 [ 4B ] 1/65 821367/8 +5096126 40734 [ 9E BE 02 ] 3/66 +5136860 101 [ 65 ] 1/69 +5136961 128 [ 80 01 ] 2/70 +5137089 253 [ FD 01 ] 2/72 3052147/36/12 +5137342 13 [ 0D ] 1/74 +5137355 24986 [ 9A C3 01 ] 3/75 +5162341 231 [ E7 01 ] 2/78 +5162572 997853 [ DD F3 3C ] 3/80 25483/8 +6160425 4728 [ F8 24 ] 2/83 +6165153 2025 [ E9 0F ] 2/85 +6167178 7281 [ F1 38 ] 2/87 +6174459 1026302 [ FE D1 3E ] 3/89 1011887/9 +7200761 848783 [ 8F E7 33 ] 3/92 +8049544 145767 [ E7 F2 08 ] 3/95 +8195311 19103 [ 9F 95 01 ] 3/98 +8214414 22166 [ 96 AD 01 ] 3/101 2039955/12 +8236580 30020 [ C4 EA 01 ] 3/104 +8266600 13 [ 0D ] 1/107 +8266613 120 [ 78 ] 1/108 +8266733 22398 [ FE AE 01 ] 3/109 3129644/37/12 +8289131 10832 [ D0 54 ] 2/112 +8299963 3765 [ B5 1D ] 2/114 +8303728 432771 [ 83 B5 1A ] 3/116 +8736499 30133 [ B5 EB 01 ] 3/119 469766/10 +8766632 6444 [ AC 32 ] 2/122 +8773076 16033 [ A1 7D ] 2/124 +8789109 78 [ 4E ] 1/126 +8789187 12510 [ DE 61 ] 2/127 52688/8 +8801697 12441 [ 99 61 ] 2/129 +8814138 117 [ 75 ] 1/131 +8814255 7147 [ EB 37 ] 2/132 +8821402 189 [ BD 01 ] 2/134 32215/7 +8821591 199704 [ 98 98 0C ] 3/136 +9021295 13240 [ B8 67 ] 2/139 +9034535 110 [ 6E ] 1/141 +9034645 31677 [ BD F7 01 ] 3/142 9034645/142/48/17 +9066322 18547 [ F3 90 01 ] 3/145 +9084869 734679 [ D7 EB 2C ] 3/148 +9819548 112 [ 70 ] 1/151 +9819660 883565 [ ED F6 35 ] 3/152 785015/10 +10703225 10290 [ B2 50 ] 2/155 +10713515 21410 [ A2 A7 01 ] 3/157 +10734925 15 [ 0F ] 1/160 +10734940 747774 [ FE D1 2D ] 3/161 915280/9 +11482714 39 [ 27 ] 1/164 +11482753 77 [ 4D ] 1/165 +11482830 235 [ EB 01 ] 2/166 +11483065 1991 [ C7 0F ] 2/168 748125/7 +11485056 9187 [ E3 47 ] 2/170 +11494243 18800 [ F0 92 01 ] 3/172 +11513043 1042219 [ AB CE 3F ] 3/175 +12555262 9154 [ C2 47 ] 2/178 3520617/36/12 +12564416 43582 [ BE D4 02 ] 3/180 +12607998 847240 [ 88 DB 33 ] 3/183 +13455238 4726 [ F6 24 ] 2/186 +13459964 590348 [ 8C 84 24 ] 3/188 904702/10 +14050312 8659 [ D3 43 ] 2/191 +14058971 116 [ 74 ] 1/193 +14059087 13563 [ FB 69 ] 2/194 +14072650 713064 [ E8 C2 2B ] 3/196 612686/8 +14785714 40321 [ 81 BB 02 ] 3/199 +14826035 2296 [ F8 11 ] 2/202 +14828331 7273 [ E9 38 ] 2/204 +14835604 68285 [ BD 95 04 ] 3/206 762954/10 +14903889 235 [ EB 01 ] 2/209 +14904124 4669 [ BD 24 ] 2/211 +14908793 28535 [ F7 DE 01 ] 3/213 +14937328 19 [ 13 ] 1/216 2382066/38/12 +14937347 5369 [ F9 29 ] 2/217 +14942716 602191 [ CF E0 24 ] 3/219 +15544907 2653 [ DD 14 ] 2/222 +15547560 25755 [ 9B C9 01 ] 3/224 610232/8 +15573315 11349 [ D5 58 ] 2/227 +15584664 15006 [ 9E 75 ] 2/229 +15599670 89 [ 59 ] 1/231 +15599759 52772 [ A4 9C 03 ] 3/232 52199/8 +15652531 776175 [ EF AF 2F ] 3/235 +16428706 126 [ 7E ] 1/238 +16428832 3884 [ AC 1E ] 2/239 +16432716 33958 [ A6 89 02 ] 3/241 832957/9 +16466674 122 [ 7A ] 1/244 +16466796 41895 [ A7 C7 02 ] 3/245 +16508691 105882 [ 9A BB 06 ] 3/248 +16614573 11067 [ BB 56 ] 2/251 1677245/35/12 +16625640 4588 [ EC 23 ] 2/253 +16630228 7349 [ B5 39 ] 2/255 +16637577 902638 [ EE 8B 37 ] 3/257 +17540215 8737 [ A1 44 ] 2/260 925642/9 +17548952 29186 [ 82 E4 01 ] 3/262 +17578138 41 [ 29 ] 1/265 +17578179 diff --git a/searchlib/src/tests/bytecomplens/tblprint.cpp b/searchlib/src/tests/bytecomplens/tblprint.cpp new file mode 100644 index 00000000000..93657d82178 --- /dev/null +++ b/searchlib/src/tests/bytecomplens/tblprint.cpp @@ -0,0 +1,357 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("tblprint"); +#include <vespa/vespalib/util/random.h> + +#include <vector> +#include <vespa/vespalib/data/databuffer.h> + + +/** + * Class compressing a table of offsets in memory. + * After adding (n) offsets you can access + * (n-1) pairs of (length, offset). + * All offsets must be increasing, but they + * may be added in several chunks. + **/ +class ByteCompressedLengths +{ +public: + /** + * Construct an empty instance + **/ + ByteCompressedLengths(); + + /** + * add the given offset table. + * @param entries number of offsets to store. + * @param offsets table that contains (entries) offsets. + **/ + void addOffsetTable(uint64_t entries, uint64_t *offsets); + + /** + * free resources + **/ + ~ByteCompressedLengths(); + + /** + * Fetch a length and offset from compressed data. + * Note invariant: id < size(); size() == (entries-1) + * + * @param id The index into the offset table + * @param offset Will be incremented by offset[id] + * @return The delta (offset[id+1] - offset[id]) + **/ + uint64_t getLength(uint64_t id, uint64_t &offset) const; + + /** + * The number of (length, offset) pairs stored + **/ + uint64_t size() const { return _entries; } + + struct L3Entry { + uint64_t offset; + uint64_t l0toff; + uint64_t l1toff; + uint64_t l2toff; + }; + vespalib::DataBuffer _l0space; + vespalib::DataBuffer _l1space; + vespalib::DataBuffer _l2space; + const uint8_t *_l0table; + const uint8_t *_l1table; + const uint8_t *_l2table; + + std::vector<L3Entry> _l3table; + + uint64_t _lenSum1; + uint64_t _lenSum2; + uint64_t _l0oSum1; + uint64_t _l0oSum2; + uint64_t _l1oSum2; + uint64_t _last_offset; + uint64_t _entries; + + void addOffset(uint64_t offset); +}; + +/** + * get "Byte Compressed Number" from buffer, incrementing pointer + **/ +static inline uint64_t getBCN(const uint8_t *&buffer) +{ + uint8_t b = *buffer++; + uint64_t len = (b & 127); + unsigned shiftLen = 0; + while (b & 128) { + shiftLen += 7; + b = *buffer++; + len |= ((b & 127) << shiftLen); + } + return len; +} + +static size_t writeLen(vespalib::DataBuffer &buf, uint64_t len) +{ + size_t bytes = 0; + do { + uint8_t b = len & 127; + len >>= 7; + if (len > 0) { + b |= 128; + } + buf.ensureFree(1); + buf.writeInt8(b); + ++bytes; + } while (len > 0); + return bytes; +} + + +ByteCompressedLengths::ByteCompressedLengths() + : _l0space(), + _l1space(), + _l2space(), + _l3table(), + _lenSum1(0), + _lenSum2(0), + _l0oSum1(0), + _l0oSum2(0), + _l1oSum2(0), + _last_offset(0), + _entries(0) +{ +} + + +void +ByteCompressedLengths::addOffset(uint64_t offset) +{ + assert(offset >= _last_offset); + + uint64_t len = offset - _last_offset; + uint64_t i = _entries++; + + if ((i & 3) == 0) { + _lenSum2 += _lenSum1; + _l0oSum2 += _l0oSum1; + + uint64_t t1n = i >> 2; + if ((t1n & 3) == 0) { + uint64_t t2n = t1n >> 2; + + if ((t2n & 3) == 0) { + L3Entry e; + e.offset = _last_offset; + e.l0toff = _l0space.getDataLen(); + e.l1toff = _l1space.getDataLen(); + e.l2toff = _l2space.getDataLen(); + + _l3table.push_back(e); + } else { + writeLen(_l2space, _lenSum2); + writeLen(_l2space, _l0oSum2); + writeLen(_l2space, _l1oSum2); + } + _lenSum2 = 0; + _l0oSum2 = 0; + _l1oSum2 = 0; + } else { + _l1oSum2 += writeLen(_l1space, _lenSum1); + _l1oSum2 += writeLen(_l1space, _l0oSum1); + } + _lenSum1 = 0; + _l0oSum1 = 0; + } + _l0oSum1 += writeLen(_l0space, len); + _lenSum1 += len; + _last_offset = offset; +} + + +void +ByteCompressedLengths::addOffsetTable(uint64_t entries, uint64_t *offsets) +{ + if (entries == 0) return; + // Do we have some offsets already? + if (_entries > 0) { + // yes, add first offset normally + addOffset(offsets[0]); + } else { + // no, special treatment for very first offset + _last_offset = offsets[0]; + } + for (uint64_t cnt = 1; cnt < entries; ++cnt) { + addOffset(offsets[cnt]); + } + _l0table = (uint8_t *)_l0space.getData(); + _l1table = (uint8_t *)_l1space.getData(); + _l2table = (uint8_t *)_l2space.getData(); + + LOG(debug, "compressed %ld offsets", (_entries+1)); + LOG(debug, "(%ld bytes)", (_entries+1)*sizeof(uint64_t)); + LOG(debug, "to (%ld + %ld + %ld) bytes + %ld l3entries", + _l0space.getDataLen(), + _l1space.getDataLen(), + _l2space.getDataLen(), + _l3table.size()); + LOG(debug, "(%ld bytes)", + (_l0space.getDataLen() + _l1space.getDataLen() + _l2space.getDataLen() + + _l3table.size()*sizeof(L3Entry))); +} + + +ByteCompressedLengths::~ByteCompressedLengths() +{ +} + +uint64_t +ByteCompressedLengths::getLength(uint64_t numSkip, uint64_t &offset) const +{ + assert(numSkip < _entries); + + unsigned skipL0 = numSkip & 3; + unsigned skipL1 = (numSkip >> 2) & 3; + unsigned skipL2 = (numSkip >> 4) & 3; + uint64_t skipL3 = (numSkip >> 6); + + offset += _l3table[skipL3].offset; + uint64_t l0toff = _l3table[skipL3].l0toff; + uint64_t l1toff = _l3table[skipL3].l1toff; + uint64_t l2toff = _l3table[skipL3].l2toff; + + // printf("start off %ld l0off %ld l1off %ld l2off %ld\n", offset, l0toff, l1toff, l2toff); + + const uint8_t *l2pos = _l2table + l2toff; + + while (skipL2 > 0) { + --skipL2; + offset += getBCN(l2pos); + l0toff += getBCN(l2pos); + l1toff += getBCN(l2pos); + } + + const uint8_t *l1pos = _l1table + l1toff; + + while (skipL1 > 0) { + --skipL1; + offset += getBCN(l1pos); + l0toff += getBCN(l1pos); + + } + const uint8_t *l0pos = _l0table + l0toff; + + while (skipL0 > 0) { + --skipL0; + offset += getBCN(l0pos); + } + // printf("end off %ld l0off %ld l1off %ld l2off %ld\n", offset, l0toff, l1toff, l2toff); + return getBCN(l0pos); +} + + + +class Test { +public: + static void printTable(); +}; + + + +int main(int /*argc*/, char ** /*argv*/) +{ + Test::printTable(); + return 0; +} + +void +Test::printTable() +{ + vespalib::RandomGen rndgen(0x07031969); +#define TBLSIZ 120 + uint32_t *lentable = new uint32_t[TBLSIZ]; + uint64_t *offtable = new uint64_t[TBLSIZ]; + + uint64_t offset = 16 + TBLSIZ*8; + + for (int i = 0; i < TBLSIZ; i++) { + int sel = rndgen.nextInt32(); + int val = rndgen.nextInt32(); + switch (sel & 0x7) { + case 0: + val &= 0x7F; + break; + case 1: + val &= 0xFF; + break; + case 3: + val &= 0x1FFF; + break; + case 4: + val &= 0x3FFF; + break; + case 5: + val &= 0x7FFF; + break; + case 6: + val &= 0xFFFF; + break; + case 7: + default: + val &= 0xFFFFF; + break; + } + offtable[i] = offset; + lentable[i] = val; + offset += val; + } + + ByteCompressedLengths foo; + foo.addOffsetTable(TBLSIZ, offtable); + + const uint8_t *l1pos = foo._l1table; + const uint8_t *l2pos = foo._l2table; + + printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\n", + "offset", "length", "BCN val", "L0 len/off", "skipL1", "skipL2", "skipL3"); + + int slb = 0; + for (int i = 0; i+1 < TBLSIZ; i++) { + printf("%ld\t%d\t[", offtable[i], lentable[i]); + int bytes=0; + uint64_t len = lentable[i]; + do { + uint8_t b = len & 127; + len >>= 7; + if (len > 0) { + b |= 128; + } + printf(" %02X", b); + ++bytes; + } while (len > 0); + printf(" ]\t%d", bytes); + printf("/%d", slb); + slb += bytes; + + if ((i & 63) == 0) { + printf("\t\t\t%ld/%ld/%ld/%ld", + foo._l3table[i >> 6].offset, + foo._l3table[i >> 6].l0toff, + foo._l3table[i >> 6].l1toff, + foo._l3table[i >> 6].l2toff); + } else + if ((i & 15) == 0) { + printf("\t\t%ld", getBCN(l2pos)); + printf("/%ld", getBCN(l2pos)); + printf("/%ld", getBCN(l2pos)); + } else + if ((i & 3) == 0) { + printf("\t%ld", getBCN(l1pos)); + printf("/%ld", getBCN(l1pos)); + } + printf("\n"); + } + printf("%ld\n", offtable[TBLSIZ-1]); + fflush(stdout); +} |