diff options
Diffstat (limited to 'searchlib')
43 files changed, 355 insertions, 1077 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index 1f8c054c694..33b78d517a3 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -102,7 +102,6 @@ vespa_define_module( src/tests/bitcompression/expgolomb src/tests/bitvector src/tests/btree - src/tests/bytecomplens src/tests/common/bitvector src/tests/common/location src/tests/common/location_iterator diff --git a/searchlib/src/tests/attribute/attribute_header/attribute_header_test.cpp b/searchlib/src/tests/attribute/attribute_header/attribute_header_test.cpp index 1e89fbe7501..aca9b652b94 100644 --- a/searchlib/src/tests/attribute/attribute_header/attribute_header_test.cpp +++ b/searchlib/src/tests/attribute/attribute_header/attribute_header_test.cpp @@ -72,6 +72,8 @@ TEST(AttributeHeaderTest, can_be_added_to_and_extracted_from_generic_header) verify_roundtrip_serialization(HnswIPO({16, 100, DistanceMetric::Euclidean})); verify_roundtrip_serialization(HnswIPO({16, 100, DistanceMetric::Angular})); verify_roundtrip_serialization(HnswIPO({16, 100, DistanceMetric::GeoDegrees})); + verify_roundtrip_serialization(HnswIPO({16, 100, DistanceMetric::InnerProduct})); + verify_roundtrip_serialization(HnswIPO({16, 100, DistanceMetric::Hamming})); verify_roundtrip_serialization(HnswIPO()); } diff --git a/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp b/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp index eeabbf23c27..1e3531dc78e 100644 --- a/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp +++ b/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp @@ -1,7 +1,6 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/vespalib/testkit/testapp.h> -#include <vespa/vespalib/stllike/string.h> #include <vespa/searchlib/attribute/attributefilewriter.h> #include <vespa/searchlib/attribute/attributefilebufferwriter.h> #include <vespa/searchlib/attribute/attribute_header.h> @@ -10,6 +9,7 @@ #include <vespa/searchlib/common/tunefileinfo.h> #include <vespa/searchlib/common/fileheadercontext.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/vespalib/data/databuffer.h> #include <vespa/fastos/file.h> #include <vespa/log/log.h> diff --git a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp index d8761f69d71..2a5b8014299 100644 --- a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp +++ b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp @@ -10,7 +10,6 @@ #include <vespa/searchlib/attribute/attributememorysavetarget.h> #include <vespa/searchlib/attribute/attributesaver.h> #include <vespa/searchlib/attribute/multinumericattribute.h> -#include <vespa/searchlib/attribute/multistringattribute.h> #include <vespa/searchlib/attribute/singlenumericattribute.h> #include <vespa/searchlib/attribute/singlestringattribute.h> #include <vespa/searchlib/queryeval/executeinfo.h> @@ -21,6 +20,7 @@ #include <vespa/vespalib/testkit/testapp.h> #include <vespa/searchlib/util/bufferwriter.h> #include <vespa/vespalib/util/compress.h> +#include <vespa/vespalib/data/databuffer.h> #include <vespa/searchlib/attribute/attributevector.hpp> diff --git a/searchlib/src/tests/bytecomplens/.gitignore b/searchlib/src/tests/bytecomplens/.gitignore deleted file mode 100644 index afe9bff02f6..00000000000 --- a/searchlib/src/tests/bytecomplens/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -*.So -.depend* -Makefile -bytecomp_test -searchlib_bytecomp_test_app diff --git a/searchlib/src/tests/bytecomplens/CMakeLists.txt b/searchlib/src/tests/bytecomplens/CMakeLists.txt deleted file mode 100644 index 24ecef59b15..00000000000 --- a/searchlib/src/tests/bytecomplens/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(searchlib_bytecomp_test_app TEST - SOURCES - bytecomp.cpp - DEPENDS - searchlib -) -vespa_add_test(NAME searchlib_bytecomp_test_app NO_VALGRIND COMMAND searchlib_bytecomp_test_app) diff --git a/searchlib/src/tests/bytecomplens/bytecomp.cpp b/searchlib/src/tests/bytecomplens/bytecomp.cpp deleted file mode 100644 index adc92b0097d..00000000000 --- a/searchlib/src/tests/bytecomplens/bytecomp.cpp +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <memory> -#include <vespa/log/log.h> -LOG_SETUP("bytecomplens_test"); -#include <vespa/vespalib/testkit/testapp.h> -#include <vespa/vespalib/util/random.h> -#include <vespa/searchlib/docstore/bytecomplens.h> - - -class Test : public vespalib::TestApp { -private: - void testRandomLengths(); - -public: - int Main() override { - TEST_INIT("bytecomplens_test"); - testRandomLengths(); TEST_FLUSH(); - TEST_DONE(); - } -}; - -TEST_APPHOOK(Test); - - -void -Test::testRandomLengths() -{ - vespalib::RandomGen rndgen(0x07031969); - -#define TBLSIZ 0xc00000 - - auto lentable = std::unique_ptr<uint32_t[]>(new uint32_t[TBLSIZ]); - auto offtable = std::unique_ptr<uint64_t[]>(new uint64_t[TBLSIZ]); - - uint64_t offset = 16; - - for (int i = 0; i < TBLSIZ; i++) { - int sel = rndgen.nextInt32(); - int val = rndgen.nextInt32(); - switch (sel & 0x7) { - case 0: - val &= 0x7F; - break; - case 1: - val &= 0xFF; - break; - case 3: - val &= 0x1FFF; - break; - case 4: - val &= 0x3FFF; - break; - case 5: - val &= 0x7FFF; - break; - case 6: - val &= 0xFFFF; - break; - case 7: - default: - val &= 0xFFFFF; - break; - } - offtable[i] = offset; - lentable[i] = val; - offset += val; - } - - LOG(info, "made %d random offsets", TBLSIZ); - - search::ByteCompressedLengths foo; - - LOG(info, "empty BCL using %9ld bytes memory", foo.memoryUsed()); - - foo.addOffsetTable(TBLSIZ/4, offtable.get()); - foo.addOffsetTable(TBLSIZ/4, offtable.get() + 1*(TBLSIZ/4)); - - LOG(info, "half BCL using %9ld bytes memory", foo.memoryUsed()); - - search::ByteCompressedLengths bar; - foo.swap(bar); - bar.addOffsetTable(TBLSIZ/4, offtable.get() + 2*(TBLSIZ/4)); - bar.addOffsetTable(TBLSIZ/4, offtable.get() + 3*(TBLSIZ/4)); - foo.swap(bar); - - LOG(info, "full BCL using %9ld bytes memory", foo.memoryUsed()); - - LOG(info, "constructed %d byte compressed lengths", TBLSIZ-1); - - for (int i = 0; i < TBLSIZ-1; i++) { - search::ByteCompressedLengths::OffLen offlen; - offlen = foo.getOffLen(i); - - if ((i % 1000000) == 0) { - LOG(info, "data blob [%d] length %" PRIu64 " offset %" PRIu64, i, offlen.length, offlen.offset); - } - EXPECT_EQUAL(lentable[i], offlen.length); - EXPECT_EQUAL(offtable[i], offlen.offset); - } -} - diff --git a/searchlib/src/tests/bytecomplens/example.txt b/searchlib/src/tests/bytecomplens/example.txt deleted file mode 100644 index 6dc3df0118a..00000000000 --- a/searchlib/src/tests/bytecomplens/example.txt +++ /dev/null @@ -1,122 +0,0 @@ -offset length BCN val L0 len/off skipL1 skipL2 skipL3 - -976 18707 [ 93 92 01 ] 3/0 976/0/0/0 -19683 11527 [ 87 5A ] 2/3 -31210 3926 [ D6 1E ] 2/5 -35136 2 [ 02 ] 1/7 -35138 6060 [ AC 2F ] 2/8 34162/8 -41198 649445 [ E5 D1 27 ] 3/10 -690643 2866 [ B2 16 ] 2/13 -693509 824767 [ BF AB 32 ] 3/15 -1518276 499173 [ E5 BB 1E ] 3/18 1483138/10 -2017449 20455 [ E7 9F 01 ] 3/21 -2037904 11 [ 0B ] 1/24 -2037915 19207 [ 87 96 01 ] 3/25 -2057122 6355 [ D3 31 ] 2/28 538846/10 -2063477 3422 [ DE 1A ] 2/30 -2066899 10683 [ BB 53 ] 2/32 -2077582 7360 [ C0 39 ] 2/34 -2084942 17969 [ B1 8C 01 ] 3/36 2083966/36/12 -2102911 6114 [ E2 2F ] 2/39 -2109025 31741 [ FD F7 01 ] 3/41 -2140766 581588 [ D4 BF 23 ] 3/44 -2722354 5341 [ DD 29 ] 2/47 637412/11 -2727695 13774 [ CE 6B ] 2/49 -2741469 717809 [ F1 E7 2B ] 3/51 -3459278 815406 [ AE E2 31 ] 3/54 -4274684 89 [ 59 ] 1/57 1552330/10 -4274773 4545 [ C1 23 ] 2/58 -4279318 803868 [ 9C 88 31 ] 3/60 -5083186 12865 [ C1 64 ] 2/63 -5096051 75 [ 4B ] 1/65 821367/8 -5096126 40734 [ 9E BE 02 ] 3/66 -5136860 101 [ 65 ] 1/69 -5136961 128 [ 80 01 ] 2/70 -5137089 253 [ FD 01 ] 2/72 3052147/36/12 -5137342 13 [ 0D ] 1/74 -5137355 24986 [ 9A C3 01 ] 3/75 -5162341 231 [ E7 01 ] 2/78 -5162572 997853 [ DD F3 3C ] 3/80 25483/8 -6160425 4728 [ F8 24 ] 2/83 -6165153 2025 [ E9 0F ] 2/85 -6167178 7281 [ F1 38 ] 2/87 -6174459 1026302 [ FE D1 3E ] 3/89 1011887/9 -7200761 848783 [ 8F E7 33 ] 3/92 -8049544 145767 [ E7 F2 08 ] 3/95 -8195311 19103 [ 9F 95 01 ] 3/98 -8214414 22166 [ 96 AD 01 ] 3/101 2039955/12 -8236580 30020 [ C4 EA 01 ] 3/104 -8266600 13 [ 0D ] 1/107 -8266613 120 [ 78 ] 1/108 -8266733 22398 [ FE AE 01 ] 3/109 3129644/37/12 -8289131 10832 [ D0 54 ] 2/112 -8299963 3765 [ B5 1D ] 2/114 -8303728 432771 [ 83 B5 1A ] 3/116 -8736499 30133 [ B5 EB 01 ] 3/119 469766/10 -8766632 6444 [ AC 32 ] 2/122 -8773076 16033 [ A1 7D ] 2/124 -8789109 78 [ 4E ] 1/126 -8789187 12510 [ DE 61 ] 2/127 52688/8 -8801697 12441 [ 99 61 ] 2/129 -8814138 117 [ 75 ] 1/131 -8814255 7147 [ EB 37 ] 2/132 -8821402 189 [ BD 01 ] 2/134 32215/7 -8821591 199704 [ 98 98 0C ] 3/136 -9021295 13240 [ B8 67 ] 2/139 -9034535 110 [ 6E ] 1/141 -9034645 31677 [ BD F7 01 ] 3/142 9034645/142/48/17 -9066322 18547 [ F3 90 01 ] 3/145 -9084869 734679 [ D7 EB 2C ] 3/148 -9819548 112 [ 70 ] 1/151 -9819660 883565 [ ED F6 35 ] 3/152 785015/10 -10703225 10290 [ B2 50 ] 2/155 -10713515 21410 [ A2 A7 01 ] 3/157 -10734925 15 [ 0F ] 1/160 -10734940 747774 [ FE D1 2D ] 3/161 915280/9 -11482714 39 [ 27 ] 1/164 -11482753 77 [ 4D ] 1/165 -11482830 235 [ EB 01 ] 2/166 -11483065 1991 [ C7 0F ] 2/168 748125/7 -11485056 9187 [ E3 47 ] 2/170 -11494243 18800 [ F0 92 01 ] 3/172 -11513043 1042219 [ AB CE 3F ] 3/175 -12555262 9154 [ C2 47 ] 2/178 3520617/36/12 -12564416 43582 [ BE D4 02 ] 3/180 -12607998 847240 [ 88 DB 33 ] 3/183 -13455238 4726 [ F6 24 ] 2/186 -13459964 590348 [ 8C 84 24 ] 3/188 904702/10 -14050312 8659 [ D3 43 ] 2/191 -14058971 116 [ 74 ] 1/193 -14059087 13563 [ FB 69 ] 2/194 -14072650 713064 [ E8 C2 2B ] 3/196 612686/8 -14785714 40321 [ 81 BB 02 ] 3/199 -14826035 2296 [ F8 11 ] 2/202 -14828331 7273 [ E9 38 ] 2/204 -14835604 68285 [ BD 95 04 ] 3/206 762954/10 -14903889 235 [ EB 01 ] 2/209 -14904124 4669 [ BD 24 ] 2/211 -14908793 28535 [ F7 DE 01 ] 3/213 -14937328 19 [ 13 ] 1/216 2382066/38/12 -14937347 5369 [ F9 29 ] 2/217 -14942716 602191 [ CF E0 24 ] 3/219 -15544907 2653 [ DD 14 ] 2/222 -15547560 25755 [ 9B C9 01 ] 3/224 610232/8 -15573315 11349 [ D5 58 ] 2/227 -15584664 15006 [ 9E 75 ] 2/229 -15599670 89 [ 59 ] 1/231 -15599759 52772 [ A4 9C 03 ] 3/232 52199/8 -15652531 776175 [ EF AF 2F ] 3/235 -16428706 126 [ 7E ] 1/238 -16428832 3884 [ AC 1E ] 2/239 -16432716 33958 [ A6 89 02 ] 3/241 832957/9 -16466674 122 [ 7A ] 1/244 -16466796 41895 [ A7 C7 02 ] 3/245 -16508691 105882 [ 9A BB 06 ] 3/248 -16614573 11067 [ BB 56 ] 2/251 1677245/35/12 -16625640 4588 [ EC 23 ] 2/253 -16630228 7349 [ B5 39 ] 2/255 -16637577 902638 [ EE 8B 37 ] 3/257 -17540215 8737 [ A1 44 ] 2/260 925642/9 -17548952 29186 [ 82 E4 01 ] 3/262 -17578138 41 [ 29 ] 1/265 -17578179 diff --git a/searchlib/src/tests/bytecomplens/tblprint.cpp b/searchlib/src/tests/bytecomplens/tblprint.cpp deleted file mode 100644 index 6a7347f3c0d..00000000000 --- a/searchlib/src/tests/bytecomplens/tblprint.cpp +++ /dev/null @@ -1,356 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/log/log.h> -LOG_SETUP("tblprint"); -#include <vespa/vespalib/util/random.h> - -#include <vector> -#include <vespa/vespalib/data/databuffer.h> - - -/** - * Class compressing a table of offsets in memory. - * After adding (n) offsets you can access - * (n-1) pairs of (length, offset). - * All offsets must be increasing, but they - * may be added in several chunks. - **/ -class ByteCompressedLengths -{ -public: - /** - * Construct an empty instance - **/ - ByteCompressedLengths(); - - /** - * add the given offset table. - * @param entries number of offsets to store. - * @param offsets table that contains (entries) offsets. - **/ - void addOffsetTable(uint64_t entries, uint64_t *offsets); - - /** - * free resources - **/ - ~ByteCompressedLengths(); - - /** - * Fetch a length and offset from compressed data. - * Note invariant: id < size(); size() == (entries-1) - * - * @param id The index into the offset table - * @param offset Will be incremented by offset[id] - * @return The delta (offset[id+1] - offset[id]) - **/ - uint64_t getLength(uint64_t id, uint64_t &offset) const; - - /** - * The number of (length, offset) pairs stored - **/ - uint64_t size() const { return _entries; } - - struct L3Entry { - uint64_t offset; - uint64_t l0toff; - uint64_t l1toff; - uint64_t l2toff; - }; - vespalib::DataBuffer _l0space; - vespalib::DataBuffer _l1space; - vespalib::DataBuffer _l2space; - const uint8_t *_l0table; - const uint8_t *_l1table; - const uint8_t *_l2table; - - std::vector<L3Entry> _l3table; - - uint64_t _lenSum1; - uint64_t _lenSum2; - uint64_t _l0oSum1; - uint64_t _l0oSum2; - uint64_t _l1oSum2; - uint64_t _last_offset; - uint64_t _entries; - - void addOffset(uint64_t offset); -}; - -/** - * get "Byte Compressed Number" from buffer, incrementing pointer - **/ -static inline uint64_t getBCN(const uint8_t *&buffer) -{ - uint8_t b = *buffer++; - uint64_t len = (b & 127); - unsigned shiftLen = 0; - while (b & 128) { - shiftLen += 7; - b = *buffer++; - len |= ((b & 127) << shiftLen); - } - return len; -} - -static size_t writeLen(vespalib::DataBuffer &buf, uint64_t len) -{ - size_t bytes = 0; - do { - uint8_t b = len & 127; - len >>= 7; - if (len > 0) { - b |= 128; - } - buf.ensureFree(1); - buf.writeInt8(b); - ++bytes; - } while (len > 0); - return bytes; -} - - -ByteCompressedLengths::ByteCompressedLengths() - : _l0space(), - _l1space(), - _l2space(), - _l3table(), - _lenSum1(0), - _lenSum2(0), - _l0oSum1(0), - _l0oSum2(0), - _l1oSum2(0), - _last_offset(0), - _entries(0) -{ -} - - -void -ByteCompressedLengths::addOffset(uint64_t offset) -{ - assert(offset >= _last_offset); - - uint64_t len = offset - _last_offset; - uint64_t i = _entries++; - - if ((i & 3) == 0) { - _lenSum2 += _lenSum1; - _l0oSum2 += _l0oSum1; - - uint64_t t1n = i >> 2; - if ((t1n & 3) == 0) { - uint64_t t2n = t1n >> 2; - - if ((t2n & 3) == 0) { - L3Entry e; - e.offset = _last_offset; - e.l0toff = _l0space.getDataLen(); - e.l1toff = _l1space.getDataLen(); - e.l2toff = _l2space.getDataLen(); - - _l3table.push_back(e); - } else { - writeLen(_l2space, _lenSum2); - writeLen(_l2space, _l0oSum2); - writeLen(_l2space, _l1oSum2); - } - _lenSum2 = 0; - _l0oSum2 = 0; - _l1oSum2 = 0; - } else { - _l1oSum2 += writeLen(_l1space, _lenSum1); - _l1oSum2 += writeLen(_l1space, _l0oSum1); - } - _lenSum1 = 0; - _l0oSum1 = 0; - } - _l0oSum1 += writeLen(_l0space, len); - _lenSum1 += len; - _last_offset = offset; -} - - -void -ByteCompressedLengths::addOffsetTable(uint64_t entries, uint64_t *offsets) -{ - if (entries == 0) return; - // Do we have some offsets already? - if (_entries > 0) { - // yes, add first offset normally - addOffset(offsets[0]); - } else { - // no, special treatment for very first offset - _last_offset = offsets[0]; - } - for (uint64_t cnt = 1; cnt < entries; ++cnt) { - addOffset(offsets[cnt]); - } - _l0table = (uint8_t *)_l0space.getData(); - _l1table = (uint8_t *)_l1space.getData(); - _l2table = (uint8_t *)_l2space.getData(); - - LOG(debug, "compressed %ld offsets", (_entries+1)); - LOG(debug, "(%ld bytes)", (_entries+1)*sizeof(uint64_t)); - LOG(debug, "to (%ld + %ld + %ld) bytes + %ld l3entries", - _l0space.getDataLen(), - _l1space.getDataLen(), - _l2space.getDataLen(), - _l3table.size()); - LOG(debug, "(%ld bytes)", - (_l0space.getDataLen() + _l1space.getDataLen() + _l2space.getDataLen() + - _l3table.size()*sizeof(L3Entry))); -} - - -ByteCompressedLengths::~ByteCompressedLengths() -{ -} - -uint64_t -ByteCompressedLengths::getLength(uint64_t numSkip, uint64_t &offset) const -{ - assert(numSkip < _entries); - - unsigned skipL0 = numSkip & 3; - unsigned skipL1 = (numSkip >> 2) & 3; - unsigned skipL2 = (numSkip >> 4) & 3; - uint64_t skipL3 = (numSkip >> 6); - - offset += _l3table[skipL3].offset; - uint64_t l0toff = _l3table[skipL3].l0toff; - uint64_t l1toff = _l3table[skipL3].l1toff; - uint64_t l2toff = _l3table[skipL3].l2toff; - - // printf("start off %ld l0off %ld l1off %ld l2off %ld\n", offset, l0toff, l1toff, l2toff); - - const uint8_t *l2pos = _l2table + l2toff; - - while (skipL2 > 0) { - --skipL2; - offset += getBCN(l2pos); - l0toff += getBCN(l2pos); - l1toff += getBCN(l2pos); - } - - const uint8_t *l1pos = _l1table + l1toff; - - while (skipL1 > 0) { - --skipL1; - offset += getBCN(l1pos); - l0toff += getBCN(l1pos); - - } - const uint8_t *l0pos = _l0table + l0toff; - - while (skipL0 > 0) { - --skipL0; - offset += getBCN(l0pos); - } - // printf("end off %ld l0off %ld l1off %ld l2off %ld\n", offset, l0toff, l1toff, l2toff); - return getBCN(l0pos); -} - - - -class Test { -public: - static void printTable(); -}; - - - -int main(int /*argc*/, char ** /*argv*/) -{ - Test::printTable(); - return 0; -} - -void -Test::printTable() -{ - vespalib::RandomGen rndgen(0x07031969); -#define TBLSIZ 120 - uint32_t *lentable = new uint32_t[TBLSIZ]; - uint64_t *offtable = new uint64_t[TBLSIZ]; - - uint64_t offset = 16 + TBLSIZ*8; - - for (int i = 0; i < TBLSIZ; i++) { - int sel = rndgen.nextInt32(); - int val = rndgen.nextInt32(); - switch (sel & 0x7) { - case 0: - val &= 0x7F; - break; - case 1: - val &= 0xFF; - break; - case 3: - val &= 0x1FFF; - break; - case 4: - val &= 0x3FFF; - break; - case 5: - val &= 0x7FFF; - break; - case 6: - val &= 0xFFFF; - break; - case 7: - default: - val &= 0xFFFFF; - break; - } - offtable[i] = offset; - lentable[i] = val; - offset += val; - } - - ByteCompressedLengths foo; - foo.addOffsetTable(TBLSIZ, offtable); - - const uint8_t *l1pos = foo._l1table; - const uint8_t *l2pos = foo._l2table; - - printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\n", - "offset", "length", "BCN val", "L0 len/off", "skipL1", "skipL2", "skipL3"); - - int slb = 0; - for (int i = 0; i+1 < TBLSIZ; i++) { - printf("%ld\t%d\t[", offtable[i], lentable[i]); - int bytes=0; - uint64_t len = lentable[i]; - do { - uint8_t b = len & 127; - len >>= 7; - if (len > 0) { - b |= 128; - } - printf(" %02X", b); - ++bytes; - } while (len > 0); - printf(" ]\t%d", bytes); - printf("/%d", slb); - slb += bytes; - - if ((i & 63) == 0) { - printf("\t\t\t%ld/%ld/%ld/%ld", - foo._l3table[i >> 6].offset, - foo._l3table[i >> 6].l0toff, - foo._l3table[i >> 6].l1toff, - foo._l3table[i >> 6].l2toff); - } else - if ((i & 15) == 0) { - printf("\t\t%ld", getBCN(l2pos)); - printf("/%ld", getBCN(l2pos)); - printf("/%ld", getBCN(l2pos)); - } else - if ((i & 3) == 0) { - printf("\t%ld", getBCN(l1pos)); - printf("/%ld", getBCN(l1pos)); - } - printf("\n"); - } - printf("%ld\n", offtable[TBLSIZ-1]); - fflush(stdout); -} diff --git a/searchlib/src/tests/features/onnx_feature/onnx_feature_test.cpp b/searchlib/src/tests/features/onnx_feature/onnx_feature_test.cpp index b49d9c365de..6a1e4ef9fa1 100644 --- a/searchlib/src/tests/features/onnx_feature/onnx_feature_test.cpp +++ b/searchlib/src/tests/features/onnx_feature/onnx_feature_test.cpp @@ -58,8 +58,8 @@ struct OnnxFeatureTest : ::testing::Test { vespalib::string expr_name = feature_name + ".rankingScript"; indexEnv.getProperties().add(expr_name, expr); } - void add_onnx(const vespalib::string &name, const vespalib::string &file) { - indexEnv.addOnnxModel(name, file); + void add_onnx(const OnnxModel &model) { + indexEnv.addOnnxModel(model); } void compile(const vespalib::string &seed) { resolver->addSeed(seed); @@ -89,7 +89,7 @@ TEST_F(OnnxFeatureTest, simple_onnx_model_can_be_calculated) { add_expr("query_tensor", "tensor<float>(a[1],b[4]):[[docid,2,3,4]]"); add_expr("attribute_tensor", "tensor<float>(a[4],b[1]):[[5],[6],[7],[8]]"); add_expr("bias_tensor", "tensor<float>(a[1],b[1]):[[9]]"); - add_onnx("simple", simple_model); + add_onnx(OnnxModel("simple", simple_model)); compile(onnx_feature("simple")); EXPECT_EQ(get(1), TensorSpec("tensor<float>(d0[1],d1[1])").add({{"d0",0},{"d1",0}}, 79.0)); EXPECT_EQ(get("onnxModel(simple).output", 1), TensorSpec("tensor<float>(d0[1],d1[1])").add({{"d0",0},{"d1",0}}, 79.0)); @@ -101,7 +101,7 @@ TEST_F(OnnxFeatureTest, dynamic_onnx_model_can_be_calculated) { add_expr("query_tensor", "tensor<float>(a[1],b[4]):[[docid,2,3,4]]"); add_expr("attribute_tensor", "tensor<float>(a[4],b[1]):[[5],[6],[7],[8]]"); add_expr("bias_tensor", "tensor<float>(a[1],b[2]):[[4,5]]"); - add_onnx("dynamic", dynamic_model); + add_onnx(OnnxModel("dynamic", dynamic_model)); compile(onnx_feature("dynamic")); EXPECT_EQ(get(1), TensorSpec("tensor<float>(d0[1],d1[1])").add({{"d0",0},{"d1",0}}, 79.0)); EXPECT_EQ(get("onnxModel(dynamic).output", 1), TensorSpec("tensor<float>(d0[1],d1[1])").add({{"d0",0},{"d1",0}}, 79.0)); @@ -112,7 +112,7 @@ TEST_F(OnnxFeatureTest, dynamic_onnx_model_can_be_calculated) { TEST_F(OnnxFeatureTest, strange_input_and_output_names_are_normalized) { add_expr("input_0", "tensor<float>(a[2]):[10,20]"); add_expr("input_1", "tensor<float>(a[2]):[5,10]"); - add_onnx("strange_names", strange_names_model); + add_onnx(OnnxModel("strange_names", strange_names_model)); compile(onnx_feature("strange_names")); auto expect_add = TensorSpec("tensor<float>(d0[2])").add({{"d0",0}},15).add({{"d0",1}},30); auto expect_sub = TensorSpec("tensor<float>(d0[2])").add({{"d0",0}},5).add({{"d0",1}},10); @@ -121,4 +121,20 @@ TEST_F(OnnxFeatureTest, strange_input_and_output_names_are_normalized) { EXPECT_EQ(get("onnxModel(strange_names)._baz_0", 1), expect_sub); } +TEST_F(OnnxFeatureTest, input_features_and_output_names_can_be_specified) { + add_expr("my_first_input", "tensor<float>(a[2]):[10,20]"); + add_expr("my_second_input", "tensor<float>(a[2]):[5,10]"); + add_onnx(OnnxModel("custom_names", strange_names_model) + .input_feature("input:0", "rankingExpression(my_first_input)") + .input_feature("input/1", "rankingExpression(my_second_input)") + .output_name("foo/bar", "my_first_output") + .output_name("-baz:0", "my_second_output")); + compile(onnx_feature("custom_names")); + auto expect_add = TensorSpec("tensor<float>(d0[2])").add({{"d0",0}},15).add({{"d0",1}},30); + auto expect_sub = TensorSpec("tensor<float>(d0[2])").add({{"d0",0}},5).add({{"d0",1}},10); + EXPECT_EQ(get(1), expect_add); + EXPECT_EQ(get("onnxModel(custom_names).my_first_output", 1), expect_add); + EXPECT_EQ(get("onnxModel(custom_names).my_second_output", 1), expect_sub); +} + GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/transactionlog/translogclient_test.cpp b/searchlib/src/tests/transactionlog/translogclient_test.cpp index 9e5021b4778..fffb70467a3 100644 --- a/searchlib/src/tests/transactionlog/translogclient_test.cpp +++ b/searchlib/src/tests/transactionlog/translogclient_test.cpp @@ -329,11 +329,12 @@ fillDomainTest(TransLogServer & s1, const vespalib::string & domain, size_t numP Packet::Entry e(value+1, j+1, vespalib::ConstBufferRef((const char *)&value, sizeof(value))); p->add(e); if ( p->sizeBytes() > DEFAULT_PACKET_SIZE ) { - domainWriter->commit(*p, std::make_shared<CountDone>(inFlight)); + domainWriter->append(*p, std::make_shared<CountDone>(inFlight)); p = std::make_unique<Packet>(DEFAULT_PACKET_SIZE); } } - domainWriter->commit(*p, std::make_shared<CountDone>(inFlight)); + domainWriter->append(*p, std::make_shared<CountDone>(inFlight)); + auto keep = domainWriter->startCommit(Writer::DoneCallback()); LOG(info, "Inflight %ld", inFlight.load()); } while (inFlight.load() != 0) { diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp index 430f2eaa560..cfbca71bd5e 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp @@ -114,6 +114,8 @@ to_distance_metric(const vespalib::string& metric) return DistanceMetric::Angular; } else if (metric == geodegrees) { return DistanceMetric::GeoDegrees; + } else if (metric == innerproduct) { + return DistanceMetric::InnerProduct; } else if (metric == hamming) { return DistanceMetric::Hamming; } else { diff --git a/searchlib/src/vespa/searchlib/attribute/attributefilebufferwriter.cpp b/searchlib/src/vespa/searchlib/attribute/attributefilebufferwriter.cpp index 341112f9b22..4efd64f72dd 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributefilebufferwriter.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attributefilebufferwriter.cpp @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "attributefilebufferwriter.h" +#include <vespa/vespalib/data/databuffer.h> namespace search { diff --git a/searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp b/searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp index 415c00cb8fd..829720e9c3e 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp @@ -6,6 +6,7 @@ #include <vespa/vespalib/data/fileheader.h> #include <vespa/searchlib/common/fileheadercontext.h> #include <vespa/searchlib/common/tunefileinfo.h> +#include <vespa/vespalib/data/databuffer.h> #include <vespa/fastos/file.h> #include <vespa/log/log.h> diff --git a/searchlib/src/vespa/searchlib/attribute/attributememoryfilebufferwriter.cpp b/searchlib/src/vespa/searchlib/attribute/attributememoryfilebufferwriter.cpp index b354566616b..454cc486f70 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributememoryfilebufferwriter.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attributememoryfilebufferwriter.cpp @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "attributememoryfilebufferwriter.h" +#include <vespa/vespalib/data/databuffer.h> namespace search { @@ -11,9 +12,7 @@ AttributeMemoryFileBufferWriter(IAttributeFileWriter &memoryFileWriter) } -AttributeMemoryFileBufferWriter::~AttributeMemoryFileBufferWriter() -{ -} +AttributeMemoryFileBufferWriter::~AttributeMemoryFileBufferWriter() = default; void diff --git a/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp b/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp index c28b7e9d20b..8d412364815 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp @@ -2,6 +2,7 @@ #include "attributememoryfilewriter.h" #include "attributememoryfilebufferwriter.h" +#include <vespa/vespalib/data/databuffer.h> namespace search { @@ -18,9 +19,7 @@ AttributeMemoryFileWriter::AttributeMemoryFileWriter() } -AttributeMemoryFileWriter::~AttributeMemoryFileWriter() -{ -} +AttributeMemoryFileWriter::~AttributeMemoryFileWriter() = default; AttributeMemoryFileWriter::Buffer diff --git a/searchlib/src/vespa/searchlib/attribute/attrvector.cpp b/searchlib/src/vespa/searchlib/attribute/attrvector.cpp index 59771d7ffae..80f72aaea25 100644 --- a/searchlib/src/vespa/searchlib/attribute/attrvector.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attrvector.cpp @@ -4,6 +4,7 @@ #include "attrvector.hpp" #include "iattributesavetarget.h" #include "load_utils.h" +#include <vespa/vespalib/data/databuffer.h> #include <vespa/log/log.h> LOG_SETUP(".searchlib.attribute.attr_vector"); diff --git a/searchlib/src/vespa/searchlib/attribute/iattributefilewriter.h b/searchlib/src/vespa/searchlib/attribute/iattributefilewriter.h index bb00124c9fc..94e16b37e9d 100644 --- a/searchlib/src/vespa/searchlib/attribute/iattributefilewriter.h +++ b/searchlib/src/vespa/searchlib/attribute/iattributefilewriter.h @@ -2,8 +2,9 @@ #pragma once -#include <vespa/vespalib/data/databuffer.h> +#include <memory> +namespace vespalib { class DataBuffer; } namespace search { class BufferWriter; diff --git a/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp index e1ab47ed434..bbe8c9c8327 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp @@ -8,6 +8,7 @@ #include <vespa/searchlib/query/query_term_simple.h> #include <vespa/searchlib/queryeval/emptysearch.h> #include <vespa/searchlib/common/bitvectoriterator.h> +#include <vespa/vespalib/data/databuffer.h> namespace search { diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp index db8636f47c3..3c26e960a06 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp @@ -2,6 +2,7 @@ #include "singlenumericattributesaver.h" #include "iattributesavetarget.h" +#include <vespa/vespalib/data/databuffer.h> using vespalib::GenerationHandler; diff --git a/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp index 6eff0da06e8..fd2631ac63d 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp @@ -7,6 +7,7 @@ #include "iattributesavetarget.h" #include <vespa/searchlib/query/query_term_simple.h> #include <vespa/searchlib/queryeval/emptysearch.h> +#include <vespa/vespalib/data/databuffer.h> namespace search { diff --git a/searchlib/src/vespa/searchlib/common/fileheadercontext.h b/searchlib/src/vespa/searchlib/common/fileheadercontext.h index 6f76fe1717d..8bb3d6a56a6 100644 --- a/searchlib/src/vespa/searchlib/common/fileheadercontext.h +++ b/searchlib/src/vespa/searchlib/common/fileheadercontext.h @@ -3,40 +3,22 @@ #include <vespa/vespalib/stllike/string.h> -namespace vespalib -{ - -class GenericHeader; - +namespace vespalib { + class GenericHeader; } -namespace search -{ - -namespace common -{ +namespace search::common { class FileHeaderContext { public: FileHeaderContext(); + virtual ~FileHeaderContext(); - virtual - ~FileHeaderContext(); + virtual void addTags(vespalib::GenericHeader &header, const vespalib::string &name) const = 0; - virtual void - addTags(vespalib::GenericHeader &header, - const vespalib::string &name) const = 0; - - static void - addCreateAndFreezeTime(vespalib::GenericHeader &header); - - static void - setFreezeTime(vespalib::GenericHeader &header); + static void addCreateAndFreezeTime(vespalib::GenericHeader &header); + static void setFreezeTime(vespalib::GenericHeader &header); }; - -} // namespace common - -} // namespace search - +} diff --git a/searchlib/src/vespa/searchlib/config/translogserver.def b/searchlib/src/vespa/searchlib/config/translogserver.def index 38741745773..defce8c3421 100644 --- a/searchlib/src/vespa/searchlib/config/translogserver.def +++ b/searchlib/src/vespa/searchlib/config/translogserver.def @@ -15,7 +15,7 @@ basedir string default="tmp" restart ## Use fsync after each commit. ## If not the below interval is used. -usefsync bool default=false restart +usefsync bool default=false ##Number of threads available for visiting/subscription. maxthreads int default=4 restart @@ -24,12 +24,12 @@ maxthreads int default=4 restart crcmethod enum {ccitt_crc32, xxh64} default=xxh64 ## Control compression type. -compression.type enum {NONE, NONE_MULTI, LZ4, ZSTD} default=LZ4 +compression.type enum {NONE, NONE_MULTI, LZ4, ZSTD} default=NONE ## Control compression level ## LZ4 has normal range 1..9 while ZSTD has range 1..19 ## 9 is a reasonable default for both -compression.level int default=9 +compression.level int default=3 ## How large a chunk can grow in memory before beeing flushed chunk.sizelimit int default = 256000 # 256k diff --git a/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt b/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt index 2b82d9e5af7..b1c27e20210 100644 --- a/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt @@ -1,7 +1,6 @@ # Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. vespa_add_library(searchlib_docstore OBJECT SOURCES - bytecomplens.cpp chunk.cpp chunkformat.cpp chunkformats.cpp diff --git a/searchlib/src/vespa/searchlib/docstore/bytecomplens.cpp b/searchlib/src/vespa/searchlib/docstore/bytecomplens.cpp deleted file mode 100644 index 4ef57b77dbd..00000000000 --- a/searchlib/src/vespa/searchlib/docstore/bytecomplens.cpp +++ /dev/null @@ -1,260 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "bytecomplens.h" - -#include <vespa/log/log.h> -LOG_SETUP(".search.docstore"); - -namespace search { - -static inline uint64_t getBCN(const uint8_t *&buffer) __attribute__((__always_inline__)); - -/** - * get "Byte Compressed Number" from buffer, incrementing pointer - **/ -static inline uint64_t getBCN(const uint8_t *&buffer) -{ - uint8_t b = *buffer++; - uint64_t len = (b & 127); - unsigned shiftLen = 0; - while (b & 128) { - shiftLen += 7; - b = *buffer++; - len |= ((b & 127) << shiftLen); - } - return len; -} - -static size_t writeLen(vespalib::DataBuffer &buf, uint64_t len) -{ - size_t bytes = 0; - do { - uint8_t b = len & 127; - len >>= 7; - if (len > 0) { - b |= 128; - } - buf.writeInt8(b); - ++bytes; - } while (len > 0); - return bytes; -} - - -ByteCompressedLengths::ByteCompressedLengths() - : _l0space(), - _l1space(), - _l2space(), - _l3table(), - _entries(0), - _progress(), - _ptrcache(), - _hasInitialOffset(false) -{ - clear(); -} - - -void -ByteCompressedLengths::clear() -{ - _l0space.clear(); - _l1space.clear(); - _l2space.clear(); - _l3table.clear(); - - _entries = 0; - - _progress.lenSum1 = 0; - _progress.lenSum2 = 0; - _progress.l0oSum1 = 0; - _progress.l0oSum2 = 0; - _progress.l1oSum2 = 0; - _progress.last_offset = 0; - - _ptrcache.l0table = NULL; - _ptrcache.l1table = NULL; - _ptrcache.l2table = NULL; - - _hasInitialOffset = false; -} - - -void -ByteCompressedLengths::swap(ByteCompressedLengths& other) -{ - _l0space.swap(other._l0space); - _l1space.swap(other._l1space); - _l2space.swap(other._l2space); - _l3table.swap(other._l3table); - - std::swap(_entries, other._entries); - std::swap(_progress, other._progress); - std::swap(_ptrcache, other._ptrcache); - std::swap(_hasInitialOffset, other._hasInitialOffset); -} - - -// add a new offset to the compressed tables -void -ByteCompressedLengths::addOffset(uint64_t offset) -{ - assert(offset >= _progress.last_offset); - - // delta from last offset: - uint64_t len = offset - _progress.last_offset; - - // which entry is this: - uint64_t idx = _entries++; - - if ((idx & 31) == 0) { - // add entry to some skip-table - _progress.lenSum2 += _progress.lenSum1; // accumulate to Level2 - _progress.l0oSum2 += _progress.l0oSum1; // accumulate to Level2 - - uint64_t t1n = idx >> 5; - if ((t1n & 31) == 0) { - // add Level2 or Level3 table entry: - uint64_t t2n = t1n >> 5; - - if ((t2n & 31) == 0) { - // add new Level3 table entry: - L3Entry e; - e.offset = _progress.last_offset; - e.l0toff = _l0space.getDataLen(); - e.l1toff = _l1space.getDataLen(); - e.l2toff = _l2space.getDataLen(); - - _l3table.push_back(e); - } else { - // write to Level2 table, sums since last reset: - writeLen(_l2space, _progress.lenSum2); // sum of Level0 lengths - writeLen(_l2space, _progress.l0oSum2); // sum size of Level0 entries - writeLen(_l2space, _progress.l1oSum2); // sum size of Level1 entries - } - // reset Level2 sums: - _progress.lenSum2 = 0; - _progress.l0oSum2 = 0; - _progress.l1oSum2 = 0; - } else { - // write to Level1 table, sums since last reset: - _progress.l1oSum2 += writeLen(_l1space, _progress.lenSum1); // sum of Level0 lengths - _progress.l1oSum2 += writeLen(_l1space, _progress.l0oSum1); // sum size of Level0 entries - } - // reset Level1 sums: - _progress.lenSum1 = 0; - _progress.l0oSum1 = 0; - } - // always write length (offset delta) to Level0 table: - _progress.l0oSum1 += writeLen(_l0space, len); // accumulate to Level1 - _progress.lenSum1 += len; // accumulate to Level1 - _progress.last_offset = offset; -} - - -void -ByteCompressedLengths::addOffsetTable(uint64_t entries, uint64_t *offsets) -{ - // ignore NOP: - if (entries == 0) return; - - // Do we have some offsets already? - if (_hasInitialOffset) { - // yes, add first offset normally - addOffset(offsets[0]); - } else { - // no, special treatment for very first offset - _progress.last_offset = offsets[0]; - _hasInitialOffset = true; - } - for (uint64_t cnt = 1; cnt < entries; ++cnt) { - addOffset(offsets[cnt]); - } - - // Simplify access to actual data: - _ptrcache.l0table = (uint8_t *)_l0space.getData(); - _ptrcache.l1table = (uint8_t *)_l1space.getData(); - _ptrcache.l2table = (uint8_t *)_l2space.getData(); - - // some statistics available when debug logging: - LOG(debug, "compressed %" PRIu64 " offsets", (_entries+1)); - LOG(debug, "(%" PRIu64 " bytes)", (_entries+1)*sizeof(uint64_t)); - LOG(debug, "to (%ld + %ld + %ld) bytes + %ld l3entries", - _l0space.getDataLen(), - _l1space.getDataLen(), - _l2space.getDataLen(), - _l3table.size()); - LOG(debug, "(%ld bytes)", - (_l0space.getDataLen() + _l1space.getDataLen() + _l2space.getDataLen() + - _l3table.size()*sizeof(L3Entry))); -} - - -ByteCompressedLengths::~ByteCompressedLengths() -{ -} - -ByteCompressedLengths::OffLen -ByteCompressedLengths::getOffLen(uint64_t idx) const -{ - assert(idx < _entries); - - unsigned skipL0 = idx & 31; - unsigned skipL1 = (idx >> 5) & 31; - unsigned skipL2 = (idx >> 10) & 31; - uint64_t skipL3 = (idx >> 15); - - uint64_t offset = _l3table[skipL3].offset; - uint64_t l0toff = _l3table[skipL3].l0toff; - uint64_t l1toff = _l3table[skipL3].l1toff; - uint64_t l2toff = _l3table[skipL3].l2toff; - - // printf("start off %ld l0off %ld l1off %ld l2off %ld\n", offset, l0toff, l1toff, l2toff); - - const uint8_t *l2pos = _ptrcache.l2table + l2toff; - - while (skipL2 > 0) { - --skipL2; - offset += getBCN(l2pos); - l0toff += getBCN(l2pos); - l1toff += getBCN(l2pos); - } - - const uint8_t *l1pos = _ptrcache.l1table + l1toff; - - while (skipL1 > 0) { - --skipL1; - offset += getBCN(l1pos); - l0toff += getBCN(l1pos); - - } - const uint8_t *l0pos = _ptrcache.l0table + l0toff; - - while (skipL0 > 0) { - --skipL0; - offset += getBCN(l0pos); - } - // printf("end off %ld l0off %ld l1off %ld l2off %ld\n", offset, l0toff, l1toff, l2toff); - OffLen retval; - retval.offset = offset; - retval.length = getBCN(l0pos); - return retval; -} - - -size_t -ByteCompressedLengths::memoryUsed() const -{ - size_t mem = sizeof(*this); - mem += _l0space.getBufSize(); - mem += _l1space.getBufSize(); - mem += _l2space.getBufSize(); - mem += _l3table.capacity() * sizeof(L3Entry); - return mem; -} - - - - -} // namespace search - diff --git a/searchlib/src/vespa/searchlib/docstore/bytecomplens.h b/searchlib/src/vespa/searchlib/docstore/bytecomplens.h deleted file mode 100644 index 88f6a11c764..00000000000 --- a/searchlib/src/vespa/searchlib/docstore/bytecomplens.h +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <vector> -#include <vespa/vespalib/data/databuffer.h> - -namespace search { - -/** - * Class compressing a table of offsets in memory. - * After adding (n) offsets you can access - * (n-1) pairs of (length, offset). - * All offsets must be increasing, but they - * may be added in several chunks. - **/ -class ByteCompressedLengths -{ -public: - /** - * Construct an empty instance - **/ - ByteCompressedLengths(); - - /** - * add the given offset table. - * @param entries number of offsets to store. - * @param offsets pointer to table that contains (entries) offsets. - **/ - void addOffsetTable(uint64_t entries, uint64_t *offsets); - - /** - * free resources - **/ - ~ByteCompressedLengths(); - - struct OffLen - { - uint64_t offset; - uint64_t length; - }; - - /** - * Fetch an offset and length from compressed data. - * Note restriction: idx must be < size() - * - * @param idx The index into the offset table - * @return offset[id] and the delta (offset[id+1] - offset[id]) - **/ - OffLen getOffLen(uint64_t idx) const; - - /** - * The number of (length, offset) pairs stored - * Note that size() == sum(entries) - 1 - **/ - uint64_t size() const { return _entries; } - - /** - * remove all data from this instance - **/ - void clear(); - - /** - * swap all data with another instance - **/ - void swap(ByteCompressedLengths& other); - - /** - * Calculate memory used by this instance - * @return memory usage (in bytes) - **/ - size_t memoryUsed() const; - -private: - struct L3Entry { - uint64_t offset; - uint64_t l0toff; - uint64_t l1toff; - uint64_t l2toff; - }; - vespalib::DataBuffer _l0space; - vespalib::DataBuffer _l1space; - vespalib::DataBuffer _l2space; - - std::vector<L3Entry> _l3table; - - uint64_t _entries; - - struct ProgressPoint { - uint64_t lenSum1; - uint64_t lenSum2; - uint64_t l0oSum1; - uint64_t l0oSum2; - uint64_t l1oSum2; - uint64_t last_offset; - } _progress; - - struct CachedPointers { - const uint8_t *l0table; - const uint8_t *l1table; - const uint8_t *l2table; - } _ptrcache; - - bool _hasInitialOffset; - - void addOffset(uint64_t offset); -}; - -} // namespace search - diff --git a/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp b/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp index fbbdcff3c5d..afa7abb9ef8 100644 --- a/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp +++ b/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp @@ -140,7 +140,7 @@ ChunkFormat::deserializeBody(vespalib::nbostream & is) assert(uncompressed.getData() == uncompressed.getDead()); if (uncompressed.getData() != data.c_str()) { const size_t sz(uncompressed.getDataLen()); - vespalib::nbostream(uncompressed.stealBuffer(), sz).swap(_dataBuf); + vespalib::nbostream(std::move(uncompressed).stealBuffer(), sz).swap(_dataBuf); } else { _dataBuf = vespalib::nbostream(uncompressed.getData(), uncompressed.getDataLen()); } diff --git a/searchlib/src/vespa/searchlib/docstore/value.cpp b/searchlib/src/vespa/searchlib/docstore/value.cpp index 09725b447cd..25cf93ac18b 100644 --- a/searchlib/src/vespa/searchlib/docstore/value.cpp +++ b/searchlib/src/vespa/searchlib/docstore/value.cpp @@ -1,7 +1,6 @@ // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "value.h" -#include <vespa/vespalib/data/databuffer.h> #include <vespa/vespalib/util/compressor.h> #include <xxhash.h> @@ -66,8 +65,9 @@ Value::set(vespalib::DataBuffer &&buf, ssize_t len, const CompressionConfig &com _compression = type; _uncompressedSize = len; _uncompressedCrc = XXH64(input.c_str(), input.size(), 0); - _buf = std::make_shared<Alloc>(compact(_compressedSize, - (buf.getData() == compressed.getData()) ? buf.stealBuffer() : compressed.stealBuffer())); + _buf = std::make_shared<Alloc>(compact(_compressedSize,(buf.getData() == compressed.getData()) + ? std::move(buf).stealBuffer() + : std::move(compressed).stealBuffer())); assert(((type == CompressionConfig::NONE) && (len == ssize_t(_compressedSize))) || diff --git a/searchlib/src/vespa/searchlib/docstore/visitcache.cpp b/searchlib/src/vespa/searchlib/docstore/visitcache.cpp index e8504480b7d..6990a0a3ed7 100644 --- a/searchlib/src/vespa/searchlib/docstore/visitcache.cpp +++ b/searchlib/src/vespa/searchlib/docstore/visitcache.cpp @@ -109,7 +109,7 @@ CompressedBlobSet::getBlobSet() const decompress(_compression, getBufferSize(_positions), ConstBufferRef(_buffer->c_str(), _buffer->size()), uncompressed, false); } - return BlobSet(_positions, uncompressed.stealBuffer()); + return BlobSet(_positions, std::move(uncompressed).stealBuffer()); } size_t CompressedBlobSet::size() const { diff --git a/searchlib/src/vespa/searchlib/features/onnx_feature.cpp b/searchlib/src/vespa/searchlib/features/onnx_feature.cpp index 698d2309e5a..fca8988ba36 100644 --- a/searchlib/src/vespa/searchlib/features/onnx_feature.cpp +++ b/searchlib/src/vespa/searchlib/features/onnx_feature.cpp @@ -2,6 +2,7 @@ #include "onnx_feature.h" #include <vespa/searchlib/fef/properties.h> +#include <vespa/searchlib/fef/onnx_model.h> #include <vespa/searchlib/fef/featureexecutor.h> #include <vespa/eval/tensor/dense/dense_tensor_view.h> #include <vespa/eval/tensor/dense/mutable_dense_tensor_view.h> @@ -85,37 +86,45 @@ OnnxBlueprint::setup(const IIndexEnvironment &env, auto optimize = (env.getFeatureMotivation() == env.FeatureMotivation::VERIFY_SETUP) ? Onnx::Optimize::DISABLE : Onnx::Optimize::ENABLE; - auto file_name = env.getOnnxModelFullPath(params[0].getValue()); - if (!file_name.has_value()) { + auto model_cfg = env.getOnnxModel(params[0].getValue()); + if (!model_cfg) { return fail("no model with name '%s' found", params[0].getValue().c_str()); } try { - _model = std::make_unique<Onnx>(file_name.value(), optimize); + _model = std::make_unique<Onnx>(model_cfg->file_path(), optimize); } catch (std::exception &ex) { return fail("model setup failed: %s", ex.what()); } Onnx::WirePlanner planner; for (size_t i = 0; i < _model->inputs().size(); ++i) { const auto &model_input = _model->inputs()[i]; - vespalib::string input_name = normalize_name(model_input.name, "input"); - if (auto maybe_input = defineInput(fmt("rankingExpression(\"%s\")", input_name.c_str()), AcceptInput::OBJECT)) { + auto input_feature = model_cfg->input_feature(model_input.name); + if (!input_feature.has_value()) { + input_feature = fmt("rankingExpression(\"%s\")", normalize_name(model_input.name, "input").c_str()); + } + if (auto maybe_input = defineInput(input_feature.value(), AcceptInput::OBJECT)) { const FeatureType &feature_input = maybe_input.value(); assert(feature_input.is_object()); if (!planner.bind_input_type(feature_input.type(), model_input)) { - return fail("incompatible type for input '%s': %s -> %s", input_name.c_str(), + return fail("incompatible type for input (%s -> %s): %s -> %s", + input_feature.value().c_str(), model_input.name.c_str(), feature_input.type().to_spec().c_str(), model_input.type_as_string().c_str()); } } } for (size_t i = 0; i < _model->outputs().size(); ++i) { const auto &model_output = _model->outputs()[i]; - vespalib::string output_name = normalize_name(model_output.name, "output"); + auto output_name = model_cfg->output_name(model_output.name); + if (!output_name.has_value()) { + output_name = normalize_name(model_output.name, "output"); + } ValueType output_type = planner.make_output_type(model_output); if (output_type.is_error()) { - return fail("unable to make compatible type for output '%s': %s -> error", - output_name.c_str(), model_output.type_as_string().c_str()); + return fail("unable to make compatible type for output (%s -> %s): %s -> error", + model_output.name.c_str(), output_name.value().c_str(), + model_output.type_as_string().c_str()); } - describeOutput(output_name, "output from onnx model", FeatureType::object(output_type)); + describeOutput(output_name.value(), "output from onnx model", FeatureType::object(output_type)); } _wire_info = planner.get_wire_info(*_model); return true; diff --git a/searchlib/src/vespa/searchlib/fef/CMakeLists.txt b/searchlib/src/vespa/searchlib/fef/CMakeLists.txt index 178de1b8b87..d6f8764cd63 100644 --- a/searchlib/src/vespa/searchlib/fef/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/fef/CMakeLists.txt @@ -4,12 +4,12 @@ vespa_add_library(searchlib_fef OBJECT blueprint.cpp blueprintfactory.cpp blueprintresolver.cpp + feature_resolver.cpp feature_type.cpp featureexecutor.cpp featurenamebuilder.cpp featurenameparser.cpp featureoverrider.cpp - feature_resolver.cpp fef.cpp fieldinfo.cpp fieldpositionsiterator.cpp @@ -19,6 +19,7 @@ vespa_add_library(searchlib_fef OBJECT matchdata.cpp matchdatalayout.cpp objectstore.cpp + onnx_model.cpp parameter.cpp parameterdescriptions.cpp parametervalidator.cpp diff --git a/searchlib/src/vespa/searchlib/fef/iindexenvironment.h b/searchlib/src/vespa/searchlib/fef/iindexenvironment.h index 26e88a98033..384b81643cc 100644 --- a/searchlib/src/vespa/searchlib/fef/iindexenvironment.h +++ b/searchlib/src/vespa/searchlib/fef/iindexenvironment.h @@ -3,7 +3,6 @@ #pragma once #include <vespa/vespalib/stllike/string.h> -#include <optional> namespace vespalib::eval { struct ConstantValue; } @@ -12,6 +11,7 @@ namespace search::fef { class Properties; class FieldInfo; class ITableManager; +class OnnxModel; /** * Abstract view of index related information available to the @@ -122,9 +122,9 @@ public: virtual std::unique_ptr<vespalib::eval::ConstantValue> getConstantValue(const vespalib::string &name) const = 0; /** - * Get the full path of the file containing the given onnx model + * Get configuration for the given onnx model. **/ - virtual std::optional<vespalib::string> getOnnxModelFullPath(const vespalib::string &name) const = 0; + virtual const OnnxModel *getOnnxModel(const vespalib::string &name) const = 0; virtual uint32_t getDistributionKey() const = 0; diff --git a/searchlib/src/vespa/searchlib/fef/onnx_model.cpp b/searchlib/src/vespa/searchlib/fef/onnx_model.cpp new file mode 100644 index 00000000000..ba5adaae857 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/onnx_model.cpp @@ -0,0 +1,55 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "onnx_model.h" +#include <tuple> + +namespace search::fef { + +OnnxModel::OnnxModel(const vespalib::string &name_in, + const vespalib::string &file_path_in) + : _name(name_in), + _file_path(file_path_in), + _input_features(), + _output_names() +{ +} + +OnnxModel & +OnnxModel::input_feature(const vespalib::string &model_input_name, const vespalib::string &input_feature) { + _input_features[model_input_name] = input_feature; + return *this; +} + +OnnxModel & +OnnxModel::output_name(const vespalib::string &model_output_name, const vespalib::string &output_name) { + _output_names[model_output_name] = output_name; + return *this; +} + +std::optional<vespalib::string> +OnnxModel::input_feature(const vespalib::string &model_input_name) const { + auto pos = _input_features.find(model_input_name); + if (pos != _input_features.end()) { + return pos->second; + } + return std::nullopt; +} + +std::optional<vespalib::string> +OnnxModel::output_name(const vespalib::string &model_output_name) const { + auto pos = _output_names.find(model_output_name); + if (pos != _output_names.end()) { + return pos->second; + } + return std::nullopt; +} + +bool +OnnxModel::operator==(const OnnxModel &rhs) const { + return (std::tie(_name, _file_path, _input_features, _output_names) == + std::tie(rhs._name, rhs._file_path, rhs._input_features, rhs._output_names)); +} + +OnnxModel::~OnnxModel() = default; + +} diff --git a/searchlib/src/vespa/searchlib/fef/onnx_model.h b/searchlib/src/vespa/searchlib/fef/onnx_model.h new file mode 100644 index 00000000000..2195a50600d --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/onnx_model.h @@ -0,0 +1,39 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/stllike/string.h> +#include <optional> +#include <map> + +namespace search::fef { + +/** + * Class containing configuration for a single onnx model setup. This + * class is used both by the IIndexEnvironment api as well as the + * OnnxModels config adapter in the search core (matching component). + **/ +class OnnxModel { +private: + vespalib::string _name; + vespalib::string _file_path; + std::map<vespalib::string,vespalib::string> _input_features; + std::map<vespalib::string,vespalib::string> _output_names; + +public: + OnnxModel(const vespalib::string &name_in, + const vespalib::string &file_path_in); + ~OnnxModel(); + + const vespalib::string &name() const { return _name; } + const vespalib::string &file_path() const { return _file_path; } + OnnxModel &input_feature(const vespalib::string &model_input_name, const vespalib::string &input_feature); + OnnxModel &output_name(const vespalib::string &model_output_name, const vespalib::string &output_name); + std::optional<vespalib::string> input_feature(const vespalib::string &model_input_name) const; + std::optional<vespalib::string> output_name(const vespalib::string &model_output_name) const; + bool operator==(const OnnxModel &rhs) const; + const std::map<vespalib::string,vespalib::string> &inspect_input_features() const { return _input_features; } + const std::map<vespalib::string,vespalib::string> &inspect_output_names() const { return _output_names; } +}; + +} diff --git a/searchlib/src/vespa/searchlib/fef/test/indexenvironment.cpp b/searchlib/src/vespa/searchlib/fef/test/indexenvironment.cpp index 6e2e0b88fbb..d2d336dcdc8 100644 --- a/searchlib/src/vespa/searchlib/fef/test/indexenvironment.cpp +++ b/searchlib/src/vespa/searchlib/fef/test/indexenvironment.cpp @@ -54,20 +54,20 @@ IndexEnvironment::addConstantValue(const vespalib::string &name, (void) insertRes; } -std::optional<vespalib::string> -IndexEnvironment::getOnnxModelFullPath(const vespalib::string &name) const +const OnnxModel * +IndexEnvironment::getOnnxModel(const vespalib::string &name) const { auto pos = _models.find(name); if (pos != _models.end()) { - return pos->second; + return &pos->second; } - return std::nullopt; + return nullptr; } void -IndexEnvironment::addOnnxModel(const vespalib::string &name, const vespalib::string &path) +IndexEnvironment::addOnnxModel(const OnnxModel &model) { - _models[name] = path; + _models.insert_or_assign(model.name(), model); } diff --git a/searchlib/src/vespa/searchlib/fef/test/indexenvironment.h b/searchlib/src/vespa/searchlib/fef/test/indexenvironment.h index 6602d9f8ee9..0d8d0091921 100644 --- a/searchlib/src/vespa/searchlib/fef/test/indexenvironment.h +++ b/searchlib/src/vespa/searchlib/fef/test/indexenvironment.h @@ -5,6 +5,7 @@ #include <vespa/searchlib/attribute/attributemanager.h> #include <vespa/searchlib/fef/iindexenvironment.h> #include <vespa/searchlib/fef/properties.h> +#include <vespa/searchlib/fef/onnx_model.h> #include <vespa/searchlib/fef/fieldinfo.h> #include <vespa/searchlib/fef/tablemanager.h> #include <vespa/eval/eval/value_cache/constant_value.h> @@ -47,7 +48,7 @@ public: }; using ConstantsMap = std::map<vespalib::string, Constant>; - using ModelMap = std::map<vespalib::string, vespalib::string>; + using ModelMap = std::map<vespalib::string, OnnxModel>; IndexEnvironment(); ~IndexEnvironment(); @@ -84,8 +85,8 @@ public: vespalib::eval::ValueType type, std::unique_ptr<vespalib::eval::Value> value); - std::optional<vespalib::string> getOnnxModelFullPath(const vespalib::string &name) const override; - void addOnnxModel(const vespalib::string &name, const vespalib::string &path); + const OnnxModel *getOnnxModel(const vespalib::string &name) const override; + void addOnnxModel(const OnnxModel &model); private: IndexEnvironment(const IndexEnvironment &); // hide diff --git a/searchlib/src/vespa/searchlib/transactionlog/chunks.cpp b/searchlib/src/vespa/searchlib/transactionlog/chunks.cpp index a78db61429d..edb12b4bf36 100644 --- a/searchlib/src/vespa/searchlib/transactionlog/chunks.cpp +++ b/searchlib/src/vespa/searchlib/transactionlog/chunks.cpp @@ -94,7 +94,7 @@ XXH64CompressedChunk::decompress(nbostream & is, uint32_t uncompressedLen) { ::decompress(_type, uncompressedLen, compressed, uncompressed, false); nbostream data(uncompressed.getData(), uncompressed.getDataLen()); deserializeEntries(data); - _backing = uncompressed.stealBuffer(); + _backing = std::move(uncompressed).stealBuffer(); is.adjustReadPos(is.size()); } diff --git a/searchlib/src/vespa/searchlib/transactionlog/common.cpp b/searchlib/src/vespa/searchlib/transactionlog/common.cpp index 556ebca06ec..3308f3182dc 100644 --- a/searchlib/src/vespa/searchlib/transactionlog/common.cpp +++ b/searchlib/src/vespa/searchlib/transactionlog/common.cpp @@ -121,11 +121,26 @@ Packet::add(const Packet::Entry & e) _range.to(e.serial()); } +Writer::CommitResult::CommitResult() + : _callBacks() +{} +Writer::CommitResult::CommitResult( CommitPayload commitPayLoad) + : _callBacks(std::move(commitPayLoad)) +{} + +Writer::CommitResult::~CommitResult() = default; + CommitChunk::CommitChunk(size_t reserveBytes, size_t reserveCount) : _data(reserveBytes), - _callBacks() + _callBacks(std::make_shared<Writer::DoneCallbacksList>()) +{ + _callBacks->reserve(reserveCount); +} + +CommitChunk::CommitChunk(size_t reserveBytes, Writer::CommitPayload postponed) + : _data(reserveBytes), + _callBacks(std::move(postponed)) { - _callBacks.reserve(reserveCount); } CommitChunk::~CommitChunk() = default; @@ -133,7 +148,12 @@ CommitChunk::~CommitChunk() = default; void CommitChunk::add(const Packet &packet, Writer::DoneCallback onDone) { _data.merge(packet); - _callBacks.emplace_back(std::move(onDone)); + _callBacks->emplace_back(std::move(onDone)); +} + +Writer::CommitResult +CommitChunk::createCommitResult() const { + return Writer::CommitResult(_callBacks); } } diff --git a/searchlib/src/vespa/searchlib/transactionlog/common.h b/searchlib/src/vespa/searchlib/transactionlog/common.h index 7cdfad44b87..5d07d51cdf2 100644 --- a/searchlib/src/vespa/searchlib/transactionlog/common.h +++ b/searchlib/src/vespa/searchlib/transactionlog/common.h @@ -16,7 +16,7 @@ class SerialNumRange { public: SerialNumRange() : _from(0), _to(0) { } - SerialNumRange(SerialNum f) : _from(f), _to(f ? f-1 : f) { } + explicit SerialNumRange(SerialNum f) : _from(f), _to(f ? f-1 : f) { } SerialNumRange(SerialNum f, SerialNum t) : _from(f), _to(t) { } bool operator == (const SerialNumRange & b) const { return cmp(b) == 0; } bool operator < (const SerialNumRange & b) const { return cmp(b) < 0; } @@ -63,7 +63,7 @@ public: vespalib::ConstBufferRef _data; }; public: - Packet(size_t reserved) : _count(0), _range(), _buf(reserved) { } + explicit Packet(size_t reserved) : _count(0), _range(), _buf(reserved) { } Packet(const void * buf, size_t sz); void add(const Entry & data); void clear() { _buf.clear(); _count = 0; _range.from(0); _range.to(0); } @@ -84,8 +84,24 @@ int makeDirectory(const char * dir); class Writer { public: using DoneCallback = std::shared_ptr<IDestructorCallback>; + using DoneCallbacksList = std::vector<DoneCallback>; + using CommitPayload = std::shared_ptr<DoneCallbacksList>; + class CommitResult { + public: + CommitResult(); + CommitResult(CommitPayload callBacks); + CommitResult(CommitResult &&) noexcept = default; + CommitResult & operator = (CommitResult &&) noexcept = default; + CommitResult(const CommitResult &) = delete; + CommitResult & operator = (const CommitResult &) = delete; + ~CommitResult(); + size_t getNumOperations() const { return _callBacks->size(); } + private: + CommitPayload _callBacks; + }; virtual ~Writer() = default; - virtual void commit(const Packet & packet, DoneCallback done) = 0; + virtual void append(const Packet & packet, DoneCallback done) = 0; + [[nodiscard]] virtual CommitResult startCommit(DoneCallback onDone) = 0; }; class WriterFactory { @@ -106,14 +122,20 @@ public: class CommitChunk { public: CommitChunk(size_t reserveBytes, size_t reserveCount); + CommitChunk(size_t reserveBytes, Writer::CommitPayload postponed); ~CommitChunk(); + bool empty() const { return _callBacks->empty(); } void add(const Packet & packet, Writer::DoneCallback onDone); size_t sizeBytes() const { return _data.sizeBytes(); } const Packet & getPacket() const { return _data; } - size_t getNumCallBacks() const { return _callBacks.size(); } + size_t getNumCallBacks() const { return _callBacks->size(); } + Writer::CommitResult createCommitResult() const; + void setCommitDoneCallback(Writer::DoneCallback onDone) { _onCommitDone = std::move(onDone); } + Writer::CommitPayload stealCallbacks() { return std::move(_callBacks); } private: - Packet _data; - std::vector<Writer::DoneCallback> _callBacks; + Packet _data; + Writer::CommitPayload _callBacks; + Writer::DoneCallback _onCommitDone; }; } diff --git a/searchlib/src/vespa/searchlib/transactionlog/domain.cpp b/searchlib/src/vespa/searchlib/transactionlog/domain.cpp index 415ccafda70..bd7feec0598 100644 --- a/searchlib/src/vespa/searchlib/transactionlog/domain.cpp +++ b/searchlib/src/vespa/searchlib/transactionlog/domain.cpp @@ -25,17 +25,25 @@ using vespalib::makeClosure; using vespalib::makeLambdaTask; using vespalib::Monitor; using vespalib::MonitorGuard; -using search::common::FileHeaderContext; using std::runtime_error; using std::make_shared; namespace search::transactionlog { +namespace { + +std::unique_ptr<CommitChunk> +createCommitChunk(const DomainConfig &cfg) { + return std::make_unique<CommitChunk>(cfg.getChunkSizeLimit(), cfg.getChunkSizeLimit()/256); +} + +} Domain::Domain(const string &domainName, const string & baseDir, Executor & executor, const DomainConfig & cfg, const FileHeaderContext &fileHeaderContext) : _config(cfg), + _currentChunk(createCommitChunk(cfg)), _lastSerial(0), - _singleCommiter(std::make_unique<vespalib::ThreadStackExecutor>(1, 128*1024)), + _singleCommitter(std::make_unique<vespalib::ThreadStackExecutor>(1, 128 * 1024)), _executor(executor), _sessionId(1), _syncMonitor(), @@ -105,7 +113,12 @@ Domain::addPart(SerialNum partId, bool isLastPart) { } } -Domain::~Domain() { } +Domain::~Domain() { + MonitorGuard guard(_currentChunkMonitor); + guard.broadcast(); + commitChunk(grabCurrentChunk(guard), guard); + _singleCommitter->shutdown().sync(); +} DomainInfo Domain::getDomainInfo() const @@ -311,15 +324,72 @@ Domain::optionallyRotateFile(SerialNum serialNum) { } void -Domain::commit(const Packet & packet, Writer::DoneCallback onDone) -{ - (void) onDone; +Domain::append(const Packet & packet, Writer::DoneCallback onDone) { + vespalib::MonitorGuard guard(_currentChunkMonitor); + if (_lastSerial >= packet.range().from()) { + throw runtime_error(fmt("Incoming serial number(%" PRIu64 ") must be bigger than the last one (%" PRIu64 ").", + packet.range().from(), _lastSerial)); + } else { + _lastSerial = packet.range().to(); + } + _currentChunk->add(packet, std::move(onDone)); + commitIfFull(guard); +} + +Domain::CommitResult +Domain::startCommit(DoneCallback onDone) { + vespalib::MonitorGuard guard(_currentChunkMonitor); + if ( !_currentChunk->empty() ) { + auto completed = grabCurrentChunk(guard); + completed->setCommitDoneCallback(std::move(onDone)); + CommitResult result(completed->createCommitResult()); + commitChunk(std::move(completed), guard); + return result; + } + return CommitResult(); +} + +void +Domain::commitIfFull(const vespalib::MonitorGuard &guard) { + if (_currentChunk->sizeBytes() > _config.getChunkSizeLimit()) { + auto completed = std::move(_currentChunk); + _currentChunk = std::make_unique<CommitChunk>(_config.getChunkSizeLimit(), completed->stealCallbacks()); + commitChunk(std::move(completed), guard); + } +} + +std::unique_ptr<CommitChunk> +Domain::grabCurrentChunk(const vespalib::MonitorGuard & guard) { + assert(guard.monitors(_currentChunkMonitor)); + auto chunk = std::move(_currentChunk); + _currentChunk = createCommitChunk(_config); + return chunk; +} + +void +Domain::commitChunk(std::unique_ptr<CommitChunk> chunk, const vespalib::MonitorGuard & chunkOrderGuard) { + assert(chunkOrderGuard.monitors(_currentChunkMonitor)); + _singleCommitter->execute( makeLambdaTask([this, chunk = std::move(chunk)]() mutable { + doCommit(std::move(chunk)); + })); +} + +void +Domain::doCommit(std::unique_ptr<CommitChunk> chunk) { + const Packet & packet = chunk->getPacket(); + if (packet.empty()) return; + vespalib::nbostream_longlivedbuf is(packet.getHandle().data(), packet.getHandle().size()); Packet::Entry entry; entry.deserialize(is); DomainPart::SP dp = optionallyRotateFile(entry.serial()); dp->commit(entry.serial(), packet); + if (_config.getFSyncOnCommit()) { + dp->sync(); + } cleanSessions(); + LOG(debug, "Releasing %zu acks and %zu entries and %zu bytes.", + chunk->getNumCallBacks(), chunk->getPacket().size(), chunk->sizeBytes()); } bool diff --git a/searchlib/src/vespa/searchlib/transactionlog/domain.h b/searchlib/src/vespa/searchlib/transactionlog/domain.h index 9adff564cc8..041ec27cf23 100644 --- a/searchlib/src/vespa/searchlib/transactionlog/domain.h +++ b/searchlib/src/vespa/searchlib/transactionlog/domain.h @@ -18,8 +18,9 @@ public: using SP = std::shared_ptr<Domain>; using Executor = vespalib::SyncableThreadExecutor; using DomainPartSP = std::shared_ptr<DomainPart>; + using FileHeaderContext = common::FileHeaderContext; Domain(const vespalib::string &name, const vespalib::string &baseDir, Executor & executor, - const DomainConfig & cfg, const common::FileHeaderContext &fileHeaderContext); + const DomainConfig & cfg, const FileHeaderContext &fileHeaderContext); ~Domain() override; @@ -27,14 +28,14 @@ public: const vespalib::string & name() const { return _name; } bool erase(SerialNum to); - void commit(const Packet & packet, Writer::DoneCallback onDone) override; + void append(const Packet & packet, Writer::DoneCallback onDone) override; + [[nodiscard]] CommitResult startCommit(DoneCallback onDone) override; int visit(const Domain::SP & self, SerialNum from, SerialNum to, std::unique_ptr<Destination> dest); SerialNum begin() const; SerialNum end() const; SerialNum getSynced() const; void triggerSyncNow(); - bool commitIfStale(); bool getMarkedDeleted() const { return _markedDeleted; } void markDeleted() { _markedDeleted = true; } @@ -55,6 +56,11 @@ public: uint64_t size() const; Domain & setConfig(const DomainConfig & cfg); private: + void commitIfFull(const vespalib::MonitorGuard & guard); + + std::unique_ptr<CommitChunk> grabCurrentChunk(const vespalib::MonitorGuard & guard); + void commitChunk(std::unique_ptr<CommitChunk> chunk, const vespalib::MonitorGuard & chunkOrderGuard); + void doCommit(std::unique_ptr<CommitChunk> chunk); SerialNum begin(const vespalib::LockGuard & guard) const; SerialNum end(const vespalib::LockGuard & guard) const; size_t byteSize(const vespalib::LockGuard & guard) const; @@ -72,23 +78,24 @@ private: using DomainPartList = std::map<SerialNum, DomainPartSP>; using DurationSeconds = std::chrono::duration<double>; - DomainConfig _config; - SerialNum _lastSerial; - std::unique_ptr<Executor> _singleCommiter; - Executor & _executor; - std::atomic<int> _sessionId; - vespalib::Monitor _syncMonitor; - bool _pendingSync; - vespalib::string _name; - DomainPartList _parts; - vespalib::Lock _lock; - vespalib::Monitor _currentChunkMonitor; - vespalib::Lock _sessionLock; - SessionList _sessions; - DurationSeconds _maxSessionRunTime; - vespalib::string _baseDir; - const common::FileHeaderContext &_fileHeaderContext; - bool _markedDeleted; + DomainConfig _config; + std::unique_ptr<CommitChunk> _currentChunk; + SerialNum _lastSerial; + std::unique_ptr<Executor> _singleCommitter; + Executor &_executor; + std::atomic<int> _sessionId; + vespalib::Monitor _syncMonitor; + bool _pendingSync; + vespalib::string _name; + DomainPartList _parts; + vespalib::Lock _lock; + vespalib::Monitor _currentChunkMonitor; + vespalib::Lock _sessionLock; + SessionList _sessions; + DurationSeconds _maxSessionRunTime; + vespalib::string _baseDir; + const FileHeaderContext &_fileHeaderContext; + bool _markedDeleted; }; } diff --git a/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp b/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp index 8855183226d..b7e02894e6b 100644 --- a/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp +++ b/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp @@ -252,7 +252,7 @@ DomainPart::buildPacketMapping(bool allowTruncate) DomainPart::DomainPart(const string & name, const string & baseDir, SerialNum s, Encoding encoding, uint8_t compressionLevel, const FileHeaderContext &fileHeaderContext, bool allowTruncate) - : _encoding(encoding.getCrc(), Encoding::Compression::none), //TODO We do not yet support compression + : _encoding(encoding), _compressionLevel(compressionLevel), _lock(), _fileLock(), @@ -396,16 +396,19 @@ DomainPart::commit(SerialNum firstSerial, const Packet &packet) if (_range.from() == 0) { _range.from(firstSerial); } + IChunk::UP chunk = IChunk::create(_encoding, _compressionLevel); for (size_t i(0); h.size() > 0; i++) { //LOG(spam, //"Pos(%d) Len(%d), Lim(%d), Remaining(%d)", //h.getPos(), h.getLength(), h.getLimit(), h.getRemaining()); - IChunk::UP chunk = IChunk::create(_encoding, _compressionLevel); Packet::Entry entry; entry.deserialize(h); if (_range.to() < entry.serial()) { chunk->add(entry); - write(*_transLog, *chunk); + if (_encoding.getCompression() == Encoding::Compression::none) { + write(*_transLog, *chunk); + chunk = IChunk::create(_encoding, _compressionLevel); + } _sz++; _range.to(entry.serial()); } else { @@ -413,6 +416,9 @@ DomainPart::commit(SerialNum firstSerial, const Packet &packet) entry.serial(), _range.to())); } } + if ( ! chunk->getEntries().empty()) { + write(*_transLog, *chunk); + } bool merged(false); LockGuard guard(_lock); diff --git a/searchlib/src/vespa/searchlib/transactionlog/translogserver.cpp b/searchlib/src/vespa/searchlib/transactionlog/translogserver.cpp index 269ed7e9380..0c0c9186e12 100644 --- a/searchlib/src/vespa/searchlib/transactionlog/translogserver.cpp +++ b/searchlib/src/vespa/searchlib/transactionlog/translogserver.cpp @@ -572,7 +572,11 @@ TransLogServer::domainCommit(FRT_RPCRequest *req) Packet packet(params[1]._data._buf, params[1]._data._len); try { vespalib::Gate gate; - domain->commit(packet, make_shared<GateCallback>(gate)); + { + // Need to scope in order to drain out all the callbacks. + domain->append(packet, make_shared<GateCallback>(gate)); + auto keep = domain->startCommit(make_shared<IgnoreCallback>()); + } gate.await(); ret.AddInt32(0); ret.AddString("ok"); |