summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/CMakeLists.txt1
-rw-r--r--searchlib/src/tests/attribute/attribute_header/attribute_header_test.cpp2
-rw-r--r--searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp2
-rw-r--r--searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp2
-rw-r--r--searchlib/src/tests/bytecomplens/.gitignore5
-rw-r--r--searchlib/src/tests/bytecomplens/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/bytecomplens/bytecomp.cpp101
-rw-r--r--searchlib/src/tests/bytecomplens/example.txt122
-rw-r--r--searchlib/src/tests/bytecomplens/tblprint.cpp356
-rw-r--r--searchlib/src/tests/features/onnx_feature/onnx_feature_test.cpp26
-rw-r--r--searchlib/src/tests/transactionlog/translogclient_test.cpp5
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_header.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributefilebufferwriter.cpp1
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp1
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributememoryfilebufferwriter.cpp5
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp5
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attrvector.cpp1
-rw-r--r--searchlib/src/vespa/searchlib/attribute/iattributefilewriter.h3
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp1
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp1
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp1
-rw-r--r--searchlib/src/vespa/searchlib/common/fileheadercontext.h34
-rw-r--r--searchlib/src/vespa/searchlib/config/translogserver.def6
-rw-r--r--searchlib/src/vespa/searchlib/docstore/CMakeLists.txt1
-rw-r--r--searchlib/src/vespa/searchlib/docstore/bytecomplens.cpp260
-rw-r--r--searchlib/src/vespa/searchlib/docstore/bytecomplens.h110
-rw-r--r--searchlib/src/vespa/searchlib/docstore/chunkformat.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/docstore/value.cpp6
-rw-r--r--searchlib/src/vespa/searchlib/docstore/visitcache.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/features/onnx_feature.cpp29
-rw-r--r--searchlib/src/vespa/searchlib/fef/CMakeLists.txt3
-rw-r--r--searchlib/src/vespa/searchlib/fef/iindexenvironment.h6
-rw-r--r--searchlib/src/vespa/searchlib/fef/onnx_model.cpp55
-rw-r--r--searchlib/src/vespa/searchlib/fef/onnx_model.h39
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/indexenvironment.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/indexenvironment.h7
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/chunks.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/common.cpp26
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/common.h34
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/domain.cpp82
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/domain.h47
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/translogserver.cpp6
43 files changed, 355 insertions, 1077 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index 1f8c054c694..33b78d517a3 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -102,7 +102,6 @@ vespa_define_module(
src/tests/bitcompression/expgolomb
src/tests/bitvector
src/tests/btree
- src/tests/bytecomplens
src/tests/common/bitvector
src/tests/common/location
src/tests/common/location_iterator
diff --git a/searchlib/src/tests/attribute/attribute_header/attribute_header_test.cpp b/searchlib/src/tests/attribute/attribute_header/attribute_header_test.cpp
index 1e89fbe7501..aca9b652b94 100644
--- a/searchlib/src/tests/attribute/attribute_header/attribute_header_test.cpp
+++ b/searchlib/src/tests/attribute/attribute_header/attribute_header_test.cpp
@@ -72,6 +72,8 @@ TEST(AttributeHeaderTest, can_be_added_to_and_extracted_from_generic_header)
verify_roundtrip_serialization(HnswIPO({16, 100, DistanceMetric::Euclidean}));
verify_roundtrip_serialization(HnswIPO({16, 100, DistanceMetric::Angular}));
verify_roundtrip_serialization(HnswIPO({16, 100, DistanceMetric::GeoDegrees}));
+ verify_roundtrip_serialization(HnswIPO({16, 100, DistanceMetric::InnerProduct}));
+ verify_roundtrip_serialization(HnswIPO({16, 100, DistanceMetric::Hamming}));
verify_roundtrip_serialization(HnswIPO());
}
diff --git a/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp b/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp
index eeabbf23c27..1e3531dc78e 100644
--- a/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp
+++ b/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp
@@ -1,7 +1,6 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/vespalib/testkit/testapp.h>
-#include <vespa/vespalib/stllike/string.h>
#include <vespa/searchlib/attribute/attributefilewriter.h>
#include <vespa/searchlib/attribute/attributefilebufferwriter.h>
#include <vespa/searchlib/attribute/attribute_header.h>
@@ -10,6 +9,7 @@
#include <vespa/searchlib/common/tunefileinfo.h>
#include <vespa/searchlib/common/fileheadercontext.h>
#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/vespalib/data/databuffer.h>
#include <vespa/fastos/file.h>
#include <vespa/log/log.h>
diff --git a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp
index d8761f69d71..2a5b8014299 100644
--- a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp
+++ b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp
@@ -10,7 +10,6 @@
#include <vespa/searchlib/attribute/attributememorysavetarget.h>
#include <vespa/searchlib/attribute/attributesaver.h>
#include <vespa/searchlib/attribute/multinumericattribute.h>
-#include <vespa/searchlib/attribute/multistringattribute.h>
#include <vespa/searchlib/attribute/singlenumericattribute.h>
#include <vespa/searchlib/attribute/singlestringattribute.h>
#include <vespa/searchlib/queryeval/executeinfo.h>
@@ -21,6 +20,7 @@
#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/searchlib/util/bufferwriter.h>
#include <vespa/vespalib/util/compress.h>
+#include <vespa/vespalib/data/databuffer.h>
#include <vespa/searchlib/attribute/attributevector.hpp>
diff --git a/searchlib/src/tests/bytecomplens/.gitignore b/searchlib/src/tests/bytecomplens/.gitignore
deleted file mode 100644
index afe9bff02f6..00000000000
--- a/searchlib/src/tests/bytecomplens/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-*.So
-.depend*
-Makefile
-bytecomp_test
-searchlib_bytecomp_test_app
diff --git a/searchlib/src/tests/bytecomplens/CMakeLists.txt b/searchlib/src/tests/bytecomplens/CMakeLists.txt
deleted file mode 100644
index 24ecef59b15..00000000000
--- a/searchlib/src/tests/bytecomplens/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-vespa_add_executable(searchlib_bytecomp_test_app TEST
- SOURCES
- bytecomp.cpp
- DEPENDS
- searchlib
-)
-vespa_add_test(NAME searchlib_bytecomp_test_app NO_VALGRIND COMMAND searchlib_bytecomp_test_app)
diff --git a/searchlib/src/tests/bytecomplens/bytecomp.cpp b/searchlib/src/tests/bytecomplens/bytecomp.cpp
deleted file mode 100644
index adc92b0097d..00000000000
--- a/searchlib/src/tests/bytecomplens/bytecomp.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <memory>
-#include <vespa/log/log.h>
-LOG_SETUP("bytecomplens_test");
-#include <vespa/vespalib/testkit/testapp.h>
-#include <vespa/vespalib/util/random.h>
-#include <vespa/searchlib/docstore/bytecomplens.h>
-
-
-class Test : public vespalib::TestApp {
-private:
- void testRandomLengths();
-
-public:
- int Main() override {
- TEST_INIT("bytecomplens_test");
- testRandomLengths(); TEST_FLUSH();
- TEST_DONE();
- }
-};
-
-TEST_APPHOOK(Test);
-
-
-void
-Test::testRandomLengths()
-{
- vespalib::RandomGen rndgen(0x07031969);
-
-#define TBLSIZ 0xc00000
-
- auto lentable = std::unique_ptr<uint32_t[]>(new uint32_t[TBLSIZ]);
- auto offtable = std::unique_ptr<uint64_t[]>(new uint64_t[TBLSIZ]);
-
- uint64_t offset = 16;
-
- for (int i = 0; i < TBLSIZ; i++) {
- int sel = rndgen.nextInt32();
- int val = rndgen.nextInt32();
- switch (sel & 0x7) {
- case 0:
- val &= 0x7F;
- break;
- case 1:
- val &= 0xFF;
- break;
- case 3:
- val &= 0x1FFF;
- break;
- case 4:
- val &= 0x3FFF;
- break;
- case 5:
- val &= 0x7FFF;
- break;
- case 6:
- val &= 0xFFFF;
- break;
- case 7:
- default:
- val &= 0xFFFFF;
- break;
- }
- offtable[i] = offset;
- lentable[i] = val;
- offset += val;
- }
-
- LOG(info, "made %d random offsets", TBLSIZ);
-
- search::ByteCompressedLengths foo;
-
- LOG(info, "empty BCL using %9ld bytes memory", foo.memoryUsed());
-
- foo.addOffsetTable(TBLSIZ/4, offtable.get());
- foo.addOffsetTable(TBLSIZ/4, offtable.get() + 1*(TBLSIZ/4));
-
- LOG(info, "half BCL using %9ld bytes memory", foo.memoryUsed());
-
- search::ByteCompressedLengths bar;
- foo.swap(bar);
- bar.addOffsetTable(TBLSIZ/4, offtable.get() + 2*(TBLSIZ/4));
- bar.addOffsetTable(TBLSIZ/4, offtable.get() + 3*(TBLSIZ/4));
- foo.swap(bar);
-
- LOG(info, "full BCL using %9ld bytes memory", foo.memoryUsed());
-
- LOG(info, "constructed %d byte compressed lengths", TBLSIZ-1);
-
- for (int i = 0; i < TBLSIZ-1; i++) {
- search::ByteCompressedLengths::OffLen offlen;
- offlen = foo.getOffLen(i);
-
- if ((i % 1000000) == 0) {
- LOG(info, "data blob [%d] length %" PRIu64 " offset %" PRIu64, i, offlen.length, offlen.offset);
- }
- EXPECT_EQUAL(lentable[i], offlen.length);
- EXPECT_EQUAL(offtable[i], offlen.offset);
- }
-}
-
diff --git a/searchlib/src/tests/bytecomplens/example.txt b/searchlib/src/tests/bytecomplens/example.txt
deleted file mode 100644
index 6dc3df0118a..00000000000
--- a/searchlib/src/tests/bytecomplens/example.txt
+++ /dev/null
@@ -1,122 +0,0 @@
-offset length BCN val L0 len/off skipL1 skipL2 skipL3
-
-976 18707 [ 93 92 01 ] 3/0 976/0/0/0
-19683 11527 [ 87 5A ] 2/3
-31210 3926 [ D6 1E ] 2/5
-35136 2 [ 02 ] 1/7
-35138 6060 [ AC 2F ] 2/8 34162/8
-41198 649445 [ E5 D1 27 ] 3/10
-690643 2866 [ B2 16 ] 2/13
-693509 824767 [ BF AB 32 ] 3/15
-1518276 499173 [ E5 BB 1E ] 3/18 1483138/10
-2017449 20455 [ E7 9F 01 ] 3/21
-2037904 11 [ 0B ] 1/24
-2037915 19207 [ 87 96 01 ] 3/25
-2057122 6355 [ D3 31 ] 2/28 538846/10
-2063477 3422 [ DE 1A ] 2/30
-2066899 10683 [ BB 53 ] 2/32
-2077582 7360 [ C0 39 ] 2/34
-2084942 17969 [ B1 8C 01 ] 3/36 2083966/36/12
-2102911 6114 [ E2 2F ] 2/39
-2109025 31741 [ FD F7 01 ] 3/41
-2140766 581588 [ D4 BF 23 ] 3/44
-2722354 5341 [ DD 29 ] 2/47 637412/11
-2727695 13774 [ CE 6B ] 2/49
-2741469 717809 [ F1 E7 2B ] 3/51
-3459278 815406 [ AE E2 31 ] 3/54
-4274684 89 [ 59 ] 1/57 1552330/10
-4274773 4545 [ C1 23 ] 2/58
-4279318 803868 [ 9C 88 31 ] 3/60
-5083186 12865 [ C1 64 ] 2/63
-5096051 75 [ 4B ] 1/65 821367/8
-5096126 40734 [ 9E BE 02 ] 3/66
-5136860 101 [ 65 ] 1/69
-5136961 128 [ 80 01 ] 2/70
-5137089 253 [ FD 01 ] 2/72 3052147/36/12
-5137342 13 [ 0D ] 1/74
-5137355 24986 [ 9A C3 01 ] 3/75
-5162341 231 [ E7 01 ] 2/78
-5162572 997853 [ DD F3 3C ] 3/80 25483/8
-6160425 4728 [ F8 24 ] 2/83
-6165153 2025 [ E9 0F ] 2/85
-6167178 7281 [ F1 38 ] 2/87
-6174459 1026302 [ FE D1 3E ] 3/89 1011887/9
-7200761 848783 [ 8F E7 33 ] 3/92
-8049544 145767 [ E7 F2 08 ] 3/95
-8195311 19103 [ 9F 95 01 ] 3/98
-8214414 22166 [ 96 AD 01 ] 3/101 2039955/12
-8236580 30020 [ C4 EA 01 ] 3/104
-8266600 13 [ 0D ] 1/107
-8266613 120 [ 78 ] 1/108
-8266733 22398 [ FE AE 01 ] 3/109 3129644/37/12
-8289131 10832 [ D0 54 ] 2/112
-8299963 3765 [ B5 1D ] 2/114
-8303728 432771 [ 83 B5 1A ] 3/116
-8736499 30133 [ B5 EB 01 ] 3/119 469766/10
-8766632 6444 [ AC 32 ] 2/122
-8773076 16033 [ A1 7D ] 2/124
-8789109 78 [ 4E ] 1/126
-8789187 12510 [ DE 61 ] 2/127 52688/8
-8801697 12441 [ 99 61 ] 2/129
-8814138 117 [ 75 ] 1/131
-8814255 7147 [ EB 37 ] 2/132
-8821402 189 [ BD 01 ] 2/134 32215/7
-8821591 199704 [ 98 98 0C ] 3/136
-9021295 13240 [ B8 67 ] 2/139
-9034535 110 [ 6E ] 1/141
-9034645 31677 [ BD F7 01 ] 3/142 9034645/142/48/17
-9066322 18547 [ F3 90 01 ] 3/145
-9084869 734679 [ D7 EB 2C ] 3/148
-9819548 112 [ 70 ] 1/151
-9819660 883565 [ ED F6 35 ] 3/152 785015/10
-10703225 10290 [ B2 50 ] 2/155
-10713515 21410 [ A2 A7 01 ] 3/157
-10734925 15 [ 0F ] 1/160
-10734940 747774 [ FE D1 2D ] 3/161 915280/9
-11482714 39 [ 27 ] 1/164
-11482753 77 [ 4D ] 1/165
-11482830 235 [ EB 01 ] 2/166
-11483065 1991 [ C7 0F ] 2/168 748125/7
-11485056 9187 [ E3 47 ] 2/170
-11494243 18800 [ F0 92 01 ] 3/172
-11513043 1042219 [ AB CE 3F ] 3/175
-12555262 9154 [ C2 47 ] 2/178 3520617/36/12
-12564416 43582 [ BE D4 02 ] 3/180
-12607998 847240 [ 88 DB 33 ] 3/183
-13455238 4726 [ F6 24 ] 2/186
-13459964 590348 [ 8C 84 24 ] 3/188 904702/10
-14050312 8659 [ D3 43 ] 2/191
-14058971 116 [ 74 ] 1/193
-14059087 13563 [ FB 69 ] 2/194
-14072650 713064 [ E8 C2 2B ] 3/196 612686/8
-14785714 40321 [ 81 BB 02 ] 3/199
-14826035 2296 [ F8 11 ] 2/202
-14828331 7273 [ E9 38 ] 2/204
-14835604 68285 [ BD 95 04 ] 3/206 762954/10
-14903889 235 [ EB 01 ] 2/209
-14904124 4669 [ BD 24 ] 2/211
-14908793 28535 [ F7 DE 01 ] 3/213
-14937328 19 [ 13 ] 1/216 2382066/38/12
-14937347 5369 [ F9 29 ] 2/217
-14942716 602191 [ CF E0 24 ] 3/219
-15544907 2653 [ DD 14 ] 2/222
-15547560 25755 [ 9B C9 01 ] 3/224 610232/8
-15573315 11349 [ D5 58 ] 2/227
-15584664 15006 [ 9E 75 ] 2/229
-15599670 89 [ 59 ] 1/231
-15599759 52772 [ A4 9C 03 ] 3/232 52199/8
-15652531 776175 [ EF AF 2F ] 3/235
-16428706 126 [ 7E ] 1/238
-16428832 3884 [ AC 1E ] 2/239
-16432716 33958 [ A6 89 02 ] 3/241 832957/9
-16466674 122 [ 7A ] 1/244
-16466796 41895 [ A7 C7 02 ] 3/245
-16508691 105882 [ 9A BB 06 ] 3/248
-16614573 11067 [ BB 56 ] 2/251 1677245/35/12
-16625640 4588 [ EC 23 ] 2/253
-16630228 7349 [ B5 39 ] 2/255
-16637577 902638 [ EE 8B 37 ] 3/257
-17540215 8737 [ A1 44 ] 2/260 925642/9
-17548952 29186 [ 82 E4 01 ] 3/262
-17578138 41 [ 29 ] 1/265
-17578179
diff --git a/searchlib/src/tests/bytecomplens/tblprint.cpp b/searchlib/src/tests/bytecomplens/tblprint.cpp
deleted file mode 100644
index 6a7347f3c0d..00000000000
--- a/searchlib/src/tests/bytecomplens/tblprint.cpp
+++ /dev/null
@@ -1,356 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/log/log.h>
-LOG_SETUP("tblprint");
-#include <vespa/vespalib/util/random.h>
-
-#include <vector>
-#include <vespa/vespalib/data/databuffer.h>
-
-
-/**
- * Class compressing a table of offsets in memory.
- * After adding (n) offsets you can access
- * (n-1) pairs of (length, offset).
- * All offsets must be increasing, but they
- * may be added in several chunks.
- **/
-class ByteCompressedLengths
-{
-public:
- /**
- * Construct an empty instance
- **/
- ByteCompressedLengths();
-
- /**
- * add the given offset table.
- * @param entries number of offsets to store.
- * @param offsets table that contains (entries) offsets.
- **/
- void addOffsetTable(uint64_t entries, uint64_t *offsets);
-
- /**
- * free resources
- **/
- ~ByteCompressedLengths();
-
- /**
- * Fetch a length and offset from compressed data.
- * Note invariant: id < size(); size() == (entries-1)
- *
- * @param id The index into the offset table
- * @param offset Will be incremented by offset[id]
- * @return The delta (offset[id+1] - offset[id])
- **/
- uint64_t getLength(uint64_t id, uint64_t &offset) const;
-
- /**
- * The number of (length, offset) pairs stored
- **/
- uint64_t size() const { return _entries; }
-
- struct L3Entry {
- uint64_t offset;
- uint64_t l0toff;
- uint64_t l1toff;
- uint64_t l2toff;
- };
- vespalib::DataBuffer _l0space;
- vespalib::DataBuffer _l1space;
- vespalib::DataBuffer _l2space;
- const uint8_t *_l0table;
- const uint8_t *_l1table;
- const uint8_t *_l2table;
-
- std::vector<L3Entry> _l3table;
-
- uint64_t _lenSum1;
- uint64_t _lenSum2;
- uint64_t _l0oSum1;
- uint64_t _l0oSum2;
- uint64_t _l1oSum2;
- uint64_t _last_offset;
- uint64_t _entries;
-
- void addOffset(uint64_t offset);
-};
-
-/**
- * get "Byte Compressed Number" from buffer, incrementing pointer
- **/
-static inline uint64_t getBCN(const uint8_t *&buffer)
-{
- uint8_t b = *buffer++;
- uint64_t len = (b & 127);
- unsigned shiftLen = 0;
- while (b & 128) {
- shiftLen += 7;
- b = *buffer++;
- len |= ((b & 127) << shiftLen);
- }
- return len;
-}
-
-static size_t writeLen(vespalib::DataBuffer &buf, uint64_t len)
-{
- size_t bytes = 0;
- do {
- uint8_t b = len & 127;
- len >>= 7;
- if (len > 0) {
- b |= 128;
- }
- buf.ensureFree(1);
- buf.writeInt8(b);
- ++bytes;
- } while (len > 0);
- return bytes;
-}
-
-
-ByteCompressedLengths::ByteCompressedLengths()
- : _l0space(),
- _l1space(),
- _l2space(),
- _l3table(),
- _lenSum1(0),
- _lenSum2(0),
- _l0oSum1(0),
- _l0oSum2(0),
- _l1oSum2(0),
- _last_offset(0),
- _entries(0)
-{
-}
-
-
-void
-ByteCompressedLengths::addOffset(uint64_t offset)
-{
- assert(offset >= _last_offset);
-
- uint64_t len = offset - _last_offset;
- uint64_t i = _entries++;
-
- if ((i & 3) == 0) {
- _lenSum2 += _lenSum1;
- _l0oSum2 += _l0oSum1;
-
- uint64_t t1n = i >> 2;
- if ((t1n & 3) == 0) {
- uint64_t t2n = t1n >> 2;
-
- if ((t2n & 3) == 0) {
- L3Entry e;
- e.offset = _last_offset;
- e.l0toff = _l0space.getDataLen();
- e.l1toff = _l1space.getDataLen();
- e.l2toff = _l2space.getDataLen();
-
- _l3table.push_back(e);
- } else {
- writeLen(_l2space, _lenSum2);
- writeLen(_l2space, _l0oSum2);
- writeLen(_l2space, _l1oSum2);
- }
- _lenSum2 = 0;
- _l0oSum2 = 0;
- _l1oSum2 = 0;
- } else {
- _l1oSum2 += writeLen(_l1space, _lenSum1);
- _l1oSum2 += writeLen(_l1space, _l0oSum1);
- }
- _lenSum1 = 0;
- _l0oSum1 = 0;
- }
- _l0oSum1 += writeLen(_l0space, len);
- _lenSum1 += len;
- _last_offset = offset;
-}
-
-
-void
-ByteCompressedLengths::addOffsetTable(uint64_t entries, uint64_t *offsets)
-{
- if (entries == 0) return;
- // Do we have some offsets already?
- if (_entries > 0) {
- // yes, add first offset normally
- addOffset(offsets[0]);
- } else {
- // no, special treatment for very first offset
- _last_offset = offsets[0];
- }
- for (uint64_t cnt = 1; cnt < entries; ++cnt) {
- addOffset(offsets[cnt]);
- }
- _l0table = (uint8_t *)_l0space.getData();
- _l1table = (uint8_t *)_l1space.getData();
- _l2table = (uint8_t *)_l2space.getData();
-
- LOG(debug, "compressed %ld offsets", (_entries+1));
- LOG(debug, "(%ld bytes)", (_entries+1)*sizeof(uint64_t));
- LOG(debug, "to (%ld + %ld + %ld) bytes + %ld l3entries",
- _l0space.getDataLen(),
- _l1space.getDataLen(),
- _l2space.getDataLen(),
- _l3table.size());
- LOG(debug, "(%ld bytes)",
- (_l0space.getDataLen() + _l1space.getDataLen() + _l2space.getDataLen() +
- _l3table.size()*sizeof(L3Entry)));
-}
-
-
-ByteCompressedLengths::~ByteCompressedLengths()
-{
-}
-
-uint64_t
-ByteCompressedLengths::getLength(uint64_t numSkip, uint64_t &offset) const
-{
- assert(numSkip < _entries);
-
- unsigned skipL0 = numSkip & 3;
- unsigned skipL1 = (numSkip >> 2) & 3;
- unsigned skipL2 = (numSkip >> 4) & 3;
- uint64_t skipL3 = (numSkip >> 6);
-
- offset += _l3table[skipL3].offset;
- uint64_t l0toff = _l3table[skipL3].l0toff;
- uint64_t l1toff = _l3table[skipL3].l1toff;
- uint64_t l2toff = _l3table[skipL3].l2toff;
-
- // printf("start off %ld l0off %ld l1off %ld l2off %ld\n", offset, l0toff, l1toff, l2toff);
-
- const uint8_t *l2pos = _l2table + l2toff;
-
- while (skipL2 > 0) {
- --skipL2;
- offset += getBCN(l2pos);
- l0toff += getBCN(l2pos);
- l1toff += getBCN(l2pos);
- }
-
- const uint8_t *l1pos = _l1table + l1toff;
-
- while (skipL1 > 0) {
- --skipL1;
- offset += getBCN(l1pos);
- l0toff += getBCN(l1pos);
-
- }
- const uint8_t *l0pos = _l0table + l0toff;
-
- while (skipL0 > 0) {
- --skipL0;
- offset += getBCN(l0pos);
- }
- // printf("end off %ld l0off %ld l1off %ld l2off %ld\n", offset, l0toff, l1toff, l2toff);
- return getBCN(l0pos);
-}
-
-
-
-class Test {
-public:
- static void printTable();
-};
-
-
-
-int main(int /*argc*/, char ** /*argv*/)
-{
- Test::printTable();
- return 0;
-}
-
-void
-Test::printTable()
-{
- vespalib::RandomGen rndgen(0x07031969);
-#define TBLSIZ 120
- uint32_t *lentable = new uint32_t[TBLSIZ];
- uint64_t *offtable = new uint64_t[TBLSIZ];
-
- uint64_t offset = 16 + TBLSIZ*8;
-
- for (int i = 0; i < TBLSIZ; i++) {
- int sel = rndgen.nextInt32();
- int val = rndgen.nextInt32();
- switch (sel & 0x7) {
- case 0:
- val &= 0x7F;
- break;
- case 1:
- val &= 0xFF;
- break;
- case 3:
- val &= 0x1FFF;
- break;
- case 4:
- val &= 0x3FFF;
- break;
- case 5:
- val &= 0x7FFF;
- break;
- case 6:
- val &= 0xFFFF;
- break;
- case 7:
- default:
- val &= 0xFFFFF;
- break;
- }
- offtable[i] = offset;
- lentable[i] = val;
- offset += val;
- }
-
- ByteCompressedLengths foo;
- foo.addOffsetTable(TBLSIZ, offtable);
-
- const uint8_t *l1pos = foo._l1table;
- const uint8_t *l2pos = foo._l2table;
-
- printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
- "offset", "length", "BCN val", "L0 len/off", "skipL1", "skipL2", "skipL3");
-
- int slb = 0;
- for (int i = 0; i+1 < TBLSIZ; i++) {
- printf("%ld\t%d\t[", offtable[i], lentable[i]);
- int bytes=0;
- uint64_t len = lentable[i];
- do {
- uint8_t b = len & 127;
- len >>= 7;
- if (len > 0) {
- b |= 128;
- }
- printf(" %02X", b);
- ++bytes;
- } while (len > 0);
- printf(" ]\t%d", bytes);
- printf("/%d", slb);
- slb += bytes;
-
- if ((i & 63) == 0) {
- printf("\t\t\t%ld/%ld/%ld/%ld",
- foo._l3table[i >> 6].offset,
- foo._l3table[i >> 6].l0toff,
- foo._l3table[i >> 6].l1toff,
- foo._l3table[i >> 6].l2toff);
- } else
- if ((i & 15) == 0) {
- printf("\t\t%ld", getBCN(l2pos));
- printf("/%ld", getBCN(l2pos));
- printf("/%ld", getBCN(l2pos));
- } else
- if ((i & 3) == 0) {
- printf("\t%ld", getBCN(l1pos));
- printf("/%ld", getBCN(l1pos));
- }
- printf("\n");
- }
- printf("%ld\n", offtable[TBLSIZ-1]);
- fflush(stdout);
-}
diff --git a/searchlib/src/tests/features/onnx_feature/onnx_feature_test.cpp b/searchlib/src/tests/features/onnx_feature/onnx_feature_test.cpp
index b49d9c365de..6a1e4ef9fa1 100644
--- a/searchlib/src/tests/features/onnx_feature/onnx_feature_test.cpp
+++ b/searchlib/src/tests/features/onnx_feature/onnx_feature_test.cpp
@@ -58,8 +58,8 @@ struct OnnxFeatureTest : ::testing::Test {
vespalib::string expr_name = feature_name + ".rankingScript";
indexEnv.getProperties().add(expr_name, expr);
}
- void add_onnx(const vespalib::string &name, const vespalib::string &file) {
- indexEnv.addOnnxModel(name, file);
+ void add_onnx(const OnnxModel &model) {
+ indexEnv.addOnnxModel(model);
}
void compile(const vespalib::string &seed) {
resolver->addSeed(seed);
@@ -89,7 +89,7 @@ TEST_F(OnnxFeatureTest, simple_onnx_model_can_be_calculated) {
add_expr("query_tensor", "tensor<float>(a[1],b[4]):[[docid,2,3,4]]");
add_expr("attribute_tensor", "tensor<float>(a[4],b[1]):[[5],[6],[7],[8]]");
add_expr("bias_tensor", "tensor<float>(a[1],b[1]):[[9]]");
- add_onnx("simple", simple_model);
+ add_onnx(OnnxModel("simple", simple_model));
compile(onnx_feature("simple"));
EXPECT_EQ(get(1), TensorSpec("tensor<float>(d0[1],d1[1])").add({{"d0",0},{"d1",0}}, 79.0));
EXPECT_EQ(get("onnxModel(simple).output", 1), TensorSpec("tensor<float>(d0[1],d1[1])").add({{"d0",0},{"d1",0}}, 79.0));
@@ -101,7 +101,7 @@ TEST_F(OnnxFeatureTest, dynamic_onnx_model_can_be_calculated) {
add_expr("query_tensor", "tensor<float>(a[1],b[4]):[[docid,2,3,4]]");
add_expr("attribute_tensor", "tensor<float>(a[4],b[1]):[[5],[6],[7],[8]]");
add_expr("bias_tensor", "tensor<float>(a[1],b[2]):[[4,5]]");
- add_onnx("dynamic", dynamic_model);
+ add_onnx(OnnxModel("dynamic", dynamic_model));
compile(onnx_feature("dynamic"));
EXPECT_EQ(get(1), TensorSpec("tensor<float>(d0[1],d1[1])").add({{"d0",0},{"d1",0}}, 79.0));
EXPECT_EQ(get("onnxModel(dynamic).output", 1), TensorSpec("tensor<float>(d0[1],d1[1])").add({{"d0",0},{"d1",0}}, 79.0));
@@ -112,7 +112,7 @@ TEST_F(OnnxFeatureTest, dynamic_onnx_model_can_be_calculated) {
TEST_F(OnnxFeatureTest, strange_input_and_output_names_are_normalized) {
add_expr("input_0", "tensor<float>(a[2]):[10,20]");
add_expr("input_1", "tensor<float>(a[2]):[5,10]");
- add_onnx("strange_names", strange_names_model);
+ add_onnx(OnnxModel("strange_names", strange_names_model));
compile(onnx_feature("strange_names"));
auto expect_add = TensorSpec("tensor<float>(d0[2])").add({{"d0",0}},15).add({{"d0",1}},30);
auto expect_sub = TensorSpec("tensor<float>(d0[2])").add({{"d0",0}},5).add({{"d0",1}},10);
@@ -121,4 +121,20 @@ TEST_F(OnnxFeatureTest, strange_input_and_output_names_are_normalized) {
EXPECT_EQ(get("onnxModel(strange_names)._baz_0", 1), expect_sub);
}
+TEST_F(OnnxFeatureTest, input_features_and_output_names_can_be_specified) {
+ add_expr("my_first_input", "tensor<float>(a[2]):[10,20]");
+ add_expr("my_second_input", "tensor<float>(a[2]):[5,10]");
+ add_onnx(OnnxModel("custom_names", strange_names_model)
+ .input_feature("input:0", "rankingExpression(my_first_input)")
+ .input_feature("input/1", "rankingExpression(my_second_input)")
+ .output_name("foo/bar", "my_first_output")
+ .output_name("-baz:0", "my_second_output"));
+ compile(onnx_feature("custom_names"));
+ auto expect_add = TensorSpec("tensor<float>(d0[2])").add({{"d0",0}},15).add({{"d0",1}},30);
+ auto expect_sub = TensorSpec("tensor<float>(d0[2])").add({{"d0",0}},5).add({{"d0",1}},10);
+ EXPECT_EQ(get(1), expect_add);
+ EXPECT_EQ(get("onnxModel(custom_names).my_first_output", 1), expect_add);
+ EXPECT_EQ(get("onnxModel(custom_names).my_second_output", 1), expect_sub);
+}
+
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/tests/transactionlog/translogclient_test.cpp b/searchlib/src/tests/transactionlog/translogclient_test.cpp
index 9e5021b4778..fffb70467a3 100644
--- a/searchlib/src/tests/transactionlog/translogclient_test.cpp
+++ b/searchlib/src/tests/transactionlog/translogclient_test.cpp
@@ -329,11 +329,12 @@ fillDomainTest(TransLogServer & s1, const vespalib::string & domain, size_t numP
Packet::Entry e(value+1, j+1, vespalib::ConstBufferRef((const char *)&value, sizeof(value)));
p->add(e);
if ( p->sizeBytes() > DEFAULT_PACKET_SIZE ) {
- domainWriter->commit(*p, std::make_shared<CountDone>(inFlight));
+ domainWriter->append(*p, std::make_shared<CountDone>(inFlight));
p = std::make_unique<Packet>(DEFAULT_PACKET_SIZE);
}
}
- domainWriter->commit(*p, std::make_shared<CountDone>(inFlight));
+ domainWriter->append(*p, std::make_shared<CountDone>(inFlight));
+ auto keep = domainWriter->startCommit(Writer::DoneCallback());
LOG(info, "Inflight %ld", inFlight.load());
}
while (inFlight.load() != 0) {
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp
index 430f2eaa560..cfbca71bd5e 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp
@@ -114,6 +114,8 @@ to_distance_metric(const vespalib::string& metric)
return DistanceMetric::Angular;
} else if (metric == geodegrees) {
return DistanceMetric::GeoDegrees;
+ } else if (metric == innerproduct) {
+ return DistanceMetric::InnerProduct;
} else if (metric == hamming) {
return DistanceMetric::Hamming;
} else {
diff --git a/searchlib/src/vespa/searchlib/attribute/attributefilebufferwriter.cpp b/searchlib/src/vespa/searchlib/attribute/attributefilebufferwriter.cpp
index 341112f9b22..4efd64f72dd 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributefilebufferwriter.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attributefilebufferwriter.cpp
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "attributefilebufferwriter.h"
+#include <vespa/vespalib/data/databuffer.h>
namespace search {
diff --git a/searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp b/searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp
index 415c00cb8fd..829720e9c3e 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp
@@ -6,6 +6,7 @@
#include <vespa/vespalib/data/fileheader.h>
#include <vespa/searchlib/common/fileheadercontext.h>
#include <vespa/searchlib/common/tunefileinfo.h>
+#include <vespa/vespalib/data/databuffer.h>
#include <vespa/fastos/file.h>
#include <vespa/log/log.h>
diff --git a/searchlib/src/vespa/searchlib/attribute/attributememoryfilebufferwriter.cpp b/searchlib/src/vespa/searchlib/attribute/attributememoryfilebufferwriter.cpp
index b354566616b..454cc486f70 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributememoryfilebufferwriter.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attributememoryfilebufferwriter.cpp
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "attributememoryfilebufferwriter.h"
+#include <vespa/vespalib/data/databuffer.h>
namespace search {
@@ -11,9 +12,7 @@ AttributeMemoryFileBufferWriter(IAttributeFileWriter &memoryFileWriter)
}
-AttributeMemoryFileBufferWriter::~AttributeMemoryFileBufferWriter()
-{
-}
+AttributeMemoryFileBufferWriter::~AttributeMemoryFileBufferWriter() = default;
void
diff --git a/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp b/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp
index c28b7e9d20b..8d412364815 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp
@@ -2,6 +2,7 @@
#include "attributememoryfilewriter.h"
#include "attributememoryfilebufferwriter.h"
+#include <vespa/vespalib/data/databuffer.h>
namespace search {
@@ -18,9 +19,7 @@ AttributeMemoryFileWriter::AttributeMemoryFileWriter()
}
-AttributeMemoryFileWriter::~AttributeMemoryFileWriter()
-{
-}
+AttributeMemoryFileWriter::~AttributeMemoryFileWriter() = default;
AttributeMemoryFileWriter::Buffer
diff --git a/searchlib/src/vespa/searchlib/attribute/attrvector.cpp b/searchlib/src/vespa/searchlib/attribute/attrvector.cpp
index 59771d7ffae..80f72aaea25 100644
--- a/searchlib/src/vespa/searchlib/attribute/attrvector.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attrvector.cpp
@@ -4,6 +4,7 @@
#include "attrvector.hpp"
#include "iattributesavetarget.h"
#include "load_utils.h"
+#include <vespa/vespalib/data/databuffer.h>
#include <vespa/log/log.h>
LOG_SETUP(".searchlib.attribute.attr_vector");
diff --git a/searchlib/src/vespa/searchlib/attribute/iattributefilewriter.h b/searchlib/src/vespa/searchlib/attribute/iattributefilewriter.h
index bb00124c9fc..94e16b37e9d 100644
--- a/searchlib/src/vespa/searchlib/attribute/iattributefilewriter.h
+++ b/searchlib/src/vespa/searchlib/attribute/iattributefilewriter.h
@@ -2,8 +2,9 @@
#pragma once
-#include <vespa/vespalib/data/databuffer.h>
+#include <memory>
+namespace vespalib { class DataBuffer; }
namespace search {
class BufferWriter;
diff --git a/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp
index e1ab47ed434..bbe8c9c8327 100644
--- a/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp
@@ -8,6 +8,7 @@
#include <vespa/searchlib/query/query_term_simple.h>
#include <vespa/searchlib/queryeval/emptysearch.h>
#include <vespa/searchlib/common/bitvectoriterator.h>
+#include <vespa/vespalib/data/databuffer.h>
namespace search {
diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp
index db8636f47c3..3c26e960a06 100644
--- a/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp
@@ -2,6 +2,7 @@
#include "singlenumericattributesaver.h"
#include "iattributesavetarget.h"
+#include <vespa/vespalib/data/databuffer.h>
using vespalib::GenerationHandler;
diff --git a/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp
index 6eff0da06e8..fd2631ac63d 100644
--- a/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp
@@ -7,6 +7,7 @@
#include "iattributesavetarget.h"
#include <vespa/searchlib/query/query_term_simple.h>
#include <vespa/searchlib/queryeval/emptysearch.h>
+#include <vespa/vespalib/data/databuffer.h>
namespace search {
diff --git a/searchlib/src/vespa/searchlib/common/fileheadercontext.h b/searchlib/src/vespa/searchlib/common/fileheadercontext.h
index 6f76fe1717d..8bb3d6a56a6 100644
--- a/searchlib/src/vespa/searchlib/common/fileheadercontext.h
+++ b/searchlib/src/vespa/searchlib/common/fileheadercontext.h
@@ -3,40 +3,22 @@
#include <vespa/vespalib/stllike/string.h>
-namespace vespalib
-{
-
-class GenericHeader;
-
+namespace vespalib {
+ class GenericHeader;
}
-namespace search
-{
-
-namespace common
-{
+namespace search::common {
class FileHeaderContext
{
public:
FileHeaderContext();
+ virtual ~FileHeaderContext();
- virtual
- ~FileHeaderContext();
+ virtual void addTags(vespalib::GenericHeader &header, const vespalib::string &name) const = 0;
- virtual void
- addTags(vespalib::GenericHeader &header,
- const vespalib::string &name) const = 0;
-
- static void
- addCreateAndFreezeTime(vespalib::GenericHeader &header);
-
- static void
- setFreezeTime(vespalib::GenericHeader &header);
+ static void addCreateAndFreezeTime(vespalib::GenericHeader &header);
+ static void setFreezeTime(vespalib::GenericHeader &header);
};
-
-} // namespace common
-
-} // namespace search
-
+}
diff --git a/searchlib/src/vespa/searchlib/config/translogserver.def b/searchlib/src/vespa/searchlib/config/translogserver.def
index 38741745773..defce8c3421 100644
--- a/searchlib/src/vespa/searchlib/config/translogserver.def
+++ b/searchlib/src/vespa/searchlib/config/translogserver.def
@@ -15,7 +15,7 @@ basedir string default="tmp" restart
## Use fsync after each commit.
## If not the below interval is used.
-usefsync bool default=false restart
+usefsync bool default=false
##Number of threads available for visiting/subscription.
maxthreads int default=4 restart
@@ -24,12 +24,12 @@ maxthreads int default=4 restart
crcmethod enum {ccitt_crc32, xxh64} default=xxh64
## Control compression type.
-compression.type enum {NONE, NONE_MULTI, LZ4, ZSTD} default=LZ4
+compression.type enum {NONE, NONE_MULTI, LZ4, ZSTD} default=NONE
## Control compression level
## LZ4 has normal range 1..9 while ZSTD has range 1..19
## 9 is a reasonable default for both
-compression.level int default=9
+compression.level int default=3
## How large a chunk can grow in memory before beeing flushed
chunk.sizelimit int default = 256000 # 256k
diff --git a/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt b/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt
index 2b82d9e5af7..b1c27e20210 100644
--- a/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt
@@ -1,7 +1,6 @@
# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
vespa_add_library(searchlib_docstore OBJECT
SOURCES
- bytecomplens.cpp
chunk.cpp
chunkformat.cpp
chunkformats.cpp
diff --git a/searchlib/src/vespa/searchlib/docstore/bytecomplens.cpp b/searchlib/src/vespa/searchlib/docstore/bytecomplens.cpp
deleted file mode 100644
index 4ef57b77dbd..00000000000
--- a/searchlib/src/vespa/searchlib/docstore/bytecomplens.cpp
+++ /dev/null
@@ -1,260 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "bytecomplens.h"
-
-#include <vespa/log/log.h>
-LOG_SETUP(".search.docstore");
-
-namespace search {
-
-static inline uint64_t getBCN(const uint8_t *&buffer) __attribute__((__always_inline__));
-
-/**
- * get "Byte Compressed Number" from buffer, incrementing pointer
- **/
-static inline uint64_t getBCN(const uint8_t *&buffer)
-{
- uint8_t b = *buffer++;
- uint64_t len = (b & 127);
- unsigned shiftLen = 0;
- while (b & 128) {
- shiftLen += 7;
- b = *buffer++;
- len |= ((b & 127) << shiftLen);
- }
- return len;
-}
-
-static size_t writeLen(vespalib::DataBuffer &buf, uint64_t len)
-{
- size_t bytes = 0;
- do {
- uint8_t b = len & 127;
- len >>= 7;
- if (len > 0) {
- b |= 128;
- }
- buf.writeInt8(b);
- ++bytes;
- } while (len > 0);
- return bytes;
-}
-
-
-ByteCompressedLengths::ByteCompressedLengths()
- : _l0space(),
- _l1space(),
- _l2space(),
- _l3table(),
- _entries(0),
- _progress(),
- _ptrcache(),
- _hasInitialOffset(false)
-{
- clear();
-}
-
-
-void
-ByteCompressedLengths::clear()
-{
- _l0space.clear();
- _l1space.clear();
- _l2space.clear();
- _l3table.clear();
-
- _entries = 0;
-
- _progress.lenSum1 = 0;
- _progress.lenSum2 = 0;
- _progress.l0oSum1 = 0;
- _progress.l0oSum2 = 0;
- _progress.l1oSum2 = 0;
- _progress.last_offset = 0;
-
- _ptrcache.l0table = NULL;
- _ptrcache.l1table = NULL;
- _ptrcache.l2table = NULL;
-
- _hasInitialOffset = false;
-}
-
-
-void
-ByteCompressedLengths::swap(ByteCompressedLengths& other)
-{
- _l0space.swap(other._l0space);
- _l1space.swap(other._l1space);
- _l2space.swap(other._l2space);
- _l3table.swap(other._l3table);
-
- std::swap(_entries, other._entries);
- std::swap(_progress, other._progress);
- std::swap(_ptrcache, other._ptrcache);
- std::swap(_hasInitialOffset, other._hasInitialOffset);
-}
-
-
-// add a new offset to the compressed tables
-void
-ByteCompressedLengths::addOffset(uint64_t offset)
-{
- assert(offset >= _progress.last_offset);
-
- // delta from last offset:
- uint64_t len = offset - _progress.last_offset;
-
- // which entry is this:
- uint64_t idx = _entries++;
-
- if ((idx & 31) == 0) {
- // add entry to some skip-table
- _progress.lenSum2 += _progress.lenSum1; // accumulate to Level2
- _progress.l0oSum2 += _progress.l0oSum1; // accumulate to Level2
-
- uint64_t t1n = idx >> 5;
- if ((t1n & 31) == 0) {
- // add Level2 or Level3 table entry:
- uint64_t t2n = t1n >> 5;
-
- if ((t2n & 31) == 0) {
- // add new Level3 table entry:
- L3Entry e;
- e.offset = _progress.last_offset;
- e.l0toff = _l0space.getDataLen();
- e.l1toff = _l1space.getDataLen();
- e.l2toff = _l2space.getDataLen();
-
- _l3table.push_back(e);
- } else {
- // write to Level2 table, sums since last reset:
- writeLen(_l2space, _progress.lenSum2); // sum of Level0 lengths
- writeLen(_l2space, _progress.l0oSum2); // sum size of Level0 entries
- writeLen(_l2space, _progress.l1oSum2); // sum size of Level1 entries
- }
- // reset Level2 sums:
- _progress.lenSum2 = 0;
- _progress.l0oSum2 = 0;
- _progress.l1oSum2 = 0;
- } else {
- // write to Level1 table, sums since last reset:
- _progress.l1oSum2 += writeLen(_l1space, _progress.lenSum1); // sum of Level0 lengths
- _progress.l1oSum2 += writeLen(_l1space, _progress.l0oSum1); // sum size of Level0 entries
- }
- // reset Level1 sums:
- _progress.lenSum1 = 0;
- _progress.l0oSum1 = 0;
- }
- // always write length (offset delta) to Level0 table:
- _progress.l0oSum1 += writeLen(_l0space, len); // accumulate to Level1
- _progress.lenSum1 += len; // accumulate to Level1
- _progress.last_offset = offset;
-}
-
-
-void
-ByteCompressedLengths::addOffsetTable(uint64_t entries, uint64_t *offsets)
-{
- // ignore NOP:
- if (entries == 0) return;
-
- // Do we have some offsets already?
- if (_hasInitialOffset) {
- // yes, add first offset normally
- addOffset(offsets[0]);
- } else {
- // no, special treatment for very first offset
- _progress.last_offset = offsets[0];
- _hasInitialOffset = true;
- }
- for (uint64_t cnt = 1; cnt < entries; ++cnt) {
- addOffset(offsets[cnt]);
- }
-
- // Simplify access to actual data:
- _ptrcache.l0table = (uint8_t *)_l0space.getData();
- _ptrcache.l1table = (uint8_t *)_l1space.getData();
- _ptrcache.l2table = (uint8_t *)_l2space.getData();
-
- // some statistics available when debug logging:
- LOG(debug, "compressed %" PRIu64 " offsets", (_entries+1));
- LOG(debug, "(%" PRIu64 " bytes)", (_entries+1)*sizeof(uint64_t));
- LOG(debug, "to (%ld + %ld + %ld) bytes + %ld l3entries",
- _l0space.getDataLen(),
- _l1space.getDataLen(),
- _l2space.getDataLen(),
- _l3table.size());
- LOG(debug, "(%ld bytes)",
- (_l0space.getDataLen() + _l1space.getDataLen() + _l2space.getDataLen() +
- _l3table.size()*sizeof(L3Entry)));
-}
-
-
-ByteCompressedLengths::~ByteCompressedLengths()
-{
-}
-
-ByteCompressedLengths::OffLen
-ByteCompressedLengths::getOffLen(uint64_t idx) const
-{
- assert(idx < _entries);
-
- unsigned skipL0 = idx & 31;
- unsigned skipL1 = (idx >> 5) & 31;
- unsigned skipL2 = (idx >> 10) & 31;
- uint64_t skipL3 = (idx >> 15);
-
- uint64_t offset = _l3table[skipL3].offset;
- uint64_t l0toff = _l3table[skipL3].l0toff;
- uint64_t l1toff = _l3table[skipL3].l1toff;
- uint64_t l2toff = _l3table[skipL3].l2toff;
-
- // printf("start off %ld l0off %ld l1off %ld l2off %ld\n", offset, l0toff, l1toff, l2toff);
-
- const uint8_t *l2pos = _ptrcache.l2table + l2toff;
-
- while (skipL2 > 0) {
- --skipL2;
- offset += getBCN(l2pos);
- l0toff += getBCN(l2pos);
- l1toff += getBCN(l2pos);
- }
-
- const uint8_t *l1pos = _ptrcache.l1table + l1toff;
-
- while (skipL1 > 0) {
- --skipL1;
- offset += getBCN(l1pos);
- l0toff += getBCN(l1pos);
-
- }
- const uint8_t *l0pos = _ptrcache.l0table + l0toff;
-
- while (skipL0 > 0) {
- --skipL0;
- offset += getBCN(l0pos);
- }
- // printf("end off %ld l0off %ld l1off %ld l2off %ld\n", offset, l0toff, l1toff, l2toff);
- OffLen retval;
- retval.offset = offset;
- retval.length = getBCN(l0pos);
- return retval;
-}
-
-
-size_t
-ByteCompressedLengths::memoryUsed() const
-{
- size_t mem = sizeof(*this);
- mem += _l0space.getBufSize();
- mem += _l1space.getBufSize();
- mem += _l2space.getBufSize();
- mem += _l3table.capacity() * sizeof(L3Entry);
- return mem;
-}
-
-
-
-
-} // namespace search
-
diff --git a/searchlib/src/vespa/searchlib/docstore/bytecomplens.h b/searchlib/src/vespa/searchlib/docstore/bytecomplens.h
deleted file mode 100644
index 88f6a11c764..00000000000
--- a/searchlib/src/vespa/searchlib/docstore/bytecomplens.h
+++ /dev/null
@@ -1,110 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include <vector>
-#include <vespa/vespalib/data/databuffer.h>
-
-namespace search {
-
-/**
- * Class compressing a table of offsets in memory.
- * After adding (n) offsets you can access
- * (n-1) pairs of (length, offset).
- * All offsets must be increasing, but they
- * may be added in several chunks.
- **/
-class ByteCompressedLengths
-{
-public:
- /**
- * Construct an empty instance
- **/
- ByteCompressedLengths();
-
- /**
- * add the given offset table.
- * @param entries number of offsets to store.
- * @param offsets pointer to table that contains (entries) offsets.
- **/
- void addOffsetTable(uint64_t entries, uint64_t *offsets);
-
- /**
- * free resources
- **/
- ~ByteCompressedLengths();
-
- struct OffLen
- {
- uint64_t offset;
- uint64_t length;
- };
-
- /**
- * Fetch an offset and length from compressed data.
- * Note restriction: idx must be < size()
- *
- * @param idx The index into the offset table
- * @return offset[id] and the delta (offset[id+1] - offset[id])
- **/
- OffLen getOffLen(uint64_t idx) const;
-
- /**
- * The number of (length, offset) pairs stored
- * Note that size() == sum(entries) - 1
- **/
- uint64_t size() const { return _entries; }
-
- /**
- * remove all data from this instance
- **/
- void clear();
-
- /**
- * swap all data with another instance
- **/
- void swap(ByteCompressedLengths& other);
-
- /**
- * Calculate memory used by this instance
- * @return memory usage (in bytes)
- **/
- size_t memoryUsed() const;
-
-private:
- struct L3Entry {
- uint64_t offset;
- uint64_t l0toff;
- uint64_t l1toff;
- uint64_t l2toff;
- };
- vespalib::DataBuffer _l0space;
- vespalib::DataBuffer _l1space;
- vespalib::DataBuffer _l2space;
-
- std::vector<L3Entry> _l3table;
-
- uint64_t _entries;
-
- struct ProgressPoint {
- uint64_t lenSum1;
- uint64_t lenSum2;
- uint64_t l0oSum1;
- uint64_t l0oSum2;
- uint64_t l1oSum2;
- uint64_t last_offset;
- } _progress;
-
- struct CachedPointers {
- const uint8_t *l0table;
- const uint8_t *l1table;
- const uint8_t *l2table;
- } _ptrcache;
-
- bool _hasInitialOffset;
-
- void addOffset(uint64_t offset);
-};
-
-} // namespace search
-
diff --git a/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp b/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp
index fbbdcff3c5d..afa7abb9ef8 100644
--- a/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp
@@ -140,7 +140,7 @@ ChunkFormat::deserializeBody(vespalib::nbostream & is)
assert(uncompressed.getData() == uncompressed.getDead());
if (uncompressed.getData() != data.c_str()) {
const size_t sz(uncompressed.getDataLen());
- vespalib::nbostream(uncompressed.stealBuffer(), sz).swap(_dataBuf);
+ vespalib::nbostream(std::move(uncompressed).stealBuffer(), sz).swap(_dataBuf);
} else {
_dataBuf = vespalib::nbostream(uncompressed.getData(), uncompressed.getDataLen());
}
diff --git a/searchlib/src/vespa/searchlib/docstore/value.cpp b/searchlib/src/vespa/searchlib/docstore/value.cpp
index 09725b447cd..25cf93ac18b 100644
--- a/searchlib/src/vespa/searchlib/docstore/value.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/value.cpp
@@ -1,7 +1,6 @@
// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "value.h"
-#include <vespa/vespalib/data/databuffer.h>
#include <vespa/vespalib/util/compressor.h>
#include <xxhash.h>
@@ -66,8 +65,9 @@ Value::set(vespalib::DataBuffer &&buf, ssize_t len, const CompressionConfig &com
_compression = type;
_uncompressedSize = len;
_uncompressedCrc = XXH64(input.c_str(), input.size(), 0);
- _buf = std::make_shared<Alloc>(compact(_compressedSize,
- (buf.getData() == compressed.getData()) ? buf.stealBuffer() : compressed.stealBuffer()));
+ _buf = std::make_shared<Alloc>(compact(_compressedSize,(buf.getData() == compressed.getData())
+ ? std::move(buf).stealBuffer()
+ : std::move(compressed).stealBuffer()));
assert(((type == CompressionConfig::NONE) &&
(len == ssize_t(_compressedSize))) ||
diff --git a/searchlib/src/vespa/searchlib/docstore/visitcache.cpp b/searchlib/src/vespa/searchlib/docstore/visitcache.cpp
index e8504480b7d..6990a0a3ed7 100644
--- a/searchlib/src/vespa/searchlib/docstore/visitcache.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/visitcache.cpp
@@ -109,7 +109,7 @@ CompressedBlobSet::getBlobSet() const
decompress(_compression, getBufferSize(_positions),
ConstBufferRef(_buffer->c_str(), _buffer->size()), uncompressed, false);
}
- return BlobSet(_positions, uncompressed.stealBuffer());
+ return BlobSet(_positions, std::move(uncompressed).stealBuffer());
}
size_t CompressedBlobSet::size() const {
diff --git a/searchlib/src/vespa/searchlib/features/onnx_feature.cpp b/searchlib/src/vespa/searchlib/features/onnx_feature.cpp
index 698d2309e5a..fca8988ba36 100644
--- a/searchlib/src/vespa/searchlib/features/onnx_feature.cpp
+++ b/searchlib/src/vespa/searchlib/features/onnx_feature.cpp
@@ -2,6 +2,7 @@
#include "onnx_feature.h"
#include <vespa/searchlib/fef/properties.h>
+#include <vespa/searchlib/fef/onnx_model.h>
#include <vespa/searchlib/fef/featureexecutor.h>
#include <vespa/eval/tensor/dense/dense_tensor_view.h>
#include <vespa/eval/tensor/dense/mutable_dense_tensor_view.h>
@@ -85,37 +86,45 @@ OnnxBlueprint::setup(const IIndexEnvironment &env,
auto optimize = (env.getFeatureMotivation() == env.FeatureMotivation::VERIFY_SETUP)
? Onnx::Optimize::DISABLE
: Onnx::Optimize::ENABLE;
- auto file_name = env.getOnnxModelFullPath(params[0].getValue());
- if (!file_name.has_value()) {
+ auto model_cfg = env.getOnnxModel(params[0].getValue());
+ if (!model_cfg) {
return fail("no model with name '%s' found", params[0].getValue().c_str());
}
try {
- _model = std::make_unique<Onnx>(file_name.value(), optimize);
+ _model = std::make_unique<Onnx>(model_cfg->file_path(), optimize);
} catch (std::exception &ex) {
return fail("model setup failed: %s", ex.what());
}
Onnx::WirePlanner planner;
for (size_t i = 0; i < _model->inputs().size(); ++i) {
const auto &model_input = _model->inputs()[i];
- vespalib::string input_name = normalize_name(model_input.name, "input");
- if (auto maybe_input = defineInput(fmt("rankingExpression(\"%s\")", input_name.c_str()), AcceptInput::OBJECT)) {
+ auto input_feature = model_cfg->input_feature(model_input.name);
+ if (!input_feature.has_value()) {
+ input_feature = fmt("rankingExpression(\"%s\")", normalize_name(model_input.name, "input").c_str());
+ }
+ if (auto maybe_input = defineInput(input_feature.value(), AcceptInput::OBJECT)) {
const FeatureType &feature_input = maybe_input.value();
assert(feature_input.is_object());
if (!planner.bind_input_type(feature_input.type(), model_input)) {
- return fail("incompatible type for input '%s': %s -> %s", input_name.c_str(),
+ return fail("incompatible type for input (%s -> %s): %s -> %s",
+ input_feature.value().c_str(), model_input.name.c_str(),
feature_input.type().to_spec().c_str(), model_input.type_as_string().c_str());
}
}
}
for (size_t i = 0; i < _model->outputs().size(); ++i) {
const auto &model_output = _model->outputs()[i];
- vespalib::string output_name = normalize_name(model_output.name, "output");
+ auto output_name = model_cfg->output_name(model_output.name);
+ if (!output_name.has_value()) {
+ output_name = normalize_name(model_output.name, "output");
+ }
ValueType output_type = planner.make_output_type(model_output);
if (output_type.is_error()) {
- return fail("unable to make compatible type for output '%s': %s -> error",
- output_name.c_str(), model_output.type_as_string().c_str());
+ return fail("unable to make compatible type for output (%s -> %s): %s -> error",
+ model_output.name.c_str(), output_name.value().c_str(),
+ model_output.type_as_string().c_str());
}
- describeOutput(output_name, "output from onnx model", FeatureType::object(output_type));
+ describeOutput(output_name.value(), "output from onnx model", FeatureType::object(output_type));
}
_wire_info = planner.get_wire_info(*_model);
return true;
diff --git a/searchlib/src/vespa/searchlib/fef/CMakeLists.txt b/searchlib/src/vespa/searchlib/fef/CMakeLists.txt
index 178de1b8b87..d6f8764cd63 100644
--- a/searchlib/src/vespa/searchlib/fef/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/fef/CMakeLists.txt
@@ -4,12 +4,12 @@ vespa_add_library(searchlib_fef OBJECT
blueprint.cpp
blueprintfactory.cpp
blueprintresolver.cpp
+ feature_resolver.cpp
feature_type.cpp
featureexecutor.cpp
featurenamebuilder.cpp
featurenameparser.cpp
featureoverrider.cpp
- feature_resolver.cpp
fef.cpp
fieldinfo.cpp
fieldpositionsiterator.cpp
@@ -19,6 +19,7 @@ vespa_add_library(searchlib_fef OBJECT
matchdata.cpp
matchdatalayout.cpp
objectstore.cpp
+ onnx_model.cpp
parameter.cpp
parameterdescriptions.cpp
parametervalidator.cpp
diff --git a/searchlib/src/vespa/searchlib/fef/iindexenvironment.h b/searchlib/src/vespa/searchlib/fef/iindexenvironment.h
index 26e88a98033..384b81643cc 100644
--- a/searchlib/src/vespa/searchlib/fef/iindexenvironment.h
+++ b/searchlib/src/vespa/searchlib/fef/iindexenvironment.h
@@ -3,7 +3,6 @@
#pragma once
#include <vespa/vespalib/stllike/string.h>
-#include <optional>
namespace vespalib::eval { struct ConstantValue; }
@@ -12,6 +11,7 @@ namespace search::fef {
class Properties;
class FieldInfo;
class ITableManager;
+class OnnxModel;
/**
* Abstract view of index related information available to the
@@ -122,9 +122,9 @@ public:
virtual std::unique_ptr<vespalib::eval::ConstantValue> getConstantValue(const vespalib::string &name) const = 0;
/**
- * Get the full path of the file containing the given onnx model
+ * Get configuration for the given onnx model.
**/
- virtual std::optional<vespalib::string> getOnnxModelFullPath(const vespalib::string &name) const = 0;
+ virtual const OnnxModel *getOnnxModel(const vespalib::string &name) const = 0;
virtual uint32_t getDistributionKey() const = 0;
diff --git a/searchlib/src/vespa/searchlib/fef/onnx_model.cpp b/searchlib/src/vespa/searchlib/fef/onnx_model.cpp
new file mode 100644
index 00000000000..ba5adaae857
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/onnx_model.cpp
@@ -0,0 +1,55 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "onnx_model.h"
+#include <tuple>
+
+namespace search::fef {
+
+OnnxModel::OnnxModel(const vespalib::string &name_in,
+ const vespalib::string &file_path_in)
+ : _name(name_in),
+ _file_path(file_path_in),
+ _input_features(),
+ _output_names()
+{
+}
+
+OnnxModel &
+OnnxModel::input_feature(const vespalib::string &model_input_name, const vespalib::string &input_feature) {
+ _input_features[model_input_name] = input_feature;
+ return *this;
+}
+
+OnnxModel &
+OnnxModel::output_name(const vespalib::string &model_output_name, const vespalib::string &output_name) {
+ _output_names[model_output_name] = output_name;
+ return *this;
+}
+
+std::optional<vespalib::string>
+OnnxModel::input_feature(const vespalib::string &model_input_name) const {
+ auto pos = _input_features.find(model_input_name);
+ if (pos != _input_features.end()) {
+ return pos->second;
+ }
+ return std::nullopt;
+}
+
+std::optional<vespalib::string>
+OnnxModel::output_name(const vespalib::string &model_output_name) const {
+ auto pos = _output_names.find(model_output_name);
+ if (pos != _output_names.end()) {
+ return pos->second;
+ }
+ return std::nullopt;
+}
+
+bool
+OnnxModel::operator==(const OnnxModel &rhs) const {
+ return (std::tie(_name, _file_path, _input_features, _output_names) ==
+ std::tie(rhs._name, rhs._file_path, rhs._input_features, rhs._output_names));
+}
+
+OnnxModel::~OnnxModel() = default;
+
+}
diff --git a/searchlib/src/vespa/searchlib/fef/onnx_model.h b/searchlib/src/vespa/searchlib/fef/onnx_model.h
new file mode 100644
index 00000000000..2195a50600d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/onnx_model.h
@@ -0,0 +1,39 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include <optional>
+#include <map>
+
+namespace search::fef {
+
+/**
+ * Class containing configuration for a single onnx model setup. This
+ * class is used both by the IIndexEnvironment api as well as the
+ * OnnxModels config adapter in the search core (matching component).
+ **/
+class OnnxModel {
+private:
+ vespalib::string _name;
+ vespalib::string _file_path;
+ std::map<vespalib::string,vespalib::string> _input_features;
+ std::map<vespalib::string,vespalib::string> _output_names;
+
+public:
+ OnnxModel(const vespalib::string &name_in,
+ const vespalib::string &file_path_in);
+ ~OnnxModel();
+
+ const vespalib::string &name() const { return _name; }
+ const vespalib::string &file_path() const { return _file_path; }
+ OnnxModel &input_feature(const vespalib::string &model_input_name, const vespalib::string &input_feature);
+ OnnxModel &output_name(const vespalib::string &model_output_name, const vespalib::string &output_name);
+ std::optional<vespalib::string> input_feature(const vespalib::string &model_input_name) const;
+ std::optional<vespalib::string> output_name(const vespalib::string &model_output_name) const;
+ bool operator==(const OnnxModel &rhs) const;
+ const std::map<vespalib::string,vespalib::string> &inspect_input_features() const { return _input_features; }
+ const std::map<vespalib::string,vespalib::string> &inspect_output_names() const { return _output_names; }
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/fef/test/indexenvironment.cpp b/searchlib/src/vespa/searchlib/fef/test/indexenvironment.cpp
index 6e2e0b88fbb..d2d336dcdc8 100644
--- a/searchlib/src/vespa/searchlib/fef/test/indexenvironment.cpp
+++ b/searchlib/src/vespa/searchlib/fef/test/indexenvironment.cpp
@@ -54,20 +54,20 @@ IndexEnvironment::addConstantValue(const vespalib::string &name,
(void) insertRes;
}
-std::optional<vespalib::string>
-IndexEnvironment::getOnnxModelFullPath(const vespalib::string &name) const
+const OnnxModel *
+IndexEnvironment::getOnnxModel(const vespalib::string &name) const
{
auto pos = _models.find(name);
if (pos != _models.end()) {
- return pos->second;
+ return &pos->second;
}
- return std::nullopt;
+ return nullptr;
}
void
-IndexEnvironment::addOnnxModel(const vespalib::string &name, const vespalib::string &path)
+IndexEnvironment::addOnnxModel(const OnnxModel &model)
{
- _models[name] = path;
+ _models.insert_or_assign(model.name(), model);
}
diff --git a/searchlib/src/vespa/searchlib/fef/test/indexenvironment.h b/searchlib/src/vespa/searchlib/fef/test/indexenvironment.h
index 6602d9f8ee9..0d8d0091921 100644
--- a/searchlib/src/vespa/searchlib/fef/test/indexenvironment.h
+++ b/searchlib/src/vespa/searchlib/fef/test/indexenvironment.h
@@ -5,6 +5,7 @@
#include <vespa/searchlib/attribute/attributemanager.h>
#include <vespa/searchlib/fef/iindexenvironment.h>
#include <vespa/searchlib/fef/properties.h>
+#include <vespa/searchlib/fef/onnx_model.h>
#include <vespa/searchlib/fef/fieldinfo.h>
#include <vespa/searchlib/fef/tablemanager.h>
#include <vespa/eval/eval/value_cache/constant_value.h>
@@ -47,7 +48,7 @@ public:
};
using ConstantsMap = std::map<vespalib::string, Constant>;
- using ModelMap = std::map<vespalib::string, vespalib::string>;
+ using ModelMap = std::map<vespalib::string, OnnxModel>;
IndexEnvironment();
~IndexEnvironment();
@@ -84,8 +85,8 @@ public:
vespalib::eval::ValueType type,
std::unique_ptr<vespalib::eval::Value> value);
- std::optional<vespalib::string> getOnnxModelFullPath(const vespalib::string &name) const override;
- void addOnnxModel(const vespalib::string &name, const vespalib::string &path);
+ const OnnxModel *getOnnxModel(const vespalib::string &name) const override;
+ void addOnnxModel(const OnnxModel &model);
private:
IndexEnvironment(const IndexEnvironment &); // hide
diff --git a/searchlib/src/vespa/searchlib/transactionlog/chunks.cpp b/searchlib/src/vespa/searchlib/transactionlog/chunks.cpp
index a78db61429d..edb12b4bf36 100644
--- a/searchlib/src/vespa/searchlib/transactionlog/chunks.cpp
+++ b/searchlib/src/vespa/searchlib/transactionlog/chunks.cpp
@@ -94,7 +94,7 @@ XXH64CompressedChunk::decompress(nbostream & is, uint32_t uncompressedLen) {
::decompress(_type, uncompressedLen, compressed, uncompressed, false);
nbostream data(uncompressed.getData(), uncompressed.getDataLen());
deserializeEntries(data);
- _backing = uncompressed.stealBuffer();
+ _backing = std::move(uncompressed).stealBuffer();
is.adjustReadPos(is.size());
}
diff --git a/searchlib/src/vespa/searchlib/transactionlog/common.cpp b/searchlib/src/vespa/searchlib/transactionlog/common.cpp
index 556ebca06ec..3308f3182dc 100644
--- a/searchlib/src/vespa/searchlib/transactionlog/common.cpp
+++ b/searchlib/src/vespa/searchlib/transactionlog/common.cpp
@@ -121,11 +121,26 @@ Packet::add(const Packet::Entry & e)
_range.to(e.serial());
}
+Writer::CommitResult::CommitResult()
+ : _callBacks()
+{}
+Writer::CommitResult::CommitResult( CommitPayload commitPayLoad)
+ : _callBacks(std::move(commitPayLoad))
+{}
+
+Writer::CommitResult::~CommitResult() = default;
+
CommitChunk::CommitChunk(size_t reserveBytes, size_t reserveCount)
: _data(reserveBytes),
- _callBacks()
+ _callBacks(std::make_shared<Writer::DoneCallbacksList>())
+{
+ _callBacks->reserve(reserveCount);
+}
+
+CommitChunk::CommitChunk(size_t reserveBytes, Writer::CommitPayload postponed)
+ : _data(reserveBytes),
+ _callBacks(std::move(postponed))
{
- _callBacks.reserve(reserveCount);
}
CommitChunk::~CommitChunk() = default;
@@ -133,7 +148,12 @@ CommitChunk::~CommitChunk() = default;
void
CommitChunk::add(const Packet &packet, Writer::DoneCallback onDone) {
_data.merge(packet);
- _callBacks.emplace_back(std::move(onDone));
+ _callBacks->emplace_back(std::move(onDone));
+}
+
+Writer::CommitResult
+CommitChunk::createCommitResult() const {
+ return Writer::CommitResult(_callBacks);
}
}
diff --git a/searchlib/src/vespa/searchlib/transactionlog/common.h b/searchlib/src/vespa/searchlib/transactionlog/common.h
index 7cdfad44b87..5d07d51cdf2 100644
--- a/searchlib/src/vespa/searchlib/transactionlog/common.h
+++ b/searchlib/src/vespa/searchlib/transactionlog/common.h
@@ -16,7 +16,7 @@ class SerialNumRange
{
public:
SerialNumRange() : _from(0), _to(0) { }
- SerialNumRange(SerialNum f) : _from(f), _to(f ? f-1 : f) { }
+ explicit SerialNumRange(SerialNum f) : _from(f), _to(f ? f-1 : f) { }
SerialNumRange(SerialNum f, SerialNum t) : _from(f), _to(t) { }
bool operator == (const SerialNumRange & b) const { return cmp(b) == 0; }
bool operator < (const SerialNumRange & b) const { return cmp(b) < 0; }
@@ -63,7 +63,7 @@ public:
vespalib::ConstBufferRef _data;
};
public:
- Packet(size_t reserved) : _count(0), _range(), _buf(reserved) { }
+ explicit Packet(size_t reserved) : _count(0), _range(), _buf(reserved) { }
Packet(const void * buf, size_t sz);
void add(const Entry & data);
void clear() { _buf.clear(); _count = 0; _range.from(0); _range.to(0); }
@@ -84,8 +84,24 @@ int makeDirectory(const char * dir);
class Writer {
public:
using DoneCallback = std::shared_ptr<IDestructorCallback>;
+ using DoneCallbacksList = std::vector<DoneCallback>;
+ using CommitPayload = std::shared_ptr<DoneCallbacksList>;
+ class CommitResult {
+ public:
+ CommitResult();
+ CommitResult(CommitPayload callBacks);
+ CommitResult(CommitResult &&) noexcept = default;
+ CommitResult & operator = (CommitResult &&) noexcept = default;
+ CommitResult(const CommitResult &) = delete;
+ CommitResult & operator = (const CommitResult &) = delete;
+ ~CommitResult();
+ size_t getNumOperations() const { return _callBacks->size(); }
+ private:
+ CommitPayload _callBacks;
+ };
virtual ~Writer() = default;
- virtual void commit(const Packet & packet, DoneCallback done) = 0;
+ virtual void append(const Packet & packet, DoneCallback done) = 0;
+ [[nodiscard]] virtual CommitResult startCommit(DoneCallback onDone) = 0;
};
class WriterFactory {
@@ -106,14 +122,20 @@ public:
class CommitChunk {
public:
CommitChunk(size_t reserveBytes, size_t reserveCount);
+ CommitChunk(size_t reserveBytes, Writer::CommitPayload postponed);
~CommitChunk();
+ bool empty() const { return _callBacks->empty(); }
void add(const Packet & packet, Writer::DoneCallback onDone);
size_t sizeBytes() const { return _data.sizeBytes(); }
const Packet & getPacket() const { return _data; }
- size_t getNumCallBacks() const { return _callBacks.size(); }
+ size_t getNumCallBacks() const { return _callBacks->size(); }
+ Writer::CommitResult createCommitResult() const;
+ void setCommitDoneCallback(Writer::DoneCallback onDone) { _onCommitDone = std::move(onDone); }
+ Writer::CommitPayload stealCallbacks() { return std::move(_callBacks); }
private:
- Packet _data;
- std::vector<Writer::DoneCallback> _callBacks;
+ Packet _data;
+ Writer::CommitPayload _callBacks;
+ Writer::DoneCallback _onCommitDone;
};
}
diff --git a/searchlib/src/vespa/searchlib/transactionlog/domain.cpp b/searchlib/src/vespa/searchlib/transactionlog/domain.cpp
index 415ccafda70..bd7feec0598 100644
--- a/searchlib/src/vespa/searchlib/transactionlog/domain.cpp
+++ b/searchlib/src/vespa/searchlib/transactionlog/domain.cpp
@@ -25,17 +25,25 @@ using vespalib::makeClosure;
using vespalib::makeLambdaTask;
using vespalib::Monitor;
using vespalib::MonitorGuard;
-using search::common::FileHeaderContext;
using std::runtime_error;
using std::make_shared;
namespace search::transactionlog {
+namespace {
+
+std::unique_ptr<CommitChunk>
+createCommitChunk(const DomainConfig &cfg) {
+ return std::make_unique<CommitChunk>(cfg.getChunkSizeLimit(), cfg.getChunkSizeLimit()/256);
+}
+
+}
Domain::Domain(const string &domainName, const string & baseDir, Executor & executor,
const DomainConfig & cfg, const FileHeaderContext &fileHeaderContext)
: _config(cfg),
+ _currentChunk(createCommitChunk(cfg)),
_lastSerial(0),
- _singleCommiter(std::make_unique<vespalib::ThreadStackExecutor>(1, 128*1024)),
+ _singleCommitter(std::make_unique<vespalib::ThreadStackExecutor>(1, 128 * 1024)),
_executor(executor),
_sessionId(1),
_syncMonitor(),
@@ -105,7 +113,12 @@ Domain::addPart(SerialNum partId, bool isLastPart) {
}
}
-Domain::~Domain() { }
+Domain::~Domain() {
+ MonitorGuard guard(_currentChunkMonitor);
+ guard.broadcast();
+ commitChunk(grabCurrentChunk(guard), guard);
+ _singleCommitter->shutdown().sync();
+}
DomainInfo
Domain::getDomainInfo() const
@@ -311,15 +324,72 @@ Domain::optionallyRotateFile(SerialNum serialNum) {
}
void
-Domain::commit(const Packet & packet, Writer::DoneCallback onDone)
-{
- (void) onDone;
+Domain::append(const Packet & packet, Writer::DoneCallback onDone) {
+ vespalib::MonitorGuard guard(_currentChunkMonitor);
+ if (_lastSerial >= packet.range().from()) {
+ throw runtime_error(fmt("Incoming serial number(%" PRIu64 ") must be bigger than the last one (%" PRIu64 ").",
+ packet.range().from(), _lastSerial));
+ } else {
+ _lastSerial = packet.range().to();
+ }
+ _currentChunk->add(packet, std::move(onDone));
+ commitIfFull(guard);
+}
+
+Domain::CommitResult
+Domain::startCommit(DoneCallback onDone) {
+ vespalib::MonitorGuard guard(_currentChunkMonitor);
+ if ( !_currentChunk->empty() ) {
+ auto completed = grabCurrentChunk(guard);
+ completed->setCommitDoneCallback(std::move(onDone));
+ CommitResult result(completed->createCommitResult());
+ commitChunk(std::move(completed), guard);
+ return result;
+ }
+ return CommitResult();
+}
+
+void
+Domain::commitIfFull(const vespalib::MonitorGuard &guard) {
+ if (_currentChunk->sizeBytes() > _config.getChunkSizeLimit()) {
+ auto completed = std::move(_currentChunk);
+ _currentChunk = std::make_unique<CommitChunk>(_config.getChunkSizeLimit(), completed->stealCallbacks());
+ commitChunk(std::move(completed), guard);
+ }
+}
+
+std::unique_ptr<CommitChunk>
+Domain::grabCurrentChunk(const vespalib::MonitorGuard & guard) {
+ assert(guard.monitors(_currentChunkMonitor));
+ auto chunk = std::move(_currentChunk);
+ _currentChunk = createCommitChunk(_config);
+ return chunk;
+}
+
+void
+Domain::commitChunk(std::unique_ptr<CommitChunk> chunk, const vespalib::MonitorGuard & chunkOrderGuard) {
+ assert(chunkOrderGuard.monitors(_currentChunkMonitor));
+ _singleCommitter->execute( makeLambdaTask([this, chunk = std::move(chunk)]() mutable {
+ doCommit(std::move(chunk));
+ }));
+}
+
+void
+Domain::doCommit(std::unique_ptr<CommitChunk> chunk) {
+ const Packet & packet = chunk->getPacket();
+ if (packet.empty()) return;
+
vespalib::nbostream_longlivedbuf is(packet.getHandle().data(), packet.getHandle().size());
Packet::Entry entry;
entry.deserialize(is);
DomainPart::SP dp = optionallyRotateFile(entry.serial());
dp->commit(entry.serial(), packet);
+ if (_config.getFSyncOnCommit()) {
+ dp->sync();
+ }
cleanSessions();
+ LOG(debug, "Releasing %zu acks and %zu entries and %zu bytes.",
+ chunk->getNumCallBacks(), chunk->getPacket().size(), chunk->sizeBytes());
}
bool
diff --git a/searchlib/src/vespa/searchlib/transactionlog/domain.h b/searchlib/src/vespa/searchlib/transactionlog/domain.h
index 9adff564cc8..041ec27cf23 100644
--- a/searchlib/src/vespa/searchlib/transactionlog/domain.h
+++ b/searchlib/src/vespa/searchlib/transactionlog/domain.h
@@ -18,8 +18,9 @@ public:
using SP = std::shared_ptr<Domain>;
using Executor = vespalib::SyncableThreadExecutor;
using DomainPartSP = std::shared_ptr<DomainPart>;
+ using FileHeaderContext = common::FileHeaderContext;
Domain(const vespalib::string &name, const vespalib::string &baseDir, Executor & executor,
- const DomainConfig & cfg, const common::FileHeaderContext &fileHeaderContext);
+ const DomainConfig & cfg, const FileHeaderContext &fileHeaderContext);
~Domain() override;
@@ -27,14 +28,14 @@ public:
const vespalib::string & name() const { return _name; }
bool erase(SerialNum to);
- void commit(const Packet & packet, Writer::DoneCallback onDone) override;
+ void append(const Packet & packet, Writer::DoneCallback onDone) override;
+ [[nodiscard]] CommitResult startCommit(DoneCallback onDone) override;
int visit(const Domain::SP & self, SerialNum from, SerialNum to, std::unique_ptr<Destination> dest);
SerialNum begin() const;
SerialNum end() const;
SerialNum getSynced() const;
void triggerSyncNow();
- bool commitIfStale();
bool getMarkedDeleted() const { return _markedDeleted; }
void markDeleted() { _markedDeleted = true; }
@@ -55,6 +56,11 @@ public:
uint64_t size() const;
Domain & setConfig(const DomainConfig & cfg);
private:
+ void commitIfFull(const vespalib::MonitorGuard & guard);
+
+ std::unique_ptr<CommitChunk> grabCurrentChunk(const vespalib::MonitorGuard & guard);
+ void commitChunk(std::unique_ptr<CommitChunk> chunk, const vespalib::MonitorGuard & chunkOrderGuard);
+ void doCommit(std::unique_ptr<CommitChunk> chunk);
SerialNum begin(const vespalib::LockGuard & guard) const;
SerialNum end(const vespalib::LockGuard & guard) const;
size_t byteSize(const vespalib::LockGuard & guard) const;
@@ -72,23 +78,24 @@ private:
using DomainPartList = std::map<SerialNum, DomainPartSP>;
using DurationSeconds = std::chrono::duration<double>;
- DomainConfig _config;
- SerialNum _lastSerial;
- std::unique_ptr<Executor> _singleCommiter;
- Executor & _executor;
- std::atomic<int> _sessionId;
- vespalib::Monitor _syncMonitor;
- bool _pendingSync;
- vespalib::string _name;
- DomainPartList _parts;
- vespalib::Lock _lock;
- vespalib::Monitor _currentChunkMonitor;
- vespalib::Lock _sessionLock;
- SessionList _sessions;
- DurationSeconds _maxSessionRunTime;
- vespalib::string _baseDir;
- const common::FileHeaderContext &_fileHeaderContext;
- bool _markedDeleted;
+ DomainConfig _config;
+ std::unique_ptr<CommitChunk> _currentChunk;
+ SerialNum _lastSerial;
+ std::unique_ptr<Executor> _singleCommitter;
+ Executor &_executor;
+ std::atomic<int> _sessionId;
+ vespalib::Monitor _syncMonitor;
+ bool _pendingSync;
+ vespalib::string _name;
+ DomainPartList _parts;
+ vespalib::Lock _lock;
+ vespalib::Monitor _currentChunkMonitor;
+ vespalib::Lock _sessionLock;
+ SessionList _sessions;
+ DurationSeconds _maxSessionRunTime;
+ vespalib::string _baseDir;
+ const FileHeaderContext &_fileHeaderContext;
+ bool _markedDeleted;
};
}
diff --git a/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp b/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp
index 8855183226d..b7e02894e6b 100644
--- a/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp
+++ b/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp
@@ -252,7 +252,7 @@ DomainPart::buildPacketMapping(bool allowTruncate)
DomainPart::DomainPart(const string & name, const string & baseDir, SerialNum s, Encoding encoding,
uint8_t compressionLevel, const FileHeaderContext &fileHeaderContext, bool allowTruncate)
- : _encoding(encoding.getCrc(), Encoding::Compression::none), //TODO We do not yet support compression
+ : _encoding(encoding),
_compressionLevel(compressionLevel),
_lock(),
_fileLock(),
@@ -396,16 +396,19 @@ DomainPart::commit(SerialNum firstSerial, const Packet &packet)
if (_range.from() == 0) {
_range.from(firstSerial);
}
+ IChunk::UP chunk = IChunk::create(_encoding, _compressionLevel);
for (size_t i(0); h.size() > 0; i++) {
//LOG(spam,
//"Pos(%d) Len(%d), Lim(%d), Remaining(%d)",
//h.getPos(), h.getLength(), h.getLimit(), h.getRemaining());
- IChunk::UP chunk = IChunk::create(_encoding, _compressionLevel);
Packet::Entry entry;
entry.deserialize(h);
if (_range.to() < entry.serial()) {
chunk->add(entry);
- write(*_transLog, *chunk);
+ if (_encoding.getCompression() == Encoding::Compression::none) {
+ write(*_transLog, *chunk);
+ chunk = IChunk::create(_encoding, _compressionLevel);
+ }
_sz++;
_range.to(entry.serial());
} else {
@@ -413,6 +416,9 @@ DomainPart::commit(SerialNum firstSerial, const Packet &packet)
entry.serial(), _range.to()));
}
}
+ if ( ! chunk->getEntries().empty()) {
+ write(*_transLog, *chunk);
+ }
bool merged(false);
LockGuard guard(_lock);
diff --git a/searchlib/src/vespa/searchlib/transactionlog/translogserver.cpp b/searchlib/src/vespa/searchlib/transactionlog/translogserver.cpp
index 269ed7e9380..0c0c9186e12 100644
--- a/searchlib/src/vespa/searchlib/transactionlog/translogserver.cpp
+++ b/searchlib/src/vespa/searchlib/transactionlog/translogserver.cpp
@@ -572,7 +572,11 @@ TransLogServer::domainCommit(FRT_RPCRequest *req)
Packet packet(params[1]._data._buf, params[1]._data._len);
try {
vespalib::Gate gate;
- domain->commit(packet, make_shared<GateCallback>(gate));
+ {
+ // Need to scope in order to drain out all the callbacks.
+ domain->append(packet, make_shared<GateCallback>(gate));
+ auto keep = domain->startCommit(make_shared<IgnoreCallback>());
+ }
gate.await();
ret.AddInt32(0);
ret.AddString("ok");