diff options
Diffstat (limited to 'searchlib')
62 files changed, 543 insertions, 608 deletions
diff --git a/searchlib/src/apps/tests/biglogtest.cpp b/searchlib/src/apps/tests/biglogtest.cpp index d5c59bf5b29..bd8991edc4b 100644 --- a/searchlib/src/apps/tests/biglogtest.cpp +++ b/searchlib/src/apps/tests/biglogtest.cpp @@ -8,6 +8,7 @@ #include <vespa/vespalib/util/size_literals.h> #include <vespa/vespalib/util/threadstackexecutor.h> #include <vespa/vespalib/data/databuffer.h> +#include <filesystem> using namespace search; using search::index::DummyFileHeaderContext; @@ -148,9 +149,8 @@ Test::testDIO() { uint64_t serial = 0; - FastOS_File::EmptyDirectory(_dir.c_str()); - FastOS_File::RemoveDirectory(_dir.c_str()); - EXPECT_TRUE(FastOS_File::MakeDirectory(_dir.c_str())); + std::filesystem::remove_all(std::filesystem::path(_dir)); + std::filesystem::create_directory(std::filesystem::path(_dir)); Map lidToBlobMap; vespalib::DataBuffer buf; @@ -238,7 +238,6 @@ Test::testDIO() factory<DS> ds(_dir); checkBlobs(ds(), lidToBlobMap); } - FastOS_File::EmptyDirectory(_dir.c_str()); - FastOS_File::RemoveDirectory(_dir.c_str()); + std::filesystem::remove_all(std::filesystem::path(_dir)); TEST_FLUSH(); } diff --git a/searchlib/src/tests/alignment/alignment.cpp b/searchlib/src/tests/alignment/alignment.cpp index 06acf96e16c..3c6906f45bf 100644 --- a/searchlib/src/tests/alignment/alignment.cpp +++ b/searchlib/src/tests/alignment/alignment.cpp @@ -6,6 +6,9 @@ LOG_SETUP("alignment_test"); #include <sys/time.h> #include <vespa/vespalib/testkit/testapp.h> #include <vespa/vespalib/util/size_literals.h> +#include <vespa/vespalib/util/memory.h> + +using vespalib::Unaligned; struct Timer { rusage usage; @@ -28,7 +31,7 @@ TEST_SETUP(Test); double timeAccess(void *bufp, uint32_t len, double &sum) { - double *buf = (double *)bufp; + auto buf = Unaligned<double>::ptr(bufp); Timer timer; timer.start(); for(uint32_t i = 0; i < 512_Ki; ++i) { diff --git a/searchlib/src/tests/attribute/attribute_test.cpp b/searchlib/src/tests/attribute/attribute_test.cpp index bb516c3b451..a0906e2a488 100644 --- a/searchlib/src/tests/attribute/attribute_test.cpp +++ b/searchlib/src/tests/attribute/attribute_test.cpp @@ -791,7 +791,7 @@ AttributeTest::checkCount(const AttributePtr & ptr, uint32_t doc, uint32_t value if (!result) { return false; } - EXPECT_EQ(valueCount, ptr->get(doc, &buffer[0], buffer.size())) << (result = false, ""); + EXPECT_EQ(valueCount, ptr->get(doc, buffer.data(), buffer.size())) << (result = false, ""); if (!result) { return false; } @@ -807,7 +807,7 @@ AttributeTest::checkContent(const AttributePtr & ptr, uint32_t doc, uint32_t val std::vector<BufferType> buffer(valueCount); bool retval = true; EXPECT_TRUE((retval = retval && (static_cast<uint32_t>(ptr->getValueCount(doc)) == valueCount))); - EXPECT_TRUE((retval = retval && (ptr->get(doc, &buffer[0], buffer.size()) == valueCount))); + EXPECT_TRUE((retval = retval && (ptr->get(doc, buffer.data(), buffer.size()) == valueCount))); for (uint32_t i = 0; i < valueCount; ++i) { EXPECT_TRUE((retval = retval && (buffer[i] == values[i % range]))); } @@ -868,7 +868,7 @@ AttributeTest::testSingle(const AttributePtr & ptr, const std::vector<BufferType ptr->clearDoc(doc); } ptr->commit(); - EXPECT_EQ(1u, ptr->get(doc, &buffer[0], buffer.size())); + EXPECT_EQ(1u, ptr->get(doc, buffer.data(), buffer.size())); if (doc % 2 == 0) { if (smallUInt) { expectZero(buffer[0]); @@ -1156,7 +1156,7 @@ AttributeTest::testWeightedSet(const AttributePtr & ptr, const std::vector<Buffe EXPECT_TRUE(v.append(doc, values[j].getValue(), values[j].getWeight())); } commit(ptr); - ASSERT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount); + ASSERT_TRUE(ptr->get(doc, buffer.data(), buffer.size()) == valueCount); std::sort(buffer.begin(), buffer.begin() + valueCount, order_by_weight()); for (uint32_t j = 0; j < valueCount; ++j) { EXPECT_TRUE(buffer[j].getValue() == ordered_values[j].getValue()); @@ -1173,20 +1173,20 @@ AttributeTest::testWeightedSet(const AttributePtr & ptr, const std::vector<Buffe // append non-existent value EXPECT_TRUE(v.append(doc, values[doc].getValue(), values[doc].getWeight())); commit(ptr); - ASSERT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 1); + ASSERT_TRUE(ptr->get(doc, buffer.data(), buffer.size()) == valueCount + 1); EXPECT_TRUE(contains(buffer, valueCount + 1, values[doc])); // append existent value EXPECT_TRUE(v.append(doc, values[doc].getValue(), values[doc].getWeight() + 10)); commit(ptr); - ASSERT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 1); + ASSERT_TRUE(ptr->get(doc, buffer.data(), buffer.size()) == valueCount + 1); EXPECT_TRUE(contains(buffer, valueCount + 1, BufferType(values[doc].getValue(), values[doc].getWeight() + 10))); // append non-existent value two times EXPECT_TRUE(v.append(doc, values[doc + 1].getValue(), values[doc + 1].getWeight())); EXPECT_TRUE(v.append(doc, values[doc + 1].getValue(), values[doc + 1].getWeight() + 10)); commit(ptr); - ASSERT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 2); + ASSERT_TRUE(ptr->get(doc, buffer.data(), buffer.size()) == valueCount + 2); EXPECT_TRUE(contains(buffer, valueCount + 2, BufferType(values[doc + 1].getValue(), values[doc + 1].getWeight() + 10))); } EXPECT_EQ(ptr->getStatus().getUpdateCount(), numDocs + (numDocs*(numDocs-1))/2 + numDocs*4); @@ -1203,11 +1203,11 @@ AttributeTest::testWeightedSet(const AttributePtr & ptr, const std::vector<Buffe EXPECT_TRUE(static_cast<uint32_t>(v.getValueCount(doc)) == valueCount + 2); // remove existent value - ASSERT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 2); + ASSERT_TRUE(ptr->get(doc, buffer.data(), buffer.size()) == valueCount + 2); EXPECT_TRUE(contains_value(buffer, valueCount + 2, values[doc + 1].getValue())); EXPECT_TRUE(v.remove(doc, values[doc + 1].getValue(), 0)); commit(ptr); - ASSERT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 1); + ASSERT_TRUE(ptr->get(doc, buffer.data(), buffer.size()) == valueCount + 1); EXPECT_FALSE(contains_value(buffer, valueCount + 1, values[doc + 1].getValue())); } EXPECT_EQ(ptr->getStatus().getUpdateCount(), numDocs + (numDocs*(numDocs-1))/2 + numDocs*4 + numDocs * 2); diff --git a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp b/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp index 5c1c49d8eb5..e27065f1c25 100644 --- a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp +++ b/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp @@ -47,7 +47,7 @@ public: for (auto& key : keys) { adds.emplace_back(KeyData(key, 1)); } - _postings.apply(_trees[idx], &*adds.begin(), &*adds.end(), &*removes.begin(), &*removes.end()); + _postings.apply(_trees[idx], adds.data(), adds.data() + adds.size(), removes.data(), removes.data() + removes.size()); } void clear_tree(size_t idx) { diff --git a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp index 92c3da40fe9..90127e9ae7b 100644 --- a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp +++ b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp @@ -21,6 +21,7 @@ #include <vespa/vespalib/data/databuffer.h> #include <vespa/vespalib/testkit/testapp.h> #include <vespa/vespalib/util/compress.h> +#include <vespa/vespalib/util/memory.h> #include <vespa/vespalib/stllike/asciistream.h> #include <limits> #include <cmath> @@ -184,8 +185,8 @@ MemAttr::bufEqual(const Buffer &lhs, const Buffer &rhs) const return true; if (!EXPECT_TRUE(lhs->getDataLen() == rhs->getDataLen())) return false; - if (!EXPECT_TRUE(memcmp(lhs->getData(), rhs->getData(), - lhs->getDataLen()) == 0)) + if (!EXPECT_TRUE(vespalib::memcmp_safe(lhs->getData(), rhs->getData(), + lhs->getDataLen()) == 0)) return false; return true; } @@ -480,7 +481,7 @@ EnumeratedSaveTest::getSearch(const V &vec, const T &term, bool prefix) buildTermQuery(query, vec.getName(), ss.str(), prefix); return (static_cast<const AttributeVector &>(vec)). - getSearch(vespalib::stringref(&query[0], query.size()), + getSearch(vespalib::stringref(query.data(), query.size()), SearchContextParams()); } diff --git a/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp index 10cc14012dd..573284ffa35 100644 --- a/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp +++ b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp @@ -83,8 +83,8 @@ protected: additions.emplace_back(i, 0); } _store.apply(root, - &additions[0], &additions[0] + additions.size(), - &removals[0], &removals[0] + removals.size()); + additions.data(), additions.data() + additions.size(), + removals.data(), removals.data() + removals.size()); return root; } static std::vector<int> make_exp_sequence(int start_key, int end_key) diff --git a/searchlib/src/tests/attribute/postinglist/postinglist.cpp b/searchlib/src/tests/attribute/postinglist/postinglist.cpp index 446aeaf22a7..54efb3261c8 100644 --- a/searchlib/src/tests/attribute/postinglist/postinglist.cpp +++ b/searchlib/src/tests/attribute/postinglist/postinglist.cpp @@ -379,8 +379,8 @@ insertRandomValues(Tree &tree, std::vector<AttributePosting> additions; std::vector<uint32_t> removals; additions.push_back(newPosting); - postings.apply(newIdx, &additions[0], &additions[0] + additions.size(), - &removals[0], &removals[0] + removals.size()); + postings.apply(newIdx, additions.data(), additions.data() + additions.size(), + removals.data(), removals.data() + removals.size()); std::atomic_thread_fence(std::memory_order_release); itr.writeData(newIdx); @@ -461,8 +461,8 @@ removeRandomValues(Tree &tree, std::vector<AttributePosting> additions; std::vector<uint32_t> removals; removals.push_back(i->_docId); - postings.apply(newIdx, &additions[0], &additions[0]+additions.size(), - &removals[0], &removals[0] + removals.size()); + postings.apply(newIdx, additions.data(), additions.data() + additions.size(), + removals.data(), removals.data() + removals.size()); if (newIdx != oldIdx) { std::atomic_thread_fence(std::memory_order_release); itr.writeData(newIdx); diff --git a/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp b/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp index c6091604a97..4ca2802d22d 100644 --- a/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp +++ b/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp @@ -6,7 +6,7 @@ #include <vespa/searchcommon/common/undefinedvalues.h> #include <vespa/vespalib/testkit/testapp.h> #include <vespa/vespalib/util/size_literals.h> -#include <vespa/fastos/file.h> +#include <filesystem> #include <vespa/log/log.h> LOG_SETUP("sourceselector_test"); @@ -161,8 +161,8 @@ Test::requireThatSelectorCanSaveAndLoad(bool compactLidSpace) selector.compactLidSpace(maxDocId - 4); } - FastOS_FileInterface::EmptyAndRemoveDirectory(index_dir.c_str()); - FastOS_FileInterface::MakeDirIfNotPresentOrExit(index_dir.c_str()); + std::filesystem::remove_all(std::filesystem::path(index_dir)); + std::filesystem::create_directory(std::filesystem::path(index_dir)); SourceSelector::SaveInfo::UP save_info = selector.extractSaveInfo(base_file_name); @@ -177,7 +177,7 @@ Test::requireThatSelectorCanSaveAndLoad(bool compactLidSpace) EXPECT_EQUAL(maxDocId + 2, selector2->getDocIdLimit()); } - FastOS_FileInterface::EmptyAndRemoveDirectory(index_dir.c_str()); + std::filesystem::remove_all(std::filesystem::path(index_dir)); } void diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp index aa2c475e7b6..96039bee15b 100644 --- a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp +++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp @@ -48,7 +48,7 @@ checkCount(Attribute & vec, uint32_t doc, uint32_t valueCount, { std::vector<vespalib::string> buffer(valueCount); EXPECT_TRUE(static_cast<uint32_t>(vec.getValueCount(doc)) == valueCount); - EXPECT_TRUE(vec.get(doc, &buffer[0], buffer.size()) == valueCount); + EXPECT_TRUE(vec.get(doc, buffer.data(), buffer.size()) == valueCount); EXPECT_TRUE(std::count(buffer.begin(), buffer.end(), value) == numValues); } @@ -125,10 +125,10 @@ testMultiValue(Attribute & attr, uint32_t numDocs) // test get all std::vector<vespalib::string> values(valueCount); - ASSERT_TRUE(attr.get(doc, &values[0], valueCount) == valueCount); + ASSERT_TRUE(attr.get(doc, values.data(), valueCount) == valueCount); std::vector<uint32_t> enums(valueCount); - ASSERT_TRUE((static_cast<search::attribute::IAttributeVector &>(attr)).get(doc, &enums[0], valueCount) == valueCount); + ASSERT_TRUE((static_cast<search::attribute::IAttributeVector &>(attr)).get(doc, enums.data(), valueCount) == valueCount); auto combined = zipped_and_sorted_by_first(values, enums); for (uint32_t j = 0; j < valueCount; ++j) { @@ -167,10 +167,10 @@ testMultiValue(Attribute & attr, uint32_t numDocs) // test get all std::vector<vespalib::string> values(valueCount); - EXPECT_TRUE(attr.get(doc, &values[0], valueCount) == valueCount); + EXPECT_TRUE(attr.get(doc, values.data(), valueCount) == valueCount); std::vector<uint32_t> enums(valueCount); - EXPECT_TRUE((static_cast<search::attribute::IAttributeVector &>(attr)).get(doc, &enums[0], valueCount) == valueCount); + EXPECT_TRUE((static_cast<search::attribute::IAttributeVector &>(attr)).get(doc, enums.data(), valueCount) == valueCount); auto combined = zipped_and_sorted_by_first(values, enums); for (uint32_t j = 0; j < valueCount; ++j) { diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index bf0b74b0003..72b2f1e320a 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -157,6 +157,12 @@ public: void expect_empty_add() const { EXPECT_TRUE(_adds.empty()); } + void expect_empty_prepare_add() const { + EXPECT_TRUE(_prepare_adds.empty()); + } + void expect_empty_complete_add() const { + EXPECT_TRUE(_complete_adds.empty()); + } void expect_entry(uint32_t exp_docid, const DoubleVector& exp_vector, const EntryVector& entries) const { EXPECT_EQUAL(1u, entries.size()); EXPECT_EQUAL(exp_docid, entries.back().first); @@ -881,6 +887,30 @@ TEST_F("nearest neighbor index can be updated in two phases", DenseTensorAttribu } } +TEST_F("nearest neighbor index is NOT updated when tensor value is unchanged", DenseTensorAttributeMockIndex) +{ + auto& index = f.mock_index(); + { + auto vec_a = vec_2d(3, 5); + auto prepare_result = f.prepare_set_tensor(1, vec_a); + index.expect_prepare_add(1, {3, 5}); + f.complete_set_tensor(1, vec_a, std::move(prepare_result)); + f.assertGetTensor(vec_a, 1); + index.expect_complete_add(1, {3, 5}); + } + index.clear(); + { + // Replaces previous value with the same value + auto vec_b = vec_2d(3, 5); + auto prepare_result = f.prepare_set_tensor(1, vec_b); + EXPECT_TRUE(prepare_result.get() == nullptr); + index.expect_empty_prepare_add(); + f.complete_set_tensor(1, vec_b, std::move(prepare_result)); + f.assertGetTensor(vec_b, 1); + index.expect_empty_complete_add(); + } +} + TEST_F("clearDoc() updates nearest neighbor index", DenseTensorAttributeMockIndex) { auto& index = f.mock_index(); diff --git a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp index fffa1778c85..418182f7bbf 100644 --- a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp +++ b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp @@ -18,6 +18,7 @@ #include <vespa/vespalib/util/exceptions.h> #include <vespa/vespalib/util/threadstackexecutor.h> #include <vespa/vespalib/util/size_literals.h> +#include <vespa/vespalib/util/memory.h> #include <iomanip> using document::BucketId; @@ -275,7 +276,7 @@ void fetchAndTest(IDataStore & datastore, uint32_t lid, const void *a, size_t sz vespalib::DataBuffer buf; EXPECT_EQUAL(static_cast<ssize_t>(sz), datastore.read(lid, buf)); EXPECT_EQUAL(buf.getDataLen(), sz); - EXPECT_TRUE(memcmp(a, buf.getData(), sz) == 0); + EXPECT_TRUE(vespalib::memcmp_safe(a, buf.getData(), sz) == 0); } TEST("testTruncatedIdxFile"){ @@ -666,13 +667,13 @@ TEST("test that the integrated visit cache works.") { } TEST("testWriteRead") { - FastOS_File::RemoveDirectory("empty"); + std::filesystem::remove_all(std::filesystem::path("empty")); const char * bufA = "aaaaaaaaaaaaaaaaaaaaa"; const char * bufB = "bbbbbbbbbbbbbbbb"; const vespalib::ConstBufferRef a[2] = { vespalib::ConstBufferRef(bufA, strlen(bufA)), vespalib::ConstBufferRef(bufB, strlen(bufB))}; LogDataStore::Config config; { - EXPECT_TRUE(FastOS_File::MakeDirectory("empty")); + std::filesystem::create_directory(std::filesystem::path("empty")); DummyFileHeaderContext fileHeaderContext; vespalib::ThreadStackExecutor executor(1, 128_Ki); MyTlSyncer tlSyncer; @@ -736,7 +737,7 @@ TEST("testWriteRead") { EXPECT_EQUAL(0ul, datastore.getDiskBloat()); EXPECT_EQUAL(0ul, datastore.getMaxSpreadAsBloat()); } - FastOS_File::EmptyAndRemoveDirectory("empty"); + std::filesystem::remove_all(std::filesystem::path("empty")); } TEST("requireThatSyncTokenIsUpdatedAfterFlush") { diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp index d478adafa57..87de62dbfad 100644 --- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -2,6 +2,7 @@ #include "mysearch.h" #include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/queryeval/isourceselector.h> #include <vespa/searchlib/queryeval/blueprint.h> #include <vespa/searchlib/queryeval/intermediate_blueprints.h> #include <vespa/searchlib/queryeval/leaf_blueprints.h> @@ -23,6 +24,14 @@ using namespace search::fef; using namespace search::query; using search::BitVector; +struct InvalidSelector : ISourceSelector { + InvalidSelector() : ISourceSelector(Source()) {} + void setSource(uint32_t, Source) override { abort(); } + uint32_t getDocIdLimit() const override { abort(); } + void compactLidSpace(uint32_t) override { abort(); } + std::unique_ptr<sourceselector::Iterator> createIterator() const override { abort(); } +}; + struct WeightOrder { bool operator()(const wand::Term &t1, const wand::Term &t2) const { return (t1.weight < t2.weight); @@ -412,7 +421,7 @@ TEST("test Rank Blueprint") { } TEST("test SourceBlender Blueprint") { - ISourceSelector *selector = nullptr; // not needed here + auto selector = std::make_unique<InvalidSelector>(); // not needed here SourceBlenderBlueprint b(*selector); { // combine std::vector<Blueprint::HitEstimate> est; @@ -485,8 +494,8 @@ TEST("test SourceBlender Blueprint") { } TEST("test SourceBlender below AND optimization") { - ISourceSelector *selector_1 = 0; // the one - ISourceSelector *selector_2 = reinterpret_cast<ISourceSelector*>(100); // not the one + auto selector_1 = std::make_unique<InvalidSelector>(); // the one + auto selector_2 = std::make_unique<InvalidSelector>(); // not the one //------------------------------------------------------------------------- AndBlueprint *top = new AndBlueprint(); Blueprint::UP top_bp(top); @@ -567,8 +576,8 @@ TEST("test SourceBlender below AND optimization") { } TEST("test SourceBlender below OR optimization") { - ISourceSelector *selector_1 = 0; // the one - ISourceSelector *selector_2 = reinterpret_cast<ISourceSelector*>(100); // not the one + auto selector_1 = std::make_unique<InvalidSelector>(); // the one + auto selector_2 = std::make_unique<InvalidSelector>(); // not the one //------------------------------------------------------------------------- OrBlueprint *top = new OrBlueprint(); Blueprint::UP top_up(top); @@ -649,8 +658,8 @@ TEST("test SourceBlender below OR optimization") { } TEST("test SourceBlender below AND_NOT optimization") { - ISourceSelector *selector_1 = 0; // the one - ISourceSelector *selector_2 = reinterpret_cast<ISourceSelector*>(100); // not the one + auto selector_1 = std::make_unique<InvalidSelector>(); // the one + auto selector_2 = std::make_unique<InvalidSelector>(); // not the one //------------------------------------------------------------------------- AndNotBlueprint *top = new AndNotBlueprint(); Blueprint::UP top_up(top); @@ -741,8 +750,8 @@ TEST("test SourceBlender below AND_NOT optimization") { } TEST("test SourceBlender below RANK optimization") { - ISourceSelector *selector_1 = 0; // the one - ISourceSelector *selector_2 = reinterpret_cast<ISourceSelector*>(100); // not the one + auto selector_1 = std::make_unique<InvalidSelector>(); // the one + auto selector_2 = std::make_unique<InvalidSelector>(); // not the one //------------------------------------------------------------------------- RankBlueprint *top = new RankBlueprint(); Blueprint::UP top_up(top); @@ -876,7 +885,7 @@ TEST("test empty root node optimization and safeness") { } TEST("and with one empty child is optimized away") { - ISourceSelector *selector = 0; + auto selector = std::make_unique<InvalidSelector>(); Blueprint::UP top(ap((new SourceBlenderBlueprint(*selector))-> addChild(ap(MyLeafSpec(10).create())). addChild(ap((new AndBlueprint())-> @@ -891,7 +900,7 @@ TEST("and with one empty child is optimized away") { } TEST("test single child optimization") { - ISourceSelector *selector = 0; + auto selector = std::make_unique<InvalidSelector>(); //------------------------------------------------------------------------- Blueprint::UP top_up( ap((new AndNotBlueprint())-> diff --git a/searchlib/src/tests/queryeval/nearest_neighbor/CMakeLists.txt b/searchlib/src/tests/queryeval/nearest_neighbor/CMakeLists.txt index 3ebc8eb5251..e543a847498 100644 --- a/searchlib/src/tests/queryeval/nearest_neighbor/CMakeLists.txt +++ b/searchlib/src/tests/queryeval/nearest_neighbor/CMakeLists.txt @@ -5,5 +5,6 @@ vespa_add_executable(searchlib_nearest_neighbor_test_app TEST nearest_neighbor_test.cpp DEPENDS searchlib + GTest::GTest ) vespa_add_test(NAME searchlib_nearest_neighbor_test_app COMMAND searchlib_nearest_neighbor_test_app) diff --git a/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp b/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp index 029b74ff914..1e341eab707 100644 --- a/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp +++ b/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp @@ -1,37 +1,38 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/vespalib/testkit/test_kit.h> -#include <vespa/vespalib/util/stringfmt.h> - #include <vespa/eval/eval/simple_value.h> #include <vespa/eval/eval/tensor_spec.h> +#include <vespa/searchcommon/attribute/config.h> #include <vespa/searchlib/common/bitvector.h> #include <vespa/searchlib/common/feature.h> #include <vespa/searchlib/fef/matchdata.h> #include <vespa/searchlib/queryeval/nearest_neighbor_iterator.h> +#include <vespa/searchlib/queryeval/nns_index_iterator.h> #include <vespa/searchlib/queryeval/simpleresult.h> #include <vespa/searchlib/tensor/dense_tensor_attribute.h> +#include <vespa/searchlib/tensor/distance_calculator.h> #include <vespa/searchlib/tensor/distance_function_factory.h> +#include <vespa/vespalib/gtest/gtest.h> #include <vespa/vespalib/test/insertion_operators.h> -#include <vespa/searchlib/queryeval/nns_index_iterator.h> -#include <vespa/searchcommon/attribute/config.h> +#include <vespa/vespalib/util/stringfmt.h> #include <vespa/log/log.h> LOG_SETUP("nearest_neighbor_test"); #define EPS 1.0e-6 -using search::feature_t; -using search::tensor::DenseTensorAttribute; using search::AttributeVector; using search::BitVector; -using vespalib::eval::Value; -using vespalib::eval::ValueType; +using search::attribute::DistanceMetric; +using search::feature_t; +using search::tensor::DenseTensorAttribute; +using search::tensor::DistanceCalculator; +using search::tensor::DistanceFunction; using vespalib::eval::CellType; -using vespalib::eval::TensorSpec; using vespalib::eval::SimpleValue; -using search::tensor::DistanceFunction; -using search::attribute::DistanceMetric; +using vespalib::eval::TensorSpec; +using vespalib::eval::Value; +using vespalib::eval::ValueType; using namespace search::fef; using namespace search::queryeval; @@ -96,7 +97,7 @@ struct Fixture uint32_t sz = _attr->getNumDocs(); _global_filter = BitVector::create(sz); for (uint32_t id : docids) { - EXPECT_LESS(id, sz); + EXPECT_LT(id, sz); _global_filter->setBit(id); } } @@ -112,11 +113,11 @@ struct Fixture setTensor(docId, *t); } - const DistanceFunction *dist_fun() const { + const DistanceFunction &dist_fun() const { if (_cfg.tensorType().cell_type() == CellType::FLOAT) { - return euclid_f.get(); + return *euclid_f; } else { - return euclid_d.get(); + return *euclid_d; } } }; @@ -126,10 +127,11 @@ SimpleResult find_matches(Fixture &env, const Value &qtv, double threshold = std auto md = MatchData::makeTestInstance(2, 2); auto &tfmd = *(md->resolveTermField(0)); auto &attr = *(env._tensorAttr); + DistanceCalculator dist_calc(attr, qtv, env.dist_fun()); NearestNeighborDistanceHeap dh(2); - dh.set_distance_threshold(env.dist_fun()->convert_threshold(threshold)); + dh.set_distance_threshold(env.dist_fun().convert_threshold(threshold)); const BitVector *filter = env._global_filter.get(); - auto search = NearestNeighborIterator::create(strict, tfmd, qtv, attr, dh, filter, env.dist_fun()); + auto search = NearestNeighborIterator::create(strict, tfmd, dist_calc, dh, filter); if (strict) { return SimpleResult().searchStrict(*search, attr.getNumDocs()); } else { @@ -152,33 +154,33 @@ verify_iterator_returns_expected_results(const vespalib::string& attribute_tenso auto nullTensor = createTensor(query_tensor_type_spec, 0.0, 0.0); SimpleResult result = find_matches<true>(fixture, *nullTensor); SimpleResult nullExpect({1,2,4,6}); - EXPECT_EQUAL(result, nullExpect); + EXPECT_EQ(result, nullExpect); result = find_matches<false>(fixture, *nullTensor); - EXPECT_EQUAL(result, nullExpect); + EXPECT_EQ(result, nullExpect); auto farTensor = createTensor(query_tensor_type_spec, 9.0, 9.0); SimpleResult farExpect({1,2,3,5}); result = find_matches<true>(fixture, *farTensor); - EXPECT_EQUAL(result, farExpect); + EXPECT_EQ(result, farExpect); result = find_matches<false>(fixture, *farTensor); - EXPECT_EQUAL(result, farExpect); + EXPECT_EQ(result, farExpect); SimpleResult null_thr5_exp({1,4,6}); result = find_matches<true>(fixture, *nullTensor, 5.0); - EXPECT_EQUAL(result, null_thr5_exp); + EXPECT_EQ(result, null_thr5_exp); result = find_matches<false>(fixture, *nullTensor, 5.0); - EXPECT_EQUAL(result, null_thr5_exp); + EXPECT_EQ(result, null_thr5_exp); SimpleResult far_thr4_exp({2,5}); result = find_matches<true>(fixture, *farTensor, 4.0); - EXPECT_EQUAL(result, far_thr4_exp); + EXPECT_EQ(result, far_thr4_exp); result = find_matches<false>(fixture, *farTensor, 4.0); - EXPECT_EQUAL(result, far_thr4_exp); + EXPECT_EQ(result, far_thr4_exp); } -TEST("require that NearestNeighborIterator returns expected results") { - TEST_DO(verify_iterator_returns_expected_results(denseSpecDouble, denseSpecDouble)); - TEST_DO(verify_iterator_returns_expected_results(denseSpecFloat, denseSpecFloat)); +TEST(NnsIndexIteratorTest, require_that_iterator_returns_expected_results) { + verify_iterator_returns_expected_results(denseSpecDouble, denseSpecDouble); + verify_iterator_returns_expected_results(denseSpecFloat, denseSpecFloat); } void @@ -197,20 +199,20 @@ verify_iterator_returns_filtered_results(const vespalib::string& attribute_tenso auto nullTensor = createTensor(query_tensor_type_spec, 0.0, 0.0); SimpleResult result = find_matches<true>(fixture, *nullTensor); SimpleResult nullExpect({1,3,4}); - EXPECT_EQUAL(result, nullExpect); + EXPECT_EQ(result, nullExpect); result = find_matches<false>(fixture, *nullTensor); - EXPECT_EQUAL(result, nullExpect); + EXPECT_EQ(result, nullExpect); auto farTensor = createTensor(query_tensor_type_spec, 9.0, 9.0); SimpleResult farExpect({1,3,4}); result = find_matches<true>(fixture, *farTensor); - EXPECT_EQUAL(result, farExpect); + EXPECT_EQ(result, farExpect); result = find_matches<false>(fixture, *farTensor); - EXPECT_EQUAL(result, farExpect); + EXPECT_EQ(result, farExpect); } -TEST("require that NearestNeighborIterator returns filtered results") { - TEST_DO(verify_iterator_returns_filtered_results(denseSpecDouble, denseSpecDouble)); - TEST_DO(verify_iterator_returns_filtered_results(denseSpecFloat, denseSpecFloat)); +TEST(NnsIndexIteratorTest, require_that_iterator_returns_filtered_results) { + verify_iterator_returns_filtered_results(denseSpecDouble, denseSpecDouble); + verify_iterator_returns_filtered_results(denseSpecFloat, denseSpecFloat); } template <bool strict> @@ -218,8 +220,9 @@ std::vector<feature_t> get_rawscores(Fixture &env, const Value &qtv) { auto md = MatchData::makeTestInstance(2, 2); auto &tfmd = *(md->resolveTermField(0)); auto &attr = *(env._tensorAttr); + DistanceCalculator dist_calc(attr, qtv, env.dist_fun()); NearestNeighborDistanceHeap dh(2); - auto search = NearestNeighborIterator::create(strict, tfmd, qtv, attr, dh, nullptr, env.dist_fun()); + auto search = NearestNeighborIterator::create(strict, tfmd, dist_calc, dh, nullptr); uint32_t limit = attr.getNumDocs(); uint32_t docid = 1; search->initRange(docid, limit); @@ -249,63 +252,63 @@ verify_iterator_sets_expected_rawscore(const vespalib::string& attribute_tensor_ auto nullTensor = createTensor(query_tensor_type_spec, 0.0, 0.0); std::vector<feature_t> got = get_rawscores<true>(fixture, *nullTensor); std::vector<feature_t> expected{5.0, 13.0, 10.0, 10.0, 5.0}; - EXPECT_EQUAL(got.size(), expected.size()); + EXPECT_EQ(got.size(), expected.size()); for (size_t i = 0; i < expected.size(); ++i) { - EXPECT_APPROX(1.0/(1.0+expected[i]), got[i], EPS); + EXPECT_NEAR(1.0/(1.0+expected[i]), got[i], EPS); } got = get_rawscores<false>(fixture, *nullTensor); - EXPECT_EQUAL(got.size(), expected.size()); + EXPECT_EQ(got.size(), expected.size()); for (size_t i = 0; i < expected.size(); ++i) { - EXPECT_APPROX(1.0/(1.0+expected[i]), got[i], EPS); + EXPECT_NEAR(1.0/(1.0+expected[i]), got[i], EPS); } } -TEST("require that NearestNeighborIterator sets expected rawscore") { - TEST_DO(verify_iterator_sets_expected_rawscore(denseSpecDouble, denseSpecDouble)); - TEST_DO(verify_iterator_sets_expected_rawscore(denseSpecFloat, denseSpecFloat)); +TEST(NnsIndexIteratorTest, require_that_iterator_sets_expected_rawscore) { + verify_iterator_sets_expected_rawscore(denseSpecDouble, denseSpecDouble); + verify_iterator_sets_expected_rawscore(denseSpecFloat, denseSpecFloat); } -TEST("require that NnsIndexIterator works as expected") { +TEST(NnsIndexIteratorTest, require_that_iterator_works_as_expected) { std::vector<NnsIndexIterator::Hit> hits{{2,4.0}, {3,9.0}, {5,1.0}, {8,16.0}, {9,36.0}}; auto md = MatchData::makeTestInstance(2, 2); auto &tfmd = *(md->resolveTermField(0)); - auto search = NnsIndexIterator::create(tfmd, hits, euclid_d.get()); + auto search = NnsIndexIterator::create(tfmd, hits, *euclid_d); uint32_t docid = 1; search->initFullRange(); bool match = search->seek(docid); EXPECT_FALSE(match); EXPECT_FALSE(search->isAtEnd()); - EXPECT_EQUAL(2u, search->getDocId()); + EXPECT_EQ(2u, search->getDocId()); docid = 2; match = search->seek(docid); EXPECT_TRUE(match); EXPECT_FALSE(search->isAtEnd()); - EXPECT_EQUAL(docid, search->getDocId()); + EXPECT_EQ(docid, search->getDocId()); search->unpack(docid); - EXPECT_APPROX(1.0/(1.0+2.0), tfmd.getRawScore(), EPS); + EXPECT_NEAR(1.0/(1.0+2.0), tfmd.getRawScore(), EPS); docid = 3; match = search->seek(docid); EXPECT_TRUE(match); EXPECT_FALSE(search->isAtEnd()); - EXPECT_EQUAL(docid, search->getDocId()); + EXPECT_EQ(docid, search->getDocId()); search->unpack(docid); - EXPECT_APPROX(1.0/(1.0+3.0), tfmd.getRawScore(), EPS); + EXPECT_NEAR(1.0/(1.0+3.0), tfmd.getRawScore(), EPS); docid = 4; match = search->seek(docid); EXPECT_FALSE(match); EXPECT_FALSE(search->isAtEnd()); - EXPECT_EQUAL(5u, search->getDocId()); + EXPECT_EQ(5u, search->getDocId()); docid = 6; match = search->seek(docid); EXPECT_FALSE(match); EXPECT_FALSE(search->isAtEnd()); - EXPECT_EQUAL(8u, search->getDocId()); + EXPECT_EQ(8u, search->getDocId()); docid = 8; search->unpack(docid); - EXPECT_APPROX(1.0/(1.0+4.0), tfmd.getRawScore(), EPS); + EXPECT_NEAR(1.0/(1.0+4.0), tfmd.getRawScore(), EPS); docid = 9; match = search->seek(docid); EXPECT_TRUE(match); @@ -320,10 +323,10 @@ TEST("require that NnsIndexIterator works as expected") { match = search->seek(docid); EXPECT_FALSE(match); EXPECT_FALSE(search->isAtEnd()); - EXPECT_EQUAL(5u, search->getDocId()); + EXPECT_EQ(5u, search->getDocId()); docid = 5; search->unpack(docid); - EXPECT_APPROX(1.0/(1.0+1.0), tfmd.getRawScore(), EPS); + EXPECT_NEAR(1.0/(1.0+1.0), tfmd.getRawScore(), EPS); EXPECT_FALSE(search->isAtEnd()); docid = 6; match = search->seek(docid); @@ -331,4 +334,4 @@ TEST("require that NnsIndexIterator works as expected") { EXPECT_TRUE(search->isAtEnd()); } -TEST_MAIN() { TEST_RUN_ALL(); } +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_operation.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_operation.cpp index 57980237f21..55577b3916c 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_operation.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_operation.cpp @@ -111,7 +111,7 @@ public: void operator()(IAttributeVector &attributeVector) override { OP op(attributeVector, _operand); if (op.valid()) { - const RankedHit *hits = &_result.second[0]; + const RankedHit *hits = _result.second.data(); size_t numHits = _result.second.size(); std::for_each(hits, hits+numHits, [&op](RankedHit hit) { op(hit.getDocId()); }); if (_result.first) { diff --git a/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.cpp b/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.cpp index b514275f75d..80e9b28139a 100644 --- a/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.cpp +++ b/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.cpp @@ -13,7 +13,7 @@ sortLoadedByEnum(LoadedEnumAttributeVector &loaded) LoadedEnumAttribute::EnumCompare, 56>:: radix_sort(LoadedEnumAttribute::EnumRadix(), LoadedEnumAttribute::EnumCompare(), - &loaded[0], loaded.size(), 16); + loaded.data(), loaded.size(), 16); } } diff --git a/searchlib/src/vespa/searchlib/attribute/loadednumericvalue.cpp b/searchlib/src/vespa/searchlib/attribute/loadednumericvalue.cpp index 0ffd6e2c845..4d3912ae24d 100644 --- a/searchlib/src/vespa/searchlib/attribute/loadednumericvalue.cpp +++ b/searchlib/src/vespa/searchlib/attribute/loadednumericvalue.cpp @@ -14,7 +14,7 @@ sortLoadedByValue(SequentialReadModifyWriteVector<LoadedNumericValue<T>> & loade typename LoadedNumericValue<T>::ValueCompare, 56>:: radix_sort(typename LoadedNumericValue<T>::ValueRadix(), typename LoadedNumericValue<T>::ValueCompare(), - &loaded[0], + loaded.data(), loaded.size(), 16); } @@ -29,7 +29,7 @@ sortLoadedByDocId(SequentialReadModifyWriteVector<LoadedNumericValue<T>> & loade typename LoadedNumericValue<T>::DocOrderCompare, 56>:: radix_sort(typename LoadedNumericValue<T>::DocRadix(), typename LoadedNumericValue<T>::DocOrderCompare(), - &loaded[0], + loaded.data(), loaded.size(), 16); } diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h index cc128b0eef1..0a29b4af48d 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h @@ -49,7 +49,7 @@ protected: using WType = MultiValueType; uint32_t get(DocId doc, const WType * & values) const { MultiValueArrayRef array(this->_mvMapping.get(doc)); - values = &array[0]; + values = array.data(); return array.size(); } diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp index 0e0dceaf254..79276ce6f55 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp @@ -69,11 +69,11 @@ PostingListAttributeBase<P>::handle_load_posting_lists_and_update_enum_store(enu postings.removeDups(); newIndex = EntryRef(); _postingList.apply(newIndex, - &postings._additions[0], - &postings._additions[0] + + postings._additions.data(), + postings._additions.data() + postings._additions.size(), - &postings._removals[0], - &postings._removals[0] + + postings._removals.data(), + postings._removals.data() + postings._removals.size()); posting_indexes[posting_enum] = newIndex; postings.clear(); @@ -91,10 +91,10 @@ PostingListAttributeBase<P>::handle_load_posting_lists_and_update_enum_store(enu postings.removeDups(); newIndex = EntryRef(); _postingList.apply(newIndex, - &postings._additions[0], - &postings._additions[0] + postings._additions.size(), - &postings._removals[0], - &postings._removals[0] + postings._removals.size()); + postings._additions.data(), + postings._additions.data() + postings._additions.size(), + postings._removals.data(), + postings._removals.data() + postings._removals.size()); posting_indexes[posting_enum] = newIndex; loader.build_dictionary(); loader.free_unused_values(); @@ -158,10 +158,10 @@ clearPostings(attribute::IAttributeVector::EnumHandle eidx, auto updater = [this, &postings](EntryRef posting_idx) -> EntryRef { _postingList.apply(posting_idx, - &postings._additions[0], - &postings._additions[0] + postings._additions.size(), - &postings._removals[0], - &postings._removals[0] + postings._removals.size()); + postings._additions.data(), + postings._additions.data() + postings._additions.size(), + postings._removals.data(), + postings._removals.data() + postings._removals.size()); return posting_idx; }; _dictionary.update_posting_list(er, cmp, updater); @@ -240,11 +240,11 @@ handle_load_posting_lists(LoadedVector& loaded) postings.removeDups(); newIndex = EntryRef(); _postingList.apply(newIndex, - &postings._additions[0], - &postings._additions[0] + + postings._additions.data(), + postings._additions.data() + postings._additions.size(), - &postings._removals[0], - &postings._removals[0] + + postings._removals.data(), + postings._removals.data() + postings._removals.size()); postings.clear(); if (value._docId < docIdLimit) { @@ -262,11 +262,11 @@ handle_load_posting_lists(LoadedVector& loaded) postings.removeDups(); newIndex = EntryRef(); _postingList.apply(newIndex, - &postings._additions[0], - &postings._additions[0] + + postings._additions.data(), + postings._additions.data() + postings._additions.size(), - &postings._removals[0], - &postings._removals[0] + postings._removals.size()); + postings._removals.data(), + postings._removals.data() + postings._removals.size()); similarValues[0]._pidx = newIndex; for (size_t i(0), m(similarValues.size()); i < m; i++) { loaded.write(similarValues[i]); diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp index c17627a5026..d8426ce1a45 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp @@ -20,8 +20,8 @@ PostingListSearchContext(const IEnumStoreDictionary& dictionary, const ISearchContext &baseSearchCtx) : _dictionary(dictionary), _frozenDictionary(_dictionary.get_has_btree_dictionary() ? _dictionary.get_posting_dictionary().getFrozenView() : FrozenDictionary()), - _lowerDictItr(BTreeNode::Ref(), _frozenDictionary.getAllocator()), - _upperDictItr(BTreeNode::Ref(), _frozenDictionary.getAllocator()), + _lowerDictItr(_dictionary.get_has_btree_dictionary() ? DictionaryConstIterator(BTreeNode::Ref(), _frozenDictionary.getAllocator()) : DictionaryConstIterator()), + _upperDictItr(_dictionary.get_has_btree_dictionary() ? DictionaryConstIterator(BTreeNode::Ref(), _frozenDictionary.getAllocator()) : DictionaryConstIterator()), _uniqueValues(0u), _docIdLimit(docIdLimit), _dictSize(_frozenDictionary.size()), diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.cpp b/searchlib/src/vespa/searchlib/bitcompression/compression.cpp index 5ac506e4fc2..b60250256f4 100644 --- a/searchlib/src/vespa/searchlib/bitcompression/compression.cpp +++ b/searchlib/src/vespa/searchlib/bitcompression/compression.cpp @@ -98,8 +98,7 @@ EncodeContext64EBase<false>::writeBits(uint64_t data, uint32_t length) // Shift new bits into cacheInt _cacheInt |= (data << (64 - _cacheFree)); *_valI++ = bswap(_cacheInt); - - data >>= _cacheFree; + data = (_cacheFree < 64) ? data >> _cacheFree : 0; // Initialize variables for receiving new bits length -= _cacheFree; _cacheInt = 0; @@ -194,9 +193,9 @@ writeBits(const uint64_t *bits, uint32_t bitOffset, uint32_t bitLength) if (bitOffset + bitLength < 64) { uint32_t length = bitLength; if (bigEndian) { - uint64_t data = (EC::bswap(*bits) >> - (64 - bitOffset - length)) & - CodingTables::_intMask64[length]; + uint64_t data = ((bitOffset + length) > 0) + ? (EC::bswap(*bits) >> (64 - bitOffset - length)) & CodingTables::_intMask64[length] + : 0; UC64BE_WRITEBITS_NS(o, EC); } else { uint64_t data = (EC::bswap(*bits) >> bitOffset) & diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.h b/searchlib/src/vespa/searchlib/bitcompression/compression.h index 45005d499fb..74231638213 100644 --- a/searchlib/src/vespa/searchlib/bitcompression/compression.h +++ b/searchlib/src/vespa/searchlib/bitcompression/compression.h @@ -165,8 +165,7 @@ public: #define UC64BE_DECODEEXPGOLOMB(val, valI, preRead, cacheInt, k, EC) \ do { \ - length = \ - 63 - ::search::bitcompression::EncodeContext64BE::asmlog2(val); \ + length = __builtin_clzl(val); \ unsigned int olength = length; \ val <<= length; \ if (__builtin_expect(length * 2 + 1 + (k) > 64, false)) { \ @@ -174,8 +173,9 @@ public: length = 0; \ } \ val64 = (val >> (63 - olength - (k))) - (UINT64_C(1) << (k)); \ - val <<= olength + 1 + (k); \ - if (__builtin_expect(olength + 1 + (k) == 64, false)) { \ + if (__builtin_expect(olength + 1 + (k) != 64, true)) { \ + val <<= olength + 1 + (k); \ + } else { \ val = 0; \ } \ length += olength + 1 + (k); \ @@ -193,8 +193,7 @@ public: #define UC64BE_DECODEEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, \ EC) \ do { \ - length = \ - 63 - ::search::bitcompression::EncodeContext64BE::asmlog2(val); \ + length = __builtin_clzl(val); \ val <<= length; \ val64 = (val >> (63 - length - (k))) - (UINT64_C(1) << (k)); \ val <<= length + 1 + (k); \ @@ -219,8 +218,7 @@ public: #define UC64BE_DECODEEXPGOLOMB_SMALL_APPLY(val, valI, preRead, cacheInt, \ k, EC, resop) \ do { \ - length = \ - 63 - ::search::bitcompression::EncodeContext64BE::asmlog2(val); \ + length = __builtin_clzl(val); \ val <<= length; \ resop (val >> (63 - length - (k))) - (UINT64_C(1) << (k)); \ val <<= length + 1 + (k); \ @@ -231,16 +229,16 @@ public: #define UC64BE_SKIPEXPGOLOMB(val, valI, preRead, cacheInt, k, EC) \ do { \ - length = \ - 63 - ::search::bitcompression::EncodeContext64BE::asmlog2(val); \ + length = __builtin_clzl(val); \ unsigned int olength = length; \ val <<= length; \ if (__builtin_expect(length * 2 + 1 + (k) > 64, false)) { \ UC64BE_READBITS(val, valI, preRead, cacheInt, EC); \ length = 0; \ } \ - val <<= olength + 1 + (k); \ - if (__builtin_expect(olength + 1 + (k) == 64, false)) { \ + if (__builtin_expect(olength + 1 + (k) != 64, true)) { \ + val <<= olength + 1 + (k); \ + } else { \ val = 0; \ } \ length += olength + 1 + (k); \ @@ -258,8 +256,7 @@ public: #define UC64BE_SKIPEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, \ EC) \ do { \ - length = \ - 63 - ::search::bitcompression::EncodeContext64BE::asmlog2(val); \ + length = __builtin_clzl(val); \ val <<= length; \ val <<= length + 1 + (k); \ length += length + 1 + (k); \ @@ -394,11 +391,11 @@ public: #define UC64LE_DECODEEXPGOLOMB(val, valI, preRead, cacheInt, k, EC) \ do { \ - unsigned int olength = \ - ::search::bitcompression::EncodeContext64LE::ffsl(val); \ + unsigned int olength = __builtin_ctzl(val); \ length = olength + 1; \ - val >>= length; \ - if (__builtin_expect(length == 64, false)) { \ + if (__builtin_expect(length != 64, true)) { \ + val >>= length; \ + } else { \ val = 0; \ } \ if (__builtin_expect(olength * 2 + 1 + (k) > 64, false)) { \ @@ -423,7 +420,7 @@ public: #define UC64LE_DECODEEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, \ EC) \ do { \ - length = ::search::bitcompression::EncodeContext64LE::ffsl(val); \ + length = __builtin_ctzl(val); \ val >>= length + 1; \ val64 = (val & ((UINT64_C(1) << (length + (k))) - 1)) + \ (UINT64_C(1) << (length + (k))) - (UINT64_C(1) << (k)); \ @@ -449,7 +446,7 @@ public: #define UC64LE_DECODEEXPGOLOMB_SMALL_APPLY(val, valI, preRead, cacheInt, \ k, EC, resop) \ do { \ - length = ::search::bitcompression::EncodeContext64LE::ffsl(val); \ + length = __builtin_ctzl(val); \ val >>= length + 1; \ resop (val & ((UINT64_C(1) << (length + (k))) - 1)) + \ (UINT64_C(1) << (length + (k))) - (UINT64_C(1) << (k)); \ @@ -461,11 +458,11 @@ public: #define UC64LE_SKIPEXPGOLOMB(val, valI, preRead, cacheInt, k, EC) \ do { \ - unsigned int olength = \ - ::search::bitcompression::EncodeContext64LE::ffsl(val); \ + unsigned int olength = __builtin_ctzl(val); \ length = olength + 1; \ - val >>= length; \ - if (__builtin_expect(length == 64, false)) { \ + if (__builtin_expect(length != 64, true)) { \ + val >>= length; \ + } else { \ val = 0; \ } \ if (__builtin_expect(olength * 2 + 1 + (k) > 64, false)) { \ @@ -488,7 +485,7 @@ public: #define UC64LE_SKIPEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, \ EC) \ do { \ - length = ::search::bitcompression::EncodeContext64LE::ffsl(val); \ + length = __builtin_ctzl(val); \ val >>= length + 1; \ val >>= length + (k); \ length += length + 1 + (k); \ @@ -507,7 +504,11 @@ public: if (length >= cacheFree) { \ cacheInt |= (data << (64 - cacheFree)); \ *bufI++ = EC::bswap(cacheInt); \ - data >>= cacheFree; \ + if (__builtin_expect(cacheFree != 64, true)) { \ + data >>= cacheFree; \ + } else { \ + data = 0; \ + } \ length -= cacheFree; \ cacheInt = 0; \ cacheFree = 64; \ diff --git a/searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp b/searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp index 9a711a028fb..b0a201d913e 100644 --- a/searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp +++ b/searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp @@ -365,7 +365,7 @@ PageDict4SPWriter::flushPage() _prevL3Size - wordsSize * 8; e.padBits(padding); if (wordsSize > 0) { - e.writeBytes(vespalib::ConstArrayRef<char>(&_words[0], wordsSize)); + e.writeBytes(vespalib::ConstArrayRef<char>(_words.data(), wordsSize)); } assert((e.getWriteOffset() & (getPageBitSize() - 1)) == 0); _l6Word = _l3Word; @@ -676,7 +676,7 @@ PageDict4PWriter::flushPage() _countsSize - _countsWordOffset * 8; e.padBits(padding); if (_countsWordOffset > 0) { - e.writeBytes(vespalib::ConstArrayRef(&_words[0], _countsWordOffset)); + e.writeBytes(vespalib::ConstArrayRef(_words.data(), _countsWordOffset)); } assert((e.getWriteOffset() & (getPageBitSize() - 1)) == 0); _l3Word = _pendingCountsWord; @@ -1055,7 +1055,7 @@ lookup(vespalib::stringref key) L7Vector::const_iterator l7lb; l7lb = std::lower_bound(_l7.begin(), _l7.end(), key); - l7Pos = &*l7lb - &_l7[0]; + l7Pos = l7lb - _l7.cbegin(); StartOffset startOffset; uint64_t pageNum = _pFirstPageNum; uint32_t sparsePageNum = _spFirstPageNum; @@ -1863,7 +1863,7 @@ PageDict4Reader::setupPage() uint32_t padding = (getPageBitSize() - wordsSize * 8 - pageOffset) & (getPageBitSize() - 1); _pd.skipBits(padding); _words.resize(wordsSize); - _pd.readBytes(reinterpret_cast<uint8_t *>(&_words[0]), wordsSize); + _pd.readBytes(reinterpret_cast<uint8_t *>(_words.data()), wordsSize); _wc = _words.begin(); _we = _words.end(); checkWordOffsets(_words, _l1SkipChecks, _l2SkipChecks); @@ -1985,7 +1985,7 @@ PageDict4Reader::setupSPage() uint32_t padding = getPageBitSize() - wordsSize * 8 - pageOffset; _spd.skipBits(padding); _spwords.resize(wordsSize); - _spd.readBytes(reinterpret_cast<uint8_t *>(&_spwords[0]), wordsSize); + _spd.readBytes(reinterpret_cast<uint8_t *>(_spwords.data()), wordsSize); _spwc = _spwords.begin(); _spwe = _spwords.end(); checkWordOffsets(_spwords, _l4SkipChecks, _l5SkipChecks); diff --git a/searchlib/src/vespa/searchlib/common/geo_location.cpp b/searchlib/src/vespa/searchlib/common/geo_location.cpp index 1806ba1338c..20408a93a82 100644 --- a/searchlib/src/vespa/searchlib/common/geo_location.cpp +++ b/searchlib/src/vespa/searchlib/common/geo_location.cpp @@ -8,6 +8,12 @@ namespace search::common { namespace { +uint64_t abs_diff(int32_t a, int32_t b) { + return (a > b) + ? (int64_t(a) - int64_t(b)) + : (int64_t(b) - int64_t(a)); +} + ZCurve::BoundingBox to_z(GeoLocation::Box box) { return ZCurve::BoundingBox(box.x.low, box.x.high, box.y.low, box.y.high); @@ -158,13 +164,13 @@ GeoLocation::GeoLocation(Box b, Point p, uint32_t r, Aspect xa) uint64_t GeoLocation::sq_distance_to(Point p) const { if (has_point) { - uint64_t dx = (p.x > point.x) ? (p.x - point.x) : (point.x - p.x); + uint64_t dx = abs_diff(p.x, point.x); if (x_aspect.active()) { // x_aspect is a 32-bit fixed-point number in range [0,1] // this implements dx = (dx * x_aspect) dx = (dx * x_aspect.multiplier) >> 32; } - uint64_t dy = (p.y > point.y) ? (p.y - point.y) : (point.y - p.y); + uint64_t dy = abs_diff(p.y, point.y); return dx*dx + dy*dy; } return 0; diff --git a/searchlib/src/vespa/searchlib/common/geo_location.h b/searchlib/src/vespa/searchlib/common/geo_location.h index 07e6fd055cc..09c77037b03 100644 --- a/searchlib/src/vespa/searchlib/common/geo_location.h +++ b/searchlib/src/vespa/searchlib/common/geo_location.h @@ -2,10 +2,10 @@ #pragma once +#include <vespa/vespalib/geo/zcurve.h> #include <string> #include <cstdint> #include <limits> -#include <vespa/vespalib/geo/zcurve.h> namespace search::common { diff --git a/searchlib/src/vespa/searchlib/common/geo_location_parser.cpp b/searchlib/src/vespa/searchlib/common/geo_location_parser.cpp index 8794169b4a6..d829e1b93e4 100644 --- a/searchlib/src/vespa/searchlib/common/geo_location_parser.cpp +++ b/searchlib/src/vespa/searchlib/common/geo_location_parser.cpp @@ -15,7 +15,7 @@ LOG_SETUP(".searchlib.common.geo_location_parser"); namespace { int getInt(const char * &p) { - int val; + uint32_t val; bool isminus; val = 0; isminus = false; diff --git a/searchlib/src/vespa/searchlib/common/geo_location_spec.h b/searchlib/src/vespa/searchlib/common/geo_location_spec.h index ea0104aa058..f1e3671181d 100644 --- a/searchlib/src/vespa/searchlib/common/geo_location_spec.h +++ b/searchlib/src/vespa/searchlib/common/geo_location_spec.h @@ -2,9 +2,9 @@ #pragma once +#include "geo_location.h" #include <string> #include <cstdint> -#include "geo_location.h" namespace search::common { diff --git a/searchlib/src/vespa/searchlib/common/resultset.cpp b/searchlib/src/vespa/searchlib/common/resultset.cpp index 2e1e431ad82..3a88a310fe8 100644 --- a/searchlib/src/vespa/searchlib/common/resultset.cpp +++ b/searchlib/src/vespa/searchlib/common/resultset.cpp @@ -99,7 +99,7 @@ ResultSet::mergeWithBitOverflow(HitRank default_value) void ResultSet::sort(FastS_IResultSorter & sorter, unsigned int ntop) { - sorter.sortResults(&_rankedHitsArray[0], _rankedHitsArray.size(), ntop); + sorter.sortResults(_rankedHitsArray.data(), _rankedHitsArray.size(), ntop); } std::pair<std::unique_ptr<BitVector>, vespalib::Array<RankedHit>> diff --git a/searchlib/src/vespa/searchlib/common/resultset.h b/searchlib/src/vespa/searchlib/common/resultset.h index 6824fc4170d..a4823d2f372 100644 --- a/searchlib/src/vespa/searchlib/common/resultset.h +++ b/searchlib/src/vespa/searchlib/common/resultset.h @@ -26,7 +26,7 @@ public: void allocArray(unsigned int arrayAllocated); void setBitOverflow(std::unique_ptr<BitVector> newBitOverflow); - const RankedHit * getArray() const { return &_rankedHitsArray[0]; } + const RankedHit * getArray() const { return _rankedHitsArray.data(); } RankedHit & operator [](uint32_t i) { return _rankedHitsArray[i]; } void push_back(RankedHit hit) { _rankedHitsArray.push_back_fast(hit); } unsigned int getArrayUsed() const { return _rankedHitsArray.size(); } diff --git a/searchlib/src/vespa/searchlib/common/sortresults.cpp b/searchlib/src/vespa/searchlib/common/sortresults.cpp index f1756712d2c..59a47dd3312 100644 --- a/searchlib/src/vespa/searchlib/common/sortresults.cpp +++ b/searchlib/src/vespa/searchlib/common/sortresults.cpp @@ -209,9 +209,9 @@ FastS_SortSpec::realloc(uint32_t n, size_t & variableWidth, uint32_t & available variableWidth *= 2; available += variableWidth * n; dataSize += variableWidth * n; - uint32_t byteUsed = mySortData - &_binarySortData[0]; + uint32_t byteUsed = mySortData - _binarySortData.data(); _binarySortData.resize(dataSize); - return &_binarySortData[0] + byteUsed; + return _binarySortData.data() + byteUsed; } void @@ -237,7 +237,7 @@ FastS_SortSpec::initSortData(const RankedHit *hits, uint32_t n) uint32_t dataSize = (fixedWidth + variableWidth) * n; uint32_t available = dataSize; _binarySortData.resize(dataSize); - uint8_t *mySortData = &_binarySortData[0]; + uint8_t *mySortData = _binarySortData.data(); _sortDataArray.resize(n); @@ -342,7 +342,7 @@ void FastS_SortSpec::copySortData(uint32_t offset, uint32_t n, uint32_t *idx, char *buf) { - const uint8_t * sortData = &_binarySortData[0]; + const uint8_t * sortData = _binarySortData.data(); uint32_t totalLen = 0; for (uint32_t i = offset; i < (offset + n); ++i, ++idx) { const uint8_t * src = sortData + _sortDataArray[i]._idx; @@ -378,7 +378,7 @@ inline int FastS_SortSpec::Compare(const FastS_SortSpec *self, const SortData &a, const SortData &b) { - const uint8_t * ref = &(self->_binarySortData[0]); + const uint8_t * ref = self->_binarySortData.data(); uint32_t len = a._len < b._len ? a._len : b._len; int retval = memcmp(ref + a._idx, ref + b._idx, len); @@ -448,10 +448,10 @@ void FastS_SortSpec::sortResults(RankedHit a[], uint32_t n, uint32_t topn) { initSortData(a, n); - SortData * sortData = &_sortDataArray[0]; + SortData * sortData = _sortDataArray.data(); { Array<uint32_t> radixScratchPad(n, Alloc::alloc(0, MMAP_LIMIT)); - search::radix_sort(SortDataRadix(&_binarySortData[0]), StdSortDataCompare(&_binarySortData[0]), SortDataEof(), 1, sortData, n, &radixScratchPad[0], 0, 96, topn); + search::radix_sort(SortDataRadix(_binarySortData.data()), StdSortDataCompare(_binarySortData.data()), SortDataEof(), 1, sortData, n, radixScratchPad.data(), 0, 96, topn); } for (uint32_t i(0), m(_sortDataArray.size()); i < m; ++i) { a[i]._rankValue = _sortDataArray[i]._rankValue; diff --git a/searchlib/src/vespa/searchlib/diskindex/docidmapper.h b/searchlib/src/vespa/searchlib/diskindex/docidmapper.h index 90bfa658a72..7c6f53720f2 100644 --- a/searchlib/src/vespa/searchlib/diskindex/docidmapper.h +++ b/searchlib/src/vespa/searchlib/diskindex/docidmapper.h @@ -42,9 +42,9 @@ public: { } void setup(const DocIdMapping &mapping) { - _selector = (mapping._selector != nullptr) ? &((*mapping._selector)[0]) : nullptr; + _selector = (mapping._selector != nullptr) ? mapping._selector->data() : nullptr; _docIdLimit = mapping._docIdLimit; - _selectorLimit = (mapping._selector != nullptr) ? (*mapping._selector).size() : 0u; + _selectorLimit = (mapping._selector != nullptr) ? mapping._selector->size() : 0u; _selectorId = mapping._selectorId; } diff --git a/searchlib/src/vespa/searchlib/diskindex/field_merger.cpp b/searchlib/src/vespa/searchlib/diskindex/field_merger.cpp index 860903174bc..d27ab2e7787 100644 --- a/searchlib/src/vespa/searchlib/diskindex/field_merger.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/field_merger.cpp @@ -12,12 +12,12 @@ #include <vespa/searchlib/common/i_flush_token.h> #include <vespa/searchlib/index/schemautil.h> #include <vespa/searchlib/util/filekit.h> -#include <vespa/searchlib/util/dirtraverse.h> #include <vespa/searchlib/util/posting_priority_queue_merger.hpp> #include <vespa/vespalib/io/fileutil.h> #include <vespa/vespalib/stllike/asciistream.h> #include <vespa/vespalib/util/exceptions.h> #include <filesystem> +#include <system_error> #include <vespa/log/log.h> @@ -107,8 +107,9 @@ FieldMerger::clean_tmp_dirs() while (i > 0) { i--; vespalib::string tmpindexpath = createTmpPath(_field_dir, i); - search::DirectoryTraverse dt(tmpindexpath.c_str()); - if (!dt.RemoveTree()) { + std::error_code ec; + std::filesystem::remove_all(std::filesystem::path(tmpindexpath), ec); + if (ec) { LOG(error, "Failed to clean tmpdir %s", tmpindexpath.c_str()); return false; } diff --git a/searchlib/src/vespa/searchlib/diskindex/fusion.cpp b/searchlib/src/vespa/searchlib/diskindex/fusion.cpp index e142255252c..4fd9d116244 100644 --- a/searchlib/src/vespa/searchlib/diskindex/fusion.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/fusion.cpp @@ -8,13 +8,13 @@ #include <vespa/searchlib/common/documentsummary.h> #include <vespa/searchlib/common/i_flush_token.h> #include <vespa/searchlib/index/schemautil.h> -#include <vespa/searchlib/util/dirtraverse.h> #include <vespa/vespalib/io/fileutil.h> #include <vespa/vespalib/util/error.h> #include <vespa/vespalib/util/exceptions.h> #include <vespa/vespalib/util/lambdatask.h> #include <vespa/document/util/queue.h> #include <filesystem> +#include <system_error> #include <vespa/log/log.h> @@ -117,8 +117,9 @@ Fusion::merge(vespalib::Executor& shared_executor, std::shared_ptr<IFlushToken> LOG(error, "\"%s\" is not a directory", _fusion_out_index.get_path().c_str()); return false; } - search::DirectoryTraverse dt(_fusion_out_index.get_path().c_str()); - if (!dt.RemoveTree()) { + std::error_code ec; + std::filesystem::remove_all(std::filesystem::path(_fusion_out_index.get_path()), ec); + if (ec) { LOG(error, "Failed to clean directory \"%s\"", _fusion_out_index.get_path().c_str()); return false; } diff --git a/searchlib/src/vespa/searchlib/engine/docsumrequest.h b/searchlib/src/vespa/searchlib/engine/docsumrequest.h index 27fb5b25a96..d4f3a1ec340 100644 --- a/searchlib/src/vespa/searchlib/engine/docsumrequest.h +++ b/searchlib/src/vespa/searchlib/engine/docsumrequest.h @@ -2,7 +2,6 @@ #pragma once -#include "propertiesmap.h" #include "request.h" #include "lazy_source.h" #include <vespa/document/base/globalid.h> diff --git a/searchlib/src/vespa/searchlib/engine/proto_converter.cpp b/searchlib/src/vespa/searchlib/engine/proto_converter.cpp index 77781d583cb..4eaa5b3eb65 100644 --- a/searchlib/src/vespa/searchlib/engine/proto_converter.cpp +++ b/searchlib/src/vespa/searchlib/engine/proto_converter.cpp @@ -135,7 +135,7 @@ ProtoConverter::search_reply_to_proto(const SearchReply &reply, ProtoSearchReply } } } - proto.set_grouping_blob(&reply.groupResult[0], reply.groupResult.size()); + proto.set_grouping_blob(reply.groupResult.data(), reply.groupResult.size()); const auto &slime_trace = reply.propertiesMap.trace().lookup("slime"); proto.set_slime_trace(slime_trace.get().data(), slime_trace.get().size()); if (reply.my_issues) { diff --git a/searchlib/src/vespa/searchlib/expression/integerresultnode.h b/searchlib/src/vespa/searchlib/expression/integerresultnode.h index a7fe86acd97..e63ac783bc8 100644 --- a/searchlib/src/vespa/searchlib/expression/integerresultnode.h +++ b/searchlib/src/vespa/searchlib/expression/integerresultnode.h @@ -28,7 +28,7 @@ public: T bv(static_cast<const IntegerResultNodeT &>(b)._value); return (_value < bv) ? -1 : (_value > bv) ? 1 : 0; } - void add(const ResultNode & b) override { _value += b.getInteger(); } + void add(const ResultNode & b) override { _value = uint64_t(_value) + uint64_t(b.getInteger()); } void negate() override { _value = - _value; } void multiply(const ResultNode & b) override { if constexpr (std::is_same_v<T, bool>) { diff --git a/searchlib/src/vespa/searchlib/fef/objectstore.cpp b/searchlib/src/vespa/searchlib/fef/objectstore.cpp index 2da08e2915d..4cf185ad55e 100644 --- a/searchlib/src/vespa/searchlib/fef/objectstore.cpp +++ b/searchlib/src/vespa/searchlib/fef/objectstore.cpp @@ -2,8 +2,7 @@ #include "objectstore.h" #include <vespa/vespalib/stllike/hash_map.hpp> -namespace search { -namespace fef { +namespace search::fef { ObjectStore::ObjectStore() : _objectMap() @@ -37,4 +36,3 @@ ObjectStore::get(const vespalib::string & key) const } } -} diff --git a/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp b/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp index b851fc50518..8664b0fc14b 100644 --- a/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp @@ -18,7 +18,7 @@ HitCollector::sortHitsByScore(size_t topn) _scoreOrder.push_back(i); } ShiftBasedRadixSorter<uint32_t, IndirectScoreRadix, IndirectScoreComparator, 56, true>:: - radix_sort(IndirectScoreRadix(&_hits[0]), IndirectScoreComparator(&_hits[0]), &_scoreOrder[0], _scoreOrder.size(), 16, topn); + radix_sort(IndirectScoreRadix(_hits.data()), IndirectScoreComparator(_hits.data()), _scoreOrder.data(), _scoreOrder.size(), 16, topn); _scoreOrder.resize(topn); } } @@ -28,7 +28,7 @@ HitCollector::sortHitsByDocId() { if (_hitsSortOrder != SortOrder::DOC_ID) { ShiftBasedRadixSorter<Hit, DocIdRadix, DocIdComparator, 24>:: - radix_sort(DocIdRadix(), DocIdComparator(), &_hits[0], _hits.size(), 16); + radix_sort(DocIdRadix(), DocIdComparator(), _hits.data(), _hits.size(), 16); _hitsSortOrder = SortOrder::DOC_ID; _scoreOrder.clear(); } @@ -170,7 +170,7 @@ HitCollector::getSortedHitSequence(size_t max_hits) { size_t num_hits = std::min(_hits.size(), max_hits); sortHitsByScore(num_hits); - return SortedHitSequence(&_hits[0], &_scoreOrder[0], num_hits); + return SortedHitSequence(_hits.data(), _scoreOrder.data(), num_hits); } void diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp index 8c03800b92a..8aa806b01cd 100644 --- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp @@ -4,7 +4,6 @@ #include "nearest_neighbor_blueprint.h" #include "nearest_neighbor_iterator.h" #include "nns_index_iterator.h" -#include <vespa/eval/eval/fast_value.h> #include <vespa/searchlib/fef/termfieldmatchdataarray.h> #include <vespa/searchlib/tensor/dense_tensor_attribute.h> #include <vespa/searchlib/tensor/distance_function_factory.h> @@ -13,45 +12,12 @@ LOG_SETUP(".searchlib.queryeval.nearest_neighbor_blueprint"); -using vespalib::eval::CellType; -using vespalib::eval::FastValueBuilderFactory; -using vespalib::eval::TypedCells; using vespalib::eval::Value; -using vespalib::eval::ValueType; namespace search::queryeval { namespace { -template<typename LCT, typename RCT> -std::unique_ptr<Value> -convert_cells(const ValueType &new_type, std::unique_ptr<Value> old_value) -{ - auto old_cells = old_value->cells().typify<LCT>(); - auto builder = FastValueBuilderFactory::get().create_value_builder<RCT>(new_type); - auto new_cells = builder->add_subspace(); - assert(old_cells.size() == new_cells.size()); - auto p = new_cells.begin(); - for (LCT value : old_cells) { - RCT conv(value); - *p++ = conv; - } - return builder->build(std::move(builder)); -} - -struct ConvertCellsSelector -{ - template <typename LCT, typename RCT> - static auto invoke(const ValueType &new_type, std::unique_ptr<Value> old_value) { - return convert_cells<LCT, RCT>(new_type, std::move(old_value)); - } - auto operator() (CellType from, CellType to, std::unique_ptr<Value> old_value) const { - using MyTypify = vespalib::eval::TypifyCellType; - ValueType new_type = old_value->type().cell_cast(to); - return vespalib::typify_invoke<2,MyTypify,ConvertCellsSelector>(from, to, new_type, std::move(old_value)); - } -}; - vespalib::string to_string(NearestNeighborBlueprint::Algorithm algorithm) { @@ -78,7 +44,8 @@ NearestNeighborBlueprint::NearestNeighborBlueprint(const queryeval::FieldSpec& f double global_filter_upper_limit) : ComplexLeafBlueprint(field), _attr_tensor(attr_tensor), - _query_tensor(std::move(query_tensor)), + _distance_calc(_attr_tensor, std::move(query_tensor)), + _query_tensor(_distance_calc.query_tensor()), _target_hits(target_hits), _adjusted_target_hits(target_hits), _approximate(approximate), @@ -86,7 +53,6 @@ NearestNeighborBlueprint::NearestNeighborBlueprint(const queryeval::FieldSpec& f _distance_threshold(std::numeric_limits<double>::max()), _global_filter_lower_limit(global_filter_lower_limit), _global_filter_upper_limit(global_filter_upper_limit), - _fallback_dist_fun(), _distance_heap(target_hits), _found_hits(), _algorithm(Algorithm::EXACT), @@ -95,27 +61,13 @@ NearestNeighborBlueprint::NearestNeighborBlueprint(const queryeval::FieldSpec& f _global_filter_hits(), _global_filter_hit_ratio() { - CellType attr_ct = _attr_tensor.getTensorType().cell_type(); - _fallback_dist_fun = search::tensor::make_distance_function(_attr_tensor.distance_metric(), attr_ct); - _dist_fun = _fallback_dist_fun.get(); - assert(_dist_fun); - auto nns_index = _attr_tensor.nearest_neighbor_index(); - if (nns_index) { - _dist_fun = nns_index->distance_function(); - assert(_dist_fun); - } - auto query_ct = _query_tensor->cells().type; - CellType required_ct = _dist_fun->expected_cell_type(); - if (query_ct != required_ct) { - ConvertCellsSelector converter; - _query_tensor = converter(query_ct, required_ct, std::move(_query_tensor)); - } if (distance_threshold < std::numeric_limits<double>::max()) { - _distance_threshold = _dist_fun->convert_threshold(distance_threshold); + _distance_threshold = _distance_calc.function().convert_threshold(distance_threshold); _distance_heap.set_distance_threshold(_distance_threshold); } uint32_t est_hits = _attr_tensor.get_num_docs(); setEstimate(HitEstimate(est_hits, false)); + auto nns_index = _attr_tensor.nearest_neighbor_index(); set_want_global_filter(nns_index && _approximate); } @@ -155,7 +107,7 @@ NearestNeighborBlueprint::set_global_filter(const GlobalFilter &global_filter, d void NearestNeighborBlueprint::perform_top_k(const search::tensor::NearestNeighborIndex* nns_index) { - auto lhs = _query_tensor->cells(); + auto lhs = _query_tensor.cells(); uint32_t k = _adjusted_target_hits; if (_global_filter->has_filter()) { auto filter = _global_filter->filter(); @@ -175,13 +127,12 @@ NearestNeighborBlueprint::createLeafSearch(const search::fef::TermFieldMatchData switch (_algorithm) { case Algorithm::INDEX_TOP_K_WITH_FILTER: case Algorithm::INDEX_TOP_K: - return NnsIndexIterator::create(tfmd, _found_hits, _dist_fun); + return NnsIndexIterator::create(tfmd, _found_hits, _distance_calc.function()); default: ; } - const Value &qT = *_query_tensor; - return NearestNeighborIterator::create(strict, tfmd, qT, _attr_tensor, - _distance_heap, _global_filter->filter(), _dist_fun); + return NearestNeighborIterator::create(strict, tfmd, _distance_calc, + _distance_heap, _global_filter->filter()); } void @@ -189,7 +140,7 @@ NearestNeighborBlueprint::visitMembers(vespalib::ObjectVisitor& visitor) const { ComplexLeafBlueprint::visitMembers(visitor); visitor.visitString("attribute_tensor", _attr_tensor.getTensorType().to_spec()); - visitor.visitString("query_tensor", _query_tensor->type().to_spec()); + visitor.visitString("query_tensor", _query_tensor.type().to_spec()); visitor.visitInt("target_hits", _target_hits); visitor.visitInt("adjusted_target_hits", _adjusted_target_hits); visitor.visitInt("explore_additional_hits", _explore_additional_hits); diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h index 16b0e13014e..3be7d7fd01f 100644 --- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h @@ -3,6 +3,7 @@ #include "blueprint.h" #include "nearest_neighbor_distance_heap.h" +#include <vespa/searchlib/tensor/distance_calculator.h> #include <vespa/searchlib/tensor/distance_function.h> #include <vespa/searchlib/tensor/nearest_neighbor_index.h> #include <optional> @@ -28,7 +29,8 @@ public: }; private: const tensor::ITensorAttribute& _attr_tensor; - std::unique_ptr<vespalib::eval::Value> _query_tensor; + search::tensor::DistanceCalculator _distance_calc; + const vespalib::eval::Value& _query_tensor; uint32_t _target_hits; uint32_t _adjusted_target_hits; bool _approximate; @@ -36,8 +38,6 @@ private: double _distance_threshold; double _global_filter_lower_limit; double _global_filter_upper_limit; - search::tensor::DistanceFunction::UP _fallback_dist_fun; - const search::tensor::DistanceFunction *_dist_fun; mutable NearestNeighborDistanceHeap _distance_heap; std::vector<search::tensor::NearestNeighborIndex::Neighbor> _found_hits; Algorithm _algorithm; @@ -59,7 +59,7 @@ public: NearestNeighborBlueprint& operator=(const NearestNeighborBlueprint&) = delete; ~NearestNeighborBlueprint(); const tensor::ITensorAttribute& get_attribute_tensor() const { return _attr_tensor; } - const vespalib::eval::Value& get_query_tensor() const { return *_query_tensor; } + const vespalib::eval::Value& get_query_tensor() const { return _query_tensor; } uint32_t get_target_hits() const { return _target_hits; } uint32_t get_adjusted_target_hits() const { return _adjusted_target_hits; } void set_global_filter(const GlobalFilter &global_filter, double estimated_hit_ratio) override; diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp index 6a00568bd06..e06fcc614d8 100644 --- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp @@ -2,6 +2,8 @@ #include "nearest_neighbor_iterator.h" #include <vespa/searchlib/common/bitvector.h> +#include <vespa/searchlib/tensor/distance_calculator.h> +#include <vespa/searchlib/tensor/distance_function.h> using search::tensor::ITensorAttribute; using vespalib::ConstArrayRef; @@ -34,11 +36,10 @@ public: NearestNeighborImpl(Params params_in) : NearestNeighborIterator(params_in), - _lhs(params().queryTensor.cells()), _lastScore(0.0) { - assert(is_compatible(params().tensorAttribute.getTensorType(), - params().queryTensor.type())); + assert(is_compatible(params().distance_calc.attribute_tensor().getTensorType(), + params().distance_calc.query_tensor().type())); } ~NearestNeighborImpl(); @@ -64,7 +65,7 @@ public: } void doUnpack(uint32_t docId) override { - double score = params().distanceFunction->to_rawscore(_lastScore); + double score = params().distance_calc.function().to_rawscore(_lastScore); params().tfmd.setRawScore(docId, score); params().distanceHeap.used(_lastScore); } @@ -73,11 +74,9 @@ public: private: double computeDistance(uint32_t docId, double limit) { - auto rhs = params().tensorAttribute.extract_cells_ref(docId); - return params().distanceFunction->calc_with_limit(_lhs, rhs, limit); + return params().distance_calc.calc_with_limit(docId, limit); } - TypedCells _lhs; double _lastScore; }; @@ -105,14 +104,12 @@ std::unique_ptr<NearestNeighborIterator> NearestNeighborIterator::create( bool strict, fef::TermFieldMatchData &tfmd, - const vespalib::eval::Value &queryTensor, - const search::tensor::ITensorAttribute &tensorAttribute, + const search::tensor::DistanceCalculator &distance_calc, NearestNeighborDistanceHeap &distanceHeap, - const search::BitVector *filter, - const search::tensor::DistanceFunction *dist_fun) + const search::BitVector *filter) { - Params params(tfmd, queryTensor, tensorAttribute, distanceHeap, filter, dist_fun); + Params params(tfmd, distance_calc, distanceHeap, filter); if (filter) { return resolve_strict<true>(strict, params); } else { diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.h b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.h index 66622288d84..0d8f70d15c2 100644 --- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.h +++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.h @@ -7,10 +7,11 @@ #include <vespa/eval/eval/value.h> #include <vespa/searchlib/fef/termfieldmatchdata.h> #include <vespa/searchlib/tensor/i_tensor_attribute.h> -#include <vespa/searchlib/tensor/distance_function.h> #include <vespa/vespalib/util/priority_queue.h> #include <cmath> +namespace search::tensor { class DistanceCalculator; } + namespace search::queryeval { class NearestNeighborIterator : public SearchIterator @@ -21,24 +22,18 @@ public: struct Params { fef::TermFieldMatchData &tfmd; - const Value &queryTensor; - const ITensorAttribute &tensorAttribute; + const search::tensor::DistanceCalculator &distance_calc; NearestNeighborDistanceHeap &distanceHeap; const search::BitVector *filter; - const search::tensor::DistanceFunction *distanceFunction; - + Params(fef::TermFieldMatchData &tfmd_in, - const Value &queryTensor_in, - const ITensorAttribute &tensorAttribute_in, + const search::tensor::DistanceCalculator &distance_calc_in, NearestNeighborDistanceHeap &distanceHeap_in, - const search::BitVector *filter_in, - const search::tensor::DistanceFunction *distanceFunction_in) + const search::BitVector *filter_in) : tfmd(tfmd_in), - queryTensor(queryTensor_in), - tensorAttribute(tensorAttribute_in), + distance_calc(distance_calc_in), distanceHeap(distanceHeap_in), - filter(filter_in), - distanceFunction(distanceFunction_in) + filter(filter_in) {} }; @@ -49,11 +44,9 @@ public: static std::unique_ptr<NearestNeighborIterator> create( bool strict, fef::TermFieldMatchData &tfmd, - const Value &queryTensor, - const search::tensor::ITensorAttribute &tensorAttribute, + const search::tensor::DistanceCalculator &distance_calc, NearestNeighborDistanceHeap &distanceHeap, - const search::BitVector *filter, - const search::tensor::DistanceFunction *dist_fun); + const search::BitVector *filter); const Params& params() const { return _params; } private: diff --git a/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.cpp b/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.cpp index cd65f01025b..95264a79431 100644 --- a/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.cpp @@ -18,13 +18,13 @@ class NeighborVectorIterator : public NnsIndexIterator private: fef::TermFieldMatchData &_tfmd; const std::vector<Neighbor> &_hits; - const search::tensor::DistanceFunction * const _dist_fun; + const search::tensor::DistanceFunction &_dist_fun; uint32_t _idx; double _last_abstract_dist; public: NeighborVectorIterator(fef::TermFieldMatchData &tfmd, const std::vector<Neighbor> &hits, - const search::tensor::DistanceFunction *dist_fun) + const search::tensor::DistanceFunction &dist_fun) : _tfmd(tfmd), _hits(hits), _dist_fun(dist_fun), @@ -54,7 +54,7 @@ public: } void doUnpack(uint32_t docId) override { - double score = _dist_fun->to_rawscore(_last_abstract_dist); + double score = _dist_fun.to_rawscore(_last_abstract_dist); _tfmd.setRawScore(docId, score); } @@ -65,7 +65,7 @@ std::unique_ptr<NnsIndexIterator> NnsIndexIterator::create( fef::TermFieldMatchData &tfmd, const std::vector<Neighbor> &hits, - const search::tensor::DistanceFunction *dist_fun) + const search::tensor::DistanceFunction &dist_fun) { return std::make_unique<NeighborVectorIterator>(tfmd, hits, dist_fun); } diff --git a/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.h b/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.h index 019ac8579bd..031a603de49 100644 --- a/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.h +++ b/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.h @@ -16,7 +16,7 @@ public: static std::unique_ptr<NnsIndexIterator> create( fef::TermFieldMatchData &tfmd, const std::vector<Hit> &hits, - const search::tensor::DistanceFunction *dist_fun); + const search::tensor::DistanceFunction &dist_fun); }; } // namespace diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt index ae34cdd66c8..9e0ccb8d37a 100644 --- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt @@ -11,6 +11,7 @@ vespa_add_library(searchlib_tensor OBJECT direct_tensor_attribute.cpp direct_tensor_saver.cpp direct_tensor_store.cpp + distance_calculator.cpp distance_function_factory.cpp euclidean_distance.cpp geo_degrees_distance.cpp diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp index 2fdb73fcf96..c713b3ef335 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp @@ -8,9 +8,9 @@ #include "tensor_attribute.hpp" #include <vespa/eval/eval/value.h> #include <vespa/fastlib/io/bufferedfile.h> +#include <vespa/searchcommon/attribute/config.h> #include <vespa/searchlib/attribute/load_utils.h> #include <vespa/searchlib/attribute/readerbase.h> -#include <vespa/searchcommon/attribute/config.h> #include <vespa/vespalib/data/slime/inserter.h> #include <vespa/vespalib/util/cpu_usage.h> #include <vespa/vespalib/util/lambdatask.h> @@ -102,10 +102,16 @@ BlobSequenceReader::is_present() { } +bool +DenseTensorAttribute::tensor_is_unchanged(DocId docid, const vespalib::eval::Value& new_tensor) const +{ + auto old_tensor = extract_cells_ref(docid); + return _comp.equals(old_tensor, new_tensor.cells()); +} + void DenseTensorAttribute::internal_set_tensor(DocId docid, const vespalib::eval::Value& tensor) { - checkTensorType(tensor); consider_remove_from_index(docid); EntryRef ref = _denseTensorStore.setTensor(tensor); setTensorRef(docid, ref); @@ -152,7 +158,8 @@ DenseTensorAttribute::DenseTensorAttribute(vespalib::stringref baseFileName, con const NearestNeighborIndexFactory& index_factory) : TensorAttribute(baseFileName, cfg, _denseTensorStore), _denseTensorStore(cfg.tensorType(), get_memory_allocator()), - _index() + _index(), + _comp(cfg.tensorType()) { if (cfg.hnsw_index_params().has_value()) { auto tensor_type = cfg.tensorType(); @@ -180,6 +187,7 @@ DenseTensorAttribute::clearDoc(DocId docId) void DenseTensorAttribute::setTensor(DocId docId, const vespalib::eval::Value &tensor) { + checkTensorType(tensor); internal_set_tensor(docId, tensor); if (_index) { _index->add_document(docId); @@ -189,16 +197,26 @@ DenseTensorAttribute::setTensor(DocId docId, const vespalib::eval::Value &tensor std::unique_ptr<PrepareResult> DenseTensorAttribute::prepare_set_tensor(DocId docid, const vespalib::eval::Value& tensor) const { + checkTensorType(tensor); if (_index) { + if (tensor_is_unchanged(docid, tensor)) { + // Don't make changes to the nearest neighbor index when the inserted tensor is unchanged. + // With this optimization we avoid doing unnecessary costly work, first removing the vector point, then inserting the same point. + return {}; + } return _index->prepare_add_document(docid, tensor.cells(), getGenerationHandler().takeGuard()); } - return std::unique_ptr<PrepareResult>(); + return {}; } void DenseTensorAttribute::complete_set_tensor(DocId docid, const vespalib::eval::Value& tensor, std::unique_ptr<PrepareResult> prepare_result) { + if (_index && !prepare_result) { + // The tensor is unchanged. + return; + } internal_set_tensor(docid, tensor); if (_index) { _index->complete_add_document(docid, std::move(prepare_result)); diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h index da7a88af1be..1138a4f4433 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h @@ -6,6 +6,7 @@ #include "dense_tensor_store.h" #include "doc_vector_access.h" #include "tensor_attribute.h" +#include "typed_cells_comparator.h" #include <memory> namespace search::tensor { @@ -20,7 +21,9 @@ class DenseTensorAttribute : public TensorAttribute, public DocVectorAccess { private: DenseTensorStore _denseTensorStore; std::unique_ptr<NearestNeighborIndex> _index; + TypedCellsComparator _comp; + bool tensor_is_unchanged(DocId docid, const vespalib::eval::Value& new_tensor) const; void internal_set_tensor(DocId docid, const vespalib::eval::Value& tensor); void consider_remove_from_index(DocId docid); vespalib::MemoryUsage update_stat() override; diff --git a/searchlib/src/vespa/searchlib/tensor/distance_calculator.cpp b/searchlib/src/vespa/searchlib/tensor/distance_calculator.cpp new file mode 100644 index 00000000000..c53d50bc9ff --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/distance_calculator.cpp @@ -0,0 +1,90 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "distance_calculator.h" +#include "distance_function_factory.h" +#include "nearest_neighbor_index.h" +#include <vespa/eval/eval/fast_value.h> + +using vespalib::eval::CellType; +using vespalib::eval::FastValueBuilderFactory; +using vespalib::eval::TypedCells; +using vespalib::eval::Value; +using vespalib::eval::ValueType; + +namespace { + +template<typename LCT, typename RCT> +std::unique_ptr<Value> +convert_cells(const ValueType& new_type, std::unique_ptr<Value> old_value) +{ + auto old_cells = old_value->cells().typify<LCT>(); + auto builder = FastValueBuilderFactory::get().create_value_builder<RCT>(new_type); + auto new_cells = builder->add_subspace(); + assert(old_cells.size() == new_cells.size()); + auto p = new_cells.begin(); + for (LCT value : old_cells) { + RCT conv(value); + *p++ = conv; + } + return builder->build(std::move(builder)); +} + +struct ConvertCellsSelector +{ + template <typename LCT, typename RCT> + static auto invoke(const ValueType& new_type, std::unique_ptr<Value> old_value) { + return convert_cells<LCT, RCT>(new_type, std::move(old_value)); + } + auto operator() (CellType from, CellType to, std::unique_ptr<Value> old_value) const { + using MyTypify = vespalib::eval::TypifyCellType; + ValueType new_type = old_value->type().cell_cast(to); + return vespalib::typify_invoke<2,MyTypify,ConvertCellsSelector>(from, to, new_type, std::move(old_value)); + } +}; + +} + +namespace search::tensor { + +DistanceCalculator::DistanceCalculator(const tensor::ITensorAttribute& attr_tensor, + std::unique_ptr<vespalib::eval::Value> query_tensor_in) + : _attr_tensor(attr_tensor), + _query_tensor_uptr(std::move(query_tensor_in)), + _query_tensor(), + _query_tensor_cells(), + _dist_fun_uptr(make_distance_function(_attr_tensor.distance_metric(), + _attr_tensor.getTensorType().cell_type())), + _dist_fun(_dist_fun_uptr.get()) +{ + assert(_dist_fun); + auto nns_index = _attr_tensor.nearest_neighbor_index(); + if (nns_index) { + _dist_fun = nns_index->distance_function(); + assert(_dist_fun); + } + auto query_ct = _query_tensor_uptr->cells().type; + CellType required_ct = _dist_fun->expected_cell_type(); + if (query_ct != required_ct) { + ConvertCellsSelector converter; + _query_tensor_uptr = converter(query_ct, required_ct, std::move(_query_tensor_uptr)); + } + _query_tensor = _query_tensor_uptr.get(); + _query_tensor_cells = _query_tensor->cells(); +} + +DistanceCalculator::DistanceCalculator(const tensor::ITensorAttribute& attr_tensor, + const vespalib::eval::Value& query_tensor_in, + const DistanceFunction& function_in) + : _attr_tensor(attr_tensor), + _query_tensor_uptr(), + _query_tensor(&query_tensor_in), + _query_tensor_cells(_query_tensor->cells()), + _dist_fun_uptr(), + _dist_fun(&function_in) +{ +} + +DistanceCalculator::~DistanceCalculator() = default; + +} + diff --git a/searchlib/src/vespa/searchlib/tensor/distance_calculator.h b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h new file mode 100644 index 00000000000..eeb66887598 --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h @@ -0,0 +1,48 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "distance_function.h" +#include "i_tensor_attribute.h" + +namespace vespalib::eval { struct Value; } + +namespace search::tensor { + +/** + * Class used to calculate the distance between two n-dimensional vectors, + * where one is stored in a TensorAttribute and the other comes from the query. + * + * The distance function to use is defined in the TensorAttribute. + */ +class DistanceCalculator { +private: + const tensor::ITensorAttribute& _attr_tensor; + std::unique_ptr<vespalib::eval::Value> _query_tensor_uptr; + const vespalib::eval::Value* _query_tensor; + vespalib::eval::TypedCells _query_tensor_cells; + std::unique_ptr<DistanceFunction> _dist_fun_uptr; + const DistanceFunction* _dist_fun; + +public: + DistanceCalculator(const tensor::ITensorAttribute& attr_tensor, + std::unique_ptr<vespalib::eval::Value> query_tensor_in); + + /** + * Only used by unit tests where ownership of query tensor and distance function is handled outside. + */ + DistanceCalculator(const tensor::ITensorAttribute& attr_tensor, + const vespalib::eval::Value& query_tensor_in, + const DistanceFunction& function_in); + + ~DistanceCalculator(); + + const tensor::ITensorAttribute& attribute_tensor() const { return _attr_tensor; } + const vespalib::eval::Value& query_tensor() const { return *_query_tensor; } + const DistanceFunction& function() const { return *_dist_fun; } + + double calc_with_limit(uint32_t docid, double limit) const { + return _dist_fun->calc_with_limit(_query_tensor_cells, _attr_tensor.extract_cells_ref(docid), limit); + } +}; + +} diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function.h b/searchlib/src/vespa/searchlib/tensor/distance_function.h index 77873cb7ced..d5ebf656189 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_function.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_function.h @@ -23,7 +23,7 @@ public: DistanceFunction(vespalib::eval::CellType expected) : _expect_cell_type(expected) {} - virtual ~DistanceFunction() {} + virtual ~DistanceFunction() = default; // input (query) vectors must be converted to this cell type: vespalib::eval::CellType expected_cell_type() const { diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index e82f31df38e..2ee1b268449 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -436,7 +436,7 @@ HnswIndex::prepare_add_document(uint32_t docid, if (max_nodes < _cfg.min_size_before_two_phase()) { // the first documents added will do all work in write thread // to ensure they are linked together: - return std::unique_ptr<PrepareResult>(); + return std::make_unique<PreparedFirstAddDoc>(); } PreparedAddDoc op = internal_prepare_add(docid, vector, std::move(read_guard)); return std::make_unique<PreparedAddDoc>(std::move(op)); diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h index 72a10724ff1..3f5a9d514ed 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h @@ -152,6 +152,8 @@ protected: const BitVector *filter, uint32_t explore_k, double distance_threshold) const; + struct PreparedFirstAddDoc : public PrepareResult {}; + struct PreparedAddDoc : public PrepareResult { using ReadGuard = vespalib::GenerationHandler::Guard; uint32_t docid; diff --git a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp index a668387e5bd..58e625e6aca 100644 --- a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp +++ b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp @@ -68,7 +68,7 @@ struct MyFastValueView final : Value { { const StringIdVector &labels = handle_view; for (size_t i = 0; i < num_spaces; ++i) { - ConstArrayRef<string_id> addr(&labels[i * num_mapped], num_mapped); + ConstArrayRef<string_id> addr(labels.data() + (i * num_mapped), num_mapped); my_index.map.add_mapping(FastAddrMap::hash_labels(addr)); } assert(my_index.map.size() == num_spaces); diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp index add5184c4eb..78c58e86a3b 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp @@ -135,7 +135,7 @@ TensorAttribute::addDoc(DocId &docId) } void -TensorAttribute::checkTensorType(const vespalib::eval::Value &tensor) +TensorAttribute::checkTensorType(const vespalib::eval::Value &tensor) const { const ValueType &fieldTensorType = getConfig().tensorType(); const ValueType &tensorType = tensor.type(); diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h index ae6a4a302ea..c8aa42c6133 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h @@ -32,7 +32,7 @@ protected: template <typename RefType> void doCompactWorst(); - void checkTensorType(const vespalib::eval::Value &tensor); + void checkTensorType(const vespalib::eval::Value &tensor) const; void setTensorRef(DocId docId, EntryRef ref); virtual vespalib::MemoryUsage update_stat(); virtual vespalib::MemoryUsage memory_usage() const; diff --git a/searchlib/src/vespa/searchlib/tensor/typed_cells_comparator.h b/searchlib/src/vespa/searchlib/tensor/typed_cells_comparator.h new file mode 100644 index 00000000000..d1c890be961 --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/typed_cells_comparator.h @@ -0,0 +1,30 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/cell_type.h> +#include <vespa/eval/eval/typed_cells.h> +#include <vespa/eval/eval/value_type.h> +#include <cstring> + +namespace search::tensor { + +/** + * Comparator used to compare two vespalib::eval::TypedCells instances. + * + * The caller must first validate that they are of the same vespalib::eval::ValueType. + */ +class TypedCellsComparator { +private: + size_t _mem_size; + +public: + TypedCellsComparator(const vespalib::eval::ValueType& type) + : _mem_size(vespalib::eval::CellTypeUtils::mem_size(type.cell_type(), type.dense_subspace_size())) + {} + bool equals(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const { + return std::memcmp(lhs.data, rhs.data, _mem_size) == 0; + } +}; + +} diff --git a/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.cpp b/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.cpp index d240e5a7c6c..d1bb464fc37 100644 --- a/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.cpp +++ b/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.cpp @@ -10,10 +10,7 @@ namespace search { namespace { struct MockReadGuard : public IDocumentMetaStoreContext::IReadGuard { - virtual const search::IDocumentMetaStore &get() const override { - search::IDocumentMetaStore *nullStore = nullptr; - return static_cast<search::IDocumentMetaStore &>(*nullStore); - } + virtual const search::IDocumentMetaStore &get() const override { abort(); } }; } diff --git a/searchlib/src/vespa/searchlib/transactionlog/common.cpp b/searchlib/src/vespa/searchlib/transactionlog/common.cpp index 4130ad0bc06..d4192fe0beb 100644 --- a/searchlib/src/vespa/searchlib/transactionlog/common.cpp +++ b/searchlib/src/vespa/searchlib/transactionlog/common.cpp @@ -3,7 +3,9 @@ #include "common.h" #include <vespa/vespalib/util/stringfmt.h> #include <vespa/fastos/file.h> +#include <filesystem> #include <stdexcept> +#include <system_error> namespace search::transactionlog { @@ -32,7 +34,9 @@ makeDirectory(const char * dir) if ( FastOS_File::Stat(dir, &st) ) { retval = st._isDirectory ? 0 : -2; } else { - retval = FastOS_File::MakeDirectory(dir) ? 0 : -3; + std::error_code ec; + std::filesystem::create_directory(std::filesystem::path(dir), ec); + retval = (!ec) ? 0 : -3; } return retval; diff --git a/searchlib/src/vespa/searchlib/util/comprfile.cpp b/searchlib/src/vespa/searchlib/util/comprfile.cpp index 61eeca6fc2d..bde246a1239 100644 --- a/searchlib/src/vespa/searchlib/util/comprfile.cpp +++ b/searchlib/src/vespa/searchlib/util/comprfile.cpp @@ -155,7 +155,7 @@ ComprFileReadBase::SetPosition(uint64_t newPosition, bool readAll, ComprFileDecodeContext &decodeContext, int &bitOffset, - FastOS_FileInterface &file, + FastOS_FileInterface *file, uint64_t &fileReadByteOffset, uint64_t fileSize, ComprBuffer &cbuf) @@ -176,7 +176,7 @@ ComprFileReadBase::SetPosition(uint64_t newPosition, readAll, decodeContext, bitOffset, - file, + *file, fileReadByteOffset, fileSize, cbuf); @@ -200,7 +200,7 @@ ComprFileReadBase::SetPosition(uint64_t newPosition, readAll, decodeContext, bitOffset, - file, + *file, fileReadByteOffset, fileSize, cbuf); @@ -221,9 +221,8 @@ ComprFileReadBase::SetPosition(uint64_t newPosition, (cbuf.getUnitBitSize() - 1)); assert(pos <= static_cast<int64_t>(fileSize)); - - file.SetPosition(pos); - assert(pos == file.GetPosition()); + file->SetPosition(pos); + assert(pos == file->GetPosition()); decodeContext.emptyBuffer(newPosition); assert(decodeContext.getBitPos(bitOffset, @@ -337,7 +336,7 @@ ComprFileReadContext::setPosition(uint64_t newPosition) _readAll, *_decodeContext, _bitOffset, - *_file, + _file, _fileReadByteOffset, _fileSize, *this); diff --git a/searchlib/src/vespa/searchlib/util/comprfile.h b/searchlib/src/vespa/searchlib/util/comprfile.h index 2ee95a53235..dc8cf6185fc 100644 --- a/searchlib/src/vespa/searchlib/util/comprfile.h +++ b/searchlib/src/vespa/searchlib/util/comprfile.h @@ -76,7 +76,7 @@ public: bool readAll, ComprFileDecodeContext &decodeContext, int &bitOffset, - FastOS_FileInterface &file, + FastOS_FileInterface *file, uint64_t &fileReadByteOffset, uint64_t fileSize, ComprBuffer &cbuf); diff --git a/searchlib/src/vespa/searchlib/util/dirtraverse.cpp b/searchlib/src/vespa/searchlib/util/dirtraverse.cpp index 07dbc9a247d..c1e8b6b7396 100644 --- a/searchlib/src/vespa/searchlib/util/dirtraverse.cpp +++ b/searchlib/src/vespa/searchlib/util/dirtraverse.cpp @@ -2,275 +2,63 @@ #include "dirtraverse.h" #include <vespa/vespalib/util/size_literals.h> -#include <vespa/fastos/file.h> -#include <cassert> -#include <cstring> +#include <filesystem> +#include <system_error> namespace search { -extern "C" { -static int cmpname(const void *av, const void *bv) -{ - const DirectoryTraverse::Name *const a = - *(const DirectoryTraverse::Name *const *) av; - const DirectoryTraverse::Name *const b = - *(const DirectoryTraverse::Name *const *) bv; - return a->_name.compare(b->_name.c_str()); -} -} - -DirectoryTraverse::Name::Name(const char *name) - : _name(name), - _next(nullptr) -{ -} -DirectoryTraverse::Name::~Name() = default; - -DirectoryTraverse::Name * -DirectoryTraverse::Name::sort(Name *head, int count) -{ - Name *nl; - Name **names; - int i; - - names = new Name *[count]; - i = 0; - for(nl = head; nl != nullptr; nl = nl->_next) - names[i++] = nl; - assert(i == count); - qsort(names, count, sizeof(Name *), cmpname); - for (i = 0; i < count; i++) { - if (i + 1 < count) - names[i]->_next = names[i + 1]; - else - names[i]->_next = nullptr; - } - head = names[0]; - delete [] names; - return head; -} - - -void -DirectoryTraverse::QueueDir(const char *name) -{ - Name *n = new Name(name); - if (_dirTail == nullptr) - _dirHead = n; - else - _dirTail->_next = n; - _dirTail = n; -} +namespace fs = std::filesystem; +namespace { -void -DirectoryTraverse::PushDir(const char *name) -{ - Name *n = new Name(name); - n->_next = _pdirHead; - _pdirHead = n; -} - - -void -DirectoryTraverse::PushRemoveDir(const char *name) -{ - Name *n = new Name(name); - n->_next = _rdirHead; - _rdirHead = n; -} - - -void -DirectoryTraverse::PushPushedDirs() -{ - Name *n; - while (_pdirHead != nullptr) { - n = _pdirHead; - _pdirHead = n->_next; - n->_next = _dirHead; - _dirHead = n; - if (_dirTail == nullptr) - _dirTail = n; - } -} - - -DirectoryTraverse::Name * -DirectoryTraverse::UnQueueDir() -{ - Name *n; - PushPushedDirs(); - if (_dirHead == nullptr) - return nullptr; - n = _dirHead; - _dirHead = n->_next; - n->_next = nullptr; - if (_dirHead == nullptr) - _dirTail = nullptr; - return n; -} - -DirectoryTraverse::Name * -DirectoryTraverse::UnQueueName() -{ - Name *n; - if (_nameHead == nullptr) - return nullptr; - n = _nameHead; - _nameHead = n->_next; - n->_next = nullptr; - _nameCount--; - return n; -} - - -void -DirectoryTraverse::ScanSingleDir() +uint64_t +try_get_tree_size(const std::string& base_dir) { - assert(_nameHead == nullptr); - assert(_nameCount == 0); - delete _curDir; - _fullDirName.clear(); - _curDir = UnQueueDir(); - if (_curDir == nullptr) - return; - _fullDirName = _baseDir; - if ( ! _curDir->_name.empty()) { - _fullDirName += "/" + _curDir->_name; + fs::path path(base_dir); + std::error_code ec; + fs::recursive_directory_iterator dir_itr(path, fs::directory_options::skip_permission_denied, ec); + if (ec) { + return 0; } - FastOS_DirectoryScan *dirscan = new FastOS_DirectoryScan(_fullDirName.c_str()); - while (dirscan->ReadNext()) { - const char *name = dirscan->GetName(); - if (strcmp(name, ".") == 0 || - strcmp(name, "..") == 0) - continue; - Name *nl = new Name(name); - nl->_next = _nameHead; - _nameHead = nl; - _nameCount++; - } - if (_nameCount > 1) - _nameHead = _nameHead->sort(_nameHead, _nameCount); - delete dirscan; -} - - -bool -DirectoryTraverse::NextName() -{ - delete _curName; - _curName = nullptr; - while (_nameHead == nullptr && (_dirHead != nullptr || _pdirHead != nullptr)) - ScanSingleDir(); - if (_nameHead == nullptr) - return false; - _curName = UnQueueName(); - _fullName = _fullDirName + "/" + _curName->_name; - _relName = _fullName.c_str() + (_baseDir.size() + 1); - return true; -} - - -bool -DirectoryTraverse::NextRemoveDir() -{ - Name *curName; - delete _curName; - _curName = nullptr; - if (_rdirHead == nullptr) - return false; - curName = _rdirHead; - _rdirHead = curName->_next; - _fullName = _baseDir + "/" + curName->_name; - _relName = _fullName.c_str() + _baseDir.size() + 1; - delete curName; - return true; -} - - -bool -DirectoryTraverse::RemoveTree() -{ - FastOS_StatInfo statInfo; - - while (NextName()) { - const char *relname = GetRelName(); - const char *fullname = GetFullName(); - if (FastOS_File::Stat(fullname, &statInfo)) { - if (statInfo._isDirectory) { - PushDir(relname); - PushRemoveDir(relname); - } else { - FastOS_File::Delete(fullname); + uint64_t total_size = 0; + constexpr uint64_t block_size = 4_Ki; + for (const auto &elem : dir_itr) { + if (fs::is_regular_file(elem.path()) && !fs::is_symlink(elem.path())) { + const auto size = elem.file_size(ec); + if (!ec) { + // round up size to file system block size (assumed to be 4 KiB) + auto adj_size = ((size + block_size - 1) / block_size) * block_size; + total_size += adj_size; } } } - while (NextRemoveDir()) { - const char *fullname = GetFullName(); - FastOS_File::RemoveDirectory(fullname); - } - FastOS_File::RemoveDirectory(_baseDir.c_str()); - return true; + return total_size; +} + } uint64_t DirectoryTraverse::GetTreeSize() { - FastOS_StatInfo statInfo; - uint64_t size = 0; - const uint64_t blockSize = 4_Ki; - - while (NextName()) { - const char *relname = GetRelName(); - const char *fullname = GetFullName(); - if (FastOS_File::Stat(fullname, &statInfo)) { - uint64_t adjSize = ((statInfo._size + blockSize - 1) / blockSize) * blockSize; - size += adjSize; - if (statInfo._isDirectory) { - PushDir(relname); - } + // Since try_get_tree_size may throw on concurrent directory + // modifications, immediately retry a bounded number of times if this + // happens. Number of retries chosen randomly by counting fingers. + for (int i = 0; i < 10; ++i) { + try { + return try_get_tree_size(_base_dir); + } catch (const fs::filesystem_error&) { + // Go around for another spin that hopefully won't race. } } - return size; + return 0; } -DirectoryTraverse::DirectoryTraverse(const char *baseDir) - : _baseDir(baseDir), - _nameHead(nullptr), - _nameCount(0), - _dirHead(nullptr), - _dirTail(nullptr), - _pdirHead(nullptr), - _rdirHead(nullptr), - _curDir(nullptr), - _curName(nullptr), - _fullDirName(), - _fullName(), - _relName(nullptr) +DirectoryTraverse::DirectoryTraverse(const std::string& base_dir) + : _base_dir(base_dir) { - QueueDir(""); - ScanSingleDir(); } - -DirectoryTraverse::~DirectoryTraverse() -{ - delete _curDir; - delete _curName; - PushPushedDirs(); - while (_dirHead != nullptr) - delete UnQueueDir(); - while (_nameHead != nullptr) - delete UnQueueName(); - while (_rdirHead != nullptr) { - Name *n; - n = _rdirHead; - _rdirHead = n->_next; - n->_next = nullptr; - delete n; - } -} +DirectoryTraverse::~DirectoryTraverse() = default; } // namespace search diff --git a/searchlib/src/vespa/searchlib/util/dirtraverse.h b/searchlib/src/vespa/searchlib/util/dirtraverse.h index 4a96ad0935d..c26246e2596 100644 --- a/searchlib/src/vespa/searchlib/util/dirtraverse.h +++ b/searchlib/src/vespa/searchlib/util/dirtraverse.h @@ -7,54 +7,16 @@ namespace search { +/* + * Class used to get size of directory tree on disk. + */ class DirectoryTraverse { private: - DirectoryTraverse(const DirectoryTraverse &); - DirectoryTraverse& operator=(const DirectoryTraverse &); - -public: - class Name - { - private: - Name(const Name &); - Name& operator=(const Name &); - - public: - std::string _name; - Name *_next; - explicit Name(const char *name); - ~Name(); - static Name *sort(Name *head, int count); - }; -private: - std::string _baseDir; - Name *_nameHead; - int _nameCount; - Name *_dirHead; - Name *_dirTail; - Name *_pdirHead; - Name *_rdirHead; - Name *_curDir; - Name *_curName; - std::string _fullDirName; - std::string _fullName; - const char *_relName; + std::string _base_dir; public: - const char *GetFullName() const { return _fullName.c_str(); } - const char *GetRelName() const { return _relName; } - void QueueDir(const char *name); - void PushDir(const char *name); - void PushRemoveDir(const char *name); - void PushPushedDirs(); - Name *UnQueueDir(); - Name *UnQueueName(); - void ScanSingleDir(); - bool NextName(); - bool NextRemoveDir(); - bool RemoveTree(); uint64_t GetTreeSize(); // Returns size of directory in bytes - explicit DirectoryTraverse(const char *baseDir); + explicit DirectoryTraverse(const std::string& base_dir); ~DirectoryTraverse(); }; |