summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/apps/tests/biglogtest.cpp 9
-rw-r--r-- searchlib/src/tests/alignment/alignment.cpp 5
-rw-r--r-- searchlib/src/tests/attribute/attribute_test.cpp 18
-rw-r--r-- searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp 2
-rw-r--r-- searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp 7
-rw-r--r-- searchlib/src/tests/attribute/posting_store/posting_store_test.cpp 4
-rw-r--r-- searchlib/src/tests/attribute/postinglist/postinglist.cpp 8
-rw-r--r-- searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp 8
-rw-r--r-- searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp 10
-rw-r--r-- searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp 30
-rw-r--r-- searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp 9
-rw-r--r-- searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp 31
-rw-r--r-- searchlib/src/tests/queryeval/nearest_neighbor/CMakeLists.txt 1
-rw-r--r-- searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp 117
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attribute_operation.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/loadedenumvalue.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/loadednumericvalue.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multinumericattribute.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp 40
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/bitcompression/compression.cpp 9
-rw-r--r-- searchlib/src/vespa/searchlib/bitcompression/compression.h 53
-rw-r--r-- searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp 10
-rw-r--r-- searchlib/src/vespa/searchlib/common/geo_location.cpp 10
-rw-r--r-- searchlib/src/vespa/searchlib/common/geo_location.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/common/geo_location_parser.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/common/geo_location_spec.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/common/resultset.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/common/resultset.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/common/sortresults.cpp 14
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/docidmapper.h 4
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/field_merger.cpp 7
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/fusion.cpp 7
-rw-r--r-- searchlib/src/vespa/searchlib/engine/docsumrequest.h 1
-rw-r--r-- searchlib/src/vespa/searchlib/engine/proto_converter.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/expression/integerresultnode.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/fef/objectstore.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp 6
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp 67
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h 8
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp 21
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.h 27
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.cpp 8
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/CMakeLists.txt 1
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp 26
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h 3
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/distance_calculator.cpp 90
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/distance_calculator.h 48
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/distance_function.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/hnsw_index.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/tensor_attribute.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/typed_cells_comparator.h 30
-rw-r--r-- searchlib/src/vespa/searchlib/test/imported_attribute_fixture.cpp 5
-rw-r--r-- searchlib/src/vespa/searchlib/transactionlog/common.cpp 6
-rw-r--r-- searchlib/src/vespa/searchlib/util/comprfile.cpp 13
-rw-r--r-- searchlib/src/vespa/searchlib/util/comprfile.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/util/dirtraverse.cpp 282
-rw-r--r-- searchlib/src/vespa/searchlib/util/dirtraverse.h 48
62 files changed, 543 insertions, 608 deletions
diff --git a/searchlib/src/apps/tests/biglogtest.cpp b/searchlib/src/apps/tests/biglogtest.cpp
index d5c59bf5b29..bd8991edc4b 100644
--- a/searchlib/src/apps/tests/biglogtest.cpp
+++ b/searchlib/src/apps/tests/biglogtest.cpp
@@ -8,6 +8,7 @@
#include <vespa/vespalib/util/size_literals.h>
#include <vespa/vespalib/util/threadstackexecutor.h>
#include <vespa/vespalib/data/databuffer.h>
+#include <filesystem>
using namespace search;
using search::index::DummyFileHeaderContext;
@@ -148,9 +149,8 @@ Test::testDIO()
{
uint64_t serial = 0;
- FastOS_File::EmptyDirectory(_dir.c_str());
- FastOS_File::RemoveDirectory(_dir.c_str());
- EXPECT_TRUE(FastOS_File::MakeDirectory(_dir.c_str()));
+ std::filesystem::remove_all(std::filesystem::path(_dir));
+ std::filesystem::create_directory(std::filesystem::path(_dir));
Map lidToBlobMap;
vespalib::DataBuffer buf;
@@ -238,7 +238,6 @@ Test::testDIO()
factory<DS> ds(_dir);
checkBlobs(ds(), lidToBlobMap);
}
- FastOS_File::EmptyDirectory(_dir.c_str());
- FastOS_File::RemoveDirectory(_dir.c_str());
+ std::filesystem::remove_all(std::filesystem::path(_dir));
TEST_FLUSH();
}
diff --git a/searchlib/src/tests/alignment/alignment.cpp b/searchlib/src/tests/alignment/alignment.cpp
index 06acf96e16c..3c6906f45bf 100644
--- a/searchlib/src/tests/alignment/alignment.cpp
+++ b/searchlib/src/tests/alignment/alignment.cpp
@@ -6,6 +6,9 @@ LOG_SETUP("alignment_test");
#include <sys/time.h>
#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/vespalib/util/size_literals.h>
+#include <vespa/vespalib/util/memory.h>
+
+using vespalib::Unaligned;
struct Timer {
rusage usage;
@@ -28,7 +31,7 @@ TEST_SETUP(Test);
double
timeAccess(void *bufp, uint32_t len, double &sum)
{
- double *buf = (double *)bufp;
+ auto buf = Unaligned<double>::ptr(bufp);
Timer timer;
timer.start();
for(uint32_t i = 0; i < 512_Ki; ++i) {
diff --git a/searchlib/src/tests/attribute/attribute_test.cpp b/searchlib/src/tests/attribute/attribute_test.cpp
index bb516c3b451..a0906e2a488 100644
--- a/searchlib/src/tests/attribute/attribute_test.cpp
+++ b/searchlib/src/tests/attribute/attribute_test.cpp
@@ -791,7 +791,7 @@ AttributeTest::checkCount(const AttributePtr & ptr, uint32_t doc, uint32_t value
if (!result) {
return false;
}
- EXPECT_EQ(valueCount, ptr->get(doc, &buffer[0], buffer.size())) << (result = false, "");
+ EXPECT_EQ(valueCount, ptr->get(doc, buffer.data(), buffer.size())) << (result = false, "");
if (!result) {
return false;
}
@@ -807,7 +807,7 @@ AttributeTest::checkContent(const AttributePtr & ptr, uint32_t doc, uint32_t val
std::vector<BufferType> buffer(valueCount);
bool retval = true;
EXPECT_TRUE((retval = retval && (static_cast<uint32_t>(ptr->getValueCount(doc)) == valueCount)));
- EXPECT_TRUE((retval = retval && (ptr->get(doc, &buffer[0], buffer.size()) == valueCount)));
+ EXPECT_TRUE((retval = retval && (ptr->get(doc, buffer.data(), buffer.size()) == valueCount)));
for (uint32_t i = 0; i < valueCount; ++i) {
EXPECT_TRUE((retval = retval && (buffer[i] == values[i % range])));
}
@@ -868,7 +868,7 @@ AttributeTest::testSingle(const AttributePtr & ptr, const std::vector<BufferType
ptr->clearDoc(doc);
}
ptr->commit();
- EXPECT_EQ(1u, ptr->get(doc, &buffer[0], buffer.size()));
+ EXPECT_EQ(1u, ptr->get(doc, buffer.data(), buffer.size()));
if (doc % 2 == 0) {
if (smallUInt) {
expectZero(buffer[0]);
@@ -1156,7 +1156,7 @@ AttributeTest::testWeightedSet(const AttributePtr & ptr, const std::vector<Buffe
EXPECT_TRUE(v.append(doc, values[j].getValue(), values[j].getWeight()));
}
commit(ptr);
- ASSERT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount);
+ ASSERT_TRUE(ptr->get(doc, buffer.data(), buffer.size()) == valueCount);
std::sort(buffer.begin(), buffer.begin() + valueCount, order_by_weight());
for (uint32_t j = 0; j < valueCount; ++j) {
EXPECT_TRUE(buffer[j].getValue() == ordered_values[j].getValue());
@@ -1173,20 +1173,20 @@ AttributeTest::testWeightedSet(const AttributePtr & ptr, const std::vector<Buffe
// append non-existent value
EXPECT_TRUE(v.append(doc, values[doc].getValue(), values[doc].getWeight()));
commit(ptr);
- ASSERT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 1);
+ ASSERT_TRUE(ptr->get(doc, buffer.data(), buffer.size()) == valueCount + 1);
EXPECT_TRUE(contains(buffer, valueCount + 1, values[doc]));
// append existent value
EXPECT_TRUE(v.append(doc, values[doc].getValue(), values[doc].getWeight() + 10));
commit(ptr);
- ASSERT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 1);
+ ASSERT_TRUE(ptr->get(doc, buffer.data(), buffer.size()) == valueCount + 1);
EXPECT_TRUE(contains(buffer, valueCount + 1, BufferType(values[doc].getValue(), values[doc].getWeight() + 10)));
// append non-existent value two times
EXPECT_TRUE(v.append(doc, values[doc + 1].getValue(), values[doc + 1].getWeight()));
EXPECT_TRUE(v.append(doc, values[doc + 1].getValue(), values[doc + 1].getWeight() + 10));
commit(ptr);
- ASSERT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 2);
+ ASSERT_TRUE(ptr->get(doc, buffer.data(), buffer.size()) == valueCount + 2);
EXPECT_TRUE(contains(buffer, valueCount + 2, BufferType(values[doc + 1].getValue(), values[doc + 1].getWeight() + 10)));
}
EXPECT_EQ(ptr->getStatus().getUpdateCount(), numDocs + (numDocs*(numDocs-1))/2 + numDocs*4);
@@ -1203,11 +1203,11 @@ AttributeTest::testWeightedSet(const AttributePtr & ptr, const std::vector<Buffe
EXPECT_TRUE(static_cast<uint32_t>(v.getValueCount(doc)) == valueCount + 2);
// remove existent value
- ASSERT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 2);
+ ASSERT_TRUE(ptr->get(doc, buffer.data(), buffer.size()) == valueCount + 2);
EXPECT_TRUE(contains_value(buffer, valueCount + 2, values[doc + 1].getValue()));
EXPECT_TRUE(v.remove(doc, values[doc + 1].getValue(), 0));
commit(ptr);
- ASSERT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 1);
+ ASSERT_TRUE(ptr->get(doc, buffer.data(), buffer.size()) == valueCount + 1);
EXPECT_FALSE(contains_value(buffer, valueCount + 1, values[doc + 1].getValue()));
}
EXPECT_EQ(ptr->getStatus().getUpdateCount(), numDocs + (numDocs*(numDocs-1))/2 + numDocs*4 + numDocs * 2);
diff --git a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp b/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp
index 5c1c49d8eb5..e27065f1c25 100644
--- a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp
+++ b/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp
@@ -47,7 +47,7 @@ public:
for (auto& key : keys) {
adds.emplace_back(KeyData(key, 1));
}
- _postings.apply(_trees[idx], &*adds.begin(), &*adds.end(), &*removes.begin(), &*removes.end());
+ _postings.apply(_trees[idx], adds.data(), adds.data() + adds.size(), removes.data(), removes.data() + removes.size());
}
void clear_tree(size_t idx) {
diff --git a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp
index 92c3da40fe9..90127e9ae7b 100644
--- a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp
+++ b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp
@@ -21,6 +21,7 @@
#include <vespa/vespalib/data/databuffer.h>
#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/vespalib/util/compress.h>
+#include <vespa/vespalib/util/memory.h>
#include <vespa/vespalib/stllike/asciistream.h>
#include <limits>
#include <cmath>
@@ -184,8 +185,8 @@ MemAttr::bufEqual(const Buffer &lhs, const Buffer &rhs) const
return true;
if (!EXPECT_TRUE(lhs->getDataLen() == rhs->getDataLen()))
return false;
- if (!EXPECT_TRUE(memcmp(lhs->getData(), rhs->getData(),
- lhs->getDataLen()) == 0))
+ if (!EXPECT_TRUE(vespalib::memcmp_safe(lhs->getData(), rhs->getData(),
+ lhs->getDataLen()) == 0))
return false;
return true;
}
@@ -480,7 +481,7 @@ EnumeratedSaveTest::getSearch(const V &vec, const T &term, bool prefix)
buildTermQuery(query, vec.getName(), ss.str(), prefix);
return (static_cast<const AttributeVector &>(vec)).
- getSearch(vespalib::stringref(&query[0], query.size()),
+ getSearch(vespalib::stringref(query.data(), query.size()),
SearchContextParams());
}
diff --git a/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp
index 10cc14012dd..573284ffa35 100644
--- a/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp
+++ b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp
@@ -83,8 +83,8 @@ protected:
additions.emplace_back(i, 0);
}
_store.apply(root,
- &additions[0], &additions[0] + additions.size(),
- &removals[0], &removals[0] + removals.size());
+ additions.data(), additions.data() + additions.size(),
+ removals.data(), removals.data() + removals.size());
return root;
}
static std::vector<int> make_exp_sequence(int start_key, int end_key)
diff --git a/searchlib/src/tests/attribute/postinglist/postinglist.cpp b/searchlib/src/tests/attribute/postinglist/postinglist.cpp
index 446aeaf22a7..54efb3261c8 100644
--- a/searchlib/src/tests/attribute/postinglist/postinglist.cpp
+++ b/searchlib/src/tests/attribute/postinglist/postinglist.cpp
@@ -379,8 +379,8 @@ insertRandomValues(Tree &tree,
std::vector<AttributePosting> additions;
std::vector<uint32_t> removals;
additions.push_back(newPosting);
- postings.apply(newIdx, &additions[0], &additions[0] + additions.size(),
- &removals[0], &removals[0] + removals.size());
+ postings.apply(newIdx, additions.data(), additions.data() + additions.size(),
+ removals.data(), removals.data() + removals.size());
std::atomic_thread_fence(std::memory_order_release);
itr.writeData(newIdx);
@@ -461,8 +461,8 @@ removeRandomValues(Tree &tree,
std::vector<AttributePosting> additions;
std::vector<uint32_t> removals;
removals.push_back(i->_docId);
- postings.apply(newIdx, &additions[0], &additions[0]+additions.size(),
- &removals[0], &removals[0] + removals.size());
+ postings.apply(newIdx, additions.data(), additions.data() + additions.size(),
+ removals.data(), removals.data() + removals.size());
if (newIdx != oldIdx) {
std::atomic_thread_fence(std::memory_order_release);
itr.writeData(newIdx);
diff --git a/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp b/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp
index c6091604a97..4ca2802d22d 100644
--- a/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp
+++ b/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp
@@ -6,7 +6,7 @@
#include <vespa/searchcommon/common/undefinedvalues.h>
#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/vespalib/util/size_literals.h>
-#include <vespa/fastos/file.h>
+#include <filesystem>
#include <vespa/log/log.h>
LOG_SETUP("sourceselector_test");
@@ -161,8 +161,8 @@ Test::requireThatSelectorCanSaveAndLoad(bool compactLidSpace)
selector.compactLidSpace(maxDocId - 4);
}
- FastOS_FileInterface::EmptyAndRemoveDirectory(index_dir.c_str());
- FastOS_FileInterface::MakeDirIfNotPresentOrExit(index_dir.c_str());
+ std::filesystem::remove_all(std::filesystem::path(index_dir));
+ std::filesystem::create_directory(std::filesystem::path(index_dir));
SourceSelector::SaveInfo::UP save_info =
selector.extractSaveInfo(base_file_name);
@@ -177,7 +177,7 @@ Test::requireThatSelectorCanSaveAndLoad(bool compactLidSpace)
EXPECT_EQUAL(maxDocId + 2, selector2->getDocIdLimit());
}
- FastOS_FileInterface::EmptyAndRemoveDirectory(index_dir.c_str());
+ std::filesystem::remove_all(std::filesystem::path(index_dir));
}
void
diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
index aa2c475e7b6..96039bee15b 100644
--- a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
+++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
@@ -48,7 +48,7 @@ checkCount(Attribute & vec, uint32_t doc, uint32_t valueCount,
{
std::vector<vespalib::string> buffer(valueCount);
EXPECT_TRUE(static_cast<uint32_t>(vec.getValueCount(doc)) == valueCount);
- EXPECT_TRUE(vec.get(doc, &buffer[0], buffer.size()) == valueCount);
+ EXPECT_TRUE(vec.get(doc, buffer.data(), buffer.size()) == valueCount);
EXPECT_TRUE(std::count(buffer.begin(), buffer.end(), value) == numValues);
}
@@ -125,10 +125,10 @@ testMultiValue(Attribute & attr, uint32_t numDocs)
// test get all
std::vector<vespalib::string> values(valueCount);
- ASSERT_TRUE(attr.get(doc, &values[0], valueCount) == valueCount);
+ ASSERT_TRUE(attr.get(doc, values.data(), valueCount) == valueCount);
std::vector<uint32_t> enums(valueCount);
- ASSERT_TRUE((static_cast<search::attribute::IAttributeVector &>(attr)).get(doc, &enums[0], valueCount) == valueCount);
+ ASSERT_TRUE((static_cast<search::attribute::IAttributeVector &>(attr)).get(doc, enums.data(), valueCount) == valueCount);
auto combined = zipped_and_sorted_by_first(values, enums);
for (uint32_t j = 0; j < valueCount; ++j) {
@@ -167,10 +167,10 @@ testMultiValue(Attribute & attr, uint32_t numDocs)
// test get all
std::vector<vespalib::string> values(valueCount);
- EXPECT_TRUE(attr.get(doc, &values[0], valueCount) == valueCount);
+ EXPECT_TRUE(attr.get(doc, values.data(), valueCount) == valueCount);
std::vector<uint32_t> enums(valueCount);
- EXPECT_TRUE((static_cast<search::attribute::IAttributeVector &>(attr)).get(doc, &enums[0], valueCount) == valueCount);
+ EXPECT_TRUE((static_cast<search::attribute::IAttributeVector &>(attr)).get(doc, enums.data(), valueCount) == valueCount);
auto combined = zipped_and_sorted_by_first(values, enums);
for (uint32_t j = 0; j < valueCount; ++j) {
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
index bf0b74b0003..72b2f1e320a 100644
--- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -157,6 +157,12 @@ public:
void expect_empty_add() const {
EXPECT_TRUE(_adds.empty());
}
+ void expect_empty_prepare_add() const {
+ EXPECT_TRUE(_prepare_adds.empty());
+ }
+ void expect_empty_complete_add() const {
+ EXPECT_TRUE(_complete_adds.empty());
+ }
void expect_entry(uint32_t exp_docid, const DoubleVector& exp_vector, const EntryVector& entries) const {
EXPECT_EQUAL(1u, entries.size());
EXPECT_EQUAL(exp_docid, entries.back().first);
@@ -881,6 +887,30 @@ TEST_F("nearest neighbor index can be updated in two phases", DenseTensorAttribu
}
}
+TEST_F("nearest neighbor index is NOT updated when tensor value is unchanged", DenseTensorAttributeMockIndex)
+{
+ auto& index = f.mock_index();
+ {
+ auto vec_a = vec_2d(3, 5);
+ auto prepare_result = f.prepare_set_tensor(1, vec_a);
+ index.expect_prepare_add(1, {3, 5});
+ f.complete_set_tensor(1, vec_a, std::move(prepare_result));
+ f.assertGetTensor(vec_a, 1);
+ index.expect_complete_add(1, {3, 5});
+ }
+ index.clear();
+ {
+ // Replaces previous value with the same value
+ auto vec_b = vec_2d(3, 5);
+ auto prepare_result = f.prepare_set_tensor(1, vec_b);
+ EXPECT_TRUE(prepare_result.get() == nullptr);
+ index.expect_empty_prepare_add();
+ f.complete_set_tensor(1, vec_b, std::move(prepare_result));
+ f.assertGetTensor(vec_b, 1);
+ index.expect_empty_complete_add();
+ }
+}
+
TEST_F("clearDoc() updates nearest neighbor index", DenseTensorAttributeMockIndex)
{
auto& index = f.mock_index();
diff --git a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp
index fffa1778c85..418182f7bbf 100644
--- a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp
+++ b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp
@@ -18,6 +18,7 @@
#include <vespa/vespalib/util/exceptions.h>
#include <vespa/vespalib/util/threadstackexecutor.h>
#include <vespa/vespalib/util/size_literals.h>
+#include <vespa/vespalib/util/memory.h>
#include <iomanip>
using document::BucketId;
@@ -275,7 +276,7 @@ void fetchAndTest(IDataStore & datastore, uint32_t lid, const void *a, size_t sz
vespalib::DataBuffer buf;
EXPECT_EQUAL(static_cast<ssize_t>(sz), datastore.read(lid, buf));
EXPECT_EQUAL(buf.getDataLen(), sz);
- EXPECT_TRUE(memcmp(a, buf.getData(), sz) == 0);
+ EXPECT_TRUE(vespalib::memcmp_safe(a, buf.getData(), sz) == 0);
}
TEST("testTruncatedIdxFile"){
@@ -666,13 +667,13 @@ TEST("test that the integrated visit cache works.") {
}
TEST("testWriteRead") {
- FastOS_File::RemoveDirectory("empty");
+ std::filesystem::remove_all(std::filesystem::path("empty"));
const char * bufA = "aaaaaaaaaaaaaaaaaaaaa";
const char * bufB = "bbbbbbbbbbbbbbbb";
const vespalib::ConstBufferRef a[2] = { vespalib::ConstBufferRef(bufA, strlen(bufA)), vespalib::ConstBufferRef(bufB, strlen(bufB))};
LogDataStore::Config config;
{
- EXPECT_TRUE(FastOS_File::MakeDirectory("empty"));
+ std::filesystem::create_directory(std::filesystem::path("empty"));
DummyFileHeaderContext fileHeaderContext;
vespalib::ThreadStackExecutor executor(1, 128_Ki);
MyTlSyncer tlSyncer;
@@ -736,7 +737,7 @@ TEST("testWriteRead") {
EXPECT_EQUAL(0ul, datastore.getDiskBloat());
EXPECT_EQUAL(0ul, datastore.getMaxSpreadAsBloat());
}
- FastOS_File::EmptyAndRemoveDirectory("empty");
+ std::filesystem::remove_all(std::filesystem::path("empty"));
}
TEST("requireThatSyncTokenIsUpdatedAfterFlush") {
diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
index d478adafa57..87de62dbfad 100644
--- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
+++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
@@ -2,6 +2,7 @@
#include "mysearch.h"
#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/queryeval/isourceselector.h>
#include <vespa/searchlib/queryeval/blueprint.h>
#include <vespa/searchlib/queryeval/intermediate_blueprints.h>
#include <vespa/searchlib/queryeval/leaf_blueprints.h>
@@ -23,6 +24,14 @@ using namespace search::fef;
using namespace search::query;
using search::BitVector;
+struct InvalidSelector : ISourceSelector {
+ InvalidSelector() : ISourceSelector(Source()) {}
+ void setSource(uint32_t, Source) override { abort(); }
+ uint32_t getDocIdLimit() const override { abort(); }
+ void compactLidSpace(uint32_t) override { abort(); }
+ std::unique_ptr<sourceselector::Iterator> createIterator() const override { abort(); }
+};
+
struct WeightOrder {
bool operator()(const wand::Term &t1, const wand::Term &t2) const {
return (t1.weight < t2.weight);
@@ -412,7 +421,7 @@ TEST("test Rank Blueprint") {
}
TEST("test SourceBlender Blueprint") {
- ISourceSelector *selector = nullptr; // not needed here
+ auto selector = std::make_unique<InvalidSelector>(); // not needed here
SourceBlenderBlueprint b(*selector);
{ // combine
std::vector<Blueprint::HitEstimate> est;
@@ -485,8 +494,8 @@ TEST("test SourceBlender Blueprint") {
}
TEST("test SourceBlender below AND optimization") {
- ISourceSelector *selector_1 = 0; // the one
- ISourceSelector *selector_2 = reinterpret_cast<ISourceSelector*>(100); // not the one
+ auto selector_1 = std::make_unique<InvalidSelector>(); // the one
+ auto selector_2 = std::make_unique<InvalidSelector>(); // not the one
//-------------------------------------------------------------------------
AndBlueprint *top = new AndBlueprint();
Blueprint::UP top_bp(top);
@@ -567,8 +576,8 @@ TEST("test SourceBlender below AND optimization") {
}
TEST("test SourceBlender below OR optimization") {
- ISourceSelector *selector_1 = 0; // the one
- ISourceSelector *selector_2 = reinterpret_cast<ISourceSelector*>(100); // not the one
+ auto selector_1 = std::make_unique<InvalidSelector>(); // the one
+ auto selector_2 = std::make_unique<InvalidSelector>(); // not the one
//-------------------------------------------------------------------------
OrBlueprint *top = new OrBlueprint();
Blueprint::UP top_up(top);
@@ -649,8 +658,8 @@ TEST("test SourceBlender below OR optimization") {
}
TEST("test SourceBlender below AND_NOT optimization") {
- ISourceSelector *selector_1 = 0; // the one
- ISourceSelector *selector_2 = reinterpret_cast<ISourceSelector*>(100); // not the one
+ auto selector_1 = std::make_unique<InvalidSelector>(); // the one
+ auto selector_2 = std::make_unique<InvalidSelector>(); // not the one
//-------------------------------------------------------------------------
AndNotBlueprint *top = new AndNotBlueprint();
Blueprint::UP top_up(top);
@@ -741,8 +750,8 @@ TEST("test SourceBlender below AND_NOT optimization") {
}
TEST("test SourceBlender below RANK optimization") {
- ISourceSelector *selector_1 = 0; // the one
- ISourceSelector *selector_2 = reinterpret_cast<ISourceSelector*>(100); // not the one
+ auto selector_1 = std::make_unique<InvalidSelector>(); // the one
+ auto selector_2 = std::make_unique<InvalidSelector>(); // not the one
//-------------------------------------------------------------------------
RankBlueprint *top = new RankBlueprint();
Blueprint::UP top_up(top);
@@ -876,7 +885,7 @@ TEST("test empty root node optimization and safeness") {
}
TEST("and with one empty child is optimized away") {
- ISourceSelector *selector = 0;
+ auto selector = std::make_unique<InvalidSelector>();
Blueprint::UP top(ap((new SourceBlenderBlueprint(*selector))->
addChild(ap(MyLeafSpec(10).create())).
addChild(ap((new AndBlueprint())->
@@ -891,7 +900,7 @@ TEST("and with one empty child is optimized away") {
}
TEST("test single child optimization") {
- ISourceSelector *selector = 0;
+ auto selector = std::make_unique<InvalidSelector>();
//-------------------------------------------------------------------------
Blueprint::UP top_up(
ap((new AndNotBlueprint())->
diff --git a/searchlib/src/tests/queryeval/nearest_neighbor/CMakeLists.txt b/searchlib/src/tests/queryeval/nearest_neighbor/CMakeLists.txt
index 3ebc8eb5251..e543a847498 100644
--- a/searchlib/src/tests/queryeval/nearest_neighbor/CMakeLists.txt
+++ b/searchlib/src/tests/queryeval/nearest_neighbor/CMakeLists.txt
@@ -5,5 +5,6 @@ vespa_add_executable(searchlib_nearest_neighbor_test_app TEST
nearest_neighbor_test.cpp
DEPENDS
searchlib
+ GTest::GTest
)
vespa_add_test(NAME searchlib_nearest_neighbor_test_app COMMAND searchlib_nearest_neighbor_test_app)
diff --git a/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp b/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp
index 029b74ff914..1e341eab707 100644
--- a/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp
+++ b/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp
@@ -1,37 +1,38 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/vespalib/testkit/test_kit.h>
-#include <vespa/vespalib/util/stringfmt.h>
-
#include <vespa/eval/eval/simple_value.h>
#include <vespa/eval/eval/tensor_spec.h>
+#include <vespa/searchcommon/attribute/config.h>
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/searchlib/common/feature.h>
#include <vespa/searchlib/fef/matchdata.h>
#include <vespa/searchlib/queryeval/nearest_neighbor_iterator.h>
+#include <vespa/searchlib/queryeval/nns_index_iterator.h>
#include <vespa/searchlib/queryeval/simpleresult.h>
#include <vespa/searchlib/tensor/dense_tensor_attribute.h>
+#include <vespa/searchlib/tensor/distance_calculator.h>
#include <vespa/searchlib/tensor/distance_function_factory.h>
+#include <vespa/vespalib/gtest/gtest.h>
#include <vespa/vespalib/test/insertion_operators.h>
-#include <vespa/searchlib/queryeval/nns_index_iterator.h>
-#include <vespa/searchcommon/attribute/config.h>
+#include <vespa/vespalib/util/stringfmt.h>
#include <vespa/log/log.h>
LOG_SETUP("nearest_neighbor_test");
#define EPS 1.0e-6
-using search::feature_t;
-using search::tensor::DenseTensorAttribute;
using search::AttributeVector;
using search::BitVector;
-using vespalib::eval::Value;
-using vespalib::eval::ValueType;
+using search::attribute::DistanceMetric;
+using search::feature_t;
+using search::tensor::DenseTensorAttribute;
+using search::tensor::DistanceCalculator;
+using search::tensor::DistanceFunction;
using vespalib::eval::CellType;
-using vespalib::eval::TensorSpec;
using vespalib::eval::SimpleValue;
-using search::tensor::DistanceFunction;
-using search::attribute::DistanceMetric;
+using vespalib::eval::TensorSpec;
+using vespalib::eval::Value;
+using vespalib::eval::ValueType;
using namespace search::fef;
using namespace search::queryeval;
@@ -96,7 +97,7 @@ struct Fixture
uint32_t sz = _attr->getNumDocs();
_global_filter = BitVector::create(sz);
for (uint32_t id : docids) {
- EXPECT_LESS(id, sz);
+ EXPECT_LT(id, sz);
_global_filter->setBit(id);
}
}
@@ -112,11 +113,11 @@ struct Fixture
setTensor(docId, *t);
}
- const DistanceFunction *dist_fun() const {
+ const DistanceFunction &dist_fun() const {
if (_cfg.tensorType().cell_type() == CellType::FLOAT) {
- return euclid_f.get();
+ return *euclid_f;
} else {
- return euclid_d.get();
+ return *euclid_d;
}
}
};
@@ -126,10 +127,11 @@ SimpleResult find_matches(Fixture &env, const Value &qtv, double threshold = std
auto md = MatchData::makeTestInstance(2, 2);
auto &tfmd = *(md->resolveTermField(0));
auto &attr = *(env._tensorAttr);
+ DistanceCalculator dist_calc(attr, qtv, env.dist_fun());
NearestNeighborDistanceHeap dh(2);
- dh.set_distance_threshold(env.dist_fun()->convert_threshold(threshold));
+ dh.set_distance_threshold(env.dist_fun().convert_threshold(threshold));
const BitVector *filter = env._global_filter.get();
- auto search = NearestNeighborIterator::create(strict, tfmd, qtv, attr, dh, filter, env.dist_fun());
+ auto search = NearestNeighborIterator::create(strict, tfmd, dist_calc, dh, filter);
if (strict) {
return SimpleResult().searchStrict(*search, attr.getNumDocs());
} else {
@@ -152,33 +154,33 @@ verify_iterator_returns_expected_results(const vespalib::string& attribute_tenso
auto nullTensor = createTensor(query_tensor_type_spec, 0.0, 0.0);
SimpleResult result = find_matches<true>(fixture, *nullTensor);
SimpleResult nullExpect({1,2,4,6});
- EXPECT_EQUAL(result, nullExpect);
+ EXPECT_EQ(result, nullExpect);
result = find_matches<false>(fixture, *nullTensor);
- EXPECT_EQUAL(result, nullExpect);
+ EXPECT_EQ(result, nullExpect);
auto farTensor = createTensor(query_tensor_type_spec, 9.0, 9.0);
SimpleResult farExpect({1,2,3,5});
result = find_matches<true>(fixture, *farTensor);
- EXPECT_EQUAL(result, farExpect);
+ EXPECT_EQ(result, farExpect);
result = find_matches<false>(fixture, *farTensor);
- EXPECT_EQUAL(result, farExpect);
+ EXPECT_EQ(result, farExpect);
SimpleResult null_thr5_exp({1,4,6});
result = find_matches<true>(fixture, *nullTensor, 5.0);
- EXPECT_EQUAL(result, null_thr5_exp);
+ EXPECT_EQ(result, null_thr5_exp);
result = find_matches<false>(fixture, *nullTensor, 5.0);
- EXPECT_EQUAL(result, null_thr5_exp);
+ EXPECT_EQ(result, null_thr5_exp);
SimpleResult far_thr4_exp({2,5});
result = find_matches<true>(fixture, *farTensor, 4.0);
- EXPECT_EQUAL(result, far_thr4_exp);
+ EXPECT_EQ(result, far_thr4_exp);
result = find_matches<false>(fixture, *farTensor, 4.0);
- EXPECT_EQUAL(result, far_thr4_exp);
+ EXPECT_EQ(result, far_thr4_exp);
}
-TEST("require that NearestNeighborIterator returns expected results") {
- TEST_DO(verify_iterator_returns_expected_results(denseSpecDouble, denseSpecDouble));
- TEST_DO(verify_iterator_returns_expected_results(denseSpecFloat, denseSpecFloat));
+TEST(NnsIndexIteratorTest, require_that_iterator_returns_expected_results) {
+ verify_iterator_returns_expected_results(denseSpecDouble, denseSpecDouble);
+ verify_iterator_returns_expected_results(denseSpecFloat, denseSpecFloat);
}
void
@@ -197,20 +199,20 @@ verify_iterator_returns_filtered_results(const vespalib::string& attribute_tenso
auto nullTensor = createTensor(query_tensor_type_spec, 0.0, 0.0);
SimpleResult result = find_matches<true>(fixture, *nullTensor);
SimpleResult nullExpect({1,3,4});
- EXPECT_EQUAL(result, nullExpect);
+ EXPECT_EQ(result, nullExpect);
result = find_matches<false>(fixture, *nullTensor);
- EXPECT_EQUAL(result, nullExpect);
+ EXPECT_EQ(result, nullExpect);
auto farTensor = createTensor(query_tensor_type_spec, 9.0, 9.0);
SimpleResult farExpect({1,3,4});
result = find_matches<true>(fixture, *farTensor);
- EXPECT_EQUAL(result, farExpect);
+ EXPECT_EQ(result, farExpect);
result = find_matches<false>(fixture, *farTensor);
- EXPECT_EQUAL(result, farExpect);
+ EXPECT_EQ(result, farExpect);
}
-TEST("require that NearestNeighborIterator returns filtered results") {
- TEST_DO(verify_iterator_returns_filtered_results(denseSpecDouble, denseSpecDouble));
- TEST_DO(verify_iterator_returns_filtered_results(denseSpecFloat, denseSpecFloat));
+TEST(NnsIndexIteratorTest, require_that_iterator_returns_filtered_results) {
+ verify_iterator_returns_filtered_results(denseSpecDouble, denseSpecDouble);
+ verify_iterator_returns_filtered_results(denseSpecFloat, denseSpecFloat);
}
template <bool strict>
@@ -218,8 +220,9 @@ std::vector<feature_t> get_rawscores(Fixture &env, const Value &qtv) {
auto md = MatchData::makeTestInstance(2, 2);
auto &tfmd = *(md->resolveTermField(0));
auto &attr = *(env._tensorAttr);
+ DistanceCalculator dist_calc(attr, qtv, env.dist_fun());
NearestNeighborDistanceHeap dh(2);
- auto search = NearestNeighborIterator::create(strict, tfmd, qtv, attr, dh, nullptr, env.dist_fun());
+ auto search = NearestNeighborIterator::create(strict, tfmd, dist_calc, dh, nullptr);
uint32_t limit = attr.getNumDocs();
uint32_t docid = 1;
search->initRange(docid, limit);
@@ -249,63 +252,63 @@ verify_iterator_sets_expected_rawscore(const vespalib::string& attribute_tensor_
auto nullTensor = createTensor(query_tensor_type_spec, 0.0, 0.0);
std::vector<feature_t> got = get_rawscores<true>(fixture, *nullTensor);
std::vector<feature_t> expected{5.0, 13.0, 10.0, 10.0, 5.0};
- EXPECT_EQUAL(got.size(), expected.size());
+ EXPECT_EQ(got.size(), expected.size());
for (size_t i = 0; i < expected.size(); ++i) {
- EXPECT_APPROX(1.0/(1.0+expected[i]), got[i], EPS);
+ EXPECT_NEAR(1.0/(1.0+expected[i]), got[i], EPS);
}
got = get_rawscores<false>(fixture, *nullTensor);
- EXPECT_EQUAL(got.size(), expected.size());
+ EXPECT_EQ(got.size(), expected.size());
for (size_t i = 0; i < expected.size(); ++i) {
- EXPECT_APPROX(1.0/(1.0+expected[i]), got[i], EPS);
+ EXPECT_NEAR(1.0/(1.0+expected[i]), got[i], EPS);
}
}
-TEST("require that NearestNeighborIterator sets expected rawscore") {
- TEST_DO(verify_iterator_sets_expected_rawscore(denseSpecDouble, denseSpecDouble));
- TEST_DO(verify_iterator_sets_expected_rawscore(denseSpecFloat, denseSpecFloat));
+TEST(NnsIndexIteratorTest, require_that_iterator_sets_expected_rawscore) {
+ verify_iterator_sets_expected_rawscore(denseSpecDouble, denseSpecDouble);
+ verify_iterator_sets_expected_rawscore(denseSpecFloat, denseSpecFloat);
}
-TEST("require that NnsIndexIterator works as expected") {
+TEST(NnsIndexIteratorTest, require_that_iterator_works_as_expected) {
std::vector<NnsIndexIterator::Hit> hits{{2,4.0}, {3,9.0}, {5,1.0}, {8,16.0}, {9,36.0}};
auto md = MatchData::makeTestInstance(2, 2);
auto &tfmd = *(md->resolveTermField(0));
- auto search = NnsIndexIterator::create(tfmd, hits, euclid_d.get());
+ auto search = NnsIndexIterator::create(tfmd, hits, *euclid_d);
uint32_t docid = 1;
search->initFullRange();
bool match = search->seek(docid);
EXPECT_FALSE(match);
EXPECT_FALSE(search->isAtEnd());
- EXPECT_EQUAL(2u, search->getDocId());
+ EXPECT_EQ(2u, search->getDocId());
docid = 2;
match = search->seek(docid);
EXPECT_TRUE(match);
EXPECT_FALSE(search->isAtEnd());
- EXPECT_EQUAL(docid, search->getDocId());
+ EXPECT_EQ(docid, search->getDocId());
search->unpack(docid);
- EXPECT_APPROX(1.0/(1.0+2.0), tfmd.getRawScore(), EPS);
+ EXPECT_NEAR(1.0/(1.0+2.0), tfmd.getRawScore(), EPS);
docid = 3;
match = search->seek(docid);
EXPECT_TRUE(match);
EXPECT_FALSE(search->isAtEnd());
- EXPECT_EQUAL(docid, search->getDocId());
+ EXPECT_EQ(docid, search->getDocId());
search->unpack(docid);
- EXPECT_APPROX(1.0/(1.0+3.0), tfmd.getRawScore(), EPS);
+ EXPECT_NEAR(1.0/(1.0+3.0), tfmd.getRawScore(), EPS);
docid = 4;
match = search->seek(docid);
EXPECT_FALSE(match);
EXPECT_FALSE(search->isAtEnd());
- EXPECT_EQUAL(5u, search->getDocId());
+ EXPECT_EQ(5u, search->getDocId());
docid = 6;
match = search->seek(docid);
EXPECT_FALSE(match);
EXPECT_FALSE(search->isAtEnd());
- EXPECT_EQUAL(8u, search->getDocId());
+ EXPECT_EQ(8u, search->getDocId());
docid = 8;
search->unpack(docid);
- EXPECT_APPROX(1.0/(1.0+4.0), tfmd.getRawScore(), EPS);
+ EXPECT_NEAR(1.0/(1.0+4.0), tfmd.getRawScore(), EPS);
docid = 9;
match = search->seek(docid);
EXPECT_TRUE(match);
@@ -320,10 +323,10 @@ TEST("require that NnsIndexIterator works as expected") {
match = search->seek(docid);
EXPECT_FALSE(match);
EXPECT_FALSE(search->isAtEnd());
- EXPECT_EQUAL(5u, search->getDocId());
+ EXPECT_EQ(5u, search->getDocId());
docid = 5;
search->unpack(docid);
- EXPECT_APPROX(1.0/(1.0+1.0), tfmd.getRawScore(), EPS);
+ EXPECT_NEAR(1.0/(1.0+1.0), tfmd.getRawScore(), EPS);
EXPECT_FALSE(search->isAtEnd());
docid = 6;
match = search->seek(docid);
@@ -331,4 +334,4 @@ TEST("require that NnsIndexIterator works as expected") {
EXPECT_TRUE(search->isAtEnd());
}
-TEST_MAIN() { TEST_RUN_ALL(); }
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_operation.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_operation.cpp
index 57980237f21..55577b3916c 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_operation.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_operation.cpp
@@ -111,7 +111,7 @@ public:
void operator()(IAttributeVector &attributeVector) override {
OP op(attributeVector, _operand);
if (op.valid()) {
- const RankedHit *hits = &_result.second[0];
+ const RankedHit *hits = _result.second.data();
size_t numHits = _result.second.size();
std::for_each(hits, hits+numHits, [&op](RankedHit hit) { op(hit.getDocId()); });
if (_result.first) {
diff --git a/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.cpp b/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.cpp
index b514275f75d..80e9b28139a 100644
--- a/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.cpp
@@ -13,7 +13,7 @@ sortLoadedByEnum(LoadedEnumAttributeVector &loaded)
LoadedEnumAttribute::EnumCompare, 56>::
radix_sort(LoadedEnumAttribute::EnumRadix(),
LoadedEnumAttribute::EnumCompare(),
- &loaded[0], loaded.size(), 16);
+ loaded.data(), loaded.size(), 16);
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/loadednumericvalue.cpp b/searchlib/src/vespa/searchlib/attribute/loadednumericvalue.cpp
index 0ffd6e2c845..4d3912ae24d 100644
--- a/searchlib/src/vespa/searchlib/attribute/loadednumericvalue.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/loadednumericvalue.cpp
@@ -14,7 +14,7 @@ sortLoadedByValue(SequentialReadModifyWriteVector<LoadedNumericValue<T>> & loade
typename LoadedNumericValue<T>::ValueCompare, 56>::
radix_sort(typename LoadedNumericValue<T>::ValueRadix(),
typename LoadedNumericValue<T>::ValueCompare(),
- &loaded[0],
+ loaded.data(),
loaded.size(),
16);
}
@@ -29,7 +29,7 @@ sortLoadedByDocId(SequentialReadModifyWriteVector<LoadedNumericValue<T>> & loade
typename LoadedNumericValue<T>::DocOrderCompare, 56>::
radix_sort(typename LoadedNumericValue<T>::DocRadix(),
typename LoadedNumericValue<T>::DocOrderCompare(),
- &loaded[0],
+ loaded.data(),
loaded.size(),
16);
}
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h
index cc128b0eef1..0a29b4af48d 100644
--- a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h
@@ -49,7 +49,7 @@ protected:
using WType = MultiValueType;
uint32_t get(DocId doc, const WType * & values) const {
MultiValueArrayRef array(this->_mvMapping.get(doc));
- values = &array[0];
+ values = array.data();
return array.size();
}
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp
index 0e0dceaf254..79276ce6f55 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp
@@ -69,11 +69,11 @@ PostingListAttributeBase<P>::handle_load_posting_lists_and_update_enum_store(enu
postings.removeDups();
newIndex = EntryRef();
_postingList.apply(newIndex,
- &postings._additions[0],
- &postings._additions[0] +
+ postings._additions.data(),
+ postings._additions.data() +
postings._additions.size(),
- &postings._removals[0],
- &postings._removals[0] +
+ postings._removals.data(),
+ postings._removals.data() +
postings._removals.size());
posting_indexes[posting_enum] = newIndex;
postings.clear();
@@ -91,10 +91,10 @@ PostingListAttributeBase<P>::handle_load_posting_lists_and_update_enum_store(enu
postings.removeDups();
newIndex = EntryRef();
_postingList.apply(newIndex,
- &postings._additions[0],
- &postings._additions[0] + postings._additions.size(),
- &postings._removals[0],
- &postings._removals[0] + postings._removals.size());
+ postings._additions.data(),
+ postings._additions.data() + postings._additions.size(),
+ postings._removals.data(),
+ postings._removals.data() + postings._removals.size());
posting_indexes[posting_enum] = newIndex;
loader.build_dictionary();
loader.free_unused_values();
@@ -158,10 +158,10 @@ clearPostings(attribute::IAttributeVector::EnumHandle eidx,
auto updater = [this, &postings](EntryRef posting_idx) -> EntryRef
{
_postingList.apply(posting_idx,
- &postings._additions[0],
- &postings._additions[0] + postings._additions.size(),
- &postings._removals[0],
- &postings._removals[0] + postings._removals.size());
+ postings._additions.data(),
+ postings._additions.data() + postings._additions.size(),
+ postings._removals.data(),
+ postings._removals.data() + postings._removals.size());
return posting_idx;
};
_dictionary.update_posting_list(er, cmp, updater);
@@ -240,11 +240,11 @@ handle_load_posting_lists(LoadedVector& loaded)
postings.removeDups();
newIndex = EntryRef();
_postingList.apply(newIndex,
- &postings._additions[0],
- &postings._additions[0] +
+ postings._additions.data(),
+ postings._additions.data() +
postings._additions.size(),
- &postings._removals[0],
- &postings._removals[0] +
+ postings._removals.data(),
+ postings._removals.data() +
postings._removals.size());
postings.clear();
if (value._docId < docIdLimit) {
@@ -262,11 +262,11 @@ handle_load_posting_lists(LoadedVector& loaded)
postings.removeDups();
newIndex = EntryRef();
_postingList.apply(newIndex,
- &postings._additions[0],
- &postings._additions[0] +
+ postings._additions.data(),
+ postings._additions.data() +
postings._additions.size(),
- &postings._removals[0],
- &postings._removals[0] + postings._removals.size());
+ postings._removals.data(),
+ postings._removals.data() + postings._removals.size());
similarValues[0]._pidx = newIndex;
for (size_t i(0), m(similarValues.size()); i < m; i++) {
loaded.write(similarValues[i]);
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp
index c17627a5026..d8426ce1a45 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp
@@ -20,8 +20,8 @@ PostingListSearchContext(const IEnumStoreDictionary& dictionary,
const ISearchContext &baseSearchCtx)
: _dictionary(dictionary),
_frozenDictionary(_dictionary.get_has_btree_dictionary() ? _dictionary.get_posting_dictionary().getFrozenView() : FrozenDictionary()),
- _lowerDictItr(BTreeNode::Ref(), _frozenDictionary.getAllocator()),
- _upperDictItr(BTreeNode::Ref(), _frozenDictionary.getAllocator()),
+ _lowerDictItr(_dictionary.get_has_btree_dictionary() ? DictionaryConstIterator(BTreeNode::Ref(), _frozenDictionary.getAllocator()) : DictionaryConstIterator()),
+ _upperDictItr(_dictionary.get_has_btree_dictionary() ? DictionaryConstIterator(BTreeNode::Ref(), _frozenDictionary.getAllocator()) : DictionaryConstIterator()),
_uniqueValues(0u),
_docIdLimit(docIdLimit),
_dictSize(_frozenDictionary.size()),
diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.cpp b/searchlib/src/vespa/searchlib/bitcompression/compression.cpp
index 5ac506e4fc2..b60250256f4 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/compression.cpp
+++ b/searchlib/src/vespa/searchlib/bitcompression/compression.cpp
@@ -98,8 +98,7 @@ EncodeContext64EBase<false>::writeBits(uint64_t data, uint32_t length)
// Shift new bits into cacheInt
_cacheInt |= (data << (64 - _cacheFree));
*_valI++ = bswap(_cacheInt);
-
- data >>= _cacheFree;
+ data = (_cacheFree < 64) ? data >> _cacheFree : 0;
// Initialize variables for receiving new bits
length -= _cacheFree;
_cacheInt = 0;
@@ -194,9 +193,9 @@ writeBits(const uint64_t *bits, uint32_t bitOffset, uint32_t bitLength)
if (bitOffset + bitLength < 64) {
uint32_t length = bitLength;
if (bigEndian) {
- uint64_t data = (EC::bswap(*bits) >>
- (64 - bitOffset - length)) &
- CodingTables::_intMask64[length];
+ uint64_t data = ((bitOffset + length) > 0)
+ ? (EC::bswap(*bits) >> (64 - bitOffset - length)) & CodingTables::_intMask64[length]
+ : 0;
UC64BE_WRITEBITS_NS(o, EC);
} else {
uint64_t data = (EC::bswap(*bits) >> bitOffset) &
diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.h b/searchlib/src/vespa/searchlib/bitcompression/compression.h
index 45005d499fb..74231638213 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/compression.h
+++ b/searchlib/src/vespa/searchlib/bitcompression/compression.h
@@ -165,8 +165,7 @@ public:
#define UC64BE_DECODEEXPGOLOMB(val, valI, preRead, cacheInt, k, EC) \
do { \
- length = \
- 63 - ::search::bitcompression::EncodeContext64BE::asmlog2(val); \
+ length = __builtin_clzl(val); \
unsigned int olength = length; \
val <<= length; \
if (__builtin_expect(length * 2 + 1 + (k) > 64, false)) { \
@@ -174,8 +173,9 @@ public:
length = 0; \
} \
val64 = (val >> (63 - olength - (k))) - (UINT64_C(1) << (k)); \
- val <<= olength + 1 + (k); \
- if (__builtin_expect(olength + 1 + (k) == 64, false)) { \
+ if (__builtin_expect(olength + 1 + (k) != 64, true)) { \
+ val <<= olength + 1 + (k); \
+ } else { \
val = 0; \
} \
length += olength + 1 + (k); \
@@ -193,8 +193,7 @@ public:
#define UC64BE_DECODEEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, \
EC) \
do { \
- length = \
- 63 - ::search::bitcompression::EncodeContext64BE::asmlog2(val); \
+ length = __builtin_clzl(val); \
val <<= length; \
val64 = (val >> (63 - length - (k))) - (UINT64_C(1) << (k)); \
val <<= length + 1 + (k); \
@@ -219,8 +218,7 @@ public:
#define UC64BE_DECODEEXPGOLOMB_SMALL_APPLY(val, valI, preRead, cacheInt, \
k, EC, resop) \
do { \
- length = \
- 63 - ::search::bitcompression::EncodeContext64BE::asmlog2(val); \
+ length = __builtin_clzl(val); \
val <<= length; \
resop (val >> (63 - length - (k))) - (UINT64_C(1) << (k)); \
val <<= length + 1 + (k); \
@@ -231,16 +229,16 @@ public:
#define UC64BE_SKIPEXPGOLOMB(val, valI, preRead, cacheInt, k, EC) \
do { \
- length = \
- 63 - ::search::bitcompression::EncodeContext64BE::asmlog2(val); \
+ length = __builtin_clzl(val); \
unsigned int olength = length; \
val <<= length; \
if (__builtin_expect(length * 2 + 1 + (k) > 64, false)) { \
UC64BE_READBITS(val, valI, preRead, cacheInt, EC); \
length = 0; \
} \
- val <<= olength + 1 + (k); \
- if (__builtin_expect(olength + 1 + (k) == 64, false)) { \
+ if (__builtin_expect(olength + 1 + (k) != 64, true)) { \
+ val <<= olength + 1 + (k); \
+ } else { \
val = 0; \
} \
length += olength + 1 + (k); \
@@ -258,8 +256,7 @@ public:
#define UC64BE_SKIPEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, \
EC) \
do { \
- length = \
- 63 - ::search::bitcompression::EncodeContext64BE::asmlog2(val); \
+ length = __builtin_clzl(val); \
val <<= length; \
val <<= length + 1 + (k); \
length += length + 1 + (k); \
@@ -394,11 +391,11 @@ public:
#define UC64LE_DECODEEXPGOLOMB(val, valI, preRead, cacheInt, k, EC) \
do { \
- unsigned int olength = \
- ::search::bitcompression::EncodeContext64LE::ffsl(val); \
+ unsigned int olength = __builtin_ctzl(val); \
length = olength + 1; \
- val >>= length; \
- if (__builtin_expect(length == 64, false)) { \
+ if (__builtin_expect(length != 64, true)) { \
+ val >>= length; \
+ } else { \
val = 0; \
} \
if (__builtin_expect(olength * 2 + 1 + (k) > 64, false)) { \
@@ -423,7 +420,7 @@ public:
#define UC64LE_DECODEEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, \
EC) \
do { \
- length = ::search::bitcompression::EncodeContext64LE::ffsl(val); \
+ length = __builtin_ctzl(val); \
val >>= length + 1; \
val64 = (val & ((UINT64_C(1) << (length + (k))) - 1)) + \
(UINT64_C(1) << (length + (k))) - (UINT64_C(1) << (k)); \
@@ -449,7 +446,7 @@ public:
#define UC64LE_DECODEEXPGOLOMB_SMALL_APPLY(val, valI, preRead, cacheInt, \
k, EC, resop) \
do { \
- length = ::search::bitcompression::EncodeContext64LE::ffsl(val); \
+ length = __builtin_ctzl(val); \
val >>= length + 1; \
resop (val & ((UINT64_C(1) << (length + (k))) - 1)) + \
(UINT64_C(1) << (length + (k))) - (UINT64_C(1) << (k)); \
@@ -461,11 +458,11 @@ public:
#define UC64LE_SKIPEXPGOLOMB(val, valI, preRead, cacheInt, k, EC) \
do { \
- unsigned int olength = \
- ::search::bitcompression::EncodeContext64LE::ffsl(val); \
+ unsigned int olength = __builtin_ctzl(val); \
length = olength + 1; \
- val >>= length; \
- if (__builtin_expect(length == 64, false)) { \
+ if (__builtin_expect(length != 64, true)) { \
+ val >>= length; \
+ } else { \
val = 0; \
} \
if (__builtin_expect(olength * 2 + 1 + (k) > 64, false)) { \
@@ -488,7 +485,7 @@ public:
#define UC64LE_SKIPEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, \
EC) \
do { \
- length = ::search::bitcompression::EncodeContext64LE::ffsl(val); \
+ length = __builtin_ctzl(val); \
val >>= length + 1; \
val >>= length + (k); \
length += length + 1 + (k); \
@@ -507,7 +504,11 @@ public:
if (length >= cacheFree) { \
cacheInt |= (data << (64 - cacheFree)); \
*bufI++ = EC::bswap(cacheInt); \
- data >>= cacheFree; \
+ if (__builtin_expect(cacheFree != 64, true)) { \
+ data >>= cacheFree; \
+ } else { \
+ data = 0; \
+ } \
length -= cacheFree; \
cacheInt = 0; \
cacheFree = 64; \
diff --git a/searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp b/searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp
index 9a711a028fb..b0a201d913e 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp
+++ b/searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp
@@ -365,7 +365,7 @@ PageDict4SPWriter::flushPage()
_prevL3Size - wordsSize * 8;
e.padBits(padding);
if (wordsSize > 0) {
- e.writeBytes(vespalib::ConstArrayRef<char>(&_words[0], wordsSize));
+ e.writeBytes(vespalib::ConstArrayRef<char>(_words.data(), wordsSize));
}
assert((e.getWriteOffset() & (getPageBitSize() - 1)) == 0);
_l6Word = _l3Word;
@@ -676,7 +676,7 @@ PageDict4PWriter::flushPage()
_countsSize - _countsWordOffset * 8;
e.padBits(padding);
if (_countsWordOffset > 0) {
- e.writeBytes(vespalib::ConstArrayRef(&_words[0], _countsWordOffset));
+ e.writeBytes(vespalib::ConstArrayRef(_words.data(), _countsWordOffset));
}
assert((e.getWriteOffset() & (getPageBitSize() - 1)) == 0);
_l3Word = _pendingCountsWord;
@@ -1055,7 +1055,7 @@ lookup(vespalib::stringref key)
L7Vector::const_iterator l7lb;
l7lb = std::lower_bound(_l7.begin(), _l7.end(), key);
- l7Pos = &*l7lb - &_l7[0];
+ l7Pos = l7lb - _l7.cbegin();
StartOffset startOffset;
uint64_t pageNum = _pFirstPageNum;
uint32_t sparsePageNum = _spFirstPageNum;
@@ -1863,7 +1863,7 @@ PageDict4Reader::setupPage()
uint32_t padding = (getPageBitSize() - wordsSize * 8 - pageOffset) & (getPageBitSize() - 1);
_pd.skipBits(padding);
_words.resize(wordsSize);
- _pd.readBytes(reinterpret_cast<uint8_t *>(&_words[0]), wordsSize);
+ _pd.readBytes(reinterpret_cast<uint8_t *>(_words.data()), wordsSize);
_wc = _words.begin();
_we = _words.end();
checkWordOffsets(_words, _l1SkipChecks, _l2SkipChecks);
@@ -1985,7 +1985,7 @@ PageDict4Reader::setupSPage()
uint32_t padding = getPageBitSize() - wordsSize * 8 - pageOffset;
_spd.skipBits(padding);
_spwords.resize(wordsSize);
- _spd.readBytes(reinterpret_cast<uint8_t *>(&_spwords[0]), wordsSize);
+ _spd.readBytes(reinterpret_cast<uint8_t *>(_spwords.data()), wordsSize);
_spwc = _spwords.begin();
_spwe = _spwords.end();
checkWordOffsets(_spwords, _l4SkipChecks, _l5SkipChecks);
diff --git a/searchlib/src/vespa/searchlib/common/geo_location.cpp b/searchlib/src/vespa/searchlib/common/geo_location.cpp
index 1806ba1338c..20408a93a82 100644
--- a/searchlib/src/vespa/searchlib/common/geo_location.cpp
+++ b/searchlib/src/vespa/searchlib/common/geo_location.cpp
@@ -8,6 +8,12 @@ namespace search::common {
namespace {
+uint64_t abs_diff(int32_t a, int32_t b) {
+ return (a > b)
+ ? (int64_t(a) - int64_t(b))
+ : (int64_t(b) - int64_t(a));
+}
+
ZCurve::BoundingBox to_z(GeoLocation::Box box) {
return ZCurve::BoundingBox(box.x.low, box.x.high,
box.y.low, box.y.high);
@@ -158,13 +164,13 @@ GeoLocation::GeoLocation(Box b, Point p, uint32_t r, Aspect xa)
uint64_t GeoLocation::sq_distance_to(Point p) const {
if (has_point) {
- uint64_t dx = (p.x > point.x) ? (p.x - point.x) : (point.x - p.x);
+ uint64_t dx = abs_diff(p.x, point.x);
if (x_aspect.active()) {
// x_aspect is a 32-bit fixed-point number in range [0,1]
// this implements dx = (dx * x_aspect)
dx = (dx * x_aspect.multiplier) >> 32;
}
- uint64_t dy = (p.y > point.y) ? (p.y - point.y) : (point.y - p.y);
+ uint64_t dy = abs_diff(p.y, point.y);
return dx*dx + dy*dy;
}
return 0;
diff --git a/searchlib/src/vespa/searchlib/common/geo_location.h b/searchlib/src/vespa/searchlib/common/geo_location.h
index 07e6fd055cc..09c77037b03 100644
--- a/searchlib/src/vespa/searchlib/common/geo_location.h
+++ b/searchlib/src/vespa/searchlib/common/geo_location.h
@@ -2,10 +2,10 @@
#pragma once
+#include <vespa/vespalib/geo/zcurve.h>
#include <string>
#include <cstdint>
#include <limits>
-#include <vespa/vespalib/geo/zcurve.h>
namespace search::common {
diff --git a/searchlib/src/vespa/searchlib/common/geo_location_parser.cpp b/searchlib/src/vespa/searchlib/common/geo_location_parser.cpp
index 8794169b4a6..d829e1b93e4 100644
--- a/searchlib/src/vespa/searchlib/common/geo_location_parser.cpp
+++ b/searchlib/src/vespa/searchlib/common/geo_location_parser.cpp
@@ -15,7 +15,7 @@ LOG_SETUP(".searchlib.common.geo_location_parser");
namespace {
int getInt(const char * &p) {
- int val;
+ uint32_t val;
bool isminus;
val = 0;
isminus = false;
diff --git a/searchlib/src/vespa/searchlib/common/geo_location_spec.h b/searchlib/src/vespa/searchlib/common/geo_location_spec.h
index ea0104aa058..f1e3671181d 100644
--- a/searchlib/src/vespa/searchlib/common/geo_location_spec.h
+++ b/searchlib/src/vespa/searchlib/common/geo_location_spec.h
@@ -2,9 +2,9 @@
#pragma once
+#include "geo_location.h"
#include <string>
#include <cstdint>
-#include "geo_location.h"
namespace search::common {
diff --git a/searchlib/src/vespa/searchlib/common/resultset.cpp b/searchlib/src/vespa/searchlib/common/resultset.cpp
index 2e1e431ad82..3a88a310fe8 100644
--- a/searchlib/src/vespa/searchlib/common/resultset.cpp
+++ b/searchlib/src/vespa/searchlib/common/resultset.cpp
@@ -99,7 +99,7 @@ ResultSet::mergeWithBitOverflow(HitRank default_value)
void
ResultSet::sort(FastS_IResultSorter & sorter, unsigned int ntop) {
- sorter.sortResults(&_rankedHitsArray[0], _rankedHitsArray.size(), ntop);
+ sorter.sortResults(_rankedHitsArray.data(), _rankedHitsArray.size(), ntop);
}
std::pair<std::unique_ptr<BitVector>, vespalib::Array<RankedHit>>
diff --git a/searchlib/src/vespa/searchlib/common/resultset.h b/searchlib/src/vespa/searchlib/common/resultset.h
index 6824fc4170d..a4823d2f372 100644
--- a/searchlib/src/vespa/searchlib/common/resultset.h
+++ b/searchlib/src/vespa/searchlib/common/resultset.h
@@ -26,7 +26,7 @@ public:
void allocArray(unsigned int arrayAllocated);
void setBitOverflow(std::unique_ptr<BitVector> newBitOverflow);
- const RankedHit * getArray() const { return &_rankedHitsArray[0]; }
+ const RankedHit * getArray() const { return _rankedHitsArray.data(); }
RankedHit & operator [](uint32_t i) { return _rankedHitsArray[i]; }
void push_back(RankedHit hit) { _rankedHitsArray.push_back_fast(hit); }
unsigned int getArrayUsed() const { return _rankedHitsArray.size(); }
diff --git a/searchlib/src/vespa/searchlib/common/sortresults.cpp b/searchlib/src/vespa/searchlib/common/sortresults.cpp
index f1756712d2c..59a47dd3312 100644
--- a/searchlib/src/vespa/searchlib/common/sortresults.cpp
+++ b/searchlib/src/vespa/searchlib/common/sortresults.cpp
@@ -209,9 +209,9 @@ FastS_SortSpec::realloc(uint32_t n, size_t & variableWidth, uint32_t & available
variableWidth *= 2;
available += variableWidth * n;
dataSize += variableWidth * n;
- uint32_t byteUsed = mySortData - &_binarySortData[0];
+ uint32_t byteUsed = mySortData - _binarySortData.data();
_binarySortData.resize(dataSize);
- return &_binarySortData[0] + byteUsed;
+ return _binarySortData.data() + byteUsed;
}
void
@@ -237,7 +237,7 @@ FastS_SortSpec::initSortData(const RankedHit *hits, uint32_t n)
uint32_t dataSize = (fixedWidth + variableWidth) * n;
uint32_t available = dataSize;
_binarySortData.resize(dataSize);
- uint8_t *mySortData = &_binarySortData[0];
+ uint8_t *mySortData = _binarySortData.data();
_sortDataArray.resize(n);
@@ -342,7 +342,7 @@ void
FastS_SortSpec::copySortData(uint32_t offset, uint32_t n,
uint32_t *idx, char *buf)
{
- const uint8_t * sortData = &_binarySortData[0];
+ const uint8_t * sortData = _binarySortData.data();
uint32_t totalLen = 0;
for (uint32_t i = offset; i < (offset + n); ++i, ++idx) {
const uint8_t * src = sortData + _sortDataArray[i]._idx;
@@ -378,7 +378,7 @@ inline int
FastS_SortSpec::Compare(const FastS_SortSpec *self, const SortData &a,
const SortData &b)
{
- const uint8_t * ref = &(self->_binarySortData[0]);
+ const uint8_t * ref = self->_binarySortData.data();
uint32_t len = a._len < b._len ? a._len : b._len;
int retval = memcmp(ref + a._idx,
ref + b._idx, len);
@@ -448,10 +448,10 @@ void
FastS_SortSpec::sortResults(RankedHit a[], uint32_t n, uint32_t topn)
{
initSortData(a, n);
- SortData * sortData = &_sortDataArray[0];
+ SortData * sortData = _sortDataArray.data();
{
Array<uint32_t> radixScratchPad(n, Alloc::alloc(0, MMAP_LIMIT));
- search::radix_sort(SortDataRadix(&_binarySortData[0]), StdSortDataCompare(&_binarySortData[0]), SortDataEof(), 1, sortData, n, &radixScratchPad[0], 0, 96, topn);
+ search::radix_sort(SortDataRadix(_binarySortData.data()), StdSortDataCompare(_binarySortData.data()), SortDataEof(), 1, sortData, n, radixScratchPad.data(), 0, 96, topn);
}
for (uint32_t i(0), m(_sortDataArray.size()); i < m; ++i) {
a[i]._rankValue = _sortDataArray[i]._rankValue;
diff --git a/searchlib/src/vespa/searchlib/diskindex/docidmapper.h b/searchlib/src/vespa/searchlib/diskindex/docidmapper.h
index 90bfa658a72..7c6f53720f2 100644
--- a/searchlib/src/vespa/searchlib/diskindex/docidmapper.h
+++ b/searchlib/src/vespa/searchlib/diskindex/docidmapper.h
@@ -42,9 +42,9 @@ public:
{ }
void setup(const DocIdMapping &mapping) {
- _selector = (mapping._selector != nullptr) ? &((*mapping._selector)[0]) : nullptr;
+ _selector = (mapping._selector != nullptr) ? mapping._selector->data() : nullptr;
_docIdLimit = mapping._docIdLimit;
- _selectorLimit = (mapping._selector != nullptr) ? (*mapping._selector).size() : 0u;
+ _selectorLimit = (mapping._selector != nullptr) ? mapping._selector->size() : 0u;
_selectorId = mapping._selectorId;
}
diff --git a/searchlib/src/vespa/searchlib/diskindex/field_merger.cpp b/searchlib/src/vespa/searchlib/diskindex/field_merger.cpp
index 860903174bc..d27ab2e7787 100644
--- a/searchlib/src/vespa/searchlib/diskindex/field_merger.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/field_merger.cpp
@@ -12,12 +12,12 @@
#include <vespa/searchlib/common/i_flush_token.h>
#include <vespa/searchlib/index/schemautil.h>
#include <vespa/searchlib/util/filekit.h>
-#include <vespa/searchlib/util/dirtraverse.h>
#include <vespa/searchlib/util/posting_priority_queue_merger.hpp>
#include <vespa/vespalib/io/fileutil.h>
#include <vespa/vespalib/stllike/asciistream.h>
#include <vespa/vespalib/util/exceptions.h>
#include <filesystem>
+#include <system_error>
#include <vespa/log/log.h>
@@ -107,8 +107,9 @@ FieldMerger::clean_tmp_dirs()
while (i > 0) {
i--;
vespalib::string tmpindexpath = createTmpPath(_field_dir, i);
- search::DirectoryTraverse dt(tmpindexpath.c_str());
- if (!dt.RemoveTree()) {
+ std::error_code ec;
+ std::filesystem::remove_all(std::filesystem::path(tmpindexpath), ec);
+ if (ec) {
LOG(error, "Failed to clean tmpdir %s", tmpindexpath.c_str());
return false;
}
diff --git a/searchlib/src/vespa/searchlib/diskindex/fusion.cpp b/searchlib/src/vespa/searchlib/diskindex/fusion.cpp
index e142255252c..4fd9d116244 100644
--- a/searchlib/src/vespa/searchlib/diskindex/fusion.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/fusion.cpp
@@ -8,13 +8,13 @@
#include <vespa/searchlib/common/documentsummary.h>
#include <vespa/searchlib/common/i_flush_token.h>
#include <vespa/searchlib/index/schemautil.h>
-#include <vespa/searchlib/util/dirtraverse.h>
#include <vespa/vespalib/io/fileutil.h>
#include <vespa/vespalib/util/error.h>
#include <vespa/vespalib/util/exceptions.h>
#include <vespa/vespalib/util/lambdatask.h>
#include <vespa/document/util/queue.h>
#include <filesystem>
+#include <system_error>
#include <vespa/log/log.h>
@@ -117,8 +117,9 @@ Fusion::merge(vespalib::Executor& shared_executor, std::shared_ptr<IFlushToken>
LOG(error, "\"%s\" is not a directory", _fusion_out_index.get_path().c_str());
return false;
}
- search::DirectoryTraverse dt(_fusion_out_index.get_path().c_str());
- if (!dt.RemoveTree()) {
+ std::error_code ec;
+ std::filesystem::remove_all(std::filesystem::path(_fusion_out_index.get_path()), ec);
+ if (ec) {
LOG(error, "Failed to clean directory \"%s\"", _fusion_out_index.get_path().c_str());
return false;
}
diff --git a/searchlib/src/vespa/searchlib/engine/docsumrequest.h b/searchlib/src/vespa/searchlib/engine/docsumrequest.h
index 27fb5b25a96..d4f3a1ec340 100644
--- a/searchlib/src/vespa/searchlib/engine/docsumrequest.h
+++ b/searchlib/src/vespa/searchlib/engine/docsumrequest.h
@@ -2,7 +2,6 @@
#pragma once
-#include "propertiesmap.h"
#include "request.h"
#include "lazy_source.h"
#include <vespa/document/base/globalid.h>
diff --git a/searchlib/src/vespa/searchlib/engine/proto_converter.cpp b/searchlib/src/vespa/searchlib/engine/proto_converter.cpp
index 77781d583cb..4eaa5b3eb65 100644
--- a/searchlib/src/vespa/searchlib/engine/proto_converter.cpp
+++ b/searchlib/src/vespa/searchlib/engine/proto_converter.cpp
@@ -135,7 +135,7 @@ ProtoConverter::search_reply_to_proto(const SearchReply &reply, ProtoSearchReply
}
}
}
- proto.set_grouping_blob(&reply.groupResult[0], reply.groupResult.size());
+ proto.set_grouping_blob(reply.groupResult.data(), reply.groupResult.size());
const auto &slime_trace = reply.propertiesMap.trace().lookup("slime");
proto.set_slime_trace(slime_trace.get().data(), slime_trace.get().size());
if (reply.my_issues) {
diff --git a/searchlib/src/vespa/searchlib/expression/integerresultnode.h b/searchlib/src/vespa/searchlib/expression/integerresultnode.h
index a7fe86acd97..e63ac783bc8 100644
--- a/searchlib/src/vespa/searchlib/expression/integerresultnode.h
+++ b/searchlib/src/vespa/searchlib/expression/integerresultnode.h
@@ -28,7 +28,7 @@ public:
T bv(static_cast<const IntegerResultNodeT &>(b)._value);
return (_value < bv) ? -1 : (_value > bv) ? 1 : 0;
}
- void add(const ResultNode & b) override { _value += b.getInteger(); }
+ void add(const ResultNode & b) override { _value = uint64_t(_value) + uint64_t(b.getInteger()); }
void negate() override { _value = - _value; }
void multiply(const ResultNode & b) override {
if constexpr (std::is_same_v<T, bool>) {
diff --git a/searchlib/src/vespa/searchlib/fef/objectstore.cpp b/searchlib/src/vespa/searchlib/fef/objectstore.cpp
index 2da08e2915d..4cf185ad55e 100644
--- a/searchlib/src/vespa/searchlib/fef/objectstore.cpp
+++ b/searchlib/src/vespa/searchlib/fef/objectstore.cpp
@@ -2,8 +2,7 @@
#include "objectstore.h"
#include <vespa/vespalib/stllike/hash_map.hpp>
-namespace search {
-namespace fef {
+namespace search::fef {
ObjectStore::ObjectStore() :
_objectMap()
@@ -37,4 +36,3 @@ ObjectStore::get(const vespalib::string & key) const
}
}
-}
diff --git a/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp b/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp
index b851fc50518..8664b0fc14b 100644
--- a/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp
@@ -18,7 +18,7 @@ HitCollector::sortHitsByScore(size_t topn)
_scoreOrder.push_back(i);
}
ShiftBasedRadixSorter<uint32_t, IndirectScoreRadix, IndirectScoreComparator, 56, true>::
- radix_sort(IndirectScoreRadix(&_hits[0]), IndirectScoreComparator(&_hits[0]), &_scoreOrder[0], _scoreOrder.size(), 16, topn);
+ radix_sort(IndirectScoreRadix(_hits.data()), IndirectScoreComparator(_hits.data()), _scoreOrder.data(), _scoreOrder.size(), 16, topn);
_scoreOrder.resize(topn);
}
}
@@ -28,7 +28,7 @@ HitCollector::sortHitsByDocId()
{
if (_hitsSortOrder != SortOrder::DOC_ID) {
ShiftBasedRadixSorter<Hit, DocIdRadix, DocIdComparator, 24>::
- radix_sort(DocIdRadix(), DocIdComparator(), &_hits[0], _hits.size(), 16);
+ radix_sort(DocIdRadix(), DocIdComparator(), _hits.data(), _hits.size(), 16);
_hitsSortOrder = SortOrder::DOC_ID;
_scoreOrder.clear();
}
@@ -170,7 +170,7 @@ HitCollector::getSortedHitSequence(size_t max_hits)
{
size_t num_hits = std::min(_hits.size(), max_hits);
sortHitsByScore(num_hits);
- return SortedHitSequence(&_hits[0], &_scoreOrder[0], num_hits);
+ return SortedHitSequence(_hits.data(), _scoreOrder.data(), num_hits);
}
void
diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp
index 8c03800b92a..8aa806b01cd 100644
--- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp
@@ -4,7 +4,6 @@
#include "nearest_neighbor_blueprint.h"
#include "nearest_neighbor_iterator.h"
#include "nns_index_iterator.h"
-#include <vespa/eval/eval/fast_value.h>
#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
#include <vespa/searchlib/tensor/dense_tensor_attribute.h>
#include <vespa/searchlib/tensor/distance_function_factory.h>
@@ -13,45 +12,12 @@
LOG_SETUP(".searchlib.queryeval.nearest_neighbor_blueprint");
-using vespalib::eval::CellType;
-using vespalib::eval::FastValueBuilderFactory;
-using vespalib::eval::TypedCells;
using vespalib::eval::Value;
-using vespalib::eval::ValueType;
namespace search::queryeval {
namespace {
-template<typename LCT, typename RCT>
-std::unique_ptr<Value>
-convert_cells(const ValueType &new_type, std::unique_ptr<Value> old_value)
-{
- auto old_cells = old_value->cells().typify<LCT>();
- auto builder = FastValueBuilderFactory::get().create_value_builder<RCT>(new_type);
- auto new_cells = builder->add_subspace();
- assert(old_cells.size() == new_cells.size());
- auto p = new_cells.begin();
- for (LCT value : old_cells) {
- RCT conv(value);
- *p++ = conv;
- }
- return builder->build(std::move(builder));
-}
-
-struct ConvertCellsSelector
-{
- template <typename LCT, typename RCT>
- static auto invoke(const ValueType &new_type, std::unique_ptr<Value> old_value) {
- return convert_cells<LCT, RCT>(new_type, std::move(old_value));
- }
- auto operator() (CellType from, CellType to, std::unique_ptr<Value> old_value) const {
- using MyTypify = vespalib::eval::TypifyCellType;
- ValueType new_type = old_value->type().cell_cast(to);
- return vespalib::typify_invoke<2,MyTypify,ConvertCellsSelector>(from, to, new_type, std::move(old_value));
- }
-};
-
vespalib::string
to_string(NearestNeighborBlueprint::Algorithm algorithm)
{
@@ -78,7 +44,8 @@ NearestNeighborBlueprint::NearestNeighborBlueprint(const queryeval::FieldSpec& f
double global_filter_upper_limit)
: ComplexLeafBlueprint(field),
_attr_tensor(attr_tensor),
- _query_tensor(std::move(query_tensor)),
+ _distance_calc(_attr_tensor, std::move(query_tensor)),
+ _query_tensor(_distance_calc.query_tensor()),
_target_hits(target_hits),
_adjusted_target_hits(target_hits),
_approximate(approximate),
@@ -86,7 +53,6 @@ NearestNeighborBlueprint::NearestNeighborBlueprint(const queryeval::FieldSpec& f
_distance_threshold(std::numeric_limits<double>::max()),
_global_filter_lower_limit(global_filter_lower_limit),
_global_filter_upper_limit(global_filter_upper_limit),
- _fallback_dist_fun(),
_distance_heap(target_hits),
_found_hits(),
_algorithm(Algorithm::EXACT),
@@ -95,27 +61,13 @@ NearestNeighborBlueprint::NearestNeighborBlueprint(const queryeval::FieldSpec& f
_global_filter_hits(),
_global_filter_hit_ratio()
{
- CellType attr_ct = _attr_tensor.getTensorType().cell_type();
- _fallback_dist_fun = search::tensor::make_distance_function(_attr_tensor.distance_metric(), attr_ct);
- _dist_fun = _fallback_dist_fun.get();
- assert(_dist_fun);
- auto nns_index = _attr_tensor.nearest_neighbor_index();
- if (nns_index) {
- _dist_fun = nns_index->distance_function();
- assert(_dist_fun);
- }
- auto query_ct = _query_tensor->cells().type;
- CellType required_ct = _dist_fun->expected_cell_type();
- if (query_ct != required_ct) {
- ConvertCellsSelector converter;
- _query_tensor = converter(query_ct, required_ct, std::move(_query_tensor));
- }
if (distance_threshold < std::numeric_limits<double>::max()) {
- _distance_threshold = _dist_fun->convert_threshold(distance_threshold);
+ _distance_threshold = _distance_calc.function().convert_threshold(distance_threshold);
_distance_heap.set_distance_threshold(_distance_threshold);
}
uint32_t est_hits = _attr_tensor.get_num_docs();
setEstimate(HitEstimate(est_hits, false));
+ auto nns_index = _attr_tensor.nearest_neighbor_index();
set_want_global_filter(nns_index && _approximate);
}
@@ -155,7 +107,7 @@ NearestNeighborBlueprint::set_global_filter(const GlobalFilter &global_filter, d
void
NearestNeighborBlueprint::perform_top_k(const search::tensor::NearestNeighborIndex* nns_index)
{
- auto lhs = _query_tensor->cells();
+ auto lhs = _query_tensor.cells();
uint32_t k = _adjusted_target_hits;
if (_global_filter->has_filter()) {
auto filter = _global_filter->filter();
@@ -175,13 +127,12 @@ NearestNeighborBlueprint::createLeafSearch(const search::fef::TermFieldMatchData
switch (_algorithm) {
case Algorithm::INDEX_TOP_K_WITH_FILTER:
case Algorithm::INDEX_TOP_K:
- return NnsIndexIterator::create(tfmd, _found_hits, _dist_fun);
+ return NnsIndexIterator::create(tfmd, _found_hits, _distance_calc.function());
default:
;
}
- const Value &qT = *_query_tensor;
- return NearestNeighborIterator::create(strict, tfmd, qT, _attr_tensor,
- _distance_heap, _global_filter->filter(), _dist_fun);
+ return NearestNeighborIterator::create(strict, tfmd, _distance_calc,
+ _distance_heap, _global_filter->filter());
}
void
@@ -189,7 +140,7 @@ NearestNeighborBlueprint::visitMembers(vespalib::ObjectVisitor& visitor) const
{
ComplexLeafBlueprint::visitMembers(visitor);
visitor.visitString("attribute_tensor", _attr_tensor.getTensorType().to_spec());
- visitor.visitString("query_tensor", _query_tensor->type().to_spec());
+ visitor.visitString("query_tensor", _query_tensor.type().to_spec());
visitor.visitInt("target_hits", _target_hits);
visitor.visitInt("adjusted_target_hits", _adjusted_target_hits);
visitor.visitInt("explore_additional_hits", _explore_additional_hits);
diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h
index 16b0e13014e..3be7d7fd01f 100644
--- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h
@@ -3,6 +3,7 @@
#include "blueprint.h"
#include "nearest_neighbor_distance_heap.h"
+#include <vespa/searchlib/tensor/distance_calculator.h>
#include <vespa/searchlib/tensor/distance_function.h>
#include <vespa/searchlib/tensor/nearest_neighbor_index.h>
#include <optional>
@@ -28,7 +29,8 @@ public:
};
private:
const tensor::ITensorAttribute& _attr_tensor;
- std::unique_ptr<vespalib::eval::Value> _query_tensor;
+ search::tensor::DistanceCalculator _distance_calc;
+ const vespalib::eval::Value& _query_tensor;
uint32_t _target_hits;
uint32_t _adjusted_target_hits;
bool _approximate;
@@ -36,8 +38,6 @@ private:
double _distance_threshold;
double _global_filter_lower_limit;
double _global_filter_upper_limit;
- search::tensor::DistanceFunction::UP _fallback_dist_fun;
- const search::tensor::DistanceFunction *_dist_fun;
mutable NearestNeighborDistanceHeap _distance_heap;
std::vector<search::tensor::NearestNeighborIndex::Neighbor> _found_hits;
Algorithm _algorithm;
@@ -59,7 +59,7 @@ public:
NearestNeighborBlueprint& operator=(const NearestNeighborBlueprint&) = delete;
~NearestNeighborBlueprint();
const tensor::ITensorAttribute& get_attribute_tensor() const { return _attr_tensor; }
- const vespalib::eval::Value& get_query_tensor() const { return *_query_tensor; }
+ const vespalib::eval::Value& get_query_tensor() const { return _query_tensor; }
uint32_t get_target_hits() const { return _target_hits; }
uint32_t get_adjusted_target_hits() const { return _adjusted_target_hits; }
void set_global_filter(const GlobalFilter &global_filter, double estimated_hit_ratio) override;
diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp
index 6a00568bd06..e06fcc614d8 100644
--- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp
@@ -2,6 +2,8 @@
#include "nearest_neighbor_iterator.h"
#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/searchlib/tensor/distance_calculator.h>
+#include <vespa/searchlib/tensor/distance_function.h>
using search::tensor::ITensorAttribute;
using vespalib::ConstArrayRef;
@@ -34,11 +36,10 @@ public:
NearestNeighborImpl(Params params_in)
: NearestNeighborIterator(params_in),
- _lhs(params().queryTensor.cells()),
_lastScore(0.0)
{
- assert(is_compatible(params().tensorAttribute.getTensorType(),
- params().queryTensor.type()));
+ assert(is_compatible(params().distance_calc.attribute_tensor().getTensorType(),
+ params().distance_calc.query_tensor().type()));
}
~NearestNeighborImpl();
@@ -64,7 +65,7 @@ public:
}
void doUnpack(uint32_t docId) override {
- double score = params().distanceFunction->to_rawscore(_lastScore);
+ double score = params().distance_calc.function().to_rawscore(_lastScore);
params().tfmd.setRawScore(docId, score);
params().distanceHeap.used(_lastScore);
}
@@ -73,11 +74,9 @@ public:
private:
double computeDistance(uint32_t docId, double limit) {
- auto rhs = params().tensorAttribute.extract_cells_ref(docId);
- return params().distanceFunction->calc_with_limit(_lhs, rhs, limit);
+ return params().distance_calc.calc_with_limit(docId, limit);
}
- TypedCells _lhs;
double _lastScore;
};
@@ -105,14 +104,12 @@ std::unique_ptr<NearestNeighborIterator>
NearestNeighborIterator::create(
bool strict,
fef::TermFieldMatchData &tfmd,
- const vespalib::eval::Value &queryTensor,
- const search::tensor::ITensorAttribute &tensorAttribute,
+ const search::tensor::DistanceCalculator &distance_calc,
NearestNeighborDistanceHeap &distanceHeap,
- const search::BitVector *filter,
- const search::tensor::DistanceFunction *dist_fun)
+ const search::BitVector *filter)
{
- Params params(tfmd, queryTensor, tensorAttribute, distanceHeap, filter, dist_fun);
+ Params params(tfmd, distance_calc, distanceHeap, filter);
if (filter) {
return resolve_strict<true>(strict, params);
} else {
diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.h b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.h
index 66622288d84..0d8f70d15c2 100644
--- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.h
+++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.h
@@ -7,10 +7,11 @@
#include <vespa/eval/eval/value.h>
#include <vespa/searchlib/fef/termfieldmatchdata.h>
#include <vespa/searchlib/tensor/i_tensor_attribute.h>
-#include <vespa/searchlib/tensor/distance_function.h>
#include <vespa/vespalib/util/priority_queue.h>
#include <cmath>
+namespace search::tensor { class DistanceCalculator; }
+
namespace search::queryeval {
class NearestNeighborIterator : public SearchIterator
@@ -21,24 +22,18 @@ public:
struct Params {
fef::TermFieldMatchData &tfmd;
- const Value &queryTensor;
- const ITensorAttribute &tensorAttribute;
+ const search::tensor::DistanceCalculator &distance_calc;
NearestNeighborDistanceHeap &distanceHeap;
const search::BitVector *filter;
- const search::tensor::DistanceFunction *distanceFunction;
-
+
Params(fef::TermFieldMatchData &tfmd_in,
- const Value &queryTensor_in,
- const ITensorAttribute &tensorAttribute_in,
+ const search::tensor::DistanceCalculator &distance_calc_in,
NearestNeighborDistanceHeap &distanceHeap_in,
- const search::BitVector *filter_in,
- const search::tensor::DistanceFunction *distanceFunction_in)
+ const search::BitVector *filter_in)
: tfmd(tfmd_in),
- queryTensor(queryTensor_in),
- tensorAttribute(tensorAttribute_in),
+ distance_calc(distance_calc_in),
distanceHeap(distanceHeap_in),
- filter(filter_in),
- distanceFunction(distanceFunction_in)
+ filter(filter_in)
{}
};
@@ -49,11 +44,9 @@ public:
static std::unique_ptr<NearestNeighborIterator> create(
bool strict,
fef::TermFieldMatchData &tfmd,
- const Value &queryTensor,
- const search::tensor::ITensorAttribute &tensorAttribute,
+ const search::tensor::DistanceCalculator &distance_calc,
NearestNeighborDistanceHeap &distanceHeap,
- const search::BitVector *filter,
- const search::tensor::DistanceFunction *dist_fun);
+ const search::BitVector *filter);
const Params& params() const { return _params; }
private:
diff --git a/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.cpp b/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.cpp
index cd65f01025b..95264a79431 100644
--- a/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.cpp
@@ -18,13 +18,13 @@ class NeighborVectorIterator : public NnsIndexIterator
private:
fef::TermFieldMatchData &_tfmd;
const std::vector<Neighbor> &_hits;
- const search::tensor::DistanceFunction * const _dist_fun;
+ const search::tensor::DistanceFunction &_dist_fun;
uint32_t _idx;
double _last_abstract_dist;
public:
NeighborVectorIterator(fef::TermFieldMatchData &tfmd,
const std::vector<Neighbor> &hits,
- const search::tensor::DistanceFunction *dist_fun)
+ const search::tensor::DistanceFunction &dist_fun)
: _tfmd(tfmd),
_hits(hits),
_dist_fun(dist_fun),
@@ -54,7 +54,7 @@ public:
}
void doUnpack(uint32_t docId) override {
- double score = _dist_fun->to_rawscore(_last_abstract_dist);
+ double score = _dist_fun.to_rawscore(_last_abstract_dist);
_tfmd.setRawScore(docId, score);
}
@@ -65,7 +65,7 @@ std::unique_ptr<NnsIndexIterator>
NnsIndexIterator::create(
fef::TermFieldMatchData &tfmd,
const std::vector<Neighbor> &hits,
- const search::tensor::DistanceFunction *dist_fun)
+ const search::tensor::DistanceFunction &dist_fun)
{
return std::make_unique<NeighborVectorIterator>(tfmd, hits, dist_fun);
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.h b/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.h
index 019ac8579bd..031a603de49 100644
--- a/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.h
+++ b/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.h
@@ -16,7 +16,7 @@ public:
static std::unique_ptr<NnsIndexIterator> create(
fef::TermFieldMatchData &tfmd,
const std::vector<Hit> &hits,
- const search::tensor::DistanceFunction *dist_fun);
+ const search::tensor::DistanceFunction &dist_fun);
};
} // namespace
diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
index ae34cdd66c8..9e0ccb8d37a 100644
--- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
@@ -11,6 +11,7 @@ vespa_add_library(searchlib_tensor OBJECT
direct_tensor_attribute.cpp
direct_tensor_saver.cpp
direct_tensor_store.cpp
+ distance_calculator.cpp
distance_function_factory.cpp
euclidean_distance.cpp
geo_degrees_distance.cpp
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
index 2fdb73fcf96..c713b3ef335 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
@@ -8,9 +8,9 @@
#include "tensor_attribute.hpp"
#include <vespa/eval/eval/value.h>
#include <vespa/fastlib/io/bufferedfile.h>
+#include <vespa/searchcommon/attribute/config.h>
#include <vespa/searchlib/attribute/load_utils.h>
#include <vespa/searchlib/attribute/readerbase.h>
-#include <vespa/searchcommon/attribute/config.h>
#include <vespa/vespalib/data/slime/inserter.h>
#include <vespa/vespalib/util/cpu_usage.h>
#include <vespa/vespalib/util/lambdatask.h>
@@ -102,10 +102,16 @@ BlobSequenceReader::is_present() {
}
+bool
+DenseTensorAttribute::tensor_is_unchanged(DocId docid, const vespalib::eval::Value& new_tensor) const // Returns true when the cells of new_tensor compare equal to the cells extracted for docid.
+{
+ auto old_tensor = extract_cells_ref(docid); // cells reference for docid (presumably the currently stored tensor)
+ return _comp.equals(old_tensor, new_tensor.cells()); // _comp is the TypedCellsComparator member built from cfg.tensorType() in the constructor
+}
+
void
DenseTensorAttribute::internal_set_tensor(DocId docid, const vespalib::eval::Value& tensor)
{
- checkTensorType(tensor);
consider_remove_from_index(docid);
EntryRef ref = _denseTensorStore.setTensor(tensor);
setTensorRef(docid, ref);
@@ -152,7 +158,8 @@ DenseTensorAttribute::DenseTensorAttribute(vespalib::stringref baseFileName, con
const NearestNeighborIndexFactory& index_factory)
: TensorAttribute(baseFileName, cfg, _denseTensorStore),
_denseTensorStore(cfg.tensorType(), get_memory_allocator()),
- _index()
+ _index(),
+ _comp(cfg.tensorType())
{
if (cfg.hnsw_index_params().has_value()) {
auto tensor_type = cfg.tensorType();
@@ -180,6 +187,7 @@ DenseTensorAttribute::clearDoc(DocId docId)
void
DenseTensorAttribute::setTensor(DocId docId, const vespalib::eval::Value &tensor)
{
+ checkTensorType(tensor);
internal_set_tensor(docId, tensor);
if (_index) {
_index->add_document(docId);
@@ -189,16 +197,26 @@ DenseTensorAttribute::setTensor(DocId docId, const vespalib::eval::Value &tensor
std::unique_ptr<PrepareResult>
DenseTensorAttribute::prepare_set_tensor(DocId docid, const vespalib::eval::Value& tensor) const
{
+ checkTensorType(tensor);
if (_index) {
+ if (tensor_is_unchanged(docid, tensor)) {
+ // Don't make changes to the nearest neighbor index when the inserted tensor is unchanged.
+ // With this optimization we avoid doing unnecessary costly work, first removing the vector point, then inserting the same point.
+ return {};
+ }
return _index->prepare_add_document(docid, tensor.cells(), getGenerationHandler().takeGuard());
}
- return std::unique_ptr<PrepareResult>();
+ return {};
}
void
DenseTensorAttribute::complete_set_tensor(DocId docid, const vespalib::eval::Value& tensor,
std::unique_ptr<PrepareResult> prepare_result)
{
+ if (_index && !prepare_result) {
+ // The tensor is unchanged.
+ return;
+ }
internal_set_tensor(docid, tensor);
if (_index) {
_index->complete_add_document(docid, std::move(prepare_result));
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h
index da7a88af1be..1138a4f4433 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h
@@ -6,6 +6,7 @@
#include "dense_tensor_store.h"
#include "doc_vector_access.h"
#include "tensor_attribute.h"
+#include "typed_cells_comparator.h"
#include <memory>
namespace search::tensor {
@@ -20,7 +21,9 @@ class DenseTensorAttribute : public TensorAttribute, public DocVectorAccess {
private:
DenseTensorStore _denseTensorStore;
std::unique_ptr<NearestNeighborIndex> _index;
+ TypedCellsComparator _comp;
+ bool tensor_is_unchanged(DocId docid, const vespalib::eval::Value& new_tensor) const;
void internal_set_tensor(DocId docid, const vespalib::eval::Value& tensor);
void consider_remove_from_index(DocId docid);
vespalib::MemoryUsage update_stat() override;
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_calculator.cpp b/searchlib/src/vespa/searchlib/tensor/distance_calculator.cpp
new file mode 100644
index 00000000000..c53d50bc9ff
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/distance_calculator.cpp
@@ -0,0 +1,90 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "distance_calculator.h"
+#include "distance_function_factory.h"
+#include "nearest_neighbor_index.h"
+#include <vespa/eval/eval/fast_value.h>
+
+using vespalib::eval::CellType;
+using vespalib::eval::FastValueBuilderFactory;
+using vespalib::eval::TypedCells;
+using vespalib::eval::Value;
+using vespalib::eval::ValueType;
+
+namespace {
+
+template<typename LCT, typename RCT> // LCT: cell type read from old_value, RCT: cell type of the value being built
+std::unique_ptr<Value>
+convert_cells(const ValueType& new_type, std::unique_ptr<Value> old_value) // Builds a new value of type new_type, converting each cell of old_value from LCT to RCT.
+{
+ auto old_cells = old_value->cells().typify<LCT>();
+ auto builder = FastValueBuilderFactory::get().create_value_builder<RCT>(new_type);
+ auto new_cells = builder->add_subspace(); // destination cells, filled in order by the loop below
+ assert(old_cells.size() == new_cells.size()); // source and destination must hold the same number of cells
+ auto p = new_cells.begin();
+ for (LCT value : old_cells) {
+ RCT conv(value); // per-cell conversion through RCT's constructor
+ *p++ = conv;
+ }
+ return builder->build(std::move(builder)); // NOTE(review): the builder is passed into its own build() call; presumably build() consumes it - confirm against the ValueBuilder API
+}
+
+struct ConvertCellsSelector // Dispatches a runtime (from, to) CellType pair to the matching convert_cells<LCT, RCT> instantiation.
+{
+ template <typename LCT, typename RCT>
+ static auto invoke(const ValueType& new_type, std::unique_ptr<Value> old_value) { // target of the typify_invoke call below
+ return convert_cells<LCT, RCT>(new_type, std::move(old_value));
+ }
+ auto operator() (CellType from, CellType to, std::unique_ptr<Value> old_value) const {
+ using MyTypify = vespalib::eval::TypifyCellType;
+ ValueType new_type = old_value->type().cell_cast(to); // computed before old_value is moved from; presumably old_value's type with cell type 'to' - confirm
+ return vespalib::typify_invoke<2,MyTypify,ConvertCellsSelector>(from, to, new_type, std::move(old_value)); // presumably resolves 'from' and 'to' into LCT/RCT and forwards the remaining arguments to invoke() - confirm
+ }
+};
+
+}
+
+namespace search::tensor {
+
+DistanceCalculator::DistanceCalculator(const tensor::ITensorAttribute& attr_tensor, // Owning variant: takes ownership of the query tensor and selects the distance function itself.
+ std::unique_ptr<vespalib::eval::Value> query_tensor_in)
+ : _attr_tensor(attr_tensor),
+ _query_tensor_uptr(std::move(query_tensor_in)),
+ _query_tensor(), // assigned at the end of the body, after a possible cell type conversion
+ _query_tensor_cells(),
+ _dist_fun_uptr(make_distance_function(_attr_tensor.distance_metric(), // fallback function built from the attribute's distance metric and cell type
+ _attr_tensor.getTensorType().cell_type())),
+ _dist_fun(_dist_fun_uptr.get())
+{
+ assert(_dist_fun);
+ auto nns_index = _attr_tensor.nearest_neighbor_index();
+ if (nns_index) {
+ _dist_fun = nns_index->distance_function(); // when the attribute has a nearest neighbor index, its distance function replaces the fallback
+ assert(_dist_fun);
+ }
+ auto query_ct = _query_tensor_uptr->cells().type; // dereferenced unconditionally: query_tensor_in must not be null
+ CellType required_ct = _dist_fun->expected_cell_type();
+ if (query_ct != required_ct) {
+ ConvertCellsSelector converter;
+ _query_tensor_uptr = converter(query_ct, required_ct, std::move(_query_tensor_uptr)); // swap in a query tensor whose cells have the type the distance function expects
+ }
+ _query_tensor = _query_tensor_uptr.get();
+ _query_tensor_cells = _query_tensor->cells(); // fetched once here from the final (possibly converted) query tensor
+}
+
+DistanceCalculator::DistanceCalculator(const tensor::ITensorAttribute& attr_tensor, // Non-owning variant: stores only pointers to the given query tensor and distance function, so both must outlive this object.
+ const vespalib::eval::Value& query_tensor_in,
+ const DistanceFunction& function_in)
+ : _attr_tensor(attr_tensor),
+ _query_tensor_uptr(), // left empty: the query tensor is not owned here
+ _query_tensor(&query_tensor_in),
+ _query_tensor_cells(_query_tensor->cells()), // reads _query_tensor, so it relies on that member being initialized first; no cell type conversion is done in this variant
+ _dist_fun_uptr(), // left empty: the distance function is not owned here
+ _dist_fun(&function_in)
+{
+}
+
+DistanceCalculator::~DistanceCalculator() = default;
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_calculator.h b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h
new file mode 100644
index 00000000000..eeb66887598
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h
@@ -0,0 +1,48 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "distance_function.h"
+#include "i_tensor_attribute.h"
+
+namespace vespalib::eval { struct Value; }
+
+namespace search::tensor {
+
+/**
+ * Class used to calculate the distance between two n-dimensional vectors,
+ * where one is stored in a TensorAttribute and the other comes from the query.
+ *
+ * The distance function to use is defined in the TensorAttribute. Only a reference to the attribute is stored, so it must outlive this object.
+ */
+class DistanceCalculator {
+private:
+ const tensor::ITensorAttribute& _attr_tensor;
+ std::unique_ptr<vespalib::eval::Value> _query_tensor_uptr; // owning handle for the query tensor; may be empty when the tensor is owned elsewhere
+ const vespalib::eval::Value* _query_tensor; // the query tensor returned by query_tensor()
+ vespalib::eval::TypedCells _query_tensor_cells; // query cells passed to the distance function in calc_with_limit()
+ std::unique_ptr<DistanceFunction> _dist_fun_uptr; // owning handle for a distance function; may be empty
+ const DistanceFunction* _dist_fun; // the distance function returned by function() and used by calc_with_limit()
+
+public:
+ DistanceCalculator(const tensor::ITensorAttribute& attr_tensor, // takes ownership of query_tensor_in
+ std::unique_ptr<vespalib::eval::Value> query_tensor_in);
+
+ /**
+ * Only used by unit tests where ownership of query tensor and distance function is handled outside.
+ */
+ DistanceCalculator(const tensor::ITensorAttribute& attr_tensor,
+ const vespalib::eval::Value& query_tensor_in,
+ const DistanceFunction& function_in);
+
+ ~DistanceCalculator();
+
+ const tensor::ITensorAttribute& attribute_tensor() const { return _attr_tensor; }
+ const vespalib::eval::Value& query_tensor() const { return *_query_tensor; }
+ const DistanceFunction& function() const { return *_dist_fun; }
+
+ double calc_with_limit(uint32_t docid, double limit) const { // distance between the query cells and the attribute's cells for docid
+ return _dist_fun->calc_with_limit(_query_tensor_cells, _attr_tensor.extract_cells_ref(docid), limit); // 'limit' is passed through unchanged to the distance function
+ }
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function.h b/searchlib/src/vespa/searchlib/tensor/distance_function.h
index 77873cb7ced..d5ebf656189 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function.h
@@ -23,7 +23,7 @@ public:
DistanceFunction(vespalib::eval::CellType expected) : _expect_cell_type(expected) {}
- virtual ~DistanceFunction() {}
+ virtual ~DistanceFunction() = default;
// input (query) vectors must be converted to this cell type:
vespalib::eval::CellType expected_cell_type() const {
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index e82f31df38e..2ee1b268449 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -436,7 +436,7 @@ HnswIndex::prepare_add_document(uint32_t docid,
if (max_nodes < _cfg.min_size_before_two_phase()) {
// the first documents added will do all work in write thread
// to ensure they are linked together:
- return std::unique_ptr<PrepareResult>();
+ return std::make_unique<PreparedFirstAddDoc>();
}
PreparedAddDoc op = internal_prepare_add(docid, vector, std::move(read_guard));
return std::make_unique<PreparedAddDoc>(std::move(op));
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
index 72a10724ff1..3f5a9d514ed 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
@@ -152,6 +152,8 @@ protected:
const BitVector *filter, uint32_t explore_k,
double distance_threshold) const;
+ struct PreparedFirstAddDoc : public PrepareResult {}; // empty, non-null result that prepare_add_document() returns while max_nodes < min_size_before_two_phase (work is then done in the write thread)
+
struct PreparedAddDoc : public PrepareResult {
using ReadGuard = vespalib::GenerationHandler::Guard;
uint32_t docid;
diff --git a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp
index a668387e5bd..58e625e6aca 100644
--- a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp
@@ -68,7 +68,7 @@ struct MyFastValueView final : Value {
{
const StringIdVector &labels = handle_view;
for (size_t i = 0; i < num_spaces; ++i) {
- ConstArrayRef<string_id> addr(&labels[i * num_mapped], num_mapped);
+ ConstArrayRef<string_id> addr(labels.data() + (i * num_mapped), num_mapped);
my_index.map.add_mapping(FastAddrMap::hash_labels(addr));
}
assert(my_index.map.size() == num_spaces);
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
index add5184c4eb..78c58e86a3b 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
@@ -135,7 +135,7 @@ TensorAttribute::addDoc(DocId &docId)
}
void
-TensorAttribute::checkTensorType(const vespalib::eval::Value &tensor)
+TensorAttribute::checkTensorType(const vespalib::eval::Value &tensor) const
{
const ValueType &fieldTensorType = getConfig().tensorType();
const ValueType &tensorType = tensor.type();
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h
index ae6a4a302ea..c8aa42c6133 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h
@@ -32,7 +32,7 @@ protected:
template <typename RefType>
void doCompactWorst();
- void checkTensorType(const vespalib::eval::Value &tensor);
+ void checkTensorType(const vespalib::eval::Value &tensor) const;
void setTensorRef(DocId docId, EntryRef ref);
virtual vespalib::MemoryUsage update_stat();
virtual vespalib::MemoryUsage memory_usage() const;
diff --git a/searchlib/src/vespa/searchlib/tensor/typed_cells_comparator.h b/searchlib/src/vespa/searchlib/tensor/typed_cells_comparator.h
new file mode 100644
index 00000000000..d1c890be961
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/typed_cells_comparator.h
@@ -0,0 +1,30 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/cell_type.h>
+#include <vespa/eval/eval/typed_cells.h>
+#include <vespa/eval/eval/value_type.h>
+#include <cstring>
+
+namespace search::tensor {
+
+/**
+ * Comparator used to compare two vespalib::eval::TypedCells instances.
+ *
+ * The caller must first validate that they are of the same vespalib::eval::ValueType.
+ */
+class TypedCellsComparator {
+private:
+ size_t _mem_size; // number of bytes compared by equals(); fixed at construction
+
+public:
+ TypedCellsComparator(const vespalib::eval::ValueType& type)
+ : _mem_size(vespalib::eval::CellTypeUtils::mem_size(type.cell_type(), type.dense_subspace_size())) // derived from the type's cell type and dense subspace size
+ {}
+ bool equals(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const {
+ return std::memcmp(lhs.data, rhs.data, _mem_size) == 0; // raw byte comparison: both buffers must hold at least _mem_size bytes. NOTE(review): for floating point cells, bitwise equality differs from value equality (0.0 vs -0.0, NaN) - confirm this is intended
+ }
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.cpp b/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.cpp
index d240e5a7c6c..d1bb464fc37 100644
--- a/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.cpp
+++ b/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.cpp
@@ -10,10 +10,7 @@ namespace search {
namespace {
struct MockReadGuard : public IDocumentMetaStoreContext::IReadGuard {
- virtual const search::IDocumentMetaStore &get() const override {
- search::IDocumentMetaStore *nullStore = nullptr;
- return static_cast<search::IDocumentMetaStore &>(*nullStore);
- }
+ virtual const search::IDocumentMetaStore &get() const override { abort(); }
};
}
diff --git a/searchlib/src/vespa/searchlib/transactionlog/common.cpp b/searchlib/src/vespa/searchlib/transactionlog/common.cpp
index 4130ad0bc06..d4192fe0beb 100644
--- a/searchlib/src/vespa/searchlib/transactionlog/common.cpp
+++ b/searchlib/src/vespa/searchlib/transactionlog/common.cpp
@@ -3,7 +3,9 @@
#include "common.h"
#include <vespa/vespalib/util/stringfmt.h>
#include <vespa/fastos/file.h>
+#include <filesystem>
#include <stdexcept>
+#include <system_error>
namespace search::transactionlog {
@@ -32,7 +34,9 @@ makeDirectory(const char * dir)
if ( FastOS_File::Stat(dir, &st) ) {
retval = st._isDirectory ? 0 : -2;
} else {
- retval = FastOS_File::MakeDirectory(dir) ? 0 : -3;
+ std::error_code ec;
+ std::filesystem::create_directory(std::filesystem::path(dir), ec);
+ retval = (!ec) ? 0 : -3;
}
return retval;
diff --git a/searchlib/src/vespa/searchlib/util/comprfile.cpp b/searchlib/src/vespa/searchlib/util/comprfile.cpp
index 61eeca6fc2d..bde246a1239 100644
--- a/searchlib/src/vespa/searchlib/util/comprfile.cpp
+++ b/searchlib/src/vespa/searchlib/util/comprfile.cpp
@@ -155,7 +155,7 @@ ComprFileReadBase::SetPosition(uint64_t newPosition,
bool readAll,
ComprFileDecodeContext &decodeContext,
int &bitOffset,
- FastOS_FileInterface &file,
+ FastOS_FileInterface *file,
uint64_t &fileReadByteOffset,
uint64_t fileSize,
ComprBuffer &cbuf)
@@ -176,7 +176,7 @@ ComprFileReadBase::SetPosition(uint64_t newPosition,
readAll,
decodeContext,
bitOffset,
- file,
+ *file,
fileReadByteOffset,
fileSize,
cbuf);
@@ -200,7 +200,7 @@ ComprFileReadBase::SetPosition(uint64_t newPosition,
readAll,
decodeContext,
bitOffset,
- file,
+ *file,
fileReadByteOffset,
fileSize,
cbuf);
@@ -221,9 +221,8 @@ ComprFileReadBase::SetPosition(uint64_t newPosition,
(cbuf.getUnitBitSize() - 1));
assert(pos <= static_cast<int64_t>(fileSize));
-
- file.SetPosition(pos);
- assert(pos == file.GetPosition());
+ file->SetPosition(pos);
+ assert(pos == file->GetPosition());
decodeContext.emptyBuffer(newPosition);
assert(decodeContext.getBitPos(bitOffset,
@@ -337,7 +336,7 @@ ComprFileReadContext::setPosition(uint64_t newPosition)
_readAll,
*_decodeContext,
_bitOffset,
- *_file,
+ _file,
_fileReadByteOffset,
_fileSize,
*this);
diff --git a/searchlib/src/vespa/searchlib/util/comprfile.h b/searchlib/src/vespa/searchlib/util/comprfile.h
index 2ee95a53235..dc8cf6185fc 100644
--- a/searchlib/src/vespa/searchlib/util/comprfile.h
+++ b/searchlib/src/vespa/searchlib/util/comprfile.h
@@ -76,7 +76,7 @@ public:
bool readAll,
ComprFileDecodeContext &decodeContext,
int &bitOffset,
- FastOS_FileInterface &file,
+ FastOS_FileInterface *file,
uint64_t &fileReadByteOffset,
uint64_t fileSize,
ComprBuffer &cbuf);
diff --git a/searchlib/src/vespa/searchlib/util/dirtraverse.cpp b/searchlib/src/vespa/searchlib/util/dirtraverse.cpp
index 07dbc9a247d..c1e8b6b7396 100644
--- a/searchlib/src/vespa/searchlib/util/dirtraverse.cpp
+++ b/searchlib/src/vespa/searchlib/util/dirtraverse.cpp
@@ -2,275 +2,63 @@
#include "dirtraverse.h"
#include <vespa/vespalib/util/size_literals.h>
-#include <vespa/fastos/file.h>
-#include <cassert>
-#include <cstring>
+#include <filesystem>
+#include <system_error>
namespace search {
-extern "C" {
-static int cmpname(const void *av, const void *bv)
-{
- const DirectoryTraverse::Name *const a =
- *(const DirectoryTraverse::Name *const *) av;
- const DirectoryTraverse::Name *const b =
- *(const DirectoryTraverse::Name *const *) bv;
- return a->_name.compare(b->_name.c_str());
-}
-}
-
-DirectoryTraverse::Name::Name(const char *name)
- : _name(name),
- _next(nullptr)
-{
-}
-DirectoryTraverse::Name::~Name() = default;
-
-DirectoryTraverse::Name *
-DirectoryTraverse::Name::sort(Name *head, int count)
-{
- Name *nl;
- Name **names;
- int i;
-
- names = new Name *[count];
- i = 0;
- for(nl = head; nl != nullptr; nl = nl->_next)
- names[i++] = nl;
- assert(i == count);
- qsort(names, count, sizeof(Name *), cmpname);
- for (i = 0; i < count; i++) {
- if (i + 1 < count)
- names[i]->_next = names[i + 1];
- else
- names[i]->_next = nullptr;
- }
- head = names[0];
- delete [] names;
- return head;
-}
-
-
-void
-DirectoryTraverse::QueueDir(const char *name)
-{
- Name *n = new Name(name);
- if (_dirTail == nullptr)
- _dirHead = n;
- else
- _dirTail->_next = n;
- _dirTail = n;
-}
+namespace fs = std::filesystem;
+namespace {
-void
-DirectoryTraverse::PushDir(const char *name)
-{
- Name *n = new Name(name);
- n->_next = _pdirHead;
- _pdirHead = n;
-}
-
-
-void
-DirectoryTraverse::PushRemoveDir(const char *name)
-{
- Name *n = new Name(name);
- n->_next = _rdirHead;
- _rdirHead = n;
-}
-
-
-void
-DirectoryTraverse::PushPushedDirs()
-{
- Name *n;
- while (_pdirHead != nullptr) {
- n = _pdirHead;
- _pdirHead = n->_next;
- n->_next = _dirHead;
- _dirHead = n;
- if (_dirTail == nullptr)
- _dirTail = n;
- }
-}
-
-
-DirectoryTraverse::Name *
-DirectoryTraverse::UnQueueDir()
-{
- Name *n;
- PushPushedDirs();
- if (_dirHead == nullptr)
- return nullptr;
- n = _dirHead;
- _dirHead = n->_next;
- n->_next = nullptr;
- if (_dirHead == nullptr)
- _dirTail = nullptr;
- return n;
-}
-
-DirectoryTraverse::Name *
-DirectoryTraverse::UnQueueName()
-{
- Name *n;
- if (_nameHead == nullptr)
- return nullptr;
- n = _nameHead;
- _nameHead = n->_next;
- n->_next = nullptr;
- _nameCount--;
- return n;
-}
-
-
-void
-DirectoryTraverse::ScanSingleDir()
+uint64_t
+try_get_tree_size(const std::string& base_dir)
{
- assert(_nameHead == nullptr);
- assert(_nameCount == 0);
- delete _curDir;
- _fullDirName.clear();
- _curDir = UnQueueDir();
- if (_curDir == nullptr)
- return;
- _fullDirName = _baseDir;
- if ( ! _curDir->_name.empty()) {
- _fullDirName += "/" + _curDir->_name;
+ fs::path path(base_dir);
+ std::error_code ec;
+ fs::recursive_directory_iterator dir_itr(path, fs::directory_options::skip_permission_denied, ec);
+ if (ec) {
+ return 0;
}
- FastOS_DirectoryScan *dirscan = new FastOS_DirectoryScan(_fullDirName.c_str());
- while (dirscan->ReadNext()) {
- const char *name = dirscan->GetName();
- if (strcmp(name, ".") == 0 ||
- strcmp(name, "..") == 0)
- continue;
- Name *nl = new Name(name);
- nl->_next = _nameHead;
- _nameHead = nl;
- _nameCount++;
- }
- if (_nameCount > 1)
- _nameHead = _nameHead->sort(_nameHead, _nameCount);
- delete dirscan;
-}
-
-
-bool
-DirectoryTraverse::NextName()
-{
- delete _curName;
- _curName = nullptr;
- while (_nameHead == nullptr && (_dirHead != nullptr || _pdirHead != nullptr))
- ScanSingleDir();
- if (_nameHead == nullptr)
- return false;
- _curName = UnQueueName();
- _fullName = _fullDirName + "/" + _curName->_name;
- _relName = _fullName.c_str() + (_baseDir.size() + 1);
- return true;
-}
-
-
-bool
-DirectoryTraverse::NextRemoveDir()
-{
- Name *curName;
- delete _curName;
- _curName = nullptr;
- if (_rdirHead == nullptr)
- return false;
- curName = _rdirHead;
- _rdirHead = curName->_next;
- _fullName = _baseDir + "/" + curName->_name;
- _relName = _fullName.c_str() + _baseDir.size() + 1;
- delete curName;
- return true;
-}
-
-
-bool
-DirectoryTraverse::RemoveTree()
-{
- FastOS_StatInfo statInfo;
-
- while (NextName()) {
- const char *relname = GetRelName();
- const char *fullname = GetFullName();
- if (FastOS_File::Stat(fullname, &statInfo)) {
- if (statInfo._isDirectory) {
- PushDir(relname);
- PushRemoveDir(relname);
- } else {
- FastOS_File::Delete(fullname);
+ uint64_t total_size = 0;
+ constexpr uint64_t block_size = 4_Ki;
+ for (const auto &elem : dir_itr) {
+ if (fs::is_regular_file(elem.path()) && !fs::is_symlink(elem.path())) {
+ const auto size = elem.file_size(ec);
+ if (!ec) {
+ // round up size to file system block size (assumed to be 4 KiB)
+ auto adj_size = ((size + block_size - 1) / block_size) * block_size;
+ total_size += adj_size;
}
}
}
- while (NextRemoveDir()) {
- const char *fullname = GetFullName();
- FastOS_File::RemoveDirectory(fullname);
- }
- FastOS_File::RemoveDirectory(_baseDir.c_str());
- return true;
+ return total_size;
+}
+
}
uint64_t
DirectoryTraverse::GetTreeSize()
{
- FastOS_StatInfo statInfo;
- uint64_t size = 0;
- const uint64_t blockSize = 4_Ki;
-
- while (NextName()) {
- const char *relname = GetRelName();
- const char *fullname = GetFullName();
- if (FastOS_File::Stat(fullname, &statInfo)) {
- uint64_t adjSize = ((statInfo._size + blockSize - 1) / blockSize) * blockSize;
- size += adjSize;
- if (statInfo._isDirectory) {
- PushDir(relname);
- }
+ // Since try_get_tree_size may throw on concurrent directory
+ // modifications, immediately retry a bounded number of times if this
+ // happens. Number of retries chosen randomly by counting fingers.
+ for (int i = 0; i < 10; ++i) {
+ try {
+ return try_get_tree_size(_base_dir);
+ } catch (const fs::filesystem_error&) {
+ // Go around for another spin that hopefully won't race.
}
}
- return size;
+ return 0;
}
-DirectoryTraverse::DirectoryTraverse(const char *baseDir)
- : _baseDir(baseDir),
- _nameHead(nullptr),
- _nameCount(0),
- _dirHead(nullptr),
- _dirTail(nullptr),
- _pdirHead(nullptr),
- _rdirHead(nullptr),
- _curDir(nullptr),
- _curName(nullptr),
- _fullDirName(),
- _fullName(),
- _relName(nullptr)
+DirectoryTraverse::DirectoryTraverse(const std::string& base_dir)
+ : _base_dir(base_dir)
{
- QueueDir("");
- ScanSingleDir();
}
-
-DirectoryTraverse::~DirectoryTraverse()
-{
- delete _curDir;
- delete _curName;
- PushPushedDirs();
- while (_dirHead != nullptr)
- delete UnQueueDir();
- while (_nameHead != nullptr)
- delete UnQueueName();
- while (_rdirHead != nullptr) {
- Name *n;
- n = _rdirHead;
- _rdirHead = n->_next;
- n->_next = nullptr;
- delete n;
- }
-}
+DirectoryTraverse::~DirectoryTraverse() = default;
} // namespace search
diff --git a/searchlib/src/vespa/searchlib/util/dirtraverse.h b/searchlib/src/vespa/searchlib/util/dirtraverse.h
index 4a96ad0935d..c26246e2596 100644
--- a/searchlib/src/vespa/searchlib/util/dirtraverse.h
+++ b/searchlib/src/vespa/searchlib/util/dirtraverse.h
@@ -7,54 +7,16 @@
namespace search {
+/*
+ * Class used to get size of directory tree on disk (sum of non-symlink regular files, each rounded up to 4 KiB).
+ */
class DirectoryTraverse
{
private:
- DirectoryTraverse(const DirectoryTraverse &);
- DirectoryTraverse& operator=(const DirectoryTraverse &);
-
-public:
- class Name
- {
- private:
- Name(const Name &);
- Name& operator=(const Name &);
-
- public:
- std::string _name;
- Name *_next;
- explicit Name(const char *name);
- ~Name();
- static Name *sort(Name *head, int count);
- };
-private:
- std::string _baseDir;
- Name *_nameHead;
- int _nameCount;
- Name *_dirHead;
- Name *_dirTail;
- Name *_pdirHead;
- Name *_rdirHead;
- Name *_curDir;
- Name *_curName;
- std::string _fullDirName;
- std::string _fullName;
- const char *_relName;
+ std::string _base_dir;
public:
- const char *GetFullName() const { return _fullName.c_str(); }
- const char *GetRelName() const { return _relName; }
- void QueueDir(const char *name);
- void PushDir(const char *name);
- void PushRemoveDir(const char *name);
- void PushPushedDirs();
- Name *UnQueueDir();
- Name *UnQueueName();
- void ScanSingleDir();
- bool NextName();
- bool NextRemoveDir();
- bool RemoveTree();
uint64_t GetTreeSize(); // Returns size of directory in bytes
- explicit DirectoryTraverse(const char *baseDir);
+ explicit DirectoryTraverse(const std::string& base_dir);
~DirectoryTraverse();
};