diff options
77 files changed, 522 insertions, 402 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java index 360a02256a9..c6fca8d32c6 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java @@ -506,6 +506,13 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { boolean atLeastOneClientWithCertificate = clients.stream().anyMatch(client -> !client.certificates().isEmpty()); if (!atLeastOneClientWithCertificate) throw new IllegalArgumentException("At least one client must require a certificate"); + + List<String> duplicates = clients.stream().collect(Collectors.groupingBy(Client::id)) + .entrySet().stream().filter(entry -> entry.getValue().size() > 1) + .map(Map.Entry::getKey).sorted().toList(); + if (! duplicates.isEmpty()) { + throw new IllegalArgumentException("Duplicate client ids: " + duplicates); + } } List<X509Certificate> operatorAndTesterCertificates = deployState.getProperties().operatorCertificates(); diff --git a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/CloudTokenDataPlaneFilterTest.java b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/CloudTokenDataPlaneFilterTest.java index 1c5eb16be80..fa09d3c1890 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/CloudTokenDataPlaneFilterTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/CloudTokenDataPlaneFilterTest.java @@ -162,6 +162,36 @@ public class CloudTokenDataPlaneFilterTest extends ContainerModelBuilderTestBase assertEquals("Invalid permission 'unknown-permission'. Valid values are 'read' and 'write'.", exception.getMessage()); } + @Test + void fails_on_duplicate_clients() throws IOException { + var certFile = securityFolder.resolve("foo.pem"); + var servicesXml = """ + <container version="1.0"> + <clients> + <client id="mtls" permissions="read,write"> + <certificate file="%1$s"/> + </client> + <client id="mtls" permissions="read,write"> + <certificate file="%1$s"/> + </client> + <client id="token1" permissions="read"> + <token id="my-token"/> + </client> + <client id="token2" permissions="read"> + <token id="my-token"/> + </client> + <client id="token1" permissions="read"> + <token id="my-token"/> + </client> + </clients> + </container> + """.formatted(applicationFolder.toPath().relativize(certFile).toString()); + var clusterElem = DomBuilderTest.parse(servicesXml); + createCertificate(certFile); + var exception = assertThrows(IllegalArgumentException.class, () -> buildModel(Set.of(mtlsEndpoint), defaultTokens, clusterElem)); + assertEquals("Duplicate client ids: [mtls, token1]", exception.getMessage()); + } + private static CloudTokenDataPlaneFilterConfig.Clients.Tokens tokenConfig( String id, Collection<String> fingerprints, Collection<String> accessCheckHashes, Collection<String> expirations) { return new CloudTokenDataPlaneFilterConfig.Clients.Tokens.Builder() diff --git a/container-core/src/main/java/com/yahoo/container/jdisc/state/StateHandler.java b/container-core/src/main/java/com/yahoo/container/jdisc/state/StateHandler.java index 81ec0919441..32fd1d64129 100644 --- a/container-core/src/main/java/com/yahoo/container/jdisc/state/StateHandler.java +++ b/container-core/src/main/java/com/yahoo/container/jdisc/state/StateHandler.java @@ -376,7 +376,15 @@ public class StateHandler extends AbstractRequestHandler implements CapabilityRe } private String prometheusSanitizedName(String name) { - return name.replaceAll("\\.", "_"); + var stringBuilder = new StringBuilder(); + for (char c : name.toCharArray()) { + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')) { + stringBuilder.append(c); + } else { + stringBuilder.append("_"); + } + } + return stringBuilder.toString(); } private String sanitizeIfDouble(Number num) { diff --git a/container-core/src/test/java/com/yahoo/container/jdisc/state/StateHandlerTest.java b/container-core/src/test/java/com/yahoo/container/jdisc/state/StateHandlerTest.java index 68a7d06e7fe..0aa2b0f41d5 100644 --- a/container-core/src/test/java/com/yahoo/container/jdisc/state/StateHandlerTest.java +++ b/container-core/src/test/java/com/yahoo/container/jdisc/state/StateHandlerTest.java @@ -88,7 +88,7 @@ public class StateHandlerTest extends StateHandlerTestBase { snapshot.add(otherContext, "some.counter", 2); snapshot.set(null, "bar", 20); snapshot.set(null, "bar", 40); - snapshot.set(null, "testing.infinity", Double.NEGATIVE_INFINITY); + snapshot.set(null, "testing-infinity", Double.NEGATIVE_INFINITY); snapshot.set(null, "testing.nan", Double.NaN); snapshotProvider.setSnapshot(snapshot); diff --git a/container-search/src/main/java/com/yahoo/prelude/query/SimpleIndexedItem.java b/container-search/src/main/java/com/yahoo/prelude/query/SimpleIndexedItem.java index 6e1a2be684a..e7bdf640661 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/SimpleIndexedItem.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/SimpleIndexedItem.java @@ -46,7 +46,7 @@ public abstract class SimpleIndexedItem extends SimpleTaggableItem implements In /** Appends the index prefix if necessary */ protected void appendIndexString(StringBuilder buffer) { - if (!getIndexName().equals("")) { + if (!getIndexName().isEmpty()) { buffer.append(getIndexName()); buffer.append(":"); } diff --git a/container-search/src/test/java/com/yahoo/search/query/SortingTestCase.java b/container-search/src/test/java/com/yahoo/search/query/SortingTestCase.java index 49eaa9b3a89..759181a2ce7 100644 --- a/container-search/src/test/java/com/yahoo/search/query/SortingTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/query/SortingTestCase.java @@ -82,7 +82,7 @@ public class SortingTestCase { private void requireThatChineseHasCorrectRules(Collator col) { final int reorderCodes [] = {UScript.HAN}; assertEquals("15.1.0.0", col.getUCAVersion().toString()); - assertEquals("153.121.44.8", col.getVersion().toString()); + assertEquals("153.121.45.0", col.getVersion().toString()); assertEquals(Arrays.toString(reorderCodes), Arrays.toString(col.getReorderCodes())); assertNotEquals("", ((RuleBasedCollator) col).getRules()); diff --git a/dependency-versions/pom.xml b/dependency-versions/pom.xml index b5edcd44108..b37c97c5a71 100644 --- a/dependency-versions/pom.xml +++ b/dependency-versions/pom.xml @@ -68,7 +68,7 @@ <assertj.vespa.version>3.25.3</assertj.vespa.version> <!-- Athenz dependencies. Make sure these dependencies match those in Vespa's internal repositories --> - <aws-sdk.vespa.version>1.12.701</aws-sdk.vespa.version> + <aws-sdk.vespa.version>1.12.703</aws-sdk.vespa.version> <athenz.vespa.version>1.11.56</athenz.vespa.version> <!-- Athenz END --> @@ -91,7 +91,7 @@ <commons-logging.vespa.version>1.3.1</commons-logging.vespa.version> <!-- Bindings exported by jdisc through jcl-over-slf4j. --> <commons.math3.vespa.version>3.6.1</commons.math3.vespa.version> <commons-compress.vespa.version>1.26.1</commons-compress.vespa.version> - <commons-cli.vespa.version>1.6.0</commons-cli.vespa.version> + <commons-cli.vespa.version>1.7.0</commons-cli.vespa.version> <curator.vespa.version>5.6.0</curator.vespa.version> <dropwizard.metrics.vespa.version>4.2.25</dropwizard.metrics.vespa.version> <!-- ZK 3.9.1 requires this --> <eclipse-angus.vespa.version>2.0.2</eclipse-angus.vespa.version> @@ -104,7 +104,7 @@ <hamcrest.vespa.version>2.2</hamcrest.vespa.version> <hdrhistogram.vespa.version>2.1.12</hdrhistogram.vespa.version> <huggingface.vespa.version>0.27.0</huggingface.vespa.version> - <icu4j.vespa.version>74.2</icu4j.vespa.version> + <icu4j.vespa.version>75.1</icu4j.vespa.version> <java-jjwt.vespa.version>0.11.5</java-jjwt.vespa.version> <java-jwt.vespa.version>4.4.0</java-jwt.vespa.version> <javax.annotation.vespa.version>1.2</javax.annotation.vespa.version> diff --git a/eval/src/vespa/eval/eval/cell_type.h b/eval/src/vespa/eval/eval/cell_type.h index c15a5b68dba..3c474638480 100644 --- a/eval/src/vespa/eval/eval/cell_type.h +++ b/eval/src/vespa/eval/eval/cell_type.h @@ -70,7 +70,7 @@ struct CellMetaNotScalar { struct CellMeta { const CellType cell_type; const bool is_scalar; - constexpr CellMeta(CellType cell_type_in, bool is_scalar_in) + constexpr CellMeta(CellType cell_type_in, bool is_scalar_in) noexcept : cell_type(cell_type_in), is_scalar(is_scalar_in) { // is_scalar -> double cell type diff --git a/eval/src/vespa/eval/eval/typed_cells.h b/eval/src/vespa/eval/eval/typed_cells.h index d05c3e3294a..6cb8675cd5f 100644 --- a/eval/src/vespa/eval/eval/typed_cells.h +++ b/eval/src/vespa/eval/eval/typed_cells.h @@ -11,24 +11,24 @@ namespace vespalib::eval { struct TypedCells { const void *data; - size_t size:56; - CellType type; + size_t size:56; + CellType type; - explicit TypedCells(ConstArrayRef<double> cells) : data(cells.begin()), size(cells.size()), type(CellType::DOUBLE) {} - explicit TypedCells(ConstArrayRef<float> cells) : data(cells.begin()), size(cells.size()), type(CellType::FLOAT) {} - explicit TypedCells(ConstArrayRef<BFloat16> cells) : data(cells.begin()), size(cells.size()), type(CellType::BFLOAT16) {} - explicit TypedCells(ConstArrayRef<Int8Float> cells) : data(cells.begin()), size(cells.size()), type(CellType::INT8) {} + explicit TypedCells(ConstArrayRef<double> cells) noexcept : data(cells.begin()), size(cells.size()), type(CellType::DOUBLE) {} + explicit TypedCells(ConstArrayRef<float> cells) noexcept : data(cells.begin()), size(cells.size()), type(CellType::FLOAT) {} + explicit TypedCells(ConstArrayRef<BFloat16> cells) noexcept : data(cells.begin()), size(cells.size()), type(CellType::BFLOAT16) {} + explicit TypedCells(ConstArrayRef<Int8Float> cells) noexcept : data(cells.begin()), size(cells.size()), type(CellType::INT8) {} TypedCells() noexcept : data(nullptr), size(0), type(CellType::DOUBLE) {} TypedCells(const void *dp, CellType ct, size_t sz) noexcept : data(dp), size(sz), type(ct) {} - template <typename T> bool check_type() const { return vespalib::eval::check_cell_type<T>(type); } + template <typename T> bool check_type() const noexcept { return check_cell_type<T>(type); } - template <typename T> ConstArrayRef<T> typify() const { + template <typename T> ConstArrayRef<T> typify() const noexcept { assert(check_type<T>()); return ConstArrayRef<T>((const T *)data, size); } - template <typename T> ConstArrayRef<T> unsafe_typify() const { + template <typename T> ConstArrayRef<T> unsafe_typify() const noexcept { return ConstArrayRef<T>((const T *)data, size); } diff --git a/eval/src/vespa/eval/instruction/dense_hamming_distance.cpp b/eval/src/vespa/eval/instruction/dense_hamming_distance.cpp index 81f25241d3d..94f0a313f2e 100644 --- a/eval/src/vespa/eval/instruction/dense_hamming_distance.cpp +++ b/eval/src/vespa/eval/instruction/dense_hamming_distance.cpp @@ -3,7 +3,6 @@ #include "dense_hamming_distance.h" #include <vespa/eval/eval/operation.h> #include <vespa/eval/eval/value.h> -#include <vespa/eval/eval/hamming_distance.h> #include <vespa/vespalib/util/binary_hamming_distance.h> #include <vespa/log/log.h> diff --git a/fbench/src/test/filereader.cpp b/fbench/src/test/filereader.cpp index 87c5914e85b..b2061633d41 100644 --- a/fbench/src/test/filereader.cpp +++ b/fbench/src/test/filereader.cpp @@ -66,7 +66,7 @@ main(int argc, char **argv) return -1; } int res; - int buflen = 10240; + constexpr int buflen = 10240; char buf[buflen]; while ((res = reader->ReadLine(buf, buflen - 1)) >= 0) { // printf("len=%d, content:>%s<\n", res, buf); diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index e9de8cdca20..558b8dea8d9 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -350,21 +350,21 @@ public class Flags { public static final UnboundBooleanFlag MORE_WIREGUARD = defineFeatureFlag( "more-wireguard", false, - List.of("andreer"), "2023-08-21", "2024-04-14", + List.of("andreer"), "2023-08-21", "2025-01-01", "Use wireguard in INternal enCLAVES", "Takes effect on next host-admin run", HOSTNAME, CLOUD_ACCOUNT); public static final UnboundBooleanFlag IPV6_AWS_TARGET_GROUPS = defineFeatureFlag( "ipv6-aws-target-groups", false, - List.of("andreer"), "2023-08-28", "2024-04-14", + List.of("andreer"), "2023-08-28", "2025-01-01", "Always use IPv6 target groups for load balancers in aws", "Takes effect on next load-balancer provisioning", HOSTNAME, CLOUD_ACCOUNT); public static final UnboundBooleanFlag PROVISION_IPV6_ONLY_AWS = defineFeatureFlag( "provision-ipv6-only", false, - List.of("andreer"), "2023-08-28", "2024-04-14", + List.of("andreer"), "2023-08-28", "2025-01-01", "Provision without private IPv4 addresses in INternal enCLAVES in AWS", "Takes effect on next host provisioning / run of host-admin", HOSTNAME, CLOUD_ACCOUNT); diff --git a/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java b/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java index 9479c814e89..8c500473678 100644 --- a/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java +++ b/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java @@ -185,6 +185,7 @@ public class InfrastructureMetricSet { addMetric(metrics, ControllerMetrics.ZMS_QUOTA_USAGE.max()); addMetric(metrics, ControllerMetrics.COREDUMP_PROCESSED.count()); addMetric(metrics, ControllerMetrics.AUTH0_EXCEPTIONS.count()); + addMetric(metrics, ControllerMetrics.BILLING_WEBHOOK_FAILURES.count()); addMetric(metrics, ControllerMetrics.CERTIFICATE_POOL_AVAILABLE.max()); addMetric(metrics, ControllerMetrics.BILLING_EXCEPTIONS.count()); diff --git a/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp b/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp index c03d93b6480..0b2660824c0 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp @@ -88,6 +88,7 @@ private: double w = getWeightFromNode(*node).percent(); eq->addTerm(build(_requestContext, *node, _context), w / eqw); } + _result->setDocIdLimit(_context.getDocIdLimit()); n.setDocumentFrequency(_result->getState().estimate().estHits, _context.getDocIdLimit()); } @@ -123,6 +124,7 @@ private: indexBlueprint = _context.getIndexes().createBlueprint(_requestContext, indexFields, n); } _result = mixer.mix(std::move(indexBlueprint)); + _result->setDocIdLimit(_context.getDocIdLimit()); n.setDocumentFrequency(_result->getState().estimate().estHits, _context.getDocIdLimit()); } diff --git a/searchcore/src/vespa/searchcore/proton/matching/querynodes.cpp b/searchcore/src/vespa/searchcore/proton/matching/querynodes.cpp index 60c2e869e79..47a9f3dd43d 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/querynodes.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/querynodes.cpp @@ -93,7 +93,8 @@ void ProtonTermData::setDocumentFrequency(uint32_t estHits, uint32_t docIdLimit) { if (docIdLimit > 1) { - propagate_document_frequency(estHits, docIdLimit - 1); + uint32_t total_doc_count = docIdLimit - 1; + propagate_document_frequency(std::min(estHits, total_doc_count), total_doc_count); } else { propagate_document_frequency(0, 1); } diff --git a/searchlib/src/tests/aggregator/perdocexpr_test.cpp b/searchlib/src/tests/aggregator/perdocexpr_test.cpp index 908e50ad4d2..e9f0981739c 100644 --- a/searchlib/src/tests/aggregator/perdocexpr_test.cpp +++ b/searchlib/src/tests/aggregator/perdocexpr_test.cpp @@ -604,6 +604,7 @@ getVespaChecksumV2(const std::string& ymumid, int fid, const std::string& flags_ sizeof(networkFid)+ new_flags_str.length(); + // GNU extension: Variable-length automatic array unsigned char buffer[length]; memset(buffer, 0x00, length); memcpy(buffer, ymumid.c_str(), ymumid.length()); diff --git a/searchlib/src/tests/attribute/extendattributes/extendattribute_test.cpp b/searchlib/src/tests/attribute/extendattributes/extendattribute_test.cpp index 48270694394..d67757a3811 100644 --- a/searchlib/src/tests/attribute/extendattributes/extendattribute_test.cpp +++ b/searchlib/src/tests/attribute/extendattributes/extendattribute_test.cpp @@ -224,7 +224,7 @@ void ExtendAttributeTest::testExtendRaw(AttributeVector& attr) void ExtendAttributeTest::testExtendTensor(AttributeVector& attr) { - std::vector<double> empty_cells{0.0, 0.0}; + std::vector<double> empty_cells{}; std::vector<double> spec0_dense_cells{1.0, 2.0}; std::vector<double> spec0_mixed_cells0{3.0, 4.0}; std::vector<double> spec0_mixed_cells1{5.0, 6.0}; diff --git a/searchlib/src/tests/diskindex/pagedict4/pagedict4_test.cpp b/searchlib/src/tests/diskindex/pagedict4/pagedict4_test.cpp index 951d6f61980..3b7ec00211d 100644 --- a/searchlib/src/tests/diskindex/pagedict4/pagedict4_test.cpp +++ b/searchlib/src/tests/diskindex/pagedict4/pagedict4_test.cpp @@ -15,8 +15,9 @@ #include <vespa/searchlib/diskindex/pagedict4randread.h> #include <vespa/searchlib/common/tunefileinfo.h> #include <vespa/vespalib/util/signalhandler.h> -#include <sstream> #include <cinttypes> +#include <optional> +#include <sstream> #include <vespa/log/log.h> LOG_SETUP("pagedict4test"); @@ -357,6 +358,7 @@ checkCounts(const std::string &word, void testWords(const std::string &logname, vespalib::Rand48 &rnd, + std::optional<uint32_t> mmap_file_size_threshold, uint64_t numWordIds, uint32_t tupleCount, uint32_t chunkSize, @@ -495,7 +497,14 @@ testWords(const std::string &logname, LOG(info, "%s: pagedict4 written", logname.c_str()); } { - std::unique_ptr<DictionaryFileSeqRead> dr(new PageDict4FileSeqRead); + std::unique_ptr<DictionaryFileSeqRead> dr; + { + auto my_dr = std::make_unique<PageDict4FileSeqRead>(); + if (mmap_file_size_threshold.has_value()) { + my_dr->set_mmap_file_size_threshold(mmap_file_size_threshold.value()); + } + dr = std::move(my_dr); + } search::TuneFileSeqRead tuneFileRead; bool openres = dr->open("fakedict", @@ -535,7 +544,14 @@ testWords(const std::string &logname, LOG(info, "%s: pagedict4 seqverify OK", logname.c_str()); } { - std::unique_ptr<DictionaryFileRandRead> drr(new PageDict4RandRead); + std::unique_ptr<DictionaryFileRandRead> drr; + { + auto my_drr = std::make_unique<PageDict4RandRead>(); + if (mmap_file_size_threshold.has_value()) { + my_drr->set_mmap_file_size_threshold(mmap_file_size_threshold.value()); + } + drr = std::move(my_drr); + } search::TuneFileRandRead tuneFileRead; bool openres = drr->open("fakedict", tuneFileRead); @@ -649,46 +665,50 @@ testWords(const std::string &logname, void PageDict4TestApp::testWords() { - ::testWords("smallchunkwordsempty", _rnd, + ::testWords("smallchunkwordsempty", _rnd, std::nullopt, 1000000, 0, 64, 80, 72, 64, false, false, false); - ::testWords("smallchunkwordsempty2", _rnd, + ::testWords("smallchunkwordsempty2", _rnd, std::nullopt, 0, 0, 64, 80, 72, 64, false, false, false); - ::testWords("smallchunkwords", _rnd, + ::testWords("smallchunkwords", _rnd, std::nullopt, 1000000, 100, 64, 80, 72, 64, false, false, false); - ::testWords("smallchunkwordswithemptyword", _rnd, + ::testWords("smallchunkwordswithemptyword", _rnd, std::nullopt, 1000000, 100, 64, 80, 72, 64, true, false, false); - ::testWords("smallchunkwordswithcommonfirstword", _rnd, + ::testWords("smallchunkwordswithcommonfirstword", _rnd, std::nullopt, 1000000, 100, 64, 80, 72, 64, false, true, false); - ::testWords("smallchunkwordswithcommonemptyfirstword", _rnd, + ::testWords("smallchunkwordswithcommonemptyfirstword", _rnd, std::nullopt, 1000000, 100, 64, 80, 72, 64, true, true, false); - ::testWords("smallchunkwordswithcommonlastword", _rnd, + ::testWords("smallchunkwordswithcommonlastword", _rnd, std::nullopt, 1000000, 100, 64, 80, 72, 64, false, false, true); -#if 1 - ::testWords("smallchunkwords2", _rnd, + ::testWords("smallchunkwords2", _rnd, std::nullopt, 1000000, _stress ? 10000 : 100, 64, 80, 72, 64, _emptyWord, _firstWordForcedCommon, _lastWordForcedCommon); -#endif -#if 1 - ::testWords("stdwords", _rnd, + ::testWords("stdwords", _rnd, std::nullopt, 1000000, _stress ? 10000 : 100, 262144, 80, 72, 64, _emptyWord, _firstWordForcedCommon, _lastWordForcedCommon); -#endif + ::testWords("stdwordsnommapssdat", _rnd, 500_Mi, + 1000000, 100, + 262144, 80, 72, 64, + _emptyWord, _firstWordForcedCommon, _lastWordForcedCommon); + ::testWords("stdwordsmmapssdat", _rnd, 1, + 1000000, 100, + 262144, 80, 72, 64, + _emptyWord, _firstWordForcedCommon, _lastWordForcedCommon); } int main(int argc, char **argv) { diff --git a/searchlib/src/tests/tensor/distance_calculator/distance_calculator_test.cpp b/searchlib/src/tests/tensor/distance_calculator/distance_calculator_test.cpp index b7702398857..4ffc1fe366e 100644 --- a/searchlib/src/tests/tensor/distance_calculator/distance_calculator_test.cpp +++ b/searchlib/src/tests/tensor/distance_calculator/distance_calculator_test.cpp @@ -44,12 +44,16 @@ public: double calc_distance(uint32_t docid, const vespalib::string& query_tensor) { auto qt = make_tensor(query_tensor); auto calc = DistanceCalculator::make_with_validation(*attr, *qt); - return calc->calc_with_limit(docid, std::numeric_limits<double>::max()); + return calc->has_single_subspace() + ? calc->calc_with_limit<true>(docid, std::numeric_limits<double>::max()) + : calc->calc_with_limit<false>(docid, std::numeric_limits<double>::max()); } double calc_rawscore(uint32_t docid, const vespalib::string& query_tensor) { auto qt = make_tensor(query_tensor); auto calc = DistanceCalculator::make_with_validation(*attr, *qt); - return calc->calc_raw_score(docid); + return calc->has_single_subspace() + ? calc->calc_raw_score<true>(docid) + : calc->calc_raw_score<false>(docid); } OptSubspace calc_closest_subspace(uint32_t docid, const vespalib::string& query_tensor) { auto qt = make_tensor(query_tensor); diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp index c01fc33767a..b697effeab4 100644 --- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp @@ -62,14 +62,14 @@ public: _vectors[docid] = vec; return *this; } - vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override { + vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const noexcept override { return get_vectors(docid).cells(subspace); } - VectorBundle get_vectors(uint32_t docid) const override { + VectorBundle get_vectors(uint32_t docid) const noexcept override { ArrayRef ref(_vectors[docid]); assert((ref.size() % _subspace_type.size()) == 0); uint32_t subspaces = ref.size() / _subspace_type.size(); - return VectorBundle(ref.data(), subspaces, _subspace_type); + return {ref.data(), subspaces, _subspace_type}; } void clear() { _vectors.clear(); } @@ -106,7 +106,7 @@ public: .set(7, {3, 5}).set(8, {0, 3}).set(9, {4, 5}); } - ~HnswIndexTest() override {} + ~HnswIndexTest() override; auto dff() { return search::tensor::make_distance_function_factory( @@ -280,6 +280,9 @@ public: static constexpr bool is_single = std::is_same_v<IndexType, HnswIndex<HnswIndexType::SINGLE>>; }; +template <typename IndexType> +HnswIndexTest<IndexType>::~HnswIndexTest() = default; + using HnswIndexTestTypes = ::testing::Types<HnswIndex<HnswIndexType::SINGLE>, HnswIndex<HnswIndexType::MULTI>>; TYPED_TEST_SUITE(HnswIndexTest, HnswIndexTestTypes); diff --git a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp index 1feb968fbb4..dce09a87fb8 100644 --- a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp @@ -1,13 +1,5 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <fcntl.h> -#include <cstdio> -#include <unistd.h> -#include <chrono> -#include <cstdlib> -#include <future> -#include <vector> - #include <vespa/eval/eval/typed_cells.h> #include <vespa/eval/eval/value_type.h> #include <vespa/searchlib/common/bitvector.h> @@ -25,6 +17,9 @@ #include <vespa/vespalib/util/lambdatask.h> #include <vespa/vespalib/util/size_literals.h> #include <vespa/vespalib/data/simple_buffer.h> +#include <fcntl.h> +#include <unistd.h> +#include <future> #include <vespa/log/log.h> LOG_SETUP("stress_hnsw_mt"); @@ -119,17 +114,17 @@ public: memcpy(&_vectors[docid], vec.cbegin(), sizeof(MallocPointVector)); return *this; } - vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override { + vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const noexcept override { assert(docid < NUM_POSSIBLE_DOCS); (void) subspace; ConstVectorRef ref(_vectors[docid]); return vespalib::eval::TypedCells(ref); } - VectorBundle get_vectors(uint32_t docid) const override { + VectorBundle get_vectors(uint32_t docid) const noexcept override { assert(docid < NUM_POSSIBLE_DOCS); ConstVectorRef ref(_vectors[docid]); assert(subspace_type.size() == ref.size()); - return VectorBundle(ref.data(), 1, subspace_type); + return {ref.data(), 1, subspace_type}; } }; @@ -257,7 +252,7 @@ public: loaded_vectors.load(); } - ~Stressor() {} + ~Stressor() override; auto dff() { return search::tensor::make_distance_function_factory( @@ -352,6 +347,9 @@ public: } }; +template <typename IndexType> +Stressor<IndexType>::~Stressor() = default; + using StressorTypes = ::testing::Types<HnswIndex<HnswIndexType::SINGLE>>; TYPED_TEST_SUITE(Stressor, StressorTypes); diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.cpp b/searchlib/src/vespa/searchlib/bitcompression/compression.cpp index f3fc31ac8b1..e5ce886f499 100644 --- a/searchlib/src/vespa/searchlib/bitcompression/compression.cpp +++ b/searchlib/src/vespa/searchlib/bitcompression/compression.cpp @@ -6,7 +6,9 @@ #include <vespa/searchlib/index/postinglistparams.h> #include <vespa/vespalib/data/fileheader.h> #include <vespa/vespalib/data/databuffer.h> +#include <vespa/vespalib/datastore/aligner.h> #include <vespa/vespalib/util/arrayref.h> +#include <vespa/vespalib/util/round_up_to_page_size.h> #include <vespa/vespalib/util/size_literals.h> namespace search::bitcompression { @@ -181,6 +183,12 @@ readHeader(vespalib::GenericHeader &header, int64_t fileSize) return headerLen; } +bool +DecodeContext64Base::is_padded_for_memory_map(uint64_t file_bit_size, uint64_t file_size) noexcept +{ + using Aligner = vespalib::datastore::Aligner<64>; + return (Aligner::align(file_bit_size) + 128 <= (vespalib::round_up_to_page_size(file_size) * 8)); +} template <bool bigEndian> void diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.h b/searchlib/src/vespa/searchlib/bitcompression/compression.h index 4124f1f659f..b1e13a9d96b 100644 --- a/searchlib/src/vespa/searchlib/bitcompression/compression.h +++ b/searchlib/src/vespa/searchlib/bitcompression/compression.h @@ -1261,6 +1261,13 @@ public: virtual uint64_t decode_exp_golomb(int k) = 0; void readBytes(uint8_t *buf, size_t len); uint32_t readHeader(vespalib::GenericHeader &header, int64_t fileSize); + + /* + * Check if file is padding at end for decompression readahead. + */ + static bool is_padded_for_memory_map(uint64_t file_bit_size, uint64_t file_size) noexcept; + + static uint64_t file_units(uint64_t file_size) noexcept { return (file_size + sizeof(uint64_t) - 1) / sizeof(uint64_t); } }; diff --git a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp index bceeb1e7bc1..89b5ffb84f8 100644 --- a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp @@ -51,7 +51,7 @@ using vespalib::getLastErrorString; namespace search::diskindex { struct PageDict4FileSeqRead::DictFileReadContext { - DictFileReadContext(vespalib::stringref id, const vespalib::string & name, const TuneFileSeqRead &tune, bool read_all_upfront); + DictFileReadContext(vespalib::stringref id, const vespalib::string & name, const TuneFileSeqRead &tune, uint32_t mmap_file_size_threshold, bool read_all_upfront); ~DictFileReadContext(); vespalib::FileHeader readHeader(); void readExtendedHeader(); @@ -66,7 +66,7 @@ struct PageDict4FileSeqRead::DictFileReadContext { }; PageDict4FileSeqRead::DictFileReadContext::DictFileReadContext(vespalib::stringref id, const vespalib::string & name, - const TuneFileSeqRead &tune, bool read_all_upfront) + const TuneFileSeqRead &tune, uint32_t mmap_file_size_threshold, bool read_all_upfront) : _id(id), _fileBitSize(0u), _headerLen(0u), @@ -79,23 +79,49 @@ PageDict4FileSeqRead::DictFileReadContext::DictFileReadContext(vespalib::stringr if (tune.getWantDirectIO()) { _file.EnableDirectIO(); } + if (read_all_upfront) { + _file.enableMemoryMap(0); + } if (!_file.OpenReadOnly(name.c_str())) { LOG(error, "could not open %s: %s", _file.GetFileName(), getLastErrorString().c_str()); return; } uint64_t fileSize = _file.getSize(); + uint64_t file_units = DC::file_units(fileSize); _readContext.setFile(&_file); _readContext.setFileSize(fileSize); + bool use_mmap = false; + /* + * Limit memory usage spike by using memory mapped .ssdat file if + * file size is greater than 32 MiB with padding at end of file. + */ + if (read_all_upfront && _file.MemoryMapPtr(0) != nullptr && fileSize >= mmap_file_size_threshold) { + _readContext.reference_compressed_buffer(_file.MemoryMapPtr(0), file_units); + vespalib::FileHeader header; + _dc.readHeader(header, _file.getSize()); + assert(header.hasTag("fileBitSize")); + int64_t file_bit_size = header.getTag("fileBitSize").asInteger(); + use_mmap = DC::is_padded_for_memory_map(file_bit_size, fileSize); + _readContext.setBitOffset(0); + _readContext.setBufferEndFilePos(0); + } if (read_all_upfront) { - _readContext.allocComprBuf((fileSize + sizeof(uint64_t) - 1) / sizeof(uint64_t), 32_Ki); + if (use_mmap) { + _readContext.reference_compressed_buffer(_file.MemoryMapPtr(0), file_units); + } else { + _readContext.allocComprBuf(file_units, 32_Ki); + } } else { _readContext.allocComprBuf(64_Ki, 32_Ki); } - _dc.emptyBuffer(0); - _readContext.readComprBuffer(); + if (!use_mmap) { + _dc.emptyBuffer(0); + _readContext.readComprBuffer(); + } if (read_all_upfront) { assert(_readContext.getBufferEndFilePos() >= fileSize); } + assert(_dc.getBitPosV() == 0); _valid = true; } @@ -121,7 +147,8 @@ PageDict4FileSeqRead::PageDict4FileSeqRead() _ss(), _sp(), _p(), - _wordNum(0u) + _wordNum(0u), + _mmap_file_size_threshold(32_Mi) { } PageDict4FileSeqRead::~PageDict4FileSeqRead() = default; @@ -166,9 +193,9 @@ bool PageDict4FileSeqRead::open(const vespalib::string &name, const TuneFileSeqRead &tuneFileRead) { - _ss = std::make_unique<DictFileReadContext>(mySSId, name + ".ssdat", tuneFileRead, true); - _sp = std::make_unique<DictFileReadContext>(mySPId, name + ".spdat", tuneFileRead, false); - _p = std::make_unique<DictFileReadContext>(myPId, name + ".pdat", tuneFileRead, false); + _ss = std::make_unique<DictFileReadContext>(mySSId, name + ".ssdat", tuneFileRead, _mmap_file_size_threshold, true); + _sp = std::make_unique<DictFileReadContext>(mySPId, name + ".spdat", tuneFileRead, _mmap_file_size_threshold, false); + _p = std::make_unique<DictFileReadContext>(myPId, name + ".pdat", tuneFileRead, _mmap_file_size_threshold, false); if ( !_ss->_valid || !_sp->_valid || !_p->_valid ) { return false; } diff --git a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h index 404f85e9088..40540cd458e 100644 --- a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h +++ b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h @@ -26,6 +26,7 @@ class PageDict4FileSeqRead : public index::DictionaryFileSeqRead std::unique_ptr<DictFileReadContext> _sp; std::unique_ptr<DictFileReadContext> _p; uint64_t _wordNum; + uint32_t _mmap_file_size_threshold; public: PageDict4FileSeqRead(); ~PageDict4FileSeqRead() override; @@ -38,6 +39,7 @@ public: bool open(const vespalib::string &name, const TuneFileSeqRead &tuneFileRead) override; bool close() override; void getParams(index::PostingListParams ¶ms) override; + void set_mmap_file_size_threshold(uint32_t v) { _mmap_file_size_threshold = v; } }; /** diff --git a/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.cpp b/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.cpp index 3654b703648..a513a18ae5d 100644 --- a/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.cpp @@ -1,8 +1,8 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "pagedict4randread.h" -#include <vespa/vespalib/stllike/asciistream.h> #include <vespa/vespalib/data/fileheader.h> +#include <vespa/vespalib/stllike/asciistream.h> #include <vespa/fastos/file.h> #include <vespa/log/log.h> @@ -33,7 +33,8 @@ PageDict4RandRead::PageDict4RandRead() _pFileBitSize(0u), _ssHeaderLen(0u), _spHeaderLen(0u), - _pHeaderLen(0u) + _pHeaderLen(0u), + _mmap_file_size_threshold(32_Mi) { _ssd.setReadContext(&_ssReadContext); } @@ -229,14 +230,42 @@ PageDict4RandRead::open(const vespalib::string &name, } uint64_t fileSize = _ssfile->getSize(); + uint64_t file_units = DC::file_units(fileSize); _ssReadContext.setFile(_ssfile.get()); _ssReadContext.setFileSize(fileSize); - _ssReadContext.allocComprBuf((fileSize + sizeof(uint64_t) - 1) / sizeof(uint64_t), 32768u); - _ssd.emptyBuffer(0); - _ssReadContext.readComprBuffer(); - assert(_ssReadContext.getBufferEndFilePos() >= fileSize); + /* + * Limit memory usage spike by using memory mapped .ssdat file if + * file size is greater than 32 MiB with padding at end of file. + * Note: It might cause higher dictionary lookup latencies when + * system is under memory pressure due to pageins. + */ + bool has_read_ss_header = false; + if (_ssfile->MemoryMapPtr(0) != nullptr && fileSize >= _mmap_file_size_threshold) { + _ssReadContext.reference_compressed_buffer(_ssfile->MemoryMapPtr(0), file_units); + assert(_ssd.getReadOffset() == 0u); + readSSHeader(); + has_read_ss_header = true; + } + if (!has_read_ss_header || !DC::is_padded_for_memory_map(_ssFileBitSize, fileSize)) { + /* + * Insufficient padding or small .sdat file. Read whole file into + * memory. + */ + _ssReadContext.allocComprBuf(file_units, 32768u); + _ssd.emptyBuffer(0); + _ssReadContext.setBitOffset(0); + _ssReadContext.setBufferEndFilePos(0); + _ssfile->SetPosition(0); + _ssReadContext.readComprBuffer(); + assert(_ssReadContext.getBufferEndFilePos() >= fileSize); + assert(_ssd.getReadOffset() == 0u); + if (has_read_ss_header) { + _ssReadContext.setPosition(_ssHeaderLen * 8); + } else { + readSSHeader(); + } + } - readSSHeader(); readSPHeader(); readPHeader(); diff --git a/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.h b/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.h index 051efa486dd..1c2e538cc48 100644 --- a/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.h +++ b/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.h @@ -36,6 +36,7 @@ class PageDict4RandRead : public index::DictionaryFileRandRead uint32_t _ssHeaderLen; uint32_t _spHeaderLen; uint32_t _pHeaderLen; + uint32_t _mmap_file_size_threshold; void readSSHeader(); void readSPHeader(); @@ -51,6 +52,7 @@ public: bool close() override; uint64_t getNumWordIds() const override; + void set_mmap_file_size_threshold(uint32_t v) { _mmap_file_size_threshold = v; } }; } diff --git a/searchlib/src/vespa/searchlib/features/closenessfeature.cpp b/searchlib/src/vespa/searchlib/features/closenessfeature.cpp index b0955fe60bd..d19b979c360 100644 --- a/searchlib/src/vespa/searchlib/features/closenessfeature.cpp +++ b/searchlib/src/vespa/searchlib/features/closenessfeature.cpp @@ -52,7 +52,7 @@ ConvertRawScoreToCloseness::execute(uint32_t docId) feature_t converted = tfmd->getRawScore(); max_closeness = std::max(max_closeness, converted); } else if (elem.calc) { - feature_t converted = elem.calc->calc_raw_score(docId); + feature_t converted = elem.calc->calc_raw_score<false>(docId); max_closeness = std::max(max_closeness, converted); } } diff --git a/searchlib/src/vespa/searchlib/features/distancefeature.cpp b/searchlib/src/vespa/searchlib/features/distancefeature.cpp index 15362b6a224..65a764d8b44 100644 --- a/searchlib/src/vespa/searchlib/features/distancefeature.cpp +++ b/searchlib/src/vespa/searchlib/features/distancefeature.cpp @@ -12,7 +12,6 @@ #include <vespa/vespalib/geo/zcurve.h> #include <vespa/vespalib/util/issue.h> #include <vespa/vespalib/util/stash.h> -#include <cmath> #include <limits> #include <vespa/log/log.h> @@ -62,7 +61,7 @@ ConvertRawscoreToDistance::execute(uint32_t docId) feature_t converted = elem.calc ? elem.calc->function().to_distance(invdist) : ((1.0 / invdist) - 1.0); min_distance = std::min(min_distance, converted); } else if (elem.calc) { - feature_t invdist = elem.calc->calc_raw_score(docId); + feature_t invdist = elem.calc->calc_raw_score<false>(docId); feature_t converted = elem.calc->function().to_distance(invdist); min_distance = std::min(min_distance, converted); } @@ -130,7 +129,10 @@ GeoGCDExecutor::GeoGCDExecutor(GeoLocationSpecPtrs locations, const attribute::I : FeatureExecutor(), _locations(), _pos(pos), - _intBuf() + _intBuf(), + _best_index(0.0), + _best_lat(0.0), + _best_lng(0.0) { if (_pos == nullptr) { return; @@ -140,7 +142,7 @@ GeoGCDExecutor::GeoGCDExecutor(GeoLocationSpecPtrs locations, const attribute::I if (p && p->location.valid() && p->location.has_point) { double lat = p->location.point.y / 1.0e6; double lng = p->location.point.x / 1.0e6; - _locations.emplace_back(search::common::GeoGcd{lat, lng}); + _locations.emplace_back(lat, lng); } } } diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_range_term_expander.h b/searchlib/src/vespa/searchlib/predicate/predicate_range_term_expander.h index 7ff796e5b7d..8159d5c4147 100644 --- a/searchlib/src/vespa/searchlib/predicate/predicate_range_term_expander.h +++ b/searchlib/src/vespa/searchlib/predicate/predicate_range_term_expander.h @@ -50,6 +50,7 @@ void PredicateRangeTermExpander::expand(const vespalib::string &key, int64_t sig return; } size_t buffer_size = 21 * 2 + 3 + key.size(); // 2 numbers + punctuation + key + // GNU extension: Variable-length automatic array char buffer[buffer_size]; int size; int prefix_size = snprintf(buffer, buffer_size, "%s=", key.c_str()); diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp index d28f6077905..c76fe3363e4 100644 --- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp @@ -19,17 +19,17 @@ namespace search::queryeval { * Keeps a heap of the K best hit distances. * Currently always does brute-force scanning, which is very expensive. **/ -template <bool strict, bool has_filter> -class NearestNeighborImpl : public NearestNeighborIterator +template <bool strict, bool has_filter, bool has_single_subspace> +class NearestNeighborImpl final : public NearestNeighborIterator { public: - NearestNeighborImpl(Params params_in) + explicit NearestNeighborImpl(Params params_in) : NearestNeighborIterator(std::move(params_in)), _lastScore(0.0) { } - ~NearestNeighborImpl(); + ~NearestNeighborImpl() override; void doSeek(uint32_t docId) override { double distanceLimit = params().distanceHeap.distanceLimit(); @@ -61,39 +61,47 @@ public: private: double computeDistance(uint32_t docId, double limit) { - return params().distance_calc->calc_with_limit(docId, limit); + return params().distance_calc->template calc_with_limit<has_single_subspace>(docId, limit); } double _lastScore; }; -template <bool strict, bool has_filter> -NearestNeighborImpl<strict, has_filter>::~NearestNeighborImpl() = default; +template <bool strict, bool has_filter, bool has_single_subspace> +NearestNeighborImpl<strict, has_filter, has_single_subspace>::~NearestNeighborImpl() = default; namespace { +template <bool strict, bool has_filter> +std::unique_ptr<NearestNeighborIterator> +resolve_single_subspace(NearestNeighborIterator::Params params) +{ + if (params.distance_calc->has_single_subspace()) { + using NNI = NearestNeighborImpl<strict, has_filter, true>; + return std::make_unique<NNI>(std::move(params)); + } else { + using NNI = NearestNeighborImpl<strict, has_filter, false>; + return std::make_unique<NNI>(std::move(params)); + } +} + template <bool has_filter> std::unique_ptr<NearestNeighborIterator> resolve_strict(bool strict, NearestNeighborIterator::Params params) { if (strict) { - using NNI = NearestNeighborImpl<true, has_filter>; - return std::make_unique<NNI>(std::move(params)); + return resolve_single_subspace<true, has_filter>(std::move(params)); } else { - using NNI = NearestNeighborImpl<false, has_filter>; - return std::make_unique<NNI>(std::move(params)); + return resolve_single_subspace<false, has_filter>(std::move(params)); } } } // namespace <unnamed> std::unique_ptr<NearestNeighborIterator> -NearestNeighborIterator::create( - bool strict, - fef::TermFieldMatchData &tfmd, - std::unique_ptr<search::tensor::DistanceCalculator> distance_calc, - NearestNeighborDistanceHeap &distanceHeap, - const GlobalFilter &filter) +NearestNeighborIterator::create(bool strict, fef::TermFieldMatchData &tfmd, + std::unique_ptr<search::tensor::DistanceCalculator> distance_calc, + NearestNeighborDistanceHeap &distanceHeap, const GlobalFilter &filter) { Params params(tfmd, std::move(distance_calc), distanceHeap, filter); if (filter.is_active()) { diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.h b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.h index b34c9df47b9..177c732a44d 100644 --- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.h +++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.h @@ -39,7 +39,7 @@ public: {} }; - NearestNeighborIterator(Params params_in) + explicit NearestNeighborIterator(Params params_in) : _params(std::move(params_in)) {} diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp index 14953011e22..07e490f4575 100644 --- a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp @@ -2,7 +2,9 @@ #include "angular_distance.h" #include "temporary_vector_store.h" +#include <vespa/vespalib/hwaccelrated/iaccelrated.h> #include <numbers> +#include <cmath> using vespalib::typify_invoke; using vespalib::eval::TypifyCellType; @@ -10,47 +12,15 @@ using vespalib::eval::TypedCells; namespace search::tensor { -namespace { - -struct CalcAngular { - template <typename LCT, typename RCT> - static double invoke(const vespalib::eval::TypedCells& lhs, - const vespalib::eval::TypedCells& rhs) - { - auto lhs_vector = lhs.unsafe_typify<LCT>(); - auto rhs_vector = rhs.unsafe_typify<RCT>(); - - size_t sz = lhs_vector.size(); - assert(sz == rhs_vector.size()); - double a_norm_sq = 0.0; - double b_norm_sq = 0.0; - double dot_product = 0.0; - for (size_t i = 0; i < sz; ++i) { - double a = lhs_vector[i]; - double b = rhs_vector[i]; - a_norm_sq += a*a; - b_norm_sq += b*b; - dot_product += a*b; - } - double squared_norms = a_norm_sq * b_norm_sq; - double div = (squared_norms > 0) ? sqrt(squared_norms) : 1.0; - double cosine_similarity = dot_product / div; - double distance = 1.0 - cosine_similarity; // in range [0,2] - return std::max(0.0, distance); - } -}; - -} - template<typename FloatType> -class BoundAngularDistance : public BoundDistanceFunction { +class BoundAngularDistance final : public BoundDistanceFunction { private: const vespalib::hwaccelrated::IAccelrated & _computer; mutable TemporaryVectorStore<FloatType> _tmpSpace; const vespalib::ConstArrayRef<FloatType> _lhs; double _lhs_norm_sq; public: - BoundAngularDistance(const vespalib::eval::TypedCells& lhs) + explicit BoundAngularDistance(TypedCells lhs) : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()), _tmpSpace(lhs.size), _lhs(_tmpSpace.storeLhs(lhs)) @@ -58,7 +28,7 @@ public: auto a = _lhs.data(); _lhs_norm_sq = _computer.dotProduct(a, a, lhs.size); } - double calc(const vespalib::eval::TypedCells& rhs) const override { + double calc(TypedCells rhs) const noexcept override { size_t sz = _lhs.size(); vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs); assert(sz == rhs_vector.size()); @@ -72,7 +42,7 @@ public: double distance = 1.0 - cosine_similarity; // in range [0,2] return distance; } - double convert_threshold(double threshold) const override { + double convert_threshold(double threshold) const noexcept override { if (threshold < 0.0) { return 0.0; } @@ -82,7 +52,7 @@ public: double cosine_similarity = cos(threshold); return 1.0 - cosine_similarity; } - double to_rawscore(double distance) const override { + double to_rawscore(double distance) const noexcept override { double cosine_similarity = 1.0 - distance; // should be in the range [-1,1] but roundoff may cause problems: cosine_similarity = std::min(1.0, cosine_similarity); @@ -91,7 +61,7 @@ public: double score = 1.0 / (1.0 + angle_distance); return score; } - double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override { + double calc_with_limit(TypedCells rhs, double) const noexcept override { return calc(rhs); } }; @@ -101,14 +71,14 @@ template class BoundAngularDistance<double>; template <typename FloatType> BoundDistanceFunction::UP -AngularDistanceFunctionFactory<FloatType>::for_query_vector(const vespalib::eval::TypedCells& lhs) { +AngularDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) { using DFT = BoundAngularDistance<FloatType>; return std::make_unique<DFT>(lhs); } template <typename FloatType> BoundDistanceFunction::UP -AngularDistanceFunctionFactory<FloatType>::for_insertion_vector(const vespalib::eval::TypedCells& lhs) { +AngularDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) { using DFT = BoundAngularDistance<FloatType>; return std::make_unique<DFT>(lhs); } diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.h b/searchlib/src/vespa/searchlib/tensor/angular_distance.h index f5e8589fe6a..5e0a060e060 100644 --- a/searchlib/src/vespa/searchlib/tensor/angular_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.h @@ -2,12 +2,7 @@ #pragma once -#include "distance_function.h" -#include "bound_distance_function.h" #include "distance_function_factory.h" -#include <vespa/eval/eval/typed_cells.h> -#include <vespa/vespalib/hwaccelrated/iaccelrated.h> -#include <cmath> namespace search::tensor { @@ -20,8 +15,8 @@ template <typename FloatType> class AngularDistanceFunctionFactory : public DistanceFunctionFactory { public: AngularDistanceFunctionFactory() = default; - BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override; - BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override; + BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override; + BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h b/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h index c89619d9a77..a9d0c880625 100644 --- a/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h +++ b/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h @@ -2,13 +2,8 @@ #pragma once -#include <memory> -#include <vespa/eval/eval/cell_type.h> -#include <vespa/eval/eval/typed_cells.h> -#include <vespa/vespalib/util/arrayref.h> #include "distance_function.h" - -namespace vespalib::eval { struct TypedCells; } +#include <vespa/eval/eval/typed_cells.h> namespace search::tensor { @@ -22,17 +17,17 @@ namespace search::tensor { class BoundDistanceFunction : public DistanceConverter { public: using UP = std::unique_ptr<BoundDistanceFunction>; + using TypedCells = vespalib::eval::TypedCells; - BoundDistanceFunction() = default; + BoundDistanceFunction() noexcept = default; - virtual ~BoundDistanceFunction() = default; + ~BoundDistanceFunction() override = default; // calculate internal distance (comparable) - virtual double calc(const vespalib::eval::TypedCells& rhs) const = 0; + virtual double calc(TypedCells rhs) const noexcept = 0; // calculate internal distance, early return allowed if > limit - virtual double calc_with_limit(const vespalib::eval::TypedCells& rhs, - double limit) const = 0; + virtual double calc_with_limit(TypedCells rhs, double limit) const noexcept = 0; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp index fb74dd51fa3..0dbb9c34010 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp @@ -30,14 +30,14 @@ DenseTensorAttribute::extract_cells_ref(DocId docId) const } vespalib::eval::TypedCells -DenseTensorAttribute::get_vector(uint32_t docid, uint32_t subspace) const +DenseTensorAttribute::get_vector(uint32_t docid, uint32_t subspace) const noexcept { EntryRef ref = (subspace == 0) ? acquire_entry_ref(docid) : EntryRef(); return _denseTensorStore.get_typed_cells(ref); } VectorBundle -DenseTensorAttribute::get_vectors(uint32_t docid) const +DenseTensorAttribute::get_vectors(uint32_t docid) const noexcept { EntryRef ref = acquire_entry_ref(docid); return _denseTensorStore.get_vectors(ref); diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h index 03c976bd6b3..c07bfcc358e 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h @@ -26,8 +26,8 @@ public: bool supports_extract_cells_ref() const override { return true; } // Implements DocVectorAccess - vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override; - VectorBundle get_vectors(uint32_t docid) const override; + vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const noexcept override; + VectorBundle get_vectors(uint32_t docid) const noexcept override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp index 12dd6aa2bca..cf0e9adc095 100644 --- a/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp @@ -74,7 +74,7 @@ DirectTensorAttribute::get_tensor_ref(DocId docId) const } vespalib::eval::TypedCells -DirectTensorAttribute::get_vector(uint32_t docid, uint32_t subspace) const +DirectTensorAttribute::get_vector(uint32_t docid, uint32_t subspace) const noexcept { EntryRef ref = acquire_entry_ref(docid); auto vectors = _direct_store.get_vectors(ref); @@ -82,7 +82,7 @@ DirectTensorAttribute::get_vector(uint32_t docid, uint32_t subspace) const } VectorBundle -DirectTensorAttribute::get_vectors(uint32_t docid) const +DirectTensorAttribute::get_vectors(uint32_t docid) const noexcept { EntryRef ref = acquire_entry_ref(docid); return _direct_store.get_vectors(ref); diff --git a/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.h index a4f673ea99f..64f62650615 100644 --- a/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.h +++ b/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.h @@ -26,8 +26,8 @@ public: bool supports_get_tensor_ref() const override { return true; } // Implements DocVectorAccess - vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override; - VectorBundle get_vectors(uint32_t docid) const override; + vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const noexcept override; + VectorBundle get_vectors(uint32_t docid) const noexcept override; }; } // namespace search::tensor diff --git a/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h b/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h index 44bbbba65d6..6edb654d5bf 100644 --- a/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h +++ b/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h @@ -42,11 +42,11 @@ private: EntryRef add_entry(TensorSP tensor); public: - DirectTensorStore(const vespalib::eval::ValueType& tensor_type); + explicit DirectTensorStore(const vespalib::eval::ValueType& tensor_type); ~DirectTensorStore() override; using RefType = TensorStoreType::RefType; - const vespalib::eval::Value * get_tensor_ptr(EntryRef ref) const { + const vespalib::eval::Value * get_tensor_ptr(EntryRef ref) const noexcept { if (!ref.valid()) { return nullptr; } @@ -65,12 +65,12 @@ public: vespalib::eval::TypedCells get_empty_subspace() const noexcept { return _empty.cells(); } - VectorBundle get_vectors(EntryRef ref) const { + VectorBundle get_vectors(EntryRef ref) const noexcept { auto tensor = get_tensor_ptr(ref); if (tensor == nullptr) { - return VectorBundle(); + return {}; } - return VectorBundle(tensor->cells().data, tensor->index().size(), _subspace_type); + return {tensor->cells().data, static_cast<uint32_t>(tensor->index().size()), _subspace_type}; } }; diff --git a/searchlib/src/vespa/searchlib/tensor/distance_calculator.h b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h index eab75537071..9dbd12650cb 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_calculator.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h @@ -5,6 +5,7 @@ #include "distance_function_factory.h" #include "i_tensor_attribute.h" #include "vector_bundle.h" +#include <vespa/eval/eval/value_type.h> #include <optional> namespace vespalib::eval { struct Value; } @@ -32,34 +33,55 @@ public: ~DistanceCalculator(); const tensor::ITensorAttribute& attribute_tensor() const { return _attr_tensor; } - const vespalib::eval::Value& query_tensor() const { + const vespalib::eval::Value& query_tensor() const noexcept{ assert(_query_tensor != nullptr); return *_query_tensor; } - const BoundDistanceFunction& function() const { return *_dist_fun; } + const BoundDistanceFunction& function() const noexcept { return *_dist_fun; } + bool has_single_subspace() const noexcept { return _attr_tensor.getTensorType().is_dense(); } - double calc_raw_score(uint32_t docid) const { - auto vectors = _attr_tensor.get_vectors(docid); - double result = _dist_fun->min_rawscore(); - for (uint32_t i = 0; i < vectors.subspaces(); ++i) { - double distance = _dist_fun->calc(vectors.cells(i)); - double score = _dist_fun->to_rawscore(distance); - result = std::max(result, score); + template<bool has_single_subspace> + double calc_raw_score(uint32_t docid) const noexcept { + if (has_single_subspace) { + auto cells = _attr_tensor.get_vector(docid, 0); + double min_rawscore = _dist_fun->min_rawscore(); + if (cells.size == 0) [[unlikely]] { + return min_rawscore; + } + return std::max(min_rawscore, _dist_fun->to_rawscore(_dist_fun->calc(cells))); + } else { + auto vectors = _attr_tensor.get_vectors(docid); + double result = _dist_fun->min_rawscore(); + for (uint32_t i = 0; i < vectors.subspaces(); ++i) { + double distance = _dist_fun->calc(vectors.cells(i)); + double score = _dist_fun->to_rawscore(distance); + result = std::max(result, score); + } + return result; } - return result; + } - double calc_with_limit(uint32_t docid, double limit) const { - auto vectors = _attr_tensor.get_vectors(docid); - double result = std::numeric_limits<double>::max(); - for (uint32_t i = 0; i < vectors.subspaces(); ++i) { - double distance = _dist_fun->calc_with_limit(vectors.cells(i), limit); - result = std::min(result, distance); + template<bool has_single_subspace> + double calc_with_limit(uint32_t docid, double limit) const noexcept { + if (has_single_subspace) { + auto cells = _attr_tensor.get_vector(docid, 0); + if (cells.size == 0) [[unlikely]] { + return std::numeric_limits<double>::max(); + } + return _dist_fun->calc_with_limit(cells, limit); + } else { + auto vectors = _attr_tensor.get_vectors(docid); + double result = std::numeric_limits<double>::max(); + for (uint32_t i = 0; i < vectors.subspaces(); ++i) { + double distance = _dist_fun->calc_with_limit(vectors.cells(i), limit); + result = std::min(result, distance); + } + return result; } - return result; } - void calc_closest_subspace(VectorBundle vectors, std::optional<uint32_t>& closest_subspace, double& best_distance) { + void calc_closest_subspace(VectorBundle vectors, std::optional<uint32_t>& closest_subspace, double& best_distance) noexcept { for (uint32_t i = 0; i < vectors.subspaces(); ++i) { double distance = _dist_fun->calc(vectors.cells(i)); if (!closest_subspace.has_value() || distance < best_distance) { @@ -69,7 +91,7 @@ public: } } - std::optional<uint32_t> calc_closest_subspace(VectorBundle vectors) { + std::optional<uint32_t> calc_closest_subspace(VectorBundle vectors) noexcept { double best_distance = 0.0; std::optional<uint32_t> closest_subspace; calc_closest_subspace(vectors, closest_subspace, best_distance); diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function.h b/searchlib/src/vespa/searchlib/tensor/distance_function.h index c2e8305038c..9a2db8dfac0 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_function.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_function.h @@ -2,11 +2,6 @@ #pragma once -#include <memory> -#include <vespa/eval/eval/cell_type.h> - -namespace vespalib::eval { struct TypedCells; } - namespace search::tensor { class DistanceConverter { @@ -16,25 +11,25 @@ public: /** * Convert threshold (external distance units) to internal units. */ - virtual double convert_threshold(double threshold) const = 0; + virtual double convert_threshold(double threshold) const noexcept = 0; /** * Convert internal distance to rawscore (also used as closeness). */ - virtual double to_rawscore(double distance) const = 0; + virtual double to_rawscore(double distance) const noexcept = 0; /** * Convert rawscore to external distance. * Override this when the rawscore is NOT defined as (1.0 / (1.0 + external_distance)). */ - virtual double to_distance(double rawscore) const { + virtual double to_distance(double rawscore) const noexcept { return (1.0 / rawscore) - 1.0; } /** * The minimum rawscore (also used as closeness) that this distance function can return. */ - virtual double min_rawscore() const { + virtual double min_rawscore() const noexcept { return 0.0; } }; diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp index 4749a8549a6..ed08df5866e 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp +++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp @@ -3,22 +3,14 @@ #include "distance_function_factory.h" #include "distance_functions.h" #include "mips_distance_transform.h" -#include <vespa/vespalib/util/typify.h> -#include <vespa/vespalib/util/array.h> -#include <vespa/vespalib/util/arrayref.h> -#include <vespa/log/log.h> - -LOG_SETUP(".searchlib.tensor.distance_function_factory"); using search::attribute::DistanceMetric; using vespalib::eval::CellType; -using vespalib::eval::ValueType; namespace search::tensor { std::unique_ptr<DistanceFunctionFactory> -make_distance_function_factory(search::attribute::DistanceMetric variant, - vespalib::eval::CellType cell_type) +make_distance_function_factory(DistanceMetric variant, CellType cell_type) { switch (variant) { case DistanceMetric::Angular: diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h index 829ed7fae13..356366d6a77 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h @@ -4,7 +4,6 @@ #include "distance_function.h" #include "bound_distance_function.h" -#include <vespa/eval/eval/value_type.h> #include <vespa/searchcommon/attribute/distance_metric.h> namespace search::tensor { @@ -15,10 +14,11 @@ namespace search::tensor { * for one particular vector in the distance function object. */ struct DistanceFunctionFactory { - DistanceFunctionFactory() = default; - virtual ~DistanceFunctionFactory() {} - virtual BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) = 0; - virtual BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) = 0; + using TypedCells = vespalib::eval::TypedCells; + DistanceFunctionFactory() noexcept = default; + virtual ~DistanceFunctionFactory() = default; + virtual BoundDistanceFunction::UP for_query_vector(TypedCells lhs) = 0; + virtual BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) = 0; using UP = std::unique_ptr<DistanceFunctionFactory>; }; diff --git a/searchlib/src/vespa/searchlib/tensor/doc_vector_access.h b/searchlib/src/vespa/searchlib/tensor/doc_vector_access.h index 477d5e1dc8a..dd68171dd59 100644 --- a/searchlib/src/vespa/searchlib/tensor/doc_vector_access.h +++ b/searchlib/src/vespa/searchlib/tensor/doc_vector_access.h @@ -16,9 +16,9 @@ class VectorBundle; */ class DocVectorAccess { public: - virtual ~DocVectorAccess() {} - virtual vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const = 0; - virtual VectorBundle get_vectors(uint32_t docid) const = 0; + virtual ~DocVectorAccess() = default; + virtual vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const noexcept = 0; + virtual VectorBundle get_vectors(uint32_t docid) const noexcept = 0; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp b/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp index cfc420d9ecd..d581dbd129e 100644 --- a/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp +++ b/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp @@ -10,7 +10,8 @@ EmptySubspace::EmptySubspace(const SubspaceType& type) _cells() { _empty_space.resize(type.mem_size()); - _cells = vespalib::eval::TypedCells(&_empty_space[0], type.cell_type(), type.size()); + // Set size to zero to signal empty/invalid subspace + _cells = vespalib::eval::TypedCells(&_empty_space[0], type.cell_type(), 0); } EmptySubspace::~EmptySubspace() = default; diff --git a/searchlib/src/vespa/searchlib/tensor/empty_subspace.h b/searchlib/src/vespa/searchlib/tensor/empty_subspace.h index dd0ab9264c4..4043ec122e6 100644 --- a/searchlib/src/vespa/searchlib/tensor/empty_subspace.h +++ b/searchlib/src/vespa/searchlib/tensor/empty_subspace.h @@ -10,7 +10,7 @@ namespace search::tensor { class SubspaceType; /* - * Class containg an empty subspace, used as a bad fallback when we cannot + * Class containing an empty subspace, used as a bad fallback when we cannot * get a real subspace. */ class EmptySubspace diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp index 3efc8c3a5ea..6a730132ad1 100644 --- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp @@ -2,39 +2,20 @@ #include "euclidean_distance.h" #include "temporary_vector_store.h" +#include <vespa/vespalib/hwaccelrated/iaccelrated.h> +#include <cmath> using vespalib::typify_invoke; using vespalib::eval::TypifyCellType; +using vespalib::eval::TypedCells; namespace search::tensor { -namespace { - -struct CalcEuclidean { - template <typename LCT, typename RCT> - static double invoke(const vespalib::eval::TypedCells& lhs, - const vespalib::eval::TypedCells& rhs) - { - auto lhs_vector = lhs.unsafe_typify<LCT>(); - auto rhs_vector = rhs.unsafe_typify<RCT>(); - double sum = 0.0; - size_t sz = lhs_vector.size(); - assert(sz == rhs_vector.size()); - for (size_t i = 0; i < sz; ++i) { - double diff = lhs_vector[i] - rhs_vector[i]; - sum += diff*diff; - } - return sum; - } -}; - -} - using vespalib::eval::Int8Float; using vespalib::BFloat16; template<typename AttributeCellType> -class BoundEuclideanDistance : public BoundDistanceFunction { +class BoundEuclideanDistance final : public BoundDistanceFunction { using FloatType = std::conditional_t<std::is_same<AttributeCellType,BFloat16>::value,float,AttributeCellType>; private: const vespalib::hwaccelrated::IAccelrated & _computer; @@ -44,12 +25,12 @@ private: static const float *cast(const float * p) { return p; } static const int8_t *cast(const Int8Float * p) { return reinterpret_cast<const int8_t *>(p); } public: - BoundEuclideanDistance(const vespalib::eval::TypedCells& lhs) + explicit BoundEuclideanDistance(TypedCells lhs) : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()), _tmpSpace(lhs.size), _lhs_vector(_tmpSpace.storeLhs(lhs)) {} - double calc(const vespalib::eval::TypedCells& rhs) const override { + double calc(TypedCells rhs) const noexcept override { size_t sz = _lhs_vector.size(); vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs); assert(sz == rhs_vector.size()); @@ -57,15 +38,15 @@ public: auto b = rhs_vector.data(); return _computer.squaredEuclideanDistance(cast(a), cast(b), sz); } - double convert_threshold(double threshold) const override { + double convert_threshold(double threshold) const noexcept override { return threshold*threshold; } - double to_rawscore(double distance) const override { + double to_rawscore(double distance) const noexcept override { double d = sqrt(distance); double score = 1.0 / (1.0 + d); return score; } - double calc_with_limit(const vespalib::eval::TypedCells& rhs, double limit) const override { + double calc_with_limit(TypedCells rhs, double limit) const noexcept override { vespalib::ConstArrayRef<AttributeCellType> rhs_vector = rhs.typify<AttributeCellType>(); double sum = 0.0; size_t sz = _lhs_vector.size(); @@ -85,14 +66,14 @@ template class BoundEuclideanDistance<double>; template <typename FloatType> BoundDistanceFunction::UP -EuclideanDistanceFunctionFactory<FloatType>::for_query_vector(const vespalib::eval::TypedCells& lhs) { +EuclideanDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) { using DFT = BoundEuclideanDistance<FloatType>; return std::make_unique<DFT>(lhs); } template <typename FloatType> BoundDistanceFunction::UP -EuclideanDistanceFunctionFactory<FloatType>::for_insertion_vector(const vespalib::eval::TypedCells& lhs) { +EuclideanDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) { using DFT = BoundEuclideanDistance<FloatType>; return std::make_unique<DFT>(lhs); } diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h index 42097f8b39b..8c39a12bf86 100644 --- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h @@ -2,11 +2,7 @@ #pragma once -#include "distance_function.h" #include "distance_function_factory.h" -#include <vespa/eval/eval/typed_cells.h> -#include <vespa/vespalib/hwaccelrated/iaccelrated.h> -#include <cmath> namespace search::tensor { @@ -18,9 +14,9 @@ namespace search::tensor { template <typename FloatType> class EuclideanDistanceFunctionFactory : public DistanceFunctionFactory { public: - EuclideanDistanceFunctionFactory() = default; - BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override; - BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override; + EuclideanDistanceFunctionFactory() noexcept = default; + BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override; + BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp index 7b6c40c643e..f5484f40271 100644 --- a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp @@ -3,6 +3,7 @@ #include "geo_degrees_distance.h" #include "temporary_vector_store.h" #include <numbers> +#include <cmath> using vespalib::typify_invoke; using vespalib::eval::TypifyCellType; @@ -15,7 +16,7 @@ namespace search::tensor { * Uses the haversine formula directly from: * https://en.wikipedia.org/wiki/Haversine_formula **/ -class BoundGeoDistance : public BoundDistanceFunction { +class BoundGeoDistance final : public BoundDistanceFunction { private: mutable TemporaryVectorStore<double> _tmpSpace; const vespalib::ConstArrayRef<double> _lh_vector; @@ -26,16 +27,16 @@ public: static constexpr double degrees_to_radians = M_PI / 180.0; // haversine function: - static double haversine(double angle) { + static double haversine(double angle) noexcept { double s = sin(0.5*angle); return s*s; } - BoundGeoDistance(const vespalib::eval::TypedCells& lhs) + explicit BoundGeoDistance(TypedCells lhs) : _tmpSpace(lhs.size), _lh_vector(_tmpSpace.storeLhs(lhs)) {} - double calc(const vespalib::eval::TypedCells& rhs) const override { + double calc(TypedCells rhs) const noexcept override { vespalib::ConstArrayRef<double> rhs_vector = _tmpSpace.convertRhs(rhs); assert(2 == _lh_vector.size()); assert(2 == rhs_vector.size()); @@ -56,7 +57,7 @@ public: double hav_central_angle = hav_lat + cos(lat_A)*cos(lat_B)*hav_lon; return hav_central_angle; } - double convert_threshold(double threshold) const override { + double convert_threshold(double threshold) const noexcept override { if (threshold < 0.0) { return 0.0; } @@ -68,25 +69,25 @@ public: double rt_hav = sin(half_angle); return rt_hav * rt_hav; } - double to_rawscore(double distance) const override { + double to_rawscore(double distance) const noexcept override { double hav_diff = sqrt(distance); // distance in kilometers: double d = 2 * asin(hav_diff) * earth_mean_radius; // km to rawscore: return 1.0 / (1.0 + d); } - double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override { + double calc_with_limit(TypedCells rhs, double) const noexcept override { return calc(rhs); } }; BoundDistanceFunction::UP -GeoDistanceFunctionFactory::for_query_vector(const vespalib::eval::TypedCells& lhs) { +GeoDistanceFunctionFactory::for_query_vector(TypedCells lhs) { return std::make_unique<BoundGeoDistance>(lhs); } BoundDistanceFunction::UP -GeoDistanceFunctionFactory::for_insertion_vector(const vespalib::eval::TypedCells& lhs) { +GeoDistanceFunctionFactory::for_insertion_vector(TypedCells lhs) { return std::make_unique<BoundGeoDistance>(lhs); } diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h index f1af976b91f..1464898421b 100644 --- a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h @@ -2,12 +2,7 @@ #pragma once -#include "distance_function.h" #include "distance_function_factory.h" -#include <vespa/eval/eval/typed_cells.h> -#include <vespa/vespalib/hwaccelrated/iaccelrated.h> -#include <vespa/vespalib/util/typify.h> -#include <cmath> namespace search::tensor { @@ -19,8 +14,8 @@ namespace search::tensor { class GeoDistanceFunctionFactory : public DistanceFunctionFactory { public: GeoDistanceFunctionFactory() = default; - BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override; - BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override; + BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override; + BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp index a1dc8cc52f7..0be920b9c03 100644 --- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp @@ -6,51 +6,29 @@ using vespalib::typify_invoke; using vespalib::eval::TypifyCellType; +using vespalib::eval::TypedCells; namespace search::tensor { -namespace { - -struct CalcHamming { - template <typename LCT, typename RCT> - static double invoke(const vespalib::eval::TypedCells& lhs, - const vespalib::eval::TypedCells& rhs) - { - auto lhs_vector = lhs.unsafe_typify<LCT>(); - auto rhs_vector = rhs.unsafe_typify<RCT>(); - size_t sz = lhs_vector.size(); - assert(sz == rhs_vector.size()); - size_t sum = 0; - for (size_t i = 0; i < sz; ++i) { - sum += (lhs_vector[i] == rhs_vector[i]) ? 0 : 1; - } - return (double)sum; - } -}; - -} - using vespalib::eval::Int8Float; template<typename FloatType> -class BoundHammingDistance : public BoundDistanceFunction { +class BoundHammingDistance final : public BoundDistanceFunction { private: mutable TemporaryVectorStore<FloatType> _tmpSpace; const vespalib::ConstArrayRef<FloatType> _lhs_vector; public: - BoundHammingDistance(const vespalib::eval::TypedCells& lhs) + explicit BoundHammingDistance(TypedCells lhs) : _tmpSpace(lhs.size), _lhs_vector(_tmpSpace.storeLhs(lhs)) {} - double calc(const vespalib::eval::TypedCells& rhs) const override { + double calc(TypedCells rhs) const noexcept override { size_t sz = _lhs_vector.size(); vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs); - assert(sz == rhs_vector.size()); - auto a = _lhs_vector.data(); - auto b = rhs_vector.data(); if constexpr (std::is_same<Int8Float, FloatType>::value) { - return (double) vespalib::binary_hamming_distance(a, b, sz); + return (double) vespalib::binary_hamming_distance(_lhs_vector.data(), rhs_vector.data(), sz); } else { + assert(sz == rhs_vector.size()); size_t sum = 0; for (size_t i = 0; i < sz; ++i) { sum += (_lhs_vector[i] == rhs_vector[i]) ? 0 : 1; @@ -58,14 +36,13 @@ public: return (double)sum; } } - double convert_threshold(double threshold) const override { + double convert_threshold(double threshold) const noexcept override { return threshold; } - double to_rawscore(double distance) const override { - double score = 1.0 / (1.0 + distance); - return score; + double to_rawscore(double distance) const noexcept override { + return 1.0 / (1.0 + distance); } - double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override { + double calc_with_limit(TypedCells rhs, double) const noexcept override { // consider optimizing: return calc(rhs); } @@ -73,14 +50,14 @@ public: template <typename FloatType> BoundDistanceFunction::UP -HammingDistanceFunctionFactory<FloatType>::for_query_vector(const vespalib::eval::TypedCells& lhs) { +HammingDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) { using DFT = BoundHammingDistance<FloatType>; return std::make_unique<DFT>(lhs); } template <typename FloatType> BoundDistanceFunction::UP -HammingDistanceFunctionFactory<FloatType>::for_insertion_vector(const vespalib::eval::TypedCells& lhs) { +HammingDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) { using DFT = BoundHammingDistance<FloatType>; return std::make_unique<DFT>(lhs); } diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h index 32e2be99214..6e7f96e1e2f 100644 --- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h @@ -2,11 +2,7 @@ #pragma once -#include "distance_function.h" #include "distance_function_factory.h" -#include <vespa/eval/eval/typed_cells.h> -#include <vespa/vespalib/util/typify.h> -#include <cmath> namespace search::tensor { @@ -20,8 +16,8 @@ template <typename FloatType> class HammingDistanceFunctionFactory : public DistanceFunctionFactory { public: HammingDistanceFunctionFactory() = default; - BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override; - BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override; + BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override; + BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/i_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/i_tensor_attribute.h index 1f2da032619..b48ec93c10e 100644 --- a/searchlib/src/vespa/searchlib/tensor/i_tensor_attribute.h +++ b/searchlib/src/vespa/searchlib/tensor/i_tensor_attribute.h @@ -21,7 +21,7 @@ class SerializedTensorRef; */ class ITensorAttribute : public DocVectorAccess { public: - virtual ~ITensorAttribute() {} + virtual ~ITensorAttribute() = default; virtual std::unique_ptr<vespalib::eval::Value> getTensor(uint32_t docId) const = 0; virtual std::unique_ptr<vespalib::eval::Value> getEmptyTensor() const = 0; virtual vespalib::eval::TypedCells extract_cells_ref(uint32_t docid) const = 0; diff --git a/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.cpp b/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.cpp index 5ad6224f6d4..223a0a5750f 100644 --- a/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.cpp +++ b/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.cpp @@ -28,9 +28,7 @@ ImportedTensorAttributeVectorReadGuard::ImportedTensorAttributeVectorReadGuard(s { } -ImportedTensorAttributeVectorReadGuard::~ImportedTensorAttributeVectorReadGuard() -{ -} +ImportedTensorAttributeVectorReadGuard::~ImportedTensorAttributeVectorReadGuard() = default; const ITensorAttribute * ImportedTensorAttributeVectorReadGuard::asTensorAttribute() const @@ -63,13 +61,13 @@ ImportedTensorAttributeVectorReadGuard::get_tensor_ref(uint32_t docid) const } vespalib::eval::TypedCells -ImportedTensorAttributeVectorReadGuard::get_vector(uint32_t docid, uint32_t subspace) const +ImportedTensorAttributeVectorReadGuard::get_vector(uint32_t docid, uint32_t subspace) const noexcept { return _target_tensor_attribute.get_vector(getTargetLid(docid), subspace); } search::tensor::VectorBundle -ImportedTensorAttributeVectorReadGuard::get_vectors(uint32_t docid) const +ImportedTensorAttributeVectorReadGuard::get_vectors(uint32_t docid) const noexcept { return _target_tensor_attribute.get_vectors(getTargetLid(docid)); } diff --git a/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.h b/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.h index e07de5486b6..5e6bf8961df 100644 --- a/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.h +++ b/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.h @@ -27,7 +27,7 @@ public: ImportedTensorAttributeVectorReadGuard(std::shared_ptr<MetaStoreReadGuard> targetMetaStoreReadGuard, const attribute::ImportedAttributeVector &imported_attribute, bool stableEnumGuard); - ~ImportedTensorAttributeVectorReadGuard(); + ~ImportedTensorAttributeVectorReadGuard() override; const ITensorAttribute *asTensorAttribute() const override; @@ -45,8 +45,8 @@ public: bool supports_get_serialized_tensor_ref() const override; uint32_t get_num_docs() const override { return getNumDocs(); } - vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override; - VectorBundle get_vectors(uint32_t docid) const override; + vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const noexcept override; + VectorBundle get_vectors(uint32_t docid) const noexcept override; const vespalib::eval::ValueType &getTensorType() const override; void get_state(const vespalib::slime::Inserter& inserter) const override; diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp index 3645c511b01..c42242d8dc8 100644 --- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp +++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp @@ -4,7 +4,6 @@ #include "temporary_vector_store.h" #include <vespa/vespalib/hwaccelrated/iaccelrated.h> #include <cmath> -#include <mutex> #include <variant> using vespalib::eval::Int8Float; @@ -12,7 +11,7 @@ using vespalib::eval::Int8Float; namespace search::tensor { template<typename FloatType, bool extra_dim> -class BoundMipsDistanceFunction : public BoundDistanceFunction { +class BoundMipsDistanceFunction final : public BoundDistanceFunction { mutable TemporaryVectorStore<FloatType> _tmpSpace; const vespalib::ConstArrayRef<FloatType> _lhs_vector; const vespalib::hwaccelrated::IAccelrated & _computer; @@ -24,7 +23,7 @@ class BoundMipsDistanceFunction : public BoundDistanceFunction { static const float *cast(const float * p) { return p; } static const int8_t *cast(const Int8Float * p) { return reinterpret_cast<const int8_t *>(p); } public: - BoundMipsDistanceFunction(const vespalib::eval::TypedCells& lhs, MaximumSquaredNormStore& sq_norm_store) + BoundMipsDistanceFunction(TypedCells lhs, MaximumSquaredNormStore& sq_norm_store) : BoundDistanceFunction(), _tmpSpace(lhs.size), _lhs_vector(_tmpSpace.storeLhs(lhs)), @@ -44,7 +43,7 @@ public: return _lhs_extra_dim; } - double calc(const vespalib::eval::TypedCells &rhs) const override { + double calc(TypedCells rhs) const noexcept override { vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs); const FloatType * a = _lhs_vector.data(); const FloatType * b = rhs_vector.data(); @@ -58,32 +57,32 @@ public: } return -dp; } - double convert_threshold(double threshold) const override { + double convert_threshold(double threshold) const noexcept override { return threshold; } - double to_rawscore(double distance) const override { + double to_rawscore(double distance) const noexcept override { return -distance; } - double to_distance(double rawscore) const override { + double to_distance(double rawscore) const noexcept override { return -rawscore; } - double min_rawscore() const override { + double min_rawscore() const noexcept override { return std::numeric_limits<double>::lowest(); } - double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override { + double calc_with_limit(TypedCells rhs, double) const noexcept override { return calc(rhs); } }; template<typename FloatType> BoundDistanceFunction::UP -MipsDistanceFunctionFactory<FloatType>::for_query_vector(const vespalib::eval::TypedCells& lhs) { +MipsDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) { return std::make_unique<BoundMipsDistanceFunction<FloatType, false>>(lhs, *_sq_norm_store); } template<typename FloatType> BoundDistanceFunction::UP -MipsDistanceFunctionFactory<FloatType>::for_insertion_vector(const vespalib::eval::TypedCells& lhs) { +MipsDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) { return std::make_unique<BoundMipsDistanceFunction<FloatType, true>>(lhs, *_sq_norm_store); }; diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h index 63b2a83c1b5..67a6eb58de0 100644 --- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h +++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h @@ -45,7 +45,7 @@ public: : _sq_norm_store(std::make_shared<MaximumSquaredNormStore>()) { } - ~MipsDistanceFunctionFactoryBase() = default; + ~MipsDistanceFunctionFactoryBase() override = default; MaximumSquaredNormStore& get_max_squared_norm_store() noexcept { return *_sq_norm_store; } }; @@ -59,12 +59,11 @@ public: template<typename FloatType> class MipsDistanceFunctionFactory : public MipsDistanceFunctionFactoryBase { public: - MipsDistanceFunctionFactory() : MipsDistanceFunctionFactoryBase() { } - ~MipsDistanceFunctionFactory() = default; + MipsDistanceFunctionFactory() noexcept = default; + ~MipsDistanceFunctionFactory() override = default; - BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override; - - BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override; + BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override; + BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp index 931fd3edb06..267f91bb4e0 100644 --- a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp @@ -2,6 +2,7 @@ #include "prenormalized_angular_distance.h" #include "temporary_vector_store.h" +#include <vespa/vespalib/hwaccelrated/iaccelrated.h> using vespalib::typify_invoke; using vespalib::eval::TypifyCellType; @@ -9,14 +10,14 @@ using vespalib::eval::TypifyCellType; namespace search::tensor { template<typename FloatType> -class BoundPrenormalizedAngularDistance : public BoundDistanceFunction { +class BoundPrenormalizedAngularDistance final : public BoundDistanceFunction { private: const vespalib::hwaccelrated::IAccelrated & _computer; mutable TemporaryVectorStore<FloatType> _tmpSpace; const vespalib::ConstArrayRef<FloatType> _lhs; double _lhs_norm_sq; public: - BoundPrenormalizedAngularDistance(const vespalib::eval::TypedCells& lhs) + explicit BoundPrenormalizedAngularDistance(TypedCells lhs) : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()), _tmpSpace(lhs.size), _lhs(_tmpSpace.storeLhs(lhs)) @@ -27,7 +28,7 @@ public: _lhs_norm_sq = 1.0; } } - double calc(const vespalib::eval::TypedCells& rhs) const override { + double calc(TypedCells rhs) const noexcept override { size_t sz = _lhs.size(); vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs); assert(sz == rhs_vector.size()); @@ -37,13 +38,13 @@ public: double distance = _lhs_norm_sq - dot_product; return distance; } - double convert_threshold(double threshold) const override { + double convert_threshold(double threshold) const noexcept override { double cosine_similarity = 1.0 - threshold; double dot_product = cosine_similarity * _lhs_norm_sq; double distance = _lhs_norm_sq - dot_product; return distance; } - double to_rawscore(double distance) const override { + double to_rawscore(double distance) const noexcept override { double dot_product = _lhs_norm_sq - distance; double cosine_similarity = dot_product / _lhs_norm_sq; // should be in in range [-1,1] but roundoff may cause problems: @@ -53,7 +54,7 @@ public: double score = 1.0 / (1.0 + cosine_distance); return score; } - double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override { + double calc_with_limit(TypedCells rhs, double) const noexcept override { return calc(rhs); } }; @@ -63,14 +64,14 @@ template class BoundPrenormalizedAngularDistance<double>; template <typename FloatType> BoundDistanceFunction::UP -PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_query_vector(const vespalib::eval::TypedCells& lhs) { +PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) { using DFT = BoundPrenormalizedAngularDistance<FloatType>; return std::make_unique<DFT>(lhs); } template <typename FloatType> BoundDistanceFunction::UP -PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_insertion_vector(const vespalib::eval::TypedCells& lhs) { +PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) { using DFT = BoundPrenormalizedAngularDistance<FloatType>; return std::make_unique<DFT>(lhs); } diff --git a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h index 0f647547e08..7e3a8c2c676 100644 --- a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h @@ -2,11 +2,7 @@ #pragma once -#include "distance_function.h" -#include "bound_distance_function.h" #include "distance_function_factory.h" -#include <vespa/eval/eval/typed_cells.h> -#include <vespa/vespalib/hwaccelrated/iaccelrated.h> namespace search::tensor { @@ -18,8 +14,8 @@ template <typename FloatType> class PrenormalizedAngularDistanceFunctionFactory : public DistanceFunctionFactory { public: PrenormalizedAngularDistanceFunctionFactory() = default; - BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override; - BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override; + BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override; + BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp index 75927112b89..3c1bb51f4ea 100644 --- a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp @@ -42,7 +42,7 @@ SerializedFastValueAttribute::supports_get_serialized_tensor_ref() const } vespalib::eval::TypedCells -SerializedFastValueAttribute::get_vector(uint32_t docid, uint32_t subspace) const +SerializedFastValueAttribute::get_vector(uint32_t docid, uint32_t subspace) const noexcept { EntryRef ref = acquire_entry_ref(docid); auto vectors = _tensorBufferStore.get_vectors(ref); @@ -50,7 +50,7 @@ SerializedFastValueAttribute::get_vector(uint32_t docid, uint32_t subspace) cons } VectorBundle -SerializedFastValueAttribute::get_vectors(uint32_t docid) const +SerializedFastValueAttribute::get_vectors(uint32_t docid) const noexcept { EntryRef ref = acquire_entry_ref(docid); return _tensorBufferStore.get_vectors(ref); diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h index 386b0d91add..43b5a23d176 100644 --- a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h +++ b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h @@ -27,8 +27,8 @@ public: bool supports_get_serialized_tensor_ref() const override; // Implements DocVectorAccess - vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override; - VectorBundle get_vectors(uint32_t docid) const override; + vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const noexcept override; + VectorBundle get_vectors(uint32_t docid) const noexcept override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp index ff07f245de4..b1018555212 100644 --- a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp +++ b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp @@ -2,10 +2,6 @@ #include "temporary_vector_store.h" -#include <vespa/log/log.h> - -LOG_SETUP(".searchlib.tensor.temporary_vector_store"); - using vespalib::ConstArrayRef; using vespalib::ArrayRef; using vespalib::eval::CellType; @@ -17,7 +13,7 @@ namespace { template<typename FromType, typename ToType> ConstArrayRef<ToType> -convert_cells(ArrayRef<ToType> space, TypedCells cells) +convert_cells(ArrayRef<ToType> space, TypedCells cells) noexcept { assert(cells.size == space.size()); auto old_cells = cells.typify<FromType>(); @@ -32,7 +28,7 @@ convert_cells(ArrayRef<ToType> space, TypedCells cells) template <typename ToType> struct ConvertCellsSelector { - template <typename FromType> static auto invoke(ArrayRef<ToType> dst, TypedCells src) { + template <typename FromType> static auto invoke(ArrayRef<ToType> dst, TypedCells src) noexcept { return convert_cells<FromType, ToType>(dst, src); } }; @@ -41,8 +37,8 @@ struct ConvertCellsSelector template <typename FloatType> ConstArrayRef<FloatType> -TemporaryVectorStore<FloatType>::internal_convert(TypedCells cells, size_t offset) { - LOG_ASSERT(cells.size * 2 == _tmpSpace.size()); +TemporaryVectorStore<FloatType>::internal_convert(TypedCells cells, size_t offset) noexcept { + assert(cells.size * 2 == _tmpSpace.size()); ArrayRef<FloatType> where(_tmpSpace.data() + offset, cells.size); using MyTypify = vespalib::eval::TypifyCellType; using MySelector = ConvertCellsSelector<FloatType>; diff --git a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h index ad5bdf3ed3a..3dc237c85a4 100644 --- a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h +++ b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h @@ -2,10 +2,7 @@ #pragma once -#include <memory> -#include <vespa/eval/eval/cell_type.h> #include <vespa/eval/eval/typed_cells.h> -#include <vespa/vespalib/util/arrayref.h> namespace search::tensor { @@ -13,14 +10,15 @@ namespace search::tensor { template <typename FloatType> class TemporaryVectorStore { private: + using TypedCells = vespalib::eval::TypedCells; std::vector<FloatType> _tmpSpace; - vespalib::ConstArrayRef<FloatType> internal_convert(vespalib::eval::TypedCells cells, size_t offset); + vespalib::ConstArrayRef<FloatType> internal_convert(TypedCells cells, size_t offset) noexcept; public: - TemporaryVectorStore(size_t vectorSize) : _tmpSpace(vectorSize * 2) {} - vespalib::ConstArrayRef<FloatType> storeLhs(vespalib::eval::TypedCells cells) { + explicit TemporaryVectorStore(size_t vectorSize) noexcept : _tmpSpace(vectorSize * 2) {} + vespalib::ConstArrayRef<FloatType> storeLhs(TypedCells cells) noexcept { return internal_convert(cells, 0); } - vespalib::ConstArrayRef<FloatType> convertRhs(vespalib::eval::TypedCells cells) { + vespalib::ConstArrayRef<FloatType> convertRhs(TypedCells cells) { if (vespalib::eval::get_cell_type<FloatType>() == cells.type) [[likely]] { return cells.unsafe_typify<FloatType>(); } else { diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h index 9a2192cf736..b93249b7e21 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h +++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h @@ -105,18 +105,18 @@ public: vespalib::eval::TypedCells get_empty_subspace() const noexcept { return _empty.cells(); } - VectorBundle get_vectors(vespalib::ConstArrayRef<char> buf) const { + VectorBundle get_vectors(vespalib::ConstArrayRef<char> buf) const noexcept { auto num_subspaces = get_num_subspaces(buf); auto cells_mem_size = get_cells_mem_size(num_subspaces); auto aligner = select_aligner(cells_mem_size); - return VectorBundle(buf.data() + get_cells_offset(num_subspaces, aligner), num_subspaces, _subspace_type); + return {buf.data() + get_cells_offset(num_subspaces, aligner), num_subspaces, _subspace_type}; } - SerializedTensorRef get_serialized_tensor_ref(vespalib::ConstArrayRef<char> buf) const { + SerializedTensorRef get_serialized_tensor_ref(vespalib::ConstArrayRef<char> buf) const noexcept { auto num_subspaces = get_num_subspaces(buf); auto cells_mem_size = get_cells_mem_size(num_subspaces); auto aligner = select_aligner(cells_mem_size); vespalib::ConstArrayRef<vespalib::string_id> labels(reinterpret_cast<const vespalib::string_id*>(buf.data() + get_labels_offset()), num_subspaces * _num_mapped_dimensions); - return SerializedTensorRef(VectorBundle(buf.data() + get_cells_offset(num_subspaces, aligner), num_subspaces, _subspace_type), _num_mapped_dimensions, labels); + return {VectorBundle(buf.data() + get_cells_offset(num_subspaces, aligner), num_subspaces, _subspace_type), _num_mapped_dimensions, labels}; } bool is_dense() const noexcept { return _num_mapped_dimensions == 0; } }; diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h index c8d96adc220..07275c77566 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h +++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h @@ -41,16 +41,16 @@ public: vespalib::eval::TypedCells get_empty_subspace() const noexcept { return _ops.get_empty_subspace(); } - VectorBundle get_vectors(EntryRef ref) const { + VectorBundle get_vectors(EntryRef ref) const noexcept { if (!ref.valid()) { - return VectorBundle(); + return {}; } auto buf = _array_store.get(ref); return _ops.get_vectors(buf); } - SerializedTensorRef get_serialized_tensor_ref(EntryRef ref) const { + SerializedTensorRef get_serialized_tensor_ref(EntryRef ref) const noexcept { if (!ref.valid()) { - return SerializedTensorRef(); + return {}; } auto buf = _array_store.get(ref); return _ops.get_serialized_tensor_ref(buf); diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.cpp index 1f85dba6afe..716d54d0a71 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.cpp @@ -86,20 +86,20 @@ TensorExtAttribute::getExtendInterface() } TypedCells -TensorExtAttribute::get_vector(uint32_t docid, uint32_t subspace) const +TensorExtAttribute::get_vector(uint32_t docid, uint32_t subspace) const noexcept { auto vectors = get_vectors(docid); return (subspace < vectors.subspaces()) ? vectors.cells(subspace) : _empty.cells(); } VectorBundle -TensorExtAttribute::get_vectors(uint32_t docid) const +TensorExtAttribute::get_vectors(uint32_t docid) const noexcept { auto tensor = _data[docid]; if (tensor == nullptr) { - return VectorBundle(); + return {}; } - return VectorBundle(tensor->cells().data, tensor->index().size(), _subspace_type); + return {tensor->cells().data, static_cast<uint32_t>(tensor->index().size()), _subspace_type}; } std::unique_ptr<Value> diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.h b/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.h index 890b568c26e..0434c2ab65f 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.h +++ b/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.h @@ -37,8 +37,8 @@ public: bool add(const vespalib::eval::Value& v, int32_t) override; IExtendAttribute* getExtendInterface() override; // DocVectorAccess API - vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override; - VectorBundle get_vectors(uint32_t docid) const override; + vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const noexcept override; + VectorBundle get_vectors(uint32_t docid) const noexcept override; // ITensorAttribute API std::unique_ptr<vespalib::eval::Value> getTensor(uint32_t docid) const override; diff --git a/searchlib/src/vespa/searchlib/tensor/vector_bundle.h b/searchlib/src/vespa/searchlib/tensor/vector_bundle.h index 7ff7ea943de..087c0f43b60 100644 --- a/searchlib/src/vespa/searchlib/tensor/vector_bundle.h +++ b/searchlib/src/vespa/searchlib/tensor/vector_bundle.h @@ -40,7 +40,7 @@ public: uint32_t subspaces() const noexcept { return _subspaces; } vespalib::eval::TypedCells cells(uint32_t subspace) const noexcept { assert(subspace < _subspaces); - return vespalib::eval::TypedCells(static_cast<const char*>(_data) + _subspace_mem_size * subspace, _cell_type, _subspace_size); + return {static_cast<const char*>(_data) + _subspace_mem_size * subspace, _cell_type, _subspace_size}; } }; diff --git a/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp index 816317bf86d..2fd23100f46 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp +++ b/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp @@ -131,7 +131,7 @@ NearestNeighborFieldSearcher::onValue(const document::FieldValue& fv) _attr->add(*tfv->getAsTensorPtr(), 1); for (auto& elem : _calcs) { double distance_limit = elem->heap.distanceLimit(); - double distance = elem->calc->calc_with_limit(scratch_docid, distance_limit); + double distance = elem->calc->calc_with_limit<false>(scratch_docid, distance_limit); if (distance <= distance_limit) { elem->node->set_distance(distance); } diff --git a/vespalib/src/tests/util/hamming/CMakeLists.txt b/vespalib/src/tests/util/hamming/CMakeLists.txt index 5c317627200..ab551eab583 100644 --- a/vespalib/src/tests/util/hamming/CMakeLists.txt +++ b/vespalib/src/tests/util/hamming/CMakeLists.txt @@ -7,3 +7,10 @@ vespa_add_executable(vespalib_hamming_test_app TEST GTest::GTest ) vespa_add_test(NAME vespalib_hamming_test_app COMMAND vespalib_hamming_test_app) + +vespa_add_executable(vespalib_hamming_benchmark_app TEST + SOURCES + hamming_benchmark.cpp + DEPENDS + vespalib +) diff --git a/vespalib/src/tests/util/hamming/hamming_benchmark.cpp b/vespalib/src/tests/util/hamming/hamming_benchmark.cpp new file mode 100644 index 00000000000..b6393dcd1b7 --- /dev/null +++ b/vespalib/src/tests/util/hamming/hamming_benchmark.cpp @@ -0,0 +1,40 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/util/binary_hamming_distance.h> +#include <vector> +#include <cstdlib> +#include <cstdint> +#include <cstdio> + +using namespace vespalib; + +int main(int argc, char* argv[]) { + size_t vector_length = 1024/8; + size_t num_vectors = 1; + size_t num_reps = 100000000; + + if (argc > 2) { + vector_length = atol(argv[2])/8; + } + if (argc > 3) { + num_reps = atol(argv[3]); + } + if (argc > 4) { + num_vectors = atol(argv[4]); + } + + std::vector<uint8_t> center(vector_length); + std::vector<uint8_t> vectors(num_vectors*vector_length); + srand(13); + for (uint8_t & v : center) { v = rand(); } + for (uint8_t & v : vectors) { v = rand(); } + uint64_t sum(0); + for (size_t i=0; i < num_reps; i++) { + for (size_t j(0); j < num_vectors; j++) { + sum += binary_hamming_distance(center.data(), vectors.data() + j*vector_length, vector_length); + } + } + + printf("%lu vectors of %lu bits, repeated %lu times. Sum of distances = %lu\n", num_vectors, vector_length*8, num_reps, sum); + return 0; +} diff --git a/vespalib/src/vespa/vespalib/datastore/array_store.h b/vespalib/src/vespa/vespalib/datastore/array_store.h index 4549b81283e..51a1f9fe950 100644 --- a/vespalib/src/vespa/vespalib/datastore/array_store.h +++ b/vespalib/src/vespa/vespalib/datastore/array_store.h @@ -94,7 +94,7 @@ private: EntryRef allocate_dynamic_array(size_t array_size, uint32_t type_id); EntryRef addLargeArray(ConstArrayRef array); EntryRef allocate_large_array(size_t array_size); - ConstArrayRef getSmallArray(RefT ref, size_t arraySize) const { + ConstArrayRef getSmallArray(RefT ref, size_t arraySize) const noexcept { const ElemT *buf = _store.template getEntryArray<ElemT>(ref, arraySize); return ConstArrayRef(buf, arraySize); } @@ -104,7 +104,7 @@ private: auto size = BufferType::get_dynamic_array_size(entry); return ConstArrayRef(entry, size); } - ConstArrayRef getLargeArray(RefT ref) const { + ConstArrayRef getLargeArray(RefT ref) const noexcept { const LargeArray *buf = _store.template getEntry<LargeArray>(ref); return ConstArrayRef(&(*buf)[0], buf->size()); } @@ -114,7 +114,7 @@ public: ArrayStore(const ArrayStoreConfig &cfg, std::shared_ptr<alloc::MemoryAllocator> memory_allocator, TypeMapper&& mapper); ~ArrayStore() override; EntryRef add(ConstArrayRef array); - ConstArrayRef get(EntryRef ref) const { + ConstArrayRef get(EntryRef ref) const noexcept { if (!ref.valid()) [[unlikely]] { return ConstArrayRef(); } diff --git a/vespalib/src/vespa/vespalib/datastore/datastore.h b/vespalib/src/vespa/vespalib/datastore/datastore.h index fa231e9cf94..0226c780cf1 100644 --- a/vespalib/src/vespa/vespalib/datastore/datastore.h +++ b/vespalib/src/vespa/vespalib/datastore/datastore.h @@ -96,7 +96,7 @@ public: EntryRef addEntry(const EntryType &e); - const EntryType &getEntry(EntryRef ref) const { + const EntryType &getEntry(EntryRef ref) const noexcept { return *this->template getEntry<EntryType>(RefType(ref)); } }; diff --git a/vespalib/src/vespa/vespalib/util/binary_hamming_distance.cpp b/vespalib/src/vespa/vespalib/util/binary_hamming_distance.cpp index 0e9393b7be4..5f242059ccf 100644 --- a/vespalib/src/vespa/vespalib/util/binary_hamming_distance.cpp +++ b/vespalib/src/vespa/vespalib/util/binary_hamming_distance.cpp @@ -4,25 +4,34 @@ namespace vespalib { -size_t binary_hamming_distance(const void *lhs, const void *rhs, size_t sz) { - uintptr_t addr_a = (uintptr_t) lhs; - uintptr_t addr_b = (uintptr_t) rhs; +namespace { + constexpr uint8_t WORD_SZ = sizeof (uint64_t); + constexpr uint8_t UNROLL_CNT = 2; + static_assert(sizeof(uint64_t) == 8); +} +size_t +binary_hamming_distance(const void *lhs, const void *rhs, size_t sz) noexcept { + auto addr_a = (uintptr_t) lhs; + auto addr_b = (uintptr_t) rhs; size_t sum = 0; size_t i = 0; - static_assert(sizeof(uint64_t) == 8); bool aligned = ((addr_a & 0x7) == 0) && ((addr_b & 0x7) == 0); if (__builtin_expect(aligned, true)) { - const uint64_t *words_a = static_cast<const uint64_t *>(lhs); - const uint64_t *words_b = static_cast<const uint64_t *>(rhs); - for (; i * 8 + 7 < sz; ++i) { - uint64_t xor_bits = words_a[i] ^ words_b[i]; - sum += __builtin_popcountl(xor_bits); + const auto *words_a = static_cast<const uint64_t *>(lhs); + const auto *words_b = static_cast<const uint64_t *>(rhs); + for (; (i+UNROLL_CNT) * WORD_SZ <= sz; i += UNROLL_CNT) { + for (uint8_t j=0; j < UNROLL_CNT; j++) { + sum += __builtin_popcountl(words_a[i+j] ^ words_b[i+j]); + } + } + for (; (i + 1) * WORD_SZ <= sz; ++i) { + sum += __builtin_popcountl(words_a[i] ^ words_b[i]); } } - if (__builtin_expect((i * 8 < sz), false)) { - const uint8_t *bytes_a = static_cast<const uint8_t *>(lhs); - const uint8_t *bytes_b = static_cast<const uint8_t *>(rhs); - for (i *= 8; i < sz; ++i) { + if (__builtin_expect((i * WORD_SZ < sz), false)) { + const auto *bytes_a = static_cast<const uint8_t *>(lhs); + const auto *bytes_b = static_cast<const uint8_t *>(rhs); + for (i *= WORD_SZ; i < sz; ++i) { uint64_t xor_bits = bytes_a[i] ^ bytes_b[i]; sum += __builtin_popcountl(xor_bits); } diff --git a/vespalib/src/vespa/vespalib/util/binary_hamming_distance.h b/vespalib/src/vespa/vespalib/util/binary_hamming_distance.h index 84bbbe71788..f5280903db1 100644 --- a/vespalib/src/vespa/vespalib/util/binary_hamming_distance.h +++ b/vespalib/src/vespa/vespalib/util/binary_hamming_distance.h @@ -10,5 +10,5 @@ namespace vespalib { * @param sz number of bytes in each blob * @return number of bits that differ when comparing the two blobs **/ -size_t binary_hamming_distance(const void *lhs, const void *rhs, size_t sz); +size_t binary_hamming_distance(const void *lhs, const void *rhs, size_t sz) noexcept; } diff --git a/vespamalloc/src/vespamalloc/util/callstack.cpp b/vespamalloc/src/vespamalloc/util/callstack.cpp index a0645f06815..b8449c89a72 100644 --- a/vespamalloc/src/vespamalloc/util/callstack.cpp +++ b/vespamalloc/src/vespamalloc/util/callstack.cpp @@ -53,6 +53,7 @@ const void * StackEntry::_stopAddr = nullptr; size_t StackEntry::fillStack(StackEntry *stack, size_t nelems) { + // GNU extension: Variable-length automatic array void * retAddr[nelems]; int sz = backtrace(retAddr, nelems); if ((sz > 0) && (size_t(sz) <= nelems)) { |