summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java7
-rw-r--r--config-model/src/test/java/com/yahoo/vespa/model/container/xml/CloudTokenDataPlaneFilterTest.java30
-rw-r--r--container-core/src/main/java/com/yahoo/container/jdisc/state/StateHandler.java10
-rw-r--r--container-core/src/test/java/com/yahoo/container/jdisc/state/StateHandlerTest.java2
-rw-r--r--container-search/src/main/java/com/yahoo/prelude/query/SimpleIndexedItem.java2
-rw-r--r--container-search/src/test/java/com/yahoo/search/query/SortingTestCase.java2
-rw-r--r--dependency-versions/pom.xml6
-rw-r--r--eval/src/vespa/eval/eval/cell_type.h2
-rw-r--r--eval/src/vespa/eval/eval/typed_cells.h18
-rw-r--r--eval/src/vespa/eval/instruction/dense_hamming_distance.cpp1
-rw-r--r--fbench/src/test/filereader.cpp2
-rw-r--r--flags/src/main/java/com/yahoo/vespa/flags/Flags.java6
-rw-r--r--metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java1
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp2
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/querynodes.cpp3
-rw-r--r--searchlib/src/tests/aggregator/perdocexpr_test.cpp1
-rw-r--r--searchlib/src/tests/attribute/extendattributes/extendattribute_test.cpp2
-rw-r--r--searchlib/src/tests/diskindex/pagedict4/pagedict4_test.cpp52
-rw-r--r--searchlib/src/tests/tensor/distance_calculator/distance_calculator_test.cpp8
-rw-r--r--searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp11
-rw-r--r--searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp22
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/compression.cpp8
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/compression.h7
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp45
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/pagedict4file.h2
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/pagedict4randread.cpp43
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/pagedict4randread.h2
-rw-r--r--searchlib/src/vespa/searchlib/features/closenessfeature.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/features/distancefeature.cpp10
-rw-r--r--searchlib/src/vespa/searchlib/predicate/predicate_range_term_expander.h1
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp42
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.h2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/angular_distance.cpp50
-rw-r--r--searchlib/src/vespa/searchlib/tensor/angular_distance.h9
-rw-r--r--searchlib/src/vespa/searchlib/tensor/bound_distance_function.h17
-rw-r--r--searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.h4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h10
-rw-r--r--searchlib/src/vespa/searchlib/tensor/distance_calculator.h60
-rw-r--r--searchlib/src/vespa/searchlib/tensor/distance_function.h13
-rw-r--r--searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp10
-rw-r--r--searchlib/src/vespa/searchlib/tensor/distance_function_factory.h10
-rw-r--r--searchlib/src/vespa/searchlib/tensor/doc_vector_access.h6
-rw-r--r--searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp3
-rw-r--r--searchlib/src/vespa/searchlib/tensor/empty_subspace.h2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp41
-rw-r--r--searchlib/src/vespa/searchlib/tensor/euclidean_distance.h10
-rw-r--r--searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp19
-rw-r--r--searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h9
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp47
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hamming_distance.h8
-rw-r--r--searchlib/src/vespa/searchlib/tensor/i_tensor_attribute.h2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.cpp8
-rw-r--r--searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.h6
-rw-r--r--searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp21
-rw-r--r--searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h11
-rw-r--r--searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp17
-rw-r--r--searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h8
-rw-r--r--searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h12
-rw-r--r--searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h8
-rw-r--r--searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h8
-rw-r--r--searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.cpp8
-rw-r--r--searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.h4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/vector_bundle.h2
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp2
-rw-r--r--vespalib/src/tests/util/hamming/CMakeLists.txt7
-rw-r--r--vespalib/src/tests/util/hamming/hamming_benchmark.cpp40
-rw-r--r--vespalib/src/vespa/vespalib/datastore/array_store.h6
-rw-r--r--vespalib/src/vespa/vespalib/datastore/datastore.h2
-rw-r--r--vespalib/src/vespa/vespalib/util/binary_hamming_distance.cpp35
-rw-r--r--vespalib/src/vespa/vespalib/util/binary_hamming_distance.h2
-rw-r--r--vespamalloc/src/vespamalloc/util/callstack.cpp1
77 files changed, 522 insertions, 402 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
index 360a02256a9..c6fca8d32c6 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
@@ -506,6 +506,13 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
boolean atLeastOneClientWithCertificate = clients.stream().anyMatch(client -> !client.certificates().isEmpty());
if (!atLeastOneClientWithCertificate)
throw new IllegalArgumentException("At least one client must require a certificate");
+
+ List<String> duplicates = clients.stream().collect(Collectors.groupingBy(Client::id))
+ .entrySet().stream().filter(entry -> entry.getValue().size() > 1)
+ .map(Map.Entry::getKey).sorted().toList();
+ if (! duplicates.isEmpty()) {
+ throw new IllegalArgumentException("Duplicate client ids: " + duplicates);
+ }
}
List<X509Certificate> operatorAndTesterCertificates = deployState.getProperties().operatorCertificates();
diff --git a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/CloudTokenDataPlaneFilterTest.java b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/CloudTokenDataPlaneFilterTest.java
index 1c5eb16be80..fa09d3c1890 100644
--- a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/CloudTokenDataPlaneFilterTest.java
+++ b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/CloudTokenDataPlaneFilterTest.java
@@ -162,6 +162,36 @@ public class CloudTokenDataPlaneFilterTest extends ContainerModelBuilderTestBase
assertEquals("Invalid permission 'unknown-permission'. Valid values are 'read' and 'write'.", exception.getMessage());
}
+ @Test
+ void fails_on_duplicate_clients() throws IOException {
+ var certFile = securityFolder.resolve("foo.pem");
+ var servicesXml = """
+ <container version="1.0">
+ <clients>
+ <client id="mtls" permissions="read,write">
+ <certificate file="%1$s"/>
+ </client>
+ <client id="mtls" permissions="read,write">
+ <certificate file="%1$s"/>
+ </client>
+ <client id="token1" permissions="read">
+ <token id="my-token"/>
+ </client>
+ <client id="token2" permissions="read">
+ <token id="my-token"/>
+ </client>
+ <client id="token1" permissions="read">
+ <token id="my-token"/>
+ </client>
+ </clients>
+ </container>
+ """.formatted(applicationFolder.toPath().relativize(certFile).toString());
+ var clusterElem = DomBuilderTest.parse(servicesXml);
+ createCertificate(certFile);
+ var exception = assertThrows(IllegalArgumentException.class, () -> buildModel(Set.of(mtlsEndpoint), defaultTokens, clusterElem));
+ assertEquals("Duplicate client ids: [mtls, token1]", exception.getMessage());
+ }
+
private static CloudTokenDataPlaneFilterConfig.Clients.Tokens tokenConfig(
String id, Collection<String> fingerprints, Collection<String> accessCheckHashes, Collection<String> expirations) {
return new CloudTokenDataPlaneFilterConfig.Clients.Tokens.Builder()
diff --git a/container-core/src/main/java/com/yahoo/container/jdisc/state/StateHandler.java b/container-core/src/main/java/com/yahoo/container/jdisc/state/StateHandler.java
index 81ec0919441..32fd1d64129 100644
--- a/container-core/src/main/java/com/yahoo/container/jdisc/state/StateHandler.java
+++ b/container-core/src/main/java/com/yahoo/container/jdisc/state/StateHandler.java
@@ -376,7 +376,15 @@ public class StateHandler extends AbstractRequestHandler implements CapabilityRe
}
private String prometheusSanitizedName(String name) {
- return name.replaceAll("\\.", "_");
+ var stringBuilder = new StringBuilder();
+ for (char c : name.toCharArray()) {
+ if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')) {
+ stringBuilder.append(c);
+ } else {
+ stringBuilder.append("_");
+ }
+ }
+ return stringBuilder.toString();
}
private String sanitizeIfDouble(Number num) {
diff --git a/container-core/src/test/java/com/yahoo/container/jdisc/state/StateHandlerTest.java b/container-core/src/test/java/com/yahoo/container/jdisc/state/StateHandlerTest.java
index 68a7d06e7fe..0aa2b0f41d5 100644
--- a/container-core/src/test/java/com/yahoo/container/jdisc/state/StateHandlerTest.java
+++ b/container-core/src/test/java/com/yahoo/container/jdisc/state/StateHandlerTest.java
@@ -88,7 +88,7 @@ public class StateHandlerTest extends StateHandlerTestBase {
snapshot.add(otherContext, "some.counter", 2);
snapshot.set(null, "bar", 20);
snapshot.set(null, "bar", 40);
- snapshot.set(null, "testing.infinity", Double.NEGATIVE_INFINITY);
+ snapshot.set(null, "testing-infinity", Double.NEGATIVE_INFINITY);
snapshot.set(null, "testing.nan", Double.NaN);
snapshotProvider.setSnapshot(snapshot);
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/SimpleIndexedItem.java b/container-search/src/main/java/com/yahoo/prelude/query/SimpleIndexedItem.java
index 6e1a2be684a..e7bdf640661 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/SimpleIndexedItem.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/SimpleIndexedItem.java
@@ -46,7 +46,7 @@ public abstract class SimpleIndexedItem extends SimpleTaggableItem implements In
/** Appends the index prefix if necessary */
protected void appendIndexString(StringBuilder buffer) {
- if (!getIndexName().equals("")) {
+ if (!getIndexName().isEmpty()) {
buffer.append(getIndexName());
buffer.append(":");
}
diff --git a/container-search/src/test/java/com/yahoo/search/query/SortingTestCase.java b/container-search/src/test/java/com/yahoo/search/query/SortingTestCase.java
index 49eaa9b3a89..759181a2ce7 100644
--- a/container-search/src/test/java/com/yahoo/search/query/SortingTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/query/SortingTestCase.java
@@ -82,7 +82,7 @@ public class SortingTestCase {
private void requireThatChineseHasCorrectRules(Collator col) {
final int reorderCodes [] = {UScript.HAN};
assertEquals("15.1.0.0", col.getUCAVersion().toString());
- assertEquals("153.121.44.8", col.getVersion().toString());
+ assertEquals("153.121.45.0", col.getVersion().toString());
assertEquals(Arrays.toString(reorderCodes), Arrays.toString(col.getReorderCodes()));
assertNotEquals("", ((RuleBasedCollator) col).getRules());
diff --git a/dependency-versions/pom.xml b/dependency-versions/pom.xml
index b5edcd44108..b37c97c5a71 100644
--- a/dependency-versions/pom.xml
+++ b/dependency-versions/pom.xml
@@ -68,7 +68,7 @@
<assertj.vespa.version>3.25.3</assertj.vespa.version>
<!-- Athenz dependencies. Make sure these dependencies match those in Vespa's internal repositories -->
- <aws-sdk.vespa.version>1.12.701</aws-sdk.vespa.version>
+ <aws-sdk.vespa.version>1.12.703</aws-sdk.vespa.version>
<athenz.vespa.version>1.11.56</athenz.vespa.version>
<!-- Athenz END -->
@@ -91,7 +91,7 @@
<commons-logging.vespa.version>1.3.1</commons-logging.vespa.version> <!-- Bindings exported by jdisc through jcl-over-slf4j. -->
<commons.math3.vespa.version>3.6.1</commons.math3.vespa.version>
<commons-compress.vespa.version>1.26.1</commons-compress.vespa.version>
- <commons-cli.vespa.version>1.6.0</commons-cli.vespa.version>
+ <commons-cli.vespa.version>1.7.0</commons-cli.vespa.version>
<curator.vespa.version>5.6.0</curator.vespa.version>
<dropwizard.metrics.vespa.version>4.2.25</dropwizard.metrics.vespa.version> <!-- ZK 3.9.1 requires this -->
<eclipse-angus.vespa.version>2.0.2</eclipse-angus.vespa.version>
@@ -104,7 +104,7 @@
<hamcrest.vespa.version>2.2</hamcrest.vespa.version>
<hdrhistogram.vespa.version>2.1.12</hdrhistogram.vespa.version>
<huggingface.vespa.version>0.27.0</huggingface.vespa.version>
- <icu4j.vespa.version>74.2</icu4j.vespa.version>
+ <icu4j.vespa.version>75.1</icu4j.vespa.version>
<java-jjwt.vespa.version>0.11.5</java-jjwt.vespa.version>
<java-jwt.vespa.version>4.4.0</java-jwt.vespa.version>
<javax.annotation.vespa.version>1.2</javax.annotation.vespa.version>
diff --git a/eval/src/vespa/eval/eval/cell_type.h b/eval/src/vespa/eval/eval/cell_type.h
index c15a5b68dba..3c474638480 100644
--- a/eval/src/vespa/eval/eval/cell_type.h
+++ b/eval/src/vespa/eval/eval/cell_type.h
@@ -70,7 +70,7 @@ struct CellMetaNotScalar {
struct CellMeta {
const CellType cell_type;
const bool is_scalar;
- constexpr CellMeta(CellType cell_type_in, bool is_scalar_in)
+ constexpr CellMeta(CellType cell_type_in, bool is_scalar_in) noexcept
: cell_type(cell_type_in), is_scalar(is_scalar_in)
{
// is_scalar -> double cell type
diff --git a/eval/src/vespa/eval/eval/typed_cells.h b/eval/src/vespa/eval/eval/typed_cells.h
index d05c3e3294a..6cb8675cd5f 100644
--- a/eval/src/vespa/eval/eval/typed_cells.h
+++ b/eval/src/vespa/eval/eval/typed_cells.h
@@ -11,24 +11,24 @@ namespace vespalib::eval {
struct TypedCells {
const void *data;
- size_t size:56;
- CellType type;
+ size_t size:56;
+ CellType type;
- explicit TypedCells(ConstArrayRef<double> cells) : data(cells.begin()), size(cells.size()), type(CellType::DOUBLE) {}
- explicit TypedCells(ConstArrayRef<float> cells) : data(cells.begin()), size(cells.size()), type(CellType::FLOAT) {}
- explicit TypedCells(ConstArrayRef<BFloat16> cells) : data(cells.begin()), size(cells.size()), type(CellType::BFLOAT16) {}
- explicit TypedCells(ConstArrayRef<Int8Float> cells) : data(cells.begin()), size(cells.size()), type(CellType::INT8) {}
+ explicit TypedCells(ConstArrayRef<double> cells) noexcept : data(cells.begin()), size(cells.size()), type(CellType::DOUBLE) {}
+ explicit TypedCells(ConstArrayRef<float> cells) noexcept : data(cells.begin()), size(cells.size()), type(CellType::FLOAT) {}
+ explicit TypedCells(ConstArrayRef<BFloat16> cells) noexcept : data(cells.begin()), size(cells.size()), type(CellType::BFLOAT16) {}
+ explicit TypedCells(ConstArrayRef<Int8Float> cells) noexcept : data(cells.begin()), size(cells.size()), type(CellType::INT8) {}
TypedCells() noexcept : data(nullptr), size(0), type(CellType::DOUBLE) {}
TypedCells(const void *dp, CellType ct, size_t sz) noexcept : data(dp), size(sz), type(ct) {}
- template <typename T> bool check_type() const { return vespalib::eval::check_cell_type<T>(type); }
+ template <typename T> bool check_type() const noexcept { return check_cell_type<T>(type); }
- template <typename T> ConstArrayRef<T> typify() const {
+ template <typename T> ConstArrayRef<T> typify() const noexcept {
assert(check_type<T>());
return ConstArrayRef<T>((const T *)data, size);
}
- template <typename T> ConstArrayRef<T> unsafe_typify() const {
+ template <typename T> ConstArrayRef<T> unsafe_typify() const noexcept {
return ConstArrayRef<T>((const T *)data, size);
}
diff --git a/eval/src/vespa/eval/instruction/dense_hamming_distance.cpp b/eval/src/vespa/eval/instruction/dense_hamming_distance.cpp
index 81f25241d3d..94f0a313f2e 100644
--- a/eval/src/vespa/eval/instruction/dense_hamming_distance.cpp
+++ b/eval/src/vespa/eval/instruction/dense_hamming_distance.cpp
@@ -3,7 +3,6 @@
#include "dense_hamming_distance.h"
#include <vespa/eval/eval/operation.h>
#include <vespa/eval/eval/value.h>
-#include <vespa/eval/eval/hamming_distance.h>
#include <vespa/vespalib/util/binary_hamming_distance.h>
#include <vespa/log/log.h>
diff --git a/fbench/src/test/filereader.cpp b/fbench/src/test/filereader.cpp
index 87c5914e85b..b2061633d41 100644
--- a/fbench/src/test/filereader.cpp
+++ b/fbench/src/test/filereader.cpp
@@ -66,7 +66,7 @@ main(int argc, char **argv)
return -1;
}
int res;
- int buflen = 10240;
+ constexpr int buflen = 10240;
char buf[buflen];
while ((res = reader->ReadLine(buf, buflen - 1)) >= 0) {
// printf("len=%d, content:>%s<\n", res, buf);
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
index e9de8cdca20..558b8dea8d9 100644
--- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
+++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
@@ -350,21 +350,21 @@ public class Flags {
public static final UnboundBooleanFlag MORE_WIREGUARD = defineFeatureFlag(
"more-wireguard", false,
- List.of("andreer"), "2023-08-21", "2024-04-14",
+ List.of("andreer"), "2023-08-21", "2025-01-01",
"Use wireguard in INternal enCLAVES",
"Takes effect on next host-admin run",
HOSTNAME, CLOUD_ACCOUNT);
public static final UnboundBooleanFlag IPV6_AWS_TARGET_GROUPS = defineFeatureFlag(
"ipv6-aws-target-groups", false,
- List.of("andreer"), "2023-08-28", "2024-04-14",
+ List.of("andreer"), "2023-08-28", "2025-01-01",
"Always use IPv6 target groups for load balancers in aws",
"Takes effect on next load-balancer provisioning",
HOSTNAME, CLOUD_ACCOUNT);
public static final UnboundBooleanFlag PROVISION_IPV6_ONLY_AWS = defineFeatureFlag(
"provision-ipv6-only", false,
- List.of("andreer"), "2023-08-28", "2024-04-14",
+ List.of("andreer"), "2023-08-28", "2025-01-01",
"Provision without private IPv4 addresses in INternal enCLAVES in AWS",
"Takes effect on next host provisioning / run of host-admin",
HOSTNAME, CLOUD_ACCOUNT);
diff --git a/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java b/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java
index 9479c814e89..8c500473678 100644
--- a/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java
+++ b/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java
@@ -185,6 +185,7 @@ public class InfrastructureMetricSet {
addMetric(metrics, ControllerMetrics.ZMS_QUOTA_USAGE.max());
addMetric(metrics, ControllerMetrics.COREDUMP_PROCESSED.count());
addMetric(metrics, ControllerMetrics.AUTH0_EXCEPTIONS.count());
+ addMetric(metrics, ControllerMetrics.BILLING_WEBHOOK_FAILURES.count());
addMetric(metrics, ControllerMetrics.CERTIFICATE_POOL_AVAILABLE.max());
addMetric(metrics, ControllerMetrics.BILLING_EXCEPTIONS.count());
diff --git a/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp b/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp
index c03d93b6480..0b2660824c0 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp
@@ -88,6 +88,7 @@ private:
double w = getWeightFromNode(*node).percent();
eq->addTerm(build(_requestContext, *node, _context), w / eqw);
}
+ _result->setDocIdLimit(_context.getDocIdLimit());
n.setDocumentFrequency(_result->getState().estimate().estHits, _context.getDocIdLimit());
}
@@ -123,6 +124,7 @@ private:
indexBlueprint = _context.getIndexes().createBlueprint(_requestContext, indexFields, n);
}
_result = mixer.mix(std::move(indexBlueprint));
+ _result->setDocIdLimit(_context.getDocIdLimit());
n.setDocumentFrequency(_result->getState().estimate().estHits, _context.getDocIdLimit());
}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/querynodes.cpp b/searchcore/src/vespa/searchcore/proton/matching/querynodes.cpp
index 60c2e869e79..47a9f3dd43d 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/querynodes.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/querynodes.cpp
@@ -93,7 +93,8 @@ void
ProtonTermData::setDocumentFrequency(uint32_t estHits, uint32_t docIdLimit)
{
if (docIdLimit > 1) {
- propagate_document_frequency(estHits, docIdLimit - 1);
+ uint32_t total_doc_count = docIdLimit - 1;
+ propagate_document_frequency(std::min(estHits, total_doc_count), total_doc_count);
} else {
propagate_document_frequency(0, 1);
}
diff --git a/searchlib/src/tests/aggregator/perdocexpr_test.cpp b/searchlib/src/tests/aggregator/perdocexpr_test.cpp
index 908e50ad4d2..e9f0981739c 100644
--- a/searchlib/src/tests/aggregator/perdocexpr_test.cpp
+++ b/searchlib/src/tests/aggregator/perdocexpr_test.cpp
@@ -604,6 +604,7 @@ getVespaChecksumV2(const std::string& ymumid, int fid, const std::string& flags_
sizeof(networkFid)+
new_flags_str.length();
+ // GNU extension: Variable-length automatic array
unsigned char buffer[length];
memset(buffer, 0x00, length);
memcpy(buffer, ymumid.c_str(), ymumid.length());
diff --git a/searchlib/src/tests/attribute/extendattributes/extendattribute_test.cpp b/searchlib/src/tests/attribute/extendattributes/extendattribute_test.cpp
index 48270694394..d67757a3811 100644
--- a/searchlib/src/tests/attribute/extendattributes/extendattribute_test.cpp
+++ b/searchlib/src/tests/attribute/extendattributes/extendattribute_test.cpp
@@ -224,7 +224,7 @@ void ExtendAttributeTest::testExtendRaw(AttributeVector& attr)
void ExtendAttributeTest::testExtendTensor(AttributeVector& attr)
{
- std::vector<double> empty_cells{0.0, 0.0};
+ std::vector<double> empty_cells{};
std::vector<double> spec0_dense_cells{1.0, 2.0};
std::vector<double> spec0_mixed_cells0{3.0, 4.0};
std::vector<double> spec0_mixed_cells1{5.0, 6.0};
diff --git a/searchlib/src/tests/diskindex/pagedict4/pagedict4_test.cpp b/searchlib/src/tests/diskindex/pagedict4/pagedict4_test.cpp
index 951d6f61980..3b7ec00211d 100644
--- a/searchlib/src/tests/diskindex/pagedict4/pagedict4_test.cpp
+++ b/searchlib/src/tests/diskindex/pagedict4/pagedict4_test.cpp
@@ -15,8 +15,9 @@
#include <vespa/searchlib/diskindex/pagedict4randread.h>
#include <vespa/searchlib/common/tunefileinfo.h>
#include <vespa/vespalib/util/signalhandler.h>
-#include <sstream>
#include <cinttypes>
+#include <optional>
+#include <sstream>
#include <vespa/log/log.h>
LOG_SETUP("pagedict4test");
@@ -357,6 +358,7 @@ checkCounts(const std::string &word,
void
testWords(const std::string &logname,
vespalib::Rand48 &rnd,
+ std::optional<uint32_t> mmap_file_size_threshold,
uint64_t numWordIds,
uint32_t tupleCount,
uint32_t chunkSize,
@@ -495,7 +497,14 @@ testWords(const std::string &logname,
LOG(info, "%s: pagedict4 written", logname.c_str());
}
{
- std::unique_ptr<DictionaryFileSeqRead> dr(new PageDict4FileSeqRead);
+ std::unique_ptr<DictionaryFileSeqRead> dr;
+ {
+ auto my_dr = std::make_unique<PageDict4FileSeqRead>();
+ if (mmap_file_size_threshold.has_value()) {
+ my_dr->set_mmap_file_size_threshold(mmap_file_size_threshold.value());
+ }
+ dr = std::move(my_dr);
+ }
search::TuneFileSeqRead tuneFileRead;
bool openres = dr->open("fakedict",
@@ -535,7 +544,14 @@ testWords(const std::string &logname,
LOG(info, "%s: pagedict4 seqverify OK", logname.c_str());
}
{
- std::unique_ptr<DictionaryFileRandRead> drr(new PageDict4RandRead);
+ std::unique_ptr<DictionaryFileRandRead> drr;
+ {
+ auto my_drr = std::make_unique<PageDict4RandRead>();
+ if (mmap_file_size_threshold.has_value()) {
+ my_drr->set_mmap_file_size_threshold(mmap_file_size_threshold.value());
+ }
+ drr = std::move(my_drr);
+ }
search::TuneFileRandRead tuneFileRead;
bool openres = drr->open("fakedict",
tuneFileRead);
@@ -649,46 +665,50 @@ testWords(const std::string &logname,
void
PageDict4TestApp::testWords()
{
- ::testWords("smallchunkwordsempty", _rnd,
+ ::testWords("smallchunkwordsempty", _rnd, std::nullopt,
1000000, 0,
64, 80, 72, 64,
false, false, false);
- ::testWords("smallchunkwordsempty2", _rnd,
+ ::testWords("smallchunkwordsempty2", _rnd, std::nullopt,
0, 0,
64, 80, 72, 64,
false, false, false);
- ::testWords("smallchunkwords", _rnd,
+ ::testWords("smallchunkwords", _rnd, std::nullopt,
1000000, 100,
64, 80, 72, 64,
false, false, false);
- ::testWords("smallchunkwordswithemptyword", _rnd,
+ ::testWords("smallchunkwordswithemptyword", _rnd, std::nullopt,
1000000, 100,
64, 80, 72, 64,
true, false, false);
- ::testWords("smallchunkwordswithcommonfirstword", _rnd,
+ ::testWords("smallchunkwordswithcommonfirstword", _rnd, std::nullopt,
1000000, 100,
64, 80, 72, 64,
false, true, false);
- ::testWords("smallchunkwordswithcommonemptyfirstword", _rnd,
+ ::testWords("smallchunkwordswithcommonemptyfirstword", _rnd, std::nullopt,
1000000, 100,
64, 80, 72, 64,
true, true, false);
- ::testWords("smallchunkwordswithcommonlastword", _rnd,
+ ::testWords("smallchunkwordswithcommonlastword", _rnd, std::nullopt,
1000000, 100,
64, 80, 72, 64,
false, false, true);
-#if 1
- ::testWords("smallchunkwords2", _rnd,
+ ::testWords("smallchunkwords2", _rnd, std::nullopt,
1000000, _stress ? 10000 : 100,
64, 80, 72, 64,
_emptyWord, _firstWordForcedCommon, _lastWordForcedCommon);
-#endif
-#if 1
- ::testWords("stdwords", _rnd,
+ ::testWords("stdwords", _rnd, std::nullopt,
1000000, _stress ? 10000 : 100,
262144, 80, 72, 64,
_emptyWord, _firstWordForcedCommon, _lastWordForcedCommon);
-#endif
+ ::testWords("stdwordsnommapssdat", _rnd, 500_Mi,
+ 1000000, 100,
+ 262144, 80, 72, 64,
+ _emptyWord, _firstWordForcedCommon, _lastWordForcedCommon);
+ ::testWords("stdwordsmmapssdat", _rnd, 1,
+ 1000000, 100,
+ 262144, 80, 72, 64,
+ _emptyWord, _firstWordForcedCommon, _lastWordForcedCommon);
}
int main(int argc, char **argv) {
diff --git a/searchlib/src/tests/tensor/distance_calculator/distance_calculator_test.cpp b/searchlib/src/tests/tensor/distance_calculator/distance_calculator_test.cpp
index b7702398857..4ffc1fe366e 100644
--- a/searchlib/src/tests/tensor/distance_calculator/distance_calculator_test.cpp
+++ b/searchlib/src/tests/tensor/distance_calculator/distance_calculator_test.cpp
@@ -44,12 +44,16 @@ public:
double calc_distance(uint32_t docid, const vespalib::string& query_tensor) {
auto qt = make_tensor(query_tensor);
auto calc = DistanceCalculator::make_with_validation(*attr, *qt);
- return calc->calc_with_limit(docid, std::numeric_limits<double>::max());
+ return calc->has_single_subspace()
+ ? calc->calc_with_limit<true>(docid, std::numeric_limits<double>::max())
+ : calc->calc_with_limit<false>(docid, std::numeric_limits<double>::max());
}
double calc_rawscore(uint32_t docid, const vespalib::string& query_tensor) {
auto qt = make_tensor(query_tensor);
auto calc = DistanceCalculator::make_with_validation(*attr, *qt);
- return calc->calc_raw_score(docid);
+ return calc->has_single_subspace()
+ ? calc->calc_raw_score<true>(docid)
+ : calc->calc_raw_score<false>(docid);
}
OptSubspace calc_closest_subspace(uint32_t docid, const vespalib::string& query_tensor) {
auto qt = make_tensor(query_tensor);
diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
index c01fc33767a..b697effeab4 100644
--- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
@@ -62,14 +62,14 @@ public:
_vectors[docid] = vec;
return *this;
}
- vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override {
+ vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const noexcept override {
return get_vectors(docid).cells(subspace);
}
- VectorBundle get_vectors(uint32_t docid) const override {
+ VectorBundle get_vectors(uint32_t docid) const noexcept override {
ArrayRef ref(_vectors[docid]);
assert((ref.size() % _subspace_type.size()) == 0);
uint32_t subspaces = ref.size() / _subspace_type.size();
- return VectorBundle(ref.data(), subspaces, _subspace_type);
+ return {ref.data(), subspaces, _subspace_type};
}
void clear() { _vectors.clear(); }
@@ -106,7 +106,7 @@ public:
.set(7, {3, 5}).set(8, {0, 3}).set(9, {4, 5});
}
- ~HnswIndexTest() override {}
+ ~HnswIndexTest() override;
auto dff() {
return search::tensor::make_distance_function_factory(
@@ -280,6 +280,9 @@ public:
static constexpr bool is_single = std::is_same_v<IndexType, HnswIndex<HnswIndexType::SINGLE>>;
};
+template <typename IndexType>
+HnswIndexTest<IndexType>::~HnswIndexTest() = default;
+
using HnswIndexTestTypes = ::testing::Types<HnswIndex<HnswIndexType::SINGLE>, HnswIndex<HnswIndexType::MULTI>>;
TYPED_TEST_SUITE(HnswIndexTest, HnswIndexTestTypes);
diff --git a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
index 1feb968fbb4..dce09a87fb8 100644
--- a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
@@ -1,13 +1,5 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <fcntl.h>
-#include <cstdio>
-#include <unistd.h>
-#include <chrono>
-#include <cstdlib>
-#include <future>
-#include <vector>
-
#include <vespa/eval/eval/typed_cells.h>
#include <vespa/eval/eval/value_type.h>
#include <vespa/searchlib/common/bitvector.h>
@@ -25,6 +17,9 @@
#include <vespa/vespalib/util/lambdatask.h>
#include <vespa/vespalib/util/size_literals.h>
#include <vespa/vespalib/data/simple_buffer.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <future>
#include <vespa/log/log.h>
LOG_SETUP("stress_hnsw_mt");
@@ -119,17 +114,17 @@ public:
memcpy(&_vectors[docid], vec.cbegin(), sizeof(MallocPointVector));
return *this;
}
- vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override {
+ vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const noexcept override {
assert(docid < NUM_POSSIBLE_DOCS);
(void) subspace;
ConstVectorRef ref(_vectors[docid]);
return vespalib::eval::TypedCells(ref);
}
- VectorBundle get_vectors(uint32_t docid) const override {
+ VectorBundle get_vectors(uint32_t docid) const noexcept override {
assert(docid < NUM_POSSIBLE_DOCS);
ConstVectorRef ref(_vectors[docid]);
assert(subspace_type.size() == ref.size());
- return VectorBundle(ref.data(), 1, subspace_type);
+ return {ref.data(), 1, subspace_type};
}
};
@@ -257,7 +252,7 @@ public:
loaded_vectors.load();
}
- ~Stressor() {}
+ ~Stressor() override;
auto dff() {
return search::tensor::make_distance_function_factory(
@@ -352,6 +347,9 @@ public:
}
};
+template <typename IndexType>
+Stressor<IndexType>::~Stressor() = default;
+
using StressorTypes = ::testing::Types<HnswIndex<HnswIndexType::SINGLE>>;
TYPED_TEST_SUITE(Stressor, StressorTypes);
diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.cpp b/searchlib/src/vespa/searchlib/bitcompression/compression.cpp
index f3fc31ac8b1..e5ce886f499 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/compression.cpp
+++ b/searchlib/src/vespa/searchlib/bitcompression/compression.cpp
@@ -6,7 +6,9 @@
#include <vespa/searchlib/index/postinglistparams.h>
#include <vespa/vespalib/data/fileheader.h>
#include <vespa/vespalib/data/databuffer.h>
+#include <vespa/vespalib/datastore/aligner.h>
#include <vespa/vespalib/util/arrayref.h>
+#include <vespa/vespalib/util/round_up_to_page_size.h>
#include <vespa/vespalib/util/size_literals.h>
namespace search::bitcompression {
@@ -181,6 +183,12 @@ readHeader(vespalib::GenericHeader &header, int64_t fileSize)
return headerLen;
}
+bool
+DecodeContext64Base::is_padded_for_memory_map(uint64_t file_bit_size, uint64_t file_size) noexcept
+{
+ using Aligner = vespalib::datastore::Aligner<64>;
+ return (Aligner::align(file_bit_size) + 128 <= (vespalib::round_up_to_page_size(file_size) * 8));
+}
template <bool bigEndian>
void
diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.h b/searchlib/src/vespa/searchlib/bitcompression/compression.h
index 4124f1f659f..b1e13a9d96b 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/compression.h
+++ b/searchlib/src/vespa/searchlib/bitcompression/compression.h
@@ -1261,6 +1261,13 @@ public:
virtual uint64_t decode_exp_golomb(int k) = 0;
void readBytes(uint8_t *buf, size_t len);
uint32_t readHeader(vespalib::GenericHeader &header, int64_t fileSize);
+
+ /*
+ * Check if file is padded at end for decompression readahead.
+ */
+ static bool is_padded_for_memory_map(uint64_t file_bit_size, uint64_t file_size) noexcept;
+
+ static uint64_t file_units(uint64_t file_size) noexcept { return (file_size + sizeof(uint64_t) - 1) / sizeof(uint64_t); }
};
diff --git a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp
index bceeb1e7bc1..89b5ffb84f8 100644
--- a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp
@@ -51,7 +51,7 @@ using vespalib::getLastErrorString;
namespace search::diskindex {
struct PageDict4FileSeqRead::DictFileReadContext {
- DictFileReadContext(vespalib::stringref id, const vespalib::string & name, const TuneFileSeqRead &tune, bool read_all_upfront);
+ DictFileReadContext(vespalib::stringref id, const vespalib::string & name, const TuneFileSeqRead &tune, uint32_t mmap_file_size_threshold, bool read_all_upfront);
~DictFileReadContext();
vespalib::FileHeader readHeader();
void readExtendedHeader();
@@ -66,7 +66,7 @@ struct PageDict4FileSeqRead::DictFileReadContext {
};
PageDict4FileSeqRead::DictFileReadContext::DictFileReadContext(vespalib::stringref id, const vespalib::string & name,
- const TuneFileSeqRead &tune, bool read_all_upfront)
+ const TuneFileSeqRead &tune, uint32_t mmap_file_size_threshold, bool read_all_upfront)
: _id(id),
_fileBitSize(0u),
_headerLen(0u),
@@ -79,23 +79,49 @@ PageDict4FileSeqRead::DictFileReadContext::DictFileReadContext(vespalib::stringr
if (tune.getWantDirectIO()) {
_file.EnableDirectIO();
}
+ if (read_all_upfront) {
+ _file.enableMemoryMap(0);
+ }
if (!_file.OpenReadOnly(name.c_str())) {
LOG(error, "could not open %s: %s", _file.GetFileName(), getLastErrorString().c_str());
return;
}
uint64_t fileSize = _file.getSize();
+ uint64_t file_units = DC::file_units(fileSize);
_readContext.setFile(&_file);
_readContext.setFileSize(fileSize);
+ bool use_mmap = false;
+ /*
+ * Limit memory usage spike by using memory mapped .ssdat file if
+ * file size is at least the mmap threshold (default 32 MiB) and file is padded at end.
+ */
+ if (read_all_upfront && _file.MemoryMapPtr(0) != nullptr && fileSize >= mmap_file_size_threshold) {
+ _readContext.reference_compressed_buffer(_file.MemoryMapPtr(0), file_units);
+ vespalib::FileHeader header;
+ _dc.readHeader(header, _file.getSize());
+ assert(header.hasTag("fileBitSize"));
+ int64_t file_bit_size = header.getTag("fileBitSize").asInteger();
+ use_mmap = DC::is_padded_for_memory_map(file_bit_size, fileSize);
+ _readContext.setBitOffset(0);
+ _readContext.setBufferEndFilePos(0);
+ }
if (read_all_upfront) {
- _readContext.allocComprBuf((fileSize + sizeof(uint64_t) - 1) / sizeof(uint64_t), 32_Ki);
+ if (use_mmap) {
+ _readContext.reference_compressed_buffer(_file.MemoryMapPtr(0), file_units);
+ } else {
+ _readContext.allocComprBuf(file_units, 32_Ki);
+ }
} else {
_readContext.allocComprBuf(64_Ki, 32_Ki);
}
- _dc.emptyBuffer(0);
- _readContext.readComprBuffer();
+ if (!use_mmap) {
+ _dc.emptyBuffer(0);
+ _readContext.readComprBuffer();
+ }
if (read_all_upfront) {
assert(_readContext.getBufferEndFilePos() >= fileSize);
}
+ assert(_dc.getBitPosV() == 0);
_valid = true;
}
@@ -121,7 +147,8 @@ PageDict4FileSeqRead::PageDict4FileSeqRead()
_ss(),
_sp(),
_p(),
- _wordNum(0u)
+ _wordNum(0u),
+ _mmap_file_size_threshold(32_Mi)
{ }
PageDict4FileSeqRead::~PageDict4FileSeqRead() = default;
@@ -166,9 +193,9 @@ bool
PageDict4FileSeqRead::open(const vespalib::string &name,
const TuneFileSeqRead &tuneFileRead)
{
- _ss = std::make_unique<DictFileReadContext>(mySSId, name + ".ssdat", tuneFileRead, true);
- _sp = std::make_unique<DictFileReadContext>(mySPId, name + ".spdat", tuneFileRead, false);
- _p = std::make_unique<DictFileReadContext>(myPId, name + ".pdat", tuneFileRead, false);
+ _ss = std::make_unique<DictFileReadContext>(mySSId, name + ".ssdat", tuneFileRead, _mmap_file_size_threshold, true);
+ _sp = std::make_unique<DictFileReadContext>(mySPId, name + ".spdat", tuneFileRead, _mmap_file_size_threshold, false);
+ _p = std::make_unique<DictFileReadContext>(myPId, name + ".pdat", tuneFileRead, _mmap_file_size_threshold, false);
if ( !_ss->_valid || !_sp->_valid || !_p->_valid ) {
return false;
}
diff --git a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h
index 404f85e9088..40540cd458e 100644
--- a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h
+++ b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h
@@ -26,6 +26,7 @@ class PageDict4FileSeqRead : public index::DictionaryFileSeqRead
std::unique_ptr<DictFileReadContext> _sp;
std::unique_ptr<DictFileReadContext> _p;
uint64_t _wordNum;
+ uint32_t _mmap_file_size_threshold;
public:
PageDict4FileSeqRead();
~PageDict4FileSeqRead() override;
@@ -38,6 +39,7 @@ public:
bool open(const vespalib::string &name, const TuneFileSeqRead &tuneFileRead) override;
bool close() override;
void getParams(index::PostingListParams &params) override;
+ void set_mmap_file_size_threshold(uint32_t v) { _mmap_file_size_threshold = v; }
};
/**
diff --git a/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.cpp b/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.cpp
index 3654b703648..a513a18ae5d 100644
--- a/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.cpp
@@ -1,8 +1,8 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "pagedict4randread.h"
-#include <vespa/vespalib/stllike/asciistream.h>
#include <vespa/vespalib/data/fileheader.h>
+#include <vespa/vespalib/stllike/asciistream.h>
#include <vespa/fastos/file.h>
#include <vespa/log/log.h>
@@ -33,7 +33,8 @@ PageDict4RandRead::PageDict4RandRead()
_pFileBitSize(0u),
_ssHeaderLen(0u),
_spHeaderLen(0u),
- _pHeaderLen(0u)
+ _pHeaderLen(0u),
+ _mmap_file_size_threshold(32_Mi)
{
_ssd.setReadContext(&_ssReadContext);
}
@@ -229,14 +230,42 @@ PageDict4RandRead::open(const vespalib::string &name,
}
uint64_t fileSize = _ssfile->getSize();
+ uint64_t file_units = DC::file_units(fileSize);
_ssReadContext.setFile(_ssfile.get());
_ssReadContext.setFileSize(fileSize);
- _ssReadContext.allocComprBuf((fileSize + sizeof(uint64_t) - 1) / sizeof(uint64_t), 32768u);
- _ssd.emptyBuffer(0);
- _ssReadContext.readComprBuffer();
- assert(_ssReadContext.getBufferEndFilePos() >= fileSize);
+ /*
+ * Limit memory usage spike by using memory mapped .ssdat file if
+ * file size is at least the mmap threshold (default 32 MiB) and file is padded at end.
+ * Note: It might cause higher dictionary lookup latencies when
+ * system is under memory pressure due to pageins.
+ */
+ bool has_read_ss_header = false;
+ if (_ssfile->MemoryMapPtr(0) != nullptr && fileSize >= _mmap_file_size_threshold) {
+ _ssReadContext.reference_compressed_buffer(_ssfile->MemoryMapPtr(0), file_units);
+ assert(_ssd.getReadOffset() == 0u);
+ readSSHeader();
+ has_read_ss_header = true;
+ }
+ if (!has_read_ss_header || !DC::is_padded_for_memory_map(_ssFileBitSize, fileSize)) {
+ /*
+ * Insufficient padding or small .ssdat file. Read whole file into
+ * memory.
+ */
+ _ssReadContext.allocComprBuf(file_units, 32768u);
+ _ssd.emptyBuffer(0);
+ _ssReadContext.setBitOffset(0);
+ _ssReadContext.setBufferEndFilePos(0);
+ _ssfile->SetPosition(0);
+ _ssReadContext.readComprBuffer();
+ assert(_ssReadContext.getBufferEndFilePos() >= fileSize);
+ assert(_ssd.getReadOffset() == 0u);
+ if (has_read_ss_header) {
+ _ssReadContext.setPosition(_ssHeaderLen * 8);
+ } else {
+ readSSHeader();
+ }
+ }
- readSSHeader();
readSPHeader();
readPHeader();
diff --git a/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.h b/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.h
index 051efa486dd..1c2e538cc48 100644
--- a/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.h
+++ b/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.h
@@ -36,6 +36,7 @@ class PageDict4RandRead : public index::DictionaryFileRandRead
uint32_t _ssHeaderLen;
uint32_t _spHeaderLen;
uint32_t _pHeaderLen;
+ uint32_t _mmap_file_size_threshold;
void readSSHeader();
void readSPHeader();
@@ -51,6 +52,7 @@ public:
bool close() override;
uint64_t getNumWordIds() const override;
+ void set_mmap_file_size_threshold(uint32_t v) { _mmap_file_size_threshold = v; }
};
}
diff --git a/searchlib/src/vespa/searchlib/features/closenessfeature.cpp b/searchlib/src/vespa/searchlib/features/closenessfeature.cpp
index b0955fe60bd..d19b979c360 100644
--- a/searchlib/src/vespa/searchlib/features/closenessfeature.cpp
+++ b/searchlib/src/vespa/searchlib/features/closenessfeature.cpp
@@ -52,7 +52,7 @@ ConvertRawScoreToCloseness::execute(uint32_t docId)
feature_t converted = tfmd->getRawScore();
max_closeness = std::max(max_closeness, converted);
} else if (elem.calc) {
- feature_t converted = elem.calc->calc_raw_score(docId);
+ feature_t converted = elem.calc->calc_raw_score<false>(docId);
max_closeness = std::max(max_closeness, converted);
}
}
diff --git a/searchlib/src/vespa/searchlib/features/distancefeature.cpp b/searchlib/src/vespa/searchlib/features/distancefeature.cpp
index 15362b6a224..65a764d8b44 100644
--- a/searchlib/src/vespa/searchlib/features/distancefeature.cpp
+++ b/searchlib/src/vespa/searchlib/features/distancefeature.cpp
@@ -12,7 +12,6 @@
#include <vespa/vespalib/geo/zcurve.h>
#include <vespa/vespalib/util/issue.h>
#include <vespa/vespalib/util/stash.h>
-#include <cmath>
#include <limits>
#include <vespa/log/log.h>
@@ -62,7 +61,7 @@ ConvertRawscoreToDistance::execute(uint32_t docId)
feature_t converted = elem.calc ? elem.calc->function().to_distance(invdist) : ((1.0 / invdist) - 1.0);
min_distance = std::min(min_distance, converted);
} else if (elem.calc) {
- feature_t invdist = elem.calc->calc_raw_score(docId);
+ feature_t invdist = elem.calc->calc_raw_score<false>(docId);
feature_t converted = elem.calc->function().to_distance(invdist);
min_distance = std::min(min_distance, converted);
}
@@ -130,7 +129,10 @@ GeoGCDExecutor::GeoGCDExecutor(GeoLocationSpecPtrs locations, const attribute::I
: FeatureExecutor(),
_locations(),
_pos(pos),
- _intBuf()
+ _intBuf(),
+ _best_index(0.0),
+ _best_lat(0.0),
+ _best_lng(0.0)
{
if (_pos == nullptr) {
return;
@@ -140,7 +142,7 @@ GeoGCDExecutor::GeoGCDExecutor(GeoLocationSpecPtrs locations, const attribute::I
if (p && p->location.valid() && p->location.has_point) {
double lat = p->location.point.y / 1.0e6;
double lng = p->location.point.x / 1.0e6;
- _locations.emplace_back(search::common::GeoGcd{lat, lng});
+ _locations.emplace_back(lat, lng);
}
}
}
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_range_term_expander.h b/searchlib/src/vespa/searchlib/predicate/predicate_range_term_expander.h
index 7ff796e5b7d..8159d5c4147 100644
--- a/searchlib/src/vespa/searchlib/predicate/predicate_range_term_expander.h
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_range_term_expander.h
@@ -50,6 +50,7 @@ void PredicateRangeTermExpander::expand(const vespalib::string &key, int64_t sig
return;
}
size_t buffer_size = 21 * 2 + 3 + key.size(); // 2 numbers + punctuation + key
+ // GNU extension: Variable-length automatic array
char buffer[buffer_size];
int size;
int prefix_size = snprintf(buffer, buffer_size, "%s=", key.c_str());
diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp
index d28f6077905..c76fe3363e4 100644
--- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp
@@ -19,17 +19,17 @@ namespace search::queryeval {
* Keeps a heap of the K best hit distances.
* Currently always does brute-force scanning, which is very expensive.
**/
-template <bool strict, bool has_filter>
-class NearestNeighborImpl : public NearestNeighborIterator
+template <bool strict, bool has_filter, bool has_single_subspace>
+class NearestNeighborImpl final : public NearestNeighborIterator
{
public:
- NearestNeighborImpl(Params params_in)
+ explicit NearestNeighborImpl(Params params_in)
: NearestNeighborIterator(std::move(params_in)),
_lastScore(0.0)
{
}
- ~NearestNeighborImpl();
+ ~NearestNeighborImpl() override;
void doSeek(uint32_t docId) override {
double distanceLimit = params().distanceHeap.distanceLimit();
@@ -61,39 +61,47 @@ public:
private:
double computeDistance(uint32_t docId, double limit) {
- return params().distance_calc->calc_with_limit(docId, limit);
+ return params().distance_calc->template calc_with_limit<has_single_subspace>(docId, limit);
}
double _lastScore;
};
-template <bool strict, bool has_filter>
-NearestNeighborImpl<strict, has_filter>::~NearestNeighborImpl() = default;
+template <bool strict, bool has_filter, bool has_single_subspace>
+NearestNeighborImpl<strict, has_filter, has_single_subspace>::~NearestNeighborImpl() = default;
namespace {
+template <bool strict, bool has_filter>
+std::unique_ptr<NearestNeighborIterator>
+resolve_single_subspace(NearestNeighborIterator::Params params)
+{
+ if (params.distance_calc->has_single_subspace()) {
+ using NNI = NearestNeighborImpl<strict, has_filter, true>;
+ return std::make_unique<NNI>(std::move(params));
+ } else {
+ using NNI = NearestNeighborImpl<strict, has_filter, false>;
+ return std::make_unique<NNI>(std::move(params));
+ }
+}
+
template <bool has_filter>
std::unique_ptr<NearestNeighborIterator>
resolve_strict(bool strict, NearestNeighborIterator::Params params)
{
if (strict) {
- using NNI = NearestNeighborImpl<true, has_filter>;
- return std::make_unique<NNI>(std::move(params));
+ return resolve_single_subspace<true, has_filter>(std::move(params));
} else {
- using NNI = NearestNeighborImpl<false, has_filter>;
- return std::make_unique<NNI>(std::move(params));
+ return resolve_single_subspace<false, has_filter>(std::move(params));
}
}
} // namespace <unnamed>
std::unique_ptr<NearestNeighborIterator>
-NearestNeighborIterator::create(
- bool strict,
- fef::TermFieldMatchData &tfmd,
- std::unique_ptr<search::tensor::DistanceCalculator> distance_calc,
- NearestNeighborDistanceHeap &distanceHeap,
- const GlobalFilter &filter)
+NearestNeighborIterator::create(bool strict, fef::TermFieldMatchData &tfmd,
+ std::unique_ptr<search::tensor::DistanceCalculator> distance_calc,
+ NearestNeighborDistanceHeap &distanceHeap, const GlobalFilter &filter)
{
Params params(tfmd, std::move(distance_calc), distanceHeap, filter);
if (filter.is_active()) {
diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.h b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.h
index b34c9df47b9..177c732a44d 100644
--- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.h
+++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.h
@@ -39,7 +39,7 @@ public:
{}
};
- NearestNeighborIterator(Params params_in)
+ explicit NearestNeighborIterator(Params params_in)
: _params(std::move(params_in))
{}
diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
index 14953011e22..07e490f4575 100644
--- a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
@@ -2,7 +2,9 @@
#include "angular_distance.h"
#include "temporary_vector_store.h"
+#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
#include <numbers>
+#include <cmath>
using vespalib::typify_invoke;
using vespalib::eval::TypifyCellType;
@@ -10,47 +12,15 @@ using vespalib::eval::TypedCells;
namespace search::tensor {
-namespace {
-
-struct CalcAngular {
- template <typename LCT, typename RCT>
- static double invoke(const vespalib::eval::TypedCells& lhs,
- const vespalib::eval::TypedCells& rhs)
- {
- auto lhs_vector = lhs.unsafe_typify<LCT>();
- auto rhs_vector = rhs.unsafe_typify<RCT>();
-
- size_t sz = lhs_vector.size();
- assert(sz == rhs_vector.size());
- double a_norm_sq = 0.0;
- double b_norm_sq = 0.0;
- double dot_product = 0.0;
- for (size_t i = 0; i < sz; ++i) {
- double a = lhs_vector[i];
- double b = rhs_vector[i];
- a_norm_sq += a*a;
- b_norm_sq += b*b;
- dot_product += a*b;
- }
- double squared_norms = a_norm_sq * b_norm_sq;
- double div = (squared_norms > 0) ? sqrt(squared_norms) : 1.0;
- double cosine_similarity = dot_product / div;
- double distance = 1.0 - cosine_similarity; // in range [0,2]
- return std::max(0.0, distance);
- }
-};
-
-}
-
template<typename FloatType>
-class BoundAngularDistance : public BoundDistanceFunction {
+class BoundAngularDistance final : public BoundDistanceFunction {
private:
const vespalib::hwaccelrated::IAccelrated & _computer;
mutable TemporaryVectorStore<FloatType> _tmpSpace;
const vespalib::ConstArrayRef<FloatType> _lhs;
double _lhs_norm_sq;
public:
- BoundAngularDistance(const vespalib::eval::TypedCells& lhs)
+ explicit BoundAngularDistance(TypedCells lhs)
: _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()),
_tmpSpace(lhs.size),
_lhs(_tmpSpace.storeLhs(lhs))
@@ -58,7 +28,7 @@ public:
auto a = _lhs.data();
_lhs_norm_sq = _computer.dotProduct(a, a, lhs.size);
}
- double calc(const vespalib::eval::TypedCells& rhs) const override {
+ double calc(TypedCells rhs) const noexcept override {
size_t sz = _lhs.size();
vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs);
assert(sz == rhs_vector.size());
@@ -72,7 +42,7 @@ public:
double distance = 1.0 - cosine_similarity; // in range [0,2]
return distance;
}
- double convert_threshold(double threshold) const override {
+ double convert_threshold(double threshold) const noexcept override {
if (threshold < 0.0) {
return 0.0;
}
@@ -82,7 +52,7 @@ public:
double cosine_similarity = cos(threshold);
return 1.0 - cosine_similarity;
}
- double to_rawscore(double distance) const override {
+ double to_rawscore(double distance) const noexcept override {
double cosine_similarity = 1.0 - distance;
// should be in the range [-1,1] but roundoff may cause problems:
cosine_similarity = std::min(1.0, cosine_similarity);
@@ -91,7 +61,7 @@ public:
double score = 1.0 / (1.0 + angle_distance);
return score;
}
- double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override {
+ double calc_with_limit(TypedCells rhs, double) const noexcept override {
return calc(rhs);
}
};
@@ -101,14 +71,14 @@ template class BoundAngularDistance<double>;
template <typename FloatType>
BoundDistanceFunction::UP
-AngularDistanceFunctionFactory<FloatType>::for_query_vector(const vespalib::eval::TypedCells& lhs) {
+AngularDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) {
using DFT = BoundAngularDistance<FloatType>;
return std::make_unique<DFT>(lhs);
}
template <typename FloatType>
BoundDistanceFunction::UP
-AngularDistanceFunctionFactory<FloatType>::for_insertion_vector(const vespalib::eval::TypedCells& lhs) {
+AngularDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) {
using DFT = BoundAngularDistance<FloatType>;
return std::make_unique<DFT>(lhs);
}
diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.h b/searchlib/src/vespa/searchlib/tensor/angular_distance.h
index f5e8589fe6a..5e0a060e060 100644
--- a/searchlib/src/vespa/searchlib/tensor/angular_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.h
@@ -2,12 +2,7 @@
#pragma once
-#include "distance_function.h"
-#include "bound_distance_function.h"
#include "distance_function_factory.h"
-#include <vespa/eval/eval/typed_cells.h>
-#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
-#include <cmath>
namespace search::tensor {
@@ -20,8 +15,8 @@ template <typename FloatType>
class AngularDistanceFunctionFactory : public DistanceFunctionFactory {
public:
AngularDistanceFunctionFactory() = default;
- BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override;
- BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override;
+ BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override;
+ BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h b/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h
index c89619d9a77..a9d0c880625 100644
--- a/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h
+++ b/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h
@@ -2,13 +2,8 @@
#pragma once
-#include <memory>
-#include <vespa/eval/eval/cell_type.h>
-#include <vespa/eval/eval/typed_cells.h>
-#include <vespa/vespalib/util/arrayref.h>
#include "distance_function.h"
-
-namespace vespalib::eval { struct TypedCells; }
+#include <vespa/eval/eval/typed_cells.h>
namespace search::tensor {
@@ -22,17 +17,17 @@ namespace search::tensor {
class BoundDistanceFunction : public DistanceConverter {
public:
using UP = std::unique_ptr<BoundDistanceFunction>;
+ using TypedCells = vespalib::eval::TypedCells;
- BoundDistanceFunction() = default;
+ BoundDistanceFunction() noexcept = default;
- virtual ~BoundDistanceFunction() = default;
+ ~BoundDistanceFunction() override = default;
// calculate internal distance (comparable)
- virtual double calc(const vespalib::eval::TypedCells& rhs) const = 0;
+ virtual double calc(TypedCells rhs) const noexcept = 0;
// calculate internal distance, early return allowed if > limit
- virtual double calc_with_limit(const vespalib::eval::TypedCells& rhs,
- double limit) const = 0;
+ virtual double calc_with_limit(TypedCells rhs, double limit) const noexcept = 0;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
index fb74dd51fa3..0dbb9c34010 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
@@ -30,14 +30,14 @@ DenseTensorAttribute::extract_cells_ref(DocId docId) const
}
vespalib::eval::TypedCells
-DenseTensorAttribute::get_vector(uint32_t docid, uint32_t subspace) const
+DenseTensorAttribute::get_vector(uint32_t docid, uint32_t subspace) const noexcept
{
EntryRef ref = (subspace == 0) ? acquire_entry_ref(docid) : EntryRef();
return _denseTensorStore.get_typed_cells(ref);
}
VectorBundle
-DenseTensorAttribute::get_vectors(uint32_t docid) const
+DenseTensorAttribute::get_vectors(uint32_t docid) const noexcept
{
EntryRef ref = acquire_entry_ref(docid);
return _denseTensorStore.get_vectors(ref);
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h
index 03c976bd6b3..c07bfcc358e 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h
@@ -26,8 +26,8 @@ public:
bool supports_extract_cells_ref() const override { return true; }
// Implements DocVectorAccess
- vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override;
- VectorBundle get_vectors(uint32_t docid) const override;
+ vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const noexcept override;
+ VectorBundle get_vectors(uint32_t docid) const noexcept override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp
index 12dd6aa2bca..cf0e9adc095 100644
--- a/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp
@@ -74,7 +74,7 @@ DirectTensorAttribute::get_tensor_ref(DocId docId) const
}
vespalib::eval::TypedCells
-DirectTensorAttribute::get_vector(uint32_t docid, uint32_t subspace) const
+DirectTensorAttribute::get_vector(uint32_t docid, uint32_t subspace) const noexcept
{
EntryRef ref = acquire_entry_ref(docid);
auto vectors = _direct_store.get_vectors(ref);
@@ -82,7 +82,7 @@ DirectTensorAttribute::get_vector(uint32_t docid, uint32_t subspace) const
}
VectorBundle
-DirectTensorAttribute::get_vectors(uint32_t docid) const
+DirectTensorAttribute::get_vectors(uint32_t docid) const noexcept
{
EntryRef ref = acquire_entry_ref(docid);
return _direct_store.get_vectors(ref);
diff --git a/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.h
index a4f673ea99f..64f62650615 100644
--- a/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.h
+++ b/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.h
@@ -26,8 +26,8 @@ public:
bool supports_get_tensor_ref() const override { return true; }
// Implements DocVectorAccess
- vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override;
- VectorBundle get_vectors(uint32_t docid) const override;
+ vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const noexcept override;
+ VectorBundle get_vectors(uint32_t docid) const noexcept override;
};
} // namespace search::tensor
diff --git a/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h b/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h
index 44bbbba65d6..6edb654d5bf 100644
--- a/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h
+++ b/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h
@@ -42,11 +42,11 @@ private:
EntryRef add_entry(TensorSP tensor);
public:
- DirectTensorStore(const vespalib::eval::ValueType& tensor_type);
+ explicit DirectTensorStore(const vespalib::eval::ValueType& tensor_type);
~DirectTensorStore() override;
using RefType = TensorStoreType::RefType;
- const vespalib::eval::Value * get_tensor_ptr(EntryRef ref) const {
+ const vespalib::eval::Value * get_tensor_ptr(EntryRef ref) const noexcept {
if (!ref.valid()) {
return nullptr;
}
@@ -65,12 +65,12 @@ public:
vespalib::eval::TypedCells get_empty_subspace() const noexcept {
return _empty.cells();
}
- VectorBundle get_vectors(EntryRef ref) const {
+ VectorBundle get_vectors(EntryRef ref) const noexcept {
auto tensor = get_tensor_ptr(ref);
if (tensor == nullptr) {
- return VectorBundle();
+ return {};
}
- return VectorBundle(tensor->cells().data, tensor->index().size(), _subspace_type);
+ return {tensor->cells().data, static_cast<uint32_t>(tensor->index().size()), _subspace_type};
}
};
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_calculator.h b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h
index eab75537071..9dbd12650cb 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_calculator.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h
@@ -5,6 +5,7 @@
#include "distance_function_factory.h"
#include "i_tensor_attribute.h"
#include "vector_bundle.h"
+#include <vespa/eval/eval/value_type.h>
#include <optional>
namespace vespalib::eval { struct Value; }
@@ -32,34 +33,55 @@ public:
~DistanceCalculator();
const tensor::ITensorAttribute& attribute_tensor() const { return _attr_tensor; }
- const vespalib::eval::Value& query_tensor() const {
+ const vespalib::eval::Value& query_tensor() const noexcept{
assert(_query_tensor != nullptr);
return *_query_tensor;
}
- const BoundDistanceFunction& function() const { return *_dist_fun; }
+ const BoundDistanceFunction& function() const noexcept { return *_dist_fun; }
+ bool has_single_subspace() const noexcept { return _attr_tensor.getTensorType().is_dense(); }
- double calc_raw_score(uint32_t docid) const {
- auto vectors = _attr_tensor.get_vectors(docid);
- double result = _dist_fun->min_rawscore();
- for (uint32_t i = 0; i < vectors.subspaces(); ++i) {
- double distance = _dist_fun->calc(vectors.cells(i));
- double score = _dist_fun->to_rawscore(distance);
- result = std::max(result, score);
+ template<bool has_single_subspace>
+ double calc_raw_score(uint32_t docid) const noexcept {
+ if (has_single_subspace) {
+ auto cells = _attr_tensor.get_vector(docid, 0);
+ double min_rawscore = _dist_fun->min_rawscore();
+ if (cells.size == 0) [[unlikely]] {
+ return min_rawscore;
+ }
+ return std::max(min_rawscore, _dist_fun->to_rawscore(_dist_fun->calc(cells)));
+ } else {
+ auto vectors = _attr_tensor.get_vectors(docid);
+ double result = _dist_fun->min_rawscore();
+ for (uint32_t i = 0; i < vectors.subspaces(); ++i) {
+ double distance = _dist_fun->calc(vectors.cells(i));
+ double score = _dist_fun->to_rawscore(distance);
+ result = std::max(result, score);
+ }
+ return result;
}
- return result;
+
}
- double calc_with_limit(uint32_t docid, double limit) const {
- auto vectors = _attr_tensor.get_vectors(docid);
- double result = std::numeric_limits<double>::max();
- for (uint32_t i = 0; i < vectors.subspaces(); ++i) {
- double distance = _dist_fun->calc_with_limit(vectors.cells(i), limit);
- result = std::min(result, distance);
+ template<bool has_single_subspace>
+ double calc_with_limit(uint32_t docid, double limit) const noexcept {
+ if (has_single_subspace) {
+ auto cells = _attr_tensor.get_vector(docid, 0);
+ if (cells.size == 0) [[unlikely]] {
+ return std::numeric_limits<double>::max();
+ }
+ return _dist_fun->calc_with_limit(cells, limit);
+ } else {
+ auto vectors = _attr_tensor.get_vectors(docid);
+ double result = std::numeric_limits<double>::max();
+ for (uint32_t i = 0; i < vectors.subspaces(); ++i) {
+ double distance = _dist_fun->calc_with_limit(vectors.cells(i), limit);
+ result = std::min(result, distance);
+ }
+ return result;
}
- return result;
}
- void calc_closest_subspace(VectorBundle vectors, std::optional<uint32_t>& closest_subspace, double& best_distance) {
+ void calc_closest_subspace(VectorBundle vectors, std::optional<uint32_t>& closest_subspace, double& best_distance) noexcept {
for (uint32_t i = 0; i < vectors.subspaces(); ++i) {
double distance = _dist_fun->calc(vectors.cells(i));
if (!closest_subspace.has_value() || distance < best_distance) {
@@ -69,7 +91,7 @@ public:
}
}
- std::optional<uint32_t> calc_closest_subspace(VectorBundle vectors) {
+ std::optional<uint32_t> calc_closest_subspace(VectorBundle vectors) noexcept {
double best_distance = 0.0;
std::optional<uint32_t> closest_subspace;
calc_closest_subspace(vectors, closest_subspace, best_distance);
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function.h b/searchlib/src/vespa/searchlib/tensor/distance_function.h
index c2e8305038c..9a2db8dfac0 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function.h
@@ -2,11 +2,6 @@
#pragma once
-#include <memory>
-#include <vespa/eval/eval/cell_type.h>
-
-namespace vespalib::eval { struct TypedCells; }
-
namespace search::tensor {
class DistanceConverter {
@@ -16,25 +11,25 @@ public:
/**
* Convert threshold (external distance units) to internal units.
*/
- virtual double convert_threshold(double threshold) const = 0;
+ virtual double convert_threshold(double threshold) const noexcept = 0;
/**
* Convert internal distance to rawscore (also used as closeness).
*/
- virtual double to_rawscore(double distance) const = 0;
+ virtual double to_rawscore(double distance) const noexcept = 0;
/**
* Convert rawscore to external distance.
* Override this when the rawscore is NOT defined as (1.0 / (1.0 + external_distance)).
*/
- virtual double to_distance(double rawscore) const {
+ virtual double to_distance(double rawscore) const noexcept {
return (1.0 / rawscore) - 1.0;
}
/**
* The minimum rawscore (also used as closeness) that this distance function can return.
*/
- virtual double min_rawscore() const {
+ virtual double min_rawscore() const noexcept {
return 0.0;
}
};
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
index 4749a8549a6..ed08df5866e 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
@@ -3,22 +3,14 @@
#include "distance_function_factory.h"
#include "distance_functions.h"
#include "mips_distance_transform.h"
-#include <vespa/vespalib/util/typify.h>
-#include <vespa/vespalib/util/array.h>
-#include <vespa/vespalib/util/arrayref.h>
-#include <vespa/log/log.h>
-
-LOG_SETUP(".searchlib.tensor.distance_function_factory");
using search::attribute::DistanceMetric;
using vespalib::eval::CellType;
-using vespalib::eval::ValueType;
namespace search::tensor {
std::unique_ptr<DistanceFunctionFactory>
-make_distance_function_factory(search::attribute::DistanceMetric variant,
- vespalib::eval::CellType cell_type)
+make_distance_function_factory(DistanceMetric variant, CellType cell_type)
{
switch (variant) {
case DistanceMetric::Angular:
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h
index 829ed7fae13..356366d6a77 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h
@@ -4,7 +4,6 @@
#include "distance_function.h"
#include "bound_distance_function.h"
-#include <vespa/eval/eval/value_type.h>
#include <vespa/searchcommon/attribute/distance_metric.h>
namespace search::tensor {
@@ -15,10 +14,11 @@ namespace search::tensor {
* for one particular vector in the distance function object.
*/
struct DistanceFunctionFactory {
- DistanceFunctionFactory() = default;
- virtual ~DistanceFunctionFactory() {}
- virtual BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) = 0;
- virtual BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) = 0;
+ using TypedCells = vespalib::eval::TypedCells;
+ DistanceFunctionFactory() noexcept = default;
+ virtual ~DistanceFunctionFactory() = default;
+ virtual BoundDistanceFunction::UP for_query_vector(TypedCells lhs) = 0;
+ virtual BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) = 0;
using UP = std::unique_ptr<DistanceFunctionFactory>;
};
diff --git a/searchlib/src/vespa/searchlib/tensor/doc_vector_access.h b/searchlib/src/vespa/searchlib/tensor/doc_vector_access.h
index 477d5e1dc8a..dd68171dd59 100644
--- a/searchlib/src/vespa/searchlib/tensor/doc_vector_access.h
+++ b/searchlib/src/vespa/searchlib/tensor/doc_vector_access.h
@@ -16,9 +16,9 @@ class VectorBundle;
*/
class DocVectorAccess {
public:
- virtual ~DocVectorAccess() {}
- virtual vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const = 0;
- virtual VectorBundle get_vectors(uint32_t docid) const = 0;
+ virtual ~DocVectorAccess() = default;
+ virtual vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const noexcept = 0;
+ virtual VectorBundle get_vectors(uint32_t docid) const noexcept = 0;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp b/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp
index cfc420d9ecd..d581dbd129e 100644
--- a/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp
@@ -10,7 +10,8 @@ EmptySubspace::EmptySubspace(const SubspaceType& type)
_cells()
{
_empty_space.resize(type.mem_size());
- _cells = vespalib::eval::TypedCells(&_empty_space[0], type.cell_type(), type.size());
+ // Set size to zero to signal empty/invalid subspace
+ _cells = vespalib::eval::TypedCells(&_empty_space[0], type.cell_type(), 0);
}
EmptySubspace::~EmptySubspace() = default;
diff --git a/searchlib/src/vespa/searchlib/tensor/empty_subspace.h b/searchlib/src/vespa/searchlib/tensor/empty_subspace.h
index dd0ab9264c4..4043ec122e6 100644
--- a/searchlib/src/vespa/searchlib/tensor/empty_subspace.h
+++ b/searchlib/src/vespa/searchlib/tensor/empty_subspace.h
@@ -10,7 +10,7 @@ namespace search::tensor {
class SubspaceType;
/*
- * Class containg an empty subspace, used as a bad fallback when we cannot
+ * Class containing an empty subspace, used as a bad fallback when we cannot
* get a real subspace.
*/
class EmptySubspace
diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
index 3efc8c3a5ea..6a730132ad1 100644
--- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
@@ -2,39 +2,20 @@
#include "euclidean_distance.h"
#include "temporary_vector_store.h"
+#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
+#include <cmath>
using vespalib::typify_invoke;
using vespalib::eval::TypifyCellType;
+using vespalib::eval::TypedCells;
namespace search::tensor {
-namespace {
-
-struct CalcEuclidean {
- template <typename LCT, typename RCT>
- static double invoke(const vespalib::eval::TypedCells& lhs,
- const vespalib::eval::TypedCells& rhs)
- {
- auto lhs_vector = lhs.unsafe_typify<LCT>();
- auto rhs_vector = rhs.unsafe_typify<RCT>();
- double sum = 0.0;
- size_t sz = lhs_vector.size();
- assert(sz == rhs_vector.size());
- for (size_t i = 0; i < sz; ++i) {
- double diff = lhs_vector[i] - rhs_vector[i];
- sum += diff*diff;
- }
- return sum;
- }
-};
-
-}
-
using vespalib::eval::Int8Float;
using vespalib::BFloat16;
template<typename AttributeCellType>
-class BoundEuclideanDistance : public BoundDistanceFunction {
+class BoundEuclideanDistance final : public BoundDistanceFunction {
using FloatType = std::conditional_t<std::is_same<AttributeCellType,BFloat16>::value,float,AttributeCellType>;
private:
const vespalib::hwaccelrated::IAccelrated & _computer;
@@ -44,12 +25,12 @@ private:
static const float *cast(const float * p) { return p; }
static const int8_t *cast(const Int8Float * p) { return reinterpret_cast<const int8_t *>(p); }
public:
- BoundEuclideanDistance(const vespalib::eval::TypedCells& lhs)
+ explicit BoundEuclideanDistance(TypedCells lhs)
: _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()),
_tmpSpace(lhs.size),
_lhs_vector(_tmpSpace.storeLhs(lhs))
{}
- double calc(const vespalib::eval::TypedCells& rhs) const override {
+ double calc(TypedCells rhs) const noexcept override {
size_t sz = _lhs_vector.size();
vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs);
assert(sz == rhs_vector.size());
@@ -57,15 +38,15 @@ public:
auto b = rhs_vector.data();
return _computer.squaredEuclideanDistance(cast(a), cast(b), sz);
}
- double convert_threshold(double threshold) const override {
+ double convert_threshold(double threshold) const noexcept override {
return threshold*threshold;
}
- double to_rawscore(double distance) const override {
+ double to_rawscore(double distance) const noexcept override {
double d = sqrt(distance);
double score = 1.0 / (1.0 + d);
return score;
}
- double calc_with_limit(const vespalib::eval::TypedCells& rhs, double limit) const override {
+ double calc_with_limit(TypedCells rhs, double limit) const noexcept override {
vespalib::ConstArrayRef<AttributeCellType> rhs_vector = rhs.typify<AttributeCellType>();
double sum = 0.0;
size_t sz = _lhs_vector.size();
@@ -85,14 +66,14 @@ template class BoundEuclideanDistance<double>;
template <typename FloatType>
BoundDistanceFunction::UP
-EuclideanDistanceFunctionFactory<FloatType>::for_query_vector(const vespalib::eval::TypedCells& lhs) {
+EuclideanDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) {
using DFT = BoundEuclideanDistance<FloatType>;
return std::make_unique<DFT>(lhs);
}
template <typename FloatType>
BoundDistanceFunction::UP
-EuclideanDistanceFunctionFactory<FloatType>::for_insertion_vector(const vespalib::eval::TypedCells& lhs) {
+EuclideanDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) {
using DFT = BoundEuclideanDistance<FloatType>;
return std::make_unique<DFT>(lhs);
}
diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
index 42097f8b39b..8c39a12bf86 100644
--- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
@@ -2,11 +2,7 @@
#pragma once
-#include "distance_function.h"
#include "distance_function_factory.h"
-#include <vespa/eval/eval/typed_cells.h>
-#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
-#include <cmath>
namespace search::tensor {
@@ -18,9 +14,9 @@ namespace search::tensor {
template <typename FloatType>
class EuclideanDistanceFunctionFactory : public DistanceFunctionFactory {
public:
- EuclideanDistanceFunctionFactory() = default;
- BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override;
- BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override;
+ EuclideanDistanceFunctionFactory() noexcept = default;
+ BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override;
+ BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp
index 7b6c40c643e..f5484f40271 100644
--- a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp
@@ -3,6 +3,7 @@
#include "geo_degrees_distance.h"
#include "temporary_vector_store.h"
#include <numbers>
+#include <cmath>
using vespalib::typify_invoke;
using vespalib::eval::TypifyCellType;
@@ -15,7 +16,7 @@ namespace search::tensor {
* Uses the haversine formula directly from:
* https://en.wikipedia.org/wiki/Haversine_formula
**/
-class BoundGeoDistance : public BoundDistanceFunction {
+class BoundGeoDistance final : public BoundDistanceFunction {
private:
mutable TemporaryVectorStore<double> _tmpSpace;
const vespalib::ConstArrayRef<double> _lh_vector;
@@ -26,16 +27,16 @@ public:
static constexpr double degrees_to_radians = M_PI / 180.0;
// haversine function:
- static double haversine(double angle) {
+ static double haversine(double angle) noexcept {
double s = sin(0.5*angle);
return s*s;
}
- BoundGeoDistance(const vespalib::eval::TypedCells& lhs)
+ explicit BoundGeoDistance(TypedCells lhs)
: _tmpSpace(lhs.size),
_lh_vector(_tmpSpace.storeLhs(lhs))
{}
- double calc(const vespalib::eval::TypedCells& rhs) const override {
+ double calc(TypedCells rhs) const noexcept override {
vespalib::ConstArrayRef<double> rhs_vector = _tmpSpace.convertRhs(rhs);
assert(2 == _lh_vector.size());
assert(2 == rhs_vector.size());
@@ -56,7 +57,7 @@ public:
double hav_central_angle = hav_lat + cos(lat_A)*cos(lat_B)*hav_lon;
return hav_central_angle;
}
- double convert_threshold(double threshold) const override {
+ double convert_threshold(double threshold) const noexcept override {
if (threshold < 0.0) {
return 0.0;
}
@@ -68,25 +69,25 @@ public:
double rt_hav = sin(half_angle);
return rt_hav * rt_hav;
}
- double to_rawscore(double distance) const override {
+ double to_rawscore(double distance) const noexcept override {
double hav_diff = sqrt(distance);
// distance in kilometers:
double d = 2 * asin(hav_diff) * earth_mean_radius;
// km to rawscore:
return 1.0 / (1.0 + d);
}
- double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override {
+ double calc_with_limit(TypedCells rhs, double) const noexcept override {
return calc(rhs);
}
};
BoundDistanceFunction::UP
-GeoDistanceFunctionFactory::for_query_vector(const vespalib::eval::TypedCells& lhs) {
+GeoDistanceFunctionFactory::for_query_vector(TypedCells lhs) {
return std::make_unique<BoundGeoDistance>(lhs);
}
BoundDistanceFunction::UP
-GeoDistanceFunctionFactory::for_insertion_vector(const vespalib::eval::TypedCells& lhs) {
+GeoDistanceFunctionFactory::for_insertion_vector(TypedCells lhs) {
return std::make_unique<BoundGeoDistance>(lhs);
}
diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h
index f1af976b91f..1464898421b 100644
--- a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h
@@ -2,12 +2,7 @@
#pragma once
-#include "distance_function.h"
#include "distance_function_factory.h"
-#include <vespa/eval/eval/typed_cells.h>
-#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
-#include <vespa/vespalib/util/typify.h>
-#include <cmath>
namespace search::tensor {
@@ -19,8 +14,8 @@ namespace search::tensor {
class GeoDistanceFunctionFactory : public DistanceFunctionFactory {
public:
GeoDistanceFunctionFactory() = default;
- BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override;
- BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override;
+ BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override;
+ BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
index a1dc8cc52f7..0be920b9c03 100644
--- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
@@ -6,51 +6,29 @@
using vespalib::typify_invoke;
using vespalib::eval::TypifyCellType;
+using vespalib::eval::TypedCells;
namespace search::tensor {
-namespace {
-
-struct CalcHamming {
- template <typename LCT, typename RCT>
- static double invoke(const vespalib::eval::TypedCells& lhs,
- const vespalib::eval::TypedCells& rhs)
- {
- auto lhs_vector = lhs.unsafe_typify<LCT>();
- auto rhs_vector = rhs.unsafe_typify<RCT>();
- size_t sz = lhs_vector.size();
- assert(sz == rhs_vector.size());
- size_t sum = 0;
- for (size_t i = 0; i < sz; ++i) {
- sum += (lhs_vector[i] == rhs_vector[i]) ? 0 : 1;
- }
- return (double)sum;
- }
-};
-
-}
-
using vespalib::eval::Int8Float;
template<typename FloatType>
-class BoundHammingDistance : public BoundDistanceFunction {
+class BoundHammingDistance final : public BoundDistanceFunction {
private:
mutable TemporaryVectorStore<FloatType> _tmpSpace;
const vespalib::ConstArrayRef<FloatType> _lhs_vector;
public:
- BoundHammingDistance(const vespalib::eval::TypedCells& lhs)
+ explicit BoundHammingDistance(TypedCells lhs)
: _tmpSpace(lhs.size),
_lhs_vector(_tmpSpace.storeLhs(lhs))
{}
- double calc(const vespalib::eval::TypedCells& rhs) const override {
+ double calc(TypedCells rhs) const noexcept override {
size_t sz = _lhs_vector.size();
vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs);
- assert(sz == rhs_vector.size());
- auto a = _lhs_vector.data();
- auto b = rhs_vector.data();
if constexpr (std::is_same<Int8Float, FloatType>::value) {
- return (double) vespalib::binary_hamming_distance(a, b, sz);
+ return (double) vespalib::binary_hamming_distance(_lhs_vector.data(), rhs_vector.data(), sz);
} else {
+ assert(sz == rhs_vector.size());
size_t sum = 0;
for (size_t i = 0; i < sz; ++i) {
sum += (_lhs_vector[i] == rhs_vector[i]) ? 0 : 1;
@@ -58,14 +36,13 @@ public:
return (double)sum;
}
}
- double convert_threshold(double threshold) const override {
+ double convert_threshold(double threshold) const noexcept override {
return threshold;
}
- double to_rawscore(double distance) const override {
- double score = 1.0 / (1.0 + distance);
- return score;
+ double to_rawscore(double distance) const noexcept override {
+ return 1.0 / (1.0 + distance);
}
- double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override {
+ double calc_with_limit(TypedCells rhs, double) const noexcept override {
// consider optimizing:
return calc(rhs);
}
@@ -73,14 +50,14 @@ public:
template <typename FloatType>
BoundDistanceFunction::UP
-HammingDistanceFunctionFactory<FloatType>::for_query_vector(const vespalib::eval::TypedCells& lhs) {
+HammingDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) {
using DFT = BoundHammingDistance<FloatType>;
return std::make_unique<DFT>(lhs);
}
template <typename FloatType>
BoundDistanceFunction::UP
-HammingDistanceFunctionFactory<FloatType>::for_insertion_vector(const vespalib::eval::TypedCells& lhs) {
+HammingDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) {
using DFT = BoundHammingDistance<FloatType>;
return std::make_unique<DFT>(lhs);
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
index 32e2be99214..6e7f96e1e2f 100644
--- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
@@ -2,11 +2,7 @@
#pragma once
-#include "distance_function.h"
#include "distance_function_factory.h"
-#include <vespa/eval/eval/typed_cells.h>
-#include <vespa/vespalib/util/typify.h>
-#include <cmath>
namespace search::tensor {
@@ -20,8 +16,8 @@ template <typename FloatType>
class HammingDistanceFunctionFactory : public DistanceFunctionFactory {
public:
HammingDistanceFunctionFactory() = default;
- BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override;
- BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override;
+ BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override;
+ BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/i_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/i_tensor_attribute.h
index 1f2da032619..b48ec93c10e 100644
--- a/searchlib/src/vespa/searchlib/tensor/i_tensor_attribute.h
+++ b/searchlib/src/vespa/searchlib/tensor/i_tensor_attribute.h
@@ -21,7 +21,7 @@ class SerializedTensorRef;
*/
class ITensorAttribute : public DocVectorAccess {
public:
- virtual ~ITensorAttribute() {}
+ virtual ~ITensorAttribute() = default;
virtual std::unique_ptr<vespalib::eval::Value> getTensor(uint32_t docId) const = 0;
virtual std::unique_ptr<vespalib::eval::Value> getEmptyTensor() const = 0;
virtual vespalib::eval::TypedCells extract_cells_ref(uint32_t docid) const = 0;
diff --git a/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.cpp b/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.cpp
index 5ad6224f6d4..223a0a5750f 100644
--- a/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.cpp
@@ -28,9 +28,7 @@ ImportedTensorAttributeVectorReadGuard::ImportedTensorAttributeVectorReadGuard(s
{
}
-ImportedTensorAttributeVectorReadGuard::~ImportedTensorAttributeVectorReadGuard()
-{
-}
+ImportedTensorAttributeVectorReadGuard::~ImportedTensorAttributeVectorReadGuard() = default;
const ITensorAttribute *
ImportedTensorAttributeVectorReadGuard::asTensorAttribute() const
@@ -63,13 +61,13 @@ ImportedTensorAttributeVectorReadGuard::get_tensor_ref(uint32_t docid) const
}
vespalib::eval::TypedCells
-ImportedTensorAttributeVectorReadGuard::get_vector(uint32_t docid, uint32_t subspace) const
+ImportedTensorAttributeVectorReadGuard::get_vector(uint32_t docid, uint32_t subspace) const noexcept
{
return _target_tensor_attribute.get_vector(getTargetLid(docid), subspace);
}
search::tensor::VectorBundle
-ImportedTensorAttributeVectorReadGuard::get_vectors(uint32_t docid) const
+ImportedTensorAttributeVectorReadGuard::get_vectors(uint32_t docid) const noexcept
{
return _target_tensor_attribute.get_vectors(getTargetLid(docid));
}
diff --git a/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.h b/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.h
index e07de5486b6..5e6bf8961df 100644
--- a/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.h
+++ b/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.h
@@ -27,7 +27,7 @@ public:
ImportedTensorAttributeVectorReadGuard(std::shared_ptr<MetaStoreReadGuard> targetMetaStoreReadGuard,
const attribute::ImportedAttributeVector &imported_attribute,
bool stableEnumGuard);
- ~ImportedTensorAttributeVectorReadGuard();
+ ~ImportedTensorAttributeVectorReadGuard() override;
const ITensorAttribute *asTensorAttribute() const override;
@@ -45,8 +45,8 @@ public:
bool supports_get_serialized_tensor_ref() const override;
uint32_t get_num_docs() const override { return getNumDocs(); }
- vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override;
- VectorBundle get_vectors(uint32_t docid) const override;
+ vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const noexcept override;
+ VectorBundle get_vectors(uint32_t docid) const noexcept override;
const vespalib::eval::ValueType &getTensorType() const override;
void get_state(const vespalib::slime::Inserter& inserter) const override;
diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp
index 3645c511b01..c42242d8dc8 100644
--- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp
@@ -4,7 +4,6 @@
#include "temporary_vector_store.h"
#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
#include <cmath>
-#include <mutex>
#include <variant>
using vespalib::eval::Int8Float;
@@ -12,7 +11,7 @@ using vespalib::eval::Int8Float;
namespace search::tensor {
template<typename FloatType, bool extra_dim>
-class BoundMipsDistanceFunction : public BoundDistanceFunction {
+class BoundMipsDistanceFunction final : public BoundDistanceFunction {
mutable TemporaryVectorStore<FloatType> _tmpSpace;
const vespalib::ConstArrayRef<FloatType> _lhs_vector;
const vespalib::hwaccelrated::IAccelrated & _computer;
@@ -24,7 +23,7 @@ class BoundMipsDistanceFunction : public BoundDistanceFunction {
static const float *cast(const float * p) { return p; }
static const int8_t *cast(const Int8Float * p) { return reinterpret_cast<const int8_t *>(p); }
public:
- BoundMipsDistanceFunction(const vespalib::eval::TypedCells& lhs, MaximumSquaredNormStore& sq_norm_store)
+ BoundMipsDistanceFunction(TypedCells lhs, MaximumSquaredNormStore& sq_norm_store)
: BoundDistanceFunction(),
_tmpSpace(lhs.size),
_lhs_vector(_tmpSpace.storeLhs(lhs)),
@@ -44,7 +43,7 @@ public:
return _lhs_extra_dim;
}
- double calc(const vespalib::eval::TypedCells &rhs) const override {
+ double calc(TypedCells rhs) const noexcept override {
vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs);
const FloatType * a = _lhs_vector.data();
const FloatType * b = rhs_vector.data();
@@ -58,32 +57,32 @@ public:
}
return -dp;
}
- double convert_threshold(double threshold) const override {
+ double convert_threshold(double threshold) const noexcept override {
return threshold;
}
- double to_rawscore(double distance) const override {
+ double to_rawscore(double distance) const noexcept override {
return -distance;
}
- double to_distance(double rawscore) const override {
+ double to_distance(double rawscore) const noexcept override {
return -rawscore;
}
- double min_rawscore() const override {
+ double min_rawscore() const noexcept override {
return std::numeric_limits<double>::lowest();
}
- double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override {
+ double calc_with_limit(TypedCells rhs, double) const noexcept override {
return calc(rhs);
}
};
template<typename FloatType>
BoundDistanceFunction::UP
-MipsDistanceFunctionFactory<FloatType>::for_query_vector(const vespalib::eval::TypedCells& lhs) {
+MipsDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) {
return std::make_unique<BoundMipsDistanceFunction<FloatType, false>>(lhs, *_sq_norm_store);
}
template<typename FloatType>
BoundDistanceFunction::UP
-MipsDistanceFunctionFactory<FloatType>::for_insertion_vector(const vespalib::eval::TypedCells& lhs) {
+MipsDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) {
return std::make_unique<BoundMipsDistanceFunction<FloatType, true>>(lhs, *_sq_norm_store);
};
diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h
index 63b2a83c1b5..67a6eb58de0 100644
--- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h
+++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h
@@ -45,7 +45,7 @@ public:
: _sq_norm_store(std::make_shared<MaximumSquaredNormStore>())
{
}
- ~MipsDistanceFunctionFactoryBase() = default;
+ ~MipsDistanceFunctionFactoryBase() override = default;
MaximumSquaredNormStore& get_max_squared_norm_store() noexcept { return *_sq_norm_store; }
};
@@ -59,12 +59,11 @@ public:
template<typename FloatType>
class MipsDistanceFunctionFactory : public MipsDistanceFunctionFactoryBase {
public:
- MipsDistanceFunctionFactory() : MipsDistanceFunctionFactoryBase() { }
- ~MipsDistanceFunctionFactory() = default;
+ MipsDistanceFunctionFactory() noexcept = default;
+ ~MipsDistanceFunctionFactory() override = default;
- BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override;
-
- BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override;
+ BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override;
+ BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp
index 931fd3edb06..267f91bb4e0 100644
--- a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp
@@ -2,6 +2,7 @@
#include "prenormalized_angular_distance.h"
#include "temporary_vector_store.h"
+#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
using vespalib::typify_invoke;
using vespalib::eval::TypifyCellType;
@@ -9,14 +10,14 @@ using vespalib::eval::TypifyCellType;
namespace search::tensor {
template<typename FloatType>
-class BoundPrenormalizedAngularDistance : public BoundDistanceFunction {
+class BoundPrenormalizedAngularDistance final : public BoundDistanceFunction {
private:
const vespalib::hwaccelrated::IAccelrated & _computer;
mutable TemporaryVectorStore<FloatType> _tmpSpace;
const vespalib::ConstArrayRef<FloatType> _lhs;
double _lhs_norm_sq;
public:
- BoundPrenormalizedAngularDistance(const vespalib::eval::TypedCells& lhs)
+ explicit BoundPrenormalizedAngularDistance(TypedCells lhs)
: _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()),
_tmpSpace(lhs.size),
_lhs(_tmpSpace.storeLhs(lhs))
@@ -27,7 +28,7 @@ public:
_lhs_norm_sq = 1.0;
}
}
- double calc(const vespalib::eval::TypedCells& rhs) const override {
+ double calc(TypedCells rhs) const noexcept override {
size_t sz = _lhs.size();
vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs);
assert(sz == rhs_vector.size());
@@ -37,13 +38,13 @@ public:
double distance = _lhs_norm_sq - dot_product;
return distance;
}
- double convert_threshold(double threshold) const override {
+ double convert_threshold(double threshold) const noexcept override {
double cosine_similarity = 1.0 - threshold;
double dot_product = cosine_similarity * _lhs_norm_sq;
double distance = _lhs_norm_sq - dot_product;
return distance;
}
- double to_rawscore(double distance) const override {
+ double to_rawscore(double distance) const noexcept override {
double dot_product = _lhs_norm_sq - distance;
double cosine_similarity = dot_product / _lhs_norm_sq;
// should be in in range [-1,1] but roundoff may cause problems:
@@ -53,7 +54,7 @@ public:
double score = 1.0 / (1.0 + cosine_distance);
return score;
}
- double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override {
+ double calc_with_limit(TypedCells rhs, double) const noexcept override {
return calc(rhs);
}
};
@@ -63,14 +64,14 @@ template class BoundPrenormalizedAngularDistance<double>;
template <typename FloatType>
BoundDistanceFunction::UP
-PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_query_vector(const vespalib::eval::TypedCells& lhs) {
+PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) {
using DFT = BoundPrenormalizedAngularDistance<FloatType>;
return std::make_unique<DFT>(lhs);
}
template <typename FloatType>
BoundDistanceFunction::UP
-PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_insertion_vector(const vespalib::eval::TypedCells& lhs) {
+PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) {
using DFT = BoundPrenormalizedAngularDistance<FloatType>;
return std::make_unique<DFT>(lhs);
}
diff --git a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h
index 0f647547e08..7e3a8c2c676 100644
--- a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h
@@ -2,11 +2,7 @@
#pragma once
-#include "distance_function.h"
-#include "bound_distance_function.h"
#include "distance_function_factory.h"
-#include <vespa/eval/eval/typed_cells.h>
-#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
namespace search::tensor {
@@ -18,8 +14,8 @@ template <typename FloatType>
class PrenormalizedAngularDistanceFunctionFactory : public DistanceFunctionFactory {
public:
PrenormalizedAngularDistanceFunctionFactory() = default;
- BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override;
- BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override;
+ BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override;
+ BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp
index 75927112b89..3c1bb51f4ea 100644
--- a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp
@@ -42,7 +42,7 @@ SerializedFastValueAttribute::supports_get_serialized_tensor_ref() const
}
vespalib::eval::TypedCells
-SerializedFastValueAttribute::get_vector(uint32_t docid, uint32_t subspace) const
+SerializedFastValueAttribute::get_vector(uint32_t docid, uint32_t subspace) const noexcept
{
EntryRef ref = acquire_entry_ref(docid);
auto vectors = _tensorBufferStore.get_vectors(ref);
@@ -50,7 +50,7 @@ SerializedFastValueAttribute::get_vector(uint32_t docid, uint32_t subspace) cons
}
VectorBundle
-SerializedFastValueAttribute::get_vectors(uint32_t docid) const
+SerializedFastValueAttribute::get_vectors(uint32_t docid) const noexcept
{
EntryRef ref = acquire_entry_ref(docid);
return _tensorBufferStore.get_vectors(ref);
diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h
index 386b0d91add..43b5a23d176 100644
--- a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h
+++ b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h
@@ -27,8 +27,8 @@ public:
bool supports_get_serialized_tensor_ref() const override;
// Implements DocVectorAccess
- vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override;
- VectorBundle get_vectors(uint32_t docid) const override;
+ vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const noexcept override;
+ VectorBundle get_vectors(uint32_t docid) const noexcept override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp
index ff07f245de4..b1018555212 100644
--- a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp
@@ -2,10 +2,6 @@
#include "temporary_vector_store.h"
-#include <vespa/log/log.h>
-
-LOG_SETUP(".searchlib.tensor.temporary_vector_store");
-
using vespalib::ConstArrayRef;
using vespalib::ArrayRef;
using vespalib::eval::CellType;
@@ -17,7 +13,7 @@ namespace {
template<typename FromType, typename ToType>
ConstArrayRef<ToType>
-convert_cells(ArrayRef<ToType> space, TypedCells cells)
+convert_cells(ArrayRef<ToType> space, TypedCells cells) noexcept
{
assert(cells.size == space.size());
auto old_cells = cells.typify<FromType>();
@@ -32,7 +28,7 @@ convert_cells(ArrayRef<ToType> space, TypedCells cells)
template <typename ToType>
struct ConvertCellsSelector
{
- template <typename FromType> static auto invoke(ArrayRef<ToType> dst, TypedCells src) {
+ template <typename FromType> static auto invoke(ArrayRef<ToType> dst, TypedCells src) noexcept {
return convert_cells<FromType, ToType>(dst, src);
}
};
@@ -41,8 +37,8 @@ struct ConvertCellsSelector
template <typename FloatType>
ConstArrayRef<FloatType>
-TemporaryVectorStore<FloatType>::internal_convert(TypedCells cells, size_t offset) {
- LOG_ASSERT(cells.size * 2 == _tmpSpace.size());
+TemporaryVectorStore<FloatType>::internal_convert(TypedCells cells, size_t offset) noexcept {
+ assert(cells.size * 2 == _tmpSpace.size());
ArrayRef<FloatType> where(_tmpSpace.data() + offset, cells.size);
using MyTypify = vespalib::eval::TypifyCellType;
using MySelector = ConvertCellsSelector<FloatType>;
diff --git a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h
index ad5bdf3ed3a..3dc237c85a4 100644
--- a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h
+++ b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h
@@ -2,10 +2,7 @@
#pragma once
-#include <memory>
-#include <vespa/eval/eval/cell_type.h>
#include <vespa/eval/eval/typed_cells.h>
-#include <vespa/vespalib/util/arrayref.h>
namespace search::tensor {
@@ -13,14 +10,15 @@ namespace search::tensor {
template <typename FloatType>
class TemporaryVectorStore {
private:
+ using TypedCells = vespalib::eval::TypedCells;
std::vector<FloatType> _tmpSpace;
- vespalib::ConstArrayRef<FloatType> internal_convert(vespalib::eval::TypedCells cells, size_t offset);
+ vespalib::ConstArrayRef<FloatType> internal_convert(TypedCells cells, size_t offset) noexcept;
public:
- TemporaryVectorStore(size_t vectorSize) : _tmpSpace(vectorSize * 2) {}
- vespalib::ConstArrayRef<FloatType> storeLhs(vespalib::eval::TypedCells cells) {
+ explicit TemporaryVectorStore(size_t vectorSize) noexcept : _tmpSpace(vectorSize * 2) {}
+ vespalib::ConstArrayRef<FloatType> storeLhs(TypedCells cells) noexcept {
return internal_convert(cells, 0);
}
- vespalib::ConstArrayRef<FloatType> convertRhs(vespalib::eval::TypedCells cells) {
+ vespalib::ConstArrayRef<FloatType> convertRhs(TypedCells cells) {
if (vespalib::eval::get_cell_type<FloatType>() == cells.type) [[likely]] {
return cells.unsafe_typify<FloatType>();
} else {
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h
index 9a2192cf736..b93249b7e21 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h
@@ -105,18 +105,18 @@ public:
vespalib::eval::TypedCells get_empty_subspace() const noexcept {
return _empty.cells();
}
- VectorBundle get_vectors(vespalib::ConstArrayRef<char> buf) const {
+ VectorBundle get_vectors(vespalib::ConstArrayRef<char> buf) const noexcept {
auto num_subspaces = get_num_subspaces(buf);
auto cells_mem_size = get_cells_mem_size(num_subspaces);
auto aligner = select_aligner(cells_mem_size);
- return VectorBundle(buf.data() + get_cells_offset(num_subspaces, aligner), num_subspaces, _subspace_type);
+ return {buf.data() + get_cells_offset(num_subspaces, aligner), num_subspaces, _subspace_type};
}
- SerializedTensorRef get_serialized_tensor_ref(vespalib::ConstArrayRef<char> buf) const {
+ SerializedTensorRef get_serialized_tensor_ref(vespalib::ConstArrayRef<char> buf) const noexcept {
auto num_subspaces = get_num_subspaces(buf);
auto cells_mem_size = get_cells_mem_size(num_subspaces);
auto aligner = select_aligner(cells_mem_size);
vespalib::ConstArrayRef<vespalib::string_id> labels(reinterpret_cast<const vespalib::string_id*>(buf.data() + get_labels_offset()), num_subspaces * _num_mapped_dimensions);
- return SerializedTensorRef(VectorBundle(buf.data() + get_cells_offset(num_subspaces, aligner), num_subspaces, _subspace_type), _num_mapped_dimensions, labels);
+ return {VectorBundle(buf.data() + get_cells_offset(num_subspaces, aligner), num_subspaces, _subspace_type), _num_mapped_dimensions, labels};
}
bool is_dense() const noexcept { return _num_mapped_dimensions == 0; }
};
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h
index c8d96adc220..07275c77566 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h
@@ -41,16 +41,16 @@ public:
vespalib::eval::TypedCells get_empty_subspace() const noexcept {
return _ops.get_empty_subspace();
}
- VectorBundle get_vectors(EntryRef ref) const {
+ VectorBundle get_vectors(EntryRef ref) const noexcept {
if (!ref.valid()) {
- return VectorBundle();
+ return {};
}
auto buf = _array_store.get(ref);
return _ops.get_vectors(buf);
}
- SerializedTensorRef get_serialized_tensor_ref(EntryRef ref) const {
+ SerializedTensorRef get_serialized_tensor_ref(EntryRef ref) const noexcept {
if (!ref.valid()) {
- return SerializedTensorRef();
+ return {};
}
auto buf = _array_store.get(ref);
return _ops.get_serialized_tensor_ref(buf);
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.cpp
index 1f85dba6afe..716d54d0a71 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.cpp
@@ -86,20 +86,20 @@ TensorExtAttribute::getExtendInterface()
}
TypedCells
-TensorExtAttribute::get_vector(uint32_t docid, uint32_t subspace) const
+TensorExtAttribute::get_vector(uint32_t docid, uint32_t subspace) const noexcept
{
auto vectors = get_vectors(docid);
return (subspace < vectors.subspaces()) ? vectors.cells(subspace) : _empty.cells();
}
VectorBundle
-TensorExtAttribute::get_vectors(uint32_t docid) const
+TensorExtAttribute::get_vectors(uint32_t docid) const noexcept
{
auto tensor = _data[docid];
if (tensor == nullptr) {
- return VectorBundle();
+ return {};
}
- return VectorBundle(tensor->cells().data, tensor->index().size(), _subspace_type);
+ return {tensor->cells().data, static_cast<uint32_t>(tensor->index().size()), _subspace_type};
}
std::unique_ptr<Value>
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.h b/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.h
index 890b568c26e..0434c2ab65f 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.h
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.h
@@ -37,8 +37,8 @@ public:
bool add(const vespalib::eval::Value& v, int32_t) override;
IExtendAttribute* getExtendInterface() override;
// DocVectorAccess API
- vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override;
- VectorBundle get_vectors(uint32_t docid) const override;
+ vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const noexcept override;
+ VectorBundle get_vectors(uint32_t docid) const noexcept override;
// ITensorAttribute API
std::unique_ptr<vespalib::eval::Value> getTensor(uint32_t docid) const override;
diff --git a/searchlib/src/vespa/searchlib/tensor/vector_bundle.h b/searchlib/src/vespa/searchlib/tensor/vector_bundle.h
index 7ff7ea943de..087c0f43b60 100644
--- a/searchlib/src/vespa/searchlib/tensor/vector_bundle.h
+++ b/searchlib/src/vespa/searchlib/tensor/vector_bundle.h
@@ -40,7 +40,7 @@ public:
uint32_t subspaces() const noexcept { return _subspaces; }
vespalib::eval::TypedCells cells(uint32_t subspace) const noexcept {
assert(subspace < _subspaces);
- return vespalib::eval::TypedCells(static_cast<const char*>(_data) + _subspace_mem_size * subspace, _cell_type, _subspace_size);
+ return {static_cast<const char*>(_data) + _subspace_mem_size * subspace, _cell_type, _subspace_size};
}
};
diff --git a/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp
index 816317bf86d..2fd23100f46 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp
@@ -131,7 +131,7 @@ NearestNeighborFieldSearcher::onValue(const document::FieldValue& fv)
_attr->add(*tfv->getAsTensorPtr(), 1);
for (auto& elem : _calcs) {
double distance_limit = elem->heap.distanceLimit();
- double distance = elem->calc->calc_with_limit(scratch_docid, distance_limit);
+ double distance = elem->calc->calc_with_limit<false>(scratch_docid, distance_limit);
if (distance <= distance_limit) {
elem->node->set_distance(distance);
}
diff --git a/vespalib/src/tests/util/hamming/CMakeLists.txt b/vespalib/src/tests/util/hamming/CMakeLists.txt
index 5c317627200..ab551eab583 100644
--- a/vespalib/src/tests/util/hamming/CMakeLists.txt
+++ b/vespalib/src/tests/util/hamming/CMakeLists.txt
@@ -7,3 +7,10 @@ vespa_add_executable(vespalib_hamming_test_app TEST
GTest::GTest
)
vespa_add_test(NAME vespalib_hamming_test_app COMMAND vespalib_hamming_test_app)
+
+vespa_add_executable(vespalib_hamming_benchmark_app TEST # benchmark binary built from hamming_benchmark.cpp; no vespa_add_test() entry is added for it here
+ SOURCES
+ hamming_benchmark.cpp
+ DEPENDS
+ vespalib
+)
diff --git a/vespalib/src/tests/util/hamming/hamming_benchmark.cpp b/vespalib/src/tests/util/hamming/hamming_benchmark.cpp
new file mode 100644
index 00000000000..b6393dcd1b7
--- /dev/null
+++ b/vespalib/src/tests/util/hamming/hamming_benchmark.cpp
@@ -0,0 +1,40 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/util/binary_hamming_distance.h>
+#include <vector>
+#include <cstdlib>
+#include <cstdint>
+#include <cstdio>
+
+using namespace vespalib;
+
+int main(int argc, char* argv[]) { // Benchmark driver: repeatedly sums binary_hamming_distance() between one "center" vector and a set of vectors, then prints the total.
+ size_t vector_length = 1024/8; // bytes per vector; the default corresponds to 1024 bits
+ size_t num_vectors = 1;
+ size_t num_reps = 100000000;
+ // Optional overrides: argv[2] = bits per vector, argv[3] = repetitions, argv[4] = number of vectors. NOTE(review): argv[1] is never read - confirm that is intended.
+ if (argc > 2) {
+ vector_length = atol(argv[2])/8; // bits -> bytes; integer division drops any remainder, and atol() yields 0 for non-numeric text
+ }
+ if (argc > 3) {
+ num_reps = atol(argv[3]);
+ }
+ if (argc > 4) {
+ num_vectors = atol(argv[4]);
+ }
+ // Allocate the reference vector and one contiguous buffer holding all candidate vectors back to back.
+ std::vector<uint8_t> center(vector_length);
+ std::vector<uint8_t> vectors(num_vectors*vector_length);
+ srand(13); // fixed seed
+ for (uint8_t & v : center) { v = rand(); } // int result of rand() is narrowed to uint8_t
+ for (uint8_t & v : vectors) { v = rand(); }
+ uint64_t sum(0); // running total of all computed distances; printed at the end
+ for (size_t i=0; i < num_reps; i++) {
+ for (size_t j(0); j < num_vectors; j++) {
+ sum += binary_hamming_distance(center.data(), vectors.data() + j*vector_length, vector_length); // j-th vector starts at byte offset j*vector_length
+ }
+ }
+ // NOTE(review): %lu is used for both size_t and uint64_t arguments below, which is only correct where those types are unsigned long - confirm for the target platforms.
+ printf("%lu vectors of %lu bits, repeated %lu times. Sum of distances = %lu\n", num_vectors, vector_length*8, num_reps, sum);
+ return 0;
+}
diff --git a/vespalib/src/vespa/vespalib/datastore/array_store.h b/vespalib/src/vespa/vespalib/datastore/array_store.h
index 4549b81283e..51a1f9fe950 100644
--- a/vespalib/src/vespa/vespalib/datastore/array_store.h
+++ b/vespalib/src/vespa/vespalib/datastore/array_store.h
@@ -94,7 +94,7 @@ private:
EntryRef allocate_dynamic_array(size_t array_size, uint32_t type_id);
EntryRef addLargeArray(ConstArrayRef array);
EntryRef allocate_large_array(size_t array_size);
- ConstArrayRef getSmallArray(RefT ref, size_t arraySize) const {
+ ConstArrayRef getSmallArray(RefT ref, size_t arraySize) const noexcept {
const ElemT *buf = _store.template getEntryArray<ElemT>(ref, arraySize);
return ConstArrayRef(buf, arraySize);
}
@@ -104,7 +104,7 @@ private:
auto size = BufferType::get_dynamic_array_size(entry);
return ConstArrayRef(entry, size);
}
- ConstArrayRef getLargeArray(RefT ref) const {
+ ConstArrayRef getLargeArray(RefT ref) const noexcept {
const LargeArray *buf = _store.template getEntry<LargeArray>(ref);
return ConstArrayRef(&(*buf)[0], buf->size());
}
@@ -114,7 +114,7 @@ public:
ArrayStore(const ArrayStoreConfig &cfg, std::shared_ptr<alloc::MemoryAllocator> memory_allocator, TypeMapper&& mapper);
~ArrayStore() override;
EntryRef add(ConstArrayRef array);
- ConstArrayRef get(EntryRef ref) const {
+ ConstArrayRef get(EntryRef ref) const noexcept {
if (!ref.valid()) [[unlikely]] {
return ConstArrayRef();
}
diff --git a/vespalib/src/vespa/vespalib/datastore/datastore.h b/vespalib/src/vespa/vespalib/datastore/datastore.h
index fa231e9cf94..0226c780cf1 100644
--- a/vespalib/src/vespa/vespalib/datastore/datastore.h
+++ b/vespalib/src/vespa/vespalib/datastore/datastore.h
@@ -96,7 +96,7 @@ public:
EntryRef addEntry(const EntryType &e);
- const EntryType &getEntry(EntryRef ref) const {
+ const EntryType &getEntry(EntryRef ref) const noexcept {
return *this->template getEntry<EntryType>(RefType(ref));
}
};
diff --git a/vespalib/src/vespa/vespalib/util/binary_hamming_distance.cpp b/vespalib/src/vespa/vespalib/util/binary_hamming_distance.cpp
index 0e9393b7be4..5f242059ccf 100644
--- a/vespalib/src/vespa/vespalib/util/binary_hamming_distance.cpp
+++ b/vespalib/src/vespa/vespalib/util/binary_hamming_distance.cpp
@@ -4,25 +4,34 @@
namespace vespalib {
-size_t binary_hamming_distance(const void *lhs, const void *rhs, size_t sz) {
- uintptr_t addr_a = (uintptr_t) lhs;
- uintptr_t addr_b = (uintptr_t) rhs;
+namespace {
+ constexpr uint8_t WORD_SZ = sizeof (uint64_t);
+ constexpr uint8_t UNROLL_CNT = 2;
+ static_assert(sizeof(uint64_t) == 8);
+}
+size_t
+binary_hamming_distance(const void *lhs, const void *rhs, size_t sz) noexcept {
+ auto addr_a = (uintptr_t) lhs;
+ auto addr_b = (uintptr_t) rhs;
size_t sum = 0;
size_t i = 0;
- static_assert(sizeof(uint64_t) == 8);
bool aligned = ((addr_a & 0x7) == 0) && ((addr_b & 0x7) == 0);
if (__builtin_expect(aligned, true)) {
- const uint64_t *words_a = static_cast<const uint64_t *>(lhs);
- const uint64_t *words_b = static_cast<const uint64_t *>(rhs);
- for (; i * 8 + 7 < sz; ++i) {
- uint64_t xor_bits = words_a[i] ^ words_b[i];
- sum += __builtin_popcountl(xor_bits);
+ const auto *words_a = static_cast<const uint64_t *>(lhs);
+ const auto *words_b = static_cast<const uint64_t *>(rhs);
+ for (; (i+UNROLL_CNT) * WORD_SZ <= sz; i += UNROLL_CNT) {
+ for (uint8_t j=0; j < UNROLL_CNT; j++) {
+ sum += __builtin_popcountl(words_a[i+j] ^ words_b[i+j]);
+ }
+ }
+ for (; (i + 1) * WORD_SZ <= sz; ++i) {
+ sum += __builtin_popcountl(words_a[i] ^ words_b[i]);
}
}
- if (__builtin_expect((i * 8 < sz), false)) {
- const uint8_t *bytes_a = static_cast<const uint8_t *>(lhs);
- const uint8_t *bytes_b = static_cast<const uint8_t *>(rhs);
- for (i *= 8; i < sz; ++i) {
+ if (__builtin_expect((i * WORD_SZ < sz), false)) {
+ const auto *bytes_a = static_cast<const uint8_t *>(lhs);
+ const auto *bytes_b = static_cast<const uint8_t *>(rhs);
+ for (i *= WORD_SZ; i < sz; ++i) {
uint64_t xor_bits = bytes_a[i] ^ bytes_b[i];
sum += __builtin_popcountl(xor_bits);
}
diff --git a/vespalib/src/vespa/vespalib/util/binary_hamming_distance.h b/vespalib/src/vespa/vespalib/util/binary_hamming_distance.h
index 84bbbe71788..f5280903db1 100644
--- a/vespalib/src/vespa/vespalib/util/binary_hamming_distance.h
+++ b/vespalib/src/vespa/vespalib/util/binary_hamming_distance.h
@@ -10,5 +10,5 @@ namespace vespalib {
* @param sz number of bytes in each blob
* @return number of bits that differ when comparing the two blobs
**/
-size_t binary_hamming_distance(const void *lhs, const void *rhs, size_t sz);
+size_t binary_hamming_distance(const void *lhs, const void *rhs, size_t sz) noexcept;
}
diff --git a/vespamalloc/src/vespamalloc/util/callstack.cpp b/vespamalloc/src/vespamalloc/util/callstack.cpp
index a0645f06815..b8449c89a72 100644
--- a/vespamalloc/src/vespamalloc/util/callstack.cpp
+++ b/vespamalloc/src/vespamalloc/util/callstack.cpp
@@ -53,6 +53,7 @@ const void * StackEntry::_stopAddr = nullptr;
size_t
StackEntry::fillStack(StackEntry *stack, size_t nelems)
{
+ // GNU extension: Variable-length automatic array
void * retAddr[nelems];
int sz = backtrace(retAddr, nelems);
if ((sz > 0) && (size_t(sz) <= nelems)) {