summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--flags/src/main/java/com/yahoo/vespa/flags/Flags.java2
-rw-r--r--searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp8
-rw-r--r--searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp53
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/blueprint.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp21
-rw-r--r--searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h6
-rw-r--r--security-utils/src/main/java/com/yahoo/security/Base58.java22
-rw-r--r--security-utils/src/main/java/com/yahoo/security/Base62.java21
-rw-r--r--security-utils/src/main/java/com/yahoo/security/BaseNCodec.java151
-rw-r--r--security-utils/src/test/java/com/yahoo/security/BaseNCodecTest.java122
-rw-r--r--vespalib/src/tests/coro/generator/.gitignore1
-rw-r--r--vespalib/src/tests/coro/generator/CMakeLists.txt7
-rw-r--r--vespalib/src/tests/coro/generator/generator_bench.cpp58
14 files changed, 452 insertions, 26 deletions
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
index 8626f2b6291..ab4074bbb4e 100644
--- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
+++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
@@ -70,7 +70,7 @@ public class Flags {
public static final UnboundBooleanFlag KEEP_STORAGE_NODE_UP = defineFeatureFlag(
"keep-storage-node-up", true,
- List.of("hakonhall"), "2022-07-07", "2022-11-07",
+ List.of("hakonhall"), "2022-07-07", "2022-12-07",
"Whether to leave the storage node (with wanted state) UP while the node is permanently down.",
"Takes effect immediately for nodes transitioning to permanently down.",
ZONE_ID, APPLICATION_ID);
diff --git a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp
index 4b9c23ea5d3..6f4ffc31741 100644
--- a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp
+++ b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp
@@ -679,7 +679,7 @@ getExpectedBlueprint()
" estHits: 9\n"
" cost_tier: 1\n"
" tree_size: 2\n"
- " allow_termwise_eval: 0\n"
+ " allow_termwise_eval: false\n"
" }\n"
" sourceId: 4294967295\n"
" docid_limit: 0\n"
@@ -698,7 +698,7 @@ getExpectedBlueprint()
" estHits: 9\n"
" cost_tier: 1\n"
" tree_size: 1\n"
- " allow_termwise_eval: 1\n"
+ " allow_termwise_eval: true\n"
" }\n"
" sourceId: 4294967295\n"
" docid_limit: 0\n"
@@ -727,7 +727,7 @@ getExpectedSlimeBlueprint() {
" estHits: 9,"
" cost_tier: 1,"
" tree_size: 2,"
- " allow_termwise_eval: 0"
+ " allow_termwise_eval: false"
" },"
" sourceId: 4294967295,"
" docid_limit: 0,"
@@ -751,7 +751,7 @@ getExpectedSlimeBlueprint() {
" estHits: 9,"
" cost_tier: 1,"
" tree_size: 1,"
- " allow_termwise_eval: 1"
+ " allow_termwise_eval: true"
" },"
" sourceId: 4294967295,"
" docid_limit: 0"
diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp
index 1ac91c5d8d6..55ca42f7369 100644
--- a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp
+++ b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp
@@ -595,7 +595,7 @@ TEST_F("require that asString() on blueprint works", BlueprintAsStringFixture)
" estHits: 2\n"
" cost_tier: 1\n"
" tree_size: 2\n"
- " allow_termwise_eval: 0\n"
+ " allow_termwise_eval: false\n"
" }\n"
" sourceId: 4294967295\n"
" docid_limit: 0\n"
@@ -617,7 +617,7 @@ TEST_F("require that asString() on blueprint works", BlueprintAsStringFixture)
" estHits: 2\n"
" cost_tier: 1\n"
" tree_size: 1\n"
- " allow_termwise_eval: 1\n"
+ " allow_termwise_eval: true\n"
" }\n"
" sourceId: 4294967295\n"
" docid_limit: 0\n"
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
index 179296ff0f9..692b86fdc75 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -45,6 +45,8 @@
#include <vespa/log/log.h>
LOG_SETUP(".searchlib.attribute.attribute_blueprint_factory");
+using search::attribute::BasicType;
+using search::attribute::CollectionType;
using search::attribute::IAttributeVector;
using search::attribute::ISearchContext;
using search::fef::TermFieldMatchData;
@@ -52,6 +54,7 @@ using search::fef::TermFieldMatchDataArray;
using search::fef::TermFieldMatchDataPosition;
using search::query::Location;
using search::query::LocationTerm;
+using search::query::MultiTerm;
using search::query::Node;
using search::query::NumberTerm;
using search::query::PredicateQuery;
@@ -62,7 +65,6 @@ using search::query::StackDumpCreator;
using search::query::StringTerm;
using search::query::SubstringTerm;
using search::query::SuffixTerm;
-using search::query::MultiTerm;
using search::queryeval::AndBlueprint;
using search::queryeval::AndSearchStrict;
using search::queryeval::Blueprint;
@@ -84,11 +86,11 @@ using search::queryeval::SimpleLeafBlueprint;
using search::queryeval::WeightedSetTermBlueprint;
using search::tensor::DenseTensorAttribute;
using search::tensor::ITensorAttribute;
+using vespalib::Issue;
using vespalib::geo::ZCurve;
using vespalib::make_string;
using vespalib::string;
using vespalib::stringref;
-using vespalib::Issue;
namespace search {
namespace {
@@ -116,6 +118,7 @@ private:
class AttributeFieldBlueprint : public SimpleLeafBlueprint
{
private:
+ const IAttributeVector& _attr;
// Must take a copy of the query term for visitMembers()
// as only a few ISearchContext implementations exposes the query term.
vespalib::string _query_term;
@@ -126,6 +129,7 @@ private:
AttributeFieldBlueprint(const FieldSpec &field, const IAttributeVector &attribute,
QueryTermSimple::UP term, const attribute::SearchContextParams &params)
: SimpleLeafBlueprint(field),
+ _attr(attribute),
_query_term(term->getTermString()),
_search_context(attribute.createSearchContext(std::move(term), params)),
_type(OTHER)
@@ -195,11 +199,39 @@ public:
bool getRange(vespalib::string &from, vespalib::string &to) const override;
};
+namespace {
+
+vespalib::string
+get_type(const IAttributeVector& attr)
+{
+ auto coll_type = CollectionType(attr.getCollectionType());
+ auto basic_type = BasicType(attr.getBasicType());
+ if (coll_type.type() == CollectionType::SINGLE) {
+ return basic_type.asString();
+ }
+ std::ostringstream oss;
+ oss << coll_type.asString() << "<" << basic_type.asString() << ">";
+ return oss.str();
+}
+
+void
+visit_attribute(vespalib::ObjectVisitor& visitor, const IAttributeVector& attr)
+{
+ visitor.openStruct("attribute", "IAttributeVector");
+ visitor.visitString("name", attr.getName());
+ visitor.visitString("type", get_type(attr));
+ visitor.visitBool("fast_search", attr.getIsFastSearch());
+ visitor.visitBool("filter", attr.getIsFilter());
+ visitor.closeStruct();
+}
+
+}
+
void
AttributeFieldBlueprint::visitMembers(vespalib::ObjectVisitor &visitor) const
{
LeafBlueprint::visitMembers(visitor);
- visit(visitor, "attribute", _search_context->attributeName());
+ visit_attribute(visitor, _attr);
visit(visitor, "query_term", _query_term);
}
@@ -275,6 +307,11 @@ public:
search->fetchPostings(execInfo);
}
}
+
+ void visitMembers(vespalib::ObjectVisitor& visitor) const override {
+ LeafBlueprint::visitMembers(visitor);
+ visit_attribute(visitor, _attribute);
+ }
};
LocationPreFilterBlueprint::~LocationPreFilterBlueprint() = default;
@@ -325,6 +362,10 @@ public:
SearchIteratorUP createFilterSearch(bool strict, FilterConstraint constraint) const override {
return create_default_filter(strict, constraint);
}
+ void visitMembers(vespalib::ObjectVisitor& visitor) const override {
+ LeafBlueprint::visitMembers(visitor);
+ visit_attribute(visitor, _attribute);
+ }
};
//-----------------------------------------------------------------------------
@@ -436,6 +477,10 @@ public:
return {};
}
}
+ void visitMembers(vespalib::ObjectVisitor& visitor) const override {
+ LeafBlueprint::visitMembers(visitor);
+ visit_attribute(visitor, _iattr);
+ }
};
template <typename SearchType>
@@ -623,7 +668,7 @@ public:
void visitMembers(vespalib::ObjectVisitor &visitor) const override {
LeafBlueprint::visitMembers(visitor);
- visit(visitor, "attribute", _attrName);
+ visit_attribute(visitor, _iattr);
}
std::unique_ptr<queryeval::MatchingElementsSearch> create_matching_elements_search(const MatchingElementsFields &fields) const override {
if (fields.has_field(_attrName)) {
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
index 9c9df6b82fe..91aa308f008 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
@@ -336,7 +336,7 @@ Blueprint::visitMembers(vespalib::ObjectVisitor &visitor) const
visitor.visitInt("estHits", state.estimate().estHits);
visitor.visitInt("cost_tier", state.cost_tier());
visitor.visitInt("tree_size", state.tree_size());
- visitor.visitInt("allow_termwise_eval", state.allow_termwise_eval());
+ visitor.visitBool("allow_termwise_eval", state.allow_termwise_eval());
visitor.closeStruct();
visitor.visitInt("sourceId", _sourceId);
visitor.visitInt("docid_limit", _docid_limit);
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp
index 922c2fefa28..df732d3ab24 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp
@@ -68,7 +68,7 @@ is_present(uint8_t presence_flag) {
class IndexBuilder {
public:
virtual ~IndexBuilder() = default;
- virtual void add(uint32_t lid, EntryRef ref) = 0;
+ virtual void add(uint32_t lid) = 0;
virtual void wait_complete() = 0;
};
@@ -78,7 +78,7 @@ public:
*/
class ThreadedIndexBuilder : public IndexBuilder {
public:
- ThreadedIndexBuilder(AttributeVector& attr, vespalib::GenerationHandler& generation_handler, TensorStore& store, NearestNeighborIndex& index, vespalib::Executor& shared_executor)
+ ThreadedIndexBuilder(TensorAttribute& attr, vespalib::GenerationHandler& generation_handler, TensorStore& store, NearestNeighborIndex& index, vespalib::Executor& shared_executor)
: _attr(attr),
_generation_handler(generation_handler),
_store(store),
@@ -87,7 +87,7 @@ public:
_queue(MAX_PENDING),
_pending(0)
{}
- void add(uint32_t lid, EntryRef ref) override;
+ void add(uint32_t lid) override;
void wait_complete() override {
drainUntilPending(0);
}
@@ -134,7 +134,7 @@ private:
}
}
static constexpr uint32_t MAX_PENDING = 1000;
- AttributeVector& _attr;
+ TensorAttribute& _attr;
const vespalib::GenerationHandler& _generation_handler;
TensorStore& _store;
NearestNeighborIndex& _index;
@@ -146,7 +146,7 @@ private:
};
void
-ThreadedIndexBuilder::add(uint32_t lid, EntryRef ref) {
+ThreadedIndexBuilder::add(uint32_t lid) {
Entry item;
while (pop(item)) {
// First process items that are ready to complete
@@ -157,9 +157,8 @@ ThreadedIndexBuilder::add(uint32_t lid, EntryRef ref) {
// Then we can issue a new one
++_pending;
- auto dense_store = _store.as_dense();
- auto task = vespalib::makeLambdaTask([this, ref, lid, dense_store]() {
- auto prepared = _index.prepare_add_document(lid, dense_store->get_vectors(ref),
+ auto task = vespalib::makeLambdaTask([this, lid]() {
+ auto prepared = _index.prepare_add_document(lid, _attr.get_vectors(lid),
_generation_handler.takeGuard());
std::unique_lock guard(_mutex);
_queue.push(std::make_pair(lid, std::move(prepared)));
@@ -177,7 +176,7 @@ public:
_index(index)
{
}
- void add(uint32_t lid, EntryRef) override {
+ void add(uint32_t lid) override {
_index.add_document(lid);
if ((lid % LOAD_COMMIT_INTERVAL) == 0) {
_attr.commit();
@@ -193,7 +192,7 @@ private:
}
-TensorAttributeLoader::TensorAttributeLoader(AttributeVector& attr, GenerationHandler& generation_handler, RefVector& ref_vector, TensorStore& store, NearestNeighborIndex* index)
+TensorAttributeLoader::TensorAttributeLoader(TensorAttribute& attr, GenerationHandler& generation_handler, RefVector& ref_vector, TensorStore& store, NearestNeighborIndex* index)
: _attr(attr),
_generation_handler(generation_handler),
_ref_vector(ref_vector),
@@ -261,7 +260,7 @@ TensorAttributeLoader::build_index(vespalib::Executor* executor, uint32_t docid_
for (uint32_t lid = 0; lid < docid_limit; ++lid) {
auto ref = _ref_vector[lid].load_relaxed();
if (ref.valid()) {
- builder->add(lid, ref);
+ builder->add(lid);
}
}
builder->wait_complete();
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h
index 97add17d8f3..9417737cec5 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h
@@ -5,7 +5,6 @@
#include <vespa/vespalib/datastore/atomic_entry_ref.h>
#include <vespa/vespalib/util/rcuvector.h>
-namespace search { class AttributeVector; }
namespace vespalib { class Executor; }
namespace search::tensor {
@@ -13,6 +12,7 @@ namespace search::tensor {
class BlobSequenceReader;
class DenseTensorStore;
class NearestNeighborIndex;
+class TensorAttribute;
class TensorStore;
/**
@@ -23,7 +23,7 @@ class TensorAttributeLoader {
using AtomicEntryRef = vespalib::datastore::AtomicEntryRef;
using GenerationHandler = vespalib::GenerationHandler;
using RefVector = vespalib::RcuVectorBase<AtomicEntryRef>;
- AttributeVector& _attr;
+ TensorAttribute& _attr;
GenerationHandler& _generation_handler;
RefVector& _ref_vector;
TensorStore& _store;
@@ -35,7 +35,7 @@ class TensorAttributeLoader {
bool load_index();
public:
- TensorAttributeLoader(AttributeVector& attr, GenerationHandler& generation_handler, RefVector& ref_vector, TensorStore& store, NearestNeighborIndex* index);
+ TensorAttributeLoader(TensorAttribute& attr, GenerationHandler& generation_handler, RefVector& ref_vector, TensorStore& store, NearestNeighborIndex* index);
~TensorAttributeLoader();
bool on_load(vespalib::Executor* executor);
};
diff --git a/security-utils/src/main/java/com/yahoo/security/Base58.java b/security-utils/src/main/java/com/yahoo/security/Base58.java
new file mode 100644
index 00000000000..3010bc878a8
--- /dev/null
+++ b/security-utils/src/main/java/com/yahoo/security/Base58.java
@@ -0,0 +1,22 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.security;
+
+/**
+ * Base58 encoding using the alphabet standardized by Bitcoin et al., which avoids
+ * the use of characters [0OIl] to avoid visual ambiguity. It does not feature any
+ * potential word/line-breaking characters, which means encoded strings can usually
+ * be selected in one go on web pages or in the terminal.
+ *
+ * @see <a href="https://en.wikipedia.org/wiki/Base58">Base58 on Wiki</a>
+ *
+ * @author vekterli
+ */
+public class Base58 {
+
+ private static final BaseNCodec INSTANCE = BaseNCodec.of("123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz");
+
+ public static BaseNCodec codec() {
+ return INSTANCE;
+ }
+
+}
diff --git a/security-utils/src/main/java/com/yahoo/security/Base62.java b/security-utils/src/main/java/com/yahoo/security/Base62.java
new file mode 100644
index 00000000000..86c60a1bb1d
--- /dev/null
+++ b/security-utils/src/main/java/com/yahoo/security/Base62.java
@@ -0,0 +1,21 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.security;
+
+/**
+ * Base62 encoding which has the nice property that it does not feature any
+ * potential word/line-breaking characters, which means encoded strings can
+ * usually be selected in one go on web pages or in the terminal.
+ *
+ * @see <a href="https://en.wikipedia.org/wiki/Base62">Base62 on Wiki</a>
+ *
+ * @author vekterli
+ */
+public class Base62 {
+
+ private static final BaseNCodec INSTANCE = BaseNCodec.of("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
+
+ public static BaseNCodec codec() {
+ return INSTANCE;
+ }
+
+}
diff --git a/security-utils/src/main/java/com/yahoo/security/BaseNCodec.java b/security-utils/src/main/java/com/yahoo/security/BaseNCodec.java
new file mode 100644
index 00000000000..0921f238460
--- /dev/null
+++ b/security-utils/src/main/java/com/yahoo/security/BaseNCodec.java
@@ -0,0 +1,151 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.security;
+
+import java.math.BigInteger;
+import java.util.Arrays;
+
+/**
+ * <p>
+ * Codec that enables easy conversion from an array of bytes to any numeric base in [2, 256)
+ * and back again, using a supplied custom alphabet.
+ * </p>
+ * <p>
+ * Implemented by treating the input byte sequence to encode verbatim as a big-endian
+ * <code>BigInteger</code> and iteratively doing a <code>divmod</code> operation until
+ * the quotient is zero, emitting the modulus mapped onto the alphabet for each iteration.
+ * </p>
+ * <p>
+ * Decoding reverses this process, ending up with the same <code>BigInteger</code> as in
+ * the initial encoding step.
+ * </p>
+ * <p>
+ * Note that <code>BigInteger</code>s represent the <em>canonical</em> form of any given
+ * integer, which means that leading zero bytes are implicitly ignored. We therefore
+ * special-case this by unary-coding the number of leading zeroes in the encoded form,
+ * where a leading zero byte is mapped to the <em>first</em> character of the alphabet.
+ * </p>
+ * <p>Example for Base58, which starts its alphabet with 1 (0 is not present):</p>
+ * <pre>
+ * "Hello World!" = "2NEpo7TZRRrLZSi2U"
+ * "\0\0Hello World!" = "112NEpo7TZRRrLZSi2U" (note leading 1s)
+ * </pre>
+ * <p>Example for Base62, which starts its alphabet with 0:</p>
+ * <pre>
+ * "Hello World!" = "T8dgcjRGkZ3aysdN"
+ * "\0\0Hello World!" = "00T8dgcjRGkZ3aysdN" (note leading 0s)
+ * </pre>
+ * <p>
+ * <strong>Important:</strong> runtime complexity is <em>O(n<sup>2</sup>)</em> for both
+ * encoding and decoding, so this should only be used to encode/decode relatively short
+ * byte sequences. This is <em>not</em> a replacement for Base64 etc. encoding that runs
+ * in linear time! In addition, a <code>BaseNCodec</code> with a Base64 alphabet encodes
+ * to a completely different output than a regular Base64 encoder when the input is not
+ * evenly divisible by three. This is due to regular Base64 explicitly handling padding,
+ * while this codec does not.
+ * </p>
+ *
+ * @author vekterli
+ */
+public class BaseNCodec {
+
+ public static final int MAX_BASE = 255; /** Inclusive */
+
+ private static class Alphabet {
+ final char[] alphabetChars;
+ final int[] reverseLut;
+
+ Alphabet(String alphabetIn) {
+ if (alphabetIn.length() < 2) { // We don't do unary...
+ throw new IllegalArgumentException("Alphabet requires at least two symbols");
+ }
+ if (alphabetIn.length() > MAX_BASE) {
+ throw new IllegalArgumentException("Alphabet size too large");
+ }
+ alphabetChars = alphabetIn.toCharArray();
+ int highestChar = Integer.MIN_VALUE;
+ for (char ch : alphabetChars) {
+ highestChar = Math.max(highestChar, ch);
+ }
+ reverseLut = new int[highestChar + 1];
+ Arrays.fill(reverseLut, -1); // -1 => invalid mapping
+ for (int i = 0; i < alphabetChars.length; ++i) {
+ if (reverseLut[alphabetChars[i]] != -1) {
+ throw new IllegalArgumentException("Alphabet character '%c' occurs more than once"
+ .formatted(alphabetChars[i]));
+ }
+ reverseLut[alphabetChars[i]] = i;
+ }
+ }
+ }
+
+ private static final BigInteger BN_ZERO = BigInteger.valueOf(0);
+
+ private final Alphabet alphabet;
+ private final BigInteger alphabetLenBN;
+
+ private BaseNCodec(String alphabet) {
+ this.alphabet = new Alphabet(alphabet);
+ this.alphabetLenBN = BigInteger.valueOf(this.alphabet.alphabetChars.length);
+ }
+
+ public static BaseNCodec of(String alphabet) {
+ return new BaseNCodec(alphabet);
+ }
+
+ public int base() { return this.alphabet.alphabetChars.length; }
+
+ public String encode(byte[] input) {
+ var sb = new StringBuilder(input.length * 2); // Not at all exact, but builder can resize anyway
+ var num = new BigInteger(1, input); // Treat as _positive_ big endian bigint (explicit signum=1)
+ // Standard base N digit conversion loop. Note: emits in reverse order since we
+ // append the least significant digit first. We reverse this later on.
+ while (!num.equals(BN_ZERO)) {
+ BigInteger[] quotRem = num.divideAndRemainder(alphabetLenBN);
+ num = quotRem[0];
+ sb.append(alphabet.alphabetChars[quotRem[1].intValue()]);
+ }
+ for (byte leadingByte : input) {
+ if (leadingByte != 0x00) {
+ break;
+ }
+ sb.append(alphabet.alphabetChars[0]);
+ }
+ return sb.reverse().toString();
+ }
+
+ public byte[] decode(String input) {
+ char[] inputChars = input.toCharArray();
+ int prefixNulls = 0;
+ for (char leadingChar : inputChars) {
+ if (leadingChar != alphabet.alphabetChars[0]) {
+ break;
+ }
+ ++prefixNulls;
+ }
+ // Restore the BigInteger representation by reversing the base conversion done during encoding.
+ var accu = BN_ZERO;
+ for (char c : inputChars) {
+ int idx = (c < alphabet.reverseLut.length) ? alphabet.reverseLut[c] : -1;
+ if (idx == -1) {
+ throw new IllegalArgumentException("Input character not part of codec alphabet");
+ }
+ accu = accu.multiply(alphabetLenBN).add(BigInteger.valueOf(idx));
+ }
+ byte[] bnBytes = accu.toByteArray();
+ // If the most significant bigint byte is zero, it means either that the most significant bit
+ // of the next byte is 1 and the bigint representation uses 1 extra byte to be positive in 2's
+ // complement, or that the value itself is zero (bnBytes is then the single byte 0x00 and the input
+ // was empty or all prefix symbols). Either way, prune it away to avoid a spurious null-byte.
+ boolean msbZero = (bnBytes[0] == 0x0);
+ if (prefixNulls == 0 && !msbZero) {
+ return bnBytes;
+ } else {
+ int realLen = (msbZero ? bnBytes.length - 1 : bnBytes.length);
+ byte[] result = new byte[prefixNulls + realLen];
+ // #prefixNulls prefix bytes are implicitly zero
+ System.arraycopy(bnBytes, (msbZero ? 1 : 0), result, prefixNulls, realLen);
+ return result;
+ }
+ }
+
+}
diff --git a/security-utils/src/test/java/com/yahoo/security/BaseNCodecTest.java b/security-utils/src/test/java/com/yahoo/security/BaseNCodecTest.java
new file mode 100644
index 00000000000..da67ea2dff3
--- /dev/null
+++ b/security-utils/src/test/java/com/yahoo/security/BaseNCodecTest.java
@@ -0,0 +1,122 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.security;
+
+import org.junit.jupiter.api.Test;
+
+import java.math.BigInteger;
+
+import static com.yahoo.security.ArrayUtils.hex;
+import static com.yahoo.security.ArrayUtils.toUtf8Bytes;
+import static com.yahoo.security.ArrayUtils.unhex;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+/**
+ * @author vekterli
+ */
+public class BaseNCodecTest {
+
+ private static void verifyRoundtrip(BaseNCodec codec, byte[] bytes, String expectedEncoded) {
+ String enc = codec.encode(bytes);
+ assertEquals(expectedEncoded, enc);
+ byte[] dec = codec.decode(enc);
+ assertEquals(hex(bytes), hex(dec));
+ }
+
+ private static void verifyRoundtrip(BaseNCodec codec, String str, String expectedEncoded) {
+ verifyRoundtrip(codec, toUtf8Bytes(str), expectedEncoded);
+ }
+
+ @Test
+ void decoding_chars_not_in_alphabet_throws() {
+ var b58 = Base58.codec();
+ // [0OIl] are not in Base58 alphabet, but within the alphabet LUT range
+ assertThrows(IllegalArgumentException.class, () -> b58.decode("233QC0"));
+ // '{' is one beyond 'z', which is the highest char in the LUT range
+ assertThrows(IllegalArgumentException.class, () -> b58.decode("233QC{"));
+ }
+
+ @Test
+ void alphabet_char_duplication_during_codec_setup_throws() {
+ assertThrows(IllegalArgumentException.class, () -> BaseNCodec.of("abcda"));
+ }
+
+ @Test
+ void base58_codec_test_cases_pass() {
+ var b58 = Base58.codec();
+ assertEquals(58, b58.base());
+ // https://datatracker.ietf.org/doc/html/draft-msporny-base58-03 test vectors:
+ verifyRoundtrip(b58, "Hello World!", "2NEpo7TZRRrLZSi2U");
+ verifyRoundtrip(b58, "The quick brown fox jumps over the lazy dog.",
+ "USm3fpXnKG5EUBx2ndxBDMPVciP5hGey2Jh4NDv6gmeo1LkMeiKrLJUUBk6Z");
+ verifyRoundtrip(b58, unhex("0000287fb4cd"), "11233QC4");
+
+ // Values that have been cross-referenced with other encoder implementations:
+ verifyRoundtrip(b58, "", "");
+ verifyRoundtrip(b58, unhex("00"), "1");
+ verifyRoundtrip(b58, unhex("0000"), "11");
+ verifyRoundtrip(b58, unhex("ff"), "5Q");
+ verifyRoundtrip(b58, unhex("00ff"), "15Q");
+ verifyRoundtrip(b58, unhex("ff00"), "LQX");
+ verifyRoundtrip(b58, unhex("ffffff"), "2UzHL");
+ verifyRoundtrip(b58, unhex("287fb4cd"), "233QC4");
+ }
+
+ @Test
+ void base62_codec_test_cases_pass() {
+ var b62 = Base62.codec();
+ assertEquals(62, b62.base());
+ verifyRoundtrip(b62, "Hello World!", "T8dgcjRGkZ3aysdN");
+ verifyRoundtrip(b62, "\0\0Hello World!", "00T8dgcjRGkZ3aysdN");
+ verifyRoundtrip(b62, "", "");
+ verifyRoundtrip(b62, unhex("00"), "0");
+ verifyRoundtrip(b62, unhex("0000"), "00");
+ verifyRoundtrip(b62, unhex("00000000ffffffff"), "00004gfFC3");
+ verifyRoundtrip(b62, unhex("ffffffff00000000"), "LygHZwPV2MC");
+ }
+
+ // Test with some common bases that are easier to reason about:
+
+ @Test
+ void codec_generalizes_down_to_base_10() {
+ var b10 = BaseNCodec.of("0123456789");
+ verifyRoundtrip(b10, unhex("00"), "0");
+ verifyRoundtrip(b10, unhex("000f"), "015");
+ verifyRoundtrip(b10, unhex("ffff"), "65535");
+
+ // A large prime number: 2^252 + 27742317777372353535851937790883648493 (Curve25519 order)
+ var numStr = "7237005577332262213973186563042994240857116359379907606001950938285454250989";
+ var numBN = new BigInteger(numStr);
+ verifyRoundtrip(b10, numBN.toByteArray(), numStr);
+ }
+
+ // Possibly world's most inefficient hex conversion?
+ @Test
+ void codec_generalizes_down_to_base_16() {
+ var b16 = BaseNCodec.of("0123456789ABCDEF");
+ assertEquals(16, b16.base());
+ verifyRoundtrip(b16, unhex(""), "");
+ verifyRoundtrip(b16, unhex("00"), "0");
+ verifyRoundtrip(b16, unhex("80"), "80");
+ verifyRoundtrip(b16, unhex("01"), "1");
+ verifyRoundtrip(b16, unhex("F0"), "F0");
+ verifyRoundtrip(b16, unhex("0F"), "F");
+ verifyRoundtrip(b16, unhex("F00F"), "F00F");
+ verifyRoundtrip(b16, unhex("5FAF"), "5FAF");
+ }
+
+ // Very likely genuinely the world's most inefficient binary conversion.
+ @Test
+ void codec_generalizes_down_to_base_2() {
+ var b2 = BaseNCodec.of("01");
+ assertEquals(2, b2.base());
+ verifyRoundtrip(b2, unhex(""), "");
+ verifyRoundtrip(b2, unhex("00"), "0");
+ verifyRoundtrip(b2, unhex("000000"), "000"); // note: prefix zero byte sentinels!
+ verifyRoundtrip(b2, unhex("80"), "10000000");
+ verifyRoundtrip(b2, unhex("01"), "1");
+ verifyRoundtrip(b2, unhex("F0"), "11110000");
+ verifyRoundtrip(b2, unhex("0F"), "1111");
+ }
+
+}
diff --git a/vespalib/src/tests/coro/generator/.gitignore b/vespalib/src/tests/coro/generator/.gitignore
new file mode 100644
index 00000000000..748003a81fe
--- /dev/null
+++ b/vespalib/src/tests/coro/generator/.gitignore
@@ -0,0 +1 @@
+/vespalib_generator_bench_app
diff --git a/vespalib/src/tests/coro/generator/CMakeLists.txt b/vespalib/src/tests/coro/generator/CMakeLists.txt
index b4f59c69451..e2534274f7c 100644
--- a/vespalib/src/tests/coro/generator/CMakeLists.txt
+++ b/vespalib/src/tests/coro/generator/CMakeLists.txt
@@ -6,4 +6,11 @@ vespa_add_executable(vespalib_generator_test_app TEST
vespalib
GTest::GTest
)
+vespa_add_executable(vespalib_generator_bench_app TEST
+ SOURCES
+ generator_bench.cpp
+ DEPENDS
+ vespalib
+ GTest::GTest
+)
vespa_add_test(NAME vespalib_generator_test_app COMMAND vespalib_generator_test_app)
diff --git a/vespalib/src/tests/coro/generator/generator_bench.cpp b/vespalib/src/tests/coro/generator/generator_bench.cpp
new file mode 100644
index 00000000000..664132b7ba4
--- /dev/null
+++ b/vespalib/src/tests/coro/generator/generator_bench.cpp
@@ -0,0 +1,58 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/coro/generator.h>
+#include <vespa/vespalib/util/benchmark_timer.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <ranges>
+#include <vector>
+
+using vespalib::coro::Generator;
+using vespalib::BenchmarkTimer;
+
+std::vector<size_t> make_data(size_t size) __attribute__((noinline)); // must match the definition's signature, otherwise noinline lands on an unrelated overload
+std::vector<size_t> make_data(size_t size) {
+ std::vector<size_t> data;
+ for (size_t i = 0; i < size; ++i) {
+ data.push_back(i);
+ }
+ return data;
+}
+
+template <std::ranges::input_range T>
+size_t calc_sum(T&& values) {
+ size_t sum = 0;
+ for (auto&& value: values) {
+ sum += value;
+ }
+ return sum;
+}
+
+size_t calc_sum_direct(const std::vector<size_t> &values) {
+ return calc_sum(values);
+}
+
+size_t calc_sum_wrapped(const std::vector<size_t> &values) {
+ return calc_sum([](const std::vector<size_t> &inner_values)->Generator<size_t>
+ {
+ for (auto&& value: inner_values) {
+ co_yield value;
+ }
+ }(values));
+}
+
+TEST(GeneratorBench, direct_vs_wrapped_vector_for_loop) {
+ std::vector<size_t> data = make_data(100000);
+ double direct_ms = BenchmarkTimer::benchmark([&data](){
+ size_t sink = calc_sum_direct(data);
+ (void) sink;
+ }, 5.0) * 1000.0;
+ fprintf(stderr, "direct: %g ms\n", direct_ms);
+ double wrapped_ms = BenchmarkTimer::benchmark([&data](){
+ size_t sink = calc_sum_wrapped(data);
+ (void) sink;
+ }, 5.0) * 1000.0;
+ fprintf(stderr, "wrapped: %g ms\n", wrapped_ms);
+ fprintf(stderr, "ratio: %g\n", (wrapped_ms/direct_ms));
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()