diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2020-03-18 22:09:00 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2020-03-18 22:09:00 +0000 |
commit | 94579ec02f1b9cf90a2ef40dcb9d66ce8dce85e2 (patch) | |
tree | 73f127aa97e76b351d693fcb5816690e465f1f9c | |
parent | 3a785aa5363cb143a04988b39975f33431a54bf4 (diff) |
Use vespalib::hash_set instead of std::set to reduce number of allocation and epeed it up. Also use faster 2^N AND based hash tables.
11 files changed, 90 insertions, 56 deletions
diff --git a/eval/src/vespa/eval/tensor/sparse/CMakeLists.txt b/eval/src/vespa/eval/tensor/sparse/CMakeLists.txt index 8e1d18a87b7..a25d2abb477 100644 --- a/eval/src/vespa/eval/tensor/sparse/CMakeLists.txt +++ b/eval/src/vespa/eval/tensor/sparse/CMakeLists.txt @@ -1,6 +1,7 @@ # Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. vespa_add_library(eval_tensor_sparse OBJECT SOURCES + direct_sparse_tensor_builder.cpp sparse_tensor.cpp sparse_tensor_add.cpp sparse_tensor_address_builder.cpp diff --git a/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.cpp b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.cpp new file mode 100644 index 00000000000..4a28b54d201 --- /dev/null +++ b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.cpp @@ -0,0 +1,62 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "direct_sparse_tensor_builder.h" + +namespace vespalib::tensor { + +void +DirectSparseTensorBuilder::copyCells(const Cells &cells_in) +{ + for (const auto &cell : cells_in) { + SparseTensorAddressRef oldRef = cell.first; + SparseTensorAddressRef newRef(oldRef, _stash); + _cells[newRef] = cell.second; + } +} + +DirectSparseTensorBuilder::DirectSparseTensorBuilder() + : _stash(SparseTensor::STASH_CHUNK_SIZE), + _type(eval::ValueType::double_type()), + _cells() +{ +} + +DirectSparseTensorBuilder::DirectSparseTensorBuilder(const eval::ValueType &type_in) + : _stash(SparseTensor::STASH_CHUNK_SIZE), + _type(type_in), + _cells() +{ +} + +DirectSparseTensorBuilder::DirectSparseTensorBuilder(const eval::ValueType &type_in, const Cells &cells_in) + : _stash(SparseTensor::STASH_CHUNK_SIZE), + _type(type_in), + _cells() +{ + copyCells(cells_in); +} + +DirectSparseTensorBuilder::~DirectSparseTensorBuilder() = default; + +Tensor::UP +DirectSparseTensorBuilder::build() { + return std::make_unique<SparseTensor>(std::move(_type), std::move(_cells), std::move(_stash)); +} + +void +DirectSparseTensorBuilder::insertCell(SparseTensorAddressRef address, double value) { + // This address should not already exist and a new cell should be inserted. + insertCell(address, value, [](double, double) -> double { HDR_ABORT("should not be reached"); }); +} + +void +DirectSparseTensorBuilder::insertCell(SparseTensorAddressBuilder &address, double value) { + // This address should not already exist and a new cell should be inserted. + insertCell(address.getAddressRef(), value, [](double, double) -> double { HDR_ABORT("should not be reached"); }); +} + +void DirectSparseTensorBuilder::reserve(uint32_t estimatedCells) { + _cells.resize(estimatedCells*2); +} + +}
\ No newline at end of file diff --git a/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h index d54c8810a81..f9182a199be 100644 --- a/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h +++ b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h @@ -25,43 +25,13 @@ private: Cells _cells; public: - void - copyCells(const Cells &cells_in) - { - for (const auto &cell : cells_in) { - SparseTensorAddressRef oldRef = cell.first; - SparseTensorAddressRef newRef(oldRef, _stash); - _cells[newRef] = cell.second; - } - } - - DirectSparseTensorBuilder() - : _stash(SparseTensor::STASH_CHUNK_SIZE), - _type(eval::ValueType::double_type()), - _cells() - { - } - - DirectSparseTensorBuilder(const eval::ValueType &type_in) - : _stash(SparseTensor::STASH_CHUNK_SIZE), - _type(type_in), - _cells() - { - } - - DirectSparseTensorBuilder(const eval::ValueType &type_in, const Cells &cells_in) - : _stash(SparseTensor::STASH_CHUNK_SIZE), - _type(type_in), - _cells() - { - copyCells(cells_in); - } + void copyCells(const Cells &cells_in); + DirectSparseTensorBuilder(); + DirectSparseTensorBuilder(const eval::ValueType &type_in); + DirectSparseTensorBuilder(const eval::ValueType &type_in, const Cells &cells_in); + ~DirectSparseTensorBuilder(); - ~DirectSparseTensorBuilder() {}; - - Tensor::UP build() { - return std::make_unique<SparseTensor>(std::move(_type), std::move(_cells), std::move(_stash)); - } + Tensor::UP build(); template <class Function> void insertCell(SparseTensorAddressRef address, double value, Function &&func) @@ -75,10 +45,7 @@ public: } } - void insertCell(SparseTensorAddressRef address, double value) { - // This address should not already exist and a new cell should be inserted. - insertCell(address, value, [](double, double) -> double { HDR_ABORT("should not be reached"); }); - } + void insertCell(SparseTensorAddressRef address, double value); template <class Function> void insertCell(SparseTensorAddressBuilder &address, double value, Function &&func) @@ -86,14 +53,11 @@ public: insertCell(address.getAddressRef(), value, func); } - void insertCell(SparseTensorAddressBuilder &address, double value) { - // This address should not already exist and a new cell should be inserted. - insertCell(address.getAddressRef(), value, [](double, double) -> double { HDR_ABORT("should not be reached"); }); - } + void insertCell(SparseTensorAddressBuilder &address, double value); eval::ValueType &fast_type() { return _type; } Cells &cells() { return _cells; } - void reserve(uint32_t estimatedCells) { _cells.resize(estimatedCells*2); } + void reserve(uint32_t estimatedCells); }; } diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp index 1fc93e8234f..d183c33f5cd 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp @@ -245,5 +245,4 @@ SparseTensor::remove(const CellValues &cellAddresses) const } -VESPALIB_HASH_MAP_INSTANTIATE_H_E_M(vespalib::tensor::SparseTensorAddressRef, double, vespalib::hash<vespalib::tensor::SparseTensorAddressRef>, - std::equal_to<vespalib::tensor::SparseTensorAddressRef>, vespalib::hashtable_base::and_modulator); +VESPALIB_HASH_MAP_INSTANTIATE(vespalib::tensor::SparseTensorAddressRef, double); diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h index 880cd32c605..e5ea639b460 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h @@ -22,7 +22,7 @@ class SparseTensor : public Tensor { public: using Cells = hash_map<SparseTensorAddressRef, double, hash<SparseTensorAddressRef>, - std::equal_to<SparseTensorAddressRef>, hashtable_base::and_modulator>; + std::equal_to<>, hashtable_base::and_modulator>; static constexpr size_t STASH_CHUNK_SIZE = 16384u; diff --git a/searchlib/src/vespa/searchlib/fef/properties.h b/searchlib/src/vespa/searchlib/fef/properties.h index 1cbc0ba9064..377926509bd 100644 --- a/searchlib/src/vespa/searchlib/fef/properties.h +++ b/searchlib/src/vespa/searchlib/fef/properties.h @@ -129,9 +129,10 @@ public: class Properties { private: - typedef vespalib::string Key; - typedef Property::Values Value; - typedef vespalib::hash_map<Key, Value> Map; + using Key = vespalib::string; + using Value = Property::Values; + using Map = vespalib::hash_map<Key, Value, vespalib::hash<Key>, + std::equal_to<>, vespalib::hashtable_base::and_modulator>; uint32_t _numValues; Map _data; diff --git a/searchlib/src/vespa/searchlib/fef/rank_program.cpp b/searchlib/src/vespa/searchlib/fef/rank_program.cpp index 48a9c7be147..0bc85a63ceb 100644 --- a/searchlib/src/vespa/searchlib/fef/rank_program.cpp +++ b/searchlib/src/vespa/searchlib/fef/rank_program.cpp @@ -3,6 +3,7 @@ #include "rank_program.h" #include "featureoverrider.h" #include <vespa/vespalib/locale/c.h> +#include <vespa/vespalib/stllike/hash_set.hpp> #include <algorithm> #include <cassert> @@ -14,7 +15,6 @@ using vespalib::Stash; namespace search::fef { using MappedValues = std::map<const NumberOrObject *, LazyValue>; -using ValueSet = std::set<const NumberOrObject *>; namespace { @@ -179,6 +179,7 @@ RankProgram::setup(const MatchData &md, const auto &specs = _resolver->getExecutorSpecs(); _executors.reserve(specs.size()); + _is_const.resize(specs.size()*2); // Reserve space in hashmap for executors to be const for (uint32_t i = 0; i < specs.size(); ++i) { vespalib::ArrayRef<NumberOrObject> outputs = _hot_stash.create_array<NumberOrObject>(specs[i].output_types.size()); StashSelector stash(_hot_stash, _cold_stash); diff --git a/searchlib/src/vespa/searchlib/fef/rank_program.h b/searchlib/src/vespa/searchlib/fef/rank_program.h index aa6f77abce4..15b7912bfb1 100644 --- a/searchlib/src/vespa/searchlib/fef/rank_program.h +++ b/searchlib/src/vespa/searchlib/fef/rank_program.h @@ -10,7 +10,7 @@ #include <vespa/vespalib/stllike/string.h> #include <vespa/vespalib/util/array.h> #include <vespa/vespalib/util/stash.h> -#include <set> +#include <vespa/vespalib/stllike/hash_set.h> namespace search::fef { @@ -31,7 +31,8 @@ private: RankProgram &operator=(const RankProgram &) = delete; using MappedValues = std::map<const NumberOrObject *, LazyValue>; - using ValueSet = std::set<const NumberOrObject *>; + using ValueSet = vespalib::hash_set<const NumberOrObject *, vespalib::hash<const NumberOrObject *>, + std::equal_to<>, vespalib::hashtable_base::and_modulator>; BlueprintResolver::SP _resolver; vespalib::Stash _hot_stash; diff --git a/vespalib/src/tests/stllike/hash_test.cpp b/vespalib/src/tests/stllike/hash_test.cpp index 017a16ee7b6..b39b6859623 100644 --- a/vespalib/src/tests/stllike/hash_test.cpp +++ b/vespalib/src/tests/stllike/hash_test.cpp @@ -356,6 +356,9 @@ TEST("test hash set find") EXPECT_TRUE(*set.find(S(1)) == S(1)); auto cit = set.find<uint32_t>(7); EXPECT_TRUE(*cit == S(7)); + + EXPECT_EQUAL(1u, set.count(S(7))); + EXPECT_EQUAL(0u, set.count(S(10007))); } TEST("test hash set range constructor") diff --git a/vespalib/src/vespa/vespalib/stllike/hash_map.hpp b/vespalib/src/vespa/vespalib/stllike/hash_map.hpp index 08edcf3c837..0b474afa750 100644 --- a/vespalib/src/vespa/vespalib/stllike/hash_map.hpp +++ b/vespalib/src/vespa/vespalib/stllike/hash_map.hpp @@ -74,10 +74,11 @@ hash_map<K, V, H, EQ, M>::getMemoryUsed() const vespalib::hashtable<K, std::pair<K,V>, H, E, vespalib::Select1st<std::pair<K,V>>, M>::insert(std::pair<K,V> &&); \ template vespalib::hashtable<K, std::pair<K,V>, H, E, vespalib::Select1st<std::pair<K,V>>, M>::insert_result \ vespalib::hashtable<K, std::pair<K,V>, H, E, vespalib::Select1st<std::pair<K,V>>, M>::insertInternal(std::pair<K,V> &&); \ - template class vespalib::Array<vespalib::hash_node<std::pair<K,V>>>; #define VESPALIB_HASH_MAP_INSTANTIATE_H_E(K, V, H, E) \ - VESPALIB_HASH_MAP_INSTANTIATE_H_E_M(K, V, H, E, vespalib::hashtable_base::prime_modulator) + template class vespalib::Array<vespalib::hash_node<std::pair<K,V>>>; \ + VESPALIB_HASH_MAP_INSTANTIATE_H_E_M(K, V, H, E, vespalib::hashtable_base::prime_modulator) \ + VESPALIB_HASH_MAP_INSTANTIATE_H_E_M(K, V, H, E, vespalib::hashtable_base::and_modulator) #define VESPALIB_HASH_MAP_INSTANTIATE_H(K, V, H) VESPALIB_HASH_MAP_INSTANTIATE_H_E(K, V, H, std::equal_to<>) diff --git a/vespalib/src/vespa/vespalib/stllike/hash_set.h b/vespalib/src/vespa/vespalib/stllike/hash_set.h index 8d315ebfd07..08288086bf3 100644 --- a/vespalib/src/vespa/vespalib/stllike/hash_set.h +++ b/vespalib/src/vespa/vespalib/stllike/hash_set.h @@ -42,6 +42,7 @@ public: template<typename InputIt> void insert(InputIt first, InputIt last); void erase(const K & key); + size_t count(const K & key) const { return _ht.find(key) != end() ? 1 : 0; } iterator find(const K & key) { return _ht.find(key); } const_iterator find(const K & key) const { return _ht.find(key); } |