summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2020-03-18 22:09:00 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2020-03-18 22:09:00 +0000
commit94579ec02f1b9cf90a2ef40dcb9d66ce8dce85e2 (patch)
tree73f127aa97e76b351d693fcb5816690e465f1f9c
parent3a785aa5363cb143a04988b39975f33431a54bf4 (diff)
Use vespalib::hash_set instead of std::set to reduce number of allocation and epeed it up. Also use faster 2^N AND based hash tables.
-rw-r--r--eval/src/vespa/eval/tensor/sparse/CMakeLists.txt1
-rw-r--r--eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.cpp62
-rw-r--r--eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h54
-rw-r--r--eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp3
-rw-r--r--eval/src/vespa/eval/tensor/sparse/sparse_tensor.h2
-rw-r--r--searchlib/src/vespa/searchlib/fef/properties.h7
-rw-r--r--searchlib/src/vespa/searchlib/fef/rank_program.cpp3
-rw-r--r--searchlib/src/vespa/searchlib/fef/rank_program.h5
-rw-r--r--vespalib/src/tests/stllike/hash_test.cpp3
-rw-r--r--vespalib/src/vespa/vespalib/stllike/hash_map.hpp5
-rw-r--r--vespalib/src/vespa/vespalib/stllike/hash_set.h1
11 files changed, 90 insertions, 56 deletions
diff --git a/eval/src/vespa/eval/tensor/sparse/CMakeLists.txt b/eval/src/vespa/eval/tensor/sparse/CMakeLists.txt
index 8e1d18a87b7..a25d2abb477 100644
--- a/eval/src/vespa/eval/tensor/sparse/CMakeLists.txt
+++ b/eval/src/vespa/eval/tensor/sparse/CMakeLists.txt
@@ -1,6 +1,7 @@
# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
vespa_add_library(eval_tensor_sparse OBJECT
SOURCES
+ direct_sparse_tensor_builder.cpp
sparse_tensor.cpp
sparse_tensor_add.cpp
sparse_tensor_address_builder.cpp
diff --git a/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.cpp b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.cpp
new file mode 100644
index 00000000000..4a28b54d201
--- /dev/null
+++ b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.cpp
@@ -0,0 +1,62 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "direct_sparse_tensor_builder.h"
+
+namespace vespalib::tensor {
+
+void
+DirectSparseTensorBuilder::copyCells(const Cells &cells_in)
+{
+ for (const auto &cell : cells_in) {
+ SparseTensorAddressRef oldRef = cell.first;
+ SparseTensorAddressRef newRef(oldRef, _stash);
+ _cells[newRef] = cell.second;
+ }
+}
+
+DirectSparseTensorBuilder::DirectSparseTensorBuilder()
+ : _stash(SparseTensor::STASH_CHUNK_SIZE),
+ _type(eval::ValueType::double_type()),
+ _cells()
+{
+}
+
+DirectSparseTensorBuilder::DirectSparseTensorBuilder(const eval::ValueType &type_in)
+ : _stash(SparseTensor::STASH_CHUNK_SIZE),
+ _type(type_in),
+ _cells()
+{
+}
+
+DirectSparseTensorBuilder::DirectSparseTensorBuilder(const eval::ValueType &type_in, const Cells &cells_in)
+ : _stash(SparseTensor::STASH_CHUNK_SIZE),
+ _type(type_in),
+ _cells()
+{
+ copyCells(cells_in);
+}
+
+DirectSparseTensorBuilder::~DirectSparseTensorBuilder() = default;
+
+Tensor::UP
+DirectSparseTensorBuilder::build() {
+ return std::make_unique<SparseTensor>(std::move(_type), std::move(_cells), std::move(_stash));
+}
+
+void
+DirectSparseTensorBuilder::insertCell(SparseTensorAddressRef address, double value) {
+ // This address should not already exist and a new cell should be inserted.
+ insertCell(address, value, [](double, double) -> double { HDR_ABORT("should not be reached"); });
+}
+
+void
+DirectSparseTensorBuilder::insertCell(SparseTensorAddressBuilder &address, double value) {
+ // This address should not already exist and a new cell should be inserted.
+ insertCell(address.getAddressRef(), value, [](double, double) -> double { HDR_ABORT("should not be reached"); });
+}
+
+void DirectSparseTensorBuilder::reserve(uint32_t estimatedCells) {
+ _cells.resize(estimatedCells*2);
+}
+
+} \ No newline at end of file
diff --git a/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h
index d54c8810a81..f9182a199be 100644
--- a/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h
+++ b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h
@@ -25,43 +25,13 @@ private:
Cells _cells;
public:
- void
- copyCells(const Cells &cells_in)
- {
- for (const auto &cell : cells_in) {
- SparseTensorAddressRef oldRef = cell.first;
- SparseTensorAddressRef newRef(oldRef, _stash);
- _cells[newRef] = cell.second;
- }
- }
-
- DirectSparseTensorBuilder()
- : _stash(SparseTensor::STASH_CHUNK_SIZE),
- _type(eval::ValueType::double_type()),
- _cells()
- {
- }
-
- DirectSparseTensorBuilder(const eval::ValueType &type_in)
- : _stash(SparseTensor::STASH_CHUNK_SIZE),
- _type(type_in),
- _cells()
- {
- }
-
- DirectSparseTensorBuilder(const eval::ValueType &type_in, const Cells &cells_in)
- : _stash(SparseTensor::STASH_CHUNK_SIZE),
- _type(type_in),
- _cells()
- {
- copyCells(cells_in);
- }
+ void copyCells(const Cells &cells_in);
+ DirectSparseTensorBuilder();
+ DirectSparseTensorBuilder(const eval::ValueType &type_in);
+ DirectSparseTensorBuilder(const eval::ValueType &type_in, const Cells &cells_in);
+ ~DirectSparseTensorBuilder();
- ~DirectSparseTensorBuilder() {};
-
- Tensor::UP build() {
- return std::make_unique<SparseTensor>(std::move(_type), std::move(_cells), std::move(_stash));
- }
+ Tensor::UP build();
template <class Function>
void insertCell(SparseTensorAddressRef address, double value, Function &&func)
@@ -75,10 +45,7 @@ public:
}
}
- void insertCell(SparseTensorAddressRef address, double value) {
- // This address should not already exist and a new cell should be inserted.
- insertCell(address, value, [](double, double) -> double { HDR_ABORT("should not be reached"); });
- }
+ void insertCell(SparseTensorAddressRef address, double value);
template <class Function>
void insertCell(SparseTensorAddressBuilder &address, double value, Function &&func)
@@ -86,14 +53,11 @@ public:
insertCell(address.getAddressRef(), value, func);
}
- void insertCell(SparseTensorAddressBuilder &address, double value) {
- // This address should not already exist and a new cell should be inserted.
- insertCell(address.getAddressRef(), value, [](double, double) -> double { HDR_ABORT("should not be reached"); });
- }
+ void insertCell(SparseTensorAddressBuilder &address, double value);
eval::ValueType &fast_type() { return _type; }
Cells &cells() { return _cells; }
- void reserve(uint32_t estimatedCells) { _cells.resize(estimatedCells*2); }
+ void reserve(uint32_t estimatedCells);
};
}
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp
index 1fc93e8234f..d183c33f5cd 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp
@@ -245,5 +245,4 @@ SparseTensor::remove(const CellValues &cellAddresses) const
}
-VESPALIB_HASH_MAP_INSTANTIATE_H_E_M(vespalib::tensor::SparseTensorAddressRef, double, vespalib::hash<vespalib::tensor::SparseTensorAddressRef>,
- std::equal_to<vespalib::tensor::SparseTensorAddressRef>, vespalib::hashtable_base::and_modulator);
+VESPALIB_HASH_MAP_INSTANTIATE(vespalib::tensor::SparseTensorAddressRef, double);
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h
index 880cd32c605..e5ea639b460 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h
@@ -22,7 +22,7 @@ class SparseTensor : public Tensor
{
public:
using Cells = hash_map<SparseTensorAddressRef, double, hash<SparseTensorAddressRef>,
- std::equal_to<SparseTensorAddressRef>, hashtable_base::and_modulator>;
+ std::equal_to<>, hashtable_base::and_modulator>;
static constexpr size_t STASH_CHUNK_SIZE = 16384u;
diff --git a/searchlib/src/vespa/searchlib/fef/properties.h b/searchlib/src/vespa/searchlib/fef/properties.h
index 1cbc0ba9064..377926509bd 100644
--- a/searchlib/src/vespa/searchlib/fef/properties.h
+++ b/searchlib/src/vespa/searchlib/fef/properties.h
@@ -129,9 +129,10 @@ public:
class Properties
{
private:
- typedef vespalib::string Key;
- typedef Property::Values Value;
- typedef vespalib::hash_map<Key, Value> Map;
+ using Key = vespalib::string;
+ using Value = Property::Values;
+ using Map = vespalib::hash_map<Key, Value, vespalib::hash<Key>,
+ std::equal_to<>, vespalib::hashtable_base::and_modulator>;
uint32_t _numValues;
Map _data;
diff --git a/searchlib/src/vespa/searchlib/fef/rank_program.cpp b/searchlib/src/vespa/searchlib/fef/rank_program.cpp
index 48a9c7be147..0bc85a63ceb 100644
--- a/searchlib/src/vespa/searchlib/fef/rank_program.cpp
+++ b/searchlib/src/vespa/searchlib/fef/rank_program.cpp
@@ -3,6 +3,7 @@
#include "rank_program.h"
#include "featureoverrider.h"
#include <vespa/vespalib/locale/c.h>
+#include <vespa/vespalib/stllike/hash_set.hpp>
#include <algorithm>
#include <cassert>
@@ -14,7 +15,6 @@ using vespalib::Stash;
namespace search::fef {
using MappedValues = std::map<const NumberOrObject *, LazyValue>;
-using ValueSet = std::set<const NumberOrObject *>;
namespace {
@@ -179,6 +179,7 @@ RankProgram::setup(const MatchData &md,
const auto &specs = _resolver->getExecutorSpecs();
_executors.reserve(specs.size());
+ _is_const.resize(specs.size()*2); // Reserve space in hashmap for executors to be const
for (uint32_t i = 0; i < specs.size(); ++i) {
vespalib::ArrayRef<NumberOrObject> outputs = _hot_stash.create_array<NumberOrObject>(specs[i].output_types.size());
StashSelector stash(_hot_stash, _cold_stash);
diff --git a/searchlib/src/vespa/searchlib/fef/rank_program.h b/searchlib/src/vespa/searchlib/fef/rank_program.h
index aa6f77abce4..15b7912bfb1 100644
--- a/searchlib/src/vespa/searchlib/fef/rank_program.h
+++ b/searchlib/src/vespa/searchlib/fef/rank_program.h
@@ -10,7 +10,7 @@
#include <vespa/vespalib/stllike/string.h>
#include <vespa/vespalib/util/array.h>
#include <vespa/vespalib/util/stash.h>
-#include <set>
+#include <vespa/vespalib/stllike/hash_set.h>
namespace search::fef {
@@ -31,7 +31,8 @@ private:
RankProgram &operator=(const RankProgram &) = delete;
using MappedValues = std::map<const NumberOrObject *, LazyValue>;
- using ValueSet = std::set<const NumberOrObject *>;
+ using ValueSet = vespalib::hash_set<const NumberOrObject *, vespalib::hash<const NumberOrObject *>,
+ std::equal_to<>, vespalib::hashtable_base::and_modulator>;
BlueprintResolver::SP _resolver;
vespalib::Stash _hot_stash;
diff --git a/vespalib/src/tests/stllike/hash_test.cpp b/vespalib/src/tests/stllike/hash_test.cpp
index 017a16ee7b6..b39b6859623 100644
--- a/vespalib/src/tests/stllike/hash_test.cpp
+++ b/vespalib/src/tests/stllike/hash_test.cpp
@@ -356,6 +356,9 @@ TEST("test hash set find")
EXPECT_TRUE(*set.find(S(1)) == S(1));
auto cit = set.find<uint32_t>(7);
EXPECT_TRUE(*cit == S(7));
+
+ EXPECT_EQUAL(1u, set.count(S(7)));
+ EXPECT_EQUAL(0u, set.count(S(10007)));
}
TEST("test hash set range constructor")
diff --git a/vespalib/src/vespa/vespalib/stllike/hash_map.hpp b/vespalib/src/vespa/vespalib/stllike/hash_map.hpp
index 08edcf3c837..0b474afa750 100644
--- a/vespalib/src/vespa/vespalib/stllike/hash_map.hpp
+++ b/vespalib/src/vespa/vespalib/stllike/hash_map.hpp
@@ -74,10 +74,11 @@ hash_map<K, V, H, EQ, M>::getMemoryUsed() const
vespalib::hashtable<K, std::pair<K,V>, H, E, vespalib::Select1st<std::pair<K,V>>, M>::insert(std::pair<K,V> &&); \
template vespalib::hashtable<K, std::pair<K,V>, H, E, vespalib::Select1st<std::pair<K,V>>, M>::insert_result \
vespalib::hashtable<K, std::pair<K,V>, H, E, vespalib::Select1st<std::pair<K,V>>, M>::insertInternal(std::pair<K,V> &&); \
- template class vespalib::Array<vespalib::hash_node<std::pair<K,V>>>;
#define VESPALIB_HASH_MAP_INSTANTIATE_H_E(K, V, H, E) \
- VESPALIB_HASH_MAP_INSTANTIATE_H_E_M(K, V, H, E, vespalib::hashtable_base::prime_modulator)
+ template class vespalib::Array<vespalib::hash_node<std::pair<K,V>>>; \
+ VESPALIB_HASH_MAP_INSTANTIATE_H_E_M(K, V, H, E, vespalib::hashtable_base::prime_modulator) \
+ VESPALIB_HASH_MAP_INSTANTIATE_H_E_M(K, V, H, E, vespalib::hashtable_base::and_modulator)
#define VESPALIB_HASH_MAP_INSTANTIATE_H(K, V, H) VESPALIB_HASH_MAP_INSTANTIATE_H_E(K, V, H, std::equal_to<>)
diff --git a/vespalib/src/vespa/vespalib/stllike/hash_set.h b/vespalib/src/vespa/vespalib/stllike/hash_set.h
index 8d315ebfd07..08288086bf3 100644
--- a/vespalib/src/vespa/vespalib/stllike/hash_set.h
+++ b/vespalib/src/vespa/vespalib/stllike/hash_set.h
@@ -42,6 +42,7 @@ public:
template<typename InputIt>
void insert(InputIt first, InputIt last);
void erase(const K & key);
+ size_t count(const K & key) const { return _ht.find(key) != end() ? 1 : 0; }
iterator find(const K & key) { return _ht.find(key); }
const_iterator find(const K & key) const { return _ht.find(key); }