summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author: Tor Egge <Tor.Egge@broadpark.no> 2019-05-31 15:28:15 +0200
committer: Tor Egge <Tor.Egge@broadpark.no> 2019-05-31 15:28:15 +0200
commit: f7b0a4a774f0e0e8ad58862e729c91609803fc09 (patch)
tree: 61596612a24dca136bf97f47d2f239840bf4a10e /searchlib
parent: 7193310dae6598730c6d27ad2cd4230df35a2445 (diff)
Add support for specifying max number of samples in field length calculator
constructor.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/index/field_length_calculator/field_length_calculator_test.cpp | 8
-rw-r--r-- searchlib/src/vespa/searchlib/index/field_length_calculator.h | 13
2 files changed, 11 insertions, 10 deletions
diff --git a/searchlib/src/tests/index/field_length_calculator/field_length_calculator_test.cpp b/searchlib/src/tests/index/field_length_calculator/field_length_calculator_test.cpp
index f61a7f103fa..c99d241cbc0 100644
--- a/searchlib/src/tests/index/field_length_calculator/field_length_calculator_test.cpp
+++ b/searchlib/src/tests/index/field_length_calculator/field_length_calculator_test.cpp
@@ -36,15 +36,15 @@ TEST(FieldLengthCalculatorTest, startup_is_average)
EXPECT_DOUBLE_EQ((3 + 4 + 7)/3.0, calc.get_average_field_length());
EXPECT_EQ(3, calc.get_num_samples());
calc.add_field_length(9);
- EXPECT_DOUBLE_EQ(5.75, calc.get_average_field_length());
+ EXPECT_DOUBLE_EQ((3 + 4 + 7 + 9)/4.0, calc.get_average_field_length());
EXPECT_EQ(4, calc.get_num_samples());
}
TEST(FieldLengthCalculatorTest, average_until_max_num_samples)
{
- FieldLengthCalculator calc;
+ const uint32_t max_num_samples = 5;
+ FieldLengthCalculator calc(0.0, 0, max_num_samples);
static constexpr double epsilon = 0.000000001; // Allowed difference
- const uint32_t max_num_samples = calc.get_max_num_samples();
for (uint32_t i = 0; i + 1 < max_num_samples; ++i) {
calc.add_field_length(i + 1);
}
@@ -59,7 +59,7 @@ TEST(FieldLengthCalculatorTest, average_until_max_num_samples)
// No longer arithmetic average
EXPECT_LT(arith_avg(max_num_samples + 1), calc.get_average_field_length());
// Switched to exponential decay
- EXPECT_NEAR((arith_avg(max_num_samples) * (max_num_samples - 1) + max_num_samples + 1) / max_num_samples, calc.get_average_field_length(), 0.000000001);
+ EXPECT_NEAR((arith_avg(max_num_samples) * (max_num_samples - 1) + max_num_samples + 1) / max_num_samples, calc.get_average_field_length(), epsilon);
EXPECT_EQ(max_num_samples, calc.get_num_samples());
}
diff --git a/searchlib/src/vespa/searchlib/index/field_length_calculator.h b/searchlib/src/vespa/searchlib/index/field_length_calculator.h
index e3323654e20..50d47ced063 100644
--- a/searchlib/src/vespa/searchlib/index/field_length_calculator.h
+++ b/searchlib/src/vespa/searchlib/index/field_length_calculator.h
@@ -7,12 +7,12 @@ namespace search::index {
/**
* Class used to calculate average field length, with a bias towards
- * the latest field lengths when MAX_NUM_SAMPLES samples have been reached.
+ * the latest field lengths when max_num_samples samples have been reached.
*/
class FieldLengthCalculator {
std::atomic<double> _average_field_length;
uint32_t _num_samples; // Capped by _max_num_samples
- static constexpr uint32_t MAX_NUM_SAMPLES = 100000;
+ uint32_t _max_num_samples;
public:
FieldLengthCalculator()
@@ -20,18 +20,19 @@ public:
{
}
- FieldLengthCalculator(double average_field_length, uint32_t num_samples)
+ FieldLengthCalculator(double average_field_length, uint32_t num_samples, uint32_t max_num_samples = 100000)
: _average_field_length(average_field_length),
- _num_samples(std::min(num_samples, MAX_NUM_SAMPLES))
+ _num_samples(std::min(num_samples, max_num_samples)),
+ _max_num_samples(max_num_samples)
{
}
double get_average_field_length() const { return _average_field_length.load(std::memory_order_relaxed); }
uint32_t get_num_samples() const { return _num_samples; }
- static constexpr uint32_t get_max_num_samples() { return MAX_NUM_SAMPLES; }
+ uint32_t get_max_num_samples() const { return _max_num_samples; } // const: read-only accessor, usable on const instances like the other getters
void add_field_length(uint32_t field_length) {
- if (_num_samples < MAX_NUM_SAMPLES) {
+ if (_num_samples < _max_num_samples) {
++_num_samples;
}
_average_field_length.store((_average_field_length.load(std::memory_order_relaxed) * (_num_samples - 1) + field_length) / _num_samples, std::memory_order_relaxed);