summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorHaavard <havardpe@yahoo-inc.com>2017-02-14 15:43:57 +0000
committerHaavard <havardpe@yahoo-inc.com>2017-02-15 12:27:02 +0000
commitbc4d31c73a9bb450f251881afc01a00fd1f843ba (patch)
tree8c245873f615eaf24258726954ab96ea69f61768 /searchlib
parent35e47c065c8ce01f8813b1f344994535c5e7fc32 (diff)
split executors into hot and cold stash
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/vespa/searchlib/fef/rank_program.cpp202
-rw-r--r--searchlib/src/vespa/searchlib/fef/rank_program.h18
2 files changed, 131 insertions, 89 deletions
diff --git a/searchlib/src/vespa/searchlib/fef/rank_program.cpp b/searchlib/src/vespa/searchlib/fef/rank_program.cpp
index 62b0cab9498..cb19e3ca5b0 100644
--- a/searchlib/src/vespa/searchlib/fef/rank_program.cpp
+++ b/searchlib/src/vespa/searchlib/fef/rank_program.cpp
@@ -8,9 +8,13 @@ LOG_SETUP(".fef.rank_program");
#include <algorithm>
#include <set>
+using vespalib::Stash;
+
namespace search {
namespace fef {
+using MappedValues = std::map<const NumberOrObject *, const NumberOrObject *>;
+
namespace {
struct Override
@@ -57,89 +61,123 @@ std::vector<Override> prepare_overrides(const BlueprintResolver::FeatureMap &fea
}
struct UnboxingExecutor : FeatureExecutor {
- bool isPure() override { return true; }
- void execute(uint32_t) override {
- outputs().set_number(0, inputs().get_object(0).get().as_double());
+ const NumberOrObject &input;
+ NumberOrObject &output;
+ UnboxingExecutor(const NumberOrObject &input_in, NumberOrObject &output_in)
+ : input(input_in), output(output_in) {}
+ void execute(uint32_t) override { output.as_number = input.as_object.get().as_double(); }
+};
+
+class Features {
+private:
+ vespalib::ArrayRef<NumberOrObject> _features;
+ size_t _used;
+public:
+ explicit Features(vespalib::ArrayRef<NumberOrObject> features)
+ : _features(features), _used(0) {}
+ vespalib::ArrayRef<NumberOrObject> alloc(size_t cnt) {
+ assert((_used + cnt) <= _features.size());
+ NumberOrObject *begin = &_features[_used];
+ _used += cnt;
+ return vespalib::ArrayRef<NumberOrObject>(begin, cnt);
}
+ bool is_full() const { return (_used == _features.size()); }
};
-} // namespace search::fef::<unnamed>
+class StashSelector {
+private:
+ Stash &_primary;
+ Stash &_secondary;
+ bool _use_primary;
+ Stash::Mark _primary_mark;
+public:
+ StashSelector(Stash &primary, Stash &secondary)
+ : _primary(primary), _secondary(secondary),
+ _use_primary(true), _primary_mark(primary.mark()) {}
+ Stash &get() const { return _use_primary ? _primary : _secondary; }
+ void use_secondary() {
+ assert(_use_primary);
+ _use_primary = false;
+ _primary.revert(_primary_mark);
+ }
+};
-size_t
-RankProgram::count_features() const
-{
- size_t cnt = 0;
- const auto &specs = _resolver->getExecutorSpecs();
- for (const auto &entry: specs) {
- cnt += entry.output_types.size(); // normal outputs
+class ProgramBuilder {
+private:
+ std::vector<FeatureExecutor *> _program;
+ std::set<const NumberOrObject *> _is_calculated;
+public:
+ ProgramBuilder() : _program(), _is_calculated() {}
+ bool is_calculated(const NumberOrObject *raw_value) const {
+ return (_is_calculated.count(raw_value) == 1);
}
- for (const auto &seed_entry: _resolver->getSeedMap()) {
- auto seed = seed_entry.second;
- if (specs[seed.executor].output_types[seed.output]) {
- ++cnt; // unboxed seeds
+ void add(FeatureExecutor *executor, bool is_const) {
+ if (is_const) {
+ executor->execute(1);
+ const auto &outputs = executor->outputs();
+ for (size_t out_idx = 0; out_idx < outputs.size(); ++out_idx) {
+ _is_calculated.insert(outputs.get_raw(out_idx));
+ }
+ } else {
+ _program.push_back(executor);
}
}
- return cnt;
-}
+ void unbox(const NumberOrObject &input, NumberOrObject &output, Stash &stash) {
+ if (is_calculated(&input)) {
+ output.as_number = input.as_object.get().as_double();
+ } else {
+ _program.push_back(&stash.create<UnboxingExecutor>(input, output));
+ }
+ }
+ const std::vector<FeatureExecutor *> &get() const { return _program; }
+};
-void
-RankProgram::add_unboxing_executors(vespalib::ArrayRef<NumberOrObject> features, size_t feature_offset, size_t total_features)
+bool executor_is_const(FeatureExecutor *executor,
+ const ProgramBuilder &program,
+ const std::vector<FeatureExecutor *> &executors,
+ const std::vector<BlueprintResolver::FeatureRef> &inputs)
{
- const auto &specs = _resolver->getExecutorSpecs();
- for (const auto &seed_entry: _resolver->getSeedMap()) {
- auto seed = seed_entry.second;
- if (specs[seed.executor].output_types[seed.output]) {
- vespalib::ArrayRef<const NumberOrObject *> inputs = _stash.create_array<const NumberOrObject *>(1);
- inputs[0] = _executors[seed.executor]->outputs().get_raw(seed.output);
- vespalib::ArrayRef<NumberOrObject> outputs(&features[feature_offset++], 1);
- _unboxed_seeds[inputs[0]] = &outputs[0];
- _executors.emplace_back(&_stash.create<UnboxingExecutor>());
- _executors.back()->bind_inputs(inputs);
- _executors.back()->bind_outputs(outputs);
- _executors.back()->bind_match_data(*_match_data);
+ if (!executor->isPure()) {
+ return false;
+ }
+ for (const auto &ref: inputs) {
+ if (!program.is_calculated(executors[ref.executor]->outputs().get_raw(ref.output))) {
+ return false;
}
}
- assert(feature_offset == total_features);
+ return true;
}
-void
-RankProgram::compile()
-{
- std::vector<FeatureExecutor *> program;
- std::set<const NumberOrObject *> is_calculated;
- for (size_t i = 0; i < _executors.size(); ++i) {
- FeatureExecutor &executor = *_executors[i];
- bool is_const = executor.isPure();
- const auto &inputs = executor.inputs();
- for (size_t in_idx = 0; is_const && (in_idx < inputs.size()); ++in_idx) {
- is_const &= (is_calculated.count(inputs.get_raw(in_idx)) > 0);
- }
- if (is_const) {
- executor.execute(1);
- const auto &outputs = executor.outputs();
- for (size_t out_idx = 0; out_idx < outputs.size(); ++out_idx) {
- is_calculated.insert(outputs.get_raw(out_idx));
- }
- } else {
- program.push_back(&executor);
+size_t count_features(const BlueprintResolver &resolver) {
+ size_t cnt = 0;
+ const auto &specs = resolver.getExecutorSpecs();
+ for (const auto &entry: specs) {
+ cnt += entry.output_types.size(); // normal outputs
+ }
+ for (const auto &seed_entry: resolver.getSeedMap()) {
+ auto seed = seed_entry.second;
+ if (specs[seed.executor].output_types[seed.output]) {
+ ++cnt; // unboxed seeds
}
}
- _program = _stash.copy_array<FeatureExecutor *>(program);
+ return cnt;
}
-FeatureResolver
-RankProgram::resolve(const BlueprintResolver::FeatureMap &features, bool unbox_seeds) const
+FeatureResolver resolve(const BlueprintResolver::FeatureMap &features,
+ const BlueprintResolver::ExecutorSpecList &specs,
+ const std::vector<FeatureExecutor *> &executors,
+ const MappedValues &unboxed_seeds,
+ bool unbox_seeds)
{
FeatureResolver result(features.size());
- const auto &specs = _resolver->getExecutorSpecs();
for (const auto &entry: features) {
const auto &name = entry.first;
auto ref = entry.second;
bool is_object = specs[ref.executor].output_types[ref.output];
- const NumberOrObject *raw_value = _executors[ref.executor]->outputs().get_raw(ref.output);
+ const NumberOrObject *raw_value = executors[ref.executor]->outputs().get_raw(ref.output);
if (is_object && unbox_seeds) {
- auto pos = _unboxed_seeds.find(raw_value);
- if (pos != _unboxed_seeds.end()) {
+ auto pos = unboxed_seeds.find(raw_value);
+ if (pos != unboxed_seeds.end()) {
raw_value = pos->second;
is_object = false;
}
@@ -149,10 +187,13 @@ RankProgram::resolve(const BlueprintResolver::FeatureMap &features, bool unbox_s
return result;
}
+} // namespace search::fef::<unnamed>
+
RankProgram::RankProgram(BlueprintResolver::SP resolver)
: _resolver(resolver),
_match_data(),
- _stash(32768),
+ _hot_stash(32768),
+ _cold_stash(),
_program(),
_executors(),
_unboxed_seeds()
@@ -170,44 +211,59 @@ RankProgram::setup(const MatchDataLayout &mdl_in,
auto override = overrides.begin();
auto override_end = overrides.end();
- size_t feature_offset = 0;
- size_t total_features = count_features();
- vespalib::ArrayRef<NumberOrObject> features = _stash.create_array<NumberOrObject>(total_features);
+ ProgramBuilder program;
+ Features features(_hot_stash.create_array<NumberOrObject>(count_features(*_resolver)));
const auto &specs = _resolver->getExecutorSpecs();
for (uint32_t i = 0; i < specs.size(); ++i) {
+ StashSelector stash(_hot_stash, _cold_stash);
+ FeatureExecutor *executor = &(specs[i].blueprint->createExecutor(queryEnv, stash.get()));
+ bool is_const = executor_is_const(executor, program, _executors, specs[i].inputs);
+ if (is_const) {
+ stash.use_secondary();
+ executor = &(specs[i].blueprint->createExecutor(queryEnv, stash.get()));
+ is_const = executor->isPure();
+ }
size_t num_inputs = specs[i].inputs.size();
- vespalib::ArrayRef<const NumberOrObject *> inputs = _stash.create_array<const NumberOrObject *>(num_inputs);
+ vespalib::ArrayRef<const NumberOrObject *> inputs = stash.get().create_array<const NumberOrObject *>(num_inputs);
for (size_t input_idx = 0; input_idx < num_inputs; ++input_idx) {
auto ref = specs[i].inputs[input_idx];
inputs[input_idx] = _executors[ref.executor]->outputs().get_raw(ref.output);
}
- size_t num_outputs = specs[i].output_types.size();
- vespalib::ArrayRef<NumberOrObject> outputs(&features[feature_offset], num_outputs);
- feature_offset += num_outputs;
- FeatureExecutor *executor = &(specs[i].blueprint->createExecutor(queryEnv, _stash));
+ vespalib::ArrayRef<NumberOrObject> outputs = features.alloc(specs[i].output_types.size());
for (; (override < override_end) && (override->ref.executor == i); ++override) {
FeatureExecutor *tmp = executor;
- executor = &(_stash.create<FeatureOverrider>(*tmp, override->ref.output, override->value));
+ executor = &(stash.get().create<FeatureOverrider>(*tmp, override->ref.output, override->value));
}
executor->bind_inputs(inputs);
executor->bind_outputs(outputs);
executor->bind_match_data(*_match_data);
_executors.push_back(executor);
+ program.add(executor, is_const);
+ }
+ for (const auto &seed_entry: _resolver->getSeedMap()) {
+ auto seed = seed_entry.second;
+ if (specs[seed.executor].output_types[seed.output]) {
+ const NumberOrObject &input = *_executors[seed.executor]->outputs().get_raw(seed.output);
+ NumberOrObject &output = features.alloc(1)[0];
+ _unboxed_seeds[&input] = &output;
+ program.unbox(input, output, _hot_stash);
+ }
}
- add_unboxing_executors(features, feature_offset, total_features);
- compile();
+ _program = _hot_stash.copy_array<FeatureExecutor *>(program.get());
+ assert(_executors.size() == specs.size());
+ assert(features.is_full());
}
FeatureResolver
RankProgram::get_seeds(bool unbox_seeds) const
{
- return resolve(_resolver->getSeedMap(), unbox_seeds);
+ return resolve(_resolver->getSeedMap(), _resolver->getExecutorSpecs(), _executors, _unboxed_seeds, unbox_seeds);
}
FeatureResolver
RankProgram::get_all_features(bool unbox_seeds) const
{
- return resolve(_resolver->getFeatureMap(), unbox_seeds);
+ return resolve(_resolver->getFeatureMap(), _resolver->getExecutorSpecs(), _executors, _unboxed_seeds, unbox_seeds);
}
} // namespace fef
diff --git a/searchlib/src/vespa/searchlib/fef/rank_program.h b/searchlib/src/vespa/searchlib/fef/rank_program.h
index 029f021762f..b8f60dac71e 100644
--- a/searchlib/src/vespa/searchlib/fef/rank_program.h
+++ b/searchlib/src/vespa/searchlib/fef/rank_program.h
@@ -32,26 +32,12 @@ private:
BlueprintResolver::SP _resolver;
MatchData::UP _match_data;
- vespalib::Stash _stash;
+ vespalib::Stash _hot_stash;
+ vespalib::Stash _cold_stash;
vespalib::ArrayRef<FeatureExecutor *> _program;
std::vector<FeatureExecutor *> _executors;
MappedValues _unboxed_seeds;
- size_t count_features() const;
-
- /**
- * Add unboxing executors for seeds that are object features to
- * make sure all output values are numbers.
- **/
- void add_unboxing_executors(vespalib::ArrayRef<NumberOrObject> features, size_t feature_offset, size_t total_features);
-
- /**
- * Prepare the final program and evaluate all constant features.
- **/
- void compile();
-
- FeatureResolver resolve(const BlueprintResolver::FeatureMap &features, bool unbox_seeds) const;
-
public:
typedef std::unique_ptr<RankProgram> UP;