diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-12-18 15:28:13 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-18 15:28:13 +0100 |
commit | b685f9b60ccf376f816d0a3da884e6d0b61a818c (patch) | |
tree | 7616aad7538a128224301857b949197ce81e3abd | |
parent | eb0c8204fa2b377542959c2ddfb4002468cea0ec (diff) | |
parent | 64a0c8e97e097ea6efb8e828cd67025d3da83a92 (diff) |
Merge pull request #29691 from vespa-engine/havardpe/dump-blueprint-cost
dump blueprint cost
9 files changed, 331 insertions, 41 deletions
diff --git a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp index 20cf2008e4b..eefca5c82e9 100644 --- a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp @@ -646,6 +646,7 @@ getExpectedBlueprint() " tree_size: 2\n" " allow_termwise_eval: false\n" " }\n" + " cost: 1\n" " sourceId: 4294967295\n" " docid_limit: 0\n" " children: std::vector {\n" @@ -666,6 +667,7 @@ getExpectedBlueprint() " tree_size: 1\n" " allow_termwise_eval: true\n" " }\n" + " cost: 1\n" " sourceId: 4294967295\n" " docid_limit: 0\n" " }\n" @@ -696,6 +698,7 @@ getExpectedSlimeBlueprint() { " tree_size: 2," " allow_termwise_eval: false" " }," + " cost: 1.0," " sourceId: 4294967295," " docid_limit: 0," " children: {" @@ -721,6 +724,7 @@ getExpectedSlimeBlueprint() { " tree_size: 1," " allow_termwise_eval: true" " }," + " cost: 1.0," " sourceId: 4294967295," " docid_limit: 0" " }" diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp index 53803c9c9e7..234ff5a9d19 100644 --- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -14,6 +14,10 @@ #include <vespa/searchlib/test/diskindex/testdiskindex.h> #include <vespa/searchlib/query/tree/simplequery.h> #include <vespa/searchlib/common/bitvectoriterator.h> +#include <vespa/vespalib/util/overload.h> +#include <vespa/vespalib/data/simple_buffer.h> +#include <vespa/vespalib/data/slime/slime.h> +#include <vespa/vespalib/data/slime/inserter.h> #include <filesystem> #include <vespa/log/log.h> @@ -24,6 +28,11 @@ using namespace search::fef; using namespace search::query; using search::BitVector; using BlueprintVector = std::vector<std::unique_ptr<Blueprint>>; +using vespalib::Slime; +using vespalib::slime::Inspector; +using vespalib::slime::SlimeInserter; +using vespalib::make_string_short::fmt; +using Path = std::vector<std::variant<size_t,vespalib::stringref>>; struct InvalidSelector : ISourceSelector { InvalidSelector() : ISourceSelector(Source()) {} @@ -480,13 +489,62 @@ struct SourceBlenderTestFixture { void addChildrenForSimpleSBTest(IntermediateBlueprint & parent); }; +vespalib::string path_to_str(const Path &path) { + size_t cnt = 0; + vespalib::string str("["); + for (const auto &item: path) { + if (cnt++ > 0) { + str.append(","); + } + std::visit(vespalib::overload{ + [&str](size_t value)noexcept{ str.append(fmt("%zu", value)); }, + [&str](vespalib::stringref value)noexcept{ str.append(value); }}, item); + } + str.append("]"); + return str; +} + +vespalib::string to_str(const Inspector &value) { + if (!value.valid()) { + return "<missing>"; + } + vespalib::SimpleBuffer buf; + vespalib::slime::JsonFormat::encode(value, buf, true); + return buf.get().make_string(); +} + +void compare(const Blueprint &bp1, const Blueprint &bp2, bool expect_eq) { + auto ignore_cost = [expect_eq](const auto &path, const auto &a, const auto &b) { + if (!path.empty() && std::holds_alternative<vespalib::stringref>(path.back())) { + vespalib::stringref field = std::get<vespalib::stringref>(path.back()); + if (field == "cost") { + return true; + } + } + if (expect_eq) { + fprintf(stderr, " mismatch at %s: %s vs %s\n", path_to_str(path).c_str(), + to_str(a).c_str(), to_str(b).c_str()); + } + return false; + }; + Slime a; + Slime b; + bp1.asSlime(SlimeInserter(a)); + bp2.asSlime(SlimeInserter(b)); + if (expect_eq) { + EXPECT_TRUE(vespalib::slime::are_equal(a.get(), b.get(), ignore_cost)); + } else { + EXPECT_FALSE(vespalib::slime::are_equal(a.get(), b.get(), ignore_cost)); + } +} + void optimize_and_compare(Blueprint::UP top, Blueprint::UP expect) { - EXPECT_NOT_EQUAL(expect->asString(), top->asString()); + TEST_DO(compare(*top, *expect, false)); top = Blueprint::optimize(std::move(top)); - EXPECT_EQUAL(expect->asString(), top->asString()); + TEST_DO(compare(*top, *expect, true)); expect = Blueprint::optimize(std::move(expect)); - EXPECT_EQUAL(expect->asString(), top->asString()); + TEST_DO(compare(*expect, *top, true)); } void SourceBlenderTestFixture::addChildrenForSBTest(IntermediateBlueprint & parent) { @@ -1120,7 +1178,7 @@ TEST("require that children of near are not optimized") { addChild(addLeafs(std::make_unique<OrBlueprint>(), {20, {0, true}})). addChild(addLeafs(std::make_unique<OrBlueprint>(), {{0, true}, 30}))); top_up = Blueprint::optimize(std::move(top_up)); - EXPECT_EQUAL(expect_up->asString(), top_up->asString()); + TEST_DO(compare(*top_up, *expect_up, true)); } TEST("require that children of onear are not optimized") { @@ -1131,7 +1189,7 @@ TEST("require that children of onear are not optimized") { addChild(addLeafs(std::make_unique<OrBlueprint>(), {20, {0, true}})). addChild(addLeafs(std::make_unique<OrBlueprint>(), {{0, true}, 30}))); top_up = Blueprint::optimize(std::move(top_up)); - EXPECT_EQUAL(expect_up->asString(), top_up->asString()); + TEST_DO(compare(*top_up, *expect_up, true)); } TEST("require that ANDNOT without children is optimized to empty search") { diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp index 2a59a578ec9..fa12b453d8c 100644 --- a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp +++ b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp @@ -599,6 +599,7 @@ TEST_F("require that asString() on blueprint works", BlueprintAsStringFixture) " tree_size: 2\n" " allow_termwise_eval: false\n" " }\n" + " cost: 1\n" " sourceId: 4294967295\n" " docid_limit: 0\n" " _weights: std::vector {\n" @@ -622,6 +623,7 @@ TEST_F("require that asString() on blueprint works", BlueprintAsStringFixture) " tree_size: 1\n" " allow_termwise_eval: true\n" " }\n" + " cost: 1\n" " sourceId: 4294967295\n" " docid_limit: 0\n" " }\n" diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp index b71a579e097..71d53ade2f7 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp @@ -358,6 +358,7 @@ Blueprint::visitMembers(vespalib::ObjectVisitor &visitor) const visitor.visitInt("tree_size", state.tree_size()); visitor.visitBool("allow_termwise_eval", state.allow_termwise_eval()); visitor.closeStruct(); + visitor.visitFloat("cost", _cost); visitor.visitInt("sourceId", _sourceId); visitor.visitInt("docid_limit", _docid_limit); } diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt index 49a4cca1e1c..9b430bdd913 100644 --- a/vespalib/CMakeLists.txt +++ b/vespalib/CMakeLists.txt @@ -161,6 +161,7 @@ vespa_define_module( src/tests/simple_thread_bundle src/tests/singleexecutor src/tests/slime + src/tests/slime/are_equal src/tests/slime/external_data_value src/tests/slime/summary-feature-benchmark src/tests/small_vector diff --git a/vespalib/src/tests/slime/are_equal/CMakeLists.txt b/vespalib/src/tests/slime/are_equal/CMakeLists.txt new file mode 100644 index 00000000000..0fa6e99900f --- /dev/null +++ b/vespalib/src/tests/slime/are_equal/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespalib_slime_are_equal_test_app TEST + SOURCES + slime_are_equal_test.cpp + DEPENDS + vespalib +) +vespa_add_test(NAME vespalib_slime_are_equal_test_app COMMAND vespalib_slime_are_equal_test_app) diff --git a/vespalib/src/tests/slime/are_equal/slime_are_equal_test.cpp b/vespalib/src/tests/slime/are_equal/slime_are_equal_test.cpp new file mode 100644 index 00000000000..4751d51958a --- /dev/null +++ b/vespalib/src/tests/slime/are_equal/slime_are_equal_test.cpp @@ -0,0 +1,173 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/data/slime/slime.h> +#include <vespa/vespalib/data/slime/json_format.h> +#include <vespa/vespalib/data/simple_buffer.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/util/overload.h> +#include <functional> +#include <variant> + +using namespace vespalib::slime::convenience; +using vespalib::make_string_short::fmt; +using vespalib::slime::NIX; + +using Path = std::vector<std::variant<size_t,vespalib::stringref>>; +using Hook = std::function<bool(const Path &, const Inspector &, const Inspector &)>; +using vespalib::slime::Inspector; + +Slime parse(const std::string &json) { + Slime slime; + ASSERT_TRUE(vespalib::slime::JsonFormat::decode(json, slime)); + return slime; +} + +const Inspector &full_obj() { + static vespalib::string str = + "{" + " a: 'foo'," + " b: 'bar'," + " c: 'baz'," + " d: [1,2,3,4,5]" + "}"; + static Slime slime = parse(str); + return slime.get(); +} + +const Inspector &subset_obj() { + static vespalib::string str = + "{" + " a: 'foo'," + " c: 'baz'," + " d: [1,2,3]" + "}"; + static Slime slime = parse(str); + return slime.get(); +} + +const Inspector &wildcard_obj() { + static vespalib::string str = + "{" + " a: 'foo'," + " b: null," + " c: null," + " d: [null,2,3,null]" + "}"; + static Slime slime = parse(str); + return slime.get(); +} + +Slime add_data_and_nix(Slime slime) { + Cursor &root = slime.get(); + char space1[3] = { 1, 2, 3 }; + char space2[3] = { 2, 4, 6 }; + root["ref"].addData(Memory(space1, 3)); + root["ref"].addNix(); + root["same"].addData(Memory(space1, 3)); + root["same"].addNix(); + root["err1"].addData(Memory(space2, 3)); + // err1: invalid nix vs valid nix + return slime; +} + +const Inspector &leaf_cmp_obj() { + static vespalib::string str = + "{" + " ref: [ true, 7, 2.0, 'foo']," + "same: [ true, 7, 2.0, 'foo']," + "err1: [false, 5, 2.5, 'bar']," + "err2: [ 1, 7.0, 2, 3, '0x010203', 'null']" + "}"; + static Slime slime = add_data_and_nix(parse(str)); + return slime.get(); +} + +vespalib::string path_to_str(const Path &path) { + size_t cnt = 0; + vespalib::string str("["); + for (const auto &item: path) { + if (cnt++ > 0) { + str.append(","); + } + std::visit(vespalib::overload{ + [&str](size_t value)noexcept{ str.append(fmt("%zu", value)); }, + [&str](vespalib::stringref value)noexcept{ str.append(value); }}, item); + } + str.append("]"); + return str; +} + +vespalib::string to_str(const Inspector &value) { + if (!value.valid()) { + return "<missing>"; + } + vespalib::SimpleBuffer buf; + vespalib::slime::JsonFormat::encode(value, buf, true); + return buf.get().make_string(); +} + +Hook dump_mismatches(Hook hook) { + return [hook](const Path &path, const Inspector &a, const Inspector &b) + { + bool result = hook(path, a, b); + fprintf(stderr, "mismatch at %s: %s vs %s (%s)\n", path_to_str(path).c_str(), + to_str(a).c_str(), to_str(b).c_str(), result ? "allowed" : "FAIL"); + return result; + }; +} + +void verify(const Inspector &a, const Inspector &b, Hook c, bool expect) { + fprintf(stderr, "---> cmp\n"); + Hook my_hook = dump_mismatches(c); + bool result = vespalib::slime::are_equal(a, b, my_hook); + fprintf(stderr, "<--- cmp\n"); + EXPECT_EQUAL(result, expect); +} + +TEST("strict compare (used by == operator)") { + auto allow_nothing = [](const auto &, const auto &, const auto &)noexcept{ return false; }; + TEST_DO(verify( full_obj(), full_obj(), allow_nothing, true)); + TEST_DO(verify( full_obj(), subset_obj(), allow_nothing, false)); + TEST_DO(verify( subset_obj(), full_obj(), allow_nothing, false)); + TEST_DO(verify( full_obj(), wildcard_obj(), allow_nothing, false)); + TEST_DO(verify(wildcard_obj(), full_obj(), allow_nothing, false)); +} + +TEST("subset compare") { + auto allow_subset_ab = [](const auto &, const auto &a, const auto &)noexcept{ return !a.valid(); }; + auto allow_subset_ba = [](const auto &, const auto &, const auto &b)noexcept{ return !b.valid(); }; + TEST_DO(verify( subset_obj(), full_obj(), allow_subset_ab, true)); + TEST_DO(verify( full_obj(), subset_obj(), allow_subset_ab, false)); + TEST_DO(verify( full_obj(), subset_obj(), allow_subset_ba, true)); + TEST_DO(verify( subset_obj(), full_obj(), allow_subset_ba, false)); +} + +TEST("wildcard compare") { + auto allow_wildcard_a = [](const auto &, const auto &a, const auto &)noexcept + { return a.valid() && a.type().getId() == NIX::ID; }; + auto allow_wildcard_b = [](const auto &, const auto &, const auto &b)noexcept + { return b.valid() && b.type().getId() == NIX::ID; }; + TEST_DO(verify(wildcard_obj(), full_obj(), allow_wildcard_a, false)); + TEST_DO(verify(wildcard_obj(), subset_obj(), allow_wildcard_a, true)); + TEST_DO(verify( subset_obj(), wildcard_obj(), allow_wildcard_a, false)); + TEST_DO(verify( full_obj(), wildcard_obj(), allow_wildcard_b, false)); + TEST_DO(verify( subset_obj(), wildcard_obj(), allow_wildcard_b, true)); + TEST_DO(verify(wildcard_obj(), subset_obj(), allow_wildcard_b, false)); +} + +TEST("leaf nodes") { + auto allow_nothing = [](const auto &, const auto &, const auto &)noexcept{ return false; }; + const Inspector &root = leaf_cmp_obj(); + EXPECT_EQUAL(root["ref"].entries(), 6u); + EXPECT_EQUAL(root["same"].entries(), 6u); + EXPECT_EQUAL(root["err1"].entries(), 5u); // invalid nix at end + EXPECT_EQUAL(root["err2"].entries(), 6u); + for (size_t i = 0; i < 6; ++i) { + TEST_DO(verify(root["ref"][i], root["same"][i], allow_nothing, true)); + TEST_DO(verify(root["ref"][i], root["err1"][i], allow_nothing, false)); + TEST_DO(verify(root["ref"][i], root["err2"][i], allow_nothing, false)); + } +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/vespalib/src/vespa/vespalib/data/slime/inspector.cpp b/vespalib/src/vespa/vespalib/data/slime/inspector.cpp index bd3a1079682..742c5ab0623 100644 --- a/vespalib/src/vespa/vespalib/data/slime/inspector.cpp +++ b/vespalib/src/vespa/vespalib/data/slime/inspector.cpp @@ -7,39 +7,58 @@ namespace vespalib::slime { -class Equal { -public: - Equal(const Inspector & rhs) : _rhs(rhs), _equal(true) { } - bool isEqual() const { return _equal; } -protected: - const Inspector & _rhs; - bool _equal; +using Path = std::vector<std::variant<size_t,vespalib::stringref>>; +using Hook = std::function<bool(const Path &, const Inspector &, const Inspector &)>; + +namespace { + +struct EqualState { + Path path; + Hook hook; + bool failed; + explicit EqualState(Hook hook_in) + : path(), hook(std::move(hook_in)), failed(false) {} + void mismatch(const Inspector &a, const Inspector &b) { + if (!failed && !hook(path, a, b)) { + failed = true; + } + } + void check_equal(const Inspector &a, const Inspector &b); }; - -class EqualObject : public ObjectTraverser, public Equal { -public: - EqualObject(const Inspector & rhs) : Equal(rhs) { } -private: - void field(const Memory &symbol, const Inspector &inspector) override { - if ( _equal ) { - _equal = (inspector == _rhs[symbol]); + +struct EqualObject : ObjectTraverser { + EqualState &state; + const Inspector &rhs; + EqualObject(EqualState &state_in, const Inspector &rhs_in) noexcept + : state(state_in), rhs(rhs_in) {} + void field(const Memory &symbol, const Inspector &inspector) final { + if (!state.failed) { + state.path.emplace_back(symbol.make_stringref()); + state.check_equal(inspector, rhs[symbol]); + state.path.pop_back(); } } }; -class EqualArray : public ArrayTraverser, public Equal { -public: - EqualArray(const Inspector & rhs) : Equal(rhs) { } -private: - void entry(size_t idx, const Inspector &inspector) override { - if ( _equal ) { - _equal = (inspector == _rhs[idx]); +struct MissingFields : ObjectTraverser { + EqualState &state; + const Inspector &lhs; + MissingFields(EqualState &state_in, const Inspector &lhs_in) noexcept + : state(state_in), lhs(lhs_in) {} + void field(const Memory &symbol, const Inspector &inspector) final { + if (!state.failed) { + const Inspector &field = lhs[symbol]; + if (!field.valid()) { + state.path.emplace_back(symbol.make_stringref()); + state.mismatch(field, inspector); + state.path.pop_back(); + } } } }; -bool operator == (const Inspector & a, const Inspector & b) -{ +void +EqualState::check_equal(const Inspector &a, const Inspector &b) { bool equal(a.type().getId() == b.type().getId()); if (equal) { switch (a.type().getId()) { @@ -63,28 +82,45 @@ bool operator == (const Inspector & a, const Inspector & b) break; case ARRAY::ID: { - EqualArray traverser(b); - a.traverse(traverser); - equal = traverser.isEqual() && (a.entries() == b.entries()); + size_t cnt = std::max(a.entries(), b.entries()); + for (size_t i = 0; !failed && i < cnt; ++i) { + path.emplace_back(i); + check_equal(a[i], b[i]); + path.pop_back(); + } } break; case OBJECT::ID: { - EqualObject traverser(b); - a.traverse(traverser); - equal = traverser.isEqual() && (a.fields() == b.fields()); + EqualObject cmp(*this, b); + MissingFields missing(*this, a); + a.traverse(cmp); + b.traverse(missing); } break; default: - assert(false); + abort(); break; } } - return equal; + if (!equal) { + mismatch(a, b); + } +} + +} // <unnamed> + +bool are_equal(const Inspector &a, const Inspector &b, Hook allow_mismatch) { + EqualState state(std::move(allow_mismatch)); + state.check_equal(a, b); + return !state.failed; +} + +bool operator==(const Inspector &a, const Inspector &b) { + return are_equal(a, b, [](const Path &, const Inspector &, const Inspector &)noexcept{ return false; }); } -std::ostream & operator << (std::ostream & os, const Inspector & inspector) -{ +std::ostream &operator<<(std::ostream &os, const Inspector &inspector) { os << inspector.toString(); return os; } diff --git a/vespalib/src/vespa/vespalib/data/slime/inspector.h b/vespalib/src/vespa/vespalib/data/slime/inspector.h index 11380405c4c..f035e179d95 100644 --- a/vespalib/src/vespa/vespalib/data/slime/inspector.h +++ b/vespalib/src/vespa/vespalib/data/slime/inspector.h @@ -5,6 +5,8 @@ #include "type.h" #include "symbol.h" #include <vespa/vespalib/data/memory.h> +#include <functional> +#include <variant> namespace vespalib::slime { @@ -38,8 +40,13 @@ struct Inspector { virtual ~Inspector() {} }; -bool operator == (const Inspector & a, const Inspector & b); -std::ostream & operator << (std::ostream & os, const Inspector & inspector); +bool operator==(const Inspector &a, const Inspector &b); +std::ostream &operator<<(std::ostream &os, const Inspector &inspector); +// check if two inspectors are equal +// has support for allowing specific mismatches +bool are_equal(const Inspector &a, const Inspector &b, + std::function<bool(const std::vector<std::variant<size_t,vespalib::stringref>> &path, + const Inspector &sub_a, const Inspector &sub_b)> allow_mismatch); } // namespace vespalib::slime |