diff options
author | Arne H Juul <arnej27959@users.noreply.github.com> | 2020-10-06 16:21:06 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-06 16:21:06 +0200 |
commit | 0f2bf3326eae13c3e6d15bd5090727f0f371bf16 (patch) | |
tree | 145b58f9c741bdaba925fa109ff6f80c744893b2 | |
parent | bf9ffb40400a718c37de35f1f9f6ba7e900fac8a (diff) | |
parent | 392be2d2538cafb73ca1269003973db91306e3e2 (diff) |
Merge pull request #14728 from vespa-engine/arnej/add-generic-concat-squish
Arnej/add generic concat squish
-rw-r--r-- | eval/CMakeLists.txt | 1 | ||||
-rw-r--r-- | eval/src/tests/instruction/generic_concat/CMakeLists.txt | 9 | ||||
-rw-r--r-- | eval/src/tests/instruction/generic_concat/generic_concat_test.cpp | 146 | ||||
-rw-r--r-- | eval/src/vespa/eval/eval/tensor_spec.cpp | 7 | ||||
-rw-r--r-- | eval/src/vespa/eval/eval/tensor_spec.h | 1 | ||||
-rw-r--r-- | eval/src/vespa/eval/instruction/CMakeLists.txt | 1 | ||||
-rw-r--r-- | eval/src/vespa/eval/instruction/generic_concat.cpp | 186 | ||||
-rw-r--r-- | eval/src/vespa/eval/instruction/generic_concat.h | 44 | ||||
-rw-r--r-- | eval/src/vespa/eval/instruction/generic_join.cpp | 68 | ||||
-rw-r--r-- | eval/src/vespa/eval/instruction/generic_join.h | 22 |
10 files changed, 440 insertions, 45 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt index 485d68b4ae3..18c3676c366 100644 --- a/eval/CMakeLists.txt +++ b/eval/CMakeLists.txt @@ -34,6 +34,7 @@ vespa_define_module( src/tests/eval/value_codec src/tests/eval/value_type src/tests/gp/ponder_nov2017 + src/tests/instruction/generic_concat src/tests/instruction/generic_join src/tests/instruction/generic_merge src/tests/instruction/generic_reduce diff --git a/eval/src/tests/instruction/generic_concat/CMakeLists.txt b/eval/src/tests/instruction/generic_concat/CMakeLists.txt new file mode 100644 index 00000000000..ddc6d4f5944 --- /dev/null +++ b/eval/src/tests/instruction/generic_concat/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_generic_concat_test_app TEST + SOURCES + generic_concat_test.cpp + DEPENDS + vespaeval + GTest::GTest +) +vespa_add_test(NAME eval_generic_concat_test_app COMMAND eval_generic_concat_test_app) diff --git a/eval/src/tests/instruction/generic_concat/generic_concat_test.cpp b/eval/src/tests/instruction/generic_concat/generic_concat_test.cpp new file mode 100644 index 00000000000..a2510fdd2fa --- /dev/null +++ b/eval/src/tests/instruction/generic_concat/generic_concat_test.cpp @@ -0,0 +1,146 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/eval/eval/simple_value.h> +#include <vespa/eval/eval/value_codec.h> +#include <vespa/eval/eval/simple_tensor.h> +#include <vespa/eval/eval/simple_tensor_engine.h> +#include <vespa/eval/eval/value_codec.h> +#include <vespa/eval/instruction/generic_concat.h> +#include <vespa/eval/eval/interpreted_function.h> +#include <vespa/eval/eval/test/tensor_model.hpp> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/gtest/gtest.h> + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::eval::instruction; +using namespace vespalib::eval::test; + +using vespalib::make_string_short::fmt; + +std::vector<Layout> concat_layouts = { + {}, {}, + {}, {x(5)}, + {x(5)}, {}, + {x(2)}, {x(3)}, + {x(2)}, {y(3)}, + {y(2)}, {z(3)}, + {x(5)}, {x(2),y(5)}, + {y(3)}, {x(2),z(3)}, + {x(2)}, {x(3),y(5),z(2)}, + {x(2),y(5),z(2)}, {x(3),y(5),z(2)}, + {x(3),y(5)}, {y(5),z(7)}, + float_cells({x(3),y(5)}), {y(5),z(7)}, + {x(3),y(5)}, float_cells({y(5),z(7)}), + float_cells({x(3),y(5)}), float_cells({y(5),z(7)}), + {y({"a","b","c"})}, {y({"a","b","c"})}, + {y({"a","b","c"})}, {y({"a","b"})}, + {y({"a","b","c"})}, {y({"b","c","d"})}, + float_cells({y({"a","b","c"})}), {y({"b","c","d"})}, + {y({"a","b","c"})}, float_cells({y({"b","c","d"})}), + float_cells({y({"a","b","c"})}), float_cells({z({"foo","bar","baz"})}), + {y({"a","b","c"})}, {y({"a","b","c"}),z({"foo","bar","baz"})}, + {y({"a","b"}),z({"foo","bar","baz"})}, {y({"a","b","c"}),z({"foo","bar"})}, + {x(2),y({"a","b","c"})}, {x(3),y({"b","c","d"})}, + {x(2),y({"a","b"})}, {x(3),z({"c","d"})} +}; + +TensorSpec perform_simpletensor_concat(const TensorSpec &a, const TensorSpec &b, const std::string &dimension) { + auto lhs = SimpleTensor::create(a); + auto rhs = SimpleTensor::create(b); + auto out = SimpleTensor::concat(*lhs, *rhs, dimension); + return SimpleTensorEngine::ref().to_spec(*out); +} + +bool concat_address(const TensorSpec::Address &me, const TensorSpec::Address 
&other, + const std::string &concat_dim, size_t my_offset, + TensorSpec::Address &my_out, TensorSpec::Address &other_out) +{ + my_out.insert_or_assign(concat_dim, my_offset); + for (const auto &my_dim: me) { + const auto & name = my_dim.first; + const auto & label = my_dim.second; + if (name == concat_dim) { + my_out.insert_or_assign(name, label.index + my_offset); + } else { + auto pos = other.find(name); + if ((pos == other.end()) || (pos->second == label)) { + my_out.insert_or_assign(name, label); + other_out.insert_or_assign(name, label); + } else { + return false; + } + } + } + return true; +} + +bool concat_addresses(const TensorSpec::Address &a, const TensorSpec::Address &b, + const std::string &concat_dim, size_t b_offset, + TensorSpec::Address &a_out, TensorSpec::Address &b_out) +{ + return concat_address(a, b, concat_dim, 0, a_out, b_out) && + concat_address(b, a, concat_dim, b_offset, b_out, a_out); +} + +TensorSpec reference_concat(const TensorSpec &a, const TensorSpec &b, const std::string &concat_dim) { + ValueType a_type = ValueType::from_spec(a.type()); + ValueType b_type = ValueType::from_spec(b.type()); + ValueType res_type = ValueType::concat(a_type, b_type, concat_dim); + EXPECT_FALSE(res_type.is_error()); + size_t b_offset = 1; + size_t concat_dim_index = a_type.dimension_index(concat_dim); + if (concat_dim_index != ValueType::Dimension::npos) { + const auto &dim = a_type.dimensions()[concat_dim_index]; + EXPECT_TRUE(dim.is_indexed()); + b_offset = dim.size; + } + TensorSpec result(res_type.to_spec()); + for (const auto &cell_a: a.cells()) { + for (const auto &cell_b: b.cells()) { + TensorSpec::Address addr_a; + TensorSpec::Address addr_b; + if (concat_addresses(cell_a.first, cell_b.first, concat_dim, b_offset, addr_a, addr_b)) { + result.set(addr_a, cell_a.second); + result.set(addr_b, cell_b.second); + } + } + } + return result; +} + +TensorSpec perform_generic_concat(const TensorSpec &a, const TensorSpec &b, const std::string &concat_dim) { 
+ Stash stash; + const auto &factory = SimpleValueBuilderFactory::get(); + auto lhs = value_from_spec(a, factory); + auto rhs = value_from_spec(b, factory); + auto my_op = GenericConcat::make_instruction(lhs->type(), rhs->type(), concat_dim, factory, stash); + InterpretedFunction::EvalSingle single(my_op); + return spec_from_value(single.eval(std::vector<Value::CREF>({*lhs,*rhs}))); +} + +TEST(GenericConcatTest, generic_reference_concat_works) { + ASSERT_TRUE((concat_layouts.size() % 2) == 0); + for (size_t i = 0; i < concat_layouts.size(); i += 2) { + const TensorSpec lhs = spec(concat_layouts[i], N()); + const TensorSpec rhs = spec(concat_layouts[i + 1], Div16(N())); + SCOPED_TRACE(fmt("\n===\nin LHS: %s\nin RHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); + auto actual = reference_concat(lhs, rhs, "x"); + auto expect = perform_simpletensor_concat(lhs, rhs, "x"); + EXPECT_EQ(actual, expect); + } +} + +TEST(GenericConcatTest, generic_concat_works_for_simple_values) { + ASSERT_TRUE((concat_layouts.size() % 2) == 0); + for (size_t i = 0; i < concat_layouts.size(); i += 2) { + const TensorSpec lhs = spec(concat_layouts[i], N()); + const TensorSpec rhs = spec(concat_layouts[i + 1], Div16(N())); + SCOPED_TRACE(fmt("\n===\nin LHS: %s\nin RHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); + auto actual = perform_generic_concat(lhs, rhs, "x"); + auto expect = reference_concat(lhs, rhs, "x"); + EXPECT_EQ(actual, expect); + } +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/eval/src/vespa/eval/eval/tensor_spec.cpp b/eval/src/vespa/eval/eval/tensor_spec.cpp index 712ee282c71..d3aafc7632a 100644 --- a/eval/src/vespa/eval/eval/tensor_spec.cpp +++ b/eval/src/vespa/eval/eval/tensor_spec.cpp @@ -45,6 +45,13 @@ TensorSpec & TensorSpec::operator = (const TensorSpec &) = default; TensorSpec::~TensorSpec() { } +TensorSpec & +TensorSpec::set(Address address, double value) { + auto res = _cells.emplace(std::move(address), value); + if (!res.second) { 
assert(res.first->second.value == value); } + return *this; +} + vespalib::string TensorSpec::to_string() const { diff --git a/eval/src/vespa/eval/eval/tensor_spec.h b/eval/src/vespa/eval/eval/tensor_spec.h index 974ad4a1f4c..8a4343b3faa 100644 --- a/eval/src/vespa/eval/eval/tensor_spec.h +++ b/eval/src/vespa/eval/eval/tensor_spec.h @@ -68,6 +68,7 @@ public: TensorSpec(const TensorSpec &); TensorSpec & operator = (const TensorSpec &); ~TensorSpec(); + TensorSpec &set(Address address, double value); TensorSpec &add(Address address, double value) { auto res = _cells.emplace(std::move(address), value); if (!res.second) { diff --git a/eval/src/vespa/eval/instruction/CMakeLists.txt b/eval/src/vespa/eval/instruction/CMakeLists.txt index 91ff4fd63dc..7b159272ae4 100644 --- a/eval/src/vespa/eval/instruction/CMakeLists.txt +++ b/eval/src/vespa/eval/instruction/CMakeLists.txt @@ -2,6 +2,7 @@ vespa_add_library(eval_instruction OBJECT SOURCES + generic_concat generic_join generic_reduce generic_merge diff --git a/eval/src/vespa/eval/instruction/generic_concat.cpp b/eval/src/vespa/eval/instruction/generic_concat.cpp new file mode 100644 index 00000000000..e3b3a3f0331 --- /dev/null +++ b/eval/src/vespa/eval/instruction/generic_concat.cpp @@ -0,0 +1,186 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "generic_concat.h" +#include "generic_join.h" +#include <vespa/eval/eval/value.h> +#include <vespa/vespalib/util/overload.h> +#include <vespa/vespalib/util/stash.h> +#include <vespa/vespalib/util/typify.h> +#include <vespa/vespalib/util/visit_ranges.h> +#include <cassert> + +namespace vespalib::eval::instruction { + +using State = InterpretedFunction::State; +using Instruction = InterpretedFunction::Instruction; + +namespace { + +template <typename T, typename IN> uint64_t wrap_param(const IN &value_in) { + const T &value = value_in; + static_assert(sizeof(uint64_t) == sizeof(&value)); + return (uint64_t)&value; +} + +template <typename T> const T &unwrap_param(uint64_t param) { + return *((const T *)param); +} + +struct ConcatParam +{ + ValueType res_type; + SparseJoinPlan sparse_plan; + DenseConcatPlan dense_plan; + const ValueBuilderFactory &factory; + + ConcatParam(const ValueType &lhs_type, const ValueType &rhs_type, + const vespalib::string &dimension, const ValueBuilderFactory &factory_in) + : res_type(ValueType::concat(lhs_type, rhs_type, dimension)), + sparse_plan(lhs_type, rhs_type), + dense_plan(lhs_type, rhs_type, dimension, res_type), + factory(factory_in) + { + assert(!res_type.is_error()); + } +}; + +template <typename LCT, typename RCT> +std::unique_ptr<Value> +generic_concat(const Value &a, const Value &b, + const SparseJoinPlan &sparse_plan, + const DenseConcatPlan &dense_plan, + const ValueType &res_type, const ValueBuilderFactory &factory) +{ + using OCT = typename eval::UnifyCellTypes<LCT, RCT>::type; + auto a_cells = a.cells().typify<LCT>(); + auto b_cells = b.cells().typify<RCT>(); + SparseJoinState sparse(sparse_plan, a.index(), b.index()); + auto builder = factory.create_value_builder<OCT>(res_type, + sparse_plan.sources.size(), + dense_plan.right.output_size, + sparse.first_index.size()); + auto outer = sparse.first_index.create_view({}); + auto inner = sparse.second_index.create_view(sparse.second_view_dims); + 
outer->lookup({}); + while (outer->next_result(sparse.first_address, sparse.first_subspace)) { + inner->lookup(sparse.address_overlap); + while (inner->next_result(sparse.second_only_address, sparse.second_subspace)) { + OCT *dst = builder->add_subspace(sparse.full_address).begin(); + { + size_t left_input_offset = dense_plan.left.input_size * sparse.lhs_subspace; + auto copy_left = [&](size_t in_idx, size_t out_idx) { dst[out_idx] = a_cells[in_idx]; }; + dense_plan.left.execute(left_input_offset, 0, copy_left); + } + { + size_t right_input_offset = dense_plan.right.input_size * sparse.rhs_subspace; + auto copy_right = [&](size_t in_idx, size_t out_idx) { dst[out_idx] = b_cells[in_idx]; }; + dense_plan.right.execute(right_input_offset, dense_plan.right_offset, copy_right); + } + } + } + return builder->build(std::move(builder)); +} + +template <typename LCT, typename RCT> +void my_generic_concat_op(State &state, uint64_t param_in) { + const auto &param = unwrap_param<ConcatParam>(param_in); + const Value &lhs = state.peek(1); + const Value &rhs = state.peek(0); + auto res_value = generic_concat<LCT, RCT>(lhs, rhs, param.sparse_plan, param.dense_plan, + param.res_type, param.factory); + auto &result = state.stash.create<std::unique_ptr<Value>>(std::move(res_value)); + const Value &result_ref = *(result.get()); + state.pop_pop_push(result_ref); +} + +struct SelectGenericConcatOp { + template <typename LCT, typename RCT> static auto invoke() { + return my_generic_concat_op<LCT, RCT>; + } +}; + +enum class Case { NONE, OUT, BOTH }; + +} // namespace <unnamed> + +DenseConcatPlan::InOutLoop::InOutLoop(const ValueType &in_type, + std::string concat_dimension, + const ValueType &out_type) +{ + std::vector<size_t> out_loop_cnt; + Case prev_case = Case::NONE; + auto update_plan = [&](Case my_case, size_t in_size, size_t out_size, size_t in_val, size_t out_val) { + if (my_case == prev_case) { + assert(!out_loop_cnt.empty()); + in_loop_cnt.back() *= in_size; + out_loop_cnt.back() 
*= out_size; + } else { + in_loop_cnt.push_back(in_size); + out_loop_cnt.push_back(out_size); + in_stride.push_back(in_val); + out_stride.push_back(out_val); + prev_case = my_case; + } + }; + auto visitor = overload + { + [&](visit_ranges_first, const auto &) { abort(); }, + [&](visit_ranges_second, const auto &b) { + if (b.name == concat_dimension) { update_plan(Case::OUT, 1, b.size, 0, 1); + } else { update_plan(Case::OUT, b.size, b.size, 0, 1); } + }, + [&](visit_ranges_both, const auto &a, const auto &b) { update_plan(Case::BOTH, a.size, b.size, 1, 1); } + }; + + const auto input_dimensions = in_type.nontrivial_indexed_dimensions(); + const auto output_dimensions = out_type.nontrivial_indexed_dimensions(); + visit_ranges(visitor, input_dimensions.begin(), input_dimensions.end(), output_dimensions.begin(), output_dimensions.end(), + [](const auto &a, const auto &b){ return (a.name < b.name); }); + + input_size = 1; + output_size = 1; + for (size_t i = in_loop_cnt.size(); i-- > 0; ) { + if (in_stride[i] != 0) { + in_stride[i] = input_size; + input_size *= in_loop_cnt[i]; + } + assert(out_stride[i] != 0); + assert(out_loop_cnt[i] != 0); + out_stride[i] = output_size; + output_size *= out_loop_cnt[i]; + } +} + +InterpretedFunction::Instruction +GenericConcat::make_instruction(const ValueType &lhs_type, const ValueType &rhs_type, + const vespalib::string &dimension, + const ValueBuilderFactory &factory, Stash &stash) +{ + auto &param = stash.create<ConcatParam>(lhs_type, rhs_type, dimension, factory); + auto fun = typify_invoke<2,TypifyCellType,SelectGenericConcatOp>( + lhs_type.cell_type(), rhs_type.cell_type()); + return Instruction(fun, wrap_param<ConcatParam>(param)); +} + +DenseConcatPlan::DenseConcatPlan(const ValueType &lhs_type, + const ValueType &rhs_type, + std::string concat_dimension, + const ValueType &out_type) + : right_offset(0), + left(lhs_type, concat_dimension, out_type), + right(rhs_type, concat_dimension, out_type) +{ + const auto output_dimensions 
= out_type.nontrivial_indexed_dimensions(); + for (size_t i = 0; i < output_dimensions.size(); ++i) { + if (output_dimensions[i].name == concat_dimension) { + right_offset = left.in_loop_cnt[i] * left.out_stride[i]; + } + } + assert(right_offset > 0); + assert(left.output_size == right.output_size); +} + +DenseConcatPlan::~DenseConcatPlan() = default; +DenseConcatPlan::InOutLoop::~InOutLoop() = default; + +} // namespace diff --git a/eval/src/vespa/eval/instruction/generic_concat.h b/eval/src/vespa/eval/instruction/generic_concat.h new file mode 100644 index 00000000000..c2636fb7678 --- /dev/null +++ b/eval/src/vespa/eval/instruction/generic_concat.h @@ -0,0 +1,44 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/nested_loop.h> +#include <vespa/eval/eval/value_type.h> +#include <vespa/eval/eval/interpreted_function.h> +#include <vespa/vespalib/stllike/string.h> +#include <vector> + +namespace vespalib::eval { struct ValueBuilderFactory; } + +namespace vespalib::eval::instruction { + +struct GenericConcat { + static InterpretedFunction::Instruction + make_instruction(const ValueType &lhs_type, const ValueType &rhs_type, + const vespalib::string &dimension, + const ValueBuilderFactory &factory, Stash &stash); +}; + +struct DenseConcatPlan { + size_t right_offset; + struct InOutLoop { + size_t input_size; + size_t output_size; + std::vector<size_t> in_loop_cnt; + std::vector<size_t> in_stride; + std::vector<size_t> out_stride; + template <typename F> void execute(size_t in_off, size_t out_off, const F &f) const { + run_nested_loop(in_off, out_off, in_loop_cnt, in_stride, out_stride, f); + } + InOutLoop(const ValueType &in_type, + std::string concat_dimension, + const ValueType &out_type); + ~InOutLoop(); + }; + InOutLoop left; + InOutLoop right; + DenseConcatPlan(const ValueType &lhs_type, const ValueType &rhs_type, std::string concat_dimension, const 
ValueType &res_type); + ~DenseConcatPlan(); +}; + +} // namespace diff --git a/eval/src/vespa/eval/instruction/generic_join.cpp b/eval/src/vespa/eval/instruction/generic_join.cpp index 0f2fa4970db..53324924bdd 100644 --- a/eval/src/vespa/eval/instruction/generic_join.cpp +++ b/eval/src/vespa/eval/instruction/generic_join.cpp @@ -51,49 +51,6 @@ JoinParam::~JoinParam() = default; //----------------------------------------------------------------------------- -// Contains various state needed to perform the sparse part (all -// mapped dimensions) of the join operation. Performs swapping of -// sparse indexes to ensure that we look up entries from the smallest -// index in the largest index. -struct SparseJoinState { - bool swapped; - const Value::Index &first_index; - const Value::Index &second_index; - const std::vector<size_t> &second_view_dims; - std::vector<vespalib::stringref> full_address; - std::vector<vespalib::stringref*> first_address; - std::vector<const vespalib::stringref*> address_overlap; - std::vector<vespalib::stringref*> second_only_address; - size_t lhs_subspace; - size_t rhs_subspace; - size_t &first_subspace; - size_t &second_subspace; - - SparseJoinState(const SparseJoinPlan &plan, const Value::Index &lhs, const Value::Index &rhs) - : swapped(rhs.size() < lhs.size()), - first_index(swapped ? rhs : lhs), second_index(swapped ? lhs : rhs), - second_view_dims(swapped ? plan.lhs_overlap : plan.rhs_overlap), - full_address(plan.sources.size()), - first_address(), address_overlap(), second_only_address(), - lhs_subspace(), rhs_subspace(), - first_subspace(swapped ? rhs_subspace : lhs_subspace), - second_subspace(swapped ? lhs_subspace : rhs_subspace) - { - auto first_source = swapped ? 
SparseJoinPlan::Source::RHS : SparseJoinPlan::Source::LHS; - for (size_t i = 0; i < full_address.size(); ++i) { - if (plan.sources[i] == SparseJoinPlan::Source::BOTH) { - first_address.push_back(&full_address[i]); - address_overlap.push_back(&full_address[i]); - } else if (plan.sources[i] == first_source) { - first_address.push_back(&full_address[i]); - } else { - second_only_address.push_back(&full_address[i]); - } - } - } - ~SparseJoinState(); -}; -SparseJoinState::~SparseJoinState() = default; template <typename LCT, typename RCT, typename OCT, typename Fun> void my_mixed_join_op(State &state, uint64_t param_in) { @@ -237,11 +194,32 @@ SparseJoinPlan::~SparseJoinPlan() = default; //----------------------------------------------------------------------------- +SparseJoinState::SparseJoinState(const SparseJoinPlan &plan, const Value::Index &lhs, const Value::Index &rhs) + : swapped(rhs.size() < lhs.size()), + first_index(swapped ? rhs : lhs), second_index(swapped ? lhs : rhs), + second_view_dims(swapped ? plan.lhs_overlap : plan.rhs_overlap), + full_address(plan.sources.size()), + first_address(), address_overlap(), second_only_address(), + lhs_subspace(), rhs_subspace(), + first_subspace(swapped ? rhs_subspace : lhs_subspace), + second_subspace(swapped ? lhs_subspace : rhs_subspace) +{ + auto first_source = swapped ? 
SparseJoinPlan::Source::RHS : SparseJoinPlan::Source::LHS; + for (size_t i = 0; i < full_address.size(); ++i) { + if (plan.sources[i] == SparseJoinPlan::Source::BOTH) { + first_address.push_back(&full_address[i]); + address_overlap.push_back(&full_address[i]); + } else if (plan.sources[i] == first_source) { + first_address.push_back(&full_address[i]); + } else { + second_only_address.push_back(&full_address[i]); + } + } } -//----------------------------------------------------------------------------- +SparseJoinState::~SparseJoinState() = default; -namespace vespalib::eval::instruction { +//----------------------------------------------------------------------------- using JoinTypify = TypifyValue<TypifyCellType,operation::TypifyOp2>; diff --git a/eval/src/vespa/eval/instruction/generic_join.h b/eval/src/vespa/eval/instruction/generic_join.h index 0f121104d5e..30e78b52510 100644 --- a/eval/src/vespa/eval/instruction/generic_join.h +++ b/eval/src/vespa/eval/instruction/generic_join.h @@ -57,6 +57,28 @@ struct SparseJoinPlan { ~SparseJoinPlan(); }; +// Contains various state needed to perform the sparse part (all +// mapped dimensions) of the join operation. Performs swapping of +// sparse indexes to ensure that we look up entries from the smallest +// index in the largest index. +struct SparseJoinState { + bool swapped; + const Value::Index &first_index; + const Value::Index &second_index; + const std::vector<size_t> &second_view_dims; + std::vector<vespalib::stringref> full_address; + std::vector<vespalib::stringref*> first_address; + std::vector<const vespalib::stringref*> address_overlap; + std::vector<vespalib::stringref*> second_only_address; + size_t lhs_subspace; + size_t rhs_subspace; + size_t &first_subspace; + size_t &second_subspace; + + SparseJoinState(const SparseJoinPlan &plan, const Value::Index &lhs, const Value::Index &rhs); + ~SparseJoinState(); +}; + //----------------------------------------------------------------------------- } // namespace |