Add normalize() method to TensorSpec

author: Arne Juul <arnej@verizonmedia.com> 2020-11-25 15:41:42 +0000
committer: Arne Juul <arnej@verizonmedia.com> 2020-11-25 17:14:38 +0000
commit: aff89369768fdbbca74d4eabe796fafac80bb465 (patch)
tree: 65f9a485db9563b64ccb03464b5180b763ebac80 /eval
parent: e1584673531bc771fa94731da337ce311b4ff7d1 (diff)
5 files changed, 93 insertions, 22 deletions
diff --git a/eval/src/tests/instruction/generic_create/generic_create_test.cpp b/eval/src/tests/instruction/generic_create/generic_create_test.cpp
index 42af4ba6621..00af75e4d83 100644
--- a/eval/src/tests/instruction/generic_create/generic_create_test.cpp
+++ b/eval/src/tests/instruction/generic_create/generic_create_test.cpp
@@ -94,15 +94,12 @@ void test_generic_create_with(const ValueBuilderFactory &factory) {
     for (const auto & layout : create_layouts) {
         TensorSpec full = spec(layout, N());
         auto actual = perform_generic_create(full, factory);
-        auto ref_spec = reference_create(full);
-        // use SimpleValue to add implicit cells with default value
-        auto expect = spec_from_value(*value_from_spec(ref_spec, SimpleValueBuilderFactory::get()));
+        auto expect = reference_create(full).normalize();
         EXPECT_EQ(actual, expect);
         for (size_t n : {2, 3, 4, 5}) {
             TensorSpec partial = remove_each(full, n);
             actual = perform_generic_create(partial, factory);
-            ref_spec = reference_create(partial);
-            expect = spec_from_value(*value_from_spec(ref_spec, SimpleValueBuilderFactory::get()));
+            expect = reference_create(partial).normalize();
             EXPECT_EQ(actual, expect);
         }
     }
diff --git a/eval/src/tests/instruction/generic_peek/generic_peek_test.cpp b/eval/src/tests/instruction/generic_peek/generic_peek_test.cpp
index 18b1d6903dd..be66b4bbda1 100644
--- a/eval/src/tests/instruction/generic_peek/generic_peek_test.cpp
+++ b/eval/src/tests/instruction/generic_peek/generic_peek_test.cpp
@@ -151,9 +151,7 @@ void verify_peek_equal(const TensorSpec &input,
     }
     if (reduce_dims.empty()) return;
     ValueType result_type = param_type.reduce(reduce_dims);
-    auto ref_spec = reference_peek(input, spec);
-    // use SimpleValue to add implicit cells with default value
-    auto expect = spec_from_value(*value_from_spec(ref_spec, SimpleValueBuilderFactory::get()));
+    auto expect = reference_peek(input, spec).normalize();
     SCOPED_TRACE(fmt("peek input: %s\n  peek spec: %s\n  peek result %s\n",
                      input.to_string().c_str(),
                      to_str(spec).c_str(),
diff --git a/eval/src/tests/instruction/generic_reduce/generic_reduce_test.cpp b/eval/src/tests/instruction/generic_reduce/generic_reduce_test.cpp
index fa55406be3a..7d419f7ec95 100644
--- a/eval/src/tests/instruction/generic_reduce/generic_reduce_test.cpp
+++ b/eval/src/tests/instruction/generic_reduce/generic_reduce_test.cpp
@@ -71,14 +71,11 @@ void test_generic_reduce_with(const ValueBuilderFactory &factory) {
         TensorSpec input = spec(layout, Div16(N()));
         for (Aggr aggr: {Aggr::SUM, Aggr::AVG, Aggr::MIN, Aggr::MAX}) {
             for (const Domain &domain: layout) {
-                auto ref_spec = ReferenceOperations::reduce(input, {domain.dimension}, aggr);
-                // use SimpleValue to add implicit cells with default value
-                auto expect = spec_from_value(*value_from_spec(ref_spec, SimpleValueBuilderFactory::get()));
+                auto expect = ReferenceOperations::reduce(input, {domain.dimension}, aggr).normalize();
                 auto actual = perform_generic_reduce(input, {domain.dimension}, aggr, factory);
                 EXPECT_EQ(actual, expect);
             }
-            auto ref_spec = ReferenceOperations::reduce(input, {}, aggr);
-            auto expect = spec_from_value(*value_from_spec(ref_spec, SimpleValueBuilderFactory::get()));
+            auto expect = ReferenceOperations::reduce(input, {}, aggr).normalize();
             auto actual = perform_generic_reduce(input, {}, aggr, factory);
             EXPECT_EQ(actual, expect);
         }
@@ -105,13 +102,11 @@ TEST(GenericReduceTest, immediate_generic_reduce_works) {
         TensorSpec input = spec(layout, Div16(N()));
         for (Aggr aggr: {Aggr::SUM, Aggr::AVG, Aggr::MIN, Aggr::MAX}) {
             for (const Domain &domain: layout) {
-                auto ref_spec = ReferenceOperations::reduce(input, {domain.dimension}, aggr);
-                auto expect = spec_from_value(*value_from_spec(ref_spec, SimpleValueBuilderFactory::get()));
+                auto expect = ReferenceOperations::reduce(input, {domain.dimension}, aggr).normalize();
                 auto actual = immediate_generic_reduce(input, {domain.dimension}, aggr);
                 EXPECT_EQ(actual, expect);
             }
-            auto ref_spec = ReferenceOperations::reduce(input, {}, aggr);
-            auto expect = spec_from_value(*value_from_spec(ref_spec, SimpleValueBuilderFactory::get()));
+            auto expect = ReferenceOperations::reduce(input, {}, aggr).normalize();
             auto actual = immediate_generic_reduce(input, {}, aggr);
             EXPECT_EQ(actual, expect);
         }
diff --git a/eval/src/vespa/eval/eval/tensor_spec.cpp b/eval/src/vespa/eval/eval/tensor_spec.cpp
index 98bc09217c1..22faee004b4 100644
--- a/eval/src/vespa/eval/eval/tensor_spec.cpp
+++ b/eval/src/vespa/eval/eval/tensor_spec.cpp
@@ -1,13 +1,15 @@
 // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
 
 #include "tensor_spec.h"
-#include "value.h"
-#include "value_codec.h"
-#include "tensor.h"
-#include "tensor_engine.h"
-#include "simple_tensor_engine.h"
+#include "array_array_map.h"
 #include "function.h"
 #include "interpreted_function.h"
+#include "simple_tensor_engine.h"
+#include "tensor.h"
+#include "tensor_engine.h"
+#include "value.h"
+#include "value_codec.h"
+#include "value_type.h"
 #include <vespa/vespalib/util/stringfmt.h>
 #include <vespa/vespalib/data/slime/slime.h>
 #include <ostream>
@@ -33,6 +35,73 @@ TensorSpec::Address extract_address(const slime::Inspector &address) {
     return extractor.address;
 }
 
+struct NormalizeTensorSpec {
+    /*
+     * Re-emits 'spec' with one cell per value slot of every subspace gathered from it (values kept as T).
+     * This is basically value_from_spec() + spec_from_value() implementation, taken from value_codec.cpp
+     */
+    template <typename T>
+    static TensorSpec invoke(const ValueType &type, const TensorSpec &spec) {
+        size_t dense_size = type.dense_subspace_size();
+        size_t num_mapped_dims = type.count_mapped_dimensions();
+        size_t max_subspaces = std::max(spec.cells().size() / dense_size, size_t(1)); // never below 1
+        ArrayArrayMap<vespalib::stringref,T> map(num_mapped_dims, dense_size, max_subspaces);
+        std::vector<vespalib::stringref> sparse_key; // reused for every cell, cleared per iteration
+        for (const auto &entry: spec.cells()) { // entry.first: address bindings, entry.second: cell value
+            sparse_key.clear();
+            size_t dense_key = 0;
+            auto binding = entry.first.begin();
+            for (const auto &dim : type.dimensions()) { // address is walked in step with the type's dimensions
+                assert(binding != entry.first.end());
+                assert(dim.name == binding->first);
+                assert(dim.is_mapped() == binding->second.is_mapped());
+                if (dim.is_mapped()) {
+                    sparse_key.push_back(binding->second.name); // mapped label becomes part of the sparse key
+                } else {
+                    assert(binding->second.index < dim.size);
+                    dense_key = (dense_key * dim.size) + binding->second.index; // flat index, earlier dimensions most significant
+                }
+                ++binding;
+            }
+            assert(binding == entry.first.end()); // the address must not bind more dimensions than the type has
+            assert(dense_key < map.values_per_entry());
+            auto [tag, ignore] = map.lookup_or_add_entry(ConstArrayRef<vespalib::stringref>(sparse_key));
+            map.get_values(tag)[dense_key] = entry.second; // store the cell value in the slot for its flat index
+        }
+        // if spec is missing the required dense space, add it here:
+        if ((map.keys_per_entry() == 0) && (map.size() == 0)) {
+            map.add_entry(ConstArrayRef<vespalib::stringref>()); // entry with an empty sparse key
+        }
+        TensorSpec result(type.to_spec());
+        map.each_entry([&](const auto &keys, const auto &values)
+                       {
+                           auto sparse_addr_iter = keys.begin();
+                           TensorSpec::Address address; // shared by all cells emitted for this entry
+                           for (const auto &dim : type.dimensions()) {
+                               if (dim.is_mapped()) {
+                                   address.emplace(dim.name, *sparse_addr_iter++); // keys are consumed in dimension order
+                               }
+                           }
+                           assert(sparse_addr_iter == keys.end());
+                           for (size_t i = 0; i < values.size(); ++i) { // one output cell per value slot
+                               size_t dense_key = i;
+                               for (auto dim = type.dimensions().rbegin(); // reverse order: last indexed dimension is least significant
+                                    dim != type.dimensions().rend();
+                                    ++dim)
+                               {
+                                   if (dim->is_indexed()) {
+                                       size_t label = dense_key % dim->size;
+                                       address.emplace(dim->name, label).first->second = TensorSpec::Label(label); // assign explicitly: address is reused across i, so the key may already be present
+                                       dense_key /= dim->size;
+                                   }
+                               }
+                               result.add(address, values[i]);
+                           }
+                       });
+        return result;
+    }
+};
+
 } // namespace vespalib::eval::<unnamed>
 
 
@@ -150,5 +219,16 @@ operator<<(std::ostream &out, const TensorSpec &spec)
     return out;
 }
 
+TensorSpec
+TensorSpec::normalize() const // builds and returns a separate TensorSpec; this object is left untouched (const)
+{
+    ValueType my_type = ValueType::from_spec(type()); // interpret the stored type string
+    if (my_type.is_error()) {
+        return TensorSpec(my_type.to_spec()); // error type: return a spec with no cells added
+    }
+    return typify_invoke<1,TypifyCellType,NormalizeTensorSpec>(my_type.cell_type(), my_type, *this); // dispatch on cell type (presumably to NormalizeTensorSpec::invoke<T>)
+}
+
+
 } // namespace vespalib::eval
 } // namespace vespalib
diff --git a/eval/src/vespa/eval/eval/tensor_spec.h b/eval/src/vespa/eval/eval/tensor_spec.h
index 8f02e56f860..41c65a1c4fb 100644
--- a/eval/src/vespa/eval/eval/tensor_spec.h
+++ b/eval/src/vespa/eval/eval/tensor_spec.h
@@ -72,6 +72,7 @@ public:
     const vespalib::string &type() const { return _type; }
     const Cells &cells() const { return _cells; }
     vespalib::string to_string() const;
+    TensorSpec normalize() const;
     void to_slime(slime::Cursor &tensor) const;
     static TensorSpec from_slime(const slime::Inspector &tensor);
     static TensorSpec from_value(const eval::Value &value);
author	Arne Juul <arnej@verizonmedia.com>	2020-11-25 15:41:42 +0000
committer	Arne Juul <arnej@verizonmedia.com>	2020-11-25 17:14:38 +0000
commit	aff89369768fdbbca74d4eabe796fafac80bb465 (patch)
tree	65f9a485db9563b64ccb03464b5180b763ebac80 /eval
parent	e1584673531bc771fa94731da337ce311b4ff7d1 (diff)