diff options
author | Jon Bratseth <bratseth@oath.com> | 2018-03-09 13:21:42 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-03-09 13:21:42 +0100 |
commit | cdc39bff34edb40bf58e7777f3a3846c7c80c171 (patch) | |
tree | 5e32485cfd693162c78a69002f885381a67ef4ca /searchlib | |
parent | 49913f1b6a031c477b997f39dcd14ed604ab9789 (diff) | |
parent | 28eb8acb97a8b1c1b3f3afc02d3e84003526947c (diff) |
Merge pull request #5267 from vespa-engine/lesters/tensorflow-broadcasting
Make TensorFlow import joins compatible with broadcasting
Diffstat (limited to 'searchlib')
6 files changed, 218 insertions, 30 deletions
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/importer/operations/Join.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/importer/operations/Join.java index 3e6e036636d..0f9833567c7 100644 --- a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/importer/operations/Join.java +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/importer/operations/Join.java @@ -4,9 +4,11 @@ package com.yahoo.searchlib.rankingexpression.integration.tensorflow.importer.op import com.yahoo.searchlib.rankingexpression.integration.tensorflow.importer.DimensionRenamer; import com.yahoo.searchlib.rankingexpression.integration.tensorflow.importer.OrderedTensorType; import com.yahoo.tensor.TensorType; +import com.yahoo.tensor.functions.Reduce; import com.yahoo.tensor.functions.TensorFunction; import org.tensorflow.framework.NodeDef; +import java.util.ArrayList; import java.util.List; import java.util.Optional; import java.util.function.DoubleBinaryOperator; @@ -25,10 +27,52 @@ public class Join extends TensorFlowOperation { if (!allInputTypesPresent(2)) { return null; } - OrderedTensorType a = inputs.get(0).type().get(); - OrderedTensorType b = inputs.get(1).type().get(); - OrderedTensorType out = a.type().rank() >= b.type().rank() ? a : b; - return out; + OrderedTensorType a = largestInput().type().get(); + OrderedTensorType b = smallestInput().type().get(); + + // Well now we have potentially entered the wonderful world of "broadcasting" + // https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html + // In broadcasting, the size of each dimension is compared element-wise, + // starting with the trailing dimensions and working forward. A special + // case occurs when the size of one dimension is 1, while the other is not. + // Then the dimension with size 1 is "stretched" to be of compatible size. 
+ // + // An example: + // + // Tensor A: d0[5], d1[1], d2[3], d3[1] + // Tensor B: d1[4], d2[1], d3[2] + // + // In TensorFlow and using the above rules of broadcasting, the resulting + // type is: + // d0[5], d1[4], d2[3], d3[2] + // + // However, in Vespa's tensor logic, the join of the two above tensors would + // result in a tensor of type: + // d0[5], d1[1], d2[1], d3[1] + // + // By reducing the dimensions of size 1 in each tensor before joining, + // we get the same results as in TensorFlow. + + OrderedTensorType.Builder builder = new OrderedTensorType.Builder(node); + int sizeDifference = a.rank() - b.rank(); + for (int i = 0; i < a.rank(); ++i) { + TensorType.Dimension aDim = a.dimensions().get(i); + long size = aDim.size().orElse(-1L); + + if (i - sizeDifference >= 0) { + TensorType.Dimension bDim = b.dimensions().get(i - sizeDifference); + size = Math.max(size, bDim.size().orElse(-1L)); + } + + if (aDim.type() == TensorType.Dimension.Type.indexedBound) { + builder.add(TensorType.Dimension.indexed(aDim.name(), size)); + } else if (aDim.type() == TensorType.Dimension.Type.indexedUnbound) { + builder.add(TensorType.Dimension.indexed(aDim.name())); + } else if (aDim.type() == TensorType.Dimension.Type.mapped) { + builder.add(TensorType.Dimension.mapped(aDim.name())); + } + } + return builder.build(); } @Override @@ -36,15 +80,39 @@ public class Join extends TensorFlowOperation { if (!allInputTypesPresent(2)) { return null; } - Optional<TensorFunction> aFunction = inputs.get(0).function(); - Optional<TensorFunction> bFunction = inputs.get(1).function(); - if (!aFunction.isPresent() || !bFunction.isPresent()) { + if (!allInputFunctionsPresent(2)) { return null; } - // The dimension renaming below takes care of broadcasting. 
+ TensorFlowOperation a = largestInput(); + TensorFlowOperation b = smallestInput(); + + List<String> aDimensionsToReduce = new ArrayList<>(); + List<String> bDimensionsToReduce = new ArrayList<>(); + int sizeDifference = a.type().get().rank() - b.type().get().rank(); + for (int i = 0; i < b.type().get().rank(); ++i) { + TensorType.Dimension bDim = b.type().get().dimensions().get(i); + TensorType.Dimension aDim = a.type().get().dimensions().get(i + sizeDifference); + long bSize = bDim.size().orElse(-1L); + long aSize = aDim.size().orElse(-1L); + if (bSize == 1L && aSize != 1L) { + bDimensionsToReduce.add(bDim.name()); + } + if (aSize == 1L && bSize != 1L) { + aDimensionsToReduce.add(bDim.name()); + } + } + + TensorFunction aReducedFunction = a.function().get(); + if (aDimensionsToReduce.size() > 0) { + aReducedFunction = new Reduce(a.function().get(), Reduce.Aggregator.sum, aDimensionsToReduce); + } + TensorFunction bReducedFunction = b.function().get(); + if (bDimensionsToReduce.size() > 0) { + bReducedFunction = new Reduce(b.function().get(), Reduce.Aggregator.sum, bDimensionsToReduce); + } - return new com.yahoo.tensor.functions.Join(aFunction.get(), bFunction.get(), operator); + return new com.yahoo.tensor.functions.Join(aReducedFunction, bReducedFunction, operator); } @Override @@ -52,22 +120,8 @@ public class Join extends TensorFlowOperation { if (!allInputTypesPresent(2)) { return; } - - // Well now we have potentially entered the wonderful world of "broadcasting" - // https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html - // I'm not able to extract from that any unambiguous specification of which dimensions - // should be "stretched" when the tensor do not have the same number of dimensions. - // From trying this with TensorFlow it appears that the second tensor is matched to the - // "end" (highest numbered) dimensions of the first, but I'm not sure whether this is generally true. 
- // Anyway, we move the dimensions of b to the last dimensions of a (instead of by default, the first). - - OrderedTensorType a = inputs.get(0).type().get(); - OrderedTensorType b = inputs.get(1).type().get(); - if (a.rank() < b.rank()) { - OrderedTensorType temp = a; - a = b; - b = temp; - } + OrderedTensorType a = largestInput().type().get(); + OrderedTensorType b = smallestInput().type().get(); int sizeDifference = a.rank() - b.rank(); for (int i = 0; i < b.rank(); ++i) { String bDim = b.dimensions().get(i).name(); @@ -76,4 +130,16 @@ public class Join extends TensorFlowOperation { } } + private TensorFlowOperation largestInput() { + OrderedTensorType a = inputs.get(0).type().get(); + OrderedTensorType b = inputs.get(1).type().get(); + return a.rank() >= b.rank() ? inputs.get(0) : inputs.get(1); + } + + private TensorFlowOperation smallestInput() { + OrderedTensorType a = inputs.get(0).type().get(); + OrderedTensorType b = inputs.get(1).type().get(); + return a.rank() < b.rank() ? inputs.get(0) : inputs.get(1); + } + } diff --git a/searchlib/src/test/files/integration/tensorflow/dropout/dropout.py b/searchlib/src/test/files/integration/tensorflow/dropout/dropout.py index adbf29b9ab6..06ae4c4e5d5 100644 --- a/searchlib/src/test/files/integration/tensorflow/dropout/dropout.py +++ b/searchlib/src/test/files/integration/tensorflow/dropout/dropout.py @@ -16,8 +16,11 @@ X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X") y = tf.placeholder(tf.int64, shape=(None), name="y") training = tf.placeholder_with_default(False, shape=(), name='training') +def leaky_relu_with_small_constant(z, name=None): + return tf.maximum(tf.constant(0.01, shape=[1]) * z, z, name=name) + X_drop = tf.layers.dropout(X, dropout_rate, training=training, name="xdrop") -output = tf.layers.dense(X_drop, n_outputs, name="outputs") +output = tf.layers.dense(X_drop, n_outputs, activation=leaky_relu_with_small_constant, name="outputs") init = tf.global_variables_initializer() file_writer = 
tf.summary.FileWriter(logdir, tf.get_default_graph()) diff --git a/searchlib/src/test/files/integration/tensorflow/dropout/saved/saved_model.pbtxt b/searchlib/src/test/files/integration/tensorflow/dropout/saved/saved_model.pbtxt index 52ae5e77a40..ad431f0460d 100644 --- a/searchlib/src/test/files/integration/tensorflow/dropout/saved/saved_model.pbtxt +++ b/searchlib/src/test/files/integration/tensorflow/dropout/saved/saved_model.pbtxt @@ -218,6 +218,35 @@ meta_graphs { } } op { + name: "Maximum" + input_arg { + name: "x" + type_attr: "T" + } + input_arg { + name: "y" + type_attr: "T" + } + output_arg { + name: "z" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + } + } + } + is_commutative: true + } + op { name: "Merge" input_arg { name: "inputs" @@ -2022,6 +2051,96 @@ meta_graphs { } } node { + name: "outputs/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 1 + } + } + float_val: 0.009999999776482582 + } + } + } + } + node { + name: "outputs/mul" + op: "Mul" + input: "outputs/Const" + input: "outputs/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 10 + } + } + } + } + } + } + node { + name: "outputs/Maximum" + op: "Maximum" + input: "outputs/mul" + input: "outputs/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 10 + } + } + } + } + } + } + node { name: "init" op: "NoOp" input: "^outputs/kernel/Assign" @@ -2082,7 +2201,7 @@ meta_graphs { dtype: DT_STRING tensor_shape { } - string_val: 
"_temp_8370883d2d9a4584b706fa987019b91d/part" + string_val: "_temp_6962088d414d471890a43f51e0ba56f9/part" } } } @@ -2738,7 +2857,7 @@ meta_graphs { outputs { key: "y" value { - name: "outputs/BiasAdd:0" + name: "outputs/Maximum:0" dtype: DT_FLOAT tensor_shape { dim { diff --git a/searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.data-00000-of-00001 b/searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.data-00000-of-00001 Binary files differindex e1b1b015b9f..000c9b3a7b5 100644 --- a/searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.data-00000-of-00001 +++ b/searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.data-00000-of-00001 diff --git a/searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.index b/searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.index Binary files differindex 04ace49d9e3..9492ef4bde2 100644 --- a/searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.index +++ b/searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.index diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/DropoutImportTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/DropoutImportTestCase.java index a13ff3147c8..c0e25a85ed0 100644 --- a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/DropoutImportTestCase.java +++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/DropoutImportTestCase.java @@ -31,8 +31,8 @@ public class DropoutImportTestCase { RankingExpression output = signature.outputExpression("y"); assertNotNull(output); - assertEquals("outputs/BiasAdd", output.getName()); - assertEquals("join(reduce(join(tf_macro_X, constant(outputs_kernel_read), f(a,b)(a * b)), sum, 
d2), constant(outputs_bias_read), f(a,b)(a + b))", + assertEquals("outputs/Maximum", output.getName()); + assertEquals("join(join(tf_macro_outputs_BiasAdd, reduce(constant(outputs_Const), sum, d1), f(a,b)(a * b)), tf_macro_outputs_BiasAdd, f(a,b)(max(a,b)))", output.getRoot().toString()); model.assertEqualResult("X", output.getName()); } |