diff options
author | Jon Bratseth <bratseth@oath.com> | 2018-03-09 13:21:42 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-03-09 13:21:42 +0100 |
commit | cdc39bff34edb40bf58e7777f3a3846c7c80c171 (patch) | |
tree | 5e32485cfd693162c78a69002f885381a67ef4ca /searchlib | |
parent | 49913f1b6a031c477b997f39dcd14ed604ab9789 (diff) | |
parent | 28eb8acb97a8b1c1b3f3afc02d3e84003526947c (diff) |
Merge pull request #5267 from vespa-engine/lesters/tensorflow-broadcasting
Make TensorFlow import joins compatible with broadcasting
Diffstat (limited to 'searchlib')
6 files changed, 218 insertions, 30 deletions
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/importer/operations/Join.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/importer/operations/Join.java index 3e6e036636d..0f9833567c7 100644 --- a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/importer/operations/Join.java +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/importer/operations/Join.java @@ -4,9 +4,11 @@ package com.yahoo.searchlib.rankingexpression.integration.tensorflow.importer.op import com.yahoo.searchlib.rankingexpression.integration.tensorflow.importer.DimensionRenamer; import com.yahoo.searchlib.rankingexpression.integration.tensorflow.importer.OrderedTensorType; import com.yahoo.tensor.TensorType; +import com.yahoo.tensor.functions.Reduce; import com.yahoo.tensor.functions.TensorFunction; import org.tensorflow.framework.NodeDef; +import java.util.ArrayList; import java.util.List; import java.util.Optional; import java.util.function.DoubleBinaryOperator; @@ -25,10 +27,52 @@ public class Join extends TensorFlowOperation { if (!allInputTypesPresent(2)) { return null; } - OrderedTensorType a = inputs.get(0).type().get(); - OrderedTensorType b = inputs.get(1).type().get(); - OrderedTensorType out = a.type().rank() >= b.type().rank() ? a : b; - return out; + OrderedTensorType a = largestInput().type().get(); + OrderedTensorType b = smallestInput().type().get(); + + // Well now we have potentially entered the wonderful world of "broadcasting" + // https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html + // In broadcasting, the size of each dimension is compared element-wise, + // starting with the trailing dimensions and working forward. A special + // case occurs when the size of one dimension is 1, while the other is not. + // Then the dimension with size 1 is "stretched" to be of compatible size. 
+ // + // An example: + // + // Tensor A: d0[5], d1[1], d2[3], d3[1] + // Tensor B: d1[4], d2[1], d3[2] + // + // In TensorFlow and using the above rules of broadcasting, the resulting + // type is: + // d0[5], d1[4], d2[3], d3[2] + // + // However, in Vespa's tensor logic, the join of the two above tensors would + // result in a tensor of type: + // d0[5], d1[1], d2[1], d3[1] + // + // By reducing the dimensions of size 1 in each tensor before joining, + // we get the same results as in TensorFlow. + + OrderedTensorType.Builder builder = new OrderedTensorType.Builder(node); + int sizeDifference = a.rank() - b.rank(); + for (int i = 0; i < a.rank(); ++i) { + TensorType.Dimension aDim = a.dimensions().get(i); + long size = aDim.size().orElse(-1L); + + if (i - sizeDifference >= 0) { + TensorType.Dimension bDim = b.dimensions().get(i - sizeDifference); + size = Math.max(size, bDim.size().orElse(-1L)); + } + + if (aDim.type() == TensorType.Dimension.Type.indexedBound) { + builder.add(TensorType.Dimension.indexed(aDim.name(), size)); + } else if (aDim.type() == TensorType.Dimension.Type.indexedUnbound) { + builder.add(TensorType.Dimension.indexed(aDim.name())); + } else if (aDim.type() == TensorType.Dimension.Type.mapped) { + builder.add(TensorType.Dimension.mapped(aDim.name())); + } + } + return builder.build(); } @Override @@ -36,15 +80,39 @@ public class Join extends TensorFlowOperation { if (!allInputTypesPresent(2)) { return null; } - Optional<TensorFunction> aFunction = inputs.get(0).function(); - Optional<TensorFunction> bFunction = inputs.get(1).function(); - if (!aFunction.isPresent() || !bFunction.isPresent()) { + if (!allInputFunctionsPresent(2)) { return null; } - // The dimension renaming below takes care of broadcasting. 
+ TensorFlowOperation a = largestInput(); + TensorFlowOperation b = smallestInput(); + + List<String> aDimensionsToReduce = new ArrayList<>(); + List<String> bDimensionsToReduce = new ArrayList<>(); + int sizeDifference = a.type().get().rank() - b.type().get().rank(); + for (int i = 0; i < b.type().get().rank(); ++i) { + TensorType.Dimension bDim = b.type().get().dimensions().get(i); + TensorType.Dimension aDim = a.type().get().dimensions().get(i + sizeDifference); + long bSize = bDim.size().orElse(-1L); + long aSize = aDim.size().orElse(-1L); + if (bSize == 1L && aSize != 1L) { + bDimensionsToReduce.add(bDim.name()); + } + if (aSize == 1L && bSize != 1L) { + aDimensionsToReduce.add(bDim.name()); + } + } + + TensorFunction aReducedFunction = a.function().get(); + if (aDimensionsToReduce.size() > 0) { + aReducedFunction = new Reduce(a.function().get(), Reduce.Aggregator.sum, aDimensionsToReduce); + } + TensorFunction bReducedFunction = b.function().get(); + if (bDimensionsToReduce.size() > 0) { + bReducedFunction = new Reduce(b.function().get(), Reduce.Aggregator.sum, bDimensionsToReduce); + } - return new com.yahoo.tensor.functions.Join(aFunction.get(), bFunction.get(), operator); + return new com.yahoo.tensor.functions.Join(aReducedFunction, bReducedFunction, operator); } @Override @@ -52,22 +120,8 @@ public class Join extends TensorFlowOperation { if (!allInputTypesPresent(2)) { return; } - - // Well now we have potentially entered the wonderful world of "broadcasting" - // https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html - // I'm not able to extract from that any unambiguous specification of which dimensions - // should be "stretched" when the tensor do not have the same number of dimensions. - // From trying this with TensorFlow it appears that the second tensor is matched to the - // "end" (highest numbered) dimensions of the first, but I'm not sure whether this is generally true. 
- // Anyway, we move the dimensions of b to the last dimensions of a (instead of by default, the first). - - OrderedTensorType a = inputs.get(0).type().get(); - OrderedTensorType b = inputs.get(1).type().get(); - if (a.rank() < b.rank()) { - OrderedTensorType temp = a; - a = b; - b = temp; - } + OrderedTensorType a = largestInput().type().get(); + OrderedTensorType b = smallestInput().type().get(); int sizeDifference = a.rank() - b.rank(); for (int i = 0; i < b.rank(); ++i) { String bDim = b.dimensions().get(i).name(); @@ -76,4 +130,16 @@ public class Join extends TensorFlowOperation { } } + private TensorFlowOperation largestInput() { + OrderedTensorType a = inputs.get(0).type().get(); + OrderedTensorType b = inputs.get(1).type().get(); + return a.rank() >= b.rank() ? inputs.get(0) : inputs.get(1); + } + + private TensorFlowOperation smallestInput() { + OrderedTensorType a = inputs.get(0).type().get(); + OrderedTensorType b = inputs.get(1).type().get(); + return a.rank() < b.rank() ? inputs.get(0) : inputs.get(1); + } + } diff --git a/searchlib/src/test/files/integration/tensorflow/dropout/dropout.py b/searchlib/src/test/files/integration/tensorflow/dropout/dropout.py index adbf29b9ab6..06ae4c4e5d5 100644 --- a/searchlib/src/test/files/integration/tensorflow/dropout/dropout.py +++ b/searchlib/src/test/files/integration/tensorflow/dropout/dropout.py @@ -16,8 +16,11 @@ X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X") y = tf.placeholder(tf.int64, shape=(None), name="y") training = tf.placeholder_with_default(False, shape=(), name='training') +def leaky_relu_with_small_constant(z, name=None): + return tf.maximum(tf.constant(0.01, shape=[1]) * z, z, name=name) + X_drop = tf.layers.dropout(X, dropout_rate, training=training, name="xdrop") -output = tf.layers.dense(X_drop, n_outputs, name="outputs") +output = tf.layers.dense(X_drop, n_outputs, activation=leaky_relu_with_small_constant, name="outputs") init = tf.global_variables_initializer() file_writer = 
tf.summary.FileWriter(logdir, tf.get_default_graph()) diff --git a/searchlib/src/test/files/integration/tensorflow/dropout/saved/saved_model.pbtxt b/searchlib/src/test/files/integration/tensorflow/dropout/saved/saved_model.pbtxt index 52ae5e77a40..ad431f0460d 100644 --- a/searchlib/src/test/files/integration/tensorflow/dropout/saved/saved_model.pbtxt +++ b/searchlib/src/test/files/integration/tensorflow/dropout/saved/saved_model.pbtxt @@ -218,6 +218,35 @@ meta_graphs { } } op { + name: "Maximum" + input_arg { + name: "x" + type_attr: "T" + } + input_arg { + name: "y" + type_attr: "T" + } + output_arg { + name: "z" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + } + } + } + is_commutative: true + } + op { name: "Merge" input_arg { name: "inputs" @@ -2022,6 +2051,96 @@ meta_graphs { } } node { + name: "outputs/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 1 + } + } + float_val: 0.009999999776482582 + } + } + } + } + node { + name: "outputs/mul" + op: "Mul" + input: "outputs/Const" + input: "outputs/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 10 + } + } + } + } + } + } + node { + name: "outputs/Maximum" + op: "Maximum" + input: "outputs/mul" + input: "outputs/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 10 + } + } + } + } + } + } + node { name: "init" op: "NoOp" input: "^outputs/kernel/Assign" @@ -2082,7 +2201,7 @@ meta_graphs { dtype: DT_STRING tensor_shape { } - string_val: 
"_temp_8370883d2d9a4584b706fa987019b91d/part" + string_val: "_temp_6962088d414d471890a43f51e0ba56f9/part" } } } @@ -2738,7 +2857,7 @@ meta_graphs { outputs { key: "y" value { - name: "outputs/BiasAdd:0" + name: "outputs/Maximum:0" dtype: DT_FLOAT tensor_shape { dim { diff --git a/searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.data-00000-of-00001 b/searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.data-00000-of-00001 Binary files differindex e1b1b015b9f..000c9b3a7b5 100644 --- a/searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.data-00000-of-00001 +++ b/searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.data-00000-of-00001 diff --git a/searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.index b/searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.index Binary files differindex 04ace49d9e3..9492ef4bde2 100644 --- a/searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.index +++ b/searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.index diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/DropoutImportTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/DropoutImportTestCase.java index a13ff3147c8..c0e25a85ed0 100644 --- a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/DropoutImportTestCase.java +++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/DropoutImportTestCase.java @@ -31,8 +31,8 @@ public class DropoutImportTestCase { RankingExpression output = signature.outputExpression("y"); assertNotNull(output); - assertEquals("outputs/BiasAdd", output.getName()); - assertEquals("join(reduce(join(tf_macro_X, constant(outputs_kernel_read), f(a,b)(a * b)), sum, 
d2), constant(outputs_bias_read), f(a,b)(a + b))", + assertEquals("outputs/Maximum", output.getName()); + assertEquals("join(join(tf_macro_outputs_BiasAdd, reduce(constant(outputs_Const), sum, d1), f(a,b)(a * b)), tf_macro_outputs_BiasAdd, f(a,b)(max(a,b)))", output.getRoot().toString()); model.assertEqualResult("X", output.getName()); } |