summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@oath.com>2018-03-09 13:21:42 +0100
committerGitHub <noreply@github.com>2018-03-09 13:21:42 +0100
commitcdc39bff34edb40bf58e7777f3a3846c7c80c171 (patch)
tree5e32485cfd693162c78a69002f885381a67ef4ca /searchlib
parent49913f1b6a031c477b997f39dcd14ed604ab9789 (diff)
parent28eb8acb97a8b1c1b3f3afc02d3e84003526947c (diff)
Merge pull request #5267 from vespa-engine/lesters/tensorflow-broadcasting
Make TensorFlow import joins compatible with broadcasting
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/importer/operations/Join.java116
-rw-r--r--searchlib/src/test/files/integration/tensorflow/dropout/dropout.py5
-rw-r--r--searchlib/src/test/files/integration/tensorflow/dropout/saved/saved_model.pbtxt123
-rw-r--r--searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.data-00000-of-00001bin31400 -> 31400 bytes
-rw-r--r--searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.indexbin165 -> 165 bytes
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/DropoutImportTestCase.java4
6 files changed, 218 insertions, 30 deletions
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/importer/operations/Join.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/importer/operations/Join.java
index 3e6e036636d..0f9833567c7 100644
--- a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/importer/operations/Join.java
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/importer/operations/Join.java
@@ -4,9 +4,11 @@ package com.yahoo.searchlib.rankingexpression.integration.tensorflow.importer.op
import com.yahoo.searchlib.rankingexpression.integration.tensorflow.importer.DimensionRenamer;
import com.yahoo.searchlib.rankingexpression.integration.tensorflow.importer.OrderedTensorType;
import com.yahoo.tensor.TensorType;
+import com.yahoo.tensor.functions.Reduce;
import com.yahoo.tensor.functions.TensorFunction;
import org.tensorflow.framework.NodeDef;
+import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.function.DoubleBinaryOperator;
@@ -25,10 +27,52 @@ public class Join extends TensorFlowOperation {
if (!allInputTypesPresent(2)) {
return null;
}
- OrderedTensorType a = inputs.get(0).type().get();
- OrderedTensorType b = inputs.get(1).type().get();
- OrderedTensorType out = a.type().rank() >= b.type().rank() ? a : b;
- return out;
+ OrderedTensorType a = largestInput().type().get();
+ OrderedTensorType b = smallestInput().type().get();
+
+ // Well now we have potentially entered the wonderful world of "broadcasting"
+ // https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html
+ // In broadcasting, the size of each dimension is compared element-wise,
+ // starting with the trailing dimensions and working forward. A special
+ // case occurs when the size of one dimension is 1, while the other is not.
+ // Then the dimension with size 1 is "stretched" to be of compatible size.
+ //
+ // An example:
+ //
+ // Tensor A: d0[5], d1[1], d2[3], d3[1]
+ // Tensor B: d1[4], d2[1], d3[2]
+ //
+ // In TensorFlow and using the above rules of broadcasting, the resulting
+ // type is:
+    //     d0[5], d1[4], d2[3], d3[2]
+ //
+ // However, in Vespa's tensor logic, the join of the two above tensors would
+ // result in a tensor of type:
+ // d0[5], d1[1], d2[1], d3[1]
+ //
+ // By reducing the dimensions of size 1 in each tensor before joining,
+ // we get equal results as in TensorFlow.
+
+ OrderedTensorType.Builder builder = new OrderedTensorType.Builder(node);
+ int sizeDifference = a.rank() - b.rank();
+ for (int i = 0; i < a.rank(); ++i) {
+ TensorType.Dimension aDim = a.dimensions().get(i);
+ long size = aDim.size().orElse(-1L);
+
+ if (i - sizeDifference >= 0) {
+ TensorType.Dimension bDim = b.dimensions().get(i - sizeDifference);
+ size = Math.max(size, bDim.size().orElse(-1L));
+ }
+
+ if (aDim.type() == TensorType.Dimension.Type.indexedBound) {
+ builder.add(TensorType.Dimension.indexed(aDim.name(), size));
+ } else if (aDim.type() == TensorType.Dimension.Type.indexedUnbound) {
+ builder.add(TensorType.Dimension.indexed(aDim.name()));
+ } else if (aDim.type() == TensorType.Dimension.Type.mapped) {
+ builder.add(TensorType.Dimension.mapped(aDim.name()));
+ }
+ }
+ return builder.build();
}
@Override
@@ -36,15 +80,39 @@ public class Join extends TensorFlowOperation {
if (!allInputTypesPresent(2)) {
return null;
}
- Optional<TensorFunction> aFunction = inputs.get(0).function();
- Optional<TensorFunction> bFunction = inputs.get(1).function();
- if (!aFunction.isPresent() || !bFunction.isPresent()) {
+ if (!allInputFunctionsPresent(2)) {
return null;
}
- // The dimension renaming below takes care of broadcasting.
+ TensorFlowOperation a = largestInput();
+ TensorFlowOperation b = smallestInput();
+
+ List<String> aDimensionsToReduce = new ArrayList<>();
+ List<String> bDimensionsToReduce = new ArrayList<>();
+ int sizeDifference = a.type().get().rank() - b.type().get().rank();
+ for (int i = 0; i < b.type().get().rank(); ++i) {
+ TensorType.Dimension bDim = b.type().get().dimensions().get(i);
+ TensorType.Dimension aDim = a.type().get().dimensions().get(i + sizeDifference);
+ long bSize = bDim.size().orElse(-1L);
+ long aSize = aDim.size().orElse(-1L);
+ if (bSize == 1L && aSize != 1L) {
+ bDimensionsToReduce.add(bDim.name());
+ }
+ if (aSize == 1L && bSize != 1L) {
+ aDimensionsToReduce.add(bDim.name());
+ }
+ }
+
+ TensorFunction aReducedFunction = a.function().get();
+ if (aDimensionsToReduce.size() > 0) {
+ aReducedFunction = new Reduce(a.function().get(), Reduce.Aggregator.sum, aDimensionsToReduce);
+ }
+ TensorFunction bReducedFunction = b.function().get();
+ if (bDimensionsToReduce.size() > 0) {
+ bReducedFunction = new Reduce(b.function().get(), Reduce.Aggregator.sum, bDimensionsToReduce);
+ }
- return new com.yahoo.tensor.functions.Join(aFunction.get(), bFunction.get(), operator);
+ return new com.yahoo.tensor.functions.Join(aReducedFunction, bReducedFunction, operator);
}
@Override
@@ -52,22 +120,8 @@ public class Join extends TensorFlowOperation {
if (!allInputTypesPresent(2)) {
return;
}
-
- // Well now we have potentially entered the wonderful world of "broadcasting"
- // https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html
- // I'm not able to extract from that any unambiguous specification of which dimensions
- // should be "stretched" when the tensor do not have the same number of dimensions.
- // From trying this with TensorFlow it appears that the second tensor is matched to the
- // "end" (highest numbered) dimensions of the first, but I'm not sure whether this is generally true.
- // Anyway, we move the dimensions of b to the last dimensions of a (instead of by default, the first).
-
- OrderedTensorType a = inputs.get(0).type().get();
- OrderedTensorType b = inputs.get(1).type().get();
- if (a.rank() < b.rank()) {
- OrderedTensorType temp = a;
- a = b;
- b = temp;
- }
+ OrderedTensorType a = largestInput().type().get();
+ OrderedTensorType b = smallestInput().type().get();
int sizeDifference = a.rank() - b.rank();
for (int i = 0; i < b.rank(); ++i) {
String bDim = b.dimensions().get(i).name();
@@ -76,4 +130,16 @@ public class Join extends TensorFlowOperation {
}
}
+ private TensorFlowOperation largestInput() {
+ OrderedTensorType a = inputs.get(0).type().get();
+ OrderedTensorType b = inputs.get(1).type().get();
+ return a.rank() >= b.rank() ? inputs.get(0) : inputs.get(1);
+ }
+
+ private TensorFlowOperation smallestInput() {
+ OrderedTensorType a = inputs.get(0).type().get();
+ OrderedTensorType b = inputs.get(1).type().get();
+ return a.rank() < b.rank() ? inputs.get(0) : inputs.get(1);
+ }
+
}
diff --git a/searchlib/src/test/files/integration/tensorflow/dropout/dropout.py b/searchlib/src/test/files/integration/tensorflow/dropout/dropout.py
index adbf29b9ab6..06ae4c4e5d5 100644
--- a/searchlib/src/test/files/integration/tensorflow/dropout/dropout.py
+++ b/searchlib/src/test/files/integration/tensorflow/dropout/dropout.py
@@ -16,8 +16,11 @@ X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")
training = tf.placeholder_with_default(False, shape=(), name='training')
+def leaky_relu_with_small_constant(z, name=None):
+ return tf.maximum(tf.constant(0.01, shape=[1]) * z, z, name=name)
+
X_drop = tf.layers.dropout(X, dropout_rate, training=training, name="xdrop")
-output = tf.layers.dense(X_drop, n_outputs, name="outputs")
+output = tf.layers.dense(X_drop, n_outputs, activation=leaky_relu_with_small_constant, name="outputs")
init = tf.global_variables_initializer()
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
diff --git a/searchlib/src/test/files/integration/tensorflow/dropout/saved/saved_model.pbtxt b/searchlib/src/test/files/integration/tensorflow/dropout/saved/saved_model.pbtxt
index 52ae5e77a40..ad431f0460d 100644
--- a/searchlib/src/test/files/integration/tensorflow/dropout/saved/saved_model.pbtxt
+++ b/searchlib/src/test/files/integration/tensorflow/dropout/saved/saved_model.pbtxt
@@ -218,6 +218,35 @@ meta_graphs {
}
}
op {
+ name: "Maximum"
+ input_arg {
+ name: "x"
+ type_attr: "T"
+ }
+ input_arg {
+ name: "y"
+ type_attr: "T"
+ }
+ output_arg {
+ name: "z"
+ type_attr: "T"
+ }
+ attr {
+ name: "T"
+ type: "type"
+ allowed_values {
+ list {
+ type: DT_HALF
+ type: DT_FLOAT
+ type: DT_DOUBLE
+ type: DT_INT32
+ type: DT_INT64
+ }
+ }
+ }
+ is_commutative: true
+ }
+ op {
name: "Merge"
input_arg {
name: "inputs"
@@ -2022,6 +2051,96 @@ meta_graphs {
}
}
node {
+ name: "outputs/Const"
+ op: "Const"
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: 1
+ }
+ }
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 1
+ }
+ }
+ float_val: 0.009999999776482582
+ }
+ }
+ }
+ }
+ node {
+ name: "outputs/mul"
+ op: "Mul"
+ input: "outputs/Const"
+ input: "outputs/BiasAdd"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ dim {
+ size: 10
+ }
+ }
+ }
+ }
+ }
+ }
+ node {
+ name: "outputs/Maximum"
+ op: "Maximum"
+ input: "outputs/mul"
+ input: "outputs/BiasAdd"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_output_shapes"
+ value {
+ list {
+ shape {
+ dim {
+ size: -1
+ }
+ dim {
+ size: 10
+ }
+ }
+ }
+ }
+ }
+ }
+ node {
name: "init"
op: "NoOp"
input: "^outputs/kernel/Assign"
@@ -2082,7 +2201,7 @@ meta_graphs {
dtype: DT_STRING
tensor_shape {
}
- string_val: "_temp_8370883d2d9a4584b706fa987019b91d/part"
+ string_val: "_temp_6962088d414d471890a43f51e0ba56f9/part"
}
}
}
@@ -2738,7 +2857,7 @@ meta_graphs {
outputs {
key: "y"
value {
- name: "outputs/BiasAdd:0"
+ name: "outputs/Maximum:0"
dtype: DT_FLOAT
tensor_shape {
dim {
diff --git a/searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.data-00000-of-00001 b/searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.data-00000-of-00001
index e1b1b015b9f..000c9b3a7b5 100644
--- a/searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.data-00000-of-00001
+++ b/searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.data-00000-of-00001
Binary files differ
diff --git a/searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.index b/searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.index
index 04ace49d9e3..9492ef4bde2 100644
--- a/searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.index
+++ b/searchlib/src/test/files/integration/tensorflow/dropout/saved/variables/variables.index
Binary files differ
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/DropoutImportTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/DropoutImportTestCase.java
index a13ff3147c8..c0e25a85ed0 100644
--- a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/DropoutImportTestCase.java
+++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/integration/tensorflow/DropoutImportTestCase.java
@@ -31,8 +31,8 @@ public class DropoutImportTestCase {
RankingExpression output = signature.outputExpression("y");
assertNotNull(output);
- assertEquals("outputs/BiasAdd", output.getName());
- assertEquals("join(reduce(join(tf_macro_X, constant(outputs_kernel_read), f(a,b)(a * b)), sum, d2), constant(outputs_bias_read), f(a,b)(a + b))",
+ assertEquals("outputs/Maximum", output.getName());
+ assertEquals("join(join(tf_macro_outputs_BiasAdd, reduce(constant(outputs_Const), sum, d1), f(a,b)(a * b)), tf_macro_outputs_BiasAdd, f(a,b)(max(a,b)))",
output.getRoot().toString());
model.assertEqualResult("X", output.getName());
}