Commit a24b8ca

Setting all the optimizers to have useLocking = True (#310)
* Setting all the optimizers to have useLocking = True, like Keras. Adding a determinism test that's currently failing.
* More work on the GradientDescentTest.
* Tidying up the test.
1 parent e013353 commit a24b8ca
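For context, the change applied across all nine optimizers is the same: the op-specific useLocking option is passed when the training op is built in applyDense. The snippet below is a minimal standalone sketch of that pattern, not part of the commit; the class name and the constant stand-in gradient are invented for illustration, and it mirrors the applyGradientDescent call that GradientDescent.java now makes.

import org.tensorflow.Graph;
import org.tensorflow.Operand;
import org.tensorflow.op.Ops;
import org.tensorflow.op.core.Variable;
import org.tensorflow.op.train.ApplyGradientDescent;
import org.tensorflow.types.TFloat32;

/** Illustrative sketch only (hypothetical class); shows the useLocking(true) pattern. */
public class UseLockingSketch {
  public static void main(String[] args) {
    try (Graph g = new Graph()) {
      Ops tf = Ops.create(g);
      // A variable and a stand-in "gradient" (a plain constant, purely for illustration).
      Variable<TFloat32> var = tf.variable(tf.constant(new float[] {1f, 2f, 3f}));
      Operand<TFloat32> grad = tf.constant(new float[] {0.1f, 0.2f, 0.3f});
      // Same call shape as GradientDescent.applyDense after this commit;
      // the only change is the trailing useLocking(true) option.
      tf.train.applyGradientDescent(
          var, tf.constant(0.01f), grad, ApplyGradientDescent.useLocking(true));
    }
  }
}

With useLocking set, the apply op protects the variable update with a lock (matching Keras' behaviour), at the cost of some potential contention when several ops update the same variable concurrently.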

9 files changed: +189 −9 lines


tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/AdaDelta.java

Lines changed: 3 additions & 1 deletion
@@ -20,6 +20,7 @@
 import org.tensorflow.Output;
 import org.tensorflow.op.Op;
 import org.tensorflow.op.core.Variable;
+import org.tensorflow.op.train.ApplyAdadelta;
 import org.tensorflow.types.family.TType;

 import java.util.List;
@@ -160,7 +161,8 @@ protected <T extends TType> Op applyDense(Output<T> gradient, Output<T> variable
         tf.dtypes.cast(tf.constant(learningRate), gradient.type()),
         tf.dtypes.cast(tf.constant(rho), gradient.type()),
         tf.dtypes.cast(tf.constant(epsilon), gradient.type()),
-        gradient);
+        gradient,
+        ApplyAdadelta.useLocking(true));
   }

   /** {@inheritDoc} */

tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/AdaGrad.java

Lines changed: 5 additions & 1 deletion
@@ -19,6 +19,7 @@
 import org.tensorflow.Operand;
 import org.tensorflow.Output;
 import org.tensorflow.op.Op;
+import org.tensorflow.op.train.ApplyAdagrad;
 import org.tensorflow.op.core.Variable;
 import org.tensorflow.types.family.TType;

@@ -42,6 +43,9 @@ public class AdaGrad extends Optimizer {
   public static final float LEARNING_RATE_DEFAULT = 0.001f;
   public static final float INITIAL_ACCUMULATOR_DEFAULT = 0.01f;

+  private static final ApplyAdagrad.Options[] opts = new ApplyAdagrad.Options[]{
+      ApplyAdagrad.updateSlots(true), ApplyAdagrad.useLocking(true)};
+
   private final float learningRate;

   private final float initialAccumulatorValue;
@@ -140,7 +144,7 @@ private <T extends TType> void createAdaGradSlot(Output<T> v) {
   protected <T extends TType> Op applyDense(Output<T> gradient, Output<T> variable) {
     Variable<T> slot = getSlot(variable, ACCUMULATOR).get();
     return tf.train.applyAdagrad(
-        variable, slot, tf.dtypes.cast(tf.constant(learningRate), gradient.type()), gradient);
+        variable, slot, tf.dtypes.cast(tf.constant(learningRate), gradient.type()), gradient, opts);
   }

   /** {@inheritDoc} */

tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/AdaGradDA.java

Lines changed: 3 additions & 1 deletion
@@ -22,6 +22,7 @@
 import org.tensorflow.op.Op;
 import org.tensorflow.op.core.Assign;
 import org.tensorflow.op.core.Variable;
+import org.tensorflow.op.train.ApplyAdagradDa;
 import org.tensorflow.types.TInt64;
 import org.tensorflow.types.family.TType;

@@ -219,7 +220,8 @@ protected <T extends TType> Op applyDense(Output<T> gradient, Output<T> variable
         tf.dtypes.cast(tf.constant(learningRate), gradient.type()),
         tf.dtypes.cast(tf.constant(l1Strength), gradient.type()),
         tf.dtypes.cast(tf.constant(l2Strength), gradient.type()),
-        globalStep);
+        globalStep,
+        ApplyAdagradDa.useLocking(true));
   }

   /**

tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Adam.java

Lines changed: 3 additions & 1 deletion
@@ -26,6 +26,7 @@
 import org.tensorflow.op.core.Assign;
 import org.tensorflow.op.core.Constant;
 import org.tensorflow.op.core.Variable;
+import org.tensorflow.op.train.ApplyAdam;
 import org.tensorflow.types.TFloat32;
 import org.tensorflow.types.family.TType;

@@ -237,7 +238,8 @@ protected <T extends TType> Op applyDense(Output<T> gradient, Output<T> variable
         tf.dtypes.cast(betaOneConst, gradient.type()),
         tf.dtypes.cast(betaTwoConst, gradient.type()),
         tf.dtypes.cast(epsilonConst, gradient.type()),
-        gradient);
+        gradient,
+        ApplyAdam.useLocking(true));
   }

   /**

tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Adamax.java

Lines changed: 2 additions & 1 deletion
@@ -170,7 +170,8 @@ protected <T extends TType> Op applyDense(Output<T> gradient, Output<T> variable
         tf.dtypes.cast(betaOneConst, gradient.type()),
         tf.dtypes.cast(betaTwoConst, gradient.type()),
         tf.dtypes.cast(epsilonConst, gradient.type()),
-        gradient);
+        gradient,
+        ApplyAdaMax.useLocking(true));
   }

   /** {@inheritDoc} */

tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/GradientDescent.java

Lines changed: 5 additions & 1 deletion
@@ -18,6 +18,7 @@
 import org.tensorflow.Graph;
 import org.tensorflow.Output;
 import org.tensorflow.op.Op;
+import org.tensorflow.op.train.ApplyGradientDescent;
 import org.tensorflow.types.family.TType;

 /**
@@ -66,7 +67,10 @@ public GradientDescent(Graph graph, String name, float learningRate) {
   @Override
   protected <T extends TType> Op applyDense(Output<T> gradient, Output<T> variable) {
     return tf.train.applyGradientDescent(
-        variable, tf.dtypes.cast(tf.constant(learningRate), gradient.type()), gradient);
+        variable,
+        tf.dtypes.cast(tf.constant(learningRate), gradient.type()),
+        gradient,
+        ApplyGradientDescent.useLocking(true));
   }

   /** {@inheritDoc} */

tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/Momentum.java

Lines changed: 2 additions & 1 deletion
@@ -139,7 +139,8 @@ protected <T extends TType> Op applyDense(Output<T> gradient, Output<T> variable
         tf.dtypes.cast(tf.constant(learningRate), gradient.type()),
         gradient,
         tf.dtypes.cast(tf.constant(momentum), gradient.type()),
-        ApplyMomentum.useNesterov(useNesterov));
+        ApplyMomentum.useNesterov(useNesterov),
+        ApplyMomentum.useLocking(true));
   }

   /** {@inheritDoc} */

tensorflow-framework/src/main/java/org/tensorflow/framework/optimizers/RMSProp.java

Lines changed: 6 additions & 2 deletions
@@ -20,6 +20,8 @@
 import org.tensorflow.Output;
 import org.tensorflow.op.Op;
 import org.tensorflow.op.core.Variable;
+import org.tensorflow.op.train.ApplyCenteredRmsProp;
+import org.tensorflow.op.train.ApplyRmsProp;
 import org.tensorflow.types.family.TType;

 import java.util.List;
@@ -202,7 +204,8 @@ protected <T extends TType> Op applyDense(Output<T> gradient, Output<T> variable
           tf.dtypes.cast(tf.constant(decay), gradient.type()),
           tf.dtypes.cast(tf.constant(momentum), gradient.type()),
           tf.dtypes.cast(tf.constant(epsilon), gradient.type()),
-          gradient);
+          gradient,
+          ApplyCenteredRmsProp.useLocking(true));
     }
     return tf.train.applyRmsProp(
         variable,
@@ -212,7 +215,8 @@ protected <T extends TType> Op applyDense(Output<T> gradient, Output<T> variable
         tf.dtypes.cast(tf.constant(decay), gradient.type()),
         tf.dtypes.cast(tf.constant(momentum), gradient.type()),
         tf.dtypes.cast(tf.constant(epsilon), gradient.type()),
-        gradient);
+        gradient,
+        ApplyRmsProp.useLocking(true));
   }

   /** {@inheritDoc} */

tensorflow-framework/src/test/java/org/tensorflow/framework/optimizers/GradientDescentTest.java

Lines changed: 160 additions & 0 deletions
@@ -2,19 +2,34 @@

 import org.junit.jupiter.api.*;
 import org.tensorflow.Graph;
+import org.tensorflow.Session;
+import org.tensorflow.Tensor;
+import org.tensorflow.framework.initializers.Glorot;
+import org.tensorflow.framework.initializers.VarianceScaling;
 import org.tensorflow.framework.utils.TestSession;
+import org.tensorflow.ndarray.FloatNdArray;
 import org.tensorflow.ndarray.Shape;
+import org.tensorflow.ndarray.buffer.DataBuffers;
 import org.tensorflow.op.Op;
 import org.tensorflow.op.Ops;
 import org.tensorflow.op.core.Assign;
 import org.tensorflow.op.core.Constant;
+import org.tensorflow.op.core.Init;
+import org.tensorflow.op.core.Placeholder;
 import org.tensorflow.op.core.Variable;
+import org.tensorflow.op.math.Add;
+import org.tensorflow.op.math.Mean;
+import org.tensorflow.op.nn.Relu;
+import org.tensorflow.proto.framework.ConfigProto;
+import org.tensorflow.proto.framework.GraphDef;
 import org.tensorflow.types.TFloat32;
 import org.tensorflow.types.family.TType;

 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;

+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
 import static org.junit.jupiter.api.Assertions.assertEquals;

 /** Test cases for GradientDescent Optimizer */
@@ -97,4 +112,149 @@ public void testBasic() {
       session.evaluate(expectedVar1, var1);
     }
   }
+
+  // This test fails due to incorrect gradients being generated some of the time, when
+  // using an identical graph on identical data. It should not, but it seems to be a
+  // problem in TF-core.
+  @Disabled
+  @Test
+  public void testDeterminism() {
+    ConfigProto config =
+        ConfigProto.newBuilder()
+            .setIntraOpParallelismThreads(1)
+            .setInterOpParallelismThreads(1)
+            .build();
+
+    GraphDef def;
+    String initName;
+    String trainName;
+    String lossName;
+
+    String fcWeightName, fcBiasName, outputWeightName, outputBiasName;
+
+    try (Graph g = new Graph()) {
+      Ops tf = Ops.create(g);
+
+      Glorot<TFloat32> initializer =
+          new Glorot<>(tf, VarianceScaling.Distribution.TRUNCATED_NORMAL, 1L);
+      // Inputs
+      Placeholder<TFloat32> input =
+          tf.withName("input").placeholder(TFloat32.class, Placeholder.shape(Shape.of(-1, 20)));
+
+      // Fully connected layer
+      Variable<TFloat32> fcWeights =
+          tf.variable(initializer.call(tf.array(20L, 200L), TFloat32.class));
+      fcWeightName = fcWeights.op().name();
+      Variable<TFloat32> fcBiases = tf.variable(tf.fill(tf.array(200), tf.constant(0.1f)));
+      fcBiasName = fcBiases.op().name();
+      Relu<TFloat32> relu = tf.nn.relu(tf.math.add(tf.linalg.matMul(input, fcWeights), fcBiases));
+
+      // Output layer
+      Variable<TFloat32> outputWeights =
+          tf.variable(initializer.call(tf.array(200L, 2L), TFloat32.class));
+      outputWeightName = outputWeights.op().name();
+      Variable<TFloat32> outputBiases = tf.variable(tf.fill(tf.array(2L), tf.constant(0.1f)));
+      outputBiasName = outputBiases.op().name();
+      Add<TFloat32> output = tf.math.add(tf.linalg.matMul(relu, outputWeights), outputBiases);
+
+      // Loss
+      Placeholder<TFloat32> placeholder =
+          tf.withName("output").placeholder(TFloat32.class, Placeholder.shape(Shape.of(-1, 2)));
+      Mean<TFloat32> loss =
+          tf.math.mean(
+              tf.nn.raw.softmaxCrossEntropyWithLogits(output, placeholder).loss(), tf.constant(0));
+      lossName = loss.op().name();
+
+      GradientDescent gd = new GradientDescent(g, 10.0f);
+      Op trainingOp = gd.minimize(loss);
+      trainName = trainingOp.op().name();
+
+      // Create the init op
+      Init init = tf.init();
+      initName = init.op().name();
+
+      def = g.toGraphDef();
+    }
+
+    float[] data =
+        new float[] {
+          1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, -8.0f, -9.0f, 10.0f, 11.0f, 12.0f, 13.0f,
+          -14.0f, -15.0f, 0.16f, 0.17f, 0.18f, 1.9f, 0.2f
+        };
+    TFloat32 dataTensor = TFloat32.tensorOf(Shape.of(1, 20), DataBuffers.of(data));
+    float[] target = new float[] {0.2f, 0.8f};
+    TFloat32 targetTensor = TFloat32.tensorOf(Shape.of(1, 2), DataBuffers.of(target));
+
+    int numRuns = 20;
+    List<List<Tensor>> initialized = new ArrayList<>(numRuns);
+    List<List<Tensor>> trained = new ArrayList<>(numRuns);
+    float[] initialLoss = new float[numRuns];
+    float[] postTrainingLoss = new float[numRuns];
+
+    for (int i = 0; i < numRuns; i++) {
+      try (Graph g = new Graph();
+          Session s = new Session(g, config)) {
+        g.importGraphDef(def);
+        s.run(initName);
+
+        initialized.add(
+            s.runner()
+                .fetch(fcWeightName)
+                .fetch(fcBiasName)
+                .fetch(outputWeightName)
+                .fetch(outputBiasName)
+                .run());
+
+        TFloat32 lossVal = (TFloat32) s.runner()
+            .addTarget(trainName)
+            .feed("input", dataTensor)
+            .feed("output", targetTensor)
+            .fetch(lossName)
+            .run().get(0);
+        initialLoss[i] = lossVal.getFloat();
+        lossVal.close();
+
+        trained.add(
+            s.runner()
+                .fetch(fcWeightName)
+                .fetch(fcBiasName)
+                .fetch(outputWeightName)
+                .fetch(outputBiasName)
+                .run());
+
+        lossVal = (TFloat32) s.runner()
+            .addTarget(trainName)
+            .feed("input", dataTensor)
+            .feed("output", targetTensor)
+            .fetch(lossName)
+            .run().get(0);
+        postTrainingLoss[i] = lossVal.getFloat();
+        lossVal.close();
+      }
+    }
+
+    for (int i = 1; i < numRuns; i++) {
+      assertEquals(initialLoss[0], initialLoss[i]);
+      assertEquals(postTrainingLoss[0], postTrainingLoss[i]);
+      // Because the weights are references not copies.
+      assertEquals(initialized.get(i), trained.get(i));
+      assertEquals(
+          initialized.get(0),
+          initialized.get(i),
+          "Variables not initialized identically (0," + i + ")");
+      assertEquals(
+          trained.get(0), trained.get(i), "Variables not trained identically (0," + i + ")");
+    }
+
+    for (List<Tensor> curInit : initialized) {
+      for (Tensor t : curInit) {
+        t.close();
+      }
+    }
+    for (List<Tensor> curTrained : trained) {
+      for (Tensor t : curTrained) {
+        t.close();
+      }
+    }
+  }
 }
