
Commit 4d74bec

[snake-dqn] Add graphics and inference page (#270)

- Adjusts hyperparameters

Fixes tensorflow/tfjs#1573

1 parent b789b5e · commit 4d74bec

16 files changed: +653 −80 lines

README.md (+11)

@@ -272,6 +272,17 @@ to another project.
     <td>Layers</td>
     <td>Export trained model from tfjs-node and load it in browser</td>
   </tr>
+  <tr>
+    <td><a href="./snake-dqn">snake-dqn</a></td>
+    <td><a href="https://storage.googleapis.com/tfjs-examples/snake-dqn/index.html">🔗</a></td>
+    <td></td>
+    <td>Reinforcement learning</td>
+    <td>Deep Q-Network (DQN)</td>
+    <td>Node.js</td>
+    <td>Browser</td>
+    <td>Layers</td>
+    <td>Export trained model from tfjs-node and load it in browser</td>
+  </tr>
   <tr>
     <td><a href="./translation">translation</a></td>
     <td><a href="https://storage.googleapis.com/tfjs-examples/translation/dist/index.html">🔗</a></td>

snake-dqn/.gitignore (+1)

@@ -0,0 +1 @@
+models/

snake-dqn/README.md (+13)

@@ -1,5 +1,9 @@
 # Using Deep Q-Learning to Solve the Snake Game
 
+![DQN Snake Game](./images/dqn-screenshot.png)
+
+[See this example live!](https://storage.googleapis.com/tfjs-examples/snake-dqn/index.html)
+
 Deep Q-Learning is a reinforcement-learning (RL) algorithm. It is used
 frequently to solve arcade-style games like the Snake game used in this
 example.
@@ -59,3 +63,12 @@ tensorboard --logdir /tmp/snake_logs
 
 Once started, the tensorboard backend process will print an `http://` URL to the
 console. Open your browser and navigate to the URL to see the logged curves.
+
+## Running the demo in the browser
+
+After the DQN training completes, you can use the following command to
+launch a demo that shows how the network plays the game in the browser:
+
+```sh
+yarn watch
+```

snake-dqn/agent.js (+9 −4)

@@ -67,6 +67,7 @@ export class SnakeGameAgent {
 
   reset() {
     this.cumulativeReward_ = 0;
+    this.fruitsEaten_ = 0;
    this.game.reset();
   }
 
@@ -98,15 +99,19 @@ export class SnakeGameAgent {
      });
    }
 
-    const {state: nextState, reward, done} = this.game.step(action);
+    const {state: nextState, reward, done, fruitEaten} = this.game.step(action);
 
    this.replayMemory.append([state, action, reward, done, nextState]);
 
    this.cumulativeReward_ += reward;
+    if (fruitEaten) {
+      this.fruitsEaten_++;
+    }
    const output = {
      action,
      cumulativeReward: this.cumulativeReward_,
-      done
+      done,
+      fruitsEaten: this.fruitsEaten_
    };
    if (done) {
      this.reset();
@@ -130,8 +135,8 @@ export class SnakeGameAgent {
        batch.map(example => example[0]), this.game.height, this.game.width);
    const actionTensor = tf.tensor1d(
        batch.map(example => example[1]), 'int32');
-    const qs = this.onlineNetwork.predict(
-        stateTensor).mul(tf.oneHot(actionTensor, NUM_ACTIONS)).sum(-1);
+    const qs = this.onlineNetwork.apply(stateTensor, {training: true})
+        .mul(tf.oneHot(actionTensor, NUM_ACTIONS)).sum(-1);
 
    const rewardTensor = tf.tensor1d(batch.map(example => example[2]));
    const nextStateTensor = getStateTensor(
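
The switch from `predict()` to `apply(..., {training: true})` matters because this commit also adds batch-normalization and dropout layers to the network (see dqn.js below), and those layers behave differently in training and inference modes. A minimal, self-contained sketch of the difference (illustrative only, not code from this repository):

```js
import * as tf from '@tensorflow/tfjs';

// Toy model containing a dropout layer, just to show the mode difference.
const model = tf.sequential();
model.add(tf.layers.dense({units: 8, activation: 'relu', inputShape: [4]}));
model.add(tf.layers.dropout({rate: 0.25}));
model.add(tf.layers.dense({units: 3}));

const x = tf.randomNormal([2, 4]);
const inferenceOut = model.predict(x);                 // dropout is a no-op here
const trainingOut = model.apply(x, {training: true});  // dropout (and batch-norm
                                                       // statistics) run in training mode
```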

snake-dqn/agent_test.js (+1 −1)

@@ -51,7 +51,7 @@ describe('SnakeGameAgent', () => {
    expect(agent.replayMemory.buffer[bufferIndex % 100][1])
        .toEqual(out.action);
 
-    expect(agent.replayMemory.buffer[bufferIndex % 100][2]).toEqual(
+    expect(agent.replayMemory.buffer[bufferIndex % 100][2]).toBeCloseTo(
        outPrev == null ? out.cumulativeReward :
            out.cumulativeReward - outPrev.cumulativeReward);
    expect(agent.replayMemory.buffer[bufferIndex % 100][3]).toEqual(out.done);
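
Switching from `toEqual` to `toBeCloseTo` is the usual fix when comparing accumulated floating-point rewards, which rarely match exactly. A generic Jasmine illustration (not from this repository):

```js
// Accumulated float arithmetic is not exact, so strict equality is brittle.
expect(0.1 + 0.2).not.toEqual(0.3);  // 0.30000000000000004 !== 0.3
expect(0.1 + 0.2).toBeCloseTo(0.3);  // passes within the default precision
```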

snake-dqn/dqn.js (+4)

@@ -38,12 +38,14 @@ export function createDeepQNetwork(h, w, numActions) {
     activation: 'relu',
     inputShape: [h, w, 2]
   }));
+  model.add(tf.layers.batchNormalization());
   model.add(tf.layers.conv2d({
     filters: 256,
     kernelSize: 3,
     strides: 1,
     activation: 'relu'
   }));
+  model.add(tf.layers.batchNormalization());
   model.add(tf.layers.conv2d({
     filters: 256,
     kernelSize: 3,
@@ -52,7 +54,9 @@
   }));
   model.add(tf.layers.flatten());
   model.add(tf.layers.dense({units: 100, activation: 'relu'}));
+  model.add(tf.layers.dropout({rate: 0.25}));
   model.add(tf.layers.dense({units: numActions}));
+
   return model;
 }
 
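
Put together, the network after this commit interleaves batch normalization with the convolutional blocks and adds dropout before the output head. The sketch below is reconstructed from the hunks above; the filter count, kernel size, and strides of the first conv2d layer, and the tail of the third conv2d's config, are not visible in this diff, so those values are placeholders rather than the repository's actual settings.

```js
import * as tf from '@tensorflow/tfjs';

export function createDeepQNetwork(h, w, numActions) {
  const model = tf.sequential();
  model.add(tf.layers.conv2d({
    filters: 128,       // placeholder: not shown in the diff
    kernelSize: 3,      // placeholder: not shown in the diff
    strides: 1,         // placeholder: not shown in the diff
    activation: 'relu',
    inputShape: [h, w, 2]
  }));
  model.add(tf.layers.batchNormalization());
  model.add(tf.layers.conv2d({
    filters: 256,
    kernelSize: 3,
    strides: 1,
    activation: 'relu'
  }));
  model.add(tf.layers.batchNormalization());
  model.add(tf.layers.conv2d({
    filters: 256,
    kernelSize: 3,
    strides: 1,          // placeholder: cut off by the hunk boundary
    activation: 'relu'   // placeholder: cut off by the hunk boundary
  }));
  model.add(tf.layers.flatten());
  model.add(tf.layers.dense({units: 100, activation: 'relu'}));
  model.add(tf.layers.dropout({rate: 0.25}));
  model.add(tf.layers.dense({units: numActions}));
  return model;
}
```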

snake-dqn/dqn_test.js (+11 −10)

@@ -68,16 +68,18 @@ describe('copyWeights', () => {
 
    // Initially, the two networks should have different values in their
    // weights.
-    const onlineWeights0 = onlineNetwork.getWeights();
-    const targetWeights0 = targetNetwork.getWeights();
-    expect(onlineWeights0.length).toEqual(targetWeights0.length);
-    // The 1st weight is the first conv layer's kernel.
-    expect(onlineWeights0[0].sub(targetWeights0[0]).abs().mean().arraySync())
+    const conv1Weights0 = onlineNetwork.layers[0].getWeights();
+    const conv1Weights1 = targetNetwork.layers[0].getWeights();
+    expect(conv1Weights0.length).toEqual(conv1Weights1.length);
+    // The 1st weight is the 1st conv layer's kernel.
+    expect(conv1Weights0[0].sub(conv1Weights1[0]).abs().mean().arraySync())
        .toBeGreaterThan(0);
-    // Skip the 2nd weight, because it's the bias of the first conv layer's
-    // kernel, which has an all-zero initializer.
-    // The 3rd weight is the second conv layer's kernel.
-    expect(onlineWeights0[2].sub(targetWeights0[2]).abs().mean().arraySync())
+
+    const conv2Weights0 = onlineNetwork.layers[2].getWeights();
+    const conv2Weights1 = targetNetwork.layers[2].getWeights();
+    expect(conv2Weights0.length).toEqual(conv2Weights1.length);
+    // The 1st weight is the 2nd conv layer's kernel.
+    expect(conv2Weights0[0].sub(conv2Weights1[0]).abs().mean().arraySync())
        .toBeGreaterThan(0);
 
    copyWeights(targetNetwork, onlineNetwork);
@@ -87,7 +89,6 @@
    const onlineWeights1 = onlineNetwork.getWeights();
    const targetWeights1 = targetNetwork.getWeights();
    expect(onlineWeights1.length).toEqual(targetWeights1.length);
-    expect(onlineWeights1.length).toEqual(onlineWeights0.length);
    for (let i = 0; i < onlineWeights1.length; ++i) {
      expect(onlineWeights1[i].sub(targetWeights1[i]).abs().mean().arraySync())
          .toEqual(0);
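
For context, the `copyWeights` helper under test is the standard online-to-target network sync in DQN training. A typical TF.js implementation looks like the sketch below (illustrative; the repository's dqn.js may differ in detail):

```js
// Copy every weight value from srcNetwork into destNetwork, in place.
// In DQN training this is called periodically to refresh the target
// network from the online network.
export function copyWeights(destNetwork, srcNetwork) {
  destNetwork.setWeights(srcNetwork.getWeights());
}
```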

snake-dqn/images/dqn-screenshot.png (binary file added, 23.8 KB)

snake-dqn/index.html (+95)

@@ -0,0 +1,95 @@
+<!--
+Copyright 2018 Google LLC. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================
+-->
+
+<!doctype html>
+
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <link rel="stylesheet" href="../shared/tfjs-examples.css" />
+</head>
+
+<style>
+  #load-model-div {
+    margin-top: 10px;
+    margin-bottom: 10px;
+  }
+
+  #reset {
+    margin-left: 100px;
+  }
+
+  #show-q-values-div {
+    margin-top: 5px;
+  }
+
+  #game-status-div {
+    margin-top: 15px;
+  }
+</style>
+
+<body>
+  <div class='tfjs-example-container centered-container'>
+    <section class='title-area'>
+      <h1>TensorFlow.js Reinforcement Learning: Snake DQN</h1>
+      <p class='subtitle'>Deep Q-Network for the Snake Game</p>
+    </section>
+    <section>
+      <p class='section-head'>Description</p>
+      <p>
+        This page loads a trained Deep Q-Network (DQN) and uses it to play the
+        snake game.
+        The training is done in Node.js using <a href="https://github.com/tensorflow/tfjs-node">tfjs-node</a>.
+        See <a href="https://github.com/tensorflow/tfjs-examples/blob/master/snake-dqn/train.js">train.js</a>.
+      </p>
+    </section>
+    <section>
+      <p class='section-head'>Algorithm</p>
+      <p>
+        A <a href="https://en.wikipedia.org/wiki/Q-learning#Variants">DQN</a> is trained to estimate the value of actions given the current game state.
+        The DQN is a 2D convolutional network. See <a href="https://github.com/tensorflow/tfjs-examples/blob/master/snake-dqn/dqn.js">dqn.js</a>.
+        The epsilon-greedy algorithm is used to balance exploration and exploitation during training.
+      </p>
+    </section>
+
+    <section>
+      <div id="load-model-div">
+        <button id="load-hosted-model" width="200px" disabled>Load hosted model</button>
+      </div>
+
+      <div>
+        <button id="auto-play-stop" disabled>Auto Play</button>
+        <button id="step" disabled>Step</button>
+        <button id="reset" disabled>Reset</button>
+      </div>
+      <div id="show-q-values-div">
+        <input type="checkbox" id="show-q-values" checked>
+        <span>Show Q-values</span>
+      </div>
+      <div id="game-status-div">
+        <span id="game-status">Game started.</span>
+      </div>
+      <div>
+        <canvas id="game-canvas" height="400px" width="400px"></canvas>
+      </div>
+    </section>
+
+  </div>
+
+</body>
+
+<script src="index.js"></script>
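
Since the page description mentions the epsilon-greedy policy, here is a minimal sketch of how such action selection is commonly written with TF.js. It is illustrative only; the repository's agent code may differ, and `pickAction` is a hypothetical helper name.

```js
import * as tf from '@tensorflow/tfjs';

// Epsilon-greedy action selection: explore with probability `epsilon`,
// otherwise exploit by taking the argmax of the predicted Q-values.
function pickAction(onlineNetwork, stateTensor, epsilon, numActions) {
  if (Math.random() < epsilon) {
    return Math.floor(Math.random() * numActions);  // random exploratory action
  }
  return tf.tidy(() => {
    const qValues = onlineNetwork.predict(stateTensor);  // shape: [1, numActions]
    return qValues.argMax(-1).dataSync()[0];             // greedy action index
  });
}
```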
