import theano.tensor as T
from agentnet.utils.layers import DictLayer
from lasagne.init import GlorotUniform


class AttentionLayer(DictLayer):
    def __init__(self,
                 input_sequence,
                 controller_state,
                 num_units,
                 mask_input=None,
                 nonlinearity=T.tanh,
                 weights_nonlinearity=T.nnet.softmax,
                 W_enc=GlorotUniform(),
                 W_dec=GlorotUniform(),
                 W_out=GlorotUniform(),
                 ):
        """
        Implements basic Bahdanau-style attention. The implementation is inspired by tfnn@yandex.

        In short, attention lets the network decide which part of the sequence/image to look at
        at each step, using a small one-layer block that scores every (input_element, controller) pair
        as "how much do I want to see this input_element right now".
        You can read more about it here: http://distill.pub/2016/augmented-rnns/ .

        This layer outputs a dict with keys "attn" and "probs":
        - attn - inputs processed with attention, shape [batch_size, enc_units]
        - probs - attention probability for each time-step, shape [batch_size, seq_length]

        This layer assumes the input sequence/image/video/etc. to have exactly 1 spatial dimension
        (see the format notes below and the sketch after this list).
        - rnn/emb format [batch, seq_len, units] works out of the box
        - 1d convolution format [batch, units, seq_len] needs dimshuffle(conv, [0, 2, 1])
        - 2d convolution format [batch, units, dim1, dim2] needs a two-step procedure:
          - step1 = dimshuffle(conv, [0, 2, 3, 1])
          - step2 = reshape(step1, [-1, dim1*dim2, units])
        - higher dimensionality follows the same principle as the 2d example above
        - reshape and dimshuffle can both be found in lasagne.layers (aliases to ReshapeLayer and DimshuffleLayer)
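
        For example, a minimal sketch for the 1d convolution case (here `conv` stands for a
        hypothetical Conv1DLayer output of shape [batch, units, seq_len] and `dec` for a
        single-tick decoder state layer; both names are illustrative):

            from lasagne.layers import dimshuffle
            seq = dimshuffle(conv, [0, 2, 1])               # -> [batch, seq_len, units]
            attn = AttentionLayer(seq, dec, num_units=128)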

        When calling get_output, you can pass the flag hard_attention=True to replace soft attention
        with an argmax over the logits (see the usage sketch below).

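        Rough usage sketch (here `enc` and `dec` are hypothetical layers with the shapes described
        in the parameter list below; how you unpack the result follows the agentnet DictLayer conventions):

            import lasagne
            attn = AttentionLayer(enc, dec, num_units=128)
            soft = lasagne.layers.get_output(attn)                        # {'attn': ..., 'probs': ...}
            hard = lasagne.layers.get_output(attn, hard_attention=True)   # argmax instead of softmax
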
        :param input_sequence: sequence of inputs to be processed with attention
        :type input_sequence: lasagne.layers.Layer with shape [batch, seq_length, units]

        :param controller_state: single time-step state of the decoder (usually an lstm/gru/rnn hidden state)
        :type controller_state: lasagne.layers.Layer with shape [batch, units]

        :param num_units: number of hidden units in the attention intermediate activation
        :type num_units: int

        :param nonlinearity: nonlinearity applied to the attention intermediate activation
        :type nonlinearity: function(x) -> x that works with theano tensors

        :param weights_nonlinearity: nonlinearity that converts logits of shape [batch, seq_length]
            into attention weights of the same shape
            (you can provide softmax with a tunable temperature, gumbel-softmax or anything of the sort)
        :type weights_nonlinearity: function(x) -> x that works with theano tensors

        :param mask_input: mask for input_sequence (like other lasagne masks). Default is no mask
        :type mask_input: lasagne.layers.Layer with shape [batch, seq_length]

        The remaining params (W_enc, W_dec, W_out) can each be a theano shared variable, expression,
        numpy array or callable: an initial value, expression or initializer for the weights.
        W_enc and W_dec should be matrices of shape ``(num_inputs, num_units)``; W_out has shape ``(num_units, 1)``.
        See :func:`lasagne.utils.create_param` for more information.

        The roles of those params are:
        W_enc - weights from the encoder (each state) to the hidden layer
        W_dec - weights from the decoder (current state) to the hidden layer
        W_out - hidden-to-logit weights
        No logit biases are introduced because softmax is invariant to adding a bias to each logit.
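
        In other words, for each time-step i the attention logit is computed as
            logit_i = nonlinearity(enc_i.dot(W_enc) + dec.dot(W_dec)).dot(W_out)
        and the attention weights are weights_nonlinearity(logits) (softmax by default).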

        """
        assert len(input_sequence.output_shape) == 3, "input_sequence must be 3-dimensional (batch, time, units)"
        assert len(controller_state.output_shape) == 2, "controller_state must be 2-dimensional for a single tick (batch, units)"
        assert mask_input is None or len(mask_input.output_shape) == 2, "mask_input must be 2-dimensional (batch, time) or None"

        batch_size, seq_len, enc_units = input_sequence.output_shape
        dec_units = controller_state.output_shape[-1]

        incomings = [input_sequence, controller_state]
        if mask_input is not None:
            incomings.append(mask_input)

        output_shapes = {'attn': (batch_size, enc_units),
                         'probs': (batch_size, seq_len)}

        super(AttentionLayer, self).__init__(incomings, output_shapes)

        self.W_enc = self.add_param(W_enc, (enc_units, num_units), name='enc_to_hid')
        self.W_dec = self.add_param(W_dec, (dec_units, num_units), name='dec_to_hid')
        self.W_out = self.add_param(W_out, (num_units, 1), name='hid_to_logit')
        self.nonlinearity = nonlinearity
        self.weights_nonlinearity = weights_nonlinearity

    def get_output_for(self, inputs, hard_attention=False, **kwargs):
        """
        :param inputs: should consist of (enc_seq, dec) or (enc_seq, dec, inp_mask)
            Shapes are
            enc_seq: [batch_size, seq_length, enc_units]
            dec: [batch_size, dec_units]
            inp_mask: [batch_size, seq_length] if any

        :param hard_attention: if True, replaces soft attention with an argmax over the logits

        ---------------------------------
        :returns: dict with keys "attn" and "probs"
            - attn - inputs processed with attention, shape [batch_size, enc_units]
            - probs - attention probability for each time-step, shape [batch_size, seq_length]
        """
        assert len(inputs) in (2, 3), "inputs should be (enc_seq, dec) or (enc_seq, dec, inp_mask)"
        mask_provided = len(inputs) == 3

        # parse inputs
        enc_seq, dec = inputs[:2]
        if mask_provided:
            mask = inputs[-1]

        # Hidden layer activations, shape [batch, seq_len, num_units]
        hid = self.nonlinearity(
            enc_seq.dot(self.W_enc) +
            dec.dot(self.W_dec)[:, None, :]
        )
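        # note: enc_seq.dot(self.W_enc) is [batch, seq_len, num_units], while dec.dot(self.W_dec)[:, None, :]
        # is [batch, 1, num_units] and broadcasts over the time axis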

        # Logits from hidden. Mask implementation from tfnn
        logits = hid.dot(self.W_out)[:, :, 0]  # [batch_size, seq_len]

        if mask_provided:  # subtract a large number from mask==0 time-steps
            logits -= (1 - mask) * 1000  # (written to match the tfnn implementation)
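            # after weights_nonlinearity (softmax by default), such positions get near-zero attention probability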

        if not hard_attention:
            # regular soft attention, weights from weights_nonlinearity (softmax by default)
            probs = self.weights_nonlinearity(logits)  # [batch_size, seq_len]

            # Compose attention as a probability-weighted sum over encoder states.
            attn = T.sum(probs[:, :, None] * enc_seq, axis=1)

            return {'attn': attn, 'probs': probs}

        else:  # hard attention
            # use argmax over logits
            max_i = logits.argmax(axis=-1)
            batch_size = enc_seq.shape[0]
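            # advanced indexing: pick the most-attended encoder state for each batch element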
            attn = enc_seq[T.arange(batch_size), max_i]

            # one-hot probabilities
            one_hot = T.extra_ops.to_one_hot(max_i, logits.shape[1])

            return {'attn': attn, 'probs': one_hot}