Skip to content
This repository was archived by the owner on Mar 31, 2019. It is now read-only.

Commit b891405

Browse files
Develop (#100)
* remove comments of deprecated stuff * incr tmax default * add actual actor-critic scheme * attention&tests; Simplify recurrence and allow unfixed size outputs * typo * update scheme
1 parent 65bf76b commit b891405

File tree

5 files changed

+290
-26
lines changed

5 files changed

+290
-26
lines changed

Diff for: agentnet/agent/recurrence.py

+22-23
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ class Recurrence(DictLayer):
3636
Recurrence is a lasagne layer that takes an inner graph and rolls it for several steps using scan.
3737
Conversely, it can be used as any other lasagne layer, even as a part of another recurrence.
3838
39-
[tutorial on recurrence](https://github.com/yandexdataschool/AgentNet/blob/master/examples/Custom%20rnn%20with%20recurrence.ipynb)
39+
[tutorial on recurrence](https://github.com/yandexdataschool/AgentNet/blob/master
40+
/examples/Custom%20rnn%20with%20recurrence.ipynb)
4041
4142
:param input_nonsequences: inputs that are same at each time tick.
4243
Technically it's a dictionary that maps InputLayer from one-step graph
@@ -90,15 +91,9 @@ class Recurrence(DictLayer):
9091
- state variable sequences in order of dict.items()
9192
- tracked_outputs in given order
9293
93-
WARNING! can not be used further as an atomic lasagne layer.
94-
Instead, consider calling .get_sequences() or unpacking it
95-
96-
state_sequence_layers, output_sequence_layers = Recurrence(...).get_sequences()
97-
(see .get_sequences help for more info)
98-
99-
OR
100-
101-
state_seq_layer, ... , output1_seq_layer, output2_seq_layer, ... = Recurrence(...)
94+
WARNING! this layer has a dictionary of outputs.
95+
It shouldn't be used further as an atomic lasagne layer.
96+
Instead, consider using my_recurrence[one_of_states_or_outputs] (see code below)
10297
10398
Examples
10499
--------
@@ -483,15 +478,23 @@ def get_output_for(self, inputs, accumulate_updates="warn",recurrence_flags={},
483478
## initial states that are given as input
484479
initial_states_provided = OrderedDict(list(zip(self.state_init, initial_states_provided)))
485480

486-
def get_initial_state(state_out_layer,batch_size=batch_size):
487-
"""Pick dedicated initial state or create zeros of appropriate shape and dtype"""
481+
def get_initial_state(layer, batch_size=batch_size):
482+
"""Pick dedicated initial state or create zeros of appropriate shape and dtype
483+
:param layer: layer for new hidden state (key of self.state_variables)
484+
:param batch_size: symbolic batch_size
485+
"""
488486
# if we have a dedicated init, use it
489-
if state_out_layer in initial_states_provided:
490-
initial_state = initial_states_provided[state_out_layer]
487+
if layer in initial_states_provided:
488+
initial_state = initial_states_provided[layer]
491489
# otherwise initialize with zeros
492490
else:
493-
dtype = get_layer_dtype(state_out_layer)
494-
initial_state = T.zeros((batch_size,) + tuple(state_out_layer.output_shape[1:]),dtype=dtype)
491+
assert None not in layer.output_shape[1:],\
492+
"Some of your state layers ({}) has undefined shape along non-batch dimension. (shape: {}) " \
493+
"Therefore, it's initial value can't be inferred. Please set explicit initial value via state_init" \
494+
"".format(layer.name or layer, layer.output_shape)
495+
496+
dtype = get_layer_dtype(layer)
497+
initial_state = T.zeros((batch_size,) + tuple(layer.output_shape[1:]), dtype=dtype)
495498

496499
#cast to non-broadcastable tensortype
497500
t_state = T.TensorType(dtype, (False,) * initial_state.ndim)
@@ -507,16 +510,16 @@ def get_initial_state(state_out_layer,batch_size=batch_size):
507510
# AND scan is not unrolled, the step function will not receive prev outputs as parameters, while
508511
# if unroll_scan, these parameters are present. we forcibly initialize outputs to prevent
509512
# complications during parameter parsing in step function below.
510-
initial_output_fillers = list(map(get_initial_state, self.tracked_outputs))
513+
initial_output_fillers = [None]*len(self.tracked_outputs)
511514

512515

513516
outputs_info = initial_states + initial_output_fillers
514517

515518
# recurrent step function
516519
def step(*args):
517520

518-
sequence_slices, prev_states, prev_outputs, nonsequences = \
519-
unpack_list(args, [n_input_seq, n_states, n_outputs, n_input_nonseq])
521+
sequence_slices, prev_states, nonsequences = \
522+
unpack_list(args, [n_input_seq, n_states, n_input_nonseq])
520523
# make dicts of prev_states and inputs
521524
prev_states_dict = OrderedDict(zip(list(self.state_variables.keys()), prev_states))
522525

@@ -539,10 +542,6 @@ def step(*args):
539542
for (prev_state,state) in zip(prev_states,new_states)]
540543
assert None not in new_states, "Some state variables has different dtype/shape from init ."
541544

542-
new_outputs = [get_type(prev_out).convert_variable(out.astype(prev_out.dtype))
543-
for (prev_out,out) in zip(prev_outputs,new_outputs)]
544-
assert None not in new_outputs, "Some of the tracked outputs has shape/dtype changing over time. Please report this."
545-
546545
return new_states + new_outputs
547546

548547
###handling mask_input###

Diff for: agentnet/experiments/openai_gym/pool.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ def update(self, n_steps=100, append=False, max_size=None, add_last_observation=
206206
max_pool_size=max_size or self.max_size)
207207

208208
def evaluate(self, n_games=1, save_path="./records", use_monitor=True, record_video=True, verbose=True,
209-
t_max=10000):
209+
t_max=100000):
210210
"""Plays an entire game start to end, records the logs(and possibly mp4 video), returns reward.
211211
212212
:param save_path: where to save the report

Diff for: agentnet/memory/attention.py

+151
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
import theano.tensor as T
2+
from agentnet.utils.layers import DictLayer
3+
from lasagne.init import GlorotUniform
4+
5+
class AttentionLayer(DictLayer):
6+
def __init__(self,
7+
input_sequence,
8+
controller_state,
9+
num_units,
10+
mask_input = None,
11+
nonlinearity = T.tanh,
12+
weights_nonlinearity=T.nnet.softmax,
13+
W_enc = GlorotUniform(),
14+
W_dec = GlorotUniform(),
15+
W_out = GlorotUniform(),
16+
):
17+
"""
18+
Implements basic Bahdanau-style attention. Implementation is inspired by tfnn@yandex.
19+
20+
In short, attention lets the network decide which fraction of the sequence/image it should view now
21+
by using small one-layer block that predicts (input_element,controller) -> do i want to see input_element
22+
for all input_elements. You can read more about it here - http://distill.pub/2016/augmented-rnns/ .
23+
24+
This layer outputs a dict with keys "attn" and "probs"
25+
- attn - inputs processed with attention, shape [batch_size, enc_units]
26+
- probs - probabilities for each activation [batch_size, seq_length]
27+
28+
This layer assumes input sequence/image/video/whatever to have 1 spatial dimension (see below).
29+
- rnn/emb format [batch,seq_len,units] works out of the box
30+
- 1d convolution format [batch,units,seq_len] needs dimshuffle(conv,[0,2,1])
31+
- 2d convolution format [batch,units,dim1,dim2] needs two-step procedure
32+
- step1 = dimshuffle(conv,[0,2,3,1])
33+
- step2 = reshape(step1,[-1,dim1*dim2,units])
34+
- higher dimensionality follows the same principle as 2d example above
35+
- reshape and dimshuffle can both be found in lasagne.layers (aliases to ReshapeLayer and DimshuffleLayer)
36+
37+
When calling get_output, you can pass flag hard_attention=True to replace attention with argmax over logits.
38+
39+
:param input_sequence: sequence of inputs to be processed with attention
40+
:type input_sequence: lasagne.layers.Layer with shape [batch,seq_length,units]
41+
42+
:param controller_state: single time-step state of decoder (usually lstm/gru/rnn hid)
43+
:type controller_state: lasagne.layers.Layer with shape [batch,units]
44+
45+
:param num_units: number of hidden units in attention intermediate activation
46+
:type num_units: int
47+
48+
:param nonlinearity: nonlinearity in attention intermediate activation
49+
:type nonlinearity: function(x) -> x that works with theano tensors
50+
51+
:param weights_nonlinearity: nonlinearity that converts logits of shape [batch,seq_length] into attention weights of same shape
52+
(you can provide softmax with tunable temperature or gumbel-softmax or anything of the sort)
53+
:type weights_nonlinearity: function(x) -> x that works with theano tensors
54+
55+
56+
:param mask_input: mask for input_sequence (like other lasagne masks). Default is no mask
57+
:type mask_input: lasagne.layers.Layer with shape [batch,seq_length]
58+
59+
Other params can be theano shared variable, expression, numpy array or callable.
60+
Initial value, expression or initializer for the weights.
61+
These should be a matrix with shape ``(num_inputs, num_units)``.
62+
See :func:`lasagne.utils.create_param` for more information.
63+
64+
The roles of those params are:
65+
W_enc - weights from encoder (each state) to hidden layer
66+
W_dec - weights from decoder (each state) to hidden layer
67+
W_out - hidden to logit weights
68+
No logit biases are introduced because softmax is invariant to adding the same bias to every logit
69+
70+
"""
71+
assert len(input_sequence.output_shape)==3,"input_sequence must be a 3-dimensional (batch,time,units)"
72+
assert len(controller_state.output_shape)==2,"controller_state must be a 2-dimensional for single tick (batch,units)"
73+
assert mask_input is None or len(mask_input.output_shape)==2,"mask_input must be 2-dimensional (batch,time) or None"
74+
75+
batch_size,seq_len,enc_units = input_sequence.output_shape
76+
dec_units = controller_state.output_shape[-1]
77+
78+
incomings = [input_sequence,controller_state]
79+
if mask_input is not None:
80+
incomings.append(mask_input)
81+
82+
output_shapes = {'attn':(batch_size,enc_units),
83+
'probs':(batch_size,seq_len)}
84+
85+
super(AttentionLayer,self).__init__(incomings,output_shapes)
86+
87+
88+
89+
self.W_enc = self.add_param(W_enc,(enc_units,num_units),name='enc_to_hid')
90+
self.W_dec = self.add_param(W_dec,(dec_units,num_units),name='dec_to_hid')
91+
self.W_out = self.add_param(W_out,(num_units,1),name='hid_to_logit')
92+
self.nonlinearity = nonlinearity
93+
self.weights_nonlinearity = weights_nonlinearity
94+
95+
def get_output_for(self, inputs, hard_attention=False , **kwargs):
96+
"""
97+
:param inputs: should consist of (enc_seq, dec) or (enc_seq, dec, inp_mask)
98+
Shapes are
99+
enc_seq: [batch_size, seq_length, enc_units]
100+
dec: [batch_size, dec_units]
101+
inp_mask: [batch_size,seq_length] if any
102+
103+
---------------------------------
104+
:returns: dict with keys "attn" and "probs"
105+
- attn - inputs processed with attention, shape [batch_size, enc_size]
106+
- probs - probabilities for each activation [batch_size, ninp]
107+
"""
108+
assert len(inputs) in (2,3),"inputs should be (enc_seq, dec) or (enc_seq, dec, inp_mask)"
109+
mask_provided = len(inputs)==3
110+
111+
#parse inputs
112+
enc_seq, dec = inputs[:2]
113+
if mask_provided:
114+
mask = inputs[-1]
115+
116+
#Hidden layer activations, shape [batch,seq_len,hid_units]
117+
hid = self.nonlinearity(
118+
enc_seq.dot(self.W_enc) +\
119+
dec.dot(self.W_dec)[:,None,:]
120+
)
121+
122+
123+
#Logits from hidden. Mask implementation from tfnn
124+
125+
logits = hid.dot(self.W_out)[:,:,0] # [batch_size,seq_len]
126+
127+
if mask_provided: # subtract a large number from mask=0 time-steps
128+
logits -= (1 - mask) * 1000 # (written to match tfnn implementation)
129+
130+
if not hard_attention:
131+
#regular soft attention, use softmax
132+
probs = self.weights_nonlinearity(logits) # [batch_size,seq_len]
133+
134+
# Compose attention.
135+
attn = T.sum(probs[:,:,None] * enc_seq, axis=1)
136+
137+
return {'attn':attn, 'probs':probs}
138+
139+
else: #hard_attention
140+
141+
#use argmax over logits
142+
max_i = logits.argmax(axis=-1)
143+
batch_size = enc_seq.shape[0]
144+
attn = enc_seq[T.arange(batch_size),max_i]
145+
146+
# one-hot probabilities
147+
one_hot = T.extra_ops.to_one_hot(max_i,logits.shape[1])
148+
149+
return {'attn': attn, 'probs': one_hot }
150+
151+

Diff for: examples/Deep Kung-Fu with GRUs and A2c algorithm (OpenAI Gym).ipynb

+2-2
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@
135135
"metadata": {},
136136
"source": [
137137
"# Basic agent setup\n",
138-
"Here we define a simple agent that maps game images into Qvalues using simple convolutional neural network.\n",
138+
"Here we define a simple agent that maps game images into action probas and state values via convolutional neural network.\n",
139139
"\n",
140140
"![a2c](https://s12.postimg.org/odg35favx/a2c_scheme.png)"
141141
]
@@ -248,7 +248,7 @@
248248
"from agentnet.agent import Agent\n",
249249
"#all together\n",
250250
"agent = Agent(observation_layers=observation_layer,\n",
251-
" policy_estimators=(logits_layer,V_layer,V_old),\n",
251+
" policy_estimators=(logits_layer,V_layer),\n",
252252
" agent_states={new_wnd:prev_wnd,new_cell:prev_cell,new_out:prev_out},\n",
253253
" action_layers=action_layer)\n"
254254
]

Diff for: tests/test_attention.py

+114
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
"""
2+
Tests for attention module
3+
"""
4+
import numpy as np
5+
import theano
6+
import agentnet
7+
from agentnet.memory import GRUCell
8+
from agentnet.memory.attention import AttentionLayer
9+
from lasagne.layers import *
10+
11+
12+
def test_attention():
13+
"""
14+
minimalistic test that showcases an attentive RNN that reads some chunk
15+
of input sequence on each tick and outputs nothing
16+
"""
17+
18+
# step inner graph
19+
class step:
20+
enc_activations = InputLayer((None, None, 12), name='placeholder for encoder activations (to be attended)')
21+
prev_gru = InputLayer((None, 15),name='gru prev state (15 units)')
22+
23+
attention = AttentionLayer(enc_activations,prev_gru,num_units=16)
24+
25+
gru = GRUCell(prev_gru, attention['attn'] , name='rnn that reads enc_sequence with attention')
26+
27+
attn_probs = attention['probs'] #weights from inside attention
28+
29+
# outer graph
30+
31+
32+
encoder_activations = InputLayer((None,None,12),name='encoder sequence (will be sent to enc_sequence)')
33+
34+
rec = agentnet.Recurrence(input_nonsequences={step.enc_activations: encoder_activations},
35+
state_variables={step.gru: step.prev_gru},
36+
tracked_outputs=[step.attn_probs],
37+
unroll_scan=False,
38+
n_steps = 10)
39+
40+
weights = get_all_params(rec)
41+
42+
gru_states,attention_probs_seq = rec[step.gru,step.attn_probs]
43+
44+
run = theano.function([encoder_activations.input_var], get_output([gru_states,attention_probs_seq]),
45+
updates=rec.get_automatic_updates(),allow_input_downcast=True)
46+
47+
#run on surrogate data
48+
gru_seq,probs_seq = run(np.random.randn(5, 25, 12))
49+
50+
assert gru_seq.shape == (5, 10, 15) #hidden GRU states, 5 samples/10ticks/15units
51+
assert probs_seq.shape == (5, 10, 25) #attention sequences, 5 samples/10ticks/25 input seq length
52+
53+
#hard attention
54+
hard_outputs = get_output([gru_states,attention_probs_seq],recurrence_flags={'hard_attention':True})
55+
56+
hard_run = theano.function([encoder_activations.input_var], hard_outputs,
57+
updates=rec.get_automatic_updates(),allow_input_downcast=True)
58+
59+
#run on surrogate data
60+
_,hard_probs_seq = hard_run(np.random.randn(5, 25, 12))
61+
62+
#check if probs are one-hot
63+
assert hard_probs_seq.shape == (5, 10, 25) #attention sequences, 5 samples/10ticks/25 input seq length
64+
assert len(np.unique(hard_probs_seq.ravel()))==2 #only 0's and 1's
65+
66+
67+
def test_attention_2d():
68+
"""
69+
Almost a copy-paste of previous test, but this time attention is applied to an image instead
70+
of a 1d sequence.
71+
"""
72+
73+
# step inner graph
74+
class step:
75+
image = InputLayer((None,3,24,24), name='placeholder for 24x24 image (to be attended)')
76+
prev_gru = InputLayer((None, 15),name='gru prev state (15 units)')
77+
78+
#get image dimensions
79+
n_channels,width,height = image.output_shape[1:]
80+
81+
#flatten all image spots to look like 1d sequence
82+
image_chunks = reshape(dimshuffle(image,[0,2,3,1]),(-1,width*height,n_channels))
83+
84+
attention = AttentionLayer(image_chunks,prev_gru,num_units=16)
85+
86+
gru = GRUCell(prev_gru, attention['attn'] , name='rnn that reads enc_sequence with attention')
87+
88+
#weights from inside attention - reshape back into image
89+
attn_probs = reshape(attention['probs'],(-1,width,height))
90+
91+
92+
# outer graph
93+
94+
95+
input_image = InputLayer((None,3,24,24),name='24x24-pixel RGB image to be sent into step.image')
96+
97+
rec = agentnet.Recurrence(input_nonsequences={step.image: input_image},
98+
state_variables={step.gru: step.prev_gru},
99+
tracked_outputs=[step.attn_probs],
100+
unroll_scan=False,
101+
n_steps = 10)
102+
103+
weights = get_all_params(rec)
104+
105+
gru_states,attention_probs_seq = rec[step.gru,step.attn_probs]
106+
107+
run = theano.function([input_image.input_var], get_output([gru_states,attention_probs_seq]),
108+
updates=rec.get_automatic_updates(),allow_input_downcast=True)
109+
110+
#run on surrogate data
111+
gru_seq,probs_seq = run(np.random.randn(5, 3, 24,24))
112+
113+
assert gru_seq.shape == (5, 10, 15) #hidden GRU states, 5 samples/10ticks/15units
114+
assert probs_seq.shape == (5, 10, 24,24) #attention sequences, 5 samples/10ticks/24width/24height

0 commit comments

Comments
 (0)