This repository was archived by the owner on Jul 7, 2023. It is now read-only.

Commit 20c7e41

T2T Team authored and Ryan Sepassi committed
Discrete autoencoder with VQ-VAE as in https://arxiv.org/abs/1711.00937.
PiperOrigin-RevId: 177371794
1 parent bb1173a commit 20c7e41

11 files changed (+27, -289 lines)

README.md

+2 -5

@@ -126,12 +126,9 @@ t2t-decoder \
  --output_dir=$TRAIN_DIR \
  --decode_hparams="beam_size=$BEAM_SIZE,alpha=$ALPHA" \
  --decode_from_file=$DECODE_FILE
-  --decode_to_file=translation.en
-```
-
-# Eval BLEU

-t2t-bleu --translation=translation.en --reference=ref-translation.de
+cat $DECODE_FILE.$MODEL.$HPARAMS.beam$BEAM_SIZE.alpha$ALPHA.decodes
+```

---

setup.py

-1

@@ -24,7 +24,6 @@
        'tensor2tensor/bin/t2t-datagen',
        'tensor2tensor/bin/t2t-decoder',
        'tensor2tensor/bin/t2t-make-tf-configs',
-        'tensor2tensor/bin/t2t-bleu',
    ],
    install_requires=[
        'bz2file',

tensor2tensor/bin/t2t-bleu

-200
This file was deleted.

tensor2tensor/bin/t2t-datagen

File mode changed from 100755 to 100644.

tensor2tensor/bin/t2t-decoder

File mode changed from 100755 to 100644.
+2 -5

@@ -46,10 +46,7 @@ import tensorflow as tf
flags = tf.flags
FLAGS = flags.FLAGS

-flags.DEFINE_string("output_dir", "",
-                    "Training directory where the latest checkpoint is used.")
-flags.DEFINE_string("checkpoint_path", None,
-                    "Path to the model checkpoint. Overrides output_dir.")
+flags.DEFINE_string("output_dir", "", "Training directory to load from.")
flags.DEFINE_string("decode_from_file", None,
                    "Path to the source file for decoding")
flags.DEFINE_string("decode_to_file", None,

@@ -93,7 +90,7 @@ def main(_):
    decoding.decode_interactively(estimator, decode_hp)
  elif FLAGS.decode_from_file:
    decoding.decode_from_file(estimator, FLAGS.decode_from_file, decode_hp,
-                              FLAGS.decode_to_file, checkpoint_path=FLAGS.checkpoint_path)
+                              FLAGS.decode_to_file)
  else:
    decoding.decode_from_dataset(
        estimator,

tensor2tensor/bin/t2t-make-tf-configs

File mode changed from 100755 to 100644.

tensor2tensor/bin/t2t-trainer

File mode changed from 100755 to 100644.

tensor2tensor/models/transformer_vae.py

+17 -4

@@ -147,17 +147,22 @@ def nearest(x, means, hparams):
                  transpose_b=True)
  _, nearest_idx = tf.nn.top_k(- dist, k=1)
  nearest_hot = tf.one_hot(tf.squeeze(nearest_idx, axis=1), hparams.v_size)
-  nearest_hot = tf.reshape(nearest_hot, [tf.shape(x)[0], tf.shape(x)[1],
-                                         tf.shape(x)[2], hparams.v_size])
+  shape = common_layers.shape_list(x)
+  shape[-1] = hparams.v_size
+  nearest_hot = tf.reshape(nearest_hot, shape=shape)
  return tf.stop_gradient(nearest_hot)


def kmeans(x, means, hparams, name):
  with tf.variable_scope(name):
    x_means_hot = nearest(x, means, hparams)
    x_means = tf.gather(means, tf.argmax(x_means_hot, axis=-1))
-    kl = tf.reduce_sum(tf.square(x - x_means), axis=-1)
-    return x_means_hot, tf.reduce_mean(kl)  # * 10.0
+    x_flat = tf.reshape(x, [-1, hparams.hidden_size])
+    kl = tf.reduce_mean(tf.reduce_sum(tf.square(x_flat - x_means), axis=-1))
+    reg_loss1 = tf.nn.l2_loss((tf.stop_gradient(x) - x_means))
+    reg_loss2 = hparams.beta * tf.nn.l2_loss((x - tf.stop_gradient(x_means)))
+    l = kl + reg_loss1 + reg_loss2
+    return x_means_hot, x_means, l


def bit_to_int(x_bit, nbits):

@@ -233,6 +238,12 @@ def embed(x):
  _, hot, l = dae(x, hparams, name)
  c = tf.argmax(hot, axis=-1)
  h1 = tf.layers.dense(hot, hparams.hidden_size, name="dae_dense")
+  if hparams.bottleneck_kind == "vq-vae":
+    means = tf.get_variable(name="means", shape=[hparams.v_size,
+                                                 hparams.hidden_size])
+    x_means_hot, x_means, l = kmeans(x, means, hparams, name="vq-vae-kmeans")
+    h1 = x_means
+    c = tf.argmax(x_means_hot, axis=-1)
  h2 = tf.layers.dense(tf.nn.relu(h1), filter_size, name="vch2")
  res = tf.layers.dense(tf.nn.relu(h2), hparams.hidden_size, name="vcfin")
  return res, c, l, embed

@@ -500,6 +511,8 @@ def transformer_ae_small():
  hparams.add_hparam("decode_autoregressive", True)
  hparams.add_hparam("do_vae", True)
  hparams.add_hparam("bit_vae", True)
+  hparams.add_hparam("beta", 0.25)
+  hparams.kl_warmup_steps = 150000
  return hparams

tensor2tensor/utils/bleu_hook.py

+1 -67

@@ -20,17 +20,13 @@

import collections
import math
-import re
-import sys
-import unicodedata

# Dependency imports

import numpy as np
# pylint: disable=redefined-builtin
from six.moves import xrange
from six.moves import zip
-import six
# pylint: enable=redefined-builtin

import tensorflow as tf

@@ -96,17 +92,10 @@ def compute_bleu(reference_corpus,
      matches_by_order[len(ngram) - 1] += overlap[ngram]
    for ngram in translation_ngram_counts:
      possible_matches_by_order[len(ngram)-1] += translation_ngram_counts[ngram]
-  assert reference_length, "no reference provided"
-  assert translation_length, "no translation provided"
  precisions = [0] * max_order
-  smooth = 1.0
  for i in xrange(0, max_order):
    if possible_matches_by_order[i] > 0:
-      if matches_by_order[i] > 0:
-        precisions[i] = matches_by_order[i] / possible_matches_by_order[i]
-      else:
-        smooth *= 2
-        precisions[i] = 1.0 / (smooth * possible_matches_by_order[i])
+      precisions[i] = matches_by_order[i] / possible_matches_by_order[i]
    else:
      precisions[i] = 0.0

@@ -142,58 +131,3 @@ def bleu_score(predictions, labels, **unused_kwargs):

  bleu = tf.py_func(compute_bleu, (labels, outputs), tf.float32)
  return bleu, tf.constant(1.0)
-
-
-class UnicodeRegex:
-  """Ad-hoc hack to recognize all punctuation and symbols.
-
-  without dependening on https://pypi.python.org/pypi/regex/."""
-  def _property_chars(prefix):
-    return ''.join(six.unichr(x) for x in range(sys.maxunicode)
-                   if unicodedata.category(six.unichr(x)).startswith(prefix))
-  punctuation = _property_chars('P')
-  nondigit_punct_re = re.compile(r'([^\d])([' + punctuation + r'])')
-  punct_nondigit_re = re.compile(r'([' + punctuation + r'])([^\d])')
-  symbol_re = re.compile('([' + _property_chars('S') + '])')
-
-
-def bleu_tokenize(string):
-  r"""Tokenize a string following the official BLEU implementation.
-
-  See https://github.com/moses-smt/mosesdecoder/blob/master/scripts/generic/mteval-v14.pl#L954-L983
-  In our case, the input string is expected to be just one line
-  and no HTML entities de-escaping is needed.
-  So we just tokenize on punctuation and symbols,
-  except when a punctuation is preceded and followed by a digit
-  (e.g. a comma/dot as a thousand/decimal separator).
-
-  Note that a numer (e.g. a year) followed by a dot at the end of sentence is NOT tokenized,
-  i.e. the dot stays with the number because `s/(\p{P})(\P{N})/ $1 $2/g`
-  does not match this case (unless we add a space after each sentence).
-  However, this error is already in the original mteval-v14.pl
-  and we want to be consistent with it.
-
-  Args:
-    string: the input string
-
-  Returns:
-    a list of tokens
-  """
-  string = UnicodeRegex.nondigit_punct_re.sub(r'\1 \2 ', string)
-  string = UnicodeRegex.punct_nondigit_re.sub(r' \1 \2', string)
-  string = UnicodeRegex.symbol_re.sub(r' \1 ', string)
-  return string.split()
-
-
-def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False):
-  """Compute BLEU for two files (reference and hypothesis translation)."""
-  # TODO: Does anyone care about Python2 compatibility?
-  ref_lines = open(ref_filename, 'rt', encoding='utf-8').read().splitlines()
-  hyp_lines = open(hyp_filename, 'rt', encoding='utf-8').read().splitlines()
-  assert len(ref_lines) == len(hyp_lines)
-  if not case_sensitive:
-    ref_lines = [x.lower() for x in ref_lines]
-    hyp_lines = [x.lower() for x in hyp_lines]
-  ref_tokens = [bleu_tokenize(x) for x in ref_lines]
-  hyp_tokens = [bleu_tokenize(x) for x in hyp_lines]
-  return compute_bleu(ref_tokens, hyp_tokens)
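As a side note on the compute_bleu() change above: with the smoothing branch removed, each n-gram order contributes its raw precision when it has possible matches and 0.0 otherwise. A minimal standalone Python illustration with made-up counts (not taken from any real evaluation):

# Hypothetical counts for max_order = 4.
matches_by_order = [5.0, 2.0, 0.0, 0.0]
possible_matches_by_order = [6.0, 5.0, 4.0, 3.0]
max_order = 4

precisions = [0] * max_order
for i in range(0, max_order):
  if possible_matches_by_order[i] > 0:
    precisions[i] = matches_by_order[i] / possible_matches_by_order[i]
  else:
    precisions[i] = 0.0

print(precisions)  # approximately [0.833, 0.4, 0.0, 0.0]

The removed branch would instead have assigned the zero-match orders a small positive value, 1.0 / (smooth * possible_matches_by_order[i]), doubling smooth each time.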
