!!fix!!

HiKapok · HiKapok · commit e2495a543304 · 2018-03-21T17:08:35.000+08:00
diff --git a/README.md b/README.md
@@ -11,15 +11,18 @@ For more details, please refer to [README of SSD-Tensorflow](https://github.com/
 ##  ##
 update:
 
-- Add SSD preprocesing method using Tensorflow ops
+- Add SSD preprocessing method using Tensorflow ops [zero ground truth fixed]
 - Modify details of the network to match the original Caffe code
 - Add NMS using Tensorflow ops to support two mode
 - Fix most part of the matching strategy between ground truth and anchors
 - Replica GPU training support (If you are using Tensorflow 1.5.0+, then rename the replicate_model\_fn.py)
 - Add voc eval with debug
 - Add realtime eval, using class-wise bboxes-select and nms
 - Add support for training use *vgg16_reducedfc* model converted from pytorch, you can get from [here](https://drive.google.com/open?id=184srhbt8_uvLKeWW_Yo8Mc5wTyc0lJT7)
-- Other important fixes **[2018.03.18]**
+- Other important fixes **[2018.03.21]**
+- Match the anchors of all layers jointly instead of layer by layer, to avoid some suboptimal matching results
+- Refactor anchors matching pipeline
+- Fix the missing 'difficult' attribute in the TFRecords dataset
 - Model-320(reduced version) trained on VOC07+12 dataset now is available at [here](), the heavy one need to be trained by yourself
 
 Note: Model trained using the initial version of this code can only get to 0.45~0.55mAP, clone the latest version will give you much better performance. Futher improvement is still going on.
diff --git a/datasets/pascalvoc_to_tfrecords.py b/datasets/pascalvoc_to_tfrecords.py
@@ -64,7 +64,7 @@
 
 # TFRecords convertion parameters.
 RANDOM_SEED = 4242
-SAMPLES_PER_FILES = 200
+SAMPLES_PER_FILES = 1500
 
 
 def _process_image(directory, name):
@@ -103,12 +103,16 @@ def _process_image(directory, name):
         labels.append(int(VOC_LABELS[label][0]))
         labels_text.append(label.encode('ascii'))
 
-        if obj.find('difficult'):
-            difficult.append(int(obj.find('difficult').text))
+        isdifficult = obj.find('difficult')
+        if isdifficult is not None:
+            #print('ddd')
+            difficult.append(int(isdifficult.text))
         else:
             difficult.append(0)
-        if obj.find('truncated'):
-            truncated.append(int(obj.find('truncated').text))
+
+        istruncated = obj.find('truncated')
+        if istruncated is not None:
+            truncated.append(int(istruncated.text))
         else:
             truncated.append(0)
 
diff --git a/draw_toolbox.py b/draw_toolbox.py
@@ -76,6 +76,8 @@ def bboxes_draw_on_img(img, classes, scores, bboxes, thickness=2):
     line_type = 8
 
     for i in range(bboxes.shape[0]):
+        if classes[i] < 1: continue
+
         bbox = bboxes[i]
         color = colors_tableau[classes[i]]
         # Draw bounding box...
diff --git a/eval_ron_network.py b/eval_ron_network.py
@@ -100,7 +100,7 @@
 tf.app.flags.DEFINE_string(
     'master', '', 'The address of the TensorFlow master to use.')
 tf.app.flags.DEFINE_string(
-    'checkpoint_path', './model/model.ckpt-122044',#118815
+    'checkpoint_path', './model/model.ckpt-121551',#118815
     'The directory where the model was written to or an absolute path to a '
     'checkpoint file.')
 tf.app.flags.DEFINE_string(
diff --git a/nets/ron_vgg_320.py b/nets/ron_vgg_320.py
@@ -100,7 +100,7 @@ class RONNet(object):
         no_annotation_label=21,
         feat_layers=['block7','block6', 'block5', 'block4'],
         feat_shapes=[(5, 5), (10, 10), (20, 20), (40, 40)],
-        allowed_borders = [0, 0, 0, 0],
+        allowed_borders = [32, 16, 8, 4],
         anchor_sizes=[(224., 256.),
                       (160., 192.),
                       (96., 128.),
@@ -120,7 +120,7 @@ class RONNet(object):
         # anchor_steps=[64],
 
         anchor_offset=0.5,
-        prior_scaling=[1., 1., 1., 1.]#[0.1, 0.1, 0.2, 0.2]
+        prior_scaling=[0.1, 0.1, 0.2, 0.2]#[1., 1., 1., 1.]#
         )
 
     def __init__(self, params=None):
@@ -477,7 +477,7 @@ def ron_net(inputs,
         # Block 6
         net = slim.conv2d(net, 4096, [7, 7], scope='fc6')
         end_points['block6'] = net
-        net = slim.dropout(net, dropout_keep_prob, is_training=is_training, scope='dropout6')
+        #net = slim.dropout(net, dropout_keep_prob, is_training=is_training, scope='dropout6')
         # Block 7: 1x1 conv, no padding.
         net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
         end_points['block7'] = net
@@ -769,7 +769,7 @@ def ron_losses(logits, localisations, objness_logits, objness_pred,
             loss = custom_layers.modified_smooth_l1(localisations, tf.stop_gradient(glocalisations), sigma = 3.)
             #loss = custom_layers.abs_smooth(localisations - tf.stop_gradient(glocalisations))
 
-            loss = tf.cond(n_cls_positives > 0., lambda: beta * n_cls_positives / total_examples_for_cls * tf.reduce_mean(tf.boolean_mask(tf.reduce_sum(loss, axis=-1), tf.stop_gradient(cls_positive_mask))), lambda: 0.)
+            loss = tf.cond(n_cls_positives > 0., lambda: beta * tf.reduce_mean(tf.boolean_mask(tf.reduce_sum(loss, axis=-1), tf.stop_gradient(cls_positive_mask))), lambda: 0.)
             #loss = tf.cond(n_positives > 0., lambda: beta * n_positives / total_examples_for_objness * tf.reduce_mean(tf.boolean_mask(tf.reduce_sum(loss, axis=-1), tf.stop_gradient(positive_mask))), lambda: 0.)
             #loss = tf.reduce_mean(loss * weights)
             #loss = tf.reduce_sum(loss * weights)
diff --git a/nets/ssd_common.py b/nets/ssd_common.py
@@ -131,6 +131,8 @@ def tf_ssd_bboxes_encode_layer(labels,
     feat_cx = (feat_xmax + feat_xmin) / 2.
     feat_h = feat_ymax - feat_ymin
     feat_w = feat_xmax - feat_xmin
+
+    bboxes = tf.stack([ymin_, xmin_, ymax_, xmax_], axis=-1)
     # Encode features.
     # the prior_scaling (in fact is 5 and 10) is use for balance the regression loss of center and with(or height)
     # (x-x_ref)/x_ref * 10 + log(w/w_ref) * 5
@@ -142,7 +144,7 @@ def tf_ssd_bboxes_encode_layer(labels,
     feat_localizations = tf.stack([feat_cx, feat_cy, feat_w, feat_h], axis=-1)
     # now feat_localizations is our regression object
 
-    return feat_labels * tf.cast(matched_gt_mask, tf.int64) + (-1 * tf.cast(matched_gt < -1, tf.int64)), tf.expand_dims(tf.reshape(tf.cast(matched_gt_mask, tf.float32), tf.shape(ymin_)), -1) * feat_localizations, feat_scores
+    return feat_labels * tf.cast(matched_gt_mask, tf.int64) + (-1 * tf.cast(matched_gt < -1, tf.int64)), tf.expand_dims(tf.reshape(tf.cast(matched_gt_mask, tf.float32), tf.shape(ymin_)), -1) * feat_localizations, feat_scores, bboxes
 
 # def tf_ssd_bboxes_encode_layer(labels,
 #                                bboxes,
@@ -362,17 +364,85 @@ def tf_ssd_bboxes_encode(labels,
         target_labels = []
         target_localizations = []
         target_scores = []
-        for i, anchors_layer in enumerate(anchors):
-            with tf.name_scope('bboxes_encode_block_%i' % i):
-                t_labels, t_loc, t_scores = \
-                    tf_ssd_bboxes_encode_layer(labels, bboxes, anchors_layer,
-                                               num_classes, img_shape, allowed_borders[i], no_annotation_label,
-                                               positive_threshold, ignore_threshold,
-                                               prior_scaling, dtype)
-                target_labels.append(t_labels)
-                target_localizations.append(t_loc)
-                target_scores.append(t_scores)
-        return target_labels, target_localizations, target_scores
+        target_bboxes = []
+
+        shape_recorder = []
+        full_shape_anchors = {}
+        with tf.name_scope('anchor_concat'):
+            for i, anchors_layer in enumerate(anchors):
+                yref, xref, href, wref = anchors_layer
+
+                ymin_ = yref - href / 2.
+                xmin_ = xref - wref / 2.
+                ymax_ = yref + href / 2.
+                xmax_ = xref + wref / 2.
+
+                shape_recorder.append(ymin_.shape)
+                full_shape_yxhw = [(ymin_ + ymax_)/2, (xmin_ + xmax_)/2, (ymax_ - ymin_), (xmax_ - xmin_)]
+
+                full_shape_anchors[i] = [np.reshape(_, (-1)) for _ in full_shape_yxhw]
+            #print(full_shape_anchors)
+            remap_anchors = list(zip(*full_shape_anchors.values()))
+
+            for i in range(len(full_shape_anchors)):
+                full_shape_anchors[i] = np.concatenate(remap_anchors[i], axis=0)
+                #print(full_shape_anchors[i].shape)
+            # print([_.shape for _ in remap_anchors[0]])
+            # print([_.shape for _ in remap_anchors[1]])
+            # print([_.shape for _ in remap_anchors[2]])
+            # print([_.shape for _ in remap_anchors[3]])
+            #print(shape_recorder)
+        len_recorder = [np.prod(_) for _ in shape_recorder]
+        #print(len_recorder)
+        #print(allowed_borders)
+        flaten_allowed_borders = []
+        for i, allowed_border in enumerate(allowed_borders):
+            flaten_allowed_borders.append([allowed_border]*len_recorder[i])
+        #print([len(_) for _ in flaten_allowed_borders])
+        flaten_allowed_borders = np.concatenate(flaten_allowed_borders, axis=0)
+
+        t_labels, t_loc, t_scores, t_bbox = tf_ssd_bboxes_encode_layer(labels, bboxes, list(full_shape_anchors.values()), num_classes, img_shape, flaten_allowed_borders, no_annotation_label, positive_threshold, ignore_threshold, prior_scaling, dtype)
+
+        reshaped_loc = []
+        for i, loc in enumerate(tf.split(t_loc, len_recorder)):
+            reshaped_loc.append(tf.reshape(loc, list(shape_recorder[i])+[-1]))
+        reshaped_bbox = []
+        for i, bbox in enumerate(tf.split(t_bbox, len_recorder)):
+            reshaped_bbox.append(tf.reshape(bbox, list(shape_recorder[i])+[-1]))
+        #print(reshaped_loc)
+        #print(reshaped_bbox)
+        return tf.split(t_labels, len_recorder), reshaped_loc, tf.split(t_scores, len_recorder), reshaped_bbox
+    # with tf.name_scope(scope):
+    #     target_labels = []
+    #     target_localizations = []
+    #     target_scores = []
+    #     target_bboxes = []
+    #     for i, anchors_layer in enumerate(anchors):
+    #         with tf.name_scope('bboxes_encode_block_%i' % i):
+
+    #             yref, xref, href, wref = anchors_layer
+
+    #             ymin_ = yref - href / 2.
+    #             xmin_ = xref - wref / 2.
+    #             ymax_ = yref + href / 2.
+    #             xmax_ = xref + wref / 2.
+
+    #             yref_, xref_, href_, wref_ = (ymin_ + ymax_)/2, (xmin_ + xmax_)/2, (ymax_ - ymin_), (xmax_ - xmin_)
+    #             t_labels, t_loc, t_scores, t_bbox = \
+    #                 tf_ssd_bboxes_encode_layer(labels, bboxes, anchors_layer,
+    #                                            num_classes, img_shape, allowed_borders[i], no_annotation_label,
+    #                                            positive_threshold, ignore_threshold,
+    #                                            prior_scaling, dtype)
+    #             print('anchors_layer:', [yref_.shape, xref_.shape, href_.shape, wref_.shape])
+    #             print('t_labels:', t_labels)
+    #             print('t_loc:', t_loc)
+    #             print('t_scores:', t_scores)
+    #             print('t_bbox:', t_bbox)
+    #             target_labels.append(t_labels)
+    #             target_localizations.append(t_loc)
+    #             target_scores.append(t_scores)
+    #             target_bboxes.append(t_bbox)
+    #     return target_labels, target_localizations, target_scores, target_bboxes
 
 
 def tf_ssd_bboxes_decode_layer(feat_localizations,
diff --git a/preprocessing/ssd_vgg_preprocessing.py b/preprocessing/ssd_vgg_preprocessing.py
@@ -345,7 +345,7 @@ def ron_preprocess_for_train(image, labels, bboxes,
                 lambda x, ordering: distort_color(x, ordering, fast_mode),
                 num_cases=4)
         tf_summary_image(dst_image, bboxes, 'image_color_distorted_4')
-
+        dst_image = random_sample_flip_resized_image
         # Rescale to VGG input scale.
         dst_image.set_shape([None, None, 3])
         image = dst_image * 255.
@@ -358,7 +358,7 @@ def ron_preprocess_for_train(image, labels, bboxes,
 def preprocess_for_eval(image, labels, bboxes,
                         out_shape=EVAL_SIZE, data_format='NHWC',
                         difficults=None, resize='WARP_RESIZE',
-                        scope='ssd_preprocessing_train'):
+                        scope='ssd_preprocessing_eval'):
     """Preprocess an image for evaluation.
 
     Args:
diff --git a/ron_net.py b/ron_net.py
@@ -16,6 +16,12 @@
 import tensorflow as tf
 import os
 
+import numpy as np
+import tf_extended as tfe
+from tensorflow.python.framework import ops
+import draw_toolbox
+from scipy.misc import imread, imsave, imshow, imresize
+
 from tensorflow.python import debug as tf_debug
 from tensorflow.python.ops import control_flow_ops
 
@@ -46,7 +52,7 @@
 tf.app.flags.DEFINE_float(
     'loss_alpha', 1./3, 'Alpha parameter in the loss function.')
 tf.app.flags.DEFINE_float(
-    'loss_beta', 1./3, 'Beta parameter in the loss function.')
+    'loss_beta', 1./5, 'Beta parameter in the loss function.')
 tf.app.flags.DEFINE_float(
     'negative_ratio', 3., 'Negative ratio in the loss function.')
 tf.app.flags.DEFINE_float(
@@ -173,6 +179,16 @@
 
 FLAGS = tf.app.flags.FLAGS
 
+def save_image_with_bbox(image, labels_, scores_, bboxes_):
+    if not hasattr(save_image_with_bbox, "counter"):
+        save_image_with_bbox.counter = 0  # it doesn't exist yet, so initialize it
+    save_image_with_bbox.counter += 1
+
+    #print(labels_)
+    img_to_draw = np.copy(image)#common_preprocessing.np_image_unwhitened(image))
+    img_to_draw = draw_toolbox.bboxes_draw_on_img(img_to_draw, labels_, scores_, bboxes_, thickness=2)
+    imsave(os.path.join('./Debug', '{}.jpg').format(save_image_with_bbox.counter), img_to_draw)
+    return save_image_with_bbox.counter
 # =========================================================================== #
 # Main training routine.
 # =========================================================================== #
@@ -219,7 +235,16 @@ def main(_):
                                                          'object/label',
                                                          'object/bbox',
                                                          'object/difficult'])
-        glabels = tf.cast(isdifficult < tf.ones_like(isdifficult), glabels.dtype) * glabels
+
+        #glabels = tf.cast(isdifficult < tf.ones_like(isdifficult), glabels.dtype) * glabels
+
+        isdifficult_mask =tf.cond(tf.reduce_sum(tf.cast(tf.logical_not(tf.equal(tf.ones_like(isdifficult), isdifficult)), tf.float32)) < 1., lambda : tf.one_hot(0, tf.shape(isdifficult)[0], on_value=True, off_value=False, dtype=tf.bool), lambda : isdifficult < tf.ones_like(isdifficult))
+
+        glabels = tf.boolean_mask(glabels, isdifficult_mask)
+        gbboxes = tf.boolean_mask(gbboxes, isdifficult_mask)
+
+        #glabels = tf.Print(glabels, [glabels,isdifficult], message='glabels: ', summarize=200)
+
         #### DEBUG ####
         #image = tf.Print(image, [shape, glabels, gbboxes], message='before preprocess: ', summarize=20)
         # Select the preprocessing function.
@@ -235,20 +260,42 @@ def main(_):
         #### DEBUG ####
         #image = tf.Print(image, [shape, glabels, gbboxes], message='after preprocess: ', summarize=20)
 
-        #glabels = tf.Print(glabels, [glabels], message='glabels: ', summarize=20)
+        #glabels = tf.Print(glabels, [glabels,isdifficult], message='glabels: ', summarize=200)
 
+        # save_image_op = tf.py_func(save_image_with_bbox,
+        #                             [image,
+        #                             tf.reshape(tf.clip_by_value(glabels, 0, 22), [-1]),
+        #                             #tf.convert_to_tensor(list(rscores.keys()), dtype=tf.int64),
+        #                             tf.reshape(tf.ones_like(gbboxes), [-1]),
+        #                             tf.reshape(gbboxes, [-1, 4])],
+        #                             tf.int64, stateful=True)
 
         # Encode groundtruth labels and bboxes.
         # glocalisations is our regression object
         # gclasses is the ground_trutuh label
         # gscores is the the jaccard score with ground_truth
-        gclasses, glocalisations, gscores = \
+        gclasses, glocalisations, gscores, gbboxes = \
             ron_net.bboxes_encode(glabels, gbboxes, ron_anchors, positive_threshold=FLAGS.match_threshold, ignore_threshold=FLAGS.neg_threshold)
 
+        #gclasses[1] = tf.Print(gclasses[1], [gclasses[1]], message='gclasses[1]: ', summarize=200)
+        # save_image_op = tf.py_func(save_image_with_bbox,
+        #                             [image,
+        #                             tf.reshape(tf.clip_by_value(gclasses[3], 0, 22), [-1]),
+        #                             #tf.convert_to_tensor(list(rscores.keys()), dtype=tf.int64),
+        #                             tf.reshape(gscores[3], [-1]),
+        #                             tf.reshape(gbboxes[3], [-1, 4])],
+        #                             tf.int64, stateful=True)
+        # save_image_op = tf.py_func(save_image_with_bbox,
+        #                             [image,
+        #                             tf.clip_by_value(tf.concat([tf.reshape(_, [-1]) for _ in gclasses], axis=0), 0, 22),
+        #                             tf.concat([tf.reshape(_, [-1]) for _ in gscores], axis=0),
+        #                             tf.concat([tf.reshape(_, [-1, 4]) for _ in gbboxes], axis=0)],
+        #                             tf.int64, stateful=True)
         # each size of the batch elements
         # include one image, three others(gclasses, glocalisations, gscores)
         batch_shape = [1] + [len(ron_anchors)] * 3
 
+        #with tf.control_dependencies([save_image_op]):
         # Training batches and queue.
         r = tf.train.batch(
             tf_utils.reshape_list([image, gclasses, glocalisations, gscores]),
@@ -359,7 +406,7 @@ def wrapper_debug(sess):
                 logdir=FLAGS.model_dir,
                 master='',
                 is_chief=True,
-                init_fn=tf_utils.get_init_fn(FLAGS, os.path.join(FLAGS.data_dir, 'vgg_model/vgg16_reducedfc.ckpt')),
+                init_fn=tf_utils.get_init_fn(FLAGS, os.path.join(FLAGS.data_dir, 'vgg_model/vgg16_reducedfc.ckpt')),#'vgg_model/vgg16_reducedfc.ckpt'
                 summary_op=summary_op,
                 number_of_steps=FLAGS.max_number_of_steps,
                 log_every_n_steps=FLAGS.log_every_n_steps,
diff --git a/ron_net_multi_gpu.py b/ron_net_multi_gpu.py
@@ -269,7 +269,10 @@ def main(_):
                                                          'object/label',
                                                          'object/bbox',
                                                          'object/difficult'])
-        glabels = tf.cast(isdifficult < tf.ones_like(isdifficult), glabels.dtype) * glabels
+        isdifficult_mask =tf.cond(tf.reduce_sum(tf.cast(tf.logical_not(tf.equal(tf.ones_like(isdifficult), isdifficult)), tf.float32)) < 1., lambda : tf.one_hot(0, tf.shape(isdifficult)[0], on_value=True, off_value=False, dtype=tf.bool), lambda : isdifficult < tf.ones_like(isdifficult))
+
+        glabels = tf.boolean_mask(glabels, isdifficult_mask)
+        gbboxes = tf.boolean_mask(gbboxes, isdifficult_mask)
 
         # Select the preprocessing function.
         preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
diff --git a/ron_net_multi_gpu_optimized.py b/ron_net_multi_gpu_optimized.py
@@ -355,7 +355,10 @@ def train_input_fn():
                                                          'object/label',
                                                          'object/bbox',
                                                          'object/difficult'])
-        glabels = tf.cast(isdifficult < tf.ones_like(isdifficult), glabels.dtype) * glabels
+        isdifficult_mask =tf.cond(tf.reduce_sum(tf.cast(tf.logical_not(tf.equal(tf.ones_like(isdifficult), isdifficult)), tf.float32)) < 1., lambda : tf.one_hot(0, tf.shape(isdifficult)[0], on_value=True, off_value=False, dtype=tf.bool), lambda : isdifficult < tf.ones_like(isdifficult))
+
+        glabels = tf.boolean_mask(glabels, isdifficult_mask)
+        gbboxes = tf.boolean_mask(gbboxes, isdifficult_mask)
         # Select the preprocessing function.
         preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
         image_preprocessing_fn = preprocessing_factory.get_preprocessing(
diff --git a/ron_net_multi_gpu_replica.py b/ron_net_multi_gpu_replica.py
diff --git a/tf_convert_data.py b/tf_convert_data.py