Skip to content

Commit 397ed16

Browse files
committed
update v1
1 parent a99b31c commit 397ed16

21 files changed

+3637
-379
lines changed

Diff for: README.md

+8
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,13 @@ This repository contains code of the re-implement of RON following the above pap
66

77
The code is modified from [SSD-Tensorflow](https://github.com/balancap/SSD-Tensorflow). You can use the code to train/evaluate a network for object detection task.
88
For more details, please refer to [README of SSD-Tensorflow](https://github.com/balancap/SSD-Tensorflow/blob/master/README.md).
9+
## ##
10+
update:
911

12+
- Add SSD preprocessing method using Tensorflow
13+
- Modify the network to match the original Caffe code
14+
- Add NMS using Tensorflow ops to support two modes
15+
- Replica GPU training support
16+
- Add voc eval
1017

18+
Note: Training is in progress, and the model will be released later.

Diff for: convert_caffe_ron.py

+62
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
#convert from caffe
2+
# https://github.com/hujie-frank/SENet
3+
# https://github.com/ruotianluo/pytorch-resnet
4+
# /ruotianluo/pytorch-resnet/master/convert.py
5+
#
6+
# install caffe python 3.6
7+
# https://yangcha.github.io/Caffe-Conda3/
8+
9+
# import caffe
10+
import sys
11+
import os
12+
sys.path.insert(0,'/media/rs/7A0EE8880EE83EAF1/Kapok/caffe-master/python')
13+
os.environ["GLOG_minloglevel"] = "2"
14+
import caffe
15+
from caffe.proto import caffe_pb2
16+
17+
18+
# others
19+
import re
20+
import numpy as np
21+
from collections import OrderedDict
22+
import cv2
23+
24+
25+
26+
27+
##--------------------------------------------------------
28+
29+
# how to get caffe keys:
30+
# caffe_model.params.keys()
31+
# odict_keys(['conv1/7x7_s2', 'conv1/7x7_s2/bn', 'conv1/7x7_s2/bn/scale', 'conv2_1_1x1_reduce', 'conv2_1_1x1_reduce/bn', 'conv2_1_1x1_reduce/bn/scale', 'conv2_1_3x3', 'conv2_1_3x3/bn', 'conv2_1_3x3/bn/scale', 'conv2_1_1x1_increase', 'conv2_1_1x1_increase/bn', 'conv2_1_1x1_increase/bn/scale', 'conv2_1_1x1_down', 'conv2_1_1x1_up', 'conv2_1_1x1_proj', 'conv2_1_1x1_proj/bn', 'conv2_1_1x1_proj/bn/scale', 'conv2_2_1x1_reduce', 'conv2_2_1x1_reduce/bn', 'conv2_2_1x1_reduce/bn/scale', 'conv2_2_3x3', 'conv2_2_3x3/bn', 'conv2_2_3x3/bn/scale', 'conv2_2_1x1_increase', 'conv2_2_1x1_increase/bn', 'conv2_2_1x1_increase/bn/scale', 'conv2_2_1x1_down', 'conv2_2_1x1_up', 'conv2_3_1x1_reduce', 'conv2_3_1x1_reduce/bn', 'conv2_3_1x1_reduce/bn/scale', 'conv2_3_3x3', 'conv2_3_3x3/bn', 'conv2_3_3x3/bn/scale', 'conv2_3_1x1_increase', 'conv2_3_1x1_increase/bn', 'conv2_3_1x1_increase/bn/scale', 'conv2_3_1x1_down', 'conv2_3_1x1_up', 'conv3_1_1x1_reduce', 'conv3_1_1x1_reduce/bn', 'conv3_1_1x1_reduce/bn/scale', 'conv3_1_3x3', 'conv3_1_3x3/bn', 'conv3_1_3x3/bn/scale', 'conv3_1_1x1_increase', 'conv3_1_1x1_increase/bn', 'conv3_1_1x1_increase/bn/scale', 'conv3_1_1x1_down', 'conv3_1_1x1_up', 'conv3_1_1x1_proj', 'conv3_1_1x1_proj/bn', 'conv3_1_1x1_proj/bn/scale', 'conv3_2_1x1_reduce', 'conv3_2_1x1_reduce/bn', 'conv3_2_1x1_reduce/bn/scale', 'conv3_2_3x3', 'conv3_2_3x3/bn', 'conv3_2_3x3/bn/scale', 'conv3_2_1x1_increase', 'conv3_2_1x1_increase/bn', 'conv3_2_1x1_increase/bn/scale', 'conv3_2_1x1_down', 'conv3_2_1x1_up', 'conv3_3_1x1_reduce', 'conv3_3_1x1_reduce/bn', 'conv3_3_1x1_reduce/bn/scale', 'conv3_3_3x3', 'conv3_3_3x3/bn', 'conv3_3_3x3/bn/scale', 'conv3_3_1x1_increase', 'conv3_3_1x1_increase/bn', 'conv3_3_1x1_increase/bn/scale', 'conv3_3_1x1_down', 'conv3_3_1x1_up', 'conv3_4_1x1_reduce', 'conv3_4_1x1_reduce/bn', 'conv3_4_1x1_reduce/bn/scale', 'conv3_4_3x3', 'conv3_4_3x3/bn', 'conv3_4_3x3/bn/scale', 'conv3_4_1x1_increase', 'conv3_4_1x1_increase/bn', 'conv3_4_1x1_increase/bn/scale', 'conv3_4_1x1_down', 
'conv3_4_1x1_up', 'conv4_1_1x1_reduce', 'conv4_1_1x1_reduce/bn', 'conv4_1_1x1_reduce/bn/scale', 'conv4_1_3x3', 'conv4_1_3x3/bn', 'conv4_1_3x3/bn/scale', 'conv4_1_1x1_increase', 'conv4_1_1x1_increase/bn', 'conv4_1_1x1_increase/bn/scale', 'conv4_1_1x1_down', 'conv4_1_1x1_up', 'conv4_1_1x1_proj', 'conv4_1_1x1_proj/bn', 'conv4_1_1x1_proj/bn/scale', 'conv4_2_1x1_reduce', 'conv4_2_1x1_reduce/bn', 'conv4_2_1x1_reduce/bn/scale', 'conv4_2_3x3', 'conv4_2_3x3/bn', 'conv4_2_3x3/bn/scale', 'conv4_2_1x1_increase', 'conv4_2_1x1_increase/bn', 'conv4_2_1x1_increase/bn/scale', 'conv4_2_1x1_down', 'conv4_2_1x1_up', 'conv4_3_1x1_reduce', 'conv4_3_1x1_reduce/bn', 'conv4_3_1x1_reduce/bn/scale', 'conv4_3_3x3', 'conv4_3_3x3/bn', 'conv4_3_3x3/bn/scale', 'conv4_3_1x1_increase', 'conv4_3_1x1_increase/bn', 'conv4_3_1x1_increase/bn/scale', 'conv4_3_1x1_down', 'conv4_3_1x1_up', 'conv4_4_1x1_reduce', 'conv4_4_1x1_reduce/bn', 'conv4_4_1x1_reduce/bn/scale', 'conv4_4_3x3', 'conv4_4_3x3/bn', 'conv4_4_3x3/bn/scale', 'conv4_4_1x1_increase', 'conv4_4_1x1_increase/bn', 'conv4_4_1x1_increase/bn/scale', 'conv4_4_1x1_down', 'conv4_4_1x1_up', 'conv4_5_1x1_reduce', 'conv4_5_1x1_reduce/bn', 'conv4_5_1x1_reduce/bn/scale', 'conv4_5_3x3', 'conv4_5_3x3/bn', 'conv4_5_3x3/bn/scale', 'conv4_5_1x1_increase', 'conv4_5_1x1_increase/bn', 'conv4_5_1x1_increase/bn/scale', 'conv4_5_1x1_down', 'conv4_5_1x1_up', 'conv4_6_1x1_reduce', 'conv4_6_1x1_reduce/bn', 'conv4_6_1x1_reduce/bn/scale', 'conv4_6_3x3', 'conv4_6_3x3/bn', 'conv4_6_3x3/bn/scale', 'conv4_6_1x1_increase', 'conv4_6_1x1_increase/bn', 'conv4_6_1x1_increase/bn/scale', 'conv4_6_1x1_down', 'conv4_6_1x1_up', 'conv5_1_1x1_reduce', 'conv5_1_1x1_reduce/bn', 'conv5_1_1x1_reduce/bn/scale', 'conv5_1_3x3', 'conv5_1_3x3/bn', 'conv5_1_3x3/bn/scale', 'conv5_1_1x1_increase', 'conv5_1_1x1_increase/bn', 'conv5_1_1x1_increase/bn/scale', 'conv5_1_1x1_down', 'conv5_1_1x1_up', 'conv5_1_1x1_proj', 'conv5_1_1x1_proj/bn', 'conv5_1_1x1_proj/bn/scale', 'conv5_2_1x1_reduce', 
'conv5_2_1x1_reduce/bn', 'conv5_2_1x1_reduce/bn/scale', 'conv5_2_3x3', 'conv5_2_3x3/bn', 'conv5_2_3x3/bn/scale', 'conv5_2_1x1_increase', 'conv5_2_1x1_increase/bn', 'conv5_2_1x1_increase/bn/scale', 'conv5_2_1x1_down', 'conv5_2_1x1_up', 'conv5_3_1x1_reduce', 'conv5_3_1x1_reduce/bn', 'conv5_3_1x1_reduce/bn/scale', 'conv5_3_3x3', 'conv5_3_3x3/bn', 'conv5_3_3x3/bn/scale', 'conv5_3_1x1_increase', 'conv5_3_1x1_increase/bn', 'conv5_3_1x1_increase/bn/scale', 'conv5_3_1x1_down', 'conv5_3_1x1_up', 'classifier'])
32+
#
33+
34+
# how to copy weights:
35+
# e.g.
36+
# pytorch_state_dict['conv.weight'] = caffe_net_params['conv'][0].data
37+
# pytorch_state_dict['conv.bias'] = caffe_net_params['conv'][1].data
38+
39+
40+
41+
# main #################################################################
42+
if __name__ == '__main__':
    # Script entry point: load the Caffe RON model on CPU and list the names
    # of its parameterised layers, as a first step towards converting weights.
    print('%s: calling main function ...' % os.path.basename(__file__))

    # NOTE(review): machine-specific absolute paths; adjust them for the
    # local checkout before running.
    prototxt_file = '/media/rs/3EBAC1C7BAC17BC1/Detections/RON_Tensorflow/traincudnn.prototxt'
    caffemodel_file = '/media/rs/3EBAC1C7BAC17BC1/Detections/RON_Tensorflow/model/RON320_VOC0712_VOC07.caffemodel'

    caffe.set_mode_cpu()
    caffe_net = caffe.Net(prototxt_file, caffemodel_file, caffe.TEST)

    caffe_net_params = caffe_net.params
    # Fixed: the original printed `caffe_model.params.keys()`, but no
    # `caffe_model` name exists in this script (NameError). The loaded
    # network is `caffe_net`, whose params are already bound above.
    print(caffe_net_params.keys())

    # sys.exit is the documented way to terminate; the bare `exit` helper is
    # injected by the `site` module and is not guaranteed to be available.
    sys.exit(0)
54+
55+
56+
57+
58+
59+
60+
61+
62+

Diff for: datasets/dataset_factory.py

+29-1
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,14 @@
2323

2424
from datasets import pascalvoc_2007
2525
from datasets import pascalvoc_2012
26+
from datasets import pascalvoc_2007_2012
2627

2728
datasets_map = {
2829
'cifar10': cifar10,
2930
'imagenet': imagenet,
3031
'pascalvoc_2007': pascalvoc_2007,
3132
'pascalvoc_2012': pascalvoc_2012,
33+
'pascalvoc_0712': pascalvoc_2007_2012
3234
}
3335

3436

@@ -52,4 +54,30 @@ def get_dataset(name, split_name, dataset_dir, file_pattern=None, reader=None):
5254
return datasets_map[name].get_split(split_name,
5355
dataset_dir,
5456
file_pattern,
55-
reader)
57+
reader,
58+
False)
59+
60+
def get_replica_dataset(name, split_name, dataset_dir, num_workers, worker_index, file_pattern=None, reader=None):
    """Look up a dataset by name and request its split in replica mode.

    The call is forwarded to the `get_split` function of the module
    registered under `name` in `datasets_map`, with the replica flag set to
    `True` and the worker information passed as keyword arguments.

    Args:
      name: String, the name of the dataset (a key of `datasets_map`).
      split_name: A train/test split name.
      dataset_dir: The directory where the dataset files are stored.
      num_workers: Total number of workers (replicas); forwarded unchanged as
        the `num_workers` keyword.
      worker_index: Index of the calling worker; forwarded unchanged as the
        `worker_index` keyword.
      file_pattern: The file pattern to use for matching the dataset source
        files.
      reader: The subclass of tf.ReaderBase. If left as `None`, then the
        default reader defined by each dataset is used.

    Returns:
      Whatever the selected dataset module's `get_split` returns
      (documented by the original author as a `Dataset` class).

    Raises:
      ValueError: If the dataset `name` is unknown.
    """
    if name not in datasets_map:
        raise ValueError('Name of dataset unknown %s' % name)

    dataset_module = datasets_map[name]
    # Worker placement travels as keywords; the literal True is the
    # positional replica flag.
    replica_options = {
        'num_workers': num_workers,
        'worker_index': worker_index,
    }
    return dataset_module.get_split(
        split_name, dataset_dir, file_pattern, reader, True,
        **replica_options)

Diff for: datasets/pascalvoc_2007.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@
8686
NUM_CLASSES = 20
8787

8888

89-
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
89+
def get_split(split_name, dataset_dir, file_pattern=None, reader=None, replica=False, **kwargs):
9090
"""Gets a dataset tuple with instructions for reading ImageNet.
9191
9292
Args:
@@ -109,4 +109,6 @@ def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
109109
file_pattern, reader,
110110
SPLITS_TO_SIZES,
111111
ITEMS_TO_DESCRIPTIONS,
112-
NUM_CLASSES)
112+
NUM_CLASSES,
113+
replica,
114+
**kwargs)

Diff for: datasets/pascalvoc_2007_2012.py

+118
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
# Copyright 2015 Paul Balanca. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
# ==============================================================================
15+
"""Provides data for the Pascal VOC Dataset (images + annotations).
16+
"""
17+
import tensorflow as tf
18+
from datasets import pascalvoc_common
19+
20+
slim = tf.contrib.slim
21+
22+
# Pattern of the TFRecord file names; '%s' is the placeholder for the split
# name and '20??' is a glob wildcard for the year part of the file name.
FILE_PATTERN = 'voc_20??_%s_*.tfrecord'

# Human-readable description of every item stored in a record.
ITEMS_TO_DESCRIPTIONS = {
    'image': 'A color image of varying height and width.',
    'shape': 'Shape of the image',
    'object/bbox': 'A list of bounding boxes, one per each object.',
    'object/label': 'A list of labels, one per each object.',
}

# (Images, Objects) statistics on every class.
TRAIN_STATISTICS = {
    'cow': (444, 847),
    'car': (1874, 3267),
    'pottedplant': (772, 1487),
    'none': (0, 0),
    'person': (6095, 13256),
    'bicycle': (795, 1064),
    'bottle': (950, 1764),
    'dog': (1707, 2025),
    'motorbike': (771, 1052),
    'boat': (689, 1140),
    'train': (805, 925),
    'total': (16551, 40058),
    'diningtable': (738, 824),
    'sheep': (421, 1070),
    'bus': (607, 822),
    'aeroplane': (908, 1171),
    'sofa': (736, 814),
    'chair': (1564, 3152),
    'tvmonitor': (831, 1108),
    'horse': (769, 1072),
    'cat': (1417, 1593),
    'bird': (1095, 1605),
}

# NOTE(review): every class entry below is (1, 1); these look like
# placeholder values rather than measured statistics -- confirm.
TEST_STATISTICS = {'none': (0, 0)}
TEST_STATISTICS.update({
    class_name: (1, 1)
    for class_name in (
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
        'bus', 'car', 'cat', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'motorbike', 'person',
        'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
    )
})
TEST_STATISTICS['total'] = (20, 20)

# Number of samples in each split.
SPLITS_TO_SIZES = {
    'train': 22136,
    'test': 4952,
}

# Per-split class statistics tables.
SPLITS_TO_STATISTICS = {
    'train': TRAIN_STATISTICS,
    'test': TEST_STATISTICS,
}

NUM_CLASSES = 20
88+
89+
90+
def get_split(split_name, dataset_dir, file_pattern=None, reader=None, replica=False, **kwargs):
    """Gets a dataset tuple with instructions for reading Pascal VOC 2007+2012.

    Thin wrapper that fills in this module's constants and delegates to
    `pascalvoc_common.get_split`.

    Args:
      split_name: A train/test split name.
      dataset_dir: The base directory of the dataset sources.
      file_pattern: The file pattern to use when matching the dataset sources.
        It is assumed that the pattern contains a '%s' string so that the
        split name can be inserted. Any falsy value selects `FILE_PATTERN`.
      reader: The TensorFlow reader type.
      replica: Replica flag, forwarded unchanged to
        `pascalvoc_common.get_split`.
      **kwargs: Extra keyword arguments, forwarded unchanged to
        `pascalvoc_common.get_split`.

    Returns:
      The result of `pascalvoc_common.get_split` (documented by the original
      author as a `Dataset` namedtuple).

    Raises:
      ValueError: if `split_name` is not a valid train/test split.
    """
    pattern = file_pattern or FILE_PATTERN
    return pascalvoc_common.get_split(
        split_name, dataset_dir, pattern, reader,
        SPLITS_TO_SIZES, ITEMS_TO_DESCRIPTIONS, NUM_CLASSES,
        replica, **kwargs)
116+
117+
# for k, v in TRAIN_STATISTICS2.items():
118+
# print("'{}': ({}, {})".format(k ,v[0]+TRAIN_STATISTICS1[k][0],v[1]+TRAIN_STATISTICS1[k][1]))

Diff for: datasets/pascalvoc_2012.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
NUM_CLASSES = 20
6161

6262

63-
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
63+
def get_split(split_name, dataset_dir, file_pattern=None, reader=None, replica=False, **kwargs):
6464
"""Gets a dataset tuple with instructions for reading ImageNet.
6565
6666
Args:
@@ -80,8 +80,10 @@ def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
8080
if not file_pattern:
8181
file_pattern = FILE_PATTERN
8282
return pascalvoc_common.get_split(split_name, dataset_dir,
83-
file_pattern, reader,
84-
SPLITS_TO_SIZES,
85-
ITEMS_TO_DESCRIPTIONS,
86-
NUM_CLASSES)
83+
file_pattern, reader,
84+
SPLITS_TO_SIZES,
85+
ITEMS_TO_DESCRIPTIONS,
86+
NUM_CLASSES,
87+
replica,
88+
**kwargs)
8789

Diff for: datasets/pascalvoc_common.py

+23-2
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,15 @@
4545
'tvmonitor': (20, 'Indoor'),
4646
}
4747

48+
VOC_CLASSES = ( # always index 0
49+
'aeroplane', 'bicycle', 'bird', 'boat',
50+
'bottle', 'bus', 'car', 'cat', 'chair',
51+
'cow', 'diningtable', 'dog', 'horse',
52+
'motorbike', 'person', 'pottedplant',
53+
'sheep', 'sofa', 'train', 'tvmonitor')
4854

4955
def get_split(split_name, dataset_dir, file_pattern, reader,
50-
split_to_sizes, items_to_descriptions, num_classes):
56+
split_to_sizes, items_to_descriptions, num_classes, replica, **kwargs):
5157
"""Gets a dataset tuple with instructions for reading Pascal VOC dataset.
5258
5359
Args:
@@ -66,7 +72,22 @@ def get_split(split_name, dataset_dir, file_pattern, reader,
6672
"""
6773
if split_name not in split_to_sizes:
6874
raise ValueError('split name %s was not recognized.' % split_name)
69-
file_pattern = os.path.join(dataset_dir, file_pattern % split_name)
75+
76+
if replica:
77+
if 'num_workers' not in kwargs:
78+
raise ValueError('Must provide "num_workers" for slim DatasetDataProvider.')
79+
if 'worker_index' not in kwargs:
80+
raise ValueError('Must provide "worker_index" for slim DatasetDataProvider.')
81+
if not (kwargs['worker_index'] < kwargs['num_workers']):
82+
raise ValueError('"worker_index" must in the range [0, num_workers].')
83+
input_file_list = tf.gfile.Glob(os.path.join(dataset_dir, file_pattern % split_name))
84+
85+
slice_start = int(kwargs['worker_index']*len(input_file_list)/kwargs['num_workers'])
86+
slice_end = int((kwargs['worker_index']+1)*len(input_file_list)/kwargs['num_workers'])
87+
input_file_list.sort()
88+
file_pattern = input_file_list[slice_start:slice_end]
89+
else:
90+
file_pattern = os.path.join(dataset_dir, file_pattern % split_name)
7091

7192
# Allowing None in the signature so that dataset_factory can use the default.
7293
if reader is None:

0 commit comments

Comments
 (0)