Skip to content

Commit 9eb387d

Browse files
committed
添加更多中文注释
1 parent 893ffea commit 9eb387d

File tree

4 files changed

+60
-42
lines changed

4 files changed

+60
-42
lines changed

convert_weight.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -87,16 +87,17 @@ def main(argv):
8787
with tf.variable_scope('yolov3'):
8888
feature_map = model.forward(inputs, is_training=False) # 返回3个尺度的feature_map
8989

90+
# 获取网络给出的绝对boxes(左上角,右下角)信息, 尚未经过非极大值抑制(NMS)去除多余boxes
9091
boxes, confs, probs = model.predict(feature_map)
9192
scores = confs * probs
92-
# print("boxes: ", boxes)
9393
print("=>", boxes.name[:-2], scores.name[:-2])
94-
# exit()
94+
# cpu 运行时恢复模型所需要的网络节点的名字
9595
cpu_out_node_names = [boxes.name[:-2], scores.name[:-2]]
9696
boxes, scores, labels = utils.gpu_nms(boxes, scores, flags.num_classes,
9797
score_thresh=flags.score_threshold,
9898
iou_thresh=flags.iou_threshold)
9999
print("=>", boxes.name[:-2], scores.name[:-2], labels.name[:-2])
100+
# gpu 运行时恢复模型所需要的网络节点的名字, 直接运算得出最终结果
100101
gpu_out_node_names = [boxes.name[:-2], scores.name[:-2], labels.name[:-2]]
101102
feature_map_1, feature_map_2, feature_map_3 = feature_map
102103
saver = tf.train.Saver(var_list=tf.global_variables(scope='yolov3'))

core/utils.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ def load_weights(var_list, weights_file):
244244
# print(np.fromfile(fp, dtype=np.int32, count=-1))
245245
# print(fp)
246246
# exit()
247-
weights = np.fromfile(fp, dtype=np.float32)
247+
weights = np.fromfile(fp, dtype=np.float32) # 读取所有
248248

249249
ptr = 0
250250
i = 0
@@ -266,7 +266,8 @@ def load_weights(var_list, weights_file):
266266
batch_norm_vars = [beta, gamma, mean, var]
267267
for var in batch_norm_vars:
268268
shape = var.shape.as_list()
269-
num_params = np.prod(shape) # 总的元素数量
269+
num_params = np.prod(shape) # 计算BN层的参数量
270+
# 读取相对应的参数量
270271
var_weights = weights[ptr:ptr + num_params].reshape(shape) # 恢复shape
271272
ptr += num_params
272273
assign_ops.append(tf.assign(var, var_weights, validate_shape=True))
@@ -277,8 +278,7 @@ def load_weights(var_list, weights_file):
277278
bias = var2
278279
bias_shape = bias.shape.as_list()
279280
bias_params = np.prod(bias_shape)
280-
bias_weights = weights[ptr:ptr +
281-
bias_params].reshape(bias_shape)
281+
bias_weights = weights[ptr:ptr + bias_params].reshape(bias_shape)
282282
ptr += bias_params
283283
assign_ops.append(tf.assign(bias, bias_weights, validate_shape=True))
284284
# we loaded 1 variable
@@ -287,9 +287,10 @@ def load_weights(var_list, weights_file):
287287
shape = var1.shape.as_list()
288288
num_params = np.prod(shape)
289289

290+
# 权重文件中卷积核按 (out_channels, in_channels, h, w) 的顺序存储, 需先按此形状读取, 再转置为 (h, w, in_channels, out_channels)
290291
var_weights = weights[ptr:ptr + num_params].reshape(
291292
(shape[3], shape[2], shape[0], shape[1])) # 沙雕模型文件
292-
# remember to transpose to column-major
293+
# remember to transpose to column-major 维度交换
293294
var_weights = np.transpose(var_weights, (2, 3, 1, 0))
294295
ptr += num_params
295296
assign_ops.append(

core/yolov3.py

+48-33
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ def _yolo_block(self, inputs, filters):
8585
inputs = common._conv2d_fixed_padding(inputs, filters * 2, 3)
8686
return route, inputs
8787

88+
# 目标识别的层, 转换到合适的深度,以满足不同class_num数据的分类
8889
def _detection_layer(self, inputs, anchors):
8990
num_anchors = len(anchors)
9091
feature_map = slim.conv2d(inputs, num_anchors * (5 + self._NUM_CLASSES), 1,
@@ -93,20 +94,22 @@ def _detection_layer(self, inputs, anchors):
9394
biases_initializer=tf.zeros_initializer())
9495
return feature_map
9596

96-
# reorganization
97+
# 将网络计算的缩放量和偏移量与anchors、网格位置结合, 得到在原图中的绝对位置与大小
9798
def _reorg_layer(self, feature_map, anchors):
9899
# 将张量转换为适合的格式
99100
num_anchors = len(anchors) # num_anchors=3
100101
grid_size = feature_map.shape.as_list()[1:3] # 网格数
101102
# the downscale ratio in height and width
102103
stride = tf.cast(self.img_size // grid_size, tf.float32) # [h,w] -> [y,x] 平均每个网格多少个像素值
104+
# 将anchors 与 目标信息拆开 (batch_size, cell, cell , anchor_num * (5 + class_num)) -->
105+
# (batch_size, cell, cell , anchor_num ,5 + class_num)
103106
feature_map = tf.reshape(feature_map,
104107
[-1, grid_size[0], grid_size[1], num_anchors, 5 + self._NUM_CLASSES]) # 特征图
105108

106109
box_centers, box_sizes, conf_logits, prob_logits = tf.split(
107110
feature_map, [2, 2, 1, self._NUM_CLASSES], axis=-1) # 分离各个值,在最后一个维度进行
108111

109-
box_centers = tf.nn.sigmoid(box_centers) # 使得偏移量变为非负
112+
box_centers = tf.nn.sigmoid(box_centers) # 使得偏移量变为非负,且在0~1之间, 超过1之后,中心点就偏移到了其他的单元中
110113

111114
grid_x = tf.range(grid_size[1], dtype=tf.int32)
112115
grid_y = tf.range(grid_size[0], dtype=tf.int32)
@@ -150,16 +153,15 @@ def _reorg_layer(self, feature_map, anchors):
150153
x_y_offset = tf.cast(x_y_offset, tf.float32)
151154

152155
box_centers = box_centers + x_y_offset # 物体的中心坐标
153-
box_centers = box_centers * stride[::-1] # 在原图的坐标位置,反归一化
156+
box_centers = box_centers * stride[::-1] # 在原图的坐标位置,反归一化 [h,w] -> [y,x]
154157

155-
# tf.exp(box_sizes) 避免缩放出现负数, box_size[13,13,3,2],anchor[3,2]
158+
# tf.exp(box_sizes) 避免缩放出现负数, box_size[13,13,3,2], anchor[3,2]
156159
box_sizes = tf.exp(box_sizes) * anchors # anchors -> [w, h] 使用网络计算出的缩放量对anchors进行缩放
157160
boxes = tf.concat([box_centers, box_sizes], axis=-1) # 计算出所有的方框在原图中的位置
158161
return x_y_offset, boxes, conf_logits, prob_logits
159162

160163
@staticmethod # 静态方法不会和类或实例进行绑定
161-
def _upsample(inputs, out_shape): # 上采样
162-
164+
def _upsample(inputs, out_shape): # 上采样, 放大图片
163165
new_height, new_width = out_shape[1], out_shape[2]
164166
inputs = tf.image.resize_nearest_neighbor(inputs, (new_height, new_width)) # 使用最近邻改变图像大小
165167
inputs = tf.identity(inputs, name='upsampled')
@@ -215,23 +217,25 @@ def forward(self, inputs, is_training=False, reuse=False):
215217
with tf.variable_scope('yolo-v3'):
216218
# https://github.com/YunYang1994/tensorflow-yolov3/raw/master/docs/images/levio.jpeg
217219
# https://images2018.cnblogs.com/blog/606386/201803/606386-20180327004340505-1572852891.png
218-
# feature_map1 13x13x255
220+
# feature_map1 13x13x1024 --> 13x13x[3x(5+class_num)]
219221
route, inputs = self._yolo_block(inputs, 512)
220222
feature_map_1 = self._detection_layer(inputs, self._ANCHORS[6:9])
221223
feature_map_1 = tf.identity(feature_map_1, name='feature_map_1')
222224

223-
# feature_map2 26x26x255
225+
# feature_map2 26x26x512 --> 26x26x[3x(5+class_num)]
224226
inputs = common._conv2d_fixed_padding(route, 256, 1)
225227
upsample_size = route_2.get_shape().as_list()
228+
# 13x13 --> 26x26
226229
inputs = self._upsample(inputs, upsample_size) # 通过直接放大进行上采样
227230
inputs = tf.concat([inputs, route_2], axis=3) # 在axis=3 进行连接,
228231
route, inputs = self._yolo_block(inputs, 256)
229232
feature_map_2 = self._detection_layer(inputs, self._ANCHORS[3:6])
230233
feature_map_2 = tf.identity(feature_map_2, name='feature_map_2')
231234

232-
# feature_map3 52x52x255
235+
# feature_map3 52x52x256 --> 52x52x[3x(5+class_num)]
233236
inputs = common._conv2d_fixed_padding(route, 128, 1)
234237
upsample_size = route_1.get_shape().as_list()
238+
# 26x26 --> 52x52
235239
inputs = self._upsample(inputs, upsample_size)
236240
inputs = tf.concat([inputs, route_1], axis=3)
237241
route, inputs = self._yolo_block(inputs, 128)
@@ -241,7 +245,7 @@ def forward(self, inputs, is_training=False, reuse=False):
241245
return feature_map_1, feature_map_2, feature_map_3
242246

243247
def _reshape(self, x_y_offset, boxes, confs, probs):
244-
248+
# 构成一个(batch_size, cell*cell*len(anchors) , boxes)
245249
grid_size = x_y_offset.shape.as_list()[:2] # 网格数
246250
boxes = tf.reshape(boxes, [-1, grid_size[0] * grid_size[1] * 3, 4]) # 3个anchor
247251
confs = tf.reshape(confs, [-1, grid_size[0] * grid_size[1] * 3, 1]) # 3个anchor分别对应概率
@@ -265,13 +269,14 @@ def predict(self, feature_maps):
265269
(feature_map_2, self._ANCHORS[3:6]),
266270
(feature_map_3, self._ANCHORS[0:3])]
267271

272+
# boxes 的相对位置转换为绝对位置
268273
results = [self._reorg_layer(feature_map, anchors) for (feature_map, anchors) in feature_map_anchors]
269274
boxes_list, confs_list, probs_list = [], [], []
270275

271276
for result in results:
272-
# print("*results==>", *result)
273-
# print("results==>", len(result))
277+
# *result = x_y_offset, boxes, confs, probs
274278
boxes, conf_logits, prob_logits = self._reshape(*result)
279+
# --> (batch_size, cell*cell*anchor_num, boxes/conf/prob)
275280

276281
confs = tf.sigmoid(conf_logits) # 转化成概率
277282
probs = tf.sigmoid(prob_logits) # 转化成概率(各类别相互独立), 各类概率之和不再为1
@@ -281,12 +286,12 @@ def predict(self, feature_maps):
281286
probs_list.append(probs)
282287

283288
# 将3个feature_map中所有的信息,整合到一个张量
284-
# shape : [Batch_size,10647,4] 10647=13x13x3+26x26x3+52x52x3
289+
# shape : [Batch_size,10647,4] 10647 = 13x13x3 + 26x26x3 + 52x52x3
285290
boxes = tf.concat(boxes_list, axis=1) # [Batch_size,10647,4]
286291
confs = tf.concat(confs_list, axis=1) # [Batch_size,10647,1]
287292
probs = tf.concat(probs_list, axis=1) # [Batch_size,10647,class_num]
288293

289-
# 坐标转化:中心坐标转化为左上角作案表,右下角坐标
294+
# 坐标转化:中心坐标转化为 左上角坐标,右下角坐标 --> 方便计算矩形框
290295
center_x, center_y, width, height = tf.split(boxes, [1, 1, 1, 1], axis=-1)
291296
x0 = center_x - width / 2.
292297
y0 = center_y - height / 2.
@@ -301,7 +306,7 @@ def compute_loss(self, pred_feature_map, y_true, ignore_thresh=0.5, max_box_per_
301306
:param pred_feature_map: list [feature_map_1,feature_map_2,feature_map3]
302307
feature_map_1[13,13,3,(5 + self._NUM_CLASSES)]
303308
:param y_true: list [y_true_13, y_true_26, y_true_52]
304-
y_true_13 [13,13,3,(5 + self._NUM_CLASSES)]只有含有目标的网格中存在信息,其余均为0.
309+
y_true_13 [13,13,3,(5 + self._NUM_CLASSES)] 只有含有目标的网格中存在信息,其余均为0.
305310
:param ignore_thresh: 0.5
306311
:param max_box_per_image:
307312
:return:
@@ -328,19 +333,22 @@ def loss_layer(self, feature_map_i, y_true, anchors):
328333
grid_size = tf.shape(feature_map_i)[1:3] # cellxcell
329334
grid_size_ = feature_map_i.shape.as_list()[1:3]
330335

331-
# 本身具有[-1, grid_size_[0], grid_size_[1], 3, 5 + self._NUM_CLASSES]的shape,只是进过tf.py_func时丢失.使用reshape重新赋予shape
336+
# 本身具有[-1, grid_size_[0], grid_size_[1], 3, 5 + self._NUM_CLASSES]的shape,
337+
# 但在经过tf.py_func方法时丢失shape信息,使用reshape重新赋予shape
332338
y_true = tf.reshape(y_true, [-1, grid_size_[0], grid_size_[1], 3, 5 + self._NUM_CLASSES])
333339

334340
# the downscale ratio in height and width
335341
ratio = tf.cast(self.img_size / grid_size, tf.float32)
336342
# N: batch_size
337343
N = tf.cast(tf.shape(feature_map_i)[0], tf.float32)
338344

345+
# 经过self._reorg_layer后boxes会被换算成原图上的绝对位置, 后面会使用ratio再换算回cellxcell上
339346
x_y_offset, pred_boxes, pred_conf_logits, pred_prob_logits = self._reorg_layer(feature_map_i, anchors)
347+
340348
# shape: take 416x416 input image and 13*13 feature_map for example:
341-
#
342349
# [N, 13, 13, 3, 1]
343350
object_mask = y_true[..., 4:5] # 该feature_map下所有的目标,有目标的为1,无目标的为0
351+
344352
# shape: [N, 13, 13, 3, 4] & [N, 13, 13, 3] ==> [V, 4]
345353
# V: num of true gt box, 该feature_map下所有检测目标的数量
346354
valid_true_boxes = tf.boolean_mask(y_true[..., 0:4],
@@ -353,60 +361,67 @@ def loss_layer(self, feature_map_i, y_true, anchors):
353361
pred_box_xy = pred_boxes[..., 0:2]
354362
pred_box_wh = pred_boxes[..., 2:4]
355363

356-
# calc iou 计算交并比
357-
# shape: [N, 13, 13, 3, V]
364+
# calc iou 计算每个pre_boxe与所有true_boxe的交并比.
358365
# true:[V,2],[V,2]
359366
# pre : [13,13,3,2]
367+
# out_shape: [N, 13, 13, 3, V],
360368
iou = self._broadcast_iou(valid_true_box_xy, valid_true_box_wh, pred_box_xy, pred_box_wh)
361369

362-
# iou shape : [N,13,13,3,V]
363-
best_iou = tf.reduce_max(iou, axis=-1) # 选择每个anchor中最大的那个.
370+
# iou_shape : [N,13,13,3,V] 每个单元下每个anchor与所有的true_boxes的交并比
371+
best_iou = tf.reduce_max(iou, axis=-1) # 选择每个anchor中iou最大的那个.
372+
# out_shape : [N,13,13,3]
373+
364374
# get_ignore_mask
365-
ignore_mask = tf.cast(best_iou < 0.5, tf.float32) # 如果重合率低于0.5
366-
# shape: [N, 13, 13, 3, 1]
375+
ignore_mask = tf.cast(best_iou < 0.5, tf.float32) # best_iou低于0.5的anchor记为1(可作为负样本计入conf损失), 不低于0.5的记为0(被忽略)
376+
# out_shape : [N,13,13,3] 0,1张量
377+
367378
ignore_mask = tf.expand_dims(ignore_mask, -1)
379+
# out_shape: [N, 13, 13, 3, 1] 0,1张量
380+
368381
# get xy coordinates in one cell from the feature_map
369382
# numerical range: 0 ~ 1
370-
# shape: [N, 13, 13, 3, 2] # 坐标未归一化
371-
true_xy = y_true[..., 0:2] / ratio[::-1] - x_y_offset # 偏移
372-
pred_xy = pred_box_xy / ratio[::-1] - x_y_offset # 偏移
383+
# shape: [N, 13, 13, 3, 2] # 坐标反归一化
384+
true_xy = y_true[..., 0:2] / ratio[::-1] - x_y_offset # 绝对(image_size * image_size)信息 转换为 单元(cellxcell)相对信息
385+
pred_xy = pred_box_xy / ratio[::-1] - x_y_offset # 获取网络真实输出值
373386

374387
# get_tw_th, numerical range: 0 ~ 1
375388
# shape: [N, 13, 13, 3, 2],
376389
true_tw_th = y_true[..., 2:4] / anchors # 缩放量
377390
pred_tw_th = pred_box_wh / anchors
378-
# for numerical stability 稳定训练
391+
# for numerical stability 稳定训练, 为0时不对anchors进行缩放, 在模型输出值特别小时e^out_put为0
379392
true_tw_th = tf.where(condition=tf.equal(true_tw_th, 0),
380393
x=tf.ones_like(true_tw_th), y=true_tw_th)
381394
pred_tw_th = tf.where(condition=tf.equal(pred_tw_th, 0),
382395
x=tf.ones_like(pred_tw_th), y=pred_tw_th)
383-
384-
true_tw_th = tf.log(tf.clip_by_value(true_tw_th, 1e-9, 1e9)) # 网络输出的原值(有正负)
396+
# 还原网络最原始的输出值(有正负的)
397+
true_tw_th = tf.log(tf.clip_by_value(true_tw_th, 1e-9, 1e9))
385398
pred_tw_th = tf.log(tf.clip_by_value(pred_tw_th, 1e-9, 1e9))
386399

387400
# box size punishment:
388401
# box with smaller area has bigger weight. This is taken from the yolo darknet C source code.
389402
# 面积较小的box有较大的权重
390-
# shape: [N, 13, 13, 3, 1] 2. - 面积
403+
# shape: [N, 13, 13, 3, 1] 2. - 面积 为1时表示保持原始权重
391404
box_loss_scale = 2. - (y_true[..., 2:3] / tf.cast(self.img_size[1], tf.float32)) * (
392405
y_true[..., 3:4] / tf.cast(self.img_size[0], tf.float32))
393406

394-
# shape: [N, 13, 13, 3, 1] 方框损失值
407+
# shape: [N, 13, 13, 3, 1] 方框损失值, 中心坐标均方差损失 * mask[N, 13, 13, 3, 1]
408+
# 仅仅计算有目标单元的loss, 不计算那些错误预测的boxes, 在预测时首先会排除那些conf、iou低的单元
395409
xy_loss = tf.reduce_sum(tf.square(true_xy - pred_xy) * object_mask * box_loss_scale) / N # N:batch_size
396410
wh_loss = tf.reduce_sum(tf.square(true_tw_th - pred_tw_th) * object_mask * box_loss_scale) / N
397411

398412
# shape: [N, 13, 13, 3, 1]
399413
conf_pos_mask = object_mask # 只要存在目标的boxe
400414
conf_neg_mask = (1 - object_mask) * ignore_mask # 选择不存在目标,同时iou小于阈值(0.5),
415+
401416
# 分离正样本和负样本
402417
# 正样本损失
403418
conf_loss_pos = conf_pos_mask * tf.nn.sigmoid_cross_entropy_with_logits(labels=object_mask,
404419
logits=pred_conf_logits)
405420
# 处理后的负样本损失,只计算那些是单元格中没有目标,同时IOU小于0.5的单元,
406-
# 只惩罚IOU<0.5,而不惩罚IOU>0.5 的原因是因为该单元内是有目标的,但是目标中心点却没有落在该单元中.
421+
# 只惩罚IOU<0.5,而不惩罚IOU>0.5 的原因是可能该单元内是有目标的,仅仅只是目标中心点却没有落在该单元中.所以不计算该loss
407422
conf_loss_neg = conf_neg_mask * tf.nn.sigmoid_cross_entropy_with_logits(labels=object_mask,
408423
logits=pred_conf_logits)
409-
conf_loss = tf.reduce_sum(conf_loss_pos + conf_loss_neg) / N # 平均交叉熵,同时提高正确率,压低错误率
424+
conf_loss = tf.reduce_sum(conf_loss_pos + conf_loss_neg) / N # 平均交叉熵,同时提高正确分类,压低错误分类
410425

411426
# shape: [N, 13, 13, 3, 1], 分类loss
412427
# object_mask 只看与目标相匹配的anchors

train_demo/quick_train.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,11 @@
5252
# a2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="yolov3/darknet-53")
5353
# print(a1 == a2)
5454
# exit()
55+
# 恢复darknet-53特征提取器的权重参数, 只更新yolo-v3目标预测部分参数.
5556
saver_to_restore = tf.train.Saver(
5657
var_list=tf.contrib.framework.get_variables_to_restore(include=["yolov3/darknet-53"])) # 固定特征提取器
5758
update_vars = tf.contrib.framework.get_variables_to_restore(include=["yolov3/yolo-v3"])
58-
# 每一百次降低一次学习率
59+
# 每一百次降低一次学习率, 学习率衰减
5960
learning_rate = tf.train.exponential_decay(LR, global_step, decay_steps=DECAY_STEPS, decay_rate=DECAY_RATE,
6061
staircase=True)
6162
optimizer = tf.train.AdamOptimizer(learning_rate)
@@ -97,4 +98,4 @@
9798
print("\n=======================> evaluation result <================================\n")
9899

99100
writer_test.add_summary(run_items[0], global_step=step)
100-
writer_test.flush() # Flushes the event file to disk
101+
writer_test.flush() # Flushes the event file to disk 写入磁盘

0 commit comments

Comments
 (0)