可视化结果:
Conclusion
论文提出了一种基于深度可分离卷积的新模型MobileNet,同时提出了两个超参数用于快速调节模型适配到特定环境。实验部分将MobileNet与许多先进模型做对比,展现出MobileNet的在尺寸、计算量、速度上的优越性。
代码分析
这里参考的代码是mobilenet.py文件(当然也可以参考TensoeFlow官方的MobileNet)
TensorFlow有实现好的深度卷积API,所以整个代码非常整洁的~
'''
100% Mobilenet V1 (base) with input size 224x224:
See mobilenet_v1()
Layer params macs
--------------------------------------------------------------------------------
MobilenetV1/Conv2d_0/Conv2D: 864 10,838,016
MobilenetV1/Conv2d_1_depthwise/depthwise: 288 3,612,672
MobilenetV1/Conv2d_1_pointwise/Conv2D: 2,048 25,690,112
MobilenetV1/Conv2d_2_depthwise/depthwise: 576 1,806,336
MobilenetV1/Conv2d_2_pointwise/Conv2D: 8,192 25,690,112
MobilenetV1/Conv2d_3_depthwise/depthwise: 1,152 3,612,672
MobilenetV1/Conv2d_3_pointwise/Conv2D: 16,384 51,380,224
MobilenetV1/Conv2d_4_depthwise/depthwise: 1,152 903,168
MobilenetV1/Conv2d_4_pointwise/Conv2D: 32,768 25,690,112
MobilenetV1/Conv2d_5_depthwise/depthwise: 2,304 1,806,336
MobilenetV1/Conv2d_5_pointwise/Conv2D: 65,536 51,380,224
MobilenetV1/Conv2d_6_depthwise/depthwise: 2,304 451,584
MobilenetV1/Conv2d_6_pointwise/Conv2D: 131,072 25,690,112
MobilenetV1/Conv2d_7_depthwise/depthwise: 4,608 903,168
MobilenetV1/Conv2d_7_pointwise/Conv2D: 262,144 51,380,224
MobilenetV1/Conv2d_8_depthwise/depthwise: 4,608 903,168
MobilenetV1/Conv2d_8_pointwise/Conv2D: 262,144 51,380,224
MobilenetV1/Conv2d_9_depthwise/depthwise: 4,608 903,168
MobilenetV1/Conv2d_9_pointwise/Conv2D: 262,144 51,380,224
MobilenetV1/Conv2d_10_depthwise/depthwise: 4,608 903,168
MobilenetV1/Conv2d_10_pointwise/Conv2D: 262,144 51,380,224
MobilenetV1/Conv2d_11_depthwise/depthwise: 4,608 903,168
MobilenetV1/Conv2d_11_pointwise/Conv2D: 262,144 51,380,224
MobilenetV1/Conv2d_12_depthwise/depthwise: 4,608 225,792
MobilenetV1/Conv2d_12_pointwise/Conv2D: 524,288 25,690,112
MobilenetV1/Conv2d_13_depthwise/depthwise: 9,216 451,584
MobilenetV1/Conv2d_13_pointwise/Conv2D: 1,048,576 51,380,224
--------------------------------------------------------------------------------
Total: 3,185,088 567,716,352
'''
def mobilenet(inputs,
num_classes=1000,
is_training=True,
width_multiplier=1,
scope='MobileNet'):
""" MobileNet
More detail, please refer to Google's paper(https://arxiv.org/abs/1704.04861).
Args:
inputs: a tensor of size [batch_size, height, width, channels].
num_classes: number of predicted classes.
is_training: whether or not the model is being trained.
scope: Optional scope for the variables.
Returns:
logits: the pre-softmax activations, a tensor of size
[batch_size, `num_classes`]
end_points: a dictionary from components of the network to the corresponding
activation.
"""
# depthwise_separable_conv内包含深度卷积和逐点卷积
def _depthwise_separable_conv(inputs,
num_pwc_filters,
width_multiplier,
sc,
downsample=False):
""" Helper function to build the depth-wise separable convolution layer.
"""
num_pwc_filters = round(num_pwc_filters * width_multiplier)
_stride = 2 if downsample else 1
# 设置num_outputs=None跳过逐点卷积
depthwise_conv = slim.separable_convolution2d(inputs,
num_outputs=None,
stride=_stride,
depth_multiplier=1,
kernel_size=[3, 3],
scope=sc+'/depthwise_conv')
bn = slim.batch_norm(depthwise_conv, scope=sc+'/dw_batch_norm')
# 逐点卷积变换通道
pointwise_conv = slim.convolution2d(bn,
num_pwc_filters,
kernel_size=[1, 1],
scope=sc+'/pointwise_conv')
bn = slim.batch_norm(pointwise_conv, scope=sc+'/pw_batch_norm')
return bn
with tf.variable_scope(scope) as sc:
end_points_collection = sc.name + '_end_points'
with slim.arg_scope([slim.convolution2d, slim.separable_convolution2d],
activation_fn=None,
outputs_collections=[end_points_collection]):
with slim.arg_scope([slim.batch_norm],
is_training=is_training,
activation_fn=tf.nn.relu,
fused=True):
# 定义整体结构
net = slim.convolution2d(inputs, round(32 * width_multiplier), [3, 3], stride=2, padding='SAME', scope='conv_1')
net = slim.batch_norm(net, scope='conv_1/batch_norm')
net = _depthwise_separable_conv(net, 64, width_multiplier, sc='conv_ds_2')
net = _depthwise_separable_conv(net, 128, width_multiplier, downsample=True, sc='conv_ds_3')
net = _depthwise_separable_conv(net, 128, width_multiplier, sc='conv_ds_4')
net = _depthwise_separable_conv(net, 256, width_multiplier, downsample=True, sc='conv_ds_5')
net = _depthwise_separable_conv(net, 256, width_multiplier, sc='conv_ds_6')
net = _depthwise_separable_conv(net, 512, width_multiplier, downsample=True, sc='conv_ds_7')
net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_8')
net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_9')
net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_10')
net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_11')
net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_12')
net = _depthwise_separable_conv(net, 1024, width_multiplier, downsample=True, sc='conv_ds_13')
net = _depthwise_separable_conv(net, 1024, width_multiplier, sc='conv_ds_14')
net = slim.avg_pool2d(net, [7, 7], scope='avg_pool_15')
end_points = slim.utils.convert_collection_to_dict(end_points_collection)
net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
end_points['squeeze'] = net
# 接FC再接Softmax
logits = slim.fully_connected(net, num_classes, activation_fn=None, scope='fc_16')
predictions = slim.softmax(logits, scope='Predictions')
end_points['Logits'] = logits
end_points['Predictions'] = predictions
return logits, end_points
mobilenet.default_image_size = 224