This post is mainly about the code. Links to the architecture descriptions and the papers are available (a web search turns up plenty); the main goal is to understand, through the code, how the ideas described in the papers are actually implemented.
LeNet-5 is the pioneering work. There was no ReLU back then, only sigmoid activations. Handwritten digits were digitized into 32×32 grayscale images; computing power was limited at the time, so the technique could not scale to large images.
The model contains 7 layers (not counting the input layer). Since it is a relatively small architecture, it is easy to go through layer by layer. (Note that the implementation below is a modernized variant: it uses ReLU, max pooling, and a single 500-unit dense layer instead of the original sigmoid/tanh activations, average pooling, and 120/84-unit fully connected layers.)
import warnings
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers
from tensorflow.keras.models import Model
def lenet5(in_shape=(32,32,1),n_classes=10):
in_layer=layers.Input(shape=in_shape)
conv1 = layers.Conv2D(filters=6,kernel_size=5,padding='same',activation='relu')(in_layer)
pool1 = layers.MaxPooling2D(pool_size=2,strides=2)(conv1)
conv2 = layers.Conv2D(filters=16,kernel_size=5,padding='same',activation='relu')(pool1)
pool2 = layers.MaxPooling2D(pool_size=2,strides=2)(conv2)
flatten = layers.Flatten()(pool2)
dense1= layers.Dense(500,activation='relu')(flatten)
logits = layers.Dense(n_classes,activation='softmax')(dense1)
model = Model(in_layer,logits)
return model
model = lenet5()
#model.summary()
#tf.keras.utils.plot_model(model)
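To try the model out, here is a minimal training sketch (my own addition, not from the original text): MNIST digits are 28×28, so they are padded to 32×32 to match the input shape above.
# Minimal MNIST training sketch for the lenet5() model defined above.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
# Pad 28x28 digits to 32x32 and add a channel dimension.
x_train = np.pad(x_train, ((0, 0), (2, 2), (2, 2)))[..., np.newaxis] / 255.0
x_test = np.pad(x_test, ((0, 0), (2, 2), (2, 2)))[..., np.newaxis] / 255.0
model = lenet5()
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=128, epochs=2,
          validation_data=(x_test, y_test))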
In 2012, the deep neural network from Hinton's group cut the error rate on ImageNet, the world's most important computer vision challenge, from 26% to 15.3%.
The network is very similar to LeNet but deeper, with roughly 60 million parameters. The authors also used a number of other techniques: dropout, data augmentation, and stochastic gradient descent with momentum; a sketch of such a training setup follows the model code below.
def alexnet(in_shape=(224,224,3),n_classes=1000):
in_layer = layers.Input(in_shape)
conv1 = layers.Conv2D(96, 11, strides=4, activation='relu')(in_layer)
pool1 = layers.MaxPool2D(3, 2)(conv1)
conv2 = layers.Conv2D(256, 5, strides=1, padding='same', activation='relu')(pool1)
pool2 = layers.MaxPool2D(3, 2)(conv2)
conv3 = layers.Conv2D(384, 3, strides=1, padding='same', activation='relu')(pool2)
conv4 = layers.Conv2D(256, 3, strides=1, padding='same', activation='relu')(conv3)
pool3 = layers.MaxPool2D(3, 2)(conv4)
flattened = layers.Flatten()(pool3)
dense1 = layers.Dense(4096, activation='relu')(flattened)
drop1 = layers.Dropout(0.5)(dense1)
dense2 = layers.Dense(4096, activation='relu')(drop1)
drop2 = layers.Dropout(0.5)(dense2)
preds = layers.Dense(n_classes, activation='softmax')(drop2)
model = Model(in_layer, preds)
return model
model = alexnet()
#model.summary()
#tf.keras.utils.plot_model(model)
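The model code above only includes the dropout part; a hedged sketch of the other ingredients mentioned above (SGD with momentum and data augmentation) might look like this. The augmentation settings are illustrative, not the paper's exact pipeline.
# Hypothetical training setup: SGD with momentum plus simple augmentation.
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
augmenter = tf.keras.preprocessing.image.ImageDataGenerator(
    horizontal_flip=True,       # random mirroring
    width_shift_range=0.1,      # shifts as a rough stand-in for random crops
    height_shift_range=0.1)
# model.fit(augmenter.flow(x_train, y_train, batch_size=128), epochs=90)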
The runner-up of the 2014 ImageNet challenge was VGGNet. Thanks to its simple, uniform structure, it showed that a much deeper convolutional network can be built in a very plain form.
VGGNet follows two simple rules of thumb: every convolution is 3×3 with stride 1 and 'same' padding, and every max-pooling is 2×2 with stride 2, halving the spatial resolution while the number of filters roughly doubles from one stage to the next (64 → 128 → 256 → 512).
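A quick back-of-the-envelope count (my own illustration) of why stacked 3×3 convolutions are attractive: two stacked 3×3 convolutions cover the same receptive field as one 5×5 convolution but use fewer weights.
# Two stacked 3x3 convs have a 5x5 receptive field with ~28% fewer weights.
c = 256                                  # example channel count
params_5x5 = 5 * 5 * c * c               # one 5x5 conv
params_3x3_stack = 2 * (3 * 3 * c * c)   # two stacked 3x3 convs
print(params_5x5, params_3x3_stack)      # 1638400 vs 1179648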
from functools import partial
conv3 = partial(layers.Conv2D,kernel_size=3,strides=1,padding='same',activation='relu')
def block(in_tensor, filters, n_convs):
conv_block = in_tensor
for _ in range(n_convs):
conv_block = conv3(filters=filters)(conv_block)
return conv_block
def _vgg(in_shape=(224,224,3),n_classes=1000,n_stages_per_blocks=[2, 2, 3, 3, 3]):
in_layer = layers.Input(in_shape)
block1 = block(in_layer, 64, n_stages_per_blocks[0])
pool1 = layers.MaxPooling2D()(block1)
block2 = block(pool1, 128, n_stages_per_blocks[1])
pool2 = layers.MaxPooling2D()(block2)
block3 = block(pool2, 256, n_stages_per_blocks[2])
pool3 = layers.MaxPooling2D()(block3)
block4 = block(pool3, 512, n_stages_per_blocks[3])
pool4 = layers.MaxPooling2D()(block4)
block5 = block(pool4, 512, n_stages_per_blocks[4])
pool5 = layers.MaxPooling2D()(block5)
# flattened = layers.GlobalAvgPool2D()(pool5)
flattened = layers.Flatten()(pool5)
dense1 = layers.Dense(4096, activation='relu')(flattened)
dense2 = layers.Dense(4096, activation='relu')(dense1)
    preds = layers.Dense(n_classes, activation='softmax')(dense2)
model = Model(in_layer, preds)
return model
def vgg16(in_shape=(224,224,3), n_classes=1000):
return _vgg(in_shape, n_classes)
def vgg19(in_shape=(224,224,3), n_classes=1000):
return _vgg(in_shape, n_classes, [2, 2, 4, 4, 4])
model = vgg16()
#model.summary()
#tf.keras.utils.plot_model(model)
A few notes on the model (GoogLeNet / Inception v1): each Inception module runs 1×1, 3×3, and 5×5 convolutions plus a max-pooling branch in parallel and concatenates their outputs; 1×1 convolutions reduce the channel count before the expensive 3×3 and 5×5 branches (a quick parameter count is sketched below); two auxiliary classifiers are attached to intermediate layers so gradients reach the early layers during training; and the fully connected head is replaced by global average pooling.
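A rough count (my own illustration, using the channel numbers of the 3a module below) of what the 1×1 reduction saves in the 5×5 branch:
# 5x5 branch of inception 3a: 192 input channels, 32 output channels,
# with a 1x1 reduction to 16 channels before the 5x5 conv.
direct = 5 * 5 * 192 * 32                       # 5x5 conv applied directly
reduced = 1 * 1 * 192 * 16 + 5 * 5 * 16 * 32    # 1x1 reduce, then 5x5
print(direct, reduced)                          # 153600 vs 15872, ~10x fewer weights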
from functools import partial
conv1x1 = partial(layers.Conv2D, kernel_size=1, activation='relu')
conv3x3 = partial(layers.Conv2D, kernel_size=3, padding='same', activation='relu')
conv5x5 = partial(layers.Conv2D, kernel_size=5, padding='same', activation='relu')
def inception_module(in_tensor, c1, c3_1, c3, c5_1, c5, pp):
conv1 = conv1x1(c1)(in_tensor)
conv3_1 = conv1x1(c3_1)(in_tensor)
conv3 = conv3x3(c3)(conv3_1)
conv5_1 = conv1x1(c5_1)(in_tensor)
conv5 = conv5x5(c5)(conv5_1)
pool_conv = conv1x1(pp)(in_tensor)
pool = layers.MaxPool2D(3, strides=1, padding='same')(pool_conv)
merged = layers.Concatenate(axis=-1)([conv1, conv3, conv5, pool])
return merged
def aux_clf(in_tensor):
avg_pool = layers.AvgPool2D(5, 3)(in_tensor)
conv = conv1x1(128)(avg_pool)
flattened = layers.Flatten()(conv)
dense = layers.Dense(1024, activation='relu')(flattened)
dropout = layers.Dropout(0.7)(dense)
out = layers.Dense(1000, activation='softmax')(dropout)
return out
def inceptionv1_net(in_shape=(224,224,3), n_classes=1000):
in_layer = layers.Input(in_shape)
conv1 = layers.Conv2D(64, 7, strides=2, activation='relu', padding='same')(in_layer)
pad1 = layers.ZeroPadding2D()(conv1)
pool1 = layers.MaxPool2D(3, 2)(pad1)
conv2_1 = conv1x1(64)(pool1)
conv2_2 = conv3x3(192)(conv2_1)
pad2 = layers.ZeroPadding2D()(conv2_2)
pool2 = layers.MaxPool2D(3, 2)(pad2)
inception3a = inception_module(pool2, 64, 96, 128, 16, 32, 32)
inception3b = inception_module(inception3a, 128, 128, 192, 32, 96, 64)
pad3 = layers.ZeroPadding2D()(inception3b)
pool3 = layers.MaxPool2D(3, 2)(pad3)
inception4a = inception_module(pool3, 192, 96, 208, 16, 48, 64)
inception4b = inception_module(inception4a, 160, 112, 224, 24, 64, 64)
inception4c = inception_module(inception4b, 128, 128, 256, 24, 64, 64)
inception4d = inception_module(inception4c, 112, 144, 288, 32, 48, 64)
inception4e = inception_module(inception4d, 256, 160, 320, 32, 128, 128)
pad4 = layers.ZeroPadding2D()(inception4e)
pool4 = layers.MaxPool2D(3, 2)(pad4)
aux_clf1 = aux_clf(inception4a)
aux_clf2 = aux_clf(inception4d)
inception5a = inception_module(pool4, 256, 160, 320, 32, 128, 128)
inception5b = inception_module(inception5a, 384, 192, 384, 48, 128, 128)
avg_pool = layers.GlobalAvgPool2D()(inception5b)
dropout = layers.Dropout(0.4)(avg_pool)
    preds = layers.Dense(n_classes, activation='softmax')(dropout)
model = Model(in_layer, [preds, aux_clf1, aux_clf2])
return model
model = inceptionv1_net()
#model.summary()
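Because the model has three outputs (the main prediction plus two auxiliary classifiers), training needs one loss per output. A minimal compile sketch, assuming the auxiliary loss weight of 0.3 used in the paper:
# Hypothetical training setup: the auxiliary heads get a reduced loss weight
# and are simply discarded at inference time.
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9),
              loss=['categorical_crossentropy'] * 3,
              loss_weights=[1.0, 0.3, 0.3],
              metrics=['accuracy'])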
The improvements come down to two points:
The main improvement in v3 is factorization: a 7×7 convolution is decomposed into two one-dimensional convolutions (1×7 and 7×1), and a 3×3 convolution likewise into (1×3 and 3×1).
Benefits: fewer parameters and less computation (the saved compute can be used to make the network deeper), and splitting one convolution into two increases depth and hence the non-linear expressive power of the network.
In addition, the input size is changed from 224×224 to 299×299.
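A rough parameter count for the factorization idea (my own illustration; biases and BatchNorm ignored):
# An n x n convolution versus a 1 x n followed by an n x 1 convolution.
def conv_params(kh, kw, c_in, c_out):
    return kh * kw * c_in * c_out
c_in = c_out = 192
n = 7
full = conv_params(n, n, c_in, c_out)
factored = conv_params(1, n, c_in, c_out) + conv_params(n, 1, c_out, c_out)
print(full, factored)   # 1806336 vs 516096, i.e. the factored pair uses ~2/n of the weights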
def conv2d_bn(x,filters,num_row,num_col,padding='same',strides=(1,1)):
x = layers.Conv2D(filters,(num_row,num_col),strides=strides,padding=padding,use_bias=False)(x)
x = layers.BatchNormalization(axis=3,scale=False)(x)
x = layers.Activation('relu')(x)
return x
def inceptionv3_net(in_shape=(299,299,3), n_classes=1000):
in_layer = layers.Input(in_shape)
x = conv2d_bn(in_layer, 32, 3, 3, strides=(2, 2), padding='valid')
x = conv2d_bn(x, 32, 3, 3, padding='valid')
x = conv2d_bn(x, 64, 3, 3)
x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
x = conv2d_bn(x, 80, 1, 1, padding='valid')
x = conv2d_bn(x, 192, 3, 3, padding='valid')
x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
# mixed 0: 35 x 35 x 256
branch1x1 = conv2d_bn(x, 64, 1, 1)
branch5x5 = conv2d_bn(x, 48, 1, 1)
branch5x5 = conv2d_bn(branch5x5, 64, 5, 5)
branch3x3dbl = conv2d_bn(x, 64, 1, 1)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
branch_pool = layers.AveragePooling2D((3, 3),
strides=(1, 1),
padding='same')(x)
branch_pool = conv2d_bn(branch_pool, 32, 1, 1)
x = layers.concatenate(
[branch1x1, branch5x5, branch3x3dbl, branch_pool],
axis=3,
name='mixed0')
# mixed 1: 35 x 35 x 288
branch1x1 = conv2d_bn(x, 64, 1, 1)
branch5x5 = conv2d_bn(x, 48, 1, 1)
branch5x5 = conv2d_bn(branch5x5, 64, 5, 5)
branch3x3dbl = conv2d_bn(x, 64, 1, 1)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
branch_pool = layers.AveragePooling2D((3, 3),
strides=(1, 1),
padding='same')(x)
branch_pool = conv2d_bn(branch_pool, 64, 1, 1)
x = layers.concatenate(
[branch1x1, branch5x5, branch3x3dbl, branch_pool],
axis=3,
name='mixed1')
# mixed 2: 35 x 35 x 288
branch1x1 = conv2d_bn(x, 64, 1, 1)
branch5x5 = conv2d_bn(x, 48, 1, 1)
branch5x5 = conv2d_bn(branch5x5, 64, 5, 5)
branch3x3dbl = conv2d_bn(x, 64, 1, 1)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
branch_pool = layers.AveragePooling2D((3, 3),
strides=(1, 1),
padding='same')(x)
branch_pool = conv2d_bn(branch_pool, 64, 1, 1)
x = layers.concatenate(
[branch1x1, branch5x5, branch3x3dbl, branch_pool],
axis=3,
name='mixed2')
# mixed 3: 17 x 17 x 768
branch3x3 = conv2d_bn(x, 384, 3, 3, strides=(2, 2), padding='valid')
branch3x3dbl = conv2d_bn(x, 64, 1, 1)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
branch3x3dbl = conv2d_bn(
branch3x3dbl, 96, 3, 3, strides=(2, 2), padding='valid')
branch_pool = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
x = layers.concatenate(
[branch3x3, branch3x3dbl, branch_pool],
axis=3)
# mixed 4: 17 x 17 x 768
branch1x1 = conv2d_bn(x, 192, 1, 1)
branch7x7 = conv2d_bn(x, 128, 1, 1)
branch7x7 = conv2d_bn(branch7x7, 128, 1, 7)
branch7x7 = conv2d_bn(branch7x7, 192, 7, 1)
branch7x7dbl = conv2d_bn(x, 128, 1, 1)
branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1)
branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 1, 7)
branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1)
branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7)
branch_pool = layers.AveragePooling2D((3, 3),
strides=(1, 1),
padding='same')(x)
branch_pool = conv2d_bn(branch_pool, 192, 1, 1)
x = layers.concatenate(
[branch1x1, branch7x7, branch7x7dbl, branch_pool],
axis=3)
# mixed 5, 6: 17 x 17 x 768
for i in range(2):
branch1x1 = conv2d_bn(x, 192, 1, 1)
branch7x7 = conv2d_bn(x, 160, 1, 1)
branch7x7 = conv2d_bn(branch7x7, 160, 1, 7)
branch7x7 = conv2d_bn(branch7x7, 192, 7, 1)
branch7x7dbl = conv2d_bn(x, 160, 1, 1)
branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1)
branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 1, 7)
branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1)
branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7)
branch_pool = layers.AveragePooling2D(
(3, 3), strides=(1, 1), padding='same')(x)
branch_pool = conv2d_bn(branch_pool, 192, 1, 1)
x = layers.concatenate(
[branch1x1, branch7x7, branch7x7dbl, branch_pool],
axis=3,
name='mixed' + str(5 + i))
# mixed 7: 17 x 17 x 768
branch1x1 = conv2d_bn(x, 192, 1, 1)
branch7x7 = conv2d_bn(x, 192, 1, 1)
branch7x7 = conv2d_bn(branch7x7, 192, 1, 7)
branch7x7 = conv2d_bn(branch7x7, 192, 7, 1)
branch7x7dbl = conv2d_bn(x, 192, 1, 1)
branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1)
branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7)
branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1)
branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7)
branch_pool = layers.AveragePooling2D((3, 3),
strides=(1, 1),
padding='same')(x)
branch_pool = conv2d_bn(branch_pool, 192, 1, 1)
x = layers.concatenate(
[branch1x1, branch7x7, branch7x7dbl, branch_pool],
axis=3,
name='mixed7')
# mixed 8: 8 x 8 x 1280
branch3x3 = conv2d_bn(x, 192, 1, 1)
branch3x3 = conv2d_bn(branch3x3, 320, 3, 3,
strides=(2, 2), padding='valid')
branch7x7x3 = conv2d_bn(x, 192, 1, 1)
branch7x7x3 = conv2d_bn(branch7x7x3, 192, 1, 7)
branch7x7x3 = conv2d_bn(branch7x7x3, 192, 7, 1)
branch7x7x3 = conv2d_bn(
branch7x7x3, 192, 3, 3, strides=(2, 2), padding='valid')
branch_pool = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
x = layers.concatenate(
[branch3x3, branch7x7x3, branch_pool],
axis=3,
name='mixed8')
# mixed 9: 8 x 8 x 2048
for i in range(2):
branch1x1 = conv2d_bn(x, 320, 1, 1)
branch3x3 = conv2d_bn(x, 384, 1, 1)
branch3x3_1 = conv2d_bn(branch3x3, 384, 1, 3)
branch3x3_2 = conv2d_bn(branch3x3, 384, 3, 1)
branch3x3 = layers.concatenate(
[branch3x3_1, branch3x3_2],
axis=3,
name='mixed9_' + str(i))
branch3x3dbl = conv2d_bn(x, 448, 1, 1)
branch3x3dbl = conv2d_bn(branch3x3dbl, 384, 3, 3)
branch3x3dbl_1 = conv2d_bn(branch3x3dbl, 384, 1, 3)
branch3x3dbl_2 = conv2d_bn(branch3x3dbl, 384, 3, 1)
branch3x3dbl = layers.concatenate(
[branch3x3dbl_1, branch3x3dbl_2], axis=3)
branch_pool = layers.AveragePooling2D(
(3, 3), strides=(1, 1), padding='same')(x)
branch_pool = conv2d_bn(branch_pool, 192, 1, 1)
x = layers.concatenate(
[branch1x1, branch3x3, branch3x3dbl, branch_pool],
axis=3,
name='mixed' + str(9 + i))
x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
x = layers.Dense(n_classes, activation='softmax', name='predictions')(x)
model = Model(in_layer,x)
return model
tf.keras.backend.clear_session()
model=inceptionv3_net()
#model.summary()
#tf.keras.utils.plot_model(model)
This architecture uses the residual structure from ResNet, so it may help to read the ResNet section first and then come back to this part.
# The Inception-ResNet v2 code follows
def conv2d_bn(x,
filters,
kernel_size,
strides=1,
padding='same',
activation='relu',
use_bias=False,
name=None):
"""Utility function to apply conv + BN.
# Arguments
x: input tensor.
filters: filters in `Conv2D`.
kernel_size: kernel size as in `Conv2D`.
strides: strides in `Conv2D`.
padding: padding mode in `Conv2D`.
activation: activation in `Conv2D`.
use_bias: whether to use a bias in `Conv2D`.
name: name of the ops; will become `name + '_ac'` for the activation
and `name + '_bn'` for the batch norm layer.
# Returns
Output tensor after applying `Conv2D` and `BatchNormalization`.
"""
x = layers.Conv2D(filters,
kernel_size,
strides=strides,
padding=padding,
use_bias=use_bias,
name=name)(x)
if not use_bias:
bn_axis = 1 if tf.keras.backend.image_data_format() == 'channels_first' else 3
bn_name = None if name is None else name + '_bn'
x = layers.BatchNormalization(axis=bn_axis,
scale=False,
name=bn_name)(x)
if activation is not None:
ac_name = None if name is None else name + '_ac'
x = layers.Activation(activation, name=ac_name)(x)
return x
def inception_resnet_block(x, scale, block_type, block_idx, activation='relu'):
"""Adds a Inception-ResNet block.
This function builds 3 types of Inception-ResNet blocks mentioned
in the paper, controlled by the `block_type` argument (which is the
block name used in the official TF-slim implementation):
- Inception-ResNet-A: `block_type='block35'`
- Inception-ResNet-B: `block_type='block17'`
- Inception-ResNet-C: `block_type='block8'`
# Arguments
x: input tensor.
scale: scaling factor to scale the residuals (i.e., the output of
passing `x` through an inception module) before adding them
to the shortcut branch.
Let `r` be the output from the residual branch,
the output of this block will be `x + scale * r`.
block_type: `'block35'`, `'block17'` or `'block8'`, determines
the network structure in the residual branch.
block_idx: an `int` used for generating layer names.
The Inception-ResNet blocks
are repeated many times in this network.
We use `block_idx` to identify
each of the repetitions. For example,
the first Inception-ResNet-A block
will have `block_type='block35', block_idx=0`,
and the layer names will have
a common prefix `'block35_0'`.
activation: activation function to use at the end of the block
(see [activations](../activations.md)).
When `activation=None`, no activation is applied
(i.e., "linear" activation: `a(x) = x`).
# Returns
Output tensor for the block.
# Raises
ValueError: if `block_type` is not one of `'block35'`,
`'block17'` or `'block8'`.
"""
if block_type == 'block35':
branch_0 = conv2d_bn(x, 32, 1)
branch_1 = conv2d_bn(x, 32, 1)
branch_1 = conv2d_bn(branch_1, 32, 3)
branch_2 = conv2d_bn(x, 32, 1)
branch_2 = conv2d_bn(branch_2, 48, 3)
branch_2 = conv2d_bn(branch_2, 64, 3)
branches = [branch_0, branch_1, branch_2]
elif block_type == 'block17':
branch_0 = conv2d_bn(x, 192, 1)
branch_1 = conv2d_bn(x, 128, 1)
branch_1 = conv2d_bn(branch_1, 160, [1, 7])
branch_1 = conv2d_bn(branch_1, 192, [7, 1])
branches = [branch_0, branch_1]
elif block_type == 'block8':
branch_0 = conv2d_bn(x, 192, 1)
branch_1 = conv2d_bn(x, 192, 1)
branch_1 = conv2d_bn(branch_1, 224, [1, 3])
branch_1 = conv2d_bn(branch_1, 256, [3, 1])
branches = [branch_0, branch_1]
else:
raise ValueError('Unknown Inception-ResNet block type. '
'Expects "block35", "block17" or "block8", '
'but got: ' + str(block_type))
block_name = block_type + '_' + str(block_idx)
channel_axis = 1 if tf.keras.backend.image_data_format() == 'channels_first' else 3
mixed = layers.Concatenate(
axis=channel_axis, name=block_name + '_mixed')(branches)
up = conv2d_bn(mixed,
tf.keras.backend.int_shape(x)[channel_axis],
1,
activation=None,
use_bias=True,
name=block_name + '_conv')
x = layers.Lambda(lambda inputs, scale: inputs[0] + inputs[1] * scale,
output_shape=tf.keras.backend.int_shape(x)[1:],
arguments={'scale': scale},
name=block_name)([x, up])
if activation is not None:
x = layers.Activation(activation, name=block_name + '_ac')(x)
return x
def InceptionResNetV2(input_shape=(299,299,3),
classes=1000):
"""Instantiates the Inception-ResNet v2 architecture.
Optionally loads weights pre-trained on ImageNet.
Note that the data format convention used by the model is
the one specified in your Keras config at `~/.keras/keras.json`.
# Arguments
input_shape: optional shape tuple, only to be specified
if `include_top` is `False` (otherwise the input shape
has to be `(299, 299, 3)` (with `'channels_last'` data format)
or `(3, 299, 299)` (with `'channels_first'` data format).
It should have exactly 3 inputs channels,
and width and height should be no smaller than 75.
E.g. `(150, 150, 3)` would be one valid value.
classes: optional number of classes to classify images
into, only to be specified if `include_top` is `True`, and
if no `weights` argument is specified.
# Returns
A Keras `Model` instance.
# Raises
ValueError: in case of invalid argument for `weights`,
or invalid input shape.
"""
img_input = layers.Input(shape=input_shape)
# Stem block: 35 x 35 x 192
x = conv2d_bn(img_input, 32, 3, strides=2, padding='valid')
x = conv2d_bn(x, 32, 3, padding='valid')
x = conv2d_bn(x, 64, 3)
x = layers.MaxPooling2D(3, strides=2)(x)
x = conv2d_bn(x, 80, 1, padding='valid')
x = conv2d_bn(x, 192, 3, padding='valid')
x = layers.MaxPooling2D(3, strides=2)(x)
# Mixed 5b (Inception-A block): 35 x 35 x 320
branch_0 = conv2d_bn(x, 96, 1)
branch_1 = conv2d_bn(x, 48, 1)
branch_1 = conv2d_bn(branch_1, 64, 5)
branch_2 = conv2d_bn(x, 64, 1)
branch_2 = conv2d_bn(branch_2, 96, 3)
branch_2 = conv2d_bn(branch_2, 96, 3)
branch_pool = layers.AveragePooling2D(3, strides=1, padding='same')(x)
branch_pool = conv2d_bn(branch_pool, 64, 1)
branches = [branch_0, branch_1, branch_2, branch_pool]
channel_axis = 1 if tf.keras.backend.image_data_format() == 'channels_first' else 3
x = layers.Concatenate(axis=channel_axis, name='mixed_5b')(branches)
# 10x block35 (Inception-ResNet-A block): 35 x 35 x 320
for block_idx in range(1, 11):
x = inception_resnet_block(x,
scale=0.17,
block_type='block35',
block_idx=block_idx)
# Mixed 6a (Reduction-A block): 17 x 17 x 1088
branch_0 = conv2d_bn(x, 384, 3, strides=2, padding='valid')
branch_1 = conv2d_bn(x, 256, 1)
branch_1 = conv2d_bn(branch_1, 256, 3)
branch_1 = conv2d_bn(branch_1, 384, 3, strides=2, padding='valid')
branch_pool = layers.MaxPooling2D(3, strides=2, padding='valid')(x)
branches = [branch_0, branch_1, branch_pool]
x = layers.Concatenate(axis=channel_axis, name='mixed_6a')(branches)
# 20x block17 (Inception-ResNet-B block): 17 x 17 x 1088
for block_idx in range(1, 21):
x = inception_resnet_block(x,
scale=0.1,
block_type='block17',
block_idx=block_idx)
# Mixed 7a (Reduction-B block): 8 x 8 x 2080
branch_0 = conv2d_bn(x, 256, 1)
branch_0 = conv2d_bn(branch_0, 384, 3, strides=2, padding='valid')
branch_1 = conv2d_bn(x, 256, 1)
branch_1 = conv2d_bn(branch_1, 288, 3, strides=2, padding='valid')
branch_2 = conv2d_bn(x, 256, 1)
branch_2 = conv2d_bn(branch_2, 288, 3)
branch_2 = conv2d_bn(branch_2, 320, 3, strides=2, padding='valid')
branch_pool = layers.MaxPooling2D(3, strides=2, padding='valid')(x)
branches = [branch_0, branch_1, branch_2, branch_pool]
x = layers.Concatenate(axis=channel_axis, name='mixed_7a')(branches)
# 10x block8 (Inception-ResNet-C block): 8 x 8 x 2080
for block_idx in range(1, 10):
x = inception_resnet_block(x,
scale=0.2,
block_type='block8',
block_idx=block_idx)
x = inception_resnet_block(x,
scale=1.,
activation=None,
block_type='block8',
block_idx=10)
# Final convolution block: 8 x 8 x 1536
x = conv2d_bn(x, 1536, 1, name='conv_7b')
x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
x = layers.Dense(classes, activation='softmax', name='predictions')(x)
model = Model(img_input, x, name='inception_resnet_v2')
return model
model = InceptionResNetV2()
#model.summary()
#tf.keras.utils.plot_model(model)
Why ResNet works:
Once a network gets deep, vanishing gradients become a serious problem. The skip connections in a residual network let gradients from later layers flow back to the early layers much more easily, which alleviates vanishing gradients and makes the network easier to train.
Another way to look at it: ResNet behaves like an ensemble of many networks. Deleting a single residual block barely affects the final performance, much like removing one base learner from a bagging ensemble; by contrast, deleting a block from VGG16 hurts performance badly. Ensembling generally improves performance.
Yet another view: with skip connections, the convolutional layers only need to learn the residual rather than the whole mapping, which greatly eases the learning burden, so the network learns well.
A further view: the low-redundancy part of the data is preserved through the skip connections, so the convolutional layers can concentrate on the high-redundancy part. With the same number of parameters, ResNet can therefore learn better.
For more notes on ResNet, see the linked explanation.
For a comparison of the two ResNet versions, see the linked explanation; plenty of material already exists, so it is not repeated here.
[Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) (CVPR 2016 Best Paper Award)
[Identity Mappings in Deep Residual Networks](https://arxiv.org/abs/1603.05027) (ECCV 2016)
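Before the full implementation below, the core idea can be written in a few lines (a minimal sketch of my own, without batch normalization, bottlenecks, or downsampling):
# Minimal residual block: the conv layers learn F(x); the block outputs x + F(x).
def minimal_residual_block(x, filters):
    # assumes x already has `filters` channels so the shapes match for the add
    f = layers.Conv2D(filters, 3, padding='same', activation='relu')(x)
    f = layers.Conv2D(filters, 3, padding='same')(f)    # F(x)
    out = layers.Add()([x, f])                          # x + F(x)
    return layers.Activation('relu')(out)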
def _after_conv_relu(in_tensor):
norm=layers.BatchNormalization()(in_tensor)
return layers.Activation('relu')(norm)
def _after_conv(in_tensor):
norm=layers.BatchNormalization()(in_tensor)
return norm
def conv1(in_tensor,filters):
conv =layers.Conv2D(filters,kernel_size=1,strides=1)(in_tensor)
return _after_conv(conv)
def conv1_downsample(in_tensor,filters):
conv=layers.Conv2D(filters,kernel_size=1,strides=2)(in_tensor)
return _after_conv(conv)
def conv1_relu(in_tensor,filters):
conv =layers.Conv2D(filters,kernel_size=1,strides=1)(in_tensor)
return _after_conv_relu(conv)
def conv1_downsample_relu(in_tensor,filters):
conv=layers.Conv2D(filters,kernel_size=1,strides=2)(in_tensor)
return _after_conv_relu(conv)
def conv3(in_tensor,filters):
conv=layers.Conv2D(filters,kernel_size=3,strides=1,padding='same')(in_tensor)
return _after_conv(conv)
def conv3_downsample(in_tensor,filters):
conv=layers.Conv2D(filters,kernel_size=3,strides=2,padding='same')(in_tensor)
return _after_conv(conv)
def conv3_relu(in_tensor,filters):
    conv=layers.Conv2D(filters,kernel_size=3,strides=1,padding='same')(in_tensor)
    return _after_conv_relu(conv)
def conv3_downsample_relu(in_tensor,filters):
    conv=layers.Conv2D(filters,kernel_size=3,strides=2,padding='same')(in_tensor)
    return _after_conv_relu(conv)
def resnet_block_wo_bottlneck(in_tensor,filters,downsample=False):
if downsample:
conv1_rb = conv3_downsample_relu(in_tensor,filters)
else:
conv1_rb = conv3_relu(in_tensor,filters)
conv2_rb = conv3(conv1_rb,filters)
if downsample:
in_tensor = conv1_downsample(in_tensor,filters)
result = layers.Add()([conv2_rb,in_tensor])
return layers.Activation('relu')(result)
def resnet_block_w_bottlneck(in_tensor,filters,downsample=False,change_channels=False):
if downsample:
conv1_rb = conv1_downsample_relu(in_tensor,int(filters/4))
else:
conv1_rb = conv1_relu(in_tensor,int(filters/4))
conv2_rb = conv3_relu(conv1_rb,int(filters/4))
conv3_rb = conv1(conv2_rb,filters)
if downsample:
in_tensor=conv1_downsample(in_tensor,filters)
elif change_channels:
in_tensor=conv1(in_tensor,filters)
result = layers.Add()([conv3_rb,in_tensor])
return layers.Activation('relu')(result)
def _pre_res_blocks(in_tensor):
conv = layers.Conv2D(64,7,strides=2,padding='same')(in_tensor)
conv = _after_conv(conv)
pool = layers.MaxPool2D(3,2,padding='same')(conv)
return pool
def _post_res_blocks(in_tensor,n_classes):
pool = layers.GlobalAvgPool2D()(in_tensor)
preds = layers.Dense(n_classes,activation='softmax')(pool)
return preds
def convx_wo_bottleneck(in_tensor,filters,n_times,downsample_1=False):
res=in_tensor
for i in range(n_times):
if i==0:
res=resnet_block_wo_bottlneck(res,filters,downsample_1)
else:
res=resnet_block_wo_bottlneck(res,filters)
return res
def convx_w_bottleneck(in_tensor,filters,n_times,downsample_1=False):
res=in_tensor
for i in range(n_times):
if i==0:
res=resnet_block_w_bottlneck(res,filters,downsample_1,not downsample_1)
else:
res=resnet_block_w_bottlneck(res,filters)
return res
def _resnet(in_shape=(224,224,3),n_classes=1000,convx=[64,128,256,512],n_convx=[2,2,2,2],convx_fn=convx_wo_bottleneck):
in_layer = layers.Input(in_shape)
downsampled = _pre_res_blocks(in_layer)
conv2x = convx_fn(downsampled,convx[0],n_convx[0])
conv3x = convx_fn(conv2x,convx[1],n_convx[1],True)
conv4x = convx_fn(conv3x,convx[2],n_convx[2],True)
conv5x = convx_fn(conv4x,convx[3],n_convx[3],True)
preds = _post_res_blocks(conv5x,n_classes)
model =Model(in_layer,preds)
return model
def resnet18(in_shape=(224,224,3),n_classes=1000):
return _resnet(in_shape,n_classes)
def resnet34(in_shape=(224,224,3),n_classes=1000):
return _resnet(in_shape,n_classes,n_convx=[3,4,6,3])
def resnet50(in_shape=(224,224,3),n_classes=1000):
return _resnet(in_shape,n_classes,convx=[256,512,1024,2048],n_convx=[3,4,6,3],convx_fn=convx_w_bottleneck)
def resnet101(in_shape=(224,224,3),n_classes=1000):
return _resnet(in_shape,n_classes,convx=[256,512,1024,2048],n_convx=[3,4,23,3],convx_fn=convx_w_bottleneck)
def resnet152(in_shape=(224,224,3),n_classes=1000):
return _resnet(in_shape,n_classes,convx=[256,512,1024,2048],n_convx=[3,8,36,3],convx_fn=convx_w_bottleneck)
model = resnet18()
#model.summary()
#tf.keras.utils.plot_model(model)
The main change in ResNet v2 is to the residual path; the identity path is unchanged. Concretely, batch normalization and ReLU are moved in front of each convolution (pre-activation), as sketched below.
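A minimal sketch (my own, not the full block below) of the ordering difference between the two versions:
# v1 (post-activation): conv -> BN -> ReLU, with a final ReLU after the addition.
def v1_unit(x, filters):
    y = layers.Conv2D(filters, 3, padding='same')(x)
    y = layers.BatchNormalization()(y)
    return layers.Activation('relu')(y)
# v2 (pre-activation): BN -> ReLU -> conv; the identity is added without a final activation.
def v2_unit(x, filters):
    y = layers.BatchNormalization()(x)
    y = layers.Activation('relu')(y)
    return layers.Conv2D(filters, 3, padding='same')(y)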
def block2(x, filters, kernel_size=3, stride=1,
conv_shortcut=False, name=None):
"""A residual block.
# Arguments
x: input tensor.
filters: integer, filters of the bottleneck layer.
kernel_size: default 3, kernel size of the bottleneck layer.
stride: default 1, stride of the first layer.
conv_shortcut: default False, use convolution shortcut if True,
otherwise identity shortcut.
name: string, block label.
# Returns
Output tensor for the residual block.
"""
bn_axis = 3 if tf.keras.backend.image_data_format() == 'channels_last' else 1
preact = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
name=name + '_preact_bn')(x)
preact = layers.Activation('relu', name=name + '_preact_relu')(preact)
if conv_shortcut is True:
shortcut = layers.Conv2D(4 * filters, 1, strides=stride,
name=name + '_0_conv')(preact)
else:
shortcut = layers.MaxPooling2D(1, strides=stride)(x) if stride > 1 else x
x = layers.Conv2D(filters, 1, strides=1, use_bias=False,
name=name + '_1_conv')(preact)
x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
name=name + '_1_bn')(x)
x = layers.Activation('relu', name=name + '_1_relu')(x)
x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name=name + '_2_pad')(x)
x = layers.Conv2D(filters, kernel_size, strides=stride,
use_bias=False, name=name + '_2_conv')(x)
x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
name=name + '_2_bn')(x)
x = layers.Activation('relu', name=name + '_2_relu')(x)
x = layers.Conv2D(4 * filters, 1, name=name + '_3_conv')(x)
x = layers.Add(name=name + '_out')([shortcut, x])
return x
def stack2(x, filters, blocks, stride1=2, name=None):
"""A set of stacked residual blocks.
# Arguments
x: input tensor.
filters: integer, filters of the bottleneck layer in a block.
blocks: integer, blocks in the stacked blocks.
stride1: default 2, stride of the first layer in the first block.
name: string, stack label.
# Returns
Output tensor for the stacked blocks.
"""
x = block2(x, filters, conv_shortcut=True, name=name + '_block1')
for i in range(2, blocks):
x = block2(x, filters, name=name + '_block' + str(i))
x = block2(x, filters, stride=stride1, name=name + '_block' + str(blocks))
return x
def ResNet(stack_fn,
preact,
use_bias,
model_name='resnet',
input_shape=None,
classes=1000):
img_input = layers.Input(shape=input_shape)
bn_axis = 3 if tf.keras.backend.image_data_format() == 'channels_last' else 1
x = layers.ZeroPadding2D(padding=((3, 3), (3, 3)), name='conv1_pad')(img_input)
x = layers.Conv2D(64, 7, strides=2, use_bias=use_bias, name='conv1_conv')(x)
x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name='pool1_pad')(x)
x = layers.MaxPooling2D(3, strides=2, name='pool1_pool')(x)
x = stack_fn(x)
if preact is True:
x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
name='post_bn')(x)
x = layers.Activation('relu', name='post_relu')(x)
x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
x = layers.Dense(classes, activation='softmax', name='probs')(x)
# Create model.
model = Model(img_input, x, name=model_name)
return model
def ResNet50V2(input_shape=(224,224,3),classes=1000):
def stack_fn(x):
x = stack2(x, 64, 3, name='conv2')
x = stack2(x, 128, 4, name='conv3')
x = stack2(x, 256, 6, name='conv4')
x = stack2(x, 512, 3, stride1=1, name='conv5')
return x
return ResNet(stack_fn, True, True, 'resnet50v2',
input_shape,classes)
def ResNet101V2(input_shape=(224,224,3),classes=1000):
def stack_fn(x):
x = stack2(x, 64, 3, name='conv2')
x = stack2(x, 128, 4, name='conv3')
x = stack2(x, 256, 23, name='conv4')
x = stack2(x, 512, 3, stride1=1, name='conv5')
return x
return ResNet(stack_fn, True, True, 'resnet101v2',
input_shape,classes)
def ResNet152V2(input_shape=(224,224,3),classes=1000):
def stack_fn(x):
x = stack2(x, 64, 3, name='conv2')
x = stack2(x, 128, 8, name='conv3')
x = stack2(x, 256, 36, name='conv4')
x = stack2(x, 512, 3, stride1=1, name='conv5')
return x
return ResNet(stack_fn, True, True, 'resnet152v2',
input_shape,classes)
model=ResNet50V2()
#model.summary()
#tf.keras.utils.plot_model(model)
ResNeXt does have fewer hyperparameters than Inception v4, but it discards Inception's property of covering multiple receptive fields, which does not seem entirely reasonable; in many settings Inception v4 actually performs better than ResNeXt. On the other hand, a ResNeXt of comparable structure should run faster than Inception v4, because its branches all share the same topology, which suits how GPUs are designed. The code below implements group convolution with a depthwise convolution followed by a fixed sparse 1×1 convolution, which takes a bit of thought to follow; a simpler sketch using the `groups` argument comes first.
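As an aside, on newer TensorFlow versions (2.3+) Conv2D accepts a `groups` argument directly, so a simpler sketch of the ResNeXt bottleneck is possible; this is my own alternative, not equivalent line-for-line to the reference code below.
# Grouped 3x3 convolution via the `groups` argument (requires `filters` to be
# divisible by `groups`); the reference code below instead emulates grouping
# with DepthwiseConv2D plus a fixed, non-trainable 1x1 aggregation.
def resnext_bottleneck_sketch(x, filters, groups=32, stride=1):
    y = layers.Conv2D(filters, 1, use_bias=False)(x)
    y = layers.BatchNormalization()(y)
    y = layers.Activation('relu')(y)
    y = layers.Conv2D(filters, 3, strides=stride, padding='same',
                      groups=groups, use_bias=False)(y)
    y = layers.BatchNormalization()(y)
    y = layers.Activation('relu')(y)
    y = layers.Conv2D(2 * filters, 1, use_bias=False)(y)
    return layers.BatchNormalization()(y)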
def block3(x, filters, kernel_size=3, stride=1, groups=32,
conv_shortcut=True, name=None):
"""A residual block.
# Arguments
x: input tensor.
filters: integer, filters of the bottleneck layer.
kernel_size: default 3, kernel size of the bottleneck layer.
stride: default 1, stride of the first layer.
groups: default 32, group size for grouped convolution.
conv_shortcut: default True, use convolution shortcut if True,
otherwise identity shortcut.
name: string, block label.
# Returns
Output tensor for the residual block.
"""
bn_axis = 3
if conv_shortcut is True:
shortcut = layers.Conv2D((64 // groups) * filters, 1, strides=stride,
use_bias=False, name=name + '_0_conv')(x)
shortcut = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
name=name + '_0_bn')(shortcut)
else:
shortcut = x
x = layers.Conv2D(filters, 1, use_bias=False, name=name + '_1_conv')(x)
x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
name=name + '_1_bn')(x)
x = layers.Activation('relu', name=name + '_1_relu')(x)
c = filters // groups
x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name=name + '_2_pad')(x)
x = layers.DepthwiseConv2D(kernel_size, strides=stride, depth_multiplier=c,
use_bias=False, name=name + '_2_conv')(x)
kernel = np.zeros((1, 1, filters * c, filters), dtype=np.float32)
for i in range(filters):
start = (i // c) * c * c + i % c
end = start + c * c
kernel[:, :, start:end:c, i] = 1.
x = layers.Conv2D(filters, 1, use_bias=False, trainable=False,
kernel_initializer={'class_name': 'Constant',
'config': {'value': kernel}},
name=name + '_2_gconv')(x)
x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
name=name + '_2_bn')(x)
x = layers.Activation('relu', name=name + '_2_relu')(x)
x = layers.Conv2D((64 // groups) * filters, 1,
use_bias=False, name=name + '_3_conv')(x)
x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
name=name + '_3_bn')(x)
x = layers.Add(name=name + '_add')([shortcut, x])
x = layers.Activation('relu', name=name + '_out')(x)
return x
def stack3(x, filters, blocks, stride1=2, groups=32, name=None):
"""A set of stacked residual blocks.
# Arguments
x: input tensor.
filters: integer, filters of the bottleneck layer in a block.
blocks: integer, blocks in the stacked blocks.
stride1: default 2, stride of the first layer in the first block.
groups: default 32, group size for grouped convolution.
name: string, stack label.
# Returns
Output tensor for the stacked blocks.
"""
x = block3(x, filters, stride=stride1, groups=groups, name=name + '_block1')
for i in range(2, blocks + 1):
x = block3(x, filters, groups=groups, conv_shortcut=False,
name=name + '_block' + str(i))
return x
def ResNet(stack_fn,
model_name='resnet',
input_shape=None,
classes=1000):
img_input = layers.Input(shape=input_shape)
bn_axis = 3
x = layers.ZeroPadding2D(padding=((3, 3), (3, 3)), name='conv1_pad')(img_input)
x = layers.Conv2D(64, 7, strides=2, use_bias=False, name='conv1_conv')(x)
x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
name='conv1_bn')(x)
x = layers.Activation('relu', name='conv1_relu')(x)
x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name='pool1_pad')(x)
x = layers.MaxPooling2D(3, strides=2, name='pool1_pool')(x)
x = stack_fn(x)
x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
x = layers.Dense(classes, activation='softmax', name='probs')(x)
# Create model.
model = Model(img_input, x, name=model_name)
return model
def ResNeXt50(input_shape=(224,224,3),classes=1000):
def stack_fn(x):
x = stack3(x, 128, 3, stride1=1, name='conv2')
x = stack3(x, 256, 4, name='conv3')
x = stack3(x, 512, 6, name='conv4')
x = stack3(x, 1024, 3, name='conv5')
return x
return ResNet(stack_fn, 'resnext50', input_shape,classes)
def ResNeXt101(input_shape=(224,224,3),classes=1000):
def stack_fn(x):
x = stack3(x, 128, 3, stride1=1, name='conv2')
x = stack3(x, 256, 4, name='conv3')
x = stack3(x, 512, 23, name='conv4')
x = stack3(x, 1024, 3, name='conv5')
return x
return ResNet(stack_fn, 'resnext101', input_shape,classes)
model=ResNeXt50()
#model.summary()
#tf.keras.utils.plot_model(model)
def dense_block(x, blocks, name):
"""A dense block.
# Arguments
x: input tensor.
blocks: integer, the number of building blocks.
name: string, block label.
# Returns
output tensor for the block.
"""
for i in range(blocks):
x = conv_block(x, 32, name=name + '_block' + str(i + 1))
return x
def transition_block(x, reduction, name):
"""A transition block.
# Arguments
x: input tensor.
reduction: float, compression rate at transition layers.
name: string, block label.
# Returns
output tensor for the block.
"""
bn_axis = 3 if tf.keras.backend.image_data_format() == 'channels_last' else 1
x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
name=name + '_bn')(x)
x = layers.Activation('relu', name=name + '_relu')(x)
x = layers.Conv2D(int(tf.keras.backend.int_shape(x)[bn_axis] * reduction), 1,
use_bias=False,
name=name + '_conv')(x)
x = layers.AveragePooling2D(2, strides=2, name=name + '_pool')(x)
return x
def conv_block(x, growth_rate, name):
"""A building block for a dense block.
# Arguments
x: input tensor.
growth_rate: float, growth rate at dense layers.
name: string, block label.
# Returns
Output tensor for the block.
"""
bn_axis = 3 if tf.keras.backend.image_data_format() == 'channels_last' else 1
x1 = layers.BatchNormalization(axis=bn_axis,
epsilon=1.001e-5,
name=name + '_0_bn')(x)
x1 = layers.Activation('relu', name=name + '_0_relu')(x1)
x1 = layers.Conv2D(4 * growth_rate, 1,
use_bias=False,
name=name + '_1_conv')(x1)
x1 = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
name=name + '_1_bn')(x1)
x1 = layers.Activation('relu', name=name + '_1_relu')(x1)
x1 = layers.Conv2D(growth_rate, 3,
padding='same',
use_bias=False,
name=name + '_2_conv')(x1)
x = layers.Concatenate(axis=bn_axis, name=name + '_concat')([x, x1])
return x
def DenseNet(blocks,
input_shape=(224,224,3),
classes=1000):
"""Instantiates the DenseNet architecture.
Optionally loads weights pre-trained on ImageNet.
Note that the data format convention used by the model is
the one specified in your Keras config at `~/.keras/keras.json`.
# Arguments
blocks: numbers of building blocks for the four dense layers.
input_shape: optional shape tuple, only to be specified
if `include_top` is False (otherwise the input shape
has to be `(224, 224, 3)` (with `'channels_last'` data format)
or `(3, 224, 224)` (with `'channels_first'` data format).
It should have exactly 3 inputs channels,
and width and height should be no smaller than 32.
E.g. `(200, 200, 3)` would be one valid value.
classes: optional number of classes to classify images
into, only to be specified if `include_top` is True, and
if no `weights` argument is specified.
# Returns
A Keras model instance.
# Raises
ValueError: in case of invalid argument for `weights`,
or invalid input shape.
"""
img_input = layers.Input(shape=input_shape)
bn_axis = 3 if tf.keras.backend.image_data_format() == 'channels_last' else 1
x = layers.ZeroPadding2D(padding=((3, 3), (3, 3)))(img_input)
x = layers.Conv2D(64, 7, strides=2, use_bias=False, name='conv1/conv')(x)
x = layers.BatchNormalization(
axis=bn_axis, epsilon=1.001e-5, name='conv1/bn')(x)
x = layers.Activation('relu', name='conv1/relu')(x)
x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)))(x)
x = layers.MaxPooling2D(3, strides=2, name='pool1')(x)
x = dense_block(x, blocks[0], name='conv2')
x = transition_block(x, 0.5, name='pool2')
x = dense_block(x, blocks[1], name='conv3')
x = transition_block(x, 0.5, name='pool3')
x = dense_block(x, blocks[2], name='conv4')
x = transition_block(x, 0.5, name='pool4')
x = dense_block(x, blocks[3], name='conv5')
x = layers.BatchNormalization(
axis=bn_axis, epsilon=1.001e-5, name='bn')(x)
x = layers.Activation('relu', name='relu')(x)
x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
x = layers.Dense(classes, activation='softmax', name='fc1000')(x)
# Create model.
if blocks == [6, 12, 24, 16]:
model = Model(img_input, x, name='densenet121')
elif blocks == [6, 12, 32, 32]:
model = Model(img_input, x, name='densenet169')
elif blocks == [6, 12, 48, 32]:
model = Model(img_input, x, name='densenet201')
else:
model = Model(img_input, x, name='densenet')
return model
def DenseNet121(input_shape=(224,224,3),classes=1000):
return DenseNet([6, 12, 24, 16],input_shape,classes)
def DenseNet169(input_shape=(224,224,3),classes=1000):
return DenseNet([6, 12, 32, 32],input_shape,classes)
def DenseNet201(input_shape=(224,224,3),classes=1000):
    return DenseNet([6, 12, 48, 32],input_shape,classes)
model = DenseNet121()
#model.summary()
#tf.keras.utils.plot_model(model)
Xception: Deep Learning with Depthwise Separable Convolutions (CVPR 2017)
More notes on this part can be found in the linked write-up.
Regarding grouped convolution: the papers that propose it naturally argue that grouping is good, but here a different view is taken: after grouping, each group keeps its information to itself with no communication between groups, so the channel information cannot be fully exploited. ShuffleNet addresses this by shuffling the channels before grouping; ShuffleNet will be covered later. Incidentally, the author of this paper is the creator of Keras, so implementing it in Keras is particularly fitting. A rough parameter comparison for depthwise separable convolution is sketched below.
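For intuition, a depthwise separable convolution replaces one dense k×k convolution with a per-channel k×k depthwise convolution followed by a 1×1 pointwise convolution. A rough parameter comparison (my own illustration, biases ignored):
# Standard conv vs. depthwise separable conv, parameter counts only.
def standard_conv_params(k, c_in, c_out):
    return k * k * c_in * c_out
def separable_conv_params(k, c_in, c_out):
    return k * k * c_in + c_in * c_out   # depthwise + pointwise
k, c_in, c_out = 3, 128, 256
print(standard_conv_params(k, c_in, c_out))   # 294912
print(separable_conv_params(k, c_in, c_out))  # 33920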
def Xception(input_shape=(299,299,3),classes=1000):
"""Instantiates the Xception architecture.
Note that the default input image size for this model is 299x299.
# Arguments
input_shape: optional shape tuple, only to be specified
if `include_top` is False (otherwise the input shape
has to be `(299, 299, 3)`.
It should have exactly 3 inputs channels,
and width and height should be no smaller than 71.
E.g. `(150, 150, 3)` would be one valid value.
classes: optional number of classes to classify images
into, only to be specified if `include_top` is True,
and if no `weights` argument is specified.
# Returns
A Keras model instance.
# Raises
ValueError: in case of invalid argument for `weights`,
or invalid input shape.
RuntimeError: If attempting to run this model with a
backend that does not support separable convolutions.
"""
img_input = layers.Input(shape=input_shape)
channel_axis = 1 if tf.keras.backend.image_data_format() == 'channels_first' else -1
x = layers.Conv2D(32, (3, 3),
strides=(2, 2),
use_bias=False,
name='block1_conv1')(img_input)
x = layers.BatchNormalization(axis=channel_axis, name='block1_conv1_bn')(x)
x = layers.Activation('relu', name='block1_conv1_act')(x)
x = layers.Conv2D(64, (3, 3), use_bias=False, name='block1_conv2')(x)
x = layers.BatchNormalization(axis=channel_axis, name='block1_conv2_bn')(x)
x = layers.Activation('relu', name='block1_conv2_act')(x)
residual = layers.Conv2D(128, (1, 1),
strides=(2, 2),
padding='same',
use_bias=False)(x)
residual = layers.BatchNormalization(axis=channel_axis)(residual)
x = layers.SeparableConv2D(128, (3, 3),
padding='same',
use_bias=False,
name='block2_sepconv1')(x)
x = layers.BatchNormalization(axis=channel_axis, name='block2_sepconv1_bn')(x)
x = layers.Activation('relu', name='block2_sepconv2_act')(x)
x = layers.SeparableConv2D(128, (3, 3),
padding='same',
use_bias=False,
name='block2_sepconv2')(x)
x = layers.BatchNormalization(axis=channel_axis, name='block2_sepconv2_bn')(x)
x = layers.MaxPooling2D((3, 3),
strides=(2, 2),
padding='same',
name='block2_pool')(x)
x = layers.add([x, residual])
residual = layers.Conv2D(256, (1, 1), strides=(2, 2),
padding='same', use_bias=False)(x)
residual = layers.BatchNormalization(axis=channel_axis)(residual)
x = layers.Activation('relu', name='block3_sepconv1_act')(x)
x = layers.SeparableConv2D(256, (3, 3),
padding='same',
use_bias=False,
name='block3_sepconv1')(x)
x = layers.BatchNormalization(axis=channel_axis, name='block3_sepconv1_bn')(x)
x = layers.Activation('relu', name='block3_sepconv2_act')(x)
x = layers.SeparableConv2D(256, (3, 3),
padding='same',
use_bias=False,
name='block3_sepconv2')(x)
x = layers.BatchNormalization(axis=channel_axis, name='block3_sepconv2_bn')(x)
x = layers.MaxPooling2D((3, 3), strides=(2, 2),
padding='same',
name='block3_pool')(x)
x = layers.add([x, residual])
residual = layers.Conv2D(728, (1, 1),
strides=(2, 2),
padding='same',
use_bias=False)(x)
residual = layers.BatchNormalization(axis=channel_axis)(residual)
x = layers.Activation('relu', name='block4_sepconv1_act')(x)
x = layers.SeparableConv2D(728, (3, 3),
padding='same',
use_bias=False,
name='block4_sepconv1')(x)
x = layers.BatchNormalization(axis=channel_axis, name='block4_sepconv1_bn')(x)
x = layers.Activation('relu', name='block4_sepconv2_act')(x)
x = layers.SeparableConv2D(728, (3, 3),
padding='same',
use_bias=False,
name='block4_sepconv2')(x)
x = layers.BatchNormalization(axis=channel_axis, name='block4_sepconv2_bn')(x)
x = layers.MaxPooling2D((3, 3), strides=(2, 2),
padding='same',
name='block4_pool')(x)
x = layers.add([x, residual])
for i in range(8):
residual = x
prefix = 'block' + str(i + 5)
x = layers.Activation('relu', name=prefix + '_sepconv1_act')(x)
x = layers.SeparableConv2D(728, (3, 3),
padding='same',
use_bias=False,
name=prefix + '_sepconv1')(x)
x = layers.BatchNormalization(axis=channel_axis,
name=prefix + '_sepconv1_bn')(x)
x = layers.Activation('relu', name=prefix + '_sepconv2_act')(x)
x = layers.SeparableConv2D(728, (3, 3),
padding='same',
use_bias=False,
name=prefix + '_sepconv2')(x)
x = layers.BatchNormalization(axis=channel_axis,
name=prefix + '_sepconv2_bn')(x)
x = layers.Activation('relu', name=prefix + '_sepconv3_act')(x)
x = layers.SeparableConv2D(728, (3, 3),
padding='same',
use_bias=False,
name=prefix + '_sepconv3')(x)
x = layers.BatchNormalization(axis=channel_axis,
name=prefix + '_sepconv3_bn')(x)
x = layers.add([x, residual])
residual = layers.Conv2D(1024, (1, 1), strides=(2, 2),
padding='same', use_bias=False)(x)
residual = layers.BatchNormalization(axis=channel_axis)(residual)
x = layers.Activation('relu', name='block13_sepconv1_act')(x)
x = layers.SeparableConv2D(728, (3, 3),
padding='same',
use_bias=False,
name='block13_sepconv1')(x)
x = layers.BatchNormalization(axis=channel_axis, name='block13_sepconv1_bn')(x)
x = layers.Activation('relu', name='block13_sepconv2_act')(x)
x = layers.SeparableConv2D(1024, (3, 3),
padding='same',
use_bias=False,
name='block13_sepconv2')(x)
x = layers.BatchNormalization(axis=channel_axis, name='block13_sepconv2_bn')(x)
x = layers.MaxPooling2D((3, 3),
strides=(2, 2),
padding='same',
name='block13_pool')(x)
x = layers.add([x, residual])
x = layers.SeparableConv2D(1536, (3, 3),
padding='same',
use_bias=False,
name='block14_sepconv1')(x)
x = layers.BatchNormalization(axis=channel_axis, name='block14_sepconv1_bn')(x)
x = layers.Activation('relu', name='block14_sepconv1_act')(x)
x = layers.SeparableConv2D(2048, (3, 3),
padding='same',
use_bias=False,
name='block14_sepconv2')(x)
x = layers.BatchNormalization(axis=channel_axis, name='block14_sepconv2_bn')(x)
x = layers.Activation('relu', name='block14_sepconv2_act')(x)
x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
x = layers.Dense(classes, activation='softmax', name='predictions')(x)
# Create model.
model = Model(img_input, x, name='xception')
return model
model = Xception()
#model.summary()
#tf.keras.utils.plot_model(model)
Next come some of the lightweight models: SqueezeNet, MobileNet, and ShuffleNet; the Xception model above is also quite parameter-efficient. See the linked material for more background.
SqueezeNet has roughly 1/50 of AlexNet's parameters with comparable accuracy; see the linked write-up for details. A quick parameter-count check follows the model code below.
from tensorflow.keras.layers import Input,Convolution2D,Activation,MaxPooling2D,Dropout,GlobalAveragePooling2D,concatenate
sq1x1 = "squeeze1x1"
exp1x1 = "expand1x1"
exp3x3 = "expand3x3"
relu = "relu_"
def fire_module(x, fire_id, squeeze=16, expand=64):
s_id = 'fire' + str(fire_id) + '/'
if tf.keras.backend.image_data_format() == 'channels_first':
channel_axis = 1
else:
channel_axis = 3
x = Convolution2D(squeeze, (1, 1), padding='valid', name=s_id + sq1x1)(x)
x = Activation('relu', name=s_id + relu + sq1x1)(x)
left = Convolution2D(expand, (1, 1), padding='valid', name=s_id + exp1x1)(x)
left = Activation('relu', name=s_id + relu + exp1x1)(left)
right = Convolution2D(expand, (3, 3), padding='same', name=s_id + exp3x3)(x)
right = Activation('relu', name=s_id + relu + exp3x3)(right)
x = concatenate([left, right], axis=channel_axis, name=s_id + 'concat')
return x
# Original SqueezeNet from paper.
def SqueezeNet(input_shape=(299,299,3),classes=1000):
"""Instantiates the SqueezeNet architecture.
"""
img_input = Input(shape=input_shape)
x = Convolution2D(64, (3, 3), strides=(2, 2), padding='valid', name='conv1')(img_input)
x = Activation('relu', name='relu_conv1')(x)
x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool1')(x)
x = fire_module(x, fire_id=2, squeeze=16, expand=64)
x = fire_module(x, fire_id=3, squeeze=16, expand=64)
x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool3')(x)
x = fire_module(x, fire_id=4, squeeze=32, expand=128)
x = fire_module(x, fire_id=5, squeeze=32, expand=128)
x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool5')(x)
x = fire_module(x, fire_id=6, squeeze=48, expand=192)
x = fire_module(x, fire_id=7, squeeze=48, expand=192)
x = fire_module(x, fire_id=8, squeeze=64, expand=256)
x = fire_module(x, fire_id=9, squeeze=64, expand=256)
x = Dropout(0.5, name='drop9')(x)
x = Convolution2D(classes, (1, 1), padding='valid', name='conv10')(x)
x = Activation('relu', name='relu_conv10')(x)
x = GlobalAveragePooling2D()(x)
x = Activation('softmax', name='loss')(x)
model = Model(img_input, x, name='squeezenet')
return model
model = SqueezeNet()
#model.summary()
#tf.keras.utils.plot_model(model)
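As a quick sanity check of the parameter claim above, one can compare the two models built in this notebook (the exact ratio depends on the number of classes and input size):
# Compare parameter counts of alexnet() and SqueezeNet() defined earlier.
a = alexnet()
s = SqueezeNet()
print(a.count_params(), s.count_params(), a.count_params() / s.count_params())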
The paper demonstrates the performance of MobileNets using alpha values of 1.0 (also called 100% MobileNet), 0.75, 0.5, and 0.25. For each of these alpha values, weights for 4 different input image sizes are provided (224, 192, 160, 128).
The following table describes the size and accuracy at input size 224×224 for the different width multipliers:
Width Multiplier (alpha) | ImageNet Acc | Multiply-Adds (M) | Params (M) |
---|---|---|---|
1.0 MobileNet-224 | 70.6 % | 529 | 4.2 |
0.75 MobileNet-224 | 68.4 % | 325 | 2.6 |
0.50 MobileNet-224 | 63.7 % | 149 | 1.3 |
0.25 MobileNet-224 | 50.6 % | 41 | 0.5 |
The following table describes the performance of the 100% MobileNet on various input sizes:
Resolution | ImageNet Acc | Multiply-Adds (M) | Params (M) |
---|---|---|---|
1.0 MobileNet-224 | 70.6 % | 529 | 4.2 |
1.0 MobileNet-192 | 69.1 % | 529 | 4.2 |
1.0 MobileNet-160 | 67.2 % | 529 | 4.2 |
1.0 MobileNet-128 | 64.4 % | 529 | 4.2 |
def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)):
"""Adds an initial convolution layer (with batch normalization and relu6).
# Arguments
inputs: Input tensor of shape `(rows, cols, 3)`
(with `channels_last` data format) or
(3, rows, cols) (with `channels_first` data format).
It should have exactly 3 inputs channels,
and width and height should be no smaller than 32.
E.g. `(224, 224, 3)` would be one valid value.
filters: Integer, the dimensionality of the output space
(i.e. the number of output filters in the convolution).
alpha: controls the width of the network.
- If `alpha` < 1.0, proportionally decreases the number
of filters in each layer.
- If `alpha` > 1.0, proportionally increases the number
of filters in each layer.
- If `alpha` = 1, default number of filters from the paper
are used at each layer.
kernel: An integer or tuple/list of 2 integers, specifying the
width and height of the 2D convolution window.
Can be a single integer to specify the same value for
all spatial dimensions.
strides: An integer or tuple/list of 2 integers,
specifying the strides of the convolution
along the width and height.
Can be a single integer to specify the same value for
all spatial dimensions.
Specifying any stride value != 1 is incompatible with specifying
any `dilation_rate` value != 1.
# Input shape
4D tensor with shape:
`(samples, channels, rows, cols)` if data_format='channels_first'
or 4D tensor with shape:
`(samples, rows, cols, channels)` if data_format='channels_last'.
# Output shape
4D tensor with shape:
`(samples, filters, new_rows, new_cols)`
if data_format='channels_first'
or 4D tensor with shape:
`(samples, new_rows, new_cols, filters)`
if data_format='channels_last'.
`rows` and `cols` values might have changed due to stride.
# Returns
Output tensor of block.
"""
channel_axis = 1 if tf.keras.backend.image_data_format() == 'channels_first' else -1
filters = int(filters * alpha)
x = layers.ZeroPadding2D(padding=((0, 1), (0, 1)), name='conv1_pad')(inputs)
x = layers.Conv2D(filters, kernel,
padding='valid',
use_bias=False,
strides=strides,
name='conv1')(x)
x = layers.BatchNormalization(axis=channel_axis, name='conv1_bn')(x)
return layers.ReLU(6., name='conv1_relu')(x)
def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha,
depth_multiplier=1, strides=(1, 1), block_id=1):
"""Adds a depthwise convolution block.
A depthwise convolution block consists of a depthwise conv,
batch normalization, relu6, pointwise convolution,
batch normalization and relu6 activation.
# Arguments
inputs: Input tensor of shape `(rows, cols, channels)`
(with `channels_last` data format) or
(channels, rows, cols) (with `channels_first` data format).
pointwise_conv_filters: Integer, the dimensionality of the output space
(i.e. the number of output filters in the pointwise convolution).
alpha: controls the width of the network.
- If `alpha` < 1.0, proportionally decreases the number
of filters in each layer.
- If `alpha` > 1.0, proportionally increases the number
of filters in each layer.
- If `alpha` = 1, default number of filters from the paper
are used at each layer.
depth_multiplier: The number of depthwise convolution output channels
for each input channel.
The total number of depthwise convolution output
channels will be equal to `filters_in * depth_multiplier`.
strides: An integer or tuple/list of 2 integers,
specifying the strides of the convolution
along the width and height.
Can be a single integer to specify the same value for
all spatial dimensions.
Specifying any stride value != 1 is incompatible with specifying
any `dilation_rate` value != 1.
block_id: Integer, a unique identification designating
the block number.
# Input shape
4D tensor with shape:
`(batch, channels, rows, cols)` if data_format='channels_first'
or 4D tensor with shape:
`(batch, rows, cols, channels)` if data_format='channels_last'.
# Output shape
4D tensor with shape:
`(batch, filters, new_rows, new_cols)`
if data_format='channels_first'
or 4D tensor with shape:
`(batch, new_rows, new_cols, filters)`
if data_format='channels_last'.
`rows` and `cols` values might have changed due to stride.
# Returns
Output tensor of block.
"""
channel_axis = 1 if tf.keras.backend.image_data_format() == 'channels_first' else -1
pointwise_conv_filters = int(pointwise_conv_filters * alpha)
if strides == (1, 1):
x = inputs
else:
x = layers.ZeroPadding2D(((0, 1), (0, 1)),
name='conv_pad_%d' % block_id)(inputs)
x = layers.DepthwiseConv2D((3, 3),
padding='same' if strides == (1, 1) else 'valid',
depth_multiplier=depth_multiplier,
strides=strides,
use_bias=False,
name='conv_dw_%d' % block_id)(x)
x = layers.BatchNormalization(
axis=channel_axis, name='conv_dw_%d_bn' % block_id)(x)
x = layers.ReLU(6., name='conv_dw_%d_relu' % block_id)(x)
x = layers.Conv2D(pointwise_conv_filters, (1, 1),
padding='same',
use_bias=False,
strides=(1, 1),
name='conv_pw_%d' % block_id)(x)
x = layers.BatchNormalization(axis=channel_axis,
name='conv_pw_%d_bn' % block_id)(x)
return layers.ReLU(6., name='conv_pw_%d_relu' % block_id)(x)
def MobileNet(input_shape=None,
alpha=1.0,
depth_multiplier=1,
dropout=1e-3,
classes=1000):
"""Instantiates the MobileNet architecture.
# Arguments
input_shape: optional shape tuple, only to be specified
if `include_top` is False (otherwise the input shape
has to be `(224, 224, 3)`
(with `channels_last` data format)
or (3, 224, 224) (with `channels_first` data format).
It should have exactly 3 inputs channels,
and width and height should be no smaller than 32.
E.g. `(200, 200, 3)` would be one valid value.
alpha: controls the width of the network. This is known as the
width multiplier in the MobileNet paper.
- If `alpha` < 1.0, proportionally decreases the number
of filters in each layer.
- If `alpha` > 1.0, proportionally increases the number
of filters in each layer.
- If `alpha` = 1, default number of filters from the paper
are used at each layer.
depth_multiplier: depth multiplier for depthwise convolution. This
is called the resolution multiplier in the MobileNet paper.
dropout: dropout rate
classes: optional number of classes to classify images
into, only to be specified if `include_top` is True, and
if no `weights` argument is specified.
# Returns
A Keras model instance.
# Raises
ValueError: in case of invalid argument for `weights`,
or invalid input shape.
RuntimeError: If attempting to run this model with a
backend that does not support separable convolutions.
"""
# Determine proper input shape and default size.
if input_shape is None:
rows = 224
cols= 224
else:
row_axis, col_axis = (0, 1)
rows = input_shape[row_axis]
cols = input_shape[col_axis]
if rows != cols or rows not in [128, 160, 192, 224]:
rows = 224
cols= 224
warnings.warn('`input_shape` is undefined or non-square, '
'or `rows` is not in [128, 160, 192, 224]. '
'Weights for input shape (224, 224) will be'
' loaded as the default.')
input_shape=(rows,cols,3)
if depth_multiplier != 1:
raise ValueError('If imagenet weights are being loaded, '
'depth multiplier must be 1')
if alpha not in [0.25, 0.50, 0.75, 1.0]:
raise ValueError('If imagenet weights are being loaded, '
'alpha can be one of'
'`0.25`, `0.50`, `0.75` or `1.0` only.')
img_input = layers.Input(shape=input_shape)
x = _conv_block(img_input, 32, alpha, strides=(2, 2))
x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1)
x = _depthwise_conv_block(x, 128, alpha, depth_multiplier,
strides=(2, 2), block_id=2)
x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3)
x = _depthwise_conv_block(x, 256, alpha, depth_multiplier,
strides=(2, 2), block_id=4)
x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5)
x = _depthwise_conv_block(x, 512, alpha, depth_multiplier,
strides=(2, 2), block_id=6)
x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7)
x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8)
x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9)
x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10)
x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11)
x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier,
strides=(2, 2), block_id=12)
x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13)
shape = (1, 1, int(1024 * alpha))
x = layers.GlobalAveragePooling2D()(x)
x = layers.Reshape(shape, name='reshape_1')(x)
x = layers.Dropout(dropout, name='dropout')(x)
x = layers.Conv2D(classes, (1, 1),
padding='same',
name='conv_preds')(x)
x = layers.Reshape((classes,), name='reshape_2')(x)
x = layers.Activation('softmax', name='act_softmax')(x)
# Create model.
model = Model(img_input, x, name='mobilenet_%0.2f_%s' % (alpha, rows))
return model
model=MobileNet()
#model.summary()
#tf.keras.utils.plot_model(model)
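To see the width multiplier in action, one can rebuild the model at the alpha values from the table above and compare sizes (a quick check of my own):
# Parameter counts shrink as alpha decreases.
for a in [1.0, 0.75, 0.5, 0.25]:
    m = MobileNet(input_shape=(224, 224, 3), alpha=a)
    print(a, m.count_params())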
[MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) (CVPR 2018)
The paper demonstrates the performance of MobileNetV2 using alpha values of 0.35, 0.5, 0.75, 1.0 (also called 100% MobileNetV2), 1.3, and 1.4. For each of these alpha values, weights for 5 different input image sizes are provided (224, 192, 160, 128, and 96).
The following table describes the performance of MobileNetV2 at various alpha values and input sizes (MACs stands for multiply-adds):
Classification Checkpoint | MACs (M) | Parameters (M) | Top 1 Accuracy | Top 5 Accuracy |
---|---|---|---|---|
[mobilenet_v2_1.4_224] | 582 | 6.06 | 75.0 | 92.5 |
[mobilenet_v2_1.3_224] | 509 | 5.34 | 74.4 | 92.1 |
[mobilenet_v2_1.0_224] | 300 | 3.47 | 71.8 | 91.0 |
[mobilenet_v2_1.0_192] | 221 | 3.47 | 70.7 | 90.1 |
[mobilenet_v2_1.0_160] | 154 | 3.47 | 68.8 | 89.0 |
[mobilenet_v2_1.0_128] | 99 | 3.47 | 65.3 | 86.9 |
[mobilenet_v2_1.0_96] | 56 | 3.47 | 60.3 | 83.2 |
[mobilenet_v2_0.75_224] | 209 | 2.61 | 69.8 | 89.6 |
[mobilenet_v2_0.75_192] | 153 | 2.61 | 68.7 | 88.9 |
[mobilenet_v2_0.75_160] | 107 | 2.61 | 66.4 | 87.3 |
[mobilenet_v2_0.75_128] | 69 | 2.61 | 63.2 | 85.3 |
[mobilenet_v2_0.75_96] | 39 | 2.61 | 58.8 | 81.6 |
[mobilenet_v2_0.5_224] | 97 | 1.95 | 65.4 | 86.4 |
[mobilenet_v2_0.5_192] | 71 | 1.95 | 63.9 | 85.4 |
[mobilenet_v2_0.5_160] | 50 | 1.95 | 61.0 | 83.2 |
[mobilenet_v2_0.5_128] | 32 | 1.95 | 57.7 | 80.8 |
[mobilenet_v2_0.5_96] | 18 | 1.95 | 51.2 | 75.8 |
[mobilenet_v2_0.35_224] | 59 | 1.66 | 60.3 | 82.9 |
[mobilenet_v2_0.35_192] | 43 | 1.66 | 58.2 | 81.2 |
[mobilenet_v2_0.35_160] | 30 | 1.66 | 55.7 | 79.1 |
[mobilenet_v2_0.35_128] | 20 | 1.66 | 50.8 | 75.0 |
[mobilenet_v2_0.35_96] | 11 | 1.66 | 45.5 | 70.4 |
V2 builds on V1 by adding residual connections; for details, refer to the paper linked above.
def correct_pad(inputs, kernel_size):
"""Returns a tuple for zero-padding for 2D convolution with downsampling.
# Arguments
inputs: Input tensor.
kernel_size: An integer or tuple/list of 2 integers.
# Returns
A tuple ((pad_top, pad_bottom), (pad_left, pad_right)).
"""
img_dim = 1
input_size = tf.keras.backend.int_shape(inputs)[img_dim:(img_dim + 2)]
if isinstance(kernel_size, int):
kernel_size = (kernel_size, kernel_size)
if input_size[0] is None:
adjust = (1, 1)
else:
adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2)
correct = (kernel_size[0] // 2, kernel_size[1] // 2)
return ((correct[0] - adjust[0], correct[0]),
(correct[1] - adjust[1], correct[1]))
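As a quick sanity check (my own sketch, not part of the original code), correct_pad pads only on the bottom/right of an even-sized input so that the following stride-2 convolution lines up cleanly:
dummy = layers.Input(shape=(224, 224, 3))
print(correct_pad(dummy, 3))
# ((0, 1), (0, 1)) for a 224x224 input with a 3x3 kernel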
def _make_divisible(v, divisor, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
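A small worked example of my own showing how _make_divisible keeps channel counts a multiple of 8 without shrinking them by more than 10%:
print(_make_divisible(32 * 0.75, 8))  # 24: already a multiple of 8
print(_make_divisible(32 * 0.35, 8))  # 16: rounding 11.2 down to 8 would lose >10%, so it rounds up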
def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id):
channel_axis = 1 if tf.keras.backend.image_data_format() == 'channels_first' else -1
in_channels = tf.keras.backend.int_shape(inputs)[channel_axis]
pointwise_conv_filters = int(filters * alpha)
pointwise_filters = _make_divisible(pointwise_conv_filters, 8)
x = inputs
prefix = 'block_{}_'.format(block_id)
if block_id:
# Expand
x = layers.Conv2D(expansion * in_channels,
kernel_size=1,
padding='same',
use_bias=False,
activation=None,
name=prefix + 'expand')(x)
x = layers.BatchNormalization(axis=channel_axis,
epsilon=1e-3,
momentum=0.999,
name=prefix + 'expand_BN')(x)
x = layers.ReLU(6., name=prefix + 'expand_relu')(x)
else:
prefix = 'expanded_conv_'
# Depthwise
if stride == 2:
x = layers.ZeroPadding2D(padding=correct_pad(x, 3),
name=prefix + 'pad')(x)
x = layers.DepthwiseConv2D(kernel_size=3,
strides=stride,
activation=None,
use_bias=False,
padding='same' if stride == 1 else 'valid',
name=prefix + 'depthwise')(x)
x = layers.BatchNormalization(axis=channel_axis,
epsilon=1e-3,
momentum=0.999,
name=prefix + 'depthwise_BN')(x)
x = layers.ReLU(6., name=prefix + 'depthwise_relu')(x)
# Project
x = layers.Conv2D(pointwise_filters,
kernel_size=1,
padding='same',
use_bias=False,
activation=None,
name=prefix + 'project')(x)
x = layers.BatchNormalization(axis=channel_axis,
epsilon=1e-3,
momentum=0.999,
name=prefix + 'project_BN')(x)
if in_channels == pointwise_filters and stride == 1:
return layers.Add(name=prefix + 'add')([inputs, x])
return x
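To make the expand -> depthwise -> project pattern concrete, here is a usage sketch of my own (the block_id values and shapes are illustrative); the residual add only happens when the stride is 1 and the input and output channel counts match:
inp = layers.Input(shape=(56, 56, 24))
out_same = _inverted_res_block(inp, expansion=6, stride=1, alpha=1.0, filters=24, block_id=2)
out_down = _inverted_res_block(inp, expansion=6, stride=2, alpha=1.0, filters=32, block_id=3)
print(out_same.shape)  # (None, 56, 56, 24), skip connection added
print(out_down.shape)  # (None, 28, 28, 32), no skip: stride 2 and channel change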
def MobileNetV2(input_shape=None,
alpha=1.0,
classes=1000):
"""Instantiates the MobileNetV2 architecture.
# Arguments
input_shape: optional shape tuple, to be specified if you would
like to use a model with an input resolution other than
(224, 224, 3). It should have exactly 3 input channels.
E.g. `(160, 160, 3)` would be one valid value.
alpha: controls the width of the network. This is known as the
width multiplier in the MobileNetV2 paper, but the name is kept for
consistency with MobileNetV1 in Keras.
- If `alpha` < 1.0, proportionally decreases the number
of filters in each layer.
- If `alpha` > 1.0, proportionally increases the number
of filters in each layer.
- If `alpha` = 1, default number of filters from the paper
are used at each layer.
classes: optional number of classes to classify images into.
# Returns
A Keras model instance.
# Raises
ValueError: in case of an invalid `alpha` value.
"""
if input_shape is None:
rows = 224
cols = 224
else:
row_axis, col_axis = (0, 1)
rows = input_shape[row_axis]
cols = input_shape[col_axis]
if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]:
raise ValueError('If imagenet weights are being loaded, '
'alpha can be one of `0.35`, `0.50`, `0.75`, '
'`1.0`, `1.3` or `1.4` only.')
if rows != cols or rows not in [96, 128, 160, 192, 224]:
rows = 224
warnings.warn('`input_shape` is undefined or non-square, '
'or `rows` is not in [96, 128, 160, 192, 224].'
' Weights for input shape (224, 224) will be'
' loaded as the default.')
input_shape = (rows, cols, 3)
img_input = layers.Input(shape=input_shape)
channel_axis = 1 if tf.keras.backend.image_data_format() == 'channels_first' else -1
first_block_filters = _make_divisible(32 * alpha, 8)
x = layers.ZeroPadding2D(padding=correct_pad(img_input, 3),
name='Conv1_pad')(img_input)
x = layers.Conv2D(first_block_filters,
kernel_size=3,
strides=(2, 2),
padding='valid',
use_bias=False,
name='Conv1')(x)
x = layers.BatchNormalization(axis=channel_axis,
epsilon=1e-3,
momentum=0.999,
name='bn_Conv1')(x)
x = layers.ReLU(6., name='Conv1_relu')(x)
x = _inverted_res_block(x, filters=16, alpha=alpha, stride=1,
expansion=1, block_id=0)
x = _inverted_res_block(x, filters=24, alpha=alpha, stride=2,
expansion=6, block_id=1)
x = _inverted_res_block(x, filters=24, alpha=alpha, stride=1,
expansion=6, block_id=2)
x = _inverted_res_block(x, filters=32, alpha=alpha, stride=2,
expansion=6, block_id=3)
x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1,
expansion=6, block_id=4)
x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1,
expansion=6, block_id=5)
x = _inverted_res_block(x, filters=64, alpha=alpha, stride=2,
expansion=6, block_id=6)
x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1,
expansion=6, block_id=7)
x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1,
expansion=6, block_id=8)
x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1,
expansion=6, block_id=9)
x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1,
expansion=6, block_id=10)
x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1,
expansion=6, block_id=11)
x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1,
expansion=6, block_id=12)
x = _inverted_res_block(x, filters=160, alpha=alpha, stride=2,
expansion=6, block_id=13)
x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1,
expansion=6, block_id=14)
x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1,
expansion=6, block_id=15)
x = _inverted_res_block(x, filters=320, alpha=alpha, stride=1,
expansion=6, block_id=16)
# no alpha applied to last conv as stated in the paper:
# if the width multiplier is greater than 1 we
# increase the number of output channels
if alpha > 1.0:
last_block_filters = _make_divisible(1280 * alpha, 8)
else:
last_block_filters = 1280
x = layers.Conv2D(last_block_filters,
kernel_size=1,
use_bias=False,
name='Conv_1')(x)
x = layers.BatchNormalization(axis=channel_axis,
epsilon=1e-3,
momentum=0.999,
name='Conv_1_bn')(x)
x = layers.ReLU(6., name='out_relu')(x)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(classes, activation='softmax',
use_bias=True, name='Logits')(x)
# Create model.
model = Model(img_input, x,
name='mobilenetv2_%0.2f_%s' % (alpha, rows))
return model
model=MobileNetV2()
# model.summary()
# tf.keras.utils.plot_model(model)
The paper ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile
Devices is similar in spirit to MobileNet; it tackles the problem that grouped convolutions
do not exchange information between channel groups, which it solves with a channel shuffle
operation (see channel_shuffle below).
from tensorflow.keras.layers import Activation, Add, Concatenate, GlobalAveragePooling2D,GlobalMaxPooling2D, Input, Dense,DepthwiseConv2D
from tensorflow.keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, BatchNormalization, Lambda
import numpy as np
def _group_conv(x, in_channels, out_channels, groups, kernel=1, stride=1, name=''):
"""
grouped convolution
Parameters
----------
x:
Input tensor of with `channels_last` data format
in_channels:
number of input channels
out_channels:
number of output channels
groups:
number of channel groups for the grouped convolution
kernel: int(1)
An integer or tuple/list of 2 integers, specifying the
width and height of the 2D convolution window.
Can be a single integer to specify the same value for
all spatial dimensions.
stride: int(1)
An integer or tuple/list of 2 integers,
specifying the strides of the convolution along the width and height.
Can be a single integer to specify the same value for all spatial dimensions.
name: str
A string to specifies the layer name
Returns
-------
Output tensor of the grouped convolution
"""
if groups == 1:
return Conv2D(filters=out_channels, kernel_size=kernel, padding='same',
use_bias=False, strides=stride, name=name)(x)
# number of intput channels per group
ig = in_channels // groups
group_list = []
assert out_channels % groups == 0
for i in range(groups):
offset = i * ig
# bind offset/ig as default arguments so each slice Lambda keeps its own values (late-binding closure fix)
group = Lambda(lambda z, offset=offset, ig=ig: z[:, :, :, offset: offset + ig], name='%s/g%d_slice' % (name, i))(x)
group_list.append(Conv2D(int(0.5 + out_channels / groups), kernel_size=kernel, strides=stride,
use_bias=False, padding='same', name='%s_/g%d' % (name, i))(group))
return Concatenate(name='%s/concat' % name)(group_list)
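A quick shape check of my own (the name 'demo_gconv' is just illustrative): 16 input channels split into 4 groups are convolved independently and the per-group outputs are concatenated:
inp = Input(shape=(28, 28, 16))
out = _group_conv(inp, in_channels=16, out_channels=32, groups=4, kernel=1, name='demo_gconv')
print(out.shape)  # (None, 28, 28, 32): four groups of 8 output channels each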
def channel_shuffle(x, groups):
"""
Parameters
----------
x:
Input tensor of with `channels_last` data format
groups: int
number of channel groups to shuffle between
Returns
-------
channel shuffled output tensor
Examples
--------
Example for a 1D Array with 3 groups
>>> d = np.array([0,1,2,3,4,5,6,7,8])
>>> x = np.reshape(d, (3,3))
>>> x = np.transpose(x, [1,0])
>>> x = np.reshape(x, (9,))
'[0 1 2 3 4 5 6 7 8] --> [0 3 6 1 4 7 2 5 8]'
"""
height, width, in_channels = x.shape.as_list()[1:]
channels_per_group = in_channels // groups
x = tf.reshape(x, [-1, height, width, groups, channels_per_group])
x = tf.transpose(x, (0, 1, 2, 4, 3)) # transpose
x = tf.reshape(x, [-1, height, width, in_channels])
return x
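The 1D example in the docstring carries over directly to the 4D case; a minimal check of my own on a 1x1 feature map with 8 channels and 2 groups:
demo = tf.reshape(tf.range(8, dtype=tf.float32), (1, 1, 1, 8))
print(channel_shuffle(demo, groups=2).numpy().ravel())
# [0. 4. 1. 5. 2. 6. 3. 7.] -- channels from the two groups are interleaved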
def _shuffle_unit(inputs, in_channels, out_channels, groups, bottleneck_ratio, strides=2, stage=1, block=1):
"""
creates a shuffleunit
Parameters
----------
inputs:
Input tensor of with `channels_last` data format
in_channels:
number of input channels
out_channels:
number of output channels
strides:
An integer or tuple/list of 2 integers,
specifying the strides of the convolution along the width and height.
groups: int(1)
number of channel groups for the grouped convolutions
bottleneck_ratio: float
bottleneck ratio implies the ratio of bottleneck channels to output channels.
For example, bottleneck ratio = 1 : 4 means the output feature map is 4 times
the width of the bottleneck feature map.
stage: int(1)
stage number
block: int(1)
block number
Returns
-------
Output tensor of the shuffle unit
"""
bn_axis = -1
prefix = 'stage%d/block%d' % (stage, block)
#if strides >= 2:
#out_channels -= in_channels
# default: 1/4 of the output channel of a ShuffleNet Unit
bottleneck_channels = int(out_channels * bottleneck_ratio)
groups = (1 if stage == 2 and block == 1 else groups)
x = _group_conv(inputs, in_channels, out_channels=bottleneck_channels,
groups=groups,
name='%s/1x1_gconv_1' % prefix)
x = BatchNormalization(axis=bn_axis, name='%s/bn_gconv_1' % prefix)(x)
x = Activation('relu', name='%s/relu_gconv_1' % prefix)(x)
x = Lambda(channel_shuffle, arguments={'groups': groups}, name='%s/channel_shuffle' % prefix)(x)
x = DepthwiseConv2D(kernel_size=(3, 3), padding="same", use_bias=False,
strides=strides, name='%s/1x1_dwconv_1' % prefix)(x)
x = BatchNormalization(axis=bn_axis, name='%s/bn_dwconv_1' % prefix)(x)
x = _group_conv(x, bottleneck_channels, out_channels=out_channels if strides == 1 else out_channels - in_channels,
groups=groups, name='%s/1x1_gconv_2' % prefix)
x = BatchNormalization(axis=bn_axis, name='%s/bn_gconv_2' % prefix)(x)
if strides < 2:
ret = Add(name='%s/add' % prefix)([x, inputs])
else:
avg = AveragePooling2D(pool_size=3, strides=2, padding='same', name='%s/avg_pool' % prefix)(inputs)
ret = Concatenate(bn_axis, name='%s/concat' % prefix)([x, avg])
ret = Activation('relu', name='%s/relu_out' % prefix)(ret)
return ret
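A usage sketch of my own for the unit above: with strides=2 the average-pooled input is concatenated, so the output ends up with out_channels channels at half the spatial resolution:
inp = Input(shape=(56, 56, 24))
out = _shuffle_unit(inp, in_channels=24, out_channels=240, groups=3,
                    bottleneck_ratio=0.25, strides=2, stage=2, block=1)
print(out.shape)  # (None, 28, 28, 240)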
def _block(x, channel_map, bottleneck_ratio, repeat=1, groups=1, stage=1):
"""
creates a bottleneck block containing `repeat + 1` shuffle units
Parameters
----------
x:
Input tensor of with `channels_last` data format
channel_map: list
list containing the number of output channels for a stage
repeat: int(1)
number of repetitions for a shuffle unit with stride 1
groups: int(1)
number of channel groups for the grouped convolutions
bottleneck_ratio: float
bottleneck ratio implies the ratio of bottleneck channels to output channels.
For example, bottleneck ratio = 1 : 4 means the output feature map is 4 times
the width of the bottleneck feature map.
stage: int(1)
stage number
Returns
-------
Output tensor of the stage
"""
x = _shuffle_unit(x, in_channels=channel_map[stage - 2],
out_channels=channel_map[stage - 1], strides=2,
groups=groups, bottleneck_ratio=bottleneck_ratio,
stage=stage, block=1)
for i in range(1, repeat + 1):
x = _shuffle_unit(x, in_channels=channel_map[stage - 1],
out_channels=channel_map[stage - 1], strides=1,
groups=groups, bottleneck_ratio=bottleneck_ratio,
stage=stage, block=(i + 1))
return x
def ShuffleNet(scale_factor=1.0,input_shape=(224,224,3), groups=1, num_shuffle_units=[3, 7, 3],
bottleneck_ratio=0.25, classes=1000):
"""
scale_factor:
scales the number of output channels
input_shape:
input image shape, e.g. (224, 224, 3)
groups: int
number of channel groups for the grouped convolutions
num_shuffle_units: list([3,7,3])
number of stages (list length) and the number of shufflenet units in a
stage beginning with stage 2 because stage 1 is fixed
e.g. idx 0 contains 3 + 1 (first shuffle unit in each stage differs) shufflenet units for stage 2
idx 1 contains 7 + 1 Shufflenet Units for stage 3 and
idx 2 contains 3 + 1 Shufflenet Units
bottleneck_ratio:
bottleneck ratio implies the ratio of bottleneck channels to output channels.
For example, bottleneck ratio = 1 : 4 means the output feature map is 4 times
the width of the bottleneck feature map.
classes: int(1000)
number of classes to predict
Returns
-------
A Keras model instance
References
----------
- [ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices]
(http://www.arxiv.org/pdf/1707.01083.pdf)
"""
name = "ShuffleNet_%.2gX_g%d_br_%.2g_%s" % (scale_factor, groups, bottleneck_ratio, "".join([str(x) for x in num_shuffle_units]))
out_dim_stage_two = {1: 144, 2: 200, 3: 240, 4: 272, 8: 384}
if groups not in out_dim_stage_two:
raise ValueError("Invalid number of groups.")
if not (float(scale_factor) * 4).is_integer():
raise ValueError("Invalid value for scale_factor. Should be x over 4.")
exp = np.insert(np.arange(0, len(num_shuffle_units), dtype=np.float32), 0, 0)
out_channels_in_stage = 2 ** exp
out_channels_in_stage *= out_dim_stage_two[groups] # calculate output channels for each stage
out_channels_in_stage[0] = 24 # first stage has always 24 output channels
out_channels_in_stage *= scale_factor
out_channels_in_stage = out_channels_in_stage.astype(int)
img_input = Input(shape=input_shape)
# create shufflenet architecture
x = Conv2D(filters=out_channels_in_stage[0], kernel_size=(3, 3), padding='same',
use_bias=False, strides=(2, 2), activation="relu", name="conv1")(img_input)
x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same', name="maxpool1")(x)
# create stages containing shufflenet units beginning at stage 2
for stage in range(0, len(num_shuffle_units)):
repeat = num_shuffle_units[stage]
x = _block(x, out_channels_in_stage, repeat=repeat,
bottleneck_ratio=bottleneck_ratio,
groups=groups, stage=stage + 2)
x = GlobalAveragePooling2D(name="global_pool")(x)
x = Dense(units=classes, name="fc")(x)
x = Activation('softmax', name='softmax')(x)
model = Model(inputs=img_input, outputs=x, name=name)
return model
model=ShuffleNet()
#model.summary()
# tf.keras.utils.plot_model(model)
[EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) (ICML 2019)
This is an architecture found by neural architecture search, so it is used here as-is; a small sketch of the depth-scaling arithmetic follows the B0-B7 definitions at the end of this section.
import math
def correct_pad(inputs, kernel_size):
"""Returns a tuple for zero-padding for 2D convolution with downsampling.
# Arguments
inputs: Input tensor.
kernel_size: An integer or tuple/list of 2 integers.
# Returns
A tuple ((pad_top, pad_bottom), (pad_left, pad_right)).
"""
img_dim = 1
input_size = tf.keras.backend.int_shape(inputs)[img_dim:(img_dim + 2)]
if isinstance(kernel_size, int):
kernel_size = (kernel_size, kernel_size)
if input_size[0] is None:
adjust = (1, 1)
else:
adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2)
correct = (kernel_size[0] // 2, kernel_size[1] // 2)
return ((correct[0] - adjust[0], correct[0]),
(correct[1] - adjust[1], correct[1]))
def swish(x):
"""Swish activation function.
# Arguments
x: Input tensor.
# Returns
The Swish activation: `x * sigmoid(x)`.
# References
[Searching for Activation Functions](https://arxiv.org/abs/1710.05941)
"""
return tf.nn.swish(x)
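A quick numeric check (mine, not from the original): swish is smooth around zero and close to the identity for large positive inputs:
print(swish(tf.constant([-1.0, 0.0, 1.0])).numpy())  # approx [-0.269, 0.0, 0.731]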
CONV_KERNEL_INITIALIZER = {
'class_name': 'VarianceScaling',
'config': {
'scale': 2.0,
'mode': 'fan_out',
# EfficientNet actually uses an untruncated normal distribution for
# initializing conv layers, but keras.initializers.VarianceScaling use
# a truncated distribution.
# We decided against a custom initializer for better serializability.
'distribution': 'normal'
}
}
DENSE_KERNEL_INITIALIZER = {
'class_name': 'VarianceScaling',
'config': {
'scale': 1. / 3.,
'mode': 'fan_out',
'distribution': 'uniform'
}
}
def block(inputs, activation_fn=swish, drop_rate=0., name='',
filters_in=32, filters_out=16, kernel_size=3, strides=1,
expand_ratio=1, se_ratio=0., id_skip=True):
"""A mobile inverted residual block.
# Arguments
inputs: input tensor.
activation_fn: activation function.
drop_rate: float between 0 and 1, fraction of the input units to drop.
name: string, block label.
filters_in: integer, the number of input filters.
filters_out: integer, the number of output filters.
kernel_size: integer, the dimension of the convolution window.
strides: integer, the stride of the convolution.
expand_ratio: integer, scaling coefficient for the input filters.
se_ratio: float between 0 and 1, fraction to squeeze the input filters.
id_skip: boolean.
# Returns
output tensor for the block.
"""
bn_axis = 3 if tf.keras.backend.image_data_format() == 'channels_last' else 1
# Expansion phase
filters = filters_in * expand_ratio
if expand_ratio != 1:
x = layers.Conv2D(filters, 1,
padding='same',
use_bias=False,
kernel_initializer=CONV_KERNEL_INITIALIZER,
name=name + 'expand_conv')(inputs)
x = layers.BatchNormalization(axis=bn_axis, name=name + 'expand_bn')(x)
x = layers.Activation(activation_fn, name=name + 'expand_activation')(x)
else:
x = inputs
# Depthwise Convolution
if strides == 2:
x = layers.ZeroPadding2D(padding=correct_pad(x, kernel_size),
name=name + 'dwconv_pad')(x)
conv_pad = 'valid'
else:
conv_pad = 'same'
x = layers.DepthwiseConv2D(kernel_size,
strides=strides,
padding=conv_pad,
use_bias=False,
depthwise_initializer=CONV_KERNEL_INITIALIZER,
name=name + 'dwconv')(x)
x = layers.BatchNormalization(axis=bn_axis, name=name + 'bn')(x)
x = layers.Activation(activation_fn, name=name + 'activation')(x)
# Squeeze and Excitation phase
if 0 < se_ratio <= 1:
filters_se = max(1, int(filters_in * se_ratio))
se = layers.GlobalAveragePooling2D(name=name + 'se_squeeze')(x)
se = layers.Reshape((1, 1, filters), name=name + 'se_reshape')(se)
se = layers.Conv2D(filters_se, 1,
padding='same',
activation=activation_fn,
kernel_initializer=CONV_KERNEL_INITIALIZER,
name=name + 'se_reduce')(se)
se = layers.Conv2D(filters, 1,
padding='same',
activation='sigmoid',
kernel_initializer=CONV_KERNEL_INITIALIZER,
name=name + 'se_expand')(se)
x = layers.multiply([x, se], name=name + 'se_excite')
# Output phase
x = layers.Conv2D(filters_out, 1,
padding='same',
use_bias=False,
kernel_initializer=CONV_KERNEL_INITIALIZER,
name=name + 'project_conv')(x)
x = layers.BatchNormalization(axis=bn_axis, name=name + 'project_bn')(x)
if (id_skip is True and strides == 1 and filters_in == filters_out):
if drop_rate > 0:
x = layers.Dropout(drop_rate,
noise_shape=(None, 1, 1, 1),
name=name + 'drop')(x)
x = layers.add([x, inputs], name=name + 'add')
return x
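A usage sketch of my own that feeds the block a feature map shaped like the output of the B0 stem (112x112x32; the 'demo_' name is illustrative); with filters_in != filters_out there is no identity skip:
inp = layers.Input(shape=(112, 112, 32))
out = block(inp, filters_in=32, filters_out=16, kernel_size=3, strides=1,
            expand_ratio=1, se_ratio=0.25, name='demo_')
print(out.shape)  # (None, 112, 112, 16)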
DEFAULT_BLOCKS_ARGS = [
{'kernel_size': 3, 'repeats': 1, 'filters_in': 32, 'filters_out': 16,
'expand_ratio': 1, 'id_skip': True, 'strides': 1, 'se_ratio': 0.25},
{'kernel_size': 3, 'repeats': 2, 'filters_in': 16, 'filters_out': 24,
'expand_ratio': 6, 'id_skip': True, 'strides': 2, 'se_ratio': 0.25},
{'kernel_size': 5, 'repeats': 2, 'filters_in': 24, 'filters_out': 40,
'expand_ratio': 6, 'id_skip': True, 'strides': 2, 'se_ratio': 0.25},
{'kernel_size': 3, 'repeats': 3, 'filters_in': 40, 'filters_out': 80,
'expand_ratio': 6, 'id_skip': True, 'strides': 2, 'se_ratio': 0.25},
{'kernel_size': 5, 'repeats': 3, 'filters_in': 80, 'filters_out': 112,
'expand_ratio': 6, 'id_skip': True, 'strides': 1, 'se_ratio': 0.25},
{'kernel_size': 5, 'repeats': 4, 'filters_in': 112, 'filters_out': 192,
'expand_ratio': 6, 'id_skip': True, 'strides': 2, 'se_ratio': 0.25},
{'kernel_size': 3, 'repeats': 1, 'filters_in': 192, 'filters_out': 320,
'expand_ratio': 6, 'id_skip': True, 'strides': 1, 'se_ratio': 0.25}
]
def EfficientNet(width_coefficient,
depth_coefficient,
default_size,
dropout_rate=0.2,
drop_connect_rate=0.2,
depth_divisor=8,
activation_fn=swish,
blocks_args=DEFAULT_BLOCKS_ARGS,
model_name='efficientnet',
input_shape=None,
classes=1000):
"""Instantiates the EfficientNet architecture using given scaling coefficients.
Optionally loads weights pre-trained on ImageNet.
Note that the data format convention used by the model is
the one specified in your Keras config at `~/.keras/keras.json`.
# Arguments
width_coefficient: float, scaling coefficient for network width.
depth_coefficient: float, scaling coefficient for network depth.
default_size: integer, default input image size.
dropout_rate: float, dropout rate before final classifier layer.
drop_connect_rate: float, dropout rate at skip connections.
depth_divisor: integer, a unit of network width.
activation_fn: activation function.
blocks_args: list of dicts, parameters to construct block modules.
model_name: string, model name.
input_shape: shape tuple, e.g. `(224, 224, 3)`.
It should have exactly 3 input channels.
classes: optional number of classes to classify images into.
# Returns
A Keras model instance.
# Raises
ValueError: in case of an invalid input shape.
"""
img_input = layers.Input(shape=input_shape)
bn_axis = 3 if tf.keras.backend.image_data_format() == 'channels_last' else 1
def round_filters(filters, divisor=depth_divisor):
"""Round number of filters based on depth multiplier."""
filters *= width_coefficient
new_filters = max(divisor, int(filters + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_filters < 0.9 * filters:
new_filters += divisor
return int(new_filters)
def round_repeats(repeats):
"""Round number of repeats based on depth multiplier."""
return int(math.ceil(depth_coefficient * repeats))
# Build stem
x = img_input
x = layers.ZeroPadding2D(padding=correct_pad(x, 3),
name='stem_conv_pad')(x)
x = layers.Conv2D(round_filters(32), 3,
strides=2,
padding='valid',
use_bias=False,
kernel_initializer=CONV_KERNEL_INITIALIZER,
name='stem_conv')(x)
x = layers.BatchNormalization(axis=bn_axis, name='stem_bn')(x)
x = layers.Activation(activation_fn, name='stem_activation')(x)
# Build blocks
from copy import deepcopy
blocks_args = deepcopy(blocks_args)
b = 0
blocks = float(sum(args['repeats'] for args in blocks_args))
for (i, args) in enumerate(blocks_args):
assert args['repeats'] > 0
# Update block input and output filters based on depth multiplier.
args['filters_in'] = round_filters(args['filters_in'])
args['filters_out'] = round_filters(args['filters_out'])
for j in range(round_repeats(args.pop('repeats'))):
# The first block needs to take care of stride and filter size increase.
if j > 0:
args['strides'] = 1
args['filters_in'] = args['filters_out']
x = block(x, activation_fn, drop_connect_rate * b / blocks,
name='block{}{}_'.format(i + 1, chr(j + 97)), **args)
b += 1
# Build top
x = layers.Conv2D(round_filters(1280), 1,
padding='same',
use_bias=False,
kernel_initializer=CONV_KERNEL_INITIALIZER,
name='top_conv')(x)
x = layers.BatchNormalization(axis=bn_axis, name='top_bn')(x)
x = layers.Activation(activation_fn, name='top_activation')(x)
x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
if dropout_rate > 0:
x = layers.Dropout(dropout_rate, name='top_dropout')(x)
x = layers.Dense(classes,
activation='softmax',
kernel_initializer=DENSE_KERNEL_INITIALIZER,
name='probs')(x)
inputs = img_input
# Create model.
model = Model(inputs, x, name=model_name)
return model
def EfficientNetB0(input_shape=(224,224,3),
classes=1000):
return EfficientNet(1.0, 1.0, 224, 0.2,
model_name='efficientnet-b0',
input_shape=input_shape,
classes=classes)
def EfficientNetB1(input_shape=(224,224,3),
classes=1000):
return EfficientNet(1.0, 1.1, 240, 0.2,
model_name='efficientnet-b1',
input_shape=input_shape,
classes=classes)
def EfficientNetB2(input_shape=(224,224,3),
classes=1000):
return EfficientNet(1.1, 1.2, 260, 0.3,
model_name='efficientnet-b2',
input_shape=input_shape,
classes=classes)
def EfficientNetB3(input_shape=(224,224,3),
classes=1000):
return EfficientNet(1.2, 1.4, 300, 0.3,
model_name='efficientnet-b3',
input_shape=input_shape,
classes=classes)
def EfficientNetB4(input_shape=(224,224,3),
classes=1000):
return EfficientNet(1.4, 1.8, 380, 0.4,
model_name='efficientnet-b4',
input_shape=input_shape,
classes=classes)
def EfficientNetB5(input_shape=(224,224,3),
classes=1000):
return EfficientNet(1.6, 2.2, 456, 0.4,
model_name='efficientnet-b5',
input_shape=input_shape,
classes=classes)
def EfficientNetB6(input_shape=(224,224,3),
classes=1000):
return EfficientNet(1.8, 2.6, 528, 0.5,
model_name='efficientnet-b6',
input_shape=input_shape,
classes=classes)
def EfficientNetB7(input_shape=(224,224,3),
classes=1000):
return EfficientNet(2.0, 3.1, 600, 0.5,
model_name='efficientnet-b7',
input_shape=input_shape,
classes=classes)
model = EfficientNetB0()
#model.summary()
# tf.keras.utils.plot_model(model)
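To see what the depth coefficient actually does, here is a small sketch of my own that applies the same ceil-based rounding as round_repeats to the per-stage repeat counts in DEFAULT_BLOCKS_ARGS:
base_repeats = [args['repeats'] for args in DEFAULT_BLOCKS_ARGS]  # [1, 2, 2, 3, 3, 4, 1]
for name, depth_coefficient in [('B0', 1.0), ('B1', 1.1), ('B3', 1.4)]:
    scaled = [int(math.ceil(depth_coefficient * r)) for r in base_repeats]
    print(name, scaled, 'total MBConv blocks:', sum(scaled))
# B0 [1, 2, 2, 3, 3, 4, 1] total MBConv blocks: 16
# B1 [2, 3, 3, 4, 4, 5, 2] total MBConv blocks: 23
# B3 [2, 3, 3, 5, 5, 6, 2] total MBConv blocks: 26
The width coefficient scales channel counts the same way through round_filters; the original models also grow the input resolution (the default_size argument), which this simplified version accepts but does not use since input_shape is fixed at (224, 224, 3).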