我有一个这样设置的目录:
images
-- val
--class1
--class2
-- test
--all_classes
-- train
--class1
--class2
每个目录中都有一组图像。我想预测测试中的每个图像是否属于 1 类或 2 类。
我写这个是为了读取训练和验证数据:
train_path = "/content/drive/train/"
valid_path = "/content/drive/val/"
train_datagen = ImageDataGenerator(
rescale=1./255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1./255)
train_generator=train_datagen.flow_from_directory(
directory=train_path,
batch_size=32,
class_mode='binary',
target_size=(150,150)
)
validation_generator=test_datagen.flow_from_directory(
directory=valid_path,
batch_size=32,
class_mode='binary',
target_size=(150,150)
)
创建了一个网络:
def create_network():
model = Sequential()
model.add(Input(shape=(150,150,3)))
model.add(Conv2D(32, kernel_size=3,strides=(1, 1),activation='relu', padding='valid', dilation_rate=1))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, kernel_size=3, strides=(1, 1), activation='relu',padding='valid', dilation_rate=1))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
plot_model(model, to_file='/content/drive/question1_model.png', show_shapes=True, show_layer_names=True)
model.compile(optimizer = 'adam',
loss = 'binary_crossentropy',
metrics = ['accuracy'])
return model
拟合模型:
def fit_model(train_generator=train_generator, validation_generator=validation_generator,network=create_network()):
checkpoint_path = "/content/drive/question1_checkpoint.h5"
checkpoint_dir = os.path.dirname(checkpoint_path)
callbacks_list = [
callbacks.EarlyStopping(
monitor = 'accuracy',
patience = 5,
),
callbacks.ModelCheckpoint(
filepath=checkpoint_path,
monitor = 'val_loss',
#save_weights_only=True,
save_best_only=True,
),
]
model = network
history = model.fit(train_generator,
epochs=200,
validation_data=validation_generator,
batch_size=32,
callbacks = callbacks_list,
verbose=1
)
return history,model,time_taken
history,model = fit_model(train_generator,validation_generator)
模型的准确率和验证准确率>80%,我将其重新加载进行预测:
model = load_model('/content/drive/question1_checkpoint.h5')
然后我想预测测试目录中的一组图像:
test_datagen = ImageDataGenerator(rescale=1./255)
test_path = "/content/drive/test/"
test_generator = test_datagen.flow_from_directory(
directory=test_path,
batch_size=16,
class_mode='binary',
target_size=(150,150),
shuffle = False
)
test_generator.reset()
filenames = test_generator.filenames
nb_samples = len(filenames)
batch_size=16
predict = model.predict(test_generator,steps=test_generator.n/batch_size)
当我打印预测的开始时,我可以看到:
[[6.09035552e-01]
[2.47541070e-02]
[7.37663209e-02]
[5.22839129e-02]
[2.94408262e-01]
[1.39171720e-01]
[6.15863085e-01]
我认为这给了我 1 类正确的概率。但是当我打印每个预测的类别时:
predicted_class_indices=np.argmax(predict,axis=-1)
print(predicted_class_indices)
输出是:
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0]
这意味着我的预测概率没有被正确地转换到课堂上,对吗?因为例如 2.47541070e-02 是 0.02,而 6.09035552e-01 是 0.60,所以这些不应该被预测为不同的类别吗?有人可以告诉我哪里出错了吗?