我正在寻找一种在 TensorFlow 中使用方形核高效实现形态学图像膨胀(https://en.wikipedia.org/wiki/Dilation_(morphology))的方法。OpenCV 的表现说明了实际可以达到的速度,相比之下,几种显而易见的做法效率似乎都极低。请看文末所附源代码的运行结果——即使是最快的方法也比 OpenCV 慢约 30 倍。以下数据是在搭载 M1 芯片的 MacBook Air 上测得的。
Dilation of 640x480 image with a 25x25 kernel took:
0.61ms using opencv
545.40ms using tf.nn.max_pool2d
228.66ms using tf.nn.dilation2d naively
17.63ms using tf.nn.dilation2d with row-col
问题:有谁知道一种在 TensorFlow 中进行图像膨胀、且效率不至于如此低下的方法?
当前解决方案的源代码:
import numpy as np
import cv2
import tensorflow as tf
import time
def tf_dilate(heatmap, width: int, method: str = 'rowcol'):
    """Dilate a 2-D heatmap with a flat ``width`` x ``width`` square kernel.

    Args:
        heatmap: 2-D tensor. It is indexed as ``heatmap[None, :, :, None]`` to
            add the batch and channel axes that the NHWC TensorFlow ops take.
        width: Side length of the square kernel.
        method: Implementation to use:

            * ``'maxpool'`` - stride-1 ``tf.nn.max_pool2d`` with a ``width``
              window.
            * ``'naive_dilate'`` - a single ``tf.nn.dilation2d`` with a
              ``width`` x ``width`` all-zero filter.
            * ``'rowcol_dilate'`` (alias ``'rowcol'``, the default) - a
              ``1`` x ``width`` dilation followed by a ``width`` x ``1``
              dilation.

    Returns:
        The dilated heatmap with the batch and channel axes removed again.

    Raises:
        NotImplementedError: If ``method`` is not one of the names above.
    """
    # 'rowcol' is the declared default, so it must be accepted; previously only
    # 'rowcol_dilate' was recognised and the default call always raised.
    if method not in ('maxpool', 'naive_dilate', 'rowcol', 'rowcol_dilate'):
        raise NotImplementedError(f'No method {method}')

    # Add batch and channel axes: (H, W) -> (1, H, W, 1).
    batched = heatmap[None, :, :, None]

    if method == 'maxpool':
        pooled = tf.nn.max_pool2d(batched, ksize=width, padding='SAME', strides=(1, 1))
        return pooled[0, :, :, 0]

    def flat_dilation(inputs, kernel_height: int, kernel_width: int):
        # The filter is all zeros, i.e. a flat structuring element.
        return tf.nn.dilation2d(
            inputs,
            filters=tf.zeros((kernel_height, kernel_width, 1), dtype=heatmap.dtype),
            strides=(1, 1, 1, 1),
            padding="SAME",
            data_format="NHWC",
            dilations=(1, 1, 1, 1),
        )

    if method == 'naive_dilate':
        dilated = flat_dilation(batched, width, width)
    else:
        # Row pass first, then column pass over the row-dilated result.
        row_dilated = flat_dilation(batched, 1, width)
        dilated = flat_dilation(row_dilated, width, 1)
    return dilated[0, :, :, 0]
def test_dilation_options(img_shape=(480, 640), kernel_size=25):
    """Time OpenCV and the TensorFlow dilation variants and check they agree.

    A random non-negative float32 image of shape ``img_shape`` is dilated with
    a ``kernel_size`` x ``kernel_size`` square kernel by ``cv2.dilate`` and by
    ``tf_dilate`` using the 'maxpool', 'naive_dilate' and 'rowcol_dilate'
    methods. Each TensorFlow result must equal the OpenCV result exactly;
    the timings are then printed in milliseconds.

    Args:
        img_shape: (height, width) of the random test image.
        kernel_size: Side length of the square dilation kernel.

    Raises:
        AssertionError: If a TensorFlow result differs from the OpenCV result.
    """
    img = np.random.randn(*img_shape).astype(np.float32)**2
    tf_image = tf.constant(img, dtype=tf.float32)
    cv_kernel = np.ones((kernel_size, kernel_size), dtype=np.float32)

    def dilate(version: str):
        if version == 'opencv':
            return cv2.dilate(img, kernel=cv_kernel)
        # .numpy() stays inside the timed call so the measurement covers
        # getting the finished result back as an array.
        return tf_dilate(tf_image, width=kernel_size, method=version).numpy()

    def get_result_and_time(version: str):
        # One untimed call first, so any one-time start-up cost of the
        # backend is not attributed to the dilation itself.
        dilate(version)
        # perf_counter is the monotonic, high-resolution clock meant for
        # measuring short durations; time.time() can jump and may be coarse.
        t_start = time.perf_counter()
        result = dilate(version)
        return time.perf_counter() - t_start, result

    t_opencv, result_opencv = get_result_and_time('opencv')
    t_maxpool, result_maxpool = get_result_and_time('maxpool')
    t_naive_dilate, result_naive_dilate = get_result_and_time('naive_dilate')
    t_rowcol_dilate, result_rowcol_dilate = get_result_and_time('rowcol_dilate')

    assert np.array_equal(result_opencv, result_maxpool), "Maxpool result did not match opencv result"
    assert np.array_equal(result_opencv, result_naive_dilate), "Naive dilation result did not match opencv result"
    assert np.array_equal(result_opencv, result_rowcol_dilate), "Row-col dilation result did not match opencv result"

    print(f'Dilation of {img_shape[1]}x{img_shape[0]} image with a {kernel_size}x{kernel_size} kernel took: '
          f'\n {t_opencv*1000:.2f}ms using opencv'
          f'\n {t_maxpool*1000:.2f}ms using tf.nn.max_pool2d'
          f'\n {t_naive_dilate*1000:.2f}ms using tf.nn.dilation2d naively'
          f'\n {t_rowcol_dilate*1000:.2f}ms using tf.nn.dilation2d with row-col'
          )
# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    test_dilation_options()