这里有两种方法:第一种使用表格的结构来确定行数和列数,而第二种方法使用单元格计数。
方法#1:表结构
基本思路是:统计表格中水平线和垂直线的数量,以此确定行数和列数。对于行,rows = horizontal lines - 1;对于列,columns = vertical lines - 1。
检测到的水平线(以绿色标出)
检测到的垂直线(以绿色标出)
Result
Rows: 7
Columns: 4
Code
import cv2
# Method 1: count the table's ruling lines. A table with R rows is bounded by
# R + 1 horizontal lines, and one with C columns by C + 1 vertical lines.

# Load image, convert to grayscale, Otsu's threshold
image = cv2.imread('1.jpg')
if image is None:
    # cv2.imread signals an unreadable/missing file by returning None
    # instead of raising, which otherwise surfaces as an obscure cvtColor error.
    raise FileNotFoundError("Could not read image '1.jpg'")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Inverted threshold so that the dark table lines become white foreground.
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]


def _count_lines(binary, kernel_size, canvas):
    """Count straight lines in `binary` that survive opening with a thin kernel.

    `kernel_size` is the (width, height) of the rectangular structuring
    element: (25, 1) keeps only horizontal runs, (1, 25) only vertical runs.
    Every detected line is outlined in green on `canvas` (modified in place).
    Returns the number of external contours found, i.e. the number of lines.
    """
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
    lines = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel, iterations=2)
    cnts = cv2.findContours(lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 values in OpenCV 2.x/4.x and 3 values in 3.x.
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    for c in cnts:
        cv2.drawContours(canvas, [c], -1, (36, 255, 12), 2)
    return len(cnts)


# Find number of rows
rows = _count_lines(thresh, (25, 1), image)

# Find number of columns
columns = _count_lines(thresh, (1, 25), image)

# N bounding lines enclose N - 1 rows/columns; clamp so that an image with
# no detected lines reports 0 rather than -1.
print('Rows:', max(rows - 1, 0))
print('Columns:', max(columns - 1, 0))

cv2.imshow('thresh', thresh)
cv2.imshow('image', image)
cv2.waitKey()
方法#2:单元格计数
- 获取二值图像。加载图像,转换为灰度图,进行高斯模糊,然后应用 Otsu(大津)阈值法。
- 删除单元格内的文本。查找轮廓,用 cv2.contourArea() 按面积过滤,并用 cv2.drawContours() 填充这些轮廓以删除文本。
- 反转图像。我们反转图像,使单元格为白色,背景为黑色。
- 对单元格进行排序并统计行/列。我们先查找轮廓,然后使用 imutils.contours.sort_contours 将轮廓按从上到下(top-to-bottom)排序。接着遍历各个轮廓并计算其质心(centroid),得到 (cX, cY) 坐标。思路是比较每个单元格的 cY 值,借助一个偏移量来判断它是属于新的一行,还是与前面的单元格位于同一行:如果 cY 值的差在 +/- 某个偏移量之内,则单元格位于同一行;如果差值更大,则说明该单元格位于新的一行。我们据此构建一个模拟表,表的长度即为行数,其中任意一行的长度即为列数。
二值图像
删除文本轮廓并反转后的图像
这是迭代每个单元格以计算行数和列数的可视化
Result
Rows: 7
Columns: 4
Code
import numpy as np
from imutils import contours
import cv2
# Method 2: count the table's cells. Cells are isolated as white blobs,
# visited top-to-bottom, and grouped into rows by centroid height.

# Load image, grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread('1.jpg')
if image is None:
    # cv2.imread signals an unreadable/missing file by returning None
    # instead of raising, which otherwise surfaces as an obscure cvtColor error.
    raise FileNotFoundError("Could not read image '1.jpg'")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Find contours and remove text inside cells: anything with an area below
# 4000 px is treated as text and painted black (filled with 0).
cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 values in OpenCV 2.x/4.x and 3 values in 3.x.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    if cv2.contourArea(c) < 4000:
        cv2.drawContours(thresh, [c], -1, 0, -1)

# Invert image so that cells are white and the grid lines are black
invert = 255 - thresh
offset = 10  # max vertical centroid distance (px) still counted as the same row
visualize = cv2.cvtColor(invert, cv2.COLOR_GRAY2BGR)

# Find contours, sort from top-to-bottom and then sum up column/rows
cnts = cv2.findContours(invert, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
if cnts:
    # sort_contours unpacks a zip of its input and fails on an empty sequence.
    (cnts, _) = contours.sort_contours(cnts, method="top-to-bottom")

# `table` holds one list per detected row; each row holds one entry per cell.
table = []
row_cY = None  # centroid height of the first cell of the current row
for c in cnts:
    # Find centroid
    M = cv2.moments(c)
    if M["m00"] == 0:
        # Zero-area contour has no defined centroid; skip it.
        continue
    cX = int(M["m10"] / M["m00"])
    cY = int(M["m01"] / M["m00"])

    # New row: the very first cell, or a cell whose centroid lies more than
    # `offset` pixels below the row's reference height.
    if row_cY is None or (cY - row_cY) > offset:
        table.append([])
        row_cY = cY

    # Record the cell in the current (possibly just opened) row
    table[-1].append(1)

    # Uncomment to visualize
    # cv2.circle(visualize, (cX, cY), 10, (36, 255, 12), -1)
    # cv2.imshow('visualize', visualize)
    # cv2.waitKey(200)

print('Rows: {}'.format(len(table)))
# Column count is taken from the first row; 0 when no cells were found.
print('Columns: {}'.format(len(table[0]) if table else 0))

cv2.imshow('invert', invert)
cv2.imshow('thresh', thresh)
cv2.waitKey()