我正在尝试使用 Python 中的 OpenCV 和 tesseract 提取图像上的数字。这是我的尝试,但什么也没得到:该代码没有返回预期的数字。
import fitz, pytesseract, os, re
import cv2
# File name of the image to OCR; useMagick also writes its resized output
# under this name.
sTemp = "Number.png"
# Folder that holds the input image. Written as a raw string because '\M' is
# not a valid escape sequence and triggers a warning in a normal literal.
directory = r'.\MyFolder'
def useMagick(img):
    """Resize the image at path ``img`` with ImageMagick.

    Runs ``magick convert`` with ``-resize 1024x640 -density 300 -quality 100``
    and writes the result to the module-level ``sTemp`` file name. As a side
    effect it also points pytesseract at a fixed Windows install path of the
    Tesseract executable.

    Args:
        img: Path of the input image.

    Returns:
        None.
    """
    # Local import so the block does not depend on the file-level imports.
    import subprocess

    pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

    # Pass an argument list instead of a shell string: paths containing spaces
    # or shell metacharacters are then handed to ImageMagick unchanged.
    command = [
        'magick', 'convert', img,
        '-resize', '1024x640',
        '-density', '300',
        '-quality', '100',
        sTemp,
    ]
    try:
        subprocess.run(command, check=False)
    except OSError as exc:
        # os.system never raised when the executable could not be started;
        # keep that contract, but report the reason instead of hiding it.
        print('Could not run ImageMagick: {}'.format(exc))
def readNumber(img):
    """OCR the image at path ``img`` and return the number after a slash.

    The image is converted to grayscale and passed to Tesseract; the raw OCR
    text is printed. The text is then searched for ``<digits> / <digits>``
    (optional single whitespace around the slash) and the digits after the
    slash of the first match are returned. When nothing matches, OCR is
    retried once on a 3x3 Gaussian-blurred copy of the grayscale image.

    Args:
        img: Path of the image file to read.

    Returns:
        The matched digits as a string, or ``'REVIEW'`` when neither OCR
        attempt produced a match.
    """
    # One compiled pattern shared by both attempts instead of two copies.
    pattern = re.compile(r'\d+\s?\/\s?(\d+)')

    image = cv2.imread(img)
    gry = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    txt = pytesseract.image_to_string(gry)
    print(txt)

    # Test the match explicitly rather than indexing findall() and catching
    # everything with a bare except (which also traps KeyboardInterrupt).
    match = pattern.search(txt)
    if match is None:
        blur = cv2.GaussianBlur(gry, (3, 3), 0)
        txt = pytesseract.image_to_string(blur)
        match = pattern.search(txt)

    if match is None:
        return 'REVIEW'
    return match.group(1)
# Path of the source image inside `directory`.
sPath = os.path.join(directory, sTemp)
useMagick(sPath)
# useMagick writes the resized image to sTemp (relative to the current
# directory), so OCR that file; reading sPath would only re-read the untouched
# original. Fall back to the original when the resized file was not produced.
x = readNumber(sTemp if os.path.exists(sTemp) else sPath)
print(x)
Here's a sample of the images:
该代码不返回任何数字。如何提高此类图像的质量以便能够提取数字?
经过多次查找,终于解决了问题
import cv2
import numpy as np
import pytesseract
import os, re
# Accumulates one digit string per processed image.
mylist = list()
# Folder that is scanned for images; the trailing slash matters to code that
# concatenates a file name directly onto this value.
sImagesPath = r'MyFolder/'
def replace_chars(text):
    """Return every digit found in ``text``, concatenated in order.

    All non-digit characters are dropped; an input without digits yields an
    empty string.
    """
    return ''.join(found.group() for found in re.finditer(r'\d+', text))
# Walk the image folder, OCR each file as a digits-only image, and collect one
# digit string per readable image in `mylist`.
for root, dirs, file_names in os.walk(sImagesPath):
    for file_name in file_names:
        # Join with `root` (not sImagesPath) so files found in sub-folders
        # resolve to their real location.
        image_path = os.path.join(root, file_name)
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None for files it cannot decode; skip them
            # instead of crashing in cvtColor.
            print('Skipping unreadable image: {}'.format(image_path))
            continue
        gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Binarise with a local mean threshold before handing off to OCR.
        thr = cv2.adaptiveThreshold(gry, 181, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 13, 10)
        # The whitelist restricts recognition to the characters 0-9.
        txt = pytesseract.image_to_string(thr, lang='eng', config='--psm 10 --oem 3 -c tessedit_char_whitelist=0123456789')
        # Clean the OCR text once and reuse it for both the list and the echo.
        number = replace_chars(txt)
        mylist.append(number)
        print(number)

# Write the collected digit strings, one per line.
with open('Output.txt', 'w') as f:
    f.writelines(number + '\n' for number in mylist)
本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系:hwhale#tublm.com(使用前将#替换为@)