使用opencv获取图像中所有文本的位置 [英] Get the location of all text present in image using opencv
问题描述
我有一张包含文本(数字和字母)的图像.我想获取此图像中所有文本和数字的位置.我也想提取所有文本.
如何获取图像中的坐标以及所有文本(数字和字母).例如 10B、44、16、38、22B 等
这是一种使用形态学操作来过滤非文本轮廓的可行方法,各步骤的结果如下:
去除垂直线
移除了各种非文本轮廓(对角线、圆形对象和曲线)
检测到的文本区域
import cv2
import numpy as np
import pytesseract

# Locate text regions in '1.jpg' by erasing non-text shapes from a binary
# mask, then OCR each remaining region with Tesseract.

# Point pytesseract at the Tesseract executable (Windows install path).
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load image, grayscale, Otsu's threshold
image = cv2.imread('1.jpg')
if image is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("Could not read input image '1.jpg'")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# THRESH_BINARY_INV: pixels at or below the Otsu threshold become 255.
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
clean = thresh.copy()

# Remove horizontal lines
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15,1))
detect_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
cnts = cv2.findContours(detect_horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 or 3 values depending on the OpenCV version.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(clean, [c], -1, 0, 3)

# Remove vertical lines
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,30))
detect_vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
cnts = cv2.findContours(detect_vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(clean, [c], -1, 0, 3)

cnts = cv2.findContours(clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    # Remove diagonal lines
    area = cv2.contourArea(c)
    if area < 100:
        cv2.drawContours(clean, [c], -1, 0, 3)
    # Remove circle objects
    elif area > 1000:
        cv2.drawContours(clean, [c], -1, 0, -1)
    # Remove curve stuff
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    x,y,w,h = cv2.boundingRect(c)
    if len(approx) == 4:
        cv2.rectangle(clean, (x, y), (x + w, y + h), 0, -1)

# Open, then close, the cleaned mask before looking for text regions.
open_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2,2))
opening = cv2.morphologyEx(clean, cv2.MORPH_OPEN, open_kernel, iterations=2)
close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,2))
close = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, close_kernel, iterations=4)
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    x,y,w,h = cv2.boundingRect(c)
    area = cv2.contourArea(c)
    if area > 500:
        ROI = image[y:y+h, x:x+w]
        ROI = cv2.GaussianBlur(ROI, (3,3), 0)
        # OCR output may end with newline/form-feed characters; strip them
        # so the isalnum() check looks at the recognised text only.
        data = pytesseract.image_to_string(ROI, lang='eng',config='--psm 6').strip()
        if data.isalnum():
            cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 2)
            print(data)

cv2.imwrite('image.png', image)
cv2.imwrite('clean.png', clean)
cv2.imwrite('close.png', close)
cv2.imwrite('opening.png', opening)
# Kept from the original; this script itself opens no display window.
cv2.waitKey()
I have this image that contains text (numbers and letters). I want to get the location of all the text and numbers present in this image. I also want to extract all of the text.
How do I get the coordinates as well as all the text (numbers and letters) in my image? For example: 10B, 44, 16, 38, 22B, etc.
Here's a potential approach using morphological operations to filter out non-text contours. The idea is:
Obtain binary image. Load image, grayscale, then Otsu's threshold
Remove horizontal and vertical lines. Create horizontal and vertical kernels using `cv2.getStructuringElement()`, then remove the lines with `cv2.drawContours()`.
Remove diagonal lines, circle objects, and curved contours. Filter using contour area `cv2.contourArea()` and contour approximation `cv2.approxPolyDP()` to isolate non-text contours.
Extract text ROIs and OCR. Find contours and filter for ROIs, then OCR using Pytesseract.
Removed horizontal lines highlighted in green
Removed vertical lines
Removed assorted non-text contours (diagonal lines, circular objects, and curves)
Detected text regions
import cv2
import numpy as np
import pytesseract
# Locate text regions in '1.jpg' by erasing non-text shapes from a binary
# mask, then OCR each remaining region with Tesseract.

# Point pytesseract at the Tesseract executable (Windows install path).
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load image, grayscale, Otsu's threshold
image = cv2.imread('1.jpg')
if image is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("Could not read input image '1.jpg'")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# THRESH_BINARY_INV: pixels at or below the Otsu threshold become 255.
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
clean = thresh.copy()

# Remove horizontal lines
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15,1))
detect_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
cnts = cv2.findContours(detect_horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 or 3 values depending on the OpenCV version.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(clean, [c], -1, 0, 3)

# Remove vertical lines
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,30))
detect_vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
cnts = cv2.findContours(detect_vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(clean, [c], -1, 0, 3)

cnts = cv2.findContours(clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    # Remove diagonal lines
    area = cv2.contourArea(c)
    if area < 100:
        cv2.drawContours(clean, [c], -1, 0, 3)
    # Remove circle objects
    elif area > 1000:
        cv2.drawContours(clean, [c], -1, 0, -1)
    # Remove curve stuff
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    x,y,w,h = cv2.boundingRect(c)
    if len(approx) == 4:
        cv2.rectangle(clean, (x, y), (x + w, y + h), 0, -1)

# Open, then close, the cleaned mask before looking for text regions.
open_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2,2))
opening = cv2.morphologyEx(clean, cv2.MORPH_OPEN, open_kernel, iterations=2)
close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,2))
close = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, close_kernel, iterations=4)
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    x,y,w,h = cv2.boundingRect(c)
    area = cv2.contourArea(c)
    if area > 500:
        ROI = image[y:y+h, x:x+w]
        ROI = cv2.GaussianBlur(ROI, (3,3), 0)
        # OCR output may end with newline/form-feed characters; strip them
        # so the isalnum() check looks at the recognised text only.
        data = pytesseract.image_to_string(ROI, lang='eng',config='--psm 6').strip()
        if data.isalnum():
            cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 2)
            print(data)

cv2.imwrite('image.png', image)
cv2.imwrite('clean.png', clean)
cv2.imwrite('close.png', close)
cv2.imwrite('opening.png', opening)
# Kept from the original; this script itself opens no display window.
cv2.waitKey()
这篇关于使用opencv获取图像中所有文本的位置的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!