使用opencv获取图像中所有文本的位置 [英] Get the location of all text present in image using opencv

查看：74 发布时间：2021/12/27 17:13:29 python opencv machine-learning image-processing deep-learning

本文介绍了使用opencv获取图像中所有文本的位置的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

我有一张包含文本(数字和字母)的图像.我想获取此图像中所有文本和数字的位置.我也想提取所有文本.

如何获取图像中的坐标以及所有文本(数字和字母).例如 10B、44、16、38、22B 等

解决方案

这是一种使用形态学操作来过滤掉非文本轮廓的可行方法。

去除垂直线

移除了各种非文本轮廓(对角线、圆形对象和曲线)

检测到的文本区域

import cv2
import numpy as np
import pytesseract

pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# 加载图像，灰度，大津阈值
image = cv2.imread('1.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
clean = thresh.copy()

# 删除水平线
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15,1))
detect_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
cnts = cv2.findContours(detect_horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(clean, [c], -1, 0, 3)

# 删除垂直线
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,30))
detect_vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
cnts = cv2.findContours(detect_vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(clean, [c], -1, 0, 3)

cnts = cv2.findContours(clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    # 删除对角线
    area = cv2.contourArea(c)
    if area < 100:
        cv2.drawContours(clean, [c], -1, 0, 3)
    # 删除圆形对象
    elif area > 1000:
        cv2.drawContours(clean, [c], -1, 0, -1)
    # 删除曲线的东西
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    x,y,w,h = cv2.boundingRect(c)
    if len(approx) == 4:
        cv2.rectangle(clean, (x, y), (x + w, y + h), 0, -1)

open_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2,2))
opening = cv2.morphologyEx(clean, cv2.MORPH_OPEN, open_kernel, iterations=2)
close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,2))
close = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, close_kernel, iterations=4)
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    x,y,w,h = cv2.boundingRect(c)
    area = cv2.contourArea(c)
    if area > 500:
        ROI = image[y:y+h, x:x+w]
        ROI = cv2.GaussianBlur(ROI, (3,3), 0)
        data = pytesseract.image_to_string(ROI, lang='eng',config='--psm 6')
        if data.isalnum():
            cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 2)
            print(data)

cv2.imwrite('image.png', image)
cv2.imwrite('clean.png', clean)
cv2.imwrite('close.png', close)
cv2.imwrite('opening.png', opening)
cv2.waitKey()

I have an image that contains text (numbers and letters). I want to get the location of all the text and numbers present in this image. I also want to extract all the text.

How do I get the coordinates as well as all the text (numbers and letters) in my image? For example: 10B, 44, 16, 38, 22B, etc.

解决方案

Here's a potential approach using morphological operations to filter out non-text contours. The idea is:

Obtain binary image. Load image, grayscale, then Otsu's threshold
Remove horizontal and vertical lines. Create horizontal and vertical kernels using cv2.getStructuringElement() then remove lines with cv2.drawContours()
Remove diagonal lines, circle objects, and curved contours. Filter using contour area cv2.contourArea() and contour approximation cv2.approxPolyDP() to isolate non-text contours
Extract text ROIs and OCR. Find contours and filter for ROIs then OCR using Pytesseract.

Removed horizontal lines highlighted in green

Removed vertical lines

Removed assorted non-text contours (diagonal lines, circular objects, and curves)

Detected text regions

import cv2
import numpy as np
import pytesseract

# Script: locate and OCR short alphanumeric labels in '1.jpg'.
# Lines and other non-text shapes are erased from a binary mask, the remaining
# blobs are treated as candidate text regions, and each region is passed to
# Tesseract. Recognised regions are boxed on the image and their text printed.

# Location of the Tesseract executable (Windows install path).
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"


def _external_contours(mask):
    """Return the outer contours found in the binary image *mask*.

    cv2.findContours returns either a 2-tuple or a 3-tuple depending on the
    OpenCV version; the contour list is selected from whichever shape comes
    back.
    """
    found = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return found[0] if len(found) == 2 else found[1]


# Load image, grayscale, Otsu's threshold
image = cv2.imread('1.jpg')
if image is None:
    # cv2.imread signals failure by returning None rather than raising.
    raise FileNotFoundError("Could not read input image '1.jpg'")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
clean = thresh.copy()

# Remove horizontal lines: opening with a wide, 1-px-tall kernel keeps only
# horizontal runs, which are then painted black on the working mask.
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 1))
detect_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
for c in _external_contours(detect_horizontal):
    cv2.drawContours(clean, [c], -1, 0, 3)

# Remove vertical lines: same idea with a tall, 1-px-wide kernel.
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 30))
detect_vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
for c in _external_contours(detect_vertical):
    cv2.drawContours(clean, [c], -1, 0, 3)

# Contours are collected once up front; erasing from `clean` inside the loop
# does not affect the list being iterated.
for c in _external_contours(clean):
    # Remove diagonal lines
    area = cv2.contourArea(c)
    if area < 100:
        cv2.drawContours(clean, [c], -1, 0, 3)
    # Remove circle objects
    elif area > 1000:
        cv2.drawContours(clean, [c], -1, 0, -1)
    # Remove curve stuff: anything that approximates to a 4-vertex polygon
    # has its whole bounding box blanked.
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    x, y, w, h = cv2.boundingRect(c)
    if len(approx) == 4:
        cv2.rectangle(clean, (x, y), (x + w, y + h), 0, -1)

# Open to drop leftover specks, then close to merge neighbouring glyphs into
# a single blob per label.
open_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
opening = cv2.morphologyEx(clean, cv2.MORPH_OPEN, open_kernel, iterations=2)
close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 2))
close = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, close_kernel, iterations=4)

# OCR crops are taken from an untouched copy so that rectangles already drawn
# on `image` never leak into a later region of interest.
ocr_source = image.copy()
for c in _external_contours(close):
    x, y, w, h = cv2.boundingRect(c)
    area = cv2.contourArea(c)
    if area > 500:
        ROI = ocr_source[y:y + h, x:x + w]
        ROI = cv2.GaussianBlur(ROI, (3, 3), 0)
        data = pytesseract.image_to_string(ROI, lang='eng', config='--psm 6')
        # Tesseract output can carry trailing whitespace (newline / form
        # feed); strip it, otherwise isalnum() rejects every result.
        text = data.strip()
        if text.isalnum():
            cv2.rectangle(image, (x, y), (x + w, y + h), (36, 255, 12), 2)
            print(text)

# Write the annotated image and the intermediate masks for inspection.
cv2.imwrite('image.png', image)
cv2.imwrite('clean.png', clean)
cv2.imwrite('close.png', close)
cv2.imwrite('opening.png', opening)

这篇关于使用opencv获取图像中所有文本的位置的文章就介绍到这了，希望我们推荐的答案对大家有所帮助，也希望大家多多支持IT屋！

查看全文

使用opencv获取图像中所有文本的位置 [英] Get the location of all text present in image using opencv

问题描述

相关文章

AI人工智能最新文章

热门教程

热门工具

登录关闭

使用opencv获取图像中所有文本的位置 [英] Get the location of all text present in image using opencv

问题描述

相关文章

AI人工智能最新文章

热门教程

热门工具

登录 关闭

登录关闭