How to rotate an image to align the text for extraction?


Problem Description

I am using pytesseract to extract text from images, but it doesn't work on images that are inclined. Consider the image given below:

Here is the code to extract the text, which works fine on images that are not inclined.

import cv2
import numpy as np
import pytesseract
from google.colab.patches import cv2_imshow  # Colab helper used for display below

img = cv2.imread(<path_to_image>)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5,5),0)
ret3, thresh = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
def findSignificantContours(img, edgeImg):
    contours, hierarchy = cv2.findContours(edgeImg, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
    # Find level 1 contours
    level1 = []
    for i, tupl in enumerate(hierarchy[0]):
        # Each array is in format (Next, Prev, First child, Parent)
        # Filter the ones without parent
        if tupl[3] == -1:
            tupl = np.insert(tupl, 0, [i])
            level1.append(tupl)
    significant = []
    tooSmall = edgeImg.size * 5 / 100 # If contour isn't covering 5% of total area of image then it probably is too small
    for tupl in level1:
        contour = contours[tupl[0]]
        area = cv2.contourArea(contour)
        if area > tooSmall:
            significant.append([contour, area])
        # Draw the contour on the original image
        cv2.drawContours(img, [contour], 0, (0,255,0),2, cv2.LINE_AA, maxLevel=1)

    significant.sort(key=lambda x: x[1])
    #print ([x[1] for x in significant]);
    mx = (0,0,0,0)      # biggest bounding box so far
    mx_area = 0
    for cont in contours:
        x,y,w,h = cv2.boundingRect(cont)
        area = w*h
        if area > mx_area:
            mx = x,y,w,h
            mx_area = area
    x,y,w,h = mx   # unpack the largest bounding box after the loop has finished

    # Output to file
    roi = img[y:y+h,x:x+w]
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5,5),0)
    ret3, thresh = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
    cv2_imshow(thresh)
    text = pytesseract.image_to_string(roi)
    print(text)
    print("\n")
    print(pytesseract.image_to_string(thresh))
    print("\n")
    return [x[0] for x in significant]

edgeImg_8u = np.asarray(thresh, np.uint8)

# Find contours
significant = findSignificantContours(img, edgeImg_8u)
mask = thresh.copy()
mask[mask > 0] = 0
cv2.fillPoly(mask, significant, 255)
# Invert mask
mask = np.logical_not(mask)

# Finally, remove the background
img[mask] = 0

Tesseract can't extract the text from this image. Is there a way I can rotate the image to align the text perfectly and then feed it to pytesseract? Please let me know if my question requires any more clarification.

Solution

Here's a simple approach:

  1. Obtain binary image. Load image, convert to grayscale, Gaussian blur, then Otsu's threshold.

  2. Find contours and sort for largest contour. We find contours, then sort them by contour area with cv2.contourArea to isolate the rectangular contour.

  3. Perform perspective transform. Next we perform contour approximation with cv2.approxPolyDP to obtain the rectangular contour. Finally we utilize imutils.perspective.four_point_transform to actually obtain the bird's eye view of the image.


Binary image (screenshot omitted)

Result (screenshot omitted)

To actually extract the text, take a look at these posts (a minimal OCR sketch follows the list):

  1. Use pytesseract OCR to recognize text from an image

  2. Cleaning image for OCR

  3. Detect text area in an image using python and opencv
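
As a quick, hedged illustration of those suggestions, here is a minimal sketch of feeding the corrected image to pytesseract. It assumes pytesseract and the Tesseract engine are installed, and the filename warped.png (the saved output of the code below) is hypothetical:

import cv2
import pytesseract

# Load the deskewed image (hypothetical filename; produced by the code below)
warped = cv2.imread("warped.png")

# Grayscale + Otsu's threshold gives Tesseract clean black-on-white text
gray = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

# --psm 6 tells Tesseract to treat the image as a single uniform block of text
text = pytesseract.image_to_string(thresh, config="--psm 6")
print(text)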

Code

from imutils.perspective import four_point_transform
import cv2
import numpy

# Load image, grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread("1.jpg")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (7,7), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

# Find contours and sort for largest contour
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
displayCnt = None

for c in cnts:
    # Perform contour approximation
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    if len(approx) == 4:
        displayCnt = approx
        break

# Obtain bird's eye view of image
warped = four_point_transform(image, displayCnt.reshape(4, 2))

cv2.imshow("thresh", thresh)
cv2.imshow("warped", warped)
cv2.waitKey()
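
One caveat: if cv2.approxPolyDP never returns exactly four points, displayCnt stays None and four_point_transform will raise. As a fallback, a plain rotation deskew built on cv2.minAreaRect can be swapped in. The sketch below is an alternative technique, not part of the original answer, and it assumes the classic OpenCV angle convention where minAreaRect returns an angle in (-90, 0] (newer OpenCV versions return [0, 90), so the correction term may need adjusting):

import cv2
import numpy as np

def deskew(image):
    # Collect the coordinates of all foreground (text) pixels
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 0, 255,
                           cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    coords = np.column_stack(np.where(thresh > 0)).astype(np.float32)

    # Angle of the minimum-area rectangle enclosing the text pixels
    angle = cv2.minAreaRect(coords)[-1]
    angle = -(90 + angle) if angle < -45 else -angle

    # Rotate about the image center to undo the skew
    h, w = image.shape[:2]
    M = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
    return cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC,
                          borderMode=cv2.BORDER_REPLICATE)

# Usage sketch: rotated = deskew(cv2.imread("1.jpg"))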
