计算文本OpenCV的偏斜 [英] Calculating skew of text OpenCV

查看:410
本文介绍了计算文本OpenCV的偏斜的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我试图计算图像中文本的偏斜,所以我可以纠正它最好的OCR结果。

I am trying to calculate the skew of text in an image so I can correct it for the best OCR results.

目前这是我使用的函数:

Currently this is the function I am using:

double compute_skew(Mat &img)
{

    // Binarize
    cv::threshold(img, img, 225, 255, cv::THRESH_BINARY);

    // Invert colors
    cv::bitwise_not(img, img);

    cv::Mat element = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(5, 3));
    cv::erode(img, img, element);

    std::vector<cv::Point> points;
    cv::Mat_<uchar>::iterator it = img.begin<uchar>();
    cv::Mat_<uchar>::iterator end = img.end<uchar>();
    for (; it != end; ++it)
        if (*it)
            points.push_back(it.pos());

    cv::RotatedRect box = cv::minAreaRect(cv::Mat(points));

    double angle = box.angle;
    if (angle < -45.)
        angle += 90.;

    cv::Point2f vertices[4];
    box.points(vertices);
    for(int i = 0; i < 4; ++i)
        cv::line(img, vertices[i], vertices[(i + 1) % 4], cv::Scalar(255, 0, 0), 1, CV_AA);

    return angle;
}



当我看看调试时的角度,我得到 0.000000
>

但是当我给它这个图像,我得到约16度的偏斜的正确结果:

However when I give it this image I get proper results of a skew of about 16 degrees:

如何正确检测第一张图像的倾斜?

How can I properly detect the skew in the first image?

推荐答案

有一些其他方法来获得偏斜度,1)通过hough transform 2)通过水平投影轮廓。在不同的角度箱中旋转图像并计算水平投影。具有最大水平直方图值的角度是偏斜角。

there are a few other ways to get the skew degree, 1) by hough transform 2) by horizontal projection profile. rotate the image in different angle bins and calculate horizontal projection. the angle with the greatest horizontal histogram value is the deskewed angle.

我已提供以下实施1)。我相信这是优于你使用的拳击方法,因为它要求你完全清除任何噪音的图像,这在大多数时间是不可能的。

i have provided below implementation of 1). i believe this to be superior to the boxing method you are using because it requires that you completely clean the image of any noise,which just isnt possible in most of the time.

你应该知道,如果有太多的噪音,该方法不工作。你可以以不同的方式减少噪声,这取决于你想要作为图像中最主要的线的类型。我已经提供了两种方法。一定要使用参数和阈值等。

you should know that the method doesnt work well if there's too much noise. you can reduce noise in different ways depending on what type of "line" you want to treat as the most dominant in the image. i have provided two methods for this. be sure to play with parameters and threshold etc.

结果(都使用preprocess2运行,所有运行使用相同的参数集)

results (all run using preprocess2, all run using same parameter set)

















代码

#include <opencv2/opencv.hpp>
using namespace cv;
using namespace std;

void hough_transform(Mat& im,Mat& orig,double* skew)
{
    double max_r=sqrt(pow(.5*im.cols,2)+pow(.5*im.rows,2));
    int angleBins = 180;
    Mat acc = Mat::zeros(Size(2*max_r,angleBins),CV_32SC1);
    int cenx = im.cols/2;
    int ceny = im.rows/2;
    for(int x=1;x<im.cols-1;x++)
    {
        for(int y=1;y<im.rows-1;y++)
        {
            if(im.at<uchar>(y,x)==255)
            {
                for(int t=0;t<angleBins;t++)
                {
                    double r =(x-cenx)*cos((double)t/angleBins*CV_PI)+(y-ceny)*sin((double)t    /angleBins*CV_PI);
                    r+=max_r;
                    acc.at<int>(t,int(r))++;
                }
            }
        }
    }
    Mat thresh;
    normalize(acc,acc,255,0,NORM_MINMAX);
    convertScaleAbs(acc,acc);
    /*debug
    Mat cmap;
    applyColorMap(acc,cmap,COLORMAP_JET);
    imshow("cmap",cmap);
    imshow("acc",acc);*/

    Point maxLoc;
    minMaxLoc(acc,0,0,0,&maxLoc);
    double theta = (double)maxLoc.y/angleBins*CV_PI;
    double rho = maxLoc.x-max_r;
    if(abs(sin(theta))<0.000001)//check vertical
    {
        //when vertical, line equation becomes
        //x = rho
        double m = -cos(theta)/sin(theta);
        Point2d p1 = Point2d(rho+im.cols/2,0);
        Point2d p2 = Point2d(rho+im.cols/2,im.rows);
        line(orig,p1,p2,Scalar(0,0,255),1);
        *skew=90;
        cout<<"skew angle "<<" 90"<<endl;
    }else
    {
        //convert normal form back to slope intercept form
        //y = mx + b
        double m = -cos(theta)/sin(theta);
        double b = rho/sin(theta)+im.rows/2.-m*im.cols/2.;
        Point2d p1 = Point2d(0,b);
        Point2d p2 = Point2d(im.cols,im.cols*m+b);
        line(orig,p1,p2,Scalar(0,0,255),1);
        double skewangle;
        skewangle= p1.x-p2.x>0? (atan2(p1.y-p2.y,p1.x-p2.x)*180./CV_PI):(atan2(p2.y-p1.y,p2.    x-p1.x)*180./CV_PI);
        *skew=skewangle;
        cout<<"skew angle "<<skewangle<<endl;
    }
    imshow("orig",orig);
}

Mat preprocess1(Mat& im)
{
    Mat ret = Mat::zeros(im.size(),CV_32SC1);

    for(int x=1;x<im.cols-1;x++)
    {
        for(int y=1;y<im.rows-1;y++)
        {

            int gy = (im.at<uchar>(y-1,x+1)-im.at<uchar>(y-1,x-1))
                +2*(im.at<uchar>(y,x+1)-im.at<uchar>(y,x-1))
                +(im.at<uchar>(y+1,x+1)-im.at<uchar>(y+1,x-1));
            int gx = (im.at<uchar>(y+1,x-1) -im.at<uchar>(y-1,x-1))
                +2*(im.at<uchar>(y+1,x)-im.at<uchar>(y-1,x))
                +(im.at<uchar>(y+1,x+1)-im.at<uchar>(y-1,x+1));
            int g2 = (gy*gy + gx*gx);
            ret.at<int>(y,x)=g2;
        }
    }
    normalize(ret,ret,255,0,NORM_MINMAX);
    ret.convertTo(ret,CV_8UC1);
    threshold(ret,ret,50,255,THRESH_BINARY);
    return ret;
}

Mat preprocess2(Mat& im)
{
    // 1) assume white on black and does local thresholding
    // 2) only allow voting top is white and buttom is black(buttom text line)
    Mat thresh;
    //thresh=255-im;
    thresh=im.clone();
    adaptiveThreshold(thresh,thresh,255,CV_ADAPTIVE_THRESH_GAUSSIAN_C,THRESH_BINARY,15,-2);
    Mat ret = Mat::zeros(im.size(),CV_8UC1);
    for(int x=1;x<thresh.cols-1;x++)
    {
        for(int y=1;y<thresh.rows-1;y++)
        {
            bool toprowblack = thresh.at<uchar>(y-1,x)==0 ||  thresh.at<uchar>(y-1,x-1)==0     || thresh.at<uchar>(y-1,x+1)==0;
            bool belowrowblack = thresh.at<uchar>(y+1,x)==0 ||  thresh.at<uchar>(y+1,    x-1)==0 || thresh.at<uchar>(y+1,x+1)==0;

            uchar pix=thresh.at<uchar>(y,x);
            if((!toprowblack && pix==255 && belowrowblack))
            {
                ret.at<uchar>(y,x) = 255;
            }
        }
    }
    return ret;
}
Mat rot(Mat& im,double thetaRad)
{
    cv::Mat rotated;
    double rskew = thetaRad* CV_PI/180;
    double nw = abs(sin(thetaRad))*im.rows+abs(cos(thetaRad))*im.cols;
    double nh = abs(cos(thetaRad))*im.rows+abs(sin(thetaRad))*im.cols;
    cv::Mat rot_mat = cv::getRotationMatrix2D(Point2d(nw*.5,nh*.5), thetaRad*180/CV_PI, 1);
    Mat pos = Mat::zeros(Size(1,3),CV_64FC1);
    pos.at<double>(0)=(nw-im.cols)*.5;
    pos.at<double>(1)=(nh-im.rows)*.5;
    Mat res = rot_mat*pos;
    rot_mat.at<double>(0,2) += res.at<double>(0);
    rot_mat.at<double>(1,2) += res.at<double>(1);
    cv::warpAffine(im, rotated, rot_mat,Size(nw,nh), cv::INTER_LANCZOS4);
    return rotated;
}

int main(int argc, char** argv)
{
    string src="C:/data/skew.png";
    Mat im= imread(src);
    Mat gray;
    cvtColor(im,gray,CV_BGR2GRAY);

    Mat preprocessed = preprocess2(gray);
    imshow("preprocessed2",preprocessed);
    double skew;
    hough_transform(preprocessed,im,&skew);
    Mat rotated = rot(im,skew* CV_PI/180);
    imshow("corrected",rotated);

    waitKey(0);
    return 0;
}

这篇关于计算文本OpenCV的偏斜的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆