使用 OpenCV 对含文字的图像进行二值化 [英] opencv binarize images with text

查看:187
本文介绍了使用 OpenCV 对含文字的图像进行二值化的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我需要对含有文字的图像进行二值化。代码在大多数情况下效果很好,但在某些情况下输出为空(全白图像)。



代码



  / * 
* Compile
*#g ++ txtbin.cpp -o txtbin`pkg-config opencv --cflags --libs `
*
*运行
*#./txtbin input.jpg output.png
* /

#includestring
#includefstream
#include/usr/include/opencv2/opencv.hpp
#include/usr/include/boost/tuple/tuple.hpp

使用namespace std;
using namespace cv;
using namespace boost;

void CalcBlockMeanVariance(Mat& Img,Mat& Res,float blockSide = 21,float contrast = 0.01){
/ *
* blockSide:对于图像中较大的字体
*对比度:对于较低对比度图像设置较小
* /

Mat I;
Img.convertTo(I,CV_32FC1);
Res = Mat :: zeros(Img.rows / blockSide,Img.cols / blockSide,CV_32FC1);
Mat inpaintmask;
Mat补丁;
Mat smallImg;
Scalar m,s;

for(int i = 0; i for(int j = 0; j patch =
meanStdDev(patch,m,s);

if(s [0]> contrast){
Res.at< float>(i / blockSide,j / blockSide)= m [0]
}
else {
Res.at< float>(i / blockSide,j / blockSide)= 0;
}
}
}

resize(I,smallImg,Res.size());

threshold(Res,inpaintmask,0.02,1.0,THRESH_BINARY);

Mat inpainted;
smallImg.convertTo(smallImg,CV_8UC1,255);

inpaintmask.convertTo(inpaintmask,CV_8UC1);
inpaint(smallImg,inpaintmask,inpainted,5,INPAINT_TELEA);

resize(inpainted,Res,Img.size());
Res.convertTo(Res,CV_32FC1,1.0 / 255.0);
}

tuple< int,int,int,int> detect_text_box(string input,Mat& res,bool draw_contours = false){
Mat large = imread(input);

bool test_output = false;

int
top = large.rows,
bottom = 0,
left = large.cols,
right = 0;

int
rect_bottom,
rect_right;

Mat rgb;
// downsample并用它来处理
pyrDown(large,rgb);
Mat small;
cvtColor(rgb,small,CV_BGR2GRAY);
//形态梯度
Mat grad;
Mat morphKernel = getStructuringElement(MORPH_ELLIPSE,Size(3,3));
morphologyEx(small,grad,MORPH_GRADIENT,morphKernel);
// binarize
Mat bw;
threshold(grad,bw,0.0,255.0,THRESH_BINARY | THRESH_OTSU);
//连接水平定向区域
Mat connected;
morphKernel = getStructuringElement(MORPH_RECT,Size(9,1));
morphologyEx(bw,connected,MORPH_CLOSE,morphKernel);
//找到轮廓
Mat mask = Mat :: zeros(bw.size(),CV_8UC1);
vector< vector< Point> >轮廓;
vector< Vec4i>层次;
findContours(connected,contour,hierarchy,CV_RETR_CCOMP,CV_CHAIN_APPROX_SIMPLE,Point(0,0));
// filter contours
for(int idx = 0; idx> = 0; idx = hierarchy [idx] [0]){
Rect rect = boundingRect(contoururs [idx]) ;
Mat maskROI(mask,rect);
maskROI = Scalar(0,0,0);
//填充轮廓
drawContours(mask,contour,idx,Scalar(255,255,255),CV_FILLED);
//填充区域中的非零像素比例
double r =(double)countNonZero(maskROI)/(rect.width * rect.height);

//假设如果包含文本则至少有45%的区域被填充
if(r> 0.45&&
(rect.height> 8& ;& rect.width> 8)//区域大小的约束
//这两个条件本身并不是非常鲁棒,最好使用东西
//像水平中的有效峰的数量投影作为第三条件
){
if(draw_contours){
rectangle(res,Rect(rect.x * 2,rect.y * 2,rect.width * 2,rect。 height * 2),Scalar(0,255,0),2);
}

if(test_output){
rectangle(rgb,rect,Scalar(0,255,0),2);
}

if(rect.y< top){
top = rect.y;
}
rect_bottom = rect.y + rect.height;
if(rect_bottom> bottom){
bottom = rect_bottom;
}
if(rect.x< left){
left = rect.x;
}
rect_right = rect.x + rect.width;
if(rect_right> right){
right = rect_right;
}
}
}

if(draw_contours){
rectangle(res,Point(left * 2,top * 2),Point * 2,bottom * 2),Scalar(0,0,255),2);
}

if(test_output){
rectangle(rgb,Point(left,top),Point(right,bottom),Scalar(0,0,255),2 );
imwrite(string(test_text_contours.jpg),rgb);
}

return make_tuple(left * 2,top * 2,(right-left)* 2,(bottom-top)* 2);
}

int main(int argc,char * argv []){
string input;
string output =output.png;

int
width = 0,
height = 0;

bool
crop = false,
draw = false;

float margin = 0;

//如果参数丢失则返回错误
if(argc <3){
cerr<< \\\
Usage:txtbin input [options] output\\\
\\\

选项:\\\

\tw< number> - 设置最大宽度)\\\

\th< number> - set max height(保持宽高比)\\\

\tc - 裁剪文字内容contour\\\

\ tm< number> - 添加边距(以%为单位的数字)\ n
\td-绘制文本内容轮廓(调试)\ n endl;
return 1;
}

//解析参数
for(int i = 1; i













解决方案

Ok :)
把 blockSide 设置得小一些(例如 7),就能得到如下所示的结果图像。合适的取值取决于字体大小:字体越小,需要的块尺寸越小,否则文字会被过滤掉,得到的就是空白图像。

  include< iostream> 
#include< vector>
#include< stdio.h>
#include< stdarg.h>
#include/usr/include/opencv2/opencv.hpp
#includefstream
#includeiostream
using namespace std;
using namespace cv;

void CalcBlockMeanVariance(Mat& Img,Mat& Res,float blockSide = 9)// blockSide-参数(对于图像上较大的字体设置较大)
{
Mat I ;
Img.convertTo(I,CV_32FC1);
Res = Mat :: zeros(Img.rows / blockSide,Img.cols / blockSide,CV_32FC1);
Mat inpaintmask;
Mat补丁;
Mat smallImg;
Scalar m,s;

for(int i = 0; i {
for(int j = 0; j {
patch = I(Range(i,i + blockSide + 1),Range(j,j + blockSide + 1)
cv :: meanStdDev(patch,m,s);
if(s [0]> 0.01)//阈值参数(对于较低对比度图像设置得较小)
{
Res.at< float>(i / blockSide,j / blockSide) = m [0];
} else
{
Res.at< float>(i / blockSide,j / blockSide)= 0;
}
}
}

cv :: resize(I,smallImg,Res.size());

cv :: threshold(Res,inpaintmask,0.02,1.0,cv :: THRESH_BINARY);

Mat inpainted;
smallImg.convertTo(smallImg,CV_8UC1,255);

inpaintmask.convertTo(inpaintmask,CV_8UC1);
inpaint(smallImg,inpaintmask,inpainted,5,INPAINT_TELEA);

cv :: resize(inpainted,Res,Img.Size());
Res.convertTo(Res,CV_32FC1,1.0 / 255.0);

}

int main(int argc,char ** argv)
{
namedWindow(Img);
namedWindow(Edges);
// Mat Img = imread(D:\\ImagesForTest\\ BookPage.JPG,0);
Mat Img = imread(test2.jpg,0);
Mat res;
Img.convertTo(Img,CV_32FC1,1.0 / 255.0);
CalcBlockMeanVariance(Img,res);
res = 1.0-res;
res = Img + res;
imshow(Img,Img);
cv :: threshold(res,res,0.85,1,cv :: THRESH_BINARY);
cv :: resize(res,res,cv :: Size(res.cols / 2,res.rows / 2));
imwrite(result.jpg,res * 255);
imshow(Edges,res);
waitKey(0);

return 0;
}








I need to binarize images that contain text. It works very well, but in some cases the output is empty (an all-white image).

code

/*
 *  Compile
 *  # g++ txtbin.cpp -o txtbin `pkg-config opencv --cflags --libs`
 *
 *  Run
 *  # ./txtbin input.jpg output.png
 */

#include "string"
#include "fstream"
#include "/usr/include/opencv2/opencv.hpp"
#include "/usr/include/boost/tuple/tuple.hpp"

using namespace std;
using namespace cv;
using namespace boost;

/**
 * Builds a per-pixel background estimate of a text image.
 *
 * The image is split into square blocks. Blocks whose standard deviation
 * exceeds `contrast` are marked and filled in from the surrounding
 * low-contrast blocks with inpaint(); the result is scaled back to the
 * size of Img and stored in Res as CV_32FC1 (inpainted 8-bit values
 * divided by 255).
 *
 * @param Img        input image; the caller in this file passes a
 *                   single-channel float image scaled to [0, 1]
 * @param Res        output, same size as Img, CV_32FC1
 * @param blockSide  block edge in pixels (set greater for larger fonts);
 *                   the fractional part is discarded
 * @param contrast   minimum standard deviation for a block to be marked
 *                   (set smaller for lower contrast images)
 *
 * Throws cv::Exception (via CV_Assert) when blockSide < 1 or when Img is
 * smaller than one block in either dimension.
 */
void CalcBlockMeanVariance(Mat& Img, Mat& Res, float blockSide=21, float contrast=0.01){
    // Use one integral block size everywhere instead of mixing float
    // arithmetic into loop counters and matrix indices.
    const int block = static_cast<int>(blockSide);
    CV_Assert(block >= 1);
    CV_Assert(Img.rows >= block && Img.cols >= block);

    Mat I;
    Img.convertTo(I, CV_32FC1);
    Res = Mat::zeros(Img.rows / block, Img.cols / block, CV_32FC1);

    Scalar m, s;
    for(int bi = 0; bi < Res.rows; bi++){
        const int rowBegin = bi * block;
        // Patches are one pixel larger than the block; clamp at the image
        // edge so the last block is still analysed when the image size is
        // an exact multiple of the block size.
        const int rowEnd = (rowBegin + block + 1 < Img.rows) ? rowBegin + block + 1 : Img.rows;

        for(int bj = 0; bj < Res.cols; bj++){
            const int colBegin = bj * block;
            const int colEnd = (colBegin + block + 1 < Img.cols) ? colBegin + block + 1 : Img.cols;

            Mat patch = I(Range(rowBegin, rowEnd), Range(colBegin, colEnd));
            meanStdDev(patch, m, s);

            // Keep the block mean only for blocks with enough contrast.
            Res.at<float>(bi, bj) = (s[0] > contrast) ? static_cast<float>(m[0]) : 0.0f;
        }
    }

    // One pixel per block: the downscaled image is what gets inpainted.
    Mat smallImg;
    resize(I, smallImg, Res.size());

    // Non-zero mask pixels are the ones inpaint() replaces.
    Mat inpaintmask;
    threshold(Res, inpaintmask, 0.02, 1.0, THRESH_BINARY);

    Mat inpainted;
    smallImg.convertTo(smallImg, CV_8UC1, 255);

    inpaintmask.convertTo(inpaintmask, CV_8UC1);
    inpaint(smallImg, inpaintmask, inpainted, 5, INPAINT_TELEA);

    resize(inpainted, Res, Img.size());
    Res.convertTo(Res, CV_32FC1, 1.0 / 255.0);
}

tuple<int, int, int, int> detect_text_box(string input, Mat& res, bool draw_contours=false){
    Mat large = imread(input);

    bool test_output = false;

    int
        top = large.rows,
        bottom = 0,
        left = large.cols,
        right = 0;

    int
        rect_bottom,
        rect_right;

    Mat rgb;
    // downsample and use it for processing
    pyrDown(large, rgb);
    Mat small;
    cvtColor(rgb, small, CV_BGR2GRAY);
    // morphological gradient
    Mat grad;
    Mat morphKernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
    morphologyEx(small, grad, MORPH_GRADIENT, morphKernel);
    // binarize
    Mat bw;
    threshold(grad, bw, 0.0, 255.0, THRESH_BINARY | THRESH_OTSU);
    // connect horizontally oriented regions
    Mat connected;
    morphKernel = getStructuringElement(MORPH_RECT, Size(9, 1));
    morphologyEx(bw, connected, MORPH_CLOSE, morphKernel);
    // find contours
    Mat mask = Mat::zeros(bw.size(), CV_8UC1);
    vector<vector<Point> > contours;
    vector<Vec4i> hierarchy;
    findContours(connected, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, Point(0, 0));
    // filter contours
    for(int idx = 0; idx >= 0; idx = hierarchy[idx][0]){
        Rect rect = boundingRect(contours[idx]);
        Mat maskROI(mask, rect);
        maskROI = Scalar(0, 0, 0);
        // fill the contour
        drawContours(mask, contours, idx, Scalar(255, 255, 255), CV_FILLED);
        // ratio of non-zero pixels in the filled region
        double r = (double)countNonZero(maskROI) / (rect.width * rect.height);

        // assume at least 45% of the area is filled if it contains text
        if (r > 0.45 && 
        (rect.height > 8 && rect.width > 8) // constraints on region size
        // these two conditions alone are not very robust. better to use something 
        //like the number of significant peaks in a horizontal projection as a third condition
        ){
            if(draw_contours){
                rectangle(res, Rect(rect.x * 2, rect.y * 2, rect.width * 2, rect.height * 2), Scalar(0, 255, 0), 2);
            }

            if(test_output){
                rectangle(rgb, rect, Scalar(0, 255, 0), 2);
            }

            if(rect.y < top){
                top = rect.y;
            }
            rect_bottom = rect.y + rect.height;
            if(rect_bottom > bottom){
                bottom = rect_bottom;
            }
            if(rect.x < left){
                left = rect.x;
            }
            rect_right = rect.x + rect.width;
            if(rect_right > right){
                right = rect_right;
            }
        }
    }

    if(draw_contours){
        rectangle(res, Point(left * 2, top * 2), Point(right * 2, bottom * 2), Scalar(0, 0, 255), 2);
    }

    if(test_output){
        rectangle(rgb, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 2);
        imwrite(string("test_text_contours.jpg"), rgb);
    }

    return make_tuple(left * 2, top * 2, (right - left) * 2, (bottom - top) * 2);
}

int main(int argc, char* argv[]){
    string input;
    string output = "output.png";

    int
        width = 0,
        height = 0;

    bool
        crop = false,
        draw = false;

    float margin = 0;

    //  Return error if arguments are missing
    if(argc < 3){
        cerr << "\nUsage: txtbin input [options] output\n\n"
            "Options:\n"
            "\t-w <number>          -- set max width (keeps aspect ratio)\n"
            "\t-h <number>          -- set max height (keeps aspect ratio)\n"
            "\t-c                   -- crop text content contour\n"
            "\t-m <number>          -- add margins (number in %)\n"
            "\t-d                   -- draw text content contours (debugging)\n" << endl;
        return 1;
    }

    //  Parse arguments
    for(int i = 1; i < argc; i++){
        if(i == 1){
            input = string(argv[i]);

            //  Return error if input file is invalid
            ifstream stream(input.c_str());
            if(!stream.good()){
                cerr << "Error: Input file is invalid!" << endl;
                return 1;
            }
        }
        else if(string(argv[i]) == "-w"){
            width = atoi(argv[++i]);
        }
        else if(string(argv[i]) == "-h"){
            height = atoi(argv[++i]);
        }
        else if(string(argv[i]) == "-c"){
            crop = true;
        }
        else if(string(argv[i]) == "-m"){
            margin = atoi(argv[++i]);
        }
        else if(string(argv[i]) == "-d"){
            draw = true;
        }
        else if(i == argc - 1){
            output = string(argv[i]);
        }
    }

    Mat Img = imread(input, CV_LOAD_IMAGE_GRAYSCALE);
    Mat res;
    Img.convertTo(Img, CV_32FC1, 1.0 / 255.0);
    CalcBlockMeanVariance(Img, res);
    res = 1.0 - res;
    res = Img + res;
    threshold(res, res, 0.85, 1, THRESH_BINARY);

    int
        txt_x,
        txt_y,
        txt_width,
        txt_height;

    if(crop || draw){
        tie(txt_x, txt_y, txt_width, txt_height) = detect_text_box(input, res, draw);
    }

    if(crop){
        //res = res(Rect(txt_x, txt_y, txt_width, txt_height)).clone();
        res = res(Rect(txt_x, txt_y, txt_width, txt_height));
    }

    if(margin){
        int border = res.cols * margin / 100;
        copyMakeBorder(res, res, border, border, border, border, BORDER_CONSTANT, Scalar(255, 255, 255));
    }

    float
        width_input = res.cols,
        height_input = res.rows;

    bool resized = false;

    //  Downscale image
    if(width > 0 && width_input > width){
        float scale = width_input / width;
        width_input /= scale;
        height_input /= scale;
        resized = true;
    }
    if(height > 0 && height_input > height){
        float scale = height_input / height;
        width_input /= scale;
        height_input /= scale;
        resized = true;
    }
    if(resized){
        resize(res, res, Size(round(width_input), round(height_input)));
    }

    imwrite(output, res * 255);

    return 0;
}

解决方案

Ok :) Set blockSide smaller (7, for instance) and it will give you the result image shown below. The right value depends on the font size: smaller fonts need a smaller block size, otherwise the text is filtered out and you get an empty image.

#include <iostream>
#include <vector>
#include <stdio.h>
#include <stdarg.h>
#include "/usr/include/opencv2/opencv.hpp"
#include "fstream"
#include "iostream"
using namespace std;
using namespace cv;

/**
 * Estimates the background of a text image block by block.
 *
 * Blocks with a standard deviation above 0.01 are marked and replaced by
 * inpaint() from the remaining blocks; the inpainted image is resized to
 * the size of Img and written to Res as CV_32FC1 (8-bit values / 255).
 *
 * @param Img        input image; main() in this program passes a grayscale
 *                   float image scaled to [0, 1]
 * @param Res        output, same size as Img, CV_32FC1
 * @param blockSide  block edge in pixels (set greater for larger font on
 *                   image); the fractional part is discarded
 *
 * Throws cv::Exception (via CV_Assert) when blockSide < 1 or when Img is
 * smaller than one block in either dimension.
 */
void CalcBlockMeanVariance(Mat& Img,Mat& Res,float blockSide=9)
{
    // Integral block size: avoids float arithmetic in loop counters and indices.
    const int side=static_cast<int>(blockSide);
    CV_Assert(side>=1);
    CV_Assert(Img.rows>=side && Img.cols>=side);

    Mat I;
    Img.convertTo(I,CV_32FC1);
    Res=Mat::zeros(Img.rows/side,Img.cols/side,CV_32FC1);

    Scalar m,s;
    for(int by=0;by<Res.rows;by++)
    {
        const int y0=by*side;
        // Patches span side+1 pixels; clamp to the image so the last block
        // is not skipped when the size is an exact multiple of the block.
        const int y1=(y0+side+1<Img.rows) ? y0+side+1 : Img.rows;

        for(int bx=0;bx<Res.cols;bx++)
        {
            const int x0=bx*side;
            const int x1=(x0+side+1<Img.cols) ? x0+side+1 : Img.cols;

            Mat patch=I(Range(y0,y1),Range(x0,x1));
            cv::meanStdDev(patch,m,s);

            // Thresholding parameter (set smaller for lower contrast image)
            if(s[0]>0.01)
            {
                Res.at<float>(by,bx)=static_cast<float>(m[0]);
            }
            else
            {
                Res.at<float>(by,bx)=0;
            }
        }
    }

    // One pixel per block; this small image is the one that is inpainted.
    Mat smallImg;
    cv::resize(I,smallImg,Res.size());

    // Non-zero mask pixels are the ones inpaint() replaces.
    Mat inpaintmask;
    cv::threshold(Res,inpaintmask,0.02,1.0,cv::THRESH_BINARY);

    Mat inpainted;
    smallImg.convertTo(smallImg,CV_8UC1,255);

    inpaintmask.convertTo(inpaintmask,CV_8UC1);
    inpaint(smallImg, inpaintmask, inpainted, 5, INPAINT_TELEA);

    cv::resize(inpainted,Res,Img.size());
    Res.convertTo(Res,CV_32FC1,1.0/255.0);
}

int main( int argc, char** argv )
{
    namedWindow("Img");
    namedWindow("Edges");
    //Mat Img=imread("D:\\ImagesForTest\\BookPage.JPG",0);
    Mat Img=imread("test2.jpg",0);
    Mat res;
    Img.convertTo(Img,CV_32FC1,1.0/255.0);
    CalcBlockMeanVariance(Img,res); 
    res=1.0-res;
    res=Img+res;
    imshow("Img",Img);
    cv::threshold(res,res,0.85,1,cv::THRESH_BINARY);
    cv::resize(res,res,cv::Size(res.cols/2,res.rows/2));
    imwrite("result.jpg",res*255);
    imshow("Edges",res);
    waitKey(0);

    return 0;
}

这篇关于使用 OpenCV 对含文字的图像进行二值化的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆