Can SWT (Stroke Width Transform) help OCR with screenshots?

Question

I have been trying to detect text in screenshots. The screenshots can contain arbitrary content; I just want to locate the text content.

It's OK if some non-text content is detected as text. My bottom line is that no text content is missed.

I found the following article:

But I haven't found a working implementation on Windows, and so far I have only seen it used on natural scenes, not screenshots. If anyone has implemented it on another platform, could you try it on the image below, so I can get a quick evaluation before I decide to implement it on Windows? Thanks.

Solution

UPDATE

The code in this answer does not seem to work as expected (at least I did not manage to make it work satisfactorily).

So I ran the code on which that implementation was based, which you can find here.

The (more reasonable) result is:

I'll leave the code below for future reference.


I adapted the mex implementation from here. The result on your image with the code below is:

I'll let you evaluate whether this is helpful to you. The code follows.

swt.h

#pragma once

#include <opencv2/opencv.hpp>
#include <vector>
#include <map>
#include <set>
#include <algorithm>

using namespace std;

namespace sw
{

#define PI 3.14159265

    struct Point2d {
        int x;
        int y;
        float SWT;
    };

    struct Point2dFloat {
        float x;
        float y;
    };

    struct Ray {
        Point2d p;
        Point2d q;
        std::vector<Point2d> points;
    };


    void strokeWidthTransform(const float * edgeImage,
        const float * gradientX,
        const float * gradientY,
        bool dark_on_light,
        float * SWTImage,
        int h, int w,
        std::vector<Ray> & rays) {
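        // For every edge pixel, walk along the gradient direction until an opposing
        // edge pixel is found; the distance between the two edges is taken as the
        // stroke width and written to every pixel crossed by the ray (keeping the
        // minimum where rays overlap).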
        // First pass
        float prec = .05f;
        for (int row = 0; row < h; row++){
            const float* ptr = edgeImage + row*w;
            for (int col = 0; col < w; col++){
                if (*ptr > 0) {
                    Ray r;

                    Point2d p;
                    p.x = col;
                    p.y = row;
                    r.p = p;
                    std::vector<Point2d> points;
                    points.push_back(p);

                    float curX = (float)col + 0.5f;
                    float curY = (float)row + 0.5f;
                    int curPixX = col;
                    int curPixY = row;
                    float G_x = gradientX[col + row*w];
                    float G_y = gradientY[col + row*w];
                    // normalize gradient
                    float mag = sqrt((G_x * G_x) + (G_y * G_y));
                    if (dark_on_light){
                        G_x = -G_x / mag;
                        G_y = -G_y / mag;
                    }
                    else {
                        G_x = G_x / mag;
                        G_y = G_y / mag;
                    }
                    while (true) {
                        curX += G_x*prec;
                        curY += G_y*prec;
                        if ((int)(floor(curX)) != curPixX || (int)(floor(curY)) != curPixY) {
                            curPixX = (int)(floor(curX));
                            curPixY = (int)(floor(curY));
                            // check if pixel is outside boundary of image
                            if (curPixX < 0 || (curPixX >= w) || curPixY < 0 || (curPixY >= h)) {
                                break;
                            }
                            Point2d pnew;
                            pnew.x = curPixX;
                            pnew.y = curPixY;
                            points.push_back(pnew);

                            if (edgeImage[curPixY*w + curPixX] > 0) {
                                r.q = pnew;
                                // dot product
                                float G_xt = gradientX[curPixY*w + curPixX];
                                float G_yt = gradientY[curPixY*w + curPixX];
                                mag = sqrt((G_xt * G_xt) + (G_yt * G_yt));
                                if (dark_on_light){
                                    G_xt = -G_xt / mag;
                                    G_yt = -G_yt / mag;
                                }
                                else {
                                    G_xt = G_xt / mag;
                                    G_yt = G_yt / mag;
                                }

                                if (acos(G_x * -G_xt + G_y * -G_yt) < PI / 2.0) {
                                    float length = sqrt(((float)r.q.x - (float)r.p.x)*((float)r.q.x - (float)r.p.x) + ((float)r.q.y - (float)r.p.y)*((float)r.q.y - (float)r.p.y));
                                    for (std::vector<Point2d>::iterator pit = points.begin(); pit != points.end(); pit++) {
                                        float* pSWT = SWTImage + w * pit->y + pit->x;
                                        if (*pSWT < 0) {
                                            *pSWT = length;
                                        }
                                        else {
                                            *pSWT = std::min(length, *pSWT);
                                        }
                                    }
                                    r.points = points;
                                    rays.push_back(r);
                                }
                                break;
                            }
                        }
                    }
                }
                ptr++;
            }
        }
    }


    bool Point2dSort(const Point2d &lhs, const Point2d &rhs) {
        return lhs.SWT < rhs.SWT;
    }

    void SWTMedianFilter(float * SWTImage, int h, int w,
        std::vector<Ray> & rays, float maxWidth = -1) {
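        // Second pass: clamp every pixel on a ray to that ray's median stroke width,
        // which suppresses the inflated widths produced at corners and junctions.
        // Rays whose median exceeds maxWidth are discarded (written back as -1).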
        for (std::vector<Ray>::iterator rit = rays.begin(); rit != rays.end(); rit++) {
            for (std::vector<Point2d>::iterator pit = rit->points.begin(); pit != rit->points.end(); pit++) {
                pit->SWT = SWTImage[w*pit->y + pit->x];
            }
            std::sort(rit->points.begin(), rit->points.end(), &Point2dSort);
            //std::nth_element( rit->points.begin(), rit->points.end(), rit->points.size()/2, &Point2dSort );
            float median = (rit->points[rit->points.size() / 2]).SWT;
            if (maxWidth > 0 && median >= maxWidth) {
                median = -1;
            }
            for (std::vector<Point2d>::iterator pit = rit->points.begin(); pit != rit->points.end(); pit++) {
                SWTImage[w*pit->y + pit->x] = std::min(pit->SWT, median);
            }
        }
    }

    typedef std::vector< std::set<int> > graph_t; // graph as a list of neighbors per node

    void connComp(const graph_t& g, std::vector<int>& c, int i, int l) {
        // starting from node i, label this connected component with label l
        if (i < 0 || i >= (int)g.size()) {
            return;
        }
        std::vector< int > stack;
        // push i
        stack.push_back(i);
        c[i] = l;
        while (!stack.empty()) {
            // pop
            i = stack.back();
            stack.pop_back();
            // go over all neighbors
            for (std::set<int>::const_iterator it = g[i].begin(); it != g[i].end(); it++) {
                if (c[*it] < 0) {
                    stack.push_back(*it);
                    c[*it] = l;
                }
            }
        }
    }
    int findNextToLabel(const graph_t& g, const vector<int>& c) {
        for (int i = 0; i < c.size(); i++) {
            if (c[i] < 0) {
                return i;
            }
        }
        return c.size();
    }

    int connected_components(const graph_t& g, vector<int>& c) {
        // check for empty graph!
        if (g.empty()) {
            return 0;
        }
        int i = 0;
        int num_conn = 0;
        do {
            connComp(g, c, i, num_conn);
            num_conn++;
            i = findNextToLabel(g, c);
        } while (i < g.size());
        return num_conn;
    }

    std::vector< std::vector<Point2d> >
        findLegallyConnectedComponents(const float* SWTImage, int h, int w,
        std::vector<Ray> & rays) {
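        // Group neighboring pixels (8-connectivity) whose stroke widths differ by at
        // most a factor of 3 into connected components; each component is a candidate
        // letter/stroke.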
        std::map<int, int> Map;
        std::map<int, Point2d> revmap;
        std::vector<std::vector<Point2d> > components; // empty
        int num_vertices = 0, idx = 0;
        graph_t g;
        // Number vertices for graph.  Associate each point with number
        for (int row = 0; row < h; row++){
            for (int col = 0; col < w; col++){
                idx = col + w * row;
                if (SWTImage[idx] > 0) {
                    Map[idx] = num_vertices;
                    Point2d p;
                    p.x = col;
                    p.y = row;
                    revmap[num_vertices] = p;
                    num_vertices++;
                    std::set<int> empty;
                    g.push_back(empty);
                }
            }
        }
        if (g.empty()) {
            return components; // nothing to do with an empty graph...
        }
        for (int row = 0; row < h; row++){
            for (int col = 0; col < w; col++){
                idx = col + w * row;
                if (SWTImage[idx] > 0) {
                    // check pixel to the right, right-down, down, left-down
                    int this_pixel = Map[idx];
                    float thisVal = SWTImage[idx];
                    if (col + 1 < w) {
                        float right = SWTImage[w*row + col + 1];
                        if (right > 0 && (thisVal / right <= 3.0 || right / thisVal <= 3.0)) {
                            g[this_pixel].insert(Map[w*row + col + 1]);
                            g[Map[w*row + col + 1]].insert(this_pixel);
                            //boost::add_edge(this_pixel, map.at(row * SWTImage->width + col + 1), g);
                        }
                    }
                    if (row + 1 < h) {
                        if (col + 1 < w) {
                            float right_down = SWTImage[w*(row + 1) + col + 1];
                            if (right_down > 0 && (thisVal / right_down <= 3.0 || right_down / thisVal <= 3.0)) {
                                g[this_pixel].insert(Map[w*(row + 1) + col + 1]);
                                g[Map[w*(row + 1) + col + 1]].insert(this_pixel);
                                // boost::add_edge(this_pixel, map.at((row+1) * SWTImage->width + col + 1), g);
                            }
                        }
                        float down = SWTImage[w*(row + 1) + col];
                        if (down > 0 && (thisVal / down <= 3.0 || down / thisVal <= 3.0)) {
                            g[this_pixel].insert(Map[w*(row + 1) + col]);
                            g[Map[w*(row + 1) + col]].insert(this_pixel);
                            //boost::add_edge(this_pixel, map.at((row+1) * SWTImage->width + col), g);
                        }
                        if (col - 1 >= 0) {
                            float left_down = SWTImage[w*(row + 1) + col - 1];
                            if (left_down > 0 && (thisVal / left_down <= 3.0 || left_down / thisVal <= 3.0)) {
                                g[this_pixel].insert(Map[w*(row + 1) + col - 1]);
                                g[Map[w*(row + 1) + col - 1]].insert(this_pixel);
                                //boost::add_edge(this_pixel, map.at((row+1) * SWTImage->width + col - 1), g);
                            }
                        }
                    }
                }
            }
        }

        std::vector<int> c(num_vertices, -1);
        int num_comp = connected_components(g, c);

        components.reserve(num_comp);
        //std::cout << "Before filtering, " << num_comp << " components and " <<     num_vertices << " vertices" << std::endl;
        for (int j = 0; j < num_comp; j++) {
            std::vector<Point2d> tmp;
            components.push_back(tmp);
        }
        for (int j = 0; j < num_vertices; j++) {
            Point2d p = revmap[j];
            (components[c[j]]).push_back(p);
        }

        return components;
    }

    enum {
        EIN = 0,
        GXIN,
        GYIN,
        DOLFIN,
        MAXWIN,
        NIN
    };



    void swt_mex(const float* edgeImage, const float* gradientX, const float* gradientY, float* SWTImage, float* pComp, int* nstrokes, int w, int h, bool dark_on_light)
    {
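        // Glue function kept from the original mex interface: runs the two SWT passes,
        // finds the connected components, and rasterizes each component's label
        // (1..nstrokes) into pComp.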
        float maxWidth = w;

        std::vector<Ray> rays;
        strokeWidthTransform(edgeImage, gradientX, gradientY, dark_on_light, SWTImage, h, w, rays);
        SWTMedianFilter(SWTImage, h, w, rays, maxWidth);

        std::vector<std::vector<Point2d> > components = findLegallyConnectedComponents(SWTImage, h, w, rays);

        *nstrokes = components.size();

        for (int ci = 0; ci < components.size(); ci++) {
            for (std::vector<Point2d>::iterator it = components[ci].begin(); it != components[ci].end(); it++) {
                pComp[w * it->y + it->x] = ci + 1;
            }
        }
    }


    void swt(const cv::Mat1b& img, cv::Mat1f& strokes, int* nstrokes, bool dark_on_light = true)
    {
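        // OpenCV front end: a Canny edge map plus lightly smoothed Sobel gradients feed
        // the transform; on return, `strokes` holds a per-pixel component label
        // (0 = background) and *nstrokes the number of components.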
        cv::Mat1b edgeMap;
        cv::Canny(img, edgeMap, 400, 200);

        cv::Mat1f floatEdgeMap;
        edgeMap.convertTo(floatEdgeMap, CV_32F);

        cv::Mat1b blurred;
        cv::GaussianBlur(img, blurred, cv::Size(5, 5), 0.3*(2.5 - 1) + .8);

        cv::Mat1f gx, gy;
        cv::Sobel(blurred, gx, CV_32F, 1, 0);
        cv::Sobel(blurred, gy, CV_32F, 0, 1);

        cv::medianBlur(gx, gx, 3);
        cv::medianBlur(gy, gy, 3);

        cv::Mat1f swtimg(img.rows, img.cols, -1.f);
        strokes = cv::Mat1f(img.rows, img.cols, 0.f);

        swt_mex((float*)floatEdgeMap.data, (float*)gx.data, (float*)gy.data, (float*)swtimg.data, (float*)strokes.data, nstrokes, img.cols, img.rows, dark_on_light);

    }
}

main

#include <opencv2/opencv.hpp>
#include "swt.h"
using namespace cv;

int main(int, char** argv)
{
    Mat1b img = cv::imread("path_to_image", IMREAD_GRAYSCALE);

    // Compute SWT
    Mat1f strokes;
    int nstrokes;
    sw::swt(img, strokes, &nstrokes);

    // Create color table
    vector<Vec3b> colors(nstrokes+1);
    colors[0] = Vec3b(0, 0, 0);

    RNG rng;
    for (int i = 0; i < nstrokes; ++i)
    {
        colors[i + 1] = Vec3b(rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255));
    }

    // Color the strokes
    Mat3b coloredStrokes(strokes.size(), Vec3b(0,0,0));

    for (int r = 0; r < strokes.rows; ++r)
    {
        for (int c = 0; c < strokes.cols; ++c)
        {
            coloredStrokes(r, c) = colors[(int)strokes(r, c)];
        }
    }

    imshow("Strokes", coloredStrokes);
    waitKey();

    return 0;
}
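
If the goal is to hand regions to an OCR engine, a possible follow-up step (not part of the original answer; a minimal sketch that assumes the strokes/nstrokes produced by sw::swt above, with a hypothetical helper name componentBoxes) is to compute one bounding box per stroke component:

#include <opencv2/opencv.hpp>
#include <vector>

// Hypothetical helper (not from the original answer): collect one bounding box
// per component label in the image produced by sw::swt().
std::vector<cv::Rect> componentBoxes(const cv::Mat1f& strokes, int nstrokes)
{
    std::vector<cv::Rect> boxes;
    for (int ci = 1; ci <= nstrokes; ++ci)
    {
        // Mask of the pixels labeled with component ci
        cv::Mat1b mask = (strokes == float(ci));
        std::vector<cv::Point> pts;
        cv::findNonZero(mask, pts);
        if (!pts.empty())
            boxes.push_back(cv::boundingRect(pts));
    }
    return boxes;
}

Note that the original SWT paper also filters letter candidates (by stroke-width variance, aspect ratio, size) and groups them into words and lines; nothing like that is done here, so expect many non-text components in the output.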
