OpenCV图像识别-设置ANN MLP [英] OpenCV image recognition - setting up ANN MLP
问题描述
我是OpenCV世界和神经网络的新手,但是我在C ++/Java方面有一定的编码经验.
I am new in OpenCV world and neural networks but I have some coding experience in C++/Java.
我创建了我的第一个ANN MLP并了解了XOR:
I created my first ANN MLP and learned it the XOR:
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ml/ml.hpp>
#include <iostream>
#include <iomanip>
using namespace cv;
using namespace ml;
using namespace std;
void print(Mat& mat, int prec)
{
for (int i = 0; i<mat.size().height; i++)
{
cout << "[";
for (int j = 0; j<mat.size().width; j++)
{
cout << fixed << setw(2) << setprecision(prec) << mat.at<float>(i, j);
if (j != mat.size().width - 1)
cout << ", ";
else
cout << "]" << endl;
}
}
}
int main()
{
const int hiddenLayerSize = 4;
float inputTrainingDataArray[4][2] = {
{ 0.0, 0.0 },
{ 0.0, 1.0 },
{ 1.0, 0.0 },
{ 1.0, 1.0 }
};
Mat inputTrainingData = Mat(4, 2, CV_32F, inputTrainingDataArray);
float outputTrainingDataArray[4][1] = {
{ 0.0 },
{ 1.0 },
{ 1.0 },
{ 0.0 }
};
Mat outputTrainingData = Mat(4, 1, CV_32F, outputTrainingDataArray);
Ptr<ANN_MLP> mlp = ANN_MLP::create();
Mat layersSize = Mat(3, 1, CV_16U);
layersSize.row(0) = Scalar(inputTrainingData.cols);
layersSize.row(1) = Scalar(hiddenLayerSize);
layersSize.row(2) = Scalar(outputTrainingData.cols);
mlp->setLayerSizes(layersSize);
mlp->setActivationFunction(ANN_MLP::ActivationFunctions::SIGMOID_SYM);
TermCriteria termCrit = TermCriteria(
TermCriteria::Type::COUNT + TermCriteria::Type::EPS,
100000000,
0.000000000000000001
);
mlp->setTermCriteria(termCrit);
mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP);
Ptr<TrainData> trainingData = TrainData::create(
inputTrainingData,
SampleTypes::ROW_SAMPLE,
outputTrainingData
);
mlp->train(trainingData
/*, ANN_MLP::TrainFlags::UPDATE_WEIGHTS
+ ANN_MLP::TrainFlags::NO_INPUT_SCALE
+ ANN_MLP::TrainFlags::NO_OUTPUT_SCALE*/
);
for (int i = 0; i < inputTrainingData.rows; i++) {
Mat sample = Mat(1, inputTrainingData.cols, CV_32F, inputTrainingDataArray[i]);
Mat result;
mlp->predict(sample, result);
cout << sample << " -> ";// << result << endl;
print(result, 0);
cout << endl;
}
return 0;
}
对于这个简单的问题,它非常有效,我也学习了该网络从1到10的二进制转换.
It works very well for this simple problem, I also learn this network the 1-10 to binary conversion.
但是我需要使用MLP进行简单的图像分类-路标.我编写了用于加载训练图像和准备矩阵进行学习的代码,但我无法训练网络-即使经过1000000次迭代,它也可以在一秒钟内学习"!并产生垃圾结果,所有输入都一样!
But i need to use MLP for simple image classification - road signs. I write the code for loading training images and preparing matrix for learning but I'm not able to train the network - it "learn" in one second even with 1 000 000 iterations! And it produce garbage results, the same for all inputs!
这是我的测试图像和源代码:
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ml/ml.hpp>
#include <iostream>
#include <chrono>
#include <memory>
#include <iomanip>
#include <climits>
#include <Windows.h>
using namespace cv;
using namespace ml;
using namespace std;
using namespace chrono;
const int WIDTH_SIZE = 50;
const int HEIGHT_SIZE = (int)(WIDTH_SIZE * sqrt(3)) / 2;
const int IMAGE_DATA_SIZE = WIDTH_SIZE * HEIGHT_SIZE;
void print(Mat& mat, int prec)
{
for (int i = 0; i<mat.size().height; i++)
{
cout << "[ ";
for (int j = 0; j<mat.size().width; j++)
{
cout << fixed << setw(2) << setprecision(prec) << mat.at<float>(i, j);
if (j != mat.size().width - 1)
cout << ", ";
else
cout << " ]" << endl;
}
}
}
bool loadImage(string imagePath, Mat& outputImage)
{
// load image in grayscale
Mat image = imread(imagePath, IMREAD_GRAYSCALE);
Mat temp;
// check for invalid input
if (image.empty()) {
cout << "Could not open or find the image" << std::endl;
return false;
}
// resize the image
Size size(WIDTH_SIZE, HEIGHT_SIZE);
resize(image, temp, size, 0, 0, CV_INTER_AREA);
// convert to float 1-channel
temp.convertTo(outputImage, CV_32FC1, 1.0/255.0);
return true;
}
vector<string> getFilesNamesInFolder(string folder)
{
vector<string> names;
char search_path[200];
sprintf(search_path, "%s/*.*", folder.c_str());
WIN32_FIND_DATA fd;
HANDLE hFind = ::FindFirstFile(search_path, &fd);
if (hFind != INVALID_HANDLE_VALUE) {
do {
// read all (real) files in current folder
// , delete '!' read other 2 default folder . and ..
if (!(fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
names.push_back(fd.cFileName);
}
} while (::FindNextFile(hFind, &fd));
::FindClose(hFind);
}
return names;
}
class Sign {
public:
enum class Category { A = 'A', B = 'B', C = 'C', D = 'D' };
Mat image;
Category category;
int number;
Sign(Mat& image, string name) :image(image) {
category = static_cast<Category>(name.at(0));
number = stoi(name.substr(2, name.length()));
};
};
vector<Sign> loadSignsFromFolder(String folderName) {
vector<Sign> roadSigns;
for (string fileName : getFilesNamesInFolder(folderName)) {
Mat image;
loadImage(folderName + fileName, image);
roadSigns.emplace_back(image, fileName.substr(0, (fileName.length() - 4))); //cut .png
}
return roadSigns;
}
void showSignsInWindows(vector<Sign> roadSigns) {
for (Sign sign : roadSigns) {
String windowName = "Sign " + to_string(sign.number);
namedWindow(windowName, WINDOW_AUTOSIZE);
imshow(windowName, sign.image);
}
waitKey(0);
}
Mat getInputDataFromSignsVector(vector<Sign> roadSigns) {
Mat roadSignsImageData;
for (Sign sign : roadSigns) {
Mat signImageDataInOneRow = sign.image.reshape(0, 1);
roadSignsImageData.push_back(signImageDataInOneRow);
}
return roadSignsImageData;
}
Mat getOutputDataFromSignsVector(vector<Sign> roadSigns) {
int signsCount = (int) roadSigns.size();
int signsVectorSize = signsCount + 1;
Mat roadSignsData(0, signsVectorSize, CV_32FC1);
int i = 1;
for (Sign sign : roadSigns) {
vector<float> outputTraningVector(signsVectorSize);
fill(outputTraningVector.begin(), outputTraningVector.end(), -1.0);
outputTraningVector[i++] = 1.0;
Mat tempMatrix(outputTraningVector, false);
roadSignsData.push_back(tempMatrix.reshape(0, 1));
}
return roadSignsData;
}
int main(int argc, char* argv[])
{
if (argc != 2) {
cout << " Usage: display_image ImageToLoadAndDisplay" << endl;
return -1;
}
const int hiddenLayerSize = 500;
vector<Sign> roadSigns = loadSignsFromFolder("../../../Znaki/A/");
Mat inputTrainingData = getInputDataFromSignsVector(roadSigns);
Mat outputTrainingData = getOutputDataFromSignsVector(roadSigns);
Ptr<ANN_MLP> mlp = ANN_MLP::create();
Mat layersSize = Mat(3, 1, CV_16U);
layersSize.row(0) = Scalar(inputTrainingData.cols);
layersSize.row(1) = Scalar(hiddenLayerSize);
layersSize.row(2) = Scalar(outputTrainingData.cols);
mlp->setLayerSizes(layersSize);
mlp->setActivationFunction(ANN_MLP::ActivationFunctions::SIGMOID_SYM, 1.0, 1.0);
mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP, 0.05, 0.05);
//mlp->setTrainMethod(ANN_MLP::TrainingMethods::RPROP);
TermCriteria termCrit = TermCriteria(
TermCriteria::Type::MAX_ITER //| TermCriteria::Type::EPS,
,100 //(int) INT_MAX
,0.000001
);
mlp->setTermCriteria(termCrit);
Ptr<TrainData> trainingData = TrainData::create(
inputTrainingData,
SampleTypes::ROW_SAMPLE,
outputTrainingData
);
auto start = system_clock::now();
mlp->train(trainingData
//, //ANN_MLP::TrainFlags::UPDATE_WEIGHTS
, ANN_MLP::TrainFlags::NO_INPUT_SCALE
+ ANN_MLP::TrainFlags::NO_OUTPUT_SCALE
);
auto duration = duration_cast<milliseconds> (system_clock::now() - start);
cout << "Training time: " << duration.count() << "ms" << endl;
for (int i = 0; i < inputTrainingData.rows; i++) {
Mat result;
//mlp->predict(inputTrainingData.row(i), result);
mlp->predict(roadSigns[i].image.reshape(0, 1), result);
//cout << result << endl;
print(result, 2);
}
//showSignsInWindows(roadSigns);
return 0;
}
此代码有什么问题,即XOR有效但图像无效?我检查了输入和输出矩阵,它们是正确的...可以有人解释我何时/应该使用ANN_MLP :: TrainFlags :: NO_INPUT_SCALE和ANN_MLP :: TrainFlags :: NO_OUTPUT_SCALE或setActivationFunction和setTrainMethod参数的值是什么我应该使用吗?
What is wrong in this code, that XOR works but images not? I cheked the input and output matrix and they're correct... could somebody also explain me when to/shoud I use the ANN_MLP::TrainFlags::NO_INPUT_SCALE and ANN_MLP::TrainFlags::NO_OUTPUT_SCALE or what values of setActivationFunction and setTrainMethod parameters should I use?
谢谢!
推荐答案
后置支撑体重秤参数存在问题-太大了,ANN无法学习更多困难的东西.
There was a problem in backprop weight scale parameter - it was too big and the ANN couldn't learn more difficult things.
我将行更改为mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP, 0.0001);
,将隐藏层的大小更改为100(以加快学习速度)-现在可以正常工作了!
I changed the line to mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP, 0.0001);
and the hidden layer size to 100 (to speed up the learning) - now it's working!
这篇关于OpenCV图像识别-设置ANN MLP的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!