使用非ASCII字符打开文件 [英] Open File with Non ASCII Characters

查看:121
本文介绍了使用非ASCII字符打开文件的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我想计算文件的 SHA-256 校验和。下面的代码在路径有效(即只包含 ASCII 字符)时能给出正确的校验和:

  #include< openssl\evp.h> 
#include< sys\stat.h>
#include< iostream>
#include< string>
#include< fstream>
#include< cstdio>
const int MAX_BUFFER_SIZE = 1024;
std :: string FileChecksum(std :: string,std :: string);

long long int GetFileSize(std :: string filename)
{
struct _stat64 stat_buf;
int rc = _stat64(filename.c_str(),& stat_buf);
return rc == 0? stat_buf.st_size:-1;
}

std :: string fname =D:\\Private\\Test\\\ ddf; //需要支持这个D:\\Private \\Test\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ string checksum = FileChecksum(fname,sha256);
std :: cout<<校验和< std :: endl;
return 0;
}


static std :: string FileChecksum(std :: string file_path,std :: string algorithm =sha256)
{
EVP_MD_CTX * mdctx;
const EVP_MD * md;
unsigned char md_value [EVP_MAX_MD_SIZE];
int i;
unsigned int md_len;

OpenSSL_add_all_digests();
md = EVP_get_digestbyname(algorithm.c_str());

if(!md){
printf(Unknown message digest%s\\\
,algorithm);
return;
}

mdctx = EVP_MD_CTX_create();
std :: ifstream readfile(file_path,std :: ifstream :: binary);
if(!readfile.is_open())
{
std :: cout< COuldnot打开文件\\\
;
return;
}
readfile.seekg(0,std :: ios :: end);
long long filelen = readfile.tellg();
std :: cout<< LEN IS< filelen< std :: endl;
readfile.seekg(0,std :: ios :: beg);
if(filelen == -1)
{
std :: cout<< Return Null \\\
;
return;
}

EVP_DigestInit_ex(mdctx,md,NULL);
long long temp_fil = filelen;
while(!readfile.eof()&& amp;& amp;& amp; temp_fil> 0)
{

int bufferS =(temp_fil< MAX_BUFFER_SIZE )? temp_fil:MAX_BUFFER_SIZE;
char * buffer = new char [bufferS + 1];
buffer [bufferS] = 0;
readfile.read(buffers,bufferS);
EVP_DigestUpdate(mdctx,buffer,bufferS);
temp_fil - = bufferS;
delete [] buffer;
}
EVP_DigestFinal_ex(mdctx,md_value,& md_len);
EVP_MD_CTX_destroy(mdctx);
char str [128] = {0};
char * ptr = str;
std :: string ret;
for(i = 0; i {
// _ snprintf(checksum_msg + cx,md_len-cx,%02x,md_value [i]);
sprintf(ptr,%02x,md_value [i]);
ptr + = 2;
}

ret = str;
/ *在退出前调用一次。 * /
EVP_cleanup();
return ret;
}

对于文件名有效(纯 ASCII)的文件,这段代码能给出正确的校验和。但一旦文件名包含非 ASCII 字符,程序就会失败。我改用 std::wstring 之后问题似乎解决了,但这个网站不建议使用 std::wstring,它的说法是:"除了紧邻接受 UTF-16 的 API 的调用点之外,不要在任何地方使用 wchar_t 或 std::wstring。"如果遵循这条建议,我该如何让这段代码适用于所有类型的路径?我使用的是 VS2010。

解决方案

wchar_t 不能跨平台移植,因为它在某些平台(Windows)上为 2 字节(UTF-16),而在其他平台(Linux 等)上为 4 字节(UTF-32)。这正是该网站所警告的内容。



在您的具体情况下,您只针对 Windows,所以使用 std::wstring 完全没有问题,因为它采用 UTF-16,与 Win32 API 到处使用的编码相同。您要找的是 Microsoft 的 _wstat64() 函数,以及 Microsoft 提供的、接受 wchar_t* 文件名的非标准 std::ifstream 构造函数:

  long long int GetFileSize(std :: wstring filename)
{
struct _stat64 stat_buf;
int rc = _wstat64(filename.c_str(),& stat_buf);
return rc == 0? stat_buf.st_size:-1;
}

  // Wide (UTF-16 on Windows) literal so the non-ASCII file name survives.
  std::wstring file_path = L"D:\\Private\\Test\\सर्वज्ञ पन्त.txt";

...

static std :: string FileChecksum(std :: wstring file_path,std :: string algorithm =sha256)
{
...
std :: ifstream readfile(file_path.c_str(),std :: ifstream :: binary);
...
}

话虽如此,您的 FileChecksum() 函数比实际需要的更复杂:发生错误时它没有正确清理资源,没有验证 std::ifstream::read() 是否确实读取了所请求的字节数(实际读取的可能更少),并且误用了 std::ifstream::eof()。

请尝试改用类似下面的写法:

include< openssl\evp.h>
#include< sys\stat.h>
#include< iostream>
#include< string>
#include< fstream>
#include< sstream>
#include< iomanip>

const int MAX_BUFFER_SIZE = 1024;
// Forward declaration; the default digest algorithm is SHA-256.
std::string FileChecksum(std::wstring file_path, std::string algorithm = "sha256");

// Wide literal so the non-ASCII file name is passed through unchanged.
std::wstring fname = L"D:\\Private\\Test\\सर्वज्ञ पन्त.txt";

// Entry point: computes the SHA-256 checksum of `fname` and prints it.
int main()
{
    std::string checksum = FileChecksum(fname, "sha256");
    std::cout << checksum << std::endl;
    return 0;
}

std :: string FileChecksum(std :: wstring file_path,std :: string algorithm)
{
EVP_MD_CTX * mdctx =
const EVP_MD * md;
unsigned char md_value [EVP_MAX_MD_SIZE];
char缓冲区[MAX_BUFFER_SIZE];
unsigned int md_len;
std :: ostringstream oss;
std :: string ret;

std :: ifstream readfile(file_path.c_str(),std :: ifstream :: binary);
if(readfile.fail())
{
std :: cout< 无法打开文件\\\
;
goto finished;
}

OpenSSL_add_all_digests();

md = EVP_get_digestbyname(algorithm.c_str());
if(!md){
std :: cout<< 未知消息摘要<算法< \\\
;
goto cleanup;
}

mdctx = EVP_MD_CTX_create();
if(!mdctx){
std :: cout< 无法为消息摘要创建上下文<算法< \\\
;
goto cleanup;
}

EVP_DigestInit_ex(mdctx,md,NULL);

do
{
readfile.read(buffer,sizeof(buffer));
if((readfile.fail())&&(!readfile.eof()))
{
std :: cout< 无法读取file\\\
;
goto cleanup;
}

EVP_DigestUpdate(mdctx,buffer,readfile.gcount());
}
while(!readfile.eof());

EVP_DigestFinal_ex(mdctx,md_value,& md_len);

for(unsigned int i = 0; i< md_len; i ++)
oss< std :: hex<< std :: setw(2)<< std :: setfill('0')<< (int)md_value [i];
ret = oss.str();

cleanup:
if(mdctx)EVP_MD_CTX_destroy(mdctx);
EVP_cleanup();

finished:
return ret;
}


I am trying to compute the SHA-256 checksum of a file. The following code gives the correct checksum when the path is valid, i.e. when it contains only ASCII characters:

#include <openssl\evp.h>
#include <sys\stat.h>
#include <iostream>
#include <string>
#include <fstream>
#include <cstdio>
const int MAX_BUFFER_SIZE = 1024;
std::string FileChecksum(std::string, std::string);

long long int GetFileSize(std::string filename)
{
    struct _stat64 stat_buf;
    int rc = _stat64(filename.c_str(), &stat_buf);
    return rc == 0 ? stat_buf.st_size : -1;
}

std::string fname = "D:\\Private\\Test\\asdf.txt"; // Need to support this D:\\Private\\Test\\सर्वज्ञ पन्त.txt

int main()
{
    std::string checksum = FileChecksum(fname , "sha256");
    std::cout << checksum << std::endl;
    return 0;
}


static std::string FileChecksum(std::string file_path, std::string algorithm="sha256")
{
    EVP_MD_CTX *mdctx;
    const EVP_MD *md;
    unsigned char md_value[EVP_MAX_MD_SIZE];
    int i;
    unsigned int md_len;

    OpenSSL_add_all_digests();
    md = EVP_get_digestbyname(algorithm.c_str());

    if(!md) {
        printf("Unknown message digest %s\n",algorithm);
        return "";
    }

    mdctx = EVP_MD_CTX_create();
    std::ifstream readfile(file_path,std::ifstream::binary);
    if(!readfile.is_open())
    {
        std::cout << "COuldnot open file\n";
        return "";
    }
    readfile.seekg(0, std::ios::end);
    long long filelen = readfile.tellg();
    std::cout << "LEN IS " << filelen << std::endl;
    readfile.seekg(0, std::ios::beg);
    if(filelen == -1)
    {
        std::cout << "Return Null \n";
        return "";
    }

    EVP_DigestInit_ex(mdctx, md, NULL);
    long long temp_fil = filelen;
    while(!readfile.eof() && readfile.is_open() && temp_fil>0)
    {

        int bufferS = (temp_fil < MAX_BUFFER_SIZE) ? temp_fil : MAX_BUFFER_SIZE;
        char *buffer = new char[bufferS+1];
        buffer[bufferS] = 0;
        readfile.read(buffer, bufferS);
        EVP_DigestUpdate(mdctx, buffer, bufferS);
        temp_fil -= bufferS;
        delete[] buffer;
    }
    EVP_DigestFinal_ex(mdctx, md_value, &md_len);
    EVP_MD_CTX_destroy(mdctx);
    char str[128] = { 0 };
    char *ptr = str;
    std::string ret;
    for(i = 0; i < md_len; i++)
    {
    //_snprintf(checksum_msg+cx,md_len-cx,"%02x",md_value[i]);
        sprintf(ptr,"%02x", md_value[i]);
        ptr += 2;
    }

    ret = str;
    /* Call this once before exit. */
    EVP_cleanup();
    return ret;
}

The code gives the correct checksum for files with valid (ASCII-only) names, but as soon as a file name contains non-ASCII characters, the program fails. I used std::wstring and it seems to fix the issue, but the site here discourages the use of std::wstring, saying: "Do not use wchar_t or std::wstring in any place other than adjacent point to APIs accepting UTF-16." If I were to follow this advice, how do I make this code work for all types of paths? I am using VS2010.

解决方案

wchar_t is not portable across multiple platforms, as it is 2 bytes (UTF-16) on some platforms (Windows) but is 4 bytes (UTF-32) on other platforms (Linux, etc). That is what the site is warning you about.

In your particular case, you are only focusing on Windows, so std::wstring is perfectly fine to use, since it uses UTF-16, which is the same encoding that the Win32 API uses all over the place. What you are looking for is Microsoft's _wstat64() function, and Microsoft's non-standard std::ifstream constructor that accepts a wchar_t* filename:

long long int GetFileSize(std::wstring filename)
{
    struct _stat64 stat_buf;
    int rc = _wstat64(filename.c_str(), &stat_buf);
    return rc == 0 ? stat_buf.st_size : -1;
}

std::wstring file_path = L"D:\\Private\\Test\\सर्वज्ञ पन्त.txt";

...

static std::string FileChecksum(std::wstring file_path, std::string algorithm="sha256")
{
    ...
    std::ifstream readfile(file_path.c_str(), std::ifstream::binary);
    ...
}

That being said, your FileChecksum() function is more complicated than it needs to be: it does not clean up correctly if an error occurs, it does not validate that std::ifstream::read() actually read as many bytes as you requested (it could read fewer), and it misuses std::ifstream::eof().

Try something more like this instead:

#include <openssl\evp.h>
#include <sys\stat.h>
#include <iostream>
#include <string>
#include <fstream>
#include <sstream>
#include <iomanip>

const int MAX_BUFFER_SIZE = 1024;
std::string FileChecksum(std::wstring file_path, std::string algorithm = "sha256");

std::wstring fname = L"D:\\Private\\Test\\सर्वज्ञ पन्त.txt";

int main()
{
    std::string checksum = FileChecksum(fname, "sha256");
    std::cout << checksum << std::endl;
    return 0;
}

std::string FileChecksum(std::wstring file_path, std::string algorithm)
{
    EVP_MD_CTX *mdctx = NULL;
    const EVP_MD *md;
    unsigned char md_value[EVP_MAX_MD_SIZE];
    char buffer[MAX_BUFFER_SIZE];
    unsigned int md_len;
    std::ostringstream oss;
    std::string ret;

    std::ifstream readfile(file_path.c_str(), std::ifstream::binary);
    if (readfile.fail())
    {
        std::cout << "Could not open file\n";
        goto finished;
    }

    OpenSSL_add_all_digests();

    md = EVP_get_digestbyname(algorithm.c_str());    
    if (!md) {
        std::cout << "Unknown message digest " << algorithm << "\n";
        goto cleanup;
    }

    mdctx = EVP_MD_CTX_create();
    if (!mdctx) {
        std::cout << "Could not create context for message digest " << algorithm << "\n";
        goto cleanup;
    }

    EVP_DigestInit_ex(mdctx, md, NULL);

    do
    {
        readfile.read(buffer, sizeof(buffer));
        if ((readfile.fail()) && (!readfile.eof()))
        {
            std::cout << "Could not read from file\n";
            goto cleanup;
        }

        EVP_DigestUpdate(mdctx, buffer, readfile.gcount());
    }
    while (!readfile.eof());

    EVP_DigestFinal_ex(mdctx, md_value, &md_len);

    for(unsigned int i = 0; i < md_len; i++)
        oss << std::hex << std::setw(2) << std::setfill('0') << (int) md_value[i];
    ret = oss.str();

cleanup:
    if (mdctx) EVP_MD_CTX_destroy(mdctx);
    EVP_cleanup();

finished:
    return ret;
}

这篇关于使用非ASCII字符打开文件的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆