为什么这个专门的char_traits< uint8_t>和codecvt< uint8_t>用于basic_ifstream模板throw std :: bad_cast? [英] Why does this specialized char_traits<uint8_t> and codecvt<uint8_t> for use with the basic_ifstream template throw std::bad_cast?
问题描述
Stackoverflow 上已经有一些问题在问为什么 basic_fstream<uint8_t>
不能工作。这些回答说,char_traits
只针对 char
和 wchar_t
(在 C++11 中还有 char16_t
、char32_t
)做了特化,你应该坚持使用 basic_fstream<char>
来读取二进制数据,并在需要时进行类型转换。
好了,这不够好! :)
(我能找到的)回答中没有一个说明如何特化 char_traits<uint8_t>
并将其与 basic_fstream
模板一起使用,甚至没有说明这是否可行。所以我想自己试着实现一下。
在 Windows 7 64 位上使用 Visual Studio Express 2013 RC,以及在 Kubuntu GNU/Linux 13.04 64 位上使用 g++-4.7 时,以下代码都能无错误地编译。但是它在运行时会抛出 std::bad_cast 异常。我手头没有 clang++ 和 libc++,无法测试这一组合。
#include <cinttypes>
#include <cstdlib>   // EXIT_SUCCESS / EXIT_FAILURE, used by main()
#include <cstring>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <locale>

// Portability shim: on _WIN32 builds `constexpr` is erased and NOEXCEPT maps to
// the old-style throw() specification; everywhere else NOEXCEPT is `noexcept`.
// (Presumably because the targeted Visual Studio 2013 RC lacks both keywords.)
#ifdef _WIN32
#define constexpr
#define NOEXCEPT throw()
#else
#define NOEXCEPT noexcept
#endif

// Change this to char and it works.
using byte_type = std::uint8_t;
命名空间std
{
//专业化std :: char_traits
模板<> struct char_traits< std :: uint8_t>
{
使用char_type = std :: uint8_t;
using int_type = int;
using off_type = std :: streamoff;
using pos_type = std :: streampos;
using state_type = std :: mbstate_t;
static void assign(char_type& value1,const char_type& value2)
{
value1 = value2;
}
static char_type * assign(char_type * ptr,std :: size_t count,char_type value)
{
return static_cast< char_type *>(std: :memset(ptr,value,count));
}
static constexpr bool eq(const char_type& value1,const char_type& value2)NOEXCEPT
{
return value1 == value2;
}
static constexpr bool lt(const char_type value1,const char_type value2)NOEXCEPT
{
return value1< value2;
}
static std :: size_t length(const char_type * ptr)
{
std :: size_t i = 0;
while(!eq(ptr [i],char_type()))
{
++ i;
}
return i;
}
static int compare(const char_type * ptr1,const char_type * ptr2,std :: size_t count)
{
return std :: memcmp(ptr1, ptr2,count);
}
static const char_type * find(const char_type * ptr,std :: size_t count,const char_type& value)
{
return static_cast< const char_type * >(std :: memchr(ptr,value,count));
}
static char_type * move(char_type * dest,const char_type * src,std :: size_t count)
{
return static_cast< char_type * std :: memmove(dest,src,count));
}
static char_type * copy(char_type * dest,const char_type * src,std :: size_t count)
{
return static_cast< char_type *> std :: memcpy(dest,src,count));
}
static constexpr char_type to_char_type(const int_type& value)NOEXCEPT
{
return static_cast< char_type>(value);
}
static constexpr int_type to_int_type(const char_type& value)NOEXCEPT
{
return static_cast< int_type>(value);
}
static constexpr bool eq_int_type(const int_type& value1,const int_type& value2)NOEXCEPT
{
return value1 == value2;
}
static constexpr int_type eof()NOEXCEPT
{
return static_cast< int_type>(std :: char_traits< char> :: eof());
}
static constexpr int_type not_eof(const int_type& value)NOEXCEPT
{
return(value == eof())? 0:value;
}
};
//专业化std :: codecvt
模板<>类codecvt std :: uint8_t,char,std :: mbstate_t> :public locale :: facet,public codecvt_base
{
public:
using internal_type = std :: uint8_t;
using external_type = char;
using state_type = std :: mbstate_t;
static std :: locale :: id id;
codecvt(std :: size_t refs = 0)
:locale :: facet(refs)
{}
std :: codecvt_base ::结果输出(state_type& state,const internal_type * from,const internal_type * from_end,const internal_type *& from_next,external_type * to,external_type * to_end,external_type *& to_next)const
{
return do_out (state,from,from_end,from_next,to,to_end,to_next);
}
std :: codecvt_base :: result(state_type& state,const external_type * from,const external_type * from_end,const external_type *& from_next,internal_type * to,internal_type * to_end ,internal_type *& to_next)const
{
return do_in(state,from,from_end,from_next,to,to_end,to_next);
}
std :: codecvt_base :: result unshift(state_type& state,external_type * to,external_type * to_end,external_type *& to_next)const
{
return do_unshift(state,to,to_end,to_next);
}
int length(state_type& state,const external_type * from,const external_type * from_end,std :: size_t max)const
{
return do_length ,from,from_end,max);
}
int max_length()const NOEXCEPT
{
return do_max_length();
}
int encoding()const NOEXCEPT
{
return do_encoding();
}
bool always_noconv()const NOEXCEPT
{
return do_always_noconv();
}
protected:
virtual〜codecvt(){}
virtual std :: codecvt_base :: result do_out(state_type& state,const internal_type * from,const internal_type * from_end,const internal_type *& from_next,external_type * to,external_type * to_end,external_type *& to_next)const;
virtual std :: codecvt_base :: result do_in(state_type& state,const external_type * from,const external_type * from_end,const external_type *& from_next,internal_type * to,internal_type * to_end,internal_type *& to_next)const ;
virtual std :: codecvt_base :: result do_unshift(state_type& state,external_type * to,external_type * to_end,external_type *& to_next)const;
virtual int do_length(state_type& state,const external_type * from,const external_type * from_end,std :: size_t max)const;
virtual int do_max_length()const NOEXCEPT;
virtual int do_encoding()const NOEXCEPT;
virtual bool do_always_noconv()const NOEXCEPT;
}; // class codecvt
locale :: id codecvt< std :: uint8_t,char,std :: mbstate_t> :: id;
codecvt_base :: result codecvt< std :: uint8_t,char,std :: mbstate_t> :: do_out(state_type& state,const internal_type * from,const internal_type * from_end,const internal_type *& from_next,external_type * to,external_type * to_end,external_type *& to_next)const
{
(void)state; (void)from_end; (void)to_end; //未使用的参数
from_next = from;
to_next = to;
return codecvt_base :: noconv;
}
codecvt_base :: result codecvt< std :: uint8_t,char,std :: mbstate_t> :: do_in(state_type& state,const external_type * from,const external_type * from_end,const external_type *& from_next,internal_type * to,internal_type * to_end,internal_type *& to_next)const
{
(void)state; (void)from_end; (void)to_end; //未使用的参数
from_next = from;
to_next = to;
return std :: codecvt_base :: noconv;
}
codecvt_base :: result codecvt< std :: uint8_t,char,std :: mbstate_t> :: do_unshift(state_type& state,external_type * to,external_type * to_end,external_type *& to_next)const
{
(void)to_end; //未使用的参数
to_next = to;
return std :: codecvt_base :: noconv;
}
int codecvt< std :: uint8_t,char,std :: mbstate_t> :: do_length(state_type& state,const external_type * from,const external_type * from_end,std :: size_t max)const
{
州; //未使用的参数
return static_cast< int>(std :: min< std :: size_t>(max,static_cast< std :: size_t>(from_end - from)
}
int codecvt< std :: uint8_t,char,std :: mbstate_t> :: do_max_length()const NOEXCEPT
{
return 1;
}
int codecvt< std :: uint8_t,char,std :: mbstate_t> :: do_encoding()const NOEXCEPT
{
return 1;
}
bool codecvt< std :: uint8_t,char,std :: mbstate_t> :: do_always_noconv()const NOEXCEPT
{
return true;
}
} //命名空间std
// Opens the file named by argv[1] as a binary stream of byte_type, reads four
// elements and prints how many were actually read.
// Returns EXIT_FAILURE when no file argument is given or the file cannot be
// opened, EXIT_SUCCESS otherwise.
int main(int argc, char *argv [])
{
    if (argc < 2)
    {
        std::cerr << argv[0] << " {file to read}" << std::endl;
        return EXIT_FAILURE;
    }

    using stream_type = std::basic_ifstream< byte_type, std::char_traits<byte_type> >;
    stream_type stream(argv[1], std::ifstream::in | std::ifstream::binary);
    if (stream.is_open() == false)
    {
        std::cerr << "file not found" << std::endl;
        return EXIT_FAILURE;
    }

    // Make the stream throw on badbit instead of failing silently.
    stream.exceptions(std::ifstream::badbit);

    static const auto read_size = 4;
    stream_type::char_type buffer[read_size];
    // NOTE: no codecvt<byte_type, char, mbstate_t> facet has been imbued into
    // the stream's locale at this point; with byte_type = std::uint8_t this
    // read() is the call reported to throw std::bad_cast.
    stream.read(buffer, read_size);
    std::cout << "Got:" << stream.gcount() << std::endl;
    return EXIT_SUCCESS;
}
使用 g++ 在 GNU/Linux 上编译并运行:
$ g++ -std=c++11 -Wall -Wextra -pedantic stream.cpp -o stream && ./stream /dev/random
terminate called after throwing an instance of 'std::bad_cast'
  what():  std::bad_cast
Aborted (core dumped)
使用 Visual Studio Express RC 2013:
First-chance exception at 0x76A6C41F in traits test.exe: Microsoft C++ exception: std::bad_cast at memory location 0x0038F978.
Unhandled exception at 0x76A6C41F in traits test.exe: Microsoft C++ exception: std::bad_cast at memory location 0x0038F978.
将
byte_type
更改为 char
会得到预期的输出:
$ g++ -std=c++11 -Wall -Wextra -pedantic stream.cpp -o stream && ./stream /dev/random
Got:4
为什么这会抛出 std::bad_cast,我该如何解决?
解决方案
我能够在我的 gcc(AIX 上的 4.7.2)上重现 bad_cast。
出现这个异常的原因是,gcc 库的实现者优化了
basic_filebuf::xsgetn
(它由 basic_istream::read 调用),使其在流的 locale 不做转换时直接调用普通的 C 函数
fread
来读取文件(也就是说,你并不是要把 UTF-8 或 GB18030 文件读入 UTF-32 字符串之类的),这绝对是正确的做法。为了判断是否不做转换,它会在流所 imbue 的 locale 的 codecvt facet 上检查 codecvt::always_noconv……而这个 facet 并不存在。
你可以通过执行以下代码来重现该异常:
// Asks the stream's locale for the codecvt<std::uint8_t, char, std::mbstate_t>
// facet; per the surrounding explanation this reproduces the std::bad_cast
// when no such facet has been installed.
std::cout << std::use_facet<
                 std::codecvt<std::uint8_t, char, std::mbstate_t>
             >(stream.getloc()).always_noconv() << '\n';
我无法使用 Visual Studio,所以没法查看为什么它在那里可以工作(他们是否只是在 basic_fstream::read() 中对每个字符调用 basic_filebuf::sgetc()?),但无论如何,要使用 basic_filestream,你都需要为你的内部类型和外部类型的组合(这里是 uint8_t
和 char
)提供一个 codecvt facet。
编辑:你已经差不多成功了,缺少的最后一块是下面这一行:
stream.imbue(std::locale(stream.getloc(), new std::codecvt<uint8_t, char, std::mbstate_t>));
把它放在 stream.read
之前的任何位置;
或者,也可以 imbue 全局 locale:
std::locale::global(std::locale(std::locale(), new std::codecvt<uint8_t, char, std::mbstate_t>));
只要放在构造
basic_ifstream 之前的任何位置即可。
There are already questions here on Stackoverflow asking why
basic_fstream<uint8_t>
doesn't work. The answers say that char_traits
is only specialized for char
and wchar_t
(plus char16_t
, char32_t
in C++11) and you should stick with basic_fstream<char>
to read binary data and cast it if required. Well darn it, that isn't good enough! :)
None of the answers (that I can find) say how to specialize
char_traits<uint8_t>
and use it with a basic_fstream
template, or if it's even possible. So I thought I'd try to implement it myself. The following compiles without error when using Visual Studio Express 2013 RC on Windows 7 64bit and with g++-4.7 on Kubuntu GNU/Linux 13.04 64bit. However, it throws a std::bad_cast exception at runtime. I don't have access to clang++ with libc++ to test that combination.
#include <cinttypes>
#include <cstdlib>   // EXIT_SUCCESS / EXIT_FAILURE, used by main()
#include <cstring>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <locale>

// Portability shim: on _WIN32 builds `constexpr` is erased and NOEXCEPT maps to
// the old-style throw() specification; everywhere else NOEXCEPT is `noexcept`.
#ifdef _WIN32
#define constexpr
#define NOEXCEPT throw()
#else
#define NOEXCEPT noexcept
#endif

// Change this to char and it works.
using byte_type = std::uint8_t;

namespace std
{

// Specialization of std::char_traits for std::uint8_t so that the stream
// templates (e.g. basic_ifstream) can be instantiated with an unsigned 8-bit
// character type. The bulk operations forward to the C byte routines.
template <> struct char_traits< std::uint8_t >
{
    using char_type = std::uint8_t;
    using int_type = int;               // wider than char_type; also carries the eof() value
    using off_type = std::streamoff;
    using pos_type = std::streampos;
    using state_type = std::mbstate_t;

    // Assigns value2 to value1.
    static void assign(char_type& value1, const char_type& value2)
    {
        value1 = value2;
    }

    // Fills `count` elements starting at `ptr` with `value`.
    static char_type* assign(char_type* ptr, std::size_t count, char_type value)
    {
        return static_cast<char_type*>(std::memset(ptr, value, count));
    }

    static constexpr bool eq(const char_type& value1, const char_type& value2) NOEXCEPT
    {
        return value1 == value2;
    }

    static constexpr bool lt(const char_type value1, const char_type value2) NOEXCEPT
    {
        return value1 < value2;
    }

    // Number of elements before the first char_type() (zero) terminator.
    static std::size_t length(const char_type* ptr)
    {
        std::size_t i = 0;
        while (!eq(ptr[i], char_type()))
        {
            ++i;
        }
        return i;
    }

    // Compares the first `count` elements of both sequences via memcmp.
    static int compare(const char_type* ptr1, const char_type* ptr2, std::size_t count)
    {
        return std::memcmp(ptr1, ptr2, count);
    }

    // Searches the first `count` elements of `ptr` for `value` via memchr.
    static const char_type* find(const char_type* ptr, std::size_t count, const char_type& value)
    {
        return static_cast<const char_type*>(std::memchr(ptr, value, count));
    }

    // Copies `count` elements; uses memmove, so the ranges may overlap.
    static char_type* move(char_type* dest, const char_type* src, std::size_t count)
    {
        return static_cast<char_type*>(std::memmove(dest, src, count));
    }

    // Copies `count` elements with memcpy.
    static char_type* copy(char_type* dest, const char_type* src, std::size_t count)
    {
        return static_cast<char_type*>(std::memcpy(dest, src, count));
    }

    static constexpr char_type to_char_type(const int_type& value) NOEXCEPT
    {
        return static_cast<char_type>(value);
    }

    static constexpr int_type to_int_type(const char_type& value) NOEXCEPT
    {
        return static_cast<int_type>(value);
    }

    static constexpr bool eq_int_type(const int_type& value1, const int_type& value2) NOEXCEPT
    {
        return value1 == value2;
    }

    // End-of-file marker; reuses the value of std::char_traits<char>::eof().
    static constexpr int_type eof() NOEXCEPT
    {
        return static_cast<int_type>(std::char_traits<char>::eof());
    }

    // Returns `value` unchanged unless it equals eof(), in which case 0.
    static constexpr int_type not_eof(const int_type& value) NOEXCEPT
    {
        return (value == eof()) ? 0 : value;
    }
};

// Specialization of std::codecvt for internal type std::uint8_t and external
// type char. Every conversion hook below reports "no conversion".
template<> class codecvt< std::uint8_t, char, std::mbstate_t > : public locale::facet, public codecvt_base
{
public:
    using internal_type = std::uint8_t;
    using external_type = char;
    using state_type = std::mbstate_t;

    // Facet identifier, defined after the class.
    static std::locale::id id;

    // `refs` is forwarded unchanged to locale::facet.
    codecvt(std::size_t refs = 0)
        : locale::facet(refs)
    {}

    // Public entry points; each one forwards to the matching protected
    // virtual do_* member.
    std::codecvt_base::result out(state_type& state, const internal_type* from, const internal_type* from_end, const internal_type*& from_next, external_type* to, external_type* to_end, external_type*& to_next) const
    {
        return do_out(state, from, from_end, from_next, to, to_end, to_next);
    }

    std::codecvt_base::result in(state_type& state, const external_type* from, const external_type* from_end, const external_type*& from_next, internal_type* to, internal_type* to_end, internal_type*& to_next) const
    {
        return do_in(state, from, from_end, from_next, to, to_end, to_next);
    }

    std::codecvt_base::result unshift(state_type& state, external_type* to, external_type* to_end, external_type*& to_next) const
    {
        return do_unshift(state, to, to_end, to_next);
    }

    int length(state_type& state, const external_type* from, const external_type* from_end, std::size_t max) const
    {
        return do_length(state, from, from_end, max);
    }

    int max_length() const NOEXCEPT
    {
        return do_max_length();
    }

    int encoding() const NOEXCEPT
    {
        return do_encoding();
    }

    bool always_noconv() const NOEXCEPT
    {
        return do_always_noconv();
    }

protected:
    // Protected destructor: the facet cannot be destroyed through the public interface.
    virtual ~codecvt() {}
    virtual std::codecvt_base::result do_out(state_type& state, const internal_type* from, const internal_type* from_end, const internal_type*& from_next, external_type* to, external_type* to_end, external_type*& to_next) const;
    virtual std::codecvt_base::result do_in(state_type& state, const external_type* from, const external_type* from_end, const external_type*& from_next, internal_type* to, internal_type* to_end, internal_type*& to_next) const;
    virtual std::codecvt_base::result do_unshift(state_type& state, external_type* to, external_type* to_end, external_type*& to_next) const;
    virtual int do_length(state_type& state, const external_type* from, const external_type* from_end, std::size_t max) const;
    virtual int do_max_length() const NOEXCEPT;
    virtual int do_encoding() const NOEXCEPT;
    virtual bool do_always_noconv() const NOEXCEPT;
}; // class codecvt

locale::id codecvt< std::uint8_t, char, std::mbstate_t >::id;

// Performs no conversion: leaves both "next" pointers at the start of their
// ranges and reports noconv.
codecvt_base::result codecvt< std::uint8_t, char, std::mbstate_t >::do_out(state_type& state, const internal_type* from, const internal_type* from_end, const internal_type*& from_next, external_type* to, external_type* to_end, external_type*& to_next) const
{
    (void) state; (void) from_end; (void) to_end; // Unused parameters
    from_next = from;
    to_next = to;
    return codecvt_base::noconv;
}

// Performs no conversion: leaves both "next" pointers at the start of their
// ranges and reports noconv.
codecvt_base::result codecvt< std::uint8_t, char, std::mbstate_t >::do_in(state_type& state, const external_type* from, const external_type* from_end, const external_type*& from_next, internal_type* to, internal_type* to_end, internal_type*& to_next) const
{
    (void) state; (void) from_end; (void) to_end; // Unused parameters
    from_next = from;
    to_next = to;
    return std::codecvt_base::noconv;
}

// Writes nothing: to_next is set to `to` and noconv is reported.
codecvt_base::result codecvt< std::uint8_t, char, std::mbstate_t >::do_unshift(state_type& state, external_type* to, external_type* to_end, external_type*& to_next) const
{
    (void) state; (void) to_end; // Unused parameters
    to_next = to;
    return std::codecvt_base::noconv;
}

// Returns the smaller of `max` and the number of external chars in [from, from_end).
int codecvt< std::uint8_t, char, std::mbstate_t >::do_length(state_type& state, const external_type* from, const external_type* from_end, std::size_t max) const
{
    (void) state; // Unused parameter
    return static_cast<int>(std::min< std::size_t >(max, static_cast<std::size_t>(from_end - from)));
}

int codecvt< std::uint8_t, char, std::mbstate_t >::do_max_length() const NOEXCEPT
{
    return 1;
}

int codecvt< std::uint8_t, char, std::mbstate_t >::do_encoding() const NOEXCEPT
{
    return 1;
}

bool codecvt< std::uint8_t, char, std::mbstate_t >::do_always_noconv() const NOEXCEPT
{
    return true;
}

} // namespace std

// Opens the file named by argv[1] as a binary stream of byte_type, reads four
// elements and prints how many were actually read.
// Returns EXIT_FAILURE when no file argument is given or the file cannot be
// opened, EXIT_SUCCESS otherwise.
int main(int argc, char *argv [])
{
    if (argc < 2)
    {
        std::cerr << argv[0] << " {file to read}" << std::endl;
        return EXIT_FAILURE;
    }

    using stream_type = std::basic_ifstream< byte_type, std::char_traits<byte_type> >;
    stream_type stream(argv[1], std::ifstream::in | std::ifstream::binary);
    if (stream.is_open() == false)
    {
        std::cerr << "file not found" << std::endl;
        return EXIT_FAILURE;
    }

    // Make the stream throw on badbit instead of failing silently.
    stream.exceptions(std::ifstream::badbit);

    static const auto read_size = 4;
    stream_type::char_type buffer[read_size];
    // NOTE: no codecvt<byte_type, char, mbstate_t> facet has been imbued into
    // the stream's locale at this point; with byte_type = std::uint8_t this
    // read() is the call reported to throw std::bad_cast.
    stream.read(buffer, read_size);
    std::cout << "Got:" << stream.gcount() << std::endl;
    return EXIT_SUCCESS;
}
Compile and run with g++ and GNU/Linux:
$ g++ -std=c++11 -Wall -Wextra -pedantic stream.cpp -o stream && ./stream /dev/random
terminate called after throwing an instance of 'std::bad_cast'
  what():  std::bad_cast
Aborted (core dumped)
And with Visual Studio Express RC 2013:
First-chance exception at 0x76A6C41F in traits test.exe: Microsoft C++ exception: std::bad_cast at memory location 0x0038F978.
Unhandled exception at 0x76A6C41F in traits test.exe: Microsoft C++ exception: std::bad_cast at memory location 0x0038F978.
Changing
byte_type
to char
gives the expected output:
$ g++ -std=c++11 -Wall -Wextra -pedantic stream.cpp -o stream && ./stream /dev/random
Got:4
Why is this throwing std::bad_cast and how can I fix it?
解决方案
I was able to reproduce a bad_cast on my gcc (4.7.2 on AIX).
The reason you got it is that gcc library implementors optimized
basic_filebuf::xsgetn
(which is called from basic_istream::read
) to call plain C fread
to read from the file if your stream's locale is non-converting (that is, you're not trying to read a UTF-8 or maybe GB18030 file into a UTF-32 string or something), which is absolutely the right thing to do. To find out if it is non-converting, it checks codecvt::always_noconv on the codecvt facet of the locale imbued in your stream... which doesn't exist. You can reproduce the exception by executing
// Asks the stream's locale for the codecvt<std::uint8_t, char, std::mbstate_t>
// facet; per the surrounding explanation this reproduces the std::bad_cast
// when no such facet has been installed.
std::cout << std::use_facet< std::codecvt<std::uint8_t, char, std::mbstate_t> >(stream.getloc()).always_noconv() << '\n';
I don't have access to Visual Studio to see why it works there (do they just call
basic_filebuf::sgetc()
for every char in basic_fstream::read()
?), but to use basic_filestream in any case, you need to provide a codecvt facet for your combination of internal and external types (uint8_t
and char
, in this case). EDIT: You're almost there, the last missing piece is the line
// Builds a copy of the stream's current locale with the custom
// codecvt<uint8_t, char, mbstate_t> facet added, and imbues it into the stream.
stream.imbue(std::locale(stream.getloc(), new std::codecvt<uint8_t, char, std::mbstate_t>));
anywhere before
stream.read
or, alternatively, imbue the global locale:
std::locale::global(std::locale(std::locale(), new std::codecvt<uint8_t, char, std::mbstate_t>));
anywhere before you construct the basic_ifstream
这篇关于为什么这个专门的char_traits< uint8_t>和codecvt< uint8_t>用于basic_ifstream模板throw std :: bad_cast?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!