Windows cmd编码更改导致Python崩溃 [英] Windows cmd encoding change causes Python crash

查看:289
本文介绍了Windows cmd编码更改导致Python崩溃的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

首先,我将Windows CMD编码改为utf-8并运行Python解释器:

  chcp 65001 
python

然后我尝试在其中打印一个 Unicode 字符串,这时 Python 会以一种奇怪的方式崩溃(我只是在同一个窗口中回到了 cmd 提示符)。

>>> import sys
>>> print u'ëèæîð'.encode(sys.stdin.encoding)

有谁知道为什么会出现这种情况,以及如何让它正常工作?



UPD sys.stdin.encoding 返回'cp65001'



UPD2:我刚想到,这个问题可能与 utf-8 使用多字节字符集这一事实有关(kcwu 在这一点上说得很好)。我尝试用 'windows-1250' 运行整个示例,得到了 'ëeaî?'。Windows-1250 使用单字符集,因此对它能识别的字符是有效的。但是我仍然不知道如何让 'utf-8' 在这里正常工作。



UPD3:哦,我发现这是一个已知的 Python 错误(http://bugs.python.org/issue1602)。我猜,实际情况是 Python 把 cmd 的编码 'cp65001' 复制到 sys.stdin.encoding,并尝试将其应用于所有输入。由于它无法识别 'cp65001',所以在遇到任何包含非 ASCII 字符的输入时都会崩溃。

解决方案

下面介绍如何在不修改 encodings\aliases.py 的情况下,将 cp65001 设为 UTF-8 的别名:

  import codecs 
codecs.register(lambda name:codecs.lookup('utf-8')if name =='cp65001'else None)
(依我之见,不必理会 http://bugs.python.org/issue6058#msg97731 中关于 cp65001 与 UTF-8 不完全相同的无谓说法。即使微软的编解码器有一些小错误,它的本意就是与 UTF-8 相同。)



下面是一些代码(为 Tahoe-LAFS 编写,tahoe-lafs.org),无论 chcp 代码页如何设置,它都能使控制台输出正常工作,并且还能读取 Unicode 命令行参数。感谢 Michael Kaplan 提供了此解决方案背后的思路。如果 stdout 或 stderr 被重定向,它将输出 UTF-8。如果你需要字节顺序标记(BOM),则需要显式写入。



  import sys 
如果sys.platform ==win32:
导入编解码器
从ctypes导入WINFUNCTYPE,windll,POINTER,byref,c_int
从ctypes.wintypes import BOOL ,HANDLE,DWORD,LPWSTR,LPCWSTR,LPVOID

original_stderr = sys.stderr

#如果在此代码中发生任何异常,我们可能会尝试在stderr上打印,
#,这使得令人沮丧的调试,如果stderr被引导到我们的包装器。
#所以关于捕捉错误并将其报告给original_stderr,
#,以便我们至少可以看到它们是偏执狂的。
def _complain(message):
print>> original_stderr,message if isinstance(message,str)else repr(message)

#解决 codecs.register(lambda name:codecs.lookup('utf-8')if name =='cp65001'else None)

#使Unicode控制台输出独立于当前代码页。
#这也修复了< http://bugs.python.org/issue1602> ;.
#发给Michael Kaplan< http://www.siao2.com/2010/04/07/9989346.aspx>
#和TZOmegaTZIOY
#< http://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash/1432462#1432462> ;.
try:
#< http://msdn.microsoft.com/en-us/library/ms683231(VS.85).aspx>
#HANDLE WINAPI GetStdHandle(DWORD nStdHandle);
#返回INVALID_HANDLE_VALUE,NULL或有效句柄

#< http://msdn.microsoft.com/en-us/library/aa364960(VS.85).aspx> ;
#DWORD WINAPI GetFileType(DWORD hFile);

#< http://msdn.microsoft.com/en-us/library/ms683167(VS.85).aspx>
#BOOL WINAPI GetConsoleMode(HANDLE hConsole,LPDWORD lpMode);

GetStdHandle = WINFUNCTYPE(HANDLE,DWORD)((GetStdHandle,windll.kernel32))
STD_OUTPUT_HANDLE = DWORD(-11)
STD_ERROR_HANDLE = DWORD(-12)
GetFileType = WINFUNCTYPE(DWORD,DWORD)((GetFileType,windll.kernel32))
FILE_TYPE_CHAR = 0x0002
FILE_TYPE_REMOTE = 0x8000
GetConsoleMode = WINFUNCTYPE(BOOL,HANDLE,POINTER DWORD))((GetConsoleMode,windll.kernel32))
INVALID_HANDLE_VALUE = DWORD(-1).value

def not_a_console(handle):
如果handle == INVALID_HANDLE_VALUE或者handle为None:
return True
return((GetFileType(handle)&〜FILE_TYPE_REMOTE)!= FILE_TYPE_CHAR
或GetConsoleMode(handle,byref(DWORD()))

old_stdout_fileno =无
old_stderr_fileno =无
如果hasattr(sys.stdout,'fileno'):
old_stdout_fileno = sys.stdout.f ileno()
if hasattr(sys.stderr,'fileno'):
old_stderr_fileno = sys.stderr.fileno()

STDOUT_FILENO = 1
STDERR_FILENO = 2
real_stdout =(old_stdout_fileno == STDOUT_FILENO)
real_stderr =(old_stderr_fileno == STDERR_FILENO)

如果real_stdout:
hStdout = GetStdHandle(STD_OUTPUT_HANDLE)
if not_a_console(hStdout):
real_stdout = False

如果real_stderr:
hStderr = GetStdHandle(STD_ERROR_HANDLE)
如果not_a_console(hStderr):
real_stderr = False

如果real_stdout或real_stderr:
#BOOL WINAPI WriteConsoleW(HANDLE hOutput,LPWSTR lpBuffer,DWORD nChars,
#LPDWORD lpCharsWritten,LPVOID lpReserved);

WriteConsoleW = WINFUNCTYPE(BOOL,HANDLE,LPWSTR,DWORD,POINTER(DWORD),LPVOID)((WriteConsoleW,windll.kernel32))

class UnicodeOutput:
def __init __(self,hConsole,stream,fileno,name):
self._hConsole = hConsole
self._stream = stream
self._fileno = fileno
self。 closed = False
self.softspace = False
self.mode ='w'
self.encoding ='utf-8'
self.name = name
self .flush()

def isatty(self):
return False

def close(self):
#不要真的关闭句柄,这只会造成问题
self.closed = True

def fileno(self):
return self._fileno

def flush(self):
如果self._hConsole为None:
try:
self._stream.flush()
除了例外e:
_complain(%s.flush:%r from%r%(self.name,e,self._stream))
raise

def write(self,text):
try:
如果self._hConsole为None:
if isinstance(text,unicode):
text = text.encode('utf-8 ')
self._stream.write(text)
else:
如果不是isinstance(text,unicode):
text = str(text).decode('utf-8 ')
剩余= len(文本)
同时保留ing
n = DWORD(0)
#传递给WriteConsoleW的字符串的
#长度存在一个比文档更短的限制(参见
#< http: //tahoe-lafs.org/trac/tahoe-lafs/ticket/1232> ;.
retval = WriteConsoleW(self._hConsole,text,min(remaining,10000),byref(n),None)
如果retval == 0或n.value == 0:
raise IOError(WriteConsoleW返回%r,n.value =%r%(retval,n.value))
剩余 - = n.value
如果不剩余:
break
文本=文本[n.value:]
除了异常作为e:
_complain(%s.write:%r%(self.name,e))
raise

def writelines(self,lines):
try:
for line in lines:
self.write(line)
except Exception as e:
_complain(%s.writelines:%r%(self.name,e))


如果real_stdout:
sys.stdout = UnicodeOutput(hStdout,None,STDOUT_FILENO,'< Unicode console stdout>')
else:
sys.stdout = UnicodeOutput(None,sys.stdout,old_stdout_fileno,'< Unicode redirected stdout>')

如果real_stderr:
sys.stderr = UnicodeOutput(hStderr,None,STDERR_FILENO,'& Unicode控制台stderr>')
else:
sys.stderr = UnicodeOutput(None,sys.stderr,old_stderr_fileno,'< Unicode redirected stderr>')
除了例外为e:
_complain(异常%r固定sys.stdout和sys.stderr%(e,))


#在我们在它的时候,让我们解开命令 - 行参数:

#这可以在< http://bugs.python.org/issue2128> ;.
GetCommandLineW = WINFUNCTYPE((GetCommandLineW,windll.kernel32))
CommandLineToArgvW = WINFUNCTYPE(POINTER(LPWSTR),LPCWSTR,POINTER(c_int))((CommandLineToArgvW,windll.shell32 )

argc = c_int(0)
argv_unicode = CommandLineToArgvW(GetCommandLineW(),byref(argc))

argv = [argv_unicode [i] .encode ('utf-8')for i in xrange(0,argc.value)]

如果没有hasattr(sys,'freeze'):
#如果这是一个可执行程序, py2exe或bbfreeze,那么它将
#直接调用。否则,unicode_argv [0]是Python
#解释器,所以跳过。
argv = argv [1:]

#也跳过Python解释器的选项参数。
while len(argv)> 0:
arg = argv [0]
如果不是arg.startswith(u - )或arg == u - :
break
argv = argv [1 :]
如果arg == u'-m':
#sys.argv [0]应该是模块源的绝对路径,
#但不介意
break
如果arg == u'-c':
argv [0] = u'-c'
break

#如果你喜欢:
sys.argv = argv

最后,可以满足 ΤΖΩΤΖΙΟΥ 的愿望,在控制台中使用 DejaVu Sans Mono——我也认为这是一款出色的字体。



您可以在微软知识库文章“Necessary criteria for fonts to be available in a command window”(命令窗口中可用字体的必要条件)中找到有关字体要求以及如何为 Windows 控制台添加新字体的信息。



但基本上,在 Vista 上(可能 Win7 也是如此):




  • 在 HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Console\TrueTypeFont 下,将 "0" 设置为 "DejaVu Sans Mono";

  • 对 HKEY_CURRENT_USER\Console 下的每个子项,将 "FaceName" 设置为 "DejaVu Sans Mono"。



在 XP 上,请查看 LockerGnome 论坛中的帖子“Changing Command Prompt fonts?”。


First I change Windows CMD encoding to utf-8 and run Python interpreter:

chcp 65001
python

Then I try to print a unicode string inside it, and when I do this Python crashes in a peculiar way (I just get a cmd prompt in the same window).

>>> import sys
>>> print u'ëèæîð'.encode(sys.stdin.encoding)

Any ideas why it happens and how to make it work?

UPD: sys.stdin.encoding returns 'cp65001'

UPD2: It just came to me that the issue might be connected with the fact that utf-8 uses multi-byte character set (kcwu made a good point on that). I tried running the whole example with 'windows-1250' and got 'ëeaî?'. Windows-1250 uses single-character set so it worked for those characters it understands. However I still have no idea how to make 'utf-8' work here.

UPD3: Oh, I found out it is a known Python bug. I guess what happens is that Python copies the cmd encoding as 'cp65001' to sys.stdin.encoding and tries to apply it to all the input. Since it fails to understand 'cp65001' it crashes on any input that contains non-ascii characters.

解决方案

Here's how to alias cp65001 to UTF-8 without changing encodings\aliases.py:

import codecs
codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)

(IMHO, don't pay any attention to the silliness about cp65001 not being identical to UTF-8 at http://bugs.python.org/issue6058#msg97731 . It's intended to be the same, even if Microsoft's codec has some minor bugs.)

Here is some code (written for Tahoe-LAFS, tahoe-lafs.org) that makes console output work regardless of the chcp code page, and also reads Unicode command-line arguments. Credit to Michael Kaplan for the idea behind this solution. If stdout or stderr are redirected, it will output UTF-8. If you want a Byte Order Mark, you'll need to write it explicitly.

[Edit: This version uses WriteConsoleW instead of the _O_U8TEXT flag in the MSVC runtime library, which is buggy. WriteConsoleW is also buggy relative to the MS documentation, but less so.]

import sys
if sys.platform == "win32":
    import codecs
    from ctypes import WINFUNCTYPE, windll, POINTER, byref, c_int
    from ctypes.wintypes import BOOL, HANDLE, DWORD, LPWSTR, LPCWSTR, LPVOID

    original_stderr = sys.stderr

    # If any exception occurs in this code, we'll probably try to print it on stderr,
    # which makes for frustrating debugging if stderr is directed to our wrapper.
    # So be paranoid about catching errors and reporting them to original_stderr,
    # so that we can at least see them.
    def _complain(message):
        """Report *message* on the stderr stream that was saved before wrapping.

        ``str`` instances are printed as they are; any other value is printed
        through ``repr()``.
        """
        if isinstance(message, str):
            rendered = message
        else:
            rendered = repr(message)
        print >>original_stderr, rendered

    # Work around <http://bugs.python.org/issue6058>.
    codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)

    # Make Unicode console output work independently of the current code page.
    # This also fixes <http://bugs.python.org/issue1602>.
    # Credit to Michael Kaplan <http://www.siao2.com/2010/04/07/9989346.aspx>
    # and TZOmegaTZIOY
    # <http://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash/1432462#1432462>.
    try:
        # <http://msdn.microsoft.com/en-us/library/ms683231(VS.85).aspx>
        # HANDLE WINAPI GetStdHandle(DWORD nStdHandle);
        # returns INVALID_HANDLE_VALUE, NULL, or a valid handle
        #
        # <http://msdn.microsoft.com/en-us/library/aa364960(VS.85).aspx>
        # DWORD WINAPI GetFileType(DWORD hFile);
        #
        # <http://msdn.microsoft.com/en-us/library/ms683167(VS.85).aspx>
        # BOOL WINAPI GetConsoleMode(HANDLE hConsole, LPDWORD lpMode);

        # Each binding below is built as WINFUNCTYPE(restype, *argtypes) applied to
        # an (exported name, DLL) pair, giving a callable for that kernel32 function.

        # GetStdHandle: takes a DWORD selector, returns a HANDLE.
        GetStdHandle = WINFUNCTYPE(HANDLE, DWORD)(("GetStdHandle", windll.kernel32))
        # Selector values passed to GetStdHandle for stdout and stderr.
        STD_OUTPUT_HANDLE = DWORD(-11)
        STD_ERROR_HANDLE = DWORD(-12)
        # GetFileType: declared here as taking and returning a DWORD.
        GetFileType = WINFUNCTYPE(DWORD, DWORD)(("GetFileType", windll.kernel32))
        # FILE_TYPE_CHAR is the value not_a_console() expects from GetFileType once
        # the FILE_TYPE_REMOTE bit has been masked off.
        FILE_TYPE_CHAR = 0x0002
        FILE_TYPE_REMOTE = 0x8000
        # GetConsoleMode: takes a HANDLE and a pointer to a DWORD, returns a BOOL.
        GetConsoleMode = WINFUNCTYPE(BOOL, HANDLE, POINTER(DWORD))(("GetConsoleMode", windll.kernel32))
        # Sentinel compared against handles in not_a_console(): -1 stored in a DWORD
        # and read back as a plain integer.
        # NOTE(review): HANDLE is pointer-sized while DWORD is 32-bit — confirm this
        # sentinel still matches a failed GetStdHandle() result on 64-bit Python.
        INVALID_HANDLE_VALUE = DWORD(-1).value

        def not_a_console(handle):
            """Return True when *handle* should not be treated as a console handle.

            That is the case when the handle equals INVALID_HANDLE_VALUE or is
            None, when GetFileType (with the FILE_TYPE_REMOTE bit masked off) does
            not report FILE_TYPE_CHAR, or when GetConsoleMode returns 0 for it.
            """
            if handle == INVALID_HANDLE_VALUE or handle is None:
                return True

            file_type = GetFileType(handle) & ~FILE_TYPE_REMOTE
            if file_type != FILE_TYPE_CHAR:
                return True

            # The mode value itself is discarded; only success/failure matters.
            return GetConsoleMode(handle, byref(DWORD())) == 0

        # Descriptor numbers that identify the process-level stdout and stderr.
        STDOUT_FILENO = 1
        STDERR_FILENO = 2

        # Ask each current stream object for its descriptor, if it exposes one.
        old_stdout_fileno = old_stderr_fileno = None
        if hasattr(sys.stdout, 'fileno'):
            old_stdout_fileno = sys.stdout.fileno()
        if hasattr(sys.stderr, 'fileno'):
            old_stderr_fileno = sys.stderr.fileno()

        # A stream is "real" only when it still sits on its standard descriptor
        # and the matching Win32 handle passes the console check.
        real_stdout = (old_stdout_fileno == STDOUT_FILENO)
        real_stderr = (old_stderr_fileno == STDERR_FILENO)

        if real_stdout:
            hStdout = GetStdHandle(STD_OUTPUT_HANDLE)
            real_stdout = not not_a_console(hStdout)

        if real_stderr:
            hStderr = GetStdHandle(STD_ERROR_HANDLE)
            real_stderr = not not_a_console(hStderr)

        if real_stdout or real_stderr:
            # BOOL WINAPI WriteConsoleW(HANDLE hOutput, LPWSTR lpBuffer, DWORD nChars,
            #                           LPDWORD lpCharsWritten, LPVOID lpReserved);

            WriteConsoleW = WINFUNCTYPE(BOOL, HANDLE, LPWSTR, DWORD, POINTER(DWORD), LPVOID)(("WriteConsoleW", windll.kernel32))

            class UnicodeOutput:
                """Minimal file-like writer installed in place of sys.stdout / sys.stderr.

                When a console handle is given, text is sent to the console through
                WriteConsoleW. When ``hConsole`` is None, text is UTF-8 encoded and
                forwarded to the wrapped ``stream`` instead.
                """

                def __init__(self, hConsole, stream, fileno, name):
                    """Store the output target and set the file-like attributes.

                    hConsole -- console handle, or None to forward to ``stream``
                    stream   -- underlying file-like object, used when hConsole is None
                    fileno   -- value that fileno() will return
                    name     -- label used in diagnostics and exposed as ``name``
                    """
                    self._hConsole = hConsole
                    self._stream = stream
                    self._fileno = fileno
                    self.name = name
                    self.encoding = 'utf-8'
                    self.mode = 'w'
                    self.softspace = False
                    self.closed = False
                    self.flush()

                def isatty(self):
                    """Always report False."""
                    return False

                def close(self):
                    """Mark the object closed without touching the handle or stream."""
                    # don't really close the handle, that would only cause problems
                    self.closed = True

                def fileno(self):
                    """Return the descriptor number supplied at construction time."""
                    return self._fileno

                def flush(self):
                    """Flush the wrapped stream; do nothing when writing to a console.

                    A failure is reported through _complain and then re-raised.
                    """
                    if self._hConsole is not None:
                        return
                    try:
                        self._stream.flush()
                    except Exception as e:
                        _complain("%s.flush: %r from %r" % (self.name, e, self._stream))
                        raise

                def write(self, text):
                    """Write *text* to the console or to the wrapped stream.

                    Stream mode: unicode text is encoded as UTF-8 before writing.
                    Console mode: non-unicode input is decoded as UTF-8, then sent
                    to WriteConsoleW in pieces until everything has been written.
                    Any exception is reported through _complain and re-raised.
                    """
                    try:
                        if self._hConsole is None:
                            data = text.encode('utf-8') if isinstance(text, unicode) else text
                            self._stream.write(data)
                            return

                        if not isinstance(text, unicode):
                            text = str(text).decode('utf-8')
                        remaining = len(text)
                        while remaining:
                            written = DWORD(0)
                            # There is a shorter-than-documented limitation on the
                            # length of the string passed to WriteConsoleW (see
                            # <http://tahoe-lafs.org/trac/tahoe-lafs/ticket/1232>),
                            # so at most 10000 characters are offered per call.
                            chunk_len = min(remaining, 10000)
                            retval = WriteConsoleW(self._hConsole, text, chunk_len, byref(written), None)
                            if retval == 0 or written.value == 0:
                                raise IOError("WriteConsoleW returned %r, n.value = %r" % (retval, written.value))
                            remaining -= written.value
                            # Drop what the console accepted; the rest goes next round.
                            text = text[written.value:]
                    except Exception as e:
                        _complain("%s.write: %r" % (self.name, e))
                        raise

                def writelines(self, lines):
                    """Write every item of *lines* in order via write().

                    Any exception is reported through _complain and re-raised.
                    """
                    try:
                        for entry in lines:
                            self.write(entry)
                    except Exception as e:
                        _complain("%s.writelines: %r" % (self.name, e))
                        raise

            # Replace sys.stdout: write to the console handle when stdout is a real
            # console, otherwise wrap the existing stream object (UTF-8 output).
            if real_stdout:
                sys.stdout = UnicodeOutput(hStdout, None, STDOUT_FILENO, '<Unicode console stdout>')
            else:
                sys.stdout = UnicodeOutput(None, sys.stdout, old_stdout_fileno, '<Unicode redirected stdout>')

            # Same decision for sys.stderr, made independently of stdout.
            if real_stderr:
                sys.stderr = UnicodeOutput(hStderr, None, STDERR_FILENO, '<Unicode console stderr>')
            else:
                sys.stderr = UnicodeOutput(None, sys.stderr, old_stderr_fileno, '<Unicode redirected stderr>')
    except Exception as e:
        _complain("exception %r while fixing up sys.stdout and sys.stderr" % (e,))


    # While we're at it, let's unmangle the command-line arguments:
    #
    # The process command line is fetched as a wide string and split with
    # CommandLineToArgvW, then every argument is re-encoded as a UTF-8 byte
    # string before being stored in sys.argv.

    # This works around <http://bugs.python.org/issue2128>.
    GetCommandLineW = WINFUNCTYPE(LPWSTR)(("GetCommandLineW", windll.kernel32))
    CommandLineToArgvW = WINFUNCTYPE(POINTER(LPWSTR), LPCWSTR, POINTER(c_int))(("CommandLineToArgvW", windll.shell32))

    # argc receives the number of entries in the returned array.
    argc = c_int(0)
    argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc))

    # From here on every entry of argv is a UTF-8 encoded byte string.
    argv = [argv_unicode[i].encode('utf-8') for i in xrange(0, argc.value)]

    if not hasattr(sys, 'frozen'):
        # If this is an executable produced by py2exe or bbfreeze, then it will
        # have been invoked directly. Otherwise, argv_unicode[0] is the Python
        # interpreter, so skip that.
        argv = argv[1:]

        # Also skip option arguments to the Python interpreter.
        #
        # The comparisons use byte-string literals on purpose: argv holds byte
        # strings, and mixing them with unicode literals makes Python 2 decode the
        # argument as ASCII first, which raises UnicodeDecodeError as soon as the
        # argument (e.g. a script path) contains a non-ASCII character.
        while argv:
            arg = argv[0]
            if not arg.startswith(b"-") or arg == b"-":
                # First non-option argument (or a lone "-"): this is argv[0].
                break
            argv = argv[1:]
            if arg == b'-m':
                # sys.argv[0] should really be the absolute path of the module source,
                # but never mind
                break
            if arg == b'-c':
                # Replace the command text that followed -c with '-c' itself,
                # keeping the list made of byte strings only.
                argv[0] = b'-c'
                break

    # if you like:
    sys.argv = argv

Finally, it is possible to grant ΤΖΩΤΖΙΟΥ's wish to use DejaVu Sans Mono, which I agree is an excellent font, for the console.

You can find information on the font requirements and how to add new fonts for the windows console in the 'Necessary criteria for fonts to be available in a command window' Microsoft KB

But basically, on Vista (probably also Win7):

  • under HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Console\TrueTypeFont, set "0" to "DejaVu Sans Mono";
  • for each of the subkeys under HKEY_CURRENT_USER\Console, set "FaceName" to "DejaVu Sans Mono".

On XP, check the thread 'Changing Command Prompt fonts?' in LockerGnome forums.

这篇关于Windows cmd编码更改导致Python崩溃的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆