如何在Windows控制台中的python中打印unicode字符串 [英] How to print a unicode string in python in Windows console

查看:259
本文介绍了如何在Windows控制台中的python中打印unicode字符串的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在研究一个可以在多个平台上以多种语言打印文本到控制台的python应用程序。
该程序在所有UNIX平台上运行良好,但在Windows中,在命令行中打印unicode字符串时出错。



已经有一个相关的线程:
(Windows cmd 编码更改导致 Python 崩溃),但我在那里找不到针对我这个问题的具体答案。



例如,对于以下亚洲文本,在Linux中,我可以运行:

>>> print u"\u5f15\u8d77\u7684\u6216".encode("utf-8")
引起的或

但在Windows中,我得到:

>>> print u"\u5f15\u8d77\u7684\u6216".encode("utf-8")
σ╝ץΦ╡╖τתהμטצ

在执行类似操作时,我成功显示正确的文本信息框:

>>> file("bla.vbs", "w").write(u'MsgBox "\u5f15\u8d77\u7684\u6216", 4, "MyTitle"'.encode("utf-16"))
>>> os.system("cscript //U //NoLogo bla.vbs")

但是,我想能够在Windows控制台中执行此操作,最好是在我的python代码之外不需要太多的配置(因为我的应用程序将被分发给许多主机)。



这可能吗?



编辑:如果不可能 - 我也很乐意接受其他建议,只要能在Windows中编写出可以显示unicode的控制台应用程序,例如用python实现的Windows控制台替代品

解决方案

有一个WriteConsoleW解决方案提供了一个unicode argv和stdout(print),而不是stdin : Windows cmd编码更改导致Python崩溃



我唯一修改的地方是让sys.argv保持为unicode。原始版本出于某种原因将其编码成了utf-8。

 #!/ usr / bin / env python 
# - * - 编码:utf-8 - * -

https://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash#answer- 3259271


import sys

如果sys.platform ==win32:
导入编解码器
从ctypes import WINFUNCTYPE ,windll,POINTER,byref,c_int
from ctypes.wintypes import BOOL,HANDLE,DWORD,LPWSTR,LPCWSTR,LPVOID

original_stderr = sys.stderr

#如果在此代码中发生异常,我们可能会尝试在stderr,
#上打印它,这使得如果stderr被引导到我们的包装器上,这样做会令人沮丧的调试。
#所以关于捕捉错误并将其报告给original_stderr,
#,以便我们至少可以看到它们是偏执狂的。
def _complain(message):
print>> original_stderr,message if isinstance(message,str)else repr(message)

#解决 codecs.register(lambda name:codecs.lookup('utf-8')if name =='cp65001'else None)

#使Unicode控制台输出独立于当前代码页。
#这也修复了< http://bugs.python.org/issue1602> ;.
#发给Michael Kaplan< http://www.siao2.com/2010/04/07/9989346.aspx>
#和TZOmegaTZIOY
#< https://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash/1432462#1432462> ;.
try:
#< http://msdn.microsoft.com/en-us/library/ms683231(VS.85).aspx>
#HANDLE WINAPI GetStdHandle(DWORD nStdHandle);
#返回INVALID_HANDLE_VALUE,NULL或有效句柄

#< http://msdn.microsoft.com/en-us/library/aa364960(VS.85).aspx> ;
#DWORD WINAPI GetFileType(DWORD hFile);

#< http://msdn.microsoft.com/en-us/library/ms683167(VS.85).aspx>
#BOOL WINAPI GetConsoleMode(HANDLE hConsole,LPDWORD lpMode);

GetStdHandle = WINFUNCTYPE(HANDLE,DWORD)((GetStdHandle,windll.kernel32))
STD_OUTPUT_HANDLE = DWORD(-11)
STD_ERROR_HANDLE = DWORD(-12)
GetFileType = WINFUNCTYPE(DWORD,DWORD)((GetFileType,windll.kernel32))
FILE_TYPE_CHAR = 0x0002
FILE_TYPE_REMOTE = 0x8000
GetConsoleMode = WINFUNCTYPE(BOOL,HANDLE,POINTER DWORD))((GetConsoleMode,windll.kernel32))
INVALID_HANDLE_VALUE = DWORD(-1).value

def not_a_console(handle):
如果handle == INVALID_HANDLE_VALUE或者handle为None:
return True
return((GetFileType(handle)&〜FILE_TYPE_REMOTE)!= FILE_TYPE_CHAR
或GetConsoleMode(handle,byref(DWORD()))

old_stdout_fileno =无
old_stderr_fileno =无
如果hasattr(sys.stdout,'fileno'):
old_stdout_fileno = sys.stdout.f ileno()
if hasattr(sys.stderr,'fileno'):
old_stderr_fileno = sys.stderr.fileno()

STDOUT_FILENO = 1
STDERR_FILENO = 2
real_stdout =(old_stdout_fileno == STDOUT_FILENO)
real_stderr =(old_stderr_fileno == STDERR_FILENO)

如果real_stdout:
hStdout = GetStdHandle(STD_OUTPUT_HANDLE)
if not_a_console(hStdout):
real_stdout = False

如果real_stderr:
hStderr = GetStdHandle(STD_ERROR_HANDLE)
如果not_a_console(hStderr):
real_stderr = False

如果real_stdout或real_stderr:
#BOOL WINAPI WriteConsoleW(HANDLE hOutput,LPWSTR lpBuffer,DWORD nChars,
#LPDWORD lpCharsWritten,LPVOID lpReserved);

WriteConsoleW = WINFUNCTYPE(BOOL,HANDLE,LPWSTR,DWORD,POINTER(DWORD),LPVOID)((WriteConsoleW,windll.kernel32))

class UnicodeOutput:
def __init __(self,hConsole,stream,fileno,name):
self._hConsole = hConsole
self._stream = stream
self._fileno = fileno
self。 closed = False
self.softspace = False
self.mode ='w'
self.encoding ='utf-8'
self.name = name
self .flush()

def isatty(self):
return False

def close(self):
#不要真的关闭句柄,这只会造成问题
self.closed = True

def fileno(self):
return self._fileno

def flush(self):
如果self._hConsole为None:
try:
self._stream.flush()
除了例外e:
_complain(%s.flush:%r from%r%(self.name,e,self._stream))
raise

def write(self,text):
try:
如果self._hConsole为None:
if isinstance(text,unicode):
text = text.encode('utf-8 ')
self._stream.write(text)
else:
如果不是isinstance(text,unicode):
text = str(text).decode('utf-8 ')
剩余= len(文本)
同时保留ing
n = DWORD(0)
#传递给WriteConsoleW的字符串的
#长度存在一个比文档更短的限制(参见
#< http: //tahoe-lafs.org/trac/tahoe-lafs/ticket/1232> ;.
retval = WriteConsoleW(self._hConsole,text,min(remaining,10000),byref(n),None)
如果retval == 0或n.value == 0:
raise IOError(WriteConsoleW返回%r,n.value =%r%(retval,n.value))
剩余 - = n.value
如果不剩余:
break
文本=文本[n.value:]
除了异常作为e:
_complain(%s.write:%r%(self.name,e))
raise

def writelines(self,lines):
try:
for line in lines:
self.write(line)
except Exception as e:
_complain(%s.writelines:%r%(self.name,e))


如果real_stdout:
sys.stdout = UnicodeOutput(hStdout,None,STDOUT_FILENO,'< Unicode console stdout>')
else:
sys.stdout = UnicodeOutput(None,sys.stdout,old_stdout_fileno,'< Unicode redirected stdout>')

如果real_stderr:
sys.stderr = UnicodeOutput(hStderr,None,STDERR_FILENO,'& Unicode控制台stderr>')
else:
sys.stderr = UnicodeOutput(None,sys.stderr,old_stderr_fileno,'< Unicode redirected stderr>')
除了例外为e:
_complain(异常%r固定sys.stdout和sys.stderr%(e,))


#在我们在它的时候,让我们解开命令 - 行参数:

#这可以在< http://bugs.python.org/issue2128> ;.
GetCommandLineW = WINFUNCTYPE((GetCommandLineW,windll.kernel32))
CommandLineToArgvW = WINFUNCTYPE(POINTER(LPWSTR),LPCWSTR,POINTER(c_int))((CommandLineToArgvW,windll.shell32 )

argc = c_int(0)
argv_unicode = CommandLineToArgvW(GetCommandLineW(),byref(argc))

argv = [argv_unicode [i] for i在xrange(0,argc.value)]

#argv = [argv_unicode [i] .encode('utf-8')for x in xrange(0,argc.value)]

如果不是hasattr(sys,'freeze'):
#如果这是由py2exe或bbfreeze生成的可执行文件,那么将直接调用
#。否则,unicode_argv [0]是Python
#解释器,所以跳过。
argv = argv [1:]

#也跳过Python解释器的选项参数。
while len(argv)> 0:
arg = argv [0]
如果不是arg.startswith(u - )或arg == u - :
break
argv = argv [1 :]
如果arg == u'-m':
#sys.argv [0]应该是模块源的绝对路径,
#但不介意
break
如果arg == u'-c':
argv [0] = u'-c'
break

#如果你喜欢:
sys.argv = argv


I'm working on a python application that can print text in multiple languages to the console in multiple platforms. The program works well on all UNIX platforms, but in windows there are errors printing unicode strings in command-line.

There's already a relevant thread regarding this: ( Windows cmd encoding change causes Python crash ) but I couldn't find my specific answer there.

For example, for the following Asian text, in Linux, I can run:

>>> print u"\u5f15\u8d77\u7684\u6216".encode("utf-8")
引起的或

But in windows I get:

>>> print u"\u5f15\u8d77\u7684\u6216".encode("utf-8")
σ╝ץΦ╡╖τתהµטצ

I succeeded displaying the correct text with a message box when doing something like that:

>>> file("bla.vbs", "w").write(u'MsgBox "\u5f15\u8d77\u7684\u6216", 4, "MyTitle"'.encode("utf-16"))
>>> os.system("cscript //U //NoLogo bla.vbs")

But, I want to be able to do it in windows console, and preferably - without requiring too much configuration outside my python code (because my application will be distributed to many hosts).

Is this possible?

Edit: If it's not possible - I would be happy to accept some other suggestions of writing a console application in windows that displays unicode, e.g. a python implementation of an alternative windows console

解决方案

There's a WriteConsoleW solution that provides a unicode argv and stdout (print) but not stdin: Windows cmd encoding change causes Python crash

The only thing I modified is sys.argv to keep it unicode. The original version utf-8 encoded it for some reason.

#!/usr/bin/env python
# -*- coding: utf-8 -*-

""" https://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash#answer-3259271
"""

import sys

if sys.platform == "win32":
    import codecs
    from ctypes import WINFUNCTYPE, windll, POINTER, byref, c_int
    from ctypes.wintypes import BOOL, HANDLE, DWORD, LPWSTR, LPCWSTR, LPVOID

    # Remember the stderr object that is current at this point, so diagnostics
    # can still be written to it later.
    original_stderr = sys.stderr

    def _complain(message):
        """Write a diagnostic to ``original_stderr``.

        ``str`` messages are printed as they are; any other object is printed
        as its ``repr``. Errors raised by this module are reported through
        here rather than through ``sys.stderr``, because ``sys.stderr`` may be
        one of our own wrappers, which makes for frustrating debugging.
        """
        if not isinstance(message, str):
            message = repr(message)
        print >>original_stderr, message

    # Work around <http://bugs.python.org/issue6058>: resolve the codec name
    # 'cp65001' to the UTF-8 codec.
    codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)

    # Make Unicode console output work independently of the current code page.
    # This also fixes <http://bugs.python.org/issue1602>.
    # Credit to Michael Kaplan <http://www.siao2.com/2010/04/07/9989346.aspx>
    # and TZOmegaTZIOY
    # <https://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash/1432462#1432462>.
    try:
        # ctypes bindings for the kernel32 functions used to decide whether a
        # standard handle is attached to a real console.
        #
        # <http://msdn.microsoft.com/en-us/library/ms683231(VS.85).aspx>
        # HANDLE WINAPI GetStdHandle(DWORD nStdHandle);
        # returns INVALID_HANDLE_VALUE, NULL, or a valid handle
        #
        # <http://msdn.microsoft.com/en-us/library/aa364960(VS.85).aspx>
        # DWORD WINAPI GetFileType(HANDLE hFile);
        #
        # <http://msdn.microsoft.com/en-us/library/ms683167(VS.85).aspx>
        # BOOL WINAPI GetConsoleMode(HANDLE hConsole, LPDWORD lpMode);

        GetStdHandle = WINFUNCTYPE(HANDLE, DWORD)(("GetStdHandle", windll.kernel32))
        STD_OUTPUT_HANDLE = DWORD(-11)
        STD_ERROR_HANDLE = DWORD(-12)
        # hFile is a HANDLE (pointer-sized), not a DWORD; declaring it as DWORD
        # would truncate the handle to 32 bits on 64-bit Windows.
        GetFileType = WINFUNCTYPE(DWORD, HANDLE)(("GetFileType", windll.kernel32))
        FILE_TYPE_CHAR = 0x0002
        FILE_TYPE_REMOTE = 0x8000
        GetConsoleMode = WINFUNCTYPE(BOOL, HANDLE, POINTER(DWORD))(("GetConsoleMode", windll.kernel32))
        # INVALID_HANDLE_VALUE is (HANDLE)-1. Compute it at pointer width so it
        # compares equal to the value GetStdHandle (restype HANDLE) returns on
        # both 32-bit and 64-bit Windows.
        INVALID_HANDLE_VALUE = HANDLE(-1).value

        def not_a_console(handle):
            """Return True unless *handle* refers to a console.

            A handle counts as a console only when it is neither None nor
            INVALID_HANDLE_VALUE, its file type (ignoring the
            FILE_TYPE_REMOTE bit) is FILE_TYPE_CHAR, and GetConsoleMode
            succeeds on it (returns non-zero).
            """
            if handle is None or handle == INVALID_HANDLE_VALUE:
                return True
            file_type = GetFileType(handle) & ~FILE_TYPE_REMOTE
            if file_type != FILE_TYPE_CHAR:
                return True
            console_mode = DWORD()
            return GetConsoleMode(handle, byref(console_mode)) == 0

        # Descriptor numbers that the current stream objects are compared
        # against below.
        STDOUT_FILENO = 1
        STDERR_FILENO = 2

        # Descriptor of each current stream object, or None when the object has
        # no fileno() method at all.
        old_stdout_fileno = sys.stdout.fileno() if hasattr(sys.stdout, 'fileno') else None
        old_stderr_fileno = sys.stderr.fileno() if hasattr(sys.stderr, 'fileno') else None

        # A stream is "real" only when it still uses the expected descriptor AND
        # the matching Windows standard handle is an actual console.
        real_stdout = (old_stdout_fileno == STDOUT_FILENO)
        if real_stdout:
            hStdout = GetStdHandle(STD_OUTPUT_HANDLE)
            real_stdout = not not_a_console(hStdout)

        real_stderr = (old_stderr_fileno == STDERR_FILENO)
        if real_stderr:
            hStderr = GetStdHandle(STD_ERROR_HANDLE)
            real_stderr = not not_a_console(hStderr)

        if real_stdout or real_stderr:
            # At least one stream goes to a console, so bind the wide-character
            # console writer. Its Windows API prototype is:
            #
            #   BOOL WINAPI WriteConsoleW(HANDLE hOutput, LPWSTR lpBuffer, DWORD nChars,
            #                             LPDWORD lpCharsWritten, LPVOID lpReserved);
            WriteConsoleW = WINFUNCTYPE(
                BOOL, HANDLE, LPWSTR, DWORD, POINTER(DWORD), LPVOID
            )(("WriteConsoleW", windll.kernel32))

            class UnicodeOutput:
                """File-like object used as a replacement for sys.stdout / sys.stderr.

                It works in one of two modes, chosen by ``hConsole``:

                * ``hConsole`` is not None: text is written to that console
                  handle with WriteConsoleW (byte strings are first decoded as
                  UTF-8).
                * ``hConsole`` is None: text is forwarded to ``stream``, with
                  unicode objects first encoded as UTF-8.

                Any exception raised while flushing or writing is reported via
                ``_complain`` and then re-raised.
                """

                def __init__(self, hConsole, stream, fileno, name):
                    """Store the target and the file-like attributes, then flush.

                    hConsole -- console handle, or None to write to ``stream``
                    stream   -- underlying file-like object (used when hConsole is None)
                    fileno   -- value reported by fileno()
                    name     -- value of the ``name`` attribute; also used in diagnostics
                    """
                    # Where the output goes.
                    self._hConsole = hConsole
                    self._stream = stream
                    self._fileno = fileno
                    # Attributes expected of a file object.
                    self.name = name
                    self.mode = 'w'
                    self.encoding = 'utf-8'
                    self.closed = False
                    self.softspace = False
                    self.flush()

                def isatty(self):
                    """Always report False."""
                    return False

                def close(self):
                    """Mark the object closed without touching the handle or stream."""
                    # Really closing the handle would only cause problems.
                    self.closed = True

                def fileno(self):
                    """Return the descriptor number given at construction."""
                    return self._fileno

                def flush(self):
                    """Flush the wrapped stream; do nothing in console mode."""
                    if self._hConsole is not None:
                        return
                    try:
                        self._stream.flush()
                    except Exception as e:
                        _complain("%s.flush: %r from %r" % (self.name, e, self._stream))
                        raise

                def write(self, text):
                    """Write *text* to the console handle or to the wrapped stream.

                    Raises IOError when WriteConsoleW fails or writes nothing.
                    """
                    try:
                        if self._hConsole is not None:
                            if not isinstance(text, unicode):
                                text = str(text).decode('utf-8')
                            pending = len(text)
                            while pending:
                                written = DWORD(0)
                                # There is a shorter-than-documented limitation on the
                                # length of the string passed to WriteConsoleW (see
                                # <http://tahoe-lafs.org/trac/tahoe-lafs/ticket/1232>),
                                # so at most 10000 characters are sent per call.
                                status = WriteConsoleW(self._hConsole, text, min(pending, 10000),
                                                       byref(written), None)
                                if status == 0 or written.value == 0:
                                    raise IOError("WriteConsoleW returned %r, n.value = %r" % (status, written.value))
                                pending -= written.value
                                if pending:
                                    # Drop what was written and go round again.
                                    text = text[written.value:]
                        else:
                            if isinstance(text, unicode):
                                text = text.encode('utf-8')
                            self._stream.write(text)
                    except Exception as e:
                        _complain("%s.write: %r" % (self.name, e))
                        raise

                def writelines(self, lines):
                    """Call write() once for each item of *lines*, in order."""
                    try:
                        for chunk in lines:
                            self.write(chunk)
                    except Exception as e:
                        _complain("%s.writelines: %r" % (self.name, e))
                        raise

            # Install a wrapper on each stream: console mode (handle, no
            # stream) when the stream is a real console, otherwise pass-through
            # mode (no handle) around the existing stream object.
            sys.stdout = (
                UnicodeOutput(hStdout, None, STDOUT_FILENO, '<Unicode console stdout>')
                if real_stdout else
                UnicodeOutput(None, sys.stdout, old_stdout_fileno, '<Unicode redirected stdout>')
            )

            sys.stderr = (
                UnicodeOutput(hStderr, None, STDERR_FILENO, '<Unicode console stderr>')
                if real_stderr else
                UnicodeOutput(None, sys.stderr, old_stderr_fileno, '<Unicode redirected stderr>')
            )
    except Exception as e:
        # Report the failure through _complain instead of re-raising, so that a
        # problem while wrapping the streams does not abort the program.
        _complain("exception %r while fixing up sys.stdout and sys.stderr" % (e,))


    # While we're at it, let's unmangle the command-line arguments:
    # rebuild sys.argv from the wide-character command line so that the
    # arguments are unicode objects.

    # This works around <http://bugs.python.org/issue2128>.
    GetCommandLineW = WINFUNCTYPE(LPWSTR)(("GetCommandLineW", windll.kernel32))
    CommandLineToArgvW = WINFUNCTYPE(POINTER(LPWSTR), LPCWSTR, POINTER(c_int))(("CommandLineToArgvW", windll.shell32))

    argc = c_int(0)
    argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc))

    # Keep the arguments as unicode objects. (A variant of this code
    # re-encoded each one with .encode('utf-8') here instead.)
    argv = [argv_unicode[i] for i in xrange(0, argc.value)]

    if not hasattr(sys, 'frozen'):
        # If this is an executable produced by py2exe or bbfreeze, then it will
        # have been invoked directly. Otherwise, argv[0] is the Python
        # interpreter, so skip that.
        argv = argv[1:]

        # Also skip option arguments to the Python interpreter.
        while len(argv) > 0:
            arg = argv[0]
            if not arg.startswith(u"-") or arg == u"-":
                break
            argv = argv[1:]
            if arg == u'-m':
                # sys.argv[0] should really be the absolute path of the module source,
                # but never mind
                break
            if arg == u'-c':
                # Replace the command string with u'-c' so that argv[0] is
                # u'-c' and the remaining arguments follow it.
                argv[0] = u'-c'
                break
            if arg in (u'-W', u'-Q') and len(argv) > 0:
                # -W and -Q take their value as a separate argument
                # (e.g. "python -W ignore script.py"). Skip the value as well,
                # otherwise it would be mistaken for the script name.
                argv = argv[1:]

    # if you like:
    sys.argv = argv

这篇关于如何在Windows控制台中的python中打印unicode字符串的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆