如何在Windows控制台中的python中打印unicode字符串 [英] How to print a unicode string in python in Windows console
问题描述
我正在开发一个Python应用程序,它可以在多个平台的控制台中打印多种语言的文本。该程序在所有UNIX平台上运行良好,但在Windows中,在命令行中打印unicode字符串时会出错。
已经有一个与此相关的讨论帖:
(Windows cmd编码更改导致Python崩溃),但我在那里找不到针对我的问题的具体答案。
例如,对于以下亚洲文本,在Linux中,我可以运行:
>>> print u"\u5f15\u8d77\u7684\u6216".encode("utf-8")
引起的或
但在Windows中,我得到:
>>> print u"\u5f15\u8d77\u7684\u6216".encode("utf-8")
σ╝ץΦ╡╖τתהμטצ
通过类似下面的操作,我成功地用消息框显示了正确的文本:
>>> file("bla.vbs", "w").write(u'MsgBox "\u5f15\u8d77\u7684\u6216", 4, "MyTitle"'.encode("utf-16"))
>>> os.system("cscript //U //NoLogo bla.vbs")
但是,我想能够在Windows控制台中执行此操作,最好是在我的python代码之外不需要太多的配置(因为我的应用程序将被分发给许多主机)。
这可能吗?
编辑:如果不可能 - 我也很乐意接受其他建议,以便在Windows中编写能够显示unicode的控制台应用程序,例如一个替代Windows控制台的python实现
有一个基于WriteConsoleW的解决方案,它提供了unicode的argv和stdout(print),但不支持stdin: Windows cmd编码更改导致Python崩溃
我唯一修改的地方是让sys.argv保持为unicode。原始版本出于某种原因将其编码成了utf-8。
#!/ usr / bin / env python
# - * - 编码:utf-8 - * -
https://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash#answer- 3259271
import sys
如果sys.platform ==win32:
导入编解码器
从ctypes import WINFUNCTYPE ,windll,POINTER,byref,c_int
from ctypes.wintypes import BOOL,HANDLE,DWORD,LPWSTR,LPCWSTR,LPVOID
original_stderr = sys.stderr
#如果在此代码中发生异常,我们可能会尝试在stderr,
#上打印它,这使得如果stderr被引导到我们的包装器上,这样做会令人沮丧的调试。
#所以关于捕捉错误并将其报告给original_stderr,
#,以便我们至少可以看到它们是偏执狂的。
def _complain(message):
print>> original_stderr,message if isinstance(message,str)else repr(message)
#解决 codecs.register(lambda name:codecs.lookup('utf-8')if name =='cp65001'else None)
#使Unicode控制台输出独立于当前代码页。
#这也修复了< http://bugs.python.org/issue1602> ;.
#发给Michael Kaplan< http://www.siao2.com/2010/04/07/9989346.aspx>
#和TZOmegaTZIOY
#< https://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash/1432462#1432462> ;.
try:
#< http://msdn.microsoft.com/en-us/library/ms683231(VS.85).aspx>
#HANDLE WINAPI GetStdHandle(DWORD nStdHandle);
#返回INVALID_HANDLE_VALUE,NULL或有效句柄
#
#< http://msdn.microsoft.com/en-us/library/aa364960(VS.85).aspx> ;
#DWORD WINAPI GetFileType(DWORD hFile);
#
#< http://msdn.microsoft.com/en-us/library/ms683167(VS.85).aspx>
#BOOL WINAPI GetConsoleMode(HANDLE hConsole,LPDWORD lpMode);
GetStdHandle = WINFUNCTYPE(HANDLE,DWORD)((GetStdHandle,windll.kernel32))
STD_OUTPUT_HANDLE = DWORD(-11)
STD_ERROR_HANDLE = DWORD(-12)
GetFileType = WINFUNCTYPE(DWORD,DWORD)((GetFileType,windll.kernel32))
FILE_TYPE_CHAR = 0x0002
FILE_TYPE_REMOTE = 0x8000
GetConsoleMode = WINFUNCTYPE(BOOL,HANDLE,POINTER DWORD))((GetConsoleMode,windll.kernel32))
INVALID_HANDLE_VALUE = DWORD(-1).value
def not_a_console(handle):
如果handle == INVALID_HANDLE_VALUE或者handle为None:
return True
return((GetFileType(handle)&〜FILE_TYPE_REMOTE)!= FILE_TYPE_CHAR
或GetConsoleMode(handle,byref(DWORD()))
old_stdout_fileno =无
old_stderr_fileno =无
如果hasattr(sys.stdout,'fileno'):
old_stdout_fileno = sys.stdout.f ileno()
if hasattr(sys.stderr,'fileno'):
old_stderr_fileno = sys.stderr.fileno()
STDOUT_FILENO = 1
STDERR_FILENO = 2
real_stdout =(old_stdout_fileno == STDOUT_FILENO)
real_stderr =(old_stderr_fileno == STDERR_FILENO)
如果real_stdout:
hStdout = GetStdHandle(STD_OUTPUT_HANDLE)
if not_a_console(hStdout):
real_stdout = False
如果real_stderr:
hStderr = GetStdHandle(STD_ERROR_HANDLE)
如果not_a_console(hStderr):
real_stderr = False
如果real_stdout或real_stderr:
#BOOL WINAPI WriteConsoleW(HANDLE hOutput,LPWSTR lpBuffer,DWORD nChars,
#LPDWORD lpCharsWritten,LPVOID lpReserved);
WriteConsoleW = WINFUNCTYPE(BOOL,HANDLE,LPWSTR,DWORD,POINTER(DWORD),LPVOID)((WriteConsoleW,windll.kernel32))
class UnicodeOutput:
def __init __(self,hConsole,stream,fileno,name):
self._hConsole = hConsole
self._stream = stream
self._fileno = fileno
self。 closed = False
self.softspace = False
self.mode ='w'
self.encoding ='utf-8'
self.name = name
self .flush()
def isatty(self):
return False
def close(self):
#不要真的关闭句柄,这只会造成问题
self.closed = True
def fileno(self):
return self._fileno
def flush(self):
如果self._hConsole为None:
try:
self._stream.flush()
除了例外e:
_complain(%s.flush:%r from%r%(self.name,e,self._stream))
raise
def write(self,text):
try:
如果self._hConsole为None:
if isinstance(text,unicode):
text = text.encode('utf-8 ')
self._stream.write(text)
else:
如果不是isinstance(text,unicode):
text = str(text).decode('utf-8 ')
剩余= len(文本)
同时保留ing
n = DWORD(0)
#传递给WriteConsoleW的字符串的
#长度存在一个比文档更短的限制(参见
#< http: //tahoe-lafs.org/trac/tahoe-lafs/ticket/1232> ;.
retval = WriteConsoleW(self._hConsole,text,min(remaining,10000),byref(n),None)
如果retval == 0或n.value == 0:
raise IOError(WriteConsoleW返回%r,n.value =%r%(retval,n.value))
剩余 - = n.value
如果不剩余:
break
文本=文本[n.value:]
除了异常作为e:
_complain(%s.write:%r%(self.name,e))
raise
def writelines(self,lines):
try:
for line in lines:
self.write(line)
except Exception as e:
_complain(%s.writelines:%r%(self.name,e))
如果real_stdout:
sys.stdout = UnicodeOutput(hStdout,None,STDOUT_FILENO,'< Unicode console stdout>')
else:
sys.stdout = UnicodeOutput(None,sys.stdout,old_stdout_fileno,'< Unicode redirected stdout>')
如果real_stderr:
sys.stderr = UnicodeOutput(hStderr,None,STDERR_FILENO,'& Unicode控制台stderr>')
else:
sys.stderr = UnicodeOutput(None,sys.stderr,old_stderr_fileno,'< Unicode redirected stderr>')
除了例外为e:
_complain(异常%r固定sys.stdout和sys.stderr%(e,))
#在我们在它的时候,让我们解开命令 - 行参数:
#这可以在< http://bugs.python.org/issue2128> ;.
GetCommandLineW = WINFUNCTYPE((GetCommandLineW,windll.kernel32))
CommandLineToArgvW = WINFUNCTYPE(POINTER(LPWSTR),LPCWSTR,POINTER(c_int))((CommandLineToArgvW,windll.shell32 )
argc = c_int(0)
argv_unicode = CommandLineToArgvW(GetCommandLineW(),byref(argc))
argv = [argv_unicode [i] for i在xrange(0,argc.value)]
#argv = [argv_unicode [i] .encode('utf-8')for x in xrange(0,argc.value)]
如果不是hasattr(sys,'freeze'):
#如果这是由py2exe或bbfreeze生成的可执行文件,那么将直接调用
#。否则,unicode_argv [0]是Python
#解释器,所以跳过。
argv = argv [1:]
#也跳过Python解释器的选项参数。
while len(argv)> 0:
arg = argv [0]
如果不是arg.startswith(u - )或arg == u - :
break
argv = argv [1 :]
如果arg == u'-m':
#sys.argv [0]应该是模块源的绝对路径,
#但不介意
break
如果arg == u'-c':
argv [0] = u'-c'
break
#如果你喜欢:
sys.argv = argv
I'm working on a python application that can print text in multiple languages to the console in multiple platforms. The program works well on all UNIX platforms, but in windows there are errors printing unicode strings in command-line.
There's already a relevant thread regarding this: ( Windows cmd encoding change causes Python crash ) but I couldn't find my specific answer there.
For example, for the following Asian text, in Linux, I can run:
>>> print u"\u5f15\u8d77\u7684\u6216".encode("utf-8")
引起的或
But in windows I get:
>>> print u"\u5f15\u8d77\u7684\u6216".encode("utf-8")
σ╝ץΦ╡╖τתהµטצ
I succeeded displaying the correct text with a message box when doing something like that:
>>> file("bla.vbs", "w").write(u'MsgBox "\u5f15\u8d77\u7684\u6216", 4, "MyTitle"'.encode("utf-16"))
>>> os.system("cscript //U //NoLogo bla.vbs")
But, I want to be able to do it in windows console, and preferably - without requiring too much configuration outside my python code (because my application will be distributed to many hosts).
Is this possible?
Edit: If it's not possible - I would be happy to accept some other suggestions of writing a console application in windows that displays unicode, e.g. a python implementation of an alternative windows console
There's a WriteConsoleW solution that provides a unicode argv and stdout (print) but not stdin: Windows cmd encoding change causes Python crash
The only thing I modified is sys.argv to keep it unicode. The original version utf-8 encoded it for some reason.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" https://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash#answer-3259271
"""
import sys
if sys.platform == "win32":
import codecs
from ctypes import WINFUNCTYPE, windll, POINTER, byref, c_int
from ctypes.wintypes import BOOL, HANDLE, DWORD, LPWSTR, LPCWSTR, LPVOID
original_stderr = sys.stderr
# If any exception occurs in this code, we'll probably try to print it on stderr,
# which makes for frustrating debugging if stderr is directed to our wrapper.
# So be paranoid about catching errors and reporting them to original_stderr,
# so that we can at least see them.
def _complain(message):
    """Report *message* on the stderr stream saved before any wrapping.

    The output goes to ``original_stderr`` rather than ``sys.stderr`` so
    that problems raised by the replacement stream objects can still be
    seen.  ``str`` messages are printed unchanged; any other object is
    printed through ``repr``.
    """
    print >>original_stderr, message if isinstance(message, str) else repr(message)
# Work around <http://bugs.python.org/issue6058>.
# The registered search function resolves the codec name 'cp65001' to the
# UTF-8 codec and returns None for every other name, leaving those lookups
# to the remaining search functions.
codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
# Make Unicode console output work independently of the current code page.
# This also fixes <http://bugs.python.org/issue1602>.
# Credit to Michael Kaplan <http://www.siao2.com/2010/04/07/9989346.aspx>
# and TZOmegaTZIOY
# <https://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash/1432462#1432462>.
try:
# ctypes prototypes and constants used to decide whether a standard handle
# refers to a console.
#
# <http://msdn.microsoft.com/en-us/library/ms683231(VS.85).aspx>
# HANDLE WINAPI GetStdHandle(DWORD nStdHandle);
# returns INVALID_HANDLE_VALUE, NULL, or a valid handle
#
# <http://msdn.microsoft.com/en-us/library/aa364960(VS.85).aspx>
# DWORD WINAPI GetFileType(DWORD hFile);
# NOTE(review): Microsoft's reference declares this parameter as
# HANDLE hFile, while the prototype below passes it as DWORD -- confirm
# this is acceptable for the handle values seen on 64-bit builds.
#
# <http://msdn.microsoft.com/en-us/library/ms683167(VS.85).aspx>
# BOOL WINAPI GetConsoleMode(HANDLE hConsole, LPDWORD lpMode);
GetStdHandle = WINFUNCTYPE(HANDLE, DWORD)(("GetStdHandle", windll.kernel32))
# nStdHandle selectors passed to GetStdHandle for stdout and stderr.
STD_OUTPUT_HANDLE = DWORD(-11)
STD_ERROR_HANDLE = DWORD(-12)
GetFileType = WINFUNCTYPE(DWORD, DWORD)(("GetFileType", windll.kernel32))
# not_a_console() masks FILE_TYPE_REMOTE out of the GetFileType result and
# compares what is left against FILE_TYPE_CHAR.
FILE_TYPE_CHAR = 0x0002
FILE_TYPE_REMOTE = 0x8000
GetConsoleMode = WINFUNCTYPE(BOOL, HANDLE, POINTER(DWORD))(("GetConsoleMode", windll.kernel32))
# Plain integer form of DWORD(-1), so it can be compared with == against a
# handle value.
INVALID_HANDLE_VALUE = DWORD(-1).value
def not_a_console(handle):
    """Return True unless *handle* looks like a Windows console handle.

    A handle is rejected when it is ``None`` or ``INVALID_HANDLE_VALUE``,
    when ``GetFileType`` (with the ``FILE_TYPE_REMOTE`` bit masked off) does
    not report ``FILE_TYPE_CHAR``, or when ``GetConsoleMode`` returns 0.
    """
    # Check None first: it needs neither a Win32 call nor a module constant.
    if handle is None or handle == INVALID_HANDLE_VALUE:
        return True
    if (GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR:
        return True
    # A zero return from GetConsoleMode means the call failed for this
    # handle, which is treated as "not a console".
    return GetConsoleMode(handle, byref(DWORD())) == 0
# Remember which file descriptors the current stream objects report, if any.
old_stdout_fileno = None
old_stderr_fileno = None
if hasattr(sys.stdout, 'fileno'):
    old_stdout_fileno = sys.stdout.fileno()
if hasattr(sys.stderr, 'fileno'):
    old_stderr_fileno = sys.stderr.fileno()

STDOUT_FILENO = 1
STDERR_FILENO = 2
# A stream is a candidate for direct console output only if it still
# reports its standard descriptor number...
real_stdout = (old_stdout_fileno == STDOUT_FILENO)
real_stderr = (old_stderr_fileno == STDERR_FILENO)

# ...and the matching Win32 standard handle passes the console check.
if real_stdout:
    hStdout = GetStdHandle(STD_OUTPUT_HANDLE)
    if not_a_console(hStdout):
        real_stdout = False

if real_stderr:
    hStderr = GetStdHandle(STD_ERROR_HANDLE)
    if not_a_console(hStderr):
        real_stderr = False
if real_stdout or real_stderr:
# BOOL WINAPI WriteConsoleW(HANDLE hOutput, LPWSTR lpBuffer, DWORD nChars,
# LPDWORD lpCharsWritten, LPVOID lpReserved);
WriteConsoleW = WINFUNCTYPE(BOOL, HANDLE, LPWSTR, DWORD, POINTER(DWORD), LPVOID)(("WriteConsoleW", windll.kernel32))
class UnicodeOutput:
    """File-like object that writes text to a Windows console or to a stream.

    With a console handle, ``write`` sends unicode text through
    ``WriteConsoleW``.  With ``hConsole=None``, unicode text is encoded as
    UTF-8 and forwarded to the wrapped ``stream``.

    Errors raised by ``flush``, ``write`` and ``writelines`` are reported via
    ``_complain`` and then re-raised.
    """

    # There is a shorter-than-documented limitation on the length of the
    # string passed to WriteConsoleW (see
    # <http://tahoe-lafs.org/trac/tahoe-lafs/ticket/1232>), so console
    # output is written in chunks of at most this many characters.
    _MAX_CHARS_PER_WRITE = 10000

    def __init__(self, hConsole, stream, fileno, name):
        """Set up the wrapper and flush the wrapped stream once.

        hConsole -- console handle for WriteConsoleW, or None to use *stream*
        stream   -- underlying file-like object, used when hConsole is None
        fileno   -- value returned by fileno()
        name     -- label stored as ``name`` and used in error reports
        """
        self._hConsole = hConsole
        self._stream = stream
        self._fileno = fileno
        self.closed = False
        self.softspace = False
        self.mode = 'w'
        self.encoding = 'utf-8'
        self.name = name
        self.flush()

    def isatty(self):
        """Always report False."""
        return False

    def close(self):
        """Mark the object as closed without touching the handle or stream."""
        # don't really close the handle, that would only cause problems
        self.closed = True

    def fileno(self):
        """Return the file descriptor number given to the constructor."""
        return self._fileno

    def flush(self):
        """Flush the wrapped stream; do nothing in console mode."""
        if self._hConsole is None:
            try:
                self._stream.flush()
            except Exception as e:
                _complain("%s.flush: %r from %r" % (self.name, e, self._stream))
                raise

    def write(self, text):
        """Write *text* to the console handle or to the wrapped stream.

        Raises IOError when WriteConsoleW returns 0 or reports that no
        characters were written.
        """
        try:
            if self._hConsole is None:
                # Stream mode: the wrapped stream receives UTF-8 bytes.
                if isinstance(text, unicode):
                    text = text.encode('utf-8')
                self._stream.write(text)
            else:
                # Console mode: WriteConsoleW needs unicode, so anything
                # else is converted with str() and decoded as UTF-8.
                if not isinstance(text, unicode):
                    text = str(text).decode('utf-8')
                remaining = len(text)
                while remaining:
                    n = DWORD(0)
                    retval = WriteConsoleW(self._hConsole, text,
                                           min(remaining, self._MAX_CHARS_PER_WRITE),
                                           byref(n), None)
                    if retval == 0 or n.value == 0:
                        raise IOError("WriteConsoleW returned %r, n.value = %r" % (retval, n.value))
                    remaining -= n.value
                    if not remaining:
                        break
                    # Drop what was written and continue with the rest.
                    text = text[n.value:]
        except Exception as e:
            _complain("%s.write: %r" % (self.name, e))
            raise

    def writelines(self, lines):
        """Write every item of *lines* in order using write()."""
        try:
            for line in lines:
                self.write(line)
        except Exception as e:
            _complain("%s.writelines: %r" % (self.name, e))
            raise
# Install the wrappers.  A stream backed by a real console is given the
# console handle and no stream object; any other stream keeps its current
# object and descriptor number and is wrapped in stream mode.
if real_stdout:
    sys.stdout = UnicodeOutput(hStdout, None, STDOUT_FILENO, '<Unicode console stdout>')
else:
    sys.stdout = UnicodeOutput(None, sys.stdout, old_stdout_fileno, '<Unicode redirected stdout>')

if real_stderr:
    sys.stderr = UnicodeOutput(hStderr, None, STDERR_FILENO, '<Unicode console stderr>')
else:
    sys.stderr = UnicodeOutput(None, sys.stderr, old_stderr_fileno, '<Unicode redirected stderr>')
except Exception as e:
_complain("exception %r while fixing up sys.stdout and sys.stderr" % (e,))
# While we're at it, let's unmangle the command-line arguments:
# This works around <http://bugs.python.org/issue2128>.
GetCommandLineW = WINFUNCTYPE(LPWSTR)(("GetCommandLineW", windll.kernel32))
CommandLineToArgvW = WINFUNCTYPE(POINTER(LPWSTR), LPCWSTR, POINTER(c_int))(("CommandLineToArgvW", windll.shell32))

# Split the wide-character command line so every argument is a unicode string.
argc = c_int(0)
argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc))
argv = [argv_unicode[i] for i in xrange(0, argc.value)]
# Alternative that UTF-8 encodes each argument instead of keeping unicode:
# argv = [argv_unicode[i].encode('utf-8') for i in xrange(0, argc.value)]

if not hasattr(sys, 'frozen'):
    # If this is an executable produced by py2exe or bbfreeze, then it will
    # have been invoked directly. Otherwise, unicode_argv[0] is the Python
    # interpreter, so skip that.
    argv = argv[1:]

    # Also skip option arguments to the Python interpreter.
    # NOTE(review): an option whose value is a separate token (for example
    # "-W ignore") is not recognised here; the value would be taken as the
    # first real argument -- confirm whether that matters for callers.
    while len(argv) > 0:
        arg = argv[0]
        if not arg.startswith(u"-") or arg == u"-":
            # First non-option argument (or a lone "-"): stop skipping.
            break
        argv = argv[1:]
        if arg == u'-m':
            # sys.argv[0] should really be the absolute path of the module source,
            # but never mind
            break
        if arg == u'-c':
            # The command string that followed -c is replaced by u'-c'.
            argv[0] = u'-c'
            break

# if you like:
sys.argv = argv
这篇关于如何在Windows控制台中的python中打印unicode字符串的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!