Python服务器的实时分析 [英] Live Profiling of Python Server
本文介绍了Python服务器的实时分析的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!
问题描述
我回答我自己的问题。
解决方案
import os,re,sys,time,datetime,collections,thread,threading,atexit,traceback
u'''
debug_live.start(seconds_float)启动一个监视器线程,将所有线程的stacktrace
打印成一个日志文件。
您可以使用此脚本报告哪些行最多执行:
app_foo_d @ server:〜$ python djangotools / utils / debug_live.py -h
用法:debug_live.py [-h] [-most-common N] {sum-all-frames,sum-last-frame}
读取栈跟踪日志
位置参数:
{sum-all-frames,sum-last-frame}
可选参数:
-h,--help显示此帮助消息并退出
- 最常见N在堆栈跟踪中显示N个最常见的行
---------------------------------
你可以开始看你的django中间件这样的线程:
class FOOMiddleware:
def __init __(self):
你的代码在wsgi工作进程开始之后被执行一次。
秒= getattr(设置,'debug_live_interval',无)
如果秒:
秒= float(秒)
从djangotools.utils导入debug_live
debug_live.start(秒)
#settings.py
debug_live_interval = 0.3#有0.3秒
#灵感来自 http://code.google.com/p/modwsgi/wiki/DebuggingTechniques
您可以从下面获取堆栈跟踪日志文件的简单报告。不是来自django的
行被标记为< ====。这很可能是你的代码
,这可能是瓶颈。
python .... / debug_live.py读
1971文件:/home/foo_bar_p/django/core/handlers/wsgi.py,第272行,__call__
response = self.get_response(request)
1812文件:/home/foo_bar_p/django/core/handlers/base.py,第111行,get_response
response = callback(request,* callback_args ,** callback_kwargs)
1725文件:/home/foo_bar_p/django/db/backends/postgresql_psycopg2/base.py,第44行,执行
return self.cursor.execute(query,args )
1724文件:/home/foo_bar_p/django/db/models/sql/compiler.py,行735,在execute_sql
cursor.execute(sql,params)
1007文件:/home/foo_bar_p/django/db/models/sql/compiler.py,第680行,在result_iter
for self.execute_sql(MULTI)中的行:
796文件:/ home / foo_bar_p / django / db / models / query.py,第273行,迭代器
在compile.results_iter()中的行:
763 File:/ home / foo_bar_p / foo / utils / ticketutils。 py,第135行,__init__< === =
filter = type_filter(root_node = self.root_node)
684文件:/home/foo_bar_p/django/db/models/query.py,第334行,计数
返回自身.query.get_count(using = self.db)
679文件:/home/foo_bar_p/django/db/models/sql/query.py,第367行,get_aggregation
result = query。 get_compiler(using).execute_sql(SINGLE)
677文件:/home/foo_bar_p/django/db/models/sql/query.py,第401行,in get_count
number = obj.get_aggregation(使用=使用)[无]
'''
从django.conf导入设置
outfile = os.path.expanduser('〜/ tmp / debug_live.log')
other_code = re.compile(r'/(django | python ...)/')
def stacktraces():
code = []
now = datetime.datetime.now()
pid = os.getpid()
my_thread_id =线程.get_ident()
for thread_id,stack in sys._current_frames()。items():
如果thread_id == my_thread_id:
continue#D不打印此监视器线程
code.append(\\\
\\\
#开始日期:%s\\\
#ProcessId:%s\\\
#ThreadID:%s%(现在,pid ,thread_id))
的文件名,lineno,名称,行在traceback.extract_stack(stack):
code.append('File:%s,行%d,在%s'%文件名,lineno,名称))
如果行:
code.append(%s%(line.strip()))
code.append('#END')
如果不是代码:
return
fd = open(outfile,'at')
fd.write('\\\
'.join(code))
fd。关闭()
def monitor(interval):
while monitor_thread:
stacktraces()
time.sleep(interval)
monitor_thread =无
def exiting():
全局monitor_thread
monitor_thread =无
def start(interval):
global monitor_thread
如果monitor_thread:
return
assert not os.path.islink(outfile), outfile#众所周知的临时名称... symlink攻击...
monitor_thread = threading.Thread(target = monitor,args = [interval])
monitor_thread.setDaemon(True)
atexit
monitor_thread.start()
def read_logs(args):
#outfile可以是巨大的,不要将整个文件读入内存。
counter = collections.Counter()
cur_stack = []
py_line =''
code_line =''
如果args.action =='sum-all-框架':
sum_all_frames = True
else:
sum_all_frames = False
for open in open(outfile):
if line.startswith('#END'):
如果sum_all_frames:
frames = cur_stack
else:
frames = cur_stack [-1:]
counter.update(frames)
cur_stack = []
继续
如果'\#'中的行[0]:
继续
如果line.startswith('File:'):
py_line =行。 rstrip()
continue
如果line.startswith(''):
code_line = line.rstrip()
如果没有(py_line,code_line)在cur_stack中:
#如果有递归,则每堆栈跟踪
cur_stack.ap只计算一次该行pend((py_line,code_line))
continue
print'ERROR unparsed',line
for(py,code),c in counter.most_common(args.most_common):
如果不是other_code.search(py):
py ='%s< ===='%py
print'%5d%s\\\
%s'%(c,py,code )
def main():
import argparse
parser = argparse.ArgumentParser(description ='读栈跟踪日志')
parser.add_argument('action' options = ['sum-all-frames','sum-last-frame'])
parser.add_argument(' - most-common',metavar ='N',default = 30,type = help ='在堆栈跟踪中显示N个最常见的行)
args = parser.parse_args()
return read_logs(args)
如果__name __ =='__ main__'
main()
I want to know where the python interpreter spends the most time. I use it on a live django application, but it should work for all long running python processes.
I answer my own question.
解决方案
import os, re, sys, time, datetime, collections, thread, threading, atexit, traceback
u'''
debug_live.start(seconds_float) starts a monitor thread which print
the stacktrace of all threads into a logfile.
You can report which lines are executed the most with this script:
app_foo_d@server:~$ python djangotools/utils/debug_live.py -h
usage: debug_live.py [-h] [--most-common N] {sum-all-frames,sum-last-frame}
Read stacktrace log
positional arguments:
{sum-all-frames,sum-last-frame}
optional arguments:
-h, --help show this help message and exit
--most-common N Display the N most common lines in the stacktraces
---------------------------------
You can start the watching thread your django middleware like this:
class FOOMiddleware:
def __init__(self):
u'This code gets executed once after the start of the wsgi worker process. Not for every request!'
seconds=getattr(settings, 'debug_live_interval', None)
if seconds:
seconds=float(seconds)
from djangotools.utils import debug_live
debug_live.start(seconds)
# settings.py
debug_live_interval=0.3 # ever 0.3 second
# Inspired by http://code.google.com/p/modwsgi/wiki/DebuggingTechniques
You can get a simple report of the log file of stacktraces like below. The lines
which are not from django are marked with "<====". That's most likely your code
and this could be a bottle neck.
python ..../debug_live.py read
1971 File: "/home/foo_bar_p/django/core/handlers/wsgi.py", line 272, in __call__
response = self.get_response(request)
1812 File: "/home/foo_bar_p/django/core/handlers/base.py", line 111, in get_response
response = callback(request, *callback_args, **callback_kwargs)
1725 File: "/home/foo_bar_p/django/db/backends/postgresql_psycopg2/base.py", line 44, in execute
return self.cursor.execute(query, args)
1724 File: "/home/foo_bar_p/django/db/models/sql/compiler.py", line 735, in execute_sql
cursor.execute(sql, params)
1007 File: "/home/foo_bar_p/django/db/models/sql/compiler.py", line 680, in results_iter
for rows in self.execute_sql(MULTI):
796 File: "/home/foo_bar_p/django/db/models/query.py", line 273, in iterator
for row in compiler.results_iter():
763 File: "/home/foo_bar_p/foo/utils/ticketutils.py", line 135, in __init__ <====
filter=type_filter(root_node=self.root_node)
684 File: "/home/foo_bar_p/django/db/models/query.py", line 334, in count
return self.query.get_count(using=self.db)
679 File: "/home/foo_bar_p/django/db/models/sql/query.py", line 367, in get_aggregation
result = query.get_compiler(using).execute_sql(SINGLE)
677 File: "/home/foo_bar_p/django/db/models/sql/query.py", line 401, in get_count
number = obj.get_aggregation(using=using)[None]
'''
# NOTE(review): django project import -- only referenced by the middleware
# example in the module docstring, not by the code below; verify before removing.
from django.conf import settings
# Log file that stacktraces() appends to and read_logs() parses.
# NOTE(review): assumes ~/tmp/ already exists -- open() will fail otherwise.
outfile = os.path.expanduser('~/tmp/debug_live.log')
# Frames whose path matches this pattern are framework/interpreter code;
# report lines NOT matching it get flagged with "<====" as likely app code.
other_code=re.compile(r'/(django|python...)/')
def stacktraces():
    '''Append a stack trace of every thread (except this one) to ``outfile``.

    Each per-thread dump is framed by "#START ..." header lines and an
    "#END" marker so that read_logs() can parse the log file
    incrementally without loading it into memory.
    '''
    code=[]
    now=datetime.datetime.now()
    pid=os.getpid()
    my_thread_id=thread.get_ident()
    for thread_id, stack in sys._current_frames().items():
        if thread_id==my_thread_id:
            continue # Don't print this monitor thread
        code.append("\n\n#START date: %s\n# ProcessId: %s\n# ThreadID: %s" % (now, pid, thread_id))
        for filename, lineno, name, line in traceback.extract_stack(stack):
            code.append('File: "%s", line %d, in %s' % (filename, lineno, name))
            if line:
                code.append(" %s" % (line.strip()))
        code.append('#END')
    if not code:
        return
    # Use a context manager so the file handle is closed even when
    # write() raises -- the original open()/write()/close() sequence
    # leaked the descriptor on error.
    with open(outfile, 'at') as fd:
        fd.write('\n'.join(code))
def monitor(interval):
    '''Background loop: dump all thread stack traces every *interval* seconds.

    Keeps running until the module-global ``monitor_thread`` flag is
    reset to ``None`` (see exiting()).
    '''
    while monitor_thread is not None:
        stacktraces()
        time.sleep(interval)

# Holds the running monitor Thread object, or None when no monitor is active.
monitor_thread=None
def exiting():
    '''atexit hook: clear the global flag so monitor() leaves its loop.'''
    global monitor_thread
    monitor_thread = None
def start(interval):
    '''Start the daemon monitor thread dumping stacktraces every *interval* seconds.

    Idempotent: does nothing if a monitor thread is already running.
    Raises AssertionError if ``outfile`` is a symlink.
    '''
    global monitor_thread
    if monitor_thread:
        return
    # Explicit check instead of the original "assert": asserts are
    # stripped under "python -O", and this guards a real symlink attack
    # on a well-known temporary file name.  AssertionError is kept so
    # existing callers catch the same exception type.
    if os.path.islink(outfile):
        raise AssertionError(outfile)
    monitor_thread = threading.Thread(target=monitor, args=[interval])
    # daemon thread: must not keep the process alive at shutdown.
    monitor_thread.setDaemon(True)
    atexit.register(exiting)
    monitor_thread.start()
def read_logs(args):
    '''Aggregate the stacktrace log and print the most common lines.

    args: argparse.Namespace with
      .action       -- 'sum-all-frames' counts every frame of each dump;
                       'sum-last-frame' counts only the innermost frame.
      .most_common  -- how many (file-line, code-line) pairs to report.

    NOTE(review): Python 2 code ("print" statement); the log file opened
    below is never explicitly closed (CPython closes it on GC).
    '''
    # The outfile can be huge, don't read the whole file into memory.
    counter=collections.Counter()
    cur_stack=[]  # (File-line, code-line) pairs of the dump currently being parsed.
    py_line=''
    code_line=''
    if args.action=='sum-all-frames':
        sum_all_frames=True
    else:
        sum_all_frames=False
    for line in open(outfile):
        if line.startswith('#END'):
            # End of one thread dump: fold its frames into the counter.
            if sum_all_frames:
                frames=cur_stack
            else:
                frames=cur_stack[-1:]  # innermost frame only
            counter.update(frames)
            cur_stack=[]
            continue
        if line[0] in '\n#':
            # Blank separators and '#START'/'# ProcessId'/'# ThreadID' headers.
            continue
        if line.startswith('File:'):
            py_line=line.rstrip()
            continue
        if line.startswith(' '):
            # Source line belonging to the preceding 'File:' line.
            code_line=line.rstrip()
            if not (py_line, code_line) in cur_stack:
                # If there is a recursion, count the line only once per stacktrace
                cur_stack.append((py_line, code_line))
            continue
        print 'ERROR unparsed', line
    for (py, code), c in counter.most_common(args.most_common):
        if not other_code.search(py):
            # Lines outside django/python are probably the caller's own code.
            py='%s <====' % py
        print '% 5d %s\n %s' % (c, py, code)
def main():
    '''Command-line entry point: parse arguments and produce the report.'''
    import argparse
    parser = argparse.ArgumentParser(description='Read stacktrace log')
    parser.add_argument('action', choices=['sum-all-frames', 'sum-last-frame'])
    parser.add_argument(
        '--most-common', metavar='N', default=30, type=int,
        help='Display the N most common lines in the stacktraces')
    return read_logs(parser.parse_args())
# Script entry point: run the report CLI only when executed directly.
if __name__=='__main__':
    main()
这篇关于Python服务器的实时分析的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!
查看全文