Python: one-line web server

# will listen on port 8000
# http://127.0.0.1:8000

python -m SimpleHTTPServer        # Python 2; on Python 3 use: python -m http.server

Python 打开文件

inPath = "input.txt"
outPath = "output.txt"

# Open a file for reading.  Note: open() raises IOError on failure -- it
# never returns a falsy object, so an `if file:` test can never fail;
# use try/except to report the error.  ('rU' mode was removed in 3.11.)
try:
    with open(inPath, 'r') as infile:
        # read from the file
        pass
except IOError:
    print("Error Opening File.")

# Open a file for writing (binary mode, as in the original snippet).
# The `with` block guarantees the file is closed even on error.
try:
    with open(outPath, 'wb') as outfile:
        # write to the file
        pass
except IOError:
    print("Error Opening File.")

Python 搜索具有特定扩展名的文件并将其放入列表中

import os

def dirLS(dirPath, ext):
	"""Return paths of files in *dirPath* whose name is ``prefix.frame.ext``.

	Only names with exactly three dot-separated parts are considered
	(e.g. image-sequence frames like ``shot.0001.exr``); anything else
	is silently skipped.  Returns an empty list when nothing matches.

	Note: the original assigned nothing in its ``foundFiles == None``
	line (a no-op comparison), so it always returned a list too.
	"""
	foundFiles = []
	for curfile in os.listdir(dirPath):
		try:
			# Expect exactly prefix.frame.suffix; otherwise skip.
			prefix, frame, suffix = curfile.split('.')
		except ValueError:
			continue
		if suffix == ext:
			foundFiles.append(dirPath + '/' + curfile)
	return foundFiles

# Example: list every *.exr frame file found under c:/temp.
# (Renamed the loop variable so the builtin `file` is not shadowed.)
files = dirLS('c:/temp', 'exr')

for found in files:
	print(found)

Python 在python中读取CSV

# Use reader() to create an object for reading data from a CSV file. The reader can be used as an iterator
#  to process the rows of the file in order. For example:	

import csv
import sys

# Read the CSV file named on the command line and print each row as a
# list of strings.  `with` guarantees the file is closed (replaces the
# original try/finally); the reader streams rows in file order.
with open(sys.argv[1], 'rt') as f:
    reader = csv.reader(f)
    for row in reader:
        print(row)


# the original file:

"Title 1","Title 2","Title 3"
1,"a",08/18/07
2,"b",08/19/07
3,"c",08/20/07
4,"d",08/21/07
5,"e",08/22/07
6,"f",08/23/07
7,"g",08/24/07
8,"h",08/25/07
9,"i",08/26/07


# the result:

$ python csv_reader.py testdata.csv

['Title 1', 'Title 2', 'Title 3']
['1', 'a', '08/18/07']
['2', 'b', '08/19/07']
['3', 'c', '08/20/07']
['4', 'd', '08/21/07']
['5', 'e', '08/22/07']
['6', 'f', '08/23/07']
['7', 'g', '08/24/07']
['8', 'h', '08/25/07']
['9', 'i', '08/26/07']







# In addition to working with sequences of data, the csv module includes classes for working
# with rows as dictionaries so that the fields can be named. The DictReader and DictWriter
# classes translate rows to dictionaries instead of lists. Keys for the dictionary can be
# passed in, or inferred from the first row in the input (when the row contains headers).



import csv
import sys

# Read the CSV file named on the command line and print each row as a
# dict keyed by the header row.  `with` guarantees the file is closed
# (replaces the original try/finally).
with open(sys.argv[1], 'rt') as f:
    reader = csv.DictReader(f)
    for row in reader:
        print(row)


# returns:


$ python csv_dictreader.py testdata.csv

{'Title 1': '1', 'Title 3': '08/18/07', 'Title 2': 'a'}
{'Title 1': '2', 'Title 3': '08/19/07', 'Title 2': 'b'}
{'Title 1': '3', 'Title 3': '08/20/07', 'Title 2': 'c'}
{'Title 1': '4', 'Title 3': '08/21/07', 'Title 2': 'd'}
{'Title 1': '5', 'Title 3': '08/22/07', 'Title 2': 'e'}
{'Title 1': '6', 'Title 3': '08/23/07', 'Title 2': 'f'}
{'Title 1': '7', 'Title 3': '08/24/07', 'Title 2': 'g'}
{'Title 1': '8', 'Title 3': '08/25/07', 'Title 2': 'h'}
{'Title 1': '9', 'Title 3': '08/26/07', 'Title 2': 'i'}

Python 刮简单

class RedditSpider(GenericSpider):
    """Declarative spider: scrape listing fields from the reddit front page.

    NOTE(review): GenericSpider and CssTarget are defined outside this
    file -- presumably a CSS-selector scraping helper library; confirm
    their exact semantics against that library.
    """
    name = "reddit"
    start_urls = ["https://www.reddit.com/"]

    class Meta:
        # ".thing" selects one listing row; each CssTarget below extracts
        # one named field from inside that row via a CSS selector.
        items = ".thing"
        targets = [
            CssTarget("rank", ".rank::text"),
            CssTarget("upvoted", ".upvoted::text"),
            CssTarget("dislikes", ".dislikes::text"),
            CssTarget("likes", ".likes::text"),
            CssTarget("title", "a.title::text"),
            CssTarget("domain", ".domain > a::text"),
            CssTarget("datetime", ".tagline > time::attr(datetime)"),
            CssTarget("author", ".tagline > .author::text"),
            CssTarget("subreddit", ".tagline > .subreddit::text"),
            CssTarget("comments", ".comments::text")
        ]


class RedditSpider2(IndexDetailSpider):
    """Index + detail spider: listing fields plus each post's body text.

    NOTE(review): IndexDetailSpider, CssTarget, absolute_url and join are
    defined outside this file; confirm their API in the helper library.
    """
    name = "reddit2"
    start_urls = ["https://www.reddit.com/"]

    class Meta:
        # Follow each item's title link (made absolute) to a detail page,
        # and pull the post body text there, joined into one string.
        detail_path = CssTarget("detail_path", ".title > a::attr(href)", [absolute_url])
        detail_targets = [
            CssTarget("content", ".usertext-body > div > p::text", [join]),
        ]
        # ".thing" selects one listing row on the index page; each
        # CssTarget extracts one named field from inside that row.
        items = ".thing"
        targets = [
            CssTarget("rank", ".rank::text"),
            CssTarget("upvoted", ".upvoted::text"),
            CssTarget("dislikes", ".dislikes::text"),
            CssTarget("likes", ".likes::text"),
            CssTarget("title", "a.title::text"),
            CssTarget("domain", ".domain > a::text"),
            CssTarget("datetime", ".tagline > time::attr(datetime)"),
            CssTarget("author", ".tagline > .author::text"),
            CssTarget("subreddit", ".tagline > .subreddit::text"),
            CssTarget("comments", ".comments::text")
        ]

Python 更好的Unicode文本环绕功能

# This recipe refers:
#
#  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/148061

import re
from functools import reduce  # builtin in Py2; functools works on both

# Match any character in the CJK/wide range; a break opportunity is
# inserted after each one so lines may wrap inside CJK runs.
rx = re.compile(u"([\u2e80-\uffff])", re.UNICODE)


def cjkwrap(text, width, encoding="utf8"):
    """Wrap *text* to *width* columns, breaking at spaces and inside CJK runs.

    text: a unicode string, or bytes encoded in *encoding*.
    Returns the wrapped text encoded back to *encoding* (bytes), matching
    the original recipe's return type.

    The source of this block was paste-mangled (literal ``\\"`` escapes);
    this is the restored algorithm, rewritten without the opaque
    reduce-lambda: a NUL sentinel marks "no visible space after this
    CJK char", and is stripped before returning.
    """
    if isinstance(text, bytes):
        text = text.decode(encoding)
    # After every CJK char insert NUL + space so split(' ') treats each
    # CJK char as its own word; the NUL is removed at the end, so CJK
    # chars re-join with no visible space between them.
    words = rx.sub(u"\\1\x00 ", text).split(u" ")

    def _append(line, word):
        # Column position on the current (last) line.
        col = len(line) - line.rfind(u"\n") - 1
        head = word.split(u"\n", 1)[0]
        if col + len(head) >= width:
            sep = u"\n"                 # line is full: break here
        elif line.endswith(u"\x00"):
            sep = u""                   # previous char was CJK: no space
        else:
            sep = u" "                  # normal inter-word space
        return line + sep + word

    wrapped = reduce(_append, words)
    return wrapped.replace(u"\x00", u"").encode(encoding)

Python Python - 从MP3文件中获取id3

def getID3(filename):
    """Read the ID3v1 tag from the last 128 bytes of an MP3 file.

    Returns a dict with 'title', 'artist', 'album' and 'anno' (year),
    each decoded as latin-1 with trailing NUL/space padding stripped.

    Fixes over the original: the file is opened in binary mode ('rb' --
    text mode corrupts raw tag bytes), it is always closed via `with`,
    and the ID3v1 field offsets are taken from the spec (the original
    read a 28-byte comment where ID3v1 defines 30).

    Note: no check that the block actually starts with b'TAG'; like the
    original, untagged files yield whatever bytes sit at the end.
    """
    with open(filename, 'rb') as fp:
        fp.seek(-128, 2)          # ID3v1 tag occupies the final 128 bytes
        tag = fp.read(128)

    def _field(start, length):
        # Fixed-width field, NUL/space padded, latin-1 text per ID3v1.
        return tag[start:start + length].decode('latin-1').rstrip('\x00 ')

    return {
        'title':  _field(3, 30),
        'artist': _field(33, 30),
        'album':  _field(63, 30),
        'anno':   _field(93, 4),
    }

Python Google AppEngine Bulkuploader

visit

http://localhost:8080/_ah/admin/interactive

and type

from google.appengine.ext import bulkload
print help(bulkload.Loader)

in the textarea and press Run Program.

Python YouTube批量下载脚本

#!/usr/bin/python
import sgmllib
import sys

class MyParser(sgmllib.SGMLParser):
    """Collect a youtube-dl command line for every /watch?v= link in a page.

    NOTE(review): sgmllib is Python 2 only (removed in Python 3); the
    original body also mixed tabs and spaces, which Python 3 rejects --
    indentation is normalized to 4 spaces here.
    """

    def __init__(self, verbose=0):
        "Initialise an object, passing 'verbose' to the superclass."
        sgmllib.SGMLParser.__init__(self, verbose)
        self.hyperlinks = []  # accumulated youtube-dl command strings

    def parse(self, s):
        "Parse the given string 's'."
        self.feed(s)
        self.close()

    def start_a(self, attributes):
        "Process a hyperlink and its 'attributes'."
        for name, value in attributes:
            # Only video links (relative /watch?v=... hrefs) are kept.
            if name == "href" and value.startswith("/watch?v="):
                self.hyperlinks.append(
                    "youtube-dl -t http://youtube.com" + value)

    def get_hyperlinks(self):
        "Return the list of hyperlinks."
        return self.hyperlinks

def f2(seq):
   """Return *seq*'s elements with duplicates removed, preserving first-seen order.

   Uses a set for O(1) membership tests instead of the original O(n^2)
   list scan; elements must therefore be hashable (they are strings in
   this script).
   """
   seen = set()
   result = []
   for e in seq:
       if e not in seen:
           seen.add(e)
           result.append(e)
   return result

import urllib, sgmllib

# Fetch the page given on the command line (Python 2 urllib API),
# making sure the connection object is closed afterwards.
f = urllib.urlopen(sys.argv[1])
try:
	s = f.read()
finally:
	f.close()

# Try and process the page.
# The class should have been defined first, remember.
myparser = MyParser()
myparser.parse(s)

# De-duplicate once and reuse the result (the original called f2 twice).
links = f2(myparser.get_hyperlinks())
print(links)

# `with` guarantees the output file is closed (the original leaked it).
with open("youtube_links", "w") as yt:
	for link in links:
		yt.write(link)
		yt.write("\n")

print("\nYouTube Batch Download Script \"youtube_links\" has been generated. Execute it !")

Python Python CLI(命令行)进度条

import sys
import time
import math

# Output example: [=======   ] 75%

# width defines bar width
# percent defines current percentage
def progress(width, percent):
    """Draw a CLI progress bar, e.g. ``[=======   ] 75%``, redrawing in place.

    width   -- number of character cells inside the brackets
    percent -- current completion percentage (a newline is emitted at >= 100)
    """
    filled = int(math.floor(width * (percent / 100.0)))
    blank = int(math.floor(width - filled))
    bar = "[" + "=" * filled + " " * blank + "]"

    # \r returns to column 0 so the next call overdraws this one.
    sys.stdout.write("%s %d%%\r" % (bar, percent))
    if percent >= 100:
        # Done: move off the bar line.
        sys.stdout.write("\n")
    sys.stdout.flush()
    
# Simulate doing something...
# range(1, 101) yields 1..100 directly (the original used Python-2-only
# xrange(100) and added 1 inside the call), so the bar ends at 100%.
for pct in range(1, 101):
    progress(50, pct)
    time.sleep(0.1)  # Slow it down for demo