Python Webdriver多线程 [英] Python Webdriver Multithread
问题描述
我正在尝试使用以下文章中的代码生成多个 WebDriver 实例: http://www.ibm.com/developerworks/aix/library/au-threadingpython/
I'm trying to spawn multiple webdriver instances with the code from: http://www.ibm.com/developerworks/aix/library/au-threadingpython/
import time
import Queue
import urllib2
import threading
from selenium import webdriver
from BeautifulSoup import BeautifulSoup
# Sites that main() puts on the work queue, one entry per host.
hosts = ["http://yahoo.com", "http://google.com", "http://amazon.com",
         "http://ibm.com", "http://apple.com"]
# NOTE(review): both names below are bound to the Queue.Queue class itself,
# not to an instance (there are no call parentheses). Calls such as
# queue.get() therefore have no instance to work on, which matches the
# "unbound method get()" TypeError reported for this snippet.
queue = Queue.Queue
out_queue = Queue.Queue
class Login_Driver(threading.Thread):
    """Thread that takes hosts from ``queue`` and puts page chunks on ``out_queue``.

    The constructor stores the two queues and the webdriver it is given and
    prints the driver's current title.
    """
    def __init__(self, queue, out_queue, driver):
        threading.Thread.__init__(self)
        self.queue = queue
        self.out_queue = out_queue
        self.driver = driver
        print driver.title
    def run(self):
        """Loop forever: take a host, load it, queue its page source."""
        while True:
            # take the next host from the work queue
            host = self.queue.get()
            # load the host and grab the page chunk
            # NOTE(review): bare `driver` is never assigned inside this
            # method; the instance keeps its driver in self.driver.
            driver.get(host)
            # NOTE(review): page_source is called here; the updated version
            # of this code reads it as a plain attribute -- confirm.
            chunk = driver.page_source()
            # place chunk into out queue
            self.out_queue.put(chunk)
            # signal to the work queue that this job is done
            self.queue.task_done()
class Poster(threading.Thread):
    """Thread that takes page chunks from ``out_queue`` and prints their titles."""
    # NOTE(review): unlike Login_Driver.__init__, this constructor does not
    # call threading.Thread.__init__(self).
    def __init__(self, driver, out_queue):
        self.out_queue = out_queue
        self.driver = driver
        print driver.name
    def run(self):
        """Loop forever: take a chunk, parse it, print its <title> tags."""
        while True:
            # take the next page chunk from the out queue
            chunk = self.out_queue.get()
            # parse the chunk
            soup = BeautifulSoup(chunk)
            print soup.findAll(['title'])
            # signal to the out queue that this job is done
            self.out_queue.task_done()
# Timestamp taken before main() is defined and run; used for the elapsed-time
# report printed at the end of the script.
start = time.time()
def main():
    """Start five Firefox worker threads, queue the hosts, start five parsers, wait."""
    # spawn a pool of threads, and pass them the queue instances
    for i in range(5):
        driver = webdriver.Firefox()
        t = Login_Driver(queue, out_queue, driver)
        t.setDaemon(True)
        t.start()
    time.sleep(20)
    # populate queue with data
    for host in hosts:
        queue.put(host)
    for i in range(5):
        # NOTE(review): Poster.__init__ is declared as (driver, out_queue),
        # but only one argument is passed here.
        dt = Poster(out_queue)
        dt.setDaemon(True)
        dt.start()
    # wait on the queues until everything has been processed
    queue.join()
    out_queue.join()
main()
print "Elapsed Time: %s" % (time.time() - start)
错误:TypeError:未绑定方法 get() 必须以 Queue 实例作为第一个参数来调用(实际未传入任何参数)
It fails with: TypeError: unbound method get() must be called with Queue instance as first argument (got nothing instead)
我是线程、类和进程方面的新手。能否请您告诉我,在这种情况下使用线程还是进程更合适?如果能给我一个例子就更好了。谢谢大家。
I'm new to threads, classes, and processes. Could you please tell me which is the better choice here, threads or processes? An example would be great. Thank you.
更新
工作代码:
import time
import Queue
import urllib2
import threading
from selenium import webdriver
from BeautifulSoup import BeautifulSoup
# Target sites; main() places each entry on the work queue once.
hosts = [
    "http://yahoo.com",
    "http://google.com",
    "http://amazon.com",
    "http://ibm.com",
    "http://apple.com",
]

# Host URLs waiting to be opened by the Login_Driver threads.
queue = Queue.Queue()
# Page sources waiting to be parsed by the Poster threads.
out_queue = Queue.Queue()
class Login_Driver(threading.Thread):
    """Worker thread that loads queued hosts in its own webdriver.

    Each instance repeatedly takes a host URL from ``queue``, opens it with
    ``driver`` and puts the resulting page source on ``out_queue``.

    Args:
        queue: Work queue of host URLs; must provide ``get()`` and
            ``task_done()``.
        out_queue: Queue that receives each page source; must provide
            ``put()``.
        driver: Webdriver-like object providing ``get(url)``, ``page_source``
            and ``title``.
    """

    def __init__(self, queue, out_queue, driver):
        threading.Thread.__init__(self)
        self.queue = queue
        self.out_queue = out_queue
        self.driver = driver
        print("In init first class..")

    def run(self):
        """Process hosts forever; the thread is expected to run as a daemon."""
        while True:
            # Blocks until a host is available.
            host = self.queue.get()
            try:
                self.driver.get(host)
                chunk = self.driver.page_source
                self.out_queue.put(chunk)
                print(self.driver.title)
            except Exception as exc:
                # A page that fails to load must not kill the worker:
                # report it and move on to the next host.
                print("Failed to load %s: %s" % (host, exc))
            finally:
                # Always acknowledge the item. Without this, one failed
                # page would leave queue.join() waiting forever.
                self.queue.task_done()
class Poster(threading.Thread):
    """Worker thread that parses queued page sources and prints their titles.

    Args:
        out_queue: Queue of page sources; must provide ``get()`` and
            ``task_done()``.
        driver: Webdriver-like object; only its ``name`` attribute is read.
    """

    def __init__(self, out_queue, driver):
        threading.Thread.__init__(self)
        self.out_queue = out_queue
        self.driver = driver
        print("In init a second class..")

    def run(self):
        """Process page sources forever; expected to run as a daemon."""
        while True:
            # Blocks until a page source is available.
            chunk = self.out_queue.get()
            try:
                soup = BeautifulSoup(chunk)
                print(soup.findAll(['title']))
                print(self.driver.name)
            except Exception as exc:
                # A page that cannot be parsed must not kill the worker.
                print("Failed to parse page: %s" % exc)
            finally:
                # Always acknowledge the item. Without this, one failed
                # parse would leave out_queue.join() waiting forever.
                self.out_queue.task_done()
# Reference point for the elapsed-time report printed at the end of the script.
start = time.time()


def main(num_workers=5):
    """Open every host in a pool of Firefox instances and print page titles.

    Starts ``num_workers`` Login_Driver threads (one Firefox each), queues
    all entries of ``hosts``, starts ``num_workers`` Poster threads, waits
    until both queues are fully processed and then quits every browser.

    Args:
        num_workers: Number of browser threads and of parser threads to
            start. Defaults to 5, the value previously hard-coded.
    """
    drivers = []
    try:
        # Spawn the browser workers. No sleep is needed afterwards because
        # each worker simply blocks on queue.get() until hosts arrive.
        for i in range(num_workers):
            driver = webdriver.Firefox()
            drivers.append(driver)
            t = Login_Driver(queue, out_queue, driver)
            t.daemon = True
            t.start()
            print("Started webdriver: --- " + str(i) + " --- from main")
        print("All started")

        # Populate the work queue.
        for host in hosts:
            queue.put(host)
            print("Opening website: " + host)
        print("All sites passed for opening..")

        # Poster only reads driver.name, so every parser is handed the same
        # (last created) driver, exactly as before -- but by an explicit
        # lookup rather than a leaked loop variable.
        for i in range(num_workers):
            dt = Poster(out_queue, drivers[-1])
            dt.daemon = True
            dt.start()
            print("Starting second class/title and name beautifull soup and webdriver: --- " + str(i) + " --- from main")
        print("Started secound class..")

        # Wait until every host has been loaded and every page parsed.
        queue.join()
        out_queue.join()
        print("out_queue.join()")
    finally:
        # Quit every browser that was started, even after an error;
        # otherwise the Firefox processes are left running.
        for driver in drivers:
            try:
                driver.quit()
            except Exception as exc:
                print("Failed to quit webdriver: %s" % exc)


if __name__ == "__main__":
    main()
    print("Elapsed Time: %s" % (time.time() - start))
推荐答案
您需要使用 Queue.Queue()(带括号,创建一个队列实例),
而不是 Queue.Queue(它只是类本身,因此调用 get() 时没有实例可用)。
这篇关于Python Webdriver多线程的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!