Python Webdriver多线程 [英] Python Webdriver Multithread

查看:268
本文介绍了Python Webdriver多线程的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在尝试使用以下文章中的代码生成多个Webdriver实例: http://www.ibm.com/developerworks/aix/library/au-threadingpython/

I'm trying to spawn multiple webdriver instances with the code from: http://www.ibm.com/developerworks/aix/library/au-threadingpython/

import time
import Queue
import urllib2
import threading
from selenium import webdriver
from BeautifulSoup import BeautifulSoup
# URLs that main() puts on the work queue.
hosts = ["http://yahoo.com", "http://google.com", "http://amazon.com",
    "http://ibm.com", "http://apple.com"]
# NOTE(review): these two names are bound to the Queue.Queue class itself
# (no call parentheses), not to queue instances. Calling .get() on them is
# what raises the "unbound method get()" TypeError quoted after this listing.
queue = Queue.Queue
out_queue = Queue.Queue

class Login_Driver(threading.Thread):
    """Worker thread that loads hosts from `queue` with a webdriver and
    puts each page's source on `out_queue`."""
    def __init__(self, queue, out_queue, driver):
        """Store the two queues and the webdriver, then print the driver's
        current title."""
        threading.Thread.__init__(self)
        self.queue = queue
        self.out_queue = out_queue
        self.driver = driver
        print driver.title
    def run(self):
        """Loop forever: take a host, load it, enqueue the page source."""
        while True:
            #grabs host from queue
            host = self.queue.get()
            #grabs urls of hosts and then grabs chunk of webpage
            # NOTE(review): bare `driver` is used on the next two lines, not
            # `self.driver`; no `driver` is defined in this method's scope.
            driver.get(host)
            # NOTE(review): page_source is called with parentheses here; the
            # working version further down reads it as a plain attribute.
            chunk = driver.page_source()
            #place chunk into out queue
            self.out_queue.put(chunk)
            #signals to queue job is done
            self.queue.task_done()
class Poster(threading.Thread):
    """Consumer thread that parses page sources taken from `out_queue` and
    prints their <title> elements."""
    def __init__(self, driver, out_queue):
        """Store the queue and the webdriver, then print the driver's name."""
        # NOTE(review): threading.Thread.__init__ is not called here, unlike
        # in Login_Driver above and in the working version further down.
        self.out_queue = out_queue
        self.driver = driver
        print driver.name
    def run(self):
        """Loop forever: take a page source, parse it, print its titles."""
        while True:
            #grabs a page-source chunk from the out queue
            chunk = self.out_queue.get()
            #parse the chunk
            soup = BeautifulSoup(chunk)
            print soup.findAll(['title'])
            #signals to queue job is done
            self.out_queue.task_done()
# Wall-clock start, used for the elapsed-time report on the last line.
start = time.time()
def main():
    """Start five Login_Driver threads and five Poster threads, enqueue
    every host, then wait for both queues to be fully processed."""
    #spawn a pool of threads, and pass them queue instance
    for i in range(5):
        driver = webdriver.Firefox()
        t = Login_Driver(queue, out_queue, driver)
        t.setDaemon(True)
        t.start()
        # pauses 20 seconds after starting each worker
        time.sleep(20)
    #populate queue with data
    for host in hosts:
        queue.put(host)
    for i in range(5):
        # NOTE(review): Poster.__init__ is declared as (driver, out_queue),
        # but only one argument is passed here.
        dt = Poster(out_queue)
        dt.setDaemon(True)
        dt.start()
    #wait on the queue until everything has been processed
    queue.join()
    out_queue.join()
main()
print "Elapsed Time: %s" % (time.time() - start)

错误:TypeError:必须以Queue实例作为第一个参数来调用未绑定方法get()(实际上没有传入任何参数)

It errors: TypeError: unbound method get() must be called with Queue instance as first argument (got nothing instead)

我是线程、类、进程方面的新手,能否请您告诉我使用线程还是进程更合适?如果能给我一个例子就更好了。谢谢大家。

I'm a newbie with threads, classes and processes. Can you please tell me which is better to use here, threads or processes? If you can give me an example, that would be great. Thank you guys.

更新

工作代码:

import time
import Queue
import urllib2
import threading
from selenium import webdriver
from BeautifulSoup import BeautifulSoup

# Sites whose pages the worker threads fetch.
hosts = [
    "http://yahoo.com",
    "http://google.com",
    "http://amazon.com",
    "http://ibm.com",
    "http://apple.com",
]
# Work queue of host URLs, and results queue of fetched page sources.
# Both are real Queue instances (note the call parentheses).
queue, out_queue = Queue.Queue(), Queue.Queue()

class Login_Driver(threading.Thread):
    """Worker thread that loads hosts with a webdriver.

    Each iteration takes one host from `queue`, loads it in `driver`, and
    puts the resulting page source on `out_queue`.
    """
    def __init__(self, queue, out_queue, driver):
        """Store the work queue, the results queue and the webdriver."""
        threading.Thread.__init__(self)
        self.queue = queue
        self.out_queue = out_queue
        self.driver = driver
        print("In init first class..")
    def run(self):
        """Process hosts forever; meant to be run as a daemon thread."""
        while True:
            #grabs host from queue
            host = self.queue.get()
            try:
                #loads the host and grabs the page source
                self.driver.get(host)
                chunk = self.driver.page_source
                #place chunk into out queue
                self.out_queue.put(chunk)
                print(self.driver.title)
            except Exception as exc:
                # A failed page load must not kill the worker: report it
                # and carry on with the next host.
                print("Failed to load %s: %s" % (host, exc))
            finally:
                # Always mark the item done, otherwise queue.join() in the
                # main thread would block forever after a failure.
                self.queue.task_done()
class Poster(threading.Thread):
    """Consumer thread that parses fetched pages.

    Each iteration takes one page source from `out_queue`, parses it with
    BeautifulSoup, and prints its <title> elements followed by the name of
    the webdriver it was given.
    """
    def __init__(self, out_queue, driver):
        """Store the results queue and the webdriver."""
        threading.Thread.__init__(self)
        self.out_queue = out_queue
        self.driver = driver
        print("In init a second class..")
    def run(self):
        """Process page sources forever; meant to be run as a daemon thread."""
        while True:
            #grabs a page-source chunk from the out queue
            chunk = self.out_queue.get()
            try:
                #parse the chunk
                soup = BeautifulSoup(chunk)
                print(soup.findAll(['title']))
                print(self.driver.name)
            except Exception as exc:
                # One unparsable page must not kill the consumer: report it
                # and carry on with the next chunk.
                print("Failed to parse page: %s" % exc)
            finally:
                # Always mark the item done, otherwise out_queue.join() in
                # the main thread would block forever after a failure.
                self.out_queue.task_done()
# Wall-clock start, used for the elapsed-time report on the last line.
start = time.time()
def main():
    """Fetch every host with a pool of Firefox webdrivers and print titles.

    Starts five Login_Driver threads (one Firefox instance each), enqueues
    all hosts, starts five Poster threads, and waits until both queues have
    been fully processed. Every Firefox instance that was started is quit
    before returning, even if an error occurs along the way.
    """
    drivers = []
    try:
        #spawn a pool of threads, and pass them queue instance
        for i in range(5):
            driver = webdriver.Firefox()
            drivers.append(driver)
            t = Login_Driver(queue, out_queue, driver)
            t.setDaemon(True)
            t.start()
            print("Started webdriver: --- "+str(i)+" --- from main")
        print("All started")
        time.sleep(3)
        #populate queue with data
        for host in hosts:
            queue.put(host)
            print("Opening website: "+host)
        print("All sites passed for opening..")
        time.sleep(3)
        for i in range(5):
            # Every Poster only prints the driver's name, so they all share
            # the most recently created driver.
            dt = Poster(out_queue, drivers[-1])
            dt.setDaemon(True)
            dt.start()
            print("Starting second class/title and name beautifull soup and webdriver: --- "+str(i)+" --- from main")
        print("Started secound class..")
        time.sleep(3)
        #wait on the queue until everything has been processed
        queue.join()
        out_queue.join()
        print("out_queue.join()")
    finally:
        # Close every browser that was started so no Firefox process is
        # left behind once the script ends.
        for started in drivers:
            try:
                started.quit()
            except Exception as exc:
                print("Failed to quit webdriver: %s" % exc)
main()
print("Elapsed Time: %s" % (time.time() - start))

推荐答案

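您需要使用Queue.Queue()而不是Queue.Queue。不带括号的Queue.Queue只是类本身,并没有创建队列实例,因此调用self.queue.get()时调用的是未绑定方法,从而引发上面的TypeError;加上括号才会创建可供线程共享的队列对象。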

这篇关于Python Webdriver多线程的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆