Selenium 搭配 PhantomJS 时出现 URLError(在 Windows 上正常,在 Ubuntu 16.04 上失败) [英] Selenium with PhantomJS URLError (works on Windows, fails on Ubuntu 16.04)

查看:82
本文介绍了 Selenium 搭配 PhantomJS 时出现 URLError(在 Windows 上正常,在 Ubuntu 16.04 上失败)的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我的抓取代码在本地计算机(Windows 8)上运行良好,但在 DigitalOcean Droplet(Ubuntu 16.04)上运行失败:总是在第二次循环时失败,报 Bad Status Line 或 URLError。我已经花了几个小时搜索这个问题……

My scraping code works perfectly on my local computer (Windows 8) but fails on a DigitalOcean droplet (Ubuntu 16.04). It always fails on the second loop iteration with a Bad Status Line error or a URLError. I have already spent a few hours googling this problem...

设置:

PhantomJS 2.1.1

PhantomJS 2.1.1

Selenium 2.53.6

Python 2.7

Python 2.7

class Elitebet:
    """Scraper for the elitebetkenya.com coupon pages, driven through PhantomJS.

    ``controller`` loads the coupon, steps through its pages with the "Next"
    button and hands every page to ``eliteparser``, which prints the league
    and the two teams of each fixture and returns the parsed fixtures.
    """

    # Reference timestamp taken when the class body is executed;
    # ``controller`` prints the time elapsed since this moment.
    t1 = time.time()

    # The browser is started by ``controller`` and shut down when the crawl
    # ends, so defining the class has no side effects and no PhantomJS
    # process is left behind.
    driver = None

    # Market label (first cell of an odds row) -> names under which the odds
    # are stored. The odds are read from the cells starting at index 3, one
    # cell per name.
    _MARKETS = {
        "Win-Draw-Win": ("home_odds", "draw_odds", "away_odds"),
        "Handicap (0:1)": ("handicap01_1", "handicap01_draw", "handicap01_2"),
        "Double Chance": ("oneordraw", "oneortwo", "drawortwo"),
        "Asian (Draw No Bet)": ("asian1", "asian2"),
        "Goal Under/Over (2.5)": ("under25", "over25"),
        "1st Half Goal Under/Over (1.5)": ("underfh15", "overfh15"),
        "Goal - No Goal": ("goal", "no_goal"),
        "Odd - Even Goal": ("odd", "even"),
        "Handicap (1:0)": ("handicap10_1", "handicap10_draw", "handicap10_2"),
    }

    def controller(self):
        """Crawl the coupon pages, parse each one and print the elapsed time.

        The PhantomJS driver is started here if the instance has none, and is
        always quit when the crawl ends, whether it succeeded or raised.
        Exceptions from Selenium or from parsing propagate to the caller.
        """
        if self.driver is None:
            self.driver = webdriver.PhantomJS()
        try:
            self.driver.get("http://www.elitebetkenya.com/coupon.php?d")
            self._wait_for_page()
            soup = self._page_soup()
            page_number = self.number_of_pages(soup)
            self.eliteparser(soup)
            # NOTE(review): "page_number - 10" is kept from the original; it
            # clicks "Next" ten fewer times than the reported page count (and
            # not at all for ten pages or fewer). Confirm this is intended.
            for _ in range(0, page_number - 10):
                self.driver.find_element_by_xpath("//input[@value='Next']").click()
                self._wait_for_page()
                # Short random pause between page loads.
                time.sleep(randint(1, 2))
                self.eliteparser(self._page_soup())
        finally:
            # Always release the browser process, even when the crawl fails.
            self.driver.quit()
            self.driver = None

        t2 = time.time() - self.t1
        print(t2)

    def _wait_for_page(self):
        """Block (up to 10 seconds) until the element with id "page" exists."""
        WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located((By.ID, "page")))

    def _page_soup(self):
        """Return the driver's current page source parsed with html.parser."""
        return BeautifulSoup(self.driver.page_source.encode('utf-8'), "html.parser")

    def number_of_pages(self, x):
        """Return the first integer found in the pager of the parsed page *x*.

        The number is read from the third child node of the ``div`` with class
        ``pgLnx``. Raises ``AttributeError`` when that ``div`` is missing and
        ``IndexError`` when it has fewer than three children or contains no
        digits.
        """
        numbers = x.find("div", {"class": "pgLnx"}).contents[2]
        return int(re.findall(r'\d+', numbers)[0])

    def eliteparser(self, x):
        """Parse one coupon page, print its fixtures and return them.

        For every ``tr`` with class ``league`` the following sibling rows are
        scanned up to the next league row. Each ``fixture`` row prints the
        league text and ``"<home> vs <away>"`` and yields one dict with the
        keys ``league``, ``home``, ``away`` and ``time``, plus one entry per
        odds value found for that fixture (see ``_MARKETS`` for the names).

        Returns:
            A list of fixture dicts in page order.
        """
        tbody = x.find("tbody")
        fixtures = []
        for league_row in tbody.findAll("tr", {"class": "league"}):
            league = league_row.get_text()
            for row in league_row.find_next_siblings("tr"):
                row_classes = row.get("class", [])
                if "league" in row_classes:
                    break
                if "fixture" not in row_classes:
                    continue
                home = row.find("span", {"class": "home uc"}).get_text(strip=True)
                away = row.find("span", {"class": "away uc"}).get_text(strip=True)
                fixture = {
                    "league": league,
                    "home": home,
                    "away": away,
                    "time": row.br.get_text().strip(),
                }
                fixture.update(self._parse_odds(row))
                fixtures.append(fixture)
                print(league)
                print("{} vs {}".format(home, away))
        return fixtures

    def _parse_odds(self, fixture_row):
        """Collect the odds listed in the rows that follow *fixture_row*.

        Scanning stops at the next fixture or league row, so odds are never
        taken from another fixture. Rows without ``td`` cells, with an unknown
        market label, or with fewer odds cells than the market needs are
        skipped.
        """
        odds = {}
        for row in fixture_row.find_next_siblings("tr"):
            row_classes = row.get("class", [])
            if "fixture" in row_classes or "league" in row_classes:
                break
            cells = row.findAll("td")
            if not cells:
                continue
            # Strip the label for every market, not only for "Win-Draw-Win".
            names = self._MARKETS.get(cells[0].get_text().strip())
            if names is None:
                continue
            values = cells[3:3 + len(names)]
            if len(values) < len(names):
                continue
            for name, cell in zip(names, values):
                odds[name] = cell.get_text()
        return odds



# Script entry: build the scraper and run the crawl (progress is printed).
elite = Elitebet()

elite.controller()

错误消息如下:

  File "elitebet.py", line 147, in <module>
    elite.controller()
  File "elitebet.py", line 45, in controller
    page_click.click()
  File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webelement.py", line 72, in click
    self._execute(Command.CLICK_ELEMENT)
  File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webelement.py", line 461, in _execute
    return self._parent.execute(command, params)
  File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webdriver.py", line 234, in execute
    response = self.command_executor.execute(driver_command, params)
  File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/remote_connection.py", line 401, in execute
    return self._request(command_info[0], url, body=data)
  File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/remote_connection.py", line 471, in _request
    resp = opener.open(request, timeout=self._timeout)
  File "/usr/lib/python2.7/urllib2.py", line 429, in open
    response = self._open(req, data)
  File "/usr/lib/python2.7/urllib2.py", line 447, in _open
    '_open', req)
  File "/usr/lib/python2.7/urllib2.py", line 407, in _call_chain
    result = func(*args)
  File "/usr/lib/python2.7/urllib2.py", line 1228, in http_open
    return self.do_open(httplib.HTTPConnection, req)
  File "/usr/lib/python2.7/urllib2.py", line 1198, in do_open
    raise URLError(err)
urllib2.URLError: <urlopen error [Errno 111] Connection refused>

推荐答案

这可能是由 SSL 错误引起的,建议您给 PhantomJS 加上以下命令行选项:

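That may be caused by an SSL error. I suggest starting PhantomJS with these command-line options (with Selenium they can be passed through the `service_args` argument of `webdriver.PhantomJS`):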

--ignore-ssl-errors=true --ssl-protocol=any --debug=true

您需要使用 onResourceError 回调来查明到底是哪里出了问题.

You need to use the onResourceError callback to find out what is going wrong.

这篇关于 Selenium 搭配 PhantomJS 时出现 URLError(在 Windows 上正常,在 Ubuntu 16.04 上失败)的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆