抓取网站数据,插入 Excel 单元格,然后继续下一步 [英] Scrape website data, insert into an Excel cell, then move on to next
问题描述
我的项目是将汽车注册插入税收和 mot 网站,点击按钮,加载页面,然后获取日期.
My project is to insert a car reg into the tax and mot website click the buttons, load the page and then take the dates.
我遇到的一个问题是在一个强 li 元素中提取数据,该元素是我在两个单元格中需要的税收和 mot 的日期/日期.
An issue I had it is to extract data within a strong li element which is the date/ dates for the tax and mot of which I need in two cells.
Sub searchbot()
'dimension (declare or set aside memory for) our variables
Dim objIE As InternetExplorer 'special object variable representing the IE browser
Dim liEle As HTMLLinkElement 'special object variable for an <li> (link) element
Dim pEle As HTMLLinkElement 'special object variable for an <a> (link) element
Dim y As Integer 'integer variable we'll use as a counter
'''''''''''''''''''''''''''''''''''''''''''
'open internet
'initiating a new instance of Internet Explorer and asigning it to objIE
Set objIE = New InternetExplorer
'make IE browser visible (False would allow IE to run in the background)
objIE.Visible = True
'''''''''''''''''''''''''''''''''''''''''''
'open tax/mot page
'navigate IE to this web page (a pretty neat search engine really)
objIE.Navigate "https://vehicleenquiry.service.gov.uk/"
'make IE browser visible (False would allow IE to run in the background)
objIE.Visible = True
'wait here a few seconds while the browser is busy
Do While objIE.Busy = True Or objIE.ReadyState <> 4: DoEvents: Loop
'''''''''''''''''''''''''''''''''''''''''''
'enter details in to page
'in the search box put cell "b2" value, the word "in" and cell "C" value
objIE.Document.getElementById("Vrm").Value = _
Sheets("INPUT & DATA RESULTS").Range("F3").Value
'wait here a few seconds while the browser is busy
Do While objIE.Busy = True Or objIE.ReadyState <> 4: DoEvents: Loop
'''''''''''''''''''''''''''''''''''''''''''''''''''''
'click the 'Continue' button
objIE.Document.getElementsByClassName("button")(0).Click
'wait here a few seconds while the browser is busy
Do While objIE.Busy = True Or objIE.ReadyState <> 4: DoEvents: Loop
'''''''''''''''''''''''''''''''''''''''''''''''''''''
'click the 'Yes' button
objIE.Document.getElementById("Correct_True").Click
'wait here a few seconds while the browser is busy
Do While objIE.Busy = True Or objIE.ReadyState <> 4: DoEvents: Loop
'''''''''''''''''''''''''''''''''''''''''''''''''''''
'click the 'Continue' button
objIE.Document.getElementsByClassName("button")(0).Click
'wait here a few seconds while the browser is busy
Do While objIE.Busy = True Or objIE.ReadyState <> 4: DoEvents: Loop
'above works
'''''''''''''''''''''''''''''''''''''''''''''''''''''
'''''''''''''''''''''''''''''''''''''''''''''''''''''
'''''''''''''''''''''''''''''''''''''''''''''''''''''
'''''''''''''''''''''''''''''''''''''''''''''''''''''
'''''''''''''''''''''''''''''''''''''''''''''''''''''
'''''''''''''''''''''''''''''''''''''''''''''''''''''
'''''''''''''''''''''''''''''''''''''''''''''''''''''
'''''''''''''''''''''''''''''''''''''''''''''''''''''
'''''''''''''''''''''''''''''''''''''''''''''''''''''
'HELP FROM HERE PLEASE
'take tax and mot dates and insert in to cells next to each other
'the first search result will go in row 2
y = 2
'TAKE TAX EXPIRY DATE AND PUT IN CELL
'I have tried reading up on extracting data from li elements, parent and child elements but struggling
For Each itemEle In objIE.Document.getElementsByClassName("top-section-list")
data = itemEle.getElementsByTagName("li")(0).innerText
'TAKE MOT EXPIRY DATE AND PUT IN CELL
'I have tried reading up on extracting data from li elements, parent and child elements but struggling
For Each itemEle In objIE.Document.getElementsByClassName("top-section-list")
data = itemEle.getElementsByTagName("li")(0).innerText
'increment our row counter, so the next result goes below
y = y + 1
'repeat times cells have car regs in
'Next
'take next car reg and do the same as above until there are no cells in rows with a car reg
Next
Range("A3").Value = data
'''''''''''''''''''''''''''''''''''''''''''
'close the browser
objIE.Quit
'''''''''''''''''''''''''''''''''''''''''''
'exit our SearchBot subroutine and start new row for new website data
End Sub
我是一名欺诈调查员,正在尝试自学 VBA.
I am a fraud investigator trying to teach myself VBA.
推荐答案
您想要的项目在 strong
(bold) 标签中,并且是页面上的前两个所以你可以使用更快的 strong
css 选择器并做
The items you want are in strong
(bold) tags and are the first two on the page so you can use faster css selector of strong
and do
Dim items As Object, i As Long, taxInfo As String, motInfo As String
Set items = ie.document.querySelectorAll("strong")
taxInfo = items.item(0).innerText
motInfo = items.item(1).innerText
仅针对日期:
taxInfo = Replace$(items.item(0).innerText,"Tax due: ",vbNullString)
motInfo = Replace$(items.item(1).innerText,"Expires: ",vbNullString)
这里有类似的使用 css 选择器的东西,现代网页已经针对它进行了优化,速度更快.# 是一个 id 选择器.我使用了定时等待来确保存在用于输入注册的搜索框.如果未找到车辆,将进行初步检查.
Here is something similar using css selectors, which modern webpages are optimized for, so faster. The # is an id selector. I used a timed wait to ensure search box is present for entering registration. There is a rudimentary check in case vehicle not found.
Option Explicit
'VBE > Tools > References:
' Microsoft Internet Controls
Public Sub CheckTax()
Dim ie As InternetExplorer, searchBox As Object, t As Date, ws As Worksheet
Const MAX_WAIT_SEC As Long = 20
Dim inputValues(), i As Long
Set ie = New InternetExplorer
Set ws = ThisWorkbook.Worksheets("INPUT & DATA RESULTS")
inputValues = Application.Transpose(ws.Range("F3:F5").Value) '<=change range here for range containing values to lookup
With ie
.Visible = True
For i = LBound(inputValues) To UBound(inputValues)
.Navigate2 "https://vehicleenquiry.service.gov.uk/"
While .Busy Or .readyState < 4: DoEvents: Wend
t = Timer
Do
On Error Resume Next
Set searchBox = .document.querySelector("#Vrm")
On Error GoTo 0
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While searchBox Is Nothing
If searchBox Is Nothing Then
Exit Sub
Else
searchBox.Focus
searchBox.Value = inputValues(i)
End If
.document.querySelector(".button").Click
While .Busy Or .readyState < 4: DoEvents: Wend
If .document.querySelectorAll("h3").Length > 0 Then
ws.Cells(i + 2, "G") = "Vehicle details could not be found"
ws.Cells(i + 2, "H") = "Vehicle details could not be found"
Else
t = Timer
Do
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While ie.document.querySelectorAll("#Correct_True").Length = 0
ie.document.querySelector("#Correct_True").Click
While .Busy Or .readyState < 4: DoEvents: Wend
.document.querySelector(".button").Click
While .Busy Or .readyState < 4: DoEvents: Wend
Dim items As Object, taxInfo As String, motInfo As String
t = Timer
Do
On Error Resume Next
Set items = ie.document.querySelectorAll("strong")
On Error GoTo 0
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While items.Length = 0
'taxInfo = items.item(0).innerText
'motInfo = items.item(1).innerText
'Debug.Print taxInfo, motInfo
taxInfo = Replace$(items.item(0).innerText, "Tax due: ", vbNullString)
motInfo = Replace$(items.item(1).innerText, "Expires: ", vbNullString)
ws.Cells(i + 2, "G") = taxInfo
ws.Cells(i + 2, "H") = motInfo
End If
Set searchBox = Nothing: Set items = Nothing
Next
.Quit
End With
End Sub
这篇关于抓取网站数据,插入 Excel 单元格,然后继续下一步的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!