如何使用 VBA 从嵌套的 div 中提取值 [英] How to extract values from nested divs using VBA
问题描述
我已经查看了此链接中提供的解决方案
公共子GetInfo()Const URL As String = "https://www.espncricinfo.com/scores"Dim html As HTMLDocument, hTable As HTMLTable, ws As Worksheet, headers()Set ws = ThisWorkbook.Worksheets("Sheet1")设置 html = 新建 HTMLDocument使用 CreateObject("MSXML2.XMLHTTP").打开GET",URL,假.发送html.body.innerHTML = .responseText结束于设置 hDiv = html.querySelector("div.cscore")Dim ul As Object, div As Object, r As Long, c As Longr = 1与 ws对于每个 div 在 hDiv.getElementsByClassName("cscore_link")r = r + 1:c = 1如果 r >3 然后对于每个 ul 在 div.getElementsByClassName("cscore_score").Cells(r - 2, c) = IIf(c = 2, "'" & div.innerText, div.innerText)c = c + 1下一个万一下一个结束于结束子
如果能帮助我将每个 div 中的分数提取到工作表中,我将不胜感激.
您可以使用更快的 css 选择器(仅使用 class 比使用 tag/type 快),如果使用如下所示,您还可以降低代码的复杂性和通过只有一个循环来提高性能.然后可以将结果存储在一个数组中并一次性写出 - 又一次提高了效率.
请注意,我通过在前面连接'"来确保分数在输出时保持正确格式.
<小时>如果您想在同一行中获得相同匹配的分数:
选项显式公共子 GetData()Dim html As HTMLDocument, ws As Worksheet, countries As Object, score As Object, results(), i As Long, r As LongSet ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument: r = 1使用 CreateObject("MSXML2.XMLHTTP").打开GET",https://www.espncricinfo.com/scores",假.发送html.body.innerHTML = .responseText结束于设置国家 = html.querySelectorAll(".cscore--watchNotes .cscore_name--long"): 设置分数 = html.querySelectorAll(".cscore_score")ReDim 结果(1 To countries.Length/2, 1 To 4)For i = 0 To countries.Length - 1 Step 2结果(r, 1) = countries.item(i).innerText: results(r, 2) = "'" &score.item(i).innerText结果(r, 3) = countries.item(i + 1).innerText: results(r, 4) = "'" &score.item(i + 1).innerTextr = r + 1下一个ws.Cells(1, 1).Resize(1, 4) = Array("Home", "Score", "Away", "Score")ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = 结果结束子
<小时>
每个团队在不同行的得分:
选项显式公共子 GetData()Dim html As HTMLDocument, ws As Worksheet, countries As Object, score As Object, results(), i As LongSet ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument使用 CreateObject("MSXML2.XMLHTTP").打开GET",https://www.espncricinfo.com/scores",假.发送html.body.innerHTML = .responseText结束于设置国家 = html.querySelectorAll(".cscore--watchNotes .cscore_name--long"): 设置分数 = html.querySelectorAll(".cscore_score")ReDim 结果(1 到国家/地区.长度,1 到 2)对于 i = 0 To countries.Length - 1结果(i + 1, 1) = countries.item(i).innerText: results(i + 1, 2) = "'" &score.item(i).innerText下一个ws.Cells(1, 1) = "国家": ws.Cells(1, 2) = "分数"ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = 结果结束子
<小时>
附加列:
公共子GetData()Dim html As HTMLDocument, ws As Worksheet, countries As Object, score As ObjectDim descs As Object, results(), i As Long, r As LongSet ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument: r = 1使用 CreateObject("MSXML2.XMLHTTP").打开GET",https://www.espncricinfo.com/scores",假.发送html.body.innerHTML = .responseText结束于设置国家 = html.querySelectorAll(".cscore--watchNotes .cscore_name--long"): 设置分数 = html.querySelectorAll(".cscore_score")设置 descs = html.querySelectorAll(".cscore--watchNotes .cscore_info-overview")ReDim 结果(1 To countries.Length/2, 1 To 5)For i = 0 To countries.Length - 1 Step 2结果(r, 1) = descs.Item(i/2).innerText结果(r, 2) = countries.Item(i).innerText: results(r, 3) = "'" &score.Item(i).innerText结果(r, 4) = countries.Item(i + 1).innerText: results(r, 5) = "'" &score.Item(i + 1).innerTextr = r + 1下一个ws.Cells(1, 1).Resize(1, 5) = Array("Desc", "Home", "Score", "Away", "Score")ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = 结果结束子
I have looked at the solution provided in this link Extract Table from Webpage in Excel using VBA and it was very helpful. But I need to extract the values in the div classes (cscore_score) and not a table Please refer to image below
The URL is: https://www.espncricinfo.com/scores
The div class is: cscore_score
The scores to extract is in nested divs. The sample data for each nested div I want to extract is like Country and Score i.e INDIA and in the next column "416..." into the Excel sheet.
Here's a screenshot of the table structure:
Public Sub GetInfo()
Const URL As String = "https://www.espncricinfo.com/scores"
Dim html As HTMLDocument, hTable As HTMLTable, ws As Worksheet, headers()
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set html = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", URL, False
.send
html.body.innerHTML = .responseText
End With
Set hDiv = html.querySelector("div.cscore")
Dim ul As Object, div As Object, r As Long, c As Long
r = 1
With ws
For Each div In hDiv.getElementsByClassName("cscore_link")
r = r + 1: c = 1
If r > 3 Then
For Each ul In div.getElementsByClassName("cscore_score")
.Cells(r - 2, c) = IIf(c = 2, "'" & div.innerText, div.innerText)
c = c + 1
Next
End If
Next
End With
End Sub
I would be grateful to receive any help to extract those scores from each div into the sheet.
You could use faster css selectors (using only class is faster than tag/type) which if used as shown below will allow you to also reduce your code complexity and improve performance by having only a single loop. Results can then be stored in an array and written out in one go - again another efficiency gain.
Note I am ensuring scores remain correctly formatted on output by concatenating "'" in front.
If you want scores for same match on same row:
Option Explicit
Public Sub GetData()
Dim html As HTMLDocument, ws As Worksheet, countries As Object, scores As Object, results(), i As Long, r As Long
Set ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument: r = 1
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.espncricinfo.com/scores", False
.send
html.body.innerHTML = .responseText
End With
Set countries = html.querySelectorAll(".cscore--watchNotes .cscore_name--long"): Set scores = html.querySelectorAll(".cscore_score ")
ReDim results(1 To countries.Length / 2, 1 To 4)
For i = 0 To countries.Length - 1 Step 2
results(r, 1) = countries.item(i).innerText: results(r, 2) = "'" & scores.item(i).innerText
results(r, 3) = countries.item(i + 1).innerText: results(r, 4) = "'" & scores.item(i + 1).innerText
r = r + 1
Next
ws.Cells(1, 1).Resize(1, 4) = Array("Home", "Score", "Away", "Score")
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
Scores on different rows for every team:
Option Explicit
Public Sub GetData()
Dim html As HTMLDocument, ws As Worksheet, countries As Object, scores As Object, results(), i As Long
Set ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.espncricinfo.com/scores", False
.send
html.body.innerHTML = .responseText
End With
Set countries = html.querySelectorAll(".cscore--watchNotes .cscore_name--long"): Set scores = html.querySelectorAll(".cscore_score ")
ReDim results(1 To countries.Length, 1 To 2)
For i = 0 To countries.Length - 1
results(i + 1, 1) = countries.item(i).innerText: results(i + 1, 2) = "'" & scores.item(i).innerText
Next
ws.Cells(1, 1) = "Country": ws.Cells(1, 2) = "Score"
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
Additional column:
Public Sub GetData()
Dim html As HTMLDocument, ws As Worksheet, countries As Object, scores As Object
Dim descs As Object, results(), i As Long, r As Long
Set ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument: r = 1
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.espncricinfo.com/scores", False
.send
html.body.innerHTML = .responseText
End With
Set countries = html.querySelectorAll(".cscore--watchNotes .cscore_name--long"): Set scores = html.querySelectorAll(".cscore_score ")
Set descs = html.querySelectorAll(".cscore--watchNotes .cscore_info-overview")
ReDim results(1 To countries.Length / 2, 1 To 5)
For i = 0 To countries.Length - 1 Step 2
results(r, 1) = descs.Item(i / 2).innerText
results(r, 2) = countries.Item(i).innerText: results(r, 3) = "'" & scores.Item(i).innerText
results(r, 4) = countries.Item(i + 1).innerText: results(r, 5) = "'" & scores.Item(i + 1).innerText
r = r + 1
Next
ws.Cells(1, 1).Resize(1, 5) = Array("Desc", "Home", "Score", "Away", "Score")
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
这篇关于如何使用 VBA 从嵌套的 div 中提取值的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!