使用VBA宏循环遍历javascrape网页上的每个表 [英] Loop through each table on javascrape webpage with VBA macro
问题描述
我正在尝试从网站上浏览多个表格。到目前为止,我已经建立了一个excel VBA宏来做到这一点。我还想出了如何在网站上的多个页面上获取所有数据。例如,如果我有1000个结果,但每个页面上显示50个结果。问题是我在多个页面上有相同的5个表,因为每个表都有1000个结果。
我的代码只能循环遍历每个页面1个表。我也编写了代码来抓取每个表,但是我无法弄清楚如何为50个搜索结果(每页)中的每个搜索结果。
如何循环遍历多个表,然后点击该过程中的下一页来捕获所有数据?
Sub ETFDat()
Dim ie As Object,As As Long,strText As String
Dim jj As Long
Dim hBody As Object ,hTR As Object,hTD As Object
Dim tb As Object,bb As Object,Tr As Object,Td As Object,ii As Long
Dim doc As Object,hTable As Object
Dim y As Long,z As Long,wb As Excel.Workbook,ws As Excel.Worksheet
设置wb = Excel.ActiveWorkbook
设置ws = wb.ActiveSheet
设置ie = CreateObject(InternetExplorer.Application)
ie.Visible = True
y = 1'Excel中的列A
z = 1'Excel中的行1
表格(基金会基础)。激活
Cells.Select
Selection.Clear
ie.navigatehttp://www.etf.com/channels/smart-beta -etfs / channels / smart-beta-etfs?qt-tabs = 0#qt-tabs',,,Content-Type:application / x-www-form-urlencoded& vbCrLf
Do While ie.busy:DoEvents:Loop
Do While ie.ReadyState<> 4:DoEvents:Loop
设置doc = ie.document
设置hTable = doc.getElementsByTagName(table)'.GetElementByID(tablePerformance)
ii = 1
尽管ii< = 17
对于每个tb在hTable
设置hBody = tb.getElementsByTagName(tbody)
对于每个bb在hBody
设置hTR = bb.getElementsByTagName(tr)
对于每个Tr在hTR
设置hTD = Tr。 getElementsByTagName(td)
y = 1'重置到列A
对于每个Td在hTD
ws.Cells(z,y).Value = Td.innerText
y = y + 1
下一个Td
DoEvents
z = z + 1
下一个Tr
退出
下一个bb
退出
下一页tb
与doc
设置elems = .getElementsByTagName(a)
对于每个e在elems
如果(e.getAttribute(id)=nextPage)然后
e.Click
退出
结束如果
下一步e
结束
ii = ii + 1
Application.Wait(Now + TimeValue(00:00:05))
循环
MsgBox 完成
End Sub
p>有一个例子,显示如何使用XHR和JSON解析从网站检索数据,它由几个步骤组成。
- 检索数据。
我使用Chrome Developer Tools网络标签查看了XHR的一些内容。
我发现的最相关数据是GET XHR从
结果表包含803行和标题,列如下:
productId
fund
ticker
inceptionDate
launchDate
hasSegmentReport
genericReport
hasReport
fundInSegment
economicDevelopment
totalRows
fundBasics_issuer
fundBasics_expenseRatio_value
fundBasics_aum_value
fundBasics_spreadPct_value
fundBasics_segment
performance_priceTrAsOf
performance_priceTr1Mo_value
performance_priceTr3Mo_value
performance_priceTr1Yr_value
performance_priceTr3YrAnnualized_value
performance_priceTr5YrAnnualized_value
performance_p riceTr10YrAnnualized_value
analysis_analystPick
analysis_opportunitiesList
analysis_letterGrade
analysis_efficiencyScore
analysis_tradabilityScore
analysis_fitScore
analysis_leveragedFactor
analysis_exposureReset
analysis_avgDailyDollarVolume
analysis_avgDailyShareVolume
analysis_spread_value
analysis_fundClosureRisk
fundamentals_dividendYield_value
fundamentals_equity_pe
fundamentals_fixedIncome_duration
fundamentals_fixedIncome_creditQuality
fundamentals_fixedIncome_ytm_value
classification_assetClass
class_strategy
classification_region
classification_geography
classification_category
classification_focus
classification_niche
classification_inverse
classification_leveraged
classification_etn
classification_selectionCriteria
classification_weightingScheme
classification_activePerSec
classificati on_underlyingIndex
classification_indexProvider
classification_brand
tax_legalStructure
tax_maxLtCapitalGainsRate
tax_maxStCapitalGainsRate
tax_taxReporting
将以下代码放入VBA项目标准模块中:
Option Explicit
Sub GetData()
Dim sJSONString As String
Dim vJSON As Variant
Dim sState As String
Dim lRowsQty As Long
Dim aData()
Dim aHeader()
'下载并解析唯一的第一行以获取总行数qty
sJSONString = GetXHR(http:/ /www.etf.com/etf-finder-channel-tag/Smart-Beta%20ETFs/-aum/0/1/1)
JSON.Parse sJSONString,vJSON,sState
lRowsQty = vJSON (0)(totalRows)
'下载并解析整个数据
sJSONString = GetXHR(http://www.etf.com/etf-finder-channel-tag/Smart-Beta% 20ETF / -aum / 0 /& lRowsQty& / 1)
JSON.Parse sJSONString,vJSON,sState
'将JSON转换为2d数组
JSON.ToArray vJSON,aData,aHeader
'输出
带(1)
.Cells.Delete
OutputArray .Cells(1,1),aHeader
Output2DArray .Cells(2,1),aData
.Cells.Columns.AutoFit
结束
End Sub
函数GetXHR(sURL As String)As String
使用CreateObject(MSXML2.XMLHTTP)
。打开GET,sURL,False
。发送
GetXHR = .responseText
结束
结束功能
Sub OutputArray(oDstRng As Range,aCells As Variant)
with oDstRng
.Parent.Select
With .Resize(_
1,_
UBound(aCells) - LBound(aCells)+ 1)
.NumberFormat =@
.Value = aCells
结束
结束
End Sub
Sub Output2DArr ay(oDstRng As Range,aCells As Variant)
with oDstRng
.Parent.Select
With .Resize(_
UBound(aCells,1) - LBound( aCells,1)+ 1,_
UBound(aCells,2) - LBound(aCells,2)+ 1)
.NumberFormat =@
.Value = aCells
结束
结束
结束Sub
再创建一个标准模块,将其命名为 JSON
并将以下代码放入其中,此代码提供JSON处理功能:
Option Explicit
私人sBuffer As String
私有oTokens作为对象
私有oRegEx作为对象
私有bMatch As Boolean
私有oChunks作为对象
私有oHeader作为对象
私有aData()作为变量
私有我长
子解析(ByVal sSample As String,vJSON As Variant,sState As String)
'Backus-Naur f orm JSON解析器实现基于RegEx
'输入:
'sSample - 源JSON字符串
'输出:
'vJson - 创建的对象或数组作为结果返回
'sState - string Object | Array |根据处理
sBuffer = sSample
设置oTokens = CreateObject(Scripting.Dictionary)
设置oRegEx = CreateObject(VBScript .RegExp)
使用oRegEx'基于规范的模式http://www.json.org/
.Global = True
.MultiLine = True
.IgnoreCase = True'未指定True,False,Null接受
.Pattern =(?:'[^'] *'|(?:\\| [^])*) = \s * [,\:\] \}])'双引号字符串,未指定引用的字符串
令牌表示
.Pattern =[+ - ]? ?:???\d + \.\d * | \.\d + | \d +)(?: E [+ - ] \d +)(= \s * [\] \}])'号码,E符号号码
Tokeniz ed
.Pattern =\b(?:true | false | null)(?= \s * [,\] \}])常量true,false, b $ b Tokenizec
.Pattern =\b [A-Za-z_] \w *(?= \s * \ :)'未指定的非双引号属性名称接受
令牌n
.Pattern =\s +
sBuffer = .Replace(sBuffer,)'删除不必要的空格
.MultiLine = False
Do
bMatch = False
.Pattern =< \d +(?:[sn])> \:< \d + [codas]> 'Object property structure
Tokenizep
.Pattern =\ {(?:< \d + p>(?:,< \d + p>)*)? \}'对象结构
Tokenizeo
.Pattern =\ [(?:< \d + [codas]>(?:,< \d + ] *)?\]'数组结构
令牌a
循环while bMatch
.Pattern =^&\d + [oa]> $ '顶级对象结构,未指定的数组接受
如果.Test(sBuffer)和oTokens.Exists(sBuffer)然后
检索sBuffer,vJSON
sState = IIf(IsObject(vJSON),对象,Array)
Else
vJSON = Null
sState =Error
End If
End With
Set oTokens = Nothing
set oRegEx = Nothing
End Sub
Private Sub Tokenize(sType)
Dim aContent()As String
Dim lCopyIndex As Long
Dim i As Long
Dim sKey As String
带oRegEx.Execute(sBuffer)
如果.Count = 0然后退出Sub
ReDim aContent(0 To .Count - 1)
lCopyIndex = 1
For i = 0 To .Count - 1
With .Item(i)
sKey =< &安培; oTokens.Count& sType& > 中
oTokens(sKey)= .Value
aContent(i)= Mid(sBuffer,lCopyIndex,.FirstIndex - lCopyIndex + 1)& sKey
lCopyIndex = .FirstIndex + .Length + 1
结束
下一个
结束
sBuffer = Join(aContent,)& Mid(sBuffer,lCopyIndex,Len(sBuffer) - lCopyIndex + 1)
bMatch = True
End Sub
Private Sub Retrieve(sTokenKey,vTransfer)
Dim sTokenValue As String
Dim sName As String
Dim vValue As Variant
Dim aTokens()As String
Dim i As Long
sTokenValue = oTokens(sTokenKey)
与oRegEx
.Global = True
选择案例左(右(sTokenKey,2),1)
案例o
设置vTransfer = CreateObject(Scripting.Dictionary)
aTokens = Split(sTokenValue,<)
对于i = 1 To UBound(aTokens)
检索< &安培;分割(aTokens(i),>,2)(0)& >,vTransfer
下一个
案例p
aTokens = Split(sTokenValue,<,4)
检索< &安培;分割(aTokens(1),>,2)(0)& >,sName
检索< &安培;分割(aTokens(2),>,2)(0)& >,vValue
如果IsObject(vValue)然后
设置vTransfer(sName)= vValue
Else
vTransfer(sName)= vValue
End If
Casea
aTokens = Split(sTokenValue,<)
如果UBound(aTokens)= 0然后
vTransfer = Array()
Else
ReDim vTransfer(0到UBound(aTokens) - 1)
对于i = 1 To UBound(aTokens)
检索< &安培;分割(aTokens(i),>,2)(0)& >,vValue
如果IsObject(vValue)然后
设置vTransfer(i - 1)= vValue
Else
vTransfer(i - 1)= vValue
结束如果
下一个
结束如果
案例n
vTransfer = sTokenValue
案例
vTransfer =替换(替换(替换替换(替换(_
中(sTokenValue,2,Len(sTokenValue) - 2),_
\,),_
\\,\),_
\ /,/),_
\b,Chr(8)),_
\f,Chr(12)),_
\\\
,vbLf),_
\r,vbCr),_
\t ,vbTab)
.Global = False
.Pattern =\\u [0-9a-fA-F] {4}
尽管.Test(vTransfer)
vTransfer = .Replace(vTransfer,ChrW((& H&右(.Execute(vTransfer)(0).Value,4))* 1))
循环
案例d
vTransfer =评估(sTokenValue)
案例c
选择案例LCase(sTokenValue)
案例true
vTransfer = True
案例false
vTransfer = False
案例null
vTransfer = Null
结束选择
结束选择
结束
End Sub
函数序列化(vJSON As Variant)As String
设置oChunks = CreateObject(Scripting.Dictionary)
SerializeElement vJSON
Serialize = Join(oChunks.Items(),)
设置oChunks = Nothing
结束函数
Private Sub SerializeElement(vElement As Variant,ByVal sIndent As String)
Dim aKeys()As Variant
Dim i As Long
W ith oChunks
选择案例VarType(vElement)
案例vbObject
如果vElement.Count = 0然后
.Item(.Count)={}
Else
.Item(.Count)={& vbCrLf
aKeys = vElement.Keys
For i = 0 To UBound(aKeys)
.Item(.Count)= sIndent& vbTab& & aKeys(i)& & :
SerializeElement vElement(aKeys(i)),sIndent& vbTab
如果不是(i = UBound(aKeys))然后.Item(.Count)=,
.Item(.Count)= vbCrLf
下一个
.Item .Count)= sIndent& }
End If
Case is> = vbArray
如果UBound(vElement)= -1然后
.Item(.Count)=[]
Else
.Item(.Count)=[& vbCrLf
对于i = 0到UBound(vElement)
.Item(.Count)= sIndent& vbTab
SerializeElement vElement(i),sIndent& vbTab
如果不是(i = UBound(vElement))Then .Item(.Count)=,'sResult = sResult& ,
.Item(.Count)= vbCrLf
下一个
.Item(.Count)= sIndent& ]
End If
案例vbInteger,vbLong
.Item(.Count)= vElement
案例vbSingle,vbDouble
.Item(.Count)=替换( vElement,,,。)
案例vbNull
.Item(.Count)=null
案例vbBoolean
.Item(.Count)= IIf(vElement ,true,false)
Case Else
.Item(.Count)=& _
替换(替换(替换(替换(替换(替换(vElement,_
\,\\),_
,\),_
/,\ /),_
Chr(8),\b),_
Chr ),_
vbLf,\\\
),_
vbCr,\r),_
vbTab,\t) &安培; _
结束选择
结束
End Sub
函数ToString(vJSON As Variant)As String
选择案例VarType(vJSON)
案例vbObject,Is> = vbArray
设置oChunks = CreateObject(Scripting.Dictionary)
ToStringElement vJSON
oChunks.Remove 0
ToString = Join(oChunks.Items(),)
设置oChunks = Nothing
案例vbNull
ToString =Null
案例vbBoolean
ToString = IIf(vJSON,True,False)
案例Else
ToString = CStr(vJSON)
结束选择
结束函数
Private Sub ToStringElement(vElement As Variant,ByVal sIndent As String)
Dim aKeys()As Variant
Dim i As Long
与oChunks
选择案例VarType(vElement)
案例vbObject
如果vE lement.Count = 0然后
.Item(.Count)=''
Else
.Item(.Count)= vbCrLf
aKeys = vElement.Keys
对于i = 0到UBound(aKeys)
.Item(.Count)= sIndent& aKeys(i)& :
ToStringElement vElement(aKeys(i)),sIndent& vbTab
如果不是(i = UBound(aKeys))然后.Item(.Count)= vbCrLf
下一个
结束If
案例是> = vbArray
如果UBound(vElement)= -1然后
.Item(.Count)=''
Else
.Item(.Count)= vbCrLf
对于i = 0到UBound (vElement)
.Item(.Count)= sIndent&我& :
ToStringElement vElement(i),sIndent& vbTab
如果不是(i = UBound(vElement))然后.Item(.Count)= vbCrLf
下一个
结束如果
案例vbNull
.Item(.Count )=Null
案例vbBoolean
.Item(.Count)= IIf(vElement,True,False)
案例Else
.Item(.Count) = CStr(vElement)
结束选择
结束
End Sub
Sub ToArray(vJSON As Variant,aRows()As Variant,aHeader )作为变体)
'输入:
'vJSON - 包含行数据的数组或对象
'输出:
'aData - 表示JSON数据的2d数组
'aHeader - 1d数组的属性名称
Dim sName As Variant
设置oHeader = CreateObject(Scripting.Dictionary)
选择案例VarType(vJSON )
案例vbObject
如果vJSON.Count> 0然后
ReDim aData(0到vJSON.Count - 1,0到0)
oHeader(#)= 0
i = 0
对于每个sName在vJSON
aData(i,0)=#& sName
ToArrayElement vJSON(sName),
i = i + 1
下一个
Else
ReDim aData(0到0,0到0)
结束如果
案例是> = vbArray
如果UBound(vJSON)> = 0然后
ReDim aData(0到UBound(vJSON),0到0)
对于i = 0到UBound(vJSON)
ToArrayElement vJSON(i),
下一个
Else
ReDim aData(0到0,0到0)
结束If
案例Else
ReDim aData(0到0,0到0)
aData(0,0)= ToString(vJSON)
结束选择
aHeader = oHeader。 key()
设置oHeader = Nothing
aRows = aData
擦除aData
End Sub
Private Sub ToArrayElement(vElement As Variant, sFieldName As String)
Dim sName As Variant
Dim j As Long
选择案例VarType(vElement)
案例vbObject对象的集合
对于每个sName在vElement中
ToArrayElement vElement(sName),sFieldName& IIf(sFieldName =,,_)& sName
下一个
案例是> = vbArray'数组集合
对于j = 0到UBound(vElement)
ToArrayElement vElement(j),sFieldName& IIf(sFieldName =,,_)& #& j
下一个
案例Else
如果不是oHeader.Exists(sFieldName)然后
oHeader(sFieldName)= oHeader.Count
如果UBound(aData,2) oHeader.Count - 1然后ReDim保存aData(0到UBound(aData,1),0 To oHeader.Count - 1)
End If
j = oHeader(sFieldName)
aData(i, j)= ToString(vElement)
结束选择
End Sub
I'm trying to webscrape multiple tables from a website. So far I have built an excel VBA macro to do this. I also figured out how to get all the data when it is on multiple pages in the website. For instance, if I have 1000 results but 50 are displayed on each page. The problem is that I have the same 5 tables on multiple pages because each table has 1000 results.
My code can only loop through each page for 1 table. I also have written code to grab each table, but I cannot figure out how to do that for each of the 50 search results (each page).
How can I loop through multiple tables and click the next page in the process to capture all the data?
Sub ETFDat()
Dim ie As Object, i As Long, strText As String
Dim jj As Long
Dim hBody As Object, hTR As Object, hTD As Object
Dim tb As Object, bb As Object, Tr As Object, Td As Object, ii As Long
Dim doc As Object, hTable As Object
Dim y As Long, z As Long, wb As Excel.Workbook, ws As Excel.Worksheet
Set wb = Excel.ActiveWorkbook
Set ws = wb.ActiveSheet
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
y = 1 'Column A in Excel
z = 1 'Row 1 in Excel
Sheets("Fund Basics").Activate
Cells.Select
Selection.Clear
ie.navigate "http://www.etf.com/channels/smart-beta-etfs/channels/smart- beta-etfs?qt-tabs=0#qt-tabs" ', , , , "Content-Type: application/x-www-form-urlencoded" & vbCrLf
Do While ie.busy: DoEvents: Loop
Do While ie.ReadyState <> 4: DoEvents: Loop
Set doc = ie.document
Set hTable = doc.getElementsByTagName("table") '.GetElementByID("tablePerformance")
ii = 1
Do While ii <= 17
For Each tb In hTable
Set hBody = tb.getElementsByTagName("tbody")
For Each bb In hBody
Set hTR = bb.getElementsByTagName("tr")
For Each Tr In hTR
Set hTD = Tr.getElementsByTagName("td")
y = 1 ' Resets back to column A
For Each Td In hTD
ws.Cells(z, y).Value = Td.innerText
y = y + 1
Next Td
DoEvents
z = z + 1
Next Tr
Exit For
Next bb
Exit For
Next tb
With doc
Set elems = .getElementsByTagName("a")
For Each e In elems
If (e.getAttribute("id") = "nextPage") Then
e.Click
Exit For
End If
Next e
End With
ii = ii + 1
Application.Wait (Now + TimeValue("00:00:05"))
Loop
MsgBox "Done"
End Sub
There is the example showing how the data could be retrieved from the website using XHRs and JSON parsing, it consists of several steps.
- Retrieve the data.
I looked into a little with XHRs using Chrome Developer Tools Network tab. Most relevant data I found is JSON string returned by GET XHR from http://www.etf.com/etf-finder-channel-tag/Smart-Beta%20ETFs/-aum/50/50/1 after I clicked the next page button:
The response has the following structure for single row item:
[
{
"productId": 576,
"fund": "iShares Russell 1000 Value ETF",
"ticker": "IWD",
"inceptionDate": "2000-05-22",
"launchDate": "2000-05-22",
"hasSegmentReport": "true",
"genericReport": "false",
"hasReport": "true",
"fundsInSegment": 20,
"economicDevelopment": "Developed Markets",
"totalRows": 803,
"fundBasics": {
"issuer": "<a href='/channels/blackrock-etfs' alt='BlackRock'>BlackRock</a>",
"expenseRatio": {
"value": 20
},
"aum": {
"value": 36957230250
},
"spreadPct": {
"value": 0.000094
},
"segment": "Equity: U.S. - Large Cap Value"
},
"performance": {
"priceTrAsOf": "2017-02-27",
"priceTr1Mo": {
"value": 0.031843
},
"priceTr3Mo": {
"value": 0.070156
},
"priceTr1Yr": {
"value": 0.281541
},
"priceTr3YrAnnualized": {
"value": 0.099171
},
"priceTr5YrAnnualized": {
"value": 0.13778
},
"priceTr10YrAnnualized": {
"value": 0.061687
}
},
"analysis": {
"analystPick": null,
"opportunitiesList": null,
"letterGrade": "A",
"efficiencyScore": 97.977103,
"tradabilityScore": 99.260541,
"fitScore": 84.915658,
"leveragedFactor": null,
"exposureReset": null,
"avgDailyDollarVolume": 243848188.037378,
"avgDailyShareVolume": 2148400.688889,
"spread": {
"value": 0.010636
},
"fundClosureRisk": "Low"
},
"fundamentals": {
"dividendYield": {
"value": 0.021543
},
"equity": {
"pe": 27.529645,
"pb": 1.964124
},
"fixedIncome": {
"duration": null,
"creditQuality": null,
"ytm": {
"value": null
}
}
},
"classification": {
"assetClass": "Equity",
"strategy": "Value",
"region": "North America",
"geography": "U.S.",
"category": "Size and Style",
"focus": "Large Cap",
"niche": "Value",
"inverse": "false",
"leveraged": "false",
"etn": "false",
"selectionCriteria": "Multi-Factor",
"weightingScheme": "Multi-Factor",
"activePerSec": "false",
"underlyingIndex": "Russell 1000 Value Index",
"indexProvider": "Russell",
"brand": "iShares"
},
"tax": {
"legalStructure": "Open-Ended Fund",
"maxLtCapitalGainsRate": 20,
"maxStCapitalGainsRate": 39.6,
"taxReporting": "1099"
}
}
]
The property
"totalRows": 803
specifies the total rows count. So to make data retrieving as fast as it possible, better to make the request to get the first row. As you can see from the URL, there is../-aum/50/50/..
tail, which points sorting order, item to start from, and total items to return. Thus to get the only row it should behttp://www.etf.com/etf-finder-channel-tag/Smart-Beta%20ETFs/-aum/0/1/1
Parse retrieved JSON, get the total number of rows from
totalRows
property.Make another one request to get the entire table.
Parse the entire table JSON, convert it to 2d array and output. You can perform further processing with direct access to the array.
For the table shown below:
The resulting table contains 803 rows and header with columns as follows:
productId
fund
ticker
inceptionDate
launchDate
hasSegmentReport
genericReport
hasReport
fundsInSegment
economicDevelopment
totalRows
fundBasics_issuer
fundBasics_expenseRatio_value
fundBasics_aum_value
fundBasics_spreadPct_value
fundBasics_segment
performance_priceTrAsOf
performance_priceTr1Mo_value
performance_priceTr3Mo_value
performance_priceTr1Yr_value
performance_priceTr3YrAnnualized_value
performance_priceTr5YrAnnualized_value
performance_priceTr10YrAnnualized_value
analysis_analystPick
analysis_opportunitiesList
analysis_letterGrade
analysis_efficiencyScore
analysis_tradabilityScore
analysis_fitScore
analysis_leveragedFactor
analysis_exposureReset
analysis_avgDailyDollarVolume
analysis_avgDailyShareVolume
analysis_spread_value
analysis_fundClosureRisk
fundamentals_dividendYield_value
fundamentals_equity_pe
fundamentals_equity_pb
fundamentals_fixedIncome_duration
fundamentals_fixedIncome_creditQuality
fundamentals_fixedIncome_ytm_value
classification_assetClass
classification_strategy
classification_region
classification_geography
classification_category
classification_focus
classification_niche
classification_inverse
classification_leveraged
classification_etn
classification_selectionCriteria
classification_weightingScheme
classification_activePerSec
classification_underlyingIndex
classification_indexProvider
classification_brand
tax_legalStructure
tax_maxLtCapitalGainsRate
tax_maxStCapitalGainsRate
tax_taxReporting
Put the below code into VBA Project standard module:
Option Explicit
Sub GetData()
Dim sJSONString As String
Dim vJSON As Variant
Dim sState As String
Dim lRowsQty As Long
Dim aData()
Dim aHeader()
' Download and parse the only first row to get total rows qty
sJSONString = GetXHR("http://www.etf.com/etf-finder-channel-tag/Smart-Beta%20ETFs/-aum/0/1/1")
JSON.Parse sJSONString, vJSON, sState
lRowsQty = vJSON(0)("totalRows")
' Download and parse the entire data
sJSONString = GetXHR("http://www.etf.com/etf-finder-channel-tag/Smart-Beta%20ETFs/-aum/0/" & lRowsQty & "/1")
JSON.Parse sJSONString, vJSON, sState
' Convert JSON to 2d array
JSON.ToArray vJSON, aData, aHeader
' Output
With Sheets(1)
.Cells.Delete
OutputArray .Cells(1, 1), aHeader
Output2DArray .Cells(2, 1), aData
.Cells.Columns.AutoFit
End With
End Sub
Function GetXHR(sURL As String) As String
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", sURL, False
.Send
GetXHR = .responseText
End With
End Function
Sub OutputArray(oDstRng As Range, aCells As Variant)
With oDstRng
.Parent.Select
With .Resize( _
1, _
UBound(aCells) - LBound(aCells) + 1)
.NumberFormat = "@"
.Value = aCells
End With
End With
End Sub
Sub Output2DArray(oDstRng As Range, aCells As Variant)
With oDstRng
.Parent.Select
With .Resize( _
UBound(aCells, 1) - LBound(aCells, 1) + 1, _
UBound(aCells, 2) - LBound(aCells, 2) + 1)
.NumberFormat = "@"
.Value = aCells
End With
End With
End Sub
Create one more standard module, name it JSON
and put the below code into it, this code provides JSON processing functionality:
Option Explicit
Private sBuffer As String
Private oTokens As Object
Private oRegEx As Object
Private bMatch As Boolean
Private oChunks As Object
Private oHeader As Object
Private aData() As Variant
Private i As Long
Sub Parse(ByVal sSample As String, vJSON As Variant, sState As String)
' Backus–Naur form JSON parser implementation based on RegEx
' Input:
' sSample - source JSON string
' Output:
' vJson - created object or array to be returned as result
' sState - string Object|Array|Error depending on processing
sBuffer = sSample
Set oTokens = CreateObject("Scripting.Dictionary")
Set oRegEx = CreateObject("VBScript.RegExp")
With oRegEx ' Patterns based on specification http://www.json.org/
.Global = True
.MultiLine = True
.IgnoreCase = True ' Unspecified True, False, Null accepted
.Pattern = "(?:'[^']*'|""(?:\\""|[^""])*"")(?=\s*[,\:\]\}])" ' Double-quoted string, unspecified quoted string
Tokenize "s"
.Pattern = "[+-]?(?:\d+\.\d*|\.\d+|\d+)(?:e[+-]?\d+)?(?=\s*[,\]\}])" ' Number, E notation number
Tokenize "d"
.Pattern = "\b(?:true|false|null)(?=\s*[,\]\}])" ' Constants true, false, null
Tokenize "c"
.Pattern = "\b[A-Za-z_]\w*(?=\s*\:)" ' Unspecified non-double-quoted property name accepted
Tokenize "n"
.Pattern = "\s+"
sBuffer = .Replace(sBuffer, "") ' Remove unnecessary spaces
.MultiLine = False
Do
bMatch = False
.Pattern = "<\d+(?:[sn])>\:<\d+[codas]>" ' Object property structure
Tokenize "p"
.Pattern = "\{(?:<\d+p>(?:,<\d+p>)*)?\}" ' Object structure
Tokenize "o"
.Pattern = "\[(?:<\d+[codas]>(?:,<\d+[codas]>)*)?\]" ' Array structure
Tokenize "a"
Loop While bMatch
.Pattern = "^<\d+[oa]>$" ' Top level object structure, unspecified array accepted
If .Test(sBuffer) And oTokens.Exists(sBuffer) Then
Retrieve sBuffer, vJSON
sState = IIf(IsObject(vJSON), "Object", "Array")
Else
vJSON = Null
sState = "Error"
End If
End With
Set oTokens = Nothing
Set oRegEx = Nothing
End Sub
Private Sub Tokenize(sType)
Dim aContent() As String
Dim lCopyIndex As Long
Dim i As Long
Dim sKey As String
With oRegEx.Execute(sBuffer)
If .Count = 0 Then Exit Sub
ReDim aContent(0 To .Count - 1)
lCopyIndex = 1
For i = 0 To .Count - 1
With .Item(i)
sKey = "<" & oTokens.Count & sType & ">"
oTokens(sKey) = .Value
aContent(i) = Mid(sBuffer, lCopyIndex, .FirstIndex - lCopyIndex + 1) & sKey
lCopyIndex = .FirstIndex + .Length + 1
End With
Next
End With
sBuffer = Join(aContent, "") & Mid(sBuffer, lCopyIndex, Len(sBuffer) - lCopyIndex + 1)
bMatch = True
End Sub
Private Sub Retrieve(sTokenKey, vTransfer)
Dim sTokenValue As String
Dim sName As String
Dim vValue As Variant
Dim aTokens() As String
Dim i As Long
sTokenValue = oTokens(sTokenKey)
With oRegEx
.Global = True
Select Case Left(Right(sTokenKey, 2), 1)
Case "o"
Set vTransfer = CreateObject("Scripting.Dictionary")
aTokens = Split(sTokenValue, "<")
For i = 1 To UBound(aTokens)
Retrieve "<" & Split(aTokens(i), ">", 2)(0) & ">", vTransfer
Next
Case "p"
aTokens = Split(sTokenValue, "<", 4)
Retrieve "<" & Split(aTokens(1), ">", 2)(0) & ">", sName
Retrieve "<" & Split(aTokens(2), ">", 2)(0) & ">", vValue
If IsObject(vValue) Then
Set vTransfer(sName) = vValue
Else
vTransfer(sName) = vValue
End If
Case "a"
aTokens = Split(sTokenValue, "<")
If UBound(aTokens) = 0 Then
vTransfer = Array()
Else
ReDim vTransfer(0 To UBound(aTokens) - 1)
For i = 1 To UBound(aTokens)
Retrieve "<" & Split(aTokens(i), ">", 2)(0) & ">", vValue
If IsObject(vValue) Then
Set vTransfer(i - 1) = vValue
Else
vTransfer(i - 1) = vValue
End If
Next
End If
Case "n"
vTransfer = sTokenValue
Case "s"
vTransfer = Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace( _
Mid(sTokenValue, 2, Len(sTokenValue) - 2), _
"\""", """"), _
"\\", "\"), _
"\/", "/"), _
"\b", Chr(8)), _
"\f", Chr(12)), _
"\n", vbLf), _
"\r", vbCr), _
"\t", vbTab)
.Global = False
.Pattern = "\\u[0-9a-fA-F]{4}"
Do While .Test(vTransfer)
vTransfer = .Replace(vTransfer, ChrW(("&H" & Right(.Execute(vTransfer)(0).Value, 4)) * 1))
Loop
Case "d"
vTransfer = Evaluate(sTokenValue)
Case "c"
Select Case LCase(sTokenValue)
Case "true"
vTransfer = True
Case "false"
vTransfer = False
Case "null"
vTransfer = Null
End Select
End Select
End With
End Sub
Function Serialize(vJSON As Variant) As String
Set oChunks = CreateObject("Scripting.Dictionary")
SerializeElement vJSON, ""
Serialize = Join(oChunks.Items(), "")
Set oChunks = Nothing
End Function
Private Sub SerializeElement(vElement As Variant, ByVal sIndent As String)
Dim aKeys() As Variant
Dim i As Long
With oChunks
Select Case VarType(vElement)
Case vbObject
If vElement.Count = 0 Then
.Item(.Count) = "{}"
Else
.Item(.Count) = "{" & vbCrLf
aKeys = vElement.Keys
For i = 0 To UBound(aKeys)
.Item(.Count) = sIndent & vbTab & """" & aKeys(i) & """" & ": "
SerializeElement vElement(aKeys(i)), sIndent & vbTab
If Not (i = UBound(aKeys)) Then .Item(.Count) = ","
.Item(.Count) = vbCrLf
Next
.Item(.Count) = sIndent & "}"
End If
Case Is >= vbArray
If UBound(vElement) = -1 Then
.Item(.Count) = "[]"
Else
.Item(.Count) = "[" & vbCrLf
For i = 0 To UBound(vElement)
.Item(.Count) = sIndent & vbTab
SerializeElement vElement(i), sIndent & vbTab
If Not (i = UBound(vElement)) Then .Item(.Count) = "," 'sResult = sResult & ","
.Item(.Count) = vbCrLf
Next
.Item(.Count) = sIndent & "]"
End If
Case vbInteger, vbLong
.Item(.Count) = vElement
Case vbSingle, vbDouble
.Item(.Count) = Replace(vElement, ",", ".")
Case vbNull
.Item(.Count) = "null"
Case vbBoolean
.Item(.Count) = IIf(vElement, "true", "false")
Case Else
.Item(.Count) = """" & _
Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(vElement, _
"\", "\\"), _
"""", "\"""), _
"/", "\/"), _
Chr(8), "\b"), _
Chr(12), "\f"), _
vbLf, "\n"), _
vbCr, "\r"), _
vbTab, "\t") & _
""""
End Select
End With
End Sub
Function ToString(vJSON As Variant) As String
Select Case VarType(vJSON)
Case vbObject, Is >= vbArray
Set oChunks = CreateObject("Scripting.Dictionary")
ToStringElement vJSON, ""
oChunks.Remove 0
ToString = Join(oChunks.Items(), "")
Set oChunks = Nothing
Case vbNull
ToString = "Null"
Case vbBoolean
ToString = IIf(vJSON, "True", "False")
Case Else
ToString = CStr(vJSON)
End Select
End Function
Private Sub ToStringElement(vElement As Variant, ByVal sIndent As String)
Dim aKeys() As Variant
Dim i As Long
With oChunks
Select Case VarType(vElement)
Case vbObject
If vElement.Count = 0 Then
.Item(.Count) = "''"
Else
.Item(.Count) = vbCrLf
aKeys = vElement.Keys
For i = 0 To UBound(aKeys)
.Item(.Count) = sIndent & aKeys(i) & ": "
ToStringElement vElement(aKeys(i)), sIndent & vbTab
If Not (i = UBound(aKeys)) Then .Item(.Count) = vbCrLf
Next
End If
Case Is >= vbArray
If UBound(vElement) = -1 Then
.Item(.Count) = "''"
Else
.Item(.Count) = vbCrLf
For i = 0 To UBound(vElement)
.Item(.Count) = sIndent & i & ": "
ToStringElement vElement(i), sIndent & vbTab
If Not (i = UBound(vElement)) Then .Item(.Count) = vbCrLf
Next
End If
Case vbNull
.Item(.Count) = "Null"
Case vbBoolean
.Item(.Count) = IIf(vElement, "True", "False")
Case Else
.Item(.Count) = CStr(vElement)
End Select
End With
End Sub
Sub ToArray(vJSON As Variant, aRows() As Variant, aHeader() As Variant)
' Input:
' vJSON - Array or Object which contains rows data
' Output:
' aData - 2d array representing JSON data
' aHeader - 1d array of property names
Dim sName As Variant
Set oHeader = CreateObject("Scripting.Dictionary")
Select Case VarType(vJSON)
Case vbObject
If vJSON.Count > 0 Then
ReDim aData(0 To vJSON.Count - 1, 0 To 0)
oHeader("#") = 0
i = 0
For Each sName In vJSON
aData(i, 0) = "#" & sName
ToArrayElement vJSON(sName), ""
i = i + 1
Next
Else
ReDim aData(0 To 0, 0 To 0)
End If
Case Is >= vbArray
If UBound(vJSON) >= 0 Then
ReDim aData(0 To UBound(vJSON), 0 To 0)
For i = 0 To UBound(vJSON)
ToArrayElement vJSON(i), ""
Next
Else
ReDim aData(0 To 0, 0 To 0)
End If
Case Else
ReDim aData(0 To 0, 0 To 0)
aData(0, 0) = ToString(vJSON)
End Select
aHeader = oHeader.Keys()
Set oHeader = Nothing
aRows = aData
Erase aData
End Sub
Private Sub ToArrayElement(vElement As Variant, sFieldName As String)
Dim sName As Variant
Dim j As Long
Select Case VarType(vElement)
Case vbObject ' collection of objects
For Each sName In vElement
ToArrayElement vElement(sName), sFieldName & IIf(sFieldName = "", "", "_") & sName
Next
Case Is >= vbArray ' collection of arrays
For j = 0 To UBound(vElement)
ToArrayElement vElement(j), sFieldName & IIf(sFieldName = "", "", "_") & "#" & j
Next
Case Else
If Not oHeader.Exists(sFieldName) Then
oHeader(sFieldName) = oHeader.Count
If UBound(aData, 2) < oHeader.Count - 1 Then ReDim Preserve aData(0 To UBound(aData, 1), 0 To oHeader.Count - 1)
End If
j = oHeader(sFieldName)
aData(i, j) = ToString(vElement)
End Select
End Sub
这篇关于使用VBA宏循环遍历javascrape网页上的每个表的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!