使用VBA宏循环遍历javascrape网页上的每个表 [英] Loop through each table on javascrape webpage with VBA macro

查看:126
本文介绍了使用VBA宏循环遍历javascrape网页上的每个表的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在尝试从网站上浏览多个表格。到目前为止,我已经建立了一个excel VBA宏来做到这一点。我还想出了如何在网站上的多个页面上获取所有数据。例如,如果我有1000个结果,但每个页面上显示50个结果。问题是我在多个页面上有相同的5个表,因为每个表都有1000个结果。



我的代码只能循环遍历每个页面1个表。我也编写了代码来抓取每个表,但是我无法弄清楚如何为50个搜索结果(每页)中的每个搜索结果。



如何循环遍历多个表,然后点击该过程中的下一页来捕获所有数据?

  Sub ETFDat()

Dim ie As Object,As As Long,strText As String
Dim jj As Long
Dim hBody As Object ,hTR As Object,hTD As Object
Dim tb As Object,bb As Object,Tr As Object,Td As Object,ii As Long
Dim doc As Object,hTable As Object
Dim y As Long,z As Long,wb As Excel.Workbook,ws As Excel.Worksheet

设置wb = Excel.ActiveWorkbook
设置ws = wb.ActiveSheet

设置ie = CreateObject(InternetExplorer.Application)
ie.Visible = True

y = 1'Excel中的列A
z = 1'Excel中的行1
表格(基金会基础)。激活
Cells.Select
Selection.Clear

ie.navigatehttp://www.etf.com/channels/smart-beta -etfs / channels / smart-beta-etfs?qt-tabs = 0#qt-tabs',,,Content-Type:application / x-www-form-urlencoded& vbCrLf

Do While ie.busy:DoEvents:Loop
Do While ie.ReadyState<> 4:DoEvents:Loop

设置doc = ie.document
设置hTable = doc.getElementsByTagName(table)'.GetElementByID(tablePerformance)

ii = 1
尽管ii< = 17

对于每个tb在hTable

设置hBody = tb.getElementsByTagName(tbody)
对于每个bb在hBody

设置hTR = bb.getElementsByTagName(tr)
对于每个Tr在hTR


设置hTD = Tr。 getElementsByTagName(td)
y = 1'重置到列A
对于每个Td在hTD
ws.Cells(z,y).Value = Td.innerText
y = y + 1
下一个Td
DoEvents
z = z + 1
下一个Tr
退出
下一个bb
退出
下一页tb

与doc


设置elems = .getElementsByTagName(a)
对于每个e在elems

如果(e.getAttribute(id)=nextPage)然后
e.Click
退出
结束如果

下一步e

结束

ii = ii + 1
Application.Wait(Now + TimeValue(00:00:05))
循环

MsgBox 完成

End Sub


解决方案

p>有一个例子,显示如何使用XHR和JSON解析从网站检索数据,它由几个步骤组成。


  1. 检索数据。

我使用Chrome Developer Tools网络标签查看了XHR的一些内容。
我发现的最相关数据是GET XHR从



结果表包含803行和标题,列如下:

  productId 
fund
ticker
inceptionDate
launchDate
hasSegmentReport
genericReport
hasReport
fundInSegment
economicDevelopment
totalRows
fundBasics_issuer
fundBasics_expenseRatio_value
fundBasics_aum_value
fundBasics_spreadPct_value
fundBasics_segment
performance_priceTrAsOf
performance_priceTr1Mo_value
performance_priceTr3Mo_value
performance_priceTr1Yr_value
performance_priceTr3YrAnnualized_value
performance_priceTr5YrAnnualized_value
performance_p riceTr10YrAnnualized_value
analysis_analystPick
analysis_opportunitiesList
analysis_letterGrade
analysis_efficiencyScore
analysis_tradabilityScore
analysis_fitScore
analysis_leveragedFactor
analysis_exposureReset
analysis_avgDailyDollarVolume
analysis_avgDailyShareVolume
analysis_spread_value
analysis_fundClosureRisk
fundamentals_dividendYield_value
fundamentals_equity_pe
fundamentals_fixedIncome_duration
fundamentals_fixedIncome_creditQuality
fundamentals_fixedIncome_ytm_value
classification_assetClass
class_strategy
classification_region
classification_geography
classification_category
classification_focus
classification_niche
classification_inverse
classification_leveraged
classification_etn
classification_selectionCriteria
classification_weightingScheme
classification_activePerSec
classificati on_underlyingIndex
classification_indexProvider
classification_brand
tax_legalStructure
tax_maxLtCapitalGainsRate
tax_maxStCapitalGainsRate
tax_taxReporting

将以下代码放入VBA项目标准模块中:

  Option Explicit 

Sub GetData()

Dim sJSONString As String
Dim vJSON As Variant
Dim sState As String
Dim lRowsQty As Long
Dim aData()
Dim aHeader()

'下载并解析唯一的第一行以获取总行数qty
sJSONString = GetXHR(http:/ /www.etf.com/etf-finder-channel-tag/Smart-Beta%20ETFs/-aum/0/1/1)
JSON.Parse sJSONString,vJSON,sState
lRowsQty = vJSON (0)(totalRows)
'下载并解析整个数据
sJSONString = GetXHR(http://www.etf.com/etf-finder-channel-tag/Smart-Beta% 20ETF / -aum / 0 /& lRowsQty& / 1)
JSON.Parse sJSONString,vJSON,sState
'将JSON转换为2d数组
JSON.ToArray vJSON,aData,aHeader
'输出
带(1)
.Cells.Delete
OutputArray .Cells(1,1),aHeader
Output2DArray .Cells(2,1),aData
.Cells.Columns.AutoFit
结束

End Sub

函数GetXHR(sURL As String)As String

使用CreateObject(MSXML2.XMLHTTP)
。打开GET,sURL,False
。发送
GetXHR = .responseText
结束

结束功能

Sub OutputArray(oDstRng As Range,aCells As Variant)

with oDstRng
.Parent.Select
With .Resize(_
1,_
UBound(aCells) - LBound(aCells)+ 1)
.NumberFormat =@
.Value = aCells
结束
结束

End Sub

Sub Output2DArr ay(oDstRng As Range,aCells As Variant)

with oDstRng
.Parent.Select
With .Resize(_
UBound(aCells,1) - LBound( aCells,1)+ 1,_
UBound(aCells,2) - LBound(aCells,2)+ 1)
.NumberFormat =@
.Value = aCells
结束
结束

结束Sub

再创建一个标准模块,将其命名为 JSON 并将以下代码放入其中,此代码提供JSON处理功能:

  Option Explicit 

私人sBuffer As String
私有oTokens作为对象
私有oRegEx作为对象
私有bMatch As Boolean
私有oChunks作为对象
私有oHeader作为对象
私有aData()作为变量
私有我长

子解析(ByVal sSample As String,vJSON As Variant,sState As String)

'Backus-Naur f orm JSON解析器实现基于RegEx
'输入:
'sSample - 源JSON字符串
'输出:
'vJson - 创建的对象或数组作为结果返回
'sState - string Object | Array |根据处理

sBuffer = sSample
设置oTokens = CreateObject(Scripting.Dictionary)
设置oRegEx = CreateObject(VBScript .RegExp)
使用oRegEx'基于规范的模式http://www.json.org/
.Global = True
.MultiLine = True
.IgnoreCase = True'未指定True,False,Null接受
.Pattern =(?:'[^'] *'|(?:\\| [^])*) = \s * [,\:\] \}])'双引号字符串,未指定引用的字符串
令牌表示
.Pattern =[+ - ]? ?:???\d + \.\d * | \.\d + | \d +)(?: E [+ - ] \d +)(= \s * [\] \}])'号码,E符号号码
Tokeniz ed
.Pattern =\b(?:true | false | null)(?= \s * [,\] \}])常量true,false, b $ b Tokenizec
.Pattern =\b [A-Za-z_] \w *(?= \s * \ :)'未指定的非双引号属性名称接受
令牌n
.Pattern =\s +
sBuffer = .Replace(sBuffer,)'删除不必要的空格
.MultiLine = False
Do
bMatch = False
.Pattern =< \d +(?:[sn])> \:< \d + [codas]> 'Object property structure
Tokenizep
.Pattern =\ {(?:< \d + p>(?:,< \d + p>)*)? \}'对象结构
Tokenizeo
.Pattern =\ [(?:< \d + [codas]>(?:,< \d + ] *)?\]'数组结构
令牌a
循环while bMatch
.Pattern =^&\d + [oa]> $ '顶级对象结构,未指定的数组接受
如果.Test(sBuffer)和oTokens.Exists(sBuffer)然后
检索sBuffer,vJSON
sState = IIf(IsObject(vJSON),对象,Array)
Else
vJSON = Null
sState =Error
End If
End With
Set oTokens = Nothing
set oRegEx = Nothing

End Sub

Private Sub Tokenize(sType)

Dim aContent()As String
Dim lCopyIndex As Long
Dim i As Long
Dim sKey As String

带oRegEx.Execute(sBuffer)
如果.Count = 0然后退出Sub
ReDim aContent(0 To .Count - 1)
lCopyIndex = 1
For i = 0 To .Count - 1
With .Item(i)
sKey =< &安培; oTokens.Count& sType& > 中
oTokens(sKey)= .Value
aContent(i)= Mid(sBuffer,lCopyIndex,.FirstIndex - lCopyIndex + 1)& sKey
lCopyIndex = .FirstIndex + .Length + 1
结束
下一个
结束
sBuffer = Join(aContent,)& Mid(sBuffer,lCopyIndex,Len(sBuffer) - lCopyIndex + 1)
bMatch = True

End Sub

Private Sub Retrieve(sTokenKey,vTransfer)

Dim sTokenValue As String
Dim sName As String
Dim vValue As Variant
Dim aTokens()As String
Dim i As Long

sTokenValue = oTokens(sTokenKey)
与oRegEx
.Global = True
选择案例左(右(sTokenKey,2),1)
案例o
设置vTransfer = CreateObject(Scripting.Dictionary)
aTokens = Split(sTokenValue,<)
对于i = 1 To UBound(aTokens)
检索< &安培;分割(aTokens(i),>,2)(0)& >,vTransfer
下一个
案例p
aTokens = Split(sTokenValue,<,4)
检索< &安培;分割(aTokens(1),>,2)(0)& >,sName
检索< &安培;分割(aTokens(2),>,2)(0)& >,vValue
如果IsObject(vValue)然后
设置vTransfer(sName)= vValue
Else
vTransfer(sName)= vValue
End If
Casea
aTokens = Split(sTokenValue,<)
如果UBound(aTokens)= 0然后
vTransfer = Array()
Else
ReDim vTransfer(0到UBound(aTokens) - 1)
对于i = 1 To UBound(aTokens)
检索< &安培;分割(aTokens(i),>,2)(0)& >,vValue
如果IsObject(vValue)然后
设置vTransfer(i - 1)= vValue
Else
vTransfer(i - 1)= vValue
结束如果
下一个
结束如果
案例n
vTransfer = sTokenValue
案例
vTransfer =替换(替换(替换替换(替换(_
中(sTokenValue,2,Len(sTokenValue) - 2),_
\,),_
\\,\),_
\ /,/),_
\b,Chr(8)),_
\f,Chr(12)),_
\\\
,vbLf),_
\r,vbCr),_
\t ,vbTab)
.Global = False
.Pattern =\\u [0-9a-fA-F] {4}
尽管.Test(vTransfer)
vTransfer = .Replace(vTransfer,ChrW((& H&右(.Execute(vTransfer)(0).Value,4))* 1))
循环
案例d
vTransfer =评估(sTokenValue)
案例c
选择案例LCase(sTokenValue)
案例true
vTransfer = True
案例false
vTransfer = False
案例null
vTransfer = Null
结束选择
结束选择
结束

End Sub

函数序列化(vJSON As Variant)As String

设置oChunks = CreateObject(Scripting.Dictionary)
SerializeElement vJSON
Serialize = Join(oChunks.Items(),)
设置oChunks = Nothing

结束函数

Private Sub SerializeElement(vElement As Variant,ByVal sIndent As String)

Dim aKeys()As Variant
Dim i As Long

W ith oChunks
选择案例VarType(vElement)
案例vbObject
如果vElement.Count = 0然后
.Item(.Count)={}
Else
.Item(.Count)={& vbCrLf
aKeys = vElement.Keys
For i = 0 To UBound(aKeys)
.Item(.Count)= sIndent& vbTab& & aKeys(i)& & :
SerializeElement vElement(aKeys(i)),sIndent& vbTab
如果不是(i = UBound(aKeys))然后.Item(.Count)=,
.Item(.Count)= vbCrLf
下一个
.Item .Count)= sIndent& }
End If
Case is> = vbArray
如果UBound(vElement)= -1然后
.Item(.Count)=[]
Else
.Item(.Count)=[& vbCrLf
对于i = 0到UBound(vElement)
.Item(.Count)= sIndent& vbTab
SerializeElement vElement(i),sIndent& vbTab
如果不是(i = UBound(vElement))Then .Item(.Count)=,'sResult = sResult& ,
.Item(.Count)= vbCrLf
下一个
.Item(.Count)= sIndent& ]
End If
案例vbInteger,vbLong
.Item(.Count)= vElement
案例vbSingle,vbDouble
.Item(.Count)=替换( vElement,,,。)
案例vbNull
.Item(.Count)=null
案例vbBoolean
.Item(.Count)= IIf(vElement ,true,false)
Case Else
.Item(.Count)=& _
替换(替换(替换(替换(替换(替换(vElement,_
\,\\),_
,\),_
/,\ /),_
Chr(8),\b),_
Chr ),_
vbLf,\\\
),_
vbCr,\r),_
vbTab,\t) &安培; _

结束选择
结束

End Sub

函数ToString(vJSON As Variant)As String

选择案例VarType(vJSON)
案例vbObject,Is> = vbArray
设置oChunks = CreateObject(Scripting.Dictionary)
ToStringElement vJSON
oChunks.Remove 0
ToString = Join(oChunks.Items(),)
设置oChunks = Nothing
案例vbNull
ToString =Null
案例vbBoolean
ToString = IIf(vJSON,True,False)
案例Else
ToString = CStr(vJSON)
结束选择

结束函数

Private Sub ToStringElement(vElement As Variant,ByVal sIndent As String)

Dim aKeys()As Variant
Dim i As Long

与oChunks
选择案例VarType(vElement)
案例vbObject
如果vE lement.Count = 0然后
.Item(.Count)=''
Else
.Item(.Count)= vbCrLf
aKeys = vElement.Keys
对于i = 0到UBound(aKeys)
.Item(.Count)= sIndent& aKeys(i)& :
ToStringElement vElement(aKeys(i)),sIndent& vbTab
如果不是(i = UBound(aKeys))然后.Item(.Count)= vbCrLf
下一个
结束If
案例是> = vbArray
如果UBound(vElement)= -1然后
.Item(.Count)=''
Else
.Item(.Count)= vbCrLf
对于i = 0到UBound (vElement)
.Item(.Count)= sIndent&我& :
ToStringElement vElement(i),sIndent& vbTab
如果不是(i = UBound(vElement))然后.Item(.Count)= vbCrLf
下一个
结束如果
案例vbNull
.Item(.Count )=Null
案例vbBoolean
.Item(.Count)= IIf(vElement,True,False)
案例Else
.Item(.Count) = CStr(vElement)
结束选择
结束

End Sub

Sub ToArray(vJSON As Variant,aRows()As Variant,aHeader )作为变体)

'输入:
'vJSON - 包含行数据的数组或对象
'输出:
'aData - 表示JSON数据的2d数组
'aHeader - 1d数组的属性名称

Dim sName As Variant

设置oHeader = CreateObject(Scripting.Dictionary)
选择案例VarType(vJSON )
案例vbObject
如果vJSON.Count> 0然后
ReDim aData(0到vJSON.Count - 1,0到0)
oHeader(#)= 0
i = 0
对于每个sName在vJSON
aData(i,0)=#& sName
ToArrayElement vJSON(sName),
i = i + 1
下一个
Else
ReDim aData(0到0,0到0)
结束如果
案例是> = vbArray
如果UBound(vJSON)> = 0然后
ReDim aData(0到UBound(vJSON),0到0)
对于i = 0到UBound(vJSON)
ToArrayElement vJSON(i),
下一个
Else
ReDim aData(0到0,0到0)
结束If
案例Else
ReDim aData(0到0,0到0)
aData(0,0)= ToString(vJSON)
结束选择
aHeader = oHeader。 key()
设置oHeader = Nothing
aRows = aData
擦除aData

End Sub

Private Sub ToArrayElement(vElement As Variant, sFieldName As String)

Dim sName As Variant
Dim j As Long

选择案例VarType(vElement)
案例vbObject对象的集合
对于每个sName在vElement中
ToArrayElement vElement(sName),sFieldName& IIf(sFieldName =,,_)& sName
下一个
案例是> = vbArray'数组集合
对于j = 0到UBound(vElement)
ToArrayElement vElement(j),sFieldName& IIf(sFieldName =,,_)& #& j
下一个
案例Else
如果不是oHeader.Exists(sFieldName)然后
oHeader(sFieldName)= oHeader.Count
如果UBound(aData,2) oHeader.Count - 1然后ReDim保存aData(0到UBound(aData,1),0 To oHeader.Count - 1)
End If
j = oHeader(sFieldName)
aData(i, j)= ToString(vElement)
结束选择

End Sub


I'm trying to webscrape multiple tables from a website. So far I have built an excel VBA macro to do this. I also figured out how to get all the data when it is on multiple pages in the website. For instance, if I have 1000 results but 50 are displayed on each page. The problem is that I have the same 5 tables on multiple pages because each table has 1000 results.

My code can only loop through each page for 1 table. I also have written code to grab each table, but I cannot figure out how to do that for each of the 50 search results (each page).

How can I loop through multiple tables and click the next page in the process to capture all the data?

 Sub ETFDat()

 Dim ie As Object, i As Long, strText As String
  Dim jj As Long
 Dim hBody As Object, hTR As Object, hTD As Object
 Dim tb As Object, bb As Object, Tr As Object, Td As Object, ii As Long
 Dim doc As Object, hTable As Object
 Dim y As Long, z As Long, wb As Excel.Workbook, ws As Excel.Worksheet

  Set wb = Excel.ActiveWorkbook
  Set ws = wb.ActiveSheet

  Set ie = CreateObject("InternetExplorer.Application")
  ie.Visible = True

  y = 1   'Column A in Excel
  z = 1   'Row 1 in Excel
 Sheets("Fund Basics").Activate
 Cells.Select
 Selection.Clear

 ie.navigate "http://www.etf.com/channels/smart-beta-etfs/channels/smart-       beta-etfs?qt-tabs=0#qt-tabs" ', , , , "Content-Type: application/x-www-form-urlencoded" & vbCrLf

 Do While ie.busy: DoEvents: Loop
 Do While ie.ReadyState <> 4: DoEvents: Loop

 Set doc = ie.document
 Set hTable = doc.getElementsByTagName("table")    '.GetElementByID("tablePerformance")

 ii = 1
Do While ii <= 17

 For Each tb In hTable

Set hBody = tb.getElementsByTagName("tbody")
For Each bb In hBody

    Set hTR = bb.getElementsByTagName("tr")
    For Each Tr In hTR


        Set hTD = Tr.getElementsByTagName("td")
        y = 1 ' Resets back to column A
        For Each Td In hTD
           ws.Cells(z, y).Value = Td.innerText
           y = y + 1
         Next Td
         DoEvents
         z = z + 1
       Next Tr
      Exit For
   Next bb
  Exit For
  Next tb

With doc


Set elems = .getElementsByTagName("a")
For Each e In elems

If (e.getAttribute("id") = "nextPage") Then
    e.Click
    Exit For
End If

Next e

End With

ii = ii + 1
Application.Wait (Now + TimeValue("00:00:05"))
 Loop

 MsgBox "Done"

End Sub 

解决方案

There is the example showing how the data could be retrieved from the website using XHRs and JSON parsing, it consists of several steps.

  1. Retrieve the data.

I looked into a little with XHRs using Chrome Developer Tools Network tab. Most relevant data I found is JSON string returned by GET XHR from http://www.etf.com/etf-finder-channel-tag/Smart-Beta%20ETFs/-aum/50/50/1 after I clicked the next page button:

The response has the following structure for single row item:

[
  {
    "productId": 576,
    "fund": "iShares Russell 1000 Value ETF",
    "ticker": "IWD",
    "inceptionDate": "2000-05-22",
    "launchDate": "2000-05-22",
    "hasSegmentReport": "true",
    "genericReport": "false",
    "hasReport": "true",
    "fundsInSegment": 20,
    "economicDevelopment": "Developed Markets",
    "totalRows": 803,
    "fundBasics": {
      "issuer": "<a href='/channels/blackrock-etfs' alt='BlackRock'>BlackRock</a>",
      "expenseRatio": {
        "value": 20
      },
      "aum": {
        "value": 36957230250
      },
      "spreadPct": {
        "value": 0.000094
      },
      "segment": "Equity: U.S. - Large Cap Value"
    },
    "performance": {
      "priceTrAsOf": "2017-02-27",
      "priceTr1Mo": {
        "value": 0.031843
      },
      "priceTr3Mo": {
        "value": 0.070156
      },
      "priceTr1Yr": {
        "value": 0.281541
      },
      "priceTr3YrAnnualized": {
        "value": 0.099171
      },
      "priceTr5YrAnnualized": {
        "value": 0.13778
      },
      "priceTr10YrAnnualized": {
        "value": 0.061687
      }
    },
    "analysis": {
      "analystPick": null,
      "opportunitiesList": null,
      "letterGrade": "A",
      "efficiencyScore": 97.977103,
      "tradabilityScore": 99.260541,
      "fitScore": 84.915658,
      "leveragedFactor": null,
      "exposureReset": null,
      "avgDailyDollarVolume": 243848188.037378,
      "avgDailyShareVolume": 2148400.688889,
      "spread": {
        "value": 0.010636
      },
      "fundClosureRisk": "Low"
    },
    "fundamentals": {
      "dividendYield": {
        "value": 0.021543
      },
      "equity": {
        "pe": 27.529645,
        "pb": 1.964124
      },
      "fixedIncome": {
        "duration": null,
        "creditQuality": null,
        "ytm": {
          "value": null
        }
      }
    },
    "classification": {
      "assetClass": "Equity",
      "strategy": "Value",
      "region": "North America",
      "geography": "U.S.",
      "category": "Size and Style",
      "focus": "Large Cap",
      "niche": "Value",
      "inverse": "false",
      "leveraged": "false",
      "etn": "false",
      "selectionCriteria": "Multi-Factor",
      "weightingScheme": "Multi-Factor",
      "activePerSec": "false",
      "underlyingIndex": "Russell 1000 Value Index",
      "indexProvider": "Russell",
      "brand": "iShares"
    },
    "tax": {
      "legalStructure": "Open-Ended Fund",
      "maxLtCapitalGainsRate": 20,
      "maxStCapitalGainsRate": 39.6,
      "taxReporting": "1099"
    }
  }
]

  1. The property "totalRows": 803 specifies the total rows count. So to make data retrieving as fast as it possible, better to make the request to get the first row. As you can see from the URL, there is ../-aum/50/50/.. tail, which points sorting order, item to start from, and total items to return. Thus to get the only row it should be http://www.etf.com/etf-finder-channel-tag/Smart-Beta%20ETFs/-aum/0/1/1

  2. Parse retrieved JSON, get the total number of rows from totalRows property.

  3. Make another one request to get the entire table.

  4. Parse the entire table JSON, convert it to 2d array and output. You can perform further processing with direct access to the array.

For the table shown below:

The resulting table contains 803 rows and header with columns as follows:

productId
fund
ticker
inceptionDate
launchDate
hasSegmentReport
genericReport
hasReport
fundsInSegment
economicDevelopment
totalRows
fundBasics_issuer
fundBasics_expenseRatio_value
fundBasics_aum_value
fundBasics_spreadPct_value
fundBasics_segment
performance_priceTrAsOf
performance_priceTr1Mo_value
performance_priceTr3Mo_value
performance_priceTr1Yr_value
performance_priceTr3YrAnnualized_value
performance_priceTr5YrAnnualized_value
performance_priceTr10YrAnnualized_value
analysis_analystPick
analysis_opportunitiesList
analysis_letterGrade
analysis_efficiencyScore
analysis_tradabilityScore
analysis_fitScore
analysis_leveragedFactor
analysis_exposureReset
analysis_avgDailyDollarVolume
analysis_avgDailyShareVolume
analysis_spread_value
analysis_fundClosureRisk
fundamentals_dividendYield_value
fundamentals_equity_pe
fundamentals_equity_pb
fundamentals_fixedIncome_duration
fundamentals_fixedIncome_creditQuality
fundamentals_fixedIncome_ytm_value
classification_assetClass
classification_strategy
classification_region
classification_geography
classification_category
classification_focus
classification_niche
classification_inverse
classification_leveraged
classification_etn
classification_selectionCriteria
classification_weightingScheme
classification_activePerSec
classification_underlyingIndex
classification_indexProvider
classification_brand
tax_legalStructure
tax_maxLtCapitalGainsRate
tax_maxStCapitalGainsRate
tax_taxReporting

Put the below code into VBA Project standard module:

Option Explicit

Sub GetData()

    Dim sJSONString As String
    Dim vJSON As Variant
    Dim sState As String
    Dim lRowsQty As Long
    Dim aData()
    Dim aHeader()

    ' Download and parse the only first row to get total rows qty
    sJSONString = GetXHR("http://www.etf.com/etf-finder-channel-tag/Smart-Beta%20ETFs/-aum/0/1/1")
    JSON.Parse sJSONString, vJSON, sState
    lRowsQty = vJSON(0)("totalRows")
    ' Download and parse the entire data
    sJSONString = GetXHR("http://www.etf.com/etf-finder-channel-tag/Smart-Beta%20ETFs/-aum/0/" & lRowsQty & "/1")
    JSON.Parse sJSONString, vJSON, sState
    ' Convert JSON to 2d array
    JSON.ToArray vJSON, aData, aHeader
    ' Output
    With Sheets(1)
        .Cells.Delete
        OutputArray .Cells(1, 1), aHeader
        Output2DArray .Cells(2, 1), aData
        .Cells.Columns.AutoFit
    End With

End Sub

Function GetXHR(sURL As String) As String

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", sURL, False
        .Send
        GetXHR = .responseText
    End With

End Function

Sub OutputArray(oDstRng As Range, aCells As Variant)

    With oDstRng
        .Parent.Select
        With .Resize( _
                1, _
                UBound(aCells) - LBound(aCells) + 1)
            .NumberFormat = "@"
            .Value = aCells
        End With
    End With

End Sub

Sub Output2DArray(oDstRng As Range, aCells As Variant)

    With oDstRng
        .Parent.Select
        With .Resize( _
                UBound(aCells, 1) - LBound(aCells, 1) + 1, _
                UBound(aCells, 2) - LBound(aCells, 2) + 1)
            .NumberFormat = "@"
            .Value = aCells
        End With
    End With

End Sub

Create one more standard module, name it JSON and put the below code into it, this code provides JSON processing functionality:

Option Explicit

Private sBuffer As String
Private oTokens As Object
Private oRegEx As Object
Private bMatch As Boolean
Private oChunks As Object
Private oHeader As Object
Private aData() As Variant
Private i As Long

Sub Parse(ByVal sSample As String, vJSON As Variant, sState As String)

    ' Backus–Naur form JSON parser implementation based on RegEx
    ' Input:
    ' sSample - source JSON string
    ' Output:
    ' vJson - created object or array to be returned as result
    ' sState - string Object|Array|Error depending on processing

    sBuffer = sSample
    Set oTokens = CreateObject("Scripting.Dictionary")
    Set oRegEx = CreateObject("VBScript.RegExp")
    With oRegEx ' Patterns based on specification http://www.json.org/
        .Global = True
        .MultiLine = True
        .IgnoreCase = True ' Unspecified True, False, Null accepted
        .Pattern = "(?:'[^']*'|""(?:\\""|[^""])*"")(?=\s*[,\:\]\}])" ' Double-quoted string, unspecified quoted string
        Tokenize "s"
        .Pattern = "[+-]?(?:\d+\.\d*|\.\d+|\d+)(?:e[+-]?\d+)?(?=\s*[,\]\}])" ' Number, E notation number
        Tokenize "d"
        .Pattern = "\b(?:true|false|null)(?=\s*[,\]\}])" ' Constants true, false, null
        Tokenize "c"
        .Pattern = "\b[A-Za-z_]\w*(?=\s*\:)" ' Unspecified non-double-quoted property name accepted
        Tokenize "n"
        .Pattern = "\s+"
        sBuffer = .Replace(sBuffer, "") ' Remove unnecessary spaces
        .MultiLine = False
        Do
            bMatch = False
            .Pattern = "<\d+(?:[sn])>\:<\d+[codas]>" ' Object property structure
            Tokenize "p"
            .Pattern = "\{(?:<\d+p>(?:,<\d+p>)*)?\}" ' Object structure
            Tokenize "o"
            .Pattern = "\[(?:<\d+[codas]>(?:,<\d+[codas]>)*)?\]" ' Array structure
            Tokenize "a"
        Loop While bMatch
        .Pattern = "^<\d+[oa]>$" ' Top level object structure, unspecified array accepted
        If .Test(sBuffer) And oTokens.Exists(sBuffer) Then
            Retrieve sBuffer, vJSON
            sState = IIf(IsObject(vJSON), "Object", "Array")
        Else
            vJSON = Null
            sState = "Error"
        End If
    End With
    Set oTokens = Nothing
    Set oRegEx = Nothing

End Sub

Private Sub Tokenize(sType)

    Dim aContent() As String
    Dim lCopyIndex As Long
    Dim i As Long
    Dim sKey As String

    With oRegEx.Execute(sBuffer)
        If .Count = 0 Then Exit Sub
        ReDim aContent(0 To .Count - 1)
        lCopyIndex = 1
        For i = 0 To .Count - 1
            With .Item(i)
                sKey = "<" & oTokens.Count & sType & ">"
                oTokens(sKey) = .Value
                aContent(i) = Mid(sBuffer, lCopyIndex, .FirstIndex - lCopyIndex + 1) & sKey
                lCopyIndex = .FirstIndex + .Length + 1
            End With
        Next
    End With
    sBuffer = Join(aContent, "") & Mid(sBuffer, lCopyIndex, Len(sBuffer) - lCopyIndex + 1)
    bMatch = True

End Sub

Private Sub Retrieve(sTokenKey, vTransfer)

    Dim sTokenValue As String
    Dim sName As String
    Dim vValue As Variant
    Dim aTokens() As String
    Dim i As Long

    sTokenValue = oTokens(sTokenKey)
    With oRegEx
        .Global = True
        Select Case Left(Right(sTokenKey, 2), 1)
            Case "o"
                Set vTransfer = CreateObject("Scripting.Dictionary")
                aTokens = Split(sTokenValue, "<")
                For i = 1 To UBound(aTokens)
                    Retrieve "<" & Split(aTokens(i), ">", 2)(0) & ">", vTransfer
                Next
            Case "p"
                aTokens = Split(sTokenValue, "<", 4)
                Retrieve "<" & Split(aTokens(1), ">", 2)(0) & ">", sName
                Retrieve "<" & Split(aTokens(2), ">", 2)(0) & ">", vValue
                If IsObject(vValue) Then
                    Set vTransfer(sName) = vValue
                Else
                    vTransfer(sName) = vValue
                End If
            Case "a"
                aTokens = Split(sTokenValue, "<")
                If UBound(aTokens) = 0 Then
                    vTransfer = Array()
                Else
                    ReDim vTransfer(0 To UBound(aTokens) - 1)
                    For i = 1 To UBound(aTokens)
                        Retrieve "<" & Split(aTokens(i), ">", 2)(0) & ">", vValue
                        If IsObject(vValue) Then
                            Set vTransfer(i - 1) = vValue
                        Else
                            vTransfer(i - 1) = vValue
                        End If
                    Next
                End If
            Case "n"
                vTransfer = sTokenValue
            Case "s"
                vTransfer = Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace( _
                    Mid(sTokenValue, 2, Len(sTokenValue) - 2), _
                    "\""", """"), _
                    "\\", "\"), _
                    "\/", "/"), _
                    "\b", Chr(8)), _
                    "\f", Chr(12)), _
                    "\n", vbLf), _
                    "\r", vbCr), _
                    "\t", vbTab)
                .Global = False
                .Pattern = "\\u[0-9a-fA-F]{4}"
                Do While .Test(vTransfer)
                    vTransfer = .Replace(vTransfer, ChrW(("&H" & Right(.Execute(vTransfer)(0).Value, 4)) * 1))
                Loop
            Case "d"
                vTransfer = Evaluate(sTokenValue)
            Case "c"
                Select Case LCase(sTokenValue)
                    Case "true"
                        vTransfer = True
                    Case "false"
                        vTransfer = False
                    Case "null"
                        vTransfer = Null
                End Select
        End Select
    End With

End Sub

Function Serialize(vJSON As Variant) As String

    Set oChunks = CreateObject("Scripting.Dictionary")
    SerializeElement vJSON, ""
    Serialize = Join(oChunks.Items(), "")
    Set oChunks = Nothing

End Function

Private Sub SerializeElement(vElement As Variant, ByVal sIndent As String)

    Dim aKeys() As Variant
    Dim i As Long

    With oChunks
        Select Case VarType(vElement)
            Case vbObject
                If vElement.Count = 0 Then
                    .Item(.Count) = "{}"
                Else
                    .Item(.Count) = "{" & vbCrLf
                    aKeys = vElement.Keys
                    For i = 0 To UBound(aKeys)
                        .Item(.Count) = sIndent & vbTab & """" & aKeys(i) & """" & ": "
                        SerializeElement vElement(aKeys(i)), sIndent & vbTab
                        If Not (i = UBound(aKeys)) Then .Item(.Count) = ","
                        .Item(.Count) = vbCrLf
                    Next
                    .Item(.Count) = sIndent & "}"
                End If
            Case Is >= vbArray
                If UBound(vElement) = -1 Then
                    .Item(.Count) = "[]"
                Else
                    .Item(.Count) = "[" & vbCrLf
                    For i = 0 To UBound(vElement)
                        .Item(.Count) = sIndent & vbTab
                        SerializeElement vElement(i), sIndent & vbTab
                        If Not (i = UBound(vElement)) Then .Item(.Count) = "," 'sResult = sResult & ","
                        .Item(.Count) = vbCrLf
                    Next
                    .Item(.Count) = sIndent & "]"
                End If
            Case vbInteger, vbLong
                .Item(.Count) = vElement
            Case vbSingle, vbDouble
                .Item(.Count) = Replace(vElement, ",", ".")
            Case vbNull
                .Item(.Count) = "null"
            Case vbBoolean
                .Item(.Count) = IIf(vElement, "true", "false")
            Case Else
                .Item(.Count) = """" & _
                    Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(vElement, _
                        "\", "\\"), _
                        """", "\"""), _
                        "/", "\/"), _
                        Chr(8), "\b"), _
                        Chr(12), "\f"), _
                        vbLf, "\n"), _
                        vbCr, "\r"), _
                        vbTab, "\t") & _
                    """"
        End Select
    End With

End Sub

Function ToString(vJSON As Variant) As String

    Select Case VarType(vJSON)
        Case vbObject, Is >= vbArray
            Set oChunks = CreateObject("Scripting.Dictionary")
            ToStringElement vJSON, ""
            oChunks.Remove 0
            ToString = Join(oChunks.Items(), "")
            Set oChunks = Nothing
        Case vbNull
            ToString = "Null"
        Case vbBoolean
            ToString = IIf(vJSON, "True", "False")
        Case Else
            ToString = CStr(vJSON)
    End Select

End Function

Private Sub ToStringElement(vElement As Variant, ByVal sIndent As String)

    Dim aKeys() As Variant
    Dim i As Long

    With oChunks
        Select Case VarType(vElement)
            Case vbObject
                If vElement.Count = 0 Then
                    .Item(.Count) = "''"
                Else
                    .Item(.Count) = vbCrLf
                    aKeys = vElement.Keys
                    For i = 0 To UBound(aKeys)
                        .Item(.Count) = sIndent & aKeys(i) & ": "
                        ToStringElement vElement(aKeys(i)), sIndent & vbTab
                        If Not (i = UBound(aKeys)) Then .Item(.Count) = vbCrLf
                    Next
                End If
            Case Is >= vbArray
                If UBound(vElement) = -1 Then
                    .Item(.Count) = "''"
                Else
                    .Item(.Count) = vbCrLf
                    For i = 0 To UBound(vElement)
                        .Item(.Count) = sIndent & i & ": "
                        ToStringElement vElement(i), sIndent & vbTab
                        If Not (i = UBound(vElement)) Then .Item(.Count) = vbCrLf
                    Next
                End If
            Case vbNull
                .Item(.Count) = "Null"
            Case vbBoolean
                .Item(.Count) = IIf(vElement, "True", "False")
            Case Else
                .Item(.Count) = CStr(vElement)
        End Select
    End With

End Sub

Sub ToArray(vJSON As Variant, aRows() As Variant, aHeader() As Variant)

    ' Input:
    ' vJSON - Array or Object which contains rows data
    ' Output:
    ' aData - 2d array representing JSON data
    ' aHeader - 1d array of property names

    Dim sName As Variant

    Set oHeader = CreateObject("Scripting.Dictionary")
    Select Case VarType(vJSON)
        Case vbObject
            If vJSON.Count > 0 Then
                ReDim aData(0 To vJSON.Count - 1, 0 To 0)
                oHeader("#") = 0
                i = 0
                For Each sName In vJSON
                    aData(i, 0) = "#" & sName
                    ToArrayElement vJSON(sName), ""
                    i = i + 1
                Next
            Else
                ReDim aData(0 To 0, 0 To 0)
            End If
        Case Is >= vbArray
            If UBound(vJSON) >= 0 Then
                ReDim aData(0 To UBound(vJSON), 0 To 0)
                For i = 0 To UBound(vJSON)
                    ToArrayElement vJSON(i), ""
                Next
            Else
                ReDim aData(0 To 0, 0 To 0)
            End If
        Case Else
            ReDim aData(0 To 0, 0 To 0)
            aData(0, 0) = ToString(vJSON)
    End Select
    aHeader = oHeader.Keys()
    Set oHeader = Nothing
    aRows = aData
    Erase aData

End Sub

Private Sub ToArrayElement(vElement As Variant, sFieldName As String)

    Dim sName As Variant
    Dim j As Long

    Select Case VarType(vElement)
        Case vbObject ' collection of objects
            For Each sName In vElement
                ToArrayElement vElement(sName), sFieldName & IIf(sFieldName = "", "", "_") & sName
            Next
        Case Is >= vbArray  ' collection of arrays
            For j = 0 To UBound(vElement)
                ToArrayElement vElement(j), sFieldName & IIf(sFieldName = "", "", "_") & "#" & j
            Next
        Case Else
            If Not oHeader.Exists(sFieldName) Then
                oHeader(sFieldName) = oHeader.Count
                If UBound(aData, 2) < oHeader.Count - 1 Then ReDim Preserve aData(0 To UBound(aData, 1), 0 To oHeader.Count - 1)
            End If
            j = oHeader(sFieldName)
            aData(i, j) = ToString(vElement)
    End Select

End Sub

这篇关于使用VBA宏循环遍历javascrape网页上的每个表的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆