使用发布数据和 xlmlhttp [英] Using post data and xlmlhttp
本文介绍了使用发布数据和 xlmlhttp的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!
问题描述
我仍在努力学习更多关于抓取的知识,我可以设计出一个代码来让我获得想要的结果.
I am still trying to learn more about scraping and I could devise a code that enables me to get the desired results.
这是代码
Sub Test()
Dim e As Variant
Dim ie As Object
Dim ulElem As Object
Dim liElem As Object
Dim anchElem As Object
Dim dt As Date
Dim lDay As Integer
Dim lMnth As Integer
Dim lYear As Integer
Dim r As Long
Set ie = CreateObject("InternetExplorer.Application")
dt = Date - 2
lDay = Day(dt)
lMnth = Month(dt)
lYear = Year(dt)
With ie
.Visible = False
.Navigate "http://www.handelsregisterbekanntmachungen.de/?aktion=suche#Ergebnis"
Do: DoEvents: Loop Until .readyState = 4
For Each e In ie.document.getElementsByTagName("select")
If Len(e.innerText) = 56 Then
e.selectedIndex = lDay
ElseIf Len(e.innerText) = 18 Then
e.selectedIndex = lMnth
ElseIf Left(e.innerText, 8) = "----2000" Then
e.selectedIndex = lYear - 1999
ElseIf InStr(e.innerText, "Alle Bekanntmachungen") > 0 Then
e.selectedIndex = 1
End If
Next e
For Each e In ie.document.getElementsByTagName("input")
If e.Value = "Suche starten" Then e.Click: Exit For
Next e
Do: DoEvents: Loop Until .readyState = 4
Application.Wait Now() + TimeValue("00:00:05")
If InStr(ie.document.body.innerHTML, "Es wurden 0 Treffer gefunden.") > 0 Then
MsgBox "No Results Found", vbExclamation: Exit Sub
Else
For Each ulElem In ie.document.getElementsByTagName("b")
For Each liElem In ulElem.getElementsByTagName("li")
Set anchElem = liElem.getElementsByTagName("a")
If anchElem.Length > 0 Then
r = r + 1
Cells(r, 1) = Mid(anchElem.Item(0).innerText, 11)
End If
Next liElem
Next ulElem
End If
End With
End Sub
但是为了尝试了解有关 XMLHTTP 请求的更多信息,我正在寻找一种方法来获得相同的结果但不使用 IE.所以我认为使用 XMLHTTP 会更有效,特别是我可以在为搜索过程设置所需的选项后看到发布数据.
But as a matter of trying to learn more about XMLHTTP requests I am seeking for a way to get the same results but without using IE. so I think using XMLHTTP will be more efficient specially I could see post data after setting up the desired choices for the search process.
推荐答案
看下面的例子:
Option Explicit
Sub Test()
Dim sState As String
Dim sCourt As String
Dim dtFrom As Date
Dim dtTill As Date
Dim sSubject As String
Dim sOrder As String
Dim oStates As Object
Dim oCourts As Object
Dim oSubjects As Object
Dim oOrders As Object
Dim sStateCode As String
Dim sCourtId As String
Dim sSubjectVal As String
Dim sOrderVal As String
Dim aData
' Set query data
sState = ""
sCourt = ""
dtFrom = DateSerial(2018, 2, 11)
dtTill = DateSerial(2018, 2, 11)
sSubject = ""
sOrder = "Aktenzeichen"
' Retrieve options
GetOptions oStates, oCourts, oSubjects, oOrders
' Validate query parameters
If Not oStates.Exists(sState) Then MsgBox "State valid values:" & vbCrLf & vbCrLf & """" & Join(oStates.Keys(), """" & vbCrLf & """") & """": Exit Sub
If Not oCourts(oStates(sState)).Exists(sCourt) Then MsgBox "Court valid values:" & vbCrLf & vbCrLf & """" & Join(oCourts(oStates(sState)).Keys(), """" & vbCrLf & """") & """": Exit Sub
If Not oSubjects.Exists(sSubject) Then MsgBox "Subject valid values:" & vbCrLf & vbCrLf & """" & Join(oSubjects.Keys(), """" & vbCrLf & """") & """": Exit Sub
If Not oOrders.Exists(sOrder) Then MsgBox "Order valid values:" & vbCrLf & vbCrLf & """" & Join(oOrders.Keys(), """" & vbCrLf & """") & """": Exit Sub
' Request data
sStateCode = oStates(sState)
sCourtId = oCourts(sStateCode)(sCourt)
sSubjectVal = oSubjects(sSubject)
sOrderVal = oOrders(sOrder)
GetData sStateCode, sCourt, sCourtId, dtFrom, dtTill, sSubjectVal, sOrderVal, aData
' Rebuild nested arrays to 2d array for output
aData = Denestify(aData)
' Output
With ThisWorkbook.Sheets(1)
.Cells.Delete
Output2DArray .Cells(1, 1), aData
.Columns.AutoFit
End With
MsgBox "Completed"
End Sub
Sub GetOptions(oStates As Object, oCourts As Object, oSubjects As Object, oOrders As Object)
Dim sContent As String
Dim aTmp0
Dim aTmp1
Dim vItem
Dim oTmp
Dim i As Long
' Retrieve request options from search page
XmlHttpRequest "GET", "https://www.handelsregisterbekanntmachungen.de/?aktion=suche", "", "", "", sContent
' Get each state and code
ExtractOptions sContent, "land", oStates
' Get courts with courts ids for each federal state
Set oCourts = CreateObject("Scripting.Dictionary")
For Each vItem In oStates.Items()
' Put courts and ids into temp dictionary
Set oTmp = CreateObject("Scripting.Dictionary")
If vItem <> "" Then
' Extract the whole JS array statement with courts names
ParseResponse "BundeslandArray['" & vItem & "']=new Array(('[^']*'(?:,'[^']*')*));", sContent, aTmp0, False
' Extract each court name into temp array
ParseResponse "'([^']*)'", (aTmp0(0)), aTmp0, False
' Extract the whole JS array statement with courts ids
ParseResponse "BundeslandArrayId['" & vItem & "']=new Array(('[^']*'(?:,'[^']*')*));", sContent, aTmp1, False
' Extract each court id into temp array
ParseResponse "'([^']*)'", (aTmp1(0)), aTmp1, False
For i = 0 To UBound(aTmp0)
oTmp(DecodeHTMLEntities((aTmp0(i)))) = DecodeHTMLEntities((aTmp1(i)))
Next
End If
' Add dummy item
oTmp("") = ""
' Put courts-ids for the state code into dictionary
Set oCourts(vItem) = oTmp
Next
' Add dummy item
oStates("") = ""
' Get subjects
ExtractOptions sContent, "gegenstand", oSubjects
' Add dummy item
oSubjects("") = "0"
' Get sort order types
ExtractOptions sContent, "order", oOrders
End Sub
Sub GetData(sStateCode As String, sCourt As String, sCourtId As String, dtFrom As Date, dtTill As Date, sSubjectVal As String, sOrderVal As String, aData)
Dim i As Long
Dim oQuery As Object
Dim sQuery As String
Dim sContent As String
' Set query parameters
Set oQuery = CreateObject("Scripting.Dictionary")
With oQuery
.Add "suchart", "uneingeschr"
.Add "button", "Start search"
.Add "land", sStateCode
.Add "gericht", sCourtId
.Add "gericht_name", sCourt
.Add "seite", ""
.Add "l", ""
.Add "r", ""
.Add "all", "false"
.Add "vt", Day(dtFrom)
.Add "vm", Month(dtFrom)
.Add "vj", Year(dtFrom)
.Add "bt", Day(dtTill)
.Add "bm", Month(dtTill)
.Add "bj", Year(dtTill)
.Add "fname", ""
.Add "fsitz", ""
.Add "rubrik", ""
.Add "az", ""
.Add "gegenstand", sSubjectVal
.Add "anzv", "alle"
.Add "order", sOrderVal
End With
sQuery = EncodeQueryParams(oQuery)
' Retrieve search results
XmlHttpRequest "POST", _
"https://www.handelsregisterbekanntmachungen.de/de/index.php?aktion=suche", _
Array( _
Array("Content-Type", "application/x-www-form-urlencoded"), _
Array("Content-Length", Len(sQuery) _
) _
), _
sQuery, _
"", _
sContent
' Parse response
sContent = Replace(sContent, "<br>", vbCrLf)
ParseResponse "<li[^>]*><a[^>]*?href=""javascript:NeuFenster('([^']*)')""[^>]*>([^<]*)<ul[^>]*>([sS]*?)</ul>", sContent, aData, False
For i = 0 To UBound(aData, 1)
aData(i)(0) = "http://www.handelsregisterbekanntmachungen.de/en/skripte/hrb.php?" & aData(i)(0)
Next
End Sub
Sub ExtractOptions(sContent As String, sName As String, oOptions As Object)
Dim aTmp0
Dim vItem
' Extract the whole <select> for parameter
ParseResponse "<select[^>]* name=" & sName & "[^>]*>[^<]*((?:<option[^>]*>[^<]*</option>[^<]*)+)[^<]*</[^>]*>", sContent, aTmp0, False
' Extract each parameter <option>
ParseResponse "<option[^>]*value=(""[^""]*""|[^s>]*)[^>]*>([^<]*)</option>", (aTmp0(0)), aTmp0, False
' Put each parameter and value into dictionary
Set oOptions = CreateObject("Scripting.Dictionary")
For Each vItem In aTmp0
oOptions(DecodeHTMLEntities((vItem(1)))) = DecodeHTMLEntities(Replace(vItem(0), """", ""))
Next
End Sub
Sub XmlHttpRequest(sMethod, sUrl, aSetHeaders, sFormData, sRespHeaders, sRespText)
Dim aHeader
' With CreateObject("MSXML2.ServerXMLHTTP")
' .SetOption 2, 13056 ' SXH_SERVER_CERT_IGNORE_ALL_SERVER_ERRORS
With CreateObject("MSXML2.XMLHTTP")
.Open sMethod, sUrl, False
If IsArray(aSetHeaders) Then
For Each aHeader In aSetHeaders
.SetRequestHeader aHeader(0), aHeader(1)
Next
End If
.Send (sFormData)
sRespHeaders = .GetAllResponseHeaders
sRespText = .ResponseText
End With
End Sub
Sub ParseResponse(sPattern, sResponse, aData, Optional bAppend As Boolean = True, Optional bGlobal = True, Optional bMultiLine = True, Optional bIgnoreCase = True)
Dim oMatch
Dim aTmp0()
Dim sSubMatch
If Not (IsArray(aData) And bAppend) Then aData = Array()
With CreateObject("VBScript.RegExp")
.Global = bGlobal
.MultiLine = bMultiLine
.IgnoreCase = bIgnoreCase
.Pattern = sPattern
For Each oMatch In .Execute(sResponse)
If oMatch.SubMatches.Count = 1 Then
PushItem aData, oMatch.SubMatches(0)
Else
aTmp0 = Array()
For Each sSubMatch In oMatch.SubMatches
PushItem aTmp0, sSubMatch
Next
PushItem aData, aTmp0
End If
Next
End With
End Sub
Sub PushItem(aData, vItem, Optional bAppend As Boolean = True)
If Not (IsArray(aData) And bAppend) Then aData = Array()
ReDim Preserve aData(UBound(aData) + 1)
aData(UBound(aData)) = vItem
End Sub
Function DecodeHTMLEntities(sText As String) As String
Static oHtmlfile As Object
Static oDiv As Object
If oHtmlfile Is Nothing Then
Set oHtmlfile = CreateObject("htmlfile")
oHtmlfile.Open
Set oDiv = oHtmlfile.createElement("div")
End If
oDiv.innerHTML = sText
DecodeHTMLEntities = oDiv.innerText
End Function
Function EncodeQueryParams(oParams As Object) As String
Dim aParams
Dim i As Long
aParams = oParams.Keys()
For i = 0 To UBound(aParams)
aParams(i) = EncodeUriComponent((aParams(i))) & "=" & EncodeUriComponent((oParams(aParams(i))))
Next
EncodeQueryParams = Join(aParams, "&")
End Function
Function EncodeUriComponent(strText As String) As String
Static objHtmlfile As Object
If objHtmlfile Is Nothing Then
Set objHtmlfile = CreateObject("htmlfile")
objHtmlfile.parentWindow.execScript "function encode(s) {return encodeURIComponent(s)}", "jscript"
End If
EncodeUriComponent = objHtmlfile.parentWindow.encode(strText)
End Function
Function Denestify(aRows)
Dim aData()
Dim aItems()
Dim i As Long
Dim j As Long
If UBound(aRows) = -1 Then Exit Function
ReDim aData(1 To UBound(aRows) + 1, 1 To 1)
For j = 0 To UBound(aRows)
If IsArray(aRows(j)) Then
aItems = aRows(j)
For i = 0 To UBound(aItems)
If i + 1 > UBound(aData, 2) Then ReDim Preserve aData(1 To UBound(aRows) + 1, 1 To i + 1)
aData(j + 1, i + 1) = aItems(i)
Next
Else
aData(j + 1, 1) = aRows(j)
End If
Next
Denestify = aData
End Function
Sub Output2DArray(oDstRng As Range, aCells As Variant)
With oDstRng
.Parent.Select
With .Resize( _
UBound(aCells, 1) - LBound(aCells, 1) + 1, _
UBound(aCells, 2) - LBound(aCells, 2) + 1)
.NumberFormat = "@"
.Value = aCells
End With
End With
End Sub
这篇关于使用发布数据和 xlmlhttp的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!
查看全文