引用电子表格中的单元格并填充相应的单元格 [英] Referencing Cells from Spreadsheet and Populating Corresponding Cells
问题描述
更多信息-该程序的目的是从现有的名称列表中检索,搜索网站并带回相应的NPI编号.感谢用户@omegastripes,我被建议将重点转移到XHR上. 我的问题是,如何用提供者的名称填充搜索,然后进行循环,以便它将为其余提供者返回电子表格中下一个单元格中的NPI.
More information - the objective of this program is to pull from an existing list of names, search the website, and bring back the corresponding NPI numbers. Thanks to user @omegastripes I was advised to shift my focus to XHR. My question is regarding, how to populate the search with the names of the providers, and loop so that it will return the NPI's in the next cells over in the spread sheet for the remaining providers.
相关,如果搜索中没有任何内容,该怎么办
Related, what to do in the event nothing populates from the search
原始帖子:标题-您要继续吗? Internet Explorer弹出-VBA
original post: Title - Do you want to continue? Internet Explorer pop up - VBA
Internet Security弹出窗口阻止我的代码继续运行.通常,我会禁用此请求,但是由于使用工作计算机,因此我的计算机安全访问受到限制.
Internet Security pop up prevents my code from continuing. Normally I would disable this request but my computer security access is limited due to using a work computer.
我的问题,是否可以使用VBA在此弹出窗口上单击是"?
My question, is there a way to click "Yes" on this pop up using VBA?
到目前为止,这是我的代码.
Here is my code so far.
Sub GetNpi()
Dim ie As Object
'create a new instance of ie
Set ie = New InternetExplorer
ie.Visible = True
'goes to site
ie.navigate "npinumberlookup.org"
Do While ie.readyState <> READYSTATE_COMPLETE
Loop
Set ieDoc = ie.document
'select search box last name and Fill in Search Box
ie.document.getElementById("last").Focus
ie.document.getElementById("last").Value = "testlastname"
'select search box first name and Fill in Search Box
ie.document.getElementById("first").Focus
ie.document.getElementById("first").Value = "testfirstname"
Do While ie.readyState <> READYSTATE_COMPLETE
Loop
'select state drop down box enter TX
ie.document.getElementById("pracstate").Focus
ie.document.getElementById("pracstate").Value = "TX"
'click submit button
ie.document.getElementById("submit").Click
推荐答案
更新
尝试以下代码从工作表中检索名称的NPI(指定姓,名和州):
Try the below code to retrieve NPI for the names from the worksheet (specify last name, first name and state):
Option Explicit
Sub TestListNPI()
' Prefix type + func
' Type: s - string, l - long, a - array
' Func: q - query, r - result
Dim i As Long
Dim j As Long
Dim k As Long
Dim sqLN As String
Dim sqFN As String
Dim aqFN
Dim sqSt As String
Dim arHdr
Dim arRows
Dim srMsg As String
Dim srLN As String
Dim srFN As String
Dim arFN
Dim lrMNQty As Long
Dim sOutput As String
i = 2
With Sheets(1)
Do
sqLN = .Cells(i, 1)
If sqLN = "" Then Exit Do
.Cells(i, 4) = "..."
sqFN = .Cells(i, 2).Value
aqFN = Split(sqFN)
sqSt = "" & .Cells(i, 3)
GetNPIData sqLN, aqFN(0), sqSt, arHdr, arRows, srMsg
If srMsg = "OK" Then
With CreateObject("Scripting.Dictionary")
For j = 0 To UBound(arRows, 1)
Do
srLN = arRows(j, 1)
If LCase(srLN) <> LCase(sqLN) Then Exit Do ' Last names should match
srFN = arRows(j, 3)
arFN = Split(srFN)
If LCase(arFN(0)) <> LCase(aqFN(0)) Then Exit Do ' First names should match
lrMNQty = UBound(arFN)
If UBound(aqFN) < lrMNQty Then lrMNQty = UBound(aqFN)
For k = 1 To lrMNQty
Select Case True
Case LCase(arFN(k)) = LCase(aqFN(k)) ' Full match
Case Len(arFN(k)) = 1 And LCase(arFN(k)) = LCase(Left(aqFN(k), 1)) ' First letter match
Case Len(arFN(k)) = 2 And Right(arFN(k), 1) = "." And LCase(Left(arFN(k), 1)) = LCase(Left(aqFN(k), 1)) ' First letter with dot match
Case Else ' No matches
Exit Do
End Select
Next
.Add arRows(j, 0), arRows(j, 1) & " " & arRows(j, 3)
Loop Until True
Next
Select Case .Count
Case 0
sOutput = "No matches"
Case 1
sOutput = .Keys()(0)
Case Else
sOutput = Join(.Items(), vbCrLf)
End Select
End With
Else
sOutput = srMsg
End If
.Cells(i, 4) = sOutput
DoEvents
i = i + 1
Loop
End With
MsgBox "Completed"
End Sub
Sub GetNPIData(sLastName, sFirstName, sState, aResultHeader, aResultRows, sStatus)
Dim sContent As String
Dim i As Long
Dim j As Long
Dim aHeader() As String
Dim aRows() As String
' Retrieve HTML content via XHR
With CreateObject("MSXML2.XMLHTTP")
.Open "POST", "http://npinumberlookup.org/getResults.php", False
.SetRequestHeader "content-type", "application/x-www-form-urlencoded"
.Send _
"last=" & EncodeUriComponent(sLastName) & _
"&first=" & EncodeUriComponent(sFirstName) & _
"&pracstate=" & EncodeUriComponent(sState) & _
"&npi=" & _
"&submit=Search" ' Setup request parameters
sContent = .ResponseText
End With
' Parse with RegEx
Do ' For break
With CreateObject("VBScript.RegExp")
.Global = True
.MultiLine = True
.IgnoreCase = True
' Minor HTML simplification
.Pattern = "<(?!/td|/tr|/th|td|tr|th|a href)[^>]*>| |\r|\n|\t"
sContent = .Replace(sContent, "")
.Pattern = "<a [^>]*href=""([^""]*)"".*?</td>"
sContent = .Replace(sContent, "$1</td>")
.Pattern = "<(\w+)\b[^>]+>"
sContent = .Replace(sContent, "<$1>")
' Extract header
.Pattern = "<tr>((?:<th>.*?</th>)+)</tr>"
With .Execute(sContent)
If .Count <> 1 Then
sStatus = "No header"
Exit Do
End If
End With
.Pattern = "<th>(.*?)</th>"
With .Execute(sContent)
ReDim aHeader(0, 0 To .Count - 1)
For i = 0 To .Count - 1
aHeader(0, i) = .Item(i).SubMatches(0)
Next
End With
aResultHeader = aHeader
' Extract data
.Pattern = "<tr>((?:<td>.*?</td>)+)</tr>"
With .Execute(sContent)
If .Count = 0 Then
sStatus = "No rows"
Exit Do
End If
ReDim aRows(0 To .Count - 1, 0)
For i = 0 To .Count - 1
aRows(i, 0) = .Item(i).SubMatches(0)
Next
End With
.Pattern = "<td>(.*?)</td>"
For i = 0 To UBound(aRows, 1)
With .Execute(aRows(i, 0))
For j = 0 To .Count - 1
If UBound(aRows, 2) < j Then ReDim Preserve aRows(UBound(aRows, 1), j)
aRows(i, j) = Trim(.Item(j).SubMatches(0))
Next
End With
Next
aResultRows = aRows
End With
sStatus = "OK"
Loop Until True
End Sub
Function EncodeUriComponent(sText)
Static oHtmlfile As Object
If oHtmlfile Is Nothing Then
Set oHtmlfile = CreateObject("htmlfile")
oHtmlfile.parentWindow.execScript "function encode(s) {return encodeURIComponent(s)}", "jscript"
End If
EncodeUriComponent = oHtmlfile.parentWindow.encode(sText)
End Function
对我来说输出如下:
对于乘法条目,所有名称都在最后一列而不是NPI中输出.
For multiply entries all names are output in the last column instead of NPI.
一些代码解释.通常,不建议使用RegEx进行HTML解析,因此有免责声明.在这种情况下要处理的数据非常简单,这就是为什么要用RegEx对其进行解析的原因.关于RegEx:简介(尤其是语法), JS简介,
Some explanation of the code. Generally RegEx's aren't recommended for HTML parsing, so there is disclaimer. Data being processed in this case is quite simple that is why it is parsed with RegEx. About RegEx: introduction (especially syntax), introduction JS, VB flavor. Simplification makes HTML code suitable for parsing in some degree. Patterns:
-
<(?!/td|/tr|/th|td|tr|th|a href)[^>]*>| |\r|\n|\t
用于删除空格,并通过替换为""
来删除除表标记和链接以外的所有标签. -
<a [^>]*href="([^"]*)".*?</td>
通过替换为$1</td>
来保留链接地址. -
<(\w+)\b[^>]+>
通过替换为<$1>
删除所有不必要的标签属性. -
<tr>((?:<th>.*?</th>)+)</tr>
匹配每个表头行. -
<th>(.*?)</th>
匹配每个标头单元格. -
<tr>((?:<td>.*?</td>)+)</tr>
匹配每个表数据行. -
<td>(.*?)</td>
匹配每个数据单元.
<(?!/td|/tr|/th|td|tr|th|a href)[^>]*>| |\r|\n|\t
is for removing white-spaces, and all tags but table markup and links by replacing with""
.<a [^>]*href="([^"]*)".*?</td>
keeps link address by replacing with$1</td>
.<(\w+)\b[^>]+>
removes all unnecessary tag attributes by replacing with<$1>
.<tr>((?:<th>.*?</th>)+)</tr>
matches each table header row.<th>(.*?)</th>
matches each header cell.<tr>((?:<td>.*?</td>)+)</tr>
matches each table data row.<td>(.*?)</td>
matches each data cell.
研究在替换网络的每个步骤中如何更改HTML内容.
Look into how does the HTML content is changed on each step of replacemnets.
初始答案
避免出现弹出窗口,而不要为它烦恼.
Avoid pop up appearing instead of bothering with it.
确保您使用的是安全HTTP协议https://npinumberlookup.org
.
Make sure you are using secure HTTP protocol https://npinumberlookup.org
.
您甚至可能根本不使用IE进行网络爬虫,XHR是更好的选择,因为它更可靠,更快捷,尽管它需要一些知识和经验.这是一个简单的例子:
You may even not use IE for webscraping at all, XHR is better choice, as it is more reliable and fast, though it requires some knowledge and experience. Here is the simple example of that:
Option Explicit
Sub Test()
Dim sContent As String
Dim i As Long
Dim j As Long
Dim aHeader() As String
Dim aRows() As String
' Retrieve HTML content via XHR
With CreateObject("MSXML2.XMLHTTP")
.Open "POST", "http://npinumberlookup.org/getResults.php", False
.SetRequestHeader "content-type", "application/x-www-form-urlencoded"
.Send _
"last=smith" & _
"&first=michael" & _
"&pracstate=NC" & _
"&npi=" & _
"&submit=Search" ' Setup request parameters
sContent = .ResponseText
End With
' Parse with RegEx
Do ' For break
With CreateObject("VBScript.RegExp")
.Global = True
.MultiLine = True
.IgnoreCase = True
' Minor HTML simplification
.Pattern = "<(?!/td|/tr|/th|td|tr|th|a href)[^>]*>| |\r|\n|\t"
sContent = .Replace(sContent, "")
.Pattern = "<a [^>]*href=""([^""]*)"".*?</td>"
sContent = .Replace(sContent, "$1</td>")
.Pattern = "<(\w+)\b[^>]+>"
sContent = .Replace(sContent, "<$1>")
' Extract header
.Pattern = "<tr>((?:<th>.*?</th>)+)</tr>"
With .Execute(sContent)
If .Count <> 1 Then
MsgBox "No header found"
Exit Do
End If
End With
.Pattern = "<th>(.*?)</th>"
With .Execute(sContent)
ReDim aHeader(0, 0 To .Count - 1)
For i = 0 To .Count - 1
aHeader(0, i) = .Item(i).SubMatches(0)
Next
End With
' Extract data
.Pattern = "<tr>((?:<td>.*?</td>)+)</tr>"
With .Execute(sContent)
If .Count = 0 Then
MsgBox "No rows found"
Exit Do
End If
ReDim aRows(0 To .Count - 1, 0)
For i = 0 To .Count - 1
aRows(i, 0) = .Item(i).SubMatches(0)
Next
End With
.Pattern = "<td>(.*?)</td>"
For i = 0 To UBound(aRows, 1)
With .Execute(aRows(i, 0))
For j = 0 To .Count - 1
If UBound(aRows, 2) < j Then ReDim Preserve aRows(UBound(aRows, 1), j)
aRows(i, j) = .Item(j).SubMatches(0)
Next
End With
Next
End With
Loop Until True
' Output
With ThisWorkbook.Sheets(1)
.Cells.Delete
Output2DArray .Cells(1, 1), aHeader
Output2DArray .Cells(2, 1), aRows
.Columns.AutoFit
End With
MsgBox "Completed"
End Sub
Sub Output2DArray(oDstRng As Range, aCells As Variant)
With oDstRng
.Parent.Select
With .Resize( _
UBound(aCells, 1) - LBound(aCells, 1) + 1, _
UBound(aCells, 2) - LBound(aCells, 2) + 1)
.NumberFormat = "@"
.Value = aCells
End With
End With
End Sub
单击提交后,可以从网络"选项卡上的浏览器开发人员工具轻松获取代码中的所有数据,例如:
All the data in the code could be easily obtained from browser developer tools on network tab after you click submit, as an example:
上面的代码为我返回的输出如下:
The above code returns the output for me as follows:
这篇关于引用电子表格中的单元格并填充相应的单元格的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!