将 HTML 文档的 javascript 部分中的字段提取到表格中?地理坐标 [英] Extracting fields from a javascript section of an HTML document into tables? Geographic coordinates
问题描述
我有一个 HTML 文档,其中包含一段 JavaScript 中的地理信息.这是这个网页的源代码:https://energy.ehawaii.gov/epd/public/energy-projects-map.html
I have an HTML document that contains geographic information within a block of Javascript. It is the source code from this web page: https://energy.ehawaii.gov/epd/public/energy-projects-map.html
这可以看作是一张地图,也可以看作是一个列表.
This can be viewed as a map and also as a list.
我想要实现的是在 Excel 中拥有该列表,但有一个纬度"字段和一个经度"字段.Google 地图标记在 Javascript 中指定 LatLng
.
What I want to achieve is to have that list in Excel, but with a field for "Latitude" and a field for "Longitude". The Google Maps marker specifies the LatLng
in the Javascript.
如何使用 VB 之类的东西来处理 HTML 文件的源代码,并将其组织成具有以下字段/列的表格:
How can I use something like VB to process the source code of the HTML file, and organize into a table that has the following fields/columns:
- Description (from the
<a ... title="such and such">
) - Technology (from
<p>Technology: Solar</p>
for example) - Latitude (from
google.maps.LatLng(latitude, longitude);
- Longtitude (from the same code line as latitude, but using the second variable)?
感谢所有帮助!
推荐答案
试试这个基于 XMLHTTP 请求的 VBScript 解决方案.只需复制下面的代码,粘贴到文本文件,将其保存为 .vbs
并运行它.脚本没有优化,所有请求都不是异步的,所以在我的电脑上大约需要 40 秒才能获取所有数据.
Try this VBScript solution based on XMLHTTP requests. Just copy the code below, paste to text file, save it as .vbs
and run it. Script hasn't been optimized, all requests are not async, so it takes about 40 seconds on my PC to get all data.
Option Explicit
Dim arrCells(), arrList, arrTmp, sRespHeaders, sRespText, arrSetHeaders, i, j, iTotal, oApp, oWB, oWS, oOutput
' Create output window
Output oOutput
' Get cookies
oOutput.write "Get cookies"
XmlHttpGet "https://energy.ehawaii.gov/epd/public/energy-projects-map.html", Array(), sRespHeaders, sRespText
ParseResponse "^Set-(Cookie): ([S]*?=[S]*?);[sS]*?$", sRespHeaders, arrSetHeaders
' Get project list
oOutput.write "Get project list"
arrList = Array()
XmlHttpGet "https://energy.ehawaii.gov/epd/public/energy-projects-list.json?sEcho=2&iColumns=5&sColumns=&iDisplayStart=1&iDisplayLength=0&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&mDataProp_3=3&mDataProp_4=4&sSearch=&bRegex=false&sSearch_0=&bRegex_0=false&bSearchable_0=true&sSearch_1=&bRegex_1=false&bSearchable_1=true&sSearch_2=&bRegex_2=false&bSearchable_2=true&sSearch_3=&bRegex_3=false&bSearchable_3=true&sSearch_4=&bRegex_4=false&bSearchable_4=true&iSortCol_0=0&sSortDir_0=asc&iSortingCols=1&bSortable_0=true&bSortable_1=true&bSortable_2=true&bSortable_3=true&bSortable_4=true", arrSetHeaders, "", sRespText
ParseProjects sRespText, arrList, iTotal
oOutput.write "Get project list: " & (UBound(arrList) + 1) & " of " & iTotal
' Rearrange to 2-dimensional array, get LatLng
ReDim arrCells(UBound(arrList), 8) ' Name, Technology, Island, Capacity, Location, RID, Type, Lat, Lng
For i = 0 To UBound(arrList)
For j = 0 To 6
arrCells(i, j) = arrList(i)(j)
Next
oOutput.write "Get LatLng: " & (i + 1) & " of " & iTotal
arrTmp = RequestLatLng(arrList(i)(5))
arrCells(i, 7) = arrTmp(0)
arrCells(i, 8) = arrTmp(1)
Next
' Create Excel worksheet, output data
oOutput.write "Export to Excel"
Set oApp = CreateObject("Excel.Application")
oApp.Visible = True
Set oWB = oApp.Workbooks.Add(-4167) ' xlWBATWorksheet
Set oWS = oWB.Worksheets(1)
oWS.Range(oWS.Cells(1, 1), oWS.Cells(UBound(arrCells) + 1, 9)).Value = arrCells
oWS.Columns.AutoFit
oWB.Saved = True
oOutput.write "Completed"
Sub XmlHttpGet(sQuery, arrSetHeaders, sRespHeaders, sRespText)
Dim arrHeader
With CreateObject("MSXML2.ServerXMLHTTP")
.SetOption 2, 13056 ' SXH_SERVER_CERT_IGNORE_ALL_SERVER_ERRORS
.Open "GET", sQuery, False
For Each arrHeader In arrSetHeaders
.SetRequestHeader arrHeader(0), arrHeader(1)
Next
.Send ""
sRespHeaders = .GetAllResponseHeaders
sRespText = .ResponseText
End With
End Sub
Sub ParseResponse(sPattern, sResponse, aData)
Dim oMatch, aTmp, sSubMatch
aData = Array()
With CreateObject("VBScript.RegExp")
.Global = True
.MultiLine = True
.Pattern = sPattern
For Each oMatch In .Execute(sResponse)
If oMatch.SubMatches.Count = 1 Then
PushItem aData, oMatch.SubMatches(0)
Else
aTmp = Array()
For Each sSubMatch In oMatch.SubMatches
PushItem aTmp, sSubMatch
Next
PushItem aData, aTmp
End If
Next
End With
End Sub
Sub PushItem(aList, vItem)
ReDim Preserve aList(UBound(aList) + 1)
aList(UBound(aList)) = vItem
End Sub
Sub ParseProjects(sJson, arrProj, iTotalRecords)
Dim i, q
With CreateObject("htmlfile")
With .parentwindow
.execscript ";", "jscript"
.eval ("json = " & sJson & ";")
iTotalRecords = CInt(.json.iTotalRecords)
Do While .json.aaData.Length
ReDim Preserve arrProj(UBound(arrProj) + 1)
With .json.aaData.Shift()
arrProj(UBound(arrProj)) = Array(.Shift(), .Shift(), .Shift(), .Shift(), .Shift(), .Shift(), .Shift())
End With
Loop
End With
End With
End Sub
Function RequestLatLng(sRid)
Dim sRespText, arrTmp, sTmp
XmlHttpGet "https://energy.ehawaii.gov/epd/public/energy-project-details.html?rid=" & sRid, Array(), "", sRespText
arrTmp = Split(sRespText, "google.maps.LatLng(")
If UBound(arrTmp) >= 1 Then
sTmp = arrTmp(1)
arrTmp = Split(sTmp, "),")
If UBound(arrTmp) >= 1 Then
RequestLatLng = Split(arrTmp(0), ", ")
Exit Function
End If
End If
RequestLatLng = Array("#", "#")
End Function
Sub Output(oWnd)
Set oWnd = ShowWindow("energy.ehawaii.gov", "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAWIAAAB2CAYAAADybJlDAAAACXBIWXMAAC4jAAAuIwF4pT92AAAAIGNIUk0AAHolAACAgwAA+f8AAIDpAAB1MAAA6mAAADqYAAAXb5JfxUYAAAUjSURBVHja7N05ztxGEIBRjqHciZ34Bn3/w9QNnPsE49QwtPwc9lLd9V4kAdKwyQE+Fjnb6/1+XwCs85tDACDEAEIMgBADCDEAQgwgxAAIMYAQAyDEAEIMgBADCDEAQgwgxACM980hgD28Xq/Td7Hd/PdxzHPri+FBiBPH9hPbBVqIQYh3D+/2YRZiEOIT47tVmIUYhLhCfFMHWYhBiKsFOF2UhRiEuHKAUwRZiEGIBXhxkIUYhFiAFwdZiEGIBXhxjIUYhDhLhCP5OocFWYhBiGeHLaxdiEGI54YsNljj0n0SYhDiEYGLhGtKG2MhBiHOFuCdXiDsEmMhhtoh7hW9SLKOLWMsxFA3xD3iVznA3Y6FEEPNED8NoAB3PC5CDPVCvDLCFT4gcvv4CDHUCvGTEFYKcMw8Vn48FOpYEeG28RQ87cRjIoYaE/GnMdxpCo7O2336eH9f1/WPEIMQr4jwyo8Zt4HbaqOOoRCDEGePcCzc9pQYCzGcHeJZER45ia6ewmP0MfViHZxrxwjHle9XltvoYyTEIMIZIvwkwDNvvfT6/0IMpInw0wn4SYTj4Xa63T4RYjANr4hwj1sQKybhIc+NEIMIz4xwr3vAvSI8eyo2EQNLAphhGo1Ej92EGESyW0AWhbD3fkaHbT86lkIMNc2KcO+3ox3500lCDPWm4ZkRXjnx31lDTDgeP3wcIYZaEZ712Bk+lBG7bFOIoZbYdBsz3g0SC/a/CTGw6lZAlok/xZqEGGrFsg163FW3AnqvY8lULMQg3pkj3JKsYyghhjrT8KhL66oRbr3WI8Qg5OUm0Gz7JMQgxhmDdcoJ4UvrEmKoF9AfPVbbdN+2n8qFGNg5fkfcGhFiIFP82mb72Ho8lhDD+dOrqdxEDCyc6mLAY54+lQsxkDrspvwBx06IQUDtj4kYGDA5xnf+Hpvv07GEGEzHd4LZO5qVXqALIQZ6xaxtuu602xNiYNV03A46FkIM7BuhpNPwVEIM50VvxddBjvqC+hKEGAQ9++PG6U+IEAOzp+M7wS7xHmghBnaajk3EgEA+nI4rT8NCDNy+hO4VQtPxL3xzCIAvTKWz3zOcYRr+a9ZaTcSA2wTf9/usDQkxcCfGMWk7pQgx1PGHUAoxsNafHUM7ajouGXkhBjKFc8RXbQoxUCLGgizEgCB33+60KwIhBgR5ceB9oAPYNWTHvLBnIoa9Js2el9UnRPmIfRViYNol+CZBnh53tyaAE+O/1ScAhRgQ5cWEGBgZwa9OlW3SeqLj2rsdF/eIQQxXif/9OSYdlx73lLuuVYhhL9kvs3sELiau9b9RXnbyEmIg6wln5kln6U84CTGcK/Ptibjx72ZHefpz4cU6YJfwx6Tt3NnPLicIIYY9L9vb5us/Icg/W8OtfRRiOH9KPfXL1mPiND706kCIgenhKRblX/JiHdS5vM/wpUGzPnq81Qt8Qgym1dNPWOmjLMRQayquHPq0URZiMBVXO3GYiIEtYnjy7Yxs+xZCDDXCOipAuwW7fXB8h0/y3r4GZDthZIrwz9bfeh0XIYYzIvfVKNyJx0kfBmmDjnuXbbk1AZzukwjHhG2FEINL/4xTZpUIm4hBjKfGdtXtjZZ4WyHEwOk+jfCSk4YQg6k402S5Y4Tb0+0IMTAyPpF0XWkiLMRgKj5lKn7yy8yx+pgIMYhxlel8dYRDiEGMT5uK23VAhIUYqDgFp4rwdV3X6/1+e0phA6/Xa7dJ9rT3NQ+7B/0vAAAA//8DAERsQ7O6796eAAAAAElFTkSuQmCC", 354, 118)
End Sub
Function ShowWindow(sTitle, sBG, iWidth, iHeight)
Set ShowWindow = CreateWindow()
With ShowWindow
With .document
.title = sTitle
.getElementsByTagName("head")(0).appendChild .createElement("style")
.styleSheets(0).cssText = "* {font: 8pt tahoma; margin: 5px;}"
.body.style.background = "buttonface"
.body.style.backgroundRepeat = "no-repeat"
.body.style.backgroundImage = "url(" & sBG & ")"
.body.innerHTML = ""
End With
.resizeTo .screen.availWidth, .screen.availHeight
.resizeTo iWidth + .screen.availWidth - .document.body.offsetWidth, iHeight + .screen.availHeight - .document.body.offsetHeight
.moveTo CInt((.screen.availWidth - iWidth) / 2), CInt((.screen.availHeight - iHeight) / 2)
.execScript "var handlers, thunks = {body_onunload: function() {handlers.WSHQuit()}};"
Execute "Class clsHandlers: Public Sub WSHQuit(): WScript.Quit: End Sub: End Class"
Set .handlers = New clsHandlers
Set .document.body.onunload = .thunks.body_onunload
.execScript "var write = function(t) {document.body.innerHTML = t};"
End With
End Function
Function CreateWindow()
' source http://forum.script-coding.com/viewtopic.php?pid=75356#p75356
Dim sSignature, oShellWnd, oProc
On Error Resume Next
sSignature = Left(CreateObject("Scriptlet.TypeLib").Guid, 38)
Do
Set oProc = CreateObject("WScript.Shell").Exec("mshta ""about:<head><script>moveTo(-32000,-32000);document.title=' '</script><hta:application id=app border=thick minimizebutton=no maximizebutton=no scroll=no showintaskbar=yes contextmenu=no selection=no innerborder=no icon=""""/><object id='shell' classid='clsid:8856F961-340A-11D0-A96B-00C04FD705A2'><param name=RegisterAsBrowser value=1></object><script>shell.putproperty('" & sSignature & "',document.parentWindow);</script></head>""")
Do
If oProc.Status > 0 Then Exit Do
For Each oShellWnd In CreateObject("Shell.Application").Windows
Set CreateWindow = oShellWnd.GetProperty(sSignature)
If Err.Number = 0 Then Exit Function
Err.Clear
Next
Loop
Loop
End Function
这篇关于将 HTML 文档的 javascript 部分中的字段提取到表格中?地理坐标的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!