将HTML文档的javascript部分中的字段提取到表中?地理坐标 [英] Extracting fields from a javascript section of an HTML document into tables? Geographic coordinates
问题描述
我有一个HTML文档,其中包含Javascript块中的地理信息。这是该网页的源代码: https://energy.ehawaii .gov / epd / public / energy-projects-map.html
I have an HTML document that contains geographic information within a block of Javascript. It is the source code from this web page: https://energy.ehawaii.gov/epd/public/energy-projects-map.html
这可以看作是地图,也可以作为列表。
This can be viewed as a map and also as a list.
我想要实现的是在Excel中使用该列表,但使用Latitude字段和Longitude字段。 Google地图标记在Javascript中指定 LatLng
。
What I want to achieve is to have that list in Excel, but with a field for "Latitude" and a field for "Longitude". The Google Maps marker specifies the LatLng
in the Javascript.
如何使用VB之类的东西来处理来源HTML文件的代码,并组织成一个包含以下字段/列的表:
How can I use something like VB to process the source code of the HTML file, and organize into a table that has the following fields/columns:
- 描述(来自
< a ... title =such and such>
) - 技术(来自
< p>技术: Solar< / p>
例如) - 纬度(来自
google.maps.LatLng(纬度,经度);
- Longtitude(来自与纬度相同的代码行,但是使用第二个变量)?
- Description (from the
<a ... title="such and such">
) - Technology (from
<p>Technology: Solar</p>
for example) - Latitude (from
google.maps.LatLng(latitude, longitude);
- Longtitude (from the same code line as latitude, but using the second variable)?
所有帮助表示赞赏!
推荐答案
尝试基于XMLHTTP请求的VBScript解决方案。只需复制下面的代码,粘贴到文本文件,保存为 .vbs
并运行它。脚本尚未优化,所有请求都不是异步的,所以我的电脑需要大约40秒才能获得所有数据。
Try this VBScript solution based on XMLHTTP requests. Just copy the code below, paste to text file, save it as .vbs
and run it. Script hasn't been optimized, all requests are not async, so it takes about 40 seconds on my PC to get all data.
Option Explicit
Dim arrCells(), arrList, arrTmp, sRespHeaders, sRespText, arrSetHeaders, i, j, iTotal, oApp, oWB, oWS, oOutput
' Create output window
Output oOutput
' Get cookies
oOutput.write "Get cookies"
XmlHttpGet "https://energy.ehawaii.gov/epd/public/energy-projects-map.html", Array(), sRespHeaders, sRespText
ParseResponse "^Set-(Cookie): ([\S]*?=[\S]*?);[\s\S]*?$", sRespHeaders, arrSetHeaders
' Get project list
oOutput.write "Get project list"
arrList = Array()
XmlHttpGet "https://energy.ehawaii.gov/epd/public/energy-projects-list.json?sEcho=2&iColumns=5&sColumns=&iDisplayStart=1&iDisplayLength=0&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&mDataProp_3=3&mDataProp_4=4&sSearch=&bRegex=false&sSearch_0=&bRegex_0=false&bSearchable_0=true&sSearch_1=&bRegex_1=false&bSearchable_1=true&sSearch_2=&bRegex_2=false&bSearchable_2=true&sSearch_3=&bRegex_3=false&bSearchable_3=true&sSearch_4=&bRegex_4=false&bSearchable_4=true&iSortCol_0=0&sSortDir_0=asc&iSortingCols=1&bSortable_0=true&bSortable_1=true&bSortable_2=true&bSortable_3=true&bSortable_4=true", arrSetHeaders, "", sRespText
ParseProjects sRespText, arrList, iTotal
oOutput.write "Get project list: " & (UBound(arrList) + 1) & " of " & iTotal
' Rearrange to 2-dimensional array, get LatLng
ReDim arrCells(UBound(arrList), 8) ' Name, Technology, Island, Capacity, Location, RID, Type, Lat, Lng
For i = 0 To UBound(arrList)
For j = 0 To 6
arrCells(i, j) = arrList(i)(j)
Next
oOutput.write "Get LatLng: " & (i + 1) & " of " & iTotal
arrTmp = RequestLatLng(arrList(i)(5))
arrCells(i, 7) = arrTmp(0)
arrCells(i, 8) = arrTmp(1)
Next
' Create Excel worksheet, output data
oOutput.write "Export to Excel"
Set oApp = CreateObject("Excel.Application")
oApp.Visible = True
Set oWB = oApp.Workbooks.Add(-4167) ' xlWBATWorksheet
Set oWS = oWB.Worksheets(1)
oWS.Range(oWS.Cells(1, 1), oWS.Cells(UBound(arrCells) + 1, 9)).Value = arrCells
oWS.Columns.AutoFit
oWB.Saved = True
oOutput.write "Completed"
Sub XmlHttpGet(sQuery, arrSetHeaders, sRespHeaders, sRespText)
Dim arrHeader
With CreateObject("MSXML2.ServerXMLHTTP")
.SetOption 2, 13056 ' SXH_SERVER_CERT_IGNORE_ALL_SERVER_ERRORS
.Open "GET", sQuery, False
For Each arrHeader In arrSetHeaders
.SetRequestHeader arrHeader(0), arrHeader(1)
Next
.Send ""
sRespHeaders = .GetAllResponseHeaders
sRespText = .ResponseText
End With
End Sub
Sub ParseResponse(sPattern, sResponse, aData)
Dim oMatch, aTmp, sSubMatch
aData = Array()
With CreateObject("VBScript.RegExp")
.Global = True
.MultiLine = True
.Pattern = sPattern
For Each oMatch In .Execute(sResponse)
If oMatch.SubMatches.Count = 1 Then
PushItem aData, oMatch.SubMatches(0)
Else
aTmp = Array()
For Each sSubMatch In oMatch.SubMatches
PushItem aTmp, sSubMatch
Next
PushItem aData, aTmp
End If
Next
End With
End Sub
Sub PushItem(aList, vItem)
ReDim Preserve aList(UBound(aList) + 1)
aList(UBound(aList)) = vItem
End Sub
Sub ParseProjects(sJson, arrProj, iTotalRecords)
Dim i, q
With CreateObject("htmlfile")
With .parentwindow
.execscript ";", "jscript"
.eval ("json = " & sJson & ";")
iTotalRecords = CInt(.json.iTotalRecords)
Do While .json.aaData.Length
ReDim Preserve arrProj(UBound(arrProj) + 1)
With .json.aaData.Shift()
arrProj(UBound(arrProj)) = Array(.Shift(), .Shift(), .Shift(), .Shift(), .Shift(), .Shift(), .Shift())
End With
Loop
End With
End With
End Sub
Function RequestLatLng(sRid)
Dim sRespText, arrTmp, sTmp
XmlHttpGet "https://energy.ehawaii.gov/epd/public/energy-project-details.html?rid=" & sRid, Array(), "", sRespText
arrTmp = Split(sRespText, "google.maps.LatLng(")
If UBound(arrTmp) >= 1 Then
sTmp = arrTmp(1)
arrTmp = Split(sTmp, "),")
If UBound(arrTmp) >= 1 Then
RequestLatLng = Split(arrTmp(0), ", ")
Exit Function
End If
End If
RequestLatLng = Array("#", "#")
End Function
Sub Output(oWnd)
Set oWnd = ShowWindow("energy.ehawaii.gov", "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAWIAAAB2CAYAAADybJlDAAAACXBIWXMAAC4jAAAuIwF4pT92AAAAIGNIUk0AAHolAACAgwAA+f8AAIDpAAB1MAAA6mAAADqYAAAXb5JfxUYAAAUjSURBVHja7N05ztxGEIBRjqHciZ34Bn3/w9QNnPsE49QwtPwc9lLd9V4kAdKwyQE+Fjnb6/1+XwCs85tDACDEAEIMgBADCDEAQgwgxAAIMYAQAyDEAEIMgBADCDEAQgwgxACM980hgD28Xq/Td7Hd/PdxzHPri+FBiBPH9hPbBVqIQYh3D+/2YRZiEOIT47tVmIUYhLhCfFMHWYhBiKsFOF2UhRiEuHKAUwRZiEGIBXhxkIUYhFiAFwdZiEGIBXhxjIUYhDhLhCP5OocFWYhBiGeHLaxdiEGI54YsNljj0n0SYhDiEYGLhGtKG2MhBiHOFuCdXiDsEmMhhtoh7hW9SLKOLWMsxFA3xD3iVznA3Y6FEEPNED8NoAB3PC5CDPVCvDLCFT4gcvv4CDHUCvGTEFYKcMw8Vn48FOpYEeG28RQ87cRjIoYaE/GnMdxpCo7O2336eH9f1/WPEIMQr4jwyo8Zt4HbaqOOoRCDEGePcCzc9pQYCzGcHeJZER45ia6ewmP0MfViHZxrxwjHle9XltvoYyTEIMIZIvwkwDNvvfT6/0IMpInw0wn4SYTj4Xa63T4RYjANr4hwj1sQKybhIc+NEIMIz4xwr3vAvSI8eyo2EQNLAphhGo1Ej92EGESyW0AWhbD3fkaHbT86lkIMNc2KcO+3ox3500lCDPWm4ZkRXjnx31lDTDgeP3wcIYZaEZ712Bk+lBG7bFOIoZbYdBsz3g0SC/a/CTGw6lZAlok/xZqEGGrFsg163FW3AnqvY8lULMQg3pkj3JKsYyghhjrT8KhL66oRbr3WI8Qg5OUm0Gz7JMQgxhmDdcoJ4UvrEmKoF9AfPVbbdN+2n8qFGNg5fkfcGhFiIFP82mb72Ho8lhDD+dOrqdxEDCyc6mLAY54+lQsxkDrspvwBx06IQUDtj4kYGDA5xnf+Hpvv07GEGEzHd4LZO5qVXqALIQZ6xaxtuu602xNiYNV03A46FkIM7BuhpNPwVEIM50VvxddBjvqC+hKEGAQ9++PG6U+IEAOzp+M7wS7xHmghBnaajk3EgEA+nI4rT8NCDNy+hO4VQtPxL3xzCIAvTKWz3zOcYRr+a9ZaTcSA2wTf9/usDQkxcCfGMWk7pQgx1PGHUAoxsNafHUM7ajouGXkhBjKFc8RXbQoxUCLGgizEgCB33+60KwIhBgR5ceB9oAPYNWTHvLBnIoa9Js2el9UnRPmIfRViYNol+CZBnh53tyaAE+O/1ScAhRgQ5cWEGBgZwa9OlW3SeqLj2rsdF/eIQQxXif/9OSYdlx73lLuuVYhhL9kvs3sELiau9b9RXnbyEmIg6wln5kln6U84CTGcK/Ptibjx72ZHefpz4cU6YJfwx6Tt3NnPLicIIYY9L9vb5us/Icg/W8OtfRRiOH9KPfXL1mPiND706kCIgenhKRblX/JiHdS5vM/wpUGzPnq81Qt8Qgym1dNPWOmjLMRQayquHPq0URZiMBVXO3GYiIEtYnjy7Yxs+xZCDDXCOipAuwW7fXB8h0/y3r4GZDthZIrwz9bfeh0XIYYzIvfVKNyJx0kfBmmDjnuXbbk1AZzukwjHhG2FEINL/4xTZpUIm4hBjKfGdtXtjZZ4WyHEwOk+jfCSk4YQg6k402S5Y4Tb0+0IMTAyPpF0XWkiLMRgKj5lKn7yy8yx+pgIMYhxlel8dYRDiEGMT5uK23VAhIUYqDgFp4rwdV3X6/1+e0phA6/Xa7dJ9rT3NQ+7B/0vAAAA//8DAERsQ7O6796eAAAAAElFTkSuQmCC", 354, 118)
End Sub
Function ShowWindow(sTitle, sBG, iWidth, iHeight)
Set ShowWindow = CreateWindow()
With ShowWindow
With .document
.title = sTitle
.getElementsByTagName("head")(0).appendChild .createElement("style")
.styleSheets(0).cssText = "* {font: 8pt tahoma; margin: 5px;}"
.body.style.background = "buttonface"
.body.style.backgroundRepeat = "no-repeat"
.body.style.backgroundImage = "url(" & sBG & ")"
.body.innerHTML = ""
End With
.resizeTo .screen.availWidth, .screen.availHeight
.resizeTo iWidth + .screen.availWidth - .document.body.offsetWidth, iHeight + .screen.availHeight - .document.body.offsetHeight
.moveTo CInt((.screen.availWidth - iWidth) / 2), CInt((.screen.availHeight - iHeight) / 2)
.execScript "var handlers, thunks = {body_onunload: function() {handlers.WSHQuit()}};"
Execute "Class clsHandlers: Public Sub WSHQuit(): WScript.Quit: End Sub: End Class"
Set .handlers = New clsHandlers
Set .document.body.onunload = .thunks.body_onunload
.execScript "var write = function(t) {document.body.innerHTML = t};"
End With
End Function
Function CreateWindow()
' source http://forum.script-coding.com/viewtopic.php?pid=75356#p75356
Dim sSignature, oShellWnd, oProc
On Error Resume Next
sSignature = Left(CreateObject("Scriptlet.TypeLib").Guid, 38)
Do
Set oProc = CreateObject("WScript.Shell").Exec("mshta ""about:<head><script>moveTo(-32000,-32000);document.title=' '</script><hta:application id=app border=thick minimizebutton=no maximizebutton=no scroll=no showintaskbar=yes contextmenu=no selection=no innerborder=no icon=""""/><object id='shell' classid='clsid:8856F961-340A-11D0-A96B-00C04FD705A2'><param name=RegisterAsBrowser value=1></object><script>shell.putproperty('" & sSignature & "',document.parentWindow);</script></head>""")
Do
If oProc.Status > 0 Then Exit Do
For Each oShellWnd In CreateObject("Shell.Application").Windows
Set CreateWindow = oShellWnd.GetProperty(sSignature)
If Err.Number = 0 Then Exit Function
Err.Clear
Next
Loop
Loop
End Function
这篇关于将HTML文档的javascript部分中的字段提取到表中?地理坐标的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!