从亚马逊提取价格和卖家数据 [英] Extract price and seller data from Amazon
本文介绍了从亚马逊提取价格和卖家数据的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!
问题描述
我正在使用以下代码从亚马逊提取数据。
I am using below code to extract data from Amazon.
Sub Macro1()
' Macro1 Macro
With ActiveSheet.QueryTables.Add(Connection:= _
"URL;http://www.amazon.com/gp/offer-listing/B00N41UTWG/ref=olp_f_new?ie=UTF8&f_new=true" _
, Destination:=Range("$A$1"))
.Name = "oldOfferPrice" _
' "its_details_value_node.html?nsc=true&listId=www_s201_b9233&tsId=BBK01.ED0439"
.FieldNames = True
.RowNumbers = True
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.WebSelectionType = xlEntirePage
.WebFormatting = xlWebFormattingNone
.WebPreFormattedTextToColumns = True
.WebConsecutiveDelimitersAsOne = True
.WebSingleBlockTextImport = True
.WebDisableDateRecognition = False
.WebDisableRedirections = False
.Refresh BackgroundQuery:=False
End With
End Sub
上面的代码正在提取完整的页面数据,但我的要求是仅提取价格。页面价格采用这种格式。
Above code is extracting complete page data but my requirement is to extract only prices. Page prices are in this format.
<div class="a-row a-spacing-mini olpOffer">
<div class="a-column a-span2">
<span class="a-size-large a-color-price olpOfferPrice a-text-bold"> $171.99 </span>
<span class="a-color-price">
<span class="supersaver"><i class="a-icon a-icon-prime" aria-label="Amazon Prime TM"><span class="a-icon-alt">Amazon Prime TM</span></i></span>
</span>
我想提取两个值,即$ 171.99和Amazon Prime TM。一页中可能有多个价格和卖家价值,我想全部提取。
I want to extract two values i.e $171.99 and Amazon Prime TM. There may be multiple price and seller values in one page and I want to extract all.
推荐答案
下面是一个示例,显示了如何使用 XHR $检索某些ASIN的Amazon报价。 c $ c>和
Split
,并将结果输出到工作表:
Here is an example showing how you can retrieve Amazon offers for certain ASIN using XHR
and Split
, and output results to the sheet:
Sub TestExtractAmazonOffers()
Dim arrList() As Variant
' clear sheet
Sheets("Sheet1").Cells.Delete
' retrieve offers for certain ASIN
arrList = ExtractAmazonOffers("B07CR8D2DW")
' output data
Output Sheets("Sheet1"), 1, 1, arrList
End Sub
Function ExtractAmazonOffers(strASIN As String)
Dim strUrl As String
Dim arrTmp() As String
Dim strTmp As String
Dim arrItems() As String
Dim i As Long
Dim arrCols() As String
Dim strSellerName As String
Dim strOfferPrice As String
Dim strAmazonPrime As String
Dim strShippingPrice As String
Dim arrResults() As Variant
Dim arrCells() As Variant
' init
arrResults = Array(Array("Offer Price", "Amazon Prime TM", "Shipping Price", "Seller Name"))
strUrl = "https://www.amazon.com/gp/offer-listing/" & strASIN & "/ref=olp_f_new?ie=UTF8&f_new=true"
Do
' http get request of the search result page
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", strUrl, False
.Send
strResp = .ResponseText
End With
arrTmp = Split(strResp, "id=""olpOfferList""", 2)
If UBound(arrTmp) = 1 Then
arrItems = Split(arrTmp(1), "<div class=""a-row a-spacing-mini olpOffer""")
For i = 1 To UBound(arrItems)
' get item columns
arrCols = Split(arrItems(i), "<div class=""a-column", 6)
' retrieve seller name from column 4
strTmp = Split(arrCols(4), "olpSellerName", 2)(1)
arrTmp = Split(strTmp, "alt=""", 2)
If UBound(arrTmp) = 1 Then ' from image alt
strTmp = Split(arrTmp(1), """", 2)(0)
strSellerName = Trim(strTmp)
Else ' from link
strTmp = Split(strTmp, "<a", 2)(1)
strTmp = Split(strTmp, ">", 2)(1)
strTmp = Split(strTmp, "<", 2)(0)
strSellerName = Trim(strTmp)
End If
' retrieve offer price from column 1
strTmp = Split(arrCols(1), "olpOfferPrice", 2)(1)
strTmp = Split(strTmp, ">", 2)(1)
strTmp = Split(strTmp, "<", 2)(0)
strOfferPrice = Trim(strTmp)
' retrieve amazon prime
arrTmp = Split(arrCols(1), "olpShippingInfo", 2)
strAmazonPrime = IIf(InStr(arrTmp(0), "Amazon Prime") > 0, "Amazon Prime", "-")
' retrieve shipping info
arrTmp = Split(arrTmp(1), "olpShippingPrice", 2)
If UBound(arrTmp) = 1 Then
strTmp = Split(arrTmp(1), ">", 2)(1)
strTmp = Split(strTmp, "<", 2)(0)
strShippingPrice = Trim(strTmp)
Else
strShippingPrice = "Free"
End If
' store data
ReDim Preserve arrResults(UBound(arrResults) + 1)
arrResults(UBound(arrResults)) = Array(strOfferPrice, strAmazonPrime, strShippingPrice, strSellerName)
Next
End If
' search for next page link
arrTmp = Split(strResp, "class=""a-last""", 2)
If UBound(arrTmp) = 0 Then Exit Do
strTmp = Split(arrTmp(1), "href=""", 2)(1)
strUrl = Split(strTmp, """", 2)(0)
If Left(strUrl, 1) = "/" Then strUrl = "https://www.amazon.com" & strUrl
Loop
' convert nested array to 2-dimensional array
ReDim arrCells(UBound(arrResults), 3)
For i = 0 To UBound(arrCells, 1)
For j = 0 To UBound(arrCells, 2)
arrCells(i, j) = arrResults(i)(j)
Next
Next
ExtractAmazonOffers = arrCells
End Function
Sub Output(objSheet As Worksheet, lngTop As Long, lngLeft As Long, arrCells As Variant)
With objSheet
.Select
With .Range(.Cells(lngTop, lngLeft), .Cells( _
UBound(arrCells, 1) - LBound(arrCells, 1) + lngTop, _
UBound(arrCells, 2) - LBound(arrCells, 2) + lngLeft))
.NumberFormat = "@"
.Value = arrCells
.Columns.AutoFit
End With
End With
End Sub
结果表如下:
这篇关于从亚马逊提取价格和卖家数据的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!
查看全文