从亚马逊提取价格和卖家数据 [英] Extract price and seller data from Amazon

查看:143
本文介绍了从亚马逊提取价格和卖家数据的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在使用以下代码从亚马逊提取数据。

I am using the code below to extract data from Amazon.

Sub Macro1()
    ' Imports the Amazon offer-listing page for ASIN B00N41UTWG into the
    ' active sheet as an Excel web query, starting at cell A1.
    '
    ' The query pulls in the ENTIRE page (xlEntirePage) as unformatted text;
    ' it does not isolate individual elements such as prices.
    With ActiveSheet.QueryTables.Add(Connection:= _
        "URL;http://www.amazon.com/gp/offer-listing/B00N41UTWG/ref=olp_f_new?ie=UTF8&f_new=true" _
        , Destination:=Range("$A$1"))
        ' Name of the query table. The original statement ended in a dangling
        ' line continuation followed by a commented-out leftover from the
        ' macro recorder; both have been removed.
        .Name = "oldOfferPrice"
        .FieldNames = True
        .RowNumbers = True
        .FillAdjacentFormulas = False
        .PreserveFormatting = True
        .RefreshOnFileOpen = False
        ' Stored default for later refreshes; the explicit Refresh call below
        ' overrides it and runs synchronously.
        .BackgroundQuery = True
        .RefreshStyle = xlInsertDeleteCells
        .SavePassword = False
        .SaveData = True
        .AdjustColumnWidth = True
        .RefreshPeriod = 0
        ' Import the whole page rather than selected tables.
        .WebSelectionType = xlEntirePage
        .WebFormatting = xlWebFormattingNone
        .WebPreFormattedTextToColumns = True
        .WebConsecutiveDelimitersAsOne = True
        .WebSingleBlockTextImport = True
        .WebDisableDateRecognition = False
        .WebDisableRedirections = False
        ' Run the query now and wait for it to finish before returning.
        .Refresh BackgroundQuery:=False

    End With
End Sub

上面的代码正在提取完整的页面数据,但我的要求是仅提取价格。页面价格采用这种格式。

The code above extracts the complete page data, but my requirement is to extract only the prices. Prices on the page are in this format.

<div class="a-row a-spacing-mini olpOffer">
                    <div class="a-column a-span2">
       <span class="a-size-large a-color-price olpOfferPrice a-text-bold">                $171.99                </span>
<span class="a-color-price">
<span class="supersaver"><i class="a-icon a-icon-prime" aria-label="Amazon Prime TM"><span class="a-icon-alt">Amazon Prime TM</span></i></span>
</span>

我想提取两个值,即 $171.99 和 Amazon Prime TM。一页中可能有多个价格和卖家值,我想全部提取。

I want to extract two values, i.e. $171.99 and Amazon Prime TM. There may be multiple price and seller values on one page, and I want to extract all of them.

推荐答案

下面是一个示例,展示了如何使用 XHR 和 Split 检索特定 ASIN 的亚马逊报价,并将结果输出到工作表:

Here is an example showing how you can retrieve Amazon offers for a given ASIN using XHR and Split, and output the results to a sheet:

Sub TestExtractAmazonOffers()

    ' Demo driver: wipes the target sheet, fetches the offers for one sample
    ' ASIN and writes the resulting table starting at the top-left cell.
    Const TARGET_SHEET As String = "Sheet1"
    Const SAMPLE_ASIN As String = "B07CR8D2DW"

    Dim wsTarget As Worksheet
    Dim arrOffers() As Variant

    Set wsTarget = Sheets(TARGET_SHEET)

    ' start from an empty sheet
    wsTarget.Cells.Delete

    ' fetch the offers table (header row + one row per offer)
    arrOffers = ExtractAmazonOffers(SAMPLE_ASIN)

    ' dump the table at row 1, column 1
    Output wsTarget, 1, 1, arrOffers

End Sub

Function ExtractAmazonOffers(strASIN As String) As Variant

    ' Downloads the "new offers" listing pages for the given ASIN from
    ' amazon.com and scrapes them with plain string searches.
    '
    ' Parameters:
    '   strASIN - Amazon product identifier inserted into the listing URL.
    '
    ' Returns:
    '   A 0-based 2-dimensional Variant array with 4 columns. Row 0 is the
    '   header ("Offer Price", "Amazon Prime TM", "Shipping Price",
    '   "Seller Name"); each following row is one scraped offer. When nothing
    '   could be scraped only the header row is returned.
    '
    ' Notes:
    '   * Parsing relies on the CSS class names present in the page markup
    '     (olpOfferList, olpOffer, olpOfferPrice, ...). Offers that do not
    '     have the expected column structure are skipped instead of raising
    '     "Subscript out of range".
    '   * Pagination follows the class="a-last" link and is capped at
    '     MAX_PAGES requests so that a self-referencing "next" link cannot
    '     hang Excel in an endless synchronous loop.

    Const MAX_PAGES As Long = 200

    Dim strUrl As String
    Dim strResp As String
    Dim arrTmp() As String
    Dim arrItems() As String
    Dim arrCols() As String
    Dim strPriceCol As String
    Dim strBeforeShipping As String
    Dim strShipping As String
    Dim strSellerName As String
    Dim strOfferPrice As String
    Dim strAmazonPrime As String
    Dim strShippingPrice As String
    Dim arrResults() As Variant
    Dim arrCells() As Variant
    Dim lngPage As Long
    Dim lngPos As Long
    Dim i As Long
    Dim j As Long

    ' init
    arrResults = Array(Array("Offer Price", "Amazon Prime TM", "Shipping Price", "Seller Name"))
    strUrl = "https://www.amazon.com/gp/offer-listing/" & strASIN & "/ref=olp_f_new?ie=UTF8&f_new=true"
    Do
        lngPage = lngPage + 1
        ' synchronous http get request of the offer listing page
        With CreateObject("MSXML2.XMLHTTP")
            .Open "GET", strUrl, False
            .Send
            ' treat any non-OK answer as an empty page so that no error body is parsed
            If .Status = 200 Then
                strResp = .ResponseText
            Else
                strResp = vbNullString
            End If
        End With
        ' everything after the offer list container holds the offers
        arrTmp = Split(strResp, "id=""olpOfferList""", 2)
        If UBound(arrTmp) = 1 Then
            ' element 0 is the markup before the first offer, hence start at 1
            arrItems = Split(arrTmp(1), "<div class=""a-row a-spacing-mini olpOffer""")
            For i = 1 To UBound(arrItems)
                ' get item columns: 1 = price/shipping, 4 = seller
                arrCols = Split(arrItems(i), "<div class=""a-column", 6)
                If UBound(arrCols) >= 4 Then
                    strPriceCol = arrCols(1)
                    ' retrieve seller name from column 4
                    strSellerName = OfferSellerName(arrCols(4))
                    ' retrieve offer price from column 1
                    strOfferPrice = OfferInnerTextAfter(strPriceCol, "olpOfferPrice")
                    ' split column 1 into the part before and after the shipping info
                    lngPos = InStr(1, strPriceCol, "olpShippingInfo", vbBinaryCompare)
                    If lngPos > 0 Then
                        strBeforeShipping = Left$(strPriceCol, lngPos - 1)
                        strShipping = Mid$(strPriceCol, lngPos + Len("olpShippingInfo"))
                        ' shipping info without an explicit price element means free shipping
                        If InStr(1, strShipping, "olpShippingPrice", vbBinaryCompare) > 0 Then
                            strShippingPrice = OfferInnerTextAfter(strShipping, "olpShippingPrice")
                        Else
                            strShippingPrice = "Free"
                        End If
                    Else
                        ' no shipping info block at all: shipping cost is unknown
                        strBeforeShipping = strPriceCol
                        strShippingPrice = "-"
                    End If
                    ' the Prime badge sits between the price and the shipping info
                    strAmazonPrime = IIf(InStr(strBeforeShipping, "Amazon Prime") > 0, "Amazon Prime", "-")
                    ' store data
                    ReDim Preserve arrResults(UBound(arrResults) + 1)
                    arrResults(UBound(arrResults)) = Array(strOfferPrice, strAmazonPrime, strShippingPrice, strSellerName)
                End If
            Next
        End If
        ' search for next page link; empty string means there is none
        strUrl = OfferNextPageUrl(strResp)
    Loop Until Len(strUrl) = 0 Or lngPage >= MAX_PAGES
    ' convert nested array to 2-dimensional array
    ReDim arrCells(UBound(arrResults), 3)
    For i = 0 To UBound(arrCells, 1)
        For j = 0 To UBound(arrCells, 2)
            arrCells(i, j) = arrResults(i)(j)
        Next
    Next
    ExtractAmazonOffers = arrCells

End Function

Private Function OfferCleanText(ByVal strRaw As String) As String

    ' Normalises scraped text: line breaks, tabs and &nbsp; become spaces,
    ' &amp; is decoded, and leading/trailing spaces are removed.
    strRaw = Replace(strRaw, vbCr, " ")
    strRaw = Replace(strRaw, vbLf, " ")
    strRaw = Replace(strRaw, vbTab, " ")
    strRaw = Replace(strRaw, "&nbsp;", " ")
    ' decode &amp; last so that an escaped "&amp;nbsp;" stays literal text
    strRaw = Replace(strRaw, "&amp;", "&")
    OfferCleanText = Trim$(strRaw)

End Function

Private Function OfferInnerTextAfter(ByVal strHtml As String, ByVal strMarker As String) As String

    ' Returns the cleaned text between the first ">" that follows strMarker
    ' and the next "<". Returns "" when the marker or the ">" is missing.
    Dim lngPos As Long
    Dim lngOpen As Long
    Dim lngClose As Long

    lngPos = InStr(1, strHtml, strMarker, vbBinaryCompare)
    If lngPos = 0 Then Exit Function
    lngOpen = InStr(lngPos + Len(strMarker), strHtml, ">", vbBinaryCompare)
    If lngOpen = 0 Then Exit Function
    lngClose = InStr(lngOpen + 1, strHtml, "<", vbBinaryCompare)
    ' no following tag: take the rest of the string
    If lngClose = 0 Then lngClose = Len(strHtml) + 1
    OfferInnerTextAfter = OfferCleanText(Mid$(strHtml, lngOpen + 1, lngClose - lngOpen - 1))

End Function

Private Function OfferSellerName(ByVal strColumn As String) As String

    ' Extracts the seller name from the seller column: taken from the logo
    ' image's alt attribute when one follows the olpSellerName marker,
    ' otherwise from the text of the first link. Returns "" when the marker
    ' is missing.
    Dim lngPos As Long
    Dim lngEnd As Long

    lngPos = InStr(1, strColumn, "olpSellerName", vbBinaryCompare)
    If lngPos = 0 Then Exit Function
    strColumn = Mid$(strColumn, lngPos + Len("olpSellerName"))
    lngPos = InStr(1, strColumn, "alt=""", vbBinaryCompare)
    If lngPos > 0 Then ' from image alt
        lngPos = lngPos + Len("alt=""")
        lngEnd = InStr(lngPos, strColumn, """", vbBinaryCompare)
        If lngEnd = 0 Then lngEnd = Len(strColumn) + 1
        OfferSellerName = OfferCleanText(Mid$(strColumn, lngPos, lngEnd - lngPos))
    Else ' from link
        OfferSellerName = OfferInnerTextAfter(strColumn, "<a")
    End If

End Function

Private Function OfferNextPageUrl(ByVal strHtml As String) As String

    ' Returns the absolute URL of the "next page" link (the first href after
    ' class="a-last"), or "" when the page has no such link.
    Dim lngPos As Long
    Dim lngEnd As Long

    lngPos = InStr(1, strHtml, "class=""a-last""", vbBinaryCompare)
    If lngPos = 0 Then Exit Function
    lngPos = InStr(lngPos, strHtml, "href=""", vbBinaryCompare)
    If lngPos = 0 Then Exit Function
    lngPos = lngPos + Len("href=""")
    lngEnd = InStr(lngPos, strHtml, """", vbBinaryCompare)
    If lngEnd = 0 Then Exit Function
    ' attribute values are HTML-escaped; the query string needs plain "&"
    OfferNextPageUrl = Replace(Mid$(strHtml, lngPos, lngEnd - lngPos), "&amp;", "&")
    If Left$(OfferNextPageUrl, 1) = "/" Then OfferNextPageUrl = "https://www.amazon.com" & OfferNextPageUrl

End Function

Sub Output(objSheet As Worksheet, lngTop As Long, lngLeft As Long, arrCells As Variant)

    ' Writes a 2-dimensional array to objSheet as text, with its first
    ' element landing in the cell at row lngTop / column lngLeft, then
    ' auto-fits the columns that were written. The sheet is selected first.
    Dim lngRowCount As Long
    Dim lngColCount As Long
    Dim rngTarget As Range

    objSheet.Select

    ' size of the block to fill, independent of the array's lower bounds
    lngRowCount = UBound(arrCells, 1) - LBound(arrCells, 1) + 1
    lngColCount = UBound(arrCells, 2) - LBound(arrCells, 2) + 1

    Set rngTarget = objSheet.Cells(lngTop, lngLeft).Resize(lngRowCount, lngColCount)

    ' force text format before writing so values are stored as typed
    rngTarget.NumberFormat = "@"
    rngTarget.Value = arrCells
    rngTarget.Columns.AutoFit

End Sub

结果表如下:

这篇关于从亚马逊提取价格和卖家数据的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆