如果您检查该页面,您会发现最初仅显示 100 个结果(准确地说,是 HTML 表格 tbody 中的 100 行)。
在浏览器开发者工具(按 F12 打开)的"元素"选项卡搜索框中输入 CSS 选择器 [data-test="historical-prices"] tbody tr,您将看到:
当您向下滚动页面时,其余行会从数据存储中动态填充。当然,您当前的方法无法获取这些行。事实上,您可以发出 XHR 请求,用正则表达式从响应中提取出包含所有行的 JavaScript 对象,然后使用 JSON 解析器进行解析。
您当前应该看到的响应大致如下:
我使用 JsonConverter.bas 作为我的 JSON 解析器。请从此处下载原始代码:https://github.com/VBA-tools/VBA-JSON/blob/master/JsonConverter.bas ,并将其添加到名为 JsonConverter 的标准模块中。
然后,您需要转到 VBE > 工具 > 引用,添加对 Microsoft Scripting Runtime 的引用。
@TimWilliams 写了一个更好的 Unix 时间戳转换函数(见 https://stackoverflow.com/a/12326121/6241235 ),但我想我应该尝试写一些不同的东西。我建议您坚持使用他的版本,因为它更安全、更快。
VBA:
Option Explicit
Public Sub GetYahooData()
    ' Requests the Yahoo Finance history page for one ticker, extracts the
    ' embedded price array from the response text with GetJsonString, parses
    ' it with JsonConverter and hands the result to WriteOutResults.
    ' Requires: VBE > Tools > References > Microsoft Scripting Runtime
    Dim targetSheet As Worksheet
    Dim matcher As Object, request As Object, parsed As Object
    Dim ticker As String, periodStart As String, periodEnd As String
    Dim url As String, payload As String

    Set targetSheet = ThisWorkbook.Worksheets("Sheet1")
    Set matcher = CreateObject("VBScript.RegExp")
    Set request = CreateObject("MSXML2.XMLHTTP")

    ticker = "AAPL"
    periodStart = "1534809600"
    periodEnd = "1566345600"

    ' The current timestamp is appended as the _guc_consent_skip query value.
    url = "https://finance.yahoo.com/quote/" & ticker & "/history?period1=" & periodStart & _
          "&period2=" & periodEnd & "&interval=1d&filter=history&frequency=1d&_guc_consent_skip=" & _
          GetCurrentUnix(Now())

    ' Third argument False = synchronous call; .send returns once the response is in.
    request.Open "GET", url, False
    request.setRequestHeader "User-Agent", "Mozilla/5.0"
    request.send

    payload = GetJsonString(matcher, request.responseText)

    ' GetJsonString returns the literal "No match" when the pattern is absent.
    If payload <> "No match" Then
        Set parsed = JsonConverter.ParseJson(payload)
        WriteOutResults targetSheet, parsed
    End If
End Sub
Public Sub WriteOutResults(ByVal ws As Worksheet, ByVal json As Object)
    ' Writes a parsed JSON array of objects to ws: header names in row 1,
    ' one data row per array element starting at row 2, both from column A.
    '
    ' ws   - worksheet that receives the output.
    ' json - collection of dictionary-like items (each must expose .keys,
    '        .Exists and default item access), as produced by the JSON parser.
    '
    ' The column layout is taken from the keys of the FIRST item. Every later
    ' item is written by key name, so an item with missing or extra keys
    ' leaves blanks / drops the extras instead of shifting values into the
    ' wrong column or overrunning the results array.
    Dim item As Object, headers(), results()
    Dim r As Long, c As Long, colCount As Long, firstIdx As Long

    ' Nothing to write: avoid the runtime error json.item(1) would raise.
    If json Is Nothing Then Exit Sub
    If json.Count = 0 Then Exit Sub

    headers = json.item(1).keys
    firstIdx = LBound(headers)
    colCount = UBound(headers) - firstIdx + 1
    If colCount <= 0 Then Exit Sub

    ReDim results(1 To json.Count, 1 To colCount)

    For Each item In json
        r = r + 1
        For c = 1 To colCount
            ' Look the value up by header name rather than by key position.
            If item.Exists(headers(firstIdx + c - 1)) Then
                results(r, c) = item(headers(firstIdx + c - 1))
            End If
        Next c
    Next item

    With ws
        .Cells(1, 1).Resize(1, colCount).Value = headers
        .Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)).Value = results
    End With
End Sub
Public Function GetJsonString(ByVal re As Object, ByVal responseText As String) As String
    ' Returns the text captured by the HistoricalPriceStore "prices" pattern
    ' from responseText, or the literal string "No match" when the pattern
    ' does not occur.
    '
    ' re           - a VBScript.RegExp instance; its Global, MultiLine,
    '                IgnoreCase and Pattern properties are overwritten here.
    ' responseText - text to search.
    Dim hits As Object

    re.Global = True
    re.MultiLine = True
    re.IgnoreCase = False
    ' Lazy capture: everything after "prices": up to and including the first "]".
    re.Pattern = "HistoricalPriceStore"":{""prices"":(.*?\])"

    Set hits = re.Execute(responseText)

    If hits.Count > 0 Then
        ' First match, first capture group.
        GetJsonString = hits(0).SubMatches(0)
    Else
        GetJsonString = "No match"
    End If
End Function
Public Function GetCurrentUnix(ByVal t As Double) As String
    ' Converts the date/time value t (a VBA date serial) to a timestamp by
    ' evaluating new Date(t).getTime() / 1000 in a JScript engine hosted by
    ' an "htmlfile" document, and returns the result as a string.
    '
    ' t - date serial to convert, e.g. Now().
    '
    ' Fix: the original ignored t and always converted Now. t is passed
    ' through CDate so the script engine receives a date value rather than a
    ' bare number (which new Date() would treat as milliseconds).
    With CreateObject("htmlfile")
        .parentWindow.execScript "function GetTimeStamp(t){return new Date(t).getTime() / 1000}", "jscript"
        GetCurrentUnix = .parentWindow.GetTimeStamp(CDate(t))
    End With
End Function
Regex:
Python:
如果您感兴趣的话,我最初是用 Python 编写的:
import requests, re, json
from bs4 import BeautifulSoup as bs
# Capture the JSON array that follows `HistoricalPriceStore":{"prices":` in the
# page source, lazily up to the first closing bracket. A raw string is used so
# that `\]` reaches the regex engine as written instead of being treated as an
# (invalid) string escape.
p = re.compile(r'HistoricalPriceStore":{"prices":(.*?\])')

# Same request as the VBA version, including its User-Agent header.
r = requests.get(
    'https://finance.yahoo.com/quote/AAPL/history?period1=1534809600&period2=1566345600&interval=1d&filter=history&frequency=1d&_guc_consent_skip=1566859607',
    headers={'User-Agent': 'Mozilla/5.0'},
    timeout=30,
)
# Fail on an HTTP error status rather than trying to parse an error page.
r.raise_for_status()

matches = p.findall(r.text)
if not matches:
    # Clearer than the bare IndexError that matches[0] would raise.
    raise ValueError('HistoricalPriceStore prices array not found in response')

# List of per-row objects parsed from the first captured array.
data = json.loads(matches[0])