VBA - XMLHTTP and WinHttp request speed
In addition to the methods you've mentioned:
- IE automation
- WinHTTPRequest
- XMLHTTP
- ServerXMLHTTP
There are 2 other methods you can think about:
- Using the
CreateDocumentFromUrl
method of theMSHTML.HTMLDocument
object - Using the Windows API function
URLDownloadToFileA
There are some other Windows APIs that I am ignoring such as InternetOpen
, InternetOpenUrl
etc as potential performance will be outweighed by complexity of guess the response length, buffering the response, and so forth.
CreateDocumentFromUrl
With the CreateDocumentFromUrl
method it is a problem with your sample website because it attempts to create a HTMLDocument
in an frame which is not allowed with errors such as:
Framing Forbidden
and
To help protect the security of information you enter into this website, the publisher of this content does not allow it to be displayed in a frame.
So we should not use this method.
URLDownloadToFileA
I thought you need the php equivalent of file_get_contents
and found this method. It is easily used (check this link) and out-performs the other methods when used on a large request (e.g. try it when you go for >2000 baseball bats). The XMLHTTP
also method uses the URLMon
library so I guess this way is just cutting out a bit of middle-man logic and obviously there's a downside because you have to do some file system handling.
Private Declare Function URLDownloadToFileA Lib "urlmon" (ByVal pCaller As Long, _
ByVal szURL As String, ByVal szFileName As String, ByVal dwReserved As Long, _
ByVal lpfnCB As Long) As Long
Sub TestUrlDownloadFile(strUrl As String)
Dim dteStart As Date
Dim dteFinish As Date
Dim strTempFileName As String
Dim strResponse As String
Dim objFso As FileSystemObject
On Error GoTo ExitFunction
dteStart = Now
strTempFileName = "D:\foo.txt"
DownloadFile strUrl, strTempFileName
Set objFso = New FileSystemObject
With objFso.OpenTextFile(strTempFileName, ForReading)
strResponse = .ReadAll
.Close
End With
objFso.DeleteFile strTempFileName
dteFinish = Now
Debug.Print "URL download file method"
Debug.Print "Document length: " & Len(strResponse) & " chars"
Debug.Print "Processed in: " & Format(dteFinish - dteStart, "s") & " seconds"
Debug.Print VBA.vbNewLine
ExitFunction:
If Err.Number <> 0 Then
Debug.Print Err.Description
End If
End Sub
'http://www.vbaexpress.com/forum/archive/index.php/t-27050.html
Private Function DownloadFile(URL As String, LocalFilename As String) As Boolean
Dim lngRetVal As Long
lngRetVal = URLDownloadToFileA(0, URL, LocalFilename, 0, 0)
If lngRetVal = 0 Then DownloadFile = True
End Function
With the URLDownloadToFileA
it is taking me about 1-2 seconds to download you sample URL versus 4-5 seconds with the XMLHTTP
method (full code below).
The URL:
www.justbats.com/products/bat type~baseball/?sortBy=TotalSales Descending&page=1&size=2400
This is the output:
Testing...
XML HTTP method
Document length: 7869753 chars
Processed in: 4 seconds
URL download file method
Document length: 7869753 chars
Processed in: 1 seconds
Code
This includes all methods discussed e.g. IE automation, WinHTTPRequest, XMLHTTP, ServerXMLHTTP, CreateDocumentFromURL and URLDownloadFile.
You need all these references in project:
Here it is:
Option Explicit
Private Declare Function URLDownloadToFileA Lib "urlmon" (ByVal pCaller As Long, _
ByVal szURL As String, ByVal szFileName As String, ByVal dwReserved As Long, _
ByVal lpfnCB As Long) As Long
Sub Test()
Dim strUrl As String
strUrl = "http://www.justbats.com/products/bat type~baseball/?sortBy=TotalSales Descending&page=1&size=2400"
Debug.Print "Testing..."
Debug.Print VBA.vbNewLine
'TestIE strUrl
'TestWinHHTP strUrl
TestXMLHTTP strUrl
'TestServerXMLHTTP strUrl
'TestCreateDocumentFromUrl strUrl
TestUrlDownloadFile strUrl
End Sub
Sub TestIE(strUrl As String)
Dim dteStart As Date
Dim dteFinish As Date
Dim objIe As InternetExplorer
Dim objHtml As MSHTML.HTMLDocument
Dim strResponse As String
On Error GoTo ExitFunction
dteStart = Now
Set objIe = New SHDocVw.InternetExplorer
With objIe
.navigate strUrl
.Visible = False
While .Busy Or .readyState <> READYSTATE_COMPLETE
DoEvents
Wend
Set objHtml = .document
strResponse = objHtml.DocumentElement.outerHTML
.Quit
End With
dteFinish = Now
Debug.Print "IE automation method"
Debug.Print "Document length: " & Len(strResponse) & " chars"
Debug.Print "Processed in: " & Format(dteFinish - dteStart, "s") & " seconds"
Debug.Print VBA.vbNewLine
ExitFunction:
If Err.Number <> 0 Then
Debug.Print Err.Description
End If
If Not objIe Is Nothing Then
objIe.Quit
End If
Set objIe = Nothing
End Sub
Sub TestWinHHTP(strUrl As String)
Dim dteStart As Date
Dim dteFinish As Date
Dim objHttp As WinHttp.WinHttpRequest
Dim objDoc As HTMLDocument
Dim strResponse As String
On Error GoTo ExitFunction
dteStart = Now
Set objHttp = New WinHttp.WinHttpRequest
With objHttp
.Open "get", strUrl, False
.setRequestHeader "Cache-Control", "no-cache"
.setRequestHeader "Pragma", "no-cache"
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
.WaitForResponse
strResponse = .responseText
End With
dteFinish = Now
Debug.Print "WinHTTP method"
Debug.Print "Document length: " & Len(strResponse) & " chars"
Debug.Print "Processed in: " & Format(dteFinish - dteStart, "s") & " seconds"
Debug.Print VBA.vbNewLine
ExitFunction:
If Err.Number <> 0 Then
Debug.Print Err.Description
End If
Set objDoc = Nothing
Set objHttp = Nothing
End Sub
Sub TestXMLHTTP(strUrl As String)
Dim dteStart As Date
Dim dteFinish As Date
Dim objXhr As MSXML2.XMLHTTP60
Dim objDoc As MSHTML.HTMLDocument
Dim strResponse As String
On Error GoTo ExitFunction
dteStart = Now
Set objXhr = New MSXML2.XMLHTTP60
With objXhr
.Open "get", strUrl, False
.setRequestHeader "Cache-Control", "no-cache"
.setRequestHeader "Pragma", "no-cache"
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
While .readyState <> 4
DoEvents
Wend
strResponse = .responseText
End With
dteFinish = Now
Debug.Print "XML HTTP method"
Debug.Print "Document length: " & Len(strResponse) & " chars"
Debug.Print "Processed in: " & Format(dteFinish - dteStart, "s") & " seconds"
Debug.Print VBA.vbNewLine
ExitFunction:
If Err.Number <> 0 Then
Debug.Print Err.Description
End If
Set objDoc = Nothing
Set objXhr = Nothing
End Sub
Sub TestServerXMLHTTP(strUrl As String)
Dim dteStart As Date
Dim dteFinish As Date
Dim objXhr As MSXML2.ServerXMLHTTP60
Dim objDoc As MSHTML.HTMLDocument
Dim strResponse As String
On Error GoTo ExitFunction
dteStart = Now
Set objXhr = New MSXML2.ServerXMLHTTP60
With objXhr
.Open "get", strUrl, False
.setRequestHeader "Cache-Control", "no-cache"
.setRequestHeader "Pragma", "no-cache"
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
While .readyState <> 4
DoEvents
Wend
strResponse = .responseText
End With
dteFinish = Now
Debug.Print "Server XML HTTP method"
Debug.Print "Document length: " & Len(strResponse) & " chars"
Debug.Print "Processed in: " & Format(dteFinish - dteStart, "s") & " seconds"
Debug.Print VBA.vbNewLine
ExitFunction:
If Err.Number <> 0 Then
Debug.Print Err.Description
End If
Set objDoc = Nothing
Set objXhr = Nothing
End Sub
Sub TestUrlDownloadFile(strUrl As String)
Dim dteStart As Date
Dim dteFinish As Date
Dim strTempFileName As String
Dim strResponse As String
Dim objFso As FileSystemObject
On Error GoTo ExitFunction
dteStart = Now
strTempFileName = "D:\foo.txt"
If DownloadFile(strUrl, strTempFileName) Then
Set objFso = New FileSystemObject
With objFso.OpenTextFile(strTempFileName, ForReading)
strResponse = .ReadAll
.Close
End With
objFso.DeleteFile strTempFileName
Else
Debug.Print "Error downloading file from URL: " & strUrl
GoTo ExitFunction
End If
dteFinish = Now
Debug.Print "URL download file method"
Debug.Print "Document length: " & Len(strResponse) & " chars"
Debug.Print "Processed in: " & Format(dteFinish - dteStart, "s") & " seconds"
Debug.Print VBA.vbNewLine
ExitFunction:
If Err.Number <> 0 Then
Debug.Print Err.Description
End If
End Sub
'http://www.vbaexpress.com/forum/archive/index.php/t-27050.html
Private Function DownloadFile(URL As String, LocalFilename As String) As Boolean
Dim lngRetVal As Long
lngRetVal = URLDownloadToFileA(0, URL, LocalFilename, 0, 0)
If lngRetVal = 0 Then
DownloadFile = True
Else
DownloadFile = False
End If
End Function
Sub TestCreateDocumentFromUrl(strUrl As String)
Dim dteStart As Date
Dim dteFinish As Date
Dim strResponse As String
Dim objDoc1 As HTMLDocument
Dim objDoc2 As HTMLDocument
On Error GoTo ExitFunction
dteStart = Now
Set objDoc1 = New HTMLDocument
Set objDoc2 = objDoc1.createDocumentFromUrl(strUrl, "null")
While objDoc2.readyState <> "complete"
DoEvents
Wend
strResponse = objDoc2.DocumentElement.outerHTML
Debug.Print strResponse
dteFinish = Now
Debug.Print "HTML Document Create from URL method"
Debug.Print "Document length: " & Len(strResponse) & " chars"
Debug.Print "Processed in: " & Format(dteFinish - dteStart, "s") & " seconds"
Debug.Print VBA.vbNewLine
ExitFunction:
If Err.Number <> 0 Then
Debug.Print Err.Description
End If
Set objDoc2 = Nothing
Set objDoc1 = Nothing
End Sub
Most of the time is spent waiting for a response from the server. So if you want improve the execution time, then send the requests in parallel.
I would also use the "Msxml2.ServerXMLHTTP.6.0" object/interface since it doesn't implement any caching.
Here's a working example:
Sub TestRequests()
GetUrls _
"http://stackoverflow.com/questions/34880012", _
"http://stackoverflow.com/questions/34880013", _
"http://stackoverflow.com/questions/34880014", _
"http://stackoverflow.com/questions/34880015", _
"http://stackoverflow.com/questions/34880016", _
"http://stackoverflow.com/questions/34880017"
End Sub
Private Sub OnRequest(url, xhr)
xhr.Open "GET", url, True
xhr.setRequestHeader "Content-Type", "text/html; charset=UTF-8"
xhr.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0"
xhr.Send
End Sub
Private Sub OnResponse(url, xhr)
Debug.Print url, Len(xhr.ResponseText)
End Sub
Public Function GetUrls(ParamArray urls())
Const WORKERS = 10
' create http workers
Dim wkrs(0 To WORKERS * 2 - 1), i As Integer
For i = 0 To UBound(wkrs) Step 2
Set wkrs(i) = CreateObject("Msxml2.ServerXMLHTTP.6.0")
Next
' send the requests in parallele
Dim index As Integer, count As Integer, xhr As Object
While count <= UBound(urls)
For i = 0 To UBound(wkrs) Step 2
Set xhr = wkrs(i)
If xhr.readyState And 3 Then ' if busy
xhr.waitForResponse 0.01 ' wait 10ms
ElseIf Not VBA.IsEmpty(wkrs(i + 1)) And xhr.readyState = 4 Then
OnResponse urls(wkrs(i + 1)), xhr
count = count + 1
wkrs(i + 1) = Empty
End If
If VBA.IsEmpty(wkrs(i + 1)) And index <= UBound(urls) Then
wkrs(i + 1) = index
OnRequest urls(index), xhr
index = index + 1
End If
Next
Wend
End Function