Parsing JSON in Excel VBA
UPDATE 3 (Sep 24 '17)
Check VBA-JSON-parser on GitHub for the latest version and examples. Import JSON.bas module into the VBA project for JSON processing.
UPDATE 2 (Oct 1 '16)
However if you do want to parse JSON on 64-bit Office with ScriptControl
, then this answer may help you to get ScriptControl
to work on 64-bit.
UPDATE (Oct 26 '15)
Note that a ScriptControl
-based approachs makes the system vulnerable in some cases, since they allows a direct access to the drives (and other stuff) for the malicious JS code via ActiveX's. Let's suppose you are parsing web server response JSON, like JsonString = "{a:(function(){(new ActiveXObject('Scripting.FileSystemObject')).CreateTextFile('C:\\Test.txt')})()}"
. After evaluating it you'll find new created file C:\Test.txt
. So JSON parsing with ScriptControl
ActiveX is not a good idea.
Trying to avoid that, I've created JSON parser based on RegEx's. Objects {}
are represented by dictionaries, that makes possible to use dictionary's properties and methods: .Count
, .Exists()
, .Item()
, .Items
, .Keys
. Arrays []
are the conventional zero-based VB arrays, so UBound()
shows the number of elements. Here is the code with some usage examples:
Option Explicit
Sub JsonTest()
Dim strJsonString As String
Dim varJson As Variant
Dim strState As String
Dim varItem As Variant
' parse JSON string to object
' root element can be the object {} or the array []
strJsonString = "{""a"":[{}, 0, ""value"", [{""stuff"":""content""}]], b:null}"
ParseJson strJsonString, varJson, strState
' checking the structure step by step
Select Case False ' if any of the checks is False, the sequence is interrupted
Case IsObject(varJson) ' if root JSON element is object {},
Case varJson.Exists("a") ' having property a,
Case IsArray(varJson("a")) ' which is array,
Case UBound(varJson("a")) >= 3 ' having not less than 4 elements,
Case IsArray(varJson("a")(3)) ' where forth element is array,
Case UBound(varJson("a")(3)) = 0 ' having the only element,
Case IsObject(varJson("a")(3)(0)) ' which is object,
Case varJson("a")(3)(0).Exists("stuff") ' having property stuff,
Case Else
MsgBox "Check the structure step by step" & vbCrLf & varJson("a")(3)(0)("stuff") ' then show the value of the last one property.
End Select
' direct access to the property if sure of structure
MsgBox "Direct access to the property" & vbCrLf & varJson.Item("a")(3)(0).Item("stuff") ' content
' traversing each element in array
For Each varItem In varJson("a")
' show the structure of the element
MsgBox "The structure of the element:" & vbCrLf & BeautifyJson(varItem)
Next
' show the full structure starting from root element
MsgBox "The full structure starting from root element:" & vbCrLf & BeautifyJson(varJson)
End Sub
Sub BeautifyTest()
' put sourse JSON string to "desktop\source.json" file
' processed JSON will be saved to "desktop\result.json" file
Dim strDesktop As String
Dim strJsonString As String
Dim varJson As Variant
Dim strState As String
Dim strResult As String
Dim lngIndent As Long
strDesktop = CreateObject("WScript.Shell").SpecialFolders.Item("Desktop")
strJsonString = ReadTextFile(strDesktop & "\source.json", -2)
ParseJson strJsonString, varJson, strState
If strState <> "Error" Then
strResult = BeautifyJson(varJson)
WriteTextFile strResult, strDesktop & "\result.json", -1
End If
CreateObject("WScript.Shell").PopUp strState, 1, , 64
End Sub
Sub ParseJson(ByVal strContent As String, varJson As Variant, strState As String)
' strContent - source JSON string
' varJson - created object or array to be returned as result
' strState - Object|Array|Error depending on processing to be returned as state
Dim objTokens As Object
Dim objRegEx As Object
Dim bMatched As Boolean
Set objTokens = CreateObject("Scripting.Dictionary")
Set objRegEx = CreateObject("VBScript.RegExp")
With objRegEx
' specification http://www.json.org/
.Global = True
.MultiLine = True
.IgnoreCase = True
.Pattern = """(?:\\""|[^""])*""(?=\s*(?:,|\:|\]|\}))"
Tokenize objTokens, objRegEx, strContent, bMatched, "str"
.Pattern = "(?:[+-])?(?:\d+\.\d*|\.\d+|\d+)e(?:[+-])?\d+(?=\s*(?:,|\]|\}))"
Tokenize objTokens, objRegEx, strContent, bMatched, "num"
.Pattern = "(?:[+-])?(?:\d+\.\d*|\.\d+|\d+)(?=\s*(?:,|\]|\}))"
Tokenize objTokens, objRegEx, strContent, bMatched, "num"
.Pattern = "\b(?:true|false|null)(?=\s*(?:,|\]|\}))"
Tokenize objTokens, objRegEx, strContent, bMatched, "cst"
.Pattern = "\b[A-Za-z_]\w*(?=\s*\:)" ' unspecified name without quotes
Tokenize objTokens, objRegEx, strContent, bMatched, "nam"
.Pattern = "\s"
strContent = .Replace(strContent, "")
.MultiLine = False
Do
bMatched = False
.Pattern = "<\d+(?:str|nam)>\:<\d+(?:str|num|obj|arr|cst)>"
Tokenize objTokens, objRegEx, strContent, bMatched, "prp"
.Pattern = "\{(?:<\d+prp>(?:,<\d+prp>)*)?\}"
Tokenize objTokens, objRegEx, strContent, bMatched, "obj"
.Pattern = "\[(?:<\d+(?:str|num|obj|arr|cst)>(?:,<\d+(?:str|num|obj|arr|cst)>)*)?\]"
Tokenize objTokens, objRegEx, strContent, bMatched, "arr"
Loop While bMatched
.Pattern = "^<\d+(?:obj|arr)>$" ' unspecified top level array
If Not (.Test(strContent) And objTokens.Exists(strContent)) Then
varJson = Null
strState = "Error"
Else
Retrieve objTokens, objRegEx, strContent, varJson
strState = IIf(IsObject(varJson), "Object", "Array")
End If
End With
End Sub
Sub Tokenize(objTokens, objRegEx, strContent, bMatched, strType)
Dim strKey As String
Dim strRes As String
Dim lngCopyIndex As Long
Dim objMatch As Object
strRes = ""
lngCopyIndex = 1
With objRegEx
For Each objMatch In .Execute(strContent)
strKey = "<" & objTokens.Count & strType & ">"
bMatched = True
With objMatch
objTokens(strKey) = .Value
strRes = strRes & Mid(strContent, lngCopyIndex, .FirstIndex - lngCopyIndex + 1) & strKey
lngCopyIndex = .FirstIndex + .Length + 1
End With
Next
strContent = strRes & Mid(strContent, lngCopyIndex, Len(strContent) - lngCopyIndex + 1)
End With
End Sub
Sub Retrieve(objTokens, objRegEx, strTokenKey, varTransfer)
Dim strContent As String
Dim strType As String
Dim objMatches As Object
Dim objMatch As Object
Dim strName As String
Dim varValue As Variant
Dim objArrayElts As Object
strType = Left(Right(strTokenKey, 4), 3)
strContent = objTokens(strTokenKey)
With objRegEx
.Global = True
Select Case strType
Case "obj"
.Pattern = "<\d+\w{3}>"
Set objMatches = .Execute(strContent)
Set varTransfer = CreateObject("Scripting.Dictionary")
For Each objMatch In objMatches
Retrieve objTokens, objRegEx, objMatch.Value, varTransfer
Next
Case "prp"
.Pattern = "<\d+\w{3}>"
Set objMatches = .Execute(strContent)
Retrieve objTokens, objRegEx, objMatches(0).Value, strName
Retrieve objTokens, objRegEx, objMatches(1).Value, varValue
If IsObject(varValue) Then
Set varTransfer(strName) = varValue
Else
varTransfer(strName) = varValue
End If
Case "arr"
.Pattern = "<\d+\w{3}>"
Set objMatches = .Execute(strContent)
Set objArrayElts = CreateObject("Scripting.Dictionary")
For Each objMatch In objMatches
Retrieve objTokens, objRegEx, objMatch.Value, varValue
If IsObject(varValue) Then
Set objArrayElts(objArrayElts.Count) = varValue
Else
objArrayElts(objArrayElts.Count) = varValue
End If
varTransfer = objArrayElts.Items
Next
Case "nam"
varTransfer = strContent
Case "str"
varTransfer = Mid(strContent, 2, Len(strContent) - 2)
varTransfer = Replace(varTransfer, "\""", """")
varTransfer = Replace(varTransfer, "\\", "\")
varTransfer = Replace(varTransfer, "\/", "/")
varTransfer = Replace(varTransfer, "\b", Chr(8))
varTransfer = Replace(varTransfer, "\f", Chr(12))
varTransfer = Replace(varTransfer, "\n", vbLf)
varTransfer = Replace(varTransfer, "\r", vbCr)
varTransfer = Replace(varTransfer, "\t", vbTab)
.Global = False
.Pattern = "\\u[0-9a-fA-F]{4}"
Do While .Test(varTransfer)
varTransfer = .Replace(varTransfer, ChrW(("&H" & Right(.Execute(varTransfer)(0).Value, 4)) * 1))
Loop
Case "num"
varTransfer = Evaluate(strContent)
Case "cst"
Select Case LCase(strContent)
Case "true"
varTransfer = True
Case "false"
varTransfer = False
Case "null"
varTransfer = Null
End Select
End Select
End With
End Sub
Function BeautifyJson(varJson As Variant) As String
Dim strResult As String
Dim lngIndent As Long
BeautifyJson = ""
lngIndent = 0
BeautyTraverse BeautifyJson, lngIndent, varJson, vbTab, 1
End Function
Sub BeautyTraverse(strResult As String, lngIndent As Long, varElement As Variant, strIndent As String, lngStep As Long)
Dim arrKeys() As Variant
Dim lngIndex As Long
Dim strTemp As String
Select Case VarType(varElement)
Case vbObject
If varElement.Count = 0 Then
strResult = strResult & "{}"
Else
strResult = strResult & "{" & vbCrLf
lngIndent = lngIndent + lngStep
arrKeys = varElement.Keys
For lngIndex = 0 To UBound(arrKeys)
strResult = strResult & String(lngIndent, strIndent) & """" & arrKeys(lngIndex) & """" & ": "
BeautyTraverse strResult, lngIndent, varElement(arrKeys(lngIndex)), strIndent, lngStep
If Not (lngIndex = UBound(arrKeys)) Then strResult = strResult & ","
strResult = strResult & vbCrLf
Next
lngIndent = lngIndent - lngStep
strResult = strResult & String(lngIndent, strIndent) & "}"
End If
Case Is >= vbArray
If UBound(varElement) = -1 Then
strResult = strResult & "[]"
Else
strResult = strResult & "[" & vbCrLf
lngIndent = lngIndent + lngStep
For lngIndex = 0 To UBound(varElement)
strResult = strResult & String(lngIndent, strIndent)
BeautyTraverse strResult, lngIndent, varElement(lngIndex), strIndent, lngStep
If Not (lngIndex = UBound(varElement)) Then strResult = strResult & ","
strResult = strResult & vbCrLf
Next
lngIndent = lngIndent - lngStep
strResult = strResult & String(lngIndent, strIndent) & "]"
End If
Case vbInteger, vbLong, vbSingle, vbDouble
strResult = strResult & varElement
Case vbNull
strResult = strResult & "Null"
Case vbBoolean
strResult = strResult & IIf(varElement, "True", "False")
Case Else
strTemp = Replace(varElement, "\""", """")
strTemp = Replace(strTemp, "\", "\\")
strTemp = Replace(strTemp, "/", "\/")
strTemp = Replace(strTemp, Chr(8), "\b")
strTemp = Replace(strTemp, Chr(12), "\f")
strTemp = Replace(strTemp, vbLf, "\n")
strTemp = Replace(strTemp, vbCr, "\r")
strTemp = Replace(strTemp, vbTab, "\t")
strResult = strResult & """" & strTemp & """"
End Select
End Sub
Function ReadTextFile(strPath As String, lngFormat As Long) As String
' lngFormat -2 - System default, -1 - Unicode, 0 - ASCII
With CreateObject("Scripting.FileSystemObject").OpenTextFile(strPath, 1, False, lngFormat)
ReadTextFile = ""
If Not .AtEndOfStream Then ReadTextFile = .ReadAll
.Close
End With
End Function
Sub WriteTextFile(strContent As String, strPath As String, lngFormat As Long)
With CreateObject("Scripting.FileSystemObject").OpenTextFile(strPath, 2, True, lngFormat)
.Write (strContent)
.Close
End With
End Sub
One more opportunity of this JSON RegEx parser is that it works on 64-bit Office, where ScriptControl isn't available.
INITIAL (May 27 '15)
Here is one more method to parse JSON in VBA, based on ScriptControl
ActiveX, without external libraries:
Sub JsonTest()
Dim Dict, Temp, Text, Keys, Items
' Converting JSON string to appropriate nested dictionaries structure
' Dictionaries have numeric keys for JSON Arrays, and string keys for JSON Objects
' Returns Nothing in case of any JSON syntax issues
Set Dict = GetJsonDict("{a:[[{stuff:'result'}]], b:''}")
' You can use For Each ... Next and For ... Next loops through keys and items
Keys = Dict.Keys
Items = Dict.Items
' Referring directly to the necessary property if sure, without any checks
MsgBox Dict("a")(0)(0)("stuff")
' Auxiliary DrillDown() function
' Drilling down the structure, sequentially checking if each level exists
Select Case False
Case DrillDown(Dict, "a", Temp, "")
Case DrillDown(Temp, 0, Temp, "")
Case DrillDown(Temp, 0, Temp, "")
Case DrillDown(Temp, "stuff", "", Text)
Case Else
' Structure is consistent, requested value found
MsgBox Text
End Select
End Sub
Function GetJsonDict(JsonString As String)
With CreateObject("ScriptControl")
.Language = "JScript"
.ExecuteStatement "function gettype(sample) {return {}.toString.call(sample).slice(8, -1)}"
.ExecuteStatement "function evaljson(json, er) {try {var sample = eval('(' + json + ')'); var type = gettype(sample); if(type != 'Array' && type != 'Object') {return er;} else {return getdict(sample);}} catch(e) {return er;}}"
.ExecuteStatement "function getdict(sample) {var type = gettype(sample); if(type != 'Array' && type != 'Object') return sample; var dict = new ActiveXObject('Scripting.Dictionary'); if(type == 'Array') {for(var key = 0; key < sample.length; key++) {dict.add(key, getdict(sample[key]));}} else {for(var key in sample) {dict.add(key, getdict(sample[key]));}} return dict;}"
Set GetJsonDict = .Run("evaljson", JsonString, Nothing)
End With
End Function
Function DrillDown(Source, Prop, Target, Value)
Select Case False
Case TypeName(Source) = "Dictionary"
Case Source.exists(Prop)
Case Else
Select Case True
Case TypeName(Source(Prop)) = "Dictionary"
Set Target = Source(Prop)
Value = Empty
Case IsObject(Source(Prop))
Set Value = Source(Prop)
Set Target = Nothing
Case Else
Value = Source(Prop)
Set Target = Nothing
End Select
DrillDown = True
Exit Function
End Select
DrillDown = False
End Function
If you want to build on top of ScriptControl
, you can add a few helper method to get at the required information. The JScriptTypeInfo
object is a bit unfortunate: it contains all the relevant information (as you can see in the Watch window) but it seems impossible to get at it with VBA. However, the Javascript engine can help us:
Option Explicit
Private ScriptEngine As ScriptControl
Public Sub InitScriptEngine()
Set ScriptEngine = New ScriptControl
ScriptEngine.Language = "JScript"
ScriptEngine.AddCode "function getProperty(jsonObj, propertyName) { return jsonObj[propertyName]; } "
ScriptEngine.AddCode "function getKeys(jsonObj) { var keys = new Array(); for (var i in jsonObj) { keys.push(i); } return keys; } "
End Sub
Public Function DecodeJsonString(ByVal JsonString As String)
Set DecodeJsonString = ScriptEngine.Eval("(" + JsonString + ")")
End Function
Public Function GetProperty(ByVal JsonObject As Object, ByVal propertyName As String) As Variant
GetProperty = ScriptEngine.Run("getProperty", JsonObject, propertyName)
End Function
Public Function GetObjectProperty(ByVal JsonObject As Object, ByVal propertyName As String) As Object
Set GetObjectProperty = ScriptEngine.Run("getProperty", JsonObject, propertyName)
End Function
Public Function GetKeys(ByVal JsonObject As Object) As String()
Dim Length As Integer
Dim KeysArray() As String
Dim KeysObject As Object
Dim Index As Integer
Dim Key As Variant
Set KeysObject = ScriptEngine.Run("getKeys", JsonObject)
Length = GetProperty(KeysObject, "length")
ReDim KeysArray(Length - 1)
Index = 0
For Each Key In KeysObject
KeysArray(Index) = Key
Index = Index + 1
Next
GetKeys = KeysArray
End Function
Public Sub TestJsonAccess()
Dim JsonString As String
Dim JsonObject As Object
Dim Keys() As String
Dim Value As Variant
Dim j As Variant
InitScriptEngine
JsonString = "{""key1"": ""val1"", ""key2"": { ""key3"": ""val3"" } }"
Set JsonObject = DecodeJsonString(CStr(JsonString))
Keys = GetKeys(JsonObject)
Value = GetProperty(JsonObject, "key1")
Set Value = GetObjectProperty(JsonObject, "key2")
End Sub
A few notes:
- If the
JScriptTypeInfo
instance refers to a Javascript object,For Each ... Next
won't work. However, it does work if it refers to a Javascript array (seeGetKeys
function). - The access properties whose name is only known at run-time, use the functions
GetProperty
andGetObjectProperty
. - The Javascript array provides the properties
length
,0
,Item 0
,1
,Item 1
etc. With the VBA dot notation (jsonObject.property
), only the length property is accessible and only if you declare a variable calledlength
with all lowercase letters. Otherwise the case doesn't match and it won't find it. The other properties are not valid in VBA. So better use theGetProperty
function. - The code uses early binding. So you have to add a reference to "Microsoft Script Control 1.0".
- You have to call
InitScriptEngine
once before using the other functions to do some basic initialization.
To parse JSON in VBA without adding a huge library to your workbook project, I created the following solution. It's extremely fast and stores all of the keys and values in a dictionary for easy access:
Function ParseJSON(json$, Optional key$ = "obj") As Object
p = 1
token = Tokenize(json)
Set dic = CreateObject("Scripting.Dictionary")
If token(p) = "{" Then ParseObj key Else ParseArr key
Set ParseJSON = dic
End Function
Function ParseObj(key$)
Do: p = p + 1
Select Case token(p)
Case "]"
Case "[": ParseArr key
Case "{"
If token(p + 1) = "}" Then
p = p + 1
dic.Add key, "null"
Else
ParseObj key
End If
Case "}": key = ReducePath(key): Exit Do
Case ":": key = key & "." & token(p - 1)
Case ",": key = ReducePath(key)
Case Else: If token(p + 1) <> ":" Then dic.Add key, token(p)
End Select
Loop
End Function
Function ParseArr(key$)
Dim e&
Do: p = p + 1
Select Case token(p)
Case "}"
Case "{": ParseObj key & ArrayID(e)
Case "[": ParseArr key
Case "]": Exit Do
Case ":": key = key & ArrayID(e)
Case ",": e = e + 1
Case Else: dic.Add key & ArrayID(e), token(p)
End Select
Loop
End Function
The code above does use a few helper functions, but the above is the meat of it.
The strategy used here is to employ a recursive tokenizer. I found it interesting enough to write an article about this solution on Medium. It explains the details.
Here is the full (yet surprisingly short) code listing, including all of the helper functions:
'-------------------------------------------------------------------
' VBA JSON Parser
'-------------------------------------------------------------------
Option Explicit
Private p&, token, dic
Function ParseJSON(json$, Optional key$ = "obj") As Object
p = 1
token = Tokenize(json)
Set dic = CreateObject("Scripting.Dictionary")
If token(p) = "{" Then ParseObj key Else ParseArr key
Set ParseJSON = dic
End Function
Function ParseObj(key$)
Do: p = p + 1
Select Case token(p)
Case "]"
Case "[": ParseArr key
Case "{"
If token(p + 1) = "}" Then
p = p + 1
dic.Add key, "null"
Else
ParseObj key
End If
Case "}": key = ReducePath(key): Exit Do
Case ":": key = key & "." & token(p - 1)
Case ",": key = ReducePath(key)
Case Else: If token(p + 1) <> ":" Then dic.Add key, token(p)
End Select
Loop
End Function
Function ParseArr(key$)
Dim e&
Do: p = p + 1
Select Case token(p)
Case "}"
Case "{": ParseObj key & ArrayID(e)
Case "[": ParseArr key
Case "]": Exit Do
Case ":": key = key & ArrayID(e)
Case ",": e = e + 1
Case Else: dic.Add key & ArrayID(e), token(p)
End Select
Loop
End Function
'-------------------------------------------------------------------
' Support Functions
'-------------------------------------------------------------------
Function Tokenize(s$)
Const Pattern = """(([^""\\]|\\.)*)""|[+\-]?(?:0|[1-9]\d*)(?:\.\d*)?(?:[eE][+\-]?\d+)?|\w+|[^\s""']+?"
Tokenize = RExtract(s, Pattern, True)
End Function
Function RExtract(s$, Pattern, Optional bGroup1Bias As Boolean, Optional bGlobal As Boolean = True)
Dim c&, m, n, v
With CreateObject("vbscript.regexp")
.Global = bGlobal
.MultiLine = False
.IgnoreCase = True
.Pattern = Pattern
If .TEST(s) Then
Set m = .Execute(s)
ReDim v(1 To m.Count)
For Each n In m
c = c + 1
v(c) = n.value
If bGroup1Bias Then If Len(n.submatches(0)) Or n.value = """""" Then v(c) = n.submatches(0)
Next
End If
End With
RExtract = v
End Function
Function ArrayID$(e)
ArrayID = "(" & e & ")"
End Function
Function ReducePath$(key$)
If InStr(key, ".") Then ReducePath = Left(key, InStrRev(key, ".") - 1)
End Function
Function ListPaths(dic)
Dim s$, v
For Each v In dic
s = s & v & " --> " & dic(v) & vbLf
Next
Debug.Print s
End Function
Function GetFilteredValues(dic, match)
Dim c&, i&, v, w
v = dic.keys
ReDim w(1 To dic.Count)
For i = 0 To UBound(v)
If v(i) Like match Then
c = c + 1
w(c) = dic(v(i))
End If
Next
ReDim Preserve w(1 To c)
GetFilteredValues = w
End Function
Function GetFilteredTable(dic, cols)
Dim c&, i&, j&, v, w, z
v = dic.keys
z = GetFilteredValues(dic, cols(0))
ReDim w(1 To UBound(z), 1 To UBound(cols) + 1)
For j = 1 To UBound(cols) + 1
z = GetFilteredValues(dic, cols(j - 1))
For i = 1 To UBound(z)
w(i, j) = z(i)
Next
Next
GetFilteredTable = w
End Function
Function OpenTextFile$(f)
With CreateObject("ADODB.Stream")
.Charset = "utf-8"
.Open
.LoadFromFile f
OpenTextFile = .ReadText
End With
End Function
As Json is nothing but strings so it can easily be handled if we can manipulate it the right way, no matter how complex the structure is. I don't think it is necessary to use any external library or converter to do the trick. Here is an example where I've parsed json data using string manipulation.
Sub FetchData()
Dim str As Variant, N&, R&
With New XMLHTTP60
.Open "GET", "https://oresapp.asicanada.net/ores.imis.services/api/member/?address=&callback=angular.callbacks._0&city=&companyName=&personName=", False
.send
str = Split(.responseText, ":[{""Id"":")
End With
N = UBound(str)
For R = 1 To N
Cells(R, 1) = Split(Split(str(R), "FullName"":""")(1), """")(0)
Cells(R, 2) = Split(Split(str(R), "Phone"":""")(1), """")(0)
Cells(R, 3) = Split(Split(str(R), "Email"":""")(1), """")(0)
Next R
End Sub