How do I parse JSON sprinkled unpredictably into a string?
I could try to get around that by doing similar quote counting math, but then I also have to account for escaped quotes. At that point it feels like I'm redoing way too much of JSON.parse's job. Is there a better way to solve this problem?
I don't think so. Your input is pretty far from JSON. But accounting for all those things isn't that hard.
The following snippet should work:
/**
 * Split a string into plain-text pieces and parsed JSON objects.
 *
 * The string is scanned once, tracking brace depth and whether the cursor is
 * inside a JSON string literal (so braces and escaped quotes inside string
 * values are ignored). Every balanced top-level `{...}` span is handed to
 * JSON.parse; everything between such spans is returned as plain text.
 *
 * @param {string} str - Text with JSON objects interleaved.
 * @returns {Array<string|Object>} Text pieces and parsed objects, in input order.
 *   Empty text pieces are never emitted.
 * @throws {SyntaxError} If a balanced `{...}` span is not valid JSON.
 */
function construct(str) {
  const len = str.length;
  const result = [];
  // Index of the last character already emitted (-1: nothing emitted yet).
  let lastSavedIndex = -1;
  let bracketLevel = 0;
  let inJsonString = false;
  let lastCharWasEscapeChar = false;

  for (let i = 0; i < len; ++i) {
    const ch = str[i];
    if (bracketLevel !== 0 && !lastCharWasEscapeChar && ch === '"') {
      // Unescaped quote inside an object: enter or leave a string literal.
      inJsonString = !inJsonString;
    } else if (!inJsonString && ch === '{') {
      if (bracketLevel === 0) {
        // Start of a top-level object: flush the text before it, unless it
        // is empty (object at the start or right after another object).
        if (i > lastSavedIndex + 1) {
          result.push(str.substring(lastSavedIndex + 1, i));
        }
        lastSavedIndex = i - 1;
      }
      ++bracketLevel;
    } else if (!inJsonString && ch === '}') {
      // A `}` with no open object is ordinary text; decrementing here would
      // push the depth negative and hide every later object.
      if (bracketLevel > 0) {
        --bracketLevel;
        if (bracketLevel === 0) {
          result.push(JSON.parse(str.substring(lastSavedIndex + 1, i + 1)));
          lastSavedIndex = i;
        }
      }
    } else if (inJsonString && ch === '\\') {
      // Toggle so that an escaped backslash (`\\`) does not escape what follows.
      lastCharWasEscapeChar = !lastCharWasEscapeChar;
    } else {
      lastCharWasEscapeChar = false;
    }
  }

  // Trailing text (also covers an unterminated `{...` at the end).
  if (lastSavedIndex !== len - 1) {
    result.push(str.substring(lastSavedIndex + 1, len));
  }
  return result;
}
// Sample input shown in the textarea when the demo loads.
const standardText = 'This is a string {"with":"json","in":"it"} followed by more text {"and":{"some":["more","json"]}} and more text. {"foo": "bar}"}'

// Page elements the demo reads from and writes to.
const inputTA = document.getElementById('input')
const outputDiv = document.getElementById('output')

/**
 * Run construct() on the textarea's current value and display the result
 * as pretty-printed JSON (2-space indent) in the output element.
 */
function updateOutput() {
  const pieces = construct(inputTA.value)
  const rendered = JSON.stringify(pieces, null, 2)
  outputDiv.innerText = rendered
}

// Seed the textarea, refresh on every edit, and render once up front.
inputTA.value = standardText
inputTA.oninput = updateOutput
updateOutput()
<textarea id="input" rows="5" cols="50"></textarea>
<pre id="output"></pre>
You can check whether JSON.parse throws an error to determine if a chunk is a valid JSON object. If it throws, the unquoted } characters in the chunk are unbalanced, so extend the chunk to the next } and try again:
// Sample inputs: pure JSON, pure text, braces inside JSON strings, tiny
// objects, and several objects interleaved with text.
const tests = [
  '{"just":"json }}{}{}{{[]}}}}","x":[1,2,3]}',
  'Just a string',
  'This string has a tricky case: {"like":"this one } right here"}',
  'This string {} has a tiny JSON object in it.',
  '.{}.',
  'This is a string {"with":"json","in":"it"} followed by more text {"and":{"some":["more","json"]}} and more text',
];

// Log the chunk list produced for each sample.
for ( const test of tests ) {
  console.log( parse_json_interleaved_string( test ) );
}
/**
 * Split a string into alternating plain-text and JSON-object substrings.
 *
 * For every `{`, the candidate chunk is grown one `}` at a time until
 * JSON.parse accepts it, so braces inside JSON string values are handled
 * without re-implementing JSON tokenizing.
 *
 * @param {string} str - Text with JSON objects interleaved.
 * @returns {string[]} Chunks in input order; JSON chunks are returned as the
 *   original (unparsed) substrings. A string containing no `{` is returned
 *   as a single chunk.
 * @throws {Error} 'Unterminated JSON object in string' when a `{` cannot be
 *   completed into valid JSON by any later `}`.
 */
function parse_json_interleaved_string ( str ) {
  const chunks = [ ];
  // Index of the closing `}` of the most recently extracted JSON chunk
  // (-1 while none has been found). Keeping this meaning throughout avoids
  // skipping or dropping the character right after a JSON chunk.
  let last_json_end_index = -1;
  let json_index = str.indexOf( '{', last_json_end_index + 1 );
  for ( ; json_index !== -1; json_index = str.indexOf( '{', last_json_end_index + 1 ) ) {
    // Push the plain string before the JSON
    if ( json_index !== last_json_end_index + 1 ) {
      chunks.push( str.substring( last_json_end_index + 1, json_index ) );
    }
    // Find the end of the JSON
    let json_end_index = str.indexOf( '}', json_index + 1 );
    while ( true ) {
      // Check before parsing: with an end index of -1, substring() would swap
      // its arguments and hand JSON.parse the text *before* the brace.
      if ( json_end_index === -1 ) {
        throw new Error( 'Unterminated JSON object in string' );
      }
      try {
        JSON.parse( str.substring( json_index, json_end_index + 1 ) );
        break;
      } catch ( e ) {
        // Not valid yet: this `}` was nested or inside a string value.
        // Extend the candidate to the next `}` and retry.
        json_end_index = str.indexOf( '}', json_end_index + 1 );
      }
    }
    // Push JSON
    chunks.push( str.substring( json_index, json_end_index + 1 ) );
    last_json_end_index = json_end_index;
  }
  // Push final plain string if any
  if ( last_json_end_index === -1 ) {
    chunks.push( str );
  } else if ( last_json_end_index + 1 !== str.length ) {
    chunks.push( str.substring( last_json_end_index + 1 ) );
  }
  return chunks;
}
Here's a comparatively simple brute-force approach: split the whole input string on curly braces, then step through the array in order. Whenever you come across an open brace, find the longest chunk of the array from that starting point that successfully parses as JSON. Rinse and repeat.
This will not work if the input contains invalid JSON and/or unbalanced braces (see the last two test cases below).
/**
 * Attempt to parse a string as JSON without throwing.
 *
 * @param {string} input - Candidate JSON text.
 * @returns {*} The parsed value, or `false` when JSON.parse rejects the input.
 */
const tryJSON = (input) => {
  let parsed = false;
  try {
    parsed = JSON.parse(input);
  } catch (e) {
    // Invalid JSON: keep the `false` sentinel.
  }
  return parsed;
};
/**
 * Brute-force extraction of JSON objects embedded in a string.
 *
 * The input is split on every curly brace (the braces are kept as their own
 * pieces). At each `{`, candidate spans ending at a `}` are tried from the
 * longest to the shortest until one parses as JSON via tryJSON(). Anything
 * that is not part of a parsed object, including braces that do not belong
 * to valid JSON, is accumulated and returned as plain text, so no input
 * characters are lost.
 *
 * The result is also logged with console.log.
 *
 * @param {string} input - Text with JSON objects interleaved.
 * @returns {Array<string|Object>} Non-empty text pieces and parsed objects,
 *   in input order.
 */
const parse = input => {
  const output = [];
  const chunks = input.split(/([{}])/);
  // Loop-invariant: no candidate span can end after the final closing brace.
  const lastClose = chunks.lastIndexOf('}');
  // Plain text collected since the last parsed object.
  let text = '';

  const flushText = () => {
    if (text !== '') {
      output.push(text);
      text = '';
    }
  };

  for (let i = 0; i < chunks.length; i++) {
    let parsed = false;
    let end = -1;
    if (chunks[i] === '{') {
      // Possible JSON: start at the last } and backtrack until it parses.
      for (let j = lastClose; j > i; j--) {
        if (chunks[j] !== '}') {
          continue;
        }
        const candidate = tryJSON(chunks.slice(i, j + 1).join(''));
        if (candidate) {
          parsed = candidate;
          end = j;
          break;
        }
      }
    }
    if (end !== -1) {
      // Found an object: emit pending text, then the object, and skip ahead.
      flushText();
      output.push(parsed);
      i = end;
    } else {
      // Ordinary text, or a brace that starts no valid JSON: keep it verbatim.
      text += chunks[i];
    }
  }
  flushText();

  console.log(output);
  return output;
};
// Sample runs: four inputs with valid embedded JSON, then two whose braces
// do not enclose valid JSON.
for (const sample of [
  `{"foo": "bar"}`,
  `test{"foo": "b}ar{{[[[{}}}}{}{}}"}`,
  `this {"is": "a st}ri{ng"} with {"json": ["in", "i{t"]}`,
  `{}`,
  `this {"i{s": invalid}`,
  `So is {this: "one"}`,
]) {
  parse(sample);
}