Split a string by commas but ignore commas within double quotes using JavaScript
Here is a JavaScript function to do it:
/**
 * Splits a string on commas, keeping commas that sit inside a pair of
 * double quotes as part of the field.
 *
 * The string is first split on every comma; pieces that belong to the same
 * quoted field are then joined back together. A piece opens (or closes) a
 * quoted field when it holds an odd number of double quotes, so a piece such
 * as `"abc"` that is already balanced is left alone.
 *
 * The surrounding double quotes are kept in the returned fields.
 *
 * @param {string} str - The line to split.
 * @returns {string[]} The fields, in order. If an opening quote is never
 *   closed, the remaining pieces are returned unmerged.
 */
function splitCSVButIgnoreCommasInDoublequotes(str) {
  const delimiter = ',';
  const quote = '"';

  // A piece with an odd number of quotes starts or ends a quoted field.
  const hasUnbalancedQuotes = (piece) => (piece.split(quote).length - 1) % 2 === 1;

  const elements = str.split(delimiter);
  const newElements = [];

  for (let i = 0; i < elements.length; ++i) {
    if (hasUnbalancedQuotes(elements[i])) {
      // Look ahead for the piece that closes the quoted field.
      let indexOfClosingPiece = -1;
      for (let j = i + 1; j < elements.length; ++j) {
        if (hasUnbalancedQuotes(elements[j])) {
          indexOfClosingPiece = j;
          break;
        }
      }

      if (indexOfClosingPiece !== -1) {
        // Re-join everything from the opening piece to the closing piece.
        newElements.push(elements.slice(i, indexOfClosingPiece + 1).join(delimiter));
        i = indexOfClosingPiece;
        continue;
      }
      // No closing quote: fall through and keep the piece as-is.
    }

    newElements.push(elements[i]);
  }

  return newElements;
}
regex: /,(?=(?:(?:[^"]*"){2})*[^"]*$)/
const input_line = '"2C95699FFC68","201 S BOULEVARDRICHMOND, VA 23220","8299600062754882","2018-09-23"'
// Split on a comma only when the rest of the string after it contains an
// even number of double quotes, i.e. the comma is not inside a quoted field.
// The result is the full array of fields (quotes are kept).
let my_split = input_line.split(/,(?=(?:(?:[^"]*"){2})*[^"]*$)/)
Output:
my_split[0]: "2C95699FFC68",
my_split[1]: "201 S BOULEVARDRICHMOND, VA 23220",
my_split[2]: "8299600062754882",
my_split[3]: "2018-09-23"
See the following link for an explanation of the regex: regexr.com/44u6o
Here's what I would do.
const str = 'a, b, c, "d, e, f", g, h';

/*
 * Pattern, piece by piece:
 *   ".*?"          a double quote, the shortest possible run of characters
 *                  (other than line breaks), then a double quote
 *   [^",\s]+       otherwise: one or more characters that are not a double
 *                  quote, a comma or whitespace
 *   (?=\s*,|\s*$)  whichever piece matched must be followed by optional
 *                  whitespace and then a comma or the end of the string
 */
const fieldPattern = /(".*?"|[^",\s]+)(?=\s*,|\s*$)/g;

// match() returns null when nothing matches; fall back to an empty array
// so the iteration below never touches null.
const arr = str.match(fieldPattern) || [];

arr.forEach((field, index) => {
  console.log('arr[' + index + '] =', field);
});
Here's a non-regex version that assumes double quotes always come in pairs:
/**
 * Splits a line on commas while leaving commas inside double-quoted
 * sections untouched. Assumes double quotes come in pairs.
 *
 * @param {string} str - The line to split.
 * @returns {string[]} The fields in order; quote characters are kept.
 */
function splitCsv(str) {
  const fields = [];
  let insideQuotes = false;

  for (const piece of str.split(',')) {
    if (insideQuotes) {
      // Still inside a quoted section: glue the piece back onto the last field.
      fields[fields.length - 1] += ',' + piece;
    } else {
      fields.push(piece);
    }

    // An odd number of quotes in this piece opens or closes a quoted section.
    const quoteCount = piece.split('"').length - 1;
    if (quoteCount % 2 === 1) {
      insideQuotes = !insideQuotes;
    }
  }

  return fields;
}
// Demo: print each actual result next to the array it is expected to equal.
const splitCsvExamples = [
  ['asdf,"a,d",fdsa', ['asdf','"a,d"','fdsa']],
  [',asdf,,fds,', ['','asdf','','fds','']],
  ['asdf,"a,,,d",fdsa', ['asdf','"a,,,d"','fdsa']],
];
for (const [input, expected] of splitCsvExamples) {
  console.log(splitCsv(input), ' should be ', expected);
}