In c split char* on spaces with strtok function, except if between quotes
Neither `strtok` nor any other function in the standard C library can do this for you. To get this behavior, you have to write the code yourself or find existing code in an external library.
This function takes delimiting, openblock and closeblock characters. Delimiting characters are ignored within the block and closing block characters must match the opening block characters. The example splits on space and blocks are defined by quote and brackets, braces and <>. Thanks to Jongware for comments!
#include<stdlib.h>
#include<stdio.h>
#include<string.h>
/*
 * strmbtok - strtok-like tokenizer that does not split inside blocks.
 *
 * input      String to tokenize on the first call; NULL on later calls to
 *            continue with the string from the previous call. The string
 *            is modified: each delimiter that ends a token is overwritten
 *            with '\0'.
 * delimit    Characters that separate tokens outside of blocks.
 * openblock  Characters that start a block.
 * closeblock Characters that end a block; closeblock[i] closes the block
 *            opened by openblock[i]. An opener with no counterpart (or an
 *            unterminated block) extends to the end of the string.
 *
 * Returns a pointer to the next token, or NULL when a continuation call
 * finds nothing left (or no string was ever supplied). A first call always
 * returns `input`, even when it is empty. Consecutive delimiters yield
 * empty tokens. Block characters are kept in the returned token.
 *
 * The scan position lives in a static variable, so only one string can be
 * tokenized at a time.
 */
char *strmbtok ( char *input, char *delimit, char *openblock, char *closeblock) {
    static char *token = NULL; /* resume position for continuation calls */
    char *lead;
    char *block;
    char closer = '\0';        /* character that ends the current block */
    int iBlock = 0;            /* non-zero while scanning inside a block */
    size_t closeLen;

    if ( input != NULL) {
        token = input;
    }
    else if ( token == NULL || *token == '\0') {
        /* Nothing to continue: never started, or previous string exhausted. */
        return NULL;
    }
    lead = token;
    closeLen = strlen ( closeblock);

    while ( *token != '\0') {
        if ( iBlock) {
            /* Delimiters are ignored here; only the matching closer counts. */
            if ( *token == closer) {
                iBlock = 0;
            }
            token++;
            continue;
        }
        if ( ( block = strchr ( openblock, *token)) != NULL) {
            size_t index = (size_t) ( block - openblock);

            iBlock = 1;
            /* Bounds-check so a short closeblock is never read past its end;
               '\0' can never match inside the loop, so such a block runs to
               the end of the string. */
            closer = ( index < closeLen) ? closeblock[index] : '\0';
            token++;
            continue;
        }
        if ( strchr ( delimit, *token) != NULL) {
            /* Terminate the token and remember where the next one starts. */
            *token = '\0';
            token++;
            break;
        }
        token++;
    }
    return lead;
}
/* Demo: split a sample sentence on spaces while keeping quoted, bracketed,
   braced and angle-bracketed blocks intact, printing one token per line. */
int main (int argc , char *argv[]) {
    char openers[] = "\"[<{";
    char closers[] = "\"]>}";
    char sentence[] = "this contains blocks \"a [quoted block\" and a [bracketed \"block] and <other ]\" blocks>";
    char *piece;

    (void) argc;
    (void) argv;

    /* The first call passes the string; later calls pass NULL to continue. */
    piece = strmbtok ( sentence, " ", openers, closers);
    do {
        printf ( "%s\n", piece);
        piece = strmbtok ( NULL, " ", openers, closers);
    } while ( piece != NULL);

    return 0;
}
output
this
contains
blocks
"a [quoted block"
and
a
[bracketed "block]
and
<other ]" blocks>
No luck using `strtok()`.
Fun opportunity to employ a state machine.
#include <stdio.h>
// Print the characters in [frm, to) on stdout, wrapped as <...> and
// followed by a newline. Nothing is printed between the brackets when
// frm is not below to.
void printstring(const char *frm, const char *to) {
    const char *cursor;

    putchar('<'); // brackets make the token boundaries visible
    for (cursor = frm; cursor < to; cursor++) {
        putchar(*cursor);
    }
    putchar('>');
    putchar('\n');
}
// Split s on space characters, except for spaces that fall between a pair
// of double quotes, and hand each token to printstring(). Quote characters
// stay part of the token. An unterminated quote extends to the end of s.
void split_space_not_quote(const char *s) {
    enum { BETWEEN_TOKENS, IN_TEXT, IN_QUOTE } mode = BETWEEN_TOKENS;
    const char *begin = s; // start of the token currently being scanned

    for (; *s != '\0'; s++) {
        const char ch = *s;

        if (mode == BETWEEN_TOKENS) {
            if (ch == ' ') {
                continue; // still skipping separators
            }
            // Any non-space character starts a token.
            begin = s;
            mode = (ch == '\"') ? IN_QUOTE : IN_TEXT;
        } else if (mode == IN_TEXT) {
            if (ch == ' ') {
                // An unquoted space ends the token.
                printstring(begin, s);
                mode = BETWEEN_TOKENS;
            } else if (ch == '\"') {
                mode = IN_QUOTE; // quote opens in the middle of a token
            }
        } else { // IN_QUOTE
            if (ch == '\"') {
                mode = IN_TEXT; // closing quote; token continues until a space
            }
        }
    }

    // Flush the token that was still open when the string ended.
    if (mode != BETWEEN_TOKENS) {
        printstring(begin, s);
    }
}
// Demo: tokenize a sample command that contains one quoted phrase.
int main(void) {
    const char *sample = "Insert \"hello world\" to dbms";

    split_space_not_quote(sample);
    return 0;
}
<Insert>
<"hello world">
<to>
<dbms>