In c split char* on spaces with strtok function, except if between quotes

strtok or any other function in the standard C library can't do this for you. To get it, you have to write code for it yourself, or you have to find some existing code in some external library.


This function takes delimiting, openblock and closeblock characters. Delimiting characters are ignored within the block and closing block characters must match the opening block characters. The example splits on space and blocks are defined by quote and brackets, braces and <>. Thanks to Jongware for comments!

#include<stdlib.h>
#include<stdio.h>
#include<string.h>

char *strmbtok ( char *input, char *delimit, char *openblock, char *closeblock) {
    static char *token = NULL;
    char *lead = NULL;
    char *block = NULL;
    int iBlock = 0;
    int iBlockIndex = 0;

    if ( input != NULL) {
        token = input;
        lead = input;
    }
    else {
        lead = token;
        if ( *token == '\0') {
            lead = NULL;
        }
    }

    while ( *token != '\0') {
        if ( iBlock) {
            if ( closeblock[iBlockIndex] == *token) {
                iBlock = 0;
            }
            token++;
            continue;
        }
        if ( ( block = strchr ( openblock, *token)) != NULL) {
            iBlock = 1;
            iBlockIndex = block - openblock;
            token++;
            continue;
        }
        if ( strchr ( delimit, *token) != NULL) {
            *token = '\0';
            token++;
            break;
        }
        token++;
    }
    return lead;
}

int main (int argc , char *argv[]) {
    char *tok;
    char acOpen[]  = {"\"[<{"};
    char acClose[] = {"\"]>}"};
    char acStr[] = {"this contains blocks \"a [quoted block\" and a [bracketed \"block] and <other ]\" blocks>"};

    tok = strmbtok ( acStr, " ", acOpen, acClose);
    printf ( "%s\n", tok);
    while ( ( tok = strmbtok ( NULL, " ", acOpen, acClose)) != NULL) {
        printf ( "%s\n", tok);
    }

    return 0;
}

output
this
contains
blocks
"a [quoted block"
and
a
[bracketed "block]
and


No luck using strtok().

Fun opportunity to employ a state machine.

#include <stdio.h>

void printstring(const char *frm, const char *to) {
  fputc('<', stdout);  // <...>\n Added for output clarity
  while (frm < to) {
    fputc(*frm++, stdout);
  }
  fputc('>', stdout);
  fputc('\n', stdout);
}

void split_space_not_quote(const char *s) {
  const char *start;
  int state = ' ';
  while (*s) {
    switch (state) {
      case '\n': // Could add various white-space here like \f \t \r \v
      case ' ': // Consuming spaces
        if (*s == '\"') {
          start = s;
          state = '\"';  // begin quote
        } else if (*s != ' ') {
          start = s;
          state = 'T';
        }
        break;
      case 'T': // non-quoted text
        if (*s == ' ') {
          printstring(start, s);
          state = ' ';
        } else if (*s == '\"') {
          state = '\"'; // begin quote
        }
        break;
      case '\"': // Inside a quote
        if (*s == '\"') {
          state = 'T'; // end quote
        }
        break;
    }
    s++;
  } // end while
  if (state != ' ') {
    printstring(start, s);
  }
}

int main(void) {
  split_space_not_quote("Insert \"hello world\" to dbms");
  return 0;
}

<Insert>
<"hello world">
<to>
<dbms>

Tags:

C