Hexadecimal string to byte array in C
As far as I know, there's no standard function to do so, but it's simple to achieve in the following manner:
#include <stdio.h>
int main(int argc, char **argv) {
const char hexstring[] = "DEadbeef10203040b00b1e50", *pos = hexstring;
unsigned char val[12];
/* WARNING: no sanitization or error-checking whatsoever */
for (size_t count = 0; count < sizeof val/sizeof *val; count++) {
sscanf(pos, "%2hhx", &val[count]);
pos += 2;
}
printf("0x");
for(size_t count = 0; count < sizeof val/sizeof *val; count++)
printf("%02x", val[count]);
printf("\n");
return 0;
}
Edit
As Al pointed out, in case of an odd number of hex digits in the string, you have to make sure you prefix it with a starting 0. For example, the string "f00f5"
will be evaluated as {0xf0, 0x0f, 0x05}
erroneously by the above example, instead of the proper {0x0f, 0x00, 0xf5}
.
Amended the example a little bit to address the comment from @MassimoCallegari
For short strings, strtol
, strtoll
, and strtoimax
will work just fine (note that the third argument is the base to use in processing the string...set it to 16). If your input is longer than number-of-bits-in-the-longest-integer-type/4
then you'll need one of the more flexible methods suggested by other answers.
I found this question by Googling for the same thing. I don't like the idea of calling sscanf() or strtol() since it feels like overkill. I wrote a quick function which does not validate that the text is indeed the hexadecimal presentation of a byte stream, but will handle odd number of hex digits:
uint8_t tallymarker_hextobin(const char * str, uint8_t * bytes, size_t blen)
{
uint8_t pos;
uint8_t idx0;
uint8_t idx1;
// mapping of ASCII characters to hex values
const uint8_t hashmap[] =
{
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ........
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ........
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ........
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ........
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // !"#$%&'
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ()*+,-./
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, // 01234567
0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 89:;<=>?
0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, // @ABCDEFG
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // HIJKLMNO
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // PQRSTUVW
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // XYZ[\]^_
0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, // `abcdefg
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // hijklmno
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // pqrstuvw
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // xyz{|}~.
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ........
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ........
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ........
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ........
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ........
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ........
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ........
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ........
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ........
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ........
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ........
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ........
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ........
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ........
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ........
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 // ........
};
bzero(bytes, blen);
for (pos = 0; ((pos < (blen*2)) && (pos < strlen(str))); pos += 2)
{
idx0 = (uint8_t)str[pos+0];
idx1 = (uint8_t)str[pos+1];
bytes[pos/2] = (uint8_t)(hashmap[idx0] << 4) | hashmap[idx1];
};
return(0);
}
Apart from the excellent answers above I though I would write a C function that does not use any libraries and has some guards against bad strings.
uint8_t* datahex(char* string) {
if(string == NULL)
return NULL;
size_t slength = strlen(string);
if((slength % 2) != 0) // must be even
return NULL;
size_t dlength = slength / 2;
uint8_t* data = malloc(dlength);
memset(data, 0, dlength);
size_t index = 0;
while (index < slength) {
char c = string[index];
int value = 0;
if(c >= '0' && c <= '9')
value = (c - '0');
else if (c >= 'A' && c <= 'F')
value = (10 + (c - 'A'));
else if (c >= 'a' && c <= 'f')
value = (10 + (c - 'a'));
else {
free(data);
return NULL;
}
data[(index/2)] += value << (((index + 1) % 2) * 4);
index++;
}
return data;
}
Explanation:
a. index / 2 | Division between integers will round down the value, so 0/2 = 0, 1/2 = 0, 2/2 = 1, 3/2 = 1, 4/2 = 2, 5/2 = 2, etc. So, for every 2 string characters we add the value to 1 data byte.
b. (index + 1) % 2 | We want odd numbers to result to 1 and even to 0 since the first digit of a hex string is the most significant and needs to be multiplied by 16. so for index 0 => 0 + 1 % 2 = 1, index 1 => 1 + 1 % 2 = 0 etc.
c. << 4 | Shift by 4 is multiplying by 16. example: b00000001 << 4 = b00010000