file name matching with wildcard
For wildcard name matching using '*' and '?' try this (if you want to avoid boost, use std::tr1::regex):
#include <boost/regex.hpp>
#include <boost/algorithm/string/replace.hpp>
using std::string;
bool MatchTextWithWildcards(const string &text, string wildcardPattern, bool caseSensitive /*= true*/)
{
// Escape all regex special chars
EscapeRegex(wildcardPattern);
// Convert chars '*?' back to their regex equivalents
boost::replace_all(wildcardPattern, "\\?", ".");
boost::replace_all(wildcardPattern, "\\*", ".*");
boost::regex pattern(wildcardPattern, caseSensitive ? regex::normal : regex::icase);
return regex_match(text, pattern);
}
void EscapeRegex(string ®ex)
{
boost::replace_all(regex, "\\", "\\\\");
boost::replace_all(regex, "^", "\\^");
boost::replace_all(regex, ".", "\\.");
boost::replace_all(regex, "$", "\\$");
boost::replace_all(regex, "|", "\\|");
boost::replace_all(regex, "(", "\\(");
boost::replace_all(regex, ")", "\\)");
boost::replace_all(regex, "{", "\\{");
boost::replace_all(regex, "{", "\\}");
boost::replace_all(regex, "[", "\\[");
boost::replace_all(regex, "]", "\\]");
boost::replace_all(regex, "*", "\\*");
boost::replace_all(regex, "+", "\\+");
boost::replace_all(regex, "?", "\\?");
boost::replace_all(regex, "/", "\\/");
}
Here's my attempt at this.
It's "C++", but I've deliberately kept it almost completely C-compatible.
All you need to do in order to convert it to C is to remove the template
section and change Pattern
and Text
to something like char const *
.
// TEST THIS before use! I've only done limited testing.
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
template<class Pattern, class Text>
bool wildcard(
Pattern const pat_begin, Pattern const pat_end,
Text text_begin, Text const text_end)
{
ptrdiff_t const pat_size = pat_end - pat_begin;
ptrdiff_t stackbuf[64];
size_t c = sizeof(stackbuf) / sizeof(*stackbuf);
ptrdiff_t *p = stackbuf;
size_t n = 0;
p[n++] = 0;
while (n > 0 && text_begin != text_end)
{
for (size_t i = 0; i < n; i++)
{
if (p[i] == pat_size)
{
p[i--] = p[--n];
continue;
}
switch (*(pat_begin + p[i]))
{
case '?': ++p[i]; break;
case '*':
ptrdiff_t off;
off = p[i];
while (off < pat_size &&
*(pat_begin + off) == '*')
{ ++off; }
if (n == c)
{
ptrdiff_t const *const old = p;
c *= 2;
if (c == 0) { ++c; }
size_t const size = c * sizeof(*p);
p = (ptrdiff_t *)realloc(
old == stackbuf ? NULL : p,
size);
if (old == stackbuf)
{ memcpy(p, old, n * sizeof(*old)); }
}
p[n++] = off;
break;
default:
if (*(pat_begin + p[i]) == *text_begin)
{ ++p[i]; }
else { p[i--] = p[--n]; }
break;
}
}
++text_begin;
}
bool success = false;
if (text_begin == text_end)
{
while (!success && n > 0)
{
--n;
while (p[n] != pat_size &&
*(pat_begin + p[n]) == '*')
{ ++p[n]; }
if (p[n] == pat_size)
{ success = true; }
}
}
if (p != stackbuf) { free(p); }
return success;
}
bool wildcard(char const *const pattern, char const *const text)
{
return wildcard(
pattern,
pattern + (pattern ? strlen(pattern) : 0),
text,
text + (text ? strlen(text) : 0));
}
bool wildcard(wchar_t const *const pattern, wchar_t const *const text)
{
return wildcard(
pattern,
pattern + (pattern ? wcslen(pattern) : 0),
text,
text + (text ? wcslen(text) : 0));
}
Of course, feel free to use the code in any way you want. :)
There are quite a few such functions around. Here's a directory of various implementations, sorted into recursive and non-recursive, etc.
In case you don't like the licensing there (or have trouble with the link, etc.) here's one possible implementation of a matching algorithm that at least closely approximates what Windows uses:
#include <string.h>
#include <iostream>
bool match(char const *needle, char const *haystack) {
for (; *needle != '\0'; ++needle) {
switch (*needle) {
case '?':
if (*haystack == '\0')
return false;
++haystack;
break;
case '*': {
if (needle[1] == '\0')
return true;
size_t max = strlen(haystack);
for (size_t i = 0; i < max; i++)
if (match(needle + 1, haystack + i))
return true;
return false;
}
default:
if (*haystack != *needle)
return false;
++haystack;
}
}
return *haystack == '\0';
}
#ifdef TEST
#define CATCH_CONFIG_MAIN
#include "catch.hpp"
TEST_CASE("Matching", "[match]") {
REQUIRE(match("a", "a") == true);
REQUIRE(match("a", "b") == false);
REQUIRE(match("a*", "a") == true);
REQUIRE(match("a?", "a") == false);
REQUIRE(match("a?", "ab") == true);
REQUIRE(match("a*b", "ab") == true);
REQUIRE(match("a*b", "acb") == true);
REQUIRE(match("a*b", "abc") == false);
REQUIRE(match("*a*??????a?????????a???????????????",
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") == true);
}
#endif
Since there was a discussion of complexity of some of the other answers, I'll note that I believe this has O(NM) complexity and O(M) storage use (where N is the size of the target string, and M is the size of the pattern).
With @masterxilo's test pair:
"*a*??????*a*?????????a???????????????", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
...this finds a match in approximately 3 microseconds on my machine. That is a lot slower than a typical pattern--most of my other tests run in about 300 nanoseconds or so on this particular machine.
At the same time, @masterxilo's code takes approximately 11 microseconds to run on the same machine, so this is still around 3 to 4 times faster (not to mention being somewhat smaller and simpler).
Have a look at the POSIX functions fnmatch
, glob
, and wordexp
.