diff options
author | Yorhel <git@yorhel.nl> | 2013-06-07 09:54:57 +0200 |
---|---|---|
committer | Yorhel <git@yorhel.nl> | 2013-06-07 09:54:57 +0200 |
commit | 3bb19431969fa91b0d1ee3f2f88ba57cb82ad0b8 (patch) | |
tree | 8b787430eee17aae28e0e07473c399d2d8ff646a | |
parent | abd11f530f2e7e7f29fa3ecf1d350fc59022b494 (diff) |
util/casestr: Add casestr_eq() for "fast" case-sensitive comparison
-rw-r--r-- | src/util/casestr.c | 31 | ||||
-rw-r--r-- | src/util/casestr.h | 11 | ||||
-rw-r--r-- | test/casestr.c | 6 |
3 files changed, 39 insertions, 9 deletions
diff --git a/src/util/casestr.c b/src/util/casestr.c
index c4a106c..dceb1f8 100644
--- a/src/util/casestr.c
+++ b/src/util/casestr.c
@@ -146,20 +146,33 @@ void casestr_orig(const char *buf, kstring_t *dest) {
 }
 
 
-size_t casestr_len(const char *buf) {
-	size_t flen = strlen(buf);
-	/* No bitmask */
-	if(!buf[flen+1])
-		return flen + strlen(buf+flen+2) + 3;
-
-	/* The size of the bitmask depends on the number of unicode characters, so
-	 * this calculation is a bit slower. */
+/* Calculates the size of the bitmask, in bytes. */
+static size_t casestr_masklen(const char *buf) {
 	size_t idx = 1;
 	while(*buf) {
 		buf += utf8proc_utf8class[*((uint8_t *)buf)];
 		idx++;
 	}
-	return flen + 1 + (idx+7)/8;
+	return (idx+7)/8;
+}
+
+
+size_t casestr_len(const char *buf) {
+	size_t maskoff = strlen(buf) + 1;
+	return buf[maskoff]
+		? maskoff + casestr_masklen(buf)
+		: maskoff + strlen(buf+maskoff+1) + 2;
 }
 
+
+bool casestr_eq(const char *a, const char *b) {
+	if(strcmp(a, b) != 0)
+		return false;
+	size_t maskoff = strlen(a)+1; /* == strlen(b)+1, after the above check */
+	return a[maskoff] != b[maskoff] ? false : a[maskoff]
+		? memcmp(a+maskoff, b+maskoff, casestr_masklen(a)) == 0
+		: strcmp(a+maskoff+1, b+maskoff+1) == 0; /* +1: skip the zero marker byte; the original string follows it */
+}
+
+
 /* vim: set noet sw=4 ts=4: */
diff --git a/src/util/casestr.h b/src/util/casestr.h
index b6c62b9..daf586a 100644
--- a/src/util/casestr.h
+++ b/src/util/casestr.h
@@ -71,5 +71,16 @@ void casestr_orig(const char *buf, kstring_t *dest);
  * The returned value is equivalent to 'dest.l' after casestr_create(). */
 size_t casestr_len(const char *buf);
 
+
+/* Compare two casestr buffers for equality. This should be equivalent to
+ * checking for strcmp(a, b) == 0 after obtaining the original strings using
+ * casestr_orig(), except that this function is faster.
+ *
+ * Note that two strings are NOT considered equivalent if one is represented
+ * with a bitmask and the other with the original string appended to it. This
+ * situation doesn't arise as long as both buffers were created using
+ * casestr_create(). */
+bool casestr_eq(const char *, const char *);
+
 #endif
 /* vim: set noet sw=4 ts=4: */
diff --git a/test/casestr.c b/test/casestr.c
index 95208e7..ed23db9 100644
--- a/test/casestr.c
+++ b/test/casestr.c
@@ -32,6 +32,7 @@
 	assert(b.l == sizeof out - 1);\
 	assert(memcmp(b.s, out, b.l) == 0);\
 	assert(casestr_len(b.s) == b.l);\
+	assert(casestr_eq(b.s, out));\
 	kstring_t o = {};\
 	casestr_fold(in, &o);\
 	assert(!*in ? o.s == NULL : strcmp(o.s, b.s) == 0);\
@@ -47,6 +48,7 @@ int main(int argc, char **argv) {
 	T("", "\0\1");
 	T("abc", "abc\0\1");
 	T("abcdefgh", "abcdefgh\0\1\0");
+	T("abcdefg", "abcdefg\0\1");
 	T("A", "a\0\3");
 	T("aAa", "aaa\0\5");
 	T("aBcdEfgH", "abcdefgh\0\x25\1");
@@ -54,6 +56,10 @@ int main(int argc, char **argv) {
 	T(" 月姫 ≠ ", " 月姫 ≠ \0\1");
 	T(" 月姫 ≠ вот", " 月姫 ≠ вот\0\1\0");
 	T("ПрогиШара", "прогишара\0\x43\0");
+
+	assert(!casestr_eq("aaa\0aAa", "aaa\0\5"));
+	assert( casestr_eq("aaa\0aaa", "aaa\0aaa"));
+	assert(!casestr_eq("прогишара\0\x43\0", "прогишара\0\x43\1"));
 
 	return 0;
 }