summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYorhel <git@yorhel.nl>2013-06-07 09:54:57 +0200
committerYorhel <git@yorhel.nl>2013-06-07 09:54:57 +0200
commit3bb19431969fa91b0d1ee3f2f88ba57cb82ad0b8 (patch)
tree8b787430eee17aae28e0e07473c399d2d8ff646a
parentabd11f530f2e7e7f29fa3ecf1d350fc59022b494 (diff)
util/casestr: Add casestr_eq() for "fast" case-sensitive comparison
-rw-r--r--src/util/casestr.c31
-rw-r--r--src/util/casestr.h11
-rw-r--r--test/casestr.c6
3 files changed, 39 insertions, 9 deletions
diff --git a/src/util/casestr.c b/src/util/casestr.c
index c4a106c..dceb1f8 100644
--- a/src/util/casestr.c
+++ b/src/util/casestr.c
@@ -146,20 +146,33 @@ void casestr_orig(const char *buf, kstring_t *dest) {
}
-size_t casestr_len(const char *buf) {
- size_t flen = strlen(buf);
- /* No bitmask */
- if(!buf[flen+1])
- return flen + strlen(buf+flen+2) + 3;
-
- /* The size of the bitmask depends on the number of unicode characters, so
- * this calculation is a bit slower. */
+/* Calculates the size of the bitmask, in bytes.
+ *
+ * 'buf' is walked one UTF-8 sequence at a time: every step advances by the
+ * utf8proc_utf8class[] entry of the current lead byte (presumably the byte
+ * length of that sequence -- confirm against the utf8proc table). The counter
+ * starts at 1 rather than 0, so the result accounts for one bit more than the
+ * number of steps taken, rounded up to whole bytes: a 7-character ASCII
+ * string needs 1 byte, an 8-character one needs 2. */
+static size_t casestr_masklen(const char *buf) {
size_t idx = 1;
while(*buf) {
buf += utf8proc_utf8class[*((uint8_t *)buf)];
idx++;
}
- return flen + 1 + (idx+7)/8;
+ return (idx+7)/8;
+}
+
+
+/* Returns the total size of the casestr buffer 'buf', in bytes, including
+ * the folded string, its terminator and whatever follows it (either the
+ * bitmask or the appended original string with its own terminator). */
+size_t casestr_len(const char *buf) {
+	const size_t tail = strlen(buf) + 1;
+
+	/* A zero byte right after the folded string's terminator means that the
+	 * original string was appended instead of a bitmask; it starts one byte
+	 * further on. */
+	if(!buf[tail])
+		return tail + strlen(buf+tail+1) + 2;
+	return tail + casestr_masklen(buf);
}
+
+/* Returns true if the casestr buffers 'a' and 'b' hold the same folded
+ * string and the same case information.
+ *
+ * Two buffers that use different representations (one with a bitmask, the
+ * other with the original string appended) are never considered equal. */
+bool casestr_eq(const char *a, const char *b) {
+	/* The folded strings must match before the case data is worth a look. */
+	if(strcmp(a, b) != 0)
+		return false;
+	size_t maskoff = strlen(a)+1; /* == strlen(b)+1, after the above check */
+
+	/* Either the representations differ (zero marker vs. bitmask) or the
+	 * first bitmask byte already differs. */
+	if(a[maskoff] != b[maskoff])
+		return false;
+
+	/* Bitmask representation: both masks have the same length, because that
+	 * length only depends on the (identical) folded string. */
+	if(a[maskoff])
+		return memcmp(a+maskoff, b+maskoff, casestr_masklen(a)) == 0;
+
+	/* No bitmask: the byte at maskoff is a zero marker in both buffers and the
+	 * appended original string only starts at maskoff+1. Comparing from
+	 * maskoff would compare two empty strings and always report equality. */
+	return strcmp(a+maskoff+1, b+maskoff+1) == 0;
+}
+
+
/* vim: set noet sw=4 ts=4: */
diff --git a/src/util/casestr.h b/src/util/casestr.h
index b6c62b9..daf586a 100644
--- a/src/util/casestr.h
+++ b/src/util/casestr.h
@@ -71,5 +71,16 @@ void casestr_orig(const char *buf, kstring_t *dest);
* The returned value is equivalent to 'dest.l' after casestr_create(). */
size_t casestr_len(const char *buf);
+
+/* Compare two casestr buffers for equality. This should be equivalent to
+ * checking for strcmp(a, b) == 0 after obtaining the original strings using
+ * casestr_orig(), except that this function is faster.
+ *
+ * Note that two strings are NOT considered equivalent if one is represented
+ * with a bitmask and the other with the original string appended to it. This
+ * situation doesn't arise as long as both buffers were created using
+ * casestr_create(). */
+bool casestr_eq(const char *, const char *);
+
#endif
/* vim: set noet sw=4 ts=4: */
diff --git a/test/casestr.c b/test/casestr.c
index 95208e7..ed23db9 100644
--- a/test/casestr.c
+++ b/test/casestr.c
@@ -32,6 +32,7 @@
assert(b.l == sizeof out - 1);\
assert(memcmp(b.s, out, b.l) == 0);\
assert(casestr_len(b.s) == b.l);\
+ assert(casestr_eq(b.s, out));\
kstring_t o = {};\
casestr_fold(in, &o);\
assert(!*in ? o.s == NULL : strcmp(o.s, b.s) == 0);\
@@ -47,6 +48,7 @@ int main(int argc, char **argv) {
T("", "\0\1");
T("abc", "abc\0\1");
T("abcdefgh", "abcdefgh\0\1\0");
+ T("abcdefg", "abcdefg\0\1");
T("A", "a\0\3");
T("aAa", "aaa\0\5");
T("aBcdEfgH", "abcdefgh\0\x25\1");
@@ -54,6 +56,10 @@ int main(int argc, char **argv) {
T(" 月姫 ≠ ", " 月姫 ≠ \0\1");
T(" 月姫 ≠ вот", " 月姫 ≠ вот\0\1\0");
T("ПрогиШара", "прогишара\0\x43\0");
+
+ assert(!casestr_eq("aaa\0aAa", "aaa\0\5"));
+ assert( casestr_eq("aaa\0aaa", "aaa\0aaa"));
+ assert(!casestr_eq("прогишара\0\x43\0", "прогишара\0\x43\1"));
return 0;
}