#include #include #include #include #include #include /* * Test: generate 1000 random UTF8 strings, sort them by strcoll, sanity- * check the sort result, sort them by strxfrm, sanity-check that result, * and compare the two sort orders. */ #define NSTRINGS 1000 #define MAXSTRLEN 20 #define MAXXFRMLEN (MAXSTRLEN * 10) typedef struct { char strval[MAXSTRLEN]; char xfrmval[MAXXFRMLEN]; int strsortpos; int xfrmsortpos; } OneString; /* qsort comparators */ static int strcoll_compare(const void *pa, const void *pb) { const OneString *a = (const OneString *) pa; const OneString *b = (const OneString *) pb; return strcoll(a->strval, b->strval); } static int strxfrm_compare(const void *pa, const void *pb) { const OneString *a = (const OneString *) pa; const OneString *b = (const OneString *) pb; return strcmp(a->xfrmval, b->xfrmval); } /* returns 1 if OK, 0 if inconsistency detected */ static int run_test_case(int is_utf8) { int ok = 1; OneString data[NSTRINGS]; int i, j; /* Generate random strings of length less than MAXSTRLEN bytes */ for (i = 0; i < NSTRINGS; i++) { char *p = data[i].strval; int len; len = 1 + (random() % (MAXSTRLEN - 1)); while (len > 0) { int c; /* Generate random printable char in ISO8859-1 range */ /* Bias towards producing a lot of spaces */ if ((random() % 16) < 3) c = ' '; else { do { c = random() & 0xFF; } while (!((c >= ' ' && c <= 127) || (c >= 0xA0 && c <= 0xFF))); } if (c <= 127 || !is_utf8) { *p++ = c; len--; } else { if (len < 2) break; /* Poor man's utf8-ification */ *p++ = 0xC0 + (c >> 6); len--; *p++ = 0x80 + (c & 0x3F); len--; } } *p = '\0'; /* strxfrm each string as we produce it */ if (strxfrm(data[i].xfrmval, data[i].strval, MAXXFRMLEN) >= MAXXFRMLEN) { fprintf(stderr, "strxfrm() result for %d-length string exceeded %d bytes\n", (int) strlen(data[i].strval), MAXXFRMLEN); exit(1); } #if 0 printf("%d %s\n", i, data[i].strval); #endif } /* Sort per strcoll(), and label, being careful in case some are equal */ qsort(data, NSTRINGS, sizeof(OneString), strcoll_compare); j = 0; for (i = 0; i < NSTRINGS; i++) { if (i > 0 && strcoll(data[i].strval, data[i-1].strval) != 0) j++; data[i].strsortpos = j; } /* Sanity-check: is each string <= those after it? */ for (i = 0; i < NSTRINGS; i++) { for (j = i + 1; j < NSTRINGS; j++) { if (strcoll(data[i].strval, data[j].strval) > 0) { fprintf(stdout, "strcoll sort inconsistency between positions %d and %d\n", i, j); ok = 0; } } } /* Sort per strxfrm(), and label, being careful in case some are equal */ qsort(data, NSTRINGS, sizeof(OneString), strxfrm_compare); j = 0; for (i = 0; i < NSTRINGS; i++) { if (i > 0 && strcmp(data[i].xfrmval, data[i-1].xfrmval) != 0) j++; data[i].xfrmsortpos = j; } /* Sanity-check: is each string <= those after it? */ for (i = 0; i < NSTRINGS; i++) { for (j = i + 1; j < NSTRINGS; j++) { if (strcmp(data[i].xfrmval, data[j].xfrmval) > 0) { fprintf(stdout, "strxfrm sort inconsistency between positions %d and %d\n", i, j); ok = 0; } } } /* Compare */ for (i = 0; i < NSTRINGS; i++) { if (data[i].strsortpos != data[i].xfrmsortpos) { fprintf(stdout, "inconsistency between strcoll (%d) and strxfrm (%d) orders\n", data[i].strsortpos, data[i].xfrmsortpos); ok = 0; } } return ok; } int main(int argc, char **argv) { const char *lc; const char *cset; int is_utf8; int ntries; int result = 0; /* Absorb locale from environment, and report what we're using */ if (setlocale(LC_ALL, "") == NULL) { perror("setlocale(LC_ALL) failed"); exit(1); } lc = setlocale(LC_COLLATE, NULL); if (lc) { printf("Using LC_COLLATE = \"%s\"\n", lc); } else { perror("setlocale(LC_COLLATE) failed"); exit(1); } lc = setlocale(LC_CTYPE, NULL); if (lc) { printf("Using LC_CTYPE = \"%s\"\n", lc); } else { perror("setlocale(LC_CTYPE) failed"); exit(1); } /* Identify encoding */ cset = nl_langinfo(CODESET); if (!cset) { perror("nl_langinfo(CODESET) failed"); exit(1); } if (strstr(cset, "utf") || strstr(cset, "UTF")) is_utf8 = 1; else if (strstr(cset, "iso-8859") || strstr(cset, "ISO-8859") || strstr(cset, "iso8859") || strstr(cset, "ISO8859")) is_utf8 = 0; else { fprintf(stderr, "unrecognized codeset name \"%s\"\n", cset); exit(1); } /* Ensure new random() values on every run */ srandom((unsigned int) time(NULL)); /* argv[1] can be the max number of tries to run */ if (argc > 1) ntries = atoi(argv[1]); else ntries = 1; /* Run one test instance per loop */ while (ntries-- > 0) { if (!run_test_case(is_utf8)) result = 1; } return result; }