FreeBSD Bugzilla – Attachment 168579 Details for
Bug 208266
strxfrm and strcoll do not always agree
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
Test program
strcolltest.c (text/plain), 4.81 KB, created by
Thomas Munro
on 2016-03-24 20:17:48 UTC
(
hide
)
Description:
Test program
Filename:
MIME Type:
Creator:
Thomas Munro
Created:
2016-03-24 20:17:48 UTC
Size:
4.81 KB
patch
obsolete
#include <errno.h>
#include <langinfo.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/*
 * Test: generate 1000 random UTF8 strings, sort them by strcoll, sanity-
 * check the sort result, sort them by strxfrm, sanity-check that result,
 * and compare the two sort orders.
 *
 * Usage: strcolltest [ntries]
 *
 * The locale is taken from the environment.  The exit status is 0 when
 * every test instance was consistent and 1 otherwise (or on setup failure).
 */
#define NSTRINGS 1000
#define MAXSTRLEN 20
#define MAXXFRMLEN (MAXSTRLEN * 10)

/*
 * One generated test string, its strxfrm() image, and the rank it received
 * under each of the two sort orders (equal keys share a rank).
 */
typedef struct
{
    char        strval[MAXSTRLEN];
    char        xfrmval[MAXXFRMLEN];
    int         strsortpos;
    int         xfrmsortpos;
} OneString;

/* qsort comparators */

/* Order two OneString entries by strcoll() on the raw strings. */
static int
strcoll_compare(const void *pa, const void *pb)
{
    const OneString *a = (const OneString *) pa;
    const OneString *b = (const OneString *) pb;

    return strcoll(a->strval, b->strval);
}

/* Order two OneString entries by strcmp() on their strxfrm() images. */
static int
strxfrm_compare(const void *pa, const void *pb)
{
    const OneString *a = (const OneString *) pa;
    const OneString *b = (const OneString *) pb;

    return strcmp(a->xfrmval, b->xfrmval);
}


/*
 * Run one test instance over NSTRINGS freshly generated random strings.
 *
 * is_utf8: nonzero to encode characters above 127 as two-byte UTF-8
 * sequences; zero to store them as single bytes.
 *
 * Inconsistencies are reported on stdout.  Exits the process if a
 * strxfrm() result does not fit in MAXXFRMLEN bytes.
 *
 * returns 1 if OK, 0 if inconsistency detected
 */
static int
run_test_case(int is_utf8)
{
    int         ok = 1;
    OneString   data[NSTRINGS];
    int         i,
                j;

    /* Generate random strings of length less than MAXSTRLEN bytes */
    for (i = 0; i < NSTRINGS; i++)
    {
        char       *p = data[i].strval;
        int         len;

        len = 1 + (random() % (MAXSTRLEN - 1));
        while (len > 0)
        {
            int         c;

            /* Generate random printable char in ISO8859-1 range */
            /* Bias towards producing a lot of spaces */
            if ((random() % 16) < 3)
                c = ' ';
            else
            {
                /* 127 (DEL) is a control character, so it is excluded */
                do
                {
                    c = random() & 0xFF;
                } while (!((c >= ' ' && c < 127) || (c >= 0xA0 && c <= 0xFF)));
            }

            if (c <= 127 || !is_utf8)
            {
                *p++ = c;
                len--;
            }
            else
            {
                /* A two-byte sequence must not be split; stop short instead */
                if (len < 2)
                    break;
                /* Poor man's utf8-ification */
                *p++ = 0xC0 + (c >> 6);
                len--;
                *p++ = 0x80 + (c & 0x3F);
                len--;
            }
        }
        *p = '\0';

        /* strxfrm each string as we produce it */
        if (strxfrm(data[i].xfrmval, data[i].strval, MAXXFRMLEN) >= MAXXFRMLEN)
        {
            fprintf(stderr, "strxfrm() result for %d-length string exceeded %d bytes\n",
                    (int) strlen(data[i].strval), MAXXFRMLEN);
            exit(1);
        }

#if 0
        printf("%d %s\n", i, data[i].strval);
#endif
    }

    /* Sort per strcoll(), and label, being careful in case some are equal */
    qsort(data, NSTRINGS, sizeof(OneString), strcoll_compare);
    j = 0;
    for (i = 0; i < NSTRINGS; i++)
    {
        if (i > 0 && strcoll(data[i].strval, data[i-1].strval) != 0)
            j++;
        data[i].strsortpos = j;
    }

    /* Sanity-check: is each string <= those after it? */
    for (i = 0; i < NSTRINGS; i++)
    {
        for (j = i + 1; j < NSTRINGS; j++)
        {
            if (strcoll(data[i].strval, data[j].strval) > 0)
            {
                fprintf(stdout, "strcoll sort inconsistency between positions %d and %d\n",
                        i, j);
                ok = 0;
            }
        }
    }

    /* Sort per strxfrm(), and label, being careful in case some are equal */
    qsort(data, NSTRINGS, sizeof(OneString), strxfrm_compare);
    j = 0;
    for (i = 0; i < NSTRINGS; i++)
    {
        if (i > 0 && strcmp(data[i].xfrmval, data[i-1].xfrmval) != 0)
            j++;
        data[i].xfrmsortpos = j;
    }

    /* Sanity-check: is each string <= those after it? */
    for (i = 0; i < NSTRINGS; i++)
    {
        for (j = i + 1; j < NSTRINGS; j++)
        {
            if (strcmp(data[i].xfrmval, data[j].xfrmval) > 0)
            {
                fprintf(stdout, "strxfrm sort inconsistency between positions %d and %d\n",
                        i, j);
                ok = 0;
            }
        }
    }

    /* Compare: every string must hold the same rank in both orders */
    for (i = 0; i < NSTRINGS; i++)
    {
        if (data[i].strsortpos != data[i].xfrmsortpos)
        {
            fprintf(stdout, "inconsistency between strcoll (%d) and strxfrm (%d) orders\n",
                    data[i].strsortpos, data[i].xfrmsortpos);
            ok = 0;
        }
    }

    return ok;
}

/*
 * Entry point.  argv[1], if given, is the number of test instances to run
 * (default 1).  Returns 0 if all instances passed, 1 if any failed.
 */
int
main(int argc, char **argv)
{
    const char *lc;
    const char *cset;
    int         is_utf8;
    int         ntries;
    int         result = 0;

    /* Absorb locale from environment, and report what we're using */
    if (setlocale(LC_ALL, "") == NULL)
    {
        perror("setlocale(LC_ALL) failed");
        exit(1);
    }
    lc = setlocale(LC_COLLATE, NULL);
    if (lc)
    {
        printf("Using LC_COLLATE = \"%s\"\n", lc);
    }
    else
    {
        perror("setlocale(LC_COLLATE) failed");
        exit(1);
    }
    lc = setlocale(LC_CTYPE, NULL);
    if (lc)
    {
        printf("Using LC_CTYPE = \"%s\"\n", lc);
    }
    else
    {
        perror("setlocale(LC_CTYPE) failed");
        exit(1);
    }

    /* Identify encoding */
    cset = nl_langinfo(CODESET);
    if (!cset)
    {
        perror("nl_langinfo(CODESET) failed");
        exit(1);
    }
    if (strstr(cset, "utf") || strstr(cset, "UTF"))
        is_utf8 = 1;
    else if (strstr(cset, "iso-8859") || strstr(cset, "ISO-8859") ||
             strstr(cset, "iso8859") || strstr(cset, "ISO8859"))
        is_utf8 = 0;
    else
    {
        fprintf(stderr, "unrecognized codeset name \"%s\"\n", cset);
        exit(1);
    }

    /* Ensure new random() values on every run */
    srandom((unsigned int) time(NULL));

    /* argv[1] can be the max number of tries to run */
    if (argc > 1)
    {
        char       *end;
        long        val;

        /*
         * Validate the argument rather than letting a typo silently turn
         * into zero tries and a misleading "success" exit status.
         */
        errno = 0;
        val = strtol(argv[1], &end, 10);
        if (end == argv[1] || *end != '\0' || errno == ERANGE ||
            val < INT_MIN || val > INT_MAX)
        {
            fprintf(stderr, "invalid number of tries \"%s\"\n", argv[1]);
            exit(1);
        }
        ntries = (int) val;
    }
    else
        ntries = 1;

    /* Run one test instance per loop */
    while (ntries-- > 0)
    {
        if (!run_test_case(is_utf8))
            result = 1;
    }

    return result;
}
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Raw
Actions:
View
Attachments on
bug 208266
: 168579 |
168580