Bug 51504

Summary: New file: src/share/mklocale/zh_CN.GBK.src
Product: Base System Reporter: statue <statue>
Component: conf Assignee: Andrey A. Chernov <ache>
Status: Closed FIXED    
Severity: Affects Only Me    
Priority: Normal    
Version: 4.7-PRERELEASE   
Hardware: Any   
OS: Any   

Description statue 2003-04-28 19:30:12 UTC
This is src/share/mklocale/zh_CN.GBK.src; can anyone help me review it?

Fix: 

/*
 * GBK      first byte:   81-FE
 *          second byte:  40-7E, 80-FE
 *
 * $FreeBSD$
 */

ENCODING	"GBK"
/* VARIABLE     GBK character set */

/*
 * ASCII
 */
ALPHA		'A' - 'Z' 'a' - 'z'
CONTROL		0x00 - 0x1f 0x7f
DIGIT		'0' - '9'
GRAPH		0x21 - 0x7e
LOWER		'a' - 'z'
PUNCT		0x21 - 0x2f 0x3a - 0x40 0x5b - 0x60 0x7b - 0x7e
SPACE		0x09 - 0x0d 0x20
UPPER		'A' - 'Z'
XDIGIT		'0' - '9' 'a' - 'f' 'A' - 'F'
BLANK		' ' '\t'
PRINT		0x20 - 0x7e

MAPLOWER	< 'A' - 'Z' : 'a' > < 'a' - 'z' : 'a' >
MAPUPPER	< 'A' - 'Z' : 'A' > < 'a' - 'z' : 'A' >
TODIGIT		< '0' - '9' : 0 >
TODIGIT		< 'A' - 'F' : 10 > < 'a' - 'f' : 10 >

/*
 * the real thing
 */
IDEOGRAM	0x8140 - 0x817e 0x8180 - 0x81fe
IDEOGRAM	0x8240 - 0x827e 0x8280 - 0x82fe
IDEOGRAM	0x8340 - 0x837e 0x8380 - 0x83fe
IDEOGRAM	0x8440 - 0x847e 0x8480 - 0x84fe
IDEOGRAM	0x8540 - 0x857e 0x8580 - 0x85fe
IDEOGRAM	0x8640 - 0x867e 0x8680 - 0x86fe
IDEOGRAM	0x8740 - 0x877e 0x8780 - 0x87fe
IDEOGRAM	0x8840 - 0x887e 0x8880 - 0x88fe
IDEOGRAM	0x8940 - 0x897e 0x8980 - 0x89fe
IDEOGRAM	0x8a40 - 0x8a7e 0x8a80 - 0x8afe
IDEOGRAM	0x8b40 - 0x8b7e 0x8b80 - 0x8bfe
IDEOGRAM	0x8c40 - 0x8c7e 0x8c80 - 0x8cfe
IDEOGRAM	0x8d40 - 0x8d7e 0x8d80 - 0x8dfe
IDEOGRAM	0x8e40 - 0x8e7e 0x8e80 - 0x8efe
IDEOGRAM	0x8f40 - 0x8f7e 0x8f80 - 0x8ffe
IDEOGRAM	0x9040 - 0x907e 0x9080 - 0x90fe
IDEOGRAM	0x9140 - 0x917e 0x9180 - 0x91fe
IDEOGRAM	0x9240 - 0x927e 0x9280 - 0x92fe
IDEOGRAM	0x9340 - 0x937e 0x9380 - 0x93fe
IDEOGRAM	0x9440 - 0x947e 0x9480 - 0x94fe
IDEOGRAM	0x9540 - 0x957e 0x9580 - 0x95fe
IDEOGRAM	0x9640 - 0x967e 0x9680 - 0x96fe
IDEOGRAM	0x9740 - 0x977e 0x9780 - 0x97fe
IDEOGRAM	0x9840 - 0x987e 0x9880 - 0x98fe
IDEOGRAM	0x9940 - 0x997e 0x9980 - 0x99fe
IDEOGRAM	0x9a40 - 0x9a7e 0x9a80 - 0x9afe
IDEOGRAM	0x9b40 - 0x9b7e 0x9b80 - 0x9bfe
IDEOGRAM	0x9c40 - 0x9c7e 0x9c80 - 0x9cfe
IDEOGRAM	0x9d40 - 0x9d7e 0x9d80 - 0x9dfe
IDEOGRAM	0x9e40 - 0x9e7e 0x9e80 - 0x9efe
IDEOGRAM	0x9f40 - 0x9f7e 0x9f80 - 0x9ffe
IDEOGRAM	0xa040 - 0xa07e 0xa080 - 0xa0fe

SPACE		0xa1a1
BLANK		0xa1a1
PUNCT		0xa1a2 - 0xa1fe

/* full width 0 1 2 .. 9 */
/* ?DIGIT           0xa2a1 - 0xa2aa 0xa2b1 - 0xa2e2 0xa2e5 - 0xa2ee 0xa2f1 - 0xa2fc */

TODIGIT		< 0xa2a1 - 0xa2aa : 1 >
TODIGIT		< 0xa2b1 - 0xa2c4 : 1 >
TODIGIT		< 0xa2c5 - 0xa2d8 : 1 >
TODIGIT		< 0xa2d9 - 0xa2e2 : 1 >
TODIGIT		< 0xa2e5 - 0xa2ee : 1 >
TODIGIT		< 0xa2f1 - 0xa2fc : 1 >

ALPHA		0xa3c1 - 0xa3da 0xa3e1 - 0xa3fa	/* row A3: second byte = ASCII code + 0x80 */
/* ?DIGIT           0xa3b0 - 0xa3b9 */
UPPER		0xa3c1 - 0xa3da
LOWER		0xa3e1 - 0xa3fa
PUNCT		0xa3a1 - 0xa3af 0xa3ba - 0xa3c0 0xa3db - 0xa3e0 0xa3fb - 0xa3fe
/* ?XDIGIT          0xa3b0 - 0xa3b9 0xa3c1 - 0xa3c6 0xa3e1 - 0xa3e6 */

MAPLOWER	< 0xa3c1 - 0xa3da : 0xa3e1 > < 0xa3e1 - 0xa3fa : 0xa3e1 >
MAPUPPER	< 0xa3c1 - 0xa3da : 0xa3c1 > < 0xa3e1 - 0xa3fa : 0xa3c1 >	/* LOWER range -> UPPER, not the digit range */
TODIGIT		< 0xa3b0 - 0xa3b9 : 0 >
TODIGIT		< 0xa3c1 - 0xa3c6 : 10 > < 0xa3e1 - 0xa3e6 : 10 >

PHONOGRAM	0xa4a1 - 0xa4f3			/* Hiragana */
PHONOGRAM	0xa5a1 - 0xa5f6			/* Katakana */

UPPER		0xa6a1 - 0xa6b8			/* Greek */
LOWER		0xa6c1 - 0xa6d8			/* Greek */
MAPLOWER	< 0xa6a1 - 0xa6b8 : 0xa6c1 > < 0xa6c1 - 0xa6d8 : 0xa6c1 >
MAPUPPER	< 0xa6a1 - 0xa6b8 : 0xa6a1 > < 0xa6c1 - 0xa6d8 : 0xa6a1 >

UPPER		0xa7a1 - 0xa7c1			/* Cyrillic */
LOWER		0xa7d1 - 0xa7f1			/* Cyrillic */
MAPLOWER	< 0xa7a1 - 0xa7c1 : 0xa7d1 > < 0xa7d1 - 0xa7f1 : 0xa7d1 >
MAPUPPER	< 0xa7a1 - 0xa7c1 : 0xa7a1 > < 0xa7d1 - 0xa7f1 : 0xa7a1 >

SPECIAL		0xa840 - 0xa87e 0xa880 - 0xa895 0xa8a1 - 0xa8c0
PHONOGRAM	0xa8c5 - 0xa8e9			/* Pin yin */
TODIGIT		< 0xa940 - 0xa948 : 1 >
SPECIAL		0xa949 - 0xa95a 0xa95c 0xa960 - 0xa97e 0xa980 - 0xa996
SPECIAL		0xa9a4 - 0xa9ef			/* Box drawings */

IDEOGRAM	0xaa40 - 0xaa7e 0xaa80 - 0xaaa0
IDEOGRAM	0xab40 - 0xab7e 0xab80 - 0xaba0
IDEOGRAM	0xac40 - 0xac7e 0xac80 - 0xaca0
IDEOGRAM	0xad40 - 0xad7e 0xad80 - 0xada0
IDEOGRAM	0xae40 - 0xae7e 0xae80 - 0xaea0
IDEOGRAM	0xaf40 - 0xaf7e 0xaf80 - 0xafa0
IDEOGRAM	0xb040 - 0xb07e 0xb080 - 0xb0fe
IDEOGRAM	0xb140 - 0xb17e 0xb180 - 0xb1fe
IDEOGRAM	0xb240 - 0xb27e 0xb280 - 0xb2fe
IDEOGRAM	0xb340 - 0xb37e 0xb380 - 0xb3fe
IDEOGRAM	0xb440 - 0xb47e 0xb480 - 0xb4fe
IDEOGRAM	0xb540 - 0xb57e 0xb580 - 0xb5fe
IDEOGRAM	0xb640 - 0xb67e 0xb680 - 0xb6fe
IDEOGRAM	0xb740 - 0xb77e 0xb780 - 0xb7fe
IDEOGRAM	0xb840 - 0xb87e 0xb880 - 0xb8fe
IDEOGRAM	0xb940 - 0xb97e 0xb980 - 0xb9fe
IDEOGRAM	0xba40 - 0xba7e 0xba80 - 0xbafe
IDEOGRAM	0xbb40 - 0xbb7e 0xbb80 - 0xbbfe
IDEOGRAM	0xbc40 - 0xbc7e 0xbc80 - 0xbcfe
IDEOGRAM	0xbd40 - 0xbd7e 0xbd80 - 0xbdfe
IDEOGRAM	0xbe40 - 0xbe7e 0xbe80 - 0xbefe
IDEOGRAM	0xbf40 - 0xbf7e 0xbf80 - 0xbffe
IDEOGRAM	0xc040 - 0xc07e 0xc080 - 0xc0fe
IDEOGRAM	0xc140 - 0xc17e 0xc180 - 0xc1fe
IDEOGRAM	0xc240 - 0xc27e 0xc280 - 0xc2fe
IDEOGRAM	0xc340 - 0xc37e 0xc380 - 0xc3fe
IDEOGRAM	0xc440 - 0xc47e 0xc480 - 0xc4fe
IDEOGRAM	0xc540 - 0xc57e 0xc580 - 0xc5fe
IDEOGRAM	0xc640 - 0xc67e 0xc680 - 0xc6fe
IDEOGRAM	0xc740 - 0xc77e 0xc780 - 0xc7fe
IDEOGRAM	0xc840 - 0xc87e 0xc880 - 0xc8fe
IDEOGRAM	0xc940 - 0xc97e 0xc980 - 0xc9fe
IDEOGRAM	0xca40 - 0xca7e 0xca80 - 0xcafe
IDEOGRAM	0xcb40 - 0xcb7e 0xcb80 - 0xcbfe
IDEOGRAM	0xcc40 - 0xcc7e 0xcc80 - 0xccfe
IDEOGRAM	0xcd40 - 0xcd7e 0xcd80 - 0xcdfe
IDEOGRAM	0xce40 - 0xce7e 0xce80 - 0xcefe
IDEOGRAM	0xcf40 - 0xcf7e 0xcf80 - 0xcfd3
IDEOGRAM	0xd040 - 0xd07e 0xd080 - 0xd0fe
IDEOGRAM	0xd140 - 0xd17e 0xd180 - 0xd1fe
IDEOGRAM	0xd240 - 0xd27e 0xd280 - 0xd2fe
IDEOGRAM	0xd340 - 0xd37e 0xd380 - 0xd3fe
IDEOGRAM	0xd440 - 0xd47e 0xd480 - 0xd4fe
IDEOGRAM	0xd540 - 0xd57e 0xd580 - 0xd5fe
IDEOGRAM	0xd640 - 0xd67e 0xd680 - 0xd6fe
IDEOGRAM	0xd740 - 0xd77e 0xd780 - 0xd7fe
IDEOGRAM	0xd840 - 0xd87e 0xd880 - 0xd8fe
IDEOGRAM	0xd940 - 0xd97e 0xd980 - 0xd9fe
IDEOGRAM	0xda40 - 0xda7e 0xda80 - 0xdafe
IDEOGRAM	0xdb40 - 0xdb7e 0xdb80 - 0xdbfe
IDEOGRAM	0xdc40 - 0xdc7e 0xdc80 - 0xdcfe
IDEOGRAM	0xdd40 - 0xdd7e 0xdd80 - 0xddfe
IDEOGRAM	0xde40 - 0xde7e 0xde80 - 0xdefe
IDEOGRAM	0xdf40 - 0xdf7e 0xdf80 - 0xdffe
IDEOGRAM	0xe040 - 0xe07e 0xe080 - 0xe0fe
IDEOGRAM	0xe140 - 0xe17e 0xe180 - 0xe1fe
IDEOGRAM	0xe240 - 0xe27e 0xe280 - 0xe2fe
IDEOGRAM	0xe340 - 0xe37e 0xe380 - 0xe3fe
IDEOGRAM	0xe440 - 0xe47e 0xe480 - 0xe4fe
IDEOGRAM	0xe540 - 0xe57e 0xe580 - 0xe5fe
IDEOGRAM	0xe640 - 0xe67e 0xe680 - 0xe6fe
IDEOGRAM	0xe740 - 0xe77e 0xe780 - 0xe7fe
IDEOGRAM	0xe840 - 0xe87e 0xe880 - 0xe8fe
IDEOGRAM	0xe940 - 0xe97e 0xe980 - 0xe9fe
IDEOGRAM	0xea40 - 0xea7e 0xea80 - 0xeafe
IDEOGRAM	0xeb40 - 0xeb7e 0xeb80 - 0xebfe
IDEOGRAM	0xec40 - 0xec7e 0xec80 - 0xecfe
IDEOGRAM	0xed40 - 0xed7e 0xed80 - 0xedfe
IDEOGRAM	0xee40 - 0xee7e 0xee80 - 0xeefe
IDEOGRAM	0xef40 - 0xef7e 0xef80 - 0xeffe
IDEOGRAM	0xf040 - 0xf07e 0xf080 - 0xf0fe
IDEOGRAM	0xf140 - 0xf17e 0xf180 - 0xf1fe
IDEOGRAM	0xf240 - 0xf27e 0xf280 - 0xf2fe
IDEOGRAM	0xf340 - 0xf37e 0xf380 - 0xf3fe
IDEOGRAM	0xf440 - 0xf47e 0xf480 - 0xf4fe
IDEOGRAM	0xf540 - 0xf57e 0xf580 - 0xf5fe
IDEOGRAM	0xf640 - 0xf67e 0xf680 - 0xf6fe
IDEOGRAM	0xf740 - 0xf77e 0xf780 - 0xf7fe
IDEOGRAM	0xf840 - 0xf87e 0xf880 - 0xf8a0
IDEOGRAM	0xf940 - 0xf97e 0xf980 - 0xf9a0
IDEOGRAM	0xfa40 - 0xfa7e 0xfa80 - 0xfaa0
IDEOGRAM	0xfb40 - 0xfb7e 0xfb80 - 0xfba0
IDEOGRAM	0xfc40 - 0xfc7e 0xfc80 - 0xfca0
IDEOGRAM	0xfd40 - 0xfd7e 0xfd80 - 0xfda0
IDEOGRAM	0xfe40 - 0xfe7e 0xfe80 - 0xfea0
Comment 1 statue 2003-04-28 19:41:06 UTC
--- zh_CN.GBK.src.orig	Tue Apr 29 02:39:53 2003
+++ zh_CN.GBK.src	Tue Apr 29 02:35:55 2003
@@ -103,7 +103,10 @@
 MAPLOWER	< 0xa7a1 - 0xa7c1 : 0xa7d1 > < 0xa7d1 - 0xa7f1 : 0xa7d1 >
 MAPUPPER	< 0xa7a1 - 0xa7c1 : 0xa7a1 > < 0xa7d1 - 0xa7f1 : 0xa7a1 >
 
-SPECIAL		0xa840 - 0xa87e 0xa880 - 0xa895 0xa8a1 - 0xa8c0
+PUNCT		0xa840 - 0xa853
+SPECIAL		0xa854 - 0xa877			/* Box drawings */
+PUNCT		0xa878 - 0xa87e 0xa880 - 0xa895
+SPECIAL		0xa8a1 - 0xa8c0
 PHONOGRAM	0xa8c5 - 0xa8e9			/* Pin yin */
 TODIGIT		< 0xa940 - 0xa948 : 1 >
 SPECIAL		0xa949 - 0xa95a 0xa95c 0xa960 - 0xa97e 0xa980 - 0xa996
@@ -146,7 +149,7 @@
 IDEOGRAM	0xcc40 - 0xcc7e 0xcc80 - 0xccfe
 IDEOGRAM	0xcd40 - 0xcd7e 0xcd80 - 0xcdfe
 IDEOGRAM	0xce40 - 0xce7e 0xce80 - 0xcefe
-IDEOGRAM	0xcf40 - 0xcf7e 0xcf80 - 0xcfd3
+IDEOGRAM	0xcf40 - 0xcf7e 0xcf80 - 0xcffe
 IDEOGRAM	0xd040 - 0xd07e 0xd080 - 0xd0fe
 IDEOGRAM	0xd140 - 0xd17e 0xd180 - 0xd1fe
 IDEOGRAM	0xd240 - 0xd27e 0xd280 - 0xd2fe
Comment 2 Андрей Чернов 2003-04-28 20:23:42 UTC
On Tue, Apr 29, 2003 at 02:24:46 +0800, Statue wrote:

> >Synopsis:       New file: src/share/mklocale/zh_CN.GBK.src

What is the GBK encoding? Could you please point to some standard documents 
describing it in English?

> ENCODING	"GBK"

Such an encoding name is not supported by the libc runes code. It means your 
locale has never been tested.
Comment 4 statue 2003-04-30 11:59:23 UTC
--- lib/libc/locale/Makefile.inc.orig	Wed Apr 30 18:34:26 2003
+++ lib/libc/locale/Makefile.inc	Wed Apr 30 18:35:04 2003
@@ -9,7 +9,7 @@
 	ldpart.c lmessages.c lmonetary.c lnumeric.c localeconv.c mbrune.c \
         mskanji.c nl_langinfo.c nomacros.c none.c rune.c \
 	runetype.c setinvalidrune.c setlocale.c setrunelocale.c table.c \
-	tolower.c toupper.c utf2.c utf8.c
+	tolower.c toupper.c utf2.c utf8.c gbk.c
 
 .if ${LIB} == "c"
 MAN+=	ctype.3 isalnum.3 isalpha.3 isascii.3 isblank.3 iscntrl.3 \
--- lib/libc/locale/gbk.c.orig	Wed Apr 30 18:56:09 2003
+++ lib/libc/locale/gbk.c	Wed Apr 30 18:53:19 2003
@@ -0,0 +1,119 @@
+/*-
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Paul Borman at Krystal Technologies.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)gbk.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include <rune.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+
+rune_t	_GBK_sgetrune __P((const char *, size_t, char const **));
+int	_GBK_sputrune __P((rune_t, char *, size_t, char **));
+
+int	/* always returns 0 */
+_GBK_init(rl)	/* Install the GBK rune handlers into rl and make rl current. */
+	_RuneLocale *rl;
+{
+	rl->sgetrune = _GBK_sgetrune;	/* bytes -> rune decoder */
+	rl->sputrune = _GBK_sputrune;	/* rune -> bytes encoder */
+	_CurrentRuneLocale = rl;	/* rl becomes the active rune locale */
+	__mb_cur_max = 2;	/* the handlers below read/write at most two bytes per rune */
+	return (0);
+}
+
+static inline int	/* byte length (1 or 2) of the character whose first byte is c */
+_gbk_check(c)
+	u_int c;
+{
+	c &= 0xff;	/* keep only the low byte; callers pass a possibly sign-extended char */
+	return ((c >= 0x81 && c <= 0xfe) ? 2 : 1);	/* GBK first byte is 81-FE; 0x80 is not a lead byte */
+}
+
+rune_t	/* decoded rune, or _INVALID_RUNE when n is 0 or too small for the character */
+_GBK_sgetrune(string, n, result)	/* Decode one character from the first n bytes of string. */
+	const char *string;
+	size_t n;
+	char const **result;	/* optional: receives the position just past the consumed bytes */
+{
+	rune_t rune = 0;
+	int len;
+
+	if (n < 1 || (len = _gbk_check(*string)) > n) {	/* empty input, or lead byte needs more bytes than n */
+		if (result)
+			*result = string;	/* nothing was consumed */
+		return (_INVALID_RUNE);
+	}
+	while (--len >= 0)	/* len is 1 or 2; the first byte ends up in the high-order bits */
+		rune = (rune << 8) | ((u_int)(*string++) & 0xff);
+	if (result)
+		*result = string;
+	return rune;
+}
+
+int	/* bytes written (2 or 1), or 0 when the buffer is too small */
+_GBK_sputrune(c, string, n, result)	/* Encode rune c into string, which has room for n bytes. */
+	rune_t c;
+	char *string, **result;
+	size_t n;
+{
+	if (c & 0x8000) {	/* bit 15 set: emit two bytes, high byte first */
+		if (n >= 2) {
+			string[0] = (c >> 8) & 0xff;
+			string[1] = c & 0xff;
+			if (result)
+				*result = string + 2;
+			return (2);
+		}
+	}
+	else {	/* otherwise emit the low byte only */
+		if (n >= 1) {
+			*string = c & 0xff;
+			if (result)
+				*result = string + 1;
+			return (1);
+		}
+	}
+	if (result)	/* reached only when n was too small: nothing written */
+		*result = string;
+	return (0);
+	
+}
--- lib/libc/locale/setrunelocale.c.orig	Wed Apr 30 18:35:31 2003
+++ lib/libc/locale/setrunelocale.c	Wed Apr 30 18:36:14 2003
@@ -139,6 +139,8 @@
 		ret = _BIG5_init(rl);
 	else if (strcmp(rl->encoding, "MSKanji") == 0)
 		ret = _MSKanji_init(rl);
+	else if (strcmp(rl->encoding, "GBK") == 0)
+		ret = _GBK_init(rl);
 	else
 		ret = EFTYPE;
 	if (ret == 0) {
Comment 5 Johan Karlsson freebsd_committer freebsd_triage 2003-05-06 20:30:36 UTC
Responsible Changed
From-To: freebsd-bugs->ache

Over to locale guru.
Comment 6 Andrey A. Chernov freebsd_committer freebsd_triage 2003-06-01 16:52:59 UTC
State Changed
From-To: open->patched

Committed into -current
Comment 7 Mark Linimon freebsd_committer freebsd_triage 2007-06-10 07:21:07 UTC
State Changed
From-To: patched->closed

RELENG_4 is now out of support, so this PR is obsolete.