Tcl Source Code

Check-in [2dc755cab2]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Added 7-bit ASCII to big5 and gb2312 encodings [Bug: 632]
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | core-8-1-branch-old
Files: files | file ages | folders
SHA1: 2dc755cab230d354491850b3088b5c1f0787a1a0
User & Date: stanton 1999-03-26 00:13:39.000
Context
1999-03-26
00:16
*** empty log message *** check-in: a93b9c6b63 user: stanton tags: core-8-1-branch-old
00:13
Added 7-bit ASCII to big5 and gb2312 encodings [Bug: 632] check-in: 2dc755cab2 user: stanton tags: core-8-1-branch-old
1999-03-25
23:29
added missing argv0 declaration check-in: 25a525546f user: stanton tags: core-8-1-branch-old
Changes
Unified Diff Ignore Whitespace Patch
Changes to library/encoding/big5.enc.
1
2

3

















4
5
6
7
8
9
10
# Encoding file: big5, double-byte
D

A153 0 88

















A1
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
3000FF0C30013002FF0E2022FF1BFF1AFF1FFF01FE3020262025FE50FF64FE52
00B7FE54FE55FE56FE57FF5C2013FE312014FE33FFFDFE34FE4FFF08FF09FE35
|
<
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1

2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Encoding file: big5, multi-byte

M
003F 0 89
00
0000000100020003000400050006000700080009000A000B000C000D000E000F
0010001100120013001400150016001700180019001A001B001C001D001E001F
0020002100220023002400250026002700280029002A002B002C002D002E002F
0030003100320033003400350036003700380039003A003B003C003D003E003F
0040004100420043004400450046004700480049004A004B004C004D004E004F
0050005100520053005400550056005700580059005A005B005C005D005E005F
0060006100620063006400650066006700680069006A006B006C006D006E006F
0070007100720073007400750076007700780079007A007B007C007D007E007F
0080008100820083008400850086008700880089008A008B008C008D008E008F
0090009100920093009400950096009700980099009A009B009C009D009E009F
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
A1
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
3000FF0C30013002FF0E2022FF1BFF1AFF1FFF01FE3020262025FE50FF64FE52
00B7FE54FE55FE56FE57FF5C2013FE312014FE33FFFDFE34FE4FFF08FF09FE35
Changes to tools/encoding/Makefile.
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
	    ./txt2enc -e 0 -u 1 -s $$p > $$enc; \
	done

clean:
	@rm -f txt2enc *.enc euc-jp.txt euc-kr.txt

txt2enc: txt2enc.c
	@cc -o txt2enc txt2enc.c

euc-jp.txt: ascii.txt Makefile
	@echo Building euc-jp.txt from components.
	@cat ascii.txt > euc-jp.txt
	@grep '^0x[A-F]' jis0201.txt | sed 's/0x/0x8E/' >> euc-jp.txt
	@cat jis0208.txt | awk 'BEGIN {print "ibase=16"} index($$1,"#") != 1 {print substr($$2,3) "+" 8080; print substr($$3,3)}' | bc | awk '{ str = $$1; getline; printf "0x%04x 0x%04x\n", str, $$0}' >> euc-jp.txt








|







85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
	    ./txt2enc -e 0 -u 1 -s $$p > $$enc; \
	done

clean:
	@rm -f txt2enc *.enc euc-jp.txt euc-kr.txt

txt2enc: txt2enc.c
	@gcc -o txt2enc txt2enc.c

euc-jp.txt: ascii.txt Makefile
	@echo Building euc-jp.txt from components.
	@cat ascii.txt > euc-jp.txt
	@grep '^0x[A-F]' jis0201.txt | sed 's/0x/0x8E/' >> euc-jp.txt
	@cat jis0208.txt | awk 'BEGIN {print "ibase=16"} index($$1,"#") != 1 {print substr($$2,3) "+" 8080; print substr($$3,3)}' | bc | awk '{ str = $$1; getline; printf "0x%04x 0x%04x\n", str, $$0}' >> euc-jp.txt

Changes to tools/encoding/big5.txt.














1
2
3
4
5
6
7














#
#	Name:             BIG5 to Unicode table (complete)
#	Unicode version:  1.1
#	Table version:    0.0d3
#	Table format:     Format A
#	Date:             11 February 1994
#	Authors:          Glenn Adams <[email protected]>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# big5.txt --
#
#	BIG5 to Unicode table (modified)
#
# Copyright (c) 1998-1999 by Scriptics Corporation.
#
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
# RCS: @(#) $Id: big5.txt,v 1.1.2.2 1999/03/26 00:13:41 stanton Exp $
#
# NOTE: this table has been modified to include the 7-bit ASCII
# characters that are allowed in BIG5 files.
#
#
#	Name:             BIG5 to Unicode table (complete)
#	Unicode version:  1.1
#	Table version:    0.0d3
#	Table format:     Format A
#	Date:             11 February 1994
#	Authors:          Glenn Adams <[email protected]>
81
82
83
84
85
86
87































































































88
89
90
91
92
93
94
#					expanded algorithmically by a parser or editor.
#
#	The entries are in BIG5 order
#
#	Any comments or problems, contact <[email protected]>
#
#































































































0xA140	0x3000	# IDEOGRAPHIC SPACE
0xA141	0xFF0C	# FULLWIDTH COMMA
0xA142	0x3001	# IDEOGRAPHIC COMMA
0xA143	0x3002	# IDEOGRAPHIC FULL STOP
0xA144	0xFF0E	# FULLWIDTH FULL STOP
0xA145	0x2022	# BULLET
0xA146	0xFF1B	# FULLWIDTH SEMICOLON







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
#					expanded algorithmically by a parser or editor.
#
#	The entries are in BIG5 order
#
#	Any comments or problems, contact <[email protected]>
#
#
0x20	0x0020	# SPACE
0x21	0x0021	# EXCLAMATION MARK
0x22	0x0022	# QUOTATION MARK
0x23	0x0023	# NUMBER SIGN
0x24	0x0024	# DOLLAR SIGN
0x25	0x0025	# PERCENT SIGN
0x26	0x0026	# AMPERSAND
0x27	0x0027	# APOSTROPHE
0x28	0x0028	# LEFT PARENTHESIS
0x29	0x0029	# RIGHT PARENTHESIS
0x2A	0x002A	# ASTERISK
0x2B	0x002B	# PLUS SIGN
0x2C	0x002C	# COMMA
0x2D	0x002D	# HYPHEN-MINUS
0x2E	0x002E	# FULL STOP
0x2F	0x002F	# SOLIDUS
0x30	0x0030	# DIGIT ZERO
0x31	0x0031	# DIGIT ONE
0x32	0x0032	# DIGIT TWO
0x33	0x0033	# DIGIT THREE
0x34	0x0034	# DIGIT FOUR
0x35	0x0035	# DIGIT FIVE
0x36	0x0036	# DIGIT SIX
0x37	0x0037	# DIGIT SEVEN
0x38	0x0038	# DIGIT EIGHT
0x39	0x0039	# DIGIT NINE
0x3A	0x003A	# COLON
0x3B	0x003B	# SEMICOLON
0x3C	0x003C	# LESS-THAN SIGN
0x3D	0x003D	# EQUALS SIGN
0x3E	0x003E	# GREATER-THAN SIGN
0x3F	0x003F	# QUESTION MARK
0x40	0x0040	# COMMERCIAL AT
0x41	0x0041	# LATIN CAPITAL LETTER A
0x42	0x0042	# LATIN CAPITAL LETTER B
0x43	0x0043	# LATIN CAPITAL LETTER C
0x44	0x0044	# LATIN CAPITAL LETTER D
0x45	0x0045	# LATIN CAPITAL LETTER E
0x46	0x0046	# LATIN CAPITAL LETTER F
0x47	0x0047	# LATIN CAPITAL LETTER G
0x48	0x0048	# LATIN CAPITAL LETTER H
0x49	0x0049	# LATIN CAPITAL LETTER I
0x4A	0x004A	# LATIN CAPITAL LETTER J
0x4B	0x004B	# LATIN CAPITAL LETTER K
0x4C	0x004C	# LATIN CAPITAL LETTER L
0x4D	0x004D	# LATIN CAPITAL LETTER M
0x4E	0x004E	# LATIN CAPITAL LETTER N
0x4F	0x004F	# LATIN CAPITAL LETTER O
0x50	0x0050	# LATIN CAPITAL LETTER P
0x51	0x0051	# LATIN CAPITAL LETTER Q
0x52	0x0052	# LATIN CAPITAL LETTER R
0x53	0x0053	# LATIN CAPITAL LETTER S
0x54	0x0054	# LATIN CAPITAL LETTER T
0x55	0x0055	# LATIN CAPITAL LETTER U
0x56	0x0056	# LATIN CAPITAL LETTER V
0x57	0x0057	# LATIN CAPITAL LETTER W
0x58	0x0058	# LATIN CAPITAL LETTER X
0x59	0x0059	# LATIN CAPITAL LETTER Y
0x5A	0x005A	# LATIN CAPITAL LETTER Z
0x5B	0x005B	# LEFT SQUARE BRACKET
0x5C	0x005C	#REVERSE SOLIDUS (rendered as Halfwidth Yen Sign)
0x5D	0x005D	# RIGHT SQUARE BRACKET
0x5E	0x005E	# CIRCUMFLEX ACCENT
0x5F	0x005F	# LOW LINE
0x60	0x0060	# GRAVE ACCENT
0x61	0x0061	# LATIN SMALL LETTER A
0x62	0x0062	# LATIN SMALL LETTER B
0x63	0x0063	# LATIN SMALL LETTER C
0x64	0x0064	# LATIN SMALL LETTER D
0x65	0x0065	# LATIN SMALL LETTER E
0x66	0x0066	# LATIN SMALL LETTER F
0x67	0x0067	# LATIN SMALL LETTER G
0x68	0x0068	# LATIN SMALL LETTER H
0x69	0x0069	# LATIN SMALL LETTER I
0x6A	0x006A	# LATIN SMALL LETTER J
0x6B	0x006B	# LATIN SMALL LETTER K
0x6C	0x006C	# LATIN SMALL LETTER L
0x6D	0x006D	# LATIN SMALL LETTER M
0x6E	0x006E	# LATIN SMALL LETTER N
0x6F	0x006F	# LATIN SMALL LETTER O
0x70	0x0070	# LATIN SMALL LETTER P
0x71	0x0071	# LATIN SMALL LETTER Q
0x72	0x0072	# LATIN SMALL LETTER R
0x73	0x0073	# LATIN SMALL LETTER S
0x74	0x0074	# LATIN SMALL LETTER T
0x75	0x0075	# LATIN SMALL LETTER U
0x76	0x0076	# LATIN SMALL LETTER V
0x77	0x0077	# LATIN SMALL LETTER W
0x78	0x0078	# LATIN SMALL LETTER X
0x79	0x0079	# LATIN SMALL LETTER Y
0x7A	0x007A	# LATIN SMALL LETTER Z
0x7B	0x007B	# LEFT CURLY BRACKET
0x7C	0x007C	# VERTICAL LINE
0x7D	0x007D	# RIGHT CURLY BRACKET
0x7E	0x007E	# TILDE
0xA140	0x3000	# IDEOGRAPHIC SPACE
0xA141	0xFF0C	# FULLWIDTH COMMA
0xA142	0x3001	# IDEOGRAPHIC COMMA
0xA143	0x3002	# IDEOGRAPHIC FULL STOP
0xA144	0xFF0E	# FULLWIDTH FULL STOP
0xA145	0x2022	# BULLET
0xA146	0xFF1B	# FULLWIDTH SEMICOLON
Changes to tools/encoding/gb2312.txt.














1
2
3
4
5
6
7














#
#	Name:             GB2312-80 to Unicode table (complete, hex format)
#	Unicode version:  1.1
#	Table version:    0.0d2
#	Table format:     Format A
#	Date:             6 December 1993
#	Author:           Glenn Adams <[email protected]>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# gb2312.txt --
#
#	GB2312 to Unicode table (modified)
#
# Copyright (c) 1998-1999 by Scriptics Corporation.
#
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
# RCS: @(#) $Id: gb2312.txt,v 1.1.2.2 1999/03/26 00:13:43 stanton Exp $
#
# NOTE: this table has been modified to include the 7-bit ASCII
# characters that are allowed in GB2312 files.
#
#
#	Name:             GB2312-80 to Unicode table (complete, hex format)
#	Unicode version:  1.1
#	Table version:    0.0d2
#	Table format:     Format A
#	Date:             6 December 1993
#	Author:           Glenn Adams <[email protected]>
50
51
52
53
54
55
56































































































57
58
59
60
61
62
63
#			the high and low bytes correspond to the ku and ten of
#			the kuten form.  For example, 0x2121 -> 0x0101 -> 0101;
#			0x777E -> 0x575E -> 8794
#
#	Any comments or problems, contact <[email protected]>
#
#































































































0x2121	0x3000	# IDEOGRAPHIC SPACE
0x2122	0x3001	# IDEOGRAPHIC COMMA
0x2123	0x3002	# IDEOGRAPHIC FULL STOP
0x2124	0x30FB	# KATAKANA MIDDLE DOT
0x2125	0x02C9	# MODIFIER LETTER MACRON (Mandarin Chinese first tone)
0x2126	0x02C7	# CARON (Mandarin Chinese third tone)
0x2127	0x00A8	# DIAERESIS







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#			the high and low bytes correspond to the ku and ten of
#			the kuten form.  For example, 0x2121 -> 0x0101 -> 0101;
#			0x777E -> 0x575E -> 8794
#
#	Any comments or problems, contact <[email protected]>
#
#
0x20	0x0020	# SPACE
0x21	0x0021	# EXCLAMATION MARK
0x22	0x0022	# QUOTATION MARK
0x23	0x0023	# NUMBER SIGN
0x24	0x0024	# DOLLAR SIGN
0x25	0x0025	# PERCENT SIGN
0x26	0x0026	# AMPERSAND
0x27	0x0027	# APOSTROPHE
0x28	0x0028	# LEFT PARENTHESIS
0x29	0x0029	# RIGHT PARENTHESIS
0x2A	0x002A	# ASTERISK
0x2B	0x002B	# PLUS SIGN
0x2C	0x002C	# COMMA
0x2D	0x002D	# HYPHEN-MINUS
0x2E	0x002E	# FULL STOP
0x2F	0x002F	# SOLIDUS
0x30	0x0030	# DIGIT ZERO
0x31	0x0031	# DIGIT ONE
0x32	0x0032	# DIGIT TWO
0x33	0x0033	# DIGIT THREE
0x34	0x0034	# DIGIT FOUR
0x35	0x0035	# DIGIT FIVE
0x36	0x0036	# DIGIT SIX
0x37	0x0037	# DIGIT SEVEN
0x38	0x0038	# DIGIT EIGHT
0x39	0x0039	# DIGIT NINE
0x3A	0x003A	# COLON
0x3B	0x003B	# SEMICOLON
0x3C	0x003C	# LESS-THAN SIGN
0x3D	0x003D	# EQUALS SIGN
0x3E	0x003E	# GREATER-THAN SIGN
0x3F	0x003F	# QUESTION MARK
0x40	0x0040	# COMMERCIAL AT
0x41	0x0041	# LATIN CAPITAL LETTER A
0x42	0x0042	# LATIN CAPITAL LETTER B
0x43	0x0043	# LATIN CAPITAL LETTER C
0x44	0x0044	# LATIN CAPITAL LETTER D
0x45	0x0045	# LATIN CAPITAL LETTER E
0x46	0x0046	# LATIN CAPITAL LETTER F
0x47	0x0047	# LATIN CAPITAL LETTER G
0x48	0x0048	# LATIN CAPITAL LETTER H
0x49	0x0049	# LATIN CAPITAL LETTER I
0x4A	0x004A	# LATIN CAPITAL LETTER J
0x4B	0x004B	# LATIN CAPITAL LETTER K
0x4C	0x004C	# LATIN CAPITAL LETTER L
0x4D	0x004D	# LATIN CAPITAL LETTER M
0x4E	0x004E	# LATIN CAPITAL LETTER N
0x4F	0x004F	# LATIN CAPITAL LETTER O
0x50	0x0050	# LATIN CAPITAL LETTER P
0x51	0x0051	# LATIN CAPITAL LETTER Q
0x52	0x0052	# LATIN CAPITAL LETTER R
0x53	0x0053	# LATIN CAPITAL LETTER S
0x54	0x0054	# LATIN CAPITAL LETTER T
0x55	0x0055	# LATIN CAPITAL LETTER U
0x56	0x0056	# LATIN CAPITAL LETTER V
0x57	0x0057	# LATIN CAPITAL LETTER W
0x58	0x0058	# LATIN CAPITAL LETTER X
0x59	0x0059	# LATIN CAPITAL LETTER Y
0x5A	0x005A	# LATIN CAPITAL LETTER Z
0x5B	0x005B	# LEFT SQUARE BRACKET
0x5C	0x005C	#REVERSE SOLIDUS (rendered as Halfwidth Yen Sign)
0x5D	0x005D	# RIGHT SQUARE BRACKET
0x5E	0x005E	# CIRCUMFLEX ACCENT
0x5F	0x005F	# LOW LINE
0x60	0x0060	# GRAVE ACCENT
0x61	0x0061	# LATIN SMALL LETTER A
0x62	0x0062	# LATIN SMALL LETTER B
0x63	0x0063	# LATIN SMALL LETTER C
0x64	0x0064	# LATIN SMALL LETTER D
0x65	0x0065	# LATIN SMALL LETTER E
0x66	0x0066	# LATIN SMALL LETTER F
0x67	0x0067	# LATIN SMALL LETTER G
0x68	0x0068	# LATIN SMALL LETTER H
0x69	0x0069	# LATIN SMALL LETTER I
0x6A	0x006A	# LATIN SMALL LETTER J
0x6B	0x006B	# LATIN SMALL LETTER K
0x6C	0x006C	# LATIN SMALL LETTER L
0x6D	0x006D	# LATIN SMALL LETTER M
0x6E	0x006E	# LATIN SMALL LETTER N
0x6F	0x006F	# LATIN SMALL LETTER O
0x70	0x0070	# LATIN SMALL LETTER P
0x71	0x0071	# LATIN SMALL LETTER Q
0x72	0x0072	# LATIN SMALL LETTER R
0x73	0x0073	# LATIN SMALL LETTER S
0x74	0x0074	# LATIN SMALL LETTER T
0x75	0x0075	# LATIN SMALL LETTER U
0x76	0x0076	# LATIN SMALL LETTER V
0x77	0x0077	# LATIN SMALL LETTER W
0x78	0x0078	# LATIN SMALL LETTER X
0x79	0x0079	# LATIN SMALL LETTER Y
0x7A	0x007A	# LATIN SMALL LETTER Z
0x7B	0x007B	# LEFT CURLY BRACKET
0x7C	0x007C	# VERTICAL LINE
0x7D	0x007D	# RIGHT CURLY BRACKET
0x7E	0x007E	# TILDE
0x2121	0x3000	# IDEOGRAPHIC SPACE
0x2122	0x3001	# IDEOGRAPHIC COMMA
0x2123	0x3002	# IDEOGRAPHIC FULL STOP
0x2124	0x30FB	# KATAKANA MIDDLE DOT
0x2125	0x02C9	# MODIFIER LETTER MACRON (Mandarin Chinese first tone)
0x2126	0x02C7	# CARON (Mandarin Chinese third tone)
0x2127	0x00A8	# DIAERESIS