Difference between revisions of "ASCII EBCDIC Conversion"
Jump to navigation
Jump to search
PeterHarding (talk | contribs) |
PeterHarding (talk | contribs) |
||
(4 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
=References= | |||
See: | See: | ||
* http://www.nntp.perl.org/group/perl.mvs/2001/03/msg735.html | * http://www.nntp.perl.org/group/perl.mvs/2001/03/msg735.html | ||
* http://www.babbletower.net/index.html?/manencodings.html | * http://www.babbletower.net/index.html?/manencodings.html | ||
* http://www.guiffy.com/help/GuiffyHelp/Encodings.html | |||
* http://java.sun.com/j2se/1.3/docs/guide/intl/encoding.doc.html | |||
=Example= | |||
Convert encoding names between MIME and Java | |||
<pre> | |||
1 /* | |||
2 * The Apache Software License, Version 1.1 | |||
3 * | |||
4 * | |||
5 * Copyright (c) 1999 The Apache Software Foundation. All rights | |||
6 * reserved. | |||
7 * | |||
8 * Redistribution and use in source and binary forms, with or without | |||
9 * modification, are permitted provided that the following conditions | |||
10 * are met: | |||
11 * | |||
12 * 1. Redistributions of source code must retain the above copyright | |||
13 * notice, this list of conditions and the following disclaimer. | |||
14 * | |||
15 * 2. Redistributions in binary form must reproduce the above copyright | |||
16 * notice, this list of conditions and the following disclaimer in | |||
17 * the documentation and/or other materials provided with the | |||
18 * distribution. | |||
19 * | |||
20 * 3. The end-user documentation included with the redistribution, | |||
21 * if any, must include the following acknowledgment: | |||
22 * "This product includes software developed by the | |||
23 * Apache Software Foundation (http://www.apache.org/)." | |||
24 * Alternately, this acknowledgment may appear in the software itself, | |||
25 * if and wherever such third-party acknowledgments normally appear. | |||
26 * | |||
27 * 4. The names "Xerces" and "Apache Software Foundation" must | |||
28 * not be used to endorse or promote products derived from this | |||
29 * software without prior written permission. For written | |||
30 * permission, please contact apache@apache.org. | |||
31 * | |||
32 * 5. Products derived from this software may not be called "Apache", | |||
33 * nor may "Apache" appear in their name, without prior written | |||
34 * permission of the Apache Software Foundation. | |||
35 * | |||
36 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED | |||
37 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |||
38 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |||
39 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR | |||
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |||
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |||
42 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF | |||
43 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |||
44 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
45 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | |||
46 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |||
47 * SUCH DAMAGE. | |||
48 * ==================================================================== | |||
49 * | |||
50 * This software consists of voluntary contributions made by many | |||
51 * individuals on behalf of the Apache Software Foundation and was | |||
52 * originally based on software copyright (c) 1999, International | |||
53 * Business Machines, Inc., http://www.apache.org. For more | |||
54 * information on the Apache Software Foundation, please see | |||
55 * <http://www.apache.org/>. | |||
56 */ | |||
57 | |||
58 package org.apache.xerces.readers; | |||
59 | |||
60 import java.util.*; | |||
61 | |||
62 /** | |||
63 * MIME2Java is a convenience class which handles conversions between MIME charset names | |||
64 * and Java encoding names. | |||
65 * <p>The supported XML encodings are the intersection of XML-supported code sets and those | |||
66 * supported in JDK 1.1. | |||
67 * <p>MIME charset names are used on <var>xmlEncoding</var> parameters to methods such | |||
68 * as <code>TXDocument#setEncoding</code> and <code>DTD#setEncoding</code>. | |||
69 * <p>Java encoding names are used on <var>encoding</var> parameters to | |||
70 * methods such as <code>TXDocument#printWithFormat</code> and <code>DTD#printExternal</code>. | |||
71 * <P> | |||
72 * <TABLE BORDER="0" WIDTH="100%"> | |||
73 * <TR> | |||
74 * <TD WIDTH="33%"> | |||
75 * <P ALIGN="CENTER"><B>Common Name</B> | |||
76 * </TD> | |||
77 * <TD WIDTH="15%"> | |||
78 * <P ALIGN="CENTER"><B>Use this name in XML files</B> | |||
79 * </TD> | |||
80 * <TD WIDTH="12%"> | |||
81 * <P ALIGN="CENTER"><B>Name Type</B> | |||
82 * </TD> | |||
83 * <TD WIDTH="31%"> | |||
84 * <P ALIGN="CENTER"><B>Xerces converts to this Java Encoder Name</B> | |||
85 * </TD> | |||
86 * </TR> | |||
87 * <TR> | |||
88 * <TD WIDTH="33%">8 bit Unicode</TD> | |||
89 * <TD WIDTH="15%"> | |||
90 * <P ALIGN="CENTER">UTF-8 | |||
91 * </TD> | |||
92 * <TD WIDTH="12%"> | |||
93 * <P ALIGN="CENTER">IANA | |||
94 * </TD> | |||
95 * <TD WIDTH="31%"> | |||
96 * <P ALIGN="CENTER">UTF8 | |||
97 * </TD> | |||
98 * </TR> | |||
99 * <TR> | |||
100 * <TD WIDTH="33%">ISO Latin 1</TD> | |||
101 * <TD WIDTH="15%"> | |||
102 * <P ALIGN="CENTER">ISO-8859-1 | |||
103 * </TD> | |||
104 * <TD WIDTH="12%"> | |||
105 * <P ALIGN="CENTER">MIME | |||
106 * </TD> | |||
107 * <TD WIDTH="31%"> | |||
108 * <P ALIGN="CENTER">ISO-8859-1 | |||
109 * </TD> | |||
110 * </TR> | |||
111 * <TR> | |||
112 * <TD WIDTH="33%">ISO Latin 2</TD> | |||
113 * <TD WIDTH="15%"> | |||
114 * <P ALIGN="CENTER">ISO-8859-2 | |||
115 * </TD> | |||
116 * <TD WIDTH="12%"> | |||
117 * <P ALIGN="CENTER">MIME | |||
118 * </TD> | |||
119 * <TD WIDTH="31%"> | |||
120 * <P ALIGN="CENTER">ISO-8859-2 | |||
121 * </TD> | |||
122 * </TR> | |||
123 * <TR> | |||
124 * <TD WIDTH="33%">ISO Latin 3</TD> | |||
125 * <TD WIDTH="15%"> | |||
126 * <P ALIGN="CENTER">ISO-8859-3 | |||
127 * </TD> | |||
128 * <TD WIDTH="12%"> | |||
129 * <P ALIGN="CENTER">MIME | |||
130 * </TD> | |||
131 * <TD WIDTH="31%"> | |||
132 * <P ALIGN="CENTER">ISO-8859-3 | |||
133 * </TD> | |||
134 * </TR> | |||
135 * <TR> | |||
136 * <TD WIDTH="33%">ISO Latin 4</TD> | |||
137 * <TD WIDTH="15%"> | |||
138 * <P ALIGN="CENTER">ISO-8859-4 | |||
139 * </TD> | |||
140 * <TD WIDTH="12%"> | |||
141 * <P ALIGN="CENTER">MIME | |||
142 * </TD> | |||
143 * <TD WIDTH="31%"> | |||
144 * <P ALIGN="CENTER">ISO-8859-4 | |||
145 * </TD> | |||
146 * </TR> | |||
147 * <TR> | |||
148 * <TD WIDTH="33%">ISO Latin Cyrillic</TD> | |||
149 * <TD WIDTH="15%"> | |||
150 * <P ALIGN="CENTER">ISO-8859-5 | |||
151 * </TD> | |||
152 * <TD WIDTH="12%"> | |||
153 * <P ALIGN="CENTER">MIME | |||
154 * </TD> | |||
155 * <TD WIDTH="31%"> | |||
156 * <P ALIGN="CENTER">ISO-8859-5 | |||
157 * </TD> | |||
158 * </TR> | |||
159 * <TR> | |||
160 * <TD WIDTH="33%">ISO Latin Arabic</TD> | |||
161 * <TD WIDTH="15%"> | |||
162 * <P ALIGN="CENTER">ISO-8859-6 | |||
163 * </TD> | |||
164 * <TD WIDTH="12%"> | |||
165 * <P ALIGN="CENTER">MIME | |||
166 * </TD> | |||
167 * <TD WIDTH="31%"> | |||
168 * <P ALIGN="CENTER">ISO-8859-6 | |||
169 * </TD> | |||
170 * </TR> | |||
171 * <TR> | |||
172 * <TD WIDTH="33%">ISO Latin Greek</TD> | |||
173 * <TD WIDTH="15%"> | |||
174 * <P ALIGN="CENTER">ISO-8859-7 | |||
175 * </TD> | |||
176 * <TD WIDTH="12%"> | |||
177 * <P ALIGN="CENTER">MIME | |||
178 * </TD> | |||
179 * <TD WIDTH="31%"> | |||
180 * <P ALIGN="CENTER">ISO-8859-7 | |||
181 * </TD> | |||
182 * </TR> | |||
183 * <TR> | |||
184 * <TD WIDTH="33%">ISO Latin Hebrew</TD> | |||
185 * <TD WIDTH="15%"> | |||
186 * <P ALIGN="CENTER">ISO-8859-8 | |||
187 * </TD> | |||
188 * <TD WIDTH="12%"> | |||
189 * <P ALIGN="CENTER">MIME | |||
190 * </TD> | |||
191 * <TD WIDTH="31%"> | |||
192 * <P ALIGN="CENTER">ISO-8859-8 | |||
193 * </TD> | |||
194 * </TR> | |||
195 * <TR> | |||
196 * <TD WIDTH="33%">ISO Latin 5</TD> | |||
197 * <TD WIDTH="15%"> | |||
198 * <P ALIGN="CENTER">ISO-8859-9 | |||
199 * </TD> | |||
200 * <TD WIDTH="12%"> | |||
201 * <P ALIGN="CENTER">MIME | |||
202 * </TD> | |||
203 * <TD WIDTH="31%"> | |||
204 * <P ALIGN="CENTER">ISO-8859-9 | |||
205 * </TD> | |||
206 * </TR> | |||
207 * <TR> | |||
208 * <TD WIDTH="33%">EBCDIC: US</TD> | |||
209 * <TD WIDTH="15%"> | |||
210 * <P ALIGN="CENTER">ebcdic-cp-us | |||
211 * </TD> | |||
212 * <TD WIDTH="12%"> | |||
213 * <P ALIGN="CENTER">IANA | |||
214 * </TD> | |||
215 * <TD WIDTH="31%"> | |||
216 * <P ALIGN="CENTER">cp037 | |||
217 * </TD> | |||
218 * </TR> | |||
219 * <TR> | |||
220 * <TD WIDTH="33%">EBCDIC: Canada</TD> | |||
221 * <TD WIDTH="15%"> | |||
222 * <P ALIGN="CENTER">ebcdic-cp-ca | |||
223 * </TD> | |||
224 * <TD WIDTH="12%"> | |||
225 * <P ALIGN="CENTER">IANA | |||
226 * </TD> | |||
227 * <TD WIDTH="31%"> | |||
228 * <P ALIGN="CENTER">cp037 | |||
229 * </TD> | |||
230 * </TR> | |||
231 * <TR> | |||
232 * <TD WIDTH="33%">EBCDIC: Netherlands</TD> | |||
233 * <TD WIDTH="15%"> | |||
234 * <P ALIGN="CENTER">ebcdic-cp-nl | |||
235 * </TD> | |||
236 * <TD WIDTH="12%"> | |||
237 * <P ALIGN="CENTER">IANA | |||
238 * </TD> | |||
239 * <TD WIDTH="31%"> | |||
240 * <P ALIGN="CENTER">cp037 | |||
241 * </TD> | |||
242 * </TR> | |||
243 * <TR> | |||
244 * <TD WIDTH="33%">EBCDIC: Denmark</TD> | |||
245 * <TD WIDTH="15%"> | |||
246 * <P ALIGN="CENTER">ebcdic-cp-dk | |||
247 * </TD> | |||
248 * <TD WIDTH="12%"> | |||
249 * <P ALIGN="CENTER">IANA | |||
250 * </TD> | |||
251 * <TD WIDTH="31%"> | |||
252 * <P ALIGN="CENTER">cp277 | |||
253 * </TD> | |||
254 * </TR> | |||
255 * <TR> | |||
256 * <TD WIDTH="33%">EBCDIC: Norway</TD> | |||
257 * <TD WIDTH="15%"> | |||
258 * <P ALIGN="CENTER">ebcdic-cp-no | |||
259 * </TD> | |||
260 * <TD WIDTH="12%"> | |||
261 * <P ALIGN="CENTER">IANA | |||
262 * </TD> | |||
263 * <TD WIDTH="31%"> | |||
264 * <P ALIGN="CENTER">cp277 | |||
265 * </TD> | |||
266 * </TR> | |||
267 * <TR> | |||
268 * <TD WIDTH="33%">EBCDIC: Finland</TD> | |||
269 * <TD WIDTH="15%"> | |||
270 * <P ALIGN="CENTER">ebcdic-cp-fi | |||
271 * </TD> | |||
272 * <TD WIDTH="12%"> | |||
273 * <P ALIGN="CENTER">IANA | |||
274 * </TD> | |||
275 * <TD WIDTH="31%"> | |||
276 * <P ALIGN="CENTER">cp278 | |||
277 * </TD> | |||
278 * </TR> | |||
279 * <TR> | |||
280 * <TD WIDTH="33%">EBCDIC: Sweden</TD> | |||
281 * <TD WIDTH="15%"> | |||
282 * <P ALIGN="CENTER">ebcdic-cp-se | |||
283 * </TD> | |||
284 * <TD WIDTH="12%"> | |||
285 * <P ALIGN="CENTER">IANA | |||
286 * </TD> | |||
287 * <TD WIDTH="31%"> | |||
288 * <P ALIGN="CENTER">cp278 | |||
289 * </TD> | |||
290 * </TR> | |||
291 * <TR> | |||
292 * <TD WIDTH="33%">EBCDIC: Italy</TD> | |||
293 * <TD WIDTH="15%"> | |||
294 * <P ALIGN="CENTER">ebcdic-cp-it | |||
295 * </TD> | |||
296 * <TD WIDTH="12%"> | |||
297 * <P ALIGN="CENTER">IANA | |||
298 * </TD> | |||
299 * <TD WIDTH="31%"> | |||
300 * <P ALIGN="CENTER">cp280 | |||
301 * </TD> | |||
302 * </TR> | |||
303 * <TR> | |||
304 * <TD WIDTH="33%">EBCDIC: Spain, Latin America</TD> | |||
305 * <TD WIDTH="15%"> | |||
306 * <P ALIGN="CENTER">ebcdic-cp-es | |||
307 * </TD> | |||
308 * <TD WIDTH="12%"> | |||
309 * <P ALIGN="CENTER">IANA | |||
310 * </TD> | |||
311 * <TD WIDTH="31%"> | |||
312 * <P ALIGN="CENTER">cp284 | |||
313 * </TD> | |||
314 * </TR> | |||
315 * <TR> | |||
316 * <TD WIDTH="33%">EBCDIC: Great Britain</TD> | |||
317 * <TD WIDTH="15%"> | |||
318 * <P ALIGN="CENTER">ebcdic-cp-gb | |||
319 * </TD> | |||
320 * <TD WIDTH="12%"> | |||
321 * <P ALIGN="CENTER">IANA | |||
322 * </TD> | |||
323 * <TD WIDTH="31%"> | |||
324 * <P ALIGN="CENTER">cp285 | |||
325 * </TD> | |||
326 * </TR> | |||
327 * <TR> | |||
328 * <TD WIDTH="33%">EBCDIC: France</TD> | |||
329 * <TD WIDTH="15%"> | |||
330 * <P ALIGN="CENTER">ebcdic-cp-fr | |||
331 * </TD> | |||
332 * <TD WIDTH="12%"> | |||
333 * <P ALIGN="CENTER">IANA | |||
334 * </TD> | |||
335 * <TD WIDTH="31%"> | |||
336 * <P ALIGN="CENTER">cp297 | |||
337 * </TD> | |||
338 * </TR> | |||
339 * <TR> | |||
340 * <TD WIDTH="33%">EBCDIC: Arabic</TD> | |||
341 * <TD WIDTH="15%"> | |||
342 * <P ALIGN="CENTER">ebcdic-cp-ar1 | |||
343 * </TD> | |||
344 * <TD WIDTH="12%"> | |||
345 * <P ALIGN="CENTER">IANA | |||
346 * </TD> | |||
347 * <TD WIDTH="31%"> | |||
348 * <P ALIGN="CENTER">cp420 | |||
349 * </TD> | |||
350 * </TR> | |||
351 * <TR> | |||
352 * <TD WIDTH="33%">EBCDIC: Hebrew</TD> | |||
353 * <TD WIDTH="15%"> | |||
354 * <P ALIGN="CENTER">ebcdic-cp-he | |||
355 * </TD> | |||
356 * <TD WIDTH="12%"> | |||
357 * <P ALIGN="CENTER">IANA | |||
358 * </TD> | |||
359 * <TD WIDTH="31%"> | |||
360 * <P ALIGN="CENTER">cp424 | |||
361 * </TD> | |||
362 * </TR> | |||
363 * <TR> | |||
364 * <TD WIDTH="33%">EBCDIC: Switzerland</TD> | |||
365 * <TD WIDTH="15%"> | |||
366 * <P ALIGN="CENTER">ebcdic-cp-ch | |||
367 * </TD> | |||
368 * <TD WIDTH="12%"> | |||
369 * <P ALIGN="CENTER">IANA | |||
370 * </TD> | |||
371 * <TD WIDTH="31%"> | |||
372 * <P ALIGN="CENTER">cp500 | |||
373 * </TD> | |||
374 * </TR> | |||
375 * <TR> | |||
376 * <TD WIDTH="33%">EBCDIC: Roece</TD> | |||
377 * <TD WIDTH="15%"> | |||
378 * <P ALIGN="CENTER">ebcdic-cp-roece | |||
379 * </TD> | |||
380 * <TD WIDTH="12%"> | |||
381 * <P ALIGN="CENTER">IANA | |||
382 * </TD> | |||
383 * <TD WIDTH="31%"> | |||
384 * <P ALIGN="CENTER">cp870 | |||
385 * </TD> | |||
386 * </TR> | |||
387 * <TR> | |||
388 * <TD WIDTH="33%">EBCDIC: Yogoslavia</TD> | |||
389 * <TD WIDTH="15%"> | |||
390 * <P ALIGN="CENTER">ebcdic-cp-yu | |||
391 * </TD> | |||
392 * <TD WIDTH="12%"> | |||
393 * <P ALIGN="CENTER">IANA | |||
394 * </TD> | |||
395 * <TD WIDTH="31%"> | |||
396 * <P ALIGN="CENTER">cp870 | |||
397 * </TD> | |||
398 * </TR> | |||
399 * <TR> | |||
400 * <TD WIDTH="33%">EBCDIC: Iceland</TD> | |||
401 * <TD WIDTH="15%"> | |||
402 * <P ALIGN="CENTER">ebcdic-cp-is | |||
403 * </TD> | |||
404 * <TD WIDTH="12%"> | |||
405 * <P ALIGN="CENTER">IANA | |||
406 * </TD> | |||
407 * <TD WIDTH="31%"> | |||
408 * <P ALIGN="CENTER">cp871 | |||
409 * </TD> | |||
410 * </TR> | |||
411 * <TR> | |||
412 * <TD WIDTH="33%">EBCDIC: Urdu</TD> | |||
413 * <TD WIDTH="15%"> | |||
414 * <P ALIGN="CENTER">ebcdic-cp-ar2 | |||
415 * </TD> | |||
416 * <TD WIDTH="12%"> | |||
417 * <P ALIGN="CENTER">IANA | |||
418 * </TD> | |||
419 * <TD WIDTH="31%"> | |||
420 * <P ALIGN="CENTER">cp918 | |||
421 * </TD> | |||
422 * </TR> | |||
423 * <TR> | |||
424 * <TD WIDTH="33%">Chinese for PRC, mixed 1/2 byte</TD> | |||
425 * <TD WIDTH="15%"> | |||
426 * <P ALIGN="CENTER">gb2312 | |||
427 * </TD> | |||
428 * <TD WIDTH="12%"> | |||
429 * <P ALIGN="CENTER">MIME | |||
430 * </TD> | |||
431 * <TD WIDTH="31%"> | |||
432 * <P ALIGN="CENTER">GB2312 | |||
433 * </TD> | |||
434 * </TR> | |||
435 * <TR> | |||
436 * <TD WIDTH="33%">Extended Unix Code, packed for Japanese</TD> | |||
437 * <TD WIDTH="15%"> | |||
438 * <P ALIGN="CENTER">euc-jp | |||
439 * </TD> | |||
440 * <TD WIDTH="12%"> | |||
441 * <P ALIGN="CENTER">MIME | |||
442 * </TD> | |||
443 * <TD WIDTH="31%"> | |||
444 * <P ALIGN="CENTER">eucjis | |||
445 * </TD> | |||
446 * </TR> | |||
447 * <TR> | |||
448 * <TD WIDTH="33%">Japanese: iso-2022-jp</TD> | |||
449 * <TD WIDTH="15%"> | |||
450 * <P ALIGN="CENTER">iso-2020-jp | |||
451 * </TD> | |||
452 * <TD WIDTH="12%"> | |||
453 * <P ALIGN="CENTER">MIME | |||
454 * </TD> | |||
455 * <TD WIDTH="31%"> | |||
456 * <P ALIGN="CENTER">JIS | |||
457 * </TD> | |||
458 * </TR> | |||
459 * <TR> | |||
460 * <TD WIDTH="33%">Japanese: Shift JIS</TD> | |||
461 * <TD WIDTH="15%"> | |||
462 * <P ALIGN="CENTER">Shift_JIS | |||
463 * </TD> | |||
464 * <TD WIDTH="12%"> | |||
465 * <P ALIGN="CENTER">MIME | |||
466 * </TD> | |||
467 * <TD WIDTH="31%"> | |||
468 * <P ALIGN="CENTER">SJIS | |||
469 * </TD> | |||
470 * </TR> | |||
471 * <TR> | |||
472 * <TD WIDTH="33%">Chinese: Big5</TD> | |||
473 * <TD WIDTH="15%"> | |||
474 * <P ALIGN="CENTER">Big5 | |||
475 * </TD> | |||
476 * <TD WIDTH="12%"> | |||
477 * <P ALIGN="CENTER">MIME | |||
478 * </TD> | |||
479 * <TD WIDTH="31%"> | |||
480 * <P ALIGN="CENTER">Big5 | |||
481 * </TD> | |||
482 * </TR> | |||
483 * <TR> | |||
484 * <TD WIDTH="33%">Extended Unix Code, packed for Korean</TD> | |||
485 * <TD WIDTH="15%"> | |||
486 * <P ALIGN="CENTER">euc-kr | |||
487 * </TD> | |||
488 * <TD WIDTH="12%"> | |||
489 * <P ALIGN="CENTER">MIME | |||
490 * </TD> | |||
491 * <TD WIDTH="31%"> | |||
492 * <P ALIGN="CENTER">iso2022kr | |||
493 * </TD> | |||
494 * </TR> | |||
495 * <TR> | |||
496 * <TD WIDTH="33%">Cyrillic</TD> | |||
497 * <TD WIDTH="15%"> | |||
498 * <P ALIGN="CENTER">koi8-r | |||
499 * </TD> | |||
500 * <TD WIDTH="12%"> | |||
501 * <P ALIGN="CENTER">MIME | |||
502 * </TD> | |||
503 * <TD WIDTH="31%"> | |||
504 * <P ALIGN="CENTER">koi8-r | |||
505 * </TD> | |||
506 * </TR> | |||
507 * </TABLE> | |||
508 * | |||
509 * @version | |||
510 * @author TAMURA Kent <kent@trl.ibm.co.jp> | |||
511 */ | |||
512 public class MIME2Java { | |||
513 | |||
514 static private Hashtable s_enchash; | |||
515 static private Hashtable s_revhash; | |||
516 | |||
517 static { | |||
518 s_enchash = new Hashtable(); | |||
519 // <preferred MIME name>, <Java encoding name> | |||
520 s_enchash.put("UTF-8", "UTF8"); | |||
521 s_enchash.put("US-ASCII", "8859_1"); // ? | |||
522 s_enchash.put("ISO-8859-1", "8859_1"); | |||
523 s_enchash.put("ISO-8859-2", "8859_2"); | |||
524 s_enchash.put("ISO-8859-3", "8859_3"); | |||
525 s_enchash.put("ISO-8859-4", "8859_4"); | |||
526 s_enchash.put("ISO-8859-5", "8859_5"); | |||
527 s_enchash.put("ISO-8859-6", "8859_6"); | |||
528 s_enchash.put("ISO-8859-7", "8859_7"); | |||
529 s_enchash.put("ISO-8859-8", "8859_8"); | |||
530 s_enchash.put("ISO-8859-9", "8859_9"); | |||
531 s_enchash.put("ISO-2022-JP", "JIS"); | |||
532 s_enchash.put("SHIFT_JIS", "SJIS"); | |||
533 s_enchash.put("EUC-JP", "EUCJIS"); | |||
534 s_enchash.put("GB2312", "GB2312"); | |||
535 s_enchash.put("BIG5", "Big5"); | |||
536 s_enchash.put("EUC-KR", "KSC5601"); | |||
537 s_enchash.put("ISO-2022-KR", "ISO2022KR"); | |||
538 s_enchash.put("KOI8-R", "KOI8_R"); | |||
539 | |||
540 s_enchash.put("EBCDIC-CP-US", "CP037"); | |||
541 s_enchash.put("EBCDIC-CP-CA", "CP037"); | |||
542 s_enchash.put("EBCDIC-CP-NL", "CP037"); | |||
543 s_enchash.put("EBCDIC-CP-DK", "CP277"); | |||
544 s_enchash.put("EBCDIC-CP-NO", "CP277"); | |||
545 s_enchash.put("EBCDIC-CP-FI", "CP278"); | |||
546 s_enchash.put("EBCDIC-CP-SE", "CP278"); | |||
547 s_enchash.put("EBCDIC-CP-IT", "CP280"); | |||
548 s_enchash.put("EBCDIC-CP-ES", "CP284"); | |||
549 s_enchash.put("EBCDIC-CP-GB", "CP285"); | |||
550 s_enchash.put("EBCDIC-CP-FR", "CP297"); | |||
551 s_enchash.put("EBCDIC-CP-AR1", "CP420"); | |||
552 s_enchash.put("EBCDIC-CP-HE", "CP424"); | |||
553 s_enchash.put("EBCDIC-CP-CH", "CP500"); | |||
554 s_enchash.put("EBCDIC-CP-ROECE", "CP870"); | |||
555 s_enchash.put("EBCDIC-CP-YU", "CP870"); | |||
556 s_enchash.put("EBCDIC-CP-IS", "CP871"); | |||
557 s_enchash.put("EBCDIC-CP-AR2", "CP918"); | |||
558 | |||
559 // j:CNS11643 -> EUC-TW? | |||
560 // ISO-2022-CN? ISO-2022-CN-EXT? | |||
561 | |||
562 s_revhash = new Hashtable(); | |||
563 // <Java encoding name>, <preferred MIME name> | |||
564 s_revhash.put("UTF8", "UTF-8"); | |||
565 //s_revhash.put("8859_1", "US-ASCII"); // ? | |||
566 s_revhash.put("8859_1", "ISO-8859-1"); | |||
567 s_revhash.put("8859_2", "ISO-8859-2"); | |||
568 s_revhash.put("8859_3", "ISO-8859-3"); | |||
569 s_revhash.put("8859_4", "ISO-8859-4"); | |||
570 s_revhash.put("8859_5", "ISO-8859-5"); | |||
571 s_revhash.put("8859_6", "ISO-8859-6"); | |||
572 s_revhash.put("8859_7", "ISO-8859-7"); | |||
573 s_revhash.put("8859_8", "ISO-8859-8"); | |||
574 s_revhash.put("8859_9", "ISO-8859-9"); | |||
575 s_revhash.put("JIS", "ISO-2022-JP"); | |||
576 s_revhash.put("SJIS", "Shift_JIS"); | |||
577 s_revhash.put("EUCJIS", "EUC-JP"); | |||
578 s_revhash.put("GB2312", "GB2312"); | |||
579 s_revhash.put("BIG5", "Big5"); | |||
580 s_revhash.put("KSC5601", "EUC-KR"); | |||
581 s_revhash.put("ISO2022KR", "ISO-2022-KR"); | |||
582 s_revhash.put("KOI8_R", "KOI8-R"); | |||
583 | |||
584 s_revhash.put("CP037", "EBCDIC-CP-US"); | |||
585 s_revhash.put("CP037", "EBCDIC-CP-CA"); | |||
586 s_revhash.put("CP037", "EBCDIC-CP-NL"); | |||
587 s_revhash.put("CP277", "EBCDIC-CP-DK"); | |||
588 s_revhash.put("CP277", "EBCDIC-CP-NO"); | |||
589 s_revhash.put("CP278", "EBCDIC-CP-FI"); | |||
590 s_revhash.put("CP278", "EBCDIC-CP-SE"); | |||
591 s_revhash.put("CP280", "EBCDIC-CP-IT"); | |||
592 s_revhash.put("CP284", "EBCDIC-CP-ES"); | |||
593 s_revhash.put("CP285", "EBCDIC-CP-GB"); | |||
594 s_revhash.put("CP297", "EBCDIC-CP-FR"); | |||
595 s_revhash.put("CP420", "EBCDIC-CP-AR1"); | |||
596 s_revhash.put("CP424", "EBCDIC-CP-HE"); | |||
597 s_revhash.put("CP500", "EBCDIC-CP-CH"); | |||
598 s_revhash.put("CP870", "EBCDIC-CP-ROECE"); | |||
599 s_revhash.put("CP870", "EBCDIC-CP-YU"); | |||
600 s_revhash.put("CP871", "EBCDIC-CP-IS"); | |||
601 s_revhash.put("CP918", "EBCDIC-CP-AR2"); | |||
602 } | |||
603 | |||
604 private MIME2Java() { | |||
605 } | |||
606 | |||
607 /** | |||
608 * Convert a MIME charset name, also known as an XML encoding name, to a Java encoding name. | |||
609 * @param mimeCharsetName Case insensitive MIME charset name: <code>UTF-8, US-ASCII, ISO-8859-1, | |||
610 * ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, ISO-8859-6, | |||
611 * ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-2022-JP, Shift_JIS, | |||
612 * EUC-JP, GB2312, Big5, EUC-KR, ISO-2022-KR, KOI8-R, | |||
613 * EBCDIC-CP-US, EBCDIC-CP-CA, EBCDIC-CP-NL, EBCDIC-CP-DK, | |||
614 * EBCDIC-CP-NO, EBCDIC-CP-FI, EBCDIC-CP-SE, EBCDIC-CP-IT, | |||
615 * EBCDIC-CP-ES, EBCDIC-CP-GB, EBCDIC-CP-FR, EBCDIC-CP-AR1, | |||
616 * EBCDIC-CP-HE, EBCDIC-CP-CH, EBCDIC-CP-ROECE, EBCDIC-CP-YU, | |||
617 * EBCDIC-CP-IS and EBCDIC-CP-AR2</code>. | |||
618 * @return Java encoding name, or <var>null</var> if <var>mimeCharsetName</var> | |||
619 * is unknown. | |||
620 * @see #reverse | |||
621 */ | |||
622 public static String convert(String mimeCharsetName) { | |||
623 return (String)s_enchash.get(mimeCharsetName.toUpperCase()); | |||
624 } | |||
625 | |||
626 /** | |||
627 * Convert a Java encoding name to MIME charset name. | |||
628 * Available values of <i>encoding</i> are "UTF8", "8859_1", "8859_2", "8859_3", "8859_4", | |||
629 * "8859_5", "8859_6", "8859_7", "8859_8", "8859_9", "JIS", "SJIS", "EUCJIS", | |||
630 * "GB2312", "BIG5", "KSC5601", "ISO2022KR", "KOI8_R", "CP037", "CP277", "CP278", | |||
631 * "CP280", "CP284", "CP285", "CP297", "CP420", "CP424", "CP500", "CP870", "CP871" and "CP918". | |||
632 * @param encoding Case insensitive Java encoding name: <code>UTF8, 8859_1, 8859_2, 8859_3, | |||
633 * 8859_4, 8859_5, 8859_6, 8859_7, 8859_8, 8859_9, JIS, SJIS, EUCJIS, | |||
634 * GB2312, BIG5, KSC5601, ISO2022KR, KOI8_R, CP037, CP277, CP278, | |||
635 * CP280, CP284, CP285, CP297, CP420, CP424, CP500, CP870, CP871 | |||
636 * and CP918</code>. | |||
637 * @return MIME charset name, or <var>null</var> if <var>encoding</var> is unknown. | |||
638 * @see #convert | |||
639 */ | |||
640 public static String reverse(String encoding) { | |||
641 return (String)s_revhash.get(encoding.toUpperCase()); | |||
642 } | |||
643 } | |||
</pre> | |||
[[Category:ASCII]][[Category:Java]] | [[Category:ASCII]][[Category:Java]] |
Latest revision as of 14:17, 30 August 2008
References
See:
- http://www.nntp.perl.org/group/perl.mvs/2001/03/msg735.html
- http://www.babbletower.net/index.html?/manencodings.html
- http://www.guiffy.com/help/GuiffyHelp/Encodings.html
- http://java.sun.com/j2se/1.3/docs/guide/intl/encoding.doc.html
Example
Convert encoding names between MIME and Java
1 /* 2 * The Apache Software License, Version 1.1 3 * 4 * 5 * Copyright (c) 1999 The Apache Software Foundation. All rights 6 * reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in 17 * the documentation and/or other materials provided with the 18 * distribution. 19 * 20 * 3. The end-user documentation included with the redistribution, 21 * if any, must include the following acknowledgment: 22 * "This product includes software developed by the 23 * Apache Software Foundation (http://www.apache.org/)." 24 * Alternately, this acknowledgment may appear in the software itself, 25 * if and wherever such third-party acknowledgments normally appear. 26 * 27 * 4. The names "Xerces" and "Apache Software Foundation" must 28 * not be used to endorse or promote products derived from this 29 * software without prior written permission. For written 30 * permission, please contact apache@apache.org. 31 * 32 * 5. Products derived from this software may not be called "Apache", 33 * nor may "Apache" appear in their name, without prior written 34 * permission of the Apache Software Foundation. 35 * 36 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 37 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 38 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 39 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR 40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 42 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 43 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 44 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 45 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 46 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 47 * SUCH DAMAGE. 48 * ==================================================================== 49 * 50 * This software consists of voluntary contributions made by many 51 * individuals on behalf of the Apache Software Foundation and was 52 * originally based on software copyright (c) 1999, International 53 * Business Machines, Inc., http://www.apache.org. For more 54 * information on the Apache Software Foundation, please see 55 * <http://www.apache.org/>. 56 */ 57 58 package org.apache.xerces.readers; 59 60 import java.util.*; 61 62 /** 63 * MIME2Java is a convenience class which handles conversions between MIME charset names 64 * and Java encoding names. 65 * <p>The supported XML encodings are the intersection of XML-supported code sets and those 66 * supported in JDK 1.1. 67 * <p>MIME charset names are used on <var>xmlEncoding</var> parameters to methods such 68 * as <code>TXDocument#setEncoding</code> and <code>DTD#setEncoding</code>. 69 * <p>Java encoding names are used on <var>encoding</var> parameters to 70 * methods such as <code>TXDocument#printWithFormat</code> and <code>DTD#printExternal</code>. 71 * <P> 72 * <TABLE BORDER="0" WIDTH="100%"> 73 * <TR> 74 * <TD WIDTH="33%"> 75 * <P ALIGN="CENTER"><B>Common Name</B> 76 * </TD> 77 * <TD WIDTH="15%"> 78 * <P ALIGN="CENTER"><B>Use this name in XML files</B> 79 * </TD> 80 * <TD WIDTH="12%"> 81 * <P ALIGN="CENTER"><B>Name Type</B> 82 * </TD> 83 * <TD WIDTH="31%"> 84 * <P ALIGN="CENTER"><B>Xerces converts to this Java Encoder Name</B> 85 * </TD> 86 * </TR> 87 * <TR> 88 * <TD WIDTH="33%">8 bit Unicode</TD> 89 * <TD WIDTH="15%"> 90 * <P ALIGN="CENTER">UTF-8 91 * </TD> 92 * <TD WIDTH="12%"> 93 * <P ALIGN="CENTER">IANA 94 * </TD> 95 * <TD WIDTH="31%"> 96 * <P ALIGN="CENTER">UTF8 97 * </TD> 98 * </TR> 99 * <TR> 100 * <TD WIDTH="33%">ISO Latin 1</TD> 101 * <TD WIDTH="15%"> 102 * <P ALIGN="CENTER">ISO-8859-1 103 * </TD> 104 * <TD WIDTH="12%"> 105 * <P ALIGN="CENTER">MIME 106 * </TD> 107 * <TD WIDTH="31%"> 108 * <P ALIGN="CENTER">ISO-8859-1 109 * </TD> 110 * </TR> 111 * <TR> 112 * <TD WIDTH="33%">ISO Latin 2</TD> 113 * <TD WIDTH="15%"> 114 * <P ALIGN="CENTER">ISO-8859-2 115 * </TD> 116 * <TD WIDTH="12%"> 117 * <P ALIGN="CENTER">MIME 118 * </TD> 119 * <TD WIDTH="31%"> 120 * <P ALIGN="CENTER">ISO-8859-2 121 * </TD> 122 * </TR> 123 * <TR> 124 * <TD WIDTH="33%">ISO Latin 3</TD> 125 * <TD WIDTH="15%"> 126 * <P ALIGN="CENTER">ISO-8859-3 127 * </TD> 128 * <TD WIDTH="12%"> 129 * <P ALIGN="CENTER">MIME 130 * </TD> 131 * <TD WIDTH="31%"> 132 * <P ALIGN="CENTER">ISO-8859-3 133 * </TD> 134 * </TR> 135 * <TR> 136 * <TD WIDTH="33%">ISO Latin 4</TD> 137 * <TD WIDTH="15%"> 138 * <P ALIGN="CENTER">ISO-8859-4 139 * </TD> 140 * <TD WIDTH="12%"> 141 * <P ALIGN="CENTER">MIME 142 * </TD> 143 * <TD WIDTH="31%"> 144 * <P ALIGN="CENTER">ISO-8859-4 145 * </TD> 146 * </TR> 147 * <TR> 148 * <TD WIDTH="33%">ISO Latin Cyrillic</TD> 149 * <TD WIDTH="15%"> 150 * <P ALIGN="CENTER">ISO-8859-5 151 * </TD> 152 * <TD WIDTH="12%"> 153 * <P ALIGN="CENTER">MIME 154 * </TD> 155 * <TD WIDTH="31%"> 156 * <P ALIGN="CENTER">ISO-8859-5 157 * </TD> 158 * </TR> 159 * <TR> 160 * <TD WIDTH="33%">ISO Latin Arabic</TD> 161 * <TD WIDTH="15%"> 162 * <P ALIGN="CENTER">ISO-8859-6 163 * </TD> 164 * <TD WIDTH="12%"> 165 * <P ALIGN="CENTER">MIME 166 * </TD> 167 * <TD WIDTH="31%"> 168 * <P ALIGN="CENTER">ISO-8859-6 169 * </TD> 170 * </TR> 171 * <TR> 172 * <TD WIDTH="33%">ISO Latin Greek</TD> 173 * <TD WIDTH="15%"> 174 * <P ALIGN="CENTER">ISO-8859-7 175 * </TD> 176 * <TD WIDTH="12%"> 177 * <P ALIGN="CENTER">MIME 178 * </TD> 179 * <TD WIDTH="31%"> 180 * <P ALIGN="CENTER">ISO-8859-7 181 * </TD> 182 * </TR> 183 * <TR> 184 * <TD WIDTH="33%">ISO Latin Hebrew</TD> 185 * <TD WIDTH="15%"> 186 * <P ALIGN="CENTER">ISO-8859-8 187 * </TD> 188 * <TD WIDTH="12%"> 189 * <P ALIGN="CENTER">MIME 190 * </TD> 191 * <TD WIDTH="31%"> 192 * <P ALIGN="CENTER">ISO-8859-8 193 * </TD> 194 * </TR> 195 * <TR> 196 * <TD WIDTH="33%">ISO Latin 5</TD> 197 * <TD WIDTH="15%"> 198 * <P ALIGN="CENTER">ISO-8859-9 199 * </TD> 200 * <TD WIDTH="12%"> 201 * <P ALIGN="CENTER">MIME 202 * </TD> 203 * <TD WIDTH="31%"> 204 * <P ALIGN="CENTER">ISO-8859-9 205 * </TD> 206 * </TR> 207 * <TR> 208 * <TD WIDTH="33%">EBCDIC: US</TD> 209 * <TD WIDTH="15%"> 210 * <P ALIGN="CENTER">ebcdic-cp-us 211 * </TD> 212 * <TD WIDTH="12%"> 213 * <P ALIGN="CENTER">IANA 214 * </TD> 215 * <TD WIDTH="31%"> 216 * <P ALIGN="CENTER">cp037 217 * </TD> 218 * </TR> 219 * <TR> 220 * <TD WIDTH="33%">EBCDIC: Canada</TD> 221 * <TD WIDTH="15%"> 222 * <P ALIGN="CENTER">ebcdic-cp-ca 223 * </TD> 224 * <TD WIDTH="12%"> 225 * <P ALIGN="CENTER">IANA 226 * </TD> 227 * <TD WIDTH="31%"> 228 * <P ALIGN="CENTER">cp037 229 * </TD> 230 * </TR> 231 * <TR> 232 * <TD WIDTH="33%">EBCDIC: Netherlands</TD> 233 * <TD WIDTH="15%"> 234 * <P ALIGN="CENTER">ebcdic-cp-nl 235 * </TD> 236 * <TD WIDTH="12%"> 237 * <P ALIGN="CENTER">IANA 238 * </TD> 239 * <TD WIDTH="31%"> 240 * <P ALIGN="CENTER">cp037 241 * </TD> 242 * </TR> 243 * <TR> 244 * <TD WIDTH="33%">EBCDIC: Denmark</TD> 245 * <TD WIDTH="15%"> 246 * <P ALIGN="CENTER">ebcdic-cp-dk 247 * </TD> 248 * <TD WIDTH="12%"> 249 * <P ALIGN="CENTER">IANA 250 * </TD> 251 * <TD WIDTH="31%"> 252 * <P ALIGN="CENTER">cp277 253 * </TD> 254 * </TR> 255 * <TR> 256 * <TD WIDTH="33%">EBCDIC: Norway</TD> 257 * <TD WIDTH="15%"> 258 * <P ALIGN="CENTER">ebcdic-cp-no 259 * </TD> 260 * <TD WIDTH="12%"> 261 * <P ALIGN="CENTER">IANA 262 * </TD> 263 * <TD WIDTH="31%"> 264 * <P ALIGN="CENTER">cp277 265 * </TD> 266 * </TR> 267 * <TR> 268 * <TD WIDTH="33%">EBCDIC: Finland</TD> 269 * <TD WIDTH="15%"> 270 * <P ALIGN="CENTER">ebcdic-cp-fi 271 * </TD> 272 * <TD WIDTH="12%"> 273 * <P ALIGN="CENTER">IANA 274 * </TD> 275 * <TD WIDTH="31%"> 276 * <P ALIGN="CENTER">cp278 277 * </TD> 278 * </TR> 279 * <TR> 280 * <TD WIDTH="33%">EBCDIC: Sweden</TD> 281 * <TD WIDTH="15%"> 282 * <P ALIGN="CENTER">ebcdic-cp-se 283 * </TD> 284 * <TD WIDTH="12%"> 285 * <P ALIGN="CENTER">IANA 286 * </TD> 287 * <TD WIDTH="31%"> 288 * <P ALIGN="CENTER">cp278 289 * </TD> 290 * </TR> 291 * <TR> 292 * <TD WIDTH="33%">EBCDIC: Italy</TD> 293 * <TD WIDTH="15%"> 294 * <P ALIGN="CENTER">ebcdic-cp-it 295 * </TD> 296 * <TD WIDTH="12%"> 297 * <P ALIGN="CENTER">IANA 298 * </TD> 299 * <TD WIDTH="31%"> 300 * <P ALIGN="CENTER">cp280 301 * </TD> 302 * </TR> 303 * <TR> 304 * <TD WIDTH="33%">EBCDIC: Spain, Latin America</TD> 305 * <TD WIDTH="15%"> 306 * <P ALIGN="CENTER">ebcdic-cp-es 307 * </TD> 308 * <TD WIDTH="12%"> 309 * <P ALIGN="CENTER">IANA 310 * </TD> 311 * <TD WIDTH="31%"> 312 * <P ALIGN="CENTER">cp284 313 * </TD> 314 * </TR> 315 * <TR> 316 * <TD WIDTH="33%">EBCDIC: Great Britain</TD> 317 * <TD WIDTH="15%"> 318 * <P ALIGN="CENTER">ebcdic-cp-gb 319 * </TD> 320 * <TD WIDTH="12%"> 321 * <P ALIGN="CENTER">IANA 322 * </TD> 323 * <TD WIDTH="31%"> 324 * <P ALIGN="CENTER">cp285 325 * </TD> 326 * </TR> 327 * <TR> 328 * <TD WIDTH="33%">EBCDIC: France</TD> 329 * <TD WIDTH="15%"> 330 * <P ALIGN="CENTER">ebcdic-cp-fr 331 * </TD> 332 * <TD WIDTH="12%"> 333 * <P ALIGN="CENTER">IANA 334 * </TD> 335 * <TD WIDTH="31%"> 336 * <P ALIGN="CENTER">cp297 337 * </TD> 338 * </TR> 339 * <TR> 340 * <TD WIDTH="33%">EBCDIC: Arabic</TD> 341 * <TD WIDTH="15%"> 342 * <P ALIGN="CENTER">ebcdic-cp-ar1 343 * </TD> 344 * <TD WIDTH="12%"> 345 * <P ALIGN="CENTER">IANA 346 * </TD> 347 * <TD WIDTH="31%"> 348 * <P ALIGN="CENTER">cp420 349 * </TD> 350 * </TR> 351 * <TR> 352 * <TD WIDTH="33%">EBCDIC: Hebrew</TD> 353 * <TD WIDTH="15%"> 354 * <P ALIGN="CENTER">ebcdic-cp-he 355 * </TD> 356 * <TD WIDTH="12%"> 357 * <P ALIGN="CENTER">IANA 358 * </TD> 359 * <TD WIDTH="31%"> 360 * <P ALIGN="CENTER">cp424 361 * </TD> 362 * </TR> 363 * <TR> 364 * <TD WIDTH="33%">EBCDIC: Switzerland</TD> 365 * <TD WIDTH="15%"> 366 * <P ALIGN="CENTER">ebcdic-cp-ch 367 * </TD> 368 * <TD WIDTH="12%"> 369 * <P ALIGN="CENTER">IANA 370 * </TD> 371 * <TD WIDTH="31%"> 372 * <P ALIGN="CENTER">cp500 373 * </TD> 374 * </TR> 375 * <TR> 376 * <TD WIDTH="33%">EBCDIC: Roece</TD> 377 * <TD WIDTH="15%"> 378 * <P ALIGN="CENTER">ebcdic-cp-roece 379 * </TD> 380 * <TD WIDTH="12%"> 381 * <P ALIGN="CENTER">IANA 382 * </TD> 383 * <TD WIDTH="31%"> 384 * <P ALIGN="CENTER">cp870 385 * </TD> 386 * </TR> 387 * <TR> 388 * <TD WIDTH="33%">EBCDIC: Yogoslavia</TD> 389 * <TD WIDTH="15%"> 390 * <P ALIGN="CENTER">ebcdic-cp-yu 391 * </TD> 392 * <TD WIDTH="12%"> 393 * <P ALIGN="CENTER">IANA 394 * </TD> 395 * <TD WIDTH="31%"> 396 * <P ALIGN="CENTER">cp870 397 * </TD> 398 * </TR> 399 * <TR> 400 * <TD WIDTH="33%">EBCDIC: Iceland</TD> 401 * <TD WIDTH="15%"> 402 * <P ALIGN="CENTER">ebcdic-cp-is 403 * </TD> 404 * <TD WIDTH="12%"> 405 * <P ALIGN="CENTER">IANA 406 * </TD> 407 * <TD WIDTH="31%"> 408 * <P ALIGN="CENTER">cp871 409 * </TD> 410 * </TR> 411 * <TR> 412 * <TD WIDTH="33%">EBCDIC: Urdu</TD> 413 * <TD WIDTH="15%"> 414 * <P ALIGN="CENTER">ebcdic-cp-ar2 415 * </TD> 416 * <TD WIDTH="12%"> 417 * <P ALIGN="CENTER">IANA 418 * </TD> 419 * <TD WIDTH="31%"> 420 * <P ALIGN="CENTER">cp918 421 * </TD> 422 * </TR> 423 * <TR> 424 * <TD WIDTH="33%">Chinese for PRC, mixed 1/2 byte</TD> 425 * <TD WIDTH="15%"> 426 * <P ALIGN="CENTER">gb2312 427 * </TD> 428 * <TD WIDTH="12%"> 429 * <P ALIGN="CENTER">MIME 430 * </TD> 431 * <TD WIDTH="31%"> 432 * <P ALIGN="CENTER">GB2312 433 * </TD> 434 * </TR> 435 * <TR> 436 * <TD WIDTH="33%">Extended Unix Code, packed for Japanese</TD> 437 * <TD WIDTH="15%"> 438 * <P ALIGN="CENTER">euc-jp 439 * </TD> 440 * <TD WIDTH="12%"> 441 * <P ALIGN="CENTER">MIME 442 * </TD> 443 * <TD WIDTH="31%"> 444 * <P ALIGN="CENTER">eucjis 445 * </TD> 446 * </TR> 447 * <TR> 448 * <TD WIDTH="33%">Japanese: iso-2022-jp</TD> 449 * <TD WIDTH="15%"> 450 * <P ALIGN="CENTER">iso-2020-jp 451 * </TD> 452 * <TD WIDTH="12%"> 453 * <P ALIGN="CENTER">MIME 454 * </TD> 455 * <TD WIDTH="31%"> 456 * <P ALIGN="CENTER">JIS 457 * </TD> 458 * </TR> 459 * <TR> 460 * <TD WIDTH="33%">Japanese: Shift JIS</TD> 461 * <TD WIDTH="15%"> 462 * <P ALIGN="CENTER">Shift_JIS 463 * </TD> 464 * <TD WIDTH="12%"> 465 * <P ALIGN="CENTER">MIME 466 * </TD> 467 * <TD WIDTH="31%"> 468 * <P ALIGN="CENTER">SJIS 469 * </TD> 470 * </TR> 471 * <TR> 472 * <TD WIDTH="33%">Chinese: Big5</TD> 473 * <TD WIDTH="15%"> 474 * <P ALIGN="CENTER">Big5 475 * </TD> 476 * <TD WIDTH="12%"> 477 * <P ALIGN="CENTER">MIME 478 * </TD> 479 * <TD WIDTH="31%"> 480 * <P ALIGN="CENTER">Big5 481 * </TD> 482 * </TR> 483 * <TR> 484 * <TD WIDTH="33%">Extended Unix Code, packed for Korean</TD> 485 * <TD WIDTH="15%"> 486 * <P ALIGN="CENTER">euc-kr 487 * </TD> 488 * <TD WIDTH="12%"> 489 * <P ALIGN="CENTER">MIME 490 * </TD> 491 * <TD WIDTH="31%"> 492 * <P ALIGN="CENTER">iso2022kr 493 * </TD> 494 * </TR> 495 * <TR> 496 * <TD WIDTH="33%">Cyrillic</TD> 497 * <TD WIDTH="15%"> 498 * <P ALIGN="CENTER">koi8-r 499 * </TD> 500 * <TD WIDTH="12%"> 501 * <P ALIGN="CENTER">MIME 502 * </TD> 503 * <TD WIDTH="31%"> 504 * <P ALIGN="CENTER">koi8-r 505 * </TD> 506 * </TR> 507 * </TABLE> 508 * 509 * @version 510 * @author TAMURA Kent <kent@trl.ibm.co.jp> 511 */ 512 public class MIME2Java { 513 514 static private Hashtable s_enchash; 515 static private Hashtable s_revhash; 516 517 static { 518 s_enchash = new Hashtable(); 519 // <preferred MIME name>, <Java encoding name> 520 s_enchash.put("UTF-8", "UTF8"); 521 s_enchash.put("US-ASCII", "8859_1"); // ? 522 s_enchash.put("ISO-8859-1", "8859_1"); 523 s_enchash.put("ISO-8859-2", "8859_2"); 524 s_enchash.put("ISO-8859-3", "8859_3"); 525 s_enchash.put("ISO-8859-4", "8859_4"); 526 s_enchash.put("ISO-8859-5", "8859_5"); 527 s_enchash.put("ISO-8859-6", "8859_6"); 528 s_enchash.put("ISO-8859-7", "8859_7"); 529 s_enchash.put("ISO-8859-8", "8859_8"); 530 s_enchash.put("ISO-8859-9", "8859_9"); 531 s_enchash.put("ISO-2022-JP", "JIS"); 532 s_enchash.put("SHIFT_JIS", "SJIS"); 533 s_enchash.put("EUC-JP", "EUCJIS"); 534 s_enchash.put("GB2312", "GB2312"); 535 s_enchash.put("BIG5", "Big5"); 536 s_enchash.put("EUC-KR", "KSC5601"); 537 s_enchash.put("ISO-2022-KR", "ISO2022KR"); 538 s_enchash.put("KOI8-R", "KOI8_R"); 539 540 s_enchash.put("EBCDIC-CP-US", "CP037"); 541 s_enchash.put("EBCDIC-CP-CA", "CP037"); 542 s_enchash.put("EBCDIC-CP-NL", "CP037"); 543 s_enchash.put("EBCDIC-CP-DK", "CP277"); 544 s_enchash.put("EBCDIC-CP-NO", "CP277"); 545 s_enchash.put("EBCDIC-CP-FI", "CP278"); 546 s_enchash.put("EBCDIC-CP-SE", "CP278"); 547 s_enchash.put("EBCDIC-CP-IT", "CP280"); 548 s_enchash.put("EBCDIC-CP-ES", "CP284"); 549 s_enchash.put("EBCDIC-CP-GB", "CP285"); 550 s_enchash.put("EBCDIC-CP-FR", "CP297"); 551 s_enchash.put("EBCDIC-CP-AR1", "CP420"); 552 s_enchash.put("EBCDIC-CP-HE", "CP424"); 553 s_enchash.put("EBCDIC-CP-CH", "CP500"); 554 s_enchash.put("EBCDIC-CP-ROECE", "CP870"); 555 s_enchash.put("EBCDIC-CP-YU", "CP870"); 556 s_enchash.put("EBCDIC-CP-IS", "CP871"); 557 s_enchash.put("EBCDIC-CP-AR2", "CP918"); 558 559 // j:CNS11643 -> EUC-TW? 560 // ISO-2022-CN? ISO-2022-CN-EXT? 561 562 s_revhash = new Hashtable(); 563 // <Java encoding name>, <preferred MIME name> 564 s_revhash.put("UTF8", "UTF-8"); 565 //s_revhash.put("8859_1", "US-ASCII"); // ? 566 s_revhash.put("8859_1", "ISO-8859-1"); 567 s_revhash.put("8859_2", "ISO-8859-2"); 568 s_revhash.put("8859_3", "ISO-8859-3"); 569 s_revhash.put("8859_4", "ISO-8859-4"); 570 s_revhash.put("8859_5", "ISO-8859-5"); 571 s_revhash.put("8859_6", "ISO-8859-6"); 572 s_revhash.put("8859_7", "ISO-8859-7"); 573 s_revhash.put("8859_8", "ISO-8859-8"); 574 s_revhash.put("8859_9", "ISO-8859-9"); 575 s_revhash.put("JIS", "ISO-2022-JP"); 576 s_revhash.put("SJIS", "Shift_JIS"); 577 s_revhash.put("EUCJIS", "EUC-JP"); 578 s_revhash.put("GB2312", "GB2312"); 579 s_revhash.put("BIG5", "Big5"); 580 s_revhash.put("KSC5601", "EUC-KR"); 581 s_revhash.put("ISO2022KR", "ISO-2022-KR"); 582 s_revhash.put("KOI8_R", "KOI8-R"); 583 584 s_revhash.put("CP037", "EBCDIC-CP-US"); 585 s_revhash.put("CP037", "EBCDIC-CP-CA"); 586 s_revhash.put("CP037", "EBCDIC-CP-NL"); 587 s_revhash.put("CP277", "EBCDIC-CP-DK"); 588 s_revhash.put("CP277", "EBCDIC-CP-NO"); 589 s_revhash.put("CP278", "EBCDIC-CP-FI"); 590 s_revhash.put("CP278", "EBCDIC-CP-SE"); 591 s_revhash.put("CP280", "EBCDIC-CP-IT"); 592 s_revhash.put("CP284", "EBCDIC-CP-ES"); 593 s_revhash.put("CP285", "EBCDIC-CP-GB"); 594 s_revhash.put("CP297", "EBCDIC-CP-FR"); 595 s_revhash.put("CP420", "EBCDIC-CP-AR1"); 596 s_revhash.put("CP424", "EBCDIC-CP-HE"); 597 s_revhash.put("CP500", "EBCDIC-CP-CH"); 598 s_revhash.put("CP870", "EBCDIC-CP-ROECE"); 599 s_revhash.put("CP870", "EBCDIC-CP-YU"); 600 s_revhash.put("CP871", "EBCDIC-CP-IS"); 601 s_revhash.put("CP918", "EBCDIC-CP-AR2"); 602 } 603 604 private MIME2Java() { 605 } 606 607 /** 608 * Convert a MIME charset name, also known as an XML encoding name, to a Java encoding name. 609 * @param mimeCharsetName Case insensitive MIME charset name: <code>UTF-8, US-ASCII, ISO-8859-1, 610 * ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, ISO-8859-6, 611 * ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-2022-JP, Shift_JIS, 612 * EUC-JP, GB2312, Big5, EUC-KR, ISO-2022-KR, KOI8-R, 613 * EBCDIC-CP-US, EBCDIC-CP-CA, EBCDIC-CP-NL, EBCDIC-CP-DK, 614 * EBCDIC-CP-NO, EBCDIC-CP-FI, EBCDIC-CP-SE, EBCDIC-CP-IT, 615 * EBCDIC-CP-ES, EBCDIC-CP-GB, EBCDIC-CP-FR, EBCDIC-CP-AR1, 616 * EBCDIC-CP-HE, EBCDIC-CP-CH, EBCDIC-CP-ROECE, EBCDIC-CP-YU, 617 * EBCDIC-CP-IS and EBCDIC-CP-AR2</code>. 618 * @return Java encoding name, or <var>null</var> if <var>mimeCharsetName</var> 619 * is unknown. 620 * @see #reverse 621 */ 622 public static String convert(String mimeCharsetName) { 623 return (String)s_enchash.get(mimeCharsetName.toUpperCase()); 624 } 625 626 /** 627 * Convert a Java encoding name to MIME charset name. 628 * Available values of <i>encoding</i> are "UTF8", "8859_1", "8859_2", "8859_3", "8859_4", 629 * "8859_5", "8859_6", "8859_7", "8859_8", "8859_9", "JIS", "SJIS", "EUCJIS", 630 * "GB2312", "BIG5", "KSC5601", "ISO2022KR", "KOI8_R", "CP037", "CP277", "CP278", 631 * "CP280", "CP284", "CP285", "CP297", "CP420", "CP424", "CP500", "CP870", "CP871" and "CP918". 632 * @param encoding Case insensitive Java encoding name: <code>UTF8, 8859_1, 8859_2, 8859_3, 633 * 8859_4, 8859_5, 8859_6, 8859_7, 8859_8, 8859_9, JIS, SJIS, EUCJIS, 634 * GB2312, BIG5, KSC5601, ISO2022KR, KOI8_R, CP037, CP277, CP278, 635 * CP280, CP284, CP285, CP297, CP420, CP424, CP500, CP870, CP871 636 * and CP918</code>. 637 * @return MIME charset name, or <var>null</var> if <var>encoding</var> is unknown. 638 * @see #convert 639 */ 640 public static String reverse(String encoding) { 641 return (String)s_revhash.get(encoding.toUpperCase()); 642 } 643 }