Difference between revisions of "ASCII EBCDIC Conversion"
Jump to navigation
Jump to search
PeterHarding (talk | contribs) |
PeterHarding (talk | contribs) |
||
| (5 intermediate revisions by the same user not shown) | |||
| Line 1: | Line 1: | ||
=References= | |||
See: | See: | ||
* http://www.nntp.perl.org/group/perl.mvs/2001/03/msg735.html | * http://www.nntp.perl.org/group/perl.mvs/2001/03/msg735.html | ||
* http://www.babbletower.net/index.html?/manencodings.html | |||
* http://www.guiffy.com/help/GuiffyHelp/Encodings.html | |||
* http://java.sun.com/j2se/1.3/docs/guide/intl/encoding.doc.html | |||
=Example= | |||
Convert encoding names between MIME and Java | |||
<pre> | |||
1 /* | |||
2 * The Apache Software License, Version 1.1 | |||
3 * | |||
4 * | |||
5 * Copyright (c) 1999 The Apache Software Foundation. All rights | |||
6 * reserved. | |||
7 * | |||
8 * Redistribution and use in source and binary forms, with or without | |||
9 * modification, are permitted provided that the following conditions | |||
10 * are met: | |||
11 * | |||
12 * 1. Redistributions of source code must retain the above copyright | |||
13 * notice, this list of conditions and the following disclaimer. | |||
14 * | |||
15 * 2. Redistributions in binary form must reproduce the above copyright | |||
16 * notice, this list of conditions and the following disclaimer in | |||
17 * the documentation and/or other materials provided with the | |||
18 * distribution. | |||
19 * | |||
20 * 3. The end-user documentation included with the redistribution, | |||
21 * if any, must include the following acknowledgment: | |||
22 * "This product includes software developed by the | |||
23 * Apache Software Foundation (http://www.apache.org/)." | |||
24 * Alternately, this acknowledgment may appear in the software itself, | |||
25 * if and wherever such third-party acknowledgments normally appear. | |||
26 * | |||
27 * 4. The names "Xerces" and "Apache Software Foundation" must | |||
28 * not be used to endorse or promote products derived from this | |||
29 * software without prior written permission. For written | |||
30 * permission, please contact apache@apache.org. | |||
31 * | |||
32 * 5. Products derived from this software may not be called "Apache", | |||
33 * nor may "Apache" appear in their name, without prior written | |||
34 * permission of the Apache Software Foundation. | |||
35 * | |||
36 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED | |||
37 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |||
38 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |||
39 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR | |||
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |||
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |||
42 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF | |||
43 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |||
44 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
45 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | |||
46 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |||
47 * SUCH DAMAGE. | |||
48 * ==================================================================== | |||
49 * | |||
50 * This software consists of voluntary contributions made by many | |||
51 * individuals on behalf of the Apache Software Foundation and was | |||
52 * originally based on software copyright (c) 1999, International | |||
53 * Business Machines, Inc., http://www.apache.org. For more | |||
54 * information on the Apache Software Foundation, please see | |||
55 * <http://www.apache.org/>. | |||
56 */ | |||
57 | |||
58 package org.apache.xerces.readers; | |||
59 | |||
60 import java.util.*; | |||
61 | |||
62 /** | |||
63 * MIME2Java is a convenience class which handles conversions between MIME charset names | |||
64 * and Java encoding names. | |||
65 * <p>The supported XML encodings are the intersection of XML-supported code sets and those | |||
66 * supported in JDK 1.1. | |||
67 * <p>MIME charset names are used on <var>xmlEncoding</var> parameters to methods such | |||
68 * as <code>TXDocument#setEncoding</code> and <code>DTD#setEncoding</code>. | |||
69 * <p>Java encoding names are used on <var>encoding</var> parameters to | |||
70 * methods such as <code>TXDocument#printWithFormat</code> and <code>DTD#printExternal</code>. | |||
71 * <P> | |||
72 * <TABLE BORDER="0" WIDTH="100%"> | |||
73 * <TR> | |||
74 * <TD WIDTH="33%"> | |||
75 * <P ALIGN="CENTER"><B>Common Name</B> | |||
76 * </TD> | |||
77 * <TD WIDTH="15%"> | |||
78 * <P ALIGN="CENTER"><B>Use this name in XML files</B> | |||
79 * </TD> | |||
80 * <TD WIDTH="12%"> | |||
81 * <P ALIGN="CENTER"><B>Name Type</B> | |||
82 * </TD> | |||
83 * <TD WIDTH="31%"> | |||
84 * <P ALIGN="CENTER"><B>Xerces converts to this Java Encoder Name</B> | |||
85 * </TD> | |||
86 * </TR> | |||
87 * <TR> | |||
88 * <TD WIDTH="33%">8 bit Unicode</TD> | |||
89 * <TD WIDTH="15%"> | |||
90 * <P ALIGN="CENTER">UTF-8 | |||
91 * </TD> | |||
92 * <TD WIDTH="12%"> | |||
93 * <P ALIGN="CENTER">IANA | |||
94 * </TD> | |||
95 * <TD WIDTH="31%"> | |||
96 * <P ALIGN="CENTER">UTF8 | |||
97 * </TD> | |||
98 * </TR> | |||
99 * <TR> | |||
100 * <TD WIDTH="33%">ISO Latin 1</TD> | |||
101 * <TD WIDTH="15%"> | |||
102 * <P ALIGN="CENTER">ISO-8859-1 | |||
103 * </TD> | |||
104 * <TD WIDTH="12%"> | |||
105 * <P ALIGN="CENTER">MIME | |||
106 * </TD> | |||
107 * <TD WIDTH="31%"> | |||
108 * <P ALIGN="CENTER">ISO-8859-1 | |||
109 * </TD> | |||
110 * </TR> | |||
111 * <TR> | |||
112 * <TD WIDTH="33%">ISO Latin 2</TD> | |||
113 * <TD WIDTH="15%"> | |||
114 * <P ALIGN="CENTER">ISO-8859-2 | |||
115 * </TD> | |||
116 * <TD WIDTH="12%"> | |||
117 * <P ALIGN="CENTER">MIME | |||
118 * </TD> | |||
119 * <TD WIDTH="31%"> | |||
120 * <P ALIGN="CENTER">ISO-8859-2 | |||
121 * </TD> | |||
122 * </TR> | |||
123 * <TR> | |||
124 * <TD WIDTH="33%">ISO Latin 3</TD> | |||
125 * <TD WIDTH="15%"> | |||
126 * <P ALIGN="CENTER">ISO-8859-3 | |||
127 * </TD> | |||
128 * <TD WIDTH="12%"> | |||
129 * <P ALIGN="CENTER">MIME | |||
130 * </TD> | |||
131 * <TD WIDTH="31%"> | |||
132 * <P ALIGN="CENTER">ISO-8859-3 | |||
133 * </TD> | |||
134 * </TR> | |||
135 * <TR> | |||
136 * <TD WIDTH="33%">ISO Latin 4</TD> | |||
137 * <TD WIDTH="15%"> | |||
138 * <P ALIGN="CENTER">ISO-8859-4 | |||
139 * </TD> | |||
140 * <TD WIDTH="12%"> | |||
141 * <P ALIGN="CENTER">MIME | |||
142 * </TD> | |||
143 * <TD WIDTH="31%"> | |||
144 * <P ALIGN="CENTER">ISO-8859-4 | |||
145 * </TD> | |||
146 * </TR> | |||
147 * <TR> | |||
148 * <TD WIDTH="33%">ISO Latin Cyrillic</TD> | |||
149 * <TD WIDTH="15%"> | |||
150 * <P ALIGN="CENTER">ISO-8859-5 | |||
151 * </TD> | |||
152 * <TD WIDTH="12%"> | |||
153 * <P ALIGN="CENTER">MIME | |||
154 * </TD> | |||
155 * <TD WIDTH="31%"> | |||
156 * <P ALIGN="CENTER">ISO-8859-5 | |||
157 * </TD> | |||
158 * </TR> | |||
159 * <TR> | |||
160 * <TD WIDTH="33%">ISO Latin Arabic</TD> | |||
161 * <TD WIDTH="15%"> | |||
162 * <P ALIGN="CENTER">ISO-8859-6 | |||
163 * </TD> | |||
164 * <TD WIDTH="12%"> | |||
165 * <P ALIGN="CENTER">MIME | |||
166 * </TD> | |||
167 * <TD WIDTH="31%"> | |||
168 * <P ALIGN="CENTER">ISO-8859-6 | |||
169 * </TD> | |||
170 * </TR> | |||
171 * <TR> | |||
172 * <TD WIDTH="33%">ISO Latin Greek</TD> | |||
173 * <TD WIDTH="15%"> | |||
174 * <P ALIGN="CENTER">ISO-8859-7 | |||
175 * </TD> | |||
176 * <TD WIDTH="12%"> | |||
177 * <P ALIGN="CENTER">MIME | |||
178 * </TD> | |||
179 * <TD WIDTH="31%"> | |||
180 * <P ALIGN="CENTER">ISO-8859-7 | |||
181 * </TD> | |||
182 * </TR> | |||
183 * <TR> | |||
184 * <TD WIDTH="33%">ISO Latin Hebrew</TD> | |||
185 * <TD WIDTH="15%"> | |||
186 * <P ALIGN="CENTER">ISO-8859-8 | |||
187 * </TD> | |||
188 * <TD WIDTH="12%"> | |||
189 * <P ALIGN="CENTER">MIME | |||
190 * </TD> | |||
191 * <TD WIDTH="31%"> | |||
192 * <P ALIGN="CENTER">ISO-8859-8 | |||
193 * </TD> | |||
194 * </TR> | |||
195 * <TR> | |||
196 * <TD WIDTH="33%">ISO Latin 5</TD> | |||
197 * <TD WIDTH="15%"> | |||
198 * <P ALIGN="CENTER">ISO-8859-9 | |||
199 * </TD> | |||
200 * <TD WIDTH="12%"> | |||
201 * <P ALIGN="CENTER">MIME | |||
202 * </TD> | |||
203 * <TD WIDTH="31%"> | |||
204 * <P ALIGN="CENTER">ISO-8859-9 | |||
205 * </TD> | |||
206 * </TR> | |||
207 * <TR> | |||
208 * <TD WIDTH="33%">EBCDIC: US</TD> | |||
209 * <TD WIDTH="15%"> | |||
210 * <P ALIGN="CENTER">ebcdic-cp-us | |||
211 * </TD> | |||
212 * <TD WIDTH="12%"> | |||
213 * <P ALIGN="CENTER">IANA | |||
214 * </TD> | |||
215 * <TD WIDTH="31%"> | |||
216 * <P ALIGN="CENTER">cp037 | |||
217 * </TD> | |||
218 * </TR> | |||
219 * <TR> | |||
220 * <TD WIDTH="33%">EBCDIC: Canada</TD> | |||
221 * <TD WIDTH="15%"> | |||
222 * <P ALIGN="CENTER">ebcdic-cp-ca | |||
223 * </TD> | |||
224 * <TD WIDTH="12%"> | |||
225 * <P ALIGN="CENTER">IANA | |||
226 * </TD> | |||
227 * <TD WIDTH="31%"> | |||
228 * <P ALIGN="CENTER">cp037 | |||
229 * </TD> | |||
230 * </TR> | |||
231 * <TR> | |||
232 * <TD WIDTH="33%">EBCDIC: Netherlands</TD> | |||
233 * <TD WIDTH="15%"> | |||
234 * <P ALIGN="CENTER">ebcdic-cp-nl | |||
235 * </TD> | |||
236 * <TD WIDTH="12%"> | |||
237 * <P ALIGN="CENTER">IANA | |||
238 * </TD> | |||
239 * <TD WIDTH="31%"> | |||
240 * <P ALIGN="CENTER">cp037 | |||
241 * </TD> | |||
242 * </TR> | |||
243 * <TR> | |||
244 * <TD WIDTH="33%">EBCDIC: Denmark</TD> | |||
245 * <TD WIDTH="15%"> | |||
246 * <P ALIGN="CENTER">ebcdic-cp-dk | |||
247 * </TD> | |||
248 * <TD WIDTH="12%"> | |||
249 * <P ALIGN="CENTER">IANA | |||
250 * </TD> | |||
251 * <TD WIDTH="31%"> | |||
252 * <P ALIGN="CENTER">cp277 | |||
253 * </TD> | |||
254 * </TR> | |||
255 * <TR> | |||
256 * <TD WIDTH="33%">EBCDIC: Norway</TD> | |||
257 * <TD WIDTH="15%"> | |||
258 * <P ALIGN="CENTER">ebcdic-cp-no | |||
259 * </TD> | |||
260 * <TD WIDTH="12%"> | |||
261 * <P ALIGN="CENTER">IANA | |||
262 * </TD> | |||
263 * <TD WIDTH="31%"> | |||
264 * <P ALIGN="CENTER">cp277 | |||
265 * </TD> | |||
266 * </TR> | |||
267 * <TR> | |||
268 * <TD WIDTH="33%">EBCDIC: Finland</TD> | |||
269 * <TD WIDTH="15%"> | |||
270 * <P ALIGN="CENTER">ebcdic-cp-fi | |||
271 * </TD> | |||
272 * <TD WIDTH="12%"> | |||
273 * <P ALIGN="CENTER">IANA | |||
274 * </TD> | |||
275 * <TD WIDTH="31%"> | |||
276 * <P ALIGN="CENTER">cp278 | |||
277 * </TD> | |||
278 * </TR> | |||
279 * <TR> | |||
280 * <TD WIDTH="33%">EBCDIC: Sweden</TD> | |||
281 * <TD WIDTH="15%"> | |||
282 * <P ALIGN="CENTER">ebcdic-cp-se | |||
283 * </TD> | |||
284 * <TD WIDTH="12%"> | |||
285 * <P ALIGN="CENTER">IANA | |||
286 * </TD> | |||
287 * <TD WIDTH="31%"> | |||
288 * <P ALIGN="CENTER">cp278 | |||
289 * </TD> | |||
290 * </TR> | |||
291 * <TR> | |||
292 * <TD WIDTH="33%">EBCDIC: Italy</TD> | |||
293 * <TD WIDTH="15%"> | |||
294 * <P ALIGN="CENTER">ebcdic-cp-it | |||
295 * </TD> | |||
296 * <TD WIDTH="12%"> | |||
297 * <P ALIGN="CENTER">IANA | |||
298 * </TD> | |||
299 * <TD WIDTH="31%"> | |||
300 * <P ALIGN="CENTER">cp280 | |||
301 * </TD> | |||
302 * </TR> | |||
303 * <TR> | |||
304 * <TD WIDTH="33%">EBCDIC: Spain, Latin America</TD> | |||
305 * <TD WIDTH="15%"> | |||
306 * <P ALIGN="CENTER">ebcdic-cp-es | |||
307 * </TD> | |||
308 * <TD WIDTH="12%"> | |||
309 * <P ALIGN="CENTER">IANA | |||
310 * </TD> | |||
311 * <TD WIDTH="31%"> | |||
312 * <P ALIGN="CENTER">cp284 | |||
313 * </TD> | |||
314 * </TR> | |||
315 * <TR> | |||
316 * <TD WIDTH="33%">EBCDIC: Great Britain</TD> | |||
317 * <TD WIDTH="15%"> | |||
318 * <P ALIGN="CENTER">ebcdic-cp-gb | |||
319 * </TD> | |||
320 * <TD WIDTH="12%"> | |||
321 * <P ALIGN="CENTER">IANA | |||
322 * </TD> | |||
323 * <TD WIDTH="31%"> | |||
324 * <P ALIGN="CENTER">cp285 | |||
325 * </TD> | |||
326 * </TR> | |||
327 * <TR> | |||
328 * <TD WIDTH="33%">EBCDIC: France</TD> | |||
329 * <TD WIDTH="15%"> | |||
330 * <P ALIGN="CENTER">ebcdic-cp-fr | |||
331 * </TD> | |||
332 * <TD WIDTH="12%"> | |||
333 * <P ALIGN="CENTER">IANA | |||
334 * </TD> | |||
335 * <TD WIDTH="31%"> | |||
336 * <P ALIGN="CENTER">cp297 | |||
337 * </TD> | |||
338 * </TR> | |||
339 * <TR> | |||
340 * <TD WIDTH="33%">EBCDIC: Arabic</TD> | |||
341 * <TD WIDTH="15%"> | |||
342 * <P ALIGN="CENTER">ebcdic-cp-ar1 | |||
343 * </TD> | |||
344 * <TD WIDTH="12%"> | |||
345 * <P ALIGN="CENTER">IANA | |||
346 * </TD> | |||
347 * <TD WIDTH="31%"> | |||
348 * <P ALIGN="CENTER">cp420 | |||
349 * </TD> | |||
350 * </TR> | |||
351 * <TR> | |||
352 * <TD WIDTH="33%">EBCDIC: Hebrew</TD> | |||
353 * <TD WIDTH="15%"> | |||
354 * <P ALIGN="CENTER">ebcdic-cp-he | |||
355 * </TD> | |||
356 * <TD WIDTH="12%"> | |||
357 * <P ALIGN="CENTER">IANA | |||
358 * </TD> | |||
359 * <TD WIDTH="31%"> | |||
360 * <P ALIGN="CENTER">cp424 | |||
361 * </TD> | |||
362 * </TR> | |||
363 * <TR> | |||
364 * <TD WIDTH="33%">EBCDIC: Switzerland</TD> | |||
365 * <TD WIDTH="15%"> | |||
366 * <P ALIGN="CENTER">ebcdic-cp-ch | |||
367 * </TD> | |||
368 * <TD WIDTH="12%"> | |||
369 * <P ALIGN="CENTER">IANA | |||
370 * </TD> | |||
371 * <TD WIDTH="31%"> | |||
372 * <P ALIGN="CENTER">cp500 | |||
373 * </TD> | |||
374 * </TR> | |||
375 * <TR> | |||
376 * <TD WIDTH="33%">EBCDIC: Roece</TD> | |||
377 * <TD WIDTH="15%"> | |||
378 * <P ALIGN="CENTER">ebcdic-cp-roece | |||
379 * </TD> | |||
380 * <TD WIDTH="12%"> | |||
381 * <P ALIGN="CENTER">IANA | |||
382 * </TD> | |||
383 * <TD WIDTH="31%"> | |||
384 * <P ALIGN="CENTER">cp870 | |||
385 * </TD> | |||
386 * </TR> | |||
387 * <TR> | |||
388 * <TD WIDTH="33%">EBCDIC: Yugoslavia</TD> | |||
389 * <TD WIDTH="15%"> | |||
390 * <P ALIGN="CENTER">ebcdic-cp-yu | |||
391 * </TD> | |||
392 * <TD WIDTH="12%"> | |||
393 * <P ALIGN="CENTER">IANA | |||
394 * </TD> | |||
395 * <TD WIDTH="31%"> | |||
396 * <P ALIGN="CENTER">cp870 | |||
397 * </TD> | |||
398 * </TR> | |||
399 * <TR> | |||
400 * <TD WIDTH="33%">EBCDIC: Iceland</TD> | |||
401 * <TD WIDTH="15%"> | |||
402 * <P ALIGN="CENTER">ebcdic-cp-is | |||
403 * </TD> | |||
404 * <TD WIDTH="12%"> | |||
405 * <P ALIGN="CENTER">IANA | |||
406 * </TD> | |||
407 * <TD WIDTH="31%"> | |||
408 * <P ALIGN="CENTER">cp871 | |||
409 * </TD> | |||
410 * </TR> | |||
411 * <TR> | |||
412 * <TD WIDTH="33%">EBCDIC: Urdu</TD> | |||
413 * <TD WIDTH="15%"> | |||
414 * <P ALIGN="CENTER">ebcdic-cp-ar2 | |||
415 * </TD> | |||
416 * <TD WIDTH="12%"> | |||
417 * <P ALIGN="CENTER">IANA | |||
418 * </TD> | |||
419 * <TD WIDTH="31%"> | |||
420 * <P ALIGN="CENTER">cp918 | |||
421 * </TD> | |||
422 * </TR> | |||
423 * <TR> | |||
424 * <TD WIDTH="33%">Chinese for PRC, mixed 1/2 byte</TD> | |||
425 * <TD WIDTH="15%"> | |||
426 * <P ALIGN="CENTER">gb2312 | |||
427 * </TD> | |||
428 * <TD WIDTH="12%"> | |||
429 * <P ALIGN="CENTER">MIME | |||
430 * </TD> | |||
431 * <TD WIDTH="31%"> | |||
432 * <P ALIGN="CENTER">GB2312 | |||
433 * </TD> | |||
434 * </TR> | |||
435 * <TR> | |||
436 * <TD WIDTH="33%">Extended Unix Code, packed for Japanese</TD> | |||
437 * <TD WIDTH="15%"> | |||
438 * <P ALIGN="CENTER">euc-jp | |||
439 * </TD> | |||
440 * <TD WIDTH="12%"> | |||
441 * <P ALIGN="CENTER">MIME | |||
442 * </TD> | |||
443 * <TD WIDTH="31%"> | |||
444 * <P ALIGN="CENTER">eucjis | |||
445 * </TD> | |||
446 * </TR> | |||
447 * <TR> | |||
448 * <TD WIDTH="33%">Japanese: iso-2022-jp</TD> | |||
449 * <TD WIDTH="15%"> | |||
450 * <P ALIGN="CENTER">iso-2022-jp | |||
451 * </TD> | |||
452 * <TD WIDTH="12%"> | |||
453 * <P ALIGN="CENTER">MIME | |||
454 * </TD> | |||
455 * <TD WIDTH="31%"> | |||
456 * <P ALIGN="CENTER">JIS | |||
457 * </TD> | |||
458 * </TR> | |||
459 * <TR> | |||
460 * <TD WIDTH="33%">Japanese: Shift JIS</TD> | |||
461 * <TD WIDTH="15%"> | |||
462 * <P ALIGN="CENTER">Shift_JIS | |||
463 * </TD> | |||
464 * <TD WIDTH="12%"> | |||
465 * <P ALIGN="CENTER">MIME | |||
466 * </TD> | |||
467 * <TD WIDTH="31%"> | |||
468 * <P ALIGN="CENTER">SJIS | |||
469 * </TD> | |||
470 * </TR> | |||
471 * <TR> | |||
472 * <TD WIDTH="33%">Chinese: Big5</TD> | |||
473 * <TD WIDTH="15%"> | |||
474 * <P ALIGN="CENTER">Big5 | |||
475 * </TD> | |||
476 * <TD WIDTH="12%"> | |||
477 * <P ALIGN="CENTER">MIME | |||
478 * </TD> | |||
479 * <TD WIDTH="31%"> | |||
480 * <P ALIGN="CENTER">Big5 | |||
481 * </TD> | |||
482 * </TR> | |||
483 * <TR> | |||
484 * <TD WIDTH="33%">Extended Unix Code, packed for Korean</TD> | |||
485 * <TD WIDTH="15%"> | |||
486 * <P ALIGN="CENTER">euc-kr | |||
487 * </TD> | |||
488 * <TD WIDTH="12%"> | |||
489 * <P ALIGN="CENTER">MIME | |||
490 * </TD> | |||
491 * <TD WIDTH="31%"> | |||
492 * <P ALIGN="CENTER">KSC5601 | |||
493 * </TD> | |||
494 * </TR> | |||
495 * <TR> | |||
496 * <TD WIDTH="33%">Cyrillic</TD> | |||
497 * <TD WIDTH="15%"> | |||
498 * <P ALIGN="CENTER">koi8-r | |||
499 * </TD> | |||
500 * <TD WIDTH="12%"> | |||
501 * <P ALIGN="CENTER">MIME | |||
502 * </TD> | |||
503 * <TD WIDTH="31%"> | |||
504 * <P ALIGN="CENTER">koi8-r | |||
505 * </TD> | |||
506 * </TR> | |||
507 * </TABLE> | |||
508 * | |||
509 * @version | |||
510 * @author TAMURA Kent <kent@trl.ibm.co.jp> | |||
511 */ | |||
public class MIME2Java {

    // Lookup tables. Both are filled once by the static initializer below and
    // are only read afterwards. Every key is stored in upper case because
    // convert() and reverse() upper-case their argument before the lookup.

    /** Upper-case MIME charset name -> Java encoding name. */
    private static final Hashtable s_enchash = new Hashtable();

    /** Upper-case Java encoding name -> MIME charset name. */
    private static final Hashtable s_revhash = new Hashtable();

    static {
        // <preferred MIME name>, <Java encoding name>
        s_enchash.put("UTF-8", "UTF8");
        s_enchash.put("US-ASCII", "8859_1"); // ?
        s_enchash.put("ISO-8859-1", "8859_1");
        s_enchash.put("ISO-8859-2", "8859_2");
        s_enchash.put("ISO-8859-3", "8859_3");
        s_enchash.put("ISO-8859-4", "8859_4");
        s_enchash.put("ISO-8859-5", "8859_5");
        s_enchash.put("ISO-8859-6", "8859_6");
        s_enchash.put("ISO-8859-7", "8859_7");
        s_enchash.put("ISO-8859-8", "8859_8");
        s_enchash.put("ISO-8859-9", "8859_9");
        s_enchash.put("ISO-2022-JP", "JIS");
        s_enchash.put("SHIFT_JIS", "SJIS");
        s_enchash.put("EUC-JP", "EUCJIS");
        s_enchash.put("GB2312", "GB2312");
        s_enchash.put("BIG5", "Big5");
        s_enchash.put("EUC-KR", "KSC5601");
        s_enchash.put("ISO-2022-KR", "ISO2022KR");
        s_enchash.put("KOI8-R", "KOI8_R");

        s_enchash.put("EBCDIC-CP-US", "CP037");
        s_enchash.put("EBCDIC-CP-CA", "CP037");
        s_enchash.put("EBCDIC-CP-NL", "CP037");
        s_enchash.put("EBCDIC-CP-DK", "CP277");
        s_enchash.put("EBCDIC-CP-NO", "CP277");
        s_enchash.put("EBCDIC-CP-FI", "CP278");
        s_enchash.put("EBCDIC-CP-SE", "CP278");
        s_enchash.put("EBCDIC-CP-IT", "CP280");
        s_enchash.put("EBCDIC-CP-ES", "CP284");
        s_enchash.put("EBCDIC-CP-GB", "CP285");
        s_enchash.put("EBCDIC-CP-FR", "CP297");
        s_enchash.put("EBCDIC-CP-AR1", "CP420");
        s_enchash.put("EBCDIC-CP-HE", "CP424");
        s_enchash.put("EBCDIC-CP-CH", "CP500");
        s_enchash.put("EBCDIC-CP-ROECE", "CP870");
        s_enchash.put("EBCDIC-CP-YU", "CP870");
        s_enchash.put("EBCDIC-CP-IS", "CP871");
        s_enchash.put("EBCDIC-CP-AR2", "CP918");

        // j:CNS11643 -> EUC-TW?
        // ISO-2022-CN? ISO-2022-CN-EXT?

        // <Java encoding name>, <preferred MIME name>
        s_revhash.put("UTF8", "UTF-8");
        // "8859_1" deliberately reverses to ISO-8859-1, not US-ASCII.
        s_revhash.put("8859_1", "ISO-8859-1");
        s_revhash.put("8859_2", "ISO-8859-2");
        s_revhash.put("8859_3", "ISO-8859-3");
        s_revhash.put("8859_4", "ISO-8859-4");
        s_revhash.put("8859_5", "ISO-8859-5");
        s_revhash.put("8859_6", "ISO-8859-6");
        s_revhash.put("8859_7", "ISO-8859-7");
        s_revhash.put("8859_8", "ISO-8859-8");
        s_revhash.put("8859_9", "ISO-8859-9");
        s_revhash.put("JIS", "ISO-2022-JP");
        s_revhash.put("SJIS", "Shift_JIS");
        s_revhash.put("EUCJIS", "EUC-JP");
        s_revhash.put("GB2312", "GB2312");
        s_revhash.put("BIG5", "Big5");
        s_revhash.put("KSC5601", "EUC-KR");
        s_revhash.put("ISO2022KR", "ISO-2022-KR");
        s_revhash.put("KOI8_R", "KOI8-R");

        // Several EBCDIC MIME names share one Java code page. Hashtable.put
        // replaces the previous value for a key, so the LAST put for each code
        // page is the one reverse() returns: CP037 -> EBCDIC-CP-NL,
        // CP277 -> EBCDIC-CP-NO, CP278 -> EBCDIC-CP-SE, CP870 -> EBCDIC-CP-YU.
        // The order is kept as-is so existing output does not change.
        s_revhash.put("CP037", "EBCDIC-CP-US");
        s_revhash.put("CP037", "EBCDIC-CP-CA");
        s_revhash.put("CP037", "EBCDIC-CP-NL");
        s_revhash.put("CP277", "EBCDIC-CP-DK");
        s_revhash.put("CP277", "EBCDIC-CP-NO");
        s_revhash.put("CP278", "EBCDIC-CP-FI");
        s_revhash.put("CP278", "EBCDIC-CP-SE");
        s_revhash.put("CP280", "EBCDIC-CP-IT");
        s_revhash.put("CP284", "EBCDIC-CP-ES");
        s_revhash.put("CP285", "EBCDIC-CP-GB");
        s_revhash.put("CP297", "EBCDIC-CP-FR");
        s_revhash.put("CP420", "EBCDIC-CP-AR1");
        s_revhash.put("CP424", "EBCDIC-CP-HE");
        s_revhash.put("CP500", "EBCDIC-CP-CH");
        s_revhash.put("CP870", "EBCDIC-CP-ROECE");
        s_revhash.put("CP870", "EBCDIC-CP-YU");
        s_revhash.put("CP871", "EBCDIC-CP-IS");
        s_revhash.put("CP918", "EBCDIC-CP-AR2");
    }

    /** Not instantiable: the class only exposes static lookups. */
    private MIME2Java() {
    }

    /**
     * Convert a MIME charset name, also known as an XML encoding name, to a Java encoding name.
     * <p>The name is upper-cased with {@link Locale#ENGLISH} before the lookup, so the match
     * does not depend on the JVM default locale (a Turkish default locale would otherwise
     * upper-case <code>i</code> to a dotted capital I and miss names such as
     * <code>iso-8859-1</code>).
     * @param mimeCharsetName Case insensitive MIME charset name: <code>UTF-8, US-ASCII, ISO-8859-1,
     *                        ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, ISO-8859-6,
     *                        ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-2022-JP, Shift_JIS,
     *                        EUC-JP, GB2312, Big5, EUC-KR, ISO-2022-KR, KOI8-R,
     *                        EBCDIC-CP-US, EBCDIC-CP-CA, EBCDIC-CP-NL, EBCDIC-CP-DK,
     *                        EBCDIC-CP-NO, EBCDIC-CP-FI, EBCDIC-CP-SE, EBCDIC-CP-IT,
     *                        EBCDIC-CP-ES, EBCDIC-CP-GB, EBCDIC-CP-FR, EBCDIC-CP-AR1,
     *                        EBCDIC-CP-HE, EBCDIC-CP-CH, EBCDIC-CP-ROECE, EBCDIC-CP-YU,
     *                        EBCDIC-CP-IS and EBCDIC-CP-AR2</code>.
     * @return Java encoding name, or <var>null</var> if <var>mimeCharsetName</var>
     *         is unknown.
     * @throws NullPointerException if <var>mimeCharsetName</var> is <var>null</var>.
     * @see #reverse
     */
    public static String convert(String mimeCharsetName) {
        return (String) s_enchash.get(mimeCharsetName.toUpperCase(Locale.ENGLISH));
    }

    /**
     * Convert a Java encoding name to MIME charset name.
     * <p>The name is upper-cased with {@link Locale#ENGLISH} before the lookup, so the match
     * does not depend on the JVM default locale.
     * <p>Where several MIME names map to the same Java encoding, a single one is returned:
     * CP037 gives EBCDIC-CP-NL, CP277 gives EBCDIC-CP-NO, CP278 gives EBCDIC-CP-SE and
     * CP870 gives EBCDIC-CP-YU.
     * @param encoding Case insensitive Java encoding name: <code>UTF8, 8859_1, 8859_2, 8859_3,
     *                 8859_4, 8859_5, 8859_6, 8859_7, 8859_8, 8859_9, JIS, SJIS, EUCJIS,
     *                 GB2312, BIG5, KSC5601, ISO2022KR, KOI8_R, CP037, CP277, CP278,
     *                 CP280, CP284, CP285, CP297, CP420, CP424, CP500, CP870, CP871
     *                 and CP918</code>.
     * @return MIME charset name, or <var>null</var> if <var>encoding</var> is unknown.
     * @throws NullPointerException if <var>encoding</var> is <var>null</var>.
     * @see #convert
     */
    public static String reverse(String encoding) {
        return (String) s_revhash.get(encoding.toUpperCase(Locale.ENGLISH));
    }
}
</pre> | |||
[[Category:ASCII]][[Category:Java]] | [[Category:ASCII]][[Category:Java]] | ||
Latest revision as of 14:17, 30 August 2008
References
See:
- http://www.nntp.perl.org/group/perl.mvs/2001/03/msg735.html
- http://www.babbletower.net/index.html?/manencodings.html
- http://www.guiffy.com/help/GuiffyHelp/Encodings.html
- http://java.sun.com/j2se/1.3/docs/guide/intl/encoding.doc.html
Example
Convert encoding names between MIME and Java
1 /*
2 * The Apache Software License, Version 1.1
3 *
4 *
5 * Copyright (c) 1999 The Apache Software Foundation. All rights
6 * reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. The end-user documentation included with the redistribution,
21 * if any, must include the following acknowledgment:
22 * "This product includes software developed by the
23 * Apache Software Foundation (http://www.apache.org/)."
24 * Alternately, this acknowledgment may appear in the software itself,
25 * if and wherever such third-party acknowledgments normally appear.
26 *
27 * 4. The names "Xerces" and "Apache Software Foundation" must
28 * not be used to endorse or promote products derived from this
29 * software without prior written permission. For written
30 * permission, please contact apache@apache.org.
31 *
32 * 5. Products derived from this software may not be called "Apache",
33 * nor may "Apache" appear in their name, without prior written
34 * permission of the Apache Software Foundation.
35 *
36 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 * SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This software consists of voluntary contributions made by many
51 * individuals on behalf of the Apache Software Foundation and was
52 * originally based on software copyright (c) 1999, International
53 * Business Machines, Inc., http://www.apache.org. For more
54 * information on the Apache Software Foundation, please see
55 * <http://www.apache.org/>.
56 */
57
58 package org.apache.xerces.readers;
59
60 import java.util.*;
61
62 /**
63 * MIME2Java is a convenience class which handles conversions between MIME charset names
64 * and Java encoding names.
65 * <p>The supported XML encodings are the intersection of XML-supported code sets and those
66 * supported in JDK 1.1.
67 * <p>MIME charset names are used on <var>xmlEncoding</var> parameters to methods such
68 * as <code>TXDocument#setEncoding</code> and <code>DTD#setEncoding</code>.
69 * <p>Java encoding names are used on <var>encoding</var> parameters to
70 * methods such as <code>TXDocument#printWithFormat</code> and <code>DTD#printExternal</code>.
71 * <P>
72 * <TABLE BORDER="0" WIDTH="100%">
73 * <TR>
74 * <TD WIDTH="33%">
75 * <P ALIGN="CENTER"><B>Common Name</B>
76 * </TD>
77 * <TD WIDTH="15%">
78 * <P ALIGN="CENTER"><B>Use this name in XML files</B>
79 * </TD>
80 * <TD WIDTH="12%">
81 * <P ALIGN="CENTER"><B>Name Type</B>
82 * </TD>
83 * <TD WIDTH="31%">
84 * <P ALIGN="CENTER"><B>Xerces converts to this Java Encoder Name</B>
85 * </TD>
86 * </TR>
87 * <TR>
88 * <TD WIDTH="33%">8 bit Unicode</TD>
89 * <TD WIDTH="15%">
90 * <P ALIGN="CENTER">UTF-8
91 * </TD>
92 * <TD WIDTH="12%">
93 * <P ALIGN="CENTER">IANA
94 * </TD>
95 * <TD WIDTH="31%">
96 * <P ALIGN="CENTER">UTF8
97 * </TD>
98 * </TR>
99 * <TR>
100 * <TD WIDTH="33%">ISO Latin 1</TD>
101 * <TD WIDTH="15%">
102 * <P ALIGN="CENTER">ISO-8859-1
103 * </TD>
104 * <TD WIDTH="12%">
105 * <P ALIGN="CENTER">MIME
106 * </TD>
107 * <TD WIDTH="31%">
108 * <P ALIGN="CENTER">ISO-8859-1
109 * </TD>
110 * </TR>
111 * <TR>
112 * <TD WIDTH="33%">ISO Latin 2</TD>
113 * <TD WIDTH="15%">
114 * <P ALIGN="CENTER">ISO-8859-2
115 * </TD>
116 * <TD WIDTH="12%">
117 * <P ALIGN="CENTER">MIME
118 * </TD>
119 * <TD WIDTH="31%">
120 * <P ALIGN="CENTER">ISO-8859-2
121 * </TD>
122 * </TR>
123 * <TR>
124 * <TD WIDTH="33%">ISO Latin 3</TD>
125 * <TD WIDTH="15%">
126 * <P ALIGN="CENTER">ISO-8859-3
127 * </TD>
128 * <TD WIDTH="12%">
129 * <P ALIGN="CENTER">MIME
130 * </TD>
131 * <TD WIDTH="31%">
132 * <P ALIGN="CENTER">ISO-8859-3
133 * </TD>
134 * </TR>
135 * <TR>
136 * <TD WIDTH="33%">ISO Latin 4</TD>
137 * <TD WIDTH="15%">
138 * <P ALIGN="CENTER">ISO-8859-4
139 * </TD>
140 * <TD WIDTH="12%">
141 * <P ALIGN="CENTER">MIME
142 * </TD>
143 * <TD WIDTH="31%">
144 * <P ALIGN="CENTER">ISO-8859-4
145 * </TD>
146 * </TR>
147 * <TR>
148 * <TD WIDTH="33%">ISO Latin Cyrillic</TD>
149 * <TD WIDTH="15%">
150 * <P ALIGN="CENTER">ISO-8859-5
151 * </TD>
152 * <TD WIDTH="12%">
153 * <P ALIGN="CENTER">MIME
154 * </TD>
155 * <TD WIDTH="31%">
156 * <P ALIGN="CENTER">ISO-8859-5
157 * </TD>
158 * </TR>
159 * <TR>
160 * <TD WIDTH="33%">ISO Latin Arabic</TD>
161 * <TD WIDTH="15%">
162 * <P ALIGN="CENTER">ISO-8859-6
163 * </TD>
164 * <TD WIDTH="12%">
165 * <P ALIGN="CENTER">MIME
166 * </TD>
167 * <TD WIDTH="31%">
168 * <P ALIGN="CENTER">ISO-8859-6
169 * </TD>
170 * </TR>
171 * <TR>
172 * <TD WIDTH="33%">ISO Latin Greek</TD>
173 * <TD WIDTH="15%">
174 * <P ALIGN="CENTER">ISO-8859-7
175 * </TD>
176 * <TD WIDTH="12%">
177 * <P ALIGN="CENTER">MIME
178 * </TD>
179 * <TD WIDTH="31%">
180 * <P ALIGN="CENTER">ISO-8859-7
181 * </TD>
182 * </TR>
183 * <TR>
184 * <TD WIDTH="33%">ISO Latin Hebrew</TD>
185 * <TD WIDTH="15%">
186 * <P ALIGN="CENTER">ISO-8859-8
187 * </TD>
188 * <TD WIDTH="12%">
189 * <P ALIGN="CENTER">MIME
190 * </TD>
191 * <TD WIDTH="31%">
192 * <P ALIGN="CENTER">ISO-8859-8
193 * </TD>
194 * </TR>
195 * <TR>
196 * <TD WIDTH="33%">ISO Latin 5</TD>
197 * <TD WIDTH="15%">
198 * <P ALIGN="CENTER">ISO-8859-9
199 * </TD>
200 * <TD WIDTH="12%">
201 * <P ALIGN="CENTER">MIME
202 * </TD>
203 * <TD WIDTH="31%">
204 * <P ALIGN="CENTER">ISO-8859-9
205 * </TD>
206 * </TR>
207 * <TR>
208 * <TD WIDTH="33%">EBCDIC: US</TD>
209 * <TD WIDTH="15%">
210 * <P ALIGN="CENTER">ebcdic-cp-us
211 * </TD>
212 * <TD WIDTH="12%">
213 * <P ALIGN="CENTER">IANA
214 * </TD>
215 * <TD WIDTH="31%">
216 * <P ALIGN="CENTER">cp037
217 * </TD>
218 * </TR>
219 * <TR>
220 * <TD WIDTH="33%">EBCDIC: Canada</TD>
221 * <TD WIDTH="15%">
222 * <P ALIGN="CENTER">ebcdic-cp-ca
223 * </TD>
224 * <TD WIDTH="12%">
225 * <P ALIGN="CENTER">IANA
226 * </TD>
227 * <TD WIDTH="31%">
228 * <P ALIGN="CENTER">cp037
229 * </TD>
230 * </TR>
231 * <TR>
232 * <TD WIDTH="33%">EBCDIC: Netherlands</TD>
233 * <TD WIDTH="15%">
234 * <P ALIGN="CENTER">ebcdic-cp-nl
235 * </TD>
236 * <TD WIDTH="12%">
237 * <P ALIGN="CENTER">IANA
238 * </TD>
239 * <TD WIDTH="31%">
240 * <P ALIGN="CENTER">cp037
241 * </TD>
242 * </TR>
243 * <TR>
244 * <TD WIDTH="33%">EBCDIC: Denmark</TD>
245 * <TD WIDTH="15%">
246 * <P ALIGN="CENTER">ebcdic-cp-dk
247 * </TD>
248 * <TD WIDTH="12%">
249 * <P ALIGN="CENTER">IANA
250 * </TD>
251 * <TD WIDTH="31%">
252 * <P ALIGN="CENTER">cp277
253 * </TD>
254 * </TR>
255 * <TR>
256 * <TD WIDTH="33%">EBCDIC: Norway</TD>
257 * <TD WIDTH="15%">
258 * <P ALIGN="CENTER">ebcdic-cp-no
259 * </TD>
260 * <TD WIDTH="12%">
261 * <P ALIGN="CENTER">IANA
262 * </TD>
263 * <TD WIDTH="31%">
264 * <P ALIGN="CENTER">cp277
265 * </TD>
266 * </TR>
267 * <TR>
268 * <TD WIDTH="33%">EBCDIC: Finland</TD>
269 * <TD WIDTH="15%">
270 * <P ALIGN="CENTER">ebcdic-cp-fi
271 * </TD>
272 * <TD WIDTH="12%">
273 * <P ALIGN="CENTER">IANA
274 * </TD>
275 * <TD WIDTH="31%">
276 * <P ALIGN="CENTER">cp278
277 * </TD>
278 * </TR>
279 * <TR>
280 * <TD WIDTH="33%">EBCDIC: Sweden</TD>
281 * <TD WIDTH="15%">
282 * <P ALIGN="CENTER">ebcdic-cp-se
283 * </TD>
284 * <TD WIDTH="12%">
285 * <P ALIGN="CENTER">IANA
286 * </TD>
287 * <TD WIDTH="31%">
288 * <P ALIGN="CENTER">cp278
289 * </TD>
290 * </TR>
291 * <TR>
292 * <TD WIDTH="33%">EBCDIC: Italy</TD>
293 * <TD WIDTH="15%">
294 * <P ALIGN="CENTER">ebcdic-cp-it
295 * </TD>
296 * <TD WIDTH="12%">
297 * <P ALIGN="CENTER">IANA
298 * </TD>
299 * <TD WIDTH="31%">
300 * <P ALIGN="CENTER">cp280
301 * </TD>
302 * </TR>
303 * <TR>
304 * <TD WIDTH="33%">EBCDIC: Spain, Latin America</TD>
305 * <TD WIDTH="15%">
306 * <P ALIGN="CENTER">ebcdic-cp-es
307 * </TD>
308 * <TD WIDTH="12%">
309 * <P ALIGN="CENTER">IANA
310 * </TD>
311 * <TD WIDTH="31%">
312 * <P ALIGN="CENTER">cp284
313 * </TD>
314 * </TR>
315 * <TR>
316 * <TD WIDTH="33%">EBCDIC: Great Britain</TD>
317 * <TD WIDTH="15%">
318 * <P ALIGN="CENTER">ebcdic-cp-gb
319 * </TD>
320 * <TD WIDTH="12%">
321 * <P ALIGN="CENTER">IANA
322 * </TD>
323 * <TD WIDTH="31%">
324 * <P ALIGN="CENTER">cp285
325 * </TD>
326 * </TR>
327 * <TR>
328 * <TD WIDTH="33%">EBCDIC: France</TD>
329 * <TD WIDTH="15%">
330 * <P ALIGN="CENTER">ebcdic-cp-fr
331 * </TD>
332 * <TD WIDTH="12%">
333 * <P ALIGN="CENTER">IANA
334 * </TD>
335 * <TD WIDTH="31%">
336 * <P ALIGN="CENTER">cp297
337 * </TD>
338 * </TR>
339 * <TR>
340 * <TD WIDTH="33%">EBCDIC: Arabic</TD>
341 * <TD WIDTH="15%">
342 * <P ALIGN="CENTER">ebcdic-cp-ar1
343 * </TD>
344 * <TD WIDTH="12%">
345 * <P ALIGN="CENTER">IANA
346 * </TD>
347 * <TD WIDTH="31%">
348 * <P ALIGN="CENTER">cp420
349 * </TD>
350 * </TR>
351 * <TR>
352 * <TD WIDTH="33%">EBCDIC: Hebrew</TD>
353 * <TD WIDTH="15%">
354 * <P ALIGN="CENTER">ebcdic-cp-he
355 * </TD>
356 * <TD WIDTH="12%">
357 * <P ALIGN="CENTER">IANA
358 * </TD>
359 * <TD WIDTH="31%">
360 * <P ALIGN="CENTER">cp424
361 * </TD>
362 * </TR>
363 * <TR>
364 * <TD WIDTH="33%">EBCDIC: Switzerland</TD>
365 * <TD WIDTH="15%">
366 * <P ALIGN="CENTER">ebcdic-cp-ch
367 * </TD>
368 * <TD WIDTH="12%">
369 * <P ALIGN="CENTER">IANA
370 * </TD>
371 * <TD WIDTH="31%">
372 * <P ALIGN="CENTER">cp500
373 * </TD>
374 * </TR>
375 * <TR>
376 * <TD WIDTH="33%">EBCDIC: Roece</TD>
377 * <TD WIDTH="15%">
378 * <P ALIGN="CENTER">ebcdic-cp-roece
379 * </TD>
380 * <TD WIDTH="12%">
381 * <P ALIGN="CENTER">IANA
382 * </TD>
383 * <TD WIDTH="31%">
384 * <P ALIGN="CENTER">cp870
385 * </TD>
386 * </TR>
387 * <TR>
388 * <TD WIDTH="33%">EBCDIC: Yugoslavia</TD>
389 * <TD WIDTH="15%">
390 * <P ALIGN="CENTER">ebcdic-cp-yu
391 * </TD>
392 * <TD WIDTH="12%">
393 * <P ALIGN="CENTER">IANA
394 * </TD>
395 * <TD WIDTH="31%">
396 * <P ALIGN="CENTER">cp870
397 * </TD>
398 * </TR>
399 * <TR>
400 * <TD WIDTH="33%">EBCDIC: Iceland</TD>
401 * <TD WIDTH="15%">
402 * <P ALIGN="CENTER">ebcdic-cp-is
403 * </TD>
404 * <TD WIDTH="12%">
405 * <P ALIGN="CENTER">IANA
406 * </TD>
407 * <TD WIDTH="31%">
408 * <P ALIGN="CENTER">cp871
409 * </TD>
410 * </TR>
411 * <TR>
412 * <TD WIDTH="33%">EBCDIC: Urdu</TD>
413 * <TD WIDTH="15%">
414 * <P ALIGN="CENTER">ebcdic-cp-ar2
415 * </TD>
416 * <TD WIDTH="12%">
417 * <P ALIGN="CENTER">IANA
418 * </TD>
419 * <TD WIDTH="31%">
420 * <P ALIGN="CENTER">cp918
421 * </TD>
422 * </TR>
423 * <TR>
424 * <TD WIDTH="33%">Chinese for PRC, mixed 1/2 byte</TD>
425 * <TD WIDTH="15%">
426 * <P ALIGN="CENTER">gb2312
427 * </TD>
428 * <TD WIDTH="12%">
429 * <P ALIGN="CENTER">MIME
430 * </TD>
431 * <TD WIDTH="31%">
432 * <P ALIGN="CENTER">GB2312
433 * </TD>
434 * </TR>
435 * <TR>
436 * <TD WIDTH="33%">Extended Unix Code, packed for Japanese</TD>
437 * <TD WIDTH="15%">
438 * <P ALIGN="CENTER">euc-jp
439 * </TD>
440 * <TD WIDTH="12%">
441 * <P ALIGN="CENTER">MIME
442 * </TD>
443 * <TD WIDTH="31%">
444 * <P ALIGN="CENTER">eucjis
445 * </TD>
446 * </TR>
447 * <TR>
448 * <TD WIDTH="33%">Japanese: iso-2022-jp</TD>
449 * <TD WIDTH="15%">
450 * <P ALIGN="CENTER">iso-2022-jp
451 * </TD>
452 * <TD WIDTH="12%">
453 * <P ALIGN="CENTER">MIME
454 * </TD>
455 * <TD WIDTH="31%">
456 * <P ALIGN="CENTER">JIS
457 * </TD>
458 * </TR>
459 * <TR>
460 * <TD WIDTH="33%">Japanese: Shift JIS</TD>
461 * <TD WIDTH="15%">
462 * <P ALIGN="CENTER">Shift_JIS
463 * </TD>
464 * <TD WIDTH="12%">
465 * <P ALIGN="CENTER">MIME
466 * </TD>
467 * <TD WIDTH="31%">
468 * <P ALIGN="CENTER">SJIS
469 * </TD>
470 * </TR>
471 * <TR>
472 * <TD WIDTH="33%">Chinese: Big5</TD>
473 * <TD WIDTH="15%">
474 * <P ALIGN="CENTER">Big5
475 * </TD>
476 * <TD WIDTH="12%">
477 * <P ALIGN="CENTER">MIME
478 * </TD>
479 * <TD WIDTH="31%">
480 * <P ALIGN="CENTER">Big5
481 * </TD>
482 * </TR>
483 * <TR>
484 * <TD WIDTH="33%">Extended Unix Code, packed for Korean</TD>
485 * <TD WIDTH="15%">
486 * <P ALIGN="CENTER">euc-kr
487 * </TD>
488 * <TD WIDTH="12%">
489 * <P ALIGN="CENTER">MIME
490 * </TD>
491 * <TD WIDTH="31%">
492 * <P ALIGN="CENTER">KSC5601
493 * </TD>
494 * </TR>
495 * <TR>
496 * <TD WIDTH="33%">Cyrillic</TD>
497 * <TD WIDTH="15%">
498 * <P ALIGN="CENTER">koi8-r
499 * </TD>
500 * <TD WIDTH="12%">
501 * <P ALIGN="CENTER">MIME
502 * </TD>
503 * <TD WIDTH="31%">
504 * <P ALIGN="CENTER">koi8-r
505 * </TD>
506 * </TR>
507 * </TABLE>
508 *
509 * @version
510 * @author TAMURA Kent <kent@trl.ibm.co.jp>
511 */
public class MIME2Java {

    // Both tables are keyed by UPPER-CASE names; lookups upper-case the
    // caller's argument with a fixed locale before probing (see lookup()).
    // They are filled once in the static initializer and only read afterwards.

    // <preferred MIME name, upper-cased> -> <Java encoding name>
    private static final Hashtable s_enchash = new Hashtable();
    // <Java encoding name, upper-cased> -> <preferred MIME name>
    private static final Hashtable s_revhash = new Hashtable();

    static {
        // <preferred MIME name>, <Java encoding name>
        s_enchash.put("UTF-8", "UTF8");
        s_enchash.put("US-ASCII", "8859_1"); // ?
        s_enchash.put("ISO-8859-1", "8859_1");
        s_enchash.put("ISO-8859-2", "8859_2");
        s_enchash.put("ISO-8859-3", "8859_3");
        s_enchash.put("ISO-8859-4", "8859_4");
        s_enchash.put("ISO-8859-5", "8859_5");
        s_enchash.put("ISO-8859-6", "8859_6");
        s_enchash.put("ISO-8859-7", "8859_7");
        s_enchash.put("ISO-8859-8", "8859_8");
        s_enchash.put("ISO-8859-9", "8859_9");
        s_enchash.put("ISO-2022-JP", "JIS");
        s_enchash.put("SHIFT_JIS", "SJIS");
        s_enchash.put("EUC-JP", "EUCJIS");
        s_enchash.put("GB2312", "GB2312");
        s_enchash.put("BIG5", "Big5");
        s_enchash.put("EUC-KR", "KSC5601");
        s_enchash.put("ISO-2022-KR", "ISO2022KR");
        s_enchash.put("KOI8-R", "KOI8_R");

        s_enchash.put("EBCDIC-CP-US", "CP037");
        s_enchash.put("EBCDIC-CP-CA", "CP037");
        s_enchash.put("EBCDIC-CP-NL", "CP037");
        s_enchash.put("EBCDIC-CP-DK", "CP277");
        s_enchash.put("EBCDIC-CP-NO", "CP277");
        s_enchash.put("EBCDIC-CP-FI", "CP278");
        s_enchash.put("EBCDIC-CP-SE", "CP278");
        s_enchash.put("EBCDIC-CP-IT", "CP280");
        s_enchash.put("EBCDIC-CP-ES", "CP284");
        s_enchash.put("EBCDIC-CP-GB", "CP285");
        s_enchash.put("EBCDIC-CP-FR", "CP297");
        s_enchash.put("EBCDIC-CP-AR1", "CP420");
        s_enchash.put("EBCDIC-CP-HE", "CP424");
        s_enchash.put("EBCDIC-CP-CH", "CP500");
        s_enchash.put("EBCDIC-CP-ROECE", "CP870");
        s_enchash.put("EBCDIC-CP-YU", "CP870");
        s_enchash.put("EBCDIC-CP-IS", "CP871");
        s_enchash.put("EBCDIC-CP-AR2", "CP918");

        // j:CNS11643 -> EUC-TW?
        // ISO-2022-CN? ISO-2022-CN-EXT?

        // <Java encoding name>, <preferred MIME name>
        s_revhash.put("UTF8", "UTF-8");
        //s_revhash.put("8859_1", "US-ASCII"); // ?
        s_revhash.put("8859_1", "ISO-8859-1");
        s_revhash.put("8859_2", "ISO-8859-2");
        s_revhash.put("8859_3", "ISO-8859-3");
        s_revhash.put("8859_4", "ISO-8859-4");
        s_revhash.put("8859_5", "ISO-8859-5");
        s_revhash.put("8859_6", "ISO-8859-6");
        s_revhash.put("8859_7", "ISO-8859-7");
        s_revhash.put("8859_8", "ISO-8859-8");
        s_revhash.put("8859_9", "ISO-8859-9");
        s_revhash.put("JIS", "ISO-2022-JP");
        s_revhash.put("SJIS", "Shift_JIS");
        s_revhash.put("EUCJIS", "EUC-JP");
        s_revhash.put("GB2312", "GB2312");
        s_revhash.put("BIG5", "Big5");
        s_revhash.put("KSC5601", "EUC-KR");
        s_revhash.put("ISO2022KR", "ISO-2022-KR");
        s_revhash.put("KOI8_R", "KOI8-R");

        // NOTE: several MIME names share one Java code page. Hashtable.put
        // replaces the previous value for an existing key, so for CP037,
        // CP277, CP278 and CP870 the LAST entry below is the one reverse()
        // returns (EBCDIC-CP-NL, -NO, -SE and -YU respectively). The earlier
        // entries are kept to record the full alias set; do not reorder them
        // without intending to change reverse()'s result.
        s_revhash.put("CP037", "EBCDIC-CP-US");
        s_revhash.put("CP037", "EBCDIC-CP-CA");
        s_revhash.put("CP037", "EBCDIC-CP-NL");
        s_revhash.put("CP277", "EBCDIC-CP-DK");
        s_revhash.put("CP277", "EBCDIC-CP-NO");
        s_revhash.put("CP278", "EBCDIC-CP-FI");
        s_revhash.put("CP278", "EBCDIC-CP-SE");
        s_revhash.put("CP280", "EBCDIC-CP-IT");
        s_revhash.put("CP284", "EBCDIC-CP-ES");
        s_revhash.put("CP285", "EBCDIC-CP-GB");
        s_revhash.put("CP297", "EBCDIC-CP-FR");
        s_revhash.put("CP420", "EBCDIC-CP-AR1");
        s_revhash.put("CP424", "EBCDIC-CP-HE");
        s_revhash.put("CP500", "EBCDIC-CP-CH");
        s_revhash.put("CP870", "EBCDIC-CP-ROECE");
        s_revhash.put("CP870", "EBCDIC-CP-YU");
        s_revhash.put("CP871", "EBCDIC-CP-IS");
        s_revhash.put("CP918", "EBCDIC-CP-AR2");
    }

    /** Static utility class; not meant to be instantiated. */
    private MIME2Java() {
    }

    /**
     * Looks up <var>name</var> in <var>table</var> ignoring case.
     * The name is upper-cased with a fixed locale rather than the platform
     * default: under a Turkish default locale <code>"i".toUpperCase()</code>
     * yields U+0130 (dotted capital I), which would make names such as
     * <code>iso-8859-1</code> or <code>Shift_JIS</code> miss the table.
     * @param table One of the two name tables, keyed by upper-case names.
     * @param name Name to look up; may be <var>null</var>.
     * @return The mapped name, or <var>null</var> if <var>name</var> is
     *         <var>null</var> or has no entry.
     */
    private static String lookup(Hashtable table, String name) {
        if (name == null) {
            // Hashtable rejects null keys; treat null as "unknown".
            return null;
        }
        return (String) table.get(name.toUpperCase(java.util.Locale.ENGLISH));
    }

    /**
     * Convert a MIME charset name, also known as an XML encoding name, to a Java encoding name.
     * The lookup does not depend on the platform default locale.
     * @param mimeCharsetName Case insensitive MIME charset name: <code>UTF-8, US-ASCII, ISO-8859-1,
     *                        ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, ISO-8859-6,
     *                        ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-2022-JP, Shift_JIS,
     *                        EUC-JP, GB2312, Big5, EUC-KR, ISO-2022-KR, KOI8-R,
     *                        EBCDIC-CP-US, EBCDIC-CP-CA, EBCDIC-CP-NL, EBCDIC-CP-DK,
     *                        EBCDIC-CP-NO, EBCDIC-CP-FI, EBCDIC-CP-SE, EBCDIC-CP-IT,
     *                        EBCDIC-CP-ES, EBCDIC-CP-GB, EBCDIC-CP-FR, EBCDIC-CP-AR1,
     *                        EBCDIC-CP-HE, EBCDIC-CP-CH, EBCDIC-CP-ROECE, EBCDIC-CP-YU,
     *                        EBCDIC-CP-IS and EBCDIC-CP-AR2</code>.
     * @return Java encoding name, or <var>null</var> if <var>mimeCharsetName</var>
     *         is <var>null</var> or unknown.
     * @see #reverse
     */
    public static String convert(String mimeCharsetName) {
        return lookup(s_enchash, mimeCharsetName);
    }

    /**
     * Convert a Java encoding name to MIME charset name.
     * The lookup does not depend on the platform default locale.
     * Where several MIME names map to the same Java encoding, exactly one is
     * returned: "CP037" gives "EBCDIC-CP-NL", "CP277" gives "EBCDIC-CP-NO",
     * "CP278" gives "EBCDIC-CP-SE" and "CP870" gives "EBCDIC-CP-YU".
     * "8859_1" gives "ISO-8859-1" (never "US-ASCII").
     * @param encoding Case insensitive Java encoding name: <code>UTF8, 8859_1, 8859_2, 8859_3,
     *                 8859_4, 8859_5, 8859_6, 8859_7, 8859_8, 8859_9, JIS, SJIS, EUCJIS,
     *                 GB2312, BIG5, KSC5601, ISO2022KR, KOI8_R, CP037, CP277, CP278,
     *                 CP280, CP284, CP285, CP297, CP420, CP424, CP500, CP870, CP871
     *                 and CP918</code>.
     * @return MIME charset name, or <var>null</var> if <var>encoding</var> is
     *         <var>null</var> or unknown.
     * @see #convert
     */
    public static String reverse(String encoding) {
        return lookup(s_revhash, encoding);
    }
}