ASCII EBCDIC Conversion
Jump to navigation
Jump to search
References
See:
- http://www.nntp.perl.org/group/perl.mvs/2001/03/msg735.html
- http://www.babbletower.net/index.html?/manencodings.html
- http://www.guiffy.com/help/GuiffyHelp/Encodings.html
- http://java.sun.com/j2se/1.3/docs/guide/intl/encoding.doc.html
Example
Convert encoding names between MIME and Java
1 /*
2 * The Apache Software License, Version 1.1
3 *
4 *
5 * Copyright (c) 1999 The Apache Software Foundation. All rights
6 * reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. The end-user documentation included with the redistribution,
21 * if any, must include the following acknowledgment:
22 * "This product includes software developed by the
23 * Apache Software Foundation (http://www.apache.org/)."
24 * Alternately, this acknowledgment may appear in the software itself,
25 * if and wherever such third-party acknowledgments normally appear.
26 *
27 * 4. The names "Xerces" and "Apache Software Foundation" must
28 * not be used to endorse or promote products derived from this
29 * software without prior written permission. For written
30 * permission, please contact apache@apache.org.
31 *
32 * 5. Products derived from this software may not be called "Apache",
33 * nor may "Apache" appear in their name, without prior written
34 * permission of the Apache Software Foundation.
35 *
36 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 * SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This software consists of voluntary contributions made by many
51 * individuals on behalf of the Apache Software Foundation and was
52 * originally based on software copyright (c) 1999, International
53 * Business Machines, Inc., http://www.apache.org. For more
54 * information on the Apache Software Foundation, please see
55 * <http://www.apache.org/>.
56 */
57
58 package org.apache.xerces.readers;
59
60 import java.util.*;
61
62 /**
63 * MIME2Java is a convenience class which handles conversions between MIME charset names
64 * and Java encoding names.
65 * <p>The supported XML encodings are the intersection of XML-supported code sets and those
66 * supported in JDK 1.1.
67 * <p>MIME charset names are used on <var>xmlEncoding</var> parameters to methods such
68 * as <code>TXDocument#setEncoding</code> and <code>DTD#setEncoding</code>.
69 * <p>Java encoding names are used on <var>encoding</var> parameters to
70 * methods such as <code>TXDocument#printWithFormat</code> and <code>DTD#printExternal</code>.
71 * <P>
72 * <TABLE BORDER="0" WIDTH="100%">
73 * <TR>
74 * <TD WIDTH="33%">
75 * <P ALIGN="CENTER"><B>Common Name</B>
76 * </TD>
77 * <TD WIDTH="15%">
78 * <P ALIGN="CENTER"><B>Use this name in XML files</B>
79 * </TD>
80 * <TD WIDTH="12%">
81 * <P ALIGN="CENTER"><B>Name Type</B>
82 * </TD>
83 * <TD WIDTH="31%">
84 * <P ALIGN="CENTER"><B>Xerces converts to this Java Encoder Name</B>
85 * </TD>
86 * </TR>
87 * <TR>
88 * <TD WIDTH="33%">8 bit Unicode</TD>
89 * <TD WIDTH="15%">
90 * <P ALIGN="CENTER">UTF-8
91 * </TD>
92 * <TD WIDTH="12%">
93 * <P ALIGN="CENTER">IANA
94 * </TD>
95 * <TD WIDTH="31%">
96 * <P ALIGN="CENTER">UTF8
97 * </TD>
98 * </TR>
99 * <TR>
100 * <TD WIDTH="33%">ISO Latin 1</TD>
101 * <TD WIDTH="15%">
102 * <P ALIGN="CENTER">ISO-8859-1
103 * </TD>
104 * <TD WIDTH="12%">
105 * <P ALIGN="CENTER">MIME
106 * </TD>
107 * <TD WIDTH="31%">
108 * <P ALIGN="CENTER">8859_1
109 * </TD>
110 * </TR>
111 * <TR>
112 * <TD WIDTH="33%">ISO Latin 2</TD>
113 * <TD WIDTH="15%">
114 * <P ALIGN="CENTER">ISO-8859-2
115 * </TD>
116 * <TD WIDTH="12%">
117 * <P ALIGN="CENTER">MIME
118 * </TD>
119 * <TD WIDTH="31%">
120 * <P ALIGN="CENTER">8859_2
121 * </TD>
122 * </TR>
123 * <TR>
124 * <TD WIDTH="33%">ISO Latin 3</TD>
125 * <TD WIDTH="15%">
126 * <P ALIGN="CENTER">ISO-8859-3
127 * </TD>
128 * <TD WIDTH="12%">
129 * <P ALIGN="CENTER">MIME
130 * </TD>
131 * <TD WIDTH="31%">
132 * <P ALIGN="CENTER">8859_3
133 * </TD>
134 * </TR>
135 * <TR>
136 * <TD WIDTH="33%">ISO Latin 4</TD>
137 * <TD WIDTH="15%">
138 * <P ALIGN="CENTER">ISO-8859-4
139 * </TD>
140 * <TD WIDTH="12%">
141 * <P ALIGN="CENTER">MIME
142 * </TD>
143 * <TD WIDTH="31%">
144 * <P ALIGN="CENTER">8859_4
145 * </TD>
146 * </TR>
147 * <TR>
148 * <TD WIDTH="33%">ISO Latin Cyrillic</TD>
149 * <TD WIDTH="15%">
150 * <P ALIGN="CENTER">ISO-8859-5
151 * </TD>
152 * <TD WIDTH="12%">
153 * <P ALIGN="CENTER">MIME
154 * </TD>
155 * <TD WIDTH="31%">
156 * <P ALIGN="CENTER">8859_5
157 * </TD>
158 * </TR>
159 * <TR>
160 * <TD WIDTH="33%">ISO Latin Arabic</TD>
161 * <TD WIDTH="15%">
162 * <P ALIGN="CENTER">ISO-8859-6
163 * </TD>
164 * <TD WIDTH="12%">
165 * <P ALIGN="CENTER">MIME
166 * </TD>
167 * <TD WIDTH="31%">
168 * <P ALIGN="CENTER">8859_6
169 * </TD>
170 * </TR>
171 * <TR>
172 * <TD WIDTH="33%">ISO Latin Greek</TD>
173 * <TD WIDTH="15%">
174 * <P ALIGN="CENTER">ISO-8859-7
175 * </TD>
176 * <TD WIDTH="12%">
177 * <P ALIGN="CENTER">MIME
178 * </TD>
179 * <TD WIDTH="31%">
180 * <P ALIGN="CENTER">8859_7
181 * </TD>
182 * </TR>
183 * <TR>
184 * <TD WIDTH="33%">ISO Latin Hebrew</TD>
185 * <TD WIDTH="15%">
186 * <P ALIGN="CENTER">ISO-8859-8
187 * </TD>
188 * <TD WIDTH="12%">
189 * <P ALIGN="CENTER">MIME
190 * </TD>
191 * <TD WIDTH="31%">
192 * <P ALIGN="CENTER">8859_8
193 * </TD>
194 * </TR>
195 * <TR>
196 * <TD WIDTH="33%">ISO Latin 5</TD>
197 * <TD WIDTH="15%">
198 * <P ALIGN="CENTER">ISO-8859-9
199 * </TD>
200 * <TD WIDTH="12%">
201 * <P ALIGN="CENTER">MIME
202 * </TD>
203 * <TD WIDTH="31%">
204 * <P ALIGN="CENTER">8859_9
205 * </TD>
206 * </TR>
207 * <TR>
208 * <TD WIDTH="33%">EBCDIC: US</TD>
209 * <TD WIDTH="15%">
210 * <P ALIGN="CENTER">ebcdic-cp-us
211 * </TD>
212 * <TD WIDTH="12%">
213 * <P ALIGN="CENTER">IANA
214 * </TD>
215 * <TD WIDTH="31%">
216 * <P ALIGN="CENTER">cp037
217 * </TD>
218 * </TR>
219 * <TR>
220 * <TD WIDTH="33%">EBCDIC: Canada</TD>
221 * <TD WIDTH="15%">
222 * <P ALIGN="CENTER">ebcdic-cp-ca
223 * </TD>
224 * <TD WIDTH="12%">
225 * <P ALIGN="CENTER">IANA
226 * </TD>
227 * <TD WIDTH="31%">
228 * <P ALIGN="CENTER">cp037
229 * </TD>
230 * </TR>
231 * <TR>
232 * <TD WIDTH="33%">EBCDIC: Netherlands</TD>
233 * <TD WIDTH="15%">
234 * <P ALIGN="CENTER">ebcdic-cp-nl
235 * </TD>
236 * <TD WIDTH="12%">
237 * <P ALIGN="CENTER">IANA
238 * </TD>
239 * <TD WIDTH="31%">
240 * <P ALIGN="CENTER">cp037
241 * </TD>
242 * </TR>
243 * <TR>
244 * <TD WIDTH="33%">EBCDIC: Denmark</TD>
245 * <TD WIDTH="15%">
246 * <P ALIGN="CENTER">ebcdic-cp-dk
247 * </TD>
248 * <TD WIDTH="12%">
249 * <P ALIGN="CENTER">IANA
250 * </TD>
251 * <TD WIDTH="31%">
252 * <P ALIGN="CENTER">cp277
253 * </TD>
254 * </TR>
255 * <TR>
256 * <TD WIDTH="33%">EBCDIC: Norway</TD>
257 * <TD WIDTH="15%">
258 * <P ALIGN="CENTER">ebcdic-cp-no
259 * </TD>
260 * <TD WIDTH="12%">
261 * <P ALIGN="CENTER">IANA
262 * </TD>
263 * <TD WIDTH="31%">
264 * <P ALIGN="CENTER">cp277
265 * </TD>
266 * </TR>
267 * <TR>
268 * <TD WIDTH="33%">EBCDIC: Finland</TD>
269 * <TD WIDTH="15%">
270 * <P ALIGN="CENTER">ebcdic-cp-fi
271 * </TD>
272 * <TD WIDTH="12%">
273 * <P ALIGN="CENTER">IANA
274 * </TD>
275 * <TD WIDTH="31%">
276 * <P ALIGN="CENTER">cp278
277 * </TD>
278 * </TR>
279 * <TR>
280 * <TD WIDTH="33%">EBCDIC: Sweden</TD>
281 * <TD WIDTH="15%">
282 * <P ALIGN="CENTER">ebcdic-cp-se
283 * </TD>
284 * <TD WIDTH="12%">
285 * <P ALIGN="CENTER">IANA
286 * </TD>
287 * <TD WIDTH="31%">
288 * <P ALIGN="CENTER">cp278
289 * </TD>
290 * </TR>
291 * <TR>
292 * <TD WIDTH="33%">EBCDIC: Italy</TD>
293 * <TD WIDTH="15%">
294 * <P ALIGN="CENTER">ebcdic-cp-it
295 * </TD>
296 * <TD WIDTH="12%">
297 * <P ALIGN="CENTER">IANA
298 * </TD>
299 * <TD WIDTH="31%">
300 * <P ALIGN="CENTER">cp280
301 * </TD>
302 * </TR>
303 * <TR>
304 * <TD WIDTH="33%">EBCDIC: Spain, Latin America</TD>
305 * <TD WIDTH="15%">
306 * <P ALIGN="CENTER">ebcdic-cp-es
307 * </TD>
308 * <TD WIDTH="12%">
309 * <P ALIGN="CENTER">IANA
310 * </TD>
311 * <TD WIDTH="31%">
312 * <P ALIGN="CENTER">cp284
313 * </TD>
314 * </TR>
315 * <TR>
316 * <TD WIDTH="33%">EBCDIC: Great Britain</TD>
317 * <TD WIDTH="15%">
318 * <P ALIGN="CENTER">ebcdic-cp-gb
319 * </TD>
320 * <TD WIDTH="12%">
321 * <P ALIGN="CENTER">IANA
322 * </TD>
323 * <TD WIDTH="31%">
324 * <P ALIGN="CENTER">cp285
325 * </TD>
326 * </TR>
327 * <TR>
328 * <TD WIDTH="33%">EBCDIC: France</TD>
329 * <TD WIDTH="15%">
330 * <P ALIGN="CENTER">ebcdic-cp-fr
331 * </TD>
332 * <TD WIDTH="12%">
333 * <P ALIGN="CENTER">IANA
334 * </TD>
335 * <TD WIDTH="31%">
336 * <P ALIGN="CENTER">cp297
337 * </TD>
338 * </TR>
339 * <TR>
340 * <TD WIDTH="33%">EBCDIC: Arabic</TD>
341 * <TD WIDTH="15%">
342 * <P ALIGN="CENTER">ebcdic-cp-ar1
343 * </TD>
344 * <TD WIDTH="12%">
345 * <P ALIGN="CENTER">IANA
346 * </TD>
347 * <TD WIDTH="31%">
348 * <P ALIGN="CENTER">cp420
349 * </TD>
350 * </TR>
351 * <TR>
352 * <TD WIDTH="33%">EBCDIC: Hebrew</TD>
353 * <TD WIDTH="15%">
354 * <P ALIGN="CENTER">ebcdic-cp-he
355 * </TD>
356 * <TD WIDTH="12%">
357 * <P ALIGN="CENTER">IANA
358 * </TD>
359 * <TD WIDTH="31%">
360 * <P ALIGN="CENTER">cp424
361 * </TD>
362 * </TR>
363 * <TR>
364 * <TD WIDTH="33%">EBCDIC: Switzerland</TD>
365 * <TD WIDTH="15%">
366 * <P ALIGN="CENTER">ebcdic-cp-ch
367 * </TD>
368 * <TD WIDTH="12%">
369 * <P ALIGN="CENTER">IANA
370 * </TD>
371 * <TD WIDTH="31%">
372 * <P ALIGN="CENTER">cp500
373 * </TD>
374 * </TR>
375 * <TR>
376 * <TD WIDTH="33%">EBCDIC: Roece</TD>
377 * <TD WIDTH="15%">
378 * <P ALIGN="CENTER">ebcdic-cp-roece
379 * </TD>
380 * <TD WIDTH="12%">
381 * <P ALIGN="CENTER">IANA
382 * </TD>
383 * <TD WIDTH="31%">
384 * <P ALIGN="CENTER">cp870
385 * </TD>
386 * </TR>
387 * <TR>
388 * <TD WIDTH="33%">EBCDIC: Yugoslavia</TD>
389 * <TD WIDTH="15%">
390 * <P ALIGN="CENTER">ebcdic-cp-yu
391 * </TD>
392 * <TD WIDTH="12%">
393 * <P ALIGN="CENTER">IANA
394 * </TD>
395 * <TD WIDTH="31%">
396 * <P ALIGN="CENTER">cp870
397 * </TD>
398 * </TR>
399 * <TR>
400 * <TD WIDTH="33%">EBCDIC: Iceland</TD>
401 * <TD WIDTH="15%">
402 * <P ALIGN="CENTER">ebcdic-cp-is
403 * </TD>
404 * <TD WIDTH="12%">
405 * <P ALIGN="CENTER">IANA
406 * </TD>
407 * <TD WIDTH="31%">
408 * <P ALIGN="CENTER">cp871
409 * </TD>
410 * </TR>
411 * <TR>
412 * <TD WIDTH="33%">EBCDIC: Urdu</TD>
413 * <TD WIDTH="15%">
414 * <P ALIGN="CENTER">ebcdic-cp-ar2
415 * </TD>
416 * <TD WIDTH="12%">
417 * <P ALIGN="CENTER">IANA
418 * </TD>
419 * <TD WIDTH="31%">
420 * <P ALIGN="CENTER">cp918
421 * </TD>
422 * </TR>
423 * <TR>
424 * <TD WIDTH="33%">Chinese for PRC, mixed 1/2 byte</TD>
425 * <TD WIDTH="15%">
426 * <P ALIGN="CENTER">gb2312
427 * </TD>
428 * <TD WIDTH="12%">
429 * <P ALIGN="CENTER">MIME
430 * </TD>
431 * <TD WIDTH="31%">
432 * <P ALIGN="CENTER">GB2312
433 * </TD>
434 * </TR>
435 * <TR>
436 * <TD WIDTH="33%">Extended Unix Code, packed for Japanese</TD>
437 * <TD WIDTH="15%">
438 * <P ALIGN="CENTER">euc-jp
439 * </TD>
440 * <TD WIDTH="12%">
441 * <P ALIGN="CENTER">MIME
442 * </TD>
443 * <TD WIDTH="31%">
444 * <P ALIGN="CENTER">eucjis
445 * </TD>
446 * </TR>
447 * <TR>
448 * <TD WIDTH="33%">Japanese: iso-2022-jp</TD>
449 * <TD WIDTH="15%">
450 * <P ALIGN="CENTER">iso-2022-jp
451 * </TD>
452 * <TD WIDTH="12%">
453 * <P ALIGN="CENTER">MIME
454 * </TD>
455 * <TD WIDTH="31%">
456 * <P ALIGN="CENTER">JIS
457 * </TD>
458 * </TR>
459 * <TR>
460 * <TD WIDTH="33%">Japanese: Shift JIS</TD>
461 * <TD WIDTH="15%">
462 * <P ALIGN="CENTER">Shift_JIS
463 * </TD>
464 * <TD WIDTH="12%">
465 * <P ALIGN="CENTER">MIME
466 * </TD>
467 * <TD WIDTH="31%">
468 * <P ALIGN="CENTER">SJIS
469 * </TD>
470 * </TR>
471 * <TR>
472 * <TD WIDTH="33%">Chinese: Big5</TD>
473 * <TD WIDTH="15%">
474 * <P ALIGN="CENTER">Big5
475 * </TD>
476 * <TD WIDTH="12%">
477 * <P ALIGN="CENTER">MIME
478 * </TD>
479 * <TD WIDTH="31%">
480 * <P ALIGN="CENTER">Big5
481 * </TD>
482 * </TR>
483 * <TR>
484 * <TD WIDTH="33%">Extended Unix Code, packed for Korean</TD>
485 * <TD WIDTH="15%">
486 * <P ALIGN="CENTER">euc-kr
487 * </TD>
488 * <TD WIDTH="12%">
489 * <P ALIGN="CENTER">MIME
490 * </TD>
491 * <TD WIDTH="31%">
492 * <P ALIGN="CENTER">KSC5601
493 * </TD>
494 * </TR>
495 * <TR>
496 * <TD WIDTH="33%">Cyrillic</TD>
497 * <TD WIDTH="15%">
498 * <P ALIGN="CENTER">koi8-r
499 * </TD>
500 * <TD WIDTH="12%">
501 * <P ALIGN="CENTER">MIME
502 * </TD>
503 * <TD WIDTH="31%">
504 * <P ALIGN="CENTER">KOI8_R
505 * </TD>
506 * </TR>
507 * </TABLE>
508 *
509 * @version
510 * @author TAMURA Kent <kent@trl.ibm.co.jp>
511 */
public class MIME2Java {

    /** Maps an upper-cased MIME charset name to its Java encoding name. */
    private static final Hashtable s_enchash = new Hashtable();

    /** Maps an upper-cased Java encoding name to its preferred MIME charset name. */
    private static final Hashtable s_revhash = new Hashtable();

    static {
        // <preferred MIME name>, <Java encoding name>
        s_enchash.put("UTF-8", "UTF8");
        s_enchash.put("US-ASCII", "8859_1"); // ASCII is decoded through the Latin-1 converter
        s_enchash.put("ISO-8859-1", "8859_1");
        s_enchash.put("ISO-8859-2", "8859_2");
        s_enchash.put("ISO-8859-3", "8859_3");
        s_enchash.put("ISO-8859-4", "8859_4");
        s_enchash.put("ISO-8859-5", "8859_5");
        s_enchash.put("ISO-8859-6", "8859_6");
        s_enchash.put("ISO-8859-7", "8859_7");
        s_enchash.put("ISO-8859-8", "8859_8");
        s_enchash.put("ISO-8859-9", "8859_9");
        s_enchash.put("ISO-2022-JP", "JIS");
        s_enchash.put("SHIFT_JIS", "SJIS");
        s_enchash.put("EUC-JP", "EUCJIS");
        s_enchash.put("GB2312", "GB2312");
        s_enchash.put("BIG5", "Big5");
        s_enchash.put("EUC-KR", "KSC5601");
        s_enchash.put("ISO-2022-KR", "ISO2022KR");
        s_enchash.put("KOI8-R", "KOI8_R");

        s_enchash.put("EBCDIC-CP-US", "CP037");
        s_enchash.put("EBCDIC-CP-CA", "CP037");
        s_enchash.put("EBCDIC-CP-NL", "CP037");
        s_enchash.put("EBCDIC-CP-DK", "CP277");
        s_enchash.put("EBCDIC-CP-NO", "CP277");
        s_enchash.put("EBCDIC-CP-FI", "CP278");
        s_enchash.put("EBCDIC-CP-SE", "CP278");
        s_enchash.put("EBCDIC-CP-IT", "CP280");
        s_enchash.put("EBCDIC-CP-ES", "CP284");
        s_enchash.put("EBCDIC-CP-GB", "CP285");
        s_enchash.put("EBCDIC-CP-FR", "CP297");
        s_enchash.put("EBCDIC-CP-AR1", "CP420");
        s_enchash.put("EBCDIC-CP-HE", "CP424");
        s_enchash.put("EBCDIC-CP-CH", "CP500");
        s_enchash.put("EBCDIC-CP-ROECE", "CP870");
        s_enchash.put("EBCDIC-CP-YU", "CP870");
        s_enchash.put("EBCDIC-CP-IS", "CP871");
        s_enchash.put("EBCDIC-CP-AR2", "CP918");

        // j:CNS11643 -> EUC-TW?
        // ISO-2022-CN? ISO-2022-CN-EXT?

        // <Java encoding name>, <preferred MIME name>
        // "8859_1" deliberately reverses to ISO-8859-1, not US-ASCII.
        s_revhash.put("UTF8", "UTF-8");
        s_revhash.put("8859_1", "ISO-8859-1");
        s_revhash.put("8859_2", "ISO-8859-2");
        s_revhash.put("8859_3", "ISO-8859-3");
        s_revhash.put("8859_4", "ISO-8859-4");
        s_revhash.put("8859_5", "ISO-8859-5");
        s_revhash.put("8859_6", "ISO-8859-6");
        s_revhash.put("8859_7", "ISO-8859-7");
        s_revhash.put("8859_8", "ISO-8859-8");
        s_revhash.put("8859_9", "ISO-8859-9");
        s_revhash.put("JIS", "ISO-2022-JP");
        s_revhash.put("SJIS", "Shift_JIS");
        s_revhash.put("EUCJIS", "EUC-JP");
        s_revhash.put("GB2312", "GB2312");
        s_revhash.put("BIG5", "Big5");
        s_revhash.put("KSC5601", "EUC-KR");
        s_revhash.put("ISO2022KR", "ISO-2022-KR");
        s_revhash.put("KOI8_R", "KOI8-R");

        // Several EBCDIC MIME names share a single Java code page, so the
        // reverse direction can only report one of them. A map keeps a single
        // value per key, so each shared code page is listed once, with the
        // MIME name this table has always reported for it.
        s_revhash.put("CP037", "EBCDIC-CP-NL"); // also EBCDIC-CP-US, EBCDIC-CP-CA
        s_revhash.put("CP277", "EBCDIC-CP-NO"); // also EBCDIC-CP-DK
        s_revhash.put("CP278", "EBCDIC-CP-SE"); // also EBCDIC-CP-FI
        s_revhash.put("CP280", "EBCDIC-CP-IT");
        s_revhash.put("CP284", "EBCDIC-CP-ES");
        s_revhash.put("CP285", "EBCDIC-CP-GB");
        s_revhash.put("CP297", "EBCDIC-CP-FR");
        s_revhash.put("CP420", "EBCDIC-CP-AR1");
        s_revhash.put("CP424", "EBCDIC-CP-HE");
        s_revhash.put("CP500", "EBCDIC-CP-CH");
        s_revhash.put("CP870", "EBCDIC-CP-YU"); // also EBCDIC-CP-ROECE
        s_revhash.put("CP871", "EBCDIC-CP-IS");
        s_revhash.put("CP918", "EBCDIC-CP-AR2");
    }

    /** Not instantiable: this class only offers static lookups. */
    private MIME2Java() {
    }

    /**
     * Looks up a name in one of the tables, ignoring case.
     * The key is upper-cased with a fixed locale so that the result does not
     * depend on the JVM default locale (for example, a Turkish default locale
     * would otherwise turn 'i' into a dotted capital I and miss every entry
     * containing that letter).
     *
     * @param table the lookup table, keyed by upper-case names
     * @param name the name to look up; may be <var>null</var>
     * @return the mapped name, or <var>null</var> if <var>name</var> is
     *         <var>null</var> or has no entry
     */
    private static String lookup(Hashtable table, String name) {
        if (name == null) {
            return null;
        }
        return (String) table.get(name.toUpperCase(Locale.ENGLISH));
    }

    /**
     * Convert a MIME charset name, also known as an XML encoding name, to a Java encoding name.
     * @param mimeCharsetName Case insensitive MIME charset name: <code>UTF-8, US-ASCII, ISO-8859-1,
     *                        ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, ISO-8859-6,
     *                        ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-2022-JP, Shift_JIS,
     *                        EUC-JP, GB2312, Big5, EUC-KR, ISO-2022-KR, KOI8-R,
     *                        EBCDIC-CP-US, EBCDIC-CP-CA, EBCDIC-CP-NL, EBCDIC-CP-DK,
     *                        EBCDIC-CP-NO, EBCDIC-CP-FI, EBCDIC-CP-SE, EBCDIC-CP-IT,
     *                        EBCDIC-CP-ES, EBCDIC-CP-GB, EBCDIC-CP-FR, EBCDIC-CP-AR1,
     *                        EBCDIC-CP-HE, EBCDIC-CP-CH, EBCDIC-CP-ROECE, EBCDIC-CP-YU,
     *                        EBCDIC-CP-IS and EBCDIC-CP-AR2</code>.
     * @return Java encoding name, or <var>null</var> if <var>mimeCharsetName</var>
     *         is <var>null</var> or unknown.
     * @see #reverse
     */
    public static String convert(String mimeCharsetName) {
        return lookup(s_enchash, mimeCharsetName);
    }

    /**
     * Convert a Java encoding name to MIME charset name.
     * When several MIME names map to the same Java encoding, one fixed
     * representative is returned: CP037 gives EBCDIC-CP-NL, CP277 gives
     * EBCDIC-CP-NO, CP278 gives EBCDIC-CP-SE and CP870 gives EBCDIC-CP-YU.
     * @param encoding Case insensitive Java encoding name: <code>UTF8, 8859_1, 8859_2, 8859_3,
     *                 8859_4, 8859_5, 8859_6, 8859_7, 8859_8, 8859_9, JIS, SJIS, EUCJIS,
     *                 GB2312, BIG5, KSC5601, ISO2022KR, KOI8_R, CP037, CP277, CP278,
     *                 CP280, CP284, CP285, CP297, CP420, CP424, CP500, CP870, CP871
     *                 and CP918</code>.
     * @return MIME charset name, or <var>null</var> if <var>encoding</var> is
     *         <var>null</var> or unknown.
     * @see #convert
     */
    public static String reverse(String encoding) {
        return lookup(s_revhash, encoding);
    }
}