ASCII EBCDIC Conversion
Jump to navigation
Jump to search
References
See:
- http://www.nntp.perl.org/group/perl.mvs/2001/03/msg735.html
- http://www.babbletower.net/index.html?/manencodings.html
- http://www.guiffy.com/help/GuiffyHelp/Encodings.html
- http://java.sun.com/j2se/1.3/docs/guide/intl/encoding.doc.html
Example
Convert encoding names between MIME and Java
/*
 * The Apache Software License, Version 1.1
 *
 *
 * Copyright (c) 1999 The Apache Software Foundation. All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Xerces" and "Apache Software Foundation" must
 *    not be used to endorse or promote products derived from this
 *    software without prior written permission. For written
 *    permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    nor may "Apache" appear in their name, without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation and was
 * originally based on software copyright (c) 1999, International
 * Business Machines, Inc., http://www.apache.org. For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */

package org.apache.xerces.readers;

import java.util.*;

/**
 * MIME2Java is a convenience class which handles conversions between MIME charset names
 * and Java encoding names.
 * <p>The supported XML encodings are the intersection of XML-supported code sets and those
 * supported in JDK 1.1.
 * <p>MIME charset names are used on <var>xmlEncoding</var> parameters to methods such
 * as <code>TXDocument#setEncoding</code> and <code>DTD#setEncoding</code>.
 * <p>Java encoding names are used on <var>encoding</var> parameters to
 * methods such as <code>TXDocument#printWithFormat</code> and <code>DTD#printExternal</code>.
 * <p>Lookups in both directions ignore the case of the name that is passed in.
 *
 * <table border="0" width="100%">
 * <tr><th>Common Name</th><th>Use this name in XML files</th><th>Name Type</th>
 *     <th>Xerces converts to this Java Encoder Name</th></tr>
 * <tr><td>8 bit Unicode</td><td>UTF-8</td><td>IANA</td><td>UTF8</td></tr>
 * <tr><td>ISO Latin 1</td><td>ISO-8859-1</td><td>MIME</td><td>8859_1</td></tr>
 * <tr><td>ISO Latin 2</td><td>ISO-8859-2</td><td>MIME</td><td>8859_2</td></tr>
 * <tr><td>ISO Latin 3</td><td>ISO-8859-3</td><td>MIME</td><td>8859_3</td></tr>
 * <tr><td>ISO Latin 4</td><td>ISO-8859-4</td><td>MIME</td><td>8859_4</td></tr>
 * <tr><td>ISO Latin Cyrillic</td><td>ISO-8859-5</td><td>MIME</td><td>8859_5</td></tr>
 * <tr><td>ISO Latin Arabic</td><td>ISO-8859-6</td><td>MIME</td><td>8859_6</td></tr>
 * <tr><td>ISO Latin Greek</td><td>ISO-8859-7</td><td>MIME</td><td>8859_7</td></tr>
 * <tr><td>ISO Latin Hebrew</td><td>ISO-8859-8</td><td>MIME</td><td>8859_8</td></tr>
 * <tr><td>ISO Latin 5</td><td>ISO-8859-9</td><td>MIME</td><td>8859_9</td></tr>
 * <tr><td>EBCDIC: US</td><td>ebcdic-cp-us</td><td>IANA</td><td>CP037</td></tr>
 * <tr><td>EBCDIC: Canada</td><td>ebcdic-cp-ca</td><td>IANA</td><td>CP037</td></tr>
 * <tr><td>EBCDIC: Netherlands</td><td>ebcdic-cp-nl</td><td>IANA</td><td>CP037</td></tr>
 * <tr><td>EBCDIC: Denmark</td><td>ebcdic-cp-dk</td><td>IANA</td><td>CP277</td></tr>
 * <tr><td>EBCDIC: Norway</td><td>ebcdic-cp-no</td><td>IANA</td><td>CP277</td></tr>
 * <tr><td>EBCDIC: Finland</td><td>ebcdic-cp-fi</td><td>IANA</td><td>CP278</td></tr>
 * <tr><td>EBCDIC: Sweden</td><td>ebcdic-cp-se</td><td>IANA</td><td>CP278</td></tr>
 * <tr><td>EBCDIC: Italy</td><td>ebcdic-cp-it</td><td>IANA</td><td>CP280</td></tr>
 * <tr><td>EBCDIC: Spain, Latin America</td><td>ebcdic-cp-es</td><td>IANA</td><td>CP284</td></tr>
 * <tr><td>EBCDIC: Great Britain</td><td>ebcdic-cp-gb</td><td>IANA</td><td>CP285</td></tr>
 * <tr><td>EBCDIC: France</td><td>ebcdic-cp-fr</td><td>IANA</td><td>CP297</td></tr>
 * <tr><td>EBCDIC: Arabic</td><td>ebcdic-cp-ar1</td><td>IANA</td><td>CP420</td></tr>
 * <tr><td>EBCDIC: Hebrew</td><td>ebcdic-cp-he</td><td>IANA</td><td>CP424</td></tr>
 * <tr><td>EBCDIC: Switzerland</td><td>ebcdic-cp-ch</td><td>IANA</td><td>CP500</td></tr>
 * <tr><td>EBCDIC: Roece</td><td>ebcdic-cp-roece</td><td>IANA</td><td>CP870</td></tr>
 * <tr><td>EBCDIC: Yugoslavia</td><td>ebcdic-cp-yu</td><td>IANA</td><td>CP870</td></tr>
 * <tr><td>EBCDIC: Iceland</td><td>ebcdic-cp-is</td><td>IANA</td><td>CP871</td></tr>
 * <tr><td>EBCDIC: Urdu</td><td>ebcdic-cp-ar2</td><td>IANA</td><td>CP918</td></tr>
 * <tr><td>Chinese for PRC, mixed 1/2 byte</td><td>gb2312</td><td>MIME</td><td>GB2312</td></tr>
 * <tr><td>Extended Unix Code, packed for Japanese</td><td>euc-jp</td><td>MIME</td><td>EUCJIS</td></tr>
 * <tr><td>Japanese: iso-2022-jp</td><td>iso-2022-jp</td><td>MIME</td><td>JIS</td></tr>
 * <tr><td>Japanese: Shift JIS</td><td>Shift_JIS</td><td>MIME</td><td>SJIS</td></tr>
 * <tr><td>Chinese: Big5</td><td>Big5</td><td>MIME</td><td>Big5</td></tr>
 * <tr><td>Extended Unix Code, packed for Korean</td><td>euc-kr</td><td>MIME</td><td>KSC5601</td></tr>
 * <tr><td>Cyrillic</td><td>koi8-r</td><td>MIME</td><td>KOI8_R</td></tr>
 * </table>
 *
 * <p>In addition to the names in the table, <code>US-ASCII</code> is accepted and converted
 * to <code>8859_1</code>, and <code>ISO-2022-KR</code> is converted to <code>ISO2022KR</code>.
 *
 * @author TAMURA Kent &lt;kent@trl.ibm.co.jp&gt;
 */
public final class MIME2Java {

    /** Upper-cased preferred MIME charset name -> Java encoding name. */
    private static final Hashtable s_enchash;

    /** Upper-cased Java encoding name -> preferred MIME charset name. */
    private static final Hashtable s_revhash;

    static {
        s_enchash = new Hashtable();
        // <preferred MIME name>, <Java encoding name>
        s_enchash.put("UTF-8", "UTF8");
        s_enchash.put("US-ASCII", "8859_1"); // ? (mapping flagged as uncertain by the original author)
        s_enchash.put("ISO-8859-1", "8859_1");
        s_enchash.put("ISO-8859-2", "8859_2");
        s_enchash.put("ISO-8859-3", "8859_3");
        s_enchash.put("ISO-8859-4", "8859_4");
        s_enchash.put("ISO-8859-5", "8859_5");
        s_enchash.put("ISO-8859-6", "8859_6");
        s_enchash.put("ISO-8859-7", "8859_7");
        s_enchash.put("ISO-8859-8", "8859_8");
        s_enchash.put("ISO-8859-9", "8859_9");
        s_enchash.put("ISO-2022-JP", "JIS");
        s_enchash.put("SHIFT_JIS", "SJIS");
        s_enchash.put("EUC-JP", "EUCJIS");
        s_enchash.put("GB2312", "GB2312");
        s_enchash.put("BIG5", "Big5");
        s_enchash.put("EUC-KR", "KSC5601");
        s_enchash.put("ISO-2022-KR", "ISO2022KR");
        s_enchash.put("KOI8-R", "KOI8_R");

        s_enchash.put("EBCDIC-CP-US", "CP037");
        s_enchash.put("EBCDIC-CP-CA", "CP037");
        s_enchash.put("EBCDIC-CP-NL", "CP037");
        s_enchash.put("EBCDIC-CP-DK", "CP277");
        s_enchash.put("EBCDIC-CP-NO", "CP277");
        s_enchash.put("EBCDIC-CP-FI", "CP278");
        s_enchash.put("EBCDIC-CP-SE", "CP278");
        s_enchash.put("EBCDIC-CP-IT", "CP280");
        s_enchash.put("EBCDIC-CP-ES", "CP284");
        s_enchash.put("EBCDIC-CP-GB", "CP285");
        s_enchash.put("EBCDIC-CP-FR", "CP297");
        s_enchash.put("EBCDIC-CP-AR1", "CP420");
        s_enchash.put("EBCDIC-CP-HE", "CP424");
        s_enchash.put("EBCDIC-CP-CH", "CP500");
        s_enchash.put("EBCDIC-CP-ROECE", "CP870");
        s_enchash.put("EBCDIC-CP-YU", "CP870");
        s_enchash.put("EBCDIC-CP-IS", "CP871");
        s_enchash.put("EBCDIC-CP-AR2", "CP918");

        // j:CNS11643 -> EUC-TW?
        // ISO-2022-CN? ISO-2022-CN-EXT?

        s_revhash = new Hashtable();
        // <Java encoding name>, <preferred MIME name>
        s_revhash.put("UTF8", "UTF-8");
        // "8859_1" reverses to ISO-8859-1, never to US-ASCII.
        s_revhash.put("8859_1", "ISO-8859-1");
        s_revhash.put("8859_2", "ISO-8859-2");
        s_revhash.put("8859_3", "ISO-8859-3");
        s_revhash.put("8859_4", "ISO-8859-4");
        s_revhash.put("8859_5", "ISO-8859-5");
        s_revhash.put("8859_6", "ISO-8859-6");
        s_revhash.put("8859_7", "ISO-8859-7");
        s_revhash.put("8859_8", "ISO-8859-8");
        s_revhash.put("8859_9", "ISO-8859-9");
        s_revhash.put("JIS", "ISO-2022-JP");
        s_revhash.put("SJIS", "Shift_JIS");
        s_revhash.put("EUCJIS", "EUC-JP");
        s_revhash.put("GB2312", "GB2312");
        s_revhash.put("BIG5", "Big5");
        s_revhash.put("KSC5601", "EUC-KR");
        s_revhash.put("ISO2022KR", "ISO-2022-KR");
        s_revhash.put("KOI8_R", "KOI8-R");

        // Several MIME names share one Java code page, but the table holds a single
        // value per key. Earlier revisions registered every alias in turn, so only
        // the last one registered was ever returned; those effective mappings are
        // kept here so that reverse() answers exactly as before.
        s_revhash.put("CP037", "EBCDIC-CP-NL");   // also EBCDIC-CP-US, EBCDIC-CP-CA
        s_revhash.put("CP277", "EBCDIC-CP-NO");   // also EBCDIC-CP-DK
        s_revhash.put("CP278", "EBCDIC-CP-SE");   // also EBCDIC-CP-FI
        s_revhash.put("CP280", "EBCDIC-CP-IT");
        s_revhash.put("CP284", "EBCDIC-CP-ES");
        s_revhash.put("CP285", "EBCDIC-CP-GB");
        s_revhash.put("CP297", "EBCDIC-CP-FR");
        s_revhash.put("CP420", "EBCDIC-CP-AR1");
        s_revhash.put("CP424", "EBCDIC-CP-HE");
        s_revhash.put("CP500", "EBCDIC-CP-CH");
        s_revhash.put("CP870", "EBCDIC-CP-YU");   // also EBCDIC-CP-ROECE
        s_revhash.put("CP871", "EBCDIC-CP-IS");
        s_revhash.put("CP918", "EBCDIC-CP-AR2");
    }

    /** Not instantiable: the class only offers static lookups. */
    private MIME2Java() {
    }

    /**
     * Convert a MIME charset name, also known as an XML encoding name, to a Java encoding name.
     * @param mimeCharsetName Case insensitive MIME charset name: <code>UTF-8, US-ASCII, ISO-8859-1,
     *                        ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, ISO-8859-6,
     *                        ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-2022-JP, Shift_JIS,
     *                        EUC-JP, GB2312, Big5, EUC-KR, ISO-2022-KR, KOI8-R,
     *                        EBCDIC-CP-US, EBCDIC-CP-CA, EBCDIC-CP-NL, EBCDIC-CP-DK,
     *                        EBCDIC-CP-NO, EBCDIC-CP-FI, EBCDIC-CP-SE, EBCDIC-CP-IT,
     *                        EBCDIC-CP-ES, EBCDIC-CP-GB, EBCDIC-CP-FR, EBCDIC-CP-AR1,
     *                        EBCDIC-CP-HE, EBCDIC-CP-CH, EBCDIC-CP-ROECE, EBCDIC-CP-YU,
     *                        EBCDIC-CP-IS and EBCDIC-CP-AR2</code>.
     * @return Java encoding name, or <var>null</var> if <var>mimeCharsetName</var>
     *         is unknown or <var>null</var>.
     * @see #reverse
     */
    public static String convert(String mimeCharsetName) {
        if (mimeCharsetName == null) {
            return null;
        }
        // Upper-case with a fixed locale: under a Turkish default locale "i" becomes
        // a dotted capital I and names such as "iso-8859-1" would miss the table.
        return (String) s_enchash.get(mimeCharsetName.toUpperCase(Locale.ENGLISH));
    }

    /**
     * Convert a Java encoding name to MIME charset name.
     * Available values of <i>encoding</i> are "UTF8", "8859_1", "8859_2", "8859_3", "8859_4",
     * "8859_5", "8859_6", "8859_7", "8859_8", "8859_9", "JIS", "SJIS", "EUCJIS",
     * "GB2312", "BIG5", "KSC5601", "ISO2022KR", "KOI8_R", "CP037", "CP277", "CP278",
     * "CP280", "CP284", "CP285", "CP297", "CP420", "CP424", "CP500", "CP870", "CP871" and "CP918".
     * <p>Where several MIME names map to one Java code page, a single alias is returned:
     * CP037 gives EBCDIC-CP-NL, CP277 gives EBCDIC-CP-NO, CP278 gives EBCDIC-CP-SE and
     * CP870 gives EBCDIC-CP-YU.
     * @param encoding Case insensitive Java encoding name: <code>UTF8, 8859_1, 8859_2, 8859_3,
     *                 8859_4, 8859_5, 8859_6, 8859_7, 8859_8, 8859_9, JIS, SJIS, EUCJIS,
     *                 GB2312, BIG5, KSC5601, ISO2022KR, KOI8_R, CP037, CP277, CP278,
     *                 CP280, CP284, CP285, CP297, CP420, CP424, CP500, CP870, CP871
     *                 and CP918</code>.
     * @return MIME charset name, or <var>null</var> if <var>encoding</var> is unknown
     *         or <var>null</var>.
     * @see #convert
     */
    public static String reverse(String encoding) {
        if (encoding == null) {
            return null;
        }
        // Fixed locale for the same reason as in convert(): "sjis" or "big5" must
        // upper-case to the plain ASCII keys stored in the table.
        return (String) s_revhash.get(encoding.toUpperCase(Locale.ENGLISH));
    }
}