/* Abiword
 * Copyright (C) 2001 Christian Biesinger
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA.
 */

/* The above copyright notice of course only applies to the new code
 * that was written by me, not the part from the OpenOffice code (see below) */

// Taken from the file sal/inc/rtl/textenc.h lines 75ff
// Interestingly, glibc's list of encodings differs from the one of the
// separately available library... thus, some numbers have multiple names
// in this list
// And some are not present in glibc's iconv, as of 2.3.1
// The libiconv list is up-to-date as of Oct.
27, 2002, per // the charset list at http://www.gnu.org/software/libiconv/ // // libiconv extra means that libiconv must've been configured with --enable-extra-encodings #include "ut_types.h" #include "ut_debugmsg.h" #include "ut_iconv.h" #include "ie_imp_StarOffice_encodings.h" struct SOEncoding { UT_uint16 number; const char* name; } static const SOEncodings[] = { { 0, "ISO-8859-1" }, // use latin1 as default charset; but 0 actually means unknown { 1, "WINDOWS-1252" }, { 2, "MAC" }, { 3, "CP437" }, // glibc / libiconv extra { 4, "CP850" }, { 5, "CP860" }, // glibc / libiconv extra { 6, "CP861" }, // glibc / libiconv extra { 7, "CP863" }, // glibc / libiconv extra { 8, "CP865" }, // glibc / libiconv extra /* RESERVED 9, System encoding */ /* 10: Symbol encoding (?) */ { 11, "US-ASCII" }, { 12, "ISO-8859-1" }, { 13, "ISO-8859-2" }, { 14, "ISO-8859-3" }, { 15, "ISO-8859-4" }, { 16, "ISO-8859-5" }, { 17, "ISO-8859-6" }, { 18, "ISO-8859-7" }, { 19, "ISO-8859-8" }, { 20, "ISO-8859-9" }, { 21, "ISO-8859-14" }, { 22, "ISO-8859-15" }, { 23, "CP737" }, // glibc / libiconv extra { 24, "CP775" }, // glibc / libiconv extra { 25, "CP852" }, // glibc / libiconv extra { 26, "CP855" }, // glibc / libiconv extra { 27, "CP857" }, // glibc / libiconv extra { 28, "CP862" }, { 29, "CP864" }, // glibc / libiconv extra { 30, "CP866" }, { 31, "CP869" }, // glibc / libiconv extra { 32, "CP874" }, // XXX Correct? Should be MS 874. 
aka WINDOWS-874 { 33, "WINDOWS-1250" }, { 34, "WINDOWS-1251" }, { 35, "WINDOWS-1253" }, { 36, "WINDOWS-1254" }, { 37, "WINDOWS-1255" }, { 38, "WINDOWS-1256" }, { 39, "WINDOWS-1257" }, { 40, "WINDOWS-1258" }, { 41, "MacArabic" }, // libiconv { 42, "MacCentralEurope" }, // libiconv { 43, "MacCroatian" }, // libiconv { 44, "MAC-CYRILLIC" }, // glibc { 44, "MacCyrillic" }, // libiconv /* 45: Apple Devanagari * 46: Apple Farsi */ { 47, "MacGreek" }, // libiconv /* 48: Apple Gujarati * 49: Apple Gurmukhi */ { 50, "MacHebrew" }, // libiconv { 51, "MAC-IS" }, // XXX Correct? Should be Apple Iceland. glibc. { 51, "MacIceland" }, // libiconv { 52, "MacRomania" }, // libiconv { 53, "MacThai" }, // libiconv { 54, "MacTurkish" }, // libiconv { 55, "MACUKRAINIAN" }, // glibc { 55, "MacUkraine" }, // libiconv /* 56: Apple Chinese Simplified * 57: Apple Chinese Traditional * 58: Apple Japanese * 59: Apple Korean */ { 60, "CP932" }, { 61, "CP936" }, { 62, "CP949" }, { 63, "CP950" }, { 64, "SHIFT-JIS" }, { 65, "GB2312" }, /* 66: GBT 12345 */ { 67, "GBK" }, { 68, "BIG5" }, { 69, "EUC-JP" }, { 70, "EUC-CN" }, { 71, "EUC-TW" }, { 72, "ISO-2022-JP" }, { 73, "ISO-2022-CN" }, { 74, "KOI8-R" }, { 75, "UTF-7" }, { 76, "UTF-8" }, { 77, "ISO-8859-10" }, { 78, "ISO-8859-13" }, { 79, "EUC-KR" }, { 80, "ISO-2022-KR" }, { 81, "JIS_X0201" }, // libiconv. interestingly, this one and the next two are not mentioned on the libiconv homepage. they seem to be supported anyway. 
{ 82, "JIS_X0208" }, // libiconv { 83, "JIS_X0212" }, // libiconv { 84, "CP1361" }, { 85, "GB18030" }, { 0xFFFE, "UCS-4" }, { 0xFFFF, "UCS-2" } }; UT_iconv_t findConverter(UT_uint8 id) { UT_iconv_t converter = (UT_iconv_t)(-1); for (unsigned int i = 0; i < sizeof(SOEncodings)/sizeof(SOEncodings[0]); i++) { if (SOEncodings[i].number == id) { UT_DEBUGMSG(("SDW: Found charset %s for encoding #%i\n", SOEncodings[i].name, id)); converter = UT_iconv_open(UCS_INTERNAL, SOEncodings[i].name); if (UT_iconv_isValid(converter)) break; } } return converter; }