/* AbiWord * Copyright (C) 2000 AbiSource, Inc. * Copyright (C) 2001 Hubert Figuiere * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA * 02111-1307, USA. */ #include #include #include #include #include "xap_CocoaEncodingManager.h" #include "ut_assert.h" #include "ut_debugmsg.h" #include "ut_string.h" /* We source this file in order not to replace symbols in gnome-libs. (if AW is linked with GNOME). */ /* Only this function is used. For $LANG=ru_RU.KOI8-R it returns the following list: ru_RU.KOI8-R ru_RU ru.KOI8-R ru C "" It's much more powerfull (gives correct result) than plain setlocale since it explicitly reads locale aliases. */ #ifdef __cplusplus extern "C" { #endif /* __cplusplus */ const GList * g_i18n_get_language_list (const gchar *category_name); #ifdef __cplusplus } #endif /* __cplusplus */ /* * Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation * All rights reserved. * * This file is part of the Gnome Library. * * The Gnome Library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public License as * published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. 
* * The Gnome Library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with the Gnome Library; see the file COPYING.LIB. If not, * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ #ifdef HAVE_CONFIG_H #include #endif #include #include #include #include static GHashTable *alias_table = NULL; static GHashTable *category_table= NULL; /*read an alias file for the locales*/ static void read_aliases (const char *file) { FILE *fp; char buf[256]; if (!alias_table) alias_table = g_hash_table_new (g_str_hash, g_str_equal); fp = fopen (file,"r"); if (!fp) return; while (fgets (buf,256,fp)) { char *p; g_strstrip(buf); if (buf[0]=='#' || buf[0]=='\0') continue; p = strtok (buf,"\t "); if (!p) continue; p = strtok (NULL,"\t "); if(!p) continue; if (!g_hash_table_lookup (alias_table, buf)) g_hash_table_insert (alias_table, g_strdup(buf), g_strdup(p)); } fclose (fp); } /*return the un-aliased language as a newly allocated string*/ static char * unalias_lang (char *lang) { char *p; int i; if (!alias_table) { read_aliases ("/usr/lib/locale/locale.alias"); read_aliases ("/usr/local/lib/locale/locale.alias"); read_aliases ("/usr/share/locale/locale.alias"); read_aliases ("/usr/local/share/locale/locale.alias"); read_aliases ("/usr/lib/X11/locale/locale.alias"); read_aliases ("/usr/openwin/lib/locale/locale.alias"); } i = 0; while ((p=(char*)g_hash_table_lookup(alias_table,lang)) && strcmp(p, lang)) { lang = p; if (i++ == 30) { static gboolean said_before = FALSE; if (!said_before) g_warning ("Too many alias levels for a locale, " "may indicate a loop"); said_before = TRUE; return lang; } } return lang; } /* Mask for components of locale spec. 
The ordering here is from
 * least significant to most significant */
enum
{
  COMPONENT_CODESET = 1 << 0,
  COMPONENT_TERRITORY = 1 << 1,
  COMPONENT_MODIFIER = 1 << 2
};

/*
 * Break an X/Open style locale specification
 * (language[_territory][.codeset][@modifier]) into components.
 *
 * *language always receives a newly allocated copy of the language part.
 * For each optional part that is present, the matching out-parameter
 * receives a newly allocated copy that still starts with its separator
 * ('_', '.' or '@') and the corresponding COMPONENT_* bit is set in the
 * returned mask.  Out-parameters of absent parts are left untouched, so
 * callers must consult the mask (or pre-initialise them) before use.
 */
static guint
explode_locale (const gchar *locale,
                gchar **language,
                gchar **territory,
                gchar **codeset,
                gchar **modifier)
{
  const gchar *territory_start = strchr (locale, '_');
  const gchar *codeset_start = strchr (territory_start ? territory_start : locale, '.');
  const gchar *modifier_start;
  const gchar *piece_end;
  guint present = 0;

  /* The modifier is only looked for after the last separator found so far. */
  if (codeset_start)
    modifier_start = strchr (codeset_start, '@');
  else if (territory_start)
    modifier_start = strchr (territory_start, '@');
  else
    modifier_start = strchr (locale, '@');

  /* Work from the tail towards the head: piece_end always marks where the
   * next piece to be cut out stops. */
  piece_end = locale + strlen (locale);

  if (modifier_start)
    {
      *modifier = g_strdup (modifier_start);
      present |= COMPONENT_MODIFIER;
      piece_end = modifier_start;
    }

  if (codeset_start)
    {
      *codeset = g_strndup (codeset_start, piece_end - codeset_start);
      present |= COMPONENT_CODESET;
      piece_end = codeset_start;
    }

  if (territory_start)
    {
      *territory = g_strndup (territory_start, piece_end - territory_start);
      present |= COMPONENT_TERRITORY;
      piece_end = territory_start;
    }

  *language = g_strndup (locale, piece_end - locale);

  return present;
}

/*
 * Compute all interesting variants for a given locale name -
 * by stripping off different components of the value.
 *
 * For simplicity, we assume that the locale is in
 * X/Open format: language[_territory][.codeset][@modifier]
 *
 * TODO: Extend this to handle the CEN format (see the GNUlibc docs)
 * as well. We could just copy the code from glibc wholesale
 * but it is big, ugly, and complicated, so I'm reluctant
 * to do so when this should handle 99% of the time...
*/
/*
 * Returns a newly allocated list of newly allocated strings.  The head of
 * the list is the variant carrying every component present in LOCALE and
 * the tail is the bare language.  A NULL locale yields NULL.
 */
static GList *
compute_locale_variants (const gchar *locale)
{
  GList *variants = NULL;
  gchar *language = NULL;
  gchar *territory = NULL;
  gchar *codeset = NULL;
  gchar *modifier = NULL;
  guint present;
  guint combo;

  UT_return_val_if_fail (locale != NULL, NULL);

  present = explode_locale (locale, &language, &territory, &codeset, &modifier);

  /* Walk every subset of the components that are present, from least
   * attractive to most attractive; prepending reverses that order. */
  for (combo = 0; combo <= present; combo++)
    {
      const gchar *territory_part = "";
      const gchar *codeset_part = "";
      const gchar *modifier_part = "";

      /* Skip bit patterns that ask for a component LOCALE does not have. */
      if ((combo & ~present) != 0)
        continue;

      if (combo & COMPONENT_TERRITORY)
        territory_part = territory;
      if (combo & COMPONENT_CODESET)
        codeset_part = codeset;
      if (combo & COMPONENT_MODIFIER)
        modifier_part = modifier;

      variants = g_list_prepend (variants,
                                 g_strconcat (language,
                                              territory_part,
                                              codeset_part,
                                              modifier_part,
                                              NULL));
    }

  /* Components that were absent are still NULL, which g_free() ignores. */
  g_free (language);
  g_free (codeset);
  g_free (territory);
  g_free (modifier);

  return variants;
}

/* The following is (partly) taken from the gettext package.
   Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc. */

/*
 * Return the value of the first non-empty environment variable among
 * `LANGUAGE' (a GNU extension, highest priority), `LC_ALL' (overrides all
 * other categories), CATEGORYNAME itself and `LANG', or NULL when none of
 * them is set to a non-empty value.
 */
static const gchar *
guess_category_value (const gchar *categoryname)
{
  const gchar *const lookup_order[] = { "LANGUAGE", "LC_ALL", categoryname, "LANG" };
  guint k;

  for (k = 0; k < sizeof lookup_order / sizeof lookup_order[0]; k++)
    {
      const gchar *value = g_getenv (lookup_order[k]);

      if (value != NULL && value[0] != '\0')
        return value;
    }

  return NULL;
}

/**
 * g_i18n_get_language_list:
 * @category_name: Name of category to look up, e.g. "LC_MESSAGES".
* * This computes a list of language strings. It searches in the * standard environment variables to find the list, which is sorted * in order from most desirable to least desirable. The `C' locale * is appended to the list if it does not already appear (other routines depend on this behaviour). * If @category_name is %NULL, then LC_ALL is assumed. * * Return value: the list of languages, this list should not be freed as it is owned by gnome-i18n **/ const GList * g_i18n_get_language_list (const gchar *category_name) { GList *list; if (!category_name) category_name= "LC_ALL"; if (category_table) { #if 0 // we want a fresh reading of the LANG variable every time so we can // work out the 8bit encoding under utf-8 locale list= (GList *)g_hash_table_lookup (category_table, (const gpointer) category_name); #else xxx_UT_DEBUGMSG(("recreating hash table\n")); g_hash_table_destroy (category_table); category_table= g_hash_table_new (g_str_hash, g_str_equal); list= NULL; #endif } else { category_table= g_hash_table_new (g_str_hash, g_str_equal); list= NULL; } if (!list) { gint c_locale_defined= FALSE; const gchar *category_value; gchar *category_memory, *orig_category_memory; category_value = guess_category_value (category_name); if (! 
category_value) category_value = "C"; orig_category_memory = category_memory = (gchar*) g_malloc (strlen (category_value)+1); while (category_value[0] != '\0') { while (category_value[0] != '\0' && category_value[0] == ':') ++category_value; if (category_value[0] != '\0') { char *cp= category_memory; while (category_value[0] != '\0' && category_value[0] != ':') *category_memory++= *category_value++; category_memory[0]= '\0'; category_memory++; cp = unalias_lang(cp); if (strcmp (cp, "C") == 0) c_locale_defined= TRUE; list= g_list_concat (list, compute_locale_variants (cp)); } } g_free (orig_category_memory); if (!c_locale_defined) list= g_list_append (list, (void*)"C"); g_hash_table_insert (category_table, (gpointer) category_name, list); } return list; } /************************************************************/ XAP_EncodingManager *XAP_EncodingManager::get_instance() { if (_instance == 0) { UT_DEBUGMSG(("Building XAP_EncodingManager\n")); _instance = new XAP_CocoaEncodingManager(); _instance->initialize(); UT_DEBUGMSG(("XAP_EncodingManager built\n")); } return _instance; } /************************************************************/ XAP_CocoaEncodingManager::XAP_CocoaEncodingManager() { } XAP_CocoaEncodingManager::~XAP_CocoaEncodingManager() {} static const char * NativeEncodingName; static const char * NativeUnicodeEncodingName; static const char * Native8BitEncodingName; static const char * LanguageISOName; static const char * LanguageISOTerritory; const char* XAP_CocoaEncodingManager::getNativeEncodingName() const { return NativeEncodingName; } const char* XAP_CocoaEncodingManager::getNativeUnicodeEncodingName() const { return NativeEncodingName; } const char* XAP_CocoaEncodingManager::getNative8BitEncodingName() const { return Native8BitEncodingName; } const char* XAP_CocoaEncodingManager::getLanguageISOName() const { return LanguageISOName; } const char* XAP_CocoaEncodingManager::getLanguageISOTerritory() const { return LanguageISOTerritory; } void 
XAP_CocoaEncodingManager::initialize()
{
	/*
	 * Works out the native encoding, the 8-bit encoding and the ISO
	 * language/territory from the first entry of the language list, then
	 * runs the base-class initialisation and describe().
	 *
	 * The strings the static name pointers end up referring to are
	 * deliberately never freed: the getters hand them out for the rest of
	 * the process lifetime.
	 */
	const GList* lst = g_i18n_get_language_list ("LANG");
	const char* locname = (lst && lst->data) ? static_cast<const char*>(lst->data) : "C";

	/* Defaults; they stay in effect for an empty or "C" locale
	 * (paranoic case - broken system). */
	NativeEncodingName = "ISO-8859-1";
	Native8BitEncodingName = "ISO-8859-1";
	NativeUnicodeEncodingName = "UTF-8";
	LanguageISOName = "en";
	LanguageISOTerritory = "US";

	if (*locname && strcmp(locname,"C") != 0)
	{
		char* lang = NULL;
		char* terr = NULL;
		char* cs = NULL;
		char* mod = NULL;
		int mask = explode_locale (locname,&lang,&terr,&cs,&mod);

		LanguageISOName = lang;
		g_free(mod); // unneeded
		mod = NULL;

		if (mask & COMPONENT_TERRITORY)
		{
			LanguageISOTerritory = terr+1; /* yes, +1: skip the leading '_' */
		}

		if (mask & COMPONENT_CODESET)
		{
			NativeEncodingName = cs+1; /* skip the leading '.' */
			if (!strncmp(cs+1,"ISO8859",strlen("ISO8859")))
			{
				/* work around glibc bug - its iconv doesn't know
				   ISO8859-* - ISO-8859-1 should be used (we encounter
				   this since locale.alias aliases en_US.ISO8859-1 to 'en'
				   PS: This is true for my RH6.0 system - VH

				   The name is built on the heap: the codeset comes from
				   the environment and can be arbitrarily long, so a
				   fixed-size buffer could overflow. */
				NativeEncodingName = g_strconcat ("ISO-", cs+1+3, NULL);
				g_free (cs); /* nothing points into it any more */
				cs = NULL;
			}
			Native8BitEncodingName = NativeEncodingName;

			// need to get 8bit encoding if encoding is utf-8
			if (!strcmp(NativeEncodingName, "utf-8") || !strcmp(NativeEncodingName, "UTF-8"))
			{
				// we want to get the encoding that would be used for the given
				// language/territory if the utf-8 encoding was not specified
				// by LANG
				//
				// NOTE(review): guess_category_value() consults LANGUAGE and
				// LC_ALL before LANG, so when either of those is set the
				// re-query below sees the same locale as before.

				// LANG may be unset (the locale can come from LANGUAGE or
				// LC_ALL), so never build a std::string from a NULL pointer.
				// The value is copied before the environment is modified.
				const char* old_env = getenv("LANG");
				const bool lang_was_set = (old_env != NULL);
				const std::string OLDLANG (lang_was_set ? old_env : "");

				std::string MYLANG (LanguageISOName);
				MYLANG += "_";
				MYLANG += LanguageISOTerritory;
#if defined(SETENV_MISSING)
				// putenv() keeps the pointer it is given, so it must get
				// storage that outlives this function.
				putenv (g_strdup ((std::string("LANG=") + MYLANG).c_str()));
#else
				setenv ("LANG", MYLANG.c_str(), 1);
#endif

				const GList* my_lst = g_i18n_get_language_list ("LANG");
				const char* my_locname = (my_lst && my_lst->data) ? static_cast<const char*>(my_lst->data) : "C";

				char* my_lang = NULL;
				char* my_terr = NULL;
				char* my_cs = NULL;
				char* my_mod = NULL;
				int my_mask = explode_locale (my_locname,&my_lang,&my_terr,&my_cs,&my_mod);

				// Only the codeset is of interest here.
				g_free (my_mod);
				g_free (my_lang);
				g_free (my_terr);
				my_mod = my_lang = my_terr = NULL;

				if (my_mask & COMPONENT_CODESET)
				{
					Native8BitEncodingName = my_cs+1;
					xxx_UT_DEBUGMSG(("Native8BitEncodingName (1) %s\n", Native8BitEncodingName));
					if (!strncmp(my_cs+1,"ISO8859",strlen("ISO8859")))
					{
						Native8BitEncodingName = g_strconcat ("ISO-", my_cs+1+3, NULL);
						g_free (my_cs);
						my_cs = NULL;
					}
					xxx_UT_DEBUGMSG(("Native8BitEncodingName (2) %s\n", Native8BitEncodingName));
				}

				// Put the environment back the way it was found.
#if defined(SETENV_MISSING)
				// Without setenv/unsetenv an originally unset LANG is
				// restored as an empty value.
				putenv (g_strdup ((std::string("LANG=") + OLDLANG).c_str()));
#else
				if (lang_was_set)
					setenv ("LANG", OLDLANG.c_str(), 1);
				else
					unsetenv ("LANG");
#endif
			}
		}
	}

	XAP_EncodingManager::initialize();
	describe();
}