/* AbiSuite * Copyright (C) Jordi Mas i Hernāndez * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA * 02111-1307, USA. */ #include #include "ut_misc.h" #include "barbarisms.h" #include "ut_debugmsg.h" #include "ut_hash.h" #include "ut_string.h" #include "ut_unicode.h" #include "ut_string_class.h" #include "xap_App.h" BarbarismChecker::BarbarismChecker() { m_pCurVector = NULL; } BarbarismChecker::~BarbarismChecker() { UT_GenericStringMap*>::UT_Cursor _hc1(&m_map); for (UT_GenericVector* pVec = _hc1.first(); _hc1.is_valid(); pVec = _hc1.next() ) { if (pVec) { for (UT_sint32 i=0; i < pVec->getItemCount(); i++) delete pVec->getNthItem(i); delete pVec; } } } /* Takes a language code and builds the barbarism full filename. */ bool BarbarismChecker::load(const char * szLang) { UT_DEBUGMSG(("SPELL: BarbarismChecker::load(%s)\n",szLang)); if (!szLang || !*szLang) return false; m_sLang = szLang; UT_String fileName; UT_String fullPath; fileName = szLang; fileName += "-barbarism.xml"; bool bLoaded = false; if (XAP_App::getApp()->findAbiSuiteLibFile(fullPath, fileName.c_str(), "dictionary")) { UT_XML parser; parser.setListener(this); if ((parser.parse (fullPath.c_str())) == UT_OK) { bLoaded = true; UT_DEBUGMSG(("SPELL: barbar %s loaded %zd\n", szLang, m_map.size())); } } return bLoaded; } /** * True if word is a barbarsim, false if not */ bool BarbarismChecker::checkWord(const UT_UCSChar * word32, size_t length) { UT_UTF8String stUTF8; stUTF8.appendUCS4(word32, length); // TODO: capitalization issues bool bResult = (m_map.pick(stUTF8.utf8_str()) != NULL); return bResult; } /* Looks for an exact case match of the suggestion Returns true if word is a barbarism */ bool BarbarismChecker::suggestExactWord(const UT_UCSChar *word32, size_t length, UT_GenericVector* pVecsugg) { const char* pUTF8; const UT_UCS4Char *pWord; UT_UTF8String stUTF8; UT_UCS4Char *suggest32; int nSize; stUTF8.appendUCS4(word32, length); pUTF8 = stUTF8.utf8_str(); UT_GenericVector* vec = m_map.pick(pUTF8); if (!vec) return false; const UT_uint32 nItems = vec->getItemCount(); if (!nItems) return false; for (UT_uint32 iItem = nItems; iItem; --iItem) { pWord = vec->getNthItem(iItem - 1); nSize = sizeof(UT_UCS4Char) * (UT_UCS4_strlen(pWord) + 1); suggest32 = static_cast(g_try_malloc(nSize)); memcpy (suggest32, pWord, nSize); pVecsugg->insertItemAt(suggest32, 0); } return true; } /* Suggest a word with all possible case combinations - If it's lower case, we just look for the lower case word - If it has the first letter upper case, we look for an exact match and for lower case - If it's upper case, we look for the exact match and lower case // not implemented yet Returns true if word is a barbarism */ bool BarbarismChecker::suggestWord(const UT_UCSChar *word32, size_t length, UT_GenericVector* pVecsugg) { bool bIsBarbarism = false; bool bIsLower = true; bool bIsUpperLower = false; size_t len; UT_UCSChar* pStr; if (!length) return false; /* If the word is lower case we just look at the lower case */ len=length; pStr = const_cast(word32); for (; len; pStr++, len--) { if (!UT_UCS4_islower(*pStr)) { bIsLower=false; break; } } if (bIsLower) return suggestExactWord(word32, length, pVecsugg); /* If the word has the first char upper case and the rest lower case */ if (UT_UCS4_isupper(*word32)) { UT_UCSChar* pStr2 = const_cast(word32); pStr2++; len=length; if (len) len--; /* After the first character, the rest should be lower case */ for (;len;pStr2++, len--) { if (!UT_UCS4_islower(*pStr2)) break; } if (!len) bIsUpperLower = true; } if (bIsUpperLower) { UT_UCS4Char* wordsearch; UT_UCS4_cloneString(&wordsearch, word32); /* Convert word into lowercase (only need the first char) */ wordsearch[0] = UT_UCS4_tolower(wordsearch[0]); if ((bIsBarbarism = suggestExactWord(wordsearch, length, pVecsugg))) { const UT_uint32 nItems = pVecsugg->getItemCount(); UT_UCSChar* pSug; /* Make the first letter of all the results uppercase */ for (UT_uint32 iItem = nItems; iItem; --iItem) { pSug = pVecsugg->getNthItem(iItem - 1); *pSug = UT_UCS4_toupper(*pSug); } } if (wordsearch) g_free(wordsearch); } return bIsBarbarism; } /* Called by the parser. We build the barbarism list here Barbarism (the index of the map) is stored in UTF-8 because the map index and the suggestions are in UT_UCSChar */ void BarbarismChecker::startElement(const gchar *name, const gchar **atts) { // TODO: find out the correct arguments for UT_getAttribute... if (strcmp(name, "barbarism")==0) { const char * word = UT_getAttribute ("word", atts); if (word != NULL) { m_pCurVector = new UT_GenericVector(); m_map.insert (word, m_pCurVector); } else m_pCurVector = NULL; } else if (strcmp(name, "suggestion")==0) { if (m_pCurVector) { const char* pUTF8 = UT_getAttribute ("word", atts); if (pUTF8 == NULL) return; size_t length = strlen (pUTF8); int nUSC4Len = 0; UT_UCS4String usc4; while (true) { UT_UCS4Char ch4 = UT_Unicode::UTF8_to_UCS4 (pUTF8, length); if (ch4 == 0) break; nUSC4Len++; usc4+=ch4; } const UT_UCS4Char* pData = usc4.ucs4_str(); UT_UCS4Char *word32 = new UT_UCS4Char[nUSC4Len+1]; memcpy (word32, pData, (nUSC4Len+1)*sizeof(UT_UCS4Char)); // insert suggestions at beginning // this preserves the order in the xml file // and puts them ahead of the regular suggestions m_pCurVector->insertItemAt(word32, 0); } } }