/* "$Id: $" * * Author: Jean-Marc Lienher ( http://oksid.ch ) * Copyright 2000-2003 by O'ksi'D. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA. * * Please report all bugs and problems on the following page: * * http://www.fltk.org/str.php */ /* * Unicode to UTF-8 conversion functions. */ #if !defined(WIN32) && !defined(__APPLE__) #include "../../FL/Xutf8.h" /*** NOTE : all functions are LIMITED to 24 bits Unicode values !!! 
/*** NOTE: every function in this file is limited to 24-bit Unicode values. ***/

/*
 * XConvertUtf8ToUcs() - decode the first UTF-8 character found in a buffer.
 *
 *   buf  bytes to decode; only read when len is at least 1
 *   len  number of bytes available in buf
 *   ucs  receives the decoded value, or '?' when decoding fails
 *
 * Returns the byte length (1 to 5) of the decoded character, or -1 when the
 * input is empty, truncated, starts with a continuation byte, uses a 6+ byte
 * lead, contains a bad continuation byte, is an overlong encoding, or encodes
 * a value above 0x00FFFFFF.
 *
 * The surrogate range is not filtered out.
 */
int XConvertUtf8ToUcs(const unsigned char *buf, int len, unsigned int *ucs)
{
  unsigned int value;   /* code point being assembled                     */
  unsigned int lowest;  /* smallest value that really needs `size` bytes  */
  int size;             /* sequence length announced by the lead byte     */
  int i;

  /* Every failure path reports '?'; success overwrites it below. */
  *ucs = (unsigned int) '?';

  /* Check the length before touching buf[0] so an empty input is never read. */
  if (len < 1) {
    return -1;
  }

  if (buf[0] < 0x80) {
    /* 0xxxxxxx: 0x00000000 - 0x0000007F */
    *ucs = buf[0];
    return 1;
  } else if ((buf[0] & 0xE0) == 0xC0) {
    /* 110xxxxx: 0x00000080 - 0x000007FF */
    size = 2;
    value = buf[0] & 0x1F;
    lowest = 0x00000080;
  } else if ((buf[0] & 0xF0) == 0xE0) {
    /* 1110xxxx: 0x00000800 - 0x0000FFFF */
    size = 3;
    value = buf[0] & 0x0F;
    lowest = 0x00000800;
  } else if ((buf[0] & 0xF8) == 0xF0) {
    /* 11110xxx: 0x00010000 - 0x001FFFFF */
    size = 4;
    value = buf[0] & 0x07;
    lowest = 0x00010000;
  } else if ((buf[0] & 0xFC) == 0xF8) {
    /* 111110xx: 0x00200000 - 0x03FFFFFF, capped to 24 bits further down */
    size = 5;
    value = buf[0] & 0x03;
    lowest = 0x00200000;
  } else {
    /* 10xxxxxx (stray continuation byte) or a 6+ byte lead: not supported */
    return -1;
  }

  if (len < size) {
    return -1;  /* sequence is cut short by the end of the buffer */
  }

  for (i = 1; i < size; i++) {
    if ((buf[i] & 0xC0) != 0x80) {
      return -1;  /* not a 10xxxxxx continuation byte */
    }
    value = (value << 6) | (buf[i] & 0x3F);
  }

  /* Reject overlong encodings and anything wider than 24 bits. */
  if (value < lowest || value > 0x00FFFFFF) {
    return -1;
  }

  *ucs = value;
  return size;
}
/*
 * XConvertUcsToUtf8() - encode one Unicode value as UTF-8.
 *
 *   ucs  value to encode
 *   buf  output buffer; up to 5 bytes are written, so it must hold at least 5
 *
 * Returns the number of bytes written (1 to 5).  Values of 0x01000000 and
 * above cannot be encoded: buf[0] is set to '?' and -1 is returned.
 * No terminating NUL is written.
 */
int XConvertUcsToUtf8(unsigned int ucs, char *buf)
{
  /* Lead-byte marker indexed by sequence length (slots 0 and 1 are unused). */
  static const unsigned char lead_mark[] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8 };
  int size;
  int pos;

  if (ucs < 0x000080) {
    buf[0] = (char) ucs;
    return 1;
  }
  if (ucs >= 0x01000000) {
    buf[0] = '?';
    return -1;
  }

  if (ucs < 0x000800) {
    size = 2;
  } else if (ucs < 0x010000) {
    size = 3;
  } else if (ucs < 0x00200000) {
    size = 4;
  } else {
    size = 5;
  }

  /* Fill the continuation bytes from the tail, six payload bits at a time. */
  for (pos = size - 1; pos > 0; pos--) {
    buf[pos] = (char) (0x80 | (ucs & 0x3F));
    ucs >>= 6;
  }

  /* Whatever is left of ucs is the payload of the lead byte. */
  buf[0] = (char) (lead_mark[size] | ucs);
  return size;
}

/*
 * XUtf8CharByteLen() - byte length of the first UTF-8 character in buf.
 *
 * Thin wrapper around XConvertUtf8ToUcs() that discards the decoded value.
 * Returns that function's result unchanged, i.e. -1 when the sequence is
 * not valid.
 */
int XUtf8CharByteLen(const unsigned char *buf, int len)
{
  unsigned int discarded;

  return XConvertUtf8ToUcs(buf, len, &discarded);
}

/*
 * XCountUtf8Char() - number of Unicode characters in a UTF-8 buffer.
 *
 *   buf  bytes to scan
 *   len  number of bytes in buf
 *
 * A byte that does not start a valid sequence is counted as one character
 * and skipped on its own, so the scan always moves forward.
 */
int XCountUtf8Char(const unsigned char *buf, int len)
{
  int count = 0;
  int pos;

  for (pos = 0; pos < len; count++) {
    int step = XUtf8CharByteLen(buf + pos, len - pos);

    pos += (step < 1) ? 1 : step;
  }
  return count;
}
/*
 * XFastConvertUtf8ToUcs() - decode the first UTF-8 character of a buffer
 * without validating it.
 *
 * Same contract as XConvertUtf8ToUcs(), except that continuation bytes,
 * overlong encodings and the 24-bit ceiling are NOT checked.  Only the lead
 * byte pattern and the available length are looked at.
 *
 *   buf  bytes to decode; only read when len is at least 1
 *   len  number of bytes available in buf
 *   ucs  receives the decoded value, or '?' when decoding fails
 *
 * Returns the byte length (1 to 5) implied by the lead byte, or -1 when the
 * input is empty, truncated, starts with a continuation byte, or uses a
 * 6+ byte lead.
 */
int XFastConvertUtf8ToUcs(const unsigned char *buf, int len, unsigned int *ucs)
{
  unsigned int value;  /* code point being assembled                  */
  int size;            /* sequence length announced by the lead byte  */
  int i;

  /* Every failure path reports '?'; success overwrites it below. */
  *ucs = (unsigned int) '?';

  /* Check the length before touching buf[0] so an empty input is never read. */
  if (len < 1) {
    return -1;
  }

  if (buf[0] < 0x80) {
    /* 0xxxxxxx: 0x00000000 - 0x0000007F */
    *ucs = buf[0];
    return 1;
  } else if ((buf[0] & 0xE0) == 0xC0) {
    /* 110xxxxx: 0x00000080 - 0x000007FF */
    size = 2;
    value = buf[0] & 0x1F;
  } else if ((buf[0] & 0xF0) == 0xE0) {
    /* 1110xxxx: 0x00000800 - 0x0000FFFF */
    size = 3;
    value = buf[0] & 0x0F;
  } else if ((buf[0] & 0xF8) == 0xF0) {
    /* 11110xxx: 0x00010000 - 0x001FFFFF */
    size = 4;
    value = buf[0] & 0x07;
  } else if ((buf[0] & 0xFC) == 0xF8) {
    /* 111110xx: 0x00200000 - 0x03FFFFFF */
    size = 5;
    value = buf[0] & 0x03;
  } else {
    /* 10xxxxxx (stray continuation byte) or a 6+ byte lead: not supported */
    return -1;
  }

  if (len < size) {
    return -1;  /* sequence is cut short by the end of the buffer */
  }

  /*
   * Trailing bytes are not validated: only their top bit is dropped and the
   * pieces are added (not OR-ed), so a malformed byte carries into the
   * higher bits instead of being rejected.
   */
  for (i = 1; i < size; i++) {
    value = (value << 6) + (buf[i] & 0x7F);
  }

  *ucs = value;
  return size;
}

#endif /* X11 only */

/*
 * End of "$Id: $".
 */