Annotation of ChivanetAimPidgin/oscarprpl/src/c/encoding.c, revision 1.1

1.1     ! snw         1: /*
        !             2:  * Purple's oscar protocol plugin
        !             3:  * This file is the legal property of its developers.
        !             4:  * Please see the AUTHORS file distributed alongside this file.
        !             5:  *
        !             6:  * This library is free software; you can redistribute it and/or
        !             7:  * modify it under the terms of the GNU Lesser General Public
        !             8:  * License as published by the Free Software Foundation; either
        !             9:  * version 2 of the License, or (at your option) any later version.
        !            10:  *
        !            11:  * This library is distributed in the hope that it will be useful,
        !            12:  * but WITHOUT ANY WARRANTY; without even the implied warranty of
        !            13:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
        !            14:  * Lesser General Public License for more details.
        !            15:  *
        !            16:  * You should have received a copy of the GNU Lesser General Public
        !            17:  * License along with this library; if not, write to the Free Software
        !            18:  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111-1301  USA
        !            19: */
        !            20: 
        !            21: #include "encoding.h"
        !            22: 
        !            23: static gchar *
        !            24: encoding_multi_convert_to_utf8(const gchar *text, gssize textlen, const gchar *encodings, GError **error, gboolean fallback)
        !            25: {
        !            26:        gchar *utf8 = NULL;
        !            27:        const gchar *begin = encodings;
        !            28:        const gchar *end = NULL;
        !            29:        gchar *curr_encoding = NULL; /* allocated buffer for encoding name */
        !            30:        const gchar *curr_encoding_ro = NULL; /* read-only encoding name */
        !            31: 
        !            32:        if (!encodings) {
        !            33:                purple_debug_error("oscar", "encodings is NULL");
        !            34:                return NULL;
        !            35:        }
        !            36: 
        !            37:        for (;;)
        !            38:        {
        !            39:                /* extract next encoding */
        !            40:                end = strchr(begin, ',');
        !            41:                if (!end) {
        !            42:                        curr_encoding_ro = begin;
        !            43:                }       else { /* allocate buffer for encoding */
        !            44:                        curr_encoding = g_strndup(begin, end - begin);
        !            45:                        if (!curr_encoding) {
        !            46:                                purple_debug_error("oscar", "Error allocating memory for encoding");
        !            47:                                break;
        !            48:                        }
        !            49:                        curr_encoding_ro = curr_encoding;
        !            50:                }
        !            51: 
        !            52:                if (!g_ascii_strcasecmp(curr_encoding_ro, "utf-8") && g_utf8_validate(text, textlen, NULL)) {
        !            53:                        break;
        !            54:                }
        !            55: 
        !            56:                utf8 = g_convert(text, textlen, "UTF-8", curr_encoding_ro, NULL, NULL, NULL);
        !            57: 
        !            58:                if (!end) /* last occurence. do not free curr_encoding: buffer was'nt allocated */
        !            59:                        break;
        !            60: 
        !            61:                g_free(curr_encoding); /* free allocated buffer for encoding here */
        !            62: 
        !            63:                if (utf8) /* text was successfully converted */
        !            64:                        break;
        !            65: 
        !            66:                begin = end + 1;
        !            67:        }
        !            68: 
        !            69:        if (!utf8 && fallback)
        !            70:        { /* "begin" points to last encoding */
        !            71:                utf8 = g_convert_with_fallback(text, textlen, "UTF-8", begin, "?", NULL, NULL, error);
        !            72:        }
        !            73: 
        !            74:        return utf8;
        !            75: }
        !            76: 
        !            77: static gchar *
        !            78: encoding_extract(const char *encoding)
        !            79: {
        !            80:        char *begin, *end;
        !            81: 
        !            82:        if (encoding == NULL) {
        !            83:                return NULL;
        !            84:        }
        !            85: 
        !            86:        if (!g_str_has_prefix(encoding, "text/aolrtf; charset=") &&
        !            87:                !g_str_has_prefix(encoding, "text/x-aolrtf; charset=") &&
        !            88:                !g_str_has_prefix(encoding, "text/plain; charset=")) {
        !            89:                return g_strdup(encoding);
        !            90:        }
        !            91: 
        !            92:        begin = strchr(encoding, '"');
        !            93:        end = strrchr(encoding, '"');
        !            94: 
        !            95:        if ((begin == NULL) || (end == NULL) || (begin >= end)) {
        !            96:                return g_strdup(encoding);
        !            97:        }
        !            98: 
        !            99:        return g_strndup(begin+1, (end-1) - begin);
        !           100: }
        !           101: 
        !           102: gchar *
        !           103: oscar_encoding_to_utf8(const char *encoding, const char *text, int textlen)
        !           104: {
        !           105:        gchar *utf8 = NULL;
        !           106:        const gchar *glib_encoding = NULL;
        !           107:        gchar *extracted_encoding = encoding_extract(encoding);
        !           108: 
        !           109:        if (extracted_encoding == NULL || *extracted_encoding == '\0') {
        !           110:                purple_debug_info("oscar", "Empty encoding, assuming UTF-8\n");
        !           111:        } else if (!g_ascii_strcasecmp(extracted_encoding, "iso-8859-1")) {
        !           112:                glib_encoding = "iso-8859-1";
        !           113:        } else if (!g_ascii_strcasecmp(extracted_encoding, "ISO-8859-1-Windows-3.1-Latin-1") || !g_ascii_strcasecmp(extracted_encoding, "us-ascii")) {
        !           114:                glib_encoding = "Windows-1252";
        !           115:        } else if (!g_ascii_strcasecmp(extracted_encoding, "unicode-2-0")) {
        !           116:                glib_encoding = "UTF-16BE";
        !           117:        } else if (g_ascii_strcasecmp(extracted_encoding, "utf-8")) {
        !           118:                glib_encoding = extracted_encoding;
        !           119:        }
        !           120: 
        !           121:        if (glib_encoding != NULL) {
        !           122:                utf8 = encoding_multi_convert_to_utf8(text, textlen, glib_encoding, NULL, FALSE);
        !           123:        }
        !           124: 
        !           125:        /*
        !           126:         * If utf8 is still NULL then either the encoding is utf-8 or
        !           127:         * we have been unable to convert the text to utf-8 from the encoding
        !           128:         * that was specified.  So we check if the text is valid utf-8 then
        !           129:         * just copy it.
        !           130:         */
        !           131:        if (utf8 == NULL) {
        !           132:                if (textlen != 0 && *text != '\0' && !g_utf8_validate(text, textlen, NULL))
        !           133:                        utf8 = g_strdup(_("(There was an error receiving this message.  The buddy you are speaking with is probably using a different encoding than expected.  If you know what encoding he is using, you can specify it in the advanced account options for your AIM/ICQ account.)"));
        !           134:                else
        !           135:                        utf8 = g_strndup(text, textlen);
        !           136:        }
        !           137: 
        !           138:        g_free(extracted_encoding);
        !           139:        return utf8;
        !           140: }
        !           141: 
        !           142: gchar *
        !           143: oscar_utf8_try_convert(PurpleAccount *account, OscarData *od, const gchar *msg)
        !           144: {
        !           145:        const char *charset = NULL;
        !           146:        char *ret = NULL;
        !           147: 
        !           148:        if (msg == NULL)
        !           149:                return NULL;
        !           150: 
        !           151:        if (g_utf8_validate(msg, -1, NULL))
        !           152:                return g_strdup(msg);
        !           153: 
        !           154:        if (od->icq)
        !           155:                charset = purple_account_get_string(account, "encoding", NULL);
        !           156: 
        !           157:        if(charset && *charset)
        !           158:                ret = encoding_multi_convert_to_utf8(msg, -1, charset, NULL, FALSE);
        !           159: 
        !           160:        if(!ret)
        !           161:                ret = purple_utf8_try_convert(msg);
        !           162: 
        !           163:        return ret;
        !           164: }
        !           165: 
        !           166: static gchar *
        !           167: oscar_convert_to_utf8(const gchar *data, gsize datalen, const char *charsetstr, gboolean fallback)
        !           168: {
        !           169:        gchar *ret = NULL;
        !           170:        GError *err = NULL;
        !           171: 
        !           172:        if ((charsetstr == NULL) || (*charsetstr == '\0'))
        !           173:                return NULL;
        !           174: 
        !           175:        if (g_ascii_strcasecmp("UTF-8", charsetstr)) {
        !           176:                ret = encoding_multi_convert_to_utf8(data, datalen, charsetstr, &err, fallback);
        !           177:                if (err != NULL) {
        !           178:                        purple_debug_warning("oscar", "Conversion from %s failed: %s.\n",
        !           179:                                                           charsetstr, err->message);
        !           180:                        g_error_free(err);
        !           181:                }
        !           182:        } else {
        !           183:                if (g_utf8_validate(data, datalen, NULL))
        !           184:                        ret = g_strndup(data, datalen);
        !           185:                else
        !           186:                        purple_debug_warning("oscar", "String is not valid UTF-8.\n");
        !           187:        }
        !           188: 
        !           189:        return ret;
        !           190: }
        !           191: 
        !           192: gchar *
        !           193: oscar_decode_im(PurpleAccount *account, const char *sourcebn, guint16 charset, const gchar *data, gsize datalen)
        !           194: {
        !           195:        gchar *ret = NULL;
        !           196:        /* charsetstr1 is always set to what the correct encoding should be. */
        !           197:        const gchar *charsetstr1, *charsetstr2, *charsetstr3 = NULL;
        !           198: 
        !           199:        if ((datalen == 0) || (data == NULL))
        !           200:                return NULL;
        !           201: 
        !           202:        if (charset == AIM_CHARSET_UNICODE) {
        !           203:                charsetstr1 = "UTF-16BE";
        !           204:                charsetstr2 = "UTF-8";
        !           205:        } else if (charset == AIM_CHARSET_LATIN_1) {
        !           206:                if ((sourcebn != NULL) && oscar_util_valid_name_icq(sourcebn))
        !           207:                        charsetstr1 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
        !           208:                else
        !           209:                        charsetstr1 = "ISO-8859-1";
        !           210:                charsetstr2 = "UTF-8";
        !           211:        } else if (charset == AIM_CHARSET_ASCII) {
        !           212:                /* Should just be "ASCII" */
        !           213:                charsetstr1 = "ASCII";
        !           214:                charsetstr2 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
        !           215:        } else if (charset == 0x000d) {
        !           216:                /* iChat sending unicode over a Direct IM connection = UTF-8 */
        !           217:                /* Mobile AIM client on multiple devices (including Blackberry Tour, Nokia 3100, and LG VX6000) = ISO-8859-1 */
        !           218:                charsetstr1 = "UTF-8";
        !           219:                charsetstr2 = "ISO-8859-1";
        !           220:                charsetstr3 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
        !           221:        } else {
        !           222:                /* Unknown, hope for valid UTF-8... */
        !           223:                charsetstr1 = "UTF-8";
        !           224:                charsetstr2 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
        !           225:        }
        !           226: 
        !           227:        purple_debug_info("oscar", "Parsing IM, charset=0x%04hx, datalen=%" G_GSIZE_FORMAT ", choice1=%s, choice2=%s, choice3=%s\n",
        !           228:                                          charset, datalen, charsetstr1, charsetstr2, (charsetstr3 ? charsetstr3 : ""));
        !           229: 
        !           230:        ret = oscar_convert_to_utf8(data, datalen, charsetstr1, FALSE);
        !           231:        if (ret == NULL) {
        !           232:                if (charsetstr3 != NULL) {
        !           233:                        /* Try charsetstr2 without allowing substitutions, then fall through to charsetstr3 if needed */
        !           234:                        ret = oscar_convert_to_utf8(data, datalen, charsetstr2, FALSE);
        !           235:                        if (ret == NULL)
        !           236:                                ret = oscar_convert_to_utf8(data, datalen, charsetstr3, TRUE);
        !           237:                } else {
        !           238:                        /* Try charsetstr2, allowing substitutions */
        !           239:                        ret = oscar_convert_to_utf8(data, datalen, charsetstr2, TRUE);
        !           240:                }
        !           241:        }
        !           242:        if (ret == NULL) {
        !           243:                char *str, *salvage, *tmp;
        !           244: 
        !           245:                str = g_malloc(datalen + 1);
        !           246:                strncpy(str, data, datalen);
        !           247:                str[datalen] = '\0';
        !           248:                salvage = purple_utf8_salvage(str);
        !           249:                tmp = g_strdup_printf(_("(There was an error receiving this message.  Either you and %s have different encodings selected, or %s has a buggy client.)"),
        !           250:                                          sourcebn, sourcebn);
        !           251:                ret = g_strdup_printf("%s %s", salvage, tmp);
        !           252:                g_free(tmp);
        !           253:                g_free(str);
        !           254:                g_free(salvage);
        !           255:        }
        !           256: 
        !           257:        return ret;
        !           258: }
        !           259: 
        !           260: static guint16
        !           261: get_simplest_charset(const char *utf8)
        !           262: {
        !           263:        while (*utf8)
        !           264:        {
        !           265:                if ((unsigned char)(*utf8) > 0x7f) {
        !           266:                        /* not ASCII! */
        !           267:                        return AIM_CHARSET_UNICODE;
        !           268:                }
        !           269:                utf8++;
        !           270:        }
        !           271:        return AIM_CHARSET_ASCII;
        !           272: }
        !           273: 
        !           274: gchar *
        !           275: oscar_encode_im(const gchar *msg, gsize *result_len, guint16 *charset, gchar **charsetstr)
        !           276: {
        !           277:        guint16 msg_charset = get_simplest_charset(msg);
        !           278:        if (charset != NULL) {
        !           279:                *charset = msg_charset;
        !           280:        }
        !           281:        if (charsetstr != NULL) {
        !           282:                *charsetstr = msg_charset == AIM_CHARSET_ASCII ? "us-ascii" : "unicode-2-0";
        !           283:        }
        !           284:        return g_convert(msg, -1, msg_charset == AIM_CHARSET_ASCII ? "ASCII" : "UTF-16BE", "UTF-8", NULL, result_len, NULL);
        !           285: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>