Annotation of ChivanetAimPidgin/oscarprpl/src/c/encoding.c, revision 1.1.1.1

1.1       snw         1: /*
                      2:  * Purple's oscar protocol plugin
                      3:  * This file is the legal property of its developers.
                      4:  * Please see the AUTHORS file distributed alongside this file.
                      5:  *
                      6:  * This library is free software; you can redistribute it and/or
                      7:  * modify it under the terms of the GNU Lesser General Public
                      8:  * License as published by the Free Software Foundation; either
                      9:  * version 2 of the License, or (at your option) any later version.
                     10:  *
                     11:  * This library is distributed in the hope that it will be useful,
                     12:  * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     13:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     14:  * Lesser General Public License for more details.
                     15:  *
                     16:  * You should have received a copy of the GNU Lesser General Public
                     17:  * License along with this library; if not, write to the Free Software
                     18:  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111-1301  USA
                     19: */
                     20: 
                     21: #include "encoding.h"
                     22: 
                     23: static gchar *
                     24: encoding_multi_convert_to_utf8(const gchar *text, gssize textlen, const gchar *encodings, GError **error, gboolean fallback)
                     25: {
                     26:        gchar *utf8 = NULL;
                     27:        const gchar *begin = encodings;
                     28:        const gchar *end = NULL;
                     29:        gchar *curr_encoding = NULL; /* allocated buffer for encoding name */
                     30:        const gchar *curr_encoding_ro = NULL; /* read-only encoding name */
                     31: 
                     32:        if (!encodings) {
                     33:                purple_debug_error("oscar", "encodings is NULL");
                     34:                return NULL;
                     35:        }
                     36: 
                     37:        for (;;)
                     38:        {
                     39:                /* extract next encoding */
                     40:                end = strchr(begin, ',');
                     41:                if (!end) {
                     42:                        curr_encoding_ro = begin;
                     43:                }       else { /* allocate buffer for encoding */
                     44:                        curr_encoding = g_strndup(begin, end - begin);
                     45:                        if (!curr_encoding) {
                     46:                                purple_debug_error("oscar", "Error allocating memory for encoding");
                     47:                                break;
                     48:                        }
                     49:                        curr_encoding_ro = curr_encoding;
                     50:                }
                     51: 
                     52:                if (!g_ascii_strcasecmp(curr_encoding_ro, "utf-8") && g_utf8_validate(text, textlen, NULL)) {
                     53:                        break;
                     54:                }
                     55: 
                     56:                utf8 = g_convert(text, textlen, "UTF-8", curr_encoding_ro, NULL, NULL, NULL);
                     57: 
                     58:                if (!end) /* last occurence. do not free curr_encoding: buffer was'nt allocated */
                     59:                        break;
                     60: 
                     61:                g_free(curr_encoding); /* free allocated buffer for encoding here */
                     62: 
                     63:                if (utf8) /* text was successfully converted */
                     64:                        break;
                     65: 
                     66:                begin = end + 1;
                     67:        }
                     68: 
                     69:        if (!utf8 && fallback)
                     70:        { /* "begin" points to last encoding */
                     71:                utf8 = g_convert_with_fallback(text, textlen, "UTF-8", begin, "?", NULL, NULL, error);
                     72:        }
                     73: 
                     74:        return utf8;
                     75: }
                     76: 
                     77: static gchar *
                     78: encoding_extract(const char *encoding)
                     79: {
                     80:        char *begin, *end;
                     81: 
                     82:        if (encoding == NULL) {
                     83:                return NULL;
                     84:        }
                     85: 
                     86:        if (!g_str_has_prefix(encoding, "text/aolrtf; charset=") &&
                     87:                !g_str_has_prefix(encoding, "text/x-aolrtf; charset=") &&
                     88:                !g_str_has_prefix(encoding, "text/plain; charset=")) {
                     89:                return g_strdup(encoding);
                     90:        }
                     91: 
                     92:        begin = strchr(encoding, '"');
                     93:        end = strrchr(encoding, '"');
                     94: 
                     95:        if ((begin == NULL) || (end == NULL) || (begin >= end)) {
                     96:                return g_strdup(encoding);
                     97:        }
                     98: 
                     99:        return g_strndup(begin+1, (end-1) - begin);
                    100: }
                    101: 
                    102: gchar *
                    103: oscar_encoding_to_utf8(const char *encoding, const char *text, int textlen)
                    104: {
                    105:        gchar *utf8 = NULL;
                    106:        const gchar *glib_encoding = NULL;
                    107:        gchar *extracted_encoding = encoding_extract(encoding);
                    108: 
                    109:        if (extracted_encoding == NULL || *extracted_encoding == '\0') {
                    110:                purple_debug_info("oscar", "Empty encoding, assuming UTF-8\n");
                    111:        } else if (!g_ascii_strcasecmp(extracted_encoding, "iso-8859-1")) {
                    112:                glib_encoding = "iso-8859-1";
                    113:        } else if (!g_ascii_strcasecmp(extracted_encoding, "ISO-8859-1-Windows-3.1-Latin-1") || !g_ascii_strcasecmp(extracted_encoding, "us-ascii")) {
                    114:                glib_encoding = "Windows-1252";
                    115:        } else if (!g_ascii_strcasecmp(extracted_encoding, "unicode-2-0")) {
                    116:                glib_encoding = "UTF-16BE";
                    117:        } else if (g_ascii_strcasecmp(extracted_encoding, "utf-8")) {
                    118:                glib_encoding = extracted_encoding;
                    119:        }
                    120: 
                    121:        if (glib_encoding != NULL) {
                    122:                utf8 = encoding_multi_convert_to_utf8(text, textlen, glib_encoding, NULL, FALSE);
                    123:        }
                    124: 
                    125:        /*
                    126:         * If utf8 is still NULL then either the encoding is utf-8 or
                    127:         * we have been unable to convert the text to utf-8 from the encoding
                    128:         * that was specified.  So we check if the text is valid utf-8 then
                    129:         * just copy it.
                    130:         */
                    131:        if (utf8 == NULL) {
                    132:                if (textlen != 0 && *text != '\0' && !g_utf8_validate(text, textlen, NULL))
                    133:                        utf8 = g_strdup(_("(There was an error receiving this message.  The buddy you are speaking with is probably using a different encoding than expected.  If you know what encoding he is using, you can specify it in the advanced account options for your AIM/ICQ account.)"));
                    134:                else
                    135:                        utf8 = g_strndup(text, textlen);
                    136:        }
                    137: 
                    138:        g_free(extracted_encoding);
                    139:        return utf8;
                    140: }
                    141: 
                    142: gchar *
                    143: oscar_utf8_try_convert(PurpleAccount *account, OscarData *od, const gchar *msg)
                    144: {
                    145:        const char *charset = NULL;
                    146:        char *ret = NULL;
                    147: 
                    148:        if (msg == NULL)
                    149:                return NULL;
                    150: 
                    151:        if (g_utf8_validate(msg, -1, NULL))
                    152:                return g_strdup(msg);
                    153: 
                    154:        if (od->icq)
                    155:                charset = purple_account_get_string(account, "encoding", NULL);
                    156: 
                    157:        if(charset && *charset)
                    158:                ret = encoding_multi_convert_to_utf8(msg, -1, charset, NULL, FALSE);
                    159: 
                    160:        if(!ret)
                    161:                ret = purple_utf8_try_convert(msg);
                    162: 
                    163:        return ret;
                    164: }
                    165: 
                    166: static gchar *
                    167: oscar_convert_to_utf8(const gchar *data, gsize datalen, const char *charsetstr, gboolean fallback)
                    168: {
                    169:        gchar *ret = NULL;
                    170:        GError *err = NULL;
                    171: 
                    172:        if ((charsetstr == NULL) || (*charsetstr == '\0'))
                    173:                return NULL;
                    174: 
                    175:        if (g_ascii_strcasecmp("UTF-8", charsetstr)) {
                    176:                ret = encoding_multi_convert_to_utf8(data, datalen, charsetstr, &err, fallback);
                    177:                if (err != NULL) {
                    178:                        purple_debug_warning("oscar", "Conversion from %s failed: %s.\n",
                    179:                                                           charsetstr, err->message);
                    180:                        g_error_free(err);
                    181:                }
                    182:        } else {
                    183:                if (g_utf8_validate(data, datalen, NULL))
                    184:                        ret = g_strndup(data, datalen);
                    185:                else
                    186:                        purple_debug_warning("oscar", "String is not valid UTF-8.\n");
                    187:        }
                    188: 
                    189:        return ret;
                    190: }
                    191: 
                    192: gchar *
                    193: oscar_decode_im(PurpleAccount *account, const char *sourcebn, guint16 charset, const gchar *data, gsize datalen)
                    194: {
                    195:        gchar *ret = NULL;
                    196:        /* charsetstr1 is always set to what the correct encoding should be. */
                    197:        const gchar *charsetstr1, *charsetstr2, *charsetstr3 = NULL;
                    198: 
                    199:        if ((datalen == 0) || (data == NULL))
                    200:                return NULL;
                    201: 
                    202:        if (charset == AIM_CHARSET_UNICODE) {
                    203:                charsetstr1 = "UTF-16BE";
                    204:                charsetstr2 = "UTF-8";
                    205:        } else if (charset == AIM_CHARSET_LATIN_1) {
                    206:                if ((sourcebn != NULL) && oscar_util_valid_name_icq(sourcebn))
                    207:                        charsetstr1 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
                    208:                else
                    209:                        charsetstr1 = "ISO-8859-1";
                    210:                charsetstr2 = "UTF-8";
                    211:        } else if (charset == AIM_CHARSET_ASCII) {
                    212:                /* Should just be "ASCII" */
                    213:                charsetstr1 = "ASCII";
                    214:                charsetstr2 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
                    215:        } else if (charset == 0x000d) {
                    216:                /* iChat sending unicode over a Direct IM connection = UTF-8 */
                    217:                /* Mobile AIM client on multiple devices (including Blackberry Tour, Nokia 3100, and LG VX6000) = ISO-8859-1 */
                    218:                charsetstr1 = "UTF-8";
                    219:                charsetstr2 = "ISO-8859-1";
                    220:                charsetstr3 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
                    221:        } else {
                    222:                /* Unknown, hope for valid UTF-8... */
                    223:                charsetstr1 = "UTF-8";
                    224:                charsetstr2 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
                    225:        }
                    226: 
                    227:        purple_debug_info("oscar", "Parsing IM, charset=0x%04hx, datalen=%" G_GSIZE_FORMAT ", choice1=%s, choice2=%s, choice3=%s\n",
                    228:                                          charset, datalen, charsetstr1, charsetstr2, (charsetstr3 ? charsetstr3 : ""));
                    229: 
                    230:        ret = oscar_convert_to_utf8(data, datalen, charsetstr1, FALSE);
                    231:        if (ret == NULL) {
                    232:                if (charsetstr3 != NULL) {
                    233:                        /* Try charsetstr2 without allowing substitutions, then fall through to charsetstr3 if needed */
                    234:                        ret = oscar_convert_to_utf8(data, datalen, charsetstr2, FALSE);
                    235:                        if (ret == NULL)
                    236:                                ret = oscar_convert_to_utf8(data, datalen, charsetstr3, TRUE);
                    237:                } else {
                    238:                        /* Try charsetstr2, allowing substitutions */
                    239:                        ret = oscar_convert_to_utf8(data, datalen, charsetstr2, TRUE);
                    240:                }
                    241:        }
                    242:        if (ret == NULL) {
                    243:                char *str, *salvage, *tmp;
                    244: 
                    245:                str = g_malloc(datalen + 1);
                    246:                strncpy(str, data, datalen);
                    247:                str[datalen] = '\0';
                    248:                salvage = purple_utf8_salvage(str);
                    249:                tmp = g_strdup_printf(_("(There was an error receiving this message.  Either you and %s have different encodings selected, or %s has a buggy client.)"),
                    250:                                          sourcebn, sourcebn);
                    251:                ret = g_strdup_printf("%s %s", salvage, tmp);
                    252:                g_free(tmp);
                    253:                g_free(str);
                    254:                g_free(salvage);
                    255:        }
                    256: 
                    257:        return ret;
                    258: }
                    259: 
                    260: static guint16
                    261: get_simplest_charset(const char *utf8)
                    262: {
                    263:        while (*utf8)
                    264:        {
                    265:                if ((unsigned char)(*utf8) > 0x7f) {
                    266:                        /* not ASCII! */
                    267:                        return AIM_CHARSET_UNICODE;
                    268:                }
                    269:                utf8++;
                    270:        }
                    271:        return AIM_CHARSET_ASCII;
                    272: }
                    273: 
                    274: gchar *
                    275: oscar_encode_im(const gchar *msg, gsize *result_len, guint16 *charset, gchar **charsetstr)
                    276: {
                    277:        guint16 msg_charset = get_simplest_charset(msg);
                    278:        if (charset != NULL) {
                    279:                *charset = msg_charset;
                    280:        }
                    281:        if (charsetstr != NULL) {
                    282:                *charsetstr = msg_charset == AIM_CHARSET_ASCII ? "us-ascii" : "unicode-2-0";
                    283:        }
                    284:        return g_convert(msg, -1, msg_charset == AIM_CHARSET_ASCII ? "ASCII" : "UTF-16BE", "UTF-8", NULL, result_len, NULL);
                    285: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>