Annotation of ChivanetAimPidgin/oscarprpl/src/c/encoding.c, revision 1.1
1.1 ! snw 1: /*
! 2: * Purple's oscar protocol plugin
! 3: * This file is the legal property of its developers.
! 4: * Please see the AUTHORS file distributed alongside this file.
! 5: *
! 6: * This library is free software; you can redistribute it and/or
! 7: * modify it under the terms of the GNU Lesser General Public
! 8: * License as published by the Free Software Foundation; either
! 9: * version 2 of the License, or (at your option) any later version.
! 10: *
! 11: * This library is distributed in the hope that it will be useful,
! 12: * but WITHOUT ANY WARRANTY; without even the implied warranty of
! 13: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
! 14: * Lesser General Public License for more details.
! 15: *
! 16: * You should have received a copy of the GNU Lesser General Public
! 17: * License along with this library; if not, write to the Free Software
! 18: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA
! 19: */
! 20:
! 21: #include "encoding.h"
! 22:
! 23: static gchar *
! 24: encoding_multi_convert_to_utf8(const gchar *text, gssize textlen, const gchar *encodings, GError **error, gboolean fallback)
! 25: {
! 26: gchar *utf8 = NULL;
! 27: const gchar *begin = encodings;
! 28: const gchar *end = NULL;
! 29: gchar *curr_encoding = NULL; /* allocated buffer for encoding name */
! 30: const gchar *curr_encoding_ro = NULL; /* read-only encoding name */
! 31:
! 32: if (!encodings) {
! 33: purple_debug_error("oscar", "encodings is NULL");
! 34: return NULL;
! 35: }
! 36:
! 37: for (;;)
! 38: {
! 39: /* extract next encoding */
! 40: end = strchr(begin, ',');
! 41: if (!end) {
! 42: curr_encoding_ro = begin;
! 43: } else { /* allocate buffer for encoding */
! 44: curr_encoding = g_strndup(begin, end - begin);
! 45: if (!curr_encoding) {
! 46: purple_debug_error("oscar", "Error allocating memory for encoding");
! 47: break;
! 48: }
! 49: curr_encoding_ro = curr_encoding;
! 50: }
! 51:
! 52: if (!g_ascii_strcasecmp(curr_encoding_ro, "utf-8") && g_utf8_validate(text, textlen, NULL)) {
! 53: break;
! 54: }
! 55:
! 56: utf8 = g_convert(text, textlen, "UTF-8", curr_encoding_ro, NULL, NULL, NULL);
! 57:
! 58: if (!end) /* last occurence. do not free curr_encoding: buffer was'nt allocated */
! 59: break;
! 60:
! 61: g_free(curr_encoding); /* free allocated buffer for encoding here */
! 62:
! 63: if (utf8) /* text was successfully converted */
! 64: break;
! 65:
! 66: begin = end + 1;
! 67: }
! 68:
! 69: if (!utf8 && fallback)
! 70: { /* "begin" points to last encoding */
! 71: utf8 = g_convert_with_fallback(text, textlen, "UTF-8", begin, "?", NULL, NULL, error);
! 72: }
! 73:
! 74: return utf8;
! 75: }
! 76:
! 77: static gchar *
! 78: encoding_extract(const char *encoding)
! 79: {
! 80: char *begin, *end;
! 81:
! 82: if (encoding == NULL) {
! 83: return NULL;
! 84: }
! 85:
! 86: if (!g_str_has_prefix(encoding, "text/aolrtf; charset=") &&
! 87: !g_str_has_prefix(encoding, "text/x-aolrtf; charset=") &&
! 88: !g_str_has_prefix(encoding, "text/plain; charset=")) {
! 89: return g_strdup(encoding);
! 90: }
! 91:
! 92: begin = strchr(encoding, '"');
! 93: end = strrchr(encoding, '"');
! 94:
! 95: if ((begin == NULL) || (end == NULL) || (begin >= end)) {
! 96: return g_strdup(encoding);
! 97: }
! 98:
! 99: return g_strndup(begin+1, (end-1) - begin);
! 100: }
! 101:
! 102: gchar *
! 103: oscar_encoding_to_utf8(const char *encoding, const char *text, int textlen)
! 104: {
! 105: gchar *utf8 = NULL;
! 106: const gchar *glib_encoding = NULL;
! 107: gchar *extracted_encoding = encoding_extract(encoding);
! 108:
! 109: if (extracted_encoding == NULL || *extracted_encoding == '\0') {
! 110: purple_debug_info("oscar", "Empty encoding, assuming UTF-8\n");
! 111: } else if (!g_ascii_strcasecmp(extracted_encoding, "iso-8859-1")) {
! 112: glib_encoding = "iso-8859-1";
! 113: } else if (!g_ascii_strcasecmp(extracted_encoding, "ISO-8859-1-Windows-3.1-Latin-1") || !g_ascii_strcasecmp(extracted_encoding, "us-ascii")) {
! 114: glib_encoding = "Windows-1252";
! 115: } else if (!g_ascii_strcasecmp(extracted_encoding, "unicode-2-0")) {
! 116: glib_encoding = "UTF-16BE";
! 117: } else if (g_ascii_strcasecmp(extracted_encoding, "utf-8")) {
! 118: glib_encoding = extracted_encoding;
! 119: }
! 120:
! 121: if (glib_encoding != NULL) {
! 122: utf8 = encoding_multi_convert_to_utf8(text, textlen, glib_encoding, NULL, FALSE);
! 123: }
! 124:
! 125: /*
! 126: * If utf8 is still NULL then either the encoding is utf-8 or
! 127: * we have been unable to convert the text to utf-8 from the encoding
! 128: * that was specified. So we check if the text is valid utf-8 then
! 129: * just copy it.
! 130: */
! 131: if (utf8 == NULL) {
! 132: if (textlen != 0 && *text != '\0' && !g_utf8_validate(text, textlen, NULL))
! 133: utf8 = g_strdup(_("(There was an error receiving this message. The buddy you are speaking with is probably using a different encoding than expected. If you know what encoding he is using, you can specify it in the advanced account options for your AIM/ICQ account.)"));
! 134: else
! 135: utf8 = g_strndup(text, textlen);
! 136: }
! 137:
! 138: g_free(extracted_encoding);
! 139: return utf8;
! 140: }
! 141:
! 142: gchar *
! 143: oscar_utf8_try_convert(PurpleAccount *account, OscarData *od, const gchar *msg)
! 144: {
! 145: const char *charset = NULL;
! 146: char *ret = NULL;
! 147:
! 148: if (msg == NULL)
! 149: return NULL;
! 150:
! 151: if (g_utf8_validate(msg, -1, NULL))
! 152: return g_strdup(msg);
! 153:
! 154: if (od->icq)
! 155: charset = purple_account_get_string(account, "encoding", NULL);
! 156:
! 157: if(charset && *charset)
! 158: ret = encoding_multi_convert_to_utf8(msg, -1, charset, NULL, FALSE);
! 159:
! 160: if(!ret)
! 161: ret = purple_utf8_try_convert(msg);
! 162:
! 163: return ret;
! 164: }
! 165:
! 166: static gchar *
! 167: oscar_convert_to_utf8(const gchar *data, gsize datalen, const char *charsetstr, gboolean fallback)
! 168: {
! 169: gchar *ret = NULL;
! 170: GError *err = NULL;
! 171:
! 172: if ((charsetstr == NULL) || (*charsetstr == '\0'))
! 173: return NULL;
! 174:
! 175: if (g_ascii_strcasecmp("UTF-8", charsetstr)) {
! 176: ret = encoding_multi_convert_to_utf8(data, datalen, charsetstr, &err, fallback);
! 177: if (err != NULL) {
! 178: purple_debug_warning("oscar", "Conversion from %s failed: %s.\n",
! 179: charsetstr, err->message);
! 180: g_error_free(err);
! 181: }
! 182: } else {
! 183: if (g_utf8_validate(data, datalen, NULL))
! 184: ret = g_strndup(data, datalen);
! 185: else
! 186: purple_debug_warning("oscar", "String is not valid UTF-8.\n");
! 187: }
! 188:
! 189: return ret;
! 190: }
! 191:
! 192: gchar *
! 193: oscar_decode_im(PurpleAccount *account, const char *sourcebn, guint16 charset, const gchar *data, gsize datalen)
! 194: {
! 195: gchar *ret = NULL;
! 196: /* charsetstr1 is always set to what the correct encoding should be. */
! 197: const gchar *charsetstr1, *charsetstr2, *charsetstr3 = NULL;
! 198:
! 199: if ((datalen == 0) || (data == NULL))
! 200: return NULL;
! 201:
! 202: if (charset == AIM_CHARSET_UNICODE) {
! 203: charsetstr1 = "UTF-16BE";
! 204: charsetstr2 = "UTF-8";
! 205: } else if (charset == AIM_CHARSET_LATIN_1) {
! 206: if ((sourcebn != NULL) && oscar_util_valid_name_icq(sourcebn))
! 207: charsetstr1 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
! 208: else
! 209: charsetstr1 = "ISO-8859-1";
! 210: charsetstr2 = "UTF-8";
! 211: } else if (charset == AIM_CHARSET_ASCII) {
! 212: /* Should just be "ASCII" */
! 213: charsetstr1 = "ASCII";
! 214: charsetstr2 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
! 215: } else if (charset == 0x000d) {
! 216: /* iChat sending unicode over a Direct IM connection = UTF-8 */
! 217: /* Mobile AIM client on multiple devices (including Blackberry Tour, Nokia 3100, and LG VX6000) = ISO-8859-1 */
! 218: charsetstr1 = "UTF-8";
! 219: charsetstr2 = "ISO-8859-1";
! 220: charsetstr3 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
! 221: } else {
! 222: /* Unknown, hope for valid UTF-8... */
! 223: charsetstr1 = "UTF-8";
! 224: charsetstr2 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
! 225: }
! 226:
! 227: purple_debug_info("oscar", "Parsing IM, charset=0x%04hx, datalen=%" G_GSIZE_FORMAT ", choice1=%s, choice2=%s, choice3=%s\n",
! 228: charset, datalen, charsetstr1, charsetstr2, (charsetstr3 ? charsetstr3 : ""));
! 229:
! 230: ret = oscar_convert_to_utf8(data, datalen, charsetstr1, FALSE);
! 231: if (ret == NULL) {
! 232: if (charsetstr3 != NULL) {
! 233: /* Try charsetstr2 without allowing substitutions, then fall through to charsetstr3 if needed */
! 234: ret = oscar_convert_to_utf8(data, datalen, charsetstr2, FALSE);
! 235: if (ret == NULL)
! 236: ret = oscar_convert_to_utf8(data, datalen, charsetstr3, TRUE);
! 237: } else {
! 238: /* Try charsetstr2, allowing substitutions */
! 239: ret = oscar_convert_to_utf8(data, datalen, charsetstr2, TRUE);
! 240: }
! 241: }
! 242: if (ret == NULL) {
! 243: char *str, *salvage, *tmp;
! 244:
! 245: str = g_malloc(datalen + 1);
! 246: strncpy(str, data, datalen);
! 247: str[datalen] = '\0';
! 248: salvage = purple_utf8_salvage(str);
! 249: tmp = g_strdup_printf(_("(There was an error receiving this message. Either you and %s have different encodings selected, or %s has a buggy client.)"),
! 250: sourcebn, sourcebn);
! 251: ret = g_strdup_printf("%s %s", salvage, tmp);
! 252: g_free(tmp);
! 253: g_free(str);
! 254: g_free(salvage);
! 255: }
! 256:
! 257: return ret;
! 258: }
! 259:
! 260: static guint16
! 261: get_simplest_charset(const char *utf8)
! 262: {
! 263: while (*utf8)
! 264: {
! 265: if ((unsigned char)(*utf8) > 0x7f) {
! 266: /* not ASCII! */
! 267: return AIM_CHARSET_UNICODE;
! 268: }
! 269: utf8++;
! 270: }
! 271: return AIM_CHARSET_ASCII;
! 272: }
! 273:
! 274: gchar *
! 275: oscar_encode_im(const gchar *msg, gsize *result_len, guint16 *charset, gchar **charsetstr)
! 276: {
! 277: guint16 msg_charset = get_simplest_charset(msg);
! 278: if (charset != NULL) {
! 279: *charset = msg_charset;
! 280: }
! 281: if (charsetstr != NULL) {
! 282: *charsetstr = msg_charset == AIM_CHARSET_ASCII ? "us-ascii" : "unicode-2-0";
! 283: }
! 284: return g_convert(msg, -1, msg_charset == AIM_CHARSET_ASCII ? "ASCII" : "UTF-16BE", "UTF-8", NULL, result_len, NULL);
! 285: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>