ChivanetAimPidgin/oscarprpl/src/c/encoding.c - view

File: [Coherent Logic Development] / ChivanetAimPidgin / oscarprpl / src / c / encoding.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs
Mon Jan 27 19:48:25 2025 UTC (6 months ago) by snw
Branches: MAIN, CoherentLogicDevelopment
CVS tags: test-tag, start, HEAD

Pidgin AIM Plugin for ChivaNet

1: /* 2: * Purple's oscar protocol plugin 3: * This file is the legal property of its developers. 4: * Please see the AUTHORS file distributed alongside this file. 5: * 6: * This library is free software; you can redistribute it and/or 7: * modify it under the terms of the GNU Lesser General Public 8: * License as published by the Free Software Foundation; either 9: * version 2 of the License, or (at your option) any later version. 10: * 11: * This library is distributed in the hope that it will be useful, 12: * but WITHOUT ANY WARRANTY; without even the implied warranty of 13: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14: * Lesser General Public License for more details. 15: * 16: * You should have received a copy of the GNU Lesser General Public 17: * License along with this library; if not, write to the Free Software 18: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA 19: */ 20: 21: #include "encoding.h" 22: 23: static gchar * 24: encoding_multi_convert_to_utf8(const gchar *text, gssize textlen, const gchar *encodings, GError **error, gboolean fallback) 25: { 26: gchar *utf8 = NULL; 27: const gchar *begin = encodings; 28: const gchar *end = NULL; 29: gchar *curr_encoding = NULL; /* allocated buffer for encoding name */ 30: const gchar *curr_encoding_ro = NULL; /* read-only encoding name */ 31: 32: if (!encodings) { 33: purple_debug_error("oscar", "encodings is NULL"); 34: return NULL; 35: } 36: 37: for (;;) 38: { 39: /* extract next encoding */ 40: end = strchr(begin, ','); 41: if (!end) { 42: curr_encoding_ro = begin; 43: } else { /* allocate buffer for encoding */ 44: curr_encoding = g_strndup(begin, end - begin); 45: if (!curr_encoding) { 46: purple_debug_error("oscar", "Error allocating memory for encoding"); 47: break; 48: } 49: curr_encoding_ro = curr_encoding; 50: } 51: 52: if (!g_ascii_strcasecmp(curr_encoding_ro, "utf-8") && g_utf8_validate(text, textlen, NULL)) { 53: break; 54: } 55: 56: utf8 = g_convert(text, textlen, "UTF-8", curr_encoding_ro, NULL, NULL, NULL); 57: 58: if (!end) /* last occurence. do not free curr_encoding: buffer was'nt allocated */ 59: break; 60: 61: g_free(curr_encoding); /* free allocated buffer for encoding here */ 62: 63: if (utf8) /* text was successfully converted */ 64: break; 65: 66: begin = end + 1; 67: } 68: 69: if (!utf8 && fallback) 70: { /* "begin" points to last encoding */ 71: utf8 = g_convert_with_fallback(text, textlen, "UTF-8", begin, "?", NULL, NULL, error); 72: } 73: 74: return utf8; 75: } 76: 77: static gchar * 78: encoding_extract(const char *encoding) 79: { 80: char *begin, *end; 81: 82: if (encoding == NULL) { 83: return NULL; 84: } 85: 86: if (!g_str_has_prefix(encoding, "text/aolrtf; charset=") && 87: !g_str_has_prefix(encoding, "text/x-aolrtf; charset=") && 88: !g_str_has_prefix(encoding, "text/plain; charset=")) { 89: return g_strdup(encoding); 90: } 91: 92: begin = strchr(encoding, '"'); 93: end = strrchr(encoding, '"'); 94: 95: if ((begin == NULL) || (end == NULL) || (begin >= end)) { 96: return g_strdup(encoding); 97: } 98: 99: return g_strndup(begin+1, (end-1) - begin); 100: } 101: 102: gchar * 103: oscar_encoding_to_utf8(const char *encoding, const char *text, int textlen) 104: { 105: gchar *utf8 = NULL; 106: const gchar *glib_encoding = NULL; 107: gchar *extracted_encoding = encoding_extract(encoding); 108: 109: if (extracted_encoding == NULL || *extracted_encoding == '\0') { 110: purple_debug_info("oscar", "Empty encoding, assuming UTF-8\n"); 111: } else if (!g_ascii_strcasecmp(extracted_encoding, "iso-8859-1")) { 112: glib_encoding = "iso-8859-1"; 113: } else if (!g_ascii_strcasecmp(extracted_encoding, "ISO-8859-1-Windows-3.1-Latin-1") || !g_ascii_strcasecmp(extracted_encoding, "us-ascii")) { 114: glib_encoding = "Windows-1252"; 115: } else if (!g_ascii_strcasecmp(extracted_encoding, "unicode-2-0")) { 116: glib_encoding = "UTF-16BE"; 117: } else if (g_ascii_strcasecmp(extracted_encoding, "utf-8")) { 118: glib_encoding = extracted_encoding; 119: } 120: 121: if (glib_encoding != NULL) { 122: utf8 = encoding_multi_convert_to_utf8(text, textlen, glib_encoding, NULL, FALSE); 123: } 124: 125: /* 126: * If utf8 is still NULL then either the encoding is utf-8 or 127: * we have been unable to convert the text to utf-8 from the encoding 128: * that was specified. So we check if the text is valid utf-8 then 129: * just copy it. 130: */ 131: if (utf8 == NULL) { 132: if (textlen != 0 && *text != '\0' && !g_utf8_validate(text, textlen, NULL)) 133: utf8 = g_strdup(_("(There was an error receiving this message. The buddy you are speaking with is probably using a different encoding than expected. If you know what encoding he is using, you can specify it in the advanced account options for your AIM/ICQ account.)")); 134: else 135: utf8 = g_strndup(text, textlen); 136: } 137: 138: g_free(extracted_encoding); 139: return utf8; 140: } 141: 142: gchar * 143: oscar_utf8_try_convert(PurpleAccount *account, OscarData *od, const gchar *msg) 144: { 145: const char *charset = NULL; 146: char *ret = NULL; 147: 148: if (msg == NULL) 149: return NULL; 150: 151: if (g_utf8_validate(msg, -1, NULL)) 152: return g_strdup(msg); 153: 154: if (od->icq) 155: charset = purple_account_get_string(account, "encoding", NULL); 156: 157: if(charset && *charset) 158: ret = encoding_multi_convert_to_utf8(msg, -1, charset, NULL, FALSE); 159: 160: if(!ret) 161: ret = purple_utf8_try_convert(msg); 162: 163: return ret; 164: } 165: 166: static gchar * 167: oscar_convert_to_utf8(const gchar *data, gsize datalen, const char *charsetstr, gboolean fallback) 168: { 169: gchar *ret = NULL; 170: GError *err = NULL; 171: 172: if ((charsetstr == NULL) || (*charsetstr == '\0')) 173: return NULL; 174: 175: if (g_ascii_strcasecmp("UTF-8", charsetstr)) { 176: ret = encoding_multi_convert_to_utf8(data, datalen, charsetstr, &err, fallback); 177: if (err != NULL) { 178: purple_debug_warning("oscar", "Conversion from %s failed: %s.\n", 179: charsetstr, err->message); 180: g_error_free(err); 181: } 182: } else { 183: if (g_utf8_validate(data, datalen, NULL)) 184: ret = g_strndup(data, datalen); 185: else 186: purple_debug_warning("oscar", "String is not valid UTF-8.\n"); 187: } 188: 189: return ret; 190: } 191: 192: gchar * 193: oscar_decode_im(PurpleAccount *account, const char *sourcebn, guint16 charset, const gchar *data, gsize datalen) 194: { 195: gchar *ret = NULL; 196: /* charsetstr1 is always set to what the correct encoding should be. */ 197: const gchar *charsetstr1, *charsetstr2, *charsetstr3 = NULL; 198: 199: if ((datalen == 0) || (data == NULL)) 200: return NULL; 201: 202: if (charset == AIM_CHARSET_UNICODE) { 203: charsetstr1 = "UTF-16BE"; 204: charsetstr2 = "UTF-8"; 205: } else if (charset == AIM_CHARSET_LATIN_1) { 206: if ((sourcebn != NULL) && oscar_util_valid_name_icq(sourcebn)) 207: charsetstr1 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING); 208: else 209: charsetstr1 = "ISO-8859-1"; 210: charsetstr2 = "UTF-8"; 211: } else if (charset == AIM_CHARSET_ASCII) { 212: /* Should just be "ASCII" */ 213: charsetstr1 = "ASCII"; 214: charsetstr2 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING); 215: } else if (charset == 0x000d) { 216: /* iChat sending unicode over a Direct IM connection = UTF-8 */ 217: /* Mobile AIM client on multiple devices (including Blackberry Tour, Nokia 3100, and LG VX6000) = ISO-8859-1 */ 218: charsetstr1 = "UTF-8"; 219: charsetstr2 = "ISO-8859-1"; 220: charsetstr3 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING); 221: } else { 222: /* Unknown, hope for valid UTF-8... */ 223: charsetstr1 = "UTF-8"; 224: charsetstr2 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING); 225: } 226: 227: purple_debug_info("oscar", "Parsing IM, charset=0x%04hx, datalen=%" G_GSIZE_FORMAT ", choice1=%s, choice2=%s, choice3=%s\n", 228: charset, datalen, charsetstr1, charsetstr2, (charsetstr3 ? charsetstr3 : "")); 229: 230: ret = oscar_convert_to_utf8(data, datalen, charsetstr1, FALSE); 231: if (ret == NULL) { 232: if (charsetstr3 != NULL) { 233: /* Try charsetstr2 without allowing substitutions, then fall through to charsetstr3 if needed */ 234: ret = oscar_convert_to_utf8(data, datalen, charsetstr2, FALSE); 235: if (ret == NULL) 236: ret = oscar_convert_to_utf8(data, datalen, charsetstr3, TRUE); 237: } else { 238: /* Try charsetstr2, allowing substitutions */ 239: ret = oscar_convert_to_utf8(data, datalen, charsetstr2, TRUE); 240: } 241: } 242: if (ret == NULL) { 243: char *str, *salvage, *tmp; 244: 245: str = g_malloc(datalen + 1); 246: strncpy(str, data, datalen); 247: str[datalen] = '\0'; 248: salvage = purple_utf8_salvage(str); 249: tmp = g_strdup_printf(_("(There was an error receiving this message. Either you and %s have different encodings selected, or %s has a buggy client.)"), 250: sourcebn, sourcebn); 251: ret = g_strdup_printf("%s %s", salvage, tmp); 252: g_free(tmp); 253: g_free(str); 254: g_free(salvage); 255: } 256: 257: return ret; 258: } 259: 260: static guint16 261: get_simplest_charset(const char *utf8) 262: { 263: while (*utf8) 264: { 265: if ((unsigned char)(*utf8) > 0x7f) { 266: /* not ASCII! */ 267: return AIM_CHARSET_UNICODE; 268: } 269: utf8++; 270: } 271: return AIM_CHARSET_ASCII; 272: } 273: 274: gchar * 275: oscar_encode_im(const gchar *msg, gsize *result_len, guint16 *charset, gchar **charsetstr) 276: { 277: guint16 msg_charset = get_simplest_charset(msg); 278: if (charset != NULL) { 279: *charset = msg_charset; 280: } 281: if (charsetstr != NULL) { 282: *charsetstr = msg_charset == AIM_CHARSET_ASCII ? "us-ascii" : "unicode-2-0"; 283: } 284: return g_convert(msg, -1, msg_charset == AIM_CHARSET_ASCII ? "ASCII" : "UTF-16BE", "UTF-8", NULL, result_len, NULL); 285: }