Annotation of ChivanetAimPidgin/oscarprpl/src/c/encoding.c, revision 1.1.1.1
1.1 snw 1: /*
2: * Purple's oscar protocol plugin
3: * This file is the legal property of its developers.
4: * Please see the AUTHORS file distributed alongside this file.
5: *
6: * This library is free software; you can redistribute it and/or
7: * modify it under the terms of the GNU Lesser General Public
8: * License as published by the Free Software Foundation; either
9: * version 2 of the License, or (at your option) any later version.
10: *
11: * This library is distributed in the hope that it will be useful,
12: * but WITHOUT ANY WARRANTY; without even the implied warranty of
13: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14: * Lesser General Public License for more details.
15: *
16: * You should have received a copy of the GNU Lesser General Public
17: * License along with this library; if not, write to the Free Software
18: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA
19: */
20:
21: #include "encoding.h"
22:
23: static gchar *
24: encoding_multi_convert_to_utf8(const gchar *text, gssize textlen, const gchar *encodings, GError **error, gboolean fallback)
25: {
26: gchar *utf8 = NULL;
27: const gchar *begin = encodings;
28: const gchar *end = NULL;
29: gchar *curr_encoding = NULL; /* allocated buffer for encoding name */
30: const gchar *curr_encoding_ro = NULL; /* read-only encoding name */
31:
32: if (!encodings) {
33: purple_debug_error("oscar", "encodings is NULL");
34: return NULL;
35: }
36:
37: for (;;)
38: {
39: /* extract next encoding */
40: end = strchr(begin, ',');
41: if (!end) {
42: curr_encoding_ro = begin;
43: } else { /* allocate buffer for encoding */
44: curr_encoding = g_strndup(begin, end - begin);
45: if (!curr_encoding) {
46: purple_debug_error("oscar", "Error allocating memory for encoding");
47: break;
48: }
49: curr_encoding_ro = curr_encoding;
50: }
51:
52: if (!g_ascii_strcasecmp(curr_encoding_ro, "utf-8") && g_utf8_validate(text, textlen, NULL)) {
53: break;
54: }
55:
56: utf8 = g_convert(text, textlen, "UTF-8", curr_encoding_ro, NULL, NULL, NULL);
57:
58: if (!end) /* last occurence. do not free curr_encoding: buffer was'nt allocated */
59: break;
60:
61: g_free(curr_encoding); /* free allocated buffer for encoding here */
62:
63: if (utf8) /* text was successfully converted */
64: break;
65:
66: begin = end + 1;
67: }
68:
69: if (!utf8 && fallback)
70: { /* "begin" points to last encoding */
71: utf8 = g_convert_with_fallback(text, textlen, "UTF-8", begin, "?", NULL, NULL, error);
72: }
73:
74: return utf8;
75: }
76:
77: static gchar *
78: encoding_extract(const char *encoding)
79: {
80: char *begin, *end;
81:
82: if (encoding == NULL) {
83: return NULL;
84: }
85:
86: if (!g_str_has_prefix(encoding, "text/aolrtf; charset=") &&
87: !g_str_has_prefix(encoding, "text/x-aolrtf; charset=") &&
88: !g_str_has_prefix(encoding, "text/plain; charset=")) {
89: return g_strdup(encoding);
90: }
91:
92: begin = strchr(encoding, '"');
93: end = strrchr(encoding, '"');
94:
95: if ((begin == NULL) || (end == NULL) || (begin >= end)) {
96: return g_strdup(encoding);
97: }
98:
99: return g_strndup(begin+1, (end-1) - begin);
100: }
101:
102: gchar *
103: oscar_encoding_to_utf8(const char *encoding, const char *text, int textlen)
104: {
105: gchar *utf8 = NULL;
106: const gchar *glib_encoding = NULL;
107: gchar *extracted_encoding = encoding_extract(encoding);
108:
109: if (extracted_encoding == NULL || *extracted_encoding == '\0') {
110: purple_debug_info("oscar", "Empty encoding, assuming UTF-8\n");
111: } else if (!g_ascii_strcasecmp(extracted_encoding, "iso-8859-1")) {
112: glib_encoding = "iso-8859-1";
113: } else if (!g_ascii_strcasecmp(extracted_encoding, "ISO-8859-1-Windows-3.1-Latin-1") || !g_ascii_strcasecmp(extracted_encoding, "us-ascii")) {
114: glib_encoding = "Windows-1252";
115: } else if (!g_ascii_strcasecmp(extracted_encoding, "unicode-2-0")) {
116: glib_encoding = "UTF-16BE";
117: } else if (g_ascii_strcasecmp(extracted_encoding, "utf-8")) {
118: glib_encoding = extracted_encoding;
119: }
120:
121: if (glib_encoding != NULL) {
122: utf8 = encoding_multi_convert_to_utf8(text, textlen, glib_encoding, NULL, FALSE);
123: }
124:
125: /*
126: * If utf8 is still NULL then either the encoding is utf-8 or
127: * we have been unable to convert the text to utf-8 from the encoding
128: * that was specified. So we check if the text is valid utf-8 then
129: * just copy it.
130: */
131: if (utf8 == NULL) {
132: if (textlen != 0 && *text != '\0' && !g_utf8_validate(text, textlen, NULL))
133: utf8 = g_strdup(_("(There was an error receiving this message. The buddy you are speaking with is probably using a different encoding than expected. If you know what encoding he is using, you can specify it in the advanced account options for your AIM/ICQ account.)"));
134: else
135: utf8 = g_strndup(text, textlen);
136: }
137:
138: g_free(extracted_encoding);
139: return utf8;
140: }
141:
142: gchar *
143: oscar_utf8_try_convert(PurpleAccount *account, OscarData *od, const gchar *msg)
144: {
145: const char *charset = NULL;
146: char *ret = NULL;
147:
148: if (msg == NULL)
149: return NULL;
150:
151: if (g_utf8_validate(msg, -1, NULL))
152: return g_strdup(msg);
153:
154: if (od->icq)
155: charset = purple_account_get_string(account, "encoding", NULL);
156:
157: if(charset && *charset)
158: ret = encoding_multi_convert_to_utf8(msg, -1, charset, NULL, FALSE);
159:
160: if(!ret)
161: ret = purple_utf8_try_convert(msg);
162:
163: return ret;
164: }
165:
166: static gchar *
167: oscar_convert_to_utf8(const gchar *data, gsize datalen, const char *charsetstr, gboolean fallback)
168: {
169: gchar *ret = NULL;
170: GError *err = NULL;
171:
172: if ((charsetstr == NULL) || (*charsetstr == '\0'))
173: return NULL;
174:
175: if (g_ascii_strcasecmp("UTF-8", charsetstr)) {
176: ret = encoding_multi_convert_to_utf8(data, datalen, charsetstr, &err, fallback);
177: if (err != NULL) {
178: purple_debug_warning("oscar", "Conversion from %s failed: %s.\n",
179: charsetstr, err->message);
180: g_error_free(err);
181: }
182: } else {
183: if (g_utf8_validate(data, datalen, NULL))
184: ret = g_strndup(data, datalen);
185: else
186: purple_debug_warning("oscar", "String is not valid UTF-8.\n");
187: }
188:
189: return ret;
190: }
191:
192: gchar *
193: oscar_decode_im(PurpleAccount *account, const char *sourcebn, guint16 charset, const gchar *data, gsize datalen)
194: {
195: gchar *ret = NULL;
196: /* charsetstr1 is always set to what the correct encoding should be. */
197: const gchar *charsetstr1, *charsetstr2, *charsetstr3 = NULL;
198:
199: if ((datalen == 0) || (data == NULL))
200: return NULL;
201:
202: if (charset == AIM_CHARSET_UNICODE) {
203: charsetstr1 = "UTF-16BE";
204: charsetstr2 = "UTF-8";
205: } else if (charset == AIM_CHARSET_LATIN_1) {
206: if ((sourcebn != NULL) && oscar_util_valid_name_icq(sourcebn))
207: charsetstr1 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
208: else
209: charsetstr1 = "ISO-8859-1";
210: charsetstr2 = "UTF-8";
211: } else if (charset == AIM_CHARSET_ASCII) {
212: /* Should just be "ASCII" */
213: charsetstr1 = "ASCII";
214: charsetstr2 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
215: } else if (charset == 0x000d) {
216: /* iChat sending unicode over a Direct IM connection = UTF-8 */
217: /* Mobile AIM client on multiple devices (including Blackberry Tour, Nokia 3100, and LG VX6000) = ISO-8859-1 */
218: charsetstr1 = "UTF-8";
219: charsetstr2 = "ISO-8859-1";
220: charsetstr3 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
221: } else {
222: /* Unknown, hope for valid UTF-8... */
223: charsetstr1 = "UTF-8";
224: charsetstr2 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
225: }
226:
227: purple_debug_info("oscar", "Parsing IM, charset=0x%04hx, datalen=%" G_GSIZE_FORMAT ", choice1=%s, choice2=%s, choice3=%s\n",
228: charset, datalen, charsetstr1, charsetstr2, (charsetstr3 ? charsetstr3 : ""));
229:
230: ret = oscar_convert_to_utf8(data, datalen, charsetstr1, FALSE);
231: if (ret == NULL) {
232: if (charsetstr3 != NULL) {
233: /* Try charsetstr2 without allowing substitutions, then fall through to charsetstr3 if needed */
234: ret = oscar_convert_to_utf8(data, datalen, charsetstr2, FALSE);
235: if (ret == NULL)
236: ret = oscar_convert_to_utf8(data, datalen, charsetstr3, TRUE);
237: } else {
238: /* Try charsetstr2, allowing substitutions */
239: ret = oscar_convert_to_utf8(data, datalen, charsetstr2, TRUE);
240: }
241: }
242: if (ret == NULL) {
243: char *str, *salvage, *tmp;
244:
245: str = g_malloc(datalen + 1);
246: strncpy(str, data, datalen);
247: str[datalen] = '\0';
248: salvage = purple_utf8_salvage(str);
249: tmp = g_strdup_printf(_("(There was an error receiving this message. Either you and %s have different encodings selected, or %s has a buggy client.)"),
250: sourcebn, sourcebn);
251: ret = g_strdup_printf("%s %s", salvage, tmp);
252: g_free(tmp);
253: g_free(str);
254: g_free(salvage);
255: }
256:
257: return ret;
258: }
259:
260: static guint16
261: get_simplest_charset(const char *utf8)
262: {
263: while (*utf8)
264: {
265: if ((unsigned char)(*utf8) > 0x7f) {
266: /* not ASCII! */
267: return AIM_CHARSET_UNICODE;
268: }
269: utf8++;
270: }
271: return AIM_CHARSET_ASCII;
272: }
273:
274: gchar *
275: oscar_encode_im(const gchar *msg, gsize *result_len, guint16 *charset, gchar **charsetstr)
276: {
277: guint16 msg_charset = get_simplest_charset(msg);
278: if (charset != NULL) {
279: *charset = msg_charset;
280: }
281: if (charsetstr != NULL) {
282: *charsetstr = msg_charset == AIM_CHARSET_ASCII ? "us-ascii" : "unicode-2-0";
283: }
284: return g_convert(msg, -1, msg_charset == AIM_CHARSET_ASCII ? "ASCII" : "UTF-16BE", "UTF-8", NULL, result_len, NULL);
285: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>