File:  [Coherent Logic Development] / ChivanetAimPidgin / oscarprpl / src / c / encoding.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs
Mon Jan 27 19:48:25 2025 UTC (6 months ago) by snw
Branches: MAIN, CoherentLogicDevelopment
CVS tags: test-tag, start, HEAD
Pidgin AIM Plugin for ChivaNet

    1: /*
    2:  * Purple's oscar protocol plugin
    3:  * This file is the legal property of its developers.
    4:  * Please see the AUTHORS file distributed alongside this file.
    5:  *
    6:  * This library is free software; you can redistribute it and/or
    7:  * modify it under the terms of the GNU Lesser General Public
    8:  * License as published by the Free Software Foundation; either
    9:  * version 2 of the License, or (at your option) any later version.
   10:  *
   11:  * This library is distributed in the hope that it will be useful,
   12:  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   13:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   14:  * Lesser General Public License for more details.
   15:  *
   16:  * You should have received a copy of the GNU Lesser General Public
   17:  * License along with this library; if not, write to the Free Software
   18:  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111-1301  USA
   19: */
   20: 
   21: #include "encoding.h"
   22: 
   23: static gchar *
   24: encoding_multi_convert_to_utf8(const gchar *text, gssize textlen, const gchar *encodings, GError **error, gboolean fallback)
   25: {
   26: 	gchar *utf8 = NULL;
   27: 	const gchar *begin = encodings;
   28: 	const gchar *end = NULL;
   29: 	gchar *curr_encoding = NULL; /* allocated buffer for encoding name */
   30: 	const gchar *curr_encoding_ro = NULL; /* read-only encoding name */
   31: 
   32: 	if (!encodings) {
   33: 		purple_debug_error("oscar", "encodings is NULL");
   34: 		return NULL;
   35: 	}
   36: 
   37: 	for (;;)
   38: 	{
   39: 		/* extract next encoding */
   40: 		end = strchr(begin, ',');
   41: 		if (!end) {
   42: 			curr_encoding_ro = begin;
   43: 		}	else { /* allocate buffer for encoding */
   44: 			curr_encoding = g_strndup(begin, end - begin);
   45: 			if (!curr_encoding) {
   46: 				purple_debug_error("oscar", "Error allocating memory for encoding");
   47: 				break;
   48: 			}
   49: 			curr_encoding_ro = curr_encoding;
   50: 		}
   51: 
   52: 		if (!g_ascii_strcasecmp(curr_encoding_ro, "utf-8") && g_utf8_validate(text, textlen, NULL)) {
   53: 			break;
   54: 		}
   55: 
   56: 		utf8 = g_convert(text, textlen, "UTF-8", curr_encoding_ro, NULL, NULL, NULL);
   57: 
   58: 		if (!end) /* last occurence. do not free curr_encoding: buffer was'nt allocated */
   59: 			break;
   60: 
   61: 		g_free(curr_encoding); /* free allocated buffer for encoding here */
   62: 
   63: 		if (utf8) /* text was successfully converted */
   64: 			break;
   65: 
   66: 		begin = end + 1;
   67: 	}
   68: 
   69: 	if (!utf8 && fallback)
   70: 	{ /* "begin" points to last encoding */
   71: 		utf8 = g_convert_with_fallback(text, textlen, "UTF-8", begin, "?", NULL, NULL, error);
   72: 	}
   73: 
   74: 	return utf8;
   75: }
   76: 
   77: static gchar *
   78: encoding_extract(const char *encoding)
   79: {
   80: 	char *begin, *end;
   81: 
   82: 	if (encoding == NULL) {
   83: 		return NULL;
   84: 	}
   85: 
   86: 	if (!g_str_has_prefix(encoding, "text/aolrtf; charset=") &&
   87: 		!g_str_has_prefix(encoding, "text/x-aolrtf; charset=") &&
   88: 		!g_str_has_prefix(encoding, "text/plain; charset=")) {
   89: 		return g_strdup(encoding);
   90: 	}
   91: 
   92: 	begin = strchr(encoding, '"');
   93: 	end = strrchr(encoding, '"');
   94: 
   95: 	if ((begin == NULL) || (end == NULL) || (begin >= end)) {
   96: 		return g_strdup(encoding);
   97: 	}
   98: 
   99: 	return g_strndup(begin+1, (end-1) - begin);
  100: }
  101: 
  102: gchar *
  103: oscar_encoding_to_utf8(const char *encoding, const char *text, int textlen)
  104: {
  105: 	gchar *utf8 = NULL;
  106: 	const gchar *glib_encoding = NULL;
  107: 	gchar *extracted_encoding = encoding_extract(encoding);
  108: 
  109: 	if (extracted_encoding == NULL || *extracted_encoding == '\0') {
  110: 		purple_debug_info("oscar", "Empty encoding, assuming UTF-8\n");
  111: 	} else if (!g_ascii_strcasecmp(extracted_encoding, "iso-8859-1")) {
  112: 		glib_encoding = "iso-8859-1";
  113: 	} else if (!g_ascii_strcasecmp(extracted_encoding, "ISO-8859-1-Windows-3.1-Latin-1") || !g_ascii_strcasecmp(extracted_encoding, "us-ascii")) {
  114: 		glib_encoding = "Windows-1252";
  115: 	} else if (!g_ascii_strcasecmp(extracted_encoding, "unicode-2-0")) {
  116: 		glib_encoding = "UTF-16BE";
  117: 	} else if (g_ascii_strcasecmp(extracted_encoding, "utf-8")) {
  118: 		glib_encoding = extracted_encoding;
  119: 	}
  120: 
  121: 	if (glib_encoding != NULL) {
  122: 		utf8 = encoding_multi_convert_to_utf8(text, textlen, glib_encoding, NULL, FALSE);
  123: 	}
  124: 
  125: 	/*
  126: 	 * If utf8 is still NULL then either the encoding is utf-8 or
  127: 	 * we have been unable to convert the text to utf-8 from the encoding
  128: 	 * that was specified.  So we check if the text is valid utf-8 then
  129: 	 * just copy it.
  130: 	 */
  131: 	if (utf8 == NULL) {
  132: 		if (textlen != 0 && *text != '\0' && !g_utf8_validate(text, textlen, NULL))
  133: 			utf8 = g_strdup(_("(There was an error receiving this message.  The buddy you are speaking with is probably using a different encoding than expected.  If you know what encoding he is using, you can specify it in the advanced account options for your AIM/ICQ account.)"));
  134: 		else
  135: 			utf8 = g_strndup(text, textlen);
  136: 	}
  137: 
  138: 	g_free(extracted_encoding);
  139: 	return utf8;
  140: }
  141: 
  142: gchar *
  143: oscar_utf8_try_convert(PurpleAccount *account, OscarData *od, const gchar *msg)
  144: {
  145: 	const char *charset = NULL;
  146: 	char *ret = NULL;
  147: 
  148: 	if (msg == NULL)
  149: 		return NULL;
  150: 
  151: 	if (g_utf8_validate(msg, -1, NULL))
  152: 		return g_strdup(msg);
  153: 
  154: 	if (od->icq)
  155: 		charset = purple_account_get_string(account, "encoding", NULL);
  156: 
  157: 	if(charset && *charset)
  158: 		ret = encoding_multi_convert_to_utf8(msg, -1, charset, NULL, FALSE);
  159: 
  160: 	if(!ret)
  161: 		ret = purple_utf8_try_convert(msg);
  162: 
  163: 	return ret;
  164: }
  165: 
  166: static gchar *
  167: oscar_convert_to_utf8(const gchar *data, gsize datalen, const char *charsetstr, gboolean fallback)
  168: {
  169: 	gchar *ret = NULL;
  170: 	GError *err = NULL;
  171: 
  172: 	if ((charsetstr == NULL) || (*charsetstr == '\0'))
  173: 		return NULL;
  174: 
  175: 	if (g_ascii_strcasecmp("UTF-8", charsetstr)) {
  176: 		ret = encoding_multi_convert_to_utf8(data, datalen, charsetstr, &err, fallback);
  177: 		if (err != NULL) {
  178: 			purple_debug_warning("oscar", "Conversion from %s failed: %s.\n",
  179: 							   charsetstr, err->message);
  180: 			g_error_free(err);
  181: 		}
  182: 	} else {
  183: 		if (g_utf8_validate(data, datalen, NULL))
  184: 			ret = g_strndup(data, datalen);
  185: 		else
  186: 			purple_debug_warning("oscar", "String is not valid UTF-8.\n");
  187: 	}
  188: 
  189: 	return ret;
  190: }
  191: 
  192: gchar *
  193: oscar_decode_im(PurpleAccount *account, const char *sourcebn, guint16 charset, const gchar *data, gsize datalen)
  194: {
  195: 	gchar *ret = NULL;
  196: 	/* charsetstr1 is always set to what the correct encoding should be. */
  197: 	const gchar *charsetstr1, *charsetstr2, *charsetstr3 = NULL;
  198: 
  199: 	if ((datalen == 0) || (data == NULL))
  200: 		return NULL;
  201: 
  202: 	if (charset == AIM_CHARSET_UNICODE) {
  203: 		charsetstr1 = "UTF-16BE";
  204: 		charsetstr2 = "UTF-8";
  205: 	} else if (charset == AIM_CHARSET_LATIN_1) {
  206: 		if ((sourcebn != NULL) && oscar_util_valid_name_icq(sourcebn))
  207: 			charsetstr1 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
  208: 		else
  209: 			charsetstr1 = "ISO-8859-1";
  210: 		charsetstr2 = "UTF-8";
  211: 	} else if (charset == AIM_CHARSET_ASCII) {
  212: 		/* Should just be "ASCII" */
  213: 		charsetstr1 = "ASCII";
  214: 		charsetstr2 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
  215: 	} else if (charset == 0x000d) {
  216: 		/* iChat sending unicode over a Direct IM connection = UTF-8 */
  217: 		/* Mobile AIM client on multiple devices (including Blackberry Tour, Nokia 3100, and LG VX6000) = ISO-8859-1 */
  218: 		charsetstr1 = "UTF-8";
  219: 		charsetstr2 = "ISO-8859-1";
  220: 		charsetstr3 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
  221: 	} else {
  222: 		/* Unknown, hope for valid UTF-8... */
  223: 		charsetstr1 = "UTF-8";
  224: 		charsetstr2 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
  225: 	}
  226: 
  227: 	purple_debug_info("oscar", "Parsing IM, charset=0x%04hx, datalen=%" G_GSIZE_FORMAT ", choice1=%s, choice2=%s, choice3=%s\n",
  228: 					  charset, datalen, charsetstr1, charsetstr2, (charsetstr3 ? charsetstr3 : ""));
  229: 
  230: 	ret = oscar_convert_to_utf8(data, datalen, charsetstr1, FALSE);
  231: 	if (ret == NULL) {
  232: 		if (charsetstr3 != NULL) {
  233: 			/* Try charsetstr2 without allowing substitutions, then fall through to charsetstr3 if needed */
  234: 			ret = oscar_convert_to_utf8(data, datalen, charsetstr2, FALSE);
  235: 			if (ret == NULL)
  236: 				ret = oscar_convert_to_utf8(data, datalen, charsetstr3, TRUE);
  237: 		} else {
  238: 			/* Try charsetstr2, allowing substitutions */
  239: 			ret = oscar_convert_to_utf8(data, datalen, charsetstr2, TRUE);
  240: 		}
  241: 	}
  242: 	if (ret == NULL) {
  243: 		char *str, *salvage, *tmp;
  244: 
  245: 		str = g_malloc(datalen + 1);
  246: 		strncpy(str, data, datalen);
  247: 		str[datalen] = '\0';
  248: 		salvage = purple_utf8_salvage(str);
  249: 		tmp = g_strdup_printf(_("(There was an error receiving this message.  Either you and %s have different encodings selected, or %s has a buggy client.)"),
  250: 					  sourcebn, sourcebn);
  251: 		ret = g_strdup_printf("%s %s", salvage, tmp);
  252: 		g_free(tmp);
  253: 		g_free(str);
  254: 		g_free(salvage);
  255: 	}
  256: 
  257: 	return ret;
  258: }
  259: 
  260: static guint16
  261: get_simplest_charset(const char *utf8)
  262: {
  263: 	while (*utf8)
  264: 	{
  265: 		if ((unsigned char)(*utf8) > 0x7f) {
  266: 			/* not ASCII! */
  267: 			return AIM_CHARSET_UNICODE;
  268: 		}
  269: 		utf8++;
  270: 	}
  271: 	return AIM_CHARSET_ASCII;
  272: }
  273: 
  274: gchar *
  275: oscar_encode_im(const gchar *msg, gsize *result_len, guint16 *charset, gchar **charsetstr)
  276: {
  277: 	guint16 msg_charset = get_simplest_charset(msg);
  278: 	if (charset != NULL) {
  279: 		*charset = msg_charset;
  280: 	}
  281: 	if (charsetstr != NULL) {
  282: 		*charsetstr = msg_charset == AIM_CHARSET_ASCII ? "us-ascii" : "unicode-2-0";
  283: 	}
  284: 	return g_convert(msg, -1, msg_charset == AIM_CHARSET_ASCII ? "ASCII" : "UTF-16BE", "UTF-8", NULL, result_len, NULL);
  285: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>