From e62762bc4b884d576c00fd7a2636b610e7c1b578 Mon Sep 17 00:00:00 2001 From: Wilmer van der Gaast Date: Tue, 27 Dec 2005 16:10:15 +0100 Subject: Moved util.c to root, moved some things to there from bitlbee.c --- util.c | 413 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 413 insertions(+) create mode 100644 util.c (limited to 'util.c') diff --git a/util.c b/util.c new file mode 100644 index 00000000..6a2f2e46 --- /dev/null +++ b/util.c @@ -0,0 +1,413 @@ + /********************************************************************\ + * BitlBee -- An IRC to other IM-networks gateway * + * * + * Copyright 2002-2004 Wilmer van der Gaast and others * + \********************************************************************/ + +/* + * nogaim + * + * Gaim without gaim - for BitlBee + * + * Copyright (C) 1998-1999, Mark Spencer + * (and possibly other members of the Gaim team) + * Copyright 2002-2004 Wilmer van der Gaast + */ + +/* + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License with + the Debian GNU/Linux distribution in /usr/share/common-licenses/GPL; + if not, write to the Free Software Foundation, Inc., 59 Temple Place, + Suite 330, Boston, MA 02111-1307 USA +*/ + +/* Parts from util.c from gaim needed by nogaim */ +#define BITLBEE_CORE +#include "nogaim.h" +#include +#include +#include +#include +#include + +char *utf8_to_str(const char *in) +{ + int n = 0, i = 0; + int inlen; + char *result; + + if (!in) + return NULL; + + inlen = strlen(in); + + result = g_malloc(inlen + 1); + + while (n <= inlen - 1) { + long c = (long)in[n]; + if (c < 0x80) + result[i++] = (char)c; + else { + if ((c & 0xC0) == 0xC0) + result[i++] = + (char)(((c & 0x03) << 6) | (((unsigned char)in[++n]) & 0x3F)); + else if ((c & 0xE0) == 0xE0) { + if (n + 2 <= inlen) { + result[i] = + (char)(((c & 0xF) << 4) | (((unsigned char)in[++n]) & 0x3F)); + result[i] = + (char)(((unsigned char)result[i]) | + (((unsigned char)in[++n]) & 0x3F)); + i++; + } else + n += 2; + } else if ((c & 0xF0) == 0xF0) + n += 3; + else if ((c & 0xF8) == 0xF8) + n += 4; + else if ((c & 0xFC) == 0xFC) + n += 5; + } + n++; + } + result[i] = '\0'; + + return result; +} + +char *str_to_utf8(const char *in) +{ + int n = 0, i = 0; + int inlen; + char *result = NULL; + + if (!in) + return NULL; + + inlen = strlen(in); + + result = g_malloc(inlen * 2 + 1); + + while (n < inlen) { + long c = (long)in[n]; + if (c == 27) { + n += 2; + if (in[n] == 'x') + n++; + if (in[n] == '3') + n++; + n += 2; + continue; + } + /* why are we removing newlines and carriage returns? 
+ if ((c == 0x0D) || (c == 0x0A)) { + n++; + continue; + } + */ + if (c < 128) + result[i++] = (char)c; + else { + result[i++] = (char)((c >> 6) | 192); + result[i++] = (char)((c & 63) | 128); + } + n++; + } + result[i] = '\0'; + + return result; +} + +void strip_linefeed(gchar *text) +{ + int i, j; + gchar *text2 = g_malloc(strlen(text) + 1); + + for (i = 0, j = 0; text[i]; i++) + if (text[i] != '\r') + text2[j++] = text[i]; + text2[j] = '\0'; + + strcpy(text, text2); + g_free(text2); +} + +char *add_cr(char *text) +{ + char *ret = NULL; + int count = 0, j; + unsigned int i; + + if (text[0] == '\n') + count++; + for (i = 1; i < strlen(text); i++) + if (text[i] == '\n' && text[i - 1] != '\r') + count++; + + if (count == 0) + return g_strdup(text); + + ret = g_malloc0(strlen(text) + count + 1); + + i = 0; j = 0; + if (text[i] == '\n') + ret[j++] = '\r'; + ret[j++] = text[i++]; + for (; i < strlen(text); i++) { + if (text[i] == '\n' && text[i - 1] != '\r') + ret[j++] = '\r'; + ret[j++] = text[i]; + } + + return ret; +} + +static char alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" "0123456789+/"; + +/* XXX Find bug */ +char *tobase64(const char *text) +{ + char *out = NULL; + const char *c; + unsigned int tmp = 0; + int len = 0, n = 0; + + c = text; + + while (*c) { + tmp = tmp << 8; + tmp += *c; + n++; + + if (n == 3) { + out = g_realloc(out, len + 4); + out[len] = alphabet[(tmp >> 18) & 0x3f]; + out[len + 1] = alphabet[(tmp >> 12) & 0x3f]; + out[len + 2] = alphabet[(tmp >> 6) & 0x3f]; + out[len + 3] = alphabet[tmp & 0x3f]; + len += 4; + tmp = 0; + n = 0; + } + c++; + } + switch (n) { + + case 2: + tmp <<= 8; + out = g_realloc(out, len + 5); + out[len] = alphabet[(tmp >> 18) & 0x3f]; + out[len + 1] = alphabet[(tmp >> 12) & 0x3f]; + out[len + 2] = alphabet[(tmp >> 6) & 0x3f]; + out[len + 3] = '='; + out[len + 4] = 0; + break; + case 1: + tmp <<= 16; + out = g_realloc(out, len + 5); + out[len] = alphabet[(tmp >> 18) & 0x3f]; + out[len + 1] = 
alphabet[(tmp >> 12) & 0x3f]; + out[len + 2] = '='; + out[len + 3] = '='; + out[len + 4] = 0; + break; + case 0: + out = g_realloc(out, len + 1); + out[len] = 0; + break; + } + return out; +} + +char *normalize(const char *s) +{ + static char buf[BUF_LEN]; + char *t, *u; + int x = 0; + + g_return_val_if_fail((s != NULL), NULL); + + u = t = g_strdup(s); + + strcpy(t, s); + g_strdown(t); + + while (*t && (x < BUF_LEN - 1)) { + if (*t != ' ') { + buf[x] = *t; + x++; + } + t++; + } + buf[x] = '\0'; + g_free(u); + return buf; +} + +time_t get_time(int year, int month, int day, int hour, int min, int sec) +{ + struct tm tm; + + tm.tm_year = year - 1900; + tm.tm_mon = month - 1; + tm.tm_mday = day; + tm.tm_hour = hour; + tm.tm_min = min; + tm.tm_sec = sec >= 0 ? sec : time(NULL) % 60; + return mktime(&tm); +} + +typedef struct htmlentity +{ + char code[8]; + char is; +} htmlentity_t; + +/* FIXME: This is ISO8859-1(5) centric, so might cause problems with other charsets. */ + +static htmlentity_t ent[] = +{ + { "lt", '<' }, + { "gt", '>' }, + { "amp", '&' }, + { "quot", '"' }, + { "aacute", 'á' }, + { "eacute", 'é' }, + { "iacute", 'é' }, + { "oacute", 'ó' }, + { "uacute", 'ú' }, + { "agrave", 'à' }, + { "egrave", 'è' }, + { "igrave", 'ì' }, + { "ograve", 'ò' }, + { "ugrave", 'ù' }, + { "acirc", 'â' }, + { "ecirc", 'ê' }, + { "icirc", 'î' }, + { "ocirc", 'ô' }, + { "ucirc", 'û' }, + { "nbsp", ' ' }, + { "", 0 } +}; + +void strip_html( char *in ) +{ + char *start = in; + char *out = g_malloc( strlen( in ) + 1 ); + char *s = out, *cs; + int i, matched; + + memset( out, 0, strlen( in ) + 1 ); + + while( *in ) + { + if( *in == '<' && ( isalpha( *(in+1) ) || *(in+1) == '/' ) ) + { + /* If in points at a < and in+1 points at a letter or a slash, this is probably + a HTML-tag. Try to find a closing > and continue there. If the > can't be + found, assume that it wasn't a HTML-tag after all. 
*/ + + cs = in; + + while( *in && *in != '>' ) + in ++; + + if( *in ) + { + if( g_strncasecmp( cs+1, "br", 2) == 0 ) + *(s++) = '\n'; + in ++; + } + else + { + in = cs; + *(s++) = *(in++); + } + } + else if( *in == '&' ) + { + cs = ++in; + while( *in && isalpha( *in ) ) + in ++; + + if( *in == ';' ) in ++; + matched = 0; + + for( i = 0; *ent[i].code; i ++ ) + if( g_strncasecmp( ent[i].code, cs, strlen( ent[i].code ) ) == 0 ) + { + *(s++) = ent[i].is; + matched = 1; + break; + } + + /* None of the entities were matched, so return the string */ + if( !matched ) + { + in = cs - 1; + *(s++) = *(in++); + } + } + else + { + *(s++) = *(in++); + } + } + + strcpy( start, out ); + g_free( out ); +} + +char *escape_html( const char *html ) +{ + const char *c = html; + GString *ret; + char *str; + + if( html == NULL ) + return( NULL ); + + ret = g_string_new( "" ); + + while( *c ) + { + switch( *c ) + { + case '&': + ret = g_string_append( ret, "&amp;" ); + break; + case '<': + ret = g_string_append( ret, "&lt;" ); + break; + case '>': + ret = g_string_append( ret, "&gt;" ); + break; + case '"': + ret = g_string_append( ret, "&quot;" ); + break; + default: + ret = g_string_append_c( ret, *c ); + } + c ++; + } + + str = ret->str; + g_string_free( ret, FALSE ); + return( str ); +} + +void info_string_append(GString *str, char *newline, char *name, char *value) +{ + if( value && value[0] ) + g_string_sprintfa( str, "%s%s: %s", newline, name, value ); +} -- cgit v1.2.3 From c88999c6ea83ffbc8a0eb8f1ceb0ee07612dfe29 Mon Sep 17 00:00:00 2001 From: Wilmer van der Gaast Date: Tue, 27 Dec 2005 16:20:35 +0100 Subject: Forgot to actually move those functions in previous commit. And *argh*, don't commit things done for debugging! 
--- util.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 75 insertions(+), 5 deletions(-) (limited to 'util.c') diff --git a/util.c b/util.c index 6a2f2e46..73298fab 100644 --- a/util.c +++ b/util.c @@ -5,13 +5,12 @@ \********************************************************************/ /* - * nogaim - * - * Gaim without gaim - for BitlBee + * Various utility functions. Some are copied from Gaim to support the + * IM-modules, most are from BitlBee. * * Copyright (C) 1998-1999, Mark Spencer * (and possibly other members of the Gaim team) - * Copyright 2002-2004 Wilmer van der Gaast + * Copyright 2002-2005 Wilmer van der Gaast */ /* @@ -31,7 +30,6 @@ Suite 330, Boston, MA 02111-1307 USA */ -/* Parts from util.c from gaim needed by nogaim */ #define BITLBEE_CORE #include "nogaim.h" #include @@ -411,3 +409,75 @@ void info_string_append(GString *str, char *newline, char *name, char *value) if( value && value[0] ) g_string_sprintfa( str, "%s%s: %s", newline, name, value ); } + +/* Decode%20a%20file%20name */ +void http_decode( char *s ) +{ + char *t; + int i, j, k; + + t = g_new( char, strlen( s ) + 1 ); + + for( i = j = 0; s[i]; i ++, j ++ ) + { + if( s[i] == '%' ) + { + if( sscanf( s + i + 1, "%2x", &k ) ) + { + t[j] = k; + i += 2; + } + else + { + *t = 0; + break; + } + } + else + { + t[j] = s[i]; + } + } + t[j] = 0; + + strcpy( s, t ); + g_free( t ); +} + +/* Warning: This one explodes the string. Worst-cases can make the string 3x its original size! */ +/* This function is safe, but make sure you call it safely as well! */ +void http_encode( char *s ) +{ + char *t; + int i, j; + + t = g_strdup( s ); + + for( i = j = 0; t[i]; i ++, j ++ ) + { + if( t[i] <= ' ' || ((unsigned char *)t)[i] >= 128 || t[i] == '%' ) + { + sprintf( s + j, "%%%02X", ((unsigned char*)t)[i] ); + j += 2; + } + else + { + s[j] = t[i]; + } + } + s[j] = 0; + + g_free( t ); +} + +/* Strip newlines from a string. Modifies the string passed to it. 
*/ +char *strip_newlines( char *source ) +{ + int i; + + for( i = 0; source[i] != '\0'; i ++ ) + if( source[i] == '\n' || source[i] == '\r' ) + source[i] = ' '; + + return source; +} -- cgit v1.2.3 From a252c1ad43823eb935148a5578ee0d666902b2f1 Mon Sep 17 00:00:00 2001 From: Wilmer van der Gaast Date: Sat, 31 Dec 2005 21:29:15 +0100 Subject: Removed useless UTF8-related functions (iconv works a lot better). --- util.c | 88 ------------------------------------------------------------------ 1 file changed, 88 deletions(-) (limited to 'util.c') diff --git a/util.c b/util.c index 73298fab..57ee0522 100644 --- a/util.c +++ b/util.c @@ -38,94 +38,6 @@ #include #include -char *utf8_to_str(const char *in) -{ - int n = 0, i = 0; - int inlen; - char *result; - - if (!in) - return NULL; - - inlen = strlen(in); - - result = g_malloc(inlen + 1); - - while (n <= inlen - 1) { - long c = (long)in[n]; - if (c < 0x80) - result[i++] = (char)c; - else { - if ((c & 0xC0) == 0xC0) - result[i++] = - (char)(((c & 0x03) << 6) | (((unsigned char)in[++n]) & 0x3F)); - else if ((c & 0xE0) == 0xE0) { - if (n + 2 <= inlen) { - result[i] = - (char)(((c & 0xF) << 4) | (((unsigned char)in[++n]) & 0x3F)); - result[i] = - (char)(((unsigned char)result[i]) | - (((unsigned char)in[++n]) & 0x3F)); - i++; - } else - n += 2; - } else if ((c & 0xF0) == 0xF0) - n += 3; - else if ((c & 0xF8) == 0xF8) - n += 4; - else if ((c & 0xFC) == 0xFC) - n += 5; - } - n++; - } - result[i] = '\0'; - - return result; -} - -char *str_to_utf8(const char *in) -{ - int n = 0, i = 0; - int inlen; - char *result = NULL; - - if (!in) - return NULL; - - inlen = strlen(in); - - result = g_malloc(inlen * 2 + 1); - - while (n < inlen) { - long c = (long)in[n]; - if (c == 27) { - n += 2; - if (in[n] == 'x') - n++; - if (in[n] == '3') - n++; - n += 2; - continue; - } - /* why are we removing newlines and carriage returns? 
- if ((c == 0x0D) || (c == 0x0A)) { - n++; - continue; - } - */ - if (c < 128) - result[i++] = (char)c; - else { - result[i++] = (char)((c >> 6) | 192); - result[i++] = (char)((c & 63) | 128); - } - n++; - } - result[i] = '\0'; - - return result; -} - void strip_linefeed(gchar *text) { int i, j; -- cgit v1.2.3 From 39cc341b8f6299fbf8a62b243d278d1e48c8def7 Mon Sep 17 00:00:00 2001 From: Wilmer van der Gaast Date: Tue, 3 Jan 2006 19:30:54 +0100 Subject: strip_html now replaces non-ASCII characters (entities like &eacute;) to their UTF-8 versions instead of Latin1. Also added &[aeiou]uml; entities to the list. However, I still don't know if this is really important anyway... --- util.c | 57 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 24 deletions(-) (limited to 'util.c') diff --git a/util.c b/util.c index 57ee0522..3fad6314 100644 --- a/util.c +++ b/util.c @@ -180,34 +180,39 @@ time_t get_time(int year, int month, int day, int hour, int min, int sec) typedef struct htmlentity { char code[8]; - char is; + char is[4]; } htmlentity_t; /* FIXME: This is ISO8859-1(5) centric, so might cause problems with other charsets. 
*/ -static htmlentity_t ent[] = +static const htmlentity_t ent[] = { - { "lt", '<' }, - { "gt", '>' }, - { "amp", '&' }, - { "quot", '"' }, - { "aacute", 'á' }, - { "eacute", 'é' }, - { "iacute", 'é' }, - { "oacute", 'ó' }, - { "uacute", 'ú' }, - { "agrave", 'à' }, - { "egrave", 'è' }, - { "igrave", 'ì' }, - { "ograve", 'ò' }, - { "ugrave", 'ù' }, - { "acirc", 'â' }, - { "ecirc", 'ê' }, - { "icirc", 'î' }, - { "ocirc", 'ô' }, - { "ucirc", 'û' }, - { "nbsp", ' ' }, - { "", 0 } + { "lt", "<" }, + { "gt", ">" }, + { "amp", "&" }, + { "quot", "\"" }, + { "aacute", "á" }, + { "eacute", "é" }, + { "iacute", "í" }, + { "oacute", "ó" }, + { "uacute", "ú" }, + { "agrave", "à" }, + { "egrave", "è" }, + { "igrave", "ì" }, + { "ograve", "ò" }, + { "ugrave", "ù" }, + { "acirc", "â" }, + { "ecirc", "ê" }, + { "icirc", "î" }, + { "ocirc", "ô" }, + { "ucirc", "û" }, + { "auml", "ä" }, + { "euml", "ë" }, + { "iuml", "ï" }, + { "ouml", "ö" }, + { "uuml", "ü" }, + { "nbsp", " " }, + { "", "" } }; void strip_html( char *in ) @@ -256,7 +261,11 @@ void strip_html( char *in ) for( i = 0; *ent[i].code; i ++ ) if( g_strncasecmp( ent[i].code, cs, strlen( ent[i].code ) ) == 0 ) { - *(s++) = ent[i].is; + int j; + + for( j = 0; ent[i].is[j]; j ++ ) + *(s++) = ent[i].is[j]; + matched = 1; break; } -- cgit v1.2.3 From 2a6ca4f4d8c8ec86b8bb38442189c85a001a5f6c Mon Sep 17 00:00:00 2001 From: Wilmer van der Gaast Date: Wed, 4 Jan 2006 12:16:58 +0100 Subject: Better handling of IPv4 connections in IPv6 mode. (Wrapping/Unwrapping of ::ffff:style addresses.) --- util.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'util.c') diff --git a/util.c b/util.c index 3fad6314..e4b58090 100644 --- a/util.c +++ b/util.c @@ -402,3 +402,43 @@ char *strip_newlines( char *source ) return source; } + +#ifdef IPV6 +/* Wrap an IPv4 address into IPv6 space. Not thread-safe... 
*/ +char *ipv6_wrap( char *src ) +{ + static char dst[64]; + int i; + + for( i = 0; src[i]; i ++ ) + if( ( src[i] < '0' || src[i] > '9' ) && src[i] != '.' ) + break; + + /* Hmm, it's not even an IP... */ + if( src[i] ) + return src; + + g_snprintf( dst, sizeof( dst ), "::ffff:%s", src ); + + return dst; +} + +/* Unwrap an IPv4 address from IPv6 space. Thread-safe, because it's very simple. :-) */ +char *ipv6_unwrap( char *src ) +{ + int i; + + if( g_strncasecmp( src, "::ffff:", 7 ) != 0 ) + return src; + + for( i = 7; src[i]; i ++ ) + if( ( src[i] < '0' || src[i] > '9' ) && src[i] != '.' ) + break; + + /* Hmm, it's not even an IP... */ + if( src[i] ) + return src; + + return ( src + 7 ); +} +#endif -- cgit v1.2.3 From dd8d4c5243eea91dd3b0709ae76abdd3743e99bc Mon Sep 17 00:00:00 2001 From: Wilmer van der Gaast Date: Tue, 10 Jan 2006 15:36:49 +0100 Subject: http_encode() now just escapes everything except [A-Za-z0-9]. Should fix #83. --- util.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'util.c') diff --git a/util.c b/util.c index e4b58090..db783fe0 100644 --- a/util.c +++ b/util.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -376,7 +377,8 @@ void http_encode( char *s ) for( i = j = 0; t[i]; i ++, j ++ ) { - if( t[i] <= ' ' || ((unsigned char *)t)[i] >= 128 || t[i] == '%' ) + /* if( t[i] <= ' ' || ((unsigned char *)t)[i] >= 128 || t[i] == '%' ) */ + if( !isalnum( ((unsigned char *)t)[i] ) ) { sprintf( s + j, "%%%02X", ((unsigned char*)t)[i] ); j += 2; -- cgit v1.2.3