diff options
author | Wilmer van der Gaast <wilmer@gaast.net> | 2006-01-03 19:30:54 +0100 |
---|---|---|
committer | Wilmer van der Gaast <wilmer@gaast.net> | 2006-01-03 19:30:54 +0100 |
commit | 39cc341b8f6299fbf8a62b243d278d1e48c8def7 (patch) | |
tree | ca0dbb4a395f054b2af5f324ceacbd153e86cf4e | |
parent | a252c1ad43823eb935148a5578ee0d666902b2f1 (diff) |
strip_html now replaces non-ASCII characters (entities like &eacute;) with
their UTF-8 versions instead of Latin1. Also added &[aeiou]uml; entities
to the list. However, I still don't know if this is really important anyway...
-rw-r--r-- | util.c | 57 |
1 files changed, 33 insertions, 24 deletions
@@ -180,34 +180,39 @@ time_t get_time(int year, int month, int day, int hour, int min, int sec) typedef struct htmlentity { char code[8]; - char is; + char is[4]; } htmlentity_t; /* FIXME: This is ISO8859-1(5) centric, so might cause problems with other charsets. */ -static htmlentity_t ent[] = +static const htmlentity_t ent[] = { - { "lt", '<' }, - { "gt", '>' }, - { "amp", '&' }, - { "quot", '"' }, - { "aacute", 'á' }, - { "eacute", 'é' }, - { "iacute", 'é' }, - { "oacute", 'ó' }, - { "uacute", 'ú' }, - { "agrave", 'à' }, - { "egrave", 'è' }, - { "igrave", 'ì' }, - { "ograve", 'ò' }, - { "ugrave", 'ù' }, - { "acirc", 'â' }, - { "ecirc", 'ê' }, - { "icirc", 'î' }, - { "ocirc", 'ô' }, - { "ucirc", 'û' }, - { "nbsp", ' ' }, - { "", 0 } + { "lt", "<" }, + { "gt", ">" }, + { "amp", "&" }, + { "quot", "\"" }, + { "aacute", "á" }, + { "eacute", "é" }, + { "iacute", "é" }, + { "oacute", "ó" }, + { "uacute", "ú" }, + { "agrave", "à" }, + { "egrave", "è" }, + { "igrave", "ì" }, + { "ograve", "ò" }, + { "ugrave", "ù" }, + { "acirc", "â" }, + { "ecirc", "ê" }, + { "icirc", "î" }, + { "ocirc", "ô" }, + { "ucirc", "û" }, + { "auml", "ä" }, + { "euml", "ë" }, + { "iuml", "ï" }, + { "ouml", "ö" }, + { "uuml", "ü" }, + { "nbsp", " " }, + { "", "" } }; void strip_html( char *in ) @@ -256,7 +261,11 @@ void strip_html( char *in ) for( i = 0; *ent[i].code; i ++ ) if( g_strncasecmp( ent[i].code, cs, strlen( ent[i].code ) ) == 0 ) { - *(s++) = ent[i].is; + int j; + + for( j = 0; ent[i].is[j]; j ++ ) + *(s++) = ent[i].is[j]; + matched = 1; break; } |