aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Wilmer van der Gaast <wilmer@gaast.net> 2006-01-03 19:30:54 +0100
committer: Wilmer van der Gaast <wilmer@gaast.net> 2006-01-03 19:30:54 +0100
commit: 39cc341b8f6299fbf8a62b243d278d1e48c8def7 (patch)
tree: ca0dbb4a395f054b2af5f324ceacbd153e86cf4e
parent: a252c1ad43823eb935148a5578ee0d666902b2f1 (diff)
strip_html now replaces entities for non-ASCII characters (like &eacute;) with
their UTF-8 versions instead of Latin1. Also added the &[aeiou]uml; entities to the list. However, I still don't know if this is really important anyway...
-rw-r--r-- util.c | 57
1 files changed, 33 insertions, 24 deletions
diff --git a/util.c b/util.c
index 57ee0522..3fad6314 100644
--- a/util.c
+++ b/util.c
@@ -180,34 +180,39 @@ time_t get_time(int year, int month, int day, int hour, int min, int sec)
typedef struct htmlentity
{
char code[8];
- char is;
+ char is[4];
} htmlentity_t;
/* FIXME: This is ISO8859-1(5) centric, so might cause problems with other charsets. */
-static htmlentity_t ent[] =
+static const htmlentity_t ent[] =
{
- { "lt", '<' },
- { "gt", '>' },
- { "amp", '&' },
- { "quot", '"' },
- { "aacute", 'á' },
- { "eacute", 'é' },
- { "iacute", 'é' },
- { "oacute", 'ó' },
- { "uacute", 'ú' },
- { "agrave", 'à' },
- { "egrave", 'è' },
- { "igrave", 'ì' },
- { "ograve", 'ò' },
- { "ugrave", 'ù' },
- { "acirc", 'â' },
- { "ecirc", 'ê' },
- { "icirc", 'î' },
- { "ocirc", 'ô' },
- { "ucirc", 'û' },
- { "nbsp", ' ' },
- { "", 0 }
+ { "lt", "<" },
+ { "gt", ">" },
+ { "amp", "&" },
+ { "quot", "\"" },
+ { "aacute", "á" },
+ { "eacute", "é" },
+ { "iacute", "é" },
+ { "oacute", "ó" },
+ { "uacute", "ú" },
+ { "agrave", "à" },
+ { "egrave", "è" },
+ { "igrave", "ì" },
+ { "ograve", "ò" },
+ { "ugrave", "ù" },
+ { "acirc", "â" },
+ { "ecirc", "ê" },
+ { "icirc", "î" },
+ { "ocirc", "ô" },
+ { "ucirc", "û" },
+ { "auml", "ä" },
+ { "euml", "ë" },
+ { "iuml", "ï" },
+ { "ouml", "ö" },
+ { "uuml", "ü" },
+ { "nbsp", " " },
+ { "", "" }
};
void strip_html( char *in )
@@ -256,7 +261,11 @@ void strip_html( char *in )
for( i = 0; *ent[i].code; i ++ )
if( g_strncasecmp( ent[i].code, cs, strlen( ent[i].code ) ) == 0 )
{
- *(s++) = ent[i].is;
+ int j;
+
+ for( j = 0; ent[i].is[j]; j ++ )
+ *(s++) = ent[i].is[j];
+
matched = 1;
break;
}