[Commits] [SCM] claws branch, master, updated. 3.11.1-30-g491667f

ticho at claws-mail.org ticho at claws-mail.org
Thu Nov 27 01:05:42 CET 2014


The branch, master has been updated
       via  491667f21ad76f34145ac78e7682f115763fe1e0 (commit)
      from  a10cf7518f9d97e9925a9807dd9f3da283225776 (commit)

Summary of changes:
 src/plugins/rssyl/strutils.c |  114 ++++++++++++++++++++++++++++++++----------
 1 file changed, 88 insertions(+), 26 deletions(-)


- Log -----------------------------------------------------------------
commit 491667f21ad76f34145ac78e7682f115763fe1e0
Author: Andrej Kacian <ticho at claws-mail.org>
Date:   Thu Nov 27 01:00:11 2014 +0100

    RSSyl: Wrote more robust replacing of HTML character references. We now handle decimal references (&#1234;), in addition to a few selected named references.

diff --git a/src/plugins/rssyl/strutils.c b/src/plugins/rssyl/strutils.c
index 3d9f1c5..b2704d9 100644
--- a/src/plugins/rssyl/strutils.c
+++ b/src/plugins/rssyl/strutils.c
@@ -113,22 +113,25 @@ struct _RSSyl_HTMLSymbol
 	gchar *const val;
 };
 
+/* TODO: find a way to offload this to a library which knows all the
+ * defined named entities (over 200). */
 static RSSyl_HTMLSymbol symbol_list[] = {
-	{ "&lt;", "<" },
-	{ "&gt;", ">" },
-	{ "&amp;", "&" },
-	{ "&quot;", "\"" },
-	{ "&lsquo;",  "'" },
-	{ "&rsquo;",  "'" },
-	{ "&ldquo;",  "\"" },
-	{ "&rdquo;",  "\"" },
-	{ "&nbsp;", " " },
-	{ "&trade;", "(TM)" },
-	{ "™", "(TM)" },
-	{ "'", "'" },
-	{ "&hellip;", "..." },
-	{ "…", "..." },
-	{ "&mdash;", "-" },
+	{ "lt", "<" },
+	{ "gt", ">" },
+	{ "amp", "&" },
+	{ "apos", "'" },
+	{ "quot", "\"" },
+	{ "lsquo",  "‘" },
+	{ "rsquo",  "’" },
+	{ "ldquo",  "“" },
+	{ "rdquo",  "”" },
+	{ "nbsp", " " },
+	{ "trade", "™" },
+	{ "copy", "©" },
+	{ "reg", "®" },
+	{ "hellip", "…" },
+	{ "mdash", "—" },
+	{ "euro", "€" },
 	{ NULL, NULL }
 };
 
@@ -147,6 +150,71 @@ static RSSyl_HTMLSymbol tag_list[] = {
 	{ NULL, NULL }
 };
 
+static gchar *rssyl_replace_chrefs(gchar *string)
+{
+	char *new = g_malloc0(strlen(string)), *ret;
+	char buf[16], tmp[6];
+	int i, ii, j, n, len;
+	gunichar c;
+	gboolean valid, replaced;
+
+	/* &xx; */
+	ii = 0;
+	for (i = 0; i < strlen(string); ++i) {
+		if (string[i] == '&') {
+			j = i+1;
+			n = 0;
+			valid = FALSE;
+			while (string[j] != '\0' && j < 16) {
+				if (string[j] != ';') {
+					buf[n++] = string[j];
+				} else {
+					/* End of entity */
+					valid = TRUE;
+					buf[n] = '\0';
+					break;
+				}
+				j++;
+			}
+			if (strlen(buf) > 0 && valid) {
+				replaced = FALSE;
+
+				if (buf[0] == '#' && (c = atoi(buf+1)) > 0) {
+					len = g_unichar_to_utf8(c, tmp);
+					tmp[len] = '\0';
+					g_strlcat(new, tmp, strlen(string));
+					ii += len;
+					replaced = TRUE;
+				} else {
+					for (c = 0; symbol_list[c].key != NULL; c++) {
+						if (!strcmp(buf, symbol_list[c].key)) {
+							g_strlcat(new, symbol_list[c].val, strlen(string));
+							ii += strlen(symbol_list[c].val);
+							replaced = TRUE;
+							break;
+						}
+					}
+				}
+				if (!replaced) {
+					new[ii++] = '&'; /* & */
+					g_strlcat(new, buf, strlen(string));
+					ii += strlen(buf);
+					new[ii++] = ';';
+				}
+				i = j;
+			} else {
+				new[ii++] = string[i];
+			}
+		} else {
+			new[ii++] = string[i];
+		}
+	}
+
+	ret = g_strdup(new);
+	g_free(new);
+	return ret;
+}
+
 gchar *rssyl_replace_html_stuff(gchar *text,
 		gboolean symbols, gboolean tags)
 {
@@ -155,24 +223,18 @@ gchar *rssyl_replace_html_stuff(gchar *text,
 
 	g_return_val_if_fail(text != NULL, NULL);
 
-	wtext = g_strdup(text);
-
-	/* Ugly, needlessly traverses the string again and again. Probably
-	 * could use a rewrite. */
 	if( symbols ) {
-		for( i = 0; symbol_list[i].key != NULL; i++ ) {
-			if( g_strstr_len(text, strlen(text), symbol_list[i].key) ) {
-				tmp = rssyl_strreplace(wtext, symbol_list[i].key, symbol_list[i].val);
-				wtext = g_strdup(tmp);
-				g_free(tmp);
-			}
-		}
+		wtext = rssyl_replace_chrefs(text);
+	} else {
+		wtext = g_strdup(text);
 	}
 
+	/* TODO: rewrite this part to work similarly to rssyl_replace_chrefs() */
 	if( tags ) {
 		for( i = 0; tag_list[i].key != NULL; i++ ) {
 			if( g_strstr_len(text, strlen(text), symbol_list[i].key) ) {
 				tmp = rssyl_strreplace(wtext, tag_list[i].key, tag_list[i].val);
+				g_free(wtext);
 				wtext = g_strdup(tmp);
 				g_free(tmp);
 			}

-----------------------------------------------------------------------


hooks/post-receive
-- 
Claws Mail


More information about the Commits mailing list