[Commits] [SCM] claws branch, master, updated. 3.11.1-30-g491667f
ticho at claws-mail.org
ticho at claws-mail.org
Thu Nov 27 01:05:42 CET 2014
The branch, master has been updated
via 491667f21ad76f34145ac78e7682f115763fe1e0 (commit)
from a10cf7518f9d97e9925a9807dd9f3da283225776 (commit)
Summary of changes:
src/plugins/rssyl/strutils.c | 114 ++++++++++++++++++++++++++++++++----------
1 file changed, 88 insertions(+), 26 deletions(-)
- Log -----------------------------------------------------------------
commit 491667f21ad76f34145ac78e7682f115763fe1e0
Author: Andrej Kacian <ticho at claws-mail.org>
Date: Thu Nov 27 01:00:11 2014 +0100
RSSyl: Wrote more robust replacing of HTML character references. We now handle decimal references (&#1234;), in addition to a few selected named references.
diff --git a/src/plugins/rssyl/strutils.c b/src/plugins/rssyl/strutils.c
index 3d9f1c5..b2704d9 100644
--- a/src/plugins/rssyl/strutils.c
+++ b/src/plugins/rssyl/strutils.c
@@ -113,22 +113,25 @@ struct _RSSyl_HTMLSymbol
gchar *const val;
};
+/* TODO: find a way to offload this to a library which knows all the
+ * defined named entities (over 200). */
static RSSyl_HTMLSymbol symbol_list[] = {
- { "&lt;", "<" },
- { "&gt;", ">" },
- { "&amp;", "&" },
- { "&quot;", "\"" },
- { "&lsquo;", "'" },
- { "&rsquo;", "'" },
- { "&ldquo;", "\"" },
- { "&rdquo;", "\"" },
- { "&nbsp;", " " },
- { "&trade;", "(TM)" },
- { "&#153;", "(TM)" },
- { "&#39;", "'" },
- { "&hellip;", "..." },
- { "&#8230;", "..." },
- { "&mdash;", "-" },
+ { "lt", "<" },
+ { "gt", ">" },
+ { "amp", "&" },
+ { "apos", "'" },
+ { "quot", "\"" },
+ { "lsquo", "‘" },
+ { "rsquo", "’" },
+ { "ldquo", "“" },
+ { "rdquo", "”" },
+ { "nbsp", " " },
+ { "trade", "™" },
+ { "copy", "©" },
+ { "reg", "®" },
+ { "hellip", "…" },
+ { "mdash", "—" },
+ { "euro", "€" },
{ NULL, NULL }
};
@@ -147,6 +150,71 @@ static RSSyl_HTMLSymbol tag_list[] = {
{ NULL, NULL }
};
+static gchar *rssyl_replace_chrefs(gchar *string)
+{
+ char *new = g_malloc0(strlen(string)), *ret;
+ char buf[16], tmp[6];
+ int i, ii, j, n, len;
+ gunichar c;
+ gboolean valid, replaced;
+
+ /* &xx; */
+ ii = 0;
+ for (i = 0; i < strlen(string); ++i) {
+ if (string[i] == '&') {
+ j = i+1;
+ n = 0;
+ valid = FALSE;
+ while (string[j] != '\0' && j < 16) {
+ if (string[j] != ';') {
+ buf[n++] = string[j];
+ } else {
+ /* End of entity */
+ valid = TRUE;
+ buf[n] = '\0';
+ break;
+ }
+ j++;
+ }
+ if (strlen(buf) > 0 && valid) {
+ replaced = FALSE;
+
+ if (buf[0] == '#' && (c = atoi(buf+1)) > 0) {
+ len = g_unichar_to_utf8(c, tmp);
+ tmp[len] = '\0';
+ g_strlcat(new, tmp, strlen(string));
+ ii += len;
+ replaced = TRUE;
+ } else {
+ for (c = 0; symbol_list[c].key != NULL; c++) {
+ if (!strcmp(buf, symbol_list[c].key)) {
+ g_strlcat(new, symbol_list[c].val, strlen(string));
+ ii += strlen(symbol_list[c].val);
+ replaced = TRUE;
+ break;
+ }
+ }
+ }
+ if (!replaced) {
+ new[ii++] = '&'; /* & */
+ g_strlcat(new, buf, strlen(string));
+ ii += strlen(buf);
+ new[ii++] = ';';
+ }
+ i = j;
+ } else {
+ new[ii++] = string[i];
+ }
+ } else {
+ new[ii++] = string[i];
+ }
+ }
+
+ ret = g_strdup(new);
+ g_free(new);
+ return ret;
+}
+
gchar *rssyl_replace_html_stuff(gchar *text,
gboolean symbols, gboolean tags)
{
@@ -155,24 +223,18 @@ gchar *rssyl_replace_html_stuff(gchar *text,
g_return_val_if_fail(text != NULL, NULL);
- wtext = g_strdup(text);
-
- /* Ugly, needlessly traverses the string again and again. Probably
- * could use a rewrite. */
if( symbols ) {
- for( i = 0; symbol_list[i].key != NULL; i++ ) {
- if( g_strstr_len(text, strlen(text), symbol_list[i].key) ) {
- tmp = rssyl_strreplace(wtext, symbol_list[i].key, symbol_list[i].val);
- wtext = g_strdup(tmp);
- g_free(tmp);
- }
- }
+ wtext = rssyl_replace_chrefs(text);
+ } else {
+ wtext = g_strdup(text);
}
+ /* TODO: rewrite this part to work similarly to rssyl_replace_chrefs() */
if( tags ) {
for( i = 0; tag_list[i].key != NULL; i++ ) {
if( g_strstr_len(text, strlen(text), symbol_list[i].key) ) {
tmp = rssyl_strreplace(wtext, tag_list[i].key, tag_list[i].val);
+ g_free(wtext);
wtext = g_strdup(tmp);
g_free(tmp);
}
-----------------------------------------------------------------------
hooks/post-receive
--
Claws Mail
More information about the Commits
mailing list