[Commits] [SCM] claws branch, master, updated. 3.15.0-175-g8e9f89b
mones at claws-mail.org
mones at claws-mail.org
Wed Nov 15 20:37:03 CET 2017
The branch, master has been updated
via 8e9f89bef30238e178d71344e5135b030933956e (commit)
via 4d178a21c1d763945c6f27bc01814d58f7fa3dd7 (commit)
via 1285ad7143f2aa0cf1fc3b6bad8af828d49ffb4c (commit)
via d61f4bdfcbe0f6e081ce15f394f501818039d233 (commit)
from e61840c926adafa402746471347f9d7a233c5564 (commit)
Summary of changes:
src/Makefile.am | 2 +
src/entity.c | 403 +++++++++++++++++++++++++++++++++
src/{gtk/sslcertwindow.h => entity.h} | 30 ++-
src/html.c | 319 +-------------------------
src/html.h | 8 +-
src/plugins/rssyl/strutils.c | 77 +------
6 files changed, 441 insertions(+), 398 deletions(-)
create mode 100644 src/entity.c
copy src/{gtk/sslcertwindow.h => entity.h} (62%)
- Log -----------------------------------------------------------------
commit 8e9f89bef30238e178d71344e5135b030933956e
Author: Ricardo Mones <ricardo at mones.org>
Date: Mon Nov 13 21:11:33 2017 +0100
Use entity decoding API in HTML parser
diff --git a/src/html.c b/src/html.c
index 95f5a08..e982794 100644
--- a/src/html.c
+++ b/src/html.c
@@ -24,288 +24,12 @@
#include "html.h"
#include "codeconv.h"
#include "utils.h"
+#include "entity.h"
#define SC_HTMLBUFSIZE 8192
#define HR_STR "────────────────────────────────────────────────"
#define LI_STR "• "
-typedef struct _SC_HTMLSymbol SC_HTMLSymbol;
-
-struct _SC_HTMLSymbol
-{
- gchar *const key;
- gchar *const val;
-};
-
-static SC_HTMLSymbol symbol_list[] = {
- {"&#34;", "\42"},
- {"&#38;", "\46"},
- {"&#39;", "\47"},
- {"&#60;", "\74"},
- {"&#62;", "\76"},
- {"&#034;", "\42"},
- {"&#038;", "\46"},
- {"&#039;", "\47"},
- {"&#060;", "\74"},
- {"&#062;", "\76"},
- {"&#146;", "\47"},
- {"&#153;", "\342\204\242"},
- {"&#160;", "\40"},
- {"&#161;", "\302\241"},
- {"&#162;", "\302\242"},
- {"&#163;", "\302\243"},
- {"&#164;", "\302\244"},
- {"&#165;", "\302\245"},
- {"&#166;", "\302\246"},
- {"&#167;", "\302\247"},
- {"&#168;", "\302\250"},
- {"&#169;", "\302\251"},
- {"&#170;", "\302\252"},
- {"&#171;", "\302\253"},
- {"&#172;", "\302\254"},
- {"&#173;", "\302\255"},
- {"&#174;", "\302\256"},
- {"&#175;", "\302\257"},
- {"&#176;", "\302\260"},
- {"&#177;", "\302\261"},
- {"&#178;", "\302\262"},
- {"&#179;", "\302\263"},
- {"&#180;", "\302\264"},
- {"&#181;", "\302\265"},
- {"&#182;", "\302\266"},
- {"&#183;", "\302\267"},
- {"&#184;", "\302\270"},
- {"&#185;", "\302\271"},
- {"&#186;", "\302\272"},
- {"&#187;", "\302\273"},
- {"&#188;", "\302\274"},
- {"&#189;", "\302\275"},
- {"&#190;", "\302\276"},
- {"&#191;", "\302\277"},
- {"&#192;", "\303\200"},
- {"&#193;", "\303\201"},
- {"&#194;", "\303\202"},
- {"&#195;", "\303\203"},
- {"&#196;", "\303\204"},
- {"&#197;", "\303\205"},
- {"&#198;", "\303\206"},
- {"&#199;", "\303\207"},
- {"&#200;", "\303\210"},
- {"&#201;", "\303\211"},
- {"&#202;", "\303\212"},
- {"&#203;", "\303\213"},
- {"&#204;", "\303\214"},
- {"&#205;", "\303\215"},
- {"&#206;", "\303\216"},
- {"&#207;", "\303\217"},
- {"&#208;", "\303\220"},
- {"&#209;", "\303\221"},
- {"&#210;", "\303\222"},
- {"&#211;", "\303\223"},
- {"&#212;", "\303\224"},
- {"&#213;", "\303\225"},
- {"&#214;", "\303\226"},
- {"&#215;", "\303\227"},
- {"&#216;", "\303\230"},
- {"&#217;", "\303\231"},
- {"&#218;", "\303\232"},
- {"&#219;", "\303\233"},
- {"&#220;", "\303\234"},
- {"&#221;", "\303\235"},
- {"&#222;", "\303\236"},
- {"&#223;", "\303\237"},
- {"&#224;", "\303\240"},
- {"&#225;", "\303\241"},
- {"&#226;", "\303\242"},
- {"&#227;", "\303\243"},
- {"&#228;", "\303\244"},
- {"&#229;", "\303\245"},
- {"&#230;", "\303\246"},
- {"&#231;", "\303\247"},
- {"&#232;", "\303\250"},
- {"&#233;", "\303\251"},
- {"&#234;", "\303\252"},
- {"&#235;", "\303\253"},
- {"&#236;", "\303\254"},
- {"&#237;", "\303\255"},
- {"&#238;", "\303\256"},
- {"&#239;", "\303\257"},
- {"&#240;", "\303\260"},
- {"&#241;", "\303\261"},
- {"&#242;", "\303\262"},
- {"&#243;", "\303\263"},
- {"&#244;", "\303\264"},
- {"&#245;", "\303\265"},
- {"&#246;", "\303\266"},
- {"&#247;", "\303\267"},
- {"&#248;", "\303\270"},
- {"&#249;", "\303\271"},
- {"&#250;", "\303\272"},
- {"&#251;", "\303\273"},
- {"&#252;", "\303\274"},
- {"&#253;", "\303\275"},
- {"&#254;", "\303\276"},
- {"&#255;", "\303\277"},
- {"&#338;", "\305\222"},
- {"&#339;", "\305\223"},
- {"&#352;", "\305\240"},
- {"&#353;", "\305\241"},
- {"&#376;", "\305\270"},
- {"&#710;", "\313\206"},
- {"&#732;", "\313\234"},
- {"&#8194;", "\342\200\202"},
- {"&#8195;", "\342\200\203"},
- {"&#8201;", "\342\200\211"},
- {"&#8211;", "\342\200\223"},
- {"&#8212;", "\342\200\224"},
- {"&#8216;", "\342\200\230"},
- {"&#8217;", "\342\200\231"},
- {"&#8218;", "\342\200\232"},
- {"&#8220;", "\342\200\234"},
- {"&#8221;", "\342\200\235"},
- {"&#8222;", "\342\200\236"},
- {"&#8224;", "\342\200\240"},
- {"&#8225;", "\342\200\241"},
- {"&#8226;", "\342\200\242"},
- {"&#8230;", "\342\200\246"},
- {"&#8240;", "\342\200\260"},
- {"&#8249;", "\342\200\271"},
- {"&#8250;", "\342\200\272"},
- {"&#8364;", "\342\202\254"},
- {"&#8482;", "\342\204\242"},
- {"&quot;", "\42"},
- {"&amp;", "\46"},
- {"&apos;", "\47"},
- {"&lt;", "\74"},
- {"&gt;", "\76"},
- {"&squot;", "\47"},
- {"&nbsp;", "\40"},
- {"&iexcl;", "\302\241"},
- {"&cent;", "\302\242"},
- {"&pound;", "\302\243"},
- {"&curren;", "\302\244"},
- {"&yen;", "\302\245"},
- {"&brvbar;", "\302\246"},
- {"&sect;", "\302\247"},
- {"&uml;", "\302\250"},
- {"&copy;", "\302\251"},
- {"&ordf;", "\302\252"},
- {"&laquo;", "\302\253"},
- {"&not;", "\302\254"},
- {"&shy;", "\302\255"},
- {"&reg;", "\302\256"},
- {"&macr;", "\302\257"},
- {"&deg;", "\302\260"},
- {"&plusmn;", "\302\261"},
- {"&sup2;", "\302\262"},
- {"&sup3;", "\302\263"},
- {"&acute;", "\302\264"},
- {"&micro;", "\302\265"},
- {"&para;", "\302\266"},
- {"&middot;", "\302\267"},
- {"&cedil;", "\302\270"},
- {"&sup1;", "\302\271"},
- {"&ordm;", "\302\272"},
- {"&raquo;", "\302\273"},
- {"&frac14;", "\302\274"},
- {"&frac12;", "\302\275"},
- {"&frac34;", "\302\276"},
- {"&iquest;", "\302\277"},
- {"&Agrave;", "\303\200"},
- {"&Aacute;", "\303\201"},
- {"&Acirc;", "\303\202"},
- {"&Atilde;", "\303\203"},
- {"&Auml;", "\303\204"},
- {"&Aring;", "\303\205"},
- {"&AElig;", "\303\206"},
- {"&Ccedil;", "\303\207"},
- {"&Egrave;", "\303\210"},
- {"&Eacute;", "\303\211"},
- {"&Ecirc;", "\303\212"},
- {"&Euml;", "\303\213"},
- {"&Igrave;", "\303\214"},
- {"&Iacute;", "\303\215"},
- {"&Icirc;", "\303\216"},
- {"&Iuml;", "\303\217"},
- {"&ETH;", "\303\220"},
- {"&Ntilde;", "\303\221"},
- {"&Ograve;", "\303\222"},
- {"&Oacute;", "\303\223"},
- {"&Ocirc;", "\303\224"},
- {"&Otilde;", "\303\225"},
- {"&Ouml;", "\303\226"},
- {"&times;", "\303\227"},
- {"&Oslash;", "\303\230"},
- {"&Ugrave;", "\303\231"},
- {"&Uacute;", "\303\232"},
- {"&Ucirc;", "\303\233"},
- {"&Uuml;", "\303\234"},
- {"&Yacute;", "\303\235"},
- {"&THORN;", "\303\236"},
- {"&szlig;", "\303\237"},
- {"&agrave;", "\303\240"},
- {"&aacute;", "\303\241"},
- {"&acirc;", "\303\242"},
- {"&atilde;", "\303\243"},
- {"&auml;", "\303\244"},
- {"&aring;", "\303\245"},
- {"&aelig;", "\303\246"},
- {"&ccedil;", "\303\247"},
- {"&egrave;", "\303\250"},
- {"&eacute;", "\303\251"},
- {"&ecirc;", "\303\252"},
- {"&euml;", "\303\253"},
- {"&igrave;", "\303\254"},
- {"&iacute;", "\303\255"},
- {"&icirc;", "\303\256"},
- {"&iuml;", "\303\257"},
- {"&eth;", "\303\260"},
- {"&ntilde;", "\303\261"},
- {"&ograve;", "\303\262"},
- {"&oacute;", "\303\263"},
- {"&ocirc;", "\303\264"},
- {"&otilde;", "\303\265"},
- {"&ouml;", "\303\266"},
- {"&divide;", "\303\267"},
- {"&oslash;", "\303\270"},
- {"&ugrave;", "\303\271"},
- {"&uacute;", "\303\272"},
- {"&ucirc;", "\303\273"},
- {"&uuml;", "\303\274"},
- {"&yacute;", "\303\275"},
- {"&thorn;", "\303\276"},
- {"&yuml;", "\303\277"},
- {"&OElig;", "\305\222"},
- {"&oelig;", "\305\223"},
- {"&Scaron;", "\305\240"},
- {"&scaron;", "\305\241"},
- {"&Yuml;", "\305\270"},
- {"&circ;", "\313\206"},
- {"&tilde;", "\313\234"},
- {"&ensp;", "\342\200\202"},
- {"&emsp;", "\342\200\203"},
- {"&thinsp;", "\342\200\211"},
- {"&ndash;", "\342\200\223"},
- {"&mdash;", "\342\200\224"},
- {"&lsquo;", "\342\200\230"},
- {"&rsquo;", "\342\200\231"},
- {"&sbquo;", "\342\200\232"},
- {"&ldquo;", "\342\200\234"},
- {"&rdquo;", "\342\200\235"},
- {"&bdquo;", "\342\200\236"},
- {"&dagger;", "\342\200\240"},
- {"&Dagger;", "\342\200\241"},
- {"&bull;", "\342\200\242"},
- {"&hellip;", "\342\200\246"},
- {"&permil;", "\342\200\260"},
- {"&lsaquo;", "\342\200\271"},
- {"&rsaquo;", "\342\200\272"},
- {"&euro;", "\342\202\254"},
- {"&trade;", "\342\204\242"}
-};
-
-static GHashTable *default_symbol_table;
-
static SC_HTMLState sc_html_read_line (SC_HTMLParser *parser);
static void sc_html_append_char (SC_HTMLParser *parser,
gchar ch);
@@ -340,16 +64,6 @@ SC_HTMLParser *sc_html_parser_new(FILE *fp, CodeConverter *conv)
parser->pre = FALSE;
parser->indent = 0;
- if (!default_symbol_table) {
- gint i;
- default_symbol_table = g_hash_table_new(g_str_hash, g_str_equal);
- for (i = 0; i < sizeof(symbol_list) / sizeof(symbol_list[0]); i++)
- g_hash_table_insert(default_symbol_table,
- symbol_list[i].key, symbol_list[i].val);
- }
-
- parser->symbol_table = default_symbol_table;
-
return parser;
}
@@ -612,8 +326,7 @@ static void decode_href(SC_HTMLParser *parser)
tparser->str = g_string_new(NULL);
tparser->buf = g_string_new(parser->href);
tparser->bufp = tparser->buf->str;
- tparser->symbol_table = default_symbol_table;
-
+
tmp = sc_html_parse(tparser);
g_free(parser->href);
@@ -725,33 +438,21 @@ static SC_HTMLState sc_html_parse_tag(SC_HTMLParser *parser)
static void sc_html_parse_special(SC_HTMLParser *parser)
{
- gchar symbol_name[9];
- gint n;
- const gchar *val;
+ gchar *entity;
parser->state = SC_HTML_UNKNOWN;
cm_return_if_fail(*parser->bufp == '&');
- /* &foo; */
- for (n = 0; parser->bufp[n] != '\0' && parser->bufp[n] != ';'; n++)
- ;
- if (n > 7 || parser->bufp[n] != ';') {
+ entity = entity_decode(parser->bufp);
+ if (entity != NULL) {
+ sc_html_append_str(parser, entity, -1);
+ g_free(entity);
+ while (*parser->bufp++ != ';');
+ } else {
/* output literal `&' */
sc_html_append_char(parser, *parser->bufp++);
- parser->state = SC_HTML_NORMAL;
- return;
}
- strncpy2(symbol_name, parser->bufp, n + 2);
- parser->bufp += n + 1;
-
- if ((val = g_hash_table_lookup(parser->symbol_table, symbol_name))
- != NULL) {
- sc_html_append_str(parser, val, -1);
- parser->state = SC_HTML_NORMAL;
- return;
- }
-
- sc_html_append_str(parser, symbol_name, -1);
+ parser->state = SC_HTML_NORMAL;
}
static gchar *sc_html_find_tag(SC_HTMLParser *parser, const gchar *tag)
diff --git a/src/html.h b/src/html.h
index 98e2f3a..922389a 100644
--- a/src/html.h
+++ b/src/html.h
@@ -1,6 +1,6 @@
/*
- * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
- * Copyright (C) 1999-2012 Hiroyuki Yamamoto and the Claws Mail team
+ * Claws Mail -- a GTK+ based, lightweight, and fast e-mail client
+ * Copyright (C) 1999-2017 Hiroyuki Yamamoto and the Claws Mail team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -14,7 +14,6 @@
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
*/
#ifndef __HTML_H__
@@ -51,9 +50,6 @@ struct _SC_HTMLParser
FILE *fp;
CodeConverter *conv;
- GHashTable *symbol_table;
- GHashTable *alt_symbol_table;
-
GString *str;
GString *buf;
commit 4d178a21c1d763945c6f27bc01814d58f7fa3dd7
Author: Ricardo Mones <ricardo at mones.org>
Date: Mon Nov 13 20:40:11 2017 +0100
Rssyl: use new entity decoding API
diff --git a/src/plugins/rssyl/strutils.c b/src/plugins/rssyl/strutils.c
index fe8f148..7d172c8 100644
--- a/src/plugins/rssyl/strutils.c
+++ b/src/plugins/rssyl/strutils.c
@@ -30,6 +30,7 @@
/* Claws Mail includes */
#include <common/utils.h>
+#include <entity.h>
/* Local includes */
/* (shouldn't be any) */
@@ -120,28 +121,6 @@ struct _RSSyl_HTMLSymbol
gchar *const val;
};
-/* TODO: find a way to offload this to a library which knows all the
- * defined named entities (over 200). */
-static RSSyl_HTMLSymbol symbol_list[] = {
- { "lt", "<" },
- { "gt", ">" },
- { "amp", "&" },
- { "apos", "'" },
- { "quot", "\"" },
- { "lsquo", "‘" },
- { "rsquo", "’" },
- { "ldquo", "“" },
- { "rdquo", "”" },
- { "nbsp", " " },
- { "trade", "™" },
- { "copy", "©" },
- { "reg", "®" },
- { "hellip", "…" },
- { "mdash", "—" },
- { "euro", "€" },
- { NULL, NULL }
-};
-
static RSSyl_HTMLSymbol tag_list[] = {
{ "<cite>", "\"" },
{ "</cite>", "\"" },
@@ -160,55 +139,21 @@ static RSSyl_HTMLSymbol tag_list[] = {
static gchar *rssyl_replace_chrefs(gchar *string)
{
char *new = g_malloc0(strlen(string) + 1), *ret;
- char buf[16], tmp[6];
- int i, ii, j, n, len;
- gunichar c;
- gboolean valid, replaced;
+ gchar *entity;
+ int i, ii;
/* &xx; */
ii = 0;
for (i = 0; i < strlen(string); ++i) {
if (string[i] == '&') {
- j = i+1;
- n = 0;
- valid = FALSE;
- while (string[j] != '\0' && n < 16) {
- if (string[j] != ';') {
- buf[n++] = string[j];
- } else {
- /* End of entity */
- valid = TRUE;
- buf[n] = '\0';
- break;
- }
- j++;
- }
- if (strlen(buf) > 0 && valid) {
- replaced = FALSE;
-
- if (buf[0] == '#' && (c = atoi(buf+1)) > 0) {
- len = g_unichar_to_utf8(c, tmp);
- tmp[len] = '\0';
- g_strlcat(new, tmp, strlen(string));
- ii += len;
- replaced = TRUE;
- } else {
- for (c = 0; symbol_list[c].key != NULL; c++) {
- if (!strcmp(buf, symbol_list[c].key)) {
- g_strlcat(new, symbol_list[c].val, strlen(string));
- ii += strlen(symbol_list[c].val);
- replaced = TRUE;
- break;
- }
- }
- }
- if (!replaced) {
- new[ii++] = '&'; /* & */
- g_strlcat(new, buf, strlen(string));
- ii += strlen(buf);
- new[ii++] = ';';
- }
- i = j;
+ entity = entity_decode(&(string[i]));
+ if (entity != NULL) {
+ g_strlcat(new, entity, strlen(string));
+ ii += strlen(entity);
+ g_free(entity);
+ entity = NULL;
+ while (string[++i] != ';');
+ --i; /* loop will inc it again */
} else {
new[ii++] = string[i];
}
commit 1285ad7143f2aa0cf1fc3b6bad8af828d49ffb4c
Author: Ricardo Mones <ricardo at mones.org>
Date: Mon Nov 13 01:31:44 2017 +0100
Complete, normalize and fix table of entities
https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
diff --git a/src/entity.c b/src/entity.c
index cc72291..98a01e4 100644
--- a/src/entity.c
+++ b/src/entity.c
@@ -37,137 +37,285 @@ struct _EntitySymbol
gchar *const value;
};
+/* in alphabetical order with upper-case version first */
static EntitySymbol symbolic_entities[] = {
- /* in alphabetical order with upper-case version first */
- {"Aacute", "\303\201"},
- {"aacute", "\303\241"},
- {"Acirc", "\303\202"},
- {"acirc", "\303\242"},
- {"acute", "\302\264"},
- {"AElig", "\303\206"},
- {"aelig", "\303\246"},
- {"Agrave", "\303\200"},
- {"agrave", "\303\240"},
- {"amp", "&" },
- {"apos", "'" },
- {"Aring", "\303\205"},
- {"aring", "\303\245"},
- {"Atilde", "\303\203"},
- {"atilde", "\303\243"},
- {"Auml", "\303\204"},
- {"auml", "\303\244"},
- {"bdquo", "\342\200\236"},
- {"brvbar", "\302\246"},
- {"bull", "\342\200\242"},
- {"Ccedil", "\303\207"},
- {"ccedil", "\303\247"},
- {"cedil", "\302\270"},
- {"cent", "\302\242"},
- {"circ", "\313\206"},
- {"copy", "©" },
- {"curren", "\302\244"},
- {"dagger", "\342\200\240"},
- {"Dagger", "\342\200\241"},
- {"deg", "\302\260"},
- {"divide", "\303\267"},
- {"Eacute", "\303\211"},
- {"eacute", "\303\251"},
- {"Ecirc", "\303\212"},
- {"ecirc", "\303\252"},
- {"Egrave", "\303\210"},
- {"egrave", "\303\250"},
- {"emsp", "\342\200\203"},
- {"ensp", "\342\200\202"},
- {"ETH", "\303\220"},
- {"eth", "\303\260"},
- {"Euml", "\303\213"},
- {"euml", "\303\253"},
- {"euro", "€" },
- {"frac12", "\302\275"},
- {"frac14", "\302\274"},
- {"frac34", "\302\276"},
- {"gt", ">" },
- {"hellip", "…" },
- {"Iacute", "\303\215"},
- {"iacute", "\303\255"},
- {"Icirc", "\303\216"},
- {"icirc", "\303\256"},
- {"iexcl", "\302\241"},
- {"Igrave", "\303\214"},
- {"igrave", "\303\254"},
- {"iquest", "\302\277"},
- {"Iuml", "\303\217"},
- {"iuml", "\303\257"},
- {"laquo", "\302\253"},
- {"ldquo", "“" },
- {"lsaquo", "\342\200\271"},
- {"lsquo", "‘" },
- {"lt", "<" },
- {"macr", "\302\257"},
- {"mdash", "—" },
- {"micro", "\302\265"},
- {"middot", "\302\267"},
- {"nbsp", " " },
- {"ndash", "\342\200\223"},
- {"not", "\302\254"},
- {"Ntilde", "\303\221"},
- {"ntilde", "\303\261"},
- {"Oacute", "\303\223"},
- {"oacute", "\303\263"},
- {"Ocirc", "\303\224"},
- {"ocirc", "\303\264"},
- {"OElig", "\305\222"},
- {"oelig", "\305\223"},
- {"Ograve", "\303\222"},
- {"ograve", "\303\262"},
- {"ordf", "\302\252"},
- {"ordm", "\302\272"},
- {"Oslash", "\303\230"},
- {"oslash", "\303\270"},
- {"Otilde", "\303\225"},
- {"otilde", "\303\265"},
- {"Ouml", "\303\226"},
- {"ouml", "\303\266"},
- {"para", "\302\266"},
- {"permil", "\342\200\260"},
- {"plusmn", "\302\261"},
- {"pound", "\302\243"},
- {"quot", "\"" },
- {"raquo", "\302\273"},
- {"rdquo", "”" },
- {"reg", "®" },
- {"rsaquo", "\342\200\272"},
- {"rsquo", "’" },
- {"sbquo", "\342\200\232"},
- {"Scaron", "\305\240"},
- {"scaron", "\305\241"},
- {"sect", "\302\247"},
- {"shy", "\302\255"},
- {"squot", "\47"},
- {"sup1", "\302\271"},
- {"sup2", "\302\262"},
- {"sup3", "\302\263"},
- {"szlig", "\303\237"},
- {"thinsp", "\342\200\211"},
- {"THORN", "\303\236"},
- {"thorn", "\303\276"},
- {"tilde", "\313\234"},
- {"times", "\303\227"},
- {"trade", "™" },
- {"Uacute", "\303\232"},
- {"uacute", "\303\272"},
- {"Ucirc", "\303\233"},
- {"ucirc", "\303\273"},
- {"Ugrave", "\303\231"},
- {"ugrave", "\303\271"},
- {"uml", "\302\250"},
- {"Uuml", "\303\234"},
- {"uuml", "\303\274"},
- {"Yacute", "\303\235"},
- {"yacute", "\303\275"},
- {"yen", "\302\245"},
- {"yuml", "\303\277"},
- {"Yuml", "\305\270"},
+ /* A */
+ {"Aacute", "Á"},
+ {"aacute", "á"},
+ {"Acirc", "Â"},
+ {"acirc", "â"},
+ {"acute", "´"},
+ {"AElig", "Æ"},
+ {"aelig", "æ"},
+ {"Agrave", "À"},
+ {"agrave", "à"},
+ {"alefsym", "ℵ"},
+ {"Alpha", "Α"},
+ {"alpha", "α"},
+ {"amp", "&"},
+ {"and", "∧"},
+ {"ang", "∠"},
+ {"apos", "'"},
+ {"Aring", "Å"},
+ {"aring", "å"},
+ {"asymp", "≈"},
+ {"Atilde", "Ã"},
+ {"atilde", "ã"},
+ {"Auml", "Ä"},
+ {"auml", "ä"},
+ /* B */
+ {"bdquo", "„"},
+ {"Beta", "Β"},
+ {"beta", "β"},
+ {"brvbar", "¦"},
+ {"bull", "•"},
+ /* C */
+ {"cap", "∩"},
+ {"Ccedil", "Ç"},
+ {"ccedil", "ç"},
+ {"cedil", "¸"},
+ {"cent", "¢"},
+ {"Chi", "Χ"},
+ {"chi", "χ"},
+ {"circ", "ˆ"},
+ {"clubs", "♣"},
+ {"cong", "≅"},
+ {"copy", "©"},
+ {"crarr", "↵"},
+ {"cup", "∪"},
+ {"curren", "¤"},
+ /* D */
+ {"dagger", "†"},
+ {"Dagger", "‡"},
+ {"dArr", "⇓"},
+ {"darr", "↓"},
+ {"deg", "°"},
+ {"Delta", "Δ"},
+ {"delta", "δ"},
+ {"diams", "♦"},
+ {"divide", "÷"},
+ /* E */
+ {"Eacute", "É"},
+ {"eacute", "é"},
+ {"Ecirc", "Ê"},
+ {"ecirc", "ê"},
+ {"Egrave", "È"},
+ {"egrave", "è"},
+ {"empty", "∅"},
+ {"emsp", "\xE2\x80\x83"},
+ {"ensp", "\xE2\x80\x82"},
+ {"Epsilon", "Ε"},
+ {"epsilon", "ε"},
+ {"equiv", "≡"},
+ {"Eta", "Η"},
+ {"eta", "η"},
+ {"ETH", "Ð"},
+ {"eth", "ð"},
+ {"Euml", "Ë"},
+ {"euml", "ë"},
+ {"euro", "€"},
+ {"exist", "∃"},
+ /* F */
+ {"fnof", "ƒ"},
+ {"forall", "∀"},
+ {"frac12", "½"},
+ {"frac14", "¼"},
+ {"frac34", "¾"},
+ {"frasl", "⁄"},
+ /* G */
+ {"Gamma", "Γ"},
+ {"gamma", "γ"},
+ {"ge", "≥"},
+ {"gt", ">"},
+ /* H */
+ {"hArr", "⇔"},
+ {"harr", "↔"},
+ {"hearts", "♥"},
+ {"hellip", "…"},
+ /* I */
+ {"Iacute", "Í"},
+ {"iacute", "í"},
+ {"IArr", "⇐"},
+ {"Icirc", "Î"},
+ {"icirc", "î"},
+ {"iexcl", "¡"},
+ {"Igrave", "Ì"},
+ {"igrave", "ì"},
+ {"image", "ℑ"},
+ {"infin", "∞"},
+ {"int", "∫"},
+ {"Iota", "Ι"},
+ {"iota", "ι"},
+ {"iquest", "¿"},
+ {"isin", "∈"},
+ {"Iuml", "Ï"},
+ {"iuml", "ï"},
+ /* K */
+ {"Kappa", "Κ"},
+ {"kappa", "κ"},
+ /* L */
+ {"Lambda", "Λ"},
+ {"lambda", "λ"},
+ {"lang", "〈"},
+ {"laquo", "«"},
+ {"larr", "←"},
+ {"lceil", "⌈"},
+ {"ldquo", "“"},
+ {"le", "≤"},
+ {"lfloor", "⌊"},
+ {"lowast", "∗"},
+ {"loz", "◊"},
+ {"lrm", "\xE2\x80\x8E"},
+ {"lsaquo", "‹"},
+ {"lsquo", "‘"},
+ {"lt", "<"},
+ /* M */
+ {"macr", "¯"},
+ {"mdash", "—"},
+ {"micro", "µ"},
+ {"middot", "·"},
+ {"minus", "−"},
+ {"Mu", "Μ"},
+ {"mu", "μ"},
+ /* N */
+ {"nabla", "∇"},
+ {"nbsp", "\xC2\xA0"},
+ {"ndash", "–"},
+ {"ne", "≠"},
+ {"ni", "∋"},
+ {"not", "¬"},
+ {"notin", "∉"},
+ {"nsub", "⊄"},
+ {"Ntilde", "Ñ"},
+ {"ntilde", "ñ"},
+ {"Nu", "Ν"},
+ {"nu", "ν"},
+ /* O */
+ {"Oacute", "Ó"},
+ {"oacute", "ó"},
+ {"Ocirc", "Ô"},
+ {"ocirc", "ô"},
+ {"OElig", "Œ"},
+ {"oelig", "œ"},
+ {"Ograve", "Ò"},
+ {"ograve", "ò"},
+ {"oline", "‾"},
+ {"Omega", "Ω"},
+ {"omega", "ω"},
+ {"Omicron", "Ο"},
+ {"omicron", "ο"},
+ {"oplus", "⊕"},
+ {"or", "∨"},
+ {"ordf", "ª"},
+ {"ordm", "º"},
+ {"Oslash", "Ø"},
+ {"oslash", "ø"},
+ {"Otilde", "Õ"},
+ {"otilde", "õ"},
+ {"otimes", "⊗"},
+ {"Ouml", "Ö"},
+ {"ouml", "ö"},
+ /* P */
+ {"para", "¶"},
+ {"part", "∂"},
+ {"permil", "‰"},
+ {"perp", "⊥"},
+ {"Phi", "Φ"},
+ {"phi", "φ"},
+ {"Pi", "Π"},
+ {"pi", "π"},
+ {"piv", "ϖ"},
+ {"plusmn", "±"},
+ {"pound", "£"},
+ {"Prime", "″"},
+ {"prime", "′"},
+ {"prod", "∏"},
+ {"prop", "∝"},
+ {"Psi", "Ψ"},
+ {"psi", "ψ"},
+ /* Q */
+ {"quot", "\""},
+ /* R */
+ {"radic", "√"},
+ {"rang", "〉"},
+ {"raquo", "»"},
+ {"rArr", "⇒"},
+ {"rarr", "→"},
+ {"rceil", "⌉"},
+ {"rdquo", "”"},
+ {"real", "ℜ"},
+ {"reg", "®"},
+ {"rfloor", "⌋"},
+ {"Rho", "Ρ"},
+ {"rho", "ρ"},
+ {"rlm", "\xE2\x80\x8F"},
+ {"rsaquo", "›"},
+ {"rsquo", "’"},
+ /* S */
+ {"sbquo", "‚"},
+ {"Scaron", "Š"},
+ {"scaron", "š"},
+ {"sdot", "⋅"},
+ {"sect", "§"},
+ {"shy", "\xC2\xAD"},
+ {"Sigma", "Σ"},
+ {"sigma", "σ"},
+ {"sigmaf", "ς"},
+ {"sim", "∼"},
+ {"spades", "♠"},
+ {"sub", "⊂"},
+ {"sube", "⊆"},
+ {"sum", "∑"},
+ {"sup", "⊃"},
+ {"sup1", "¹"},
+ {"sup2", "²"},
+ {"sup3", "³"},
+ {"supe", "⊇"},
+ {"szlig", "ß"},
+ /* T */
+ {"Tau", "Τ"},
+ {"tau", "τ"},
+ {"there4", "∴"},
+ {"Theta", "Θ"},
+ {"theta", "θ"},
+ {"thetasym", "ϑ"},
+ {"thinsp", "\xE2\x80\x89"},
+ {"THORN", "Þ"},
+ {"thorn", "þ"},
+ {"tilde", "˜"},
+ {"times", "×"},
+ {"trade", "™"},
+ /* U */
+ {"Uacute", "Ú"},
+ {"uacute", "ú"},
+ {"uArr", "⇑"},
+ {"uarr", "↑"},
+ {"Ucirc", "Û"},
+ {"ucirc", "û"},
+ {"Ugrave", "Ù"},
+ {"ugrave", "ù"},
+ {"uml", "¨"},
+ {"upsih", "ϒ"},
+ {"Upsilon", "Υ"},
+ {"upsilon", "υ"},
+ {"Uuml", "Ü"},
+ {"uuml", "ü"},
+ /* W */
+ {"weierp", "℘"},
+ /* X */
+ {"Xi", "Ξ"},
+ {"xi", "ξ"},
+ /* Y */
+ {"Yacute", "Ý"},
+ {"yacute", "ý"},
+ {"yen", "¥"},
+ {"Yuml", "Ÿ"},
+ {"yuml", "ÿ"},
+ /* Z */
+ {"Zeta", "Ζ"},
+ {"zeta", "ζ"},
+ {"zwj", "\xE2\x80\x8D"},
+ {"zwnj", "\xE2\x80\x8C"},
{NULL, NULL}
};
commit d61f4bdfcbe0f6e081ce15f394f501818039d233
Author: Ricardo Mones <ricardo at mones.org>
Date: Mon Nov 6 23:41:27 2017 +0100
Implement HTML entity decoding in one function
Content of symbols table remixed from existing html.c and
rssyl/strutils.c tables.
diff --git a/src/Makefile.am b/src/Makefile.am
index 1db2b0d..d4eafbc 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -142,6 +142,7 @@ claws_mail_SOURCES = \
displayheader.c \
edittags.c \
enriched.c \
+ entity.c \
export.c \
file_checker.c \
filtering.c \
@@ -260,6 +261,7 @@ claws_mailinclude_HEADERS = \
displayheader.h \
edittags.h \
enriched.h \
+ entity.h \
export.h \
filtering.h \
folder.h \
diff --git a/src/entity.c b/src/entity.c
new file mode 100644
index 0000000..cc72291
--- /dev/null
+++ b/src/entity.c
@@ -0,0 +1,255 @@
+/*
+ * Claws Mail -- a GTK+ based, lightweight, and fast e-mail client
+ * Copyright (C) 2017 Ricardo Mones and the Claws Mail team
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#include "claws-features.h"
+#endif
+
+#include "defs.h"
+#include "utils.h"
+#include "entity.h"
+
+#define ENTITY_MAX_LEN 8
+#define DECODED_MAX_LEN 6
+
+static GHashTable *symbol_table = NULL;
+
+typedef struct _EntitySymbol EntitySymbol;
+
+struct _EntitySymbol
+{
+ gchar *const key;
+ gchar *const value;
+};
+
+static EntitySymbol symbolic_entities[] = {
+ /* in alphabetical order with upper-case version first */
+ {"Aacute", "\303\201"},
+ {"aacute", "\303\241"},
+ {"Acirc", "\303\202"},
+ {"acirc", "\303\242"},
+ {"acute", "\302\264"},
+ {"AElig", "\303\206"},
+ {"aelig", "\303\246"},
+ {"Agrave", "\303\200"},
+ {"agrave", "\303\240"},
+ {"amp", "&" },
+ {"apos", "'" },
+ {"Aring", "\303\205"},
+ {"aring", "\303\245"},
+ {"Atilde", "\303\203"},
+ {"atilde", "\303\243"},
+ {"Auml", "\303\204"},
+ {"auml", "\303\244"},
+ {"bdquo", "\342\200\236"},
+ {"brvbar", "\302\246"},
+ {"bull", "\342\200\242"},
+ {"Ccedil", "\303\207"},
+ {"ccedil", "\303\247"},
+ {"cedil", "\302\270"},
+ {"cent", "\302\242"},
+ {"circ", "\313\206"},
+ {"copy", "©" },
+ {"curren", "\302\244"},
+ {"dagger", "\342\200\240"},
+ {"Dagger", "\342\200\241"},
+ {"deg", "\302\260"},
+ {"divide", "\303\267"},
+ {"Eacute", "\303\211"},
+ {"eacute", "\303\251"},
+ {"Ecirc", "\303\212"},
+ {"ecirc", "\303\252"},
+ {"Egrave", "\303\210"},
+ {"egrave", "\303\250"},
+ {"emsp", "\342\200\203"},
+ {"ensp", "\342\200\202"},
+ {"ETH", "\303\220"},
+ {"eth", "\303\260"},
+ {"Euml", "\303\213"},
+ {"euml", "\303\253"},
+ {"euro", "€" },
+ {"frac12", "\302\275"},
+ {"frac14", "\302\274"},
+ {"frac34", "\302\276"},
+ {"gt", ">" },
+ {"hellip", "…" },
+ {"Iacute", "\303\215"},
+ {"iacute", "\303\255"},
+ {"Icirc", "\303\216"},
+ {"icirc", "\303\256"},
+ {"iexcl", "\302\241"},
+ {"Igrave", "\303\214"},
+ {"igrave", "\303\254"},
+ {"iquest", "\302\277"},
+ {"Iuml", "\303\217"},
+ {"iuml", "\303\257"},
+ {"laquo", "\302\253"},
+ {"ldquo", "“" },
+ {"lsaquo", "\342\200\271"},
+ {"lsquo", "‘" },
+ {"lt", "<" },
+ {"macr", "\302\257"},
+ {"mdash", "—" },
+ {"micro", "\302\265"},
+ {"middot", "\302\267"},
+ {"nbsp", " " },
+ {"ndash", "\342\200\223"},
+ {"not", "\302\254"},
+ {"Ntilde", "\303\221"},
+ {"ntilde", "\303\261"},
+ {"Oacute", "\303\223"},
+ {"oacute", "\303\263"},
+ {"Ocirc", "\303\224"},
+ {"ocirc", "\303\264"},
+ {"OElig", "\305\222"},
+ {"oelig", "\305\223"},
+ {"Ograve", "\303\222"},
+ {"ograve", "\303\262"},
+ {"ordf", "\302\252"},
+ {"ordm", "\302\272"},
+ {"Oslash", "\303\230"},
+ {"oslash", "\303\270"},
+ {"Otilde", "\303\225"},
+ {"otilde", "\303\265"},
+ {"Ouml", "\303\226"},
+ {"ouml", "\303\266"},
+ {"para", "\302\266"},
+ {"permil", "\342\200\260"},
+ {"plusmn", "\302\261"},
+ {"pound", "\302\243"},
+ {"quot", "\"" },
+ {"raquo", "\302\273"},
+ {"rdquo", "”" },
+ {"reg", "®" },
+ {"rsaquo", "\342\200\272"},
+ {"rsquo", "’" },
+ {"sbquo", "\342\200\232"},
+ {"Scaron", "\305\240"},
+ {"scaron", "\305\241"},
+ {"sect", "\302\247"},
+ {"shy", "\302\255"},
+ {"squot", "\47"},
+ {"sup1", "\302\271"},
+ {"sup2", "\302\262"},
+ {"sup3", "\302\263"},
+ {"szlig", "\303\237"},
+ {"thinsp", "\342\200\211"},
+ {"THORN", "\303\236"},
+ {"thorn", "\303\276"},
+ {"tilde", "\313\234"},
+ {"times", "\303\227"},
+ {"trade", "™" },
+ {"Uacute", "\303\232"},
+ {"uacute", "\303\272"},
+ {"Ucirc", "\303\233"},
+ {"ucirc", "\303\273"},
+ {"Ugrave", "\303\231"},
+ {"ugrave", "\303\271"},
+ {"uml", "\302\250"},
+ {"Uuml", "\303\234"},
+ {"uuml", "\303\274"},
+ {"Yacute", "\303\235"},
+ {"yacute", "\303\275"},
+ {"yen", "\302\245"},
+ {"yuml", "\303\277"},
+ {"Yuml", "\305\270"},
+ {NULL, NULL}
+};
+
+static gchar* entity_extract_to_buffer(gchar *p, gchar b[])
+{
+ gint i = 0;
+
+ while (*p != '\0' && *p != ';' && i < ENTITY_MAX_LEN) {
+ b[i] = *p;
+ ++i, ++p;
+ }
+ if (*p != ';' || i == ENTITY_MAX_LEN)
+ return NULL;
+ b[i] = '\0';
+
+ return b;
+}
+
+static gchar *entity_decode_numeric(gchar *str)
+{
+ gchar b[ENTITY_MAX_LEN];
+ gchar *p = str, *res;
+ gboolean hex = FALSE;
+ gunichar c;
+
+ ++p;
+ if (*p == '\0')
+ return NULL;
+
+ if (*p == 'x') {
+ hex = TRUE;
+ ++p;
+ if (*p == '\0')
+ return NULL;
+ }
+
+ if (entity_extract_to_buffer (p, b) == NULL)
+ return NULL;
+
+ c = g_ascii_strtoll (b, NULL, (hex? 16: 10));
+ res = g_malloc0 (DECODED_MAX_LEN + 1);
+ g_unichar_to_utf8 (c, res);
+
+ return res;
+}
+
+static gchar *entity_decode_symbol(gchar *str)
+{
+ gchar b[ENTITY_MAX_LEN];
+ gchar *decoded;
+
+ if (entity_extract_to_buffer (str, b) == NULL)
+ return NULL;
+
+ if (symbol_table == NULL) {
+ gint i;
+
+ symbol_table = g_hash_table_new (g_str_hash, g_str_equal);
+ for (i = 0; symbolic_entities[i].key != NULL; ++i) {
+ g_hash_table_insert (symbol_table,
+ symbolic_entities[i].key, symbolic_entities[i].value);
+ }
+ debug_print("initialized entities table with %d symbols\n", i);
+ }
+
+ decoded = g_hash_table_lookup (symbol_table, b);
+ if (decoded != NULL)
+ return g_strdup (decoded);
+
+ return NULL;
+}
+
+gchar *entity_decode(gchar *str)
+{
+ gchar *p = str;
+ if (p == NULL || *p != '&')
+ return NULL;
+ ++p;
+ if (*p == '\0')
+ return NULL;
+ if (*p == '#')
+ return entity_decode_numeric(p);
+ else
+ return entity_decode_symbol(p);
+}
diff --git a/src/entity.h b/src/entity.h
new file mode 100644
index 0000000..9e3e492
--- /dev/null
+++ b/src/entity.h
@@ -0,0 +1,33 @@
+/*
+ * Claws Mail -- a GTK+ based, lightweight, and fast e-mail client
+ * Copyright (C) 2017 Ricardo Mones and the Claws Mail team
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ENTITY_H__
+#define __ENTITY_H__
+
+#include <glib.h>
+
+/*
+ * Try to decode the HTML entity pointed by str, whose first element
+ * must be the '&' character.
+ *
+ * Returns a newly-allocated string with the decoded entity or NULL
+ * on failure to decode (like an unknown or invalid entity).
+ * Returned strings must be freed with g_free().
+ */
+gchar *entity_decode(gchar *str);
+
+#endif /* __ENTITY_H__ */
-----------------------------------------------------------------------
hooks/post-receive
--
Claws Mail
More information about the Commits
mailing list