summaryrefslogtreecommitdiffstats
path: root/src/rccenca.c
diff options
context:
space:
mode:
author Suren A. Chilingaryan <csa@dside.dyndns.org> 2007-06-27 09:28:22 +0000
committer Suren A. Chilingaryan <csa@dside.dyndns.org> 2007-06-27 09:28:22 +0000
commit 35381569403e90b8d34b223f524519521bc81598 (patch)
tree 924527ab503a59400cfd96859e101ce8234eabd0 /src/rccenca.c
parent fd502754926131e3562a2210ff81af111ccaf867 (diff)
download librcc-35381569403e90b8d34b223f524519521bc81598.tar.gz
librcc-35381569403e90b8d34b223f524519521bc81598.tar.bz2
librcc-35381569403e90b8d34b223f524519521bc81598.tar.xz
librcc-35381569403e90b8d34b223f524519521bc81598.zip
Engines rework
- LibGuess support for Far East language autodetection - Support for LibRCD 0.1.9, which supports ISO-8859-1 strings - Fix wrong encoding names returned by Enca - Engine plugin naming scheme is altered - New API functions: rccEngineGetInfo, rccEngineGetAutoCharsetByName - Most languages are no longer hardcoded, but moved to the configuration - RCD engine is added for the Belarusian language (I hope it works) - Some encoding names are fixed in the configuration - Support for external libiconv - Support for libcharset - Determine the UI language from the LC_MESSAGES locale - Simple compilation fix (thanks to D. Panov)
Diffstat (limited to 'src/rccenca.c')
-rw-r--r-- src/rccenca.c 48
1 files changed, 46 insertions, 2 deletions
diff --git a/src/rccenca.c b/src/rccenca.c
index 28d3ccf..e46847e 100644
--- a/src/rccenca.c
+++ b/src/rccenca.c
@@ -20,6 +20,41 @@ static rcc_library_handle enca_handle = NULL;
#endif /* RCC_ENCA_DYNAMIC */
static rcc_engine *enca_engines = NULL;
+
+/* Charsets for which Enca provides no iconv-style name: { language code or NULL for any language, Enca-style name, name to use instead }; the all-NULL entry ends the table. CORK and KEYBCS2 are missing. */
+rcc_enca_corrections rcc_enca_missing_corrections[] = {
+ { "be", "KOI8-UNI", "ISO-IR-111" },
+ { NULL, "macce", "MACCENTRALEUROPE" },
+ { "zh", "HZ", "HZ" },
+ { "sk", "KOI-8_CS_2", "CSKOI8R" },
+ { NULL, NULL, NULL }
+};
+/* iconv-style names returned by Enca that have to be replaced: { language code or NULL for any language, name returned by Enca, name to use instead }; the all-NULL entry ends the table. */
+rcc_enca_corrections rcc_enca_error_corrections[] = {
+ { NULL, "ECMA-cyrillic", "ISO-IR-111" },
+ { NULL, NULL, NULL }
+};
+
+/* Returns the iconv_charset of the first rcc_enca_error_corrections entry whose enca_charset equals charset and whose lang is NULL or equals a non-NULL lang; returns charset itself when no entry matches. charset must not be NULL (it is passed to strcmp). */
+static const char *rccEncaGetCorrection(const char *lang, const char *charset) {
+ int i;
+ for (i=0;rcc_enca_error_corrections[i].enca_charset;i++) {
+ if (((!rcc_enca_error_corrections[i].lang)||((lang)&&(!strcmp(lang, rcc_enca_error_corrections[i].lang))))&&(!strcmp(charset, rcc_enca_error_corrections[i].enca_charset)))
+ return rcc_enca_error_corrections[i].iconv_charset;
+ }
+ return charset;
+}
+/* Returns the iconv_charset of the first rcc_enca_missing_corrections entry whose enca_charset equals charset and whose lang is NULL or equals a non-NULL lang; returns charset itself when no entry matches. charset must not be NULL (it is passed to strcmp). */
+static const char *rccEncaGetMissing(const char *lang, const char *charset) {
+ int i;
+ for (i=0;rcc_enca_missing_corrections[i].enca_charset;i++) {
+ if (((!rcc_enca_missing_corrections[i].lang)||((lang)&&(!strcmp(lang, rcc_enca_missing_corrections[i].lang))))&&(!strcmp(charset, rcc_enca_missing_corrections[i].enca_charset)))
+ return rcc_enca_missing_corrections[i].iconv_charset;
+ }
+ return charset;
+}
+
+
rcc_engine_internal rccEncaInitContext(rcc_engine_context ctx) {
#ifdef RCC_ENCA_SUPPORT
EncaAnalyser enca;
@@ -65,7 +100,12 @@ rcc_autocharset_id rccEnca(rcc_engine_context ctx, const char *buf, int len) {
if (ee.charset<0) return (rcc_charset_id)-1;
charset = enca_charset_name(ee.charset, ENCA_NAME_STYLE_ICONV);
- return rccGetAutoCharsetByName(ctx->config->ctx, charset);
+ if (charset) {
+ charset = rccEncaGetCorrection(rccEngineGetLanguage(ctx)->sn, charset);
+ } else {
+ charset = rccEncaGetMissing(rccEngineGetLanguage(ctx)->sn, enca_charset_name(ee.charset, ENCA_NAME_STYLE_ENCA));
+ }
+ return rccEngineGetAutoCharsetByName(ctx, charset);
#else /* RCC_ENCA_SUPPORT */
return (rcc_charset_id)-1;
#endif /* RCC_ENCA_SUPPORT */
@@ -160,7 +200,11 @@ int rccEncaInit() {
for (l=0;l<n_charsets;l++) {
// Enca bug, STYLE_ICONV return's a lot of NULL's
charset = enca_charset_name(charsets[l], ENCA_NAME_STYLE_ICONV);
- if (!charset) charset = enca_charset_name(charsets[l], ENCA_NAME_STYLE_ENCA);
+ if (charset) {
+ charset = rccEncaGetCorrection(rcc_default_languages[i].sn, charset);
+ } else {
+ charset = rccEncaGetMissing(rcc_default_languages[i].sn, enca_charset_name(charsets[l], ENCA_NAME_STYLE_ENCA));
+ }
enca_engines[i].charsets[k++] = charset;
}
enca_engines[j].charsets[k] = NULL;