From 35381569403e90b8d34b223f524519521bc81598 Mon Sep 17 00:00:00 2001
From: "Suren A. Chilingaryan"
Date: Wed, 27 Jun 2007 09:28:22 +0000
Subject: Engines rework

- LibGuess support for far east language autodetection
- Support for LibRCD 0.1.9 supporting ISO-8859-1 strings
- Fixing wrong encodings names returned by Enca
- Engine plugins naming scheme is altered
- New API functions: rccEngineGetInfo, rccEngineGetAutoCharsetByName
- Most of languages are no more hardcoded, but moved to the configuration
- RCD engine is added to Belarusian language (I hope it should work)
- Some encoding names are fixed in configuration
- Support for external libiconv
- Support for libcharset
- Find UI interface language from LC_MESSAGES locale
- Simple compilation fix (Thanx to D. Panov)
---
 engines/Makefile.am | 22 ++++++++++-----
 engines/libguess.c  | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 engines/librcd.c    |  9 ++++--
 3 files changed, 102 insertions(+), 9 deletions(-)
 create mode 100644 engines/libguess.c

(limited to 'engines')

diff --git a/engines/Makefile.am b/engines/Makefile.am
index 2b7bb26..93e490a 100644
--- a/engines/Makefile.am
+++ b/engines/Makefile.am
@@ -1,14 +1,22 @@
-lib_LTLIBRARIES = libwestern.la
+lib_LTLIBRARIES = western_engine.la
 libdir = $(pkgdatadir)/engines

 if HAVE_RCD
-lib_LTLIBRARIES += librcd.la
-librcd_la_SOURCES = librcd.c
-librcd_la_LDFLAGS = -module -avoid-version -export-symbols-regex "rccGetInfo"
+lib_LTLIBRARIES += librcd_engine.la
+librcd_engine_la_SOURCES = librcd.c
+librcd_engine_la_LDFLAGS = -module -avoid-version -export-symbols-regex "rccGetInfo"
+librcd_engine_la_LIBADD = @RCD_LIBS@
 endif

-libwestern_la_SOURCES = western.c
-libwestern_la_LDFLAGS = -module -avoid-version -export-symbols-regex "rccGetInfo"
+if HAVE_LIBGUESS
+lib_LTLIBRARIES += libguess_engine.la
+libguess_engine_la_SOURCES = libguess.c
+libguess_engine_la_LDFLAGS = -module -avoid-version -export-symbols-regex "rccGetInfo"
+libguess_engine_la_LIBADD = @LIBGUESS_LIBS@
+endif
+
+western_engine_la_SOURCES = western.c
+western_engine_la_LDFLAGS = -module -avoid-version -export-symbols-regex "rccGetInfo"

-AM_CPPFLAGS = -I../src @RCD_INCLUDES@
+AM_CPPFLAGS = -I../src @RCD_INCLUDES@ @LIBGUESS_INCLUDES@

diff --git a/engines/libguess.c b/engines/libguess.c
new file mode 100644
index 0000000..7f13b50
--- /dev/null
+++ b/engines/libguess.c
@@ -0,0 +1,80 @@
+#include
+#include
+
+#include
+#include
+
+#define UTF8_ID 0
+#define UTF16_ID 1
+
+typedef const char *(*guess_function)(const char *buf, int buflen);
+
+struct rcc_guess_engine_t {
+    struct rcc_engine_t engine;
+    guess_function func;
+};
+typedef struct rcc_guess_engine_t rcc_guess_engine;
+
+rcc_autocharset_id guessDetect(rcc_engine_context ctx, const char *buf, int len) {
+    const char *res;
+    rcc_guess_engine *info;
+
+    if (!buf) return (rcc_autocharset_id)-1;
+
+    info = (rcc_guess_engine*)rccEngineGetInfo(ctx);
+    if (info) {
+        if (info->func) res = info->func(buf, len?len:strlen(buf));
+        else {
+            if (!len) len = strlen(buf);
+            res = guess_cn(buf, len);
+            if (!res) res = guess_tw(buf, len);
+            printf("%s\n",res?res:"null");
+        }
+    } else
+        res = NULL;
+
+    if (!res) return (rcc_autocharset_id)-1;
+    return rccEngineGetAutoCharsetByName(ctx, res);
+}
+
+
+struct rcc_guess_engine_t guessJPEngine = {
+ {
+    "LibGUESS",
+    NULL, /* Constructor */
+    NULL, /* Destructor */
+    &guessDetect,
+    {"UTF-8", "UTF-16", "ISO-2022-JP", "EUC-JP", "SJIS", NULL}
+ },
+ &guess_jp
+};
+
+struct rcc_guess_engine_t guessCNEngine = {
+ {
+    "LibGUESS",
+    NULL, /* Constructor */
+    NULL, /* Destructor */
+    &guessDetect,
+    {"UTF-8", "UTF-16", "ISO-2022-CN", "GB2312", "GB18030", "BIG5", NULL}
+ },
+ NULL
+};
+
+struct rcc_guess_engine_t guessKREngine = {
+ {
+    "LibGUESS",
+    NULL, /* Constructor */
+    NULL, /* Destructor */
+    &guessDetect,
+    {"UTF-8", "UTF-16", "ISO-2022-KR", "EUC-KR", "JOHAB", NULL}
+ },
+ &guess_kr
+};
+
+
+rcc_engine *rccGetInfo(const char *lang) {
+    if (!strcmp(lang, "zh")) return (rcc_engine*)&guessCNEngine;
+    if (!strcmp(lang, "ja")) return (rcc_engine*)&guessJPEngine;
+    if (!strcmp(lang, "ko")) return (rcc_engine*)&guessKREngine;
+    return NULL;
+}
diff --git a/engines/librcd.c b/engines/librcd.c
index c24d244..bfb14b0 100644
--- a/engines/librcd.c
+++ b/engines/librcd.c
@@ -9,11 +9,15 @@ static rcc_autocharset_id AutoengineRussian(rcc_engine_context ctx, const char *
 }

 static rcc_engine russian_engine = {
-    "LibRCD", NULL, NULL, &AutoengineRussian, {"CP1251","KOI8-R","UTF-8","IBM866", NULL}
+    "LibRCD", NULL, NULL, &AutoengineRussian, {"CP1251","KOI8-R","UTF-8","IBM866", "ISO8859-1", NULL}
 };

 static rcc_engine ukrainian_engine = {
-    "LibRCD", NULL, NULL, &AutoengineRussian, {"CP1251","KOI8-U","UTF-8","IBM865", NULL}
+    "LibRCD", NULL, NULL, &AutoengineRussian, {"CP1251","KOI8-U","UTF-8","IBM865", "ISO8859-1", NULL}
+};
+
+static rcc_engine belarussian_engine = {
+    "LibRCD", NULL, NULL, &AutoengineRussian, {"CP1251","ISO-IR-111","UTF-8","IBM866", "ISO8859-1", NULL}
 };

 rcc_engine *rccGetInfo(const char *lang) {
@@ -21,6 +25,7 @@ rcc_engine *rccGetInfo(const char *lang) {

     if (!strcmp(lang, "ru")) return &russian_engine;
     if (!strcmp(lang, "uk")) return &ukrainian_engine;
+    if (!strcmp(lang, "be")) return &belarussian_engine;

     return NULL;
 }
--
cgit v1.2.3