From 9d4628df369b92016b7fc3bfc7fed6d06ff2ca9a Mon Sep 17 00:00:00 2001 From: "Suren A. Chilingaryan" Date: Sun, 7 Aug 2005 21:41:32 +0000 Subject: - Russian autoengine is renamed to LibRCD - Fix Learning with Language Autodetection switched on - Attempt to perform rccFS with Language Autodetection switched off, if failed with default behaviour. - Systematization of translation engine: + Rearrangement of the translation modes: OFF, TO_ENGLISH, SKIP_RELATED, SKIP_PARRENT, FULL. + New class types: TRANSLATE_LOCALE, TRANSLATE_CURRENT, TRANSLATE_FROM. - Detect "Unicode" locales for foreign languages - "out" class is assumed to be TRANSLATE_LOCALE - Respect RCC_CLASS_KNOWN - Check for Latin UTF-8 prior to running any charset detection engine. --- src/engine.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'src/engine.c') diff --git a/src/engine.c b/src/engine.c index 8058faf..f9c2284 100644 --- a/src/engine.c +++ b/src/engine.c @@ -151,3 +151,51 @@ rcc_context rccEngineGetRccContext(rcc_engine_context ctx) { return ctx->config->ctx; } + +#define bit(i) (1<0) { + // Western is 0x100-0x17e + if ((buf[i]&0xC0)==0x80) bytes--; + else return 0; + } else { + if (buf[i]<128) continue; + + for (j=6;j>=0;j--) + if ((buf[i]&bit(j))==0) break; + + if ((j==0)||(j==6)) return 0; + + bytes=6-j; + if (bytes==1) { + // Western Languages (C2-C3) + if ((buf[i]!=0xC2)&&(buf[i]!=0xC3)) return 0; + } else return 0; + } + } + return 1; +} + + +rcc_autocharset_id rccEngineDetectCharset(rcc_engine_context ctx, const char *buf, size_t len) { + rcc_autocharset_id utf; + + if (CheckWestern(buf, len)) { + utf=rccConfigGetAutoCharsetByName(ctx->config, "UTF-8"); + if (utf != (rcc_autocharset_id)-1) return utf; + utf=rccConfigGetAutoCharsetByName(ctx->config, "UTF8"); + if (utf != (rcc_autocharset_id)-1) return utf; + utf=rccConfigGetAutoCharsetByName(ctx->config, "UTF_8"); + return utf; + } + + if ((ctx)&&(ctx->func)) return ctx->func(ctx, buf, 
len); + return (rcc_autocharset_id)-1; +} + -- cgit v1.2.3