summaryrefslogtreecommitdiffstats
path: root/src/engine.c
diff options
context:
space:
mode:
author: Suren A. Chilingaryan <csa@dside.dyndns.org> 2005-08-07 21:41:32 +0000
committer: Suren A. Chilingaryan <csa@dside.dyndns.org> 2005-08-07 21:41:32 +0000
commit: 9d4628df369b92016b7fc3bfc7fed6d06ff2ca9a (patch)
tree: 9d78892cf391d2cdb5da216cfec78a8b565751c7 /src/engine.c
parent: 94ca629ceec7b0dc9f6f724b2e15923d3ec1d5b3 (diff)
downloadlibrcc-9d4628df369b92016b7fc3bfc7fed6d06ff2ca9a.tar.gz
librcc-9d4628df369b92016b7fc3bfc7fed6d06ff2ca9a.tar.bz2
librcc-9d4628df369b92016b7fc3bfc7fed6d06ff2ca9a.tar.xz
librcc-9d4628df369b92016b7fc3bfc7fed6d06ff2ca9a.zip
- Russian autoengine is renamed to LibRCD
- Fix Learning with Language Autodetection switched on - Attempt to perform rccFS with Language Autodetection switched off, if failed with default behaviour. - Systematization of translation engine: + Rearrangement of the translation modes: OFF, TO_ENGLISH, SKIP_RELATED, SKIP_PARRENT, FULL. + New class types: TRANSLATE_LOCALE, TRANSLATE_CURRENT, TRANSLATE_FROM. - Detect "Unicode" locales for foreign languages - "out" class is assumed to be TRANSLATE_LOCALE - Respect RCC_CLASS_KNOWN - Check for Latin UTF-8 prior to running any charset detection engine.
Diffstat (limited to 'src/engine.c')
-rw-r--r-- src/engine.c 48
1 files changed, 48 insertions, 0 deletions
diff --git a/src/engine.c b/src/engine.c
index 8058faf..f9c2284 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -151,3 +151,51 @@ rcc_context rccEngineGetRccContext(rcc_engine_context ctx) {
return ctx->config->ctx;
}
+
/* Mask with only bit number i set. The argument is parenthesized so that
 * expressions such as bit(a+b) expand correctly. */
#define bit(i) (1<<(i))

/*
 * Tests whether a buffer is UTF-8 text made only of ASCII characters and
 * two-byte sequences whose lead byte is 0xC2 or 0xC3 (code points
 * U+0080..U+00FF, i.e. the accented letters of Western European languages).
 *
 * buf - bytes to examine; a NULL pointer is rejected.
 * len - number of bytes to examine; 0 means "buf is NUL-terminated, use its
 *       string length". A negative length is rejected.
 *
 * Returns 1 when every byte belongs to such a sequence (a pure ASCII or
 * empty buffer qualifies), 0 otherwise. A buffer that ends in the middle of
 * a two-byte sequence is not valid UTF-8 and yields 0.
 */
static int CheckWestern(const unsigned char *buf, int len) {
    long pos;
    int pending = 0;    /* continuation bytes still expected */

    if ((!buf)||(len < 0)) return 0;
    if (!len) len = (int)strlen((const char*)buf);

    for (pos = 0; pos < len; pos++) {
        if (pending > 0) {
            /* Continuation bytes have the form 10xxxxxx */
            if ((buf[pos]&0xC0) != 0x80) return 0;
            pending--;
        } else if (buf[pos] >= 128) {
            /* Only the C2 and C3 lead bytes are accepted: stray
             * continuation bytes, other two-byte leads, longer sequences
             * and the invalid 0xFE/0xFF bytes are all rejected. */
            if ((buf[pos] != 0xC2)&&(buf[pos] != 0xC3)) return 0;
            pending = 1;
        }
    }

    /* A lead byte without its continuation byte means truncated input */
    return (pending == 0);
}
+
+
+rcc_autocharset_id rccEngineDetectCharset(rcc_engine_context ctx, const char *buf, size_t len) {
+ rcc_autocharset_id utf;
+
+ if (CheckWestern(buf, len)) {
+ utf=rccConfigGetAutoCharsetByName(ctx->config, "UTF-8");
+ if (utf != (rcc_autocharset_id)-1) return utf;
+ utf=rccConfigGetAutoCharsetByName(ctx->config, "UTF8");
+ if (utf != (rcc_autocharset_id)-1) return utf;
+ utf=rccConfigGetAutoCharsetByName(ctx->config, "UTF_8");
+ return utf;
+ }
+
+ if ((ctx)&&(ctx->func)) return ctx->func(ctx, buf, len);
+ return (rcc_autocharset_id)-1;
+}
+