summary | refs | log | tree | commit | diff | stats
path: root/engines
diff options
context:
space:
mode:
Diffstat (limited to 'engines')
-rw-r--r-- engines/Makefile.am 5
-rw-r--r-- engines/russian.c 2
-rw-r--r-- engines/western.c 72
3 files changed, 77 insertions, 2 deletions
diff --git a/engines/Makefile.am b/engines/Makefile.am
index 7226682..404cc32 100644
--- a/engines/Makefile.am
+++ b/engines/Makefile.am
@@ -1,4 +1,4 @@
-lib_LTLIBRARIES =
+lib_LTLIBRARIES = libwestern.la
libdir = $(pkgdatadir)/engines
@@ -8,4 +8,7 @@ librussian_la_SOURCES = russian.c
librussian_la_LDFLAGS = -module -avoid-version -export-symbols-regex "rccGetInfo"
endif
+libwestern_la_SOURCES = western.c
+libwestern_la_LDFLAGS = -module -avoid-version -export-symbols-regex "rccGetInfo"
+
AM_CPPFLAGS = -I../src @RCD_INCLUDES@
diff --git a/engines/russian.c b/engines/russian.c
index 08b8310..0df145c 100644
--- a/engines/russian.c
+++ b/engines/russian.c
@@ -5,7 +5,7 @@
#include <librcc.h>
static rcc_autocharset_id AutoengineRussian(rcc_engine_context ctx, const char *buf, int len) {
- return (rcc_charset_id)rcdGetRussianCharset(buf,len);
+ return (rcc_autocharset_id)rcdGetRussianCharset(buf,len);
}
static rcc_engine russian_engine = {
diff --git a/engines/western.c b/engines/western.c
new file mode 100644
index 0000000..4c6e1aa
--- /dev/null
+++ b/engines/western.c
@@ -0,0 +1,72 @@
+#include <stdio.h>
+#include <string.h>
+
+#include <librcc.h>
+
+#define bit(i) (1<<i) /* mask with only bit i set; NOTE(review): i is not parenthesized in the expansion, so pass only a simple expression */
+
+/*
+ * Latin unicode subset:
+ * 0x100 - 0x17E
+ * 0x180 - 0x24F
+ * 0x1E00 - 0x1EFF
+ */
+
+static rcc_autocharset_id AutoengineWestern(rcc_engine_context ctx, const char *sbuf, int len) { /* Scores sbuf as UTF-8 text: returns 0 when the score ends up positive, 1 otherwise. ctx is not used. */
+ const unsigned char *buf = sbuf; /* unsigned view so the >=128 and bit-mask tests see raw byte values; NOTE(review): implicit char* -> unsigned char* conversion, no cast */
+ long i,j;
+ int bytes=0,rflag=0; /* bytes: >0 = continuation bytes still expected, <0 = resync allowance set by the error path; rflag: non-zero once a C2/C3 lead byte was seen */
+ int res=0; /* running score: raised by continuation bytes seen while rflag is set, lowered by malformed input */
+
+ if (!len) len = strlen(buf); /* len == 0 means: measure the NUL-terminated string */
+ for (i=0;i<len;i++) {
+ if (buf[i]<128) continue; /* 7-bit bytes do not affect the score or the state */
+
+ if (bytes>0) { /* in the middle of a multi-byte sequence */
+ if ((buf[i]&0xC0)==0x80) { /* 10xxxxxx: well-formed continuation byte */
+ if (rflag) {
+ // Evidence for UTF-8: a C2/C3 lead plus one continuation byte encodes U+0080-U+00FF
+ res++;
+ }
+ bytes--; /* NOTE(review): rflag is not cleared when a sequence completes, so continuation bytes of later sequences score as well */
+ } else { /* a continuation byte was expected but this byte is 11xxxxxx */
+ res--;
+ bytes=1-bytes; /* result is <= 0; a negative value lets that many stray continuation bytes pass unpenalized (j==6 case below) */
+ rflag=0;
+ }
+ } else { /* not inside a sequence: classify this high byte */
+ for (j=6;j>=0;j--) /* j = position of the highest clear bit below bit 7 */
+ if ((buf[i]&bit(j))==0) break;
+
+ if ((j==0)||(j==6)) { /* j==6: 10xxxxxx continuation byte without a lead; j==0: byte 0xFE */
+ if ((j==6)&&(bytes<0)) bytes++; /* use up one tolerated stray continuation byte */
+ else res--;
+ continue;
+ }
+ bytes=6-j; /* continuation bytes announced by this lead byte; NOTE(review): 0xFF leaves j==-1 and yields bytes=7 */
+ if (bytes==1) { /* 110xxxxx lead: two-byte sequence */
+ // Lead bytes C2 and C3 together cover U+0080-U+00FF
+ if (buf[i]==0xC2) rflag=1; /* the values 1 and 2 are only ever tested for non-zero */
+ else if (buf[i]==0xC3) rflag=2;
+ }
+ }
+
+ if ((buf[i]==0xC0)||(buf[i]==0xC1)) { /* NOTE(review): no observable effect; it only breaks on the final byte, where the loop would stop anyway */
+ if (i+1==len) break;
+
+ }
+ }
+
+ if (res > 0) return (rcc_autocharset_id)0; /* presumably index 0 ("UTF-8") in the engine's charset list; confirm against librcc */
+ return (rcc_autocharset_id)1; /* presumably index 1 ("ISO8859-1"); also the result when no high bytes were seen */
+}
+
+static rcc_engine western_engine = { /* engine descriptor returned by rccGetInfo() */
+ "Western", NULL, NULL, &AutoengineWestern, {"UTF-8","ISO8859-1", NULL} /* presumably: title, init hook, free hook, detection function, NULL-terminated charset list; confirm field order in librcc.h */
+};
+
+rcc_engine *rccGetInfo(const char *lang) { /* Exported entry point (matches the -export-symbols-regex in Makefile.am): returns the Western engine descriptor, or NULL when lang is NULL */
+ if (!lang) return NULL; /* no language name given: no engine */
+
+ return &western_engine; /* lang is only NULL-checked; the same descriptor is returned for every language name */
+}