summaryrefslogtreecommitdiffstats
path: root/engines/western.c
blob: 4c6e1aa77acee65673098c0972b4bffd7e9b1fad (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#include <stdio.h>
#include <string.h>

#include <librcc.h>

/* Mask with only bit number i set (bit 0 is the least significant bit).
 * The argument is parenthesized so compound expressions such as bit(a|b)
 * shift by the whole expression. */
#define bit(i) (1<<(i))

/* 
 * Latin Unicode subsets (code point ranges):
 * 0x100 - 0x17E    (Latin Extended-A)
 * 0x180 - 0x24F    (Latin Extended-B)
 * 0x1E00 - 0x1EFF  (Latin Extended Additional)
 */

/*
 * Guess whether a buffer of Western text is UTF-8 encoded.
 *
 * The buffer is scanned byte by byte while a score is kept: every
 * continuation byte that completes a sequence started by 0xC2 or 0xC3
 * (the UTF-8 lead bytes for U+0080..U+00FF) raises the score, and every
 * byte that breaks UTF-8 structure lowers it. ASCII bytes are ignored.
 *
 * ctx  - engine context; not used by this engine.
 * sbuf - text to analyse.
 * len  - number of bytes in sbuf; 0 means "NUL-terminated, use strlen()".
 *
 * Returns 0 when the score is positive and 1 otherwise. These are
 * presumably indices into the engine's charset list ("UTF-8",
 * "ISO8859-1") -- confirm against the librcc engine API.
 */
static rcc_autocharset_id AutoengineWestern(rcc_engine_context ctx, const char *sbuf, int len) {
    /* Inspect bytes as unsigned so comparisons with 0x80..0xFF work
       regardless of the signedness of plain char. */
    const unsigned char *buf = (const unsigned char*)sbuf;
    long i,j;
    int bytes=0;	/* >0: continuation bytes still expected;
			   <0: stray continuation bytes to forgive after a broken sequence */
    int rflag=0;	/* non-zero while inside a sequence led by 0xC2 (1) or 0xC3 (2) */
    int res=0;		/* score: positive means "looks like UTF-8" */

    (void)ctx;

    if (!len) len = (int)strlen(sbuf);
    for (i=0;i<len;i++) {
	if (buf[i]<128) continue;

	if (bytes>0) {
	    if ((buf[i]&0xC0)==0x80) {
		// Expected continuation byte; counts only inside a C2/C3 sequence
		if (rflag) {
		    res++;
		}
		bytes--;
	    } else {
		// Sequence was cut short by a non-continuation byte
		res--;
		bytes=1-bytes;
		rflag=0;
	    }
	} else {
	    // Locate the highest clear bit below bit 7: the run of leading
	    // one bits determines the kind of byte.
	    for (j=6;j>=0;j--)
		if ((buf[i]&bit(j))==0) break;

	    // j==6: continuation byte with no sequence open.
	    // j<=0: 0xFE (j==0) or 0xFF (j==-1), neither can start a sequence.
	    if ((j<=0)||(j==6)) {
		if ((j==6)&&(bytes<0)) bytes++;
		else res--;
		continue;
	    }
	    bytes=6-j;

	    // A new sequence starts: drop any flag left by a previous one so
	    // only continuation bytes that follow C2/C3 are counted.
	    rflag=0;
	    if (bytes==1) {
		// Western Languages (C2-C3)
		if (buf[i]==0xC2) rflag=1;
		else if (buf[i]==0xC3) rflag=2;
	    }
	}
    }

    if (res > 0) return (rcc_autocharset_id)0;
    return (rcc_autocharset_id)1;
}

/*
 * Engine descriptor handed out by rccGetInfo().
 * Members are given positionally; their meaning is defined by rcc_engine in
 * librcc.h. The two NULL slots are presumably optional hooks -- confirm there.
 */
static rcc_engine western_engine = {
    "Western",				/* engine title */
    NULL,
    NULL,
    AutoengineWestern,			/* detection routine */
    { "UTF-8", "ISO8859-1", NULL }	/* NULL-terminated charset list */
};

/*
 * Return the descriptor of the Western engine.
 *
 * lang - language name; only tested for being non-NULL, its contents are
 *        not examined.
 *
 * Returns a pointer to the static engine descriptor, or NULL when lang is NULL.
 */
rcc_engine *rccGetInfo(const char *lang) {
    return (lang != NULL) ? &western_engine : NULL;
}