6
/* Larger of a and b. Every argument use is parenthesized so that operands
 * containing low-precedence operators (|, &, ?:, ...) expand correctly.
 * NOTE: each argument may be evaluated twice; do not pass expressions with
 * side effects. */
#define max(a,b) (((a)>(b))?(a):(b))
7
/* Smaller of a and b. Every argument use is parenthesized so that operands
 * containing low-precedence operators (|, &, ?:, ...) expand correctly.
 * NOTE: each argument may be evaluated twice; do not pass expressions with
 * side effects. */
#define min(a,b) (((a)<(b))?(a):(b))
9
/* Length of str: bounded by n via strnlen() when n is non-zero, otherwise
 * the full strlen(). Arguments are parenthesized so that an expression
 * passed as n (e.g. a conditional expression) is tested as a whole.
 * NOTE: n is evaluated twice when it is non-zero. */
#define STRNLEN(str,n) ((n)?strnlen((str),(n)):strlen((str)))
12
typedef struct lng_stat2 {
20
#include "russian_table.h"
23
static int end_symbol(char ch) {
24
if (ch=='\r'||ch=='\n'||ch==0||ch==' '||ch=='\t'||ch==','||ch=='.'||ch=='!'||ch=='?'||ch==';'||ch=='-'||ch==':'||ch=='"'||ch=='\''||ch==')') return 1;
28
static int start_symbol(char ch) {
29
if ((ch=='\t')||ch=='\r'||ch=='\n'||(ch==' ')||(ch=='(')||(ch=='"')||(ch=='\'')) return 1;
33
/* Pointer to a read-only struct lng_stat2; bfind() takes its three
 * trailing parameters as pointers to this type. */
typedef struct lng_stat2 const *lng_stat2_ptr;
35
static void bfind(const unsigned char *a, lng_stat2_ptr *w, lng_stat2_ptr *k, lng_stat2_ptr *al) {
36
const struct lng_stat2 *winptr, *koiptr,*altptr;
37
int ki,wi,ai,d,ws=0,ks=0,as=0;
49
if (wi>indexes2) wi-=d;
55
}else if(a[1]<winptr->b){
60
}else if(a[0]<winptr->a){
68
if (ki>indexes2) ki-=d;
74
}else if(a[1]<koiptr->b){
79
}else if(a[0]<koiptr->a){
87
if (ai>indexes2) ai-=d;
93
}else if(a[1]<altptr->b){
98
}else if(a[0]<altptr->a){
114
static double calculate(double s, double m, double e) {
118
static int is_win_charset2(const unsigned char *txt, int len){
119
const struct lng_stat2 *winptr, *koiptr,*altptr;
120
double winstep,koistep,altstep,winestep,koiestep,altestep,winsstep,koisstep,altsstep;
121
double winstat=0,koistat=0,altstat=0,winestat=0,koiestat=0,altestat=0,winsstat=0,koisstat=0,altsstat=0;
125
fprintf(stderr,"Word: %s\n",txt);
127
for(j=0;j<len-1;j++){
128
// skip pairs containing a byte below 128 (the lower half of the code table)
129
if(txt[j]<128 || txt[j+1]<128) continue;
131
fprintf(stderr,"Pair: %c%c",txt[j],txt[j+1]);
133
bfind(txt+j,&winptr,&koiptr,&altptr);
135
if ((j==0)||(start_symbol(txt[j-1]))) {
136
if (winptr) winsstep=winptr->srate;
137
else winsstep=NF_VALUE;
138
if (koiptr) koisstep=koiptr->srate;
139
else koisstep=NF_VALUE;
140
if (altptr) altsstep=altptr->srate;
141
else altsstep=NF_VALUE;
149
fprintf(stderr,", Win %lf, Koi %lf, Alt: %lf\n",winsstep,koisstep,altsstep);
151
} else if ((j==len-2)||(end_symbol(txt[j+2]))) {
152
if (winptr) winestep=winptr->erate;
153
else winestep=NF_VALUE;
154
if (koiptr) koiestep=koiptr->erate;
155
else koiestep=NF_VALUE;
156
if (altptr) altestep=altptr->erate;
157
else altestep=NF_VALUE;
165
fprintf(stderr,", Win %lf, Koi %lf, Alt %lf\n",winestep,koiestep,altestep);
168
if (winptr) winstep=winptr->rate;
169
else winstep=NF_VALUE;
170
if (koiptr) koistep=koiptr->rate;
171
else koistep=NF_VALUE;
172
if (altptr) altstep=altptr->rate;
173
else altstep=NF_VALUE;
181
fprintf(stderr,", Win %lf, Koi %lf, Alt %lf\n",winstep,koistep,altstep);
197
fprintf(stderr,"Start. Win: %lf, Koi: %lf, Alt: %lf\n",winsstat,koisstat,altsstat);
198
fprintf(stderr,"Middle. Win: %lf, Koi: %lf, Alt: %lf\n",winstat,koistat,altstat);
199
fprintf(stderr,"End. Win: %lf, Koi: %lf, Alt: %lf\n",winestat,koiestat,altestat);
200
fprintf(stderr,"Final. Win: %lf, Koi: %lf, Alt: %lf\n",calculate(winsstat,winstat,winestat),calculate(koisstat,koistat,koiestat),calculate(altsstat,altstat,altestat));
202
if ((calculate(altsstat,altstat,altestat)>calculate(koisstat,koistat,koiestat))&&(calculate(altsstat,altstat,altestat)>calculate(winsstat,winstat,winestat))) return 3;
203
if (calculate(koisstat,koistat,koiestat)>calculate(winsstat,winstat,winestat)) return 1;
208
static int check_utf8(const unsigned char *buf, int len) {
214
for (i=0;i<len;i++) {
215
if (buf[i]<128) continue;
218
if ((buf[i]&0xC0)==0x80) {
221
// Russian is 0x410-0x44F
222
if ((rflag==1)&&(tmp>=0x10)) res++;
223
else if ((rflag==2)&&(tmp<=0x0F)) res++;
233
if ((buf[i]&bit(j))==0) break;
235
if ((j==0)||(j==6)) {
236
if ((j==6)&&(bytes<0)) bytes++;
242
// Cyrillic D0-D3, Russian - D0-D1
243
if (buf[i]==0xD0) rflag=1;
244
else if (buf[i]==0xD1) rflag=2;
248
if ((buf[i]==0xD0)||(buf[i]==0xD1)) {
257
enum russian_charsets get_russian_charset(const char *buf,int len) {
260
l = STRNLEN(buf,len);
261
if (check_utf8(buf,l)>1) return RUSSIAN_CHARSET_UTF8;
262
return is_win_charset2(buf,l);