11
// Base byte value for table indexing: get_array_pos() computes indices as (byte-first_char).
#define first_char 128
14
// Inclusive byte range that get_array_pos() accepts for each member of an input pair.
#define original_first_char 192
15
#define original_last_char 255
17
// Number of byte values along one axis of the pair table.
// last_char is defined on a line that is not part of this excerpt.
#define chars_number (last_char-first_char+1)
18
// Number of cells in the chars_number by chars_number pair table allocated by analyze().
#define array_size (chars_number*chars_number)
33
// Global conversion descriptor. While it still holds (iconv_t)-1, convert_char()
// returns its argument unchanged; main() may replace it with an iconv_open() result.
iconv_t icnv=(iconv_t)-1;
35
// Returns 1 when ch is CR, LF, NUL, space, tab, comma, period, exclamation mark,
// question mark, semicolon, hyphen, colon, double quote, single quote or closing parenthesis.
// analyze() uses it to decide whether the byte after a pair ends a word.
// The rest of the body (the result for any other byte) is not part of this excerpt.
int end_symbol(char ch) {
36
if (ch=='\r'||ch=='\n'||ch==0||ch==' '||ch=='\t'||ch==','||ch=='.'||ch=='!'||ch=='?'||ch==';'||ch=='-'||ch==':'||ch=='"'||ch=='\''||ch==')') return 1;
40
// Returns 1 when ch is tab, CR, LF, space, opening parenthesis, double quote or single quote.
// analyze() uses it to decide whether the byte before a pair starts a word.
// The rest of the body (the result for any other byte) is not part of this excerpt.
int start_symbol(char ch) {
41
if ((ch=='\t')||ch=='\r'||ch=='\n'||(ch==' ')||(ch=='(')||(ch=='"')||(ch=='\'')) return 1;
46
// Passes one byte through the global icnv descriptor.
// Only part of the body is visible: pc, lc, pr and lr are set up on lines outside
// this excerpt, as is the value returned after a conversion.
unsigned char convert_char(unsigned char c) {
52
// No conversion descriptor has been opened: hand the byte back untouched.
if (icnv == (iconv_t)-1) return c;
53
// NOTE(review): POSIX iconv() returns size_t and reports failure as (size_t)-1.
// If this is the standard iconv(), an unsigned result is never below zero, so this
// error branch cannot run. Confirm, then compare against (size_t)-1 instead.
if (iconv(icnv,&pc,&lc,&pr,&lr)<0) {
54
printf("Error converting characters!\n");
60
// Fills *pos with pair-table indices for the byte pair (a,b).
// Returns -1 when either byte lies outside [original_first_char, original_last_char].
// The other return values are produced on lines outside this excerpt; analyze()
// treats any result >= 0 as success.
int get_array_pos(struct array_pos *pos, int a, int b) {
62
// Reject pairs with a byte outside the accepted range.
if ((a<original_first_char)||(a>original_last_char)) return -1;
63
if ((b<original_first_char)||(b>original_last_char)) return -1;
70
// la, lb, ua and ub are assigned on lines not shown here; presumably they are the
// lower-case and upper-case forms of a and b (TODO confirm). Any variant that falls
// outside the accepted range is replaced by the original byte.
if ((la<original_first_char)||(la>original_last_char)) la=a;
71
if ((lb<original_first_char)||(lb>original_last_char)) lb=b;
72
if ((ua<original_first_char)||(ua>original_last_char)) ua=a;
73
if ((ub<original_first_char)||(ub>original_last_char)) ub=b;
80
// la=a;lb=b;ua=a;ub=b;
82
// Each index is row*chars_number+column, with both bytes offset by first_char.
pos->ll=(la-first_char)*chars_number+(lb-first_char);
84
pos->ul=(ua-first_char)*chars_number+(lb-first_char);
89
pos->lu=(la-first_char)*chars_number+(ub-first_char);
94
// The uu index is only computed here when both bytes differ from their other variant;
// what happens otherwise is decided on lines outside this excerpt.
if ((lb!=ub)&&(la!=ua)) {
95
pos->uu=(ua-first_char)*chars_number+(ub-first_char);
103
// Builds a table of array_size pstat cells from the adjacent byte pairs of text.
// For every pair accepted by get_array_pos() it bumps the s counter when the pair
// starts a word, otherwise the e counter when it ends one. The remaining branches,
// the conditions guarding the ul/uu updates and the return statement are on lines
// outside this excerpt.
struct pstat *analyze(const unsigned char *text, unsigned long length) {
106
struct array_pos pos;
108
a=(struct pstat*)malloc(array_size*sizeof(struct pstat));
111
// Pass over every table cell; the loop body is not visible here.
for (i=0;i<array_size;i++) {
117
// Walk all adjacent pairs (text[i-1], text[i]).
for (i=1;i<length;i++) {
118
if (get_array_pos(&pos,text[i-1],text[i])>=0) {
120
// The i==1 test short-circuits, so text[i-2] is only read when i >= 2.
if ((i==1)||(start_symbol(text[i-2]))) a[pos.ll].s++;
121
// NOTE(review): when i==length-1 the test i+2==length is false, so text[i+1] is
// evaluated, which is index length. main() passes a buffer of exactly st.st_size
// bytes, so this looks like a read one byte past the end. Confirm whether the
// intended test is i+1==length. The same expression is repeated for ul and uu below.
else if ((i+2==length)||(end_symbol(text[i+1]))) a[pos.ll].e++;
125
if ((i==1)||(start_symbol(text[i-2]))) a[pos.ul].s++;
126
else if ((i+2==length)||(end_symbol(text[i+1]))) a[pos.ul].e++;
130
// if ((i==1)||(start_symbol(text[i-2]))) a[pos.lu].s++;
131
// else if ((i+2==length)||(end_symbol(text[i+1]))) a[pos.lu].e++;
132
// else a[pos.lu].p++;
135
if ((i==1)||(start_symbol(text[i-2]))) a[pos.uu].s++;
136
else if ((i+2==length)||(end_symbol(text[i+1]))) a[pos.uu].e++;
145
// Writes every table cell that has a non-zero p, s or e counter to stdout as a
// C initializer entry of the form {'x','y',p,s,e}.
// The update of n and the return statement are on lines outside this excerpt.
int print(struct pstat *a) {
148
// i and j are the two byte values of a pair; k is the matching linear index into a.
for (i=first_char,k=0,n=0;i<=last_char;i++)
149
for (j=first_char;j<=last_char;j++,k++) {
150
if ((a[k].p)||(a[k].s)||(a[k].e)) {
151
// Separator before each entry but the first: a line break when n is a multiple of 8,
// otherwise a comma and a space.
if ((n)&&(n%8==0)) printf(",\n");
152
else if (n) printf(", ");
153
// Counters are printed as log10 of the count; a zero count is printed as -2.
printf("{'%c','%c',%lf,%lf,%lf}",i,j,a[k].p?log10(a[k].p):-2,a[k].s?log10(a[k].s):-2,a[k].e?log10(a[k].e):-2);
157
if ((n%8)!=1) printf("\n");
162
// Doubles res until it exceeds n.
// The initial value of res and the return statement are outside this excerpt;
// presumably the result is the smallest power of two greater than n (TODO confirm).
unsigned long npow(unsigned long n) {
164
while (res<=n) res*=2;
168
// Entry point: takes a file name and an encoding name, reads the file, runs analyze()
// on its bytes and prints the resulting table as C source.
// Many lines of the body (declarations, argument-count check, error exits, cleanup)
// are outside this excerpt.
// NOTE(review): the definition has no return type. Implicit int was removed in C99;
// declare it as int main(...).
main(int argc, char *argv[]) {
180
printf("Usage: %s <file name> <encoding>\n",argv[0]);
184
// Encoding names longer than 12 characters are rejected before being copied into
// locale. The declaration of locale is not visible; presumably it is sized to match.
if (strlen(argv[2])>12) {
185
printf("Invalid encoding(%s) specified!\n",argv[2]);
189
// Map common aliases (compared case-insensitively) to one canonical encoding name.
if ((!strcasecmp(argv[2],"koi"))||(!strcasecmp(argv[2],"koi8"))||(!strcasecmp(argv[2],"koi-8"))||(!strcasecmp(argv[2],"koi8-r")))
190
sprintf(locale,"%s","KOI8-R");
191
else if ((!strcasecmp(argv[2],"win"))||(!strcasecmp(argv[2],"cp1251"))||(!strcasecmp(argv[2],"cp-1251"))||(!strcasecmp(argv[2],"win1251"))||(!strcasecmp(argv[2],"win-1251")))
192
sprintf(locale,"%s","CP1251");
193
else if ((!strcasecmp(argv[2],"alt"))||(!strcasecmp(argv[2],"cp866"))||(!strcasecmp(argv[2],"cp-866"))||(!strcasecmp(argv[2],"ibm866"))||(!strcasecmp(argv[2],"ibm-866")))
194
sprintf(locale,"%s","IBM866");
196
// Otherwise the name given on the command line is used as-is.
sprintf(locale,"%s",argv[2]);
198
// Adopt the character-type locale from the environment.
if (!setlocale(LC_CTYPE,"")) {
199
printf("Can't set locale!\n");
203
// A converter is only opened when the requested encoding differs from the locale codeset.
if (strcmp(locale,nl_langinfo(CODESET))) {
204
// iconv_open() takes (tocode, fromcode): here the target is the requested encoding
// and the source is the locale codeset.
// NOTE(review): POSIX iconv_open() reports failure as (iconv_t)-1 and iconv_t is an
// opaque type. A below-zero ordering test is not a reliable check; confirm and
// compare against (iconv_t)-1.
if ((icnv=iconv_open(locale,nl_langinfo(CODESET)))<0) {
205
printf("Can't initialize iconv!\n");
211
if (stat(argv[1],&st)) {
212
printf("Specified file can't be stated!\n");
217
if (!S_ISREG(st.st_mode)) {
218
printf("Specified file isn't regular file!\n");
223
// The buffer holds exactly st.st_size bytes, with no extra terminating byte.
text=(unsigned char*)malloc(st.st_size);
225
// NOTE(review): st.st_size is an off_t, which need not match the %lu conversion.
printf("Can't allocate %lu bytes of memory!\n",st.st_size);
230
f=fopen(argv[1],"r");
232
printf("Failed to open specified file. Check permissions!\n");
237
// A short read is treated as an error.
if (fread(text,1,st.st_size,f)!=st.st_size) {
238
printf("Problem reading specified file!\n");
246
a=analyze(text,st.st_size);
248
// The table header goes to stdout, while the two size variables below go to stderr.
printf("static const lng_stat2 enc_%s[]={\n",argv[2]);
252
fprintf(stderr,"static unsigned int indexes2=%lu;\n",num);
253
fprintf(stderr,"static unsigned int npow2=%lu;\n",npow(num));
254
// NOTE(review): the argument is a size_t expression; %zu is the matching conversion.
} else printf("Failed to allocate %lu bytes of memory!\n",array_size*sizeof(struct pstat));