diff options
| -rw-r--r-- | ToDo | 15 | ||||
| -rw-r--r-- | configure.in | 41 | ||||
| -rw-r--r-- | external/rcclibtranslate.c | 84 | ||||
| -rw-r--r-- | src/Makefile.am | 5 | ||||
| -rw-r--r-- | src/rccexternal.c | 9 | ||||
| -rw-r--r-- | src/rccexternal.h | 2 | ||||
| -rw-r--r-- | src/rccmutex.c | 73 | ||||
| -rw-r--r-- | src/rccmutex.h | 27 | ||||
| -rw-r--r-- | src/rccstring.c | 1 | ||||
| -rw-r--r-- | src/rcctranslate.c | 133 | ||||
| -rw-r--r-- | src/rcctranslate.h | 8 | ||||
| -rw-r--r-- | src/recode.c | 162 | 
12 files changed, 473 insertions, 87 deletions
@@ -1,4 +1,13 @@  0.3.x: +    - Buffer managment: +	+ SetBufferSize ( 0 - autogrow ) +    - Language autodetection and translation improvements +	+ Look on ofline translation libraries and other possibilities to improove  +	translation and language detection. +	+ Implement ispell support +	+ Configurable timeouts + +1.x:      - Common encodings:  	+ Provide way to add to all languages several default Unicode encodings (UTF8, UTF16, UTF16BE)  	+ Special type of classes to select only from Unicode encodings (or even just specified subset of encodings) @@ -6,10 +15,8 @@  	    * rccToEncoding(current_language, *new_language, buf, size)?  	    * rccFromEncoding(current_language, utf8_language, buf, size)?  	    * Code some options in charset name. (SpecialEncodingPrefix_Encoding_EncodingOptions) -    - Buffer managment: -	+ SetBufferSize ( 0 - autogrow ) -    - Look on ofline translation libraries and other possibilities to improove  -      translation and language detection. +    - Recoding options: +	+ Skip Translation  on request:      - Multibyte(not-UTF8) support for FS classes diff --git a/configure.in b/configure.in index 16051b5..013e9ae 100644 --- a/configure.in +++ b/configure.in @@ -201,6 +201,45 @@ AM_PATH_ASPELL([      HAVE_ASPELL=no  ]) + +PTHREAD_LIBS=error +AC_EGREP_CPP(yes,[ +#if (defined(__FreeBSD_cc_version) && __FreeBSD_cc_version <= 500001) || defined(__OpenBSD__) +	  yes +#endif +], +	PTHREAD_CFLAGS="-D_THREAD_SAFE"  +	PTHREAD_LIBS="-pthread" +) +if test "x$PTHREAD_LIBS" = "xerror"; then +	AC_CHECK_LIB(pthread, pthread_mutex_lock, PTHREAD_LIBS="-lpthread") +fi +if test "x$PTHREAD_LIBS" = xerror; then +        AC_CHECK_LIB(pthreads, pthread_mutex_lock, PTHREAD_LIBS="-lpthreads") +fi +if test "x$PTHREAD_LIBS" = xerror; then +        AC_CHECK_LIB(c_r, pthread_mutex_lock, PTHREAD_LIBS="-lc_r") +fi +if test "x$PTHREAD_LIBS" = xerror; then +	AC_CHECK_FUNC(pthread_mutex_lock, PTHREAD_LIBS="") +fi +if test "x$PTHREAD_LIBS" = xerror; then +	PTHREAD_LIBS="" +	PTHREAD_CFLAGS="" +else +    AC_CHECK_HEADER(pthread.h, [ +	AC_DEFINE(HAVE_PTHREAD,1,[Defines if pthread is available]) +	HAVE_PTHREAD=yes +    ],[ +	HAVE_PTHREAD=no +	PTHREAD_LIBS="" +	PTHREAD_CFLAGS="" +    ]) +fi +AC_SUBST(PTHREAD_CFLAGS) +AC_SUBST(PTHREAD_LIBS) + +      dnl Checks for typedefs, structures, and compiler characteristics.  AC_C_CONST @@ -211,6 +250,8 @@ AC_OUTPUT(src/Makefile engines/Makefile external/Makefile ui/Makefile examples/M  echo ""  echo "Configuration:" +echo "  POSIX Threading Support:               $HAVE_PTHREAD" +echo ""  echo "  Dynamic Engine Loading Support:        $HAVE_DLOPEN"  echo "  Enca Charset Detection Support:        $HAVE_ENCA"  echo "  LibRCD Charset Detection Support:      $HAVE_RCD" diff --git a/external/rcclibtranslate.c b/external/rcclibtranslate.c index 56ce8a2..46fcd6e 100644 --- a/external/rcclibtranslate.c +++ b/external/rcclibtranslate.c @@ -45,11 +45,34 @@ static char *rccCreateKey(const char *from, const char *to, const char *data, si      return res;  } +static char *rccTranslateFixEOL(char *result, const char *text) { +    size_t i,j; +    char *res; +     +    if (!result) return result; +    if (strstr(text, "\r\n")) return result; +     +    res = (char*)malloc((strlen(result)+1)*sizeof(char)); +    if (!res) { +	free(result); +	return NULL; +    } +     +    for (i=0, j=0;result[i];i++) { +	if ((result[i]=='\r')&&(result[i+1]=='\n')) i++; +	else res[j++] = result[i]; +    } +    res[j] = 0; +    free(result); +    return res; +} +  static void *rccLibPostponed(void *info) {      char *result;      char *data;      char from[3];      char to[3]; +    size_t datalen;      from[2] = 0;      to[2] = 0; @@ -60,13 +83,21 @@ static void *rccLibPostponed(void *info) {  	if (data) {  	    g_mutex_unlock(mutex); +	    datalen = strlen(data); +	      	    memcpy(from, data, 2);  	    memcpy(to, data + 2, 2); -	    result = translate_session_translate_text(session, data + 4, from, to, NULL, NULL, NULL); -	    if (result) { -		rccDb4SetKey(db4ctx, data, strlen(data), result); -		free(result); +	    result = rccDb4GetKey(db4ctx, data, datalen); +	    if (result) free(result); +	    else { +		result = translate_session_translate_text(session, data + 4, from, to, NULL, NULL, NULL); + +		if (result) { +		    result = rccTranslateFixEOL(result, data+4); +		    rccDb4SetKey(db4ctx, data, datalen, result); +		    free(result); +		}  	    }  	    free(data); @@ -164,6 +195,26 @@ void rccLibTranslateFree() {  } +static void rccLibTranslateQueue(const char *from, const char *to, const char *text) { +#ifdef HAVE_LIBTRANSLATE +    char *key = NULL; +    size_t keysize; +     +    if ((!session)||(!from)||(!to)||(!text)) return; +    if ((strlen(from)!=2)||(strlen(to)!=2)) return; + +    if (db4ctx) { +	key = rccCreateKey(from,to,text,&keysize); +	if (key) { +	    g_mutex_lock(mutex); +	    g_queue_push_tail(queue, key); +	    g_mutex_unlock(mutex); +	    g_cond_signal(cond); +	} +    } +#endif /* HAVE_LIBTRANSLATE */ +} +  static char *rccLibTranslateDo(const char *from, const char *to, const char *text, unsigned long timeout) {  #ifdef HAVE_LIBTRANSLATE      char *result; @@ -188,6 +239,8 @@ static char *rccLibTranslateDo(const char *from, const char *to, const char *tex  # else      result = translate_session_translate_text(session, text, from, to, NULL, NULL, NULL);  # endif /* HAVE_LIBTRANSLATE_TIMED_TRANSLATE */ + +    result = rccTranslateFixEOL(result, text);      if ((db4ctx)&&(key)) {  	if (result) { @@ -242,6 +295,7 @@ void *rccLibTranslate(void *info) {  			res = read(s, buffer + readed, size - readed);  			if (res<=0) connected = 0;  		    } +		    if (!connected)  goto clear;  		    prefix.cmd.cmd = 0;  		    prefix.cmd.size = 0; @@ -264,14 +318,30 @@ respond:  		    } else connected = 0;  		    if (prefix.cmd.size) free(translated);		     +clear: +		    free(buffer); +		} else connected = 0; +	    break; +	    case RCC_EXTERNAL_COMMAND_TRANSLATE_QUEUE: +		size = 1 + prefix.cmd.size + sizeof(rcc_external_command_s) - sizeof(rcc_translate_prefix_s); +		buffer = (char*)malloc(size); +		if (buffer) { +		    for (readed = 0; (readed < size)&&(connected); readed += res) { +			res = read(s, buffer + readed, size - readed); +			if (res<=0) connected = 0; +		    } +		    if ((connected)&&(!prefix.from[2])&&(!prefix.to[2])&&(!buffer[readed-1])) { +			rccLibTranslateQueue(prefix.from, prefix.to, buffer); +		    }  		    free(buffer);  		} else connected = 0;  	    break;  	    default: -		buffer = (char*)malloc(prefix.cmd.size); +		size = 1 + prefix.cmd.size + sizeof(rcc_external_command_s) - sizeof(rcc_translate_prefix_s); +		buffer = (char*)malloc(size);  		if (buffer) { -		    for (readed = 0; (readed < prefix.cmd.size)&&(connected); readed += res) { -			res = read(s, buffer + readed, prefix.cmd.size - readed); +		    for (readed = 0; (readed < size)&&(connected); readed += res) { +			res = read(s, buffer + readed, size - readed);  			if (res<=0) connected = 0;  		    }  		    free(buffer); diff --git a/src/Makefile.am b/src/Makefile.am index 4ba3c35..0a1fdc1 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -7,6 +7,7 @@ librcc_la_SOURCES = librcc.c \      curconfig.c curconfig.h \      rccconfig.c rccconfig.h \      rcclist.c rcclist.h \ +    rccmutex.c rccmutex.h \      plugin.c plugin.h \      rccexternal.c rccexternal.h \      fake_enca.h fake_rcd.h \ @@ -23,7 +24,7 @@ librcc_la_SOURCES = librcc.c \      internal.h  include_HEADERS = librcc.h -AM_CPPFLAGS = -I../src -DLIBRCC_DATA_DIR=\"${pkgdatadir}\" @XML_INCLUDES@ @DLOPEN_INCLUDES@ @RCD_INCLUDES@ @ENCA_INCLUDES@ @BDB_INCLUDES@ @ASPELL_CFLAGS@ -librcc_la_LIBADD = @XML_LIBS@ @DLOPEN_LIBS@ @RCD_LIBS@ @ENCA_LIBS@ @BDB_LIBS@ @ASPELL_LIBS@ +AM_CPPFLAGS = -I../src -DLIBRCC_DATA_DIR=\"${pkgdatadir}\" @XML_INCLUDES@ @DLOPEN_INCLUDES@ @RCD_INCLUDES@ @ENCA_INCLUDES@ @BDB_INCLUDES@ @ASPELL_CFLAGS@ @PTHREAD_CFLAGS@ +librcc_la_LIBADD = @XML_LIBS@ @DLOPEN_LIBS@ @RCD_LIBS@ @ENCA_LIBS@ @BDB_LIBS@ @ASPELL_LIBS@ @PTHREAD_LIBS@  librcc_la_LDFLAGS = -version-info @LIBRCC_VERSION_INFO@ diff --git a/src/rccexternal.c b/src/rccexternal.c index 4a09948..6a81c56 100644 --- a/src/rccexternal.c +++ b/src/rccexternal.c @@ -42,6 +42,7 @@  #include "internal.h"  #define RCC_EXT_PROG_NAME "rccexternal" +#define RCC_EXTERNAL_TIMEOUT			250 /* 100us */  static pid_t pid = (pid_t)-1;  static char *addr = NULL; @@ -88,9 +89,13 @@ void rccExternalFree() {  }  static int rccExternalSetDeadline(struct timeval *tv, unsigned long timeout) { +/*      gettimeofday(tv, NULL);      tv->tv_sec += (tv->tv_usec + timeout + RCC_EXTERNAL_TIMEOUT) / 1000000;      tv->tv_usec = (tv->tv_usec + timeout + RCC_EXTERNAL_TIMEOUT) % 1000000; +*/ +    tv->tv_sec = (timeout + RCC_EXTERNAL_TIMEOUT) / 1000000; +    tv->tv_usec = (timeout + RCC_EXTERNAL_TIMEOUT) % 1000000;      return 0;  } @@ -103,7 +108,7 @@ size_t rccExternalWrite(int s, const char *buffer, ssize_t size, unsigned long t      if (s == -1) return -1; -    for (writed = 0; (writed < size)&&(connected); writed += connected?res:0) { +    for (writed = 0; ((writed < size)&&(connected)); writed += connected?res:0) {  	FD_ZERO(&fdcon);  	FD_SET(s, &fdcon);  	rccExternalSetDeadline(&tv, timeout); @@ -127,7 +132,7 @@ size_t rccExternalRead(int s, char *buffer, ssize_t size, unsigned long timeout)      if (s == -1) return -1; -    for (readed = 0; (readed < size)&&(connected); readed += connected?res:0) { +    for (readed = 0; ((readed < size)&&(connected)); readed += connected?res:0) {  	FD_ZERO(&fdcon);  	FD_SET(s, &fdcon);  	rccExternalSetDeadline(&tv, timeout); diff --git a/src/rccexternal.h b/src/rccexternal.h index bffd6b3..236e2df 100644 --- a/src/rccexternal.h +++ b/src/rccexternal.h @@ -1,8 +1,6 @@  #ifndef _RCC_EXTERNAL_H  #define _RCC_EXTERNAL_H -#define RCC_EXTERNAL_TIMEOUT			1000000 -  typedef enum rcc_external_module_t {      RCC_EXTERNAL_MODULE_CONTROL = 0,      RCC_EXTERNAL_MODULE_LIBRTRANSLATE, diff --git a/src/rccmutex.c b/src/rccmutex.c new file mode 100644 index 0000000..e2690fa --- /dev/null +++ b/src/rccmutex.c @@ -0,0 +1,73 @@ +#include <stdlib.h> +#include <time.h> + +#include "rccmutex.h" + +#define RCC_MUTEX_SLEEP 500 + +rcc_mutex rccMutexCreate() { +    rcc_mutex mutex; +     +    mutex = (rcc_mutex)malloc(sizeof(rcc_mutex_s)); +    if (mutex) { +#ifdef HAVE_PTHREAD +	pthread_mutex_init(&mutex->mutex, NULL); +#else +	mutex->mutex = 0; +#endif /* HAVE_PTHREAD */ +    } +    return mutex; +} + +void rccMutexFree(rcc_mutex mutex) { +    if (mutex) { +#ifdef HAVE_PTHREAD +	pthread_mutex_destroy(&mutex->mutex); +#endif /* HAVE_PTHREAD */ +	free(mutex); +    } +} + +int rccMutexLock(rcc_mutex mutex) { +#ifndef HAVE_PTHREAD +    struct timespec ts; +#endif /* !HAVE_PTHREAD */ + +    if (!mutex) return -1; +     +#ifdef HAVE_PTHREAD +    return pthread_mutex_lock(&mutex->mutex); +#else +    while (mutex->mutex) { +	    ts.tv_sec = RCC_MUTEX_SLEEP / 1000000; +	    ts.tv_nsec = (RCC_MUTEX_SLEEP % 1000000)*1000; +	    nanosleep(&ts, NULL); +    } +    mutex->mutex = 1; + +    return 0; +#endif /* HAVE_PTHREAD */ +} + +int rccMutexTryLock(rcc_mutex mutex) { +    if (!mutex) return -1; +     +#ifdef HAVE_PTHREAD +    return pthread_mutex_trylock(&mutex->mutex); +#else +    if (mutex->mutex) return -1; +    mutex->mutex = 1; +    return 0; +#endif /* HAVE_PTHREAD */ +} + +void rccMutexUnLock(rcc_mutex mutex) { +    if (!mutex) return; +#ifdef HAVE_PTHREAD +    pthread_mutex_unlock(&mutex->mutex); +#else +    mutex->mutex = 0; +#endif /* HAVE_PTHREAD */ +} + + diff --git a/src/rccmutex.h b/src/rccmutex.h new file mode 100644 index 0000000..8585621 --- /dev/null +++ b/src/rccmutex.h @@ -0,0 +1,27 @@ +#ifndef _RCC_MUTEX_H +#define _RCC_MUTEX_H + +#include "../config.h" + +#ifdef HAVE_PTHREAD +# include <pthread.h> +#endif /* HAVE_PTHREAD */ + +struct rcc_mutex_t { +#ifdef HAVE_PTHREAD +    pthread_mutex_t mutex; +#else +    unsigned char mutex; +#endif /* HAVE_PTHREAD */ +}; +typedef struct rcc_mutex_t rcc_mutex_s; +typedef struct rcc_mutex_t *rcc_mutex; + +rcc_mutex rccMutexCreate(); +void rccMutexFree(rcc_mutex mutex); + +int rccMutexLock(rcc_mutex mutex); +int rccMutexTryLock(rcc_mutex mutex); +void rccMutexUnLock(rcc_mutex mutex); +  +#endif /* _RCC_MUTEX_H */ diff --git a/src/rccstring.c b/src/rccstring.c index 9c4c19f..aa92407 100644 --- a/src/rccstring.c +++ b/src/rccstring.c @@ -61,6 +61,7 @@ int rccStringFixID(rcc_string string, rcc_context ctx) {  int rccStringChangeID(rcc_string string, rcc_language_id language_id) {      if ((!string)&&(language_id != (rcc_language_id)-1)) return -1; +//    printf("ChangingID %lu: %s\n", language_id, string);      ((rcc_string_header*)string)->language_id = language_id;      return 0;  } diff --git a/src/rcctranslate.c b/src/rcctranslate.c index d7bb4e4..9dcf411 100644 --- a/src/rcctranslate.c +++ b/src/rcctranslate.c @@ -3,10 +3,12 @@  #include <string.h>  #include "internal.h" +#include "rccconfig.h"  #include "rccexternal.h" +#include "rccmutex.h"  #include "rcctranslate.h" - +#define RCC_TRANSLATE_DEFAULT_TIMEOUT	1000000	/* 1s */  int rccTranslateInit() { @@ -26,18 +28,37 @@ rcc_translate rccTranslateOpen(const char *from, const char *to) {      translate = (rcc_translate)malloc(sizeof(rcc_translate_s));      if (!translate) return NULL; +     +    translate->mutex = rccMutexCreate(); +    translate->wmutex = rccMutexCreate(); +    if ((!translate->mutex)||(!translate->wmutex)) { +	if (translate->mutex) rccMutexFree(translate->mutex); +	if (translate->wmutex) rccMutexFree(translate->wmutex); +	free(translate); +	return NULL; +    }      translate->sock = rccExternalConnect(RCC_EXTERNAL_MODULE_LIBRTRANSLATE);      if (translate->sock == -1) { +	rccMutexFree(translate->mutex); +	rccMutexFree(translate->wmutex);  	free(translate);  	return NULL;      }      translate->remaining = 0; +    translate->werror = 0; +      translate->prefix.cmd.cmd = RCC_EXTERNAL_COMMAND_TRANSLATE;      translate->prefix.cmd.size = sizeof(rcc_translate_prefix_s);      memcpy(translate->prefix.from, from, 3*sizeof(char));      memcpy(translate->prefix.to, to,  3*sizeof(char)); + +    translate->wprefix.cmd.cmd = RCC_EXTERNAL_COMMAND_TRANSLATE_QUEUE; +    translate->wprefix.cmd.size = sizeof(rcc_translate_prefix_s); +    memcpy(translate->wprefix.from, from, 3*sizeof(char)); +    memcpy(translate->wprefix.to, to,  3*sizeof(char)); +      rccTranslateSetTimeout(translate, RCC_TRANSLATE_DEFAULT_TIMEOUT);      return translate; @@ -50,18 +71,40 @@ void rccTranslateClose(rcc_translate translate) {  #ifdef HAVE_LIBTRANSLATE      if (!translate) return;      if (translate->sock != -1) rccExternalClose(translate->sock); +    rccMutexFree(translate->mutex); +    rccMutexFree(translate->wmutex);      free(translate);  #endif /* HAVE_LIBTRANSLATE */  }  int rccTranslateSetTimeout(rcc_translate translate, unsigned long us) { -#ifdef HAVE_LIBTRANSLATE_TIMED_TRANSLATE          if (!translate) return -1;      translate->prefix.timeout = us;      return 0; -#else -    return -1; -#endif /* HAVE_LIBTRANSLATE_TIMED_TRANSLATE */ +} + +#define RCC_UNLOCK_W 1 +#define RCC_UNLOCK_R 2 +#define RCC_UNLOCK_RW 3 +#define RCC_UNLOCK_WR 3 +static char *rccTranslateReturn(rcc_translate translate, char *ret, int unlock) { +    if (unlock&RCC_UNLOCK_R) rccMutexUnLock(translate->mutex); +    if (unlock&RCC_UNLOCK_W) rccMutexUnLock(translate->wmutex); +    return ret; +} +#define rccTranslateReturnNULL(translate, unlock) rccTranslateReturn(translate, NULL, unlock)  + +static int rccTranslateQueue(rcc_translate translate, const char *buf) { +    size_t len, err; +     +     +    len = strlen(buf); +    translate->wprefix.cmd.size = sizeof(rcc_translate_prefix_s) + len - sizeof(rcc_external_command_s); + +    err = rccExternalWrite(translate->sock, (char*)&translate->wprefix, sizeof(rcc_translate_prefix_s) - 1, 0); +    if (!err) err = rccExternalWrite(translate->sock, buf, len + 1, 0); +    fsync(translate->sock); +    return err?1:0;  }  char *rccTranslate(rcc_translate translate, const char *buf) { @@ -69,27 +112,57 @@ char *rccTranslate(rcc_translate translate, const char *buf) {      rcc_external_command_s resp;      size_t err, len;      char *buffer; -/*      size_t i; -*/ -     +      if ((!translate)||(!buf)) return NULL; -/*     -    if (!strcmp(translate->prefix.to, "en")) { -	for (i=0;buf[i];i++)  +    if (!strcmp(translate->prefix.to, rcc_english_language_sn)) { +	for (i=0;buf[i];i++) {  	    if ((unsigned char)buf[i]>0x7F) break; +	    if ((buf[i]>='A')&&(buf[i]<='Z')) break; +	    if ((buf[i]>='a')&&(buf[i]<='z')) break; +	}  	if (!buf[i]) return NULL;      } -*/ +     +    rccMutexLock(translate->wmutex); +     +    if (rccMutexTryLock(translate->mutex)) { +	if ((translate->werror)||(translate->sock == -1)) return rccTranslateReturnNULL(translate,RCC_UNLOCK_W); + +	if (rccTranslateQueue(translate, buf)) translate->werror = 1; +	return rccTranslateReturnNULL(translate, RCC_UNLOCK_W); +    } +     +    if (translate->werror) { +	rccExternalClose(translate->sock); +	translate->sock = -1; +	translate->werror = 0; +    }      if (translate->sock == -1) {  	translate->sock = rccExternalConnect(RCC_EXTERNAL_MODULE_LIBRTRANSLATE); -	if (translate->sock == -1) return NULL; +	if (translate->sock == -1) { +	    return rccTranslateReturnNULL(translate,RCC_UNLOCK_RW); +	} else { +	    translate->werror = 0; +	    translate->remaining = 0; +	}      } else if (translate->remaining) {  	if (translate->remaining == (size_t)-1) {  	    err = rccExternalRead(translate->sock, (char*)&resp, sizeof(rcc_external_command_s), 0); -	    if (err) return NULL; +	    if (err) { +		if (err == sizeof(rcc_external_command_s)) { +		    if (rccTranslateQueue(translate, buf)) { +		        rccExternalClose(translate->sock); +			translate->sock = -1; +		    } +		} else { +		    rccExternalClose(translate->sock); +		    translate->sock = -1; +		} +		return rccTranslateReturnNULL(translate,RCC_UNLOCK_RW); +	    }  	    translate->remaining = resp.size;  	} @@ -97,13 +170,18 @@ char *rccTranslate(rcc_translate translate, const char *buf) {  	if (!buffer) {  	    rccExternalClose(translate->sock);  	    translate->sock = -1; -	    return NULL; +	    return rccTranslateReturnNULL(translate,RCC_UNLOCK_RW);  	} +	  	err = rccExternalRead(translate->sock, buffer, translate->remaining, 0);  	free(buffer);  	if (err) {  	    translate->remaining = err; -	    return NULL; +	    if (rccTranslateQueue(translate, buf)) { +		rccExternalClose(translate->sock); +		translate->sock = -1; +	    } +	    return rccTranslateReturnNULL(translate,RCC_UNLOCK_RW);  	}  	translate->remaining = 0;      } @@ -114,41 +192,50 @@ char *rccTranslate(rcc_translate translate, const char *buf) {      if (err) {  	rccExternalClose(translate->sock);  	translate->sock = -1; -	return NULL; +	return rccTranslateReturnNULL(translate,RCC_UNLOCK_RW);      }      err = rccExternalWrite(translate->sock, buf, len + 1, 0);      if (err) {  	rccExternalClose(translate->sock);  	translate->sock = -1; -	return NULL; +	return rccTranslateReturnNULL(translate,RCC_UNLOCK_RW);      } +    rccMutexUnLock(translate->wmutex);      err = rccExternalRead(translate->sock, (char*)&resp, sizeof(rcc_external_command_s), translate->prefix.timeout);      if (err) {  	if (err == sizeof(rcc_external_command_s)) {  	    translate->remaining = (size_t)-1;  	} else { +	    rccMutexLock(translate->wmutex);  	    rccExternalClose(translate->sock);  	    translate->sock = -1; +	    rccMutexUnLock(translate->wmutex);  	} -	return NULL; +	return rccTranslateReturnNULL(translate,RCC_UNLOCK_R);      } -    if ((resp.cmd!=RCC_EXTERNAL_COMMAND_TRANSLATE)||(!resp.size)) return NULL; + +    if ((resp.cmd!=RCC_EXTERNAL_COMMAND_TRANSLATE)||(!resp.size)) +	return rccTranslateReturnNULL(translate,RCC_UNLOCK_R);      buffer = (char*)malloc(resp.size*sizeof(char));      if (!buffer) { +	rccMutexLock(translate->wmutex);  	rccExternalClose(translate->sock);  	translate->sock = -1; -	return NULL; +	rccMutexUnLock(translate->wmutex); + +	return rccTranslateReturnNULL(translate,RCC_UNLOCK_R);      } +      err = rccExternalRead(translate->sock, buffer, resp.size, 0);      if (err) {  	translate->remaining = err;  	free(buffer); -	return NULL; +	return rccTranslateReturnNULL(translate,RCC_UNLOCK_R);      } -    return buffer; +    return rccTranslateReturn(translate, buffer, RCC_UNLOCK_R);  #else      return NULL;  #endif /* HAVE_LIBTRANSLATE */ diff --git a/src/rcctranslate.h b/src/rcctranslate.h index 961af6f..b00cdfd 100644 --- a/src/rcctranslate.h +++ b/src/rcctranslate.h @@ -1,9 +1,10 @@  #ifndef _RCC_TRANSLATE_H  #define _RCC_TRANSLATE_H +#include "rccmutex.h"  #include "rccexternal.h" -#define RCC_TRANSLATE_DEFAULT_TIMEOUT	5000000	/* 5s */  #define RCC_EXTERNAL_COMMAND_TRANSLATE 0x80 +#define RCC_EXTERNAL_COMMAND_TRANSLATE_QUEUE 0x81  struct rcc_translate_prefix_t { @@ -19,8 +20,13 @@ typedef struct rcc_translate_prefix_t *rcc_translate_prefix;  struct rcc_translate_t {      rcc_translate_prefix_s prefix; +    rcc_translate_prefix_s wprefix;      size_t remaining; +    rcc_mutex mutex; +    rcc_mutex wmutex;      int sock; + +    unsigned char werror;  };  typedef struct rcc_translate_t rcc_translate_s; diff --git a/src/recode.c b/src/recode.c index 7e12343..d337164 100644 --- a/src/recode.c +++ b/src/recode.c @@ -15,21 +15,34 @@  #include "rccspell.h"  #define isSpace(ch) ((ch<0x7F)&&((ch<'A')||(ch>'z')||((ch>'Z')&&(ch<'a')))) -#define RCC_REQUIRED_PROBABILITY	0.66 +#define RCC_PROBABILITY_STEP		0.10 +#define RCC_REQUIRED_PROBABILITY	0.33 +#define RCC_REQUIRED_LENGTH		5 +#define RCC_ACCEPTABLE_PROBABILITY	0 +#define RCC_ACCEPTABLE_LENGTH		3  rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len, rcc_string *retstring) { -    rcc_speller speller; +    rcc_speller speller = NULL, english_speller = NULL;      unsigned long i, nlanguages;      rcc_language_config config, config0 = NULL;      rcc_string recoded;      unsigned char *utf8;      size_t j, mode; -    unsigned long words, english, result; +    unsigned long spres, words, english, result; +    size_t longest;      unsigned char english_mode, english_word = 1; +    char *english_string = NULL;      rcc_language_id english_lang = (rcc_language_id)-1; +    size_t english_longest = 0; +    unsigned char is_english_string = 1;      double res, english_res = 0;      rcc_option_value usedb4; -     +    rcc_language_id bestlang = (rcc_language_id)-1; +    unsigned long bestlongest = RCC_ACCEPTABLE_LENGTH; +    double bestres = RCC_ACCEPTABLE_PROBABILITY; +    char *best_string = NULL; + +    unsigned long accepted_nonenglish_langs = 0;      usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE); @@ -50,6 +63,15 @@ rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id class_id      nlanguages = ctx->n_languages; +    english_lang = rccGetLanguageByName(ctx, rcc_english_language_sn); +    if (english_lang != (rcc_language_id)-1) { +	config = rccGetUsableConfig(ctx, english_lang); +	if (config) { +	    english_speller  = rccConfigGetSpeller(config); +	    if (rccSpellerGetError(english_speller)) english_speller = NULL; +	} +    } +          for (i=0;i<nlanguages;i++) {  	config = rccGetUsableConfig(ctx, (rcc_language_id)i);  	if (!config) continue; @@ -68,11 +90,20 @@ rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id class_id  	else english_mode = 0;  	utf8 = (char*)rccStringGetString(recoded); -	for (result=0,english=0,words=0,mode=0,j=0;utf8[j];j++) { +	printf("%s\n", config->language->sn); +	 +	for (result=0,english=0,words=0,longest=0,mode=0,j=0;utf8[j];j++) {  	    if (isSpace(utf8[j])) {  		if (mode) { -		    if ((!english_mode)&&(english_word)) english++; -		    result+=rccSpellerSized(speller, utf8 + mode - 1, j - mode + 1)?1:0; +		    if ((!english_mode)&&(english_word)&&(rccSpellerSized(english_speller, utf8 + mode -1, j - mode + 1))) +			english++; +		    else { +			if ((english_mode)&&(!english_word)) is_english_string = 0; +			spres = rccSpellerSized(speller, utf8 + mode - 1, j - mode + 1)?1:0; +			printf("%.*s %s\n",j-mode+1,utf8+mode-1, spres?"<======":""); +			if ((spres)&&((j - mode + 1)>longest)) longest = j - mode + 1; +			result+=spres; +		    }  		    words++;  		    mode = 0;  		} else continue; @@ -85,40 +116,89 @@ rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id class_id  		}  	    }  	} +  	if (mode) { -	    result+=rccSpeller(speller, utf8 + mode - 1)?1:0; +	    if ((!english_mode)&&(english_word)&&(rccSpeller(english_speller, utf8 + mode -1))) +		english++; +	    else { +		if ((english_mode)&&(!english_word)) is_english_string = 0; +		spres = rccSpeller(speller, utf8 + mode - 1)?1:0; +		if ((spres)&&((j-mode+1)>longest)) longest = j - mode + 1; +		result += spres; +	    }  	    words++;  	}  	if (english_mode) { +	    if (english_string) free(english_string); +	    printf("%u %u\n", result, words); +  	    english_res = 1.*result/words; -	    english_lang = (rcc_language_id)i;     -	} else if (words) { -	    res = 1.*result/words; -	    if (res > RCC_REQUIRED_PROBABILITY) { +	    english_lang = (rcc_language_id)i; +	    english_longest = longest; +	    english_string = recoded; +	} else if (words>english) { +	    res = 1.*result/(words - english); +	    printf("%u %u %u\n", result, words, english); +	    if ((res > RCC_REQUIRED_PROBABILITY)&&(longest > RCC_REQUIRED_LENGTH)) { +		if (best_string) free(best_string); +		if (english_string) free(english_string); +		  		if (retstring) *retstring = recoded;  		else free(recoded);  		return (rcc_language_id)i; -	    } -	    if (words > english) { -		res = 1.*(result - english)/(words - english); -		if (res > RCC_REQUIRED_PROBABILITY) { -		    if (retstring) *retstring = recoded; -		    else free(recoded); -		    return (rcc_language_id)i; -		} -	    } -	} -	 -	free(recoded); +	    } else if  ((res > bestres + RCC_PROBABILITY_STEP)|| +		    ((res > bestres - RCC_PROBABILITY_STEP)&&(longest > bestlongest))|| +		    ((res > bestres)&&(longest == bestlongest))) { +		 +		if (best_string) free(best_string); +		 +		bestres = res; +		bestlang = (rcc_language_id)i; +		bestlongest = longest; +		best_string = recoded; +	    }  else if (!accepted_nonenglish_langs) { +		bestlang = (rcc_language_id)i; +		best_string = recoded; +	    } else free(recoded); + +	    accepted_nonenglish_langs++; +	} else free(recoded);      } -    if (english_res > RCC_REQUIRED_PROBABILITY) { -        if (retstring) { -	    *retstring = rccCreateString(english_lang, buf, len); -	} +    if ((is_english_string)&&(english_res > RCC_REQUIRED_PROBABILITY)&&(english_longest > RCC_REQUIRED_LENGTH)) { +	if (best_string) free(best_string); +	if (retstring) *retstring = english_string; +	else if (english_string) free(english_string);          return english_lang;      } + +    if ((bestres > RCC_ACCEPTABLE_PROBABILITY)&&(bestlongest > RCC_ACCEPTABLE_LENGTH)) { +	if (english_string) free(english_string); +	if (retstring) *retstring = best_string; +	else if (best_string) free(best_string); +        return bestlang; +    }  + +    if ((is_english_string)&&(english_res > RCC_ACCEPTABLE_PROBABILITY)&&(english_longest > RCC_ACCEPTABLE_LENGTH)) { +	if (best_string) free(best_string);	 +	if (retstring) *retstring = english_string; +	else if (english_string) free(english_string); +        return english_lang; +    }  +     +    if (best_string) { +	if (english_string) free(english_string); +	if (retstring) *retstring = best_string; +	else if (best_string) free(best_string); +        return bestlang; +    } else if (best_string) free(best_string); +     +    if ((english_res > RCC_ACCEPTABLE_PROBABILITY)&&(english_longest > RCC_ACCEPTABLE_LENGTH)) { +	if (retstring) *retstring = english_string; +	else if (english_string) free(english_string); +        return english_lang; +    } else if (english_string) free(english_string);      return (rcc_language_id)-1;  } @@ -206,9 +286,12 @@ rcc_string rccSizedFrom(rcc_context ctx, rcc_class_id class_id, const char *buf,  */      detected_language_id = rccDetectLanguageInternal(ctx, class_id, buf, len, &result); -    if (detected_language_id != (rcc_language_id)-1) return result; +    if (detected_language_id != (rcc_language_id)-1) { +	printf("Language %i: %s\n", rccStringGetLanguage(result), result); +	return result; +    } + -          err = rccConfigure(ctx);      if (err) return NULL; @@ -316,7 +399,6 @@ char *rccSizedTo(rcc_context ctx, rcc_class_id class_id, rcc_const_string buf, s  	    }  	    if ((translate == RCC_OPTION_TRANSLATE_TO_ENGLISH)||((config->trans)&&(!translated))) { -		puts("entrans");  		if (!config->entrans) {  		    config->entrans = rccTranslateOpen(rccGetLanguageName(ctx, language_id), rcc_english_language_sn);  		} @@ -384,7 +466,6 @@ char *rccSizedRecode(rcc_context ctx, rcc_class_id from, rcc_class_id to, const      const char *from_charset, *to_charset;      rcc_charset_id from_charset_id, to_charset_id;      rcc_class_type class_type; -    rcc_option_value usedb4;      if (!ctx) {  	if (rcc_default_ctx) ctx = rcc_default_ctx; @@ -394,20 +475,9 @@ char *rccSizedRecode(rcc_context ctx, rcc_class_id from, rcc_class_id to, const      class_type = rccGetClassType(ctx, to);      if ((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_NAMES))) goto recoding; -    if (rccGetOption(ctx, RCC_OPTION_LEARNING_MODE)&RCC_OPTION_LEARNING_FLAG_LEARN) goto recoding; - -    usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE); -    if (usedb4&RCC_OPTION_LEARNING_FLAG_USE) { -	stmp = rccDb4GetKey(ctx->db4ctx, buf, len); -	if (stmp) { -	     if (rccStringFixID(stmp, ctx)) free(stmp); -	     else { -		result = rccSizedTo(ctx, to, stmp, rlen); -		free(stmp); -		return result; -	    } -	} -    } +    if (rccGetOption(ctx, RCC_OPTION_LEARNING_MODE)) goto recoding; +    if (rccGetOption(ctx, RCC_OPTION_AUTODETECT_LANGUAGE)) goto recoding; +    if (rccGetOption(ctx, RCC_OPTION_TRANSLATE)) goto recoding;      err = rccConfigure(ctx);      if (err) return NULL;  | 
