diff options
| -rw-r--r-- | ToDo | 7 | ||||
| -rw-r--r-- | configure.in | 8 | ||||
| -rw-r--r-- | examples/Makefile.am | 3 | ||||
| -rw-r--r-- | examples/example2.c | 13 | ||||
| -rw-r--r-- | examples/input-russian.txt | 2 | ||||
| -rw-r--r-- | examples/rcc-gtk-config.c | 6 | ||||
| -rw-r--r-- | m4/aspell.m4 | 44 | ||||
| -rw-r--r-- | src/Makefile.am | 5 | ||||
| -rw-r--r-- | src/librcc.h | 119 | ||||
| -rw-r--r-- | src/lng.c | 45 | ||||
| -rw-r--r-- | src/lng.h | 2 | ||||
| -rw-r--r-- | src/lngconfig.c | 229 | ||||
| -rw-r--r-- | src/lngconfig.h | 7 | ||||
| -rw-r--r-- | src/rccconfig.c | 27 | ||||
| -rw-r--r-- | src/rccconfig.h | 5 | ||||
| -rw-r--r-- | src/rccexternal.c | 2 | ||||
| -rw-r--r-- | src/rcciconv.c | 5 | ||||
| -rw-r--r-- | src/rcciconv.h | 2 | ||||
| -rw-r--r-- | src/rccspell.c | 63 | ||||
| -rw-r--r-- | src/rccspell.h | 29 | ||||
| -rw-r--r-- | src/rccstring.c | 8 | ||||
| -rw-r--r-- | src/rccstring.h | 1 | ||||
| -rw-r--r-- | src/rcctranslate.c | 10 | ||||
| -rw-r--r-- | src/recode.c | 233 | ||||
| -rw-r--r-- | ui/rccnames.c | 4 | 
25 files changed, 795 insertions, 84 deletions
@@ -1,4 +1,4 @@ -0.2.x: +0.3.x:      - Common encodings:  	+ Provide way to add to all languages several default Unicode encodings (UTF8, UTF16, UTF16BE)  	+ Special type of classes to select only from Unicode encodings (or even just specified subset of encodings) @@ -8,9 +8,8 @@  	    * Code some options in charset name. (SpecialEncodingPrefix_Encoding_EncodingOptions)      - Buffer managment:  	+ SetBufferSize ( 0 - autogrow ) -    - Language autodetection -	+ Using spellchecker (aspell) -    - Look on ofline translation libraries     +    - Look on ofline translation libraries and other possibilities to improove  +      translation and language detection.  on request:      - Multibyte(not-UTF8) support for FS classes diff --git a/configure.in b/configure.in index b833095..16051b5 100644 --- a/configure.in +++ b/configure.in @@ -194,6 +194,13 @@ else  fi  fi +AM_PATH_ASPELL([ +    AC_DEFINE(HAVE_ASPELL,1,[Defines if aspell is available]) +    HAVE_ASPELL=yes +],[ +    HAVE_ASPELL=no +]) +  dnl Checks for typedefs, structures, and compiler characteristics.  AC_C_CONST @@ -209,6 +216,7 @@ echo "  Enca Charset Detection Support:        $HAVE_ENCA"  echo "  LibRCD Charset Detection Support:      $HAVE_RCD"  echo ""  echo "  Multilanguage support with DB4:        $HAVE_BDB" +echo "  Language autodetection using aspell:   $HAVE_ASPELL"  echo "  Libtranslate support:                  $HAVE_LIBTRANSLATE"  echo "  Libtranslate Timed Translate:          $HAVE_LIBTRANSLATE_TIMED_TRANSLATE"  echo "" diff --git a/examples/Makefile.am b/examples/Makefile.am index 99b7506..710b7dc 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -20,6 +20,9 @@ endif  EXTRA_DIST= input-russian.txt rcc.xml rcc-example.xml mpg123-rcc.patch +test: example2 +	cat input-russian.txt | ./example2 +  test-russian: example2  	cat input-russian.txt | ./example2 ru diff --git a/examples/example2.c b/examples/example2.c index cc4a3fa..5ef3efb 100644 --- a/examples/example2.c +++ b/examples/example2.c @@ -24,27 +24,26 @@ int main(int argc, char *argv[]) {      rccInit();      rccInitDefaultContext(NULL, 0, 0, classes, 0);      rccInitDb4(NULL, "example", 0); -    rccSetOption(NULL, RCC_OPTION_TRANSLATE, 1); - -//    rccExternalInit(); -//    rccExternalFree(); +    rccSetOption(NULL, RCC_OPTION_TRANSLATE, RCC_OPTION_TRANSLATE_FULL);      current_language_id = rccGetCurrentLanguage(NULL);      english_language_id = rccGetLanguageByName(NULL, "en");      if (argc>1) rccSetLanguageByName(NULL, argv[1]); +    else rccSetOption(NULL, RCC_OPTION_AUTODETECT_LANGUAGE, 1);      language_id = rccGetCurrentLanguage(NULL);      language = rccGetCurrentLanguageName(NULL);      if (language) printf("Current Language: %s\n\n", language); -    else printf("Unable Detect Language\n\n"); +    else { +	printf("Unable Detect Language, using english\n\n"); +	rccSetLanguageByName(NULL, "en"); +    }      while (fgets(buf,255,stdin)) {  	if (strlen(buf)<2) break; -	rccSetLanguage(NULL, language_id);  	rccstring = rccFrom(NULL, 0, buf);  	if (rccstring) { -	    rccSetLanguage(NULL, english_language_id);  	    recoded = rccTo(NULL, 1, rccstring);  	    if (recoded) {  		printf(recoded); diff --git a/examples/input-russian.txt b/examples/input-russian.txt index 8ea6e44..b89c105 100644 --- a/examples/input-russian.txt +++ b/examples/input-russian.txt @@ -2,3 +2,5 @@  ïðèâåò  ¯à¨¢¥â  привет +Good Morning +Guten Abend diff --git a/examples/rcc-gtk-config.c b/examples/rcc-gtk-config.c index d0775a6..7758f09 100644 --- a/examples/rcc-gtk-config.c +++ b/examples/rcc-gtk-config.c @@ -38,15 +38,15 @@ int main (int argc, char *argv[])      if (argc<1) config = argv[0];      else config = argv[1]; +    gtk_set_locale (); +    gtk_init (&argc, &argv); +      rccInit();      rccUiInit();      ctx = rccCreateContext(NULL, 0, 0, classes, 0);      rccLoad(ctx, config);      uictx = rccUiCreateContext(ctx); -    gtk_set_locale (); -    gtk_init (&argc, &argv); -      window1 = gtk_window_new (GTK_WINDOW_TOPLEVEL);      gtk_window_set_policy(GTK_WINDOW (window1), FALSE, FALSE, TRUE);      gtk_window_set_title (GTK_WINDOW (window1), "LibRCC Config"); diff --git a/m4/aspell.m4 b/m4/aspell.m4 new file mode 100644 index 0000000..164e534 --- /dev/null +++ b/m4/aspell.m4 @@ -0,0 +1,44 @@ +# This file is part of GNOME Translate. +# +# Copyright (C) 2004 Jean-Yves Lefort. +# +# As a special exception to the GNOME Translate licensing terms, +# Jean-Yves Lefort gives unlimited permission to copy, distribute and +# modify this file. + +dnl AM_PATH_ASPELL([ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +dnl +AC_DEFUN([AM_PATH_ASPELL], +[ASPELL_CFLAGS="" +ASPELL_LIBS="-laspell" + +ac_save_CFLAGS="$CFLAGS" +ac_save_LIBS="$LIBS" +CFLAGS="$CFLAGS $ASPELL_CFLAGS" +LIBS="$LIBS $ASPELL_LIBS" + +AC_MSG_CHECKING([for Aspell]) +AC_RUN_IFELSE([ +#include <aspell.h> + +int main() { +  new_aspell_config; + +  exit(0); +} +], [found=yes], [found=no], [found=yes]) +AC_MSG_RESULT($found) + +CFLAGS="$ac_save_CFLAGS" +LIBS="$ac_save_LIBS" + +if test $found = yes; then +	ifelse([$1],, :, [$1]) +else +	ASPELL_CFLAGS="" +	ASPELL_LIBS="" +	ifelse([$2],, :, [$2]) +fi + +AC_SUBST(ASPELL_CFLAGS) +AC_SUBST(ASPELL_LIBS)]) diff --git a/src/Makefile.am b/src/Makefile.am index baa08a4..4ba3c35 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -12,6 +12,7 @@ librcc_la_SOURCES = librcc.c \      fake_enca.h fake_rcd.h \      rccenca.c rccenca.h \      rccdb4.c rccdb4.h \ +    rccspell.c rccspell.h \      engine.c engine.h \      rccstring.c rccstring.h \      rccxml.c rccxml.h \ @@ -22,7 +23,7 @@ librcc_la_SOURCES = librcc.c \      internal.h  include_HEADERS = librcc.h -AM_CPPFLAGS = -I../src -DLIBRCC_DATA_DIR=\"${pkgdatadir}\" @XML_INCLUDES@ @DLOPEN_INCLUDES@ @RCD_INCLUDES@ @ENCA_INCLUDES@ @BDB_INCLUDES@ -librcc_la_LIBADD = @XML_LIBS@ @DLOPEN_LIBS@ @RCD_LIBS@ @ENCA_LIBS@ @BDB_LIBS@ +AM_CPPFLAGS = -I../src -DLIBRCC_DATA_DIR=\"${pkgdatadir}\" @XML_INCLUDES@ @DLOPEN_INCLUDES@ @RCD_INCLUDES@ @ENCA_INCLUDES@ @BDB_INCLUDES@ @ASPELL_CFLAGS@ +librcc_la_LIBADD = @XML_LIBS@ @DLOPEN_LIBS@ @RCD_LIBS@ @ENCA_LIBS@ @BDB_LIBS@ @ASPELL_LIBS@  librcc_la_LDFLAGS = -version-info @LIBRCC_VERSION_INFO@ diff --git a/src/librcc.h b/src/librcc.h index 52e6be4..d08937e 100644 --- a/src/librcc.h +++ b/src/librcc.h @@ -364,6 +364,23 @@ typedef int rcc_option_value;  #define RCC_OPTION_LEARNING_FLAG_LEARN 2  /** +  * Switch translation off. +  */ +#define RCC_OPTION_TRANSLATE_OFF 0 +/** +  * Translate data to english language (Current language don't matter). +  */ +#define RCC_OPTION_TRANSLATE_TO_ENGLISH 1 +/** +  * Skip translation of the english text. +  */ +#define RCC_OPTION_TRANSLATE_SKIP_ENGLISH 2 +/** +  * Translate whole data to the current language. +  */ +#define RCC_OPTION_TRANSLATE_FULL 3 + +/**    * List of options available    */  typedef enum rcc_option_t { @@ -371,8 +388,9 @@ typedef enum rcc_option_t {      RCC_OPTION_AUTODETECT_FS_TITLES,	/**< Detect titles of #RCC_CLASS_FS classes */      RCC_OPTION_AUTODETECT_FS_NAMES,	/**< Try to find encoding of #RCC_CLASS_FS by accessing fs */      RCC_OPTION_CONFIGURED_LANGUAGES_ONLY, /**< Use only configured languages or languages with auto-engines */ -    RCC_OPTION_TRANSLATE,		/**< Translate #rcc_string if it's language differs from current one */      RCC_OPTION_AUTOENGINE_SET_CURRENT,	/**< If enabled autodetection engine will set current charset */ +    RCC_OPTION_AUTODETECT_LANGUAGE,	/**< Enables language detection */ +    RCC_OPTION_TRANSLATE,		/**< Translate #rcc_string if it's language differs from current one */      RCC_MAX_OPTIONS  } rcc_option; @@ -970,6 +988,26 @@ int rccTranslateSetTimeout(rcc_translate translate, unsigned long us);  char *rccTranslate(rcc_translate translate, const char *buf);  /* recode.c */ + +/** +  * Tries to detect language of string +  * @param ctx is working context ( or default one if NULL supplied ) +  * @param class_id is encoding class +  * @param buf is original string (perhaps not zero terminated) +  * @param len is exact size of string or 0. In the last case the size is determined using 'strlen' function. +  * @result is language_id or -1 if autodetection is failed +  */ +rcc_language_id rccDetectLanguage(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len); +/** +  * Tries to detect charset of string +  * @param ctx is working context ( or default one if NULL supplied ) +  * @param class_id is encoding class +  * @param buf is original string (perhaps not zero terminated) +  * @param len is exact size of string or 0. In the last case the size is determined using 'strlen' function. +  * @result is auto_charset_id or -1 if autodetection is failed +  */ +int rccDetectCharset(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len); +  /**    * Recode string from specified encoding class to #rcc_string. Encoding detection engines and    * recoding cache are used (if possible) to detect original 'buf' encoding. Otherwise the  @@ -1079,7 +1117,7 @@ char *rccSizedRecodeToCharset(rcc_context ctx, rcc_class_id class_id, const char    * @param rlen in rlen the size of recoded string will be returned.    * @result is recoded string or NULL if recoding is not required or failed. It is up to the caller to free memory.    */ -char *rccSizedRecodeFromCharset(rcc_context ctx, rcc_class_id class_id, const char *charset, const char *buf, size_t len, size_t *rlen); +rcc_string rccSizedRecodeFromCharset(rcc_context ctx, rcc_class_id class_id, const char *charset, const char *buf, size_t len, size_t *rlen);  /**    * Recode string between specified encodings.     * @@ -1094,6 +1132,77 @@ char *rccSizedRecodeFromCharset(rcc_context ctx, rcc_class_id class_id, const ch  char *rccSizedRecodeCharsets(rcc_context ctx, const char *from, const char *to, const char *buf, size_t len, size_t *rlen); +/** +  * Tries to detect charset of string +  * @param config is language configuration +  * @param class_id is encoding class +  * @param buf is original string (perhaps not zero terminated) +  * @param len is exact size of string or 0. In the last case the size is determined using 'strlen' function. +  * @result is auto_charset_id or -1 if autodetection is failed +  */ +rcc_autocharset_id rccConfigDetectCharset(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len); + +/** +  * Recode string from specified encoding class to #rcc_string. Encoding detection engines and +  * recoding cache are used (if possible) to detect original 'buf' encoding. Otherwise the  +  * preconfigured encoding of class is assumed. +  * +  * @param config is language configuration +  * @param class_id is encoding class +  * @param buf is original string (perhaps not zero terminated) +  * @param len is exact size of string or 0. In the last case the size is determined using 'strlen' function. +  * @result is recoded string or NULL if recoding is not required or failed. It is up to the caller to free memory. +  */ +rcc_string rccConfigSizedFrom(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len); +/** +  * Recode string from #rcc_string to specified encoding class. If encoding class is of  +  * 'File System' type, the autoprobing for file names can be performed. In the other cases +  * the rcc_string will be recoded in preconfigured class encoding. +  * +  * @param config is language configuration +  * @param class_id is encoding class +  * @param buf is original zero terminated string +  * @param rlen in rlen the size of recoded string will be returned. +  * @result is recoded string or NULL if recoding is not required or failed. It is up to the caller to free memory. +  */ +char *rccConfigSizedTo(rcc_language_config config, rcc_class_id class_id, rcc_const_string buf, size_t *rlen); +/** +  * Recode string between different encoding classes. The conversion is relays on rccConfigSizedFrom +  * and rccConfigSizedTo functions. +  * @see rccConfigSizedFrom +  * @see rccConfigSizedTo +  * +  * @param config is language configuration +  * @param from is source encoding class +  * @param to is destination encoding class +  * @param buf is original string (perhaps not zero terminated) +  * @param len is exact size of string or 0. In the last case the size is determined using 'strlen' function. +  * @param rlen in rlen the size of recoded string will be returned. +  * @result is recoded string or NULL if recoding is not required or failed. It is up to the caller to free memory. +  */ +char *rccConfigSizedRecode(rcc_language_config config, rcc_class_id from, rcc_class_id to, const char *buf, size_t len, size_t *rlen); +/** +  * Recode string from specified encoding to #rcc_string.  +  * +  * @param config is language configuration +  * @param charset is source encoding +  * @param buf is original string (perhaps not zero terminated) +  * @param len is exact size of string or 0. In the last case the size is determined using 'strlen' function. +  * @result is recoded string or NULL if recoding is not required or failed. It is up to the caller to free memory. +  */ +rcc_string rccConfigSizedRecodeFromCharset(rcc_language_config config, rcc_class_id class_id, const char *charset, const char *buf, size_t len, size_t *rlen); +/** +  * Recode string from #rcc_string to specified encoding.  +  * +  * @param config is language configuration +  * @param charset is destination encoding +  * @param buf is original zero terminated string +  * @param rlen in rlen the size of recoded string will be returned. +  * @result is recoded string or NULL if recoding is not required or failed. It is up to the caller to free memory. +  */ +char *rccConfigSizedRecodeToCharset(rcc_language_config config, rcc_class_id class_id, const char *charset, rcc_const_string buf, size_t len, size_t *rlen); + +  #define rccFrom(ctx, class_id, buf) rccSizedFrom(ctx, class_id, buf, 0)  #define rccTo(ctx, class_id, buf) rccSizedTo(ctx, class_id, buf, NULL)  #define rccRecode(ctx, from, to, buf) rccSizedRecode(ctx, from, to, buf, 0, NULL) @@ -1104,6 +1213,12 @@ char *rccSizedRecodeCharsets(rcc_context ctx, const char *from, const char *to,  #define rccRecodeFromCharset(ctx, class_id, charset, buf) rccSizedRecodeFromCharset(ctx, class_id, charset, buf, 0, NULL)  #define rccRecodeCharsets(ctx, from, to, buf) rccSizedRecodeCharsets(ctx, from, to, buf, 0, NULL) +#define rccConfigFrom(ctx, class_id, buf) rccConfigSizedFrom(ctx, class_id, buf, 0) +#define rccConfigTo(ctx, class_id, buf) rccConfigSizedTo(ctx, class_id, buf, NULL) +#define rccConfigRecode(ctx, from, to, buf) rccConfigSizedRecode(ctx, from, to, buf, 0, NULL) +#define rccConfigRecodeToCharset(ctx, class_id, charset, buf) rccConfigSizedRecodeToCharset(ctx, class_id, charset, buf, 0, NULL) +#define rccConfigRecodeFromCharset(ctx, class_id, charset, buf) rccConfigSizedRecodeFromCharset(ctx, class_id, charset, buf, 0, NULL) +  /*******************************************************************************  ******************************** Options ***************************************  *******************************************************************************/ @@ -36,11 +36,39 @@ rcc_language_id rccGetLanguageByName(rcc_context ctx, const char *name) {      return (rcc_language_id)-1;  } -static rcc_language_id rccGetDefaultLanguage(rcc_context ctx) { -    unsigned int i; +int rccCheckLanguageUsability(rcc_context ctx, rcc_language_id language_id) { +    rcc_language_config config;      rcc_option_value clo;      rcc_engine_ptr *engines; -    rcc_language_config config; +    rcc_charset *charsets; + +    if (!ctx) { +	if (rcc_default_ctx) ctx = rcc_default_ctx; +	else return 0; +    } +    if (language_id>=ctx->n_languages) return 0; + +    language_id = rccGetRealLanguage(ctx, language_id); +     +    clo = rccGetOption(ctx, RCC_OPTION_CONFIGURED_LANGUAGES_ONLY); +    if (clo) { +	config = rccCheckConfig(ctx, (rcc_language_id)language_id); +	if ((!config)||(!config->configured)) { +	    charsets = ctx->languages[language_id]->charsets; +	    if ((charsets[0])&&(charsets[1])&&(charsets[2])) { +		if (clo == 1) { +		    engines = ctx->languages[language_id]->engines; +		    if ((!engines[0])||(!engines[1])) return 0; +		} else return 0; +	    } +	} +    } +    return 1; +} + + +static rcc_language_id rccGetDefaultLanguage(rcc_context ctx) { +    unsigned int i;      char stmp[RCC_MAX_LANGUAGE_CHARS+1];      if (ctx->default_language) return ctx->default_language; @@ -48,16 +76,7 @@ static rcc_language_id rccGetDefaultLanguage(rcc_context ctx) {      if (!rccLocaleGetLanguage(stmp, ctx->locale_variable, RCC_MAX_LANGUAGE_CHARS)) {      	for (i=0;ctx->languages[i];i++) {  	    if (!strcmp(ctx->languages[i]->sn, stmp)) { -		clo = rccGetOption(ctx, RCC_OPTION_CONFIGURED_LANGUAGES_ONLY); -		if (clo) { -		    config = rccCheckConfig(ctx, (rcc_language_id)i); -		    if ((!config)||(!config->configured)) { -			if (clo == 1) { -			    engines = ctx->languages[i]->engines; -			    if ((!engines[0])||(!engines[1])) break; -			} else break; -		    } -		} +		if (!rccCheckLanguageUsability(ctx, (rcc_language_id)i)) break;  		ctx->default_language = (rcc_language_id)i;  		return (rcc_language_id)i;  	    } @@ -4,6 +4,8 @@  #include "internal.h"  #include "lngconfig.h" + +int rccCheckLanguageUsability(rcc_context ctx, rcc_language_id language_id);  rcc_language_ptr rccGetLanguagePointer(rcc_context ctx, rcc_language_id language_id);  #define rccGetCurrentEnginePointer(ctx) rccConfigGetCurrentEnginePointer(ctx->current_config) diff --git a/src/lngconfig.c b/src/lngconfig.c index c50ee74..26d0779 100644 --- a/src/lngconfig.c +++ b/src/lngconfig.c @@ -2,9 +2,12 @@  #include <stdlib.h>  #include <string.h> +#include "../config.h" +  #include "internal.h"  #include "rccconfig.h"  #include "rcclocale.h" +#include "lng.h"  rcc_engine_ptr rccConfigGetEnginePointer(rcc_language_config config, rcc_engine_id engine_id) {      unsigned int i; @@ -165,6 +168,7 @@ int rccConfigInit(rcc_language_config config, rcc_context ctx) {      config->fsiconv = NULL;      config->trans = NULL; +    config->entrans = NULL;      config->ctx = ctx;      config->language = NULL; @@ -172,6 +176,7 @@ int rccConfigInit(rcc_language_config config, rcc_context ctx) {      config->engine = -1;      config->default_charset = dcharsets;      config->configured = 0; +    config->speller = NULL;      config->iconv_to = iconv_to;      config->configure = 1; @@ -204,6 +209,10 @@ void rccConfigClear(rcc_language_config config) {  	    rccTranslateClose(config->trans);  	    config->trans = NULL;  	} +	if (config->entrans) { +	    rccTranslateClose(config->entrans); +	    config->entrans = NULL; +	}  	if (config->iconv_to) {  	    free(config->iconv_to);  	    config->iconv_to = NULL; @@ -216,31 +225,55 @@ void rccConfigClear(rcc_language_config config) {  	    free(config->default_charset);  	    config->default_charset = NULL;  	} +	if (config->speller) { +	    rccSpellerFree(config->speller); +	    config->speller = NULL; +	}      }  } -rcc_language_config rccCheckConfig(rcc_context ctx, rcc_language_id language_id) { -    rcc_language_id new_language_id; -     -    new_language_id = rccGetRealLanguage(ctx, language_id); -    if ((new_language_id == (rcc_language_id)-1)||(new_language_id != language_id)) return NULL; -    if (!ctx->configs[language_id].charset) return NULL; -    if (!strcasecmp(ctx->languages[language_id]->sn, "off")) return NULL; +static rcc_language_config rccGetConfigPointer(rcc_context ctx, rcc_language_id language_id, rcc_language_id *r_language_id) { + +    language_id = rccGetRealLanguage(ctx, language_id); +    if (!strcasecmp(ctx->languages[language_id]->sn, rcc_disabled_language_sn)) return NULL; +    if (r_language_id) *r_language_id = language_id;      return ctx->configs + language_id;  } +rcc_language_config rccCheckConfig(rcc_context ctx, rcc_language_id language_id) { +    rcc_language_config config; + +    config = rccGetConfigPointer(ctx, language_id, NULL); +    if ((config)&&(!config->charset)) return NULL; + +    return config; +} + + +rcc_language_config rccGetUsableConfig(rcc_context ctx, rcc_language_id language_id) { +    rcc_language_config config; + +    config = rccGetConfigPointer(ctx, language_id, &language_id); +    if (config) { +	if (!rccCheckLanguageUsability(ctx, language_id)) return NULL; +	if ((!config->charset)&&(rccConfigInit(config, ctx))) return NULL; +	config->language = ctx->languages[language_id]; +    } + +    return config; +} +  rcc_language_config rccGetConfig(rcc_context ctx, rcc_language_id language_id) { -    language_id = rccGetRealLanguage(ctx, language_id); -    if (language_id == (rcc_language_id)-1) return NULL; -    if (!strcasecmp(ctx->languages[language_id]->sn, "off")) return NULL; -     -    if (!ctx->configs[language_id].charset) { -	if (rccConfigInit(ctx->configs+language_id, ctx)) return NULL; -    }     +    rcc_language_config config; -    ctx->configs[language_id].language = ctx->languages[language_id]; -    return ctx->configs + language_id; +    config = rccGetConfigPointer(ctx, language_id, &language_id); +    if (config) { +	if ((!config->charset)&&(rccConfigInit(config, ctx))) return NULL; +	config->language = ctx->languages[language_id]; +    } + +    return config;  }  rcc_language_config rccGetConfigByName(rcc_context ctx, const char *name) { @@ -261,6 +294,15 @@ rcc_language_config rccGetCurrentConfig(rcc_context ctx) {      return rccGetConfig(ctx, language_id);  } +rcc_speller rccConfigGetSpeller(rcc_language_config config) { +    if (!config) return NULL; +     +    if (config->speller) return config->speller; + +    config->speller = rccSpellerCreate(config->language->sn); +    return config->speller; +} +  rcc_engine_id rccConfigGetSelectedEngine(rcc_language_config config) {      if (!config) return (rcc_engine_id)-1; @@ -532,6 +574,161 @@ int rccConfigConfigure(rcc_language_config config) {      return 0;  } + +rcc_string rccConfigSizedFrom(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len) { +    rcc_context ctx; +    rcc_string result; +    rcc_option_value usedb4; +    rcc_autocharset_id charset_id; +    const char *charset; + + +    if (!config) return NULL; +    ctx = config->ctx; + +    if (rccStringSizedCheck(buf, len)) return NULL; +     +    usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE); + +    if (usedb4&RCC_OPTION_LEARNING_FLAG_USE) { +	result = rccDb4GetKey(ctx->db4ctx, buf, len); +	if (result) { +	     if (rccStringFixID(result, ctx)) free(result); +	     else return result; +	} +    } + +    charset_id = rccConfigDetectCharset(config, class_id, buf, len); +    if (charset_id != (rcc_autocharset_id)-1) +	charset = rccConfigGetAutoCharsetName(config, charset_id); +    else +	charset = rccConfigGetCurrentCharsetName(config, class_id); +     +    if (charset) { +	result = rccSizedFromCharset(ctx, charset, buf, len); +	if (result) rccStringChangeID(result, rccGetLanguageByName(ctx, config->language->sn)); +	return result; +    } +     +    return NULL; +} + +char *rccConfigSizedTo(rcc_language_config config, rcc_class_id class_id, rcc_const_string buf, size_t *rlen) { +    rcc_context ctx; +    const char *charset; + +    if (!config) return NULL; +    ctx = config->ctx; + +    charset = rccConfigGetCurrentCharsetName(config, class_id); + +    if (charset) +	return rccSizedToCharset(ctx, charset, buf, rlen); +     +    return NULL; +} + + +char *rccConfigSizedRecode(rcc_language_config config, rcc_class_id from, rcc_class_id to, const char *buf, size_t len, size_t *rlen) { +    rcc_context ctx; +    rcc_string result; +    rcc_option_value usedb4; +    rcc_autocharset_id charset_id; +    rcc_string stmp; +    const char *tocharset, *fromcharset; + + +    if (!config) return NULL; +    ctx = config->ctx; + +    if (rccStringSizedCheck(buf, len)) return NULL; +     +    usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE); + +    if (usedb4&RCC_OPTION_LEARNING_FLAG_USE) { +	stmp = rccDb4GetKey(ctx->db4ctx, buf, len); +	if (stmp) { +	     if (rccStringFixID(stmp, ctx)) free(stmp); +	     else { +		result = rccConfigSizedTo(config, to, stmp, rlen); +		free(stmp); +		return result; +	    } +	} +    } + +    charset_id = rccConfigDetectCharset(config, from, buf, len); +    if (charset_id != (rcc_autocharset_id)-1) +	fromcharset = rccConfigGetAutoCharsetName(config, charset_id); +    else +	fromcharset = rccConfigGetCurrentCharsetName(config, from); +     +    tocharset = rccConfigGetCurrentCharsetName(config, to); +     +    if ((fromcharset)&&(tocharset)) +	return rccSizedRecodeCharsets(ctx, fromcharset, tocharset, buf, len, rlen); + +    return NULL; + +} + + +char *rccConfigSizedRecodeToCharset(rcc_language_config config, rcc_class_id class_id, const char *charset, rcc_const_string buf, size_t len, size_t *rlen) { +    rcc_context ctx; +    rcc_string result; +    rcc_option_value usedb4; +    rcc_autocharset_id charset_id; +    rcc_string stmp; +    const char *ocharset; + + +    if (!config) return NULL; +    ctx = config->ctx; + +    if (rccStringSizedCheck(buf, len)) return NULL; +     +    usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE); + +    if (usedb4&RCC_OPTION_LEARNING_FLAG_USE) { +	stmp = rccDb4GetKey(ctx->db4ctx, buf, len); +	if (stmp) { +	     if (rccStringFixID(stmp, ctx)) free(stmp); +	     else { +		result = rccSizedToCharset(ctx, charset, stmp, rlen); +		free(stmp); +	        return result; +	    } +	} +    } + +    charset_id = rccConfigDetectCharset(config, class_id, buf, len); +    if (charset_id != (rcc_autocharset_id)-1) +	ocharset = rccConfigGetAutoCharsetName(config, charset_id); +    else +	ocharset = rccConfigGetCurrentCharsetName(config, class_id); +     +    if (ocharset) +	return rccSizedRecodeCharsets(ctx, ocharset, charset, buf, len, rlen); + +    return NULL; +} + +char *rccConfigSizedRecodeFromCharset(rcc_language_config config, rcc_class_id class_id, const char *charset, const char *buf, size_t len, size_t *rlen) { +    rcc_context ctx; +    const char *ocharset; + +    if (!config) return NULL; +    ctx = config->ctx; + +    ocharset = rccConfigGetCurrentCharsetName(config, class_id); + +    if (ocharset) +	return rccSizedRecodeCharsets(ctx, charset, ocharset, buf, len, rlen); + +    return NULL; +} + +  /*      rcc_option_value options[RCC_MAX_OPTIONS]; diff --git a/src/lngconfig.h b/src/lngconfig.h index 92cc050..9d23139 100644 --- a/src/lngconfig.h +++ b/src/lngconfig.h @@ -3,6 +3,7 @@  #include "rcciconv.h"  #include "rcctranslate.h" +#include "rccspell.h"  struct rcc_language_config_t {      rcc_context ctx; @@ -17,8 +18,10 @@ struct rcc_language_config_t {      unsigned char configured; +    rcc_speller speller;      rcc_translate trans;      rcc_language_id translang; +    rcc_translate entrans;      rcc_iconv fsiconv;  }; @@ -30,9 +33,13 @@ rcc_engine_ptr rccConfigCheckEnginePointer(rcc_language_config config, rcc_engin  rcc_engine_ptr rccConfigGetCurrentEnginePointer(rcc_language_config config);  rcc_engine_ptr rccConfigCheckCurrentEnginePointer(rcc_language_config config); +rcc_speller rccConfigGetSpeller(rcc_language_config config); +  int rccConfigInit(rcc_language_config config, rcc_context ctx);  void rccConfigClear(rcc_language_config config); +rcc_language_config rccGetUsableConfig(rcc_context ctx, rcc_language_id language_id); +  int rccConfigConfigure(rcc_language_config config);  rcc_charset_id rccConfigGetLocaleUnicodeCharset(rcc_language_config config, const char *locale_variable); diff --git a/src/rccconfig.c b/src/rccconfig.c index ed6d30a..f820606 100644 --- a/src/rccconfig.c +++ b/src/rccconfig.c @@ -12,13 +12,18 @@ rcc_language_alias rcc_default_aliases[] = {      { NULL, NULL}  }; +const char rcc_default_language_sn[] = "default"; +const char rcc_disabled_language_sn[] = "Off"; +const char rcc_english_language_sn[] = "en"; +const char rcc_disabled_engine_sn[] = "Off";  const char rcc_default_charset[] = "Default"; +  const char rcc_utf8_charset[] = "UTF-8";  const char rcc_engine_nonconfigured[] = "Default";  const char rcc_option_nonconfigured[] = "DEFAULT";  rcc_engine rcc_default_engine = { -    "Off", NULL, NULL, NULL, {NULL} +    rcc_disabled_engine_sn, NULL, NULL, NULL, {NULL}  };  rcc_engine rcc_russian_engine = { @@ -32,11 +37,11 @@ rcc_engine rcc_ukrainian_engine = {  rcc_language rcc_default_languages[RCC_MAX_LANGUAGES + 1];  rcc_language rcc_default_languages_embeded[RCC_MAX_LANGUAGES + 1] = { -{"default", {rcc_default_charset, NULL}, { +{rcc_default_language_sn, {rcc_default_charset, NULL}, {      &rcc_default_engine,      NULL  }}, -{"off", {rcc_default_charset, NULL}, { +{rcc_disabled_language_sn, {rcc_default_charset, NULL}, {      &rcc_default_engine,      NULL  }}, @@ -112,14 +117,28 @@ rcc_language rcc_default_languages_embeded[RCC_MAX_LANGUAGES + 1] = {  rcc_option_value_name rcc_sn_boolean[] = { "OFF", "ON", NULL };  rcc_option_value_name rcc_sn_learning[] = { "OFF", "ON", "RELEARN", "LEARN", NULL };  rcc_option_value_name rcc_sn_clo[] = { "ALL", "CONFIGURED_AND_AUTO", "CONFIGURED_ONLY", NULL }; +rcc_option_value_name rcc_sn_translate[] = { "OFF", "TO_ENGLISH", "SKIP_ENGLISH", "FULL", NULL };  rcc_option_description rcc_option_descriptions[RCC_MAX_OPTIONS+1];  rcc_option_description rcc_option_descriptions_embeded[RCC_MAX_OPTIONS+1] = { +#ifdef HAVE_DB_H      {RCC_OPTION_LEARNING_MODE, 1, { RCC_OPTION_RANGE_TYPE_MENU, 0, 3, 1 }, RCC_OPTION_TYPE_STANDARD,  "LEARNING_MODE", rcc_sn_learning }, +#else +    {RCC_OPTION_LEARNING_MODE, 1, { RCC_OPTION_RANGE_TYPE_MENU, 0, 3, 1 }, RCC_OPTION_TYPE_INVISIBLE,  "LEARNING_MODE", rcc_sn_learning }, +#endif /* HAVE_DB_H */      {RCC_OPTION_AUTODETECT_FS_NAMES, 1, { RCC_OPTION_RANGE_TYPE_BOOLEAN, 0, 0, 0}, RCC_OPTION_TYPE_STANDARD,  "AUTODETECT_FS_NAMES", rcc_sn_boolean},      {RCC_OPTION_AUTODETECT_FS_TITLES, 1, { RCC_OPTION_RANGE_TYPE_BOOLEAN, 0, 0, 0}, RCC_OPTION_TYPE_INVISIBLE, "AUTODETECT_FS_TITLES", rcc_sn_boolean},      {RCC_OPTION_CONFIGURED_LANGUAGES_ONLY, 1, { RCC_OPTION_RANGE_TYPE_MENU, 0, 2, 1}, RCC_OPTION_TYPE_INVISIBLE, "CONFIGURED_LANGUAGES_ONLY", rcc_sn_clo}, -    {RCC_OPTION_TRANSLATE, 0, { RCC_OPTION_RANGE_TYPE_BOOLEAN, 0, 0, 0}, RCC_OPTION_TYPE_STANDARD, "TRANSLATE", rcc_sn_boolean }, +#ifdef HAVE_ASPELL +    {RCC_OPTION_AUTODETECT_LANGUAGE, 0, { RCC_OPTION_RANGE_TYPE_BOOLEAN, 0, 0, 0}, RCC_OPTION_TYPE_STANDARD, "AUTODETECT_LANGUAGE", rcc_sn_boolean}, +#else +    {RCC_OPTION_AUTODETECT_LANGUAGE, 0, { RCC_OPTION_RANGE_TYPE_BOOLEAN, 0, 0, 0}, RCC_OPTION_TYPE_INVISIBLE, "AUTODETECT_LANGUAGE", rcc_sn_boolean}, +#endif  +#ifdef HAVE_LIBTRANSLATE +    {RCC_OPTION_TRANSLATE, 0, { RCC_OPTION_RANGE_TYPE_MENU, 0, 3, 1}, RCC_OPTION_TYPE_STANDARD, "TRANSLATE", rcc_sn_translate }, +#else +    {RCC_OPTION_TRANSLATE, 0, { RCC_OPTION_RANGE_TYPE_MENU, 0, 3, 1}, RCC_OPTION_TYPE_INVISIBLE, "TRANSLATE", rcc_sn_translate }, +#endif /* HAVE_LIBTRANSLATE */      {RCC_OPTION_AUTOENGINE_SET_CURRENT, 0, { RCC_OPTION_RANGE_TYPE_BOOLEAN, 0, 0, 0}, RCC_OPTION_TYPE_STANDARD, "AUTOENGINE_SET_CURRENT", rcc_sn_boolean },      {RCC_MAX_OPTIONS}  }; diff --git a/src/rccconfig.h b/src/rccconfig.h index b94a39b..8e794ba 100644 --- a/src/rccconfig.h +++ b/src/rccconfig.h @@ -6,6 +6,11 @@  #undef RCC_DEBUG  #define RCC_LOCALE_VARIABLE "LC_CTYPE" +extern const char rcc_default_language_sn[]; +extern const char rcc_english_language_sn[]; +extern const char rcc_disabled_language_sn[]; +extern const char rcc_disabled_engine_sn[]; +  extern rcc_language_alias rcc_default_aliases[];  extern const char rcc_default_charset[];  extern const char rcc_utf8_charset[]; diff --git a/src/rccexternal.c b/src/rccexternal.c index 16b3667..4a09948 100644 --- a/src/rccexternal.c +++ b/src/rccexternal.c @@ -153,7 +153,7 @@ int rccExternalConnect(unsigned char module) {      fd_set fdcon;      if (pid == (pid_t)-1) return -1; - +          sock = socket(PF_UNIX, SOCK_STREAM, 0);      if (sock<=0) return -1; diff --git a/src/rcciconv.c b/src/rcciconv.c index d9903de..93278a7 100644 --- a/src/rcciconv.c +++ b/src/rcciconv.c @@ -48,6 +48,11 @@ void rccIConvClose(rcc_iconv icnv) {      }  } +int rccIConvGetError(rcc_iconv icnv) { +    if ((!icnv)||(icnv->icnv == (iconv_t)-1)) return -1; +    return 0; +} +  size_t rccIConvRecode(rcc_iconv icnv, char *outbuf, size_t outsize, const char *buf, size_t size) {      char *in_buf, *out_buf, err;      int in_left, out_left; diff --git a/src/rcciconv.h b/src/rcciconv.h index 0070696..1520534 100644 --- a/src/rcciconv.h +++ b/src/rcciconv.h @@ -8,6 +8,8 @@ struct rcc_iconv_t {  };  typedef struct rcc_iconv_t rcc_iconv_s; +int rccIConvGetError(rcc_iconv icnv); +  size_t rccIConvInternal(rcc_context ctx, rcc_iconv icnv, const char *buf, size_t len);  /**  diff --git a/src/rccspell.c b/src/rccspell.c new file mode 100644 index 0000000..c54e267 --- /dev/null +++ b/src/rccspell.c @@ -0,0 +1,63 @@ +#include <stdio.h> +#include <stdlib.h> + +#include "rccspell.h" + +rcc_speller rccSpellerCreate(const char *lang) { +#ifdef HAVE_ASPELL +    rcc_speller rccspeller; +    AspellSpeller *speller = NULL; +    AspellConfig *config; +    AspellCanHaveError *possible_err; + +    if (!lang) return NULL; + +    rccspeller = (rcc_speller)malloc(sizeof(rcc_speller_s)); +    if (!rccspeller) return rccspeller; +     +    config = new_aspell_config(); + +    if (config) {     +	if (aspell_config_replace(config, "encoding", "utf-8")&&aspell_config_replace(config, "master", lang)) { +	    possible_err = new_aspell_speller(config); +	    if (aspell_error_number(possible_err) == 0) { +		speller = to_aspell_speller(possible_err); +	    } +	} +	delete_aspell_config(config); +    } +     +    rccspeller->speller = speller; +    return rccspeller; +#else  +    return NULL; +#endif /* HAVE_ASPELL */ +} + +void rccSpellerFree(rcc_speller rccspeller) { +#ifdef HAVE_ASPELL +    if ((rccspeller)&&(rccspeller->speller)) +	delete_aspell_speller(rccspeller->speller); +    free(rccspeller); +#endif /* HAVE_ASPELL */ +} + +int rccSpellerGetError(rcc_speller rccspeller) { +    if ((!rccspeller)||(!rccspeller->speller)) return -1; +    return 0; +} + +int rccSpellerSized(rcc_speller speller, const char *word, size_t len) { +#ifdef HAVE_ASPELL +    int res; +     +    if (rccSpellerGetError(speller)) return 0; +    res = aspell_speller_check(speller->speller, word, len?len:-1); +    return res<0?0:res;     +#endif /* HAVE_ASPELL */ +    return 0; +} + +int rccSpeller(rcc_speller speller, const char *word) { +    return rccSpellerSized(speller, word, 0); +} diff --git a/src/rccspell.h b/src/rccspell.h new file mode 100644 index 0000000..49e39f4 --- /dev/null +++ b/src/rccspell.h @@ -0,0 +1,29 @@ +#ifndef _RCC_SPELL_H +#define _RCC_SPELL_H + +#include "../config.h" + +#ifdef HAVE_ASPELL +#include <aspell.h> +#endif /* HAVE_ASPELL */ + +struct rcc_speller_t { +#ifdef HAVE_ASPELL +    struct AspellSpeller *speller; +#else  +    void *speller; +#endif /* HAVE_ASPELL */ +}; + +typedef struct rcc_speller_t *rcc_speller; +typedef struct rcc_speller_t rcc_speller_s; + +rcc_speller rccSpellerCreate(const char *lang); +void rccSpellerFree(rcc_speller speller); + +int rccSpellerGetError(rcc_speller rccspeller); + +int rccSpellerSized(rcc_speller speller, const char *word, size_t len); +int rccSpeller(rcc_speller speller, const char *word); + +#endif /* _RCC_SPELL_H */ diff --git a/src/rccstring.c b/src/rccstring.c index d6c6805..9c4c19f 100644 --- a/src/rccstring.c +++ b/src/rccstring.c @@ -58,6 +58,14 @@ int rccStringFixID(rcc_string string, rcc_context ctx) {      return 0;  } +int rccStringChangeID(rcc_string string, rcc_language_id language_id) { +    if ((!string)&&(language_id != (rcc_language_id)-1)) return -1; +     +    ((rcc_string_header*)string)->language_id = language_id; +    return 0; +} + +  void rccStringFree(rcc_string str) {      if (str) free(str);  } diff --git a/src/rccstring.h b/src/rccstring.h index 3c5d8d7..e9e9734 100644 --- a/src/rccstring.h +++ b/src/rccstring.h @@ -16,6 +16,7 @@ void rccStringFree(rcc_string str);  int rccStringSetLang(rcc_string string, const char *sn);  int rccStringFixID(rcc_string string, rcc_context ctx); +int rccStringChangeID(rcc_string string, rcc_language_id language_id);  #ifdef HAVE_STRNLEN  # ifndef strnlen diff --git a/src/rcctranslate.c b/src/rcctranslate.c index 3bbd916..d7bb4e4 100644 --- a/src/rcctranslate.c +++ b/src/rcctranslate.c @@ -66,18 +66,22 @@ int rccTranslateSetTimeout(rcc_translate translate, unsigned long us) {  char *rccTranslate(rcc_translate translate, const char *buf) {  #ifdef HAVE_LIBTRANSLATE -    size_t i;      rcc_external_command_s resp;      size_t err, len;      char *buffer; - -    if ((!translate)||(!buf)) return NULL; +/* +    size_t i; +*/ +    if ((!translate)||(!buf)) return NULL; + +/*          if (!strcmp(translate->prefix.to, "en")) {  	for (i=0;buf[i];i++)   	    if ((unsigned char)buf[i]>0x7F) break;  	if (!buf[i]) return NULL;      } +*/      if (translate->sock == -1) {  	translate->sock = rccExternalConnect(RCC_EXTERNAL_MODULE_LIBRTRANSLATE); diff --git a/src/recode.c b/src/recode.c index c44095c..7e12343 100644 --- a/src/recode.c +++ b/src/recode.c @@ -2,6 +2,8 @@  #include <stdlib.h>  #include <string.h> +#include "../config.h" +  #include "internal.h"  #include "rcciconv.h"  #include "fs.h" @@ -10,19 +12,140 @@  #include "rccconfig.h"  #include "rccdb4.h"  #include "rcctranslate.h" +#include "rccspell.h" + +#define isSpace(ch) ((ch<0x7F)&&((ch<'A')||(ch>'z')||((ch>'Z')&&(ch<'a')))) +#define RCC_REQUIRED_PROBABILITY	0.66 + +rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len, rcc_string *retstring) { +    rcc_speller speller; +    unsigned long i, nlanguages; +    rcc_language_config config, config0 = NULL; +    rcc_string recoded; +    unsigned char *utf8; +    size_t j, mode; +    unsigned long words, english, result; +    unsigned char english_mode, english_word = 1; +    rcc_language_id english_lang = (rcc_language_id)-1; +    double res, english_res = 0; +    rcc_option_value usedb4; +     + +    usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE); + +    if (usedb4&RCC_OPTION_LEARNING_FLAG_USE) { +	recoded = rccDb4GetKey(ctx->db4ctx, buf, len); +	if (recoded) { +	     if (rccStringFixID(recoded, ctx)) free(recoded); +	     else { +	        english_lang = rccStringGetLanguage(recoded); +	        if (retstring) *retstring = recoded; +		else free(recoded); +	        return english_lang; +	    } +	} +    } +     +    if (!rccGetOption(ctx, RCC_OPTION_AUTODETECT_LANGUAGE)) return (rcc_language_id)-1; + +    nlanguages = ctx->n_languages; + +    for (i=0;i<nlanguages;i++) { +	config = rccGetUsableConfig(ctx, (rcc_language_id)i); +	if (!config) continue; + +	if (i) { +	    if (config==config0) continue; +	} else config0=config; +	 +	speller = rccConfigGetSpeller(config); +	if (rccSpellerGetError(speller)) continue; + +	recoded = rccConfigSizedFrom(config, class_id, buf, len); +	if (!recoded) continue; +	 +	if (!strcasecmp(config->language->sn, rcc_english_language_sn)) english_mode = 1; +	else english_mode = 0; +	 +	utf8 = (char*)rccStringGetString(recoded); +	for (result=0,english=0,words=0,mode=0,j=0;utf8[j];j++) { +	    if (isSpace(utf8[j])) { +		if (mode) { +		    if ((!english_mode)&&(english_word)) english++; +		    result+=rccSpellerSized(speller, utf8 + mode - 1, j - mode + 1)?1:0; +		    words++; +		    mode = 0; +		} else continue; +	    } else { +		if (mode) { +		    if (utf8[j]>0x7F) english_word = 0; +		} else { +		    mode = j + 1; +		    english_word = 1; +		} +	    } +	} +	if (mode) { +	    result+=rccSpeller(speller, utf8 + mode - 1)?1:0; +	    words++; +	} +	 +	if (english_mode) { +	    english_res = 1.*result/words; +	    english_lang = (rcc_language_id)i;     +	} else if (words) { +	    res = 1.*result/words; +	    if (res > RCC_REQUIRED_PROBABILITY) { +		if (retstring) *retstring = recoded; +		else free(recoded); +		return (rcc_language_id)i; +	    } +	    if (words > english) { +		res = 1.*(result - english)/(words - english); +		if (res > RCC_REQUIRED_PROBABILITY) { +		    if (retstring) *retstring = recoded; +		    else free(recoded); +		    return (rcc_language_id)i; +		} +	    } +	} +	 +	free(recoded); +    } + +    if (english_res > RCC_REQUIRED_PROBABILITY) { +        if (retstring) { +	    *retstring = rccCreateString(english_lang, buf, len); +	} +        return english_lang; +    } +     +    return (rcc_language_id)-1; +} +rcc_language_id rccDetectLanguage(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len) { +    if (!ctx) { +	if (rcc_default_ctx) ctx = rcc_default_ctx; +	else return -1; +    } +     +    return rccDetectLanguageInternal(ctx, class_id, buf, len, NULL); +} -static rcc_autocharset_id rccIConvAuto(rcc_context ctx, rcc_class_id class_id, const char *buf, int len) { +rcc_autocharset_id rccConfigDetectCharset(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len) { +    rcc_context ctx;      rcc_class_type class_type;      rcc_engine_ptr engine; -    if (!buf) return (rcc_autocharset_id)-1; +    if ((!buf)||(!config)) return (rcc_autocharset_id)-1; +     +    ctx = config->ctx;      class_type = rccGetClassType(ctx, class_id);      if ((class_type != RCC_CLASS_FS)||((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_TITLES)))) { -	engine = rccGetCurrentEnginePointer(ctx); +	engine = rccConfigGetCurrentEnginePointer(config);  	if ((!engine)||(!engine->func)) return (rcc_autocharset_id)-1;  	return engine->func(&ctx->engine_ctx, buf, len);      } @@ -30,16 +153,26 @@ static rcc_autocharset_id rccIConvAuto(rcc_context ctx, rcc_class_id class_id, c      return (rcc_autocharset_id)-1;  } +int rccDetectCharset(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len) { +    if (!ctx) { +	if (rcc_default_ctx) ctx = rcc_default_ctx; +	else return -1; +    } + +    return rccConfigDetectCharset(ctx->current_config, class_id, buf, len); +} + +  rcc_string rccSizedFrom(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len) {      int err;      size_t ret; -    rcc_language_id language_id; +    rcc_language_id language_id, detected_language_id;      rcc_autocharset_id charset_id;      rcc_iconv icnv = NULL;      rcc_string result;      rcc_option_value usedb4;      const char *charset; - +          if (!ctx) {  	if (rcc_default_ctx) ctx = rcc_default_ctx;  	else return NULL; @@ -52,10 +185,11 @@ rcc_string rccSizedFrom(rcc_context ctx, rcc_class_id class_id, const char *buf,      language_id = rccGetCurrentLanguage(ctx);      if (language_id == (rcc_language_id)-1) return NULL; -    if (!strcasecmp(ctx->languages[language_id]->sn, "off")) return NULL; +    if (!strcasecmp(ctx->languages[language_id]->sn, rcc_disabled_language_sn)) return NULL; -    usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE); +    usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE); +/*      if (usedb4&RCC_OPTION_LEARNING_FLAG_USE) {  	result = rccDb4GetKey(ctx->db4ctx, buf, len);  	if (result) { @@ -63,11 +197,22 @@ rcc_string rccSizedFrom(rcc_context ctx, rcc_class_id class_id, const char *buf,  	     else return result;  	}      } +     +    if (rccGetOption(ctx, RCC_OPTION_AUTODETECT_LANGUAGE)) { +	detected_language_id = rccDetectLanguageInternal(ctx, class_id, buf, len); +	if (detected_language_id != (rcc_language_id)-1) +	    language_id = detected_language_id; +    } +*/ +     +    detected_language_id = rccDetectLanguageInternal(ctx, class_id, buf, len, &result); +    if (detected_language_id != (rcc_language_id)-1) return result; +          err = rccConfigure(ctx);      if (err) return NULL; -    charset_id = rccIConvAuto(ctx, class_id, buf, len); +    charset_id = rccDetectCharset(ctx, class_id, buf, len);      if (charset_id != (rcc_autocharset_id)-1) {  	icnv = ctx->iconv_auto[charset_id];  	if (rccGetOption(ctx, RCC_OPTION_AUTOENGINE_SET_CURRENT)) { @@ -105,6 +250,9 @@ char *rccSizedTo(rcc_context ctx, rcc_class_id class_id, rcc_const_string buf, s      rcc_language_id language_id;      rcc_language_id current_language_id;      rcc_class_type class_type; +    rcc_option_value translate; +    const char *langname; +    unsigned char english_source;      rcc_iconv icnv;      if (!ctx) { @@ -127,33 +275,60 @@ char *rccSizedTo(rcc_context ctx, rcc_class_id class_id, rcc_const_string buf, s      if (err) return NULL;      class_type = rccGetClassType(ctx, class_id); -    if ((class_type != RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_TRANSLATE))) { +    translate = rccGetOption(ctx, RCC_OPTION_TRANSLATE); +     +    langname = rccGetLanguageName(ctx, language_id); +    if (strcasecmp(langname, rcc_english_language_sn)) english_source = 0; +    else english_source = 1; +     +    if ((class_type != RCC_CLASS_FS)&&((translate==RCC_OPTION_TRANSLATE_FULL)||((translate)&&(!english_source)))) {  	current_language_id = rccGetCurrentLanguage(ctx);  	if (current_language_id != language_id) {  	    if ((config->trans)&&(config->translang != current_language_id)) {  		rccTranslateClose(config->trans);  		config->trans = NULL;  	    } -	    if (!config->trans) { -		config->trans = rccTranslateOpen(rccGetLanguageName(ctx, language_id), rccGetLanguageName(ctx, current_language_id)); -		config->translang = current_language_id; +	     +	    if (translate != RCC_OPTION_TRANSLATE_TO_ENGLISH) { +		if (!config->trans) { +		    config->trans = rccTranslateOpen(rccGetLanguageName(ctx, language_id), rccGetLanguageName(ctx, current_language_id)); +		    config->translang = current_language_id; +		} + +		if (config->trans) { +		    translated = rccTranslate(config->trans, utfstring); +		    if (translated) { +			language_id = current_language_id; +		     +			config = rccGetConfig(ctx, language_id); +			if (!config) { +			    free(translated); +			    return NULL; +			} + +			err = rccConfigConfigure(config); +			if (err) { +			    free(translated); +			    return NULL; +			} +		    }  +		}  	    } -	    if (config->trans) { -		translated = rccTranslate(config->trans, utfstring); -		if (translated) { -		    language_id = current_language_id; +	     +	    if ((translate == RCC_OPTION_TRANSLATE_TO_ENGLISH)||((config->trans)&&(!translated))) { +		puts("entrans"); +		if (!config->entrans) { +		    config->entrans = rccTranslateOpen(rccGetLanguageName(ctx, language_id), rcc_english_language_sn); +		} +		if (config->entrans) { +		    translated = rccTranslate(config->entrans, utfstring); +  		    config = rccGetConfig(ctx, language_id); -		    if (!config) { -			free(translated); -			return NULL; -		    } +		    if (!config) return translated;  		    err = rccConfigConfigure(config); -		    if (err) { -			free(translated); -			return NULL; -		    } +		    if (err) return translated;  		}  	    }  	} @@ -183,7 +358,7 @@ char *rccSizedTo(rcc_context ctx, rcc_class_id class_id, rcc_const_string buf, s      icnv =  config->iconv_to[class_id];      if (icnv) { -	newlen = rccIConvInternal(ctx, icnv, translated?translated:utfstring, newlen); +	newlen = rccIConvInternal(ctx, icnv, translated?translated:utfstring, translated?0:newlen);  	if (translated) free(translated);  	if (newlen == (size_t)-1) return NULL; @@ -237,7 +412,7 @@ char *rccSizedRecode(rcc_context ctx, rcc_class_id from, rcc_class_id to, const      err = rccConfigure(ctx);      if (err) return NULL; -    from_charset_id = rccIConvAuto(ctx, from, buf, len); +    from_charset_id = rccDetectCharset(ctx, from, buf, len);      if (from_charset_id != (rcc_charset_id)-1) {  	from_charset = rccGetAutoCharsetName(ctx, from_charset_id);  	to_charset = rccGetCurrentCharsetName(ctx, to); @@ -385,13 +560,15 @@ char *rccSizedRecodeToCharset(rcc_context ctx, rcc_class_id class_id, const char      return extracted;  } -/* Convert to class_id from Charset */ +/* Convert to class_id from Charset. +Usage of this function assuming the knowledge about the incoming string.  +The charset as well as the language. So no detection (DB4,Aspell) of language +will be performed. */  char *rccSizedRecodeFromCharset(rcc_context ctx, rcc_class_id class_id, const char *charset, const char *buf, size_t len, size_t *rlen) {      size_t res;      rcc_iconv icnv;      rcc_string str;      char *extracted; -          if (!charset) return NULL; diff --git a/ui/rccnames.c b/ui/rccnames.c index b6d08dd..0e4f586 100644 --- a/ui/rccnames.c +++ b/ui/rccnames.c @@ -32,6 +32,7 @@ rcc_name rcc_default_language_names_embeded[RCC_MAX_LANGUAGES+1] = {  rcc_option_value_name rcc_default_option_boolean_names[] = { "Off", "On", NULL };  rcc_option_value_name rcc_default_option_learning_names[] = { "Off", "On", "Relearn", "Learn", NULL };  rcc_option_value_name rcc_default_option_clo_names[] = { "All Languages", "Configured / AutoEngine", "Configured Only", NULL }; +rcc_option_value_name rcc_default_option_translate_names[] = { "Off", "Translate to English", "Skip English Translation", "Full", NULL };  rcc_option_name rcc_default_option_names[RCC_MAX_OPTIONS+1];  rcc_option_name rcc_default_option_names_embeded[RCC_MAX_OPTIONS+1] = { @@ -39,8 +40,9 @@ rcc_option_name rcc_default_option_names_embeded[RCC_MAX_OPTIONS+1] = {      { RCC_OPTION_AUTODETECT_FS_NAMES, "Autodetect File Names",  rcc_default_option_boolean_names },      { RCC_OPTION_AUTODETECT_FS_TITLES, "Autodetect FS Titles", rcc_default_option_boolean_names },      { RCC_OPTION_CONFIGURED_LANGUAGES_ONLY, "Enabled Languages", rcc_default_option_clo_names }, -    { RCC_OPTION_TRANSLATE, "Translate Text", rcc_default_option_boolean_names },      { RCC_OPTION_AUTOENGINE_SET_CURRENT, "AutoEngine Set Current Encoding", rcc_default_option_boolean_names }, +    { RCC_OPTION_AUTODETECT_LANGUAGE, "Autodetect Language", rcc_default_option_boolean_names }, +    { RCC_OPTION_TRANSLATE, "Translate Text", rcc_default_option_translate_names },      { RCC_MAX_OPTIONS }  };  | 
