/* LibRCC Copyright (C) 2005-2018 Suren A. Chilingaryan This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 or later as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #include #include #include #include #include #include #include "config.h" #ifdef HAVE_SYS_STAT_H # include #endif /* HAVE_SYS_STAT_H */ #ifdef HAVE_SYS_FILE_H # include #endif /* HAVE_SYS_FILE_H */ #ifdef HAVE_SYS_TYPES_H # include #endif /* HAVE_SYS_TYPES_H */ #ifdef HAVE_DIRENT_H # include #endif /* HAVE_DIRENT_H */ #ifdef HAVE_GETOPT_H # include #endif /* HAVE_GETOPT_H */ #include #ifndef RCC_OPTION_TRANSLATE_SKIP_PARENT # define RCC_OPTION_TRANSLATE_SKIP_PARENT RCC_OPTION_TRANSLATE_SKIP_PARRENT #endif typedef enum { MODE_STDIN = 0x1000, MODE_DIRECTORY, MODE_FILE, MODE_FILELIST } Modes; int mode = MODE_STDIN; typedef enum { OPT_CONFIG = 'c', OPT_ENCODING_IN = 'e', OPT_FROM = 'f', OPT_HELP = 'h', OPT_LANGUAGE_IN = 'l', OPT_TO = 't', OPT_YES = 'y', OPT_ENCODING_OUT, OPT_LANGUAGE_OUT, OPT_TRANSLATION, OPT_CACHING, OPT_CACHE, OPT_AUTODETECT, OPT_OFFLINE, OPT_TIMEOUT, OPT_SUBDIRS, } Options; static struct option long_options[] = { {"config", required_argument, 0, OPT_CONFIG }, {"from", required_argument, 0, OPT_FROM }, {"to", required_argument, 0, OPT_TO }, {"force-encoding", required_argument, 0, OPT_ENCODING_IN }, {"force-language", required_argument, 0, OPT_LANGUAGE_IN }, {"force-target-encoding", required_argument, 0, OPT_ENCODING_OUT }, {"force-target-language", required_argument, 0, OPT_LANGUAGE_OUT }, {"language-detection", required_argument, 0, OPT_AUTODETECT }, {"translation", optional_argument, 0, OPT_TRANSLATION }, {"caching", optional_argument, 0, OPT_CACHING }, {"cache", required_argument, 0, OPT_CACHE }, {"timeout", required_argument, 0, OPT_TIMEOUT }, {"force", no_argument, 0, OPT_YES }, #ifdef RCC_OPTION_OFFLINE {"allow-offline-processing",no_argument, 0, OPT_OFFLINE }, #endif /* RCC_OPTION_OFFLINE */ {"disable-subdirs", no_argument, 0, OPT_SUBDIRS }, {"stdin", no_argument, &mode, MODE_STDIN }, {"directory", no_argument, &mode, MODE_DIRECTORY }, {"file", no_argument, &mode, MODE_FILE }, {"filelist", no_argument, &mode, MODE_FILELIST }, {"help", no_argument, 0, OPT_HELP }, { 0, 0, 0, 0 } }; void Usage(int argc, char *argv[]) { printf( "Usage:\n" " %s [options] [mode] [file|directory]\n" " Modes:\n" " --stdin - Convert stdin to stdout\n" " --directory - Convert file names in specified directory\n" " --file - Convert specified file\n" " --filelist - Convert all files writed on stdin\n" " --help - Help message\n" "\n" " Options:\n" " -c - Specify configuration name\n" " -f - Source class ('in' is default)\n" " -t - Output class ('out' is default)\n" " -e - Force specified source encoding (autodetection)\n" " -l - Force specified source language (from LC_CTYPE)\n" " --force-target-encoding=\n" " - Convert to the specified encoding\n" " --force-target-language=\n" " - Translate to the specified language\n" " --caching=[mode]\n" " - Use recodings cache. Following modes are supported\n" " off - Turn off\n" " use - Use cached values (default)\n" " add - Add new recodings to cache\n" " replace - Replace encodings in cache\n" " --cache=\n" " - Use specified cache database instead of default one\n" " --translation=[mode]\n" " - Enable translation. Following modes are supported:\n" " full - Full\n" " skip_parent - Skip translation to parent lang\n" " skip_related - Skip translation between related langs\n" " english - Translate to english (default)\n" " transliterate - Transliterate\n" " --language-detection=[mode]\n" " - Lanuage autodetection. Following modes are supported:\n" " off - Current language is considered\n" " on - Use only configured langs (default)\n" " all - Try everything (slow)\n" " --timeout=\n" " - Specify recoding timeout in microseconds (1s default)\n" "\n" " -y - Do not ask any question\n" " --disable-subdirs\n" " - Do not descend into the sub directories\n" "\n" " Language Relations:\n" " To prevent unneccesary translations the concept of related/parent languages is\n" " introduced. For each language you can specify a parent language.\n" " skip_parent translation option will turn off translation to parent language\n" " skip_related translation option will additionaly turn off translation from\n" " parent language.\n" "\n" " For example, in the default configuration Russian is parent of Ukrainian, and\n" " English is parent of all other languages. With \"skip_parrent\" option the\n" " translation from Russian to Ukrainian would be turned off, but translation\n" " from Ukrainian to Russian would operate. With \"skip_related\" option the\n" " translation in both directions would be disabled\n" "\n\n" " Language Detection:\n" " Current version uses aspell dictionaries to autodetect language. Therefore,\n" " only languages with aspell available in the system aspell dictionaries are\n" " autodected. Beware, if your system contains a lot of installed languages,\n" " the autodection may take considerable amount of time.\n" "\n\n", argv[0]); } /* fs: is a standard class here, we do not need fs detecting here */ static rcc_class classes[] = { { "unicode", RCC_CLASS_TRANSLATE_CURRENT, "UTF-8", NULL, "Dummy", 0 }, { "in", RCC_CLASS_STANDARD, NULL, NULL, "Input Encoding", 0 }, { "out", RCC_CLASS_TRANSLATE_CURRENT, "LC_CTYPE", NULL, "Output Encoding", 0 }, { "id3", RCC_CLASS_STANDARD, "in", NULL, "ID3 Encoding", 0 }, { "id3v2", RCC_CLASS_STANDARD, "id3", NULL, "ID3 v.2 Encoding", 0}, { "pl", RCC_CLASS_STANDARD, "id3", NULL, "PlayList Title Encoding", 0}, { "plfs", RCC_CLASS_STANDARD, "pl", NULL, "PlayList File Encoding", 0 }, { "fs", RCC_CLASS_STANDARD, "LC_CTYPE", NULL, "FileSystem Encoding", 0 }, { "oem", RCC_CLASS_STANDARD, "in", NULL, "Zip OEM Encoding", 0 }, { "iso", RCC_CLASS_STANDARD, "in", NULL, "Zip ISO Encoding", 0 }, { "ftp", RCC_CLASS_STANDARD, "in", NULL, "FTP Encoding", 0 }, { NULL } }; rcc_class_id GetClass(const char *name) { int i; for (i = 1; classes[i].name; i++) { if ((!strcasecmp(name, classes[i].name))||(!strcasecmp(name, classes[i].fullname))) return i; } return (rcc_class_id)-1; } static char ask = 1; static char process_subdirs = 1; static rcc_language_id source_language_id, target_language_id; static rcc_class_id source_class_id = 1, target_class_id = 2; static char *efrom = NULL, *eto = NULL; static int translate = RCC_OPTION_TRANSLATE_OFF; char *Translate(const char *source); int Stdin(const char *arg); int Directory(const char *arg); int main(int argc, char *argv[]) { rcc_language_id language_id, current_language_id, english_language_id; unsigned char c; char *arg = NULL; char *config_name = NULL; char *cache_name = NULL; char *from = "in"; char *to = "out"; unsigned char from_forced = 0; unsigned char to_forced = 0; char *lfrom = NULL; char *lto = NULL; int cache = RCC_OPTION_LEARNING_FLAG_USE; int ldetect = 0; int ldetect_all = 0; int ldetect_force = 0; unsigned long timeout = 0; char offline = 0; int option_index = 0; while ((c = getopt_long(argc, argv, "yhe:f:l:t:", long_options, &option_index)) != (unsigned char)-1) { switch (c) { case 0: break; case OPT_HELP: Usage(argc, argv); exit(0); break; case OPT_CONFIG: config_name = optarg; break; case OPT_CACHE: cache_name = optarg; case OPT_FROM: from_forced = 1; from = optarg; break; case OPT_TO: to_forced = 1; to = optarg; break; case OPT_ENCODING_IN: efrom = optarg; break; case OPT_ENCODING_OUT: eto = optarg; break; case OPT_LANGUAGE_IN: lfrom = optarg; /* Selects main language, but for translation we can switch on autodetection. Should do it manualy. */ if (!ldetect_force) { ldetect = 0; ldetect_force = 1; } break; case OPT_LANGUAGE_OUT: lto = optarg; break; case OPT_TRANSLATION: if (!optarg) translate = RCC_OPTION_TRANSLATE_TO_ENGLISH; else if (!strcasecmp(optarg, "full")) translate = RCC_OPTION_TRANSLATE_FULL; else if (!strcasecmp(optarg, "skip_parent")) translate = RCC_OPTION_TRANSLATE_SKIP_PARENT; else if (!strcasecmp(optarg, "skip_related")) translate = RCC_OPTION_TRANSLATE_SKIP_RELATED; else if (!strcasecmp(optarg, "english")) translate = RCC_OPTION_TRANSLATE_TO_ENGLISH; else if (!strcasecmp(optarg, "transliterate")) translate = RCC_OPTION_TRANSLATE_TRANSLITERATE; else if (!strcasecmp(optarg, "off")) translate = RCC_OPTION_TRANSLATE_OFF; else { fprintf(stderr, "*** Unknown translation mode: %s\n\n", optarg); Usage(argc, argv); exit(0); } if (!ldetect_force) { if (!strcasecmp(optarg, "off")) ldetect = 0; else ldetect = 1; } break; case OPT_CACHING: if (!optarg) cache = RCC_OPTION_LEARNING_FLAG_USE; else if (!strcasecmp(optarg, "off")) cache = 0; else if (!strcasecmp(optarg, "use")) cache = RCC_OPTION_LEARNING_FLAG_USE; else if (!strcasecmp(optarg, "add")) cache = RCC_OPTION_LEARNING_FLAG_USE|RCC_OPTION_LEARNING_FLAG_LEARN; else if (!strcasecmp(optarg, "replace")) cache = RCC_OPTION_LEARNING_FLAG_LEARN; else { fprintf(stderr, "*** Unknown caching mode: %s\n\n", optarg); Usage(argc, argv); exit(0); } break; case OPT_AUTODETECT: ldetect_force = 1; if (!optarg) ldetect = 1; else if (!strcasecmp(optarg, "off")) { ldetect = 0; ldetect_force = 1; } else if (!strcasecmp(optarg, "on")) { ldetect = 1; ldetect_all = 0; ldetect_force = 1; } else if (!strcasecmp(optarg, "all")) { ldetect = 1; ldetect_all = 1; ldetect_force = 1; } break; case OPT_TIMEOUT: timeout = atoi(optarg); break; case OPT_OFFLINE: offline = 1; break; case OPT_SUBDIRS: process_subdirs = 0; break; case OPT_YES: ask = 0; break; default: Usage(argc, argv); exit(0); } } if (optind < argc) { if ((optind + 1) < argc) { fprintf(stderr, "*** Invalid non-option arguments:\n"); for (;optind < argc;optind++) { puts(argv[optind]); } fprintf(stderr, "\n\n"); Usage(argc,argv); exit(0); } arg = argv[optind]; } switch (mode) { case MODE_DIRECTORY: if (!from_forced) from = "fs"; if (!to_forced) to = "fs"; break; default: ; } setlocale(LC_ALL, ""); rccInit(); rccInitDefaultContext(NULL, 0, 0, classes, 0); rccInitDb4(NULL, cache_name, 0); if (timeout) rccSetOption(NULL, RCC_OPTION_TIMEOUT, timeout); if (config_name) rccLoad(NULL, config_name); rccSetOption(NULL, RCC_OPTION_LEARNING_MODE, cache); if (translate != RCC_OPTION_TRANSLATE_OFF) rccSetOption(NULL, RCC_OPTION_TRANSLATE, translate); if (ldetect) { rccSetOption(NULL, RCC_OPTION_AUTODETECT_LANGUAGE, 1); if (ldetect_all) { rccSetOption(NULL, RCC_OPTION_CONFIGURED_LANGUAGES_ONLY, 0); } } // DS: More checks, sometimes we can skip that. if ((lfrom)||(lto)) { // if (lfrom) rccSetOption(NULL, RCC_OPTION_AUTODETECT_LANGUAGE, 1); rccSetOption(NULL, RCC_OPTION_CONFIGURED_LANGUAGES_ONLY, 0); } #ifdef RCC_OPTION_OFFLINE if (offline) rccSetOption(NULL, RCC_OPTION_OFFLINE, 1); #endif /* RCC_OPTION_OFFLINE */ if (from) { source_class_id = GetClass(from); if (source_class_id == (rcc_class_id)-1) { rccFree(); fprintf(stderr, "*** Invalid source class (%s) specified\n", from); exit(1); } } if (to) { target_class_id = GetClass(to); if (target_class_id == (rcc_class_id)-1) { rccFree(); fprintf(stderr, "*** Invalid target class (%s) specified\n", to); exit(1); } } current_language_id = rccGetCurrentLanguage(NULL); english_language_id = rccGetLanguageByName(NULL, "en"); if (lfrom) { source_language_id = rccGetLanguageByName(NULL, lfrom); if (source_language_id == (rcc_language_id)-1) { rccFree(); fprintf(stderr, "*** Invalid source language (%s) specified\n", lfrom); exit(1); } } else source_language_id = current_language_id; if (lto) { target_language_id = rccGetLanguageByName(NULL, lto); if (target_language_id == (rcc_language_id)-1) { rccFree(); fprintf(stderr, "*** Invalid target language (%s) specified\n", lto); exit(1); } } else target_language_id = current_language_id; if (source_language_id == target_language_id) { language_id = source_language_id; if (language_id != current_language_id) { if ((rccSetLanguage(NULL, language_id))||(!rccGetCurrentLanguageName(NULL))) { rccFree(); fprintf(stderr, "*** Unable to set the specified language (%s)\n", rccGetLanguageName(NULL, language_id)); exit(1); } } else { // Automatic if (!rccGetCurrentLanguageName(NULL)) { if (current_language_id != english_language_id) { language_id = english_language_id; rccSetLanguage(NULL, english_language_id); } if (!rccGetCurrentLanguageName(NULL)) { rccFree(); fprintf(stderr, "*** Default language (%s) is not configured\n", rccGetLanguageName(NULL, current_language_id)); exit(1); } } } } else { language_id = (rcc_language_id)-1; // Checking if languages are selectable if ((rccSetLanguage(NULL, source_language_id))||(!rccGetCurrentLanguageName(NULL))) { rccFree(); fprintf(stderr, "*** Unable to set source language (%s)\n", rccGetLanguageName(NULL, source_language_id)); exit(1); } if ((rccSetLanguage(NULL, target_language_id))||(!rccGetCurrentLanguageName(NULL))) { rccFree(); fprintf(stderr, "*** Unable to set target language (%s)\n", rccGetLanguageName(NULL, target_language_id)); exit(1); } } switch (mode) { case MODE_STDIN: Stdin(arg); break; case MODE_DIRECTORY: Directory(arg); break; case MODE_FILE: fprintf(stderr, "*** Mode (FILE) is not supported in current version\n"); break; case MODE_FILELIST: fprintf(stderr, "*** Mode (FILELIST) is not supported in current version\n"); break; } rccFree(); return 0; } // DS. Dynamicaly raise string length? int Stdin(const char *arg) { char *res; char buf[16384]; while (fgets(buf,16384,stdin)) { res = Translate(buf); fprintf(stdout, res?res:buf); if (res) free(res); } return 0; } char *Fullname(const char *path, const char *name) { char *res; res = (char*)malloc(strlen(path) + strlen(name) + 2); if (res) { if (path[strlen(path)-1] == '/') sprintf(res, "%s%s",path,name); else sprintf(res, "%s/%s",path,name); } return res; } // DS: We do not follow symbolic links (add option?) // DS: Skipping everything begining with point (system files) int Directory(const char *arg) { int err; struct stat st; DIR *dir; struct dirent *entry; char *res; char answer; char stmp[255]; char *fn, *nfn; if (!arg) arg = "."; printf("Processing directory: %s\n", arg); dir = opendir(arg); if (!dir) { fprintf(stderr, "*** Failed to process directory: %s\n", arg); return -1; } entry = readdir(dir); while (entry) { if (entry->d_name[0] == '.') { entry = readdir(dir); continue; } res = Translate(entry->d_name); if (res) { if (strcmp(res, entry->d_name)) { if (ask) { printf("Rename \"%s\" to \"%s\" (y/[n]) ", entry->d_name, res); scanf("%c", &answer); if (answer != '\n') fgets(stmp, 255, stdin); answer = ((answer=='y')||(answer=='Y'))?1:0; } else { answer = 1; } if (answer) { fn = Fullname(arg, entry->d_name); nfn = Fullname(arg, res); if ((fn)&&(nfn)) { if (!lstat(nfn, &st)) { if (!ask) { printf("Trying rename \"%s\" to \"%s\"\n", entry->d_name, res); } if (S_ISDIR(st.st_mode)) { printf("*** Directory with that name exists, skipping\n"); answer = 0; } else { printf("*** File exists, overwrite (y/[n]) "); scanf("%c", &answer); if (answer != '\n') fgets(stmp, 255, stdin); answer = ((answer=='y')||(answer=='Y'))?1:0; } } if (answer) { err = rename(fn, nfn); } } else err = ENOMEM; if (fn) free(fn); if (nfn) free(nfn); if (err) { printf("*** Renaming \"%s\" to \"%s\" is failed (errno: %u)\n", entry->d_name, res, errno); } else if (!ask) { printf("Rename completed: \"%s\" to \"%s\"\n", entry->d_name, res); } } } free(res); } entry = readdir(dir); } closedir(dir); if (process_subdirs) { dir = opendir(arg); if (!dir) return 0; entry = readdir(dir); while (entry) { if (entry->d_name[0] == '.') { entry = readdir(dir); continue; } fn = Fullname(arg, entry->d_name); if (fn) { if ((!lstat(fn, &st))&&((S_ISDIR(st.st_mode)))) { Directory(fn); } free(fn); } entry = readdir(dir); } closedir(dir); } return 0; } char *Translate(const char *source) { rcc_string rccstring; char *recoded, *stmp; if (strlen(source)<2) return NULL; if (source_language_id != target_language_id) { rccSetLanguage(NULL, source_language_id); } if (efrom) rccstring = rccFromCharset(NULL, efrom, source); else rccstring = rccFrom(NULL, source_class_id, source); if (!rccstring) return NULL; if (source_language_id != target_language_id) rccSetLanguage(NULL, target_language_id); if (eto) { if (translate = RCC_OPTION_TRANSLATE_OFF) { stmp = rccTo(NULL, target_class_id, rccstring); if (stmp) { recoded = rccRecodeCharsets(NULL, "UTF-8", eto, stmp); if (recoded) free(stmp); else recoded = stmp; } else recoded = NULL; } else { recoded = rccToCharset(NULL, eto, rccstring); } } else recoded = rccTo(NULL, target_class_id, rccstring); free(rccstring); return recoded; }