/* Copyright (c) 2007-2010, Dirk Krause All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the Dirk Krause nor the names of contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /** @file text2lat.c The text2lat program. */ #include #include #if DK_HAVE_STDLIB_H #include #endif #if DK_HAVE_UNISTD_H #include #endif #if DK_HAVE_PROCESS_H #include #endif #if DK_HAVE_STRING_H #include #endif #if DK_HAVE_STRINGS_H #include #endif #include #include #include #include #include #include #include #include #include #include #include /** Record for an input character without LaTeX encoding. */ typedef struct { unsigned long uc; /**< Character value (shifted right by 8 bits in the file name container). */ unsigned long ln; /**< Input line number where the character was seen. */ } me_t; /** text2lat job.
*/ typedef struct { int success; /**< Flag: Success (the exit code is derived from it). */ dk_app_t *a; /**< Application. */ char *b_i; /**< Input buffer. */ size_t sz_b_i; /**< Size of \a b_i. */ size_t u_b_i; /**< Used bytes in \a b_i. */ unsigned char enc_auto; /**< Flag: Retrieve encoding from LANG. */ int enc_type; /**< Input encoding (one of the ENCODING_xxx values). */ unsigned char cmd; /**< Command (or-combined CMD_xxx bits). */ unsigned char convert_spaces; /**< Flag: Convert spaces. */ unsigned char convert_newlines; /**< Flag: Convert newlines. */ unsigned char in_math_mode; /**< Flag: Currently in mathematics mode. */ unsigned char there_were_missing_encodings; /**< Flag: Characters without LaTeX encoding were found. */ unsigned char there_was_an_input_decoding_error; /**< Flag: Input decoding error occurred. */ unsigned char last_was_nl; /**< Flag: Last character was newline. */ char *ifn1; /**< Input file name before expansion. */ char *ofn1; /**< Output file name before expansion. */ char *ifn2; /**< Input file name after expansion. */ char *ofn2; /**< Output file name after expansion. */ char **msg; /**< Messages texts issued by the program. */ FILE *inf; /**< Input file. */ FILE *outf; /**< Output file. */ dk_le_t *le; /**< Encoding table container. */ unsigned long ln; /**< Current line number. */ dk_storage_t *s_uc; /**< Missing encodings container (compared by character). */ dk_storage_t *s_ln; /**< Missing encodings container (compared by line number, then character). */ dk_storage_t *s_fn; /**< Container of character values shifted right by 8 bits, used to name encoding table files. */ dk_storage_iterator_t *it_uc; /**< Iterator for \a s_uc. */ dk_storage_iterator_t *it_ln; /**< Iterator for \a s_ln. */ dk_storage_iterator_t *it_fn; /**< Iterator for \a s_fn. */ unsigned char cc1; /**< Flag: Can continue outer loop. */ unsigned char cc2; /**< Flag: Can continue inner loop. */ char c; /**< Current char to process (set when \a u is below 256). */ unsigned long u; /**< Current char to process (as unsigned long). */ } T2L; /** Prototype for functions returning void. */ #define T2L_VOID(x) static void x DK_P1(T2L *,t) /** Prototype for functions returning int.
*/ #define T2L_INT(x) static int x DK_P1(T2L *,t) #ifndef GROUPNAME /** Application group name (may be overridden at compile time). */ #define GROUPNAME "dktools" #endif /* char **t2l_str(T2L *t); */ /** Encoding: ISO-LATIN-8859-1. */ #define ENCODING_ISO_LATIN_8859_1 0 /** Encoding: UTF-8. */ #define ENCODING_UTF_8 1 #include "dktools-version.h" #line 158 "text2lat.ctr" /** Command: Show help text. */ #define CMD_HELP ((unsigned char)0x01) /** Command: Show version information. */ #define CMD_VERSION ((unsigned char)0x02) /** Command: Configure application (save settings to preferences). */ #define CMD_CONFIGURE ((unsigned char)0x04) /** Command: Unconfigure application. */ #define CMD_UNCONFIGURE ((unsigned char)0x08) /** Command: Show current configuration. */ #define CMD_SHOW_CONFIGURATION ((unsigned char)0x10) /** Command: Abort due to error. */ #define CMD_ABORT_ERROR ((unsigned char)0x20) /** Flag: Can not continue. */ #define CAN_NOT_CONTINUE 0x00 /** Flag: Can continue. */ #define CAN_CONTINUE 0x01 /** Application group name as passed to dkapp_open_ext1(). */ static char group_name[] = { GROUPNAME }; /** System configuration directory. */ static char sysconf_dir[] = { DK_SYSCONFDIR }; /** File name separator. */ #if DK_HAVE_FEATURE_BACKSLASH static char str_fnsep[] = { "\\" }; #else static char str_fnsep[] = { "/" }; #endif /** File name suffix for encoding tables. */ static char str_dotdat[] = { ".dat" }; /** Preference key: Input encoding. */ static char key_enc[] = { "/input/encoding" }; /** Preference key: Convert spaces. */ static char key_spaces[] = { "/convert/spaces" }; /** Preference key: Convert newlines. */ static char key_newlines[] = { "/convert/newlines" }; /** Preference values: Encodings as matched by dkstr_array_abbr() with '$' as marker character (used to restore). */ static char *val_enc[] = { /* 0 */ "a$utomatic", /* 1 */ "l$atin-1", /* 2 */ "u$tf-8", NULL }; /** Preference values: Encodings (used to save), same order as \a val_enc. */ static char *save_enc[] = { "automatic", "latin-1", "utf-8", NULL }; /** Preference value: on.
*/ static char val_on[] = { "on" }; /** Preference value: off. */ static char val_off[] = { "off" }; /** String: UTF-8, used when checking the LANG environment variable. */ static char str_utf8[] = { "UTF-8" }; /** Default input file name. */ static char str_stdin[] = { "" }; /** Default output file name. */ static char str_stdout[] = { "" }; /** String: File mode "w". */ static char str_w[] = { "w" }; /** String: File mode "r". */ static char str_r[] = { "r" }; /** String: Option -e. */ static char minus_e[] = { "-e" }; /** String: Option -s. */ static char minus_s[] = { "-s" }; /** String: Option -n. */ static char minus_n[] = { "-n" }; /** String: Five spaces are used before and after the value. */ static char five_spaces[] = { " " }; /** Default directory name for UC to LaTeX tables. */ static char table_directory[] = { "$(shared.dir)/uc2lat-t" }; /** Long options. */ static char *long_options[] = { /* 0 */ "h$elp", /* 1 */ "v$ersion", /* 2 */ "conf$igure", /* 3 */ "unconf$igure", /* 4 */ "res$et", /* 5 */ "sh$ow-configuration", /* 6 */ "sil$ently", /* 7 */ "e$ncoding", /* 8 */ "convert-n$ewlines", /* 9 */ "convert-s$paces", NULL }; /** Help file name. */ static char help_file_name[] = { "text2lat.txt" }; /** Version text. */ static char *version_text[] = { "text2lat (part of the dktools collection, version " VERSNUMB ")", "Copyright (C) 2007-2010 Dipl.-Ing. D. 
Krause", "http://dktools.sourceforge.net/", "", "Redistribution and use in source and binary forms, with or without", "modification, are permitted provided that the following conditions are met:", "* Redistributions of source code must retain the above copyright notice, this", " list of conditions and the following disclaimer.", "* Redistributions in binary form must reproduce the above copyright notice,", " this list of conditions and the following disclaimer in the documentation", " and/or other materials provided with the distribution.", "* Neither the name of the Dirk Krause nor the names of other contributors may", " be used to endorse or promote products derived from this software without", " specific prior written permission.", "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"", "AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE", "IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE", "ARE DISCLAIMED.", "IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY", "DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES", "(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;", "LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND", "ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT", "(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS", "SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.", NULL }; /** Default help text, printed if help text file is not found. */ static char *help_text[] = { "Usage:", "------", "", "text2lat [] [ []]", "", "Options:", "--------", "", "-e Encoding:", " automatic: inspect LANG environment variable", " latin-1: ISO-LATIN-8859-1 encoded text", " utf-8: UTF-8 encoded text", NULL }; /** String: Space. */ static char str_space[] = { " " }; /** String: Tilde. */ static char str_tilde[] = { "~" }; /** String: Newline. 
*/ static char str_tex_newline[] = { "\\\\" }; /** String: Start of math mode. */ static char str_begin_math_mode[] = { "\\(" }; /** String: End of math mode. */ static char str_end_math_mode[] = { "\\)" }; /** String finder data for localization. */ dk_key_value_t kv[] = { { (char *)"/m/00", (char *)"Current configuration:" }, { (char *)"/m/01", (char *)"Encoding:", }, { (char *)"/m/02", (char *)"Select automatically from $LANG" }, { (char *)"/m/03", (char *)"ISO-LATIN-8859-1" }, { (char *)"/m/04", (char *)"UTF-8" }, { (char *)"/m/05", (char *)"For some input characters the LaTeX encoding was missing!" }, { (char *)"/m/06", (char *)"Missing encoding for: " }, { (char *)"/m/07", (char *)"Some LaTeX encoding files need modifications!" }, { (char *)"/m/08", (char *)"Please check encoding file \"" }, { (char *)"/m/09", (char *)"\"!" }, { (char *)"/m/10", (char *)"Error while decoding input character at position " }, { (char *)"/m/11", (char *)"!" }, { (char *)"/m/12", (char *)"Conversion aborted, output incomplete!" }, { (char *)"/m/13", (char *)"Not enough memory (RAM/swap space)!" }, { (char *)"/m/14", (char *)"Directory for LaTeX encoding tables not found!" }, { (char *)"/m/15", (char *)"Input file name pattern must match exactly one name!" }, { (char *)"/m/16", (char *)"Output file name pattern must match exactly one name!" }, { (char *)"/m/17", (char *)"Failed to create file name expander for input file!" }, { (char *)"/m/18", (char *)"Failed to create file name expander for output file!" }, { (char *)"/m/19", (char *)"Unknown option \"-" }, { (char *)"/m/20", (char *)"\"!" }, { (char *)"/m/21", (char *)"Too many file names!" }, { (char *)"/m/22", (char *)"Spaces conversion:" }, { (char *)"/m/23", (char *)"Newlines conversion:" }, { (char *)"/m/24", (char *)"on" }, { (char *)"/m/25", (char *)"off" }, }; /** Number of entries in \a kv. */ static size_t szkv = sizeof(kv)/sizeof(dk_key_value_t); /** String table name. 
*/
static char table_name[] = "text2lat";

/** Obtain the message array for the key/value pairs in \a kv.
    @param t Text2lat job (the application in \a t->a is used for the lookup).
    @return Message array as returned by dkapp_find_key_value().
*/
char **t2l_str DK_P1(T2L *,t)
{
  return dkapp_find_key_value(t->a, kv, szkv, table_name);
}

/** Comparison function for missing-encoding records.
    A NULL pointer sorts before any record. With \a cr equal to 1 the
    records are ordered by line number first and by character value
    second; for any other \a cr only the character value is compared.
    @param l Left entry.
    @param r Right entry.
    @param cr Comparison criteria (1=line number, other=character value).
    @return 1 if left is larger, -1 if left is smaller, 0 if equal.
*/
static int compare_me DK_P3(void *,l, void *,r, int,cr)
{
  me_t *lhs = (me_t *)l;
  me_t *rhs = (me_t *)r;

  if(!lhs) {
    return (rhs ? -1 : 0);
  }
  if(!rhs) {
    return 1;
  }
  if(cr == 1) {
    /* line number is the primary key */
    if(lhs->ln > rhs->ln) { return 1; }
    if(lhs->ln < rhs->ln) { return -1; }
  }
  /* character value: only key, or tie breaker for equal line numbers */
  if(lhs->uc > rhs->uc) { return 1; }
  if(lhs->uc < rhs->uc) { return -1; }
  return 0;
}

/** Check whether or not the program must run silently.
    Set rs if the program should run silently (no diagnostics to
    standard output or standard error) and/or rf if the program runs
    as a filter (normal output goes to standard output).
    @param argc Number of command line arguments.
    @param argv Command line arguments array.
    @param rs Pointer to result variable (run silently).
    @param rf Pointer to result variable (run as filter).
*/ static void silence_check DK_P4(int,argc, char **,argv, int *,rs, int *,rf) { int i; char *ptr, **lfdptr; int myrf = 1, myrs = 0; lfdptr = argv; lfdptr++; i = 1; while(i < argc) { ptr = *lfdptr; if(*ptr == '-') { ptr++; if(*ptr == '-') { ptr++; switch(dkstr_array_abbr(long_options, ptr, '$', 0)) { case 6: myrs = 1; break; } } else { switch(*ptr) { case 'e': { ptr++; if(!(*ptr)) { lfdptr++; i++; } } break; } } } else { myrf = 0; } lfdptr++; i++; } if(rs) { *rs = myrs; } if(rf) { *rf = myrf; } } /** Initialize text2lat job. @param t Text2lat job. */ T2L_VOID(t2l_init) { t->success = 0; t->a = NULL; t->b_i = NULL; t->sz_b_i = 0; t->enc_auto = 0x01; t->enc_type = ENCODING_ISO_LATIN_8859_1; t->cmd = 0x00; t->ifn1 = t->ofn1 = t->ifn2 = t->ofn2 = NULL; t->msg = NULL; t->inf = NULL; t->outf = NULL; t->convert_spaces = 0x00; t->convert_newlines = 0x00; t->le = NULL; t->ln = 1UL; t->s_uc = NULL; t->s_ln = NULL; t->s_fn = NULL; t->it_uc = NULL; t->it_ln = NULL; t->it_fn = NULL; t->in_math_mode = 0x00; t->there_were_missing_encodings = 0x00; t->last_was_nl = 0x00; } /** Retrieve encoding form LANG env var. @param t Text2lat job. */ T2L_VOID(encoding_auto) { char *ptr; ptr = getenv("LANG"); if(ptr) { ptr = strchr(ptr, '.'); if(ptr) { ptr++; if(dkstr_casecmp(ptr, str_utf8) == 0) { t->enc_type = ENCODING_UTF_8; } else { t->enc_type = ENCODING_ISO_LATIN_8859_1; } } else { t->enc_type = ENCODING_ISO_LATIN_8859_1; } } else { t->enc_type = ENCODING_ISO_LATIN_8859_1; } } /** Retrieve encoding name from string. The string may be "automatic", "latin-1" or "utf-8". @param t Text2lat job. @param str String containing the encoding. 
*/ static void encoding_from_string DK_P2(T2L* ,t, char *,str) { switch(dkstr_array_abbr(val_enc, str, '$', 0)) { case 1: { t->enc_type = ENCODING_ISO_LATIN_8859_1; t->enc_auto = 0x00; } break; case 2: { t->enc_type = ENCODING_UTF_8; t->enc_auto = 0x00; } break; default: { encoding_auto(t); t->enc_auto = 0x01; } break; } } /** Retrieve encoding name from preferences. @param t Text2lat job. */ T2L_VOID(encoding_from_preferences) { int done = 0; char buffer[32], *ptr; if(dkapp_get_pref(t->a, key_enc, buffer, sizeof(buffer), 0)) { ptr = dkstr_start(buffer, NULL); if(ptr) { dkstr_chomp(ptr, NULL); encoding_from_string(t, ptr); done = 1; } } if(!done) { encoding_auto(t); } } /** Find maximum string length of multiple strings. Needed to show configuration. @param t Text2lat job. @param s String to analyze @param old Result of previous strings. @return Maximum of length of \a s and \a old. */ static size_t max_strlen DK_P3(T2L *,t, char *,s, size_t,old) { size_t back = 0, sz = 0; back = old; sz = strlen(s); if(sz > back) back = sz; return back; } /** Show the current configuration. @param t Text2lat job. 
*/ T2L_VOID(show_configuration) { size_t save_enc_index, s1, sz_middle, sz, j, i; save_enc_index = 0; j = 2; if(!(t->enc_auto)) { switch(t->enc_type) { case ENCODING_UTF_8: { save_enc_index = 2; j = 4; } break; default: { save_enc_index = 1; j = 3; } break; } } s1 = dkapp_prlen(t->a, save_enc[save_enc_index]); sz_middle = dkapp_prlen(t->a, (t->msg)[1]); sz_middle = max_strlen(t, (t->msg)[22], sz_middle); sz_middle = max_strlen(t, (t->msg)[23], sz_middle); /* encoding line */ sz = dkapp_prlen(t->a, (t->msg)[0]); dkapp_stdout(t->a, (t->msg)[0]); fputc('\n', stdout); for(i = 0; i < sz; i++) fputc('-', stdout); fputc('\n', stdout); dkapp_stdout(t->a, minus_e); fputc(' ', stdout); fputc(' ', stdout); dkapp_stdout(t->a, save_enc[save_enc_index]); dkapp_stdout(t->a, five_spaces); for(i = dkapp_prlen(t->a, (t->msg)[1]); i < sz_middle; i++) { fputc(' ', stdout); } dkapp_stdout(t->a, (t->msg)[1]); fputc(' ', stdout); dkapp_stdout(t->a, (t->msg)[j]); fputc('\n', stdout); /* convert newlines (23) */ dkapp_stdout(t->a, minus_n); if(t->convert_newlines) { fputc(' ', stdout); } else { fputc('-', stdout); } fputc(' ', stdout); sz = dkapp_prlen(t->a, save_enc[save_enc_index]); for(i = 0; i < sz; i++) { fputc(' ', stdout); } dkapp_stdout(t->a, five_spaces); for(i = dkapp_prlen(t->a, (t->msg)[23]); i < sz_middle; i++) { fputc(' ', stdout); } dkapp_stdout(t->a, (t->msg)[23]); fputc(' ', stdout); dkapp_stdout(t->a, (t->msg)[((t->convert_newlines) ? 24 : 25)]); fputc('\n', stdout); /* convert spaces (22) */ dkapp_stdout(t->a, minus_s); if(t->convert_newlines) { fputc(' ', stdout); } else { fputc('-', stdout); } fputc(' ', stdout); sz = dkapp_prlen(t->a, save_enc[save_enc_index]); for(i = 0; i < sz; i++) { fputc(' ', stdout); } dkapp_stdout(t->a, five_spaces); for(i = dkapp_prlen(t->a, (t->msg)[22]); i < sz_middle; i++) { fputc(' ', stdout); } dkapp_stdout(t->a, (t->msg)[22]); fputc(' ', stdout); dkapp_stdout(t->a, (t->msg)[((t->convert_newlines) ? 
24 : 25)]); fputc('\n', stdout); } /** Save current configuration to preferences. @param t Text2lat job. */ T2L_VOID(save_configuration) { char *ptr; ptr = save_enc[0]; if(!(t->enc_auto)) { if(t->enc_type == ENCODING_UTF_8) { ptr = save_enc[2]; } else { ptr = save_enc[1]; } } dkapp_set_pref(t->a, key_enc, ptr); dkapp_set_pref(t->a, key_spaces, ((t->convert_spaces) ? val_on : val_off)); dkapp_set_pref(t->a,key_newlines,((t->convert_newlines) ? val_on : val_off)); } /** Show program version. @param t Text2lat job. */ T2L_VOID(show_version) { char **lfdptr; lfdptr = version_text; while(*lfdptr) { fputs(*(lfdptr++), stdout); fputc('\n', stdout); } } /** Show help text. @param t Text2lat job. */ T2L_VOID(show_help) { dkapp_help(t->a, help_file_name, help_text); } /** Run for special purposes (help, version, configuration). @param t Text2lat job. */ T2L_VOID(run_special) { if(!((t->cmd) & CMD_ABORT_ERROR)) { if((t->cmd) & (CMD_HELP | CMD_VERSION)) { show_version(t); if((t->cmd) & CMD_HELP) { fputc('\n', stdout); show_help(t); } } else { if((t->cmd) & CMD_UNCONFIGURE) { dkapp_unconfigure(t->a); } else { if((t->cmd) & CMD_CONFIGURE) { save_configuration(t); } show_configuration(t); } } t->success = 1; } } /** Write end-of-math-mode code if necessary. @param t Text2lat job. */ T2L_INT(end_math_mode) { int back = 0; if(t->in_math_mode) { fputs(str_end_math_mode, t->outf); back = 1; } t->in_math_mode = 0x00; return back; } /** Write start-of-math-mode code. @param t Text2lat job. */ T2L_VOID(begin_math_mode) { if(!(t->in_math_mode)) { fputs(str_begin_math_mode, t->outf); } t->in_math_mode = 0x01; } /** Process one character. @param t Text2lat job. 
*/ T2L_VOID(add_one_uc) { int was_handled; char *ptr; me_t myme, *meptr; char obuffer[16]; size_t u_obuffer; unsigned char err_memory = 0x00; char *messages[3]; if(t->u == 0x00000020UL) { end_math_mode(t); if(t->convert_spaces) { fputs(str_tilde, t->outf); } else { fputs(str_space, t->outf); } t->last_was_nl = 0x00; } else { if(t->u == 0x0000000A) { end_math_mode(t); if(t->convert_newlines) { fputs(str_tex_newline, t->outf); } fputc('\n', t->outf); t->ln += 1UL; t->last_was_nl = 0x01; } else { if(t->u == 0x0000000D) { end_math_mode(t); fputc(0x0D, t->outf); } else { was_handled = 0; if(dkle_load(t->le, t->u)) { ptr = dkle_get_encoding(t->le, t->u, 0); if(ptr) { was_handled = 1; end_math_mode(t); fputs(ptr, t->outf); } else { ptr = dkle_get_encoding(t->le, t->u, 1); if(ptr) { was_handled = 1; begin_math_mode(t); fputs(ptr, t->outf); } } } if(!was_handled) { if(t->u < 256UL) { end_math_mode(t); ptr = dk_l2l_encoding(t->c); if(ptr) { fputs(ptr, t->outf); } else { fputc(t->c, t->outf); } was_handled = 1; } } if(!was_handled) { u_obuffer = dkenc_uc2utf8(t->u, (dk_ubyte *)obuffer, sizeof(obuffer)); if(u_obuffer > 0) { (void)fwrite((void *)obuffer, 1, u_obuffer, t->outf); } t->there_were_missing_encodings = 0x01; /* keep track of unhandled uc32 */ myme.uc = t->u; myme.ln = t->ln; if(!dksto_it_find_like(t->it_uc, (void *)(&myme), 0)) { meptr = dk_new(me_t,1); if(meptr) { meptr->uc = t->u; meptr->ln = t->ln; if(dksto_add(t->s_uc, (void *)meptr)) { if(dksto_add(t->s_ln, (void *)meptr)) { myme.uc = ((myme.uc >> 8) & 0x00FFFFFFUL); myme.ln = t->ln; if(!dksto_it_find_like(t->it_fn, (void *)(&myme), 0)) { meptr = dk_new(me_t,1); if(meptr) { meptr->uc = myme.uc; meptr->ln = myme.ln; if(!dksto_add(t->s_fn, (void *)meptr)) { err_memory = 0x01; meptr->uc = 0UL; meptr->ln = 0UL; dk_delete(meptr); meptr = NULL; } } else { err_memory = 0x01; } } } else { err_memory = 0x01; } } else { meptr->uc = 0UL; meptr->ln = 0UL; dk_delete(meptr); meptr = NULL; } } else { err_memory = 0x01; } 
if(err_memory) { messages[0] = (t->msg)[13]; dkapp_log_msg(t->a, DK_LOG_LEVEL_ERROR, messages, 1); } } } } t->last_was_nl = 0x00; } } } /** Process an UTF-8-encoded file. @param t Text2lat job. */ T2L_VOID(process_utf8) { /* conversion buffer, pointers within that buffer for copy operations */ dk_ubyte cobu[16], *p1, *p2; /* conversion result (unsigned long character) */ dk_udword uc; /* pointer in input buffer */ char *ptr; /* used bytes in cobu, indexes for array traversals */ size_t u_cobu, i, used, j; /* number of bytes successfully processed */ unsigned long nofbytes; /* buffer to show position */ char buffer[24]; /* message buffer to create log messages */ char *messages[8]; t->cc1 = CAN_CONTINUE; u_cobu = 0; nofbytes = 0UL; t->there_was_an_input_decoding_error = 0x00; while(t->cc1) { t->u_b_i = fread(t->b_i, 1, t->sz_b_i, t->inf); if(t->u_b_i > 0) { ptr = t->b_i; for(i = 0; ((i < t->u_b_i) && (t->cc1)); i++) { cobu[u_cobu++] = (dk_ubyte)(*ptr++); if(u_cobu > 12) { while((u_cobu > 8) && (t->cc1)) { used = 0; if(dkenc_utf82uc(&uc, cobu, u_cobu, &used)) { if((used > 0) && (used <= u_cobu)) { p1 = cobu; p2 = &(cobu[used]); j = u_cobu - used; while(j--) { *(p1++) = *(p2++); } u_cobu = u_cobu - used; t->u = uc; if(t->u < 256UL) { t->c = (char)uc; } add_one_uc(t); if(uc == 0x0AUL) { nofbytes = 0UL; } else { nofbytes++; } } else { t->cc1 = CAN_NOT_CONTINUE; t->there_was_an_input_decoding_error = 0x01; } } else { t->cc1 = CAN_NOT_CONTINUE; t->there_was_an_input_decoding_error = 0x01; } } } } } else { t->cc1 = CAN_NOT_CONTINUE; } } /* flush conversion buffer */ while((u_cobu > 0) && (!(t->there_was_an_input_decoding_error))) { used = 0; if(dkenc_utf82uc(&uc, cobu, u_cobu, &used)) { if((used > 0) && (used <= u_cobu)) { p1 = cobu; p2 = &(cobu[used]); j = u_cobu - used; while(j--) { *(p1++) = *(p2++); } u_cobu = u_cobu - used; t->u = uc; if(t->u < 256UL) { t->c = (char)uc; } add_one_uc(t); } else { t->cc1 = CAN_NOT_CONTINUE; t->there_was_an_input_decoding_error = 0x01; } 
} else { t->cc1 = CAN_NOT_CONTINUE; t->there_was_an_input_decoding_error = 0x01; } } if(t->there_was_an_input_decoding_error) { dkapp_set_source_lineno(t->a, t->ln); sprintf(buffer, "%ld", (nofbytes+1UL)); messages[0] = (t->msg)[10]; messages[1] = buffer; messages[2] = (t->msg)[11]; dkapp_log_msg(t->a, DK_LOG_LEVEL_ERROR, messages, 3); messages[0] = (t->msg)[12]; dkapp_log_msg(t->a, DK_LOG_LEVEL_ERROR, messages, 1); } /* end math mode */ if(end_math_mode(t)) { fputc('\n', t->outf); } } /** Process an ISO-LATIN-8859-1 encoded file. @param t Text2lat job. */ T2L_VOID(process_latin) { char *ptr; size_t i; t->cc1 = CAN_CONTINUE; while(t->cc1) { t->u_b_i = fread(t->b_i, 1, t->sz_b_i, t->inf); if(t->u_b_i > 0) { ptr = t->b_i; for(i = 0; ((i < t->u_b_i) && (t->cc1)); i++) { t->c = *(ptr++); t->u = (unsigned long)((unsigned char)(t->c)); t->u &= 255UL; add_one_uc(t); } } else { t->cc1 = CAN_NOT_CONTINUE; } } /* cleanup (end math mode... ) */ if(end_math_mode(t)) { fputc('\n', t->outf); } } /** Report missing encodings (if there were any). @param t Text2lat job. @param d Missing encoding. 
*/ static void report_missing_encodings DK_P2(T2L *,t, char *,d) { char buffer[16], b1[16], b2[16], b3[16]; char *messages[16]; me_t *meptr; if(t->there_were_missing_encodings) { t->success = 0; /* ERROR: There were missing encodings */ dkapp_set_source_filename(t->a, t->ifn2); dkapp_set_source_lineno(t->a, 0UL); messages[0] = (t->msg)[5]; dkapp_log_msg(t->a, DK_LOG_LEVEL_WARNING, messages, 1); /* show all missing encodings */ dksto_it_reset(t->it_ln); while((meptr = dksto_it_next(t->it_ln)) != NULL) { dkapp_set_source_lineno(t->a, meptr->ln); sprintf(buffer, "0x%08lx", meptr->uc); messages[0] = (t->msg)[6]; messages[1] = buffer; dkapp_log_msg(t->a, DK_LOG_LEVEL_WARNING, messages, 2); } messages[0] = (t->msg)[7]; dkapp_log_msg(t->a, DK_LOG_LEVEL_WARNING, messages, 1); dksto_it_reset(t->it_fn); while((meptr = dksto_it_next(t->it_fn)) != NULL) { sprintf(b1, "%02lx", (((meptr->uc) >> 16) & 255UL)); sprintf(b2, "%02lx", (((meptr->uc) >> 8) & 255UL)); sprintf(b3, "%02lx", ( (meptr->uc) & 255UL)); dkapp_set_source_lineno(t->a, meptr->ln); messages[0] = (t->msg)[8]; messages[1] = d; messages[2] = str_fnsep; messages[3] = b1; messages[4] = str_fnsep; messages[5] = b2; messages[6] = str_fnsep; messages[7] = b3; messages[8] = str_dotdat; messages[9] = (t->msg)[9]; dkapp_log_msg(t->a, DK_LOG_LEVEL_WARNING, messages, 10); } dkapp_set_source_lineno(t->a, 0UL); } } /** Process input file and create output. @param t Text2lat job. */ T2L_VOID(process_data) { t->success = 1; t->there_were_missing_encodings = 0x00; dkapp_set_source_filename(t->a, t->ifn2); if(t->enc_type == ENCODING_UTF_8) { process_utf8(t); } else { process_latin(t); } fflush(t->outf); } /** All files are opened, now run. @param t Text2lat job. 
*/
T2L_VOID(run_for_files)
{
  unsigned long mpl;
  char *fnb;
  me_t *meptr;
  unsigned char err_memory = 0x00;
  char *messages[3];

  t->le = NULL;
  /* buffer for the name of the encoding tables directory */
  mpl = dksf_get_maxpathlen();
  if(mpl < 1024L) { mpl = 1024L; }
  fnb = dk_new(char,mpl);
  if(!fnb) {
    err_memory = 0x01;
    goto finished;
  }
  if(!dkapp_transform_string_ext1(t->a,fnb,(size_t)mpl,table_directory,1)) {
    messages[0] = (t->msg)[14];
    dkapp_log_msg(t->a, DK_LOG_LEVEL_ERROR, messages, 1);
    goto release_name;
  }
  /* acquire resources, each failure releases what was acquired before */
  t->le = dkle_open(fnb);
  if(!(t->le)) { err_memory = 0x01; goto release_name; }
  t->s_uc = dksto_open(0);
  if(!(t->s_uc)) { err_memory = 0x01; goto close_le; }
  t->s_ln = dksto_open(0);
  if(!(t->s_ln)) { err_memory = 0x01; goto close_s_uc; }
  t->s_fn = dksto_open(0);
  if(!(t->s_fn)) { err_memory = 0x01; goto close_s_ln; }
  dksto_set_comp(t->s_uc, compare_me, 0);
  dksto_set_comp(t->s_fn, compare_me, 0);
  dksto_set_comp(t->s_ln, compare_me, 1);
  t->it_uc = dksto_it_open(t->s_uc);
  if(!(t->it_uc)) { err_memory = 0x01; goto close_s_fn; }
  t->it_ln = dksto_it_open(t->s_ln);
  if(!(t->it_ln)) { err_memory = 0x01; goto close_it_uc; }
  t->it_fn = dksto_it_open(t->s_fn);
  if(!(t->it_fn)) { err_memory = 0x01; goto close_it_ln; }

  /* do the conversion */
  process_data(t);
  /* report missing encodings, if any */
  report_missing_encodings(t, fnb);
  /* release file name me_t for missing encoding */
  dksto_it_reset(t->it_fn);
  while((meptr = (me_t *)dksto_it_next(t->it_fn)) != NULL) {
    meptr->uc = 0UL;
    meptr->ln = 0UL;
    dk_delete(meptr);
  }
  /* release data about missing encoding
     (s_ln refers to the same records as s_uc)
  */
  dksto_it_reset(t->it_uc);
  while((meptr = (me_t *)dksto_it_next(t->it_uc)) != NULL) {
    meptr->uc = 0UL;
    meptr->ln = 0UL;
    dk_delete(meptr);
  }

  /* release resources in reverse order */
  dksto_it_close(t->it_fn); t->it_fn = NULL;
close_it_ln:
  dksto_it_close(t->it_ln); t->it_ln = NULL;
close_it_uc:
  dksto_it_close(t->it_uc); t->it_uc = NULL;
close_s_fn:
  dksto_close(t->s_fn); t->s_fn = NULL;
close_s_ln:
  dksto_close(t->s_ln); t->s_ln = NULL;
close_s_uc:
  dksto_close(t->s_uc); t->s_uc = NULL;
close_le:
  dkle_close(t->le); t->le = NULL;
release_name:
  dk_delete(fnb);
finished:
  if(err_memory) {
    t->success = 0;
    messages[0] = (t->msg)[13];
    dkapp_log_msg(t->a, DK_LOG_LEVEL_ERROR, messages, 1);
  }
}

/** Find output file name if
necessary, open output file and run. @param t Text2lat job. */ T2L_VOID(run_with_output_file) { dk_fne_t *fneo = NULL; char *messages[3]; if(dksf_must_expand_filename(t->ofn1)) { fneo = dkfne_open(t->ofn1, 1, 0); if(fneo) { t->ofn2 = dkfne_get_one(fneo); if(t->ofn2) { t->outf = dkapp_fopen(t->a, t->ofn2, str_w); if(t->outf) { run_for_files(t); fclose(t->outf); t->outf = NULL; } else { dkapp_err_fopenw(t->a, t->ofn2); } } else { messages[0] = (t->msg)[16]; dkapp_log_msg(t->a, DK_LOG_LEVEL_ERROR, messages, 1); } dkfne_close(fneo); fneo = NULL; } else { messages[0] = (t->msg)[18]; dkapp_log_msg(t->a, DK_LOG_LEVEL_ERROR, messages, 1); } } else { t->ofn2 = t->ofn1; t->outf = dkapp_fopen(t->a, t->ofn2, str_w); if(t->outf) { run_for_files(t); fclose(t->outf); t->outf = NULL; } else { dkapp_err_fopenw(t->a, t->ofn2); } } } /** Open the files (find file names if necessary) and run. @param t Text2lat job. */ T2L_VOID(run_for_file_names) { dk_fne_t *fnei = NULL; char *messages[3]; if(dksf_must_expand_filename(t->ifn1)) { fnei = dkfne_open(t->ifn1, 1, 0); if(fnei) { t->ifn2 = dkfne_get_one(fnei); if(t->ifn2) { t->inf = dkapp_fopen(t->a, t->ifn2, str_r); if(t->inf) { if(t->ofn1) { run_with_output_file(t); } else { t->ofn2 = str_stdout; t->outf = stdout; run_for_files(t); } fclose(t->inf); t->inf = NULL; } else { dkapp_err_fopenr(t->a, t->ifn2); } } else { messages[0] = (t->msg)[15]; dkapp_log_msg(t->a, DK_LOG_LEVEL_ERROR, messages, 1); } dkfne_close(fnei); fnei = NULL; } else { messages[0] = (t->msg)[17]; dkapp_log_msg(t->a, DK_LOG_LEVEL_ERROR, messages, 1); } } else { t->ifn2 = t->ifn1; t->inf = dkapp_fopen(t->a, t->ifn2, str_r); if(t->inf) { if(t->ofn1) { run_with_output_file(t); } else { t->outf = stdout; t->ofn2 = str_stdout; run_for_files(t); } fclose(t->inf); t->inf = NULL; } else { dkapp_err_fopenr(t->a, t->ifn2); } } } /** Run a conversion. @param t Text2lat job. 
*/ T2L_VOID(run_conversion) { if(t->ifn1) { run_for_file_names(t); } else { /* run as filter */ t->inf = stdin; t->outf = stdout; t->ifn2 = str_stdin; t->ofn2 = str_stdout; run_for_files(t); } } /** Retrieve a boolean value from a string. @param s Boolean value as text. @param def Default value. @return Boolean value from \a s or default value. */ static unsigned char bool_option DK_P2(char *,s, unsigned char,def) { unsigned char back; back = def; if(s) { if(*s) { switch(*s) { case '+': { back = 0x01; } break; case '-': { back = 0x00; } break; default: { if(dkstr_is_bool(s)) { if(dkstr_is_on(s)) { back = 0x01; } else { back = 0x00; } } } break; } } } return back; } /** Apply setting to convert newlines. @param t Text2lat job. @param s Boolean value as text. */ static void setting_convert_newlines DK_P2(T2L *,t, char *,s) { t->convert_newlines = bool_option(s, 0x01); } /** Apply setting to convert spaces. @param t Text2lat job. @param s Boolean value as text. */ static void setting_convert_spaces DK_P2(T2L *,t, char *,s) { t->convert_spaces = bool_option(s, 0x01); } /** Get boolean value from preferences. @param t Text2lat job. @param pk Preference key. @param def Default value. @return Boolean value found in preferences or default value. */ static unsigned char find_from_pref DK_P3(T2L *,t, char *,pk, unsigned char,def) { unsigned char back; char buffer[32], *p1; back = def; if(dkapp_get_pref(t->a, pk, buffer, sizeof(buffer), 0)) { p1 = dkstr_start(buffer, NULL); if(p1) { dkstr_chomp(p1, NULL); switch(*p1) { case '+': { back = 0x01; } break; case '-': { back = 0x00; } break; default: { if(dkstr_is_bool(p1)) { if(dkstr_is_on(p1)) { back = 0x01; } else { back = 0x00; } } } break; } } } return back; } /** Process command line arguments and decide what to do. @param t Text2lat job. 
*/
T2L_VOID(run)
{
  char *ptr, *arg, *bp, **lfdptr, **xargv;
  int i, xargc;
  char *messages[8];

  /* defaults are taken from the preferences */
  encoding_from_preferences(t);
  t->convert_newlines = find_from_pref(t, key_newlines, 0x00);
  t->convert_spaces = find_from_pref(t, key_spaces, 0x00);
  /* command line arguments override the preferences */
  xargc = dkapp_get_argc(t->a);
  xargv = dkapp_get_argv(t->a);
  lfdptr = xargv; lfdptr++; i = 1;
  while(i < xargc) {
    ptr = *lfdptr;
    if(*ptr == '-') {
      ptr++;
      switch(*ptr) {
        case '-': {
          /* long option, split "name=value" temporarily */
          ptr++;
          arg = strchr(ptr, '=');
          bp = NULL;
          if(arg) {
            bp = arg;
            *(arg++) = '\0';
          }
          switch(dkstr_array_abbr(long_options, ptr, '$', 0)) {
            case 0: { t->cmd = CMD_HELP; } break;
            case 1: { t->cmd = CMD_VERSION; } break;
            case 2: { t->cmd = CMD_CONFIGURE; } break;
            case 3: { t->cmd = CMD_UNCONFIGURE; } break;
            case 4: {
              /* same as -r */
              t->convert_spaces = 0x00;
              t->convert_newlines = 0x00;
              encoding_auto(t);
              t->enc_auto = 0x01;
            } break;
            case 5: { t->cmd = CMD_SHOW_CONFIGURATION; } break;
            /* 6 (silently) was already handled by silence_check() */
            case 7: {
              if(arg) {
                encoding_from_string(t, arg);
              }
            } break;
            case 8: {
              /* without value the conversion is switched on */
              setting_convert_newlines(t, arg);
            } break;
            case 9: {
              /* without value the conversion is switched on */
              setting_convert_spaces(t, arg);
            } break;
          }
          if(bp) {
            *bp = '=';
          }
        } break;
        case 'h': { t->cmd = CMD_HELP; } break;
        case 'v': { t->cmd = CMD_VERSION; } break;
        case 'c': { t->cmd = CMD_CONFIGURE; } break;
        case 'u': { t->cmd = CMD_UNCONFIGURE; } break;
        case 'C': { t->cmd = CMD_SHOW_CONFIGURATION; } break;
        case 'e': {
          /* value either attached or in the next argument */
          ptr++;
          if(!(*ptr)) {
            ptr = NULL;
            lfdptr++; i++;
            if(i < xargc) {
              ptr = *lfdptr;
            }
          }
          if(ptr) {
            encoding_from_string(t, ptr);
          }
        } break;
        case 'r': {
          /* reset to defaults: no conversions, encoding from LANG */
          t->convert_spaces = 0x00;
          t->convert_newlines = 0x00;
          encoding_auto(t);
          t->enc_auto = 0x01;
        } break;
        case 's': { setting_convert_spaces(t, ++ptr); } break;
        case 'n': { setting_convert_newlines(t, ++ptr); } break;
        default: {
          /* ERROR: Unknown option */
          t->cmd = CMD_ABORT_ERROR;
          messages[0] = (t->msg)[19];
          messages[1] = ptr;
          messages[2] = (t->msg)[20];
          dkapp_log_msg(t->a, DK_LOG_LEVEL_ERROR, messages, 3);
        } break;
      }
    } else {
      /* first name is the input file, second name is the output file */
      if(t->ifn1) {
        if(t->ofn1) {
          /* ERROR: Too many file names */
          messages[0] = (t->msg)[21];
          dkapp_log_msg(t->a, DK_LOG_LEVEL_ERROR, messages, 1);
          t->cmd = CMD_HELP;
        } else {
          t->ofn1 = ptr;
        }
      } else {
        t->ifn1 = ptr;
      }
    }
    lfdptr++; i++;
  }
  if(t->cmd) {
    run_special(t);
  } else {
    run_conversion(t);
  }
}

/**
The main() function of the text2lat program. @param argc Number of command line arguments. @param argv Command line arguments array. @return 0 on success, any other value indicates an error. */ #if DK_HAVE_PROTOTYPES int main(int argc, char *argv[]) #else int main(argc, argv) int argc; char *argv[]; #endif { int exval = 0, rs = 0, rf = 0; char ibuffer[4096]; T2L t2l; /* initialize structure */ #line 1763 "text2lat.ctr" t2l_init(&t2l); t2l.b_i = ibuffer; t2l.sz_b_i = sizeof(ibuffer); silence_check(argc, argv, &rs, &rf); t2l.a = dkapp_open_ext1(argc, argv, group_name, sysconf_dir, rs, rf); if(t2l.a) { t2l.msg = t2l_str(&t2l); if(t2l.msg) { run(&t2l); dkapp_close(t2l.a); t2l.a = NULL; { char **cp; cp = t2l.msg; dk_delete(cp); t2l.msg = NULL; } } else { } } else { if(!rs) { fprintf(stderr, "text2lat: ERROR: Not enough memory (RAM/swap)!\n"); fflush(stderr); } } /* prepare exit code */ exval = t2l.success; exval = (exval ? 0 : 1); #line 1789 "text2lat.ctr" exit(exval); return exval; } /* vim: set ai foldmethod=marker foldopen=all sw=2 : */