%% options copyright owner = Dirk Krause copyright year = 2013-2014 license = bsd %% header #include #include #ifdef __cplusplus extern "C" { #endif /** Write ANSI character or check ANSI compatibility. @param c32 Character to write or check. @param fipo File to write to (NULL for check). @return 1 on success, 0 on error (No representation for this character in ANSI encoding). */ int dk3se_ansi_fputc(dk3_c32_t c32, FILE *fipo); /** Write URL character or check URL compatibility. @param c32 Character to write or check. @param fipo File to write to (NULL for check). @return 1 on success, 0 on error. */ int dk3se_url_fputc(dk3_c32_t c32, FILE *fipo); /** Convert to ANSI and write URL character or check URL compatibility. @param c32 Character to write or check. @param fipo File to write to (NULL for check). @return 1 on success, 0 on error. */ int dk3se_ansi_url_fputc(dk3_c32_t c32, FILE *fipo); /** Write ANSI string or check ANSI compatibility. @param str String to write. @param enc String encoding. @param fipo File to write to (NULL for compatibility check). @param app Application structure for diagnostics, may be NULL. @return 1 on success, 0 on error. */ int dk3se_ansi_fputs(dkChar const *str, int enc, FILE *fipo, dk3_app_t *app); /** Write URL string or check URL compatibility. @param str String to write. @param enc String encoding. @param fipo File to write to (NULL for compatibility check). @param app Application structure for diagnostics, may be NULL. @return 1 on success, 0 on error. */ int dk3se_url_fputs(dkChar const *str, int enc, FILE *fipo, dk3_app_t *app); /** Convert to ANSI and write URL string or check URL compatibility. @param str String to write. @param enc String encoding. @param fipo File to write to (NULL for compatibility check). @param app Application structure for diagnostics, may be NULL. @return 1 on success, 0 on error. */ int dk3se_ansi_url_fputs(dkChar const *str, int enc, FILE *fipo, dk3_app_t *app); /** Check whether a string can be converted to ANSI or URL. @param str String to test. @param enc String encoding. @param se Output encoding (DK3SE_ANSI and/or DK3SE_URL). @param ev Pointer to encoding violation variable. @param we Pointer to write error variable. @param app Application structure for diagnostics, may be NULL. @return 1 on success, 0 on error. */ int dk3se_check(dkChar const *str,int enc,int se,int *ev,int *we,dk3_app_t *app); #ifdef __cplusplus } #endif /** ANSI encoding. */ #define DK3SE_ANSI 1 /** URL encoding. */ #define DK3SE_URL 2 %% module #include "dk3all.h" #include "dk3se.h" $!trace-include /** Find ANSI character for 32-bit character. @param dp Pointer to destination variable. @param c32 32-bit character. @return 1 on success (character found), 0 on error (not available). */ static int dk3se_find_ansi_char(char *dp, dk3_c32_t c32) { int back = 0; unsigned u; unsigned char uc; char c = '\0'; $? "+ dk3se_find_ansi_char 0x%lx %lu", (unsigned long)c32, (unsigned long)c32 if((dk3_c32_t)0x00010000UL > c32) { u = (unsigned)c32; if(((dk3_c32_t)0x00000019UL < c32) && ((dk3_c32_t)0x00000080UL > c32)) { uc = (unsigned char)u; c = (char)uc; back = 1; } else { if(((dk3_c32_t)0x0000009FUL < c32) && ((dk3_c32_t)0x00000100UL > c32)) { uc = (unsigned char)u; c = (char)uc; back = 1; } else { switch(u) { case 0x20AC: { c = (char)0x80; back = 1; } break; case 0x0081: { c = (char)0x81; back = 1; } break; case 0x201A: { c = (char)0x82; back = 1; } break; case 0x0192: { c = (char)0x83; back = 1; } break; case 0x201E: { c = (char)0x84; back = 1; } break; case 0x2026: { c = (char)0x85; back = 1; } break; case 0x2020: { c = (char)0x86; back = 1; } break; case 0x2021: { c = (char)0x87; back = 1; } break; case 0x02C6: { c = (char)0x88; back = 1; } break; case 0x2030: { c = (char)0x89; back = 1; } break; case 0x0160: { c = (char)0x8A; back = 1; } break; case 0x2039: { c = (char)0x8B; back = 1; } break; case 0x0152: { c = (char)0x8C; back = 1; } break; case 0x008D: { c = (char)0x8D; back = 1; } break; case 0x017D: { c = (char)0x8E; back = 1; } break; case 0x008F: { c = (char)0x8F; back = 1; } break; case 0x0090: { c = (char)0x90; back = 1; } break; case 0x2018: { c = (char)0x91; back = 1; } break; case 0x2019: { c = (char)0x92; back = 1; } break; case 0x201C: { c = (char)0x93; back = 1; } break; case 0x201D: { c = (char)0x94; back = 1; } break; case 0x2022: { c = (char)0x95; back = 1; } break; case 0x2013: { c = (char)0x96; back = 1; } break; case 0x2014: { c = (char)0x97; back = 1; } break; case 0x02DC: { c = (char)0x98; back = 1; } break; case 0x2122: { c = (char)0x99; back = 1; } break; case 0x0161: { c = (char)0x9A; back = 1; } break; case 0x203A: { c = (char)0x9B; back = 1; } break; case 0x0153: { c = (char)0x9C; back = 1; } break; case 0x009D: { c = (char)0x9D; back = 1; } break; case 0x017E: { c = (char)0x9E; back = 1; } break; case 0x0178: { c = (char)0x9F; back = 1; } break; } } } } if(back) { $? ". success \"%c\"", c *dp = c; } $? "- dk3se_find_ansi_char %d", back return back; } /** Check whether a character must be hex-encoded for output as URL. @param c Character to check. @return 1 for percent-hex encoding, 0 for direct output. */ static int dk3se_must_encode_for_url(char c) { int back = 1; $? "+ dk3se_must_encode_for_url %x \"%c\"", (int)c, c if(('a' <= c) && ('z' >= c)) { back = 0; } else { if(('A' <= c) && ('Z' >= c)) { back = 0; } else { if(('0' <= c) && ('9' >= c)) { back = 0; } else { switch(c) { case '-': case '_': case '.': case '~': { back = 0; } break; } } } } $? "- dk3se_must_encode_for_url %d", back return back; } /** Write one 32-bit character to 8-bit character file or check whether writing would be possible. @param c32 Character to write or test. @param fipo File to write to (NULL for check only). @param se Special encoding conditions. @param ev Pointer to variable to store encoding violations. @param we Pointer to write error variable. @param app Application structure for diagnostics, may be NULL. */ static int dk3se_fputc_checked( dk3_c32_t c32, FILE *fipo, int se, int *ev, int *we, dk3_app_t *app ) { char buf[16]; int back = 0; unsigned u; unsigned char uc = '\0'; char c = '\0'; $? "+ dk3se_fputc_checked 0x%lx %lu", (unsigned long)c32, (unsigned long)c32 if(se & DK3SE_ANSI) { $? ". ANSI required" back = dk3se_find_ansi_char(&c, c32); if(!(back)) { if(ev) { *ev = ((*ev) | DK3SE_ANSI); } if(app) { } } } else { $? ". ANSI not required" if((dk3_c32_t)0x00000100UL > c32) { u = (unsigned)c32; uc = (unsigned char)u; c = (char)uc; back = 1; } else { if(ev) { *ev = ((*ev) | DK3SE_URL); } if(app) { } } } if(back) { $? ". ok so far" if(fipo) { if(se & DK3SE_URL) { $? ". output as URL" if(dk3se_must_encode_for_url(c)) { buf[0] = '%'; $? ". percent/hex encoding" sprintf(&(buf[1]), "%02lx", (unsigned long)c32); if(EOF == fputs(buf, fipo)) { back = 0; if(we) { *we = 1;} } } else { $? ". no percent/hex encoding necessary" if(EOF == fputc(c, fipo)) { back = 0; if(we) { *we = 1; } } } } else { $? ". output as is" if(EOF == fputc(c, fipo)) { back = 0; if(we) { *we = 1; } } } } } $? "- dk3se_fputc_checked %d", back return back; } /** Show results for testing or writing a string. @param str String to write or test. @param ev Pointer to result variable: Encoding violations. @param we Pointer to result variable: Write error. @param myev Found encoding violations. @param mywe Found write errors. @param myde Found decoding errors. @param app Application structure for diagnostics, may be NULL. */ static void dk3se_show_results( dkChar const *str, int *ev, int *we, int myev, int mywe, int myde, dk3_app_t *app ) { if(ev) { *ev = myev; } if(we) { *we = mywe; } if(app) { if(myde) { dk3app_log_i3(app, DK3_LL_ERROR, 392, 367, str); } if(myev & DK3SE_ANSI) { dk3app_log_i3(app, DK3_LL_ERROR, 393, 367, str); } if(myev & DK3SE_URL) { dk3app_log_i3(app, DK3_LL_ERROR, 394, 367, str); } if(mywe) { dk3app_log_i3(app, DK3_LL_ERROR, 343, 344, str); } } } /** Write or test one string. @param str String to write or test. @param enc String encoding. @param fipo Output file. @param se Required encodings. @param ev Pointer to result variable: Encoding violations. @param we Pointer to result variable: Write error. @param app Application structure for diagnostics, may be NULL. @return 1 on success, 0 on errors. */ static int dk3se_fct ( dkChar const *str, int enc, FILE *fipo, int se, int *ev, int *we, dk3_app_t *app ) { #if DK3_CHAR_SIZE > 1 #if DK3_CHAR_SIZE > 2 dkChar const *ptr; int back = 1; int myev = 0; int mywe = 0; $? "+ dk3se_fct (32 bit)" ptr = str; while(*ptr) { if(!dk3se_fputc_checked(*ptr, fipo, se, &myev, &mywe, app)) { back = 0; } ptr++; } if(!(back)) { dk3se_show_results(str, ev, we, myev, mywe, 0, app); } $? "- dk3se_fct %d", back return back; #else dk3_c16_t const *sp; dk3_c32_t c32; size_t sl; size_t used; int back = 1; int myev = 0; int mywe = 0; int myde = 0; $? "+ dk3se_fct (16 bit)" sp = (dk3_c16_t const *)str; sl = dk3str_len(str); while(sl > 0) { used = 0; if(dk3enc_utf162uc(&c32, sp, sl, &used)) { if(!dk3se_fputc_checked(c32, fipo, se, &myev, &mywe, app)) { back = 0; } if(used > 0) { if(sl >= used) { sl = sl - used; sp = &(sp[used]); } else { sl = 0; back = 0; myde = 1; } } else { sl = 0; back = 0; myde = 1; } } else { sl = 0; back = 0; myde = 1; } } if(!(back)) { dk3se_show_results(str, ev, we, myev, mywe, myde, app); } $? "- dk3se_fct %d", back return back; #endif #else dk3_c32_t c32; unsigned char const *sp; size_t sl; size_t used; int back = 1; int myev = 0; int mywe = 0; int myde = 0; char c; $? "+ dk3se_fct (8 bit)" sp = (unsigned char const *)str; sl = dk3str_c8_len((char const *)sp); if(DK3_ENCODING_UTF8 == enc) { while(sl > 0) { used = 0; if(dk3enc_utf82uc(&c32, sp, sl, &used)) { if(!dk3se_fputc_checked(c32, fipo, se, &myev, &mywe, app)) { back = 0; } if(used > 0) { if(sl >= used) { sl = sl - used; sp = &(sp[used]); } else { sl = 0; back = 0; myde = 1; } } else { sl = 0; back = 0; myde = 1; } } else { sl = 0; back = 0; myde = 1; } } if(!(back)) { dk3se_show_results(str, ev, we, myev, mywe, myde, app); } } else { while(*sp) { c = *(sp++); c32 = (dk3_c32_t)c; c32 &= 0x000000FFUL; if(!dk3se_fputc_checked(c32, fipo, se, &myev, &mywe, app)) { back = 0; } } if(!(back)) { dk3se_show_results(str, ev, we, myev, mywe, 0, app); } } $? "- dk3se_fct %d", back return back; #endif } int dk3se_ansi_fputc(dk3_c32_t c32, FILE *fipo) { return(dk3se_fputc_checked(c32, fipo, DK3SE_ANSI, NULL, NULL, NULL)); } int dk3se_url_fputc(dk3_c32_t c32, FILE *fipo) { return(dk3se_fputc_checked(c32, fipo, DK3SE_URL, NULL, NULL, NULL)); } int dk3se_ansi_url_fputc(dk3_c32_t c32, FILE *fipo) { return(dk3se_fputc_checked(c32,fipo,(DK3SE_ANSI | DK3SE_URL),NULL,NULL,NULL)); } int dk3se_ansi_fputs(dkChar const *str, int enc, FILE *fipo, dk3_app_t *app) { return(dk3se_fct(str, enc, fipo, DK3SE_ANSI, NULL, NULL, app)); } int dk3se_url_fputs(dkChar const *str, int enc, FILE *fipo, dk3_app_t *app) { return(dk3se_fct(str, enc, fipo, DK3SE_URL, NULL, NULL, app)); } int dk3se_ansi_url_fputs(dkChar const *str, int enc, FILE *fipo, dk3_app_t *app) { return(dk3se_fct(str, enc, fipo, (DK3SE_ANSI | DK3SE_URL), NULL, NULL, app)); } int dk3se_check(dkChar const *str, int enc, int se, int *ev, int *we,dk3_app_t *app) { return(dk3se_fct(str, enc, NULL, se, ev, we, app)); }