b.liu | d440f9f | 2025-04-18 10:44:31 +0800 | [diff] [blame^] | 1 | #include <stdio.h> |
| 2 | #include <stdlib.h> |
| 3 | #include <string.h> |
| 4 | #include <iconv.h> |
| 5 | #include <errno.h> |
| 6 | #include <stddef.h> |
| 7 | |
/*
 * One fixed-size chunk of converted output.  eazyiconv() collects its
 * output in a singly linked chain of these chunks and joins them into one
 * contiguous buffer once the conversion has finished.
 */
struct outbuf
{
    struct outbuf *next;  // next chunk in the chain, NULL for the last one
    char *outptr;         // write cursor inside buf (advanced by iconv)
    size_t outbytesleft;  // free bytes remaining in buf after outptr
    char buf[256];        // storage of this chunk
};

/*
 * Allocate an empty, zero-filled chunk and, when `tail` is not NULL, link it
 * behind `tail`.  Returns the new chunk, or NULL if the allocation failed
 * (in which case `tail` is left untouched).
 */
static struct outbuf *eazyiconv_new_chunk(struct outbuf *tail)
{
    struct outbuf *chunk = calloc(1, sizeof *chunk);

    if (chunk == NULL)
    {
        return NULL;
    }
    chunk->outptr = chunk->buf;
    chunk->outbytesleft = sizeof chunk->buf;
    if (tail != NULL)
    {
        tail->next = chunk;
    }
    return chunk;
}

/*
 * Read up to sizeof(unsigned int) bytes of one input element as an unsigned
 * number in host byte order.  Shorter elements are placed in the low-order
 * bytes so that a 1-byte element 0xFF yields 0xFF on every host.
 */
static unsigned int eazyiconv_element_value(const char *src, size_t len)
{
    const unsigned int probe = 1;
    unsigned int value = 0;
    size_t offset = 0;

    if (len > sizeof value)
    {
        len = sizeof value;
    }
    if (*(const unsigned char *)&probe == 0)
    {
        // big-endian host: the low-order bytes live at the end of the object
        offset = sizeof value - len;
    }
    memcpy((char *)&value + offset, src, len);
    return value;
}

/*
 * Format `replchr` (a printf format that receives `chrval` as its single
 * unsigned int argument) and convert the resulting UTF-8 text to charset
 * `to`, storing at most `dstsize` bytes in `dst` and the byte count in
 * `*dstlen`.  A NULL `replchr` yields an empty replacement.
 *
 * Returns 0 on success, -1 if no UTF-8 -> `to` converter could be opened.
 * Errors while converting the replacement text itself are ignored; whatever
 * was converted up to that point is used.
 */
static int eazyiconv_render_replacement(const char *to, const char *replchr,
                                        unsigned int chrval,
                                        char *dst, size_t dstsize, size_t *dstlen)
{
    char fmtbuf[256] = "";
    char *src = fmtbuf;
    size_t srcleft = 0;
    char *out = dst;
    size_t outleft = dstsize;
    iconv_t cd;

    *dstlen = 0;
    if (replchr == NULL)
    {
        return 0;
    }

    // The format string is part of the caller's contract and is trusted here.
    if (snprintf(fmtbuf, sizeof fmtbuf, replchr, chrval) < 0)
    {
        fmtbuf[0] = '\0';
    }
    srcleft = strlen(fmtbuf);

    // A fresh descriptor is used for every replacement so that each one is
    // converted starting from the initial shift state.
    cd = iconv_open(to, "UTF-8");
    if (cd == (iconv_t)-1)
    {
        return -1;
    }
    iconv(cd, &src, &srcleft, &out, &outleft);
    iconv(cd, NULL, NULL, &out, &outleft);  // emit any closing shift sequence
    iconv_close(cd);

    *dstlen = (size_t)(out - dst);
    return 0;
}

/*
 * Convert `str_blen` bytes at `str` from charset `from` to charset `to`.
 *
 * Input that cannot be converted (invalid sequence, or an incomplete sequence
 * at the end of the input) does not abort the conversion: one input element
 * is skipped and a replacement text is written instead.
 *
 * to, from          iconv charset names.
 * str, str_blen     input buffer and its length in bytes (not modified).
 * str_elemsize      size in bytes of one input element; this many bytes are
 *                   skipped per unconvertible element (0 is treated as 1, and
 *                   the skip never runs past the end of the input).
 * out_tailzero_blen number of zero bytes appended after the converted data.
 * out_size          if not NULL, receives the converted length in bytes, not
 *                   counting the zero tail; written only on success.
 * replchr           printf format producing the replacement text (UTF-8); it
 *                   receives the value of the skipped element as an unsigned
 *                   int.  NULL drops unconvertible elements silently.
 *
 * Returns a buffer obtained from calloc() that the caller must free(), or
 * NULL if a converter could not be opened, memory ran out, or iconv reported
 * an unexpected error.
 */
char *eazyiconv(const char *to, const char *from,
                char *str, size_t str_blen, size_t str_elemsize, size_t out_tailzero_blen, size_t *out_size,
                const char *replchr)
{
    char *retstr = NULL;
    char *retiter = NULL;
    struct outbuf *outhead = NULL;
    struct outbuf *outtail = NULL;
    struct outbuf *outiter = NULL;
    char *inptr = str;
    size_t inbytesleft = str_blen;
    size_t totalsize = 0;
    size_t allocsize = 0;
    int flushing = (str == NULL);  // a NULL input means "flush only"
    iconv_t cd;

    cd = iconv_open(to, from);
    if (cd == (iconv_t)-1)
    {
        return NULL;
    }

    outhead = outtail = eazyiconv_new_chunk(NULL);
    if (outtail == NULL)
    {
        goto clean_cd;
    }

    for (;;)
    {
        size_t rc = iconv(cd, &inptr, &inbytesleft, &outtail->outptr, &outtail->outbytesleft);
        int err = (rc == (size_t)-1) ? errno : 0;

        if (err == 0)
        {
            if (flushing)
            {
                break;  // the final flush succeeded: conversion complete
            }
            // All input consumed; run once more with a NULL input pointer so
            // iconv can emit whatever returns the output to its initial state.
            flushing = 1;
            inptr = NULL;
            inbytesleft = 0;
        }
        else if (err == E2BIG)
        {
            // Current chunk is full.  If even an empty chunk is too small,
            // appending another one can never help, so give up.
            if (outtail->outptr == outtail->buf)
            {
                goto clean_outbufs;
            }
            outiter = eazyiconv_new_chunk(outtail);
            if (outiter == NULL)
            {
                goto clean_outbufs;
            }
            outtail = outiter;
        }
        else if ((err == EILSEQ || err == EINVAL) && inptr != NULL && inbytesleft > 0)
        {
            char replbuf[256];
            size_t repl_len = 0;
            size_t skip = (str_elemsize != 0) ? str_elemsize : 1;
            unsigned int chrval;

            // Never step past the end of the input: a short tail would
            // otherwise wrap inbytesleft around to a huge value.
            if (skip > inbytesleft)
            {
                skip = inbytesleft;
            }
            chrval = eazyiconv_element_value(inptr, skip);
            inptr += skip;
            inbytesleft -= skip;

            if (eazyiconv_render_replacement(to, replchr, chrval,
                                             replbuf, sizeof replbuf, &repl_len) != 0)
            {
                goto clean_outbufs;
            }

            // replbuf is no larger than a chunk, so a fresh chunk always fits it.
            if (outtail->outbytesleft < repl_len)
            {
                outiter = eazyiconv_new_chunk(outtail);
                if (outiter == NULL)
                {
                    goto clean_outbufs;
                }
                outtail = outiter;
            }
            memcpy(outtail->outptr, replbuf, repl_len);
            outtail->outptr += repl_len;
            outtail->outbytesleft -= repl_len;
        }
        else
        {
            // Unexpected errno, or a conversion error while flushing:
            // retrying cannot make progress.
            goto clean_outbufs;
        }
    }

    // Join the chunks into one contiguous, zero-terminated buffer.
    for (outiter = outhead; outiter != NULL; outiter = outiter->next)
    {
        totalsize += (size_t)(outiter->outptr - outiter->buf);
    }
    if (out_tailzero_blen > (size_t)-1 - totalsize)
    {
        goto clean_outbufs;  // size computation would overflow
    }
    allocsize = totalsize + out_tailzero_blen;
    // calloc(0, ...) may legally return NULL; ask for at least one byte so
    // an empty result is distinguishable from a failure.
    retstr = calloc(allocsize != 0 ? allocsize : 1, 1);
    if (retstr == NULL)
    {
        goto clean_outbufs;
    }
    retiter = retstr;
    for (outiter = outhead; outiter != NULL; outiter = outiter->next)
    {
        size_t blocksize = (size_t)(outiter->outptr - outiter->buf);

        memcpy(retiter, outiter->buf, blocksize);
        retiter += blocksize;
    }
    // The zero tail is already in place because calloc zero-fills.
    if (out_size != NULL)
    {
        *out_size = totalsize;
    }

clean_outbufs:
    while (outhead != NULL)
    {
        outiter = outhead;
        outhead = outhead->next;
        free(outiter);
    }
clean_cd:
    iconv_close(cd);
    return retstr;
}
| 176 | |
/*
 * Parse `text` as a non-negative decimal number into `*value`.
 * Returns 0 on success, -1 if the text is empty, has trailing characters,
 * is out of range for long, or is negative.
 */
static int parse_size_arg(const char *text, size_t *value)
{
    char *end = NULL;
    long parsed;

    errno = 0;
    parsed = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE || parsed < 0)
    {
        return -1;
    }
    *value = (size_t)parsed;
    return 0;
}

/*
 * Command-line driver: reads from_file completely, converts it with
 * eazyiconv() from from_charset to to_charset (unconvertible elements become
 * "<0x..>"), and writes the converted bytes to to_file.
 *
 * Returns 0 on success or when only the usage text was printed, and
 * EXIT_FAILURE if an argument is invalid, a file operation fails, or the
 * conversion fails.
 */
int main(int argc, char **argv)
{
    int status = EXIT_FAILURE;
    FILE *from_file = NULL;
    FILE *to_file = NULL;
    char *from_str = NULL;
    char *to_str = NULL;
    size_t from_elemsize = 0;
    size_t to_elemsize = 0;
    size_t from_len = 0;
    size_t out_size = 0;
    long fsize = 0;

    if (argc < 7)
    {
        printf("usage: eiconv_test from_charset from_elemsize to_charset to_elemsize from_file to_file (no utf-16/32)\n");
        return 0;
    }

    if (parse_size_arg(argv[2], &from_elemsize) != 0 ||
        parse_size_arg(argv[4], &to_elemsize) != 0)
    {
        fprintf(stderr, "element sizes must be non-negative integers\n");
        return EXIT_FAILURE;
    }

    // Read the whole input file into memory.
    from_file = fopen(argv[5], "rb");
    if (from_file == NULL)
    {
        perror(argv[5]);
        goto done;
    }
    if (fseek(from_file, 0, SEEK_END) != 0)
    {
        perror(argv[5]);
        goto done;
    }
    fsize = ftell(from_file);
    if (fsize < 0 || fseek(from_file, 0, SEEK_SET) != 0)
    {
        perror(argv[5]);
        goto done;
    }
    from_str = malloc((size_t)fsize + 1);
    if (from_str == NULL)
    {
        perror("malloc");
        goto done;
    }
    // Use the number of bytes actually read, which may be less than fsize.
    from_len = fread(from_str, 1, (size_t)fsize, from_file);
    if (ferror(from_file))
    {
        perror(argv[5]);
        goto done;
    }
    from_str[from_len] = '\0';
    fclose(from_file);
    from_file = NULL;

    // to_elemsize doubles as the number of terminating zero bytes requested.
    to_str = eazyiconv(argv[3], argv[1],
                       from_str, from_len, from_elemsize, to_elemsize, &out_size,
                       "<0x%02X>");
    if (to_str == NULL)
    {
        fprintf(stderr, "conversion from %s to %s failed\n", argv[1], argv[3]);
        goto done;
    }

    to_file = fopen(argv[6], "wb");
    if (to_file == NULL)
    {
        perror(argv[6]);
        goto done;
    }
    if (fwrite(to_str, 1, out_size, to_file) != out_size)
    {
        perror(argv[6]);
        goto done;
    }
    // fclose flushes buffered data, so a failure here means a short write.
    if (fclose(to_file) != 0)
    {
        to_file = NULL;
        perror(argv[6]);
        goto done;
    }
    to_file = NULL;
    status = EXIT_SUCCESS;

done:
    if (to_file != NULL)
    {
        fclose(to_file);
    }
    if (from_file != NULL)
    {
        fclose(from_file);
    }
    free(to_str);
    free(from_str);
    return status;
}
| 203 | |
| 204 | |