//
// Created by hitmoon on 15-12-17.
//

#ifndef SMS_UTF_H
#define SMS_UTF_H

#include <stddef.h>
#include <stdint.h>
#include <sys/types.h>

/*
 * Code-unit types used by the conversion routines declared in this header.
 *
 * The widths given in the trailing comments are minimum requirements on the
 * underlying integer type. The C standard only guarantees 16 bits for
 * `unsigned int`, so the UTF32 requirement is enforced at compile time below
 * instead of being left as a comment.
 */
typedef unsigned int UTF32;    /* at least 32 bits */
typedef unsigned short UTF16;  /* at least 16 bits */
typedef unsigned char UTF8;    /* typically 8 bits */
typedef unsigned char Boolean; /* 0 or 1 */

/*
 * Compile-time guard (works in C89/C99/C++ without extra headers): the array
 * size becomes -1, a hard error, when UTF32 cannot hold a 32-bit value.
 */
typedef char utf_assert_utf32_has_32_bits[((UTF32)-1 >= 0xFFFFFFFFUL) ? 1 : -1];

/*
 * Some fundamental constants.
 *
 * Every replacement list is fully parenthesized so the macro behaves as a
 * single primary expression wherever it is used (e.g. `sizeof UNI_MAX_BMP`
 * would otherwise parse as `sizeof (UTF32)` followed by a stray literal).
 */
#define UNI_REPLACEMENT_CHAR ((UTF32)0x0000FFFD)
#define UNI_MAX_BMP ((UTF32)0x0000FFFF)
#define UNI_MAX_UTF16 ((UTF32)0x0010FFFF)
#define UNI_MAX_UTF32 ((UTF32)0x7FFFFFFF)
#define UNI_MAX_LEGAL_UTF32 ((UTF32)0x0010FFFF)

/*
 * Status code returned by every Convert* routine declared in this header.
 * The first enumerator is pinned to 0 explicitly, matching ConversionFlags.
 */
typedef enum {
    conversionOK = 0, /* conversion successful */
    sourceExhausted,  /* partial character in source, but hit end */
    targetExhausted,  /* insufficient room in target for conversion */
    sourceIllegal     /* source sequence is illegal/malformed */
} ConversionResult;

/*
 * Conversion mode, passed as the `flags` argument of the Convert* routines.
 * NOTE(review): what exactly "lenient" tolerates is decided by the
 * implementation file and is not visible from this header - confirm there.
 */
typedef enum {
    strictConversion = 0,
    lenientConversion
} ConversionFlags;

| 38 | /* This is for C++ and does no harm in C */
|
| 39 | #ifdef __cplusplus
|
| 40 | extern "C" {
|
| 41 | #endif
|
| 42 |
|
| 43 | ConversionResult ConvertUTF8toUTF16(
|
| 44 | const UTF8 **sourceStart, const UTF8 *sourceEnd,
|
| 45 | UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags);
|
| 46 |
|
| 47 | ConversionResult ConvertUTF16toUTF8(
|
| 48 | const UTF16 **sourceStart, const UTF16 *sourceEnd,
|
| 49 | UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags);
|
| 50 |
|
| 51 | ConversionResult ConvertUTF8toUTF32(
|
| 52 | const UTF8 **sourceStart, const UTF8 *sourceEnd,
|
| 53 | UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags);
|
| 54 |
|
| 55 | ConversionResult ConvertUTF32toUTF8(
|
| 56 | const UTF32 **sourceStart, const UTF32 *sourceEnd,
|
| 57 | UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags);
|
| 58 |
|
| 59 | ConversionResult ConvertUTF16toUTF32(
|
| 60 | const UTF16 **sourceStart, const UTF16 *sourceEnd,
|
| 61 | UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags);
|
| 62 |
|
| 63 | ConversionResult ConvertUTF32toUTF16(
|
| 64 | const UTF32 **sourceStart, const UTF32 *sourceEnd,
|
| 65 | UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags);
|
| 66 |
|
| 67 | Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
|
| 68 |
|
| 69 | #ifdef __cplusplus
|
| 70 | }
|
| 71 | #endif
|
| 72 |
|
/*
 * String-level helper prototypes.
 *
 * They carry their own extern "C" guard so that C++ translation units see
 * unmangled names for these functions as well.
 */
#ifdef __cplusplus
extern "C" {
#endif

// Get the starting address of the next char.
// NOTE(review): the return value is a 32-bit integer, presumably the decoded
// code point, while *string is the in/out cursor - confirm in the .c file.
// Declared with the standard uint32_t instead of the BSD-only u_int32_t.
uint32_t next_char(unsigned char **string);

// NOTE(review): for the three converters below, `size` looks like the
// capacity of `target` and `*len` like an output length; the units (bytes vs.
// code units) are not visible from this header - confirm before relying on it.
const unsigned char *utf32toutf8(wchar_t *source, unsigned char *target, size_t size, int *len);

unsigned char *utf16toutf8(unsigned short *source, unsigned char *target, size_t size, int *len);
unsigned short *utf8toutf16(unsigned char *source, unsigned short *target, size_t size, int *len);

// NOTE(review): presumably the number of characters in a UTF-8 string - confirm.
int utf8len(unsigned char *string);
// The misspelled name ("acsii") is part of the public interface and is kept
// as-is so existing callers keep linking.
int is_acsii(unsigned char *string);
// NOTE(review): presumably the byte size of the first `num` characters of
// `source` - confirm against the implementation.
size_t utf8_get_size(unsigned char *source, size_t num);

#ifdef __cplusplus
}
#endif

#endif //SMS_UTF_H