//
// Created by hitmoon on 15-12-17.
//
4
5#ifndef SMS_UTF_H
6#define SMS_UTF_H
7
8#include <stddef.h>
9#include <sys/types.h>
10
/* Code-unit types used by the conversion routines declared in this header. */
typedef unsigned int UTF32;    /* at least 32 bits */
typedef unsigned short UTF16;  /* at least 16 bits */
typedef unsigned char UTF8;    /* typically 8 bits */
typedef unsigned char Boolean; /* 0 or 1 */

/*
 * Some fundamental constants.
 *
 * Every expansion is wrapped in an outer pair of parentheses so the macro
 * behaves as a single primary expression wherever it is used (for example as
 * the operand of `sizeof`); a bare `(UTF32)0x...` cast does not.
 */
#define UNI_REPLACEMENT_CHAR ((UTF32)0x0000FFFD) /* U+FFFD REPLACEMENT CHARACTER */
#define UNI_MAX_BMP          ((UTF32)0x0000FFFF) /* last code point of the Basic Multilingual Plane */
#define UNI_MAX_UTF16        ((UTF32)0x0010FFFF) /* highest code point UTF-16 can encode */
#define UNI_MAX_UTF32        ((UTF32)0x7FFFFFFF) /* largest 31-bit value */
#define UNI_MAX_LEGAL_UTF32  ((UTF32)0x0010FFFF) /* highest legal Unicode code point */
25
/* Status code returned by each of the Convert* routines declared below. */
typedef enum {
    conversionOK    = 0, /* the conversion completed successfully */
    sourceExhausted = 1, /* source ended in the middle of a character */
    targetExhausted = 2, /* not enough room in the target for the output */
    sourceIllegal   = 3  /* source sequence is illegal or malformed */
} ConversionResult;
32
/*
 * Mode selector passed as the `flags` argument of the Convert* routines.
 * How strict and lenient handling differ is decided by the implementation,
 * which is not visible in this header.
 */
typedef enum {
    strictConversion  = 0,
    lenientConversion = 1
} ConversionFlags;
37
38/* This is for C++ and does no harm in C */
39#ifdef __cplusplus
40extern "C" {
41#endif
42
43ConversionResult ConvertUTF8toUTF16(
44 const UTF8 **sourceStart, const UTF8 *sourceEnd,
45 UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags);
46
47ConversionResult ConvertUTF16toUTF8(
48 const UTF16 **sourceStart, const UTF16 *sourceEnd,
49 UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags);
50
51ConversionResult ConvertUTF8toUTF32(
52 const UTF8 **sourceStart, const UTF8 *sourceEnd,
53 UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags);
54
55ConversionResult ConvertUTF32toUTF8(
56 const UTF32 **sourceStart, const UTF32 *sourceEnd,
57 UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags);
58
59ConversionResult ConvertUTF16toUTF32(
60 const UTF16 **sourceStart, const UTF16 *sourceEnd,
61 UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags);
62
63ConversionResult ConvertUTF32toUTF16(
64 const UTF32 **sourceStart, const UTF32 *sourceEnd,
65 UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags);
66
67Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
68
69#ifdef __cplusplus
70}
71#endif
72
/*
 * String-level helper routines.
 *
 * They are wrapped in their own extern "C" guard so that C++ translation
 * units including this header reference the same unmangled symbols as C
 * translation units do. The guard has no effect when compiled as C.
 *
 * The behavioural notes below are inferred from the signatures only and are
 * marked as assumptions; confirm them against the implementation.
 */
#ifdef __cplusplus
extern "C" {
#endif

/*
 * Get the start address of the next char.
 * NOTE(review): `string` is a pointer-to-pointer, so the cursor is presumably
 * advanced; the meaning of the u_int32_t return value is not visible here.
 * u_int32_t is the BSD-style name provided by <sys/types.h>.
 */
u_int32_t next_char(unsigned char **string);

/*
 * UTF-32 (wchar_t) to UTF-8.
 * NOTE(review): presumably writes into `target` (capacity `size`) and reports
 * a length through `len`; units and termination are to be confirmed.
 */
const unsigned char *utf32toutf8(wchar_t *source, unsigned char *target, size_t size, int *len);

/*
 * UTF-16 to UTF-8 and UTF-8 to UTF-16.
 * NOTE(review): same parameter layout as utf32toutf8; presumably `target` is
 * the output buffer, `size` its capacity and `len` an out-parameter.
 */
unsigned char *utf16toutf8(unsigned short *source, unsigned char *target, size_t size, int *len);
unsigned short *utf8toutf16(unsigned char *source, unsigned short *target, size_t size, int *len);

/* NOTE(review): presumably the length of a UTF-8 string; unit (bytes or characters) to be confirmed. */
int utf8len(unsigned char *string);

/* Name is spelled "acsii" (sic); kept as-is because it is the public symbol. */
int is_acsii(unsigned char *string);

/* NOTE(review): presumably a size related to `num` characters of `source`; confirm. */
size_t utf8_get_size(unsigned char *source, size_t num);

#ifdef __cplusplus
}
#endif
84
85#endif //SMS_UTF_H