blob: bbb9bb8d8df69700203803a588fcbab43d45fb9e [file] [log] [blame]
rjw1f884582022-01-06 17:20:42 +08001/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef GSMALPHABET_H_
18#define GSMALPHABET_H_
19#include <cstdint>
20#include <string>
21#include <map>
22#include <memory>
23#include <vector>
24
/**
 * Declarations for GSM 7-bit alphabet handling: user-data-header septet
 * costs, a description of how a text string would be encoded
 * (TextEncodingDetails), and the packed 7-bit encode/decode entry points.
 *
 * Only TextEncodingDetails::toString() and the LanguagePairCount constructor
 * are defined in this header; every other member is defined elsewhere.
 */
class GsmAlphabet {
public:
    GsmAlphabet();
    virtual ~GsmAlphabet();

    /**
     * This escapes extended characters, and when present indicates that the
     * following character should be looked up in the "extended" table.
     *
     * gsmToChar(GSM_EXTENDED_ESCAPE) returns 0xffff
     */
    static constexpr uint8_t GSM_EXTENDED_ESCAPE = 0x1B;

    /**
     * User data header requires one octet for length. Count as one septet, because
     * all combinations of header elements below will have at least one free bit
     * when padding to the nearest septet boundary.
     */
    static constexpr int UDH_SEPTET_COST_LENGTH = 1;

    /**
     * Using a non-default language locking shift table OR single shift table
     * requires a user data header of 3 octets, or 4 septets, plus UDH length.
     */
    static constexpr int UDH_SEPTET_COST_ONE_SHIFT_TABLE = 4;

    /**
     * Using a non-default language locking shift table AND single shift table
     * requires a user data header of 6 octets, or 7 septets, plus UDH length.
     */
    static constexpr int UDH_SEPTET_COST_TWO_SHIFT_TABLES = 7;

    /**
     * Multi-part messages require a user data header of 5 octets, or 6 septets,
     * plus UDH length.
     */
    static constexpr int UDH_SEPTET_COST_CONCATENATED_MESSAGE = 6;

    /**
     * For a specific text string, this object describes protocol
     * properties of encoding it for transmission as message user
     * data.
     *
     * Every field starts at 0, so a default-constructed object is safe to
     * read and to print.
     */
    class TextEncodingDetails {
    public:
        /**
         * The number of SMS messages required to encode the text.
         */
        int msgCount = 0;

        /**
         * The number of code units consumed so far, where code units
         * are basically characters in the encoding -- for example,
         * septets for the standard ASCII and GSM encodings, and 16
         * bits for Unicode.
         */
        int codeUnitCount = 0;

        /**
         * How many code units are still available without spilling
         * into an additional message.
         */
        int codeUnitsRemaining = 0;

        /**
         * The encoding code unit size (specified using
         * android.telephony.SmsMessage ENCODING_*).
         */
        int codeUnitSize = 0;

        /**
         * The GSM national language table to use, or 0 for the default 7-bit alphabet.
         */
        int languageTable = 0;

        /**
         * The GSM national language shift table to use, or 0 for the default 7-bit extension table.
         */
        int languageShiftTable = 0;

        /**
         * Formats all six fields as a single line of text.
         *
         * @return "TextEncodingDetails { msgCount=..., ..., languageShiftTable=... }"
         */
        std::string toString() const {
            return "TextEncodingDetails { msgCount=" + std::to_string(msgCount)
                    + ", codeUnitCount=" + std::to_string(codeUnitCount)
                    + ", codeUnitsRemaining=" + std::to_string(codeUnitsRemaining)
                    + ", codeUnitSize=" + std::to_string(codeUnitSize)
                    + ", languageTable=" + std::to_string(languageTable)
                    + ", languageShiftTable=" + std::to_string(languageShiftTable) + " }";
        }
    };

    // NOTE(review): the two functions below are only declared here; parameter
    // semantics must be confirmed against their definitions.
    static std::string gsm7BitPackedToString(std::vector<uint8_t> pdu, int offset,
            int lengthSeptets, int numPaddingBits, int languageTable, int shiftTable);
    static std::vector<uint8_t> stringToGsm7BitPacked(std::string data, int startingSeptetOffset,
            bool throwException, int languageTable, int languageShiftTable);

private:
    // NOTE(review): the maps below are keyed by `char`, which cannot represent
    // every Unicode code point -- confirm how non-ASCII table entries are keyed.

    /** Reverse mapping from Unicode characters to indexes into language tables. */
    static const std::vector<std::shared_ptr<std::map<char, int>>> sCharsToGsmTables;
    static std::vector<std::shared_ptr<std::map<char, int>>> initCharsToGsmTables();

    /** Reverse mapping from Unicode characters to indexes into language shift tables. */
    static const std::vector<std::shared_ptr<std::map<char, int>>> sCharsToShiftTables;
    static std::vector<std::shared_ptr<std::map<char, int>>> initCharsToShiftTables();

    /** OEM configured list of enabled national language single shift tables for encoding. */
    static std::vector<int> sEnabledSingleShiftTables;

    /** OEM configured list of enabled national language locking shift tables for encoding. */
    static std::vector<int> sEnabledLockingShiftTables;

    /** Highest language code to include in array of single shift counters. */
    static int sHighestEnabledSingleShiftCode;

    /** Flag to bypass check for country-specific overlays (for test cases only). */
    static bool sDisableCountryEncodingCheck;

    /**
     * Septet counter for a specific locking shift table and all of
     * the single shift tables that it can be paired with.
     */
    class LanguagePairCount {
    public:
        int languageCode;
        std::vector<int> septetCounts;
        std::vector<int> unencodableCounts;

        /**
         * Sizes both counter vectors to sHighestEnabledSingleShiftCode + 1
         * entries (all zero) and then marks with -1 every single shift table
         * that must not be paired with locking shift table `code`.
         *
         * @param code language code of the locking shift table being counted
         */
        LanguagePairCount(int code) {
            languageCode = code;
            const int maxSingleShiftCode = sHighestEnabledSingleShiftCode;
            septetCounts.assign(maxSingleShiftCode + 1, 0);
            unencodableCounts.assign(maxSingleShiftCode + 1, 0);

            // set counters for disabled single shift tables to -1
            // (GSM default extension table index 0 is always enabled)
            //
            // The walk advances through sEnabledSingleShiftTables in step with
            // i. The bounds check keeps the read inside the list even when the
            // list is shorter than sHighestEnabledSingleShiftCode suggests; any
            // code past the end of the list is treated as disabled.
            const int enabledCount = static_cast<int>(sEnabledSingleShiftTables.size());
            for (int i = 1, tableOffset = 0; i <= maxSingleShiftCode; i++) {
                if (tableOffset < enabledCount && sEnabledSingleShiftTables[tableOffset] == i) {
                    tableOffset++;
                } else {
                    septetCounts[i] = -1;   // disabled
                }
            }

            // exclude Turkish locking + Turkish single shift table and
            // Portuguese locking + Spanish single shift table (these
            // combinations will never be optimal for any input).
            if (code == 1 && maxSingleShiftCode >= 1) {
                septetCounts[1] = -1;   // Turkish + Turkish
            } else if (code == 3 && maxSingleShiftCode >= 2) {
                septetCounts[2] = -1;   // Portuguese + Spanish
            }
        }
    };

    static std::vector<std::string> sLanguageTables;
    static std::vector<std::string> sLanguageShiftTables;

    // NOTE(review): packedChars is passed by value and the function returns
    // void, so changes made to it by the implementation would not reach the
    // caller's vector -- confirm this is intended.
    static void packSmsChar(std::vector<uint8_t> packedChars, int bitOffset, int value);
    static int countGsmSeptetsUsingTables(std::string s, bool use7bitOnly, int languageTable, int languageShiftTable);
};
175
176#endif /* GSMALPHABET_H_ */