/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| 16 | |
| 17 | #ifndef GSMALPHABET_H_ |
| 18 | #define GSMALPHABET_H_ |
#include <cstddef>
#include <cstdint>
#include <map>
#include <memory>
#include <string>
#include <vector>
| 24 | |
/**
 * Constants, helper types and static entry points for working with the GSM
 * 7-bit alphabet and its national language locking/single shift tables.
 *
 * Only declarations live here; the static data members and the non-inline
 * functions are defined out of line.
 */
class GsmAlphabet {
public:
    GsmAlphabet();
    virtual ~GsmAlphabet();

    /**
     * This escapes extended characters, and when present indicates that the
     * following character should be looked up in the "extended" table.
     *
     * gsmToChar(GSM_EXTENDED_ESCAPE) returns 0xffff
     */
    static constexpr uint8_t GSM_EXTENDED_ESCAPE = 0x1B;

    /**
     * User data header requires one octet for length. Count as one septet, because
     * all combinations of header elements below will have at least one free bit
     * when padding to the nearest septet boundary.
     */
    static constexpr int UDH_SEPTET_COST_LENGTH = 1;

    /**
     * Using a non-default language locking shift table OR single shift table
     * requires a user data header of 3 octets, or 4 septets, plus UDH length.
     */
    static constexpr int UDH_SEPTET_COST_ONE_SHIFT_TABLE = 4;

    /**
     * Using a non-default language locking shift table AND single shift table
     * requires a user data header of 6 octets, or 7 septets, plus UDH length.
     */
    static constexpr int UDH_SEPTET_COST_TWO_SHIFT_TABLES = 7;

    /**
     * Multi-part messages require a user data header of 5 octets, or 6 septets,
     * plus UDH length.
     */
    static constexpr int UDH_SEPTET_COST_CONCATENATED_MESSAGE = 6;

    /**
     * For a specific text string, this object describes protocol
     * properties of encoding it for transmission as message user
     * data.
     *
     * Every field starts at zero, so a default-constructed object has a
     * well-defined value and can be printed safely.
     */
    class TextEncodingDetails {
    public:
        /**
         * The number of SMS messages required to encode the text.
         */
        int msgCount = 0;

        /**
         * The number of code units consumed so far, where code units
         * are basically characters in the encoding -- for example,
         * septets for the standard ASCII and GSM encodings, and 16
         * bits for Unicode.
         */
        int codeUnitCount = 0;

        /**
         * How many code units are still available without spilling
         * into an additional message.
         */
        int codeUnitsRemaining = 0;

        /**
         * The encoding code unit size (specified using
         * android.telephony.SmsMessage ENCODING_*).
         */
        int codeUnitSize = 0;

        /**
         * The GSM national language table to use, or 0 for the default 7-bit alphabet.
         */
        int languageTable = 0;

        /**
         * The GSM national language shift table to use, or 0 for the default 7-bit extension table.
         */
        int languageShiftTable = 0;

        /**
         * Renders all six fields as a single human-readable line.
         *
         * @return "TextEncodingDetails { name=value, ... }" with the fields in
         *         declaration order.
         */
        std::string toString() const {
            return "TextEncodingDetails { msgCount=" + std::to_string(msgCount)
                    + ", codeUnitCount=" + std::to_string(codeUnitCount)
                    + ", codeUnitsRemaining=" + std::to_string(codeUnitsRemaining)
                    + ", codeUnitSize=" + std::to_string(codeUnitSize)
                    + ", languageTable=" + std::to_string(languageTable)
                    + ", languageShiftTable=" + std::to_string(languageShiftTable) + " }";
        }
    };

    /**
     * Converts packed GSM 7-bit data into a string. Defined out of line.
     *
     * NOTE(review): parameter meanings below are read off the names only --
     * confirm against the definition.
     *
     * @param pdu            buffer holding the packed septets
     * @param offset         presumably the byte offset of the first septet in pdu
     * @param lengthSeptets  presumably the number of septets to decode
     * @param numPaddingBits presumably the fill bits preceding the first septet
     * @param languageTable  national language table selector (0 = default alphabet)
     * @param shiftTable     national language single shift table selector
     */
    static std::string gsm7BitPackedToString(std::vector<uint8_t> pdu, int offset,
            int lengthSeptets, int numPaddingBits, int languageTable, int shiftTable);

    /**
     * Converts a string into packed GSM 7-bit data. Defined out of line.
     *
     * NOTE(review): parameter meanings below are read off the names only --
     * confirm against the definition.
     *
     * @param data                 text to encode
     * @param startingSeptetOffset presumably the number of septets to skip
     *                             before the first encoded character
     * @param throwException       presumably selects failing versus substituting
     *                             when a character cannot be encoded
     * @param languageTable        national language table selector
     * @param languageShiftTable   national language single shift table selector
     */
    static std::vector<uint8_t> stringToGsm7BitPacked(std::string data, int startingSeptetOffset,
            bool throwException, int languageTable, int languageShiftTable);

private:
    /**
     * Reverse mapping from Unicode characters to indexes into language tables.
     *
     * NOTE(review): the key type is char, which cannot represent most Unicode
     * code points -- confirm how non-ASCII characters are meant to be keyed.
     */
    static const std::vector<std::shared_ptr<std::map<char, int>>> sCharsToGsmTables;
    static std::vector<std::shared_ptr<std::map<char, int>>> initCharsToGsmTables();

    /** Reverse mapping from Unicode characters to indexes into language shift tables. */
    static const std::vector<std::shared_ptr<std::map<char, int>>> sCharsToShiftTables;
    static std::vector<std::shared_ptr<std::map<char, int>>> initCharsToShiftTables();

    /** OEM configured list of enabled national language single shift tables for encoding. */
    static std::vector<int> sEnabledSingleShiftTables;

    /** OEM configured list of enabled national language locking shift tables for encoding. */
    static std::vector<int> sEnabledLockingShiftTables;

    /** Highest language code to include in array of single shift counters. */
    static int sHighestEnabledSingleShiftCode;

    /** Flag to bypass check for country-specific overlays (for test cases only). */
    static bool sDisableCountryEncodingCheck;

    /**
     * Septet counter for a specific locking shift table and all of
     * the single shift tables that it can be paired with.
     */
    class LanguagePairCount {
    public:
        int languageCode;
        std::vector<int> septetCounts;
        std::vector<int> unencodableCounts;

        /**
         * Builds zeroed counters for single shift codes
         * 0..sHighestEnabledSingleShiftCode and marks the unusable ones
         * with -1 in septetCounts.
         *
         * @param code locking shift language code these counters belong to
         */
        LanguagePairCount(int code) : languageCode(code) {
            const int maxSingleShiftCode = sHighestEnabledSingleShiftCode;
            // A negative highest code would otherwise turn into a huge
            // element count; treat it as "no counters".
            const std::size_t counterCount = maxSingleShiftCode < 0
                    ? 0
                    : static_cast<std::size_t>(maxSingleShiftCode) + 1;
            septetCounts.assign(counterCount, 0);
            unencodableCounts.assign(counterCount, 0);

            // set counters for disabled single shift tables to -1
            // (GSM default extension table index 0 is always enabled)
            // The walk compares each code against the next unmatched entry of
            // sEnabledSingleShiftTables, so it relies on that list being in
            // ascending order. Once the list is exhausted every remaining code
            // is treated as disabled instead of reading past the end.
            const std::size_t enabledCount = sEnabledSingleShiftTables.size();
            std::size_t tableOffset = 0;
            for (int i = 1; i <= maxSingleShiftCode; i++) {
                if (tableOffset < enabledCount
                        && sEnabledSingleShiftTables[tableOffset] == i) {
                    tableOffset++;
                } else {
                    septetCounts[i] = -1;   // disabled
                }
            }

            // exclude Turkish locking + Turkish single shift table and
            // Portuguese locking + Spanish single shift table (these
            // combinations will never be optimal for any input).
            if (code == 1 && maxSingleShiftCode >= 1) {
                septetCounts[1] = -1;   // Turkish + Turkish
            } else if (code == 3 && maxSingleShiftCode >= 2) {
                septetCounts[2] = -1;   // Portuguese + Spanish
            }
        }
    };

    static std::vector<std::string> sLanguageTables;
    static std::vector<std::string> sLanguageShiftTables;

    /**
     * Packs one value into packedChars at the given bit offset. Defined out of line.
     *
     * NOTE(review): packedChars is passed by value, so anything the definition
     * writes into it is not visible to the caller -- confirm whether a
     * reference was intended.
     */
    static void packSmsChar(std::vector<uint8_t> packedChars, int bitOffset, int value);

    /**
     * Counts the septets needed for s using the given language table pair.
     * Defined out of line.
     *
     * NOTE(review): presumably use7bitOnly controls how characters missing
     * from the tables are counted -- confirm against the definition.
     */
    static int countGsmSeptetsUsingTables(std::string s, bool use7bitOnly, int languageTable,
            int languageShiftTable);
};
| 175 | |
| 176 | #endif /* GSMALPHABET_H_ */ |