lh | 9ed821d | 2023-04-07 01:36:19 -0700 | [diff] [blame] | 1 | /* toutf8.c --- Convert strings from system locale into UTF-8. |
| 2 | * Copyright (C) 2002, 2003, 2004, 2005 Simon Josefsson |
| 3 | * |
| 4 | * This file is part of GNU Libidn. |
| 5 | * |
| 6 | * GNU Libidn is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU Lesser General Public |
| 8 | * License as published by the Free Software Foundation; either |
| 9 | * version 2.1 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | * GNU Libidn is distributed in the hope that it will be useful, |
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 | * Lesser General Public License for more details. |
| 15 | * |
| 16 | * You should have received a copy of the GNU Lesser General Public |
| 17 | * License along with GNU Libidn; if not, see <http://www.gnu.org/licenses/>. |
| 18 | */ |
| 19 | |
| 20 | #if HAVE_CONFIG_H |
| 21 | # include "config.h" |
| 22 | #endif |
| 23 | |
| 24 | /* Get prototypes. */ |
| 25 | #include "stringprep.h" |
| 26 | |
| 27 | /* Get fprintf. */ |
| 28 | #include <stdio.h> |
| 29 | |
| 30 | /* Get getenv. */ |
| 31 | #include <stdlib.h> |
| 32 | |
| 33 | /* Get strlen. */ |
| 34 | #include <string.h> |
| 35 | |
| 36 | /* Get iconv_string. */ |
| 37 | #include "iconvme.h" |
| 38 | |
| 39 | #ifdef _LIBC |
| 40 | # define HAVE_ICONV 1 |
| 41 | # define LOCALE_WORKS 1 |
| 42 | #endif |
| 43 | |
| 44 | #if LOCALE_WORKS |
| 45 | # include <langinfo.h> |
| 46 | # include <locale.h> |
| 47 | #endif |
| 48 | |
| 49 | #ifdef _LIBC |
| 50 | # define stringprep_locale_charset() nl_langinfo (CODESET) |
| 51 | #else |
/**
 * stringprep_locale_charset - return charset used in current locale
 *
 * Find out the current locale charset.  The function respects the
 * CHARSET environment variable, but typically uses
 * nl_langinfo(CODESET) when it is supported.  It falls back on
 * "ASCII" if CHARSET isn't set and nl_langinfo isn't supported or
 * doesn't return anything.
 *
 * Note that this function returns the application's locale's
 * preferred charset (or the thread's locale's preferred charset, if
 * your system supports thread-specific locales).  It does not return
 * what the system may be using.  Thus, if you receive data from
 * external sources you cannot in general use this function to guess
 * what charset it is encoded in.  Use stringprep_convert from the
 * external representation into the charset returned by this
 * function, to have data in the locale encoding.
 *
 * The returned pointer comes from getenv(), nl_langinfo() or a
 * string literal, so the caller must neither modify nor free it.
 *
 * Return value: Return the character set used by the current locale.
 *   It will never return NULL, but use "ASCII" as a fallback.
 **/
const char *
stringprep_locale_charset (void)
{
  const char *charset = getenv ("CHARSET");	/* flawfinder: ignore */

  /* An explicitly set, non-empty CHARSET overrides the locale.  */
  if (charset && *charset)
    return charset;

  /* Test the value of LOCALE_WORKS rather than whether it is merely
     defined, so that this guard agrees with the "#if LOCALE_WORKS"
     that decides whether <langinfo.h> is included at all.  */
# if LOCALE_WORKS
  charset = nl_langinfo (CODESET);

  if (charset && *charset)
    return charset;
# endif

  return "ASCII";
}
| 89 | #endif |
| 90 | |
/**
 * stringprep_convert - encode string using new character set
 * @str: input zero-terminated string.
 * @to_codeset: name of destination character set.
 * @from_codeset: name of origin character set, as used by @str.
 *
 * Convert the string from one character set to another using the
 * system's iconv() function.
 *
 * When the library was built without iconv support, no conversion
 * takes place: a warning is printed on stderr and an unmodified copy
 * of @str, allocated with malloc(), is returned instead.
 *
 * Return value: Returns newly allocated zero-terminated string which
 *   is @str transcoded into to_codeset, or NULL if @str is NULL or
 *   the result could not be produced.  The caller owns the returned
 *   string.
 **/
char *
stringprep_convert (const char *str,
		    const char *to_codeset, const char *from_codeset)
{
  /* Reject a NULL input up front; strlen() on it would be undefined
     behaviour.  */
  if (str == NULL)
    return NULL;

#if HAVE_ICONV
  return iconv_string (str, from_codeset, to_codeset);
#else
  {
    size_t size = strlen (str) + 1;	/* Include the terminating NUL.  */
    char *copy;

    /* The codeset names are meaningless without iconv.  */
    (void) to_codeset;
    (void) from_codeset;

    fprintf (stderr, "libidn: warning: libiconv not installed, cannot "
	     "convert data to UTF-8\n");

    copy = malloc (size);
    if (copy == NULL)
      return NULL;

    memcpy (copy, str, size);
    return copy;
  }
#endif
}
| 119 | |
/**
 * stringprep_locale_to_utf8 - convert locale encoded string to UTF-8
 * @str: input zero terminated string.
 *
 * Transcode @str from the character set reported by
 * stringprep_locale_charset() into UTF-8, delegating the actual work
 * to stringprep_convert().
 *
 * Return value: Returns whatever stringprep_convert() returns: a
 *   newly allocated zero-terminated string which is @str transcoded
 *   into UTF-8.
 **/
char *
stringprep_locale_to_utf8 (const char *str)
{
  const char *locale_codeset = stringprep_locale_charset ();

  return stringprep_convert (str, "UTF-8", locale_codeset);
}
| 135 | |
/**
 * stringprep_utf8_to_locale - encode UTF-8 string to locale encoding
 * @str: input zero terminated string.
 *
 * Transcode @str from UTF-8 into the character set reported by
 * stringprep_locale_charset(), delegating the actual work to
 * stringprep_convert().
 *
 * Return value: Returns whatever stringprep_convert() returns: a
 *   newly allocated zero-terminated string which is @str transcoded
 *   into the locale's character set.
 **/
char *
stringprep_utf8_to_locale (const char *str)
{
  const char *locale_codeset = stringprep_locale_charset ();

  return stringprep_convert (str, locale_codeset, "UTF-8");
}