| lh | 9ed821d | 2023-04-07 01:36:19 -0700 | [diff] [blame] | 1 | /* toutf8.c --- Convert strings from system locale into UTF-8. | 
 | 2 |  * Copyright (C) 2002, 2003, 2004, 2005  Simon Josefsson | 
 | 3 |  * | 
 | 4 |  * This file is part of GNU Libidn. | 
 | 5 |  * | 
 | 6 |  * GNU Libidn is free software; you can redistribute it and/or | 
 | 7 |  * modify it under the terms of the GNU Lesser General Public | 
 | 8 |  * License as published by the Free Software Foundation; either | 
 | 9 |  * version 2.1 of the License, or (at your option) any later version. | 
 | 10 |  * | 
 | 11 |  * GNU Libidn is distributed in the hope that it will be useful, | 
 | 12 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
 | 13 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
 | 14 |  * Lesser General Public License for more details. | 
 | 15 |  * | 
 | 16 |  * You should have received a copy of the GNU Lesser General Public | 
 | 17 |  * License along with GNU Libidn; if not, see <http://www.gnu.org/licenses/>. | 
 | 18 |  */ | 
 | 19 |  | 
 | 20 | #if HAVE_CONFIG_H | 
 | 21 | # include "config.h" | 
 | 22 | #endif | 
 | 23 |  | 
 | 24 | /* Get prototypes. */ | 
 | 25 | #include "stringprep.h" | 
 | 26 |  | 
 | 27 | /* Get fprintf. */ | 
 | 28 | #include <stdio.h> | 
 | 29 |  | 
 | 30 | /* Get getenv. */ | 
 | 31 | #include <stdlib.h> | 
 | 32 |  | 
 | 33 | /* Get strlen. */ | 
 | 34 | #include <string.h> | 
 | 35 |  | 
 | 36 | /* Get iconv_string. */ | 
 | 37 | #include "iconvme.h" | 
 | 38 |  | 
 | 39 | #ifdef _LIBC | 
 | 40 | # define HAVE_ICONV 1 | 
 | 41 | # define LOCALE_WORKS 1 | 
 | 42 | #endif | 
 | 43 |  | 
 | 44 | #if LOCALE_WORKS | 
 | 45 | # include <langinfo.h> | 
 | 46 | # include <locale.h> | 
 | 47 | #endif | 
 | 48 |  | 
 | 49 | #ifdef _LIBC | 
 | 50 | # define stringprep_locale_charset() nl_langinfo (CODESET) | 
 | 51 | #else | 
 | 52 | /** | 
 | 53 |  * stringprep_locale_charset - return charset used in current locale | 
 | 54 |  * | 
 | 55 |  * Find out current locale charset.  The function respect the CHARSET | 
 | 56 |  * environment variable, but typically uses nl_langinfo(CODESET) when | 
 | 57 |  * it is supported.  It fall back on "ASCII" if CHARSET isn't set and | 
 | 58 |  * nl_langinfo isn't supported or return anything. | 
 | 59 |  * | 
 | 60 |  * Note that this function return the application's locale's preferred | 
 | 61 |  * charset (or thread's locale's preffered charset, if your system | 
 | 62 |  * support thread-specific locales).  It does not return what the | 
 | 63 |  * system may be using.  Thus, if you receive data from external | 
 | 64 |  * sources you cannot in general use this function to guess what | 
 | 65 |  * charset it is encoded in.  Use stringprep_convert from the external | 
 | 66 |  * representation into the charset returned by this function, to have | 
 | 67 |  * data in the locale encoding. | 
 | 68 |  * | 
 | 69 |  * Return value: Return the character set used by the current locale. | 
 | 70 |  *   It will never return NULL, but use "ASCII" as a fallback. | 
 | 71 |  **/ | 
 | 72 | const char * | 
 | 73 | stringprep_locale_charset (void) | 
 | 74 | { | 
 | 75 |   const char *charset = getenv ("CHARSET");	/* flawfinder: ignore */ | 
 | 76 |  | 
 | 77 |   if (charset && *charset) | 
 | 78 |     return charset; | 
 | 79 |  | 
 | 80 | # ifdef LOCALE_WORKS | 
 | 81 |   charset = nl_langinfo (CODESET); | 
 | 82 |  | 
 | 83 |   if (charset && *charset) | 
 | 84 |     return charset; | 
 | 85 | # endif | 
 | 86 |  | 
 | 87 |   return "ASCII"; | 
 | 88 | } | 
 | 89 | #endif | 
 | 90 |  | 
 | 91 | /** | 
 | 92 |  * stringprep_convert - encode string using new character set | 
 | 93 |  * @str: input zero-terminated string. | 
 | 94 |  * @to_codeset: name of destination character set. | 
 | 95 |  * @from_codeset: name of origin character set, as used by @str. | 
 | 96 |  * | 
 | 97 |  * Convert the string from one character set to another using the | 
 | 98 |  * system's iconv() function. | 
 | 99 |  * | 
 | 100 |  * Return value: Returns newly allocated zero-terminated string which | 
 | 101 |  *   is @str transcoded into to_codeset. | 
 | 102 |  **/ | 
 | 103 | char * | 
 | 104 | stringprep_convert (const char *str, | 
 | 105 | 		    const char *to_codeset, const char *from_codeset) | 
 | 106 | { | 
 | 107 | #if HAVE_ICONV | 
 | 108 |   return iconv_string (str, from_codeset, to_codeset); | 
 | 109 | #else | 
 | 110 |   char *p; | 
 | 111 |   fprintf (stderr, "libidn: warning: libiconv not installed, cannot " | 
 | 112 | 	   "convert data to UTF-8\n"); | 
 | 113 |   p = malloc (strlen (str) + 1); | 
 | 114 |   if (!p) | 
 | 115 |     return NULL; | 
 | 116 |   return strcpy (p, str); | 
 | 117 | #endif | 
 | 118 | } | 
 | 119 |  | 
 | 120 | /** | 
 | 121 |  * stringprep_locale_to_utf8 - convert locale encoded string to UTF-8 | 
 | 122 |  * @str: input zero terminated string. | 
 | 123 |  * | 
 | 124 |  * Convert string encoded in the locale's character set into UTF-8 by | 
 | 125 |  * using stringprep_convert(). | 
 | 126 |  * | 
 | 127 |  * Return value: Returns newly allocated zero-terminated string which | 
 | 128 |  *   is @str transcoded into UTF-8. | 
 | 129 |  **/ | 
 | 130 | char * | 
 | 131 | stringprep_locale_to_utf8 (const char *str) | 
 | 132 | { | 
 | 133 |   return stringprep_convert (str, "UTF-8", stringprep_locale_charset ()); | 
 | 134 | } | 
 | 135 |  | 
 | 136 | /** | 
 | 137 |  * stringprep_utf8_to_locale - encode UTF-8 string to locale encoding | 
 | 138 |  * @str: input zero terminated string. | 
 | 139 |  * | 
 | 140 |  * Convert string encoded in UTF-8 into the locale's character set by | 
 | 141 |  * using stringprep_convert(). | 
 | 142 |  * | 
 | 143 |  * Return value: Returns newly allocated zero-terminated string which | 
 | 144 |  *   is @str transcoded into the locale's character set. | 
 | 145 |  **/ | 
 | 146 | char * | 
 | 147 | stringprep_utf8_to_locale (const char *str) | 
 | 148 | { | 
 | 149 |   return stringprep_convert (str, stringprep_locale_charset (), "UTF-8"); | 
 | 150 | } |