| lh | 9ed821d | 2023-04-07 01:36:19 -0700 | [diff] [blame] | 1 | /* toutf8.c --- Convert strings from system locale into UTF-8. | 
|  | 2 | * Copyright (C) 2002, 2003, 2004, 2005  Simon Josefsson | 
|  | 3 | * | 
|  | 4 | * This file is part of GNU Libidn. | 
|  | 5 | * | 
|  | 6 | * GNU Libidn is free software; you can redistribute it and/or | 
|  | 7 | * modify it under the terms of the GNU Lesser General Public | 
|  | 8 | * License as published by the Free Software Foundation; either | 
|  | 9 | * version 2.1 of the License, or (at your option) any later version. | 
|  | 10 | * | 
|  | 11 | * GNU Libidn is distributed in the hope that it will be useful, | 
|  | 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
|  | 14 | * Lesser General Public License for more details. | 
|  | 15 | * | 
|  | 16 | * You should have received a copy of the GNU Lesser General Public | 
|  | 17 | * License along with GNU Libidn; if not, see <http://www.gnu.org/licenses/>. | 
|  | 18 | */ | 
|  | 19 |  | 
|  | 20 | #if HAVE_CONFIG_H | 
|  | 21 | # include "config.h" | 
|  | 22 | #endif | 
|  | 23 |  | 
|  | 24 | /* Get prototypes. */ | 
|  | 25 | #include "stringprep.h" | 
|  | 26 |  | 
|  | 27 | /* Get fprintf. */ | 
|  | 28 | #include <stdio.h> | 
|  | 29 |  | 
|  | 30 | /* Get getenv. */ | 
|  | 31 | #include <stdlib.h> | 
|  | 32 |  | 
|  | 33 | /* Get strlen. */ | 
|  | 34 | #include <string.h> | 
|  | 35 |  | 
|  | 36 | /* Get iconv_string. */ | 
|  | 37 | #include "iconvme.h" | 
|  | 38 |  | 
|  | 39 | #ifdef _LIBC | 
|  | 40 | # define HAVE_ICONV 1 | 
|  | 41 | # define LOCALE_WORKS 1 | 
|  | 42 | #endif | 
|  | 43 |  | 
|  | 44 | #if LOCALE_WORKS | 
|  | 45 | # include <langinfo.h> | 
|  | 46 | # include <locale.h> | 
|  | 47 | #endif | 
|  | 48 |  | 
|  | 49 | #ifdef _LIBC | 
|  | 50 | # define stringprep_locale_charset() nl_langinfo (CODESET) | 
|  | 51 | #else | 
/**
 * stringprep_locale_charset - return charset used in current locale
 *
 * Find out the current locale charset.  The function respects the
 * CHARSET environment variable when it is set to a non-empty value.
 * Otherwise it uses nl_langinfo(CODESET) when locale support was
 * enabled at build time (LOCALE_WORKS).  It falls back on "ASCII" if
 * CHARSET isn't set and nl_langinfo isn't supported or doesn't return
 * anything.
 *
 * Note that this function returns the application's locale's preferred
 * charset (or the thread's locale's preferred charset, if your system
 * supports thread-specific locales).  It does not return what the
 * system may be using.  Thus, if you receive data from external
 * sources you cannot in general use this function to guess what
 * charset it is encoded in.  Use stringprep_convert from the external
 * representation into the charset returned by this function, to have
 * data in the locale encoding.
 *
 * Return value: Return the character set used by the current locale.
 *   It will never return NULL, but use "ASCII" as a fallback.  The
 *   string is not allocated by this function (it comes from getenv,
 *   nl_langinfo or a string literal), so the caller must not free it.
 **/
const char *
stringprep_locale_charset (void)
{
  const char *charset = getenv ("CHARSET");	/* flawfinder: ignore */

  /* An explicit, non-empty CHARSET setting overrides the locale. */
  if (charset && *charset)
    return charset;

  /* Tested with #if, exactly like the guard around the <langinfo.h>
     include, so that LOCALE_WORKS defined as 0 does not leave a call
     to an undeclared nl_langinfo behind. */
# if LOCALE_WORKS
  charset = nl_langinfo (CODESET);

  if (charset && *charset)
    return charset;
# endif

  return "ASCII";
}
|  | 89 | #endif | 
|  | 90 |  | 
/**
 * stringprep_convert - encode string using new character set
 * @str: input zero-terminated string.
 * @to_codeset: name of destination character set.
 * @from_codeset: name of origin character set, as used by @str.
 *
 * Transcode @str from @from_codeset into @to_codeset.  When the
 * library is built with iconv support (HAVE_ICONV) the work is
 * delegated to iconv_string().  Without iconv a warning is printed on
 * stderr and an unconverted copy of @str is returned instead.
 *
 * Return value: Returns newly allocated zero-terminated string which
 *   is @str transcoded into to_codeset.  In the build without iconv,
 *   NULL is returned if memory for the copy cannot be allocated.
 **/
char *
stringprep_convert (const char *str,
		    const char *to_codeset, const char *from_codeset)
{
#if HAVE_ICONV
  return iconv_string (str, from_codeset, to_codeset);
#else
  size_t nbytes;
  char *copy;

  fprintf (stderr, "libidn: warning: libiconv not installed, cannot "
	   "convert data to UTF-8\n");

  /* No converter available: hand back a verbatim duplicate, including
     the terminating zero byte. */
  nbytes = strlen (str) + 1;
  copy = malloc (nbytes);
  if (copy != NULL)
    memcpy (copy, str, nbytes);

  return copy;
#endif
}
|  | 119 |  | 
/**
 * stringprep_locale_to_utf8 - convert locale encoded string to UTF-8
 * @str: input zero terminated string.
 *
 * Transcode @str from the character set reported by
 * stringprep_locale_charset() into UTF-8, by way of
 * stringprep_convert().
 *
 * Return value: Returns newly allocated zero-terminated string which
 *   is @str transcoded into UTF-8.
 **/
char *
stringprep_locale_to_utf8 (const char *str)
{
  const char *locale_codeset = stringprep_locale_charset ();

  return stringprep_convert (str, "UTF-8", locale_codeset);
}
|  | 135 |  | 
/**
 * stringprep_utf8_to_locale - encode UTF-8 string to locale encoding
 * @str: input zero terminated string.
 *
 * Transcode @str from UTF-8 into the character set reported by
 * stringprep_locale_charset(), by way of stringprep_convert().
 *
 * Return value: Returns newly allocated zero-terminated string which
 *   is @str transcoded into the locale's character set.
 **/
char *
stringprep_utf8_to_locale (const char *str)
{
  const char *locale_codeset = stringprep_locale_charset ();

  return stringprep_convert (str, locale_codeset, "UTF-8");
}