Blame - ap/os/linux/linux-3.4.x/fs/udf/unicode.c - T106_DC

blob: d29c06fbf4cec196ab271921fce17d7c730d2e3f [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame]	1	/*
				2	* unicode.c
				3	*
				4	* PURPOSE
				5	* Routines for converting between UTF-8 and OSTA Compressed Unicode.
				6	* Also handles filename mangling
				7	*
				8	* DESCRIPTION
				9	* OSTA Compressed Unicode is explained in the OSTA UDF specification.
				10	* http://www.osta.org/
				11	* UTF-8 is explained in IETF RFC 3629.
				12	* https://www.rfc-editor.org/rfc/rfc3629
				13	*
				14	* COPYRIGHT
				15	* This file is distributed under the terms of the GNU General Public
				16	* License (GPL). Copies of the GPL can be obtained from:
				17	* ftp://prep.ai.mit.edu/pub/gnu/GPL
				18	* Each contributing author retains all rights to their own work.
				19	*/
				20
				21	#include "udfdecl.h"
				22
				23	#include <linux/kernel.h>
				24	#include <linux/string.h> /* for memset */
				25	#include <linux/nls.h>
				26	#include <linux/crc-itu-t.h>
				27	#include <linux/slab.h>
				28
				29	#include "udf_sb.h"
				30
				/*
				 * Forward declaration: udf_get_filename() calls this helper before its
				 * definition, which appears near the end of this file.
				 */
				static int udf_translate_to_linux(uint8_t *newName, int newLen,
								  uint8_t *udfName, int udfLen,
								  uint8_t *fidName, int fidNameLen);
				33
				34	static int udf_char_to_ustr(struct ustr dest, const uint8_t src, int strlen)
				35	{
				36	if ((!dest) \|\| (!src) \|\| (!strlen) \|\| (strlen > UDF_NAME_LEN - 2))
				37	return 0;
				38
				39	memset(dest, 0, sizeof(struct ustr));
				40	memcpy(dest->u_name, src, strlen);
				41	dest->u_cmpID = 0x08;
				42	dest->u_len = strlen;
				43
				44	return strlen;
				45	}
				46
				47	/*
				48	* udf_build_ustr
				49	*/
				50	int udf_build_ustr(struct ustr dest, dstring ptr, int size)
				51	{
				52	int usesize;
				53
				54	if (!dest \|\| !ptr \|\| !size)
				55	return -1;
				56	BUG_ON(size < 2);
				57
				58	usesize = min_t(size_t, ptr[size - 1], sizeof(dest->u_name));
				59	usesize = min(usesize, size - 2);
				60	dest->u_cmpID = ptr[0];
				61	dest->u_len = usesize;
				62	memcpy(dest->u_name, ptr + 1, usesize);
				63	memset(dest->u_name + usesize, 0, sizeof(dest->u_name) - usesize);
				64
				65	return 0;
				66	}
				67
				68	/*
				69	* udf_build_ustr_exact
				70	*/
				71	static int udf_build_ustr_exact(struct ustr dest, dstring ptr, int exactsize)
				72	{
				73	if ((!dest) \|\| (!ptr) \|\| (!exactsize))
				74	return -1;
				75
				76	memset(dest, 0, sizeof(struct ustr));
				77	dest->u_cmpID = ptr[0];
				78	dest->u_len = exactsize - 1;
				79	memcpy(dest->u_name, ptr + 1, exactsize - 1);
				80
				81	return 0;
				82	}
				83
				84	/*
				85	* udf_ocu_to_utf8
				86	*
				87	* PURPOSE
				88	* Convert OSTA Compressed Unicode to the UTF-8 equivalent.
				89	*
				90	* PRE-CONDITIONS
				91	* utf Pointer to UTF-8 output buffer.
				92	* ocu Pointer to OSTA Compressed Unicode input buffer
				93	* of size UDF_NAME_LEN bytes.
				94	* both of type "struct ustr *"
				95	*
				96	* POST-CONDITIONS
				97	* <return> Zero on success.
				98	*
				99	* HISTORY
				100	* November 12, 1997 - Andrew E. Mileski
				101	* Written, tested, and released.
				102	*/
				103	int udf_CS0toUTF8(struct ustr utf_o, const struct ustr ocu_i)
				104	{
				105	const uint8_t *ocu;
				106	uint8_t cmp_id, ocu_len;
				107	int i;
				108
				109	ocu_len = ocu_i->u_len;
				110	if (ocu_len == 0) {
				111	memset(utf_o, 0, sizeof(struct ustr));
				112	return 0;
				113	}
				114
				115	cmp_id = ocu_i->u_cmpID;
				116	if (cmp_id != 8 && cmp_id != 16) {
				117	memset(utf_o, 0, sizeof(struct ustr));
				118	pr_err("unknown compression code (%d) stri=%s\n",
				119	cmp_id, ocu_i->u_name);
				120	return 0;
				121	}
				122
				123	ocu = ocu_i->u_name;
				124	utf_o->u_len = 0;
				125	for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) {
				126
				127	/* Expand OSTA compressed Unicode to Unicode */
				128	uint32_t c = ocu[i++];
				129	if (cmp_id == 16)
				130	c = (c << 8) \| ocu[i++];
				131
				132	/* Compress Unicode to UTF-8 */
				133	if (c < 0x80U)
				134	utf_o->u_name[utf_o->u_len++] = (uint8_t)c;
				135	else if (c < 0x800U) {
				136	utf_o->u_name[utf_o->u_len++] =
				137	(uint8_t)(0xc0 \| (c >> 6));
				138	utf_o->u_name[utf_o->u_len++] =
				139	(uint8_t)(0x80 \| (c & 0x3f));
				140	} else {
				141	utf_o->u_name[utf_o->u_len++] =
				142	(uint8_t)(0xe0 \| (c >> 12));
				143	utf_o->u_name[utf_o->u_len++] =
				144	(uint8_t)(0x80 \|
				145	((c >> 6) & 0x3f));
				146	utf_o->u_name[utf_o->u_len++] =
				147	(uint8_t)(0x80 \| (c & 0x3f));
				148	}
				149	}
				150	utf_o->u_cmpID = 8;
				151
				152	return utf_o->u_len;
				153	}
				154
				155	/*
				156	*
				157	* udf_utf8_to_ocu
				158	*
				159	* PURPOSE
				160	* Convert UTF-8 to the OSTA Compressed Unicode equivalent.
				161	*
				162	* DESCRIPTION
				163	* This routine is only called by udf_lookup().
				164	*
				165	* PRE-CONDITIONS
				166	* ocu Pointer to OSTA Compressed Unicode output
				167	* buffer of size UDF_NAME_LEN bytes.
				168	* utf Pointer to UTF-8 input buffer.
				169	* utf_len Length of UTF-8 input buffer in bytes.
				170	*
				171	* POST-CONDITIONS
				172	* <return> Zero on success.
				173	*
				174	* HISTORY
				175	* November 12, 1997 - Andrew E. Mileski
				176	* Written, tested, and released.
				177	*/
				178	static int udf_UTF8toCS0(dstring ocu, struct ustr utf, int length)
				179	{
				180	unsigned c, i, max_val, utf_char;
				181	int utf_cnt, u_len;
				182
				183	memset(ocu, 0, sizeof(dstring) * length);
				184	ocu[0] = 8;
				185	max_val = 0xffU;
				186
				187	try_again:
				188	u_len = 0U;
				189	utf_char = 0U;
				190	utf_cnt = 0U;
				191	for (i = 0U; i < utf->u_len; i++) {
				192	c = (uint8_t)utf->u_name[i];
				193
				194	/* Complete a multi-byte UTF-8 character */
				195	if (utf_cnt) {
				196	utf_char = (utf_char << 6) \| (c & 0x3fU);
				197	if (--utf_cnt)
				198	continue;
				199	} else {
				200	/* Check for a multi-byte UTF-8 character */
				201	if (c & 0x80U) {
				202	/* Start a multi-byte UTF-8 character */
				203	if ((c & 0xe0U) == 0xc0U) {
				204	utf_char = c & 0x1fU;
				205	utf_cnt = 1;
				206	} else if ((c & 0xf0U) == 0xe0U) {
				207	utf_char = c & 0x0fU;
				208	utf_cnt = 2;
				209	} else if ((c & 0xf8U) == 0xf0U) {
				210	utf_char = c & 0x07U;
				211	utf_cnt = 3;
				212	} else if ((c & 0xfcU) == 0xf8U) {
				213	utf_char = c & 0x03U;
				214	utf_cnt = 4;
				215	} else if ((c & 0xfeU) == 0xfcU) {
				216	utf_char = c & 0x01U;
				217	utf_cnt = 5;
				218	} else {
				219	goto error_out;
				220	}
				221	continue;
				222	} else {
				223	/* Single byte UTF-8 character (most common) */
				224	utf_char = c;
				225	}
				226	}
				227
				228	/* Choose no compression if necessary */
				229	if (utf_char > max_val) {
				230	if (max_val == 0xffU) {
				231	max_val = 0xffffU;
				232	ocu[0] = (uint8_t)0x10U;
				233	goto try_again;
				234	}
				235	goto error_out;
				236	}
				237
				238	if (max_val == 0xffffU)
				239	ocu[++u_len] = (uint8_t)(utf_char >> 8);
				240	ocu[++u_len] = (uint8_t)(utf_char & 0xffU);
				241	}
				242
				243	if (utf_cnt) {
				244	error_out:
				245	ocu[++u_len] = '?';
				246	printk(KERN_DEBUG pr_fmt("bad UTF-8 character\n"));
				247	}
				248
				249	ocu[length - 1] = (uint8_t)u_len + 1;
				250
				251	return u_len + 1;
				252	}
				253
				254	static int udf_CS0toNLS(struct nls_table nls, struct ustr utf_o,
				255	const struct ustr *ocu_i)
				256	{
				257	const uint8_t *ocu;
				258	uint8_t cmp_id, ocu_len;
				259	int i, len;
				260
				261
				262	ocu_len = ocu_i->u_len;
				263	if (ocu_len == 0) {
				264	memset(utf_o, 0, sizeof(struct ustr));
				265	return 0;
				266	}
				267
				268	cmp_id = ocu_i->u_cmpID;
				269	if (cmp_id != 8 && cmp_id != 16) {
				270	memset(utf_o, 0, sizeof(struct ustr));
				271	pr_err("unknown compression code (%d) stri=%s\n",
				272	cmp_id, ocu_i->u_name);
				273	return 0;
				274	}
				275
				276	ocu = ocu_i->u_name;
				277	utf_o->u_len = 0;
				278	for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) {
				279	/* Expand OSTA compressed Unicode to Unicode */
				280	uint32_t c = ocu[i++];
				281	if (cmp_id == 16)
				282	c = (c << 8) \| ocu[i++];
				283
				284	len = nls->uni2char(c, &utf_o->u_name[utf_o->u_len],
				285	UDF_NAME_LEN - utf_o->u_len);
				286	/* Valid character? */
				287	if (len >= 0)
				288	utf_o->u_len += len;
				289	else
				290	utf_o->u_name[utf_o->u_len++] = '?';
				291	}
				292	utf_o->u_cmpID = 8;
				293
				294	return utf_o->u_len;
				295	}
				296
				297	static int udf_NLStoCS0(struct nls_table nls, dstring ocu, struct ustr *uni,
				298	int length)
				299	{
				300	int len;
				301	unsigned i, max_val;
				302	uint16_t uni_char;
				303	int u_len;
				304
				305	memset(ocu, 0, sizeof(dstring) * length);
				306	ocu[0] = 8;
				307	max_val = 0xffU;
				308
				309	try_again:
				310	u_len = 0U;
				311	for (i = 0U; i < uni->u_len; i++) {
				312	len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char);
				313	if (!len)
				314	continue;
				315	/* Invalid character, deal with it */
				316	if (len < 0) {
				317	len = 1;
				318	uni_char = '?';
				319	}
				320
				321	if (uni_char > max_val) {
				322	max_val = 0xffffU;
				323	ocu[0] = (uint8_t)0x10U;
				324	goto try_again;
				325	}
				326
				327	if (max_val == 0xffffU)
				328	ocu[++u_len] = (uint8_t)(uni_char >> 8);
				329	ocu[++u_len] = (uint8_t)(uni_char & 0xffU);
				330	i += len - 1;
				331	}
				332
				333	ocu[length - 1] = (uint8_t)u_len + 1;
				334	return u_len + 1;
				335	}
				336
				337	int udf_get_filename(struct super_block sb, uint8_t sname, int slen,
				338	uint8_t *dname, int dlen)
				339	{
				340	struct ustr filename, unifilename;
				341	int len = 0;
				342
				343	filename = kmalloc(sizeof(struct ustr), GFP_NOFS);
				344	if (!filename)
				345	return 0;
				346
				347	unifilename = kmalloc(sizeof(struct ustr), GFP_NOFS);
				348	if (!unifilename)
				349	goto out1;
				350
				351	if (udf_build_ustr_exact(unifilename, sname, slen))
				352	goto out2;
				353
				354	if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
				355	if (!udf_CS0toUTF8(filename, unifilename)) {
				356	udf_debug("Failed in udf_get_filename: sname = %s\n",
				357	sname);
				358	goto out2;
				359	}
				360	} else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
				361	if (!udf_CS0toNLS(UDF_SB(sb)->s_nls_map, filename,
				362	unifilename)) {
				363	udf_debug("Failed in udf_get_filename: sname = %s\n",
				364	sname);
				365	goto out2;
				366	}
				367	} else
				368	goto out2;
				369
				370	len = udf_translate_to_linux(dname, dlen,
				371	filename->u_name, filename->u_len,
				372	unifilename->u_name, unifilename->u_len);
				373	out2:
				374	kfree(unifilename);
				375	out1:
				376	kfree(filename);
				377	return len;
				378	}
				379
				380	int udf_put_filename(struct super_block sb, const uint8_t sname,
				381	uint8_t *dname, int flen)
				382	{
				383	struct ustr unifilename;
				384	int namelen;
				385
				386	if (!udf_char_to_ustr(&unifilename, sname, flen))
				387	return 0;
				388
				389	if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
				390	namelen = udf_UTF8toCS0(dname, &unifilename, UDF_NAME_LEN);
				391	if (!namelen)
				392	return 0;
				393	} else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
				394	namelen = udf_NLStoCS0(UDF_SB(sb)->s_nls_map, dname,
				395	&unifilename, UDF_NAME_LEN);
				396	if (!namelen)
				397	return 0;
				398	} else
				399	return 0;
				400
				401	return namelen;
				402	}
				403
				#define ILLEGAL_CHAR_MARK	'_'
				#define EXT_MARK		'.'
				#define CRC_MARK		'#'
				#define EXT_SIZE		5
				/* Number of chars we need to store generated CRC to make filename unique */
				#define CRC_LEN			5

				/*
				 * udf_translate_to_linux
				 *
				 * Copy a converted name into newName, replacing what Linux cannot
				 * accept in a file name.
				 *
				 * newName     output buffer.
				 * newLen      size of newName in bytes; the mangled form needs room
				 *             for CRC_LEN + 1 + EXT_SIZE bytes.
				 * udfName     converted name to translate.
				 * udfLen      number of bytes at udfName.
				 * fidName     bytes the CRC is computed over.
				 * fidNameLen  number of bytes at fidName.
				 *
				 * Each run of '/' or NUL bytes becomes a single '_'. When anything was
				 * replaced, the name did not fit, or the name is "." or "..", the
				 * result is mangled into
				 *
				 *	<base> '#' <4 hex digits of CRC> [ '.' <extension> ]
				 *
				 * where the extension is at most EXT_SIZE bytes taken after the last
				 * qualifying '.' of udfName.
				 *
				 * Returns the number of bytes stored in newName.
				 */
				static int udf_translate_to_linux(uint8_t *newName, int newLen,
								  uint8_t *udfName, int udfLen,
								  uint8_t *fidName, int fidNameLen)
				{
					int index, newIndex = 0, needsCRC = 0;
					int extIndex = 0, newExtIndex = 0, hasExt = 0;
					unsigned short valueCRC;
					uint8_t curr;
					static const uint8_t hexChar[] = "0123456789ABCDEF";

					if (udfName[0] == '.' &&
					    (udfLen == 1 || (udfLen == 2 && udfName[1] == '.'))) {
						needsCRC = 1;
						newIndex = udfLen;
						memcpy(newName, udfName, udfLen);
					} else {
						for (index = 0; index < udfLen; index++) {
							curr = udfName[index];
							if (curr == '/' || curr == 0) {
								needsCRC = 1;
								curr = ILLEGAL_CHAR_MARK;
								/* Collapse a run of illegal bytes into one mark. */
								while (index + 1 < udfLen &&
								       (udfName[index + 1] == '/' ||
									udfName[index + 1] == 0))
									index++;
							}
							if (curr == EXT_MARK &&
							    (udfLen - index - 1) <= EXT_SIZE) {
								if (udfLen == index + 1)
									hasExt = 0;
								else {
									hasExt = 1;
									extIndex = index;
									newExtIndex = newIndex;
								}
							}
							if (newIndex < newLen)
								newName[newIndex++] = curr;
							else
								needsCRC = 1;
						}
					}
					if (needsCRC) {
						uint8_t ext[EXT_SIZE];
						int localExtIndex = 0;

						if (hasExt) {
							int maxFilenameLen;

							for (index = 0;
							     index < EXT_SIZE && extIndex + index + 1 < udfLen;
							     index++) {
								curr = udfName[extIndex + index + 1];

								if (curr == '/' || curr == 0) {
									curr = ILLEGAL_CHAR_MARK;
									while (extIndex + index + 2 < udfLen &&
									       (index + 1 < EXT_SIZE &&
										(udfName[extIndex + index + 2] == '/' ||
										 udfName[extIndex + index + 2] == 0)))
										index++;
								}
								ext[localExtIndex++] = curr;
							}
							/*
							 * Reserve room for the CRC, the '.' and the extension.
							 * The base ends at the extension dot, or earlier if
							 * the suffix would otherwise run past newLen.
							 */
							maxFilenameLen = newLen - CRC_LEN - 1 - localExtIndex;
							newIndex = newExtIndex;
							if (newIndex > maxFilenameLen)
								newIndex = maxFilenameLen;
						} else if (newIndex > newLen - CRC_LEN)
							newIndex = newLen - CRC_LEN;

						/* A too-small newLen must not yield a negative index. */
						if (newIndex < 0)
							newIndex = 0;

						newName[newIndex++] = CRC_MARK;
						valueCRC = crc_itu_t(0, fidName, fidNameLen);
						newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12];
						newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8];
						newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4];
						newName[newIndex++] = hexChar[(valueCRC & 0x000f)];

						if (hasExt) {
							newName[newIndex++] = EXT_MARK;
							for (index = 0; index < localExtIndex; index++)
								newName[newIndex++] = ext[index];
						}
					}

					return newIndex;
				}
				494	}
				495
				496	return newIndex;
				497	}