M7350/kernel/fs/cifs/cifs_unicode.c

/*
 *   fs/cifs/cifs_unicode.c
 *
 *   Copyright (c) International Business Machines  Corp., 2000,2009
 *   Modified by Steve French (sfrench@us.ibm.com)
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/slab.h>
#include "cifs_unicode.h"
#include "cifs_uniupr.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifs_debug.h"

/*
 * cifs_utf16_bytes - compute the converted length of a UTF-16LE string
 * @from - pointer to the UTF-16LE input string
 * @maxbytes - upper bound, in bytes, on how much of @from may be examined
 * @codepage - charset the string would be converted to
 *
 * Scans @from one 16-bit unit at a time, stopping at the first zero unit or
 * once maxbytes / 2 units have been examined, and sums the size that
 * codepage->uni2char() reports for each unit. A unit for which uni2char()
 * returns zero or a negative value is counted as a single byte. The total
 * that is returned leaves no room for a terminator.
 */
int
cifs_utf16_bytes(const __le16 *from, int maxbytes,
		const struct nls_table *codepage)
{
	const int nwords = maxbytes / 2;
	char scratch[NLS_MAX_CHARSET_SIZE];
	int total = 0;
	int idx = 0;

	while (idx < nwords) {
		__u16 unit = get_unaligned_le16(&from[idx]);
		int width;

		/* a zero unit terminates the input string */
		if (!unit)
			break;

		/* scratch only absorbs the output; just the size matters */
		width = codepage->uni2char(unit, scratch,
					   NLS_MAX_CHARSET_SIZE);
		total += (width > 0) ? width : 1;
		idx++;
	}

	return total;
}

/*
 * cifs_mapchar - convert one host-endian UTF-16 unit to the given codepage
 * @target - where the converted character is written
 * @src_char - 2 byte host-endian source character
 * @cp - codepage used for the conversion
 * @mapchar - when true, first try the reserved-character remapping below
 *
 * Converts a single character and returns the number of bytes stored at
 * @target (always at least 1). When @mapchar is set and @src_char is one of
 * UNI_COLON, UNI_ASTERISK, UNI_QUESTION, UNI_PIPE, UNI_GRTRTHAN or
 * UNI_LESSTHAN, the matching ASCII character is stored. Everything else is
 * handed to cp->uni2char(); if that reports zero or an error, a single '?'
 * is stored instead.
 *
 * The caller must guarantee that @target has room for at least
 * NLS_MAX_CHARSET_SIZE bytes, since that is the bound passed to uni2char().
 */
static int
cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
	     bool mapchar)
{
	int nbytes;

	if (mapchar) {
		char plain = 0;

		/*
		 * BB: Cannot handle remapping UNI_SLASH until all the calls
		 *     to build_path_from_dentry are modified, as they use
		 *     slash as separator.
		 */
		switch (src_char) {
		case UNI_COLON:
			plain = ':';
			break;
		case UNI_ASTERISK:
			plain = '*';
			break;
		case UNI_QUESTION:
			plain = '?';
			break;
		case UNI_PIPE:
			plain = '|';
			break;
		case UNI_GRTRTHAN:
			plain = '>';
			break;
		case UNI_LESSTHAN:
			plain = '<';
			break;
		default:
			break;
		}

		/* a remapped character always occupies exactly one byte */
		if (plain) {
			*target = plain;
			return 1;
		}
	}

	nbytes = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
	if (nbytes > 0)
		return nbytes;

	/* no representation in this codepage: substitute a question mark */
	*target = '?';
	return 1;
}

/*
 * cifs_from_utf16 - convert a UTF-16LE string to the local charset
 * @to - destination buffer
 * @from - source buffer (UTF-16LE, possibly unaligned)
 * @tolen - size of the destination buffer in bytes
 * @fromlen - size of the source buffer in bytes
 * @codepage - codepage the characters are converted to
 * @mapchar - passed through to cifs_mapchar() to enable reserved-character
 *	      remapping
 *
 * Converts at most fromlen / 2 source units, stopping early at a zero unit
 * or when the next converted character would no longer leave room for the
 * terminator in @to. The result is then terminated with
 * nls_nullsize(codepage) zero bytes. Returns the number of bytes written to
 * @to, terminator included.
 *
 * Note that some windows versions actually send multiword UTF-16 characters
 * rather than plain 2-byte characters. Each 16-bit unit is converted on its
 * own here, so such characters will not be translated properly.
 */
int
cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
		 const struct nls_table *codepage, bool mapchar)
{
	const int termsize = nls_nullsize(codepage);
	const int nwords = fromlen / 2;
	/* last offset at which a character may end and still fit the NUL */
	const int room = tolen - termsize;
	/*
	 * Characters have varying widths, so once the output reaches this
	 * offset every character has to be measured before it is stored.
	 * Below it, even a maximum-width character cannot overflow.
	 */
	const int checked_from = tolen - (NLS_MAX_CHARSET_SIZE + termsize);
	char probe[NLS_MAX_CHARSET_SIZE];
	int written = 0;
	int idx, width;

	for (idx = 0; idx < nwords; idx++) {
		const __u16 unit = get_unaligned_le16(&from[idx]);

		if (!unit)
			break;

		if (written >= checked_from) {
			/* trial conversion into scratch space to learn size */
			width = cifs_mapchar(probe, unit, codepage, mapchar);
			if (written + width > room)
				break;
		}

		/* now convert straight into the destination */
		width = cifs_mapchar(&to[written], unit, codepage, mapchar);
		written += width;
	}

	/* append the codepage-sized terminator */
	for (idx = 0; idx < termsize; idx++)
		to[written++] = 0;

	return written;
}

/*
 * cifs_strtoUTF16 - convert a string in the given codepage to UTF-16LE
 * @to - destination buffer for the UTF-16LE units
 * @from - source string
 * @len - maximum number of source bytes to consume
 * @codepage - codepage @from is encoded in
 *
 * Converts characters with codepage->char2uni() until @len bytes have been
 * consumed or a zero byte is reached in @from. A character that char2uni()
 * cannot convert is logged, replaced by a question mark and treated as one
 * source byte. A zero unit is always stored after the converted units.
 * Returns the number of units stored, not counting that terminator.
 */
int
cifs_strtoUTF16(__le16 *to, const char *from, int len,
	      const struct nls_table *codepage)
{
	int nwords = 0;
	int consumed;
	wchar_t uc; /* needed to quiet sparse */

	while (len && *from) {
		consumed = codepage->char2uni(from, len, &uc);
		if (consumed < 1) {
			cERROR(1, "strtoUTF16: char2uni of 0x%x returned %d",
				*from, consumed);
			/* A question mark */
			uc = 0x003f;
			consumed = 1;
		}
		put_unaligned_le16(uc, &to[nwords]);

		nwords++;
		from += consumed;
		len -= consumed;
	}

	put_unaligned_le16(0, &to[nwords]);
	return nwords;
}

/*
 * cifs_strndup_from_utf16 - copy a string from wire format to the local
 * codepage
 * @src - source string
 * @maxlen - don't walk past this many bytes in the source string
 * @is_unicode - is this a unicode string?
 * @codepage - destination codepage
 *
 * Take a string given by the server, convert it to the local codepage and
 * put it in a new buffer. Returns a pointer to the new string or NULL on
 * error.
 */
char *
cifs_strndup_from_utf16(const char *src, const int maxlen,
			const bool is_unicode, const struct nls_table *codepage)
{
	int len;
	char *dst;

	if (is_unicode) {
		len = cifs_utf16_bytes((__le16 *) src, maxlen, codepage);
		len += nls_nullsize(codepage);
		dst = kmalloc(len, GFP_KERNEL);
		if (!dst)
			return NULL;
		cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage,
			       false);
	} else {
		len = strnlen(src, maxlen);
		len++;
		dst = kmalloc(len, GFP_KERNEL);
		if (!dst)
			return NULL;
		strlcpy(dst, src, len);
	}

	return dst;
}

/*
 * cifsConvertToUTF16 - convert a pathname in the current codepage to the
 * little endian 16 bit Unicode used on the wire
 * @target - destination buffer for the UTF-16LE units
 * @source - pathname in codepage @cp
 * @srclen - maximum number of source bytes to convert
 * @cp - codepage @source is encoded in
 * @mapChars - non-zero to remap the six characters ':', '*', '?', '<', '>'
 *	       and '|' to their UNI_* replacements
 *
 * Returns the number of 16 bit units stored in @target, not counting a
 * terminating zero unit. This matches what cifs_strtoUTF16() returns on the
 * !mapChars path; the source bytes consumed can differ from it whenever a
 * character occupies more than one byte in @source.
 *
 * With @mapChars zero the work is delegated to cifs_strtoUTF16() using
 * PATH_MAX as the limit (@srclen is not consulted), and the result is always
 * zero terminated. With @mapChars set, a zero unit is stored only when a
 * zero byte is found in @source before @srclen bytes have been consumed.
 * NOTE(review): callers needing a terminator in the latter case presumably
 * pass a NUL-terminated @source - confirm against callers.
 */
int
cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
		 const struct nls_table *cp, int mapChars)
{
	int i, j, charlen;
	char src_char;
	__le16 dst_char;
	wchar_t tmp;

	if (!mapChars)
		return cifs_strtoUTF16(target, source, PATH_MAX, cp);

	/* i indexes source bytes, j indexes target 16 bit units */
	for (i = 0, j = 0; i < srclen; j++) {
		src_char = source[i];
		charlen = 1;
		switch (src_char) {
		case 0:
			put_unaligned(0, &target[j]);
			goto ctoUTF16_out;
		case ':':
			dst_char = cpu_to_le16(UNI_COLON);
			break;
		case '*':
			dst_char = cpu_to_le16(UNI_ASTERISK);
			break;
		case '?':
			dst_char = cpu_to_le16(UNI_QUESTION);
			break;
		case '<':
			dst_char = cpu_to_le16(UNI_LESSTHAN);
			break;
		case '>':
			dst_char = cpu_to_le16(UNI_GRTRTHAN);
			break;
		case '|':
			dst_char = cpu_to_le16(UNI_PIPE);
			break;
		/*
		 * FIXME: We can not handle remapping backslash (UNI_SLASH)
		 * until all the calls to build_path_from_dentry are modified,
		 * as they use backslash as separator.
		 */
		default:
			charlen = cp->char2uni(source + i, srclen - i, &tmp);

			/*
			 * if no match, use question mark, which at least in
			 * some cases serves as wild card. tmp is only read
			 * once char2uni has reported success.
			 */
			if (charlen < 1) {
				dst_char = cpu_to_le16(0x003f);
				charlen = 1;
			} else {
				dst_char = cpu_to_le16(tmp);
			}
		}
		/*
		 * character may take more than one byte in the source string,
		 * but will take exactly two bytes in the target string
		 */
		i += charlen;
		put_unaligned(dst_char, &target[j]);
	}

ctoUTF16_out:
	/* report units written (j), not source bytes consumed (i) */
	return j;
}
M7350v1_en_gpl 2024-09-09 08:52:07 +00:00			`/*`
			`* fs/cifs/cifs_unicode.c`
			`*`
			`* Copyright (c) International Business Machines Corp., 2000,2009`
			`* Modified by Steve French (sfrench@us.ibm.com)`
			`*`
			`* This program is free software; you can redistribute it and/or modify`
			`* it under the terms of the GNU General Public License as published by`
			`* the Free Software Foundation; either version 2 of the License, or`
			`* (at your option) any later version.`
			`*`
			`* This program is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See`
			`* the GNU General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU General Public License`
			`* along with this program; if not, write to the Free Software`
			`* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA`
			`*/`
			`#include <linux/fs.h>`
			`#include <linux/slab.h>`
			`#include "cifs_unicode.h"`
			`#include "cifs_uniupr.h"`
			`#include "cifspdu.h"`
			`#include "cifsglob.h"`
			`#include "cifs_debug.h"`

			`/*`
			`* cifs_utf16_bytes - how long will a string be after conversion?`
			`* @utf16 - pointer to input string`
			`* @maxbytes - don't go past this many bytes of input string`
			`* @codepage - destination codepage`
			`*`
			`* Walk a utf16le string and return the number of bytes that the string will`
			`* be after being converted to the given charset, not including any null`
			`* termination required. Don't walk past maxbytes in the source buffer.`
			`*/`
			`int`
			`cifs_utf16_bytes(const __le16 *from, int maxbytes,`
			`const struct nls_table *codepage)`
			`{`
			`int i;`
			`int charlen, outlen = 0;`
			`int maxwords = maxbytes / 2;`
			`char tmp[NLS_MAX_CHARSET_SIZE];`
			`__u16 ftmp;`

			`for (i = 0; i < maxwords; i++) {`
			`ftmp = get_unaligned_le16(&from[i]);`
			`if (ftmp == 0)`
			`break;`

			`charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE);`
			`if (charlen > 0)`
			`outlen += charlen;`
			`else`
			`outlen++;`
			`}`

			`return outlen;`
			`}`

			`/*`
			`* cifs_mapchar - convert a host-endian char to proper char in codepage`
			`* @target - where converted character should be copied`
			`* @src_char - 2 byte host-endian source character`
			`* @cp - codepage to which character should be converted`
			`* @mapchar - should character be mapped according to mapchars mount option?`
			`*`
			`* This function handles the conversion of a single character. It is the`
			`* responsibility of the caller to ensure that the target buffer is large`
			`* enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).`
			`*/`
			`static int`
			`cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,`
			`bool mapchar)`
			`{`
			`int len = 1;`

			`if (!mapchar)`
			`goto cp_convert;`

			`/*`
			`* BB: Cannot handle remapping UNI_SLASH until all the calls to`
			`* build_path_from_dentry are modified, as they use slash as`
			`* separator.`
			`*/`
			`switch (src_char) {`
			`case UNI_COLON:`
			`*target = ':';`
			`break;`
			`case UNI_ASTERISK:`
			`*target = '*';`
			`break;`
			`case UNI_QUESTION:`
			`*target = '?';`
			`break;`
			`case UNI_PIPE:`
			`*target = '|';`
			`break;`
			`case UNI_GRTRTHAN:`
			`*target = '>';`
			`break;`
			`case UNI_LESSTHAN:`
			`*target = '<';`
			`break;`
			`default:`
			`goto cp_convert;`
			`}`

			`out:`
			`return len;`

			`cp_convert:`
			`len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);`
			`if (len <= 0) {`
			`*target = '?';`
			`len = 1;`
			`}`
			`goto out;`
			`}`

			`/*`
			`* cifs_from_utf16 - convert utf16le string to local charset`
			`* @to - destination buffer`
			`* @from - source buffer`
			`* @tolen - destination buffer size (in bytes)`
			`* @fromlen - source buffer size (in bytes)`
			`* @codepage - codepage to which characters should be converted`
			`* @mapchar - should characters be remapped according to the mapchars option?`
			`*`
			`* Convert a little-endian utf16le string (as sent by the server) to a string`
			`* in the provided codepage. The tolen and fromlen parameters are to ensure`
			`* that the code doesn't walk off of the end of the buffer (which is always`
			`* a danger if the alignment of the source buffer is off). The destination`
			`* string is always properly null terminated and fits in the destination`
			`* buffer. Returns the length of the destination string in bytes (including`
			`* null terminator).`
			`*`
			`* Note that some windows versions actually send multiword UTF-16 characters`
			`* instead of straight UTF16-2. The linux nls routines however aren't able to`
			`* deal with those characters properly. In the event that we get some of`
			`* those characters, they won't be translated properly.`
			`*/`
			`int`
			`cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,`
			`const struct nls_table *codepage, bool mapchar)`
			`{`
			`int i, charlen, safelen;`
			`int outlen = 0;`
			`int nullsize = nls_nullsize(codepage);`
			`int fromwords = fromlen / 2;`
			`char tmp[NLS_MAX_CHARSET_SIZE];`
			`__u16 ftmp;`

			`/*`
			`* because the chars can be of varying widths, we need to take care`
			`* not to overflow the destination buffer when we get close to the`
			`* end of it. Until we get to this offset, we don't need to check`
			`* for overflow however.`
			`*/`
			`safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);`

			`for (i = 0; i < fromwords; i++) {`
			`ftmp = get_unaligned_le16(&from[i]);`
			`if (ftmp == 0)`
			`break;`

			`/*`
			`* check to see if converting this character might make the`
			`* conversion bleed into the null terminator`
			`*/`
			`if (outlen >= safelen) {`
			`charlen = cifs_mapchar(tmp, ftmp, codepage, mapchar);`
			`if ((outlen + charlen) > (tolen - nullsize))`
			`break;`
			`}`

			`/* put converted char into 'to' buffer */`
			`charlen = cifs_mapchar(&to[outlen], ftmp, codepage, mapchar);`
			`outlen += charlen;`
			`}`

			`/* properly null-terminate string */`
			`for (i = 0; i < nullsize; i++)`
			`to[outlen++] = 0;`

			`return outlen;`
			`}`

			`/*`
			`* NAME: cifs_strtoUTF16()`
			`*`
			`* FUNCTION: Convert character string to unicode string`
			`*`
			`*/`
			`int`
			`cifs_strtoUTF16(__le16 *to, const char *from, int len,`
			`const struct nls_table *codepage)`
			`{`
			`int charlen;`
			`int i;`
			`wchar_t wchar_to; /* needed to quiet sparse */`

			`for (i = 0; len && *from; i++, from += charlen, len -= charlen) {`
			`charlen = codepage->char2uni(from, len, &wchar_to);`
			`if (charlen < 1) {`
			`cERROR(1, "strtoUTF16: char2uni of 0x%x returned %d",`
			`*from, charlen);`
			`/* A question mark */`
			`wchar_to = 0x003f;`
			`charlen = 1;`
			`}`
			`put_unaligned_le16(wchar_to, &to[i]);`
			`}`

			`put_unaligned_le16(0, &to[i]);`
			`return i;`
			`}`

			`/*`
			`* cifs_strndup_from_utf16 - copy a string from wire format to the local`
			`* codepage`
			`* @src - source string`
			`* @maxlen - don't walk past this many bytes in the source string`
			`* @is_unicode - is this a unicode string?`
			`* @codepage - destination codepage`
			`*`
			`* Take a string given by the server, convert it to the local codepage and`
			`* put it in a new buffer. Returns a pointer to the new string or NULL on`
			`* error.`
			`*/`
			`char *`
			`cifs_strndup_from_utf16(const char *src, const int maxlen,`
			`const bool is_unicode, const struct nls_table *codepage)`
			`{`
			`int len;`
			`char *dst;`

			`if (is_unicode) {`
			`len = cifs_utf16_bytes((__le16 *) src, maxlen, codepage);`
			`len += nls_nullsize(codepage);`
			`dst = kmalloc(len, GFP_KERNEL);`
			`if (!dst)`
			`return NULL;`
			`cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage,`
			`false);`
			`} else {`
			`len = strnlen(src, maxlen);`
			`len++;`
			`dst = kmalloc(len, GFP_KERNEL);`
			`if (!dst)`
			`return NULL;`
			`strlcpy(dst, src, len);`
			`}`

			`return dst;`
			`}`

			`/*`
			`* Convert 16 bit Unicode pathname to wire format from string in current code`
			`* page. Conversion may involve remapping up the six characters that are`
			`* only legal in POSIX-like OS (if they are present in the string). Path`
			`* names are little endian 16 bit Unicode on the wire`
			`*/`
			`int`
			`cifsConvertToUTF16(__le16 *target, const char *source, int srclen,`
			`const struct nls_table *cp, int mapChars)`
			`{`
			`int i, j, charlen;`
			`char src_char;`
			`__le16 dst_char;`
			`wchar_t tmp;`

			`if (!mapChars)`
			`return cifs_strtoUTF16(target, source, PATH_MAX, cp);`

			`for (i = 0, j = 0; i < srclen; j++) {`
			`src_char = source[i];`
			`charlen = 1;`
			`switch (src_char) {`
			`case 0:`
			`put_unaligned(0, &target[j]);`
			`goto ctoUTF16_out;`
			`case ':':`
			`dst_char = cpu_to_le16(UNI_COLON);`
			`break;`
			`case '*':`
			`dst_char = cpu_to_le16(UNI_ASTERISK);`
			`break;`
			`case '?':`
			`dst_char = cpu_to_le16(UNI_QUESTION);`
			`break;`
			`case '<':`
			`dst_char = cpu_to_le16(UNI_LESSTHAN);`
			`break;`
			`case '>':`
			`dst_char = cpu_to_le16(UNI_GRTRTHAN);`
			`break;`
			`case '|':`
			`dst_char = cpu_to_le16(UNI_PIPE);`
			`break;`
			`/*`
			`* FIXME: We can not handle remapping backslash (UNI_SLASH)`
			`* until all the calls to build_path_from_dentry are modified,`
			`* as they use backslash as separator.`
			`*/`
			`default:`
			`charlen = cp->char2uni(source + i, srclen - i, &tmp);`
			`dst_char = cpu_to_le16(tmp);`

			`/*`
			`* if no match, use question mark, which at least in`
			`* some cases serves as wild card`
			`*/`
			`if (charlen < 1) {`
			`dst_char = cpu_to_le16(0x003f);`
			`charlen = 1;`
			`}`
			`}`
			`/*`
			`* character may take more than one byte in the source string,`
			`* but will take exactly two bytes in the target string`
			`*/`
			`i += charlen;`
			`put_unaligned(dst_char, &target[j]);`
			`}`

			`ctoUTF16_out:`
			`return i;`
			`}`