M7350/kernel/lib/glob.c

#include <linux/module.h>
#include <linux/glob.h>

/*
 * Module metadata.
 *
 * The only reason this code can be compiled as a module is because the
 * ATA code that depends on it can be as well.  In practice, they're
 * both usually compiled in and the module overhead goes away.
 */
MODULE_DESCRIPTION("glob(7) matching");
MODULE_LICENSE("Dual MIT/GPL");

/**
 * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
 * @pat: Shell-style pattern to match, e.g. "*.[ch]".
 * @str: String to match.  The pattern must match the entire string.
 *
 * Perform shell-style glob matching, returning true (1) if the match
 * succeeds, or false (0) if it fails.  Equivalent to !fnmatch(@pat, @str, 0).
 *
 * Pattern metacharacters are ?, *, [ and \.
 * (And, inside character classes, !, - and ].)
 *
 * This is a small and simple implementation intended for device blacklists
 * where a string is matched against a number of patterns.  Thus, it
 * does not preprocess the patterns.  It is non-recursive, and run-time
 * is at most quadratic: strlen(@str)*strlen(@pat).
 *
 * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
 * it takes 6 passes over the pattern before matching the string.
 *
 * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
 * treat / or leading . specially; it isn't actually used for pathnames.
 *
 * Note that according to glob(7) (and unlike bash), character classes
 * are complemented by a leading !; this does not support the regex-style
 * [^a-z] syntax.
 *
 * An opening bracket without a matching close is matched literally.
 *
 * A character class, complemented or not, never matches the terminating
 * nul of @str: "[!a]" does not match the empty string.
 *
 * Return: true if @pat matches all of @str, false otherwise.
 */
bool __pure glob_match(char const *pat, char const *str)
{
	/*
	 * Backtrack to previous * on mismatch and retry starting one
	 * character later in the string.  Because * matches all characters
	 * (no exception for /), it can be easily proved that there's
	 * never a need to backtrack multiple levels.
	 *
	 * back_str is only read once back_pat has been set by a '*', but
	 * it is still given a defined initial value: initialising it from
	 * itself would read an indeterminate value.
	 */
	char const *back_pat = NULL, *back_str = NULL;

	/*
	 * Loop over each token (character or class) in pat, matching
	 * it against the remaining unmatched tail of str.  Return false
	 * on mismatch, or true after matching the trailing nul bytes.
	 */
	for (;;) {
		unsigned char c = *str++;
		unsigned char d = *pat++;

		switch (d) {
		case '?':	/* Wildcard: anything but nul */
			if (c == '\0')
				return false;
			break;
		case '*':	/* Any-length wildcard */
			if (*pat == '\0')	/* Optimize trailing * case */
				return true;
			back_pat = pat;
			back_str = --str;	/* Allow zero-length match */
			break;
		case '[': {	/* Character class */
			bool match = false, inverted = (*pat == '!');
			char const *class = pat + inverted;
			unsigned char a = *class++;

			/*
			 * The end of str cannot be matched by any class.
			 * Without this check a complemented class would
			 * "match" the nul, and the next iteration would
			 * read past the end of str.  Backtracking cannot
			 * help either, as it only moves further along str.
			 */
			if (c == '\0')
				return false;

			/*
			 * Iterate over each span in the character class.
			 * A span is either a single character a, or a
			 * range a-b.  The first span may begin with ']'.
			 */
			do {
				unsigned char b = a;

				if (a == '\0')	/* Malformed */
					goto literal;

				if (class[0] == '-' && class[1] != ']') {
					b = class[1];

					if (b == '\0')
						goto literal;

					class += 2;
					/* Any special action if a > b? */
				}
				match |= (a <= c && c <= b);
			} while ((a = *class++) != ']');

			if (match == inverted)
				goto backtrack;
			pat = class;	/* Resume after the closing ']' */
			}
			break;
		case '\\':	/* Escape: take the next character literally */
			d = *pat++;
			/*FALLTHROUGH*/
		default:	/* Literal character */
literal:
			if (c == d) {
				if (d == '\0')
					return true;
				break;
			}
backtrack:
			if (c == '\0' || !back_pat)
				return false;	/* No point continuing */
			/* Try again from last *, one character later in str. */
			pat = back_pat;
			str = ++back_str;
			break;
		}
	}
}
EXPORT_SYMBOL(glob_match);


#ifdef CONFIG_GLOB_SELFTEST

#include <linux/printk.h>
#include <linux/moduleparam.h>

/*
 * Self-test verbosity switch: boot with "glob.verbose=1" to have the
 * passing tests logged as well as the failing ones.
 */
static bool verbose;
module_param(verbose, bool, 0);

/*
 * One self-test case: a pattern, a string to match it against, and the
 * expected result.  Not referenced by the code visible in this file,
 * which stores its test cases in a packed string instead.
 */
struct glob_test {
	char const *pat;
	char const *str;
	bool expected;
};

static bool __pure __init test(char const *pat, char const *str, bool expected)
{
	bool match = glob_match(pat, str);
	bool success = match == expected;

	/* Can't get string literals into a particular section, so... */
	static char const msg_error[] __initconst =
		KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n";
	static char const msg_ok[] __initconst =
		KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n";
	static char const mismatch[] __initconst = "mismatch";
	char const *message;

	if (!success)
		message = msg_error;
	else if (verbose)
		message = msg_ok;
	else
		return success;

	printk(message, pat, str, mismatch + 3*match);
	return success;
}

/*
 * The tests are all jammed together in one array to make it simpler
 * to place that array in the .init.rodata section.  The obvious
 * "array of structures containing char *" has no way to force the
 * pointed-to strings to be in a particular section.
 *
 * Anyway, a test consists of:
 * 1. Expected glob_match result: '1' or '0'.
 * 2. Pattern to match: null-terminated string
 * 3. String to match against: null-terminated string
 *
 * The list of tests is terminated with a final '\0' instead of
 * a glob_match result character.  That final byte is not written out
 * below: it is the implicit nul that ends the concatenated string
 * literal, following the explicit "\0" of the last test string.
 */
static char const glob_tests[] __initconst =
	/* Some basic tests */
	"1" "a\0" "a\0"
	"0" "a\0" "b\0"
	"0" "a\0" "aa\0"
	"0" "a\0" "\0"
	"1" "\0" "\0"
	"0" "\0" "a\0"
	/* Simple character class tests */
	"1" "[a]\0" "a\0"
	"0" "[a]\0" "b\0"
	"0" "[!a]\0" "a\0"
	"1" "[!a]\0" "b\0"
	"1" "[ab]\0" "a\0"
	"1" "[ab]\0" "b\0"
	"0" "[ab]\0" "c\0"
	"1" "[!ab]\0" "c\0"
	"1" "[a-c]\0" "b\0"
	"0" "[a-c]\0" "d\0"
	/* Corner cases in character class parsing */
	"1" "[a-c-e-g]\0" "-\0"
	"0" "[a-c-e-g]\0" "d\0"
	"1" "[a-c-e-g]\0" "f\0"
	"1" "[]a-ceg-ik[]\0" "a\0"
	"1" "[]a-ceg-ik[]\0" "]\0"
	"1" "[]a-ceg-ik[]\0" "[\0"
	"1" "[]a-ceg-ik[]\0" "h\0"
	"0" "[]a-ceg-ik[]\0" "f\0"
	"0" "[!]a-ceg-ik[]\0" "h\0"
	"0" "[!]a-ceg-ik[]\0" "]\0"
	"1" "[!]a-ceg-ik[]\0" "f\0"
	/* Simple wild cards */
	"1" "?\0" "a\0"
	"0" "?\0" "aa\0"
	"0" "??\0" "a\0"
	"1" "?x?\0" "axb\0"
	"0" "?x?\0" "abx\0"
	"0" "?x?\0" "xab\0"
	/* Asterisk wild cards (backtracking) */
	"0" "*??\0" "a\0"
	"1" "*??\0" "ab\0"
	"1" "*??\0" "abc\0"
	"1" "*??\0" "abcd\0"
	"0" "??*\0" "a\0"
	"1" "??*\0" "ab\0"
	"1" "??*\0" "abc\0"
	"1" "??*\0" "abcd\0"
	"0" "?*?\0" "a\0"
	"1" "?*?\0" "ab\0"
	"1" "?*?\0" "abc\0"
	"1" "?*?\0" "abcd\0"
	"1" "*b\0" "b\0"
	"1" "*b\0" "ab\0"
	"0" "*b\0" "ba\0"
	"1" "*b\0" "bb\0"
	"1" "*b\0" "abb\0"
	"1" "*b\0" "bab\0"
	"1" "*bc\0" "abbc\0"
	"1" "*bc\0" "bc\0"
	"1" "*bc\0" "bbc\0"
	"1" "*bc\0" "bcbc\0"
	/* Multiple asterisks (complex backtracking) */
	"1" "*ac*\0" "abacadaeafag\0"
	"1" "*ac*ae*ag*\0" "abacadaeafag\0"
	"1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0"
	"0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0"
	"1" "*abcd*\0" "abcabcabcabcdefg\0"
	"1" "*ab*cd*\0" "abcabcabcabcdefg\0"
	"1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0"
	"0" "*abcd*\0" "abcabcabcabcefg\0"
	"0" "*ab*cd*\0" "abcabcabcabcefg\0";

/*
 * Module init: run every case packed into glob_tests and log a summary.
 *
 * Returns 0 when all cases pass, -ECANCELED when at least one fails.
 */
static int __init glob_init(void)
{
	static char const message[] __initconst =
		KERN_INFO "glob: %u self-tests passed, %u failed\n";
	char const *cursor;
	unsigned passed = 0;
	unsigned total = 0;
	unsigned failed;

	/*
	 * Each record is a '1' or '0' byte giving the expected outcome,
	 * followed by two null-terminated strings: the pattern, then the
	 * string to match it against.  A '\0' in place of the outcome byte
	 * marks the end of the list.
	 */
	for (cursor = glob_tests; *cursor != '\0'; total++) {
		/* Low bit of the ASCII digit is the expected result. */
		bool expected = *cursor & 1;
		char const *pat = cursor + 1;
		char const *str = pat + strlen(pat) + 1;

		if (test(pat, str, expected))
			passed++;

		/* Step over the subject string to the next record. */
		cursor = str + strlen(str) + 1;
	}

	failed = total - passed;
	printk(message, passed, failed);

	/* What's the errno for "kernel bug detected"?  Guess... */
	return failed ? -ECANCELED : 0;
}

/* Empty exit handler: it exists only so that the module can be unloaded. */
static void __exit glob_fini(void)
{
}

/* Run the self-tests at module init; glob_fini is the (empty) exit handler. */
module_init(glob_init);
module_exit(glob_fini);

#endif /* CONFIG_GLOB_SELFTEST */
M7350v5_en_gpl 2024-09-09 08:57:42 +00:00			`#include <linux/module.h>`
			`#include <linux/glob.h>`

			`/*`
			`* The only reason this code can be compiled as a module is because the`
			`* ATA code that depends on it can be as well. In practice, they're`
			`* both usually compiled in and the module overhead goes away.`
			`*/`
			`MODULE_DESCRIPTION("glob(7) matching");`
			`MODULE_LICENSE("Dual MIT/GPL");`

			`/**`
			`* glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)`
			`* @pat: Shell-style pattern to match, e.g. "*.[ch]".`
			`* @str: String to match. The pattern must match the entire string.`
			`*`
			`* Perform shell-style glob matching, returning true (1) if the match`
			`* succeeds, or false (0) if it fails. Equivalent to !fnmatch(@pat, @str, 0).`
			`*`
			`* Pattern metacharacters are ?, *, [ and \.`
			`* (And, inside character classes, !, - and ].)`
			`*`
			`* This is small and simple implementation intended for device blacklists`
			`* where a string is matched against a number of patterns. Thus, it`
			`* does not preprocess the patterns. It is non-recursive, and run-time`
			`* is at most quadratic: strlen(@str)*strlen(@pat).`
			`*`
			`* An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");`
			`* it takes 6 passes over the pattern before matching the string.`
			`*`
			`* Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT`
			`* treat / or leading . specially; it isn't actually used for pathnames.`
			`*`
			`* Note that according to glob(7) (and unlike bash), character classes`
			`* are complemented by a leading !; this does not support the regex-style`
			`* [^a-z] syntax.`
			`*`
			`* An opening bracket without a matching close is matched literally.`
			`*/`
			`bool __pure glob_match(char const *pat, char const *str)`
			`{`
			`/*`
			`* Backtrack to previous * on mismatch and retry starting one`
			`* character later in the string. Because * matches all characters`
			`* (no exception for /), it can be easily proved that there's`
			`* never a need to backtrack multiple levels.`
			`*/`
			`char const *back_pat = NULL, *back_str = back_str;`

			`/*`
			`* Loop over each token (character or class) in pat, matching`
			`* it against the remaining unmatched tail of str. Return false`
			`* on mismatch, or true after matching the trailing nul bytes.`
			`*/`
			`for (;;) {`
			`unsigned char c = *str++;`
			`unsigned char d = *pat++;`

			`switch (d) {`
			`case '?': /* Wildcard: anything but nul */`
			`if (c == '\0')`
			`return false;`
			`break;`
			`case '*': /* Any-length wildcard */`
			`if (*pat == '\0') /* Optimize trailing * case */`
			`return true;`
			`back_pat = pat;`
			`back_str = --str; /* Allow zero-length match */`
			`break;`
			`case '[': { /* Character class */`
			`bool match = false, inverted = (*pat == '!');`
			`char const *class = pat + inverted;`
			`unsigned char a = *class++;`

			`/*`
			`* Iterate over each span in the character class.`
			`* A span is either a single character a, or a`
			`* range a-b. The first span may begin with ']'.`
			`*/`
			`do {`
			`unsigned char b = a;`

			`if (a == '\0') /* Malformed */`
			`goto literal;`

			`if (class[0] == '-' && class[1] != ']') {`
			`b = class[1];`

			`if (b == '\0')`
			`goto literal;`

			`class += 2;`
			`/* Any special action if a > b? */`
			`}`
			`match |= (a <= c && c <= b);`
			`} while ((a = *class++) != ']');`

			`if (match == inverted)`
			`goto backtrack;`
			`pat = class;`
			`}`
			`break;`
			`case '\\':`
			`d = *pat++;`
			`/*FALLTHROUGH*/`
			`default: /* Literal character */`
			`literal:`
			`if (c == d) {`
			`if (d == '\0')`
			`return true;`
			`break;`
			`}`
			`backtrack:`
			`if (c == '\0' || !back_pat)`
			`return false; /* No point continuing */`
			`/* Try again from last *, one character later in str. */`
			`pat = back_pat;`
			`str = ++back_str;`
			`break;`
			`}`
			`}`
			`}`
			`EXPORT_SYMBOL(glob_match);`


			`#ifdef CONFIG_GLOB_SELFTEST`

			`#include <linux/printk.h>`
			`#include <linux/moduleparam.h>`

			`/* Boot with "glob.verbose=1" to show successful tests, too */`
			`static bool verbose = false;`
			`module_param(verbose, bool, 0);`

			`struct glob_test {`
			`char const *pat, *str;`
			`bool expected;`
			`};`

			`static bool __pure __init test(char const *pat, char const *str, bool expected)`
			`{`
			`bool match = glob_match(pat, str);`
			`bool success = match == expected;`

			`/* Can't get string literals into a particular section, so... */`
			`static char const msg_error[] __initconst =`
			`KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n";`
			`static char const msg_ok[] __initconst =`
			`KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n";`
			`static char const mismatch[] __initconst = "mismatch";`
			`char const *message;`

			`if (!success)`
			`message = msg_error;`
			`else if (verbose)`
			`message = msg_ok;`
			`else`
			`return success;`

			`printk(message, pat, str, mismatch + 3*match);`
			`return success;`
			`}`

			`/*`
			`* The tests are all jammed together in one array to make it simpler`
			`* to place that array in the .init.rodata section. The obvious`
			`* "array of structures containing char *" has no way to force the`
			`* pointed-to strings to be in a particular section.`
			`*`
			`* Anyway, a test consists of:`
			`* 1. Expected glob_match result: '1' or '0'.`
			`* 2. Pattern to match: null-terminated string`
			`* 3. String to match against: null-terminated string`
			`*`
			`* The list of tests is terminated with a final '\0' instead of`
			`* a glob_match result character.`
			`*/`
			`static char const glob_tests[] __initconst =`
			`/* Some basic tests */`
			`"1" "a\0" "a\0"`
			`"0" "a\0" "b\0"`
			`"0" "a\0" "aa\0"`
			`"0" "a\0" "\0"`
			`"1" "\0" "\0"`
			`"0" "\0" "a\0"`
			`/* Simple character class tests */`
			`"1" "[a]\0" "a\0"`
			`"0" "[a]\0" "b\0"`
			`"0" "[!a]\0" "a\0"`
			`"1" "[!a]\0" "b\0"`
			`"1" "[ab]\0" "a\0"`
			`"1" "[ab]\0" "b\0"`
			`"0" "[ab]\0" "c\0"`
			`"1" "[!ab]\0" "c\0"`
			`"1" "[a-c]\0" "b\0"`
			`"0" "[a-c]\0" "d\0"`
			`/* Corner cases in character class parsing */`
			`"1" "[a-c-e-g]\0" "-\0"`
			`"0" "[a-c-e-g]\0" "d\0"`
			`"1" "[a-c-e-g]\0" "f\0"`
			`"1" "[]a-ceg-ik[]\0" "a\0"`
			`"1" "[]a-ceg-ik[]\0" "]\0"`
			`"1" "[]a-ceg-ik[]\0" "[\0"`
			`"1" "[]a-ceg-ik[]\0" "h\0"`
			`"0" "[]a-ceg-ik[]\0" "f\0"`
			`"0" "[!]a-ceg-ik[]\0" "h\0"`
			`"0" "[!]a-ceg-ik[]\0" "]\0"`
			`"1" "[!]a-ceg-ik[]\0" "f\0"`
			`/* Simple wild cards */`
			`"1" "?\0" "a\0"`
			`"0" "?\0" "aa\0"`
			`"0" "??\0" "a\0"`
			`"1" "?x?\0" "axb\0"`
			`"0" "?x?\0" "abx\0"`
			`"0" "?x?\0" "xab\0"`
			`/* Asterisk wild cards (backtracking) */`
			`"0" "*??\0" "a\0"`
			`"1" "*??\0" "ab\0"`
			`"1" "*??\0" "abc\0"`
			`"1" "*??\0" "abcd\0"`
			`"0" "??*\0" "a\0"`
			`"1" "??*\0" "ab\0"`
			`"1" "??*\0" "abc\0"`
			`"1" "??*\0" "abcd\0"`
			`"0" "?*?\0" "a\0"`
			`"1" "?*?\0" "ab\0"`
			`"1" "?*?\0" "abc\0"`
			`"1" "?*?\0" "abcd\0"`
			`"1" "*b\0" "b\0"`
			`"1" "*b\0" "ab\0"`
			`"0" "*b\0" "ba\0"`
			`"1" "*b\0" "bb\0"`
			`"1" "*b\0" "abb\0"`
			`"1" "*b\0" "bab\0"`
			`"1" "*bc\0" "abbc\0"`
			`"1" "*bc\0" "bc\0"`
			`"1" "*bc\0" "bbc\0"`
			`"1" "*bc\0" "bcbc\0"`
			`/* Multiple asterisks (complex backtracking) */`
			`"1" "*ac*\0" "abacadaeafag\0"`
			`"1" "*ac*ae*ag*\0" "abacadaeafag\0"`
			`"1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0"`
			`"0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0"`
			`"1" "*abcd*\0" "abcabcabcabcdefg\0"`
			`"1" "*ab*cd*\0" "abcabcabcabcdefg\0"`
			`"1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0"`
			`"0" "*abcd*\0" "abcabcabcabcefg\0"`
			`"0" "*ab*cd*\0" "abcabcabcabcefg\0";`

			`static int __init glob_init(void)`
			`{`
			`unsigned successes = 0;`
			`unsigned n = 0;`
			`char const *p = glob_tests;`
			`static char const message[] __initconst =`
			`KERN_INFO "glob: %u self-tests passed, %u failed\n";`

			`/*`
			`* Tests are jammed together in a string. The first byte is '1'`
			`* or '0' to indicate the expected outcome, or '\0' to indicate the`
			`* end of the tests. Then come two null-terminated strings: the`
			`* pattern and the string to match it against.`
			`*/`
			`while (*p) {`
			`bool expected = *p++ & 1;`
			`char const *pat = p;`

			`p += strlen(p) + 1;`
			`successes += test(pat, p, expected);`
			`p += strlen(p) + 1;`
			`n++;`
			`}`

			`n -= successes;`
			`printk(message, successes, n);`

			`/* What's the errno for "kernel bug detected"? Guess... */`
			`return n ? -ECANCELED : 0;`
			`}`

			`/* We need a dummy exit function to allow unload */`
			`static void __exit glob_fini(void) { }`

			`module_init(glob_init);`
			`module_exit(glob_fini);`

			`#endif /* CONFIG_GLOB_SELFTEST */`