kernel/module/stats.c - linux - Git at Google

 // SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Debugging module statistics.
  *
  * Copyright (C) 2023 Luis Chamberlain <mcgrof@kernel.org>
  */

 #include <linux/module.h>
 #include <uapi/linux/module.h>
 #include <linux/string.h>
 #include <linux/printk.h>
 #include <linux/slab.h>
 #include <linux/list.h>
 #include <linux/debugfs.h>
 #include <linux/rculist.h>
 #include <linux/math.h>

 #include "internal.h"

 /**
  * DOC: module debugging statistics overview
  *
  * Enabling CONFIG_MODULE_STATS enables module debugging statistics which
  * are useful to monitor and root cause memory pressure issues with module
  * loading. These statistics are useful to allow us to improve production
  * workloads.
  *
  * The current module debugging statistics supported help keep track of module
  * loading failures to enable improvements either for kernel module auto-loading
  * usage (request_module()) or interactions with userspace. Statistics are
  * provided to track all possible failures in the finit_module() path and memory
  * wasted in this process space.  Each of the failure counters are associated
  * to a type of module loading failure which is known to incur a certain amount
  * of memory allocation loss. In the worst case loading a module will fail after
  * a 3 step memory allocation process:
  *
  *   a) memory allocated with kernel_read_file_from_fd()
  *   b) module decompression processes the file read from
  *      kernel_read_file_from_fd(), and vmap() is used to map
  *      the decompressed module to a new local buffer which represents
  *      a copy of the decompressed module passed from userspace. The buffer
  *      from kernel_read_file_from_fd() is freed right away.
  *   c) layout_and_allocate() allocates space for the final resting
  *      place where we would keep the module if it were to be processed
  *      successfully.
  *
  * If a failure occurs after these three different allocations only one
  * counter will be incremented with the summation of the allocated bytes freed
  * incurred during this failure. Likewise, if module loading failed only after
  * step b) a separate counter is used and incremented for the bytes freed and
  * not used during both of those allocations.
  *
  * Virtual memory space can be limited, for example on x86 virtual memory size
  * defaults to 128 MiB. We should strive to limit and avoid wasting virtual
  * memory allocations when possible. These module debugging statistics help
  * to evaluate how much memory is being wasted on bootup due to module loading
  * failures.
  *
  * All counters are designed to be incremental. Atomic counters are used so to
  * remain simple and avoid delays and deadlocks.
  */

 /**
  * DOC: dup_failed_modules - tracks duplicate failed modules
  *
  * Linked list of modules which failed to be loaded because an already existing
  * module with the same name was already being processed or already loaded.
  * The finit_module() system call incurs heavy virtual memory allocations. In
  * the worst case an finit_module() system call can end up allocating virtual
  * memory 3 times:
  *
  *   1) kernel_read_file_from_fd() call uses vmalloc()
  *   2) optional module decompression uses vmap()
  *   3) layout_and_allocate() can use vzalloc() or an arch specific variation of
  *      vmalloc to deal with ELF sections requiring special permissions
  *
  * In practice on a typical boot today most finit_module() calls fail due to
  * the module with the same name already being loaded or about to be processed.
  * All virtual memory allocated to these failed modules will be freed with
  * no functional use.
  *
  * To help with this the dup_failed_modules allows us to track modules which
  * failed to load due to the fact that a module was already loaded or being
  * processed.  There are only two points at which we can fail such calls,
  * we list them below along with the number of virtual memory allocation
  * calls:
  *
  *   a) FAIL_DUP_MOD_BECOMING: at the end of early_mod_check() before
  *	layout_and_allocate().
  *	- with module decompression: 2 virtual memory allocation calls
  *	- without module decompression: 1 virtual memory allocation call
  *   b) FAIL_DUP_MOD_LOAD: after layout_and_allocate() on add_unformed_module()
  *   	- with module decompression 3 virtual memory allocation calls
  *   	- without module decompression 2 virtual memory allocation calls
  *
  * We should strive to get this list to be as small as possible. If this list
  * is not empty it is a reflection of possible work or optimizations possible
  * either in-kernel or in userspace.
  */
 /* List head; entries are struct mod_fail_load added with list_add_rcu(). */
 static LIST_HEAD(dup_failed_modules);

 /**
  * DOC: module statistics debugfs counters
  *
  * The total amount of wasted virtual memory allocation space during module
  * loading can be computed by adding the total from the summation:
  *
  *   * @invalid_kread_bytes +
  *     @invalid_decompress_bytes +
  *     @invalid_becoming_bytes +
  *     @invalid_mod_bytes
  *
  * The following debugfs counters are available to inspect module loading
  * failures:
  *
  *   * total_mod_size: total bytes ever used by all modules we've dealt with on
  *     this system
  *   * total_text_size: total bytes of the .text and .init.text ELF section
  *     sizes we've dealt with on this system
  *   * invalid_kread_bytes: bytes allocated and then freed on failures which
  *     happen due to the initial kernel_read_file_from_fd(). kernel_read_file_from_fd()
  *     uses vmalloc(). These should typically not happen unless your system is
  *     under memory pressure.
  *   * invalid_decompress_bytes: number of bytes allocated and freed due to
  *     memory allocations in the module decompression path that use vmap().
  *     These typically should not happen unless your system is under memory
  *     pressure.
  *   * invalid_becoming_bytes: total number of bytes allocated and freed used
  *     to read the kernel module userspace wants us to read before we
  *     promote it to be processed to be added to our @modules linked list. These
  *     failures can happen if we had a check in between a successful kernel_read_file_from_fd()
  *     call and right before we allocate our private memory for the module
  *     which would be kept if the module is successfully loaded. The most common
  *     reason for this failure is when userspace is racing to load a module
  *     which it does not yet see loaded. The first module to succeed in
  *     add_unformed_module() will add a module to our &modules list and
  *     subsequent loads of modules with the same name will error out at the
  *     end of early_mod_check(). The check for module_patient_check_exists()
  *     at the end of early_mod_check() prevents duplicate allocations
  *     on layout_and_allocate() for modules already being processed. These
  *     duplicate failed modules are non-fatal, however they typically are
  *     indicative of userspace not seeing a module in userspace loaded yet and
  *     unnecessarily trying to load a module before the kernel even has a chance
  *     to begin to process prior requests. Although duplicate failures can be
  *     non-fatal, we should try to reduce vmalloc() pressure proactively, so
  *     ideally after boot this will be as close to 0 as possible.  If module
  *     decompression was used we also add to this counter the cost of the
  *     initial kernel_read_file_from_fd() of the compressed module. If module
  *     decompression was not used the value represents the total allocated and
  *     freed bytes in kernel_read_file_from_fd() calls for these type of
  *     failures. These failures can occur because:
  *
  *    * module_sig_check() - module signature checks
  *    * elf_validity_cache_copy() - some ELF validation issue
  *    * early_mod_check():
  *
  *      * blacklisting
  *      * failed to rewrite section headers
  *      * version magic
  *      * live patch requirements didn't check out
  *      * the module was detected as being already present
  *
  *   * invalid_mod_bytes: these are the total number of bytes allocated and
  *     freed due to failures after we did all the sanity checks of the module
  *     which userspace passed to us and after our first check that the module
  *     is unique.  A module can still fail to load if we detect the module is
  *     loaded after we allocate space for it with layout_and_allocate(), we do
  *     this check right before processing the module as live and run its
  *     initialization routines. Note that if you have a failure of this type it
  *     also means the respective kernel_read_file_from_fd() memory space was
  *     also freed and not used, and so we increment this counter with twice
  *     the size of the module. Additionally if you used module decompression
  *     the size of the compressed module is also added to this counter.
  *
  *  * modcount: how many modules we've loaded in our kernel life time
  *  * failed_kreads: how many modules failed due to failed kernel_read_file_from_fd()
  *  * failed_decompress: how many failed module decompression attempts we've had.
  *    These really should not happen unless your compression / decompression
  *    might be broken.
  *  * failed_becoming: how many modules failed after we kernel_read_file_from_fd()
  *    it and before we allocate memory for it with layout_and_allocate(). This
  *    counter is never incremented if you manage to validate the module and
  *    call layout_and_allocate() for it.
  *  * failed_load_modules: how many modules failed once we've allocated our
  *    private space for our module using layout_and_allocate(). These failures
  *    should hopefully mostly be dealt with already. Races in theory could
  *    still exist here, but it would just mean the kernel had started processing
  *    two threads concurrently up to early_mod_check() and one thread won.
  *    These failures are good signs the kernel or userspace is doing something
  *    seriously stupid or that could be improved. We should strive to fix these,
  *    but it is perhaps not easy to fix them. A recent example are the modules
  *    requests incurred for frequency modules, a separate module request was
  *    being issued for each CPU on a system.
  */

 /*
  * Byte counters. The non-static ones have external linkage; their writers
  * are not visible in this file.
  */
 atomic_long_t total_mod_size;
 atomic_long_t total_text_size;
 atomic_long_t invalid_kread_bytes;
 atomic_long_t invalid_decompress_bytes;
 /* Only written in this file, by mod_stat_bump_becoming(). */
 static atomic_long_t invalid_becoming_bytes;
 /* Only written in this file, by mod_stat_bump_invalid(). */
 static atomic_long_t invalid_mod_bytes;
 /*
  * Event counters. The non-static ones have external linkage; their writers
  * are not visible in this file.
  */
 atomic_t modcount;
 atomic_t failed_kreads;
 atomic_t failed_decompress;
 /* Only written in this file, by mod_stat_bump_becoming(). */
 static atomic_t failed_becoming;
 /* Only written in this file, by mod_stat_bump_invalid(). */
 static atomic_t failed_load_modules;

 static const char *mod_fail_to_str(struct mod_fail_load *mod_fail)
 {
 	if (test_bit(FAIL_DUP_MOD_BECOMING, &mod_fail->dup_fail_mask) &&
 	    test_bit(FAIL_DUP_MOD_LOAD, &mod_fail->dup_fail_mask))
 		return "Becoming & Load";
 	if (test_bit(FAIL_DUP_MOD_BECOMING, &mod_fail->dup_fail_mask))
 		return "Becoming";
 	if (test_bit(FAIL_DUP_MOD_LOAD, &mod_fail->dup_fail_mask))
 		return "Load";
 	return "Bug-on-stats";
 }

 /*
  * Record one failed load in failed_load_modules and add the bytes it wasted
  * to invalid_mod_bytes: twice @info->len, plus @info->compressed_len when
  * @flags has MODULE_INIT_COMPRESSED_FILE set and CONFIG_MODULE_DECOMPRESS
  * is enabled.
  */
 void mod_stat_bump_invalid(struct load_info *info, int flags)
 {
 	unsigned long wasted = info->len * 2;

 #if defined(CONFIG_MODULE_DECOMPRESS)
 	if (flags & MODULE_INIT_COMPRESSED_FILE)
 		wasted += info->compressed_len;
 #endif
 	atomic_long_add(wasted, &invalid_mod_bytes);
 	atomic_inc(&failed_load_modules);
 }

 /*
  * Record one failure in failed_becoming and add the bytes it wasted to
  * invalid_becoming_bytes: @info->len, plus @info->compressed_len when
  * @flags has MODULE_INIT_COMPRESSED_FILE set and CONFIG_MODULE_DECOMPRESS
  * is enabled.
  */
 void mod_stat_bump_becoming(struct load_info *info, int flags)
 {
 	unsigned long wasted = info->len;

 #if defined(CONFIG_MODULE_DECOMPRESS)
 	if (flags & MODULE_INIT_COMPRESSED_FILE)
 		wasted += info->compressed_len;
 #endif
 	atomic_inc(&failed_becoming);
 	atomic_long_add(wasted, &invalid_becoming_bytes);
 }

 /*
  * try_add_failed_module() - record a duplicate module load failure
  * @name: name of the module which failed to load
  * @reason: which duplicate check rejected it; used as a bit number in the
  *	entry's dup_fail_mask
  *
  * If an entry for @name already exists on dup_failed_modules its hit count
  * is incremented and @reason is added to its mask. Otherwise a new entry is
  * allocated and added to the list. The list walk is annotated with
  * lockdep_is_held(&module_mutex), i.e. callers are expected to hold
  * module_mutex.
  *
  * Return: 0 on success, -ENOMEM if a new entry could not be allocated.
  */
 int try_add_failed_module(const char *name, enum fail_dup_mod_reason reason)
 {
 	struct mod_fail_load *mod_fail;

 	list_for_each_entry_rcu(mod_fail, &dup_failed_modules, list,
 				lockdep_is_held(&module_mutex)) {
 		if (!strcmp(mod_fail->name, name)) {
 			atomic_long_inc(&mod_fail->count);
 			__set_bit(reason, &mod_fail->dup_fail_mask);
 			goto out;
 		}
 	}

 	mod_fail = kzalloc(sizeof(*mod_fail), GFP_KERNEL);
 	if (!mod_fail)
 		return -ENOMEM;
 	/*
 	 * Bounded copy: never write past the embedded name buffer, and always
 	 * leave it NUL-terminated, even if @name is longer than expected.
 	 */
 	strscpy(mod_fail->name, name, sizeof(mod_fail->name));
 	__set_bit(reason, &mod_fail->dup_fail_mask);
 	atomic_long_inc(&mod_fail->count);
 	list_add_rcu(&mod_fail->list, &dup_failed_modules);
 out:
 	return 0;
 }

 /*
  * At 64 bytes per module and assuming a 1024 bytes preamble we can fit the
  * 112 module prints within 8k.
  *
  * 1024 + (64*112) = 8k
  */
 #define MAX_PREAMBLE 1024
 #define MAX_FAILED_MOD_PRINT 112
 #define MAX_BYTES_PER_MOD 64
 /*
  * read_file_mod_stats() - read handler for the debugfs "stats" file
  * @file: the open file (not used)
  * @user_buf: userspace buffer to copy the report into
  * @count: maximum number of bytes to copy
  * @ppos: file position, handled by simple_read_from_buffer()
  *
  * Builds a text report in a temporary buffer: first a preamble with a
  * snapshot of every counter plus derived averages, then one line per entry
  * on dup_failed_modules (at most MAX_FAILED_MOD_PRINT - 1 of them). The
  * report is rebuilt on every call and the window selected by @ppos and
  * @count is copied to userspace.
  *
  * Return: the result of simple_read_from_buffer(), or -ENOMEM if the
  * temporary buffer could not be allocated.
  */
 static ssize_t read_file_mod_stats(struct file *file, char __user *user_buf,
 				   size_t count, loff_t *ppos)
 {
 	struct mod_fail_load *mod_fail;
 	unsigned int len, size, count_failed = 0;
 	char *buf;
 	ssize_t ret;
 	u32 live_mod_count, fkreads, fdecompress, fbecoming, floads;
 	unsigned long total_size, text_size, ikread_bytes, ibecoming_bytes,
 		idecompress_bytes, imod_bytes, total_virtual_lost;

 	/* Snapshot the counters once so the report is computed from one set. */
 	live_mod_count = atomic_read(&modcount);
 	fkreads = atomic_read(&failed_kreads);
 	fdecompress = atomic_read(&failed_decompress);
 	fbecoming = atomic_read(&failed_becoming);
 	floads = atomic_read(&failed_load_modules);

 	total_size = atomic_long_read(&total_mod_size);
 	text_size = atomic_long_read(&total_text_size);
 	ikread_bytes = atomic_long_read(&invalid_kread_bytes);
 	idecompress_bytes = atomic_long_read(&invalid_decompress_bytes);
 	ibecoming_bytes = atomic_long_read(&invalid_becoming_bytes);
 	imod_bytes = atomic_long_read(&invalid_mod_bytes);

 	total_virtual_lost = ikread_bytes + idecompress_bytes + ibecoming_bytes + imod_bytes;

 	/*
 	 * Room for the preamble plus one slot per failure, capped at
 	 * MAX_FAILED_MOD_PRINT slots. Every write below goes through
 	 * scnprintf() with the remaining space, so output that does not fit
 	 * is truncated rather than overflowing the buffer.
 	 */
 	size = MAX_PREAMBLE + min((unsigned int)(floads + fbecoming),
 				  (unsigned int)MAX_FAILED_MOD_PRINT) * MAX_BYTES_PER_MOD;
 	buf = kzalloc(size, GFP_KERNEL);
 	if (buf == NULL)
 		return -ENOMEM;

 	/* The beginning of our debug preamble */
 	len = scnprintf(buf, size, "%25s\t%u\n", "Mods ever loaded", live_mod_count);

 	len += scnprintf(buf + len, size - len, "%25s\t%u\n", "Mods failed on kread", fkreads);

 	len += scnprintf(buf + len, size - len, "%25s\t%u\n", "Mods failed on decompress",
 			 fdecompress);
 	len += scnprintf(buf + len, size - len, "%25s\t%u\n", "Mods failed on becoming", fbecoming);

 	len += scnprintf(buf + len, size - len, "%25s\t%u\n", "Mods failed on load", floads);

 	len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Total module size", total_size);
 	len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Total mod text size", text_size);

 	len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Failed kread bytes", ikread_bytes);

 	len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Failed decompress bytes",
 			 idecompress_bytes);

 	len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Failed becoming bytes", ibecoming_bytes);

 	len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Failed kmod bytes", imod_bytes);

 	len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Virtual mem wasted bytes", total_virtual_lost);

 	/* Averages are only printed when the divisor is non-zero. */
 	if (live_mod_count && total_size) {
 		len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Average mod size",
 				 DIV_ROUND_UP(total_size, live_mod_count));
 	}

 	if (live_mod_count && text_size) {
 		len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Average mod text size",
 				 DIV_ROUND_UP(text_size, live_mod_count));
 	}

 	/*
 	 * We use WARN_ON_ONCE() for the counters to ensure we always have parity
 	 * for keeping tabs on a type of failure with one type of byte counter.
 	 * The counters for imod_bytes does not increase for fkreads failures
 	 * for example, and so on.
 	 */

 	WARN_ON_ONCE(ikread_bytes && !fkreads);
 	if (fkreads && ikread_bytes) {
 		len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Avg fail kread bytes",
 				 DIV_ROUND_UP(ikread_bytes, fkreads));
 	}

 	WARN_ON_ONCE(ibecoming_bytes && !fbecoming);
 	if (fbecoming && ibecoming_bytes) {
 		len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Avg fail becoming bytes",
 				 DIV_ROUND_UP(ibecoming_bytes, fbecoming));
 	}

 	WARN_ON_ONCE(idecompress_bytes && !fdecompress);
 	if (fdecompress && idecompress_bytes) {
 		len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Avg fail decomp bytes",
 				 DIV_ROUND_UP(idecompress_bytes, fdecompress));
 	}

 	WARN_ON_ONCE(imod_bytes && !floads);
 	if (floads && imod_bytes) {
 		len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Average fail load bytes",
 				 DIV_ROUND_UP(imod_bytes, floads));
 	}

 	/* End of our debug preamble header. */

 	/* Catch when we've gone beyond our expected preamble */
 	WARN_ON_ONCE(len >= MAX_PREAMBLE);

 	if (list_empty(&dup_failed_modules))
 		goto out;

 	len += scnprintf(buf + len, size - len, "Duplicate failed modules:\n");
 	len += scnprintf(buf + len, size - len, "%25s\t%15s\t%25s\n",
 			 "Module-name", "How-many-times", "Reason");
 	mutex_lock(&module_mutex);

 	/*
 	 * We hold module_mutex rather than the RCU read lock, so tell lockdep
 	 * which lock protects this walk, the same way try_add_failed_module()
 	 * does.
 	 */
 	list_for_each_entry_rcu(mod_fail, &dup_failed_modules, list,
 				lockdep_is_held(&module_mutex)) {
 		if (WARN_ON_ONCE(++count_failed >= MAX_FAILED_MOD_PRINT))
 			goto out_unlock;
 		len += scnprintf(buf + len, size - len, "%25s\t%15lu\t%25s\n", mod_fail->name,
 				 atomic_long_read(&mod_fail->count), mod_fail_to_str(mod_fail));
 	}
 out_unlock:
 	mutex_unlock(&module_mutex);
 out:
 	ret = simple_read_from_buffer(user_buf, count, ppos, buf, len);
 	kfree(buf);
 	return ret;
 }
 #undef MAX_PREAMBLE
 #undef MAX_FAILED_MOD_PRINT
 #undef MAX_BYTES_PER_MOD

 /* File operations for the read-only debugfs "stats" file. */
 static const struct file_operations fops_mod_stats = {
 	.owner	= THIS_MODULE,
 	.open	= simple_open,
 	.read	= read_file_mod_stats,
 	.llseek	= default_llseek,
 };

 /*
  * Local helpers, undefined again below. Each creates a 0400 debugfs file
  * under mod_debugfs_root, named after the counter variable itself.
  */
 #define stat_file_ulong(name)						\
 	debugfs_create_ulong(#name, 0400, mod_debugfs_root,		\
 			     (unsigned long *) &name.counter)
 #define stat_file_atomic(name)						\
 	debugfs_create_atomic_t(#name, 0400, mod_debugfs_root, &name)

 /*
  * Create one debugfs file per statistics counter, plus the combined "stats"
  * report file. Always returns 0.
  */
 static int __init module_stats_init(void)
 {
 	/* atomic_long_t byte counters */
 	stat_file_ulong(total_mod_size);
 	stat_file_ulong(total_text_size);
 	stat_file_ulong(invalid_kread_bytes);
 	stat_file_ulong(invalid_decompress_bytes);
 	stat_file_ulong(invalid_becoming_bytes);
 	stat_file_ulong(invalid_mod_bytes);

 	/* atomic_t event counters */
 	stat_file_atomic(modcount);
 	stat_file_atomic(failed_kreads);
 	stat_file_atomic(failed_decompress);
 	stat_file_atomic(failed_becoming);
 	stat_file_atomic(failed_load_modules);

 	/* Human readable summary, generated by read_file_mod_stats(). */
 	debugfs_create_file("stats", 0400, mod_debugfs_root, mod_debugfs_root, &fops_mod_stats);

 	return 0;
 }
 #undef stat_file_ulong
 #undef stat_file_atomic
 module_init(module_stats_init);
	// SPDX-License-Identifier: GPL-2.0-or-later
	/*
	* Debugging module statistics.
	*
	* Copyright (C) 2023 Luis Chamberlain <mcgrof@kernel.org>
	*/

	#include <linux/module.h>
	#include <uapi/linux/module.h>
	#include <linux/string.h>
	#include <linux/printk.h>
	#include <linux/slab.h>
	#include <linux/list.h>
	#include <linux/debugfs.h>
	#include <linux/rculist.h>
	#include <linux/math.h>

	#include "internal.h"

	/**
	* DOC: module debugging statistics overview
	*
	* Enabling CONFIG_MODULE_STATS enables module debugging statistics which
	* are useful to monitor and root cause memory pressure issues with module
	* loading. These statistics are useful to allow us to improve production
	* workloads.
	*
	* The current module debugging statistics supported help keep track of module
	* loading failures to enable improvements either for kernel module auto-loading
	* usage (request_module()) or interactions with userspace. Statistics are
	* provided to track all possible failures in the finit_module() path and memory
	* wasted in this process space. Each of the failure counters are associated
	* to a type of module loading failure which is known to incur a certain amount
	* of memory allocation loss. In the worst case loading a module will fail after
	* a 3 step memory allocation process:
	*
	* a) memory allocated with kernel_read_file_from_fd()
	* b) module decompression processes the file read from
	* kernel_read_file_from_fd(), and vmap() is used to map
	* the decompressed module to a new local buffer which represents
	* a copy of the decompressed module passed from userspace. The buffer
	* from kernel_read_file_from_fd() is freed right away.
	* c) layout_and_allocate() allocates space for the final resting
	* place where we would keep the module if it were to be processed
	* successfully.
	*
	* If a failure occurs after these three different allocations only one
	* counter will be incremented with the summation of the allocated bytes freed
	* incurred during this failure. Likewise, if module loading failed only after
	* step b) a separate counter is used and incremented for the bytes freed and
	* not used during both of those allocations.
	*
	* Virtual memory space can be limited, for example on x86 virtual memory size
	* defaults to 128 MiB. We should strive to limit and avoid wasting virtual
	* memory allocations when possible. These module debugging statistics help
	* to evaluate how much memory is being wasted on bootup due to module loading
	* failures.
	*
	* All counters are designed to be incremental. Atomic counters are used so to
	* remain simple and avoid delays and deadlocks.
	*/

	/**
	* DOC: dup_failed_modules - tracks duplicate failed modules
	*
	* Linked list of modules which failed to be loaded because an already existing
	* module with the same name was already being processed or already loaded.
	* The finit_module() system call incurs heavy virtual memory allocations. In
	* the worst case an finit_module() system call can end up allocating virtual
	* memory 3 times:
	*
	* 1) kernel_read_file_from_fd() call uses vmalloc()
	* 2) optional module decompression uses vmap()
	* 3) layout_and_allocate() can use vzalloc() or an arch specific variation of
	* vmalloc to deal with ELF sections requiring special permissions
	*
	* In practice on a typical boot today most finit_module() calls fail due to
	* the module with the same name already being loaded or about to be processed.
	* All virtual memory allocated to these failed modules will be freed with
	* no functional use.
	*
	* To help with this the dup_failed_modules allows us to track modules which
	* failed to load due to the fact that a module was already loaded or being
	* processed. There are only two points at which we can fail such calls,
	* we list them below along with the number of virtual memory allocation
	* calls:
	*
	* a) FAIL_DUP_MOD_BECOMING: at the end of early_mod_check() before
	* layout_and_allocate().
	* - with module decompression: 2 virtual memory allocation calls
	* - without module decompression: 1 virtual memory allocation calls
	* b) FAIL_DUP_MOD_LOAD: after layout_and_allocate() on add_unformed_module()
	* - with module decompression 3 virtual memory allocation calls
	* - without module decompression 2 virtual memory allocation calls
	*
	* We should strive to get this list to be as small as possible. If this list
	* is not empty it is a reflection of possible work or optimizations possible
	* either in-kernel or in userspace.
	*/
	/* List head; entries are struct mod_fail_load added with list_add_rcu(). */
	static LIST_HEAD(dup_failed_modules);

	/**
	* DOC: module statistics debugfs counters
	*
	* The total amount of wasted virtual memory allocation space during module
	* loading can be computed by adding the total from the summation:
	*
	* * @invalid_kread_bytes +
	* @invalid_decompress_bytes +
	* @invalid_becoming_bytes +
	* @invalid_mod_bytes
	*
	* The following debugfs counters are available to inspect module loading
	* failures:
	*
	* * total_mod_size: total bytes ever used by all modules we've dealt with on
	* this system
	* * total_text_size: total bytes of the .text and .init.text ELF section
	* sizes we've dealt with on this system
	* * invalid_kread_bytes: bytes allocated and then freed on failures which
	* happen due to the initial kernel_read_file_from_fd(). kernel_read_file_from_fd()
	* uses vmalloc(). These should typically not happen unless your system is
	* under memory pressure.
	* * invalid_decompress_bytes: number of bytes allocated and freed due to
	* memory allocations in the module decompression path that use vmap().
	* These typically should not happen unless your system is under memory
	* pressure.
	* * invalid_becoming_bytes: total number of bytes allocated and freed used
	* to read the kernel module userspace wants us to read before we
	* promote it to be processed to be added to our @modules linked list. These
	* failures can happen if we had a check in between a successful kernel_read_file_from_fd()
	* call and right before we allocate our private memory for the module
	* which would be kept if the module is successfully loaded. The most common
	* reason for this failure is when userspace is racing to load a module
	* which it does not yet see loaded. The first module to succeed in
	* add_unformed_module() will add a module to our &modules list and
	* subsequent loads of modules with the same name will error out at the
	* end of early_mod_check(). The check for module_patient_check_exists()
	* at the end of early_mod_check() prevents duplicate allocations
	* on layout_and_allocate() for modules already being processed. These
	* duplicate failed modules are non-fatal, however they typically are
	* indicative of userspace not seeing a module in userspace loaded yet and
	* unnecessarily trying to load a module before the kernel even has a chance
	* to begin to process prior requests. Although duplicate failures can be
	* non-fatal, we should try to reduce vmalloc() pressure proactively, so
	* ideally after boot this will be as close to 0 as possible. If module
	* decompression was used we also add to this counter the cost of the
	* initial kernel_read_file_from_fd() of the compressed module. If module
	* decompression was not used the value represents the total allocated and
	* freed bytes in kernel_read_file_from_fd() calls for these type of
	* failures. These failures can occur because:
	*
	* * module_sig_check() - module signature checks
	* * elf_validity_cache_copy() - some ELF validation issue
	* * early_mod_check():
	*
	* * blacklisting
	* * failed to rewrite section headers
	* * version magic
	* * live patch requirements didn't check out
	* * the module was detected as being already present
	*
	* * invalid_mod_bytes: these are the total number of bytes allocated and
	* freed due to failures after we did all the sanity checks of the module
	* which userspace passed to us and after our first check that the module
	* is unique. A module can still fail to load if we detect the module is
	* loaded after we allocate space for it with layout_and_allocate(), we do
	* this check right before processing the module as live and run its
	* initialization routines. Note that you have a failure of this type it
	* also means the respective kernel_read_file_from_fd() memory space was
	* also freed and not used, and so we increment this counter with twice
	* the size of the module. Additionally if you used module decompression
	* the size of the compressed module is also added to this counter.
	*
	* * modcount: how many modules we've loaded in our kernel life time
	* * failed_kreads: how many modules failed due to failed kernel_read_file_from_fd()
	* * failed_decompress: how many failed module decompression attempts we've had.
	* These really should not happen unless your compression / decompression
	* might be broken.
	* * failed_becoming: how many modules failed after we kernel_read_file_from_fd()
	* it and before we allocate memory for it with layout_and_allocate(). This
	* counter is never incremented if you manage to validate the module and
	* call layout_and_allocate() for it.
	* * failed_load_modules: how many modules failed once we've allocated our
	* private space for our module using layout_and_allocate(). These failures
	* should hopefully mostly be dealt with already. Races in theory could
	* still exist here, but it would just mean the kernel had started processing
	* two threads concurrently up to early_mod_check() and one thread won.
	* These failures are good signs the kernel or userspace is doing something
	* seriously stupid or that could be improved. We should strive to fix these,
	* but it is perhaps not easy to fix them. A recent example are the modules
	* requests incurred for frequency modules, a separate module request was
	* being issued for each CPU on a system.
	*/

	/* Byte counters; the static ones are only written in this file. */
	atomic_long_t total_mod_size;
	atomic_long_t total_text_size;
	atomic_long_t invalid_kread_bytes;
	atomic_long_t invalid_decompress_bytes;
	static atomic_long_t invalid_becoming_bytes;
	static atomic_long_t invalid_mod_bytes;
	/* Event counters; the static ones are only written in this file. */
	atomic_t modcount;
	atomic_t failed_kreads;
	atomic_t failed_decompress;
	static atomic_t failed_becoming;
	static atomic_t failed_load_modules;

	static const char mod_fail_to_str(struct mod_fail_load mod_fail)
	{
	if (test_bit(FAIL_DUP_MOD_BECOMING, &mod_fail->dup_fail_mask) &&
	test_bit(FAIL_DUP_MOD_LOAD, &mod_fail->dup_fail_mask))
	return "Becoming & Load";
	if (test_bit(FAIL_DUP_MOD_BECOMING, &mod_fail->dup_fail_mask))
	return "Becoming";
	if (test_bit(FAIL_DUP_MOD_LOAD, &mod_fail->dup_fail_mask))
	return "Load";
	return "Bug-on-stats";
	}

	/*
	 * Record one failed load in failed_load_modules and add the bytes it
	 * wasted to invalid_mod_bytes: twice @info->len, plus
	 * @info->compressed_len when @flags has MODULE_INIT_COMPRESSED_FILE set
	 * and CONFIG_MODULE_DECOMPRESS is enabled.
	 */
	void mod_stat_bump_invalid(struct load_info *info, int flags)
	{
		unsigned long wasted = info->len * 2;

	#if defined(CONFIG_MODULE_DECOMPRESS)
		if (flags & MODULE_INIT_COMPRESSED_FILE)
			wasted += info->compressed_len;
	#endif
		atomic_long_add(wasted, &invalid_mod_bytes);
		atomic_inc(&failed_load_modules);
	}

	/*
	 * Record one failure in failed_becoming and add the bytes it wasted to
	 * invalid_becoming_bytes: @info->len, plus @info->compressed_len when
	 * @flags has MODULE_INIT_COMPRESSED_FILE set and CONFIG_MODULE_DECOMPRESS
	 * is enabled.
	 */
	void mod_stat_bump_becoming(struct load_info *info, int flags)
	{
		unsigned long wasted = info->len;

	#if defined(CONFIG_MODULE_DECOMPRESS)
		if (flags & MODULE_INIT_COMPRESSED_FILE)
			wasted += info->compressed_len;
	#endif
		atomic_inc(&failed_becoming);
		atomic_long_add(wasted, &invalid_becoming_bytes);
	}

	/*
	 * try_add_failed_module() - record a duplicate module load failure
	 * @name: name of the module which failed to load
	 * @reason: which duplicate check rejected it; used as a bit number in
	 *	the entry's dup_fail_mask
	 *
	 * Bumps the hit count and reason mask of the existing entry for @name,
	 * or allocates and adds a new entry. The list walk is annotated with
	 * lockdep_is_held(&module_mutex), i.e. callers are expected to hold
	 * module_mutex.
	 *
	 * Return: 0 on success, -ENOMEM if a new entry could not be allocated.
	 */
	int try_add_failed_module(const char *name, enum fail_dup_mod_reason reason)
	{
		struct mod_fail_load *mod_fail;

		list_for_each_entry_rcu(mod_fail, &dup_failed_modules, list,
					lockdep_is_held(&module_mutex)) {
			if (!strcmp(mod_fail->name, name)) {
				atomic_long_inc(&mod_fail->count);
				__set_bit(reason, &mod_fail->dup_fail_mask);
				goto out;
			}
		}

		mod_fail = kzalloc(sizeof(*mod_fail), GFP_KERNEL);
		if (!mod_fail)
			return -ENOMEM;
		/*
		 * Bounded copy: never write past the embedded name buffer, and
		 * always leave it NUL-terminated.
		 */
		strscpy(mod_fail->name, name, sizeof(mod_fail->name));
		__set_bit(reason, &mod_fail->dup_fail_mask);
		atomic_long_inc(&mod_fail->count);
		list_add_rcu(&mod_fail->list, &dup_failed_modules);
	out:
		return 0;
	}

	/*
	* At 64 bytes per module and assuming a 1024 bytes preamble we can fit the
	* 112 module prints within 8k.
	*
	* 1024 + (64*112) = 8k
	*/
	#define MAX_PREAMBLE 1024
	#define MAX_FAILED_MOD_PRINT 112
	#define MAX_BYTES_PER_MOD 64
	static ssize_t read_file_mod_stats(struct file file, char __user user_buf,
	size_t count, loff_t *ppos)
	{
	struct mod_fail_load *mod_fail;
	unsigned int len, size, count_failed = 0;
	char *buf;
	int ret;
	u32 live_mod_count, fkreads, fdecompress, fbecoming, floads;
	unsigned long total_size, text_size, ikread_bytes, ibecoming_bytes,
	idecompress_bytes, imod_bytes, total_virtual_lost;

	live_mod_count = atomic_read(&modcount);
	fkreads = atomic_read(&failed_kreads);
	fdecompress = atomic_read(&failed_decompress);
	fbecoming = atomic_read(&failed_becoming);
	floads = atomic_read(&failed_load_modules);

	total_size = atomic_long_read(&total_mod_size);
	text_size = atomic_long_read(&total_text_size);
	ikread_bytes = atomic_long_read(&invalid_kread_bytes);
	idecompress_bytes = atomic_long_read(&invalid_decompress_bytes);
	ibecoming_bytes = atomic_long_read(&invalid_becoming_bytes);
	imod_bytes = atomic_long_read(&invalid_mod_bytes);

	total_virtual_lost = ikread_bytes + idecompress_bytes + ibecoming_bytes + imod_bytes;

	size = MAX_PREAMBLE + min((unsigned int)(floads + fbecoming),
	(unsigned int)MAX_FAILED_MOD_PRINT) * MAX_BYTES_PER_MOD;
	buf = kzalloc(size, GFP_KERNEL);
	if (buf == NULL)
	return -ENOMEM;

	/* The beginning of our debug preamble */
	len = scnprintf(buf, size, "%25s\t%u\n", "Mods ever loaded", live_mod_count);

	len += scnprintf(buf + len, size - len, "%25s\t%u\n", "Mods failed on kread", fkreads);

	len += scnprintf(buf + len, size - len, "%25s\t%u\n", "Mods failed on decompress",
	fdecompress);
	len += scnprintf(buf + len, size - len, "%25s\t%u\n", "Mods failed on becoming", fbecoming);

	len += scnprintf(buf + len, size - len, "%25s\t%u\n", "Mods failed on load", floads);

	len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Total module size", total_size);
	len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Total mod text size", text_size);

	len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Failed kread bytes", ikread_bytes);

	len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Failed decompress bytes",
	idecompress_bytes);

	len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Failed becoming bytes", ibecoming_bytes);

	len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Failed kmod bytes", imod_bytes);

	len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Virtual mem wasted bytes", total_virtual_lost);

	if (live_mod_count && total_size) {
	len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Average mod size",
	DIV_ROUND_UP(total_size, live_mod_count));
	}

	if (live_mod_count && text_size) {
	len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Average mod text size",
	DIV_ROUND_UP(text_size, live_mod_count));
	}

	/*
	* We use WARN_ON_ONCE() for the counters to ensure we always have parity
	* for keeping tabs on a type of failure with one type of byte counter.
	* The counters for imod_bytes does not increase for fkreads failures
	* for example, and so on.
	*/

	WARN_ON_ONCE(ikread_bytes && !fkreads);
	if (fkreads && ikread_bytes) {
	len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Avg fail kread bytes",
	DIV_ROUND_UP(ikread_bytes, fkreads));
	}

	WARN_ON_ONCE(ibecoming_bytes && !fbecoming);
	if (fbecoming && ibecoming_bytes) {
	len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Avg fail becoming bytes",
	DIV_ROUND_UP(ibecoming_bytes, fbecoming));
	}

	WARN_ON_ONCE(idecompress_bytes && !fdecompress);
	if (fdecompress && idecompress_bytes) {
	len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Avg fail decomp bytes",
	DIV_ROUND_UP(idecompress_bytes, fdecompress));
	}

	WARN_ON_ONCE(imod_bytes && !floads);
	if (floads && imod_bytes) {
	len += scnprintf(buf + len, size - len, "%25s\t%lu\n", "Average fail load bytes",
	DIV_ROUND_UP(imod_bytes, floads));
	}

	/* End of our debug preamble header. */

	/* Catch when we've gone beyond our expected preamble */
	WARN_ON_ONCE(len >= MAX_PREAMBLE);

	if (list_empty(&dup_failed_modules))
	goto out;

	len += scnprintf(buf + len, size - len, "Duplicate failed modules:\n");
	len += scnprintf(buf + len, size - len, "%25s\t%15s\t%25s\n",
	"Module-name", "How-many-times", "Reason");
	mutex_lock(&module_mutex);


	list_for_each_entry_rcu(mod_fail, &dup_failed_modules, list) {
	if (WARN_ON_ONCE(++count_failed >= MAX_FAILED_MOD_PRINT))
	goto out_unlock;
	len += scnprintf(buf + len, size - len, "%25s\t%15lu\t%25s\n", mod_fail->name,
	atomic_long_read(&mod_fail->count), mod_fail_to_str(mod_fail));
	}
	out_unlock:
	mutex_unlock(&module_mutex);
	out:
	ret = simple_read_from_buffer(user_buf, count, ppos, buf, len);
	kfree(buf);
	return ret;
	}
	#undef MAX_PREAMBLE
	#undef MAX_FAILED_MOD_PRINT
	#undef MAX_BYTES_PER_MOD

	/*
	 * File operations built around read_file_mod_stats(). Opening and seeking
	 * are delegated to the generic simple_open() and default_llseek() helpers;
	 * no write handler is provided.
	 */
	static const struct file_operations fops_mod_stats = {
		.owner	= THIS_MODULE,
		.open	= simple_open,
		.read	= read_file_mod_stats,
		.llseek	= default_llseek,
	};

	/*
	 * Helpers local to module_stats_init(): each one creates a 0400 debugfs
	 * entry under mod_debugfs_root, named after the variable it exposes.
	 * mod_debug_add_ulong() publishes the variable's .counter member as an
	 * unsigned long; mod_debug_add_atomic() publishes the variable itself
	 * through debugfs_create_atomic_t().
	 */
	#define mod_debug_add_ulong(stat)					\
		debugfs_create_ulong(#stat, 0400, mod_debugfs_root,		\
				     (unsigned long *) &stat.counter)
	#define mod_debug_add_atomic(stat)					\
		debugfs_create_atomic_t(#stat, 0400, mod_debugfs_root, &stat)

	/*
	 * module_stats_init() - create the debugfs entries for the statistics.
	 *
	 * Creates one entry per individual counter plus the aggregated "stats"
	 * file served by fops_mod_stats. Always returns 0; the return values of
	 * the debugfs_create_*() calls are not inspected.
	 */
	static int __init module_stats_init(void)
	{
		/* Counters exported as unsigned long values. */
		mod_debug_add_ulong(total_mod_size);
		mod_debug_add_ulong(total_text_size);
		mod_debug_add_ulong(invalid_kread_bytes);
		mod_debug_add_ulong(invalid_decompress_bytes);
		mod_debug_add_ulong(invalid_becoming_bytes);
		mod_debug_add_ulong(invalid_mod_bytes);

		/* Counters exported as atomic_t values. */
		mod_debug_add_atomic(modcount);
		mod_debug_add_atomic(failed_kreads);
		mod_debug_add_atomic(failed_decompress);
		mod_debug_add_atomic(failed_becoming);
		mod_debug_add_atomic(failed_load_modules);

		/* Human-readable summary of all of the above. */
		debugfs_create_file("stats", 0400, mod_debugfs_root, mod_debugfs_root,
				    &fops_mod_stats);

		return 0;
	}
	#undef mod_debug_add_ulong
	#undef mod_debug_add_atomic
	module_init(module_stats_init);