M7350/kernel/mm/process_reclaim.c

/*
 * Copyright (c) 2015, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/math64.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/sort.h>
#include <linux/oom.h>
#include <linux/sched.h>
#include <linux/rcupdate.h>
#include <linux/notifier.h>
#include <linux/vmpressure.h>

#define CREATE_TRACE_POINTS
#include <trace/events/process_reclaim.h>

/* Capacity of the per-run candidate table built by swap_fn() */
#define MAX_SWAP_TASKS SWAP_CLUSTER_MAX

/* Work item running swap_fn(); queued from vmpressure_notifier() */
static void swap_fn(struct work_struct *work);
DECLARE_WORK(swap_work, swap_fn);

/* User knob to enable/disable process reclaim feature */
static int enable_process_reclaim;
module_param_named(enable_process_reclaim, enable_process_reclaim, int,
	S_IRUGO | S_IWUSR);

/* The max number of pages tried to be reclaimed in a single run */
int per_swap_size = SWAP_CLUSTER_MAX * 32;
module_param_named(per_swap_size, per_swap_size, int, S_IRUGO | S_IWUSR);

/*
 * Read-only parameter: running average of the per-run efficiency
 * (reclaimed * 100 / scanned). swap_fn() updates it at the end of each
 * run that scanned at least one page by averaging the previous value
 * with the efficiency of that run.
 */
int reclaim_avg_efficiency;
module_param_named(reclaim_avg_efficiency, reclaim_avg_efficiency,
			int, S_IRUGO);

/*
 * The vmpressure region where process reclaim operates: work is queued
 * only while pressure_min <= pressure < pressure_max.
 */
static unsigned long pressure_min = 50;
static unsigned long pressure_max = 90;
module_param_named(pressure_min, pressure_min, ulong, S_IRUGO | S_IWUSR);
module_param_named(pressure_max, pressure_max, ulong, S_IRUGO | S_IWUSR);

/*
 * Scheduling the process reclaim work unnecessarily when the
 * reclaim efficiency is low does not make sense. We try to
 * detect a drop in efficiency and disable reclaim for a time
 * period. Both that period and the period over which we monitor
 * a drop in efficiency are defined by swap_eff_win. swap_opt_eff
 * is the optimal efficiency (in percent) used as the threshold
 * for this.
 */
static int swap_eff_win = 2;
module_param_named(swap_eff_win, swap_eff_win, int, S_IRUGO | S_IWUSR);

static int swap_opt_eff = 50;
module_param_named(swap_opt_eff, swap_opt_eff, int, S_IRUGO | S_IWUSR);

/*
 * Number of upcoming notifier calls that must not queue reclaim.
 * Set by swap_fn(), consumed one per call by vmpressure_notifier().
 */
static atomic_t skip_reclaim = ATOMIC_INIT(0);
/* Not atomic since only a single instance of swap_fn run at a time */
/* Counts consecutive swap_fn() runs whose efficiency was below swap_opt_eff */
static int monitor_eff;

/*
 * One reclaim candidate recorded by swap_fn().
 *
 * @p:             task returned by find_lock_task_mm() for the process
 * @tasksize:      MM_ANONPAGES counter of the task's mm, in pages
 * @oom_score_adj: oom_score_adj of the task's signal struct at scan time
 */
struct selected_task {
	struct task_struct *p;
	int tasksize;
	short oom_score_adj;
};

/*
 * selected_cmp() - sort() comparator ordering candidates by ascending
 * tasksize.
 *
 * @a: pointer to the first struct selected_task
 * @b: pointer to the second struct selected_task
 *
 * Returns a negative value if @a is smaller than @b, a positive value if
 * it is larger and 0 when both have the same tasksize, so that the
 * comparator is a consistent three-way ordering (cmp(x, x) == 0).
 */
int selected_cmp(const void *a, const void *b)
{
	const struct selected_task *x = a;
	const struct selected_task *y = b;

	if (x->tasksize < y->tasksize)
		return -1;
	if (x->tasksize > y->tasksize)
		return 1;

	return 0;
}

/*
 * test_task_flag() - check a thread flag across a whole thread group.
 *
 * @p:    any task of the thread group to inspect
 * @flag: thread-info flag to look for (swap_fn() passes TIF_MEMDIE)
 *
 * Walks the threads of @p under rcu_read_lock(), taking task_lock() on
 * each thread while its flag is tested. The walk stops at the first
 * thread that has @flag set.
 *
 * Returns 1 if some thread has @flag set, 0 otherwise.
 */
static int test_task_flag(struct task_struct *p, int flag)
{
	struct task_struct *thread;
	int found = 0;

	rcu_read_lock();
	for_each_thread(p, thread) {
		task_lock(thread);
		if (test_tsk_thread_flag(thread, flag))
			found = 1;
		task_unlock(thread);

		if (found)
			break;
	}
	rcu_read_unlock();

	return found;
}

static void swap_fn(struct work_struct *work)
{
	struct task_struct *tsk;
	struct reclaim_param rp;

	/* Pick the best MAX_SWAP_TASKS tasks in terms of anon size */
	struct selected_task selected[MAX_SWAP_TASKS] = {{0, 0, 0},};
	int si = 0;
	int i;
	int tasksize;
	int total_sz = 0;
	short min_score_adj = 360;
	int total_scan = 0;
	int total_reclaimed = 0;
	int nr_to_reclaim;
	int efficiency;

	rcu_read_lock();
	for_each_process(tsk) {
		struct task_struct *p;
		short oom_score_adj;

		if (tsk->flags & PF_KTHREAD)
			continue;

		if (test_task_flag(tsk, TIF_MEMDIE))
			continue;

		p = find_lock_task_mm(tsk);
		if (!p)
			continue;

		oom_score_adj = p->signal->oom_score_adj;
		if (oom_score_adj < min_score_adj) {
			task_unlock(p);
			continue;
		}

		tasksize = get_mm_counter(p->mm, MM_ANONPAGES);
		task_unlock(p);

		if (tasksize <= 0)
			continue;

		if (si == MAX_SWAP_TASKS) {
			sort(&selected[0], MAX_SWAP_TASKS,
					sizeof(struct selected_task),
					&selected_cmp, NULL);
			if (tasksize < selected[0].tasksize)
				continue;
			selected[0].p = p;
			selected[0].oom_score_adj = oom_score_adj;
			selected[0].tasksize = tasksize;
		} else {
			selected[si].p = p;
			selected[si].oom_score_adj = oom_score_adj;
			selected[si].tasksize = tasksize;
			si++;
		}
	}

	for (i = 0; i < si; i++)
		total_sz += selected[i].tasksize;

	/* Skip reclaim if total size is too less */
	if (total_sz < SWAP_CLUSTER_MAX) {
		rcu_read_unlock();
		return;
	}

	for (i = 0; i < si; i++)
		get_task_struct(selected[i].p);

	rcu_read_unlock();

	while (si--) {
		nr_to_reclaim =
			(selected[si].tasksize * per_swap_size) / total_sz;
		/* scan atleast a page */
		if (!nr_to_reclaim)
			nr_to_reclaim = 1;

		rp = reclaim_task_anon(selected[si].p, nr_to_reclaim);

		trace_process_reclaim(selected[si].tasksize,
				selected[si].oom_score_adj, rp.nr_scanned,
				rp.nr_reclaimed, per_swap_size, total_sz,
				nr_to_reclaim);
		total_scan += rp.nr_scanned;
		total_reclaimed += rp.nr_reclaimed;
		put_task_struct(selected[si].p);
	}

	if (total_scan) {
		efficiency = (total_reclaimed * 100) / total_scan;

		if (efficiency < swap_opt_eff) {
			if (++monitor_eff == swap_eff_win) {
				atomic_set(&skip_reclaim, swap_eff_win);
				monitor_eff = 0;
			}
		} else {
			monitor_eff = 0;
		}

		reclaim_avg_efficiency =
			(efficiency + reclaim_avg_efficiency) / 2;
		trace_process_reclaim_eff(efficiency, reclaim_avg_efficiency);
	}
}

static int vmpressure_notifier(struct notifier_block *nb,
			unsigned long action, void *data)
{
	unsigned long pressure = action;

	if (!enable_process_reclaim)
		return 0;

	if (!current_is_kswapd())
		return 0;

	if (0 <= atomic_dec_if_positive(&skip_reclaim))
		return 0;

	if ((pressure >= pressure_min) && (pressure < pressure_max))
		if (!work_pending(&swap_work))
			queue_work(system_unbound_wq, &swap_work);
	return 0;
}

/*
 * Notifier block routing vmpressure notifications to
 * vmpressure_notifier(); registered in process_reclaim_init() and
 * removed in process_reclaim_exit().
 */
static struct notifier_block vmpr_nb = {
	.notifier_call = vmpressure_notifier,
};

/*
 * process_reclaim_init() - hook process reclaim into vmpressure.
 *
 * Registers vmpr_nb with the vmpressure notifier chain and hands the
 * registration result back to the module loader instead of reporting
 * success unconditionally.
 *
 * Returns 0 on success or the error reported by
 * vmpressure_notifier_register().
 */
static int __init process_reclaim_init(void)
{
	return vmpressure_notifier_register(&vmpr_nb);
}

/*
 * process_reclaim_exit() - undo process_reclaim_init() by removing
 * vmpr_nb from the vmpressure notifier chain.
 */
static void __exit process_reclaim_exit(void)
{
	vmpressure_notifier_unregister(&vmpr_nb);
}

/* Module entry and exit points */
module_init(process_reclaim_init);
module_exit(process_reclaim_exit);
M7350v5_en_gpl 2024-09-09 08:57:42 +00:00			`/*`
			`* Copyright (c) 2015, The Linux Foundation. All rights reserved.`
			`*`
			`* This program is free software; you can redistribute it and/or modify`
			`* it under the terms of the GNU General Public License version 2 and`
			`* only version 2 as published by the Free Software Foundation.`
			`*`
			`* This program is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`* GNU General Public License for more details.`
			`*/`
			`#include <linux/module.h>`
			`#include <linux/kernel.h>`
			`#include <linux/mm.h>`
			`#include <linux/swap.h>`
			`#include <linux/sort.h>`
			`#include <linux/oom.h>`
			`#include <linux/sched.h>`
			`#include <linux/rcupdate.h>`
			`#include <linux/notifier.h>`
			`#include <linux/vmpressure.h>`

			`#define CREATE_TRACE_POINTS`
			`#include <trace/events/process_reclaim.h>`

			`#define MAX_SWAP_TASKS SWAP_CLUSTER_MAX`

			`static void swap_fn(struct work_struct *work);`
			`DECLARE_WORK(swap_work, swap_fn);`

			`/* User knob to enable/disable process reclaim feature */`
			`static int enable_process_reclaim;`
			`module_param_named(enable_process_reclaim, enable_process_reclaim, int,`
			`S_IRUGO \| S_IWUSR);`

			`/* The max number of pages tried to be reclaimed in a single run */`
			`int per_swap_size = SWAP_CLUSTER_MAX * 32;`
			`module_param_named(per_swap_size, per_swap_size, int, S_IRUGO \| S_IWUSR);`

			`int reclaim_avg_efficiency;`
			`module_param_named(reclaim_avg_efficiency, reclaim_avg_efficiency,`
			`int, S_IRUGO);`

			`/* The vmpressure region where process reclaim operates */`
			`static unsigned long pressure_min = 50;`
			`static unsigned long pressure_max = 90;`
			`module_param_named(pressure_min, pressure_min, ulong, S_IRUGO \| S_IWUSR);`
			`module_param_named(pressure_max, pressure_max, ulong, S_IRUGO \| S_IWUSR);`

			`/*`
			`* Scheduling process reclaim workqueue unecessarily`
			`* when the reclaim efficiency is low does not make`
			`* sense. We try to detect a drop in efficiency and`
			`* disable reclaim for a time period. This period and the`
			`* period for which we monitor a drop in efficiency is`
			`* defined by swap_eff_win. swap_opt_eff is the optimal`
			`* efficincy used as theshold for this.`
			`*/`
			`static int swap_eff_win = 2;`
			`module_param_named(swap_eff_win, swap_eff_win, int, S_IRUGO \| S_IWUSR);`

			`static int swap_opt_eff = 50;`
			`module_param_named(swap_opt_eff, swap_opt_eff, int, S_IRUGO \| S_IWUSR);`

			`static atomic_t skip_reclaim = ATOMIC_INIT(0);`
			`/* Not atomic since only a single instance of swap_fn run at a time */`
			`static int monitor_eff;`

			`struct selected_task {`
			`struct task_struct *p;`
			`int tasksize;`
			`short oom_score_adj;`
			`};`

			`int selected_cmp(const void *a, const void *b)`
			`{`
			`const struct selected_task *x = a;`
			`const struct selected_task *y = b;`
			`int ret;`

			`ret = x->tasksize < y->tasksize ? -1 : 1;`

			`return ret;`
			`}`

			`static int test_task_flag(struct task_struct *p, int flag)`
			`{`
			`struct task_struct *t = p;`

			`rcu_read_lock();`
			`for_each_thread(p, t) {`
			`task_lock(t);`
			`if (test_tsk_thread_flag(t, flag)) {`
			`task_unlock(t);`
			`rcu_read_unlock();`
			`return 1;`
			`}`
			`task_unlock(t);`
			`}`
			`rcu_read_unlock();`

			`return 0;`
			`}`

			`static void swap_fn(struct work_struct *work)`
			`{`
			`struct task_struct *tsk;`
			`struct reclaim_param rp;`

			`/* Pick the best MAX_SWAP_TASKS tasks in terms of anon size */`
			`struct selected_task selected[MAX_SWAP_TASKS] = {{0, 0, 0},};`
			`int si = 0;`
			`int i;`
			`int tasksize;`
			`int total_sz = 0;`
			`short min_score_adj = 360;`
			`int total_scan = 0;`
			`int total_reclaimed = 0;`
			`int nr_to_reclaim;`
			`int efficiency;`

			`rcu_read_lock();`
			`for_each_process(tsk) {`
			`struct task_struct *p;`
			`short oom_score_adj;`

			`if (tsk->flags & PF_KTHREAD)`
			`continue;`

			`if (test_task_flag(tsk, TIF_MEMDIE))`
			`continue;`

			`p = find_lock_task_mm(tsk);`
			`if (!p)`
			`continue;`

			`oom_score_adj = p->signal->oom_score_adj;`
			`if (oom_score_adj < min_score_adj) {`
			`task_unlock(p);`
			`continue;`
			`}`

			`tasksize = get_mm_counter(p->mm, MM_ANONPAGES);`
			`task_unlock(p);`

			`if (tasksize <= 0)`
			`continue;`

			`if (si == MAX_SWAP_TASKS) {`
			`sort(&selected[0], MAX_SWAP_TASKS,`
			`sizeof(struct selected_task),`
			`&selected_cmp, NULL);`
			`if (tasksize < selected[0].tasksize)`
			`continue;`
			`selected[0].p = p;`
			`selected[0].oom_score_adj = oom_score_adj;`
			`selected[0].tasksize = tasksize;`
			`} else {`
			`selected[si].p = p;`
			`selected[si].oom_score_adj = oom_score_adj;`
			`selected[si].tasksize = tasksize;`
			`si++;`
			`}`
			`}`

			`for (i = 0; i < si; i++)`
			`total_sz += selected[i].tasksize;`

			`/* Skip reclaim if total size is too less */`
			`if (total_sz < SWAP_CLUSTER_MAX) {`
			`rcu_read_unlock();`
			`return;`
			`}`

			`for (i = 0; i < si; i++)`
			`get_task_struct(selected[i].p);`

			`rcu_read_unlock();`

			`while (si--) {`
			`nr_to_reclaim =`
			`(selected[si].tasksize * per_swap_size) / total_sz;`
			`/* scan atleast a page */`
			`if (!nr_to_reclaim)`
			`nr_to_reclaim = 1;`

			`rp = reclaim_task_anon(selected[si].p, nr_to_reclaim);`

			`trace_process_reclaim(selected[si].tasksize,`
			`selected[si].oom_score_adj, rp.nr_scanned,`
			`rp.nr_reclaimed, per_swap_size, total_sz,`
			`nr_to_reclaim);`
			`total_scan += rp.nr_scanned;`
			`total_reclaimed += rp.nr_reclaimed;`
			`put_task_struct(selected[si].p);`
			`}`

			`if (total_scan) {`
			`efficiency = (total_reclaimed * 100) / total_scan;`

			`if (efficiency < swap_opt_eff) {`
			`if (++monitor_eff == swap_eff_win) {`
			`atomic_set(&skip_reclaim, swap_eff_win);`
			`monitor_eff = 0;`
			`}`
			`} else {`
			`monitor_eff = 0;`
			`}`

			`reclaim_avg_efficiency =`
			`(efficiency + reclaim_avg_efficiency) / 2;`
			`trace_process_reclaim_eff(efficiency, reclaim_avg_efficiency);`
			`}`
			`}`

			`static int vmpressure_notifier(struct notifier_block *nb,`
			`unsigned long action, void *data)`
			`{`
			`unsigned long pressure = action;`

			`if (!enable_process_reclaim)`
			`return 0;`

			`if (!current_is_kswapd())`
			`return 0;`

			`if (0 <= atomic_dec_if_positive(&skip_reclaim))`
			`return 0;`

			`if ((pressure >= pressure_min) && (pressure < pressure_max))`
			`if (!work_pending(&swap_work))`
			`queue_work(system_unbound_wq, &swap_work);`
			`return 0;`
			`}`

			`static struct notifier_block vmpr_nb = {`
			`.notifier_call = vmpressure_notifier,`
			`};`

			`static int __init process_reclaim_init(void)`
			`{`
			`vmpressure_notifier_register(&vmpr_nb);`
			`return 0;`
			`}`

			`static void __exit process_reclaim_exit(void)`
			`{`
			`vmpressure_notifier_unregister(&vmpr_nb);`
			`}`

			`module_init(process_reclaim_init);`
			`module_exit(process_reclaim_exit);`