654 lines
14 KiB
C
654 lines
14 KiB
C
/**
|
|
* kmemcheck - a heavyweight memory checker for the linux kernel
|
|
* Copyright (C) 2007, 2008 Vegard Nossum <vegardno@ifi.uio.no>
|
|
* (With a lot of help from Ingo Molnar and Pekka Enberg.)
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License (version 2) as
|
|
* published by the Free Software Foundation.
|
|
*/
|
|
|
|
#include <linux/init.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/kallsyms.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/kmemcheck.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/module.h>
|
|
#include <linux/page-flags.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/ptrace.h>
|
|
#include <linux/string.h>
|
|
#include <linux/types.h>
|
|
|
|
#include <asm/cacheflush.h>
|
|
#include <asm/kmemcheck.h>
|
|
#include <asm/pgtable.h>
|
|
#include <asm/tlbflush.h>
|
|
|
|
#include "error.h"
|
|
#include "opcode.h"
|
|
#include "pte.h"
|
|
#include "selftest.h"
|
|
#include "shadow.h"
|
|
|
|
|
|
#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT
|
|
# define KMEMCHECK_ENABLED 0
|
|
#endif
|
|
|
|
#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT
|
|
# define KMEMCHECK_ENABLED 1
|
|
#endif
|
|
|
|
#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT
|
|
# define KMEMCHECK_ENABLED 2
|
|
#endif
|
|
|
|
int kmemcheck_enabled = KMEMCHECK_ENABLED;
|
|
|
|
int __init kmemcheck_init(void)
|
|
{
|
|
#ifdef CONFIG_SMP
|
|
/*
|
|
* Limit SMP to use a single CPU. We rely on the fact that this code
|
|
* runs before SMP is set up.
|
|
*/
|
|
if (setup_max_cpus > 1) {
|
|
printk(KERN_INFO
|
|
"kmemcheck: Limiting number of CPUs to 1.\n");
|
|
setup_max_cpus = 1;
|
|
}
|
|
#endif
|
|
|
|
if (!kmemcheck_selftest()) {
|
|
printk(KERN_INFO "kmemcheck: self-tests failed; disabling\n");
|
|
kmemcheck_enabled = 0;
|
|
return -EINVAL;
|
|
}
|
|
|
|
printk(KERN_INFO "kmemcheck: Initialized\n");
|
|
return 0;
|
|
}
|
|
|
|
early_initcall(kmemcheck_init);
|
|
|
|
/*
|
|
* We need to parse the kmemcheck= option before any memory is allocated.
|
|
*/
|
|
static int __init param_kmemcheck(char *str)
|
|
{
|
|
if (!str)
|
|
return -EINVAL;
|
|
|
|
sscanf(str, "%d", &kmemcheck_enabled);
|
|
return 0;
|
|
}
|
|
|
|
early_param("kmemcheck", param_kmemcheck);
|
|
|
|
int kmemcheck_show_addr(unsigned long address)
|
|
{
|
|
pte_t *pte;
|
|
|
|
pte = kmemcheck_pte_lookup(address);
|
|
if (!pte)
|
|
return 0;
|
|
|
|
set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
|
|
__flush_tlb_one(address);
|
|
return 1;
|
|
}
|
|
|
|
int kmemcheck_hide_addr(unsigned long address)
|
|
{
|
|
pte_t *pte;
|
|
|
|
pte = kmemcheck_pte_lookup(address);
|
|
if (!pte)
|
|
return 0;
|
|
|
|
set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
|
|
__flush_tlb_one(address);
|
|
return 1;
|
|
}
|
|
|
|
struct kmemcheck_context {
|
|
bool busy;
|
|
int balance;
|
|
|
|
/*
|
|
* There can be at most two memory operands to an instruction, but
|
|
* each address can cross a page boundary -- so we may need up to
|
|
* four addresses that must be hidden/revealed for each fault.
|
|
*/
|
|
unsigned long addr[4];
|
|
unsigned long n_addrs;
|
|
unsigned long flags;
|
|
|
|
/* Data size of the instruction that caused a fault. */
|
|
unsigned int size;
|
|
};
|
|
|
|
static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context);
|
|
|
|
bool kmemcheck_active(struct pt_regs *regs)
|
|
{
|
|
struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
|
|
|
|
return data->balance > 0;
|
|
}
|
|
|
|
/* Save an address that needs to be shown/hidden */
|
|
static void kmemcheck_save_addr(unsigned long addr)
|
|
{
|
|
struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
|
|
|
|
BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr));
|
|
data->addr[data->n_addrs++] = addr;
|
|
}
|
|
|
|
static unsigned int kmemcheck_show_all(void)
|
|
{
|
|
struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
|
|
unsigned int i;
|
|
unsigned int n;
|
|
|
|
n = 0;
|
|
for (i = 0; i < data->n_addrs; ++i)
|
|
n += kmemcheck_show_addr(data->addr[i]);
|
|
|
|
return n;
|
|
}
|
|
|
|
static unsigned int kmemcheck_hide_all(void)
|
|
{
|
|
struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
|
|
unsigned int i;
|
|
unsigned int n;
|
|
|
|
n = 0;
|
|
for (i = 0; i < data->n_addrs; ++i)
|
|
n += kmemcheck_hide_addr(data->addr[i]);
|
|
|
|
return n;
|
|
}
|
|
|
|
/*
|
|
* Called from the #PF handler.
|
|
*/
|
|
void kmemcheck_show(struct pt_regs *regs)
|
|
{
|
|
struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
|
|
|
|
BUG_ON(!irqs_disabled());
|
|
|
|
if (unlikely(data->balance != 0)) {
|
|
kmemcheck_show_all();
|
|
kmemcheck_error_save_bug(regs);
|
|
data->balance = 0;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* None of the addresses actually belonged to kmemcheck. Note that
|
|
* this is not an error.
|
|
*/
|
|
if (kmemcheck_show_all() == 0)
|
|
return;
|
|
|
|
++data->balance;
|
|
|
|
/*
|
|
* The IF needs to be cleared as well, so that the faulting
|
|
* instruction can run "uninterrupted". Otherwise, we might take
|
|
* an interrupt and start executing that before we've had a chance
|
|
* to hide the page again.
|
|
*
|
|
* NOTE: In the rare case of multiple faults, we must not override
|
|
* the original flags:
|
|
*/
|
|
if (!(regs->flags & X86_EFLAGS_TF))
|
|
data->flags = regs->flags;
|
|
|
|
regs->flags |= X86_EFLAGS_TF;
|
|
regs->flags &= ~X86_EFLAGS_IF;
|
|
}
|
|
|
|
/*
|
|
* Called from the #DB handler.
|
|
*/
|
|
void kmemcheck_hide(struct pt_regs *regs)
|
|
{
|
|
struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
|
|
int n;
|
|
|
|
BUG_ON(!irqs_disabled());
|
|
|
|
if (unlikely(data->balance != 1)) {
|
|
kmemcheck_show_all();
|
|
kmemcheck_error_save_bug(regs);
|
|
data->n_addrs = 0;
|
|
data->balance = 0;
|
|
|
|
if (!(data->flags & X86_EFLAGS_TF))
|
|
regs->flags &= ~X86_EFLAGS_TF;
|
|
if (data->flags & X86_EFLAGS_IF)
|
|
regs->flags |= X86_EFLAGS_IF;
|
|
return;
|
|
}
|
|
|
|
if (kmemcheck_enabled)
|
|
n = kmemcheck_hide_all();
|
|
else
|
|
n = kmemcheck_show_all();
|
|
|
|
if (n == 0)
|
|
return;
|
|
|
|
--data->balance;
|
|
|
|
data->n_addrs = 0;
|
|
|
|
if (!(data->flags & X86_EFLAGS_TF))
|
|
regs->flags &= ~X86_EFLAGS_TF;
|
|
if (data->flags & X86_EFLAGS_IF)
|
|
regs->flags |= X86_EFLAGS_IF;
|
|
}
|
|
|
|
void kmemcheck_show_pages(struct page *p, unsigned int n)
|
|
{
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < n; ++i) {
|
|
unsigned long address;
|
|
pte_t *pte;
|
|
unsigned int level;
|
|
|
|
address = (unsigned long) page_address(&p[i]);
|
|
pte = lookup_address(address, &level);
|
|
BUG_ON(!pte);
|
|
BUG_ON(level != PG_LEVEL_4K);
|
|
|
|
set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
|
|
set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN));
|
|
__flush_tlb_one(address);
|
|
}
|
|
}
|
|
|
|
bool kmemcheck_page_is_tracked(struct page *p)
|
|
{
|
|
/* This will also check the "hidden" flag of the PTE. */
|
|
return kmemcheck_pte_lookup((unsigned long) page_address(p));
|
|
}
|
|
|
|
void kmemcheck_hide_pages(struct page *p, unsigned int n)
|
|
{
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < n; ++i) {
|
|
unsigned long address;
|
|
pte_t *pte;
|
|
unsigned int level;
|
|
|
|
address = (unsigned long) page_address(&p[i]);
|
|
pte = lookup_address(address, &level);
|
|
BUG_ON(!pte);
|
|
BUG_ON(level != PG_LEVEL_4K);
|
|
|
|
set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
|
|
set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN));
|
|
__flush_tlb_one(address);
|
|
}
|
|
}
|
|
|
|
/* Access may NOT cross page boundary */
|
|
static void kmemcheck_read_strict(struct pt_regs *regs,
|
|
unsigned long addr, unsigned int size)
|
|
{
|
|
void *shadow;
|
|
enum kmemcheck_shadow status;
|
|
|
|
shadow = kmemcheck_shadow_lookup(addr);
|
|
if (!shadow)
|
|
return;
|
|
|
|
kmemcheck_save_addr(addr);
|
|
status = kmemcheck_shadow_test(shadow, size);
|
|
if (status == KMEMCHECK_SHADOW_INITIALIZED)
|
|
return;
|
|
|
|
if (kmemcheck_enabled)
|
|
kmemcheck_error_save(status, addr, size, regs);
|
|
|
|
if (kmemcheck_enabled == 2)
|
|
kmemcheck_enabled = 0;
|
|
|
|
/* Don't warn about it again. */
|
|
kmemcheck_shadow_set(shadow, size);
|
|
}
|
|
|
|
bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
|
|
{
|
|
enum kmemcheck_shadow status;
|
|
void *shadow;
|
|
|
|
shadow = kmemcheck_shadow_lookup(addr);
|
|
if (!shadow)
|
|
return true;
|
|
|
|
status = kmemcheck_shadow_test_all(shadow, size);
|
|
|
|
return status == KMEMCHECK_SHADOW_INITIALIZED;
|
|
}
|
|
|
|
/* Access may cross page boundary */
|
|
static void kmemcheck_read(struct pt_regs *regs,
|
|
unsigned long addr, unsigned int size)
|
|
{
|
|
unsigned long page = addr & PAGE_MASK;
|
|
unsigned long next_addr = addr + size - 1;
|
|
unsigned long next_page = next_addr & PAGE_MASK;
|
|
|
|
if (likely(page == next_page)) {
|
|
kmemcheck_read_strict(regs, addr, size);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* What we do is basically to split the access across the
|
|
* two pages and handle each part separately. Yes, this means
|
|
* that we may now see reads that are 3 + 5 bytes, for
|
|
* example (and if both are uninitialized, there will be two
|
|
* reports), but it makes the code a lot simpler.
|
|
*/
|
|
kmemcheck_read_strict(regs, addr, next_page - addr);
|
|
kmemcheck_read_strict(regs, next_page, next_addr - next_page);
|
|
}
|
|
|
|
static void kmemcheck_write_strict(struct pt_regs *regs,
|
|
unsigned long addr, unsigned int size)
|
|
{
|
|
void *shadow;
|
|
|
|
shadow = kmemcheck_shadow_lookup(addr);
|
|
if (!shadow)
|
|
return;
|
|
|
|
kmemcheck_save_addr(addr);
|
|
kmemcheck_shadow_set(shadow, size);
|
|
}
|
|
|
|
static void kmemcheck_write(struct pt_regs *regs,
|
|
unsigned long addr, unsigned int size)
|
|
{
|
|
unsigned long page = addr & PAGE_MASK;
|
|
unsigned long next_addr = addr + size - 1;
|
|
unsigned long next_page = next_addr & PAGE_MASK;
|
|
|
|
if (likely(page == next_page)) {
|
|
kmemcheck_write_strict(regs, addr, size);
|
|
return;
|
|
}
|
|
|
|
/* See comment in kmemcheck_read(). */
|
|
kmemcheck_write_strict(regs, addr, next_page - addr);
|
|
kmemcheck_write_strict(regs, next_page, next_addr - next_page);
|
|
}
|
|
|
|
/*
|
|
* Copying is hard. We have two addresses, each of which may be split across
|
|
* a page (and each page will have different shadow addresses).
|
|
*/
|
|
static void kmemcheck_copy(struct pt_regs *regs,
|
|
unsigned long src_addr, unsigned long dst_addr, unsigned int size)
|
|
{
|
|
uint8_t shadow[8];
|
|
enum kmemcheck_shadow status;
|
|
|
|
unsigned long page;
|
|
unsigned long next_addr;
|
|
unsigned long next_page;
|
|
|
|
uint8_t *x;
|
|
unsigned int i;
|
|
unsigned int n;
|
|
|
|
BUG_ON(size > sizeof(shadow));
|
|
|
|
page = src_addr & PAGE_MASK;
|
|
next_addr = src_addr + size - 1;
|
|
next_page = next_addr & PAGE_MASK;
|
|
|
|
if (likely(page == next_page)) {
|
|
/* Same page */
|
|
x = kmemcheck_shadow_lookup(src_addr);
|
|
if (x) {
|
|
kmemcheck_save_addr(src_addr);
|
|
for (i = 0; i < size; ++i)
|
|
shadow[i] = x[i];
|
|
} else {
|
|
for (i = 0; i < size; ++i)
|
|
shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
|
|
}
|
|
} else {
|
|
n = next_page - src_addr;
|
|
BUG_ON(n > sizeof(shadow));
|
|
|
|
/* First page */
|
|
x = kmemcheck_shadow_lookup(src_addr);
|
|
if (x) {
|
|
kmemcheck_save_addr(src_addr);
|
|
for (i = 0; i < n; ++i)
|
|
shadow[i] = x[i];
|
|
} else {
|
|
/* Not tracked */
|
|
for (i = 0; i < n; ++i)
|
|
shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
|
|
}
|
|
|
|
/* Second page */
|
|
x = kmemcheck_shadow_lookup(next_page);
|
|
if (x) {
|
|
kmemcheck_save_addr(next_page);
|
|
for (i = n; i < size; ++i)
|
|
shadow[i] = x[i - n];
|
|
} else {
|
|
/* Not tracked */
|
|
for (i = n; i < size; ++i)
|
|
shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
|
|
}
|
|
}
|
|
|
|
page = dst_addr & PAGE_MASK;
|
|
next_addr = dst_addr + size - 1;
|
|
next_page = next_addr & PAGE_MASK;
|
|
|
|
if (likely(page == next_page)) {
|
|
/* Same page */
|
|
x = kmemcheck_shadow_lookup(dst_addr);
|
|
if (x) {
|
|
kmemcheck_save_addr(dst_addr);
|
|
for (i = 0; i < size; ++i) {
|
|
x[i] = shadow[i];
|
|
shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
|
|
}
|
|
}
|
|
} else {
|
|
n = next_page - dst_addr;
|
|
BUG_ON(n > sizeof(shadow));
|
|
|
|
/* First page */
|
|
x = kmemcheck_shadow_lookup(dst_addr);
|
|
if (x) {
|
|
kmemcheck_save_addr(dst_addr);
|
|
for (i = 0; i < n; ++i) {
|
|
x[i] = shadow[i];
|
|
shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
|
|
}
|
|
}
|
|
|
|
/* Second page */
|
|
x = kmemcheck_shadow_lookup(next_page);
|
|
if (x) {
|
|
kmemcheck_save_addr(next_page);
|
|
for (i = n; i < size; ++i) {
|
|
x[i - n] = shadow[i];
|
|
shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
|
|
}
|
|
}
|
|
}
|
|
|
|
status = kmemcheck_shadow_test(shadow, size);
|
|
if (status == KMEMCHECK_SHADOW_INITIALIZED)
|
|
return;
|
|
|
|
if (kmemcheck_enabled)
|
|
kmemcheck_error_save(status, src_addr, size, regs);
|
|
|
|
if (kmemcheck_enabled == 2)
|
|
kmemcheck_enabled = 0;
|
|
}
|
|
|
|
enum kmemcheck_method {
|
|
KMEMCHECK_READ,
|
|
KMEMCHECK_WRITE,
|
|
};
|
|
|
|
static void kmemcheck_access(struct pt_regs *regs,
|
|
unsigned long fallback_address, enum kmemcheck_method fallback_method)
|
|
{
|
|
const uint8_t *insn;
|
|
const uint8_t *insn_primary;
|
|
unsigned int size;
|
|
|
|
struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
|
|
|
|
/* Recursive fault -- ouch. */
|
|
if (data->busy) {
|
|
kmemcheck_show_addr(fallback_address);
|
|
kmemcheck_error_save_bug(regs);
|
|
return;
|
|
}
|
|
|
|
data->busy = true;
|
|
|
|
insn = (const uint8_t *) regs->ip;
|
|
insn_primary = kmemcheck_opcode_get_primary(insn);
|
|
|
|
kmemcheck_opcode_decode(insn, &size);
|
|
|
|
switch (insn_primary[0]) {
|
|
#ifdef CONFIG_KMEMCHECK_BITOPS_OK
|
|
/* AND, OR, XOR */
|
|
/*
|
|
* Unfortunately, these instructions have to be excluded from
|
|
* our regular checking since they access only some (and not
|
|
* all) bits. This clears out "bogus" bitfield-access warnings.
|
|
*/
|
|
case 0x80:
|
|
case 0x81:
|
|
case 0x82:
|
|
case 0x83:
|
|
switch ((insn_primary[1] >> 3) & 7) {
|
|
/* OR */
|
|
case 1:
|
|
/* AND */
|
|
case 4:
|
|
/* XOR */
|
|
case 6:
|
|
kmemcheck_write(regs, fallback_address, size);
|
|
goto out;
|
|
|
|
/* ADD */
|
|
case 0:
|
|
/* ADC */
|
|
case 2:
|
|
/* SBB */
|
|
case 3:
|
|
/* SUB */
|
|
case 5:
|
|
/* CMP */
|
|
case 7:
|
|
break;
|
|
}
|
|
break;
|
|
#endif
|
|
|
|
/* MOVS, MOVSB, MOVSW, MOVSD */
|
|
case 0xa4:
|
|
case 0xa5:
|
|
/*
|
|
* These instructions are special because they take two
|
|
* addresses, but we only get one page fault.
|
|
*/
|
|
kmemcheck_copy(regs, regs->si, regs->di, size);
|
|
goto out;
|
|
|
|
/* CMPS, CMPSB, CMPSW, CMPSD */
|
|
case 0xa6:
|
|
case 0xa7:
|
|
kmemcheck_read(regs, regs->si, size);
|
|
kmemcheck_read(regs, regs->di, size);
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* If the opcode isn't special in any way, we use the data from the
|
|
* page fault handler to determine the address and type of memory
|
|
* access.
|
|
*/
|
|
switch (fallback_method) {
|
|
case KMEMCHECK_READ:
|
|
kmemcheck_read(regs, fallback_address, size);
|
|
goto out;
|
|
case KMEMCHECK_WRITE:
|
|
kmemcheck_write(regs, fallback_address, size);
|
|
goto out;
|
|
}
|
|
|
|
out:
|
|
data->busy = false;
|
|
}
|
|
|
|
bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
|
|
unsigned long error_code)
|
|
{
|
|
pte_t *pte;
|
|
|
|
/*
|
|
* XXX: Is it safe to assume that memory accesses from virtual 86
|
|
* mode or non-kernel code segments will _never_ access kernel
|
|
* memory (e.g. tracked pages)? For now, we need this to avoid
|
|
* invoking kmemcheck for PnP BIOS calls.
|
|
*/
|
|
if (regs->flags & X86_VM_MASK)
|
|
return false;
|
|
if (regs->cs != __KERNEL_CS)
|
|
return false;
|
|
|
|
pte = kmemcheck_pte_lookup(address);
|
|
if (!pte)
|
|
return false;
|
|
|
|
WARN_ON_ONCE(in_nmi());
|
|
|
|
if (error_code & 2)
|
|
kmemcheck_access(regs, address, KMEMCHECK_WRITE);
|
|
else
|
|
kmemcheck_access(regs, address, KMEMCHECK_READ);
|
|
|
|
kmemcheck_show(regs);
|
|
return true;
|
|
}
|
|
|
|
bool kmemcheck_trap(struct pt_regs *regs)
|
|
{
|
|
if (!kmemcheck_active(regs))
|
|
return false;
|
|
|
|
/* We're done. */
|
|
kmemcheck_hide(regs);
|
|
return true;
|
|
}
|