M7350v1_en_gpl

This commit is contained in:
T
2024-09-09 08:52:07 +00:00
commit f9cc65cfda
65988 changed files with 26357421 additions and 0 deletions
+20
View File
@@ -0,0 +1,20 @@
# KVM common configuration items and defaults
config HAVE_KVM
bool
config HAVE_KVM_IRQCHIP
bool
config HAVE_KVM_EVENTFD
bool
select EVENTFD
config KVM_APIC_ARCHITECTURE
bool
config KVM_MMIO
bool
config KVM_ASYNC_PF
bool
File diff suppressed because it is too large Load Diff
+216
View File
@@ -0,0 +1,216 @@
/*
* kvm asynchronous fault support
*
* Copyright 2010 Red Hat, Inc.
*
* Author:
* Gleb Natapov <gleb@redhat.com>
*
* This file is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <linux/kvm_host.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/mmu_context.h>
#include "async_pf.h"
#include <trace/events/kvm.h>
static struct kmem_cache *async_pf_cache;
int kvm_async_pf_init(void)
{
async_pf_cache = KMEM_CACHE(kvm_async_pf, 0);
if (!async_pf_cache)
return -ENOMEM;
return 0;
}
void kvm_async_pf_deinit(void)
{
if (async_pf_cache)
kmem_cache_destroy(async_pf_cache);
async_pf_cache = NULL;
}
void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu)
{
INIT_LIST_HEAD(&vcpu->async_pf.done);
INIT_LIST_HEAD(&vcpu->async_pf.queue);
spin_lock_init(&vcpu->async_pf.lock);
}
static void async_pf_execute(struct work_struct *work)
{
struct page *page = NULL;
struct kvm_async_pf *apf =
container_of(work, struct kvm_async_pf, work);
struct mm_struct *mm = apf->mm;
struct kvm_vcpu *vcpu = apf->vcpu;
unsigned long addr = apf->addr;
gva_t gva = apf->gva;
might_sleep();
use_mm(mm);
down_read(&mm->mmap_sem);
get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL);
up_read(&mm->mmap_sem);
unuse_mm(mm);
spin_lock(&vcpu->async_pf.lock);
list_add_tail(&apf->link, &vcpu->async_pf.done);
apf->page = page;
apf->done = true;
spin_unlock(&vcpu->async_pf.lock);
/*
* apf may be freed by kvm_check_async_pf_completion() after
* this point
*/
trace_kvm_async_pf_completed(addr, page, gva);
if (waitqueue_active(&vcpu->wq))
wake_up_interruptible(&vcpu->wq);
mmdrop(mm);
kvm_put_kvm(vcpu->kvm);
}
void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
{
/* cancel outstanding work queue item */
while (!list_empty(&vcpu->async_pf.queue)) {
struct kvm_async_pf *work =
list_entry(vcpu->async_pf.queue.next,
typeof(*work), queue);
cancel_work_sync(&work->work);
list_del(&work->queue);
if (!work->done) /* work was canceled */
kmem_cache_free(async_pf_cache, work);
}
spin_lock(&vcpu->async_pf.lock);
while (!list_empty(&vcpu->async_pf.done)) {
struct kvm_async_pf *work =
list_entry(vcpu->async_pf.done.next,
typeof(*work), link);
list_del(&work->link);
if (work->page)
put_page(work->page);
kmem_cache_free(async_pf_cache, work);
}
spin_unlock(&vcpu->async_pf.lock);
vcpu->async_pf.queued = 0;
}
void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
{
struct kvm_async_pf *work;
while (!list_empty_careful(&vcpu->async_pf.done) &&
kvm_arch_can_inject_async_page_present(vcpu)) {
spin_lock(&vcpu->async_pf.lock);
work = list_first_entry(&vcpu->async_pf.done, typeof(*work),
link);
list_del(&work->link);
spin_unlock(&vcpu->async_pf.lock);
if (work->page)
kvm_arch_async_page_ready(vcpu, work);
kvm_arch_async_page_present(vcpu, work);
list_del(&work->queue);
vcpu->async_pf.queued--;
if (work->page)
put_page(work->page);
kmem_cache_free(async_pf_cache, work);
}
}
int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
struct kvm_arch_async_pf *arch)
{
struct kvm_async_pf *work;
if (vcpu->async_pf.queued >= ASYNC_PF_PER_VCPU)
return 0;
/* setup delayed work */
/*
* do alloc nowait since if we are going to sleep anyway we
* may as well sleep faulting in page
*/
work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT);
if (!work)
return 0;
work->page = NULL;
work->done = false;
work->vcpu = vcpu;
work->gva = gva;
work->addr = gfn_to_hva(vcpu->kvm, gfn);
work->arch = *arch;
work->mm = current->mm;
atomic_inc(&work->mm->mm_count);
kvm_get_kvm(work->vcpu->kvm);
/* this can't really happen otherwise gfn_to_pfn_async
would succeed */
if (unlikely(kvm_is_error_hva(work->addr)))
goto retry_sync;
INIT_WORK(&work->work, async_pf_execute);
if (!schedule_work(&work->work))
goto retry_sync;
list_add_tail(&work->queue, &vcpu->async_pf.queue);
vcpu->async_pf.queued++;
kvm_arch_async_page_not_present(vcpu, work);
return 1;
retry_sync:
kvm_put_kvm(work->vcpu->kvm);
mmdrop(work->mm);
kmem_cache_free(async_pf_cache, work);
return 0;
}
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
{
struct kvm_async_pf *work;
if (!list_empty_careful(&vcpu->async_pf.done))
return 0;
work = kmem_cache_zalloc(async_pf_cache, GFP_ATOMIC);
if (!work)
return -ENOMEM;
work->page = bad_page;
get_page(bad_page);
INIT_LIST_HEAD(&work->queue); /* for list_del to work */
spin_lock(&vcpu->async_pf.lock);
list_add_tail(&work->link, &vcpu->async_pf.done);
spin_unlock(&vcpu->async_pf.lock);
vcpu->async_pf.queued++;
return 0;
}
+36
View File
@@ -0,0 +1,36 @@
/*
* kvm asynchronous fault support
*
* Copyright 2010 Red Hat, Inc.
*
* Author:
* Gleb Natapov <gleb@redhat.com>
*
* This file is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef __KVM_ASYNC_PF_H__
#define __KVM_ASYNC_PF_H__
#ifdef CONFIG_KVM_ASYNC_PF
int kvm_async_pf_init(void);
void kvm_async_pf_deinit(void);
void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu);
#else
#define kvm_async_pf_init() (0)
#define kvm_async_pf_deinit() do{}while(0)
#define kvm_async_pf_vcpu_init(C) do{}while(0)
#endif
#endif
+186
View File
@@ -0,0 +1,186 @@
/*
* KVM coalesced MMIO
*
* Copyright (c) 2008 Bull S.A.S.
* Copyright 2009 Red Hat, Inc. and/or its affiliates.
*
* Author: Laurent Vivier <Laurent.Vivier@bull.net>
*
*/
#include "iodev.h"
#include <linux/kvm_host.h>
#include <linux/slab.h>
#include <linux/kvm.h>
#include "coalesced_mmio.h"
static inline struct kvm_coalesced_mmio_dev *to_mmio(struct kvm_io_device *dev)
{
return container_of(dev, struct kvm_coalesced_mmio_dev, dev);
}
static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev,
gpa_t addr, int len)
{
/* is it in a batchable area ?
* (addr,len) is fully included in
* (zone->addr, zone->size)
*/
if (len < 0)
return 0;
if (addr + len < addr)
return 0;
if (addr < dev->zone.addr)
return 0;
if (addr + len > dev->zone.addr + dev->zone.size)
return 0;
return 1;
}
static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev)
{
struct kvm_coalesced_mmio_ring *ring;
unsigned avail;
/* Are we able to batch it ? */
/* last is the first free entry
* check if we don't meet the first used entry
* there is always one unused entry in the buffer
*/
ring = dev->kvm->coalesced_mmio_ring;
avail = (ring->first - ring->last - 1) % KVM_COALESCED_MMIO_MAX;
if (avail == 0) {
/* full */
return 0;
}
return 1;
}
static int coalesced_mmio_write(struct kvm_io_device *this,
gpa_t addr, int len, const void *val)
{
struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
if (!coalesced_mmio_in_range(dev, addr, len))
return -EOPNOTSUPP;
spin_lock(&dev->kvm->ring_lock);
if (!coalesced_mmio_has_room(dev)) {
spin_unlock(&dev->kvm->ring_lock);
return -EOPNOTSUPP;
}
/* copy data in first free entry of the ring */
ring->coalesced_mmio[ring->last].phys_addr = addr;
ring->coalesced_mmio[ring->last].len = len;
memcpy(ring->coalesced_mmio[ring->last].data, val, len);
smp_wmb();
ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX;
spin_unlock(&dev->kvm->ring_lock);
return 0;
}
static void coalesced_mmio_destructor(struct kvm_io_device *this)
{
struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
list_del(&dev->list);
kfree(dev);
}
static const struct kvm_io_device_ops coalesced_mmio_ops = {
.write = coalesced_mmio_write,
.destructor = coalesced_mmio_destructor,
};
int kvm_coalesced_mmio_init(struct kvm *kvm)
{
struct page *page;
int ret;
ret = -ENOMEM;
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page)
goto out_err;
ret = 0;
kvm->coalesced_mmio_ring = page_address(page);
/*
* We're using this spinlock to sync access to the coalesced ring.
* The list doesn't need it's own lock since device registration and
* unregistration should only happen when kvm->slots_lock is held.
*/
spin_lock_init(&kvm->ring_lock);
INIT_LIST_HEAD(&kvm->coalesced_zones);
out_err:
return ret;
}
void kvm_coalesced_mmio_free(struct kvm *kvm)
{
if (kvm->coalesced_mmio_ring)
free_page((unsigned long)kvm->coalesced_mmio_ring);
}
int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
struct kvm_coalesced_mmio_zone *zone)
{
int ret;
struct kvm_coalesced_mmio_dev *dev;
dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
if (!dev)
return -ENOMEM;
kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops);
dev->kvm = kvm;
dev->zone = *zone;
mutex_lock(&kvm->slots_lock);
ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, zone->addr,
zone->size, &dev->dev);
if (ret < 0)
goto out_free_dev;
list_add_tail(&dev->list, &kvm->coalesced_zones);
mutex_unlock(&kvm->slots_lock);
return ret;
out_free_dev:
mutex_unlock(&kvm->slots_lock);
kfree(dev);
if (dev == NULL)
return -ENXIO;
return 0;
}
int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
struct kvm_coalesced_mmio_zone *zone)
{
struct kvm_coalesced_mmio_dev *dev, *tmp;
mutex_lock(&kvm->slots_lock);
list_for_each_entry_safe(dev, tmp, &kvm->coalesced_zones, list)
if (coalesced_mmio_in_range(dev, zone->addr, zone->size)) {
kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dev->dev);
kvm_iodevice_destructor(&dev->dev);
}
mutex_unlock(&kvm->slots_lock);
return 0;
}
+38
View File
@@ -0,0 +1,38 @@
#ifndef __KVM_COALESCED_MMIO_H__
#define __KVM_COALESCED_MMIO_H__
/*
* KVM coalesced MMIO
*
* Copyright (c) 2008 Bull S.A.S.
*
* Author: Laurent Vivier <Laurent.Vivier@bull.net>
*
*/
#ifdef CONFIG_KVM_MMIO
#include <linux/list.h>
struct kvm_coalesced_mmio_dev {
struct list_head list;
struct kvm_io_device dev;
struct kvm *kvm;
struct kvm_coalesced_mmio_zone zone;
};
int kvm_coalesced_mmio_init(struct kvm *kvm);
void kvm_coalesced_mmio_free(struct kvm *kvm);
int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
struct kvm_coalesced_mmio_zone *zone);
int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
struct kvm_coalesced_mmio_zone *zone);
#else
static inline int kvm_coalesced_mmio_init(struct kvm *kvm) { return 0; }
static inline void kvm_coalesced_mmio_free(struct kvm *kvm) { }
#endif
#endif
+657
View File
@@ -0,0 +1,657 @@
/*
* kvm eventfd support - use eventfd objects to signal various KVM events
*
* Copyright 2009 Novell. All Rights Reserved.
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
*
* Author:
* Gregory Haskins <ghaskins@novell.com>
*
* This file is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/workqueue.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/list.h>
#include <linux/eventfd.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include "iodev.h"
/*
* --------------------------------------------------------------------
* irqfd: Allows an fd to be used to inject an interrupt to the guest
*
* Credit goes to Avi Kivity for the original idea.
* --------------------------------------------------------------------
*/
struct _irqfd {
/* Used for MSI fast-path */
struct kvm *kvm;
wait_queue_t wait;
/* Update side is protected by irqfds.lock */
struct kvm_kernel_irq_routing_entry __rcu *irq_entry;
/* Used for level IRQ fast-path */
int gsi;
struct work_struct inject;
/* Used for setup/shutdown */
struct eventfd_ctx *eventfd;
struct list_head list;
poll_table pt;
struct work_struct shutdown;
};
static struct workqueue_struct *irqfd_cleanup_wq;
static void
irqfd_inject(struct work_struct *work)
{
struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
struct kvm *kvm = irqfd->kvm;
kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
}
/*
* Race-free decouple logic (ordering is critical)
*/
static void
irqfd_shutdown(struct work_struct *work)
{
struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);
u64 cnt;
/*
* Synchronize with the wait-queue and unhook ourselves to prevent
* further events.
*/
eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);
/*
* We know no new events will be scheduled at this point, so block
* until all previously outstanding events have completed
*/
flush_work_sync(&irqfd->inject);
/*
* It is now safe to release the object's resources
*/
eventfd_ctx_put(irqfd->eventfd);
kfree(irqfd);
}
/* assumes kvm->irqfds.lock is held */
static bool
irqfd_is_active(struct _irqfd *irqfd)
{
return list_empty(&irqfd->list) ? false : true;
}
/*
* Mark the irqfd as inactive and schedule it for removal
*
* assumes kvm->irqfds.lock is held
*/
static void
irqfd_deactivate(struct _irqfd *irqfd)
{
BUG_ON(!irqfd_is_active(irqfd));
list_del_init(&irqfd->list);
queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
}
/*
* Called with wqh->lock held and interrupts disabled
*/
static int
irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
unsigned long flags = (unsigned long)key;
struct kvm_kernel_irq_routing_entry *irq;
struct kvm *kvm = irqfd->kvm;
if (flags & POLLIN) {
rcu_read_lock();
irq = rcu_dereference(irqfd->irq_entry);
/* An event has been signaled, inject an interrupt */
if (irq)
kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
else
schedule_work(&irqfd->inject);
rcu_read_unlock();
}
if (flags & POLLHUP) {
/* The eventfd is closing, detach from KVM */
unsigned long flags;
spin_lock_irqsave(&kvm->irqfds.lock, flags);
/*
* We must check if someone deactivated the irqfd before
* we could acquire the irqfds.lock since the item is
* deactivated from the KVM side before it is unhooked from
* the wait-queue. If it is already deactivated, we can
* simply return knowing the other side will cleanup for us.
* We cannot race against the irqfd going away since the
* other side is required to acquire wqh->lock, which we hold
*/
if (irqfd_is_active(irqfd))
irqfd_deactivate(irqfd);
spin_unlock_irqrestore(&kvm->irqfds.lock, flags);
}
return 0;
}
static void
irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
poll_table *pt)
{
struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt);
add_wait_queue(wqh, &irqfd->wait);
}
/* Must be called under irqfds.lock */
static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
struct kvm_irq_routing_table *irq_rt)
{
struct kvm_kernel_irq_routing_entry *e;
struct hlist_node *n;
if (irqfd->gsi >= irq_rt->nr_rt_entries) {
rcu_assign_pointer(irqfd->irq_entry, NULL);
return;
}
hlist_for_each_entry(e, n, &irq_rt->map[irqfd->gsi], link) {
/* Only fast-path MSI. */
if (e->type == KVM_IRQ_ROUTING_MSI)
rcu_assign_pointer(irqfd->irq_entry, e);
else
rcu_assign_pointer(irqfd->irq_entry, NULL);
}
}
static int
kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
{
struct kvm_irq_routing_table *irq_rt;
struct _irqfd *irqfd, *tmp;
struct file *file = NULL;
struct eventfd_ctx *eventfd = NULL;
int ret;
unsigned int events;
irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
if (!irqfd)
return -ENOMEM;
irqfd->kvm = kvm;
irqfd->gsi = gsi;
INIT_LIST_HEAD(&irqfd->list);
INIT_WORK(&irqfd->inject, irqfd_inject);
INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
file = eventfd_fget(fd);
if (IS_ERR(file)) {
ret = PTR_ERR(file);
goto fail;
}
eventfd = eventfd_ctx_fileget(file);
if (IS_ERR(eventfd)) {
ret = PTR_ERR(eventfd);
goto fail;
}
irqfd->eventfd = eventfd;
/*
* Install our own custom wake-up handling so we are notified via
* a callback whenever someone signals the underlying eventfd
*/
init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);
spin_lock_irq(&kvm->irqfds.lock);
ret = 0;
list_for_each_entry(tmp, &kvm->irqfds.items, list) {
if (irqfd->eventfd != tmp->eventfd)
continue;
/* This fd is used for another irq already. */
ret = -EBUSY;
spin_unlock_irq(&kvm->irqfds.lock);
goto fail;
}
irq_rt = rcu_dereference_protected(kvm->irq_routing,
lockdep_is_held(&kvm->irqfds.lock));
irqfd_update(kvm, irqfd, irq_rt);
events = file->f_op->poll(file, &irqfd->pt);
list_add_tail(&irqfd->list, &kvm->irqfds.items);
/*
* Check if there was an event already pending on the eventfd
* before we registered, and trigger it as if we didn't miss it.
*/
if (events & POLLIN)
schedule_work(&irqfd->inject);
spin_unlock_irq(&kvm->irqfds.lock);
/*
* do not drop the file until the irqfd is fully initialized, otherwise
* we might race against the POLLHUP
*/
fput(file);
return 0;
fail:
if (eventfd && !IS_ERR(eventfd))
eventfd_ctx_put(eventfd);
if (!IS_ERR(file))
fput(file);
kfree(irqfd);
return ret;
}
void
kvm_eventfd_init(struct kvm *kvm)
{
spin_lock_init(&kvm->irqfds.lock);
INIT_LIST_HEAD(&kvm->irqfds.items);
INIT_LIST_HEAD(&kvm->ioeventfds);
}
/*
* shutdown any irqfd's that match fd+gsi
*/
static int
kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
{
struct _irqfd *irqfd, *tmp;
struct eventfd_ctx *eventfd;
eventfd = eventfd_ctx_fdget(fd);
if (IS_ERR(eventfd))
return PTR_ERR(eventfd);
spin_lock_irq(&kvm->irqfds.lock);
list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) {
/*
* This rcu_assign_pointer is needed for when
* another thread calls kvm_irq_routing_update before
* we flush workqueue below (we synchronize with
* kvm_irq_routing_update using irqfds.lock).
* It is paired with synchronize_rcu done by caller
* of that function.
*/
rcu_assign_pointer(irqfd->irq_entry, NULL);
irqfd_deactivate(irqfd);
}
}
spin_unlock_irq(&kvm->irqfds.lock);
eventfd_ctx_put(eventfd);
/*
* Block until we know all outstanding shutdown jobs have completed
* so that we guarantee there will not be any more interrupts on this
* gsi once this deassign function returns.
*/
flush_workqueue(irqfd_cleanup_wq);
return 0;
}
int
kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
{
if (flags & KVM_IRQFD_FLAG_DEASSIGN)
return kvm_irqfd_deassign(kvm, fd, gsi);
return kvm_irqfd_assign(kvm, fd, gsi);
}
/*
* This function is called as the kvm VM fd is being released. Shutdown all
* irqfds that still remain open
*/
void
kvm_irqfd_release(struct kvm *kvm)
{
struct _irqfd *irqfd, *tmp;
spin_lock_irq(&kvm->irqfds.lock);
list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
irqfd_deactivate(irqfd);
spin_unlock_irq(&kvm->irqfds.lock);
/*
* Block until we know all outstanding shutdown jobs have completed
* since we do not take a kvm* reference.
*/
flush_workqueue(irqfd_cleanup_wq);
}
/*
* Change irq_routing and irqfd.
* Caller must invoke synchronize_rcu afterwards.
*/
void kvm_irq_routing_update(struct kvm *kvm,
struct kvm_irq_routing_table *irq_rt)
{
struct _irqfd *irqfd;
spin_lock_irq(&kvm->irqfds.lock);
rcu_assign_pointer(kvm->irq_routing, irq_rt);
list_for_each_entry(irqfd, &kvm->irqfds.items, list)
irqfd_update(kvm, irqfd, irq_rt);
spin_unlock_irq(&kvm->irqfds.lock);
}
/*
* create a host-wide workqueue for issuing deferred shutdown requests
* aggregated from all vm* instances. We need our own isolated single-thread
* queue to prevent deadlock against flushing the normal work-queue.
*/
static int __init irqfd_module_init(void)
{
irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup");
if (!irqfd_cleanup_wq)
return -ENOMEM;
return 0;
}
static void __exit irqfd_module_exit(void)
{
destroy_workqueue(irqfd_cleanup_wq);
}
module_init(irqfd_module_init);
module_exit(irqfd_module_exit);
/*
* --------------------------------------------------------------------
* ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
*
* userspace can register a PIO/MMIO address with an eventfd for receiving
* notification when the memory has been touched.
* --------------------------------------------------------------------
*/
struct _ioeventfd {
struct list_head list;
u64 addr;
int length;
struct eventfd_ctx *eventfd;
u64 datamatch;
struct kvm_io_device dev;
bool wildcard;
};
static inline struct _ioeventfd *
to_ioeventfd(struct kvm_io_device *dev)
{
return container_of(dev, struct _ioeventfd, dev);
}
static void
ioeventfd_release(struct _ioeventfd *p)
{
eventfd_ctx_put(p->eventfd);
list_del(&p->list);
kfree(p);
}
static bool
ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
{
u64 _val;
if (!(addr == p->addr && len == p->length))
/* address-range must be precise for a hit */
return false;
if (p->wildcard)
/* all else equal, wildcard is always a hit */
return true;
/* otherwise, we have to actually compare the data */
BUG_ON(!IS_ALIGNED((unsigned long)val, len));
switch (len) {
case 1:
_val = *(u8 *)val;
break;
case 2:
_val = *(u16 *)val;
break;
case 4:
_val = *(u32 *)val;
break;
case 8:
_val = *(u64 *)val;
break;
default:
return false;
}
return _val == p->datamatch ? true : false;
}
/* MMIO/PIO writes trigger an event if the addr/val match */
static int
ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
const void *val)
{
struct _ioeventfd *p = to_ioeventfd(this);
if (!ioeventfd_in_range(p, addr, len, val))
return -EOPNOTSUPP;
eventfd_signal(p->eventfd, 1);
return 0;
}
/*
* This function is called as KVM is completely shutting down. We do not
* need to worry about locking just nuke anything we have as quickly as possible
*/
static void
ioeventfd_destructor(struct kvm_io_device *this)
{
struct _ioeventfd *p = to_ioeventfd(this);
ioeventfd_release(p);
}
static const struct kvm_io_device_ops ioeventfd_ops = {
.write = ioeventfd_write,
.destructor = ioeventfd_destructor,
};
/* assumes kvm->slots_lock held */
static bool
ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
{
struct _ioeventfd *_p;
list_for_each_entry(_p, &kvm->ioeventfds, list)
if (_p->addr == p->addr && _p->length == p->length &&
(_p->wildcard || p->wildcard ||
_p->datamatch == p->datamatch))
return true;
return false;
}
static int
kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
struct _ioeventfd *p;
struct eventfd_ctx *eventfd;
int ret;
/* must be natural-word sized */
switch (args->len) {
case 1:
case 2:
case 4:
case 8:
break;
default:
return -EINVAL;
}
/* check for range overflow */
if (args->addr + args->len < args->addr)
return -EINVAL;
/* check for extra flags that we don't understand */
if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
return -EINVAL;
eventfd = eventfd_ctx_fdget(args->fd);
if (IS_ERR(eventfd))
return PTR_ERR(eventfd);
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p) {
ret = -ENOMEM;
goto fail;
}
INIT_LIST_HEAD(&p->list);
p->addr = args->addr;
p->length = args->len;
p->eventfd = eventfd;
/* The datamatch feature is optional, otherwise this is a wildcard */
if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
p->datamatch = args->datamatch;
else
p->wildcard = true;
mutex_lock(&kvm->slots_lock);
/* Verify that there isn't a match already */
if (ioeventfd_check_collision(kvm, p)) {
ret = -EEXIST;
goto unlock_fail;
}
kvm_iodevice_init(&p->dev, &ioeventfd_ops);
ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
&p->dev);
if (ret < 0)
goto unlock_fail;
list_add_tail(&p->list, &kvm->ioeventfds);
mutex_unlock(&kvm->slots_lock);
return 0;
unlock_fail:
mutex_unlock(&kvm->slots_lock);
fail:
kfree(p);
eventfd_ctx_put(eventfd);
return ret;
}
static int
kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
struct _ioeventfd *p, *tmp;
struct eventfd_ctx *eventfd;
int ret = -ENOENT;
eventfd = eventfd_ctx_fdget(args->fd);
if (IS_ERR(eventfd))
return PTR_ERR(eventfd);
mutex_lock(&kvm->slots_lock);
list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
if (p->eventfd != eventfd ||
p->addr != args->addr ||
p->length != args->len ||
p->wildcard != wildcard)
continue;
if (!p->wildcard && p->datamatch != args->datamatch)
continue;
kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
ioeventfd_release(p);
ret = 0;
break;
}
mutex_unlock(&kvm->slots_lock);
eventfd_ctx_put(eventfd);
return ret;
}
int
kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
return kvm_deassign_ioeventfd(kvm, args);
return kvm_assign_ioeventfd(kvm, args);
}
+451
View File
@@ -0,0 +1,451 @@
/*
* Copyright (C) 2001 MandrakeSoft S.A.
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
*
* MandrakeSoft S.A.
* 43, rue d'Aboukir
* 75002 Paris - France
* http://www.linux-mandrake.com/
* http://www.mandrakesoft.com/
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Yunhong Jiang <yunhong.jiang@intel.com>
* Yaozu (Eddie) Dong <eddie.dong@intel.com>
* Based on Xen 3.1 code.
*/
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/smp.h>
#include <linux/hrtimer.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/current.h>
#include <trace/events/kvm.h>
#include "ioapic.h"
#include "lapic.h"
#include "irq.h"
#if 0
#define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg)
#else
#define ioapic_debug(fmt, arg...)
#endif
static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq);
static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
unsigned long addr,
unsigned long length)
{
unsigned long result = 0;
switch (ioapic->ioregsel) {
case IOAPIC_REG_VERSION:
result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16)
| (IOAPIC_VERSION_ID & 0xff));
break;
case IOAPIC_REG_APIC_ID:
case IOAPIC_REG_ARB_ID:
result = ((ioapic->id & 0xf) << 24);
break;
default:
{
u32 redir_index = (ioapic->ioregsel - 0x10) >> 1;
u64 redir_content;
ASSERT(redir_index < IOAPIC_NUM_PINS);
redir_content = ioapic->redirtbl[redir_index].bits;
result = (ioapic->ioregsel & 0x1) ?
(redir_content >> 32) & 0xffffffff :
redir_content & 0xffffffff;
break;
}
}
return result;
}
static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
{
union kvm_ioapic_redirect_entry *pent;
int injected = -1;
pent = &ioapic->redirtbl[idx];
if (!pent->fields.mask) {
injected = ioapic_deliver(ioapic, idx);
if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
pent->fields.remote_irr = 1;
}
return injected;
}
static void update_handled_vectors(struct kvm_ioapic *ioapic)
{
DECLARE_BITMAP(handled_vectors, 256);
int i;
memset(handled_vectors, 0, sizeof(handled_vectors));
for (i = 0; i < IOAPIC_NUM_PINS; ++i)
__set_bit(ioapic->redirtbl[i].fields.vector, handled_vectors);
memcpy(ioapic->handled_vectors, handled_vectors,
sizeof(handled_vectors));
smp_wmb();
}
static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
{
unsigned index;
bool mask_before, mask_after;
union kvm_ioapic_redirect_entry *e;
switch (ioapic->ioregsel) {
case IOAPIC_REG_VERSION:
/* Writes are ignored. */
break;
case IOAPIC_REG_APIC_ID:
ioapic->id = (val >> 24) & 0xf;
break;
case IOAPIC_REG_ARB_ID:
break;
default:
index = (ioapic->ioregsel - 0x10) >> 1;
ioapic_debug("change redir index %x val %x\n", index, val);
if (index >= IOAPIC_NUM_PINS)
return;
e = &ioapic->redirtbl[index];
mask_before = e->fields.mask;
if (ioapic->ioregsel & 1) {
e->bits &= 0xffffffff;
e->bits |= (u64) val << 32;
} else {
e->bits &= ~0xffffffffULL;
e->bits |= (u32) val;
e->fields.remote_irr = 0;
}
update_handled_vectors(ioapic);
mask_after = e->fields.mask;
if (mask_before != mask_after)
kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
&& ioapic->irr & (1 << index))
ioapic_service(ioapic, index);
break;
}
}
static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
{
union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
struct kvm_lapic_irq irqe;
ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
"vector=%x trig_mode=%x\n",
entry->fields.dest_id, entry->fields.dest_mode,
entry->fields.delivery_mode, entry->fields.vector,
entry->fields.trig_mode);
irqe.dest_id = entry->fields.dest_id;
irqe.vector = entry->fields.vector;
irqe.dest_mode = entry->fields.dest_mode;
irqe.trig_mode = entry->fields.trig_mode;
irqe.delivery_mode = entry->fields.delivery_mode << 8;
irqe.level = 1;
irqe.shorthand = 0;
#ifdef CONFIG_X86
/* Always delivery PIT interrupt to vcpu 0 */
if (irq == 0) {
irqe.dest_mode = 0; /* Physical mode. */
/* need to read apic_id from apic regiest since
* it can be rewritten */
irqe.dest_id = ioapic->kvm->bsp_vcpu_id;
}
#endif
return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
}
int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
{
u32 old_irr;
u32 mask = 1 << irq;
union kvm_ioapic_redirect_entry entry;
int ret = 1;
spin_lock(&ioapic->lock);
old_irr = ioapic->irr;
if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
entry = ioapic->redirtbl[irq];
level ^= entry.fields.polarity;
if (!level)
ioapic->irr &= ~mask;
else {
int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
ioapic->irr |= mask;
if ((edge && old_irr != ioapic->irr) ||
(!edge && !entry.fields.remote_irr))
ret = ioapic_service(ioapic, irq);
else
ret = 0; /* report coalesced interrupt */
}
trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
}
spin_unlock(&ioapic->lock);
return ret;
}
static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
int trigger_mode)
{
int i;
for (i = 0; i < IOAPIC_NUM_PINS; i++) {
union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
if (ent->fields.vector != vector)
continue;
/*
* We are dropping lock while calling ack notifiers because ack
* notifier callbacks for assigned devices call into IOAPIC
* recursively. Since remote_irr is cleared only after call
* to notifiers if the same vector will be delivered while lock
* is dropped it will be put into irr and will be delivered
* after ack notifier returns.
*/
spin_unlock(&ioapic->lock);
kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
spin_lock(&ioapic->lock);
if (trigger_mode != IOAPIC_LEVEL_TRIG)
continue;
ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
ent->fields.remote_irr = 0;
if (!ent->fields.mask && (ioapic->irr & (1 << i)))
ioapic_service(ioapic, i);
}
}
void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
{
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
smp_rmb();
if (!test_bit(vector, ioapic->handled_vectors))
return;
spin_lock(&ioapic->lock);
__kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
spin_unlock(&ioapic->lock);
}
static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev)
{
return container_of(dev, struct kvm_ioapic, dev);
}
static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr)
{
return ((addr >= ioapic->base_address &&
(addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
}
static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
void *val)
{
struct kvm_ioapic *ioapic = to_ioapic(this);
u32 result;
if (!ioapic_in_range(ioapic, addr))
return -EOPNOTSUPP;
ioapic_debug("addr %lx\n", (unsigned long)addr);
ASSERT(!(addr & 0xf)); /* check alignment */
addr &= 0xff;
spin_lock(&ioapic->lock);
switch (addr) {
case IOAPIC_REG_SELECT:
result = ioapic->ioregsel;
break;
case IOAPIC_REG_WINDOW:
result = ioapic_read_indirect(ioapic, addr, len);
break;
default:
result = 0;
break;
}
spin_unlock(&ioapic->lock);
switch (len) {
case 8:
*(u64 *) val = result;
break;
case 1:
case 2:
case 4:
memcpy(val, (char *)&result, len);
break;
default:
printk(KERN_WARNING "ioapic: wrong length %d\n", len);
}
return 0;
}
static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
const void *val)
{
struct kvm_ioapic *ioapic = to_ioapic(this);
u32 data;
if (!ioapic_in_range(ioapic, addr))
return -EOPNOTSUPP;
ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
(void*)addr, len, val);
ASSERT(!(addr & 0xf)); /* check alignment */
switch (len) {
case 8:
case 4:
data = *(u32 *) val;
break;
case 2:
data = *(u16 *) val;
break;
case 1:
data = *(u8 *) val;
break;
default:
printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
return 0;
}
addr &= 0xff;
spin_lock(&ioapic->lock);
switch (addr) {
case IOAPIC_REG_SELECT:
ioapic->ioregsel = data & 0xFF; /* 8-bit register */
break;
case IOAPIC_REG_WINDOW:
ioapic_write_indirect(ioapic, data);
break;
#ifdef CONFIG_IA64
case IOAPIC_REG_EOI:
__kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG);
break;
#endif
default:
break;
}
spin_unlock(&ioapic->lock);
return 0;
}
void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
{
int i;
for (i = 0; i < IOAPIC_NUM_PINS; i++)
ioapic->redirtbl[i].fields.mask = 1;
ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
ioapic->ioregsel = 0;
ioapic->irr = 0;
ioapic->id = 0;
update_handled_vectors(ioapic);
}
static const struct kvm_io_device_ops ioapic_mmio_ops = {
.read = ioapic_mmio_read,
.write = ioapic_mmio_write,
};
int kvm_ioapic_init(struct kvm *kvm)
{
struct kvm_ioapic *ioapic;
int ret;
ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
if (!ioapic)
return -ENOMEM;
spin_lock_init(&ioapic->lock);
kvm->arch.vioapic = ioapic;
kvm_ioapic_reset(ioapic);
kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
ioapic->kvm = kvm;
mutex_lock(&kvm->slots_lock);
ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ioapic->base_address,
IOAPIC_MEM_LENGTH, &ioapic->dev);
mutex_unlock(&kvm->slots_lock);
if (ret < 0) {
kvm->arch.vioapic = NULL;
kfree(ioapic);
}
return ret;
}
void kvm_ioapic_destroy(struct kvm *kvm)
{
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
if (ioapic) {
kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
kvm->arch.vioapic = NULL;
kfree(ioapic);
}
}
int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
{
struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
if (!ioapic)
return -EINVAL;
spin_lock(&ioapic->lock);
memcpy(state, ioapic, sizeof(struct kvm_ioapic_state));
spin_unlock(&ioapic->lock);
return 0;
}
int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
{
struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
if (!ioapic)
return -EINVAL;
spin_lock(&ioapic->lock);
memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
update_handled_vectors(ioapic);
spin_unlock(&ioapic->lock);
return 0;
}
+83
View File
@@ -0,0 +1,83 @@
#ifndef __KVM_IO_APIC_H
#define __KVM_IO_APIC_H
#include <linux/kvm_host.h>
#include "iodev.h"
struct kvm;
struct kvm_vcpu;
#define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS
#define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
#define IOAPIC_EDGE_TRIG 0
#define IOAPIC_LEVEL_TRIG 1
#define IOAPIC_DEFAULT_BASE_ADDRESS 0xfec00000
#define IOAPIC_MEM_LENGTH 0x100
/* Direct registers. */
#define IOAPIC_REG_SELECT 0x00
#define IOAPIC_REG_WINDOW 0x10
#define IOAPIC_REG_EOI 0x40 /* IA64 IOSAPIC only */
/* Indirect registers. */
#define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */
#define IOAPIC_REG_VERSION 0x01
#define IOAPIC_REG_ARB_ID 0x02 /* x86 IOAPIC only */
/*ioapic delivery mode*/
#define IOAPIC_FIXED 0x0
#define IOAPIC_LOWEST_PRIORITY 0x1
#define IOAPIC_PMI 0x2
#define IOAPIC_NMI 0x4
#define IOAPIC_INIT 0x5
#define IOAPIC_EXTINT 0x7
struct kvm_ioapic {
u64 base_address;
u32 ioregsel;
u32 id;
u32 irr;
u32 pad;
union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS];
unsigned long irq_states[IOAPIC_NUM_PINS];
struct kvm_io_device dev;
struct kvm *kvm;
void (*ack_notifier)(void *opaque, int irq);
spinlock_t lock;
DECLARE_BITMAP(handled_vectors, 256);
};
#ifdef DEBUG
#define ASSERT(x) \
do { \
if (!(x)) { \
printk(KERN_EMERG "assertion failed %s: %d: %s\n", \
__FILE__, __LINE__, #x); \
BUG(); \
} \
} while (0)
#else
#define ASSERT(x) do { } while (0)
#endif
static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
{
return kvm->arch.vioapic;
}
int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
int short_hand, int dest, int dest_mode);
int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
int kvm_ioapic_init(struct kvm *kvm);
void kvm_ioapic_destroy(struct kvm *kvm);
int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq);
int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
#endif
+70
View File
@@ -0,0 +1,70 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef __KVM_IODEV_H__
#define __KVM_IODEV_H__
#include <linux/kvm_types.h>
#include <asm/errno.h>
struct kvm_io_device;
/**
* kvm_io_device_ops are called under kvm slots_lock.
* read and write handlers return 0 if the transaction has been handled,
* or non-zero to have it passed to the next device.
**/
struct kvm_io_device_ops {
int (*read)(struct kvm_io_device *this,
gpa_t addr,
int len,
void *val);
int (*write)(struct kvm_io_device *this,
gpa_t addr,
int len,
const void *val);
void (*destructor)(struct kvm_io_device *this);
};
struct kvm_io_device {
const struct kvm_io_device_ops *ops;
};
static inline void kvm_iodevice_init(struct kvm_io_device *dev,
const struct kvm_io_device_ops *ops)
{
dev->ops = ops;
}
static inline int kvm_iodevice_read(struct kvm_io_device *dev,
gpa_t addr, int l, void *v)
{
return dev->ops->read ? dev->ops->read(dev, addr, l, v) : -EOPNOTSUPP;
}
static inline int kvm_iodevice_write(struct kvm_io_device *dev,
gpa_t addr, int l, const void *v)
{
return dev->ops->write ? dev->ops->write(dev, addr, l, v) : -EOPNOTSUPP;
}
static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
{
if (dev->ops->destructor)
dev->ops->destructor(dev);
}
#endif /* __KVM_IODEV_H__ */
+353
View File
@@ -0,0 +1,353 @@
/*
* Copyright (c) 2006, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place - Suite 330, Boston, MA 02111-1307 USA.
*
* Copyright (C) 2006-2008 Intel Corporation
* Copyright IBM Corporation, 2008
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
*
* Author: Allen M. Kay <allen.m.kay@intel.com>
* Author: Weidong Han <weidong.han@intel.com>
* Author: Ben-Ami Yassour <benami@il.ibm.com>
*/
#include <linux/list.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/stat.h>
#include <linux/dmar.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
static bool allow_unsafe_assigned_interrupts;
module_param_named(allow_unsafe_assigned_interrupts,
allow_unsafe_assigned_interrupts, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(allow_unsafe_assigned_interrupts,
"Enable device assignment on platforms without interrupt remapping support.");
static int kvm_iommu_unmap_memslots(struct kvm *kvm);
static void kvm_iommu_put_pages(struct kvm *kvm,
gfn_t base_gfn, unsigned long npages);
static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot,
gfn_t gfn, unsigned long size)
{
gfn_t end_gfn;
pfn_t pfn;
pfn = gfn_to_pfn_memslot(kvm, slot, gfn);
end_gfn = gfn + (size >> PAGE_SHIFT);
gfn += 1;
if (is_error_pfn(pfn))
return pfn;
while (gfn < end_gfn)
gfn_to_pfn_memslot(kvm, slot, gfn++);
return pfn;
}
int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
{
gfn_t gfn, end_gfn;
pfn_t pfn;
int r = 0;
struct iommu_domain *domain = kvm->arch.iommu_domain;
int flags;
/* check if iommu exists and in use */
if (!domain)
return 0;
gfn = slot->base_gfn;
end_gfn = gfn + slot->npages;
flags = IOMMU_READ | IOMMU_WRITE;
if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)
flags |= IOMMU_CACHE;
while (gfn < end_gfn) {
unsigned long page_size;
/* Check if already mapped */
if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) {
gfn += 1;
continue;
}
/* Get the page size we could use to map */
page_size = kvm_host_page_size(kvm, gfn);
/* Make sure the page_size does not exceed the memslot */
while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn)
page_size >>= 1;
/* Make sure gfn is aligned to the page size we want to map */
while ((gfn << PAGE_SHIFT) & (page_size - 1))
page_size >>= 1;
/*
* Pin all pages we are about to map in memory. This is
* important because we unmap and unpin in 4kb steps later.
*/
pfn = kvm_pin_pages(kvm, slot, gfn, page_size);
if (is_error_pfn(pfn)) {
gfn += 1;
continue;
}
/* Map into IO address space */
r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn),
page_size, flags);
if (r) {
printk(KERN_ERR "kvm_iommu_map_address:"
"iommu failed to map pfn=%llx\n", pfn);
goto unmap_pages;
}
gfn += page_size >> PAGE_SHIFT;
}
return 0;
unmap_pages:
kvm_iommu_put_pages(kvm, slot->base_gfn, gfn);
return r;
}
static int kvm_iommu_map_memslots(struct kvm *kvm)
{
int idx, r = 0;
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
idx = srcu_read_lock(&kvm->srcu);
slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, slots) {
r = kvm_iommu_map_pages(kvm, memslot);
if (r)
break;
}
srcu_read_unlock(&kvm->srcu, idx);
return r;
}
int kvm_assign_device(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev)
{
struct pci_dev *pdev = NULL;
struct iommu_domain *domain = kvm->arch.iommu_domain;
int r, last_flags;
/* check if iommu exists and in use */
if (!domain)
return 0;
pdev = assigned_dev->dev;
if (pdev == NULL)
return -ENODEV;
r = iommu_attach_device(domain, &pdev->dev);
if (r) {
printk(KERN_ERR "assign device %x:%x:%x.%x failed",
pci_domain_nr(pdev->bus),
pdev->bus->number,
PCI_SLOT(pdev->devfn),
PCI_FUNC(pdev->devfn));
return r;
}
last_flags = kvm->arch.iommu_flags;
if (iommu_domain_has_cap(kvm->arch.iommu_domain,
IOMMU_CAP_CACHE_COHERENCY))
kvm->arch.iommu_flags |= KVM_IOMMU_CACHE_COHERENCY;
/* Check if need to update IOMMU page table for guest memory */
if ((last_flags ^ kvm->arch.iommu_flags) ==
KVM_IOMMU_CACHE_COHERENCY) {
kvm_iommu_unmap_memslots(kvm);
r = kvm_iommu_map_memslots(kvm);
if (r)
goto out_unmap;
}
pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
printk(KERN_DEBUG "assign device %x:%x:%x.%x\n",
assigned_dev->host_segnr,
assigned_dev->host_busnr,
PCI_SLOT(assigned_dev->host_devfn),
PCI_FUNC(assigned_dev->host_devfn));
return 0;
out_unmap:
kvm_iommu_unmap_memslots(kvm);
return r;
}
int kvm_deassign_device(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev)
{
struct iommu_domain *domain = kvm->arch.iommu_domain;
struct pci_dev *pdev = NULL;
/* check if iommu exists and in use */
if (!domain)
return 0;
pdev = assigned_dev->dev;
if (pdev == NULL)
return -ENODEV;
iommu_detach_device(domain, &pdev->dev);
pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n",
assigned_dev->host_segnr,
assigned_dev->host_busnr,
PCI_SLOT(assigned_dev->host_devfn),
PCI_FUNC(assigned_dev->host_devfn));
return 0;
}
int kvm_iommu_map_guest(struct kvm *kvm)
{
int r;
if (!iommu_present(&pci_bus_type)) {
printk(KERN_ERR "%s: iommu not found\n", __func__);
return -ENODEV;
}
mutex_lock(&kvm->slots_lock);
kvm->arch.iommu_domain = iommu_domain_alloc(&pci_bus_type, 0);
if (!kvm->arch.iommu_domain) {
r = -ENOMEM;
goto out_unlock;
}
if (!allow_unsafe_assigned_interrupts &&
!iommu_domain_has_cap(kvm->arch.iommu_domain,
IOMMU_CAP_INTR_REMAP)) {
printk(KERN_WARNING "%s: No interrupt remapping support,"
" disallowing device assignment."
" Re-enble with \"allow_unsafe_assigned_interrupts=1\""
" module option.\n", __func__);
iommu_domain_free(kvm->arch.iommu_domain);
kvm->arch.iommu_domain = NULL;
r = -EPERM;
goto out_unlock;
}
r = kvm_iommu_map_memslots(kvm);
if (r)
kvm_iommu_unmap_memslots(kvm);
out_unlock:
mutex_unlock(&kvm->slots_lock);
return r;
}
static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages)
{
unsigned long i;
for (i = 0; i < npages; ++i)
kvm_release_pfn_clean(pfn + i);
}
static void kvm_iommu_put_pages(struct kvm *kvm,
gfn_t base_gfn, unsigned long npages)
{
struct iommu_domain *domain;
gfn_t end_gfn, gfn;
pfn_t pfn;
u64 phys;
domain = kvm->arch.iommu_domain;
end_gfn = base_gfn + npages;
gfn = base_gfn;
/* check if iommu exists and in use */
if (!domain)
return;
while (gfn < end_gfn) {
unsigned long unmap_pages;
size_t size;
/* Get physical address */
phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
pfn = phys >> PAGE_SHIFT;
/* Unmap address from IO address space */
size = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE);
unmap_pages = 1ULL << get_order(size);
/* Unpin all pages we just unmapped to not leak any memory */
kvm_unpin_pages(kvm, pfn, unmap_pages);
gfn += unmap_pages;
}
}
void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
{
kvm_iommu_put_pages(kvm, slot->base_gfn, slot->npages);
}
static int kvm_iommu_unmap_memslots(struct kvm *kvm)
{
int idx;
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
idx = srcu_read_lock(&kvm->srcu);
slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, slots)
kvm_iommu_unmap_pages(kvm, memslot);
srcu_read_unlock(&kvm->srcu, idx);
return 0;
}
int kvm_iommu_unmap_guest(struct kvm *kvm)
{
struct iommu_domain *domain = kvm->arch.iommu_domain;
/* check if iommu exists and in use */
if (!domain)
return 0;
mutex_lock(&kvm->slots_lock);
kvm_iommu_unmap_memslots(kvm);
kvm->arch.iommu_domain = NULL;
mutex_unlock(&kvm->slots_lock);
iommu_domain_free(domain);
return 0;
}
+474
View File
@@ -0,0 +1,474 @@
/*
* irq_comm.c: Common API for in kernel interrupt controller
* Copyright (c) 2007, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place - Suite 330, Boston, MA 02111-1307 USA.
* Authors:
* Yaozu (Eddie) Dong <Eddie.dong@intel.com>
*
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
*/
#include <linux/kvm_host.h>
#include <linux/slab.h>
#include <trace/events/kvm.h>
#include <asm/msidef.h>
#ifdef CONFIG_IA64
#include <asm/iosapic.h>
#endif
#include "irq.h"
#include "ioapic.h"
static inline int kvm_irq_line_state(unsigned long *irq_state,
int irq_source_id, int level)
{
/* Logical OR for level trig interrupt */
if (level)
set_bit(irq_source_id, irq_state);
else
clear_bit(irq_source_id, irq_state);
return !!(*irq_state);
}
static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level)
{
#ifdef CONFIG_X86
struct kvm_pic *pic = pic_irqchip(kvm);
level = kvm_irq_line_state(&pic->irq_states[e->irqchip.pin],
irq_source_id, level);
return kvm_pic_set_irq(pic, e->irqchip.pin, level);
#else
return -1;
#endif
}
static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level)
{
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
level = kvm_irq_line_state(&ioapic->irq_states[e->irqchip.pin],
irq_source_id, level);
return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, level);
}
inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
{
#ifdef CONFIG_IA64
return irq->delivery_mode ==
(IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
#else
return irq->delivery_mode == APIC_DM_LOWEST;
#endif
}
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq)
{
int i, r = -1;
struct kvm_vcpu *vcpu, *lowest = NULL;
if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
kvm_is_dm_lowest_prio(irq))
printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
kvm_for_each_vcpu(i, vcpu, kvm) {
if (!kvm_apic_present(vcpu))
continue;
if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
irq->dest_id, irq->dest_mode))
continue;
if (!kvm_is_dm_lowest_prio(irq)) {
if (r < 0)
r = 0;
r += kvm_apic_set_irq(vcpu, irq);
} else if (kvm_lapic_enabled(vcpu)) {
if (!lowest)
lowest = vcpu;
else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
lowest = vcpu;
}
}
if (lowest)
r = kvm_apic_set_irq(lowest, irq);
return r;
}
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level)
{
struct kvm_lapic_irq irq;
if (!level)
return -1;
trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
irq.dest_id = (e->msi.address_lo &
MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
irq.vector = (e->msi.data &
MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
irq.dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
irq.trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
irq.delivery_mode = e->msi.data & 0x700;
irq.level = 1;
irq.shorthand = 0;
/* TODO Deal with RH bit of MSI message address */
return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
}
/*
* Return value:
* < 0 Interrupt was ignored (masked or not delivered for other reasons)
* = 0 Interrupt was coalesced (previous irq is still pending)
* > 0 Number of CPUs interrupt was delivered to
*/
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level)
{
struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
int ret = -1, i = 0;
struct kvm_irq_routing_table *irq_rt;
struct hlist_node *n;
trace_kvm_set_irq(irq, level, irq_source_id);
/* Not possible to detect if the guest uses the PIC or the
* IOAPIC. So set the bit in both. The guest will ignore
* writes to the unused one.
*/
rcu_read_lock();
irq_rt = rcu_dereference(kvm->irq_routing);
if (irq < irq_rt->nr_rt_entries)
hlist_for_each_entry(e, n, &irq_rt->map[irq], link)
irq_set[i++] = *e;
rcu_read_unlock();
while(i--) {
int r;
r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level);
if (r < 0)
continue;
ret = r + ((ret < 0) ? 0 : ret);
}
return ret;
}
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
struct kvm_irq_ack_notifier *kian;
struct hlist_node *n;
int gsi;
trace_kvm_ack_irq(irqchip, pin);
rcu_read_lock();
gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
if (gsi != -1)
hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list,
link)
if (kian->gsi == gsi)
kian->irq_acked(kian);
rcu_read_unlock();
}
void kvm_register_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian)
{
mutex_lock(&kvm->irq_lock);
hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
mutex_unlock(&kvm->irq_lock);
}
void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian)
{
mutex_lock(&kvm->irq_lock);
hlist_del_init_rcu(&kian->link);
mutex_unlock(&kvm->irq_lock);
synchronize_rcu();
}
int kvm_request_irq_source_id(struct kvm *kvm)
{
unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
int irq_source_id;
mutex_lock(&kvm->irq_lock);
irq_source_id = find_first_zero_bit(bitmap, BITS_PER_LONG);
if (irq_source_id >= BITS_PER_LONG) {
printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n");
irq_source_id = -EFAULT;
goto unlock;
}
ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
set_bit(irq_source_id, bitmap);
unlock:
mutex_unlock(&kvm->irq_lock);
return irq_source_id;
}
void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
{
int i;
ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
mutex_lock(&kvm->irq_lock);
if (irq_source_id < 0 ||
irq_source_id >= BITS_PER_LONG) {
printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
goto unlock;
}
clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
if (!irqchip_in_kernel(kvm))
goto unlock;
for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) {
clear_bit(irq_source_id, &kvm->arch.vioapic->irq_states[i]);
if (i >= 16)
continue;
#ifdef CONFIG_X86
clear_bit(irq_source_id, &pic_irqchip(kvm)->irq_states[i]);
#endif
}
unlock:
mutex_unlock(&kvm->irq_lock);
}
void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
struct kvm_irq_mask_notifier *kimn)
{
mutex_lock(&kvm->irq_lock);
kimn->irq = irq;
hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
mutex_unlock(&kvm->irq_lock);
}
void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
struct kvm_irq_mask_notifier *kimn)
{
mutex_lock(&kvm->irq_lock);
hlist_del_rcu(&kimn->link);
mutex_unlock(&kvm->irq_lock);
synchronize_rcu();
}
void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
bool mask)
{
struct kvm_irq_mask_notifier *kimn;
struct hlist_node *n;
int gsi;
rcu_read_lock();
gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
if (gsi != -1)
hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
if (kimn->irq == gsi)
kimn->func(kimn, mask);
rcu_read_unlock();
}
void kvm_free_irq_routing(struct kvm *kvm)
{
/* Called only during vm destruction. Nobody can use the pointer
at this stage */
kfree(kvm->irq_routing);
}
static int setup_routing_entry(struct kvm_irq_routing_table *rt,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
int r = -EINVAL;
int delta;
unsigned max_pin;
struct kvm_kernel_irq_routing_entry *ei;
struct hlist_node *n;
/*
* Do not allow GSI to be mapped to the same irqchip more than once.
* Allow only one to one mapping between GSI and MSI.
*/
hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link)
if (ei->type == KVM_IRQ_ROUTING_MSI ||
ue->u.irqchip.irqchip == ei->irqchip.irqchip)
return r;
e->gsi = ue->gsi;
e->type = ue->type;
switch (ue->type) {
case KVM_IRQ_ROUTING_IRQCHIP:
delta = 0;
switch (ue->u.irqchip.irqchip) {
case KVM_IRQCHIP_PIC_MASTER:
e->set = kvm_set_pic_irq;
max_pin = 16;
break;
case KVM_IRQCHIP_PIC_SLAVE:
e->set = kvm_set_pic_irq;
max_pin = 16;
delta = 8;
break;
case KVM_IRQCHIP_IOAPIC:
max_pin = KVM_IOAPIC_NUM_PINS;
e->set = kvm_set_ioapic_irq;
break;
default:
goto out;
}
e->irqchip.irqchip = ue->u.irqchip.irqchip;
e->irqchip.pin = ue->u.irqchip.pin + delta;
if (e->irqchip.pin >= max_pin)
goto out;
rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
break;
case KVM_IRQ_ROUTING_MSI:
e->set = kvm_set_msi;
e->msi.address_lo = ue->u.msi.address_lo;
e->msi.address_hi = ue->u.msi.address_hi;
e->msi.data = ue->u.msi.data;
break;
default:
goto out;
}
hlist_add_head(&e->link, &rt->map[e->gsi]);
r = 0;
out:
return r;
}
int kvm_set_irq_routing(struct kvm *kvm,
const struct kvm_irq_routing_entry *ue,
unsigned nr,
unsigned flags)
{
struct kvm_irq_routing_table *new, *old;
u32 i, j, nr_rt_entries = 0;
int r;
for (i = 0; i < nr; ++i) {
if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES)
return -EINVAL;
nr_rt_entries = max(nr_rt_entries, ue[i].gsi);
}
nr_rt_entries += 1;
new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head))
+ (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
GFP_KERNEL);
if (!new)
return -ENOMEM;
new->rt_entries = (void *)&new->map[nr_rt_entries];
new->nr_rt_entries = nr_rt_entries;
for (i = 0; i < 3; i++)
for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++)
new->chip[i][j] = -1;
for (i = 0; i < nr; ++i) {
r = -EINVAL;
if (ue->flags)
goto out;
r = setup_routing_entry(new, &new->rt_entries[i], ue);
if (r)
goto out;
++ue;
}
mutex_lock(&kvm->irq_lock);
old = kvm->irq_routing;
kvm_irq_routing_update(kvm, new);
mutex_unlock(&kvm->irq_lock);
synchronize_rcu();
new = old;
r = 0;
out:
kfree(new);
return r;
}
#define IOAPIC_ROUTING_ENTRY(irq) \
{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \
.u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) }
#define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq)
#ifdef CONFIG_X86
# define PIC_ROUTING_ENTRY(irq) \
{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \
.u.irqchip.irqchip = SELECT_PIC(irq), .u.irqchip.pin = (irq) % 8 }
# define ROUTING_ENTRY2(irq) \
IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq)
#else
# define ROUTING_ENTRY2(irq) \
IOAPIC_ROUTING_ENTRY(irq)
#endif
static const struct kvm_irq_routing_entry default_routing[] = {
ROUTING_ENTRY2(0), ROUTING_ENTRY2(1),
ROUTING_ENTRY2(2), ROUTING_ENTRY2(3),
ROUTING_ENTRY2(4), ROUTING_ENTRY2(5),
ROUTING_ENTRY2(6), ROUTING_ENTRY2(7),
ROUTING_ENTRY2(8), ROUTING_ENTRY2(9),
ROUTING_ENTRY2(10), ROUTING_ENTRY2(11),
ROUTING_ENTRY2(12), ROUTING_ENTRY2(13),
ROUTING_ENTRY2(14), ROUTING_ENTRY2(15),
ROUTING_ENTRY1(16), ROUTING_ENTRY1(17),
ROUTING_ENTRY1(18), ROUTING_ENTRY1(19),
ROUTING_ENTRY1(20), ROUTING_ENTRY1(21),
ROUTING_ENTRY1(22), ROUTING_ENTRY1(23),
#ifdef CONFIG_IA64
ROUTING_ENTRY1(24), ROUTING_ENTRY1(25),
ROUTING_ENTRY1(26), ROUTING_ENTRY1(27),
ROUTING_ENTRY1(28), ROUTING_ENTRY1(29),
ROUTING_ENTRY1(30), ROUTING_ENTRY1(31),
ROUTING_ENTRY1(32), ROUTING_ENTRY1(33),
ROUTING_ENTRY1(34), ROUTING_ENTRY1(35),
ROUTING_ENTRY1(36), ROUTING_ENTRY1(37),
ROUTING_ENTRY1(38), ROUTING_ENTRY1(39),
ROUTING_ENTRY1(40), ROUTING_ENTRY1(41),
ROUTING_ENTRY1(42), ROUTING_ENTRY1(43),
ROUTING_ENTRY1(44), ROUTING_ENTRY1(45),
ROUTING_ENTRY1(46), ROUTING_ENTRY1(47),
#endif
};
int kvm_setup_default_irq_routing(struct kvm *kvm)
{
return kvm_set_irq_routing(kvm, default_routing,
ARRAY_SIZE(default_routing), 0);
}
File diff suppressed because it is too large Load Diff