Commit 0803e040 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio/vhost updates from Michael Tsirkin:

 - new vsock device support in host and guest

 - platform IOMMU support in host and guest, including compatibility
   quirks for legacy systems.

 - misc fixes and cleanups.

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  VSOCK: Use kvfree()
  vhost: split out vringh Kconfig
  vhost: detect 32 bit integer wrap around
  vhost: new device IOTLB API
  vhost: drop vringh dependency
  vhost: convert pre sorted vhost memory array to interval tree
  vhost: introduce vhost memory accessors
  VSOCK: Add Makefile and Kconfig
  VSOCK: Introduce vhost_vsock.ko
  VSOCK: Introduce virtio_transport.ko
  VSOCK: Introduce virtio_vsock_common.ko
  VSOCK: defer sock removal to transports
  VSOCK: transport-specific vsock_transport functions
  vhost: drop vringh dependency
  vop: pull in vhost Kconfig
  virtio: new feature to detect IOMMU device quirk
  balloon: check the number of available pages in leak balloon
  vhost: lockless enqueuing
  vhost: simplify work flushing
parents 80fac0f5 b226acab
......@@ -12419,6 +12419,19 @@ S: Maintained
F: drivers/media/v4l2-core/videobuf2-*
F: include/media/videobuf2-*
VIRTIO AND VHOST VSOCK DRIVER
M: Stefan Hajnoczi <stefanha@redhat.com>
L: kvm@vger.kernel.org
L: virtualization@lists.linux-foundation.org
L: netdev@vger.kernel.org
S: Maintained
F: include/linux/virtio_vsock.h
F: include/uapi/linux/virtio_vsock.h
F: net/vmw_vsock/virtio_transport_common.c
F: net/vmw_vsock/virtio_transport.c
F: drivers/vhost/vsock.c
F: drivers/vhost/vsock.h
VIRTUAL SERIO DEVICE DRIVER
M: Stephen Chandler Paul <thatslyude@gmail.com>
S: Maintained
......
......@@ -138,6 +138,7 @@ obj-$(CONFIG_OF) += of/
obj-$(CONFIG_SSB) += ssb/
obj-$(CONFIG_BCMA) += bcma/
obj-$(CONFIG_VHOST_RING) += vhost/
obj-$(CONFIG_VHOST) += vhost/
obj-$(CONFIG_VLYNQ) += vlynq/
obj-$(CONFIG_STAGING) += staging/
obj-y += platform/
......
......@@ -146,3 +146,7 @@ config VOP
More information about the Intel MIC family as well as the Linux
OS and tools for MIC to use with this driver are available from
<http://software.intel.com/en-us/mic-developer>.
if VOP
source "drivers/vhost/Kconfig.vringh"
endif
......@@ -52,5 +52,5 @@ config CAIF_VIRTIO
The caif driver for CAIF over Virtio.
if CAIF_VIRTIO
source "drivers/vhost/Kconfig"
source "drivers/vhost/Kconfig.vringh"
endif
......@@ -2,7 +2,6 @@ config VHOST_NET
tristate "Host kernel accelerator for virtio net"
depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP)
select VHOST
select VHOST_RING
---help---
This kernel module can be loaded in host kernel to accelerate
guest networking with virtio_net. Not to be confused with virtio_net
......@@ -15,17 +14,24 @@ config VHOST_SCSI
tristate "VHOST_SCSI TCM fabric driver"
depends on TARGET_CORE && EVENTFD && m
select VHOST
select VHOST_RING
default n
---help---
Say M here to enable the vhost_scsi TCM fabric module
for use with virtio-scsi guests
config VHOST_RING
tristate
config VHOST_VSOCK
tristate "vhost virtio-vsock driver"
depends on VSOCKETS && EVENTFD
select VIRTIO_VSOCKETS_COMMON
select VHOST
default n
---help---
This option is selected by any driver which needs to access
the host side of a virtio ring.
This kernel module can be loaded in the host kernel to provide AF_VSOCK
sockets for communicating with guests. The guests must have the
virtio_transport.ko driver loaded to use the virtio-vsock device.
To compile this driver as a module, choose M here: the module will be called
vhost_vsock.
config VHOST
tristate
......
config VHOST_RING
tristate
---help---
This option is selected by any driver which needs to access
the host side of a virtio ring.
......@@ -4,5 +4,9 @@ vhost_net-y := net.o
obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o
vhost_scsi-y := scsi.o
obj-$(CONFIG_VHOST_VSOCK) += vhost_vsock.o
vhost_vsock-y := vsock.o
obj-$(CONFIG_VHOST_RING) += vringh.o
obj-$(CONFIG_VHOST) += vhost.o
......@@ -61,7 +61,8 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
enum {
VHOST_NET_FEATURES = VHOST_FEATURES |
(1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
(1ULL << VIRTIO_NET_F_MRG_RXBUF)
(1ULL << VIRTIO_NET_F_MRG_RXBUF) |
(1ULL << VIRTIO_F_IOMMU_PLATFORM)
};
enum {
......@@ -334,7 +335,7 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
{
unsigned long uninitialized_var(endtime);
int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
out_num, in_num, NULL, NULL);
out_num, in_num, NULL, NULL);
if (r == vq->num && vq->busyloop_timeout) {
preempt_disable();
......@@ -344,7 +345,7 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
cpu_relax_lowlatency();
preempt_enable();
r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
out_num, in_num, NULL, NULL);
out_num, in_num, NULL, NULL);
}
return r;
......@@ -377,6 +378,9 @@ static void handle_tx(struct vhost_net *net)
if (!sock)
goto out;
if (!vq_iotlb_prefetch(vq))
goto out;
vhost_disable_notify(&net->dev, vq);
hdr_size = nvq->vhost_hlen;
......@@ -652,6 +656,10 @@ static void handle_rx(struct vhost_net *net)
sock = vq->private_data;
if (!sock)
goto out;
if (!vq_iotlb_prefetch(vq))
goto out;
vhost_disable_notify(&net->dev, vq);
vhost_net_disable_vq(net, vq);
......@@ -1052,20 +1060,20 @@ static long vhost_net_reset_owner(struct vhost_net *n)
struct socket *tx_sock = NULL;
struct socket *rx_sock = NULL;
long err;
struct vhost_memory *memory;
struct vhost_umem *umem;
mutex_lock(&n->dev.mutex);
err = vhost_dev_check_owner(&n->dev);
if (err)
goto done;
memory = vhost_dev_reset_owner_prepare();
if (!memory) {
umem = vhost_dev_reset_owner_prepare();
if (!umem) {
err = -ENOMEM;
goto done;
}
vhost_net_stop(n, &tx_sock, &rx_sock);
vhost_net_flush(n);
vhost_dev_reset_owner(&n->dev, memory);
vhost_dev_reset_owner(&n->dev, umem);
vhost_net_vq_reset(n);
done:
mutex_unlock(&n->dev.mutex);
......@@ -1096,10 +1104,14 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
}
mutex_lock(&n->dev.mutex);
if ((features & (1 << VHOST_F_LOG_ALL)) &&
!vhost_log_access_ok(&n->dev)) {
mutex_unlock(&n->dev.mutex);
return -EFAULT;
!vhost_log_access_ok(&n->dev))
goto out_unlock;
if ((features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) {
if (vhost_init_device_iotlb(&n->dev, true))
goto out_unlock;
}
for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
mutex_lock(&n->vqs[i].vq.mutex);
n->vqs[i].vq.acked_features = features;
......@@ -1109,6 +1121,10 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
}
mutex_unlock(&n->dev.mutex);
return 0;
out_unlock:
mutex_unlock(&n->dev.mutex);
return -EFAULT;
}
static long vhost_net_set_owner(struct vhost_net *n)
......@@ -1182,9 +1198,40 @@ static long vhost_net_compat_ioctl(struct file *f, unsigned int ioctl,
}
#endif
static ssize_t vhost_net_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *file = iocb->ki_filp;
struct vhost_net *n = file->private_data;
struct vhost_dev *dev = &n->dev;
int noblock = file->f_flags & O_NONBLOCK;
return vhost_chr_read_iter(dev, to, noblock);
}
static ssize_t vhost_net_chr_write_iter(struct kiocb *iocb,
struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct vhost_net *n = file->private_data;
struct vhost_dev *dev = &n->dev;
return vhost_chr_write_iter(dev, from);
}
static unsigned int vhost_net_chr_poll(struct file *file, poll_table *wait)
{
struct vhost_net *n = file->private_data;
struct vhost_dev *dev = &n->dev;
return vhost_chr_poll(file, dev, wait);
}
static const struct file_operations vhost_net_fops = {
.owner = THIS_MODULE,
.release = vhost_net_release,
.read_iter = vhost_net_chr_read_iter,
.write_iter = vhost_net_chr_write_iter,
.poll = vhost_net_chr_poll,
.unlocked_ioctl = vhost_net_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = vhost_net_compat_ioctl,
......
......@@ -27,6 +27,7 @@
#include <linux/cgroup.h>
#include <linux/module.h>
#include <linux/sort.h>
#include <linux/interval_tree_generic.h>
#include "vhost.h"
......@@ -34,6 +35,10 @@ static ushort max_mem_regions = 64;
module_param(max_mem_regions, ushort, 0444);
MODULE_PARM_DESC(max_mem_regions,
"Maximum number of memory regions in memory map. (default: 64)");
static int max_iotlb_entries = 2048;
module_param(max_iotlb_entries, int, 0444);
MODULE_PARM_DESC(max_iotlb_entries,
"Maximum number of iotlb entries. (default: 2048)");
enum {
VHOST_MEMORY_F_LOG = 0x1,
......@@ -42,6 +47,10 @@ enum {
#define vhost_used_event(vq) ((__virtio16 __user *)&vq->avail->ring[vq->num])
#define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num])
INTERVAL_TREE_DEFINE(struct vhost_umem_node,
rb, __u64, __subtree_last,
START, LAST, , vhost_umem_interval_tree);
#ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY
static void vhost_disable_cross_endian(struct vhost_virtqueue *vq)
{
......@@ -131,6 +140,19 @@ static void vhost_reset_is_le(struct vhost_virtqueue *vq)
vq->is_le = virtio_legacy_is_little_endian();
}
struct vhost_flush_struct {
struct vhost_work work;
struct completion wait_event;
};
static void vhost_flush_work(struct vhost_work *work)
{
struct vhost_flush_struct *s;
s = container_of(work, struct vhost_flush_struct, work);
complete(&s->wait_event);
}
static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
poll_table *pt)
{
......@@ -155,11 +177,9 @@ static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync,
void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn)
{
INIT_LIST_HEAD(&work->node);
clear_bit(VHOST_WORK_QUEUED, &work->flags);
work->fn = fn;
init_waitqueue_head(&work->done);
work->flushing = 0;
work->queue_seq = work->done_seq = 0;
}
EXPORT_SYMBOL_GPL(vhost_work_init);
......@@ -211,31 +231,17 @@ void vhost_poll_stop(struct vhost_poll *poll)
}
EXPORT_SYMBOL_GPL(vhost_poll_stop);
static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work,
unsigned seq)
{
int left;
spin_lock_irq(&dev->work_lock);
left = seq - work->done_seq;
spin_unlock_irq(&dev->work_lock);
return left <= 0;
}
void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work)
{
unsigned seq;
int flushing;
struct vhost_flush_struct flush;
spin_lock_irq(&dev->work_lock);
seq = work->queue_seq;
work->flushing++;
spin_unlock_irq(&dev->work_lock);
wait_event(work->done, vhost_work_seq_done(dev, work, seq));
spin_lock_irq(&dev->work_lock);
flushing = --work->flushing;
spin_unlock_irq(&dev->work_lock);
BUG_ON(flushing < 0);
if (dev->worker) {
init_completion(&flush.wait_event);
vhost_work_init(&flush.work, vhost_flush_work);
vhost_work_queue(dev, &flush.work);
wait_for_completion(&flush.wait_event);
}
}
EXPORT_SYMBOL_GPL(vhost_work_flush);
......@@ -249,16 +255,16 @@ EXPORT_SYMBOL_GPL(vhost_poll_flush);
void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
{
unsigned long flags;
if (!dev->worker)
return;
spin_lock_irqsave(&dev->work_lock, flags);
if (list_empty(&work->node)) {
list_add_tail(&work->node, &dev->work_list);
work->queue_seq++;
spin_unlock_irqrestore(&dev->work_lock, flags);
if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
/* We can only add the work to the list after we're
* sure it was not in the list.
*/
smp_mb();
llist_add(&work->node, &dev->work_list);
wake_up_process(dev->worker);
} else {
spin_unlock_irqrestore(&dev->work_lock, flags);
}
}
EXPORT_SYMBOL_GPL(vhost_work_queue);
......@@ -266,7 +272,7 @@ EXPORT_SYMBOL_GPL(vhost_work_queue);
/* A lockless hint for busy polling code to exit the loop */
bool vhost_has_work(struct vhost_dev *dev)
{
return !list_empty(&dev->work_list);
return !llist_empty(&dev->work_list);
}
EXPORT_SYMBOL_GPL(vhost_has_work);
......@@ -300,17 +306,18 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vq->call_ctx = NULL;
vq->call = NULL;
vq->log_ctx = NULL;
vq->memory = NULL;
vhost_reset_is_le(vq);
vhost_disable_cross_endian(vq);
vq->busyloop_timeout = 0;
vq->umem = NULL;
vq->iotlb = NULL;
}
static int vhost_worker(void *data)
{
struct vhost_dev *dev = data;
struct vhost_work *work = NULL;
unsigned uninitialized_var(seq);
struct vhost_work *work, *work_next;
struct llist_node *node;
mm_segment_t oldfs = get_fs();
set_fs(USER_DS);
......@@ -320,35 +327,25 @@ static int vhost_worker(void *data)
/* mb paired w/ kthread_stop */
set_current_state(TASK_INTERRUPTIBLE);
spin_lock_irq(&dev->work_lock);
if (work) {
work->done_seq = seq;
if (work->flushing)
wake_up_all(&work->done);
}
if (kthread_should_stop()) {
spin_unlock_irq(&dev->work_lock);
__set_current_state(TASK_RUNNING);
break;
}
if (!list_empty(&dev->work_list)) {
work = list_first_entry(&dev->work_list,
struct vhost_work, node);
list_del_init(&work->node);
seq = work->queue_seq;
} else
work = NULL;
spin_unlock_irq(&dev->work_lock);
if (work) {
node = llist_del_all(&dev->work_list);
if (!node)
schedule();
node = llist_reverse_order(node);
/* make sure flag is seen after deletion */
smp_wmb();
llist_for_each_entry_safe(work, work_next, node, node) {
clear_bit(VHOST_WORK_QUEUED, &work->flags);
__set_current_state(TASK_RUNNING);
work->fn(work);
if (need_resched())
schedule();
} else
schedule();
}
}
unuse_mm(dev->mm);
set_fs(oldfs);
......@@ -407,11 +404,16 @@ void vhost_dev_init(struct vhost_dev *dev,
mutex_init(&dev->mutex);
dev->log_ctx = NULL;
dev->log_file = NULL;
dev->memory = NULL;
dev->umem = NULL;
dev->iotlb = NULL;
dev->mm = NULL;
spin_lock_init(&dev->work_lock);
INIT_LIST_HEAD(&dev->work_list);
dev->worker = NULL;
init_llist_head(&dev->work_list);
init_waitqueue_head(&dev->wait);
INIT_LIST_HEAD(&dev->read_list);
INIT_LIST_HEAD(&dev->pending_list);
spin_lock_init(&dev->iotlb_lock);
for (i = 0; i < dev->nvqs; ++i) {
vq = dev->vqs[i];
......@@ -512,27 +514,36 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
}
EXPORT_SYMBOL_GPL(vhost_dev_set_owner);
struct vhost_memory *vhost_dev_reset_owner_prepare(void)
static void *vhost_kvzalloc(unsigned long size)
{
return kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL);
void *n = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
if (!n)
n = vzalloc(size);
return n;
}
struct vhost_umem *vhost_dev_reset_owner_prepare(void)
{
return vhost_kvzalloc(sizeof(struct vhost_umem));
}
EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare);
/* Caller should have device mutex */
void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_memory *memory)
void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_umem *umem)
{
int i;
vhost_dev_cleanup(dev, true);
/* Restore memory to default empty mapping. */
memory->nregions = 0;
dev->memory = memory;
INIT_LIST_HEAD(&umem->umem_list);
dev->umem = umem;
/* We don't need VQ locks below since vhost_dev_cleanup makes sure
* VQs aren't running.
*/
for (i = 0; i < dev->nvqs; ++i)
dev->vqs[i]->memory = memory;
dev->vqs[i]->umem = umem;
}
EXPORT_SYMBOL_GPL(vhost_dev_reset_owner);
......@@ -549,6 +560,47 @@ void vhost_dev_stop(struct vhost_dev *dev)
}
EXPORT_SYMBOL_GPL(vhost_dev_stop);
static void vhost_umem_free(struct vhost_umem *umem,
struct vhost_umem_node *node)
{
vhost_umem_interval_tree_remove(node, &umem->umem_tree);
list_del(&node->link);
kfree(node);
umem->numem--;
}
static void vhost_umem_clean(struct vhost_umem *umem)
{
struct vhost_umem_node *node, *tmp;
if (!umem)
return;
list_for_each_entry_safe(node, tmp, &umem->umem_list, link)
vhost_umem_free(umem, node);
kvfree(umem);
}
static void vhost_clear_msg(struct vhost_dev *dev)
{
struct vhost_msg_node *node, *n;
spin_lock(&dev->iotlb_lock);
list_for_each_entry_safe(node, n, &dev->read_list, node) {
list_del(&node->node);
kfree(node);
}
list_for_each_entry_safe(node, n, &dev->pending_list, node) {
list_del(&node->node);
kfree(node);
}
spin_unlock(&dev->iotlb_lock);
}
/* Caller should have device mutex if and only if locked is set */
void vhost_dev_cleanup(struct vhost_dev *dev, bool locked)
{
......@@ -575,9 +627,13 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked)
fput(dev->log_file);
dev->log_file = NULL;
/* No one will access memory at this point */
kvfree(dev->memory);
dev->memory = NULL;
WARN_ON(!list_empty(&dev->work_list));
vhost_umem_clean(dev->umem);
dev->umem = NULL;
vhost_umem_clean(dev->iotlb);
dev->iotlb = NULL;
vhost_clear_msg(dev);
wake_up_interruptible_poll(&dev->wait, POLLIN | POLLRDNORM);
WARN_ON(!llist_empty(&dev->work_list));
if (dev->worker) {
kthread_stop(dev->worker);
dev->worker = NULL;
......@@ -601,26 +657,34 @@ static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
(sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8);
}
static bool vhost_overflow(u64 uaddr, u64 size)
{
/* Make sure 64 bit math will not overflow. */
return uaddr > ULONG_MAX || size > ULONG_MAX || uaddr > ULONG_MAX - size;
}
/* Caller should have vq mutex and device mutex. */
static int vq_memory_access_ok(void __user *log_base, struct vhost_memory *mem,
static int vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem,
int log_all)
{
int i;
struct vhost_umem_node *node;
if (!mem)
if (!umem)
return 0;
for (i = 0; i < mem->nregions; ++i) {
struct vhost_memory_region *m = mem->regions + i;
unsigned long a = m->userspace_addr;
if (m->memory_size > ULONG_MAX)
list_for_each_entry(node, &umem->umem_list, link) {
unsigned long a = node->userspace_addr;
if (vhost_overflow(node->userspace_addr, node->size))
return 0;
else if (!access_ok(VERIFY_WRITE, (void __user *)a,
m->memory_size))
if (!access_ok(VERIFY_WRITE, (void __user *)a,
node->size))
return 0;
else if (log_all && !log_access_ok(log_base,
m->guest_phys_addr,
m->memory_size))
node->start,
node->size))
return 0;
}
return 1;
......@@ -628,7 +692,7 @@ static int vq_memory_access_ok(void __user *log_base, struct vhost_memory *mem,
/* Can we switch to this memory table? */
/* Caller should have device mutex but not vq mutex */
static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem,
int log_all)
{
int i;
......@@ -641,7 +705,8 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
log = log_all || vhost_has_feature(d->vqs[i], VHOST_F_LOG_ALL);
/* If ring is inactive, will check when it's enabled. */
if (d->vqs[i]->private_data)
ok = vq_memory_access_ok(d->vqs[i]->log_base, mem, log);
ok = vq_memory_access_ok(d->vqs[i]->log_base,
umem, log);
else
ok = 1;
mutex_unlock(&d->vqs[i]->mutex);
......@@ -651,12 +716,385 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
return 1;
}
static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
struct iovec iov[], int iov_size, int access);
static int vhost_copy_to_user(struct vhost_virtqueue *vq, void *to,
const void *from, unsigned size)
{
int ret;
if (!vq->iotlb)
return __copy_to_user(to, from, size);
else {
/* This function should be called after iotlb
* prefetch, which means we're sure that all vq
* could be access through iotlb. So -EAGAIN should
* not happen in this case.
*/
/* TODO: more fast path */
struct iov_iter t;
ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov,
ARRAY_SIZE(vq->iotlb_iov),
VHOST_ACCESS_WO);
if (ret < 0)
goto out;
iov_iter_init(&t, WRITE, vq->iotlb_iov, ret, size);
ret = copy_to_iter(from, size, &t);
if (ret == size)
ret = 0;
}
out:
return ret;
}
static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to,
void *from, unsigned size)
{
int ret;
if (!vq->iotlb)
return __copy_from_user(to, from, size);
else {
/* This function should be called after iotlb
* prefetch, which means we're sure that vq
* could be access through iotlb. So -EAGAIN should
* not happen in this case.
*/
/* TODO: more fast path */
struct iov_iter f;
ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov,
ARRAY_SIZE(vq->iotlb_iov),
VHOST_ACCESS_RO);
if (ret < 0) {
vq_err(vq, "IOTLB translation failure: uaddr "
"%p size 0x%llx\n", from,
(unsigned long long) size);
goto out;
}
iov_iter_init(&f, READ, vq->iotlb_iov, ret, size);
ret = copy_from_iter(to, size, &f);
if (ret == size)
ret = 0;
}
out:
return ret;
}
static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
void *addr, unsigned size)
{
int ret;
/* This function should be called after iotlb
* prefetch, which means we're sure that vq
* could be access through iotlb. So -EAGAIN should
* not happen in this case.
*/
/* TODO: more fast path */
ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov,
ARRAY_SIZE(vq->iotlb_iov),
VHOST_ACCESS_RO);
if (ret < 0) {
vq_err(vq, "IOTLB translation failure: uaddr "
"%p size 0x%llx\n", addr,
(unsigned long long) size);