Commit c68ea0d0 authored by Christoph Hellwig's avatar Christoph Hellwig Committed by Alex Williamson
Browse files

vfio: simplify iommu group allocation for mediated devices



Reuse the logic in vfio_noiommu_group_alloc to allocate a fake
single-device iommu group for mediated devices by factoring out a common
function, and replacing the noiommu boolean field in struct vfio_group
with an enum to distinguish the three different kinds of groups.

Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarKevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20210924155705.4258-8-hch@lst.de


Signed-off-by: default avatarAlex Williamson <alex.williamson@redhat.com>
parent c04ac340
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -351,7 +351,7 @@ static int vfio_ap_mdev_probe(struct mdev_device *mdev)
	list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
	mutex_unlock(&matrix_dev->lock);

	ret = vfio_register_group_dev(&matrix_mdev->vdev);
	ret = vfio_register_emulated_iommu_dev(&matrix_mdev->vdev);
	if (ret)
		goto err_list;
	dev_set_drvdata(&mdev->dev, matrix_mdev);
+4 −41
Original line number Diff line number Diff line
@@ -13,60 +13,23 @@

#include "mdev_private.h"

static int mdev_attach_iommu(struct mdev_device *mdev)
{
	int ret;
	struct iommu_group *group;

	group = iommu_group_alloc();
	if (IS_ERR(group))
		return PTR_ERR(group);

	ret = iommu_group_add_device(group, &mdev->dev);
	if (!ret)
		dev_info(&mdev->dev, "MDEV: group_id = %d\n",
			 iommu_group_id(group));

	iommu_group_put(group);
	return ret;
}

static void mdev_detach_iommu(struct mdev_device *mdev)
{
	iommu_group_remove_device(&mdev->dev);
	dev_info(&mdev->dev, "MDEV: detaching iommu\n");
}

static int mdev_probe(struct device *dev)
{
	struct mdev_driver *drv =
		container_of(dev->driver, struct mdev_driver, driver);
	struct mdev_device *mdev = to_mdev_device(dev);
	int ret;

	ret = mdev_attach_iommu(mdev);
	if (ret)
		return ret;

	if (drv->probe) {
		ret = drv->probe(mdev);
		if (ret)
			mdev_detach_iommu(mdev);
	}

	return ret;
	if (!drv->probe)
		return 0;
	return drv->probe(to_mdev_device(dev));
}

static void mdev_remove(struct device *dev)
{
	struct mdev_driver *drv =
		container_of(dev->driver, struct mdev_driver, driver);
	struct mdev_device *mdev = to_mdev_device(dev);

	if (drv->remove)
		drv->remove(mdev);

	mdev_detach_iommu(mdev);
		drv->remove(to_mdev_device(dev));
}

static int mdev_match(struct device *dev, struct device_driver *drv)
+1 −1
Original line number Diff line number Diff line
@@ -119,7 +119,7 @@ static int vfio_mdev_probe(struct mdev_device *mdev)
		return -ENOMEM;

	vfio_init_group_dev(vdev, &mdev->dev, &vfio_mdev_dev_ops);
	ret = vfio_register_group_dev(vdev);
	ret = vfio_register_emulated_iommu_dev(vdev);
	if (ret)
		goto out_uninit;

+66 −26
Original line number Diff line number Diff line
@@ -67,6 +67,30 @@ struct vfio_unbound_dev {
	struct list_head		unbound_next;
};

enum vfio_group_type {
	/*
	 * Physical device with IOMMU backing.
	 */
	VFIO_IOMMU,

	/*
	 * Virtual device without IOMMU backing. The VFIO core fakes up an
	 * iommu_group as the iommu_group sysfs interface is part of the
	 * userspace ABI.  The user of these devices must not be able to
	 * directly trigger unmediated DMA.
	 */
	VFIO_EMULATED_IOMMU,

	/*
	 * Physical device without IOMMU backing. The VFIO core fakes up an
	 * iommu_group as the iommu_group sysfs interface is part of the
	 * userspace ABI.  Users can trigger unmediated DMA by the device,
	 * usage is highly dangerous, requires an explicit opt-in and will
	 * taint the kernel.
	 */
	VFIO_NO_IOMMU,
};

struct vfio_group {
	struct kref			kref;
	int				minor;
@@ -83,7 +107,7 @@ struct vfio_group {
	struct mutex			unbound_lock;
	atomic_t			opened;
	wait_queue_head_t		container_q;
	bool				noiommu;
	enum vfio_group_type		type;
	unsigned int			dev_counter;
	struct kvm			*kvm;
	struct blocking_notifier_head	notifier;
@@ -336,7 +360,7 @@ static void vfio_group_unlock_and_free(struct vfio_group *group)
 * Group objects - create, release, get, put, search
 */
static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
		bool noiommu)
		enum vfio_group_type type)
{
	struct vfio_group *group, *tmp;
	struct device *dev;
@@ -355,7 +379,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
	atomic_set(&group->opened, 0);
	init_waitqueue_head(&group->container_q);
	group->iommu_group = iommu_group;
	group->noiommu = noiommu;
	group->type = type;
	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);

	group->nb.notifier_call = vfio_iommu_group_notifier;
@@ -391,8 +415,8 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
	}

	dev = device_create(vfio.class, NULL,
			    MKDEV(MAJOR(vfio.group_devt), minor),
			    group, "%s%d", group->noiommu ? "noiommu-" : "",
			    MKDEV(MAJOR(vfio.group_devt), minor), group, "%s%d",
			    group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
			    iommu_group_id(iommu_group));
	if (IS_ERR(dev)) {
		vfio_free_group_minor(minor);
@@ -778,8 +802,8 @@ void vfio_uninit_group_dev(struct vfio_device *device)
}
EXPORT_SYMBOL_GPL(vfio_uninit_group_dev);

#ifdef CONFIG_VFIO_NOIOMMU
static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev)
static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
		enum vfio_group_type type)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
@@ -794,7 +818,7 @@ static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev)
	if (ret)
		goto out_put_group;

	group = vfio_create_group(iommu_group, true);
	group = vfio_create_group(iommu_group, type);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto out_remove_device;
@@ -808,7 +832,6 @@ static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev)
	iommu_group_put(iommu_group);
	return ERR_PTR(ret);
}
#endif

static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
{
@@ -824,7 +847,7 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
		 * bus.  Taint the kernel because we're about to give a DMA
		 * capable device to a user without IOMMU protection.
		 */
		group = vfio_noiommu_group_alloc(dev);
		group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
		if (!IS_ERR(group)) {
			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
			dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
@@ -841,7 +864,7 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
		goto out_put;

	/* a newly created vfio_group keeps the reference. */
	group = vfio_create_group(iommu_group, false);
	group = vfio_create_group(iommu_group, VFIO_IOMMU);
	if (IS_ERR(group))
		goto out_put;
	return group;
@@ -851,10 +874,13 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
	return group;
}

int vfio_register_group_dev(struct vfio_device *device)
static int __vfio_register_dev(struct vfio_device *device,
		struct vfio_group *group)
{
	struct vfio_device *existing_device;
	struct vfio_group *group;

	if (IS_ERR(group))
		return PTR_ERR(group);

	/*
	 * If the driver doesn't specify a set then the device is added to a
@@ -863,16 +889,13 @@ int vfio_register_group_dev(struct vfio_device *device)
	if (!device->dev_set)
		vfio_assign_device_set(device, device);

	group = vfio_group_find_or_alloc(device->dev);
	if (IS_ERR(group))
		return PTR_ERR(group);

	existing_device = vfio_group_get_device(group, device->dev);
	if (existing_device) {
		dev_WARN(device->dev, "Device already exists on group %d\n",
			 iommu_group_id(group->iommu_group));
		vfio_device_put(existing_device);
		if (group->noiommu)
		if (group->type == VFIO_NO_IOMMU ||
		    group->type == VFIO_EMULATED_IOMMU)
			iommu_group_remove_device(device->dev);
		vfio_group_put(group);
		return -EBUSY;
@@ -891,8 +914,25 @@ int vfio_register_group_dev(struct vfio_device *device)

	return 0;
}

int vfio_register_group_dev(struct vfio_device *device)
{
	return __vfio_register_dev(device,
		vfio_group_find_or_alloc(device->dev));
}
EXPORT_SYMBOL_GPL(vfio_register_group_dev);

/*
 * Register a virtual device without IOMMU backing.  The user of this
 * device must not be able to directly trigger unmediated DMA.
 */
int vfio_register_emulated_iommu_dev(struct vfio_device *device)
{
	return __vfio_register_dev(device,
		vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU));
}
EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);

/**
 * Get a reference to the vfio_device for a device.  Even if the
 * caller thinks they own the device, they could be racing with a
@@ -1019,7 +1059,7 @@ void vfio_unregister_group_dev(struct vfio_device *device)
	if (list_empty(&group->device_list))
		wait_event(group->container_q, !group->container);

	if (group->noiommu)
	if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
		iommu_group_remove_device(device->dev);

	/* Matches the get in vfio_register_group_dev() */
@@ -1368,7 +1408,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
	if (atomic_read(&group->container_users))
		return -EINVAL;

	if (group->noiommu && !capable(CAP_SYS_RAWIO))
	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	f = fdget(container_fd);
@@ -1388,7 +1428,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)

	/* Real groups and fake groups cannot mix */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != group->noiommu) {
	    container->noiommu != (group->type == VFIO_NO_IOMMU)) {
		ret = -EPERM;
		goto unlock_out;
	}
@@ -1402,7 +1442,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
	}

	group->container = container;
	container->noiommu = group->noiommu;
	container->noiommu = (group->type == VFIO_NO_IOMMU);
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
@@ -1426,7 +1466,7 @@ static int vfio_group_add_container_user(struct vfio_group *group)
	if (!atomic_inc_not_zero(&group->container_users))
		return -EINVAL;

	if (group->noiommu) {
	if (group->type == VFIO_NO_IOMMU) {
		atomic_dec(&group->container_users);
		return -EPERM;
	}
@@ -1451,7 +1491,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
	    !group->container->iommu_driver || !vfio_group_viable(group))
		return -EINVAL;

	if (group->noiommu && !capable(CAP_SYS_RAWIO))
	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	device = vfio_device_get_from_name(group, buf);
@@ -1498,7 +1538,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)

	fd_install(fdno, filep);

	if (group->noiommu)
	if (group->type == VFIO_NO_IOMMU)
		dev_warn(device->dev, "vfio-noiommu device opened by user "
			 "(%s:%d)\n", current->comm, task_pid_nr(current));
	return fdno;
@@ -1594,7 +1634,7 @@ static int vfio_group_fops_open(struct inode *inode, struct file *filep)
	if (!group)
		return -ENODEV;

	if (group->noiommu && !capable(CAP_SYS_RAWIO)) {
	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
		vfio_group_put(group);
		return -EPERM;
	}
+1 −0
Original line number Diff line number Diff line
@@ -75,6 +75,7 @@ void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
			 const struct vfio_device_ops *ops);
void vfio_uninit_group_dev(struct vfio_device *device);
int vfio_register_group_dev(struct vfio_device *device);
int vfio_register_emulated_iommu_dev(struct vfio_device *device);
void vfio_unregister_group_dev(struct vfio_device *device);
extern struct vfio_device *vfio_device_get_from_dev(struct device *dev);
extern void vfio_device_put(struct vfio_device *device);
Loading