Commit 48f06ca4 authored by Alex Williamson's avatar Alex Williamson
Browse files

Merge branch 'v5.16/vfio/colin_xu_igd_opregion_2.0_v8' into v5.16/vfio/next

parents 052493d5 49ba1a29
Loading
Loading
Loading
Loading
+175 −59
Original line number Diff line number Diff line
@@ -25,20 +25,121 @@
#define OPREGION_RVDS		0x3c2
#define OPREGION_VERSION	0x16

/*
 * Private data attached to the IGD OpRegion vfio region.
 */
struct igd_opregion_vbt {
	void *opregion;	/* memremap() of the OpRegion (OPREGION_SIZE bytes) */
	void *vbt_ex;	/* memremap() of the extended VBT, NULL if there is none */
};

/**
 * igd_opregion_shift_copy() - Copy OpRegion to user buffer and shift position.
 * @dst: User buffer ptr to copy to.
 * @off: Offset into @dst to copy to. Increased by @bytes on success.
 * @src: Source buffer to copy from.
 * @pos: Caller's read position. Increased by @bytes on success.
 * @remaining: Bytes still to be copied. Decreased by @bytes on success.
 * @bytes: Number of bytes to copy and to adjust @off, @pos and @remaining by.
 *
 * Copy @bytes bytes from @src to @dst + *@off. When the copy fails, @off,
 * @pos and @remaining are left untouched.
 *
 * Return: 0 on success, -EFAULT otherwise.
 */
static inline int igd_opregion_shift_copy(char __user *dst,
					  loff_t *off,
					  void *src,
					  loff_t *pos,
					  size_t *remaining,
					  size_t bytes)
{
	/* copy_to_user() returns the number of bytes it could not copy */
	if (copy_to_user(dst + (*off), src, bytes))
		return -EFAULT;

	*off += bytes;
	*pos += bytes;
	*remaining -= bytes;

	return 0;
}

static ssize_t vfio_pci_igd_rw(struct vfio_pci_core_device *vdev,
			       char __user *buf, size_t count, loff_t *ppos,
			       bool iswrite)
{
	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
	void *base = vdev->region[i].data;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	struct igd_opregion_vbt *opregionvbt = vdev->region[i].data;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK, off = 0;
	size_t remaining;

	if (pos >= vdev->region[i].size || iswrite)
		return -EINVAL;

	count = min(count, (size_t)(vdev->region[i].size - pos));
	count = min_t(size_t, count, vdev->region[i].size - pos);
	remaining = count;

	/* Copy until OpRegion version */
	if (remaining && pos < OPREGION_VERSION) {
		size_t bytes = min_t(size_t, remaining, OPREGION_VERSION - pos);

		if (igd_opregion_shift_copy(buf, &off,
					    opregionvbt->opregion + pos, &pos,
					    &remaining, bytes))
			return -EFAULT;
	}

	/*
	 * Copy patched (if necessary) OpRegion version.
	 *
	 * pos may point at any byte inside the 2-byte field, so the source
	 * address must be computed with byte granularity. Arithmetic on a
	 * __le16 pointer would step in units of sizeof(__le16) and read past
	 * the local variable.
	 */
	if (remaining && pos < OPREGION_VERSION + sizeof(__le16)) {
		size_t bytes = min_t(size_t, remaining,
				     OPREGION_VERSION + sizeof(__le16) - pos);
		__le16 version = *(__le16 *)(opregionvbt->opregion +
					     OPREGION_VERSION);

		/* Patch to 2.1 if OpRegion 2.0 has extended VBT */
		if (le16_to_cpu(version) == 0x0200 && opregionvbt->vbt_ex)
			version = cpu_to_le16(0x0201);

		if (igd_opregion_shift_copy(buf, &off,
					    (u8 *)&version +
					    (pos - OPREGION_VERSION),
					    &pos, &remaining, bytes))
			return -EFAULT;
	}

	/* Copy until RVDA */
	if (remaining && pos < OPREGION_RVDA) {
		size_t bytes = min_t(size_t, remaining, OPREGION_RVDA - pos);

		if (igd_opregion_shift_copy(buf, &off,
					    opregionvbt->opregion + pos, &pos,
					    &remaining, bytes))
			return -EFAULT;
	}

	/*
	 * Copy modified (if necessary) RVDA: OPREGION_SIZE is reported when
	 * an extended VBT is mapped, 0 otherwise.
	 *
	 * As with the version field, index the local by bytes so that a read
	 * starting in the middle of the 8-byte field stays inside it.
	 */
	if (remaining && pos < OPREGION_RVDA + sizeof(__le64)) {
		size_t bytes = min_t(size_t, remaining,
				     OPREGION_RVDA + sizeof(__le64) - pos);
		__le64 rvda = cpu_to_le64(opregionvbt->vbt_ex ?
					  OPREGION_SIZE : 0);

		if (igd_opregion_shift_copy(buf, &off,
					    (u8 *)&rvda + (pos - OPREGION_RVDA),
					    &pos, &remaining, bytes))
			return -EFAULT;
	}

	/* Copy the rest of OpRegion */
	if (remaining && pos < OPREGION_SIZE) {
		size_t bytes = min_t(size_t, remaining, OPREGION_SIZE - pos);

		if (igd_opregion_shift_copy(buf, &off,
					    opregionvbt->opregion + pos, &pos,
					    &remaining, bytes))
			return -EFAULT;
	}

	if (copy_to_user(buf, base + pos, count))
	/* Copy extended VBT if exists */
	if (remaining &&
	    copy_to_user(buf + off, opregionvbt->vbt_ex + (pos - OPREGION_SIZE),
			 remaining))
		return -EFAULT;

	*ppos += count;
@@ -49,7 +150,13 @@ static ssize_t vfio_pci_igd_rw(struct vfio_pci_core_device *vdev,
/*
 * Release the IGD OpRegion vfio region.
 *
 * region->data is the kzalloc()'ed struct igd_opregion_vbt, not a mapping,
 * so it must be kfree()'d; only the mappings it holds are memunmap()'ed.
 */
static void vfio_pci_igd_release(struct vfio_pci_core_device *vdev,
				 struct vfio_pci_region *region)
{
	struct igd_opregion_vbt *opregionvbt = region->data;

	/* The extended VBT mapping only exists for some OpRegion 2.0+ hosts */
	if (opregionvbt->vbt_ex)
		memunmap(opregionvbt->vbt_ex);

	memunmap(opregionvbt->opregion);
	kfree(opregionvbt);
}

static const struct vfio_pci_regops vfio_pci_igd_regops = {
@@ -61,7 +168,7 @@ static int vfio_pci_igd_opregion_init(struct vfio_pci_core_device *vdev)
{
	__le32 *dwordp = (__le32 *)(vdev->vconfig + OPREGION_PCI_ADDR);
	u32 addr, size;
	void *base;
	struct igd_opregion_vbt *opregionvbt;
	int ret;
	u16 version;

@@ -72,84 +179,93 @@ static int vfio_pci_igd_opregion_init(struct vfio_pci_core_device *vdev)
	if (!addr || !(~addr))
		return -ENODEV;

	base = memremap(addr, OPREGION_SIZE, MEMREMAP_WB);
	if (!base)
	opregionvbt = kzalloc(sizeof(*opregionvbt), GFP_KERNEL);
	if (!opregionvbt)
		return -ENOMEM;

	if (memcmp(base, OPREGION_SIGNATURE, 16)) {
		memunmap(base);
	opregionvbt->opregion = memremap(addr, OPREGION_SIZE, MEMREMAP_WB);
	if (!opregionvbt->opregion) {
		kfree(opregionvbt);
		return -ENOMEM;
	}

	if (memcmp(opregionvbt->opregion, OPREGION_SIGNATURE, 16)) {
		memunmap(opregionvbt->opregion);
		kfree(opregionvbt);
		return -EINVAL;
	}

	size = le32_to_cpu(*(__le32 *)(base + 16));
	size = le32_to_cpu(*(__le32 *)(opregionvbt->opregion + 16));
	if (!size) {
		memunmap(base);
		memunmap(opregionvbt->opregion);
		kfree(opregionvbt);
		return -EINVAL;
	}

	size *= 1024; /* In KB */

	/*
	 * Support opregion v2.1+
	 * When VBT data exceeds 6KB size and cannot be within mailbox #4, then
	 * the Extended VBT region next to opregion is used to hold the VBT data.
	 * RVDA (Relative Address of VBT Data from Opregion Base) and RVDS
	 * (Raw VBT Data Size) from opregion structure member are used to hold the
	 * address from region base and size of VBT data. RVDA/RVDS are not
	 * defined before opregion 2.0.
	 *
	 * opregion 2.1+: RVDA is unsigned, relative offset from
	 * opregion base, and should point to the end of opregion.
	 * otherwise, exposing to userspace to allow read access to everything between
	 * the OpRegion and VBT is not safe.
	 * RVDS is defined as size in bytes.
	 * OpRegion and VBT:
	 * When VBT data doesn't exceed 6KB, it's stored in Mailbox #4.
	 * When VBT data exceeds 6KB size, Mailbox #4 is no longer large enough
	 * to hold the VBT data, the Extended VBT region is introduced since
	 * OpRegion 2.0 to hold the VBT data. Since OpRegion 2.0, RVDA/RVDS are
	 * introduced to define the extended VBT data location and size.
	 * OpRegion 2.0: RVDA defines the absolute physical address of the
	 *   extended VBT data, RVDS defines the VBT data size.
	 * OpRegion 2.1 and above: RVDA defines the relative address of the
	 *   extended VBT data to OpRegion base, RVDS defines the VBT data size.
	 *
	 * opregion 2.0: rvda is the physical VBT address.
	 * Since rvda is HPA it cannot be directly used in guest.
	 * And it should not be practically available for end user,so it is not supported.
	 * Due to the RVDA definition diff in OpRegion VBT (also the only diff
	 * between 2.0 and 2.1), exposing OpRegion and VBT as a contiguous range
	 * for OpRegion 2.0 and above makes it possible to support the
	 * non-contiguous VBT through a single vfio region. From the r/w ops
	 * view, only a contiguous VBT after an OpRegion with version 2.1+ is
	 * exposed, regardless of whether the host OpRegion is 2.0 or a
	 * non-contiguous 2.1+. The r/w ops shift the actual offset into the
	 * VBT on the fly so that data at the correct position is returned to
	 * the requester.
	 */
	version = le16_to_cpu(*(__le16 *)(base + OPREGION_VERSION));
	version = le16_to_cpu(*(__le16 *)(opregionvbt->opregion +
					  OPREGION_VERSION));
	if (version >= 0x0200) {
		u64 rvda;
		u32 rvds;
		u64 rvda = le64_to_cpu(*(__le64 *)(opregionvbt->opregion +
						   OPREGION_RVDA));
		u32 rvds = le32_to_cpu(*(__le32 *)(opregionvbt->opregion +
						   OPREGION_RVDS));

		rvda = le64_to_cpu(*(__le64 *)(base + OPREGION_RVDA));
		rvds = le32_to_cpu(*(__le32 *)(base + OPREGION_RVDS));
		/* The extended VBT is valid only when RVDA/RVDS are non-zero */
		if (rvda && rvds) {
			/* no support for opregion v2.0 with physical VBT address */
			if (version == 0x0200) {
				memunmap(base);
				pci_err(vdev->pdev,
					"IGD assignment does not support opregion v2.0 with an extended VBT region\n");
				return -EINVAL;
			}

			if (rvda != size) {
				memunmap(base);
				pci_err(vdev->pdev,
					"Extended VBT does not follow opregion on version 0x%04x\n",
					version);
				return -EINVAL;
			}

			/* region size for opregion v2.0+: opregion and VBT size. */
			size += rvds;
		}
	}

	if (size != OPREGION_SIZE) {
		memunmap(base);
		base = memremap(addr, size, MEMREMAP_WB);
		if (!base)
			/*
			 * Extended VBT location by RVDA:
			 * Absolute physical addr for 2.0.
			 * Relative addr to OpRegion header for 2.1+.
			 */
			if (version == 0x0200)
				addr = rvda;
			else
				addr += rvda;

			opregionvbt->vbt_ex = memremap(addr, rvds, MEMREMAP_WB);
			if (!opregionvbt->vbt_ex) {
				memunmap(opregionvbt->opregion);
				kfree(opregionvbt);
				return -ENOMEM;
			}
		}
	}

	ret = vfio_pci_register_dev_region(vdev,
		PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
		VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION,
		&vfio_pci_igd_regops, size, VFIO_REGION_INFO_FLAG_READ, base);
		VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &vfio_pci_igd_regops,
		size, VFIO_REGION_INFO_FLAG_READ, opregionvbt);
	if (ret) {
		memunmap(base);
		if (opregionvbt->vbt_ex)
			memunmap(opregionvbt->vbt_ex);

		memunmap(opregionvbt->opregion);
		kfree(opregionvbt);
		return ret;
	}