Commit 8aa1e1e6 authored by Moti Haimovski, committed by Oded Gabbay
Browse files

habanalabs: add gaudi2 MMU support



Gaudi2 has new MMU units: a PMMU for device->host accesses, and an HMMU
for HBM accesses.

The page tables of both MMUs are located in the host's memory (referred
to in the code as host-resident pgt).

Signed-off-by: Moti Haimovski <mhaimovski@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
parent f73c6376
Loading
Loading
Loading
Loading
+73 −4
Original line number Diff line number Diff line
@@ -450,7 +450,7 @@ static int mmu_show(struct seq_file *s, void *data)
	if (hl_mmu_get_tlb_info(ctx, virt_addr, &hops_info)) {
		dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
				virt_addr);
		return 0;
		goto put_ctx;
	}

	hl_mmu_va_to_pa(ctx, virt_addr, &phys_addr);
@@ -476,6 +476,10 @@ static int mmu_show(struct seq_file *s, void *data)
				i, hops_info.hop_info[i].hop_pte_val);
	}

put_ctx:
	if (dev_entry->mmu_asid != HL_KERNEL_ASID_ID)
		hl_ctx_put(ctx);

	return 0;
}

@@ -522,6 +526,66 @@ static ssize_t mmu_asid_va_write(struct file *file, const char __user *buf,
	return -EINVAL;
}

/*
 * mmu_ack_error() - seq_file show handler of the "mmu_error" debugfs node.
 * @s: seq_file whose private data is the hl_debugfs_entry of this node.
 * @data: unused.
 *
 * Hands the capability mask stored in the debugfs device entry
 * (mmu_cap_mask) to the ASIC ack_mmu_errors() callback.
 *
 * Return: 0 when the MMU is disabled or the callback returned 0; -EINVAL when
 * no mask has been set or the callback returned a non-zero value (the
 * callback's own return value is not forwarded to the caller).
 */
static int mmu_ack_error(struct seq_file *s, void *data)
{
	struct hl_debugfs_entry *entry = s->private;
	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
	struct hl_device *hdev = dev_entry->hdev;
	int ack_rc;

	/* Nothing to acknowledge while the MMU is off */
	if (!hdev->mmu_enable)
		return 0;

	/* A zero mask means no capability was selected for this node yet */
	if (!dev_entry->mmu_cap_mask) {
		dev_err(hdev->dev, "mmu_cap_mask is not set\n");
		return -EINVAL;
	}

	ack_rc = hdev->asic_funcs->ack_mmu_errors(hdev, dev_entry->mmu_cap_mask);

	/* Any failure of the callback is reported uniformly as -EINVAL */
	return ack_rc ? -EINVAL : 0;
}

/*
 * mmu_ack_error_value_write() - debugfs write handler of the "mmu_error" node.
 * @file: file whose private data is the seq_file of this node.
 * @buf: user buffer holding the mask as a hexadecimal string.
 * @count: number of bytes in @buf.
 * @f_pos: unused.
 *
 * Parses the user string and stores the result in the debugfs device entry's
 * mmu_cap_mask. The string must begin with a lowercase "0x" prefix and must
 * be shorter than MMU_KBUF_SIZE bytes.
 *
 * Return: @count when the value was stored or when the MMU is disabled (the
 * input is then ignored); -EINVAL, after logging a usage hint, when the input
 * is too long, cannot be copied from user space, lacks the "0x" prefix or
 * fails to parse.
 */
static ssize_t mmu_ack_error_value_write(struct file *file,
		const char __user *buf,
		size_t count, loff_t *f_pos)
{
	struct seq_file *s = file->private_data;
	struct hl_debugfs_entry *entry = s->private;
	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
	struct hl_device *hdev = dev_entry->hdev;
	char kbuf[MMU_KBUF_SIZE];

	if (!hdev->mmu_enable)
		return count;

	/* One byte of kbuf is reserved for the terminating NUL */
	if (count >= sizeof(kbuf) || copy_from_user(kbuf, buf, count))
		goto bad_input;

	kbuf[count] = '\0';

	/* The prefix is mandatory even though base 16 parsing would not need it */
	if (strncmp(kbuf, "0x", 2) != 0)
		goto bad_input;

	if (kstrtoull(kbuf, 16, &dev_entry->mmu_cap_mask))
		goto bad_input;

	return count;

bad_input:
	dev_err(hdev->dev, "usage: echo <0xmmu_cap_mask > > mmu_error\n");

	return -EINVAL;
}

static int engines_show(struct seq_file *s, void *data)
{
	struct hl_debugfs_entry *entry = s->private;
@@ -667,7 +731,8 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size,
		dev_err(hdev->dev,
			"virt addr 0x%llx is not mapped\n",
			virt_addr);
		return -EINVAL;
		rc = -EINVAL;
		goto put_ctx;
	}

	rc = hl_mmu_va_to_pa(ctx, virt_addr, phys_addr);
@@ -678,6 +743,9 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size,
		rc = -EINVAL;
	}

put_ctx:
	hl_ctx_put(ctx);

	return rc;
}

@@ -1462,7 +1530,8 @@ static const struct hl_info_list hl_debugfs_list[] = {
	{"vm", vm_show, NULL},
	{"userptr_lookup", userptr_lookup_show, userptr_lookup_write},
	{"mmu", mmu_show, mmu_asid_va_write},
	{"engines", engines_show, NULL}
	{"mmu_error", mmu_ack_error, mmu_ack_error_value_write},
	{"engines", engines_show, NULL},
};

static int hl_debugfs_open(struct inode *inode, struct file *file)
+84 −15
Original line number Diff line number Diff line
@@ -104,6 +104,18 @@ enum hl_mmu_page_table_location {
	MMU_NUM_PGT_LOCATIONS	/* num of PGT locations */
};

/**
 * enum hl_mmu_enablement - selects which MMU modules are enabled
 * @MMU_EN_NONE: no MMU module is enabled.
 * @MMU_EN_ALL: all MMU modules are enabled.
 * @MMU_EN_PMMU_ONLY: only the PMMU is enabled, the DMMU is left disabled
 *                    (not applicable to Goya/Gaudi).
 */
enum hl_mmu_enablement {
	MMU_EN_NONE		= 0x0,
	MMU_EN_ALL		= 0x1,
	MMU_EN_PMMU_ONLY	= 0x3,
};

/*
 * HL_RSVD_SOBS 'sync stream' reserved sync objects per QMAN stream
 * HL_RSVD_MONS 'sync stream' reserved monitors per QMAN stream
@@ -238,20 +250,27 @@ struct hl_gen_wait_properties {

/**
 * struct pgt_info - MMU hop page info.
 * @node: hash linked-list node for the pgts shadow hash of pgts.
 * @node: hash linked-list node for the pgts on host (shadow pgts for device resident MMU and
 *        actual pgts for host resident MMU).
 * @phys_addr: physical address of the pgt.
 * @shadow_addr: shadow hop in the host.
 * @virt_addr: host virtual address of the pgt (see above device/host resident).
 * @shadow_addr: shadow hop in the host for device resident MMU.
 * @ctx: pointer to the owner ctx.
 * @num_of_ptes: indicates how many ptes are used in the pgt.
 * @num_of_ptes: indicates how many ptes are used in the pgt. used only for dynamically
 *               allocated HOPs (all HOPs but HOP0)
 *
 * The MMU page tables hierarchy can be placed either on the device's DRAM (in which case shadow
 * pgts will be stored on host memory) or on host memory (in which case no shadow is required).
 *
 * The MMU page tables hierarchy is placed on the DRAM. When a new level (hop)
 * is needed during mapping, a new page is allocated and this structure holds
 * its essential information. During unmapping, if no valid PTEs remained in the
 * page, it is freed with its pgt_info structure.
 * When a new level (hop) is needed during mapping this structure will be used to describe
 * the newly allocated hop as well as to track number of PTEs in it.
 * During unmapping, if no valid PTEs remained in the page of a newly allocated hop, it is
 * freed with its pgt_info structure.
 */
struct pgt_info {
	struct hlist_node	node;
	u64			phys_addr;
	u64			virt_addr;
	u64			shadow_addr;
	struct hl_ctx		*ctx;
	int			num_of_ptes;
@@ -1704,6 +1723,9 @@ struct hl_cs_outcome_store {
 * @mem_hash: holds mapping from virtual address to virtual memory area
 *		descriptor (hl_vm_phys_pg_list or hl_userptr).
 * @mmu_shadow_hash: holds a mapping from shadow address to pgt_info structure.
 * @hr_mmu_phys_hash: if host-resident MMU is used, holds a mapping from
 *                    MMU-hop-page physical address to its host-resident
 *                    pgt_info structure.
 * @hpriv: pointer to the private (Kernel Driver) data of the process (fd).
 * @hdev: pointer to the device structure.
 * @refcount: reference counter for the context. Context is released only when
@@ -1742,6 +1764,7 @@ struct hl_cs_outcome_store {
struct hl_ctx {
	DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS);
	DECLARE_HASHTABLE(mmu_shadow_hash, MMU_HASH_TABLE_BITS);
	DECLARE_HASHTABLE(hr_mmu_phys_hash, MMU_HASH_TABLE_BITS);
	struct hl_fpriv			*hpriv;
	struct hl_device		*hdev;
	struct kref			refcount;
@@ -2199,6 +2222,7 @@ struct hl_debugfs_entry {
 * @state_dump_sem: protects state_dump.
 * @addr: next address to read/write from/to in read/write32.
 * @mmu_addr: next virtual address to translate to physical address in mmu_show.
 * @mmu_cap_mask: mmu hw capability mask, to be used in mmu_ack_error.
 * @userptr_lookup: the target user ptr to look up for on demand.
 * @mmu_asid: ASID to use while translating in mmu_show.
 * @state_dump_head: index of the latest state dump
@@ -2229,6 +2253,7 @@ struct hl_dbg_device_entry {
	struct rw_semaphore		state_dump_sem;
	u64				addr;
	u64				mmu_addr;
	u64				mmu_cap_mask;
	u64				userptr_lookup;
	u32				mmu_asid;
	u32				state_dump_head;
@@ -2612,11 +2637,27 @@ struct hl_mmu_per_hop_info {
struct hl_mmu_hop_info {
	u64 scrambled_vaddr;
	u64 unscrambled_paddr;
	struct hl_mmu_per_hop_info hop_info[MMU_ARCH_5_HOPS];
	struct hl_mmu_per_hop_info hop_info[MMU_ARCH_6_HOPS];
	u32 used_hops;
	enum hl_va_range_type range_type;
};

/**
 * struct hl_hr_mmu_funcs - host resident MMU callbacks provided per device.
 * @get_hop0_pgt_info: return the page table info structure of HOP0 for the
 *                     given context.
 * @get_pgt_info: return the page table info structure of a HOP other than
 *                HOP0, looked up by the hop's physical address.
 * @add_pgt_info: add a page table info structure to the hash, associated with
 *                the given physical address.
 * @get_tlb_mapping_params: retrieve the mapping parameters needed for getting
 *                          the TLB info of a specific mapping.
 */
struct hl_hr_mmu_funcs {
	struct pgt_info *(*get_hop0_pgt_info)(struct hl_ctx *ctx);
	struct pgt_info *(*get_pgt_info)(struct hl_ctx *ctx,
						u64 phys_hop_addr);
	void (*add_pgt_info)(struct hl_ctx *ctx, struct pgt_info *pgt_info,
				dma_addr_t phys_addr);
	int (*get_tlb_mapping_params)(struct hl_device *hdev,
					struct hl_mmu_properties **mmu_prop,
					struct hl_mmu_hop_info *hops,
					u64 virt_addr, bool *is_huge);
};

/**
 * struct hl_mmu_funcs - Device related MMU functions.
 * @init: initialize the MMU module.
@@ -2631,22 +2672,21 @@ struct hl_mmu_hop_info {
 * @get_tlb_info: returns the list of hops and hop-entries used that were
 *                created in order to translate the given virtual address to a
 *                physical one.
 * @hr_funcs: functions specific to host resident MMU.
 */
struct hl_mmu_funcs {
	int (*init)(struct hl_device *hdev);
	void (*fini)(struct hl_device *hdev);
	int (*ctx_init)(struct hl_ctx *ctx);
	void (*ctx_fini)(struct hl_ctx *ctx);
	int (*map)(struct hl_ctx *ctx,
			u64 virt_addr, u64 phys_addr, u32 page_size,
	int (*map)(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
				bool is_dram_addr);
	int (*unmap)(struct hl_ctx *ctx,
			u64 virt_addr, bool is_dram_addr);
	int (*unmap)(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr);
	void (*flush)(struct hl_ctx *ctx);
	void (*swap_out)(struct hl_ctx *ctx);
	void (*swap_in)(struct hl_ctx *ctx);
	int (*get_tlb_info)(struct hl_ctx *ctx,
			u64 virt_addr, struct hl_mmu_hop_info *hops);
	int (*get_tlb_info)(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_info *hops);
	struct hl_hr_mmu_funcs hr_funcs;
};

/**
@@ -3461,10 +3501,39 @@ int hl_mmu_prefetch_cache_range(struct hl_ctx *ctx, u32 flags, u32 asid, u64 va,
u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte);
u64 hl_mmu_get_hop_pte_phys_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop,
					u8 hop_idx, u64 hop_addr, u64 virt_addr);
void hl_mmu_hr_flush(struct hl_ctx *ctx);
int hl_mmu_hr_init(struct hl_device *hdev, struct hl_mmu_hr_priv *hr_priv, u32 hop_table_size,
			u64 pgt_size);
void hl_mmu_hr_fini(struct hl_device *hdev, struct hl_mmu_hr_priv *hr_priv, u32 hop_table_size);
void hl_mmu_hr_free_hop_remove_pgt(struct pgt_info *pgt_info, struct hl_mmu_hr_priv *hr_priv,
				u32 hop_table_size);
u64 hl_mmu_hr_pte_phys_to_virt(struct hl_ctx *ctx, struct pgt_info *pgt, u64 phys_pte_addr,
							u32 hop_table_size);
void hl_mmu_hr_write_pte(struct hl_ctx *ctx, struct pgt_info *pgt_info, u64 phys_pte_addr,
							u64 val, u32 hop_table_size);
void hl_mmu_hr_clear_pte(struct hl_ctx *ctx, struct pgt_info *pgt_info, u64 phys_pte_addr,
							u32 hop_table_size);
int hl_mmu_hr_put_pte(struct hl_ctx *ctx, struct pgt_info *pgt_info, struct hl_mmu_hr_priv *hr_priv,
							u32 hop_table_size);
void hl_mmu_hr_get_pte(struct hl_ctx *ctx, struct hl_hr_mmu_funcs *hr_func, u64 phys_hop_addr);
struct pgt_info *hl_mmu_hr_get_next_hop_pgt_info(struct hl_ctx *ctx,
							struct hl_hr_mmu_funcs *hr_func,
							u64 curr_pte);
struct pgt_info *hl_mmu_hr_alloc_hop(struct hl_ctx *ctx, struct hl_mmu_hr_priv *hr_priv,
							struct hl_hr_mmu_funcs *hr_func,
							struct hl_mmu_properties *mmu_prop);
struct pgt_info *hl_mmu_hr_get_alloc_next_hop(struct hl_ctx *ctx,
							struct hl_mmu_hr_priv *hr_priv,
							struct hl_hr_mmu_funcs *hr_func,
							struct hl_mmu_properties *mmu_prop,
							u64 curr_pte, bool *is_new_hop);
int hl_mmu_hr_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_info *hops,
							struct hl_hr_mmu_funcs *hr_func);
void hl_mmu_swap_out(struct hl_ctx *ctx);
void hl_mmu_swap_in(struct hl_ctx *ctx);
int hl_mmu_if_set_funcs(struct hl_device *hdev);
void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu);
void hl_mmu_v2_hr_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu);
int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr);
int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
			struct hl_mmu_hop_info *hops);
+1 −1
Original line number Diff line number Diff line
@@ -69,7 +69,7 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)

	dram_available_size = prop->dram_size - dram_kmd_size;

	if (hdev->mmu_enable)
	if (hdev->mmu_enable == MMU_EN_ALL)
		hw_ip.dram_size = DIV_ROUND_DOWN_ULL(dram_available_size,
				prop->dram_page_size) * prop->dram_page_size;
	else
+12 −9
Original line number Diff line number Diff line
@@ -2476,18 +2476,21 @@ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
/**
 * va_range_init() - initialize virtual addresses range.
 * @hdev: pointer to the habanalabs device structure.
 * @va_range: pointer to va_range structure.
 * @start: range start address.
 * @end: range end address.
 * @va_ranges: pointer to va_ranges array.
 * @range_type: virtual address range type.
 * @start: range start address, inclusive.
 * @end: range end address, inclusive.
 * @page_size: page size for this va_range.
 *
 * This function does the following:
 * - Initializes the virtual addresses list of the given range with the given
 *   addresses.
 */
static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
				u64 start, u64 end, u32 page_size)
static int va_range_init(struct hl_device *hdev, struct hl_va_range **va_ranges,
				enum hl_va_range_type range_type, u64 start,
				u64 end, u32 page_size)
{
	struct hl_va_range *va_range = va_ranges[range_type];
	int rc;

	INIT_LIST_HEAD(&va_range->list);
@@ -2605,7 +2608,7 @@ static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,

	mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock);

	rc = va_range_init(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST],
	rc = va_range_init(hdev, ctx->va_range, HL_VA_RANGE_TYPE_HOST,
			host_range_start, host_range_end, host_page_size);
	if (rc) {
		dev_err(hdev->dev, "failed to init host vm range\n");
@@ -2616,7 +2619,7 @@ static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
		mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock);

		rc = va_range_init(hdev,
			ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE],
			ctx->va_range, HL_VA_RANGE_TYPE_HOST_HUGE,
			host_huge_range_start, host_huge_range_end,
			host_huge_page_size);
		if (rc) {
@@ -2632,7 +2635,7 @@ static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,

	mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_DRAM]->lock);

	rc = va_range_init(hdev, ctx->va_range[HL_VA_RANGE_TYPE_DRAM],
	rc = va_range_init(hdev, ctx->va_range, HL_VA_RANGE_TYPE_DRAM,
			dram_range_start, dram_range_end, dram_page_size);
	if (rc) {
		dev_err(hdev->dev, "failed to init dram vm range\n");
+2 −1
Original line number Diff line number Diff line
# SPDX-License-Identifier: GPL-2.0-only
HL_COMMON_MMU_FILES := common/mmu/mmu.o common/mmu/mmu_v1.o
HL_COMMON_MMU_FILES := common/mmu/mmu.o common/mmu/mmu_v1.o \
			common/mmu/mmu_v2_hr.o
Loading