Commit 7c10dd0a authored by Ravi Bangoria, committed by Peter Zijlstra
Browse files

perf/x86/amd: Support PERF_SAMPLE_DATA_SRC



struct perf_mem_data_src is used to pass arch-specific memory access
details in a generic form. These details get consumed by tools like
perf mem and c2c. An IBS tagged load/store sample provides most of the
information needed by these tools. Add logic to convert IBS-specific
raw data into perf_mem_data_src.

Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20220928095805.596-4-ravi.bangoria@amd.com
parent 610c2380
Loading
Loading
Loading
Loading
+312 −6
Original line number Diff line number Diff line
@@ -678,6 +678,312 @@ static struct perf_ibs perf_ibs_op = {
	.get_count		= get_ibs_op_count,
};

/*
 * Record the kind of memory operation in data->data_src.mem_op.
 *
 * The load bit of OP_DATA3 is checked before the store bit; when neither
 * is set the operation is reported as PERF_MEM_OP_NA.
 */
static void perf_ibs_get_mem_op(union ibs_op_data3 *op_data3,
				struct perf_sample_data *data)
{
	if (op_data3->ld_op) {
		data->data_src.mem_op = PERF_MEM_OP_LOAD;
		return;
	}

	if (op_data3->st_op) {
		data->data_src.mem_op = PERF_MEM_OP_STORE;
		return;
	}

	/* Neither a load nor a store. */
	data->data_src.mem_op = PERF_MEM_OP_NA;
}

/*
 * Processors with CPUID_Fn8000001B_EAX[11] (aka IBS_CAPS_ZEN4) provide
 * finer-grained DataSrc encodings; all others provide only the coarse one.
 *
 * Returns the low DataSrc field, extended with the high field (placed
 * starting at bit 3) when IBS_CAPS_ZEN4 is set.
 */
static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2)
{
	u8 src = op_data2->data_src_lo;

	if (ibs_caps & IBS_CAPS_ZEN4)
		src |= op_data2->data_src_hi << 3;

	return src;
}

/*
 * Fill data->data_src.mem_lvl (and, for some sources, mem_lvl_num,
 * mem_remote and mem_hops) from the IBS OP_DATA2/OP_DATA3 values.
 *
 * The checks run in priority order and the first match wins: uncached
 * access, L1 hit, L2 hit, then (for loads only) the DataSrc based levels,
 * and finally the MAB fallback. PERF_MEM_LVL_NA is stored when nothing
 * matches.
 *
 * This function reads data_src->mem_op, so the memory operation must
 * already have been recorded in @data before it is called.
 */
static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
				 union ibs_op_data3 *op_data3,
				 struct perf_sample_data *data)
{
	union perf_mem_data_src *data_src = &data->data_src;
	u8 ibs_data_src = perf_ibs_data_src(op_data2);

	/* Start from an empty level mask; each branch below sets its own bits. */
	data_src->mem_lvl = 0;

	/*
	 * DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached
	 * memory accesses. So, check DcUcMemAcc bit early.
	 */
	if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) {
		data_src->mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT;
		return;
	}

	/* L1 Hit */
	if (op_data3->dc_miss == 0) {
		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
		return;
	}

	/* L2 Hit */
	if (op_data3->l2_miss == 0) {
		/*
		 * Erratum #1293: on family 0x19 models <= 0xF the L2 hit is
		 * not reported when sw_pf or dc_miss_no_mab_alloc is set;
		 * such samples fall through to the checks below.
		 */
		if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF ||
		    !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) {
			data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
			return;
		}
	}

	/*
	 * OP_DATA2 is valid only for load ops. Skip all checks which
	 * uses OP_DATA2[DataSrc].
	 */
	if (data_src->mem_op != PERF_MEM_OP_LOAD)
		goto check_mab;

	/* L3 Hit */
	if (ibs_caps & IBS_CAPS_ZEN4) {
		if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE) {
			data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
			return;
		}
	} else {
		/* Coarse encoding: reported as L3 combined with REM_CCE1. */
		if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) {
			data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_REM_CCE1 |
					    PERF_MEM_LVL_HIT;
			return;
		}
	}

	/* A peer cache in a near CCX */
	if (ibs_caps & IBS_CAPS_ZEN4 &&
	    ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE) {
		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
		return;
	}

	/* A peer cache in a far CCX */
	if (ibs_caps & IBS_CAPS_ZEN4) {
		if (ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) {
			data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
			return;
		}
	} else {
		if (ibs_data_src == IBS_DATA_SRC_REM_CACHE) {
			data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
			return;
		}
	}

	/* DRAM: local or remote depending on OP_DATA2[RmtNode] */
	if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM) {
		if (op_data2->rmt_node == 0)
			data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
		else
			data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
		return;
	}

	/* PMEM: only mem_lvl_num is set here, mem_lvl stays 0 */
	if (ibs_caps & IBS_CAPS_ZEN4 && ibs_data_src == IBS_DATA_SRC_EXT_PMEM) {
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_PMEM;
		if (op_data2->rmt_node) {
			data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
			/* IBS doesn't provide Remote socket detail */
			data_src->mem_hops = PERF_MEM_HOPS_1;
		}
		return;
	}

	/* Extension Memory: only mem_lvl_num is set here, mem_lvl stays 0 */
	if (ibs_caps & IBS_CAPS_ZEN4 &&
	    ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM) {
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_EXTN_MEM;
		if (op_data2->rmt_node) {
			data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
			/* IBS doesn't provide Remote socket detail */
			data_src->mem_hops = PERF_MEM_HOPS_1;
		}
		return;
	}

	/* IO: both mem_lvl and mem_lvl_num are set */
	if (ibs_data_src == IBS_DATA_SRC_EXT_IO) {
		data_src->mem_lvl = PERF_MEM_LVL_IO;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
		if (op_data2->rmt_node) {
			data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
			/* IBS doesn't provide Remote socket detail */
			data_src->mem_hops = PERF_MEM_HOPS_1;
		}
		return;
	}

check_mab:
	/*
	 * MAB (Miss Address Buffer) Hit. MAB keeps track of outstanding
	 * DC misses. However, such data may come from any level in mem
	 * hierarchy. IBS provides detail about both MAB as well as actual
	 * DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set
	 * MAB only when IBS fails to provide DataSrc.
	 */
	if (op_data3->dc_miss_no_mab_alloc) {
		data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT;
		return;
	}

	/* Nothing matched: the memory level is unknown. */
	data_src->mem_lvl = PERF_MEM_LVL_NA;
}

/*
 * Report whether OP_DATA2[CacheHitSt] can be used on the boot CPU.
 *
 * The answer is computed on the first call and cached in a function-local
 * static. It is "invalid" for family 0x19 models 0x00-0x0F and 0x20-0x5F,
 * and "valid" for everything else.
 */
static bool perf_ibs_cache_hit_st_valid(void)
{
	/* 0: Uninitialized, 1: Valid, -1: Invalid */
	static int verdict;

	if (unlikely(!verdict)) {
		if (boot_cpu_data.x86 != 0x19)
			verdict = 1;
		else if (boot_cpu_data.x86_model <= 0xF)
			verdict = -1;
		else if (boot_cpu_data.x86_model >= 0x20 &&
			 boot_cpu_data.x86_model <= 0x5F)
			verdict = -1;
		else
			verdict = 1;
	}

	return verdict == 1;
}

static void perf_ibs_get_mem_snoop(union ibs_op_data2 *op_data2,
				   struct perf_sample_data *data)
{
	union perf_mem_data_src *data_src = &data->data_src;
	u8 ibs_data_src;

	data_src->mem_snoop = PERF_MEM_SNOOP_NA;

	if (!perf_ibs_cache_hit_st_valid() ||
	    data_src->mem_op != PERF_MEM_OP_LOAD ||
	    data_src->mem_lvl & PERF_MEM_LVL_L1 ||
	    data_src->mem_lvl & PERF_MEM_LVL_L2 ||
	    op_data2->cache_hit_st)
		return;

	ibs_data_src = perf_ibs_data_src(op_data2);

	if (ibs_caps & IBS_CAPS_ZEN4) {
		if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE ||
		    ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE ||
		    ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE)
			data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
	} else if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) {
		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
	}
}

/*
 * Fill data->data_src.mem_dtlb from the OP_DATA3 TLB bits.
 *
 * Without a valid linear address the result is PERF_MEM_TLB_NA. Otherwise
 * the first level that did not miss is reported as a hit, and a miss in
 * both levels is reported as an L2 TLB miss.
 */
static void perf_ibs_get_tlb_lvl(union ibs_op_data3 *op_data3,
				 struct perf_sample_data *data)
{
	union perf_mem_data_src *src = &data->data_src;

	if (!op_data3->dc_lin_addr_valid)
		src->mem_dtlb = PERF_MEM_TLB_NA;
	else if (!op_data3->dc_l1tlb_miss)
		src->mem_dtlb = PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT;
	else if (!op_data3->dc_l2tlb_miss)
		src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT;
	else
		src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_MISS;
}

/*
 * Fill data->data_src.mem_lock: PERF_MEM_LOCK_LOCKED when the OP_DATA3
 * dc_locked_op bit is set, PERF_MEM_LOCK_NA otherwise.
 */
static void perf_ibs_get_mem_lock(union ibs_op_data3 *op_data3,
				  struct perf_sample_data *data)
{
	data->data_src.mem_lock = op_data3->dc_locked_op ?
				  PERF_MEM_LOCK_LOCKED : PERF_MEM_LOCK_NA;
}

/*
 * Offset of @msr from MSR_AMD64_IBSOPCTL. The argument is parenthesized so
 * that expressions containing lower-precedence operators expand correctly.
 */
#define ibs_op_msr_idx(msr)	((msr) - MSR_AMD64_IBSOPCTL)

/*
 * Populate the remaining fields of data->data_src (memory level, snoop,
 * TLB level and lock) from the already extracted OP_DATA2/OP_DATA3 values.
 *
 * The call order matters: perf_ibs_get_mem_snoop() reads the mem_lvl value
 * that perf_ibs_get_mem_lvl() stores, so the level must be computed first.
 *
 * @ibs_data is not used by this function.
 */
static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data,
				  struct perf_sample_data *data,
				  union ibs_op_data2 *op_data2,
				  union ibs_op_data3 *op_data3)
{
	perf_ibs_get_mem_lvl(op_data2, op_data3, data);
	perf_ibs_get_mem_snoop(op_data2, data);
	perf_ibs_get_tlb_lvl(op_data3, data);
	perf_ibs_get_mem_lock(op_data3, data);
}

static __u64 perf_ibs_get_op_data2(struct perf_ibs_data *ibs_data,
				   union ibs_op_data3 *op_data3)
{
	__u64 val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA2)];

	/* Erratum #1293 */
	if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model <= 0xF &&
	    (op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) {
		/*
		 * OP_DATA2 has only two fields on Zen3: DataSrc and RmtNode.
		 * DataSrc=0 is 'No valid status' and RmtNode is invalid when
		 * DataSrc=0.
		 */
		val = 0;
	}
	return val;
}

/*
 * Decode a load/store IBS Op sample into data->data_src.
 *
 * data_src is first reset to PERF_MEM_NA and the memory operation is
 * decoded from OP_DATA3. Samples that are neither loads nor stores are left
 * at that point. For the rest, the remaining data_src fields are filled and
 * PERF_SAMPLE_DATA_SRC is flagged in data->sample_flags, but only when
 * @sample_type requests PERF_SAMPLE_DATA_SRC.
 */
static void perf_ibs_parse_ld_st_data(__u64 sample_type,
				      struct perf_ibs_data *ibs_data,
				      struct perf_sample_data *data)
{
	union ibs_op_data2 op_data2;
	union ibs_op_data3 op_data3;

	op_data3.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)];
	data->data_src.val = PERF_MEM_NA;

	perf_ibs_get_mem_op(&op_data3, data);

	/* Nothing more to decode unless this is a load or a store. */
	if (!(data->data_src.mem_op == PERF_MEM_OP_LOAD ||
	      data->data_src.mem_op == PERF_MEM_OP_STORE))
		return;

	op_data2.val = perf_ibs_get_op_data2(ibs_data, &op_data3);

	if (!(sample_type & PERF_SAMPLE_DATA_SRC))
		return;

	perf_ibs_get_data_src(ibs_data, data, &op_data2, &op_data3);
	data->sample_flags |= PERF_SAMPLE_DATA_SRC;
}

/*
 * Pick the maximum register offset to use for a sample.
 *
 * Returns perf_ibs->offset_max when raw samples are requested, or when this
 * is the Op PMU and PERF_SAMPLE_DATA_SRC is requested. Otherwise returns 3
 * if @check_rip is set and 1 if it is not.
 */
static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, u64 sample_type,
				   int check_rip)
{
	bool want_raw = sample_type & PERF_SAMPLE_RAW;
	bool want_data_src = perf_ibs == &perf_ibs_op &&
			     (sample_type & PERF_SAMPLE_DATA_SRC);

	if (want_raw || want_data_src)
		return perf_ibs->offset_max;

	return check_rip ? 3 : 1;
}

static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
{
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
@@ -725,12 +1031,9 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
	size = 1;
	offset = 1;
	check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
	if (event->attr.sample_type & PERF_SAMPLE_RAW)
		offset_max = perf_ibs->offset_max;
	else if (check_rip)
		offset_max = 3;
	else
		offset_max = 1;

	offset_max = perf_ibs_get_offset_max(perf_ibs, event->attr.sample_type, check_rip);

	do {
		rdmsrl(msr + offset, *buf++);
		size++;
@@ -784,6 +1087,9 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
		data.sample_flags |= PERF_SAMPLE_RAW;
	}

	if (perf_ibs == &perf_ibs_op)
		perf_ibs_parse_ld_st_data(event->attr.sample_type, &ibs_data, &data);

	/*
	 * rip recorded by IbsOpRip will not be consistent with rsp and rbp
	 * recorded as part of interrupt regs. Thus we need to use rip from