Skip to content

Commit 8bfc20b

Browse files
namhyung authored and Peter Zijlstra committed
perf/x86/ibs: Set mem_lvl_num, mem_remote and mem_hops for data_src
The kernel IBS driver wasn't using the new PERF_MEM_* APIs due to some of their limitations. Mainly:

1. mem_lvl_num doesn't allow setting multiple sources, whereas the old API allows it. Setting multiple data sources is useful because IBS on pre-Zen4 uarch doesn't provide fine-grained DataSrc details (there is only one such DataSrc (2h), though).
2. The perf mem sorting logic (sort__lvl_cmp()) ignores mem_lvl_num. perf c2c (c2c_decode_stats()) does not use mem_lvl_num at all.

The 1st one can be handled using ANY_CACHE with HOPS_0. The 2nd is purely a perf-tool-specific issue and should be fixed separately.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230725150206.184-4-ravi.bangoria@amd.com
1 parent 5c6e623 commit 8bfc20b

File tree

1 file changed

+68
-88
lines changed

1 file changed

+68
-88
lines changed

arch/x86/events/amd/ibs.c

Lines changed: 68 additions & 88 deletions
Original file line number | Diff line number | Diff line change
@@ -728,38 +728,63 @@ static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2)
728728
return op_data2->data_src_lo;
729729
}
730730

731-
static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
732-
union ibs_op_data3 *op_data3,
733-
struct perf_sample_data *data)
731+
#define L(x) (PERF_MEM_S(LVL, x) | PERF_MEM_S(LVL, HIT))
732+
#define LN(x) PERF_MEM_S(LVLNUM, x)
733+
#define REM PERF_MEM_S(REMOTE, REMOTE)
734+
#define HOPS(x) PERF_MEM_S(HOPS, x)
735+
736+
static u64 g_data_src[8] = {
737+
[IBS_DATA_SRC_LOC_CACHE] = L(L3) | L(REM_CCE1) | LN(ANY_CACHE) | HOPS(0),
738+
[IBS_DATA_SRC_DRAM] = L(LOC_RAM) | LN(RAM),
739+
[IBS_DATA_SRC_REM_CACHE] = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1),
740+
[IBS_DATA_SRC_IO] = L(IO) | LN(IO),
741+
};
742+
743+
#define RMT_NODE_BITS (1 << IBS_DATA_SRC_DRAM)
744+
#define RMT_NODE_APPLICABLE(x) (RMT_NODE_BITS & (1 << x))
745+
746+
static u64 g_zen4_data_src[32] = {
747+
[IBS_DATA_SRC_EXT_LOC_CACHE] = L(L3) | LN(L3),
748+
[IBS_DATA_SRC_EXT_NEAR_CCX_CACHE] = L(REM_CCE1) | LN(ANY_CACHE) | REM | HOPS(0),
749+
[IBS_DATA_SRC_EXT_DRAM] = L(LOC_RAM) | LN(RAM),
750+
[IBS_DATA_SRC_EXT_FAR_CCX_CACHE] = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1),
751+
[IBS_DATA_SRC_EXT_PMEM] = LN(PMEM),
752+
[IBS_DATA_SRC_EXT_IO] = L(IO) | LN(IO),
753+
[IBS_DATA_SRC_EXT_EXT_MEM] = LN(CXL),
754+
};
755+
756+
#define ZEN4_RMT_NODE_BITS ((1 << IBS_DATA_SRC_EXT_DRAM) | \
757+
(1 << IBS_DATA_SRC_EXT_PMEM) | \
758+
(1 << IBS_DATA_SRC_EXT_EXT_MEM))
759+
#define ZEN4_RMT_NODE_APPLICABLE(x) (ZEN4_RMT_NODE_BITS & (1 << x))
760+
761+
static __u64 perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
762+
union ibs_op_data3 *op_data3,
763+
struct perf_sample_data *data)
734764
{
735765
union perf_mem_data_src *data_src = &data->data_src;
736766
u8 ibs_data_src = perf_ibs_data_src(op_data2);
737767

738768
data_src->mem_lvl = 0;
769+
data_src->mem_lvl_num = 0;
739770

740771
/*
741772
* DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached
742773
* memory accesses. So, check DcUcMemAcc bit early.
743774
*/
744-
if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) {
745-
data_src->mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT;
746-
return;
747-
}
775+
if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO)
776+
return L(UNC) | LN(UNC);
748777

749778
/* L1 Hit */
750-
if (op_data3->dc_miss == 0) {
751-
data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
752-
return;
753-
}
779+
if (op_data3->dc_miss == 0)
780+
return L(L1) | LN(L1);
754781

755782
/* L2 Hit */
756783
if (op_data3->l2_miss == 0) {
757784
/* Erratum #1293 */
758785
if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF ||
759-
!(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) {
760-
data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
761-
return;
762-
}
786+
!(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc))
787+
return L(L2) | LN(L2);
763788
}
764789

765790
/*
@@ -769,82 +794,36 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
769794
if (data_src->mem_op != PERF_MEM_OP_LOAD)
770795
goto check_mab;
771796

772-
/* L3 Hit */
773797
if (ibs_caps & IBS_CAPS_ZEN4) {
774-
if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE) {
775-
data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
776-
return;
777-
}
778-
} else {
779-
if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) {
780-
data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_REM_CCE1 |
781-
PERF_MEM_LVL_HIT;
782-
return;
783-
}
784-
}
798+
u64 val = g_zen4_data_src[ibs_data_src];
785799

786-
/* A peer cache in a near CCX */
787-
if (ibs_caps & IBS_CAPS_ZEN4 &&
788-
ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE) {
789-
data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
790-
return;
791-
}
800+
if (!val)
801+
goto check_mab;
792802

793-
/* A peer cache in a far CCX */
794-
if (ibs_caps & IBS_CAPS_ZEN4) {
795-
if (ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) {
796-
data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
797-
return;
798-
}
799-
} else {
800-
if (ibs_data_src == IBS_DATA_SRC_REM_CACHE) {
801-
data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
802-
return;
803+
/* HOPS_1 because IBS doesn't provide remote socket detail */
804+
if (op_data2->rmt_node && ZEN4_RMT_NODE_APPLICABLE(ibs_data_src)) {
805+
if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM)
806+
val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
807+
else
808+
val |= REM | HOPS(1);
803809
}
804-
}
805810

806-
/* DRAM */
807-
if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM) {
808-
if (op_data2->rmt_node == 0)
809-
data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
810-
else
811-
data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
812-
return;
813-
}
811+
return val;
812+
} else {
813+
u64 val = g_data_src[ibs_data_src];
814814

815-
/* PMEM */
816-
if (ibs_caps & IBS_CAPS_ZEN4 && ibs_data_src == IBS_DATA_SRC_EXT_PMEM) {
817-
data_src->mem_lvl_num = PERF_MEM_LVLNUM_PMEM;
818-
if (op_data2->rmt_node) {
819-
data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
820-
/* IBS doesn't provide Remote socket detail */
821-
data_src->mem_hops = PERF_MEM_HOPS_1;
822-
}
823-
return;
824-
}
815+
if (!val)
816+
goto check_mab;
825817

826-
/* Extension Memory */
827-
if (ibs_caps & IBS_CAPS_ZEN4 &&
828-
ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM) {
829-
data_src->mem_lvl_num = PERF_MEM_LVLNUM_CXL;
830-
if (op_data2->rmt_node) {
831-
data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
832-
/* IBS doesn't provide Remote socket detail */
833-
data_src->mem_hops = PERF_MEM_HOPS_1;
818+
/* HOPS_1 because IBS doesn't provide remote socket detail */
819+
if (op_data2->rmt_node && RMT_NODE_APPLICABLE(ibs_data_src)) {
820+
if (ibs_data_src == IBS_DATA_SRC_DRAM)
821+
val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
822+
else
823+
val |= REM | HOPS(1);
834824
}
835-
return;
836-
}
837825

838-
/* IO */
839-
if (ibs_data_src == IBS_DATA_SRC_EXT_IO) {
840-
data_src->mem_lvl = PERF_MEM_LVL_IO;
841-
data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
842-
if (op_data2->rmt_node) {
843-
data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
844-
/* IBS doesn't provide Remote socket detail */
845-
data_src->mem_hops = PERF_MEM_HOPS_1;
846-
}
847-
return;
826+
return val;
848827
}
849828

850829
check_mab:
@@ -855,12 +834,11 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
855834
* DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set
856835
* MAB only when IBS fails to provide DataSrc.
857836
*/
858-
if (op_data3->dc_miss_no_mab_alloc) {
859-
data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT;
860-
return;
861-
}
837+
if (op_data3->dc_miss_no_mab_alloc)
838+
return L(LFB) | LN(LFB);
862839

863-
data_src->mem_lvl = PERF_MEM_LVL_NA;
840+
/* Don't set HIT with NA */
841+
return PERF_MEM_S(LVL, NA) | LN(NA);
864842
}
865843

866844
static bool perf_ibs_cache_hit_st_valid(void)
@@ -950,7 +928,9 @@ static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data,
950928
union ibs_op_data2 *op_data2,
951929
union ibs_op_data3 *op_data3)
952930
{
953-
perf_ibs_get_mem_lvl(op_data2, op_data3, data);
931+
union perf_mem_data_src *data_src = &data->data_src;
932+
933+
data_src->val |= perf_ibs_get_mem_lvl(op_data2, op_data3, data);
954934
perf_ibs_get_mem_snoop(op_data2, data);
955935
perf_ibs_get_tlb_lvl(op_data3, data);
956936
perf_ibs_get_mem_lock(op_data3, data);

0 commit comments

Comments
 (0)