@@ -728,38 +728,63 @@ static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2)
728
728
return op_data2 -> data_src_lo ;
729
729
}
730
730
731
- static void perf_ibs_get_mem_lvl (union ibs_op_data2 * op_data2 ,
732
- union ibs_op_data3 * op_data3 ,
733
- struct perf_sample_data * data )
731
/*
 * Shorthand for composing perf_mem_data_src encodings in the lookup tables
 * and return statements that follow.
 *
 *   L(x)    - PERF_MEM_S(LVL, x) combined with PERF_MEM_S(LVL, HIT)
 *   LN(x)   - PERF_MEM_S(LVLNUM, x)
 *   REM     - PERF_MEM_S(REMOTE, REMOTE)
 *   HOPS(x) - PERF_MEM_S(HOPS, x)
 *
 * The parameter list must follow the macro name immediately: with a space
 * in between, the macro is object-like and "(x)" becomes part of its
 * replacement text instead of naming a parameter.
 */
#define L(x)		(PERF_MEM_S(LVL, x) | PERF_MEM_S(LVL, HIT))
#define LN(x)		PERF_MEM_S(LVLNUM, x)
#define REM		PERF_MEM_S(REMOTE, REMOTE)
#define HOPS(x)		PERF_MEM_S(HOPS, x)
735
+
736
+ static u64 g_data_src [8 ] = {
737
+ [IBS_DATA_SRC_LOC_CACHE ] = L (L3 ) | L (REM_CCE1 ) | LN (ANY_CACHE ) | HOPS (0 ),
738
+ [IBS_DATA_SRC_DRAM ] = L (LOC_RAM ) | LN (RAM ),
739
+ [IBS_DATA_SRC_REM_CACHE ] = L (REM_CCE2 ) | LN (ANY_CACHE ) | REM | HOPS (1 ),
740
+ [IBS_DATA_SRC_IO ] = L (IO ) | LN (IO ),
741
+ };
742
+
743
/*
 * Bitmask, indexed by data-source value, of the sources for which the
 * RmtNode bit is taken into account on the non-Zen4 path (DRAM only).
 *
 * RMT_NODE_APPLICABLE(x) is non-zero when data source @x is in that mask.
 * The argument is parenthesized so that expressions with operators of lower
 * precedence than << are shifted as a whole, and the shifts are done on an
 * unsigned operand so no bit position can overflow a signed int.
 */
#define RMT_NODE_BITS			(1U << IBS_DATA_SRC_DRAM)
#define RMT_NODE_APPLICABLE(x)		(RMT_NODE_BITS & (1U << (x)))
745
+
746
+ static u64 g_zen4_data_src [32 ] = {
747
+ [IBS_DATA_SRC_EXT_LOC_CACHE ] = L (L3 ) | LN (L3 ),
748
+ [IBS_DATA_SRC_EXT_NEAR_CCX_CACHE ] = L (REM_CCE1 ) | LN (ANY_CACHE ) | REM | HOPS (0 ),
749
+ [IBS_DATA_SRC_EXT_DRAM ] = L (LOC_RAM ) | LN (RAM ),
750
+ [IBS_DATA_SRC_EXT_FAR_CCX_CACHE ] = L (REM_CCE2 ) | LN (ANY_CACHE ) | REM | HOPS (1 ),
751
+ [IBS_DATA_SRC_EXT_PMEM ] = LN (PMEM ),
752
+ [IBS_DATA_SRC_EXT_IO ] = L (IO ) | LN (IO ),
753
+ [IBS_DATA_SRC_EXT_EXT_MEM ] = LN (CXL ),
754
+ };
755
+
756
/*
 * Bitmask, indexed by extended data-source value, of the sources for which
 * the RmtNode bit is taken into account on the Zen4 path: DRAM, PMEM and
 * extension memory.
 *
 * ZEN4_RMT_NODE_APPLICABLE(x) is non-zero when data source @x is in that
 * mask. The argument is parenthesized so compound expressions are shifted as
 * a whole, and the shifts use an unsigned operand: the Zen4 table has 32
 * slots, and shifting a signed 1 into bit 31 would be undefined behaviour.
 */
#define ZEN4_RMT_NODE_BITS		((1U << IBS_DATA_SRC_EXT_DRAM) | \
					 (1U << IBS_DATA_SRC_EXT_PMEM) | \
					 (1U << IBS_DATA_SRC_EXT_EXT_MEM))
#define ZEN4_RMT_NODE_APPLICABLE(x)	(ZEN4_RMT_NODE_BITS & (1U << (x)))
760
+
761
+ static __u64 perf_ibs_get_mem_lvl (union ibs_op_data2 * op_data2 ,
762
+ union ibs_op_data3 * op_data3 ,
763
+ struct perf_sample_data * data )
734
764
{
735
765
union perf_mem_data_src * data_src = & data -> data_src ;
736
766
u8 ibs_data_src = perf_ibs_data_src (op_data2 );
737
767
738
768
data_src -> mem_lvl = 0 ;
769
+ data_src -> mem_lvl_num = 0 ;
739
770
740
771
/*
741
772
* DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached
742
773
* memory accesses. So, check DcUcMemAcc bit early.
743
774
*/
744
- if (op_data3 -> dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO ) {
745
- data_src -> mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT ;
746
- return ;
747
- }
775
+ if (op_data3 -> dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO )
776
+ return L (UNC ) | LN (UNC );
748
777
749
778
/* L1 Hit */
750
- if (op_data3 -> dc_miss == 0 ) {
751
- data_src -> mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT ;
752
- return ;
753
- }
779
+ if (op_data3 -> dc_miss == 0 )
780
+ return L (L1 ) | LN (L1 );
754
781
755
782
/* L2 Hit */
756
783
if (op_data3 -> l2_miss == 0 ) {
757
784
/* Erratum #1293 */
758
785
if (boot_cpu_data .x86 != 0x19 || boot_cpu_data .x86_model > 0xF ||
759
- !(op_data3 -> sw_pf || op_data3 -> dc_miss_no_mab_alloc )) {
760
- data_src -> mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT ;
761
- return ;
762
- }
786
+ !(op_data3 -> sw_pf || op_data3 -> dc_miss_no_mab_alloc ))
787
+ return L (L2 ) | LN (L2 );
763
788
}
764
789
765
790
/*
@@ -769,82 +794,36 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
769
794
if (data_src -> mem_op != PERF_MEM_OP_LOAD )
770
795
goto check_mab ;
771
796
772
- /* L3 Hit */
773
797
if (ibs_caps & IBS_CAPS_ZEN4 ) {
774
- if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE ) {
775
- data_src -> mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT ;
776
- return ;
777
- }
778
- } else {
779
- if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE ) {
780
- data_src -> mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_REM_CCE1 |
781
- PERF_MEM_LVL_HIT ;
782
- return ;
783
- }
784
- }
798
+ u64 val = g_zen4_data_src [ibs_data_src ];
785
799
786
- /* A peer cache in a near CCX */
787
- if (ibs_caps & IBS_CAPS_ZEN4 &&
788
- ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE ) {
789
- data_src -> mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT ;
790
- return ;
791
- }
800
+ if (!val )
801
+ goto check_mab ;
792
802
793
- /* A peer cache in a far CCX */
794
- if (ibs_caps & IBS_CAPS_ZEN4 ) {
795
- if (ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE ) {
796
- data_src -> mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT ;
797
- return ;
798
- }
799
- } else {
800
- if (ibs_data_src == IBS_DATA_SRC_REM_CACHE ) {
801
- data_src -> mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT ;
802
- return ;
803
+ /* HOPS_1 because IBS doesn't provide remote socket detail */
804
+ if (op_data2 -> rmt_node && ZEN4_RMT_NODE_APPLICABLE (ibs_data_src )) {
805
+ if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM )
806
+ val = L (REM_RAM1 ) | LN (RAM ) | REM | HOPS (1 );
807
+ else
808
+ val |= REM | HOPS (1 );
803
809
}
804
- }
805
810
806
- /* DRAM */
807
- if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM ) {
808
- if (op_data2 -> rmt_node == 0 )
809
- data_src -> mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT ;
810
- else
811
- data_src -> mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT ;
812
- return ;
813
- }
811
+ return val ;
812
+ } else {
813
+ u64 val = g_data_src [ibs_data_src ];
814
814
815
- /* PMEM */
816
- if (ibs_caps & IBS_CAPS_ZEN4 && ibs_data_src == IBS_DATA_SRC_EXT_PMEM ) {
817
- data_src -> mem_lvl_num = PERF_MEM_LVLNUM_PMEM ;
818
- if (op_data2 -> rmt_node ) {
819
- data_src -> mem_remote = PERF_MEM_REMOTE_REMOTE ;
820
- /* IBS doesn't provide Remote socket detail */
821
- data_src -> mem_hops = PERF_MEM_HOPS_1 ;
822
- }
823
- return ;
824
- }
815
+ if (!val )
816
+ goto check_mab ;
825
817
826
- /* Extension Memory */
827
- if (ibs_caps & IBS_CAPS_ZEN4 &&
828
- ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM ) {
829
- data_src -> mem_lvl_num = PERF_MEM_LVLNUM_CXL ;
830
- if (op_data2 -> rmt_node ) {
831
- data_src -> mem_remote = PERF_MEM_REMOTE_REMOTE ;
832
- /* IBS doesn't provide Remote socket detail */
833
- data_src -> mem_hops = PERF_MEM_HOPS_1 ;
818
+ /* HOPS_1 because IBS doesn't provide remote socket detail */
819
+ if (op_data2 -> rmt_node && RMT_NODE_APPLICABLE (ibs_data_src )) {
820
+ if (ibs_data_src == IBS_DATA_SRC_DRAM )
821
+ val = L (REM_RAM1 ) | LN (RAM ) | REM | HOPS (1 );
822
+ else
823
+ val |= REM | HOPS (1 );
834
824
}
835
- return ;
836
- }
837
825
838
- /* IO */
839
- if (ibs_data_src == IBS_DATA_SRC_EXT_IO ) {
840
- data_src -> mem_lvl = PERF_MEM_LVL_IO ;
841
- data_src -> mem_lvl_num = PERF_MEM_LVLNUM_IO ;
842
- if (op_data2 -> rmt_node ) {
843
- data_src -> mem_remote = PERF_MEM_REMOTE_REMOTE ;
844
- /* IBS doesn't provide Remote socket detail */
845
- data_src -> mem_hops = PERF_MEM_HOPS_1 ;
846
- }
847
- return ;
826
+ return val ;
848
827
}
849
828
850
829
check_mab :
@@ -855,12 +834,11 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
855
834
* DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set
856
835
* MAB only when IBS fails to provide DataSrc.
857
836
*/
858
- if (op_data3 -> dc_miss_no_mab_alloc ) {
859
- data_src -> mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT ;
860
- return ;
861
- }
837
+ if (op_data3 -> dc_miss_no_mab_alloc )
838
+ return L (LFB ) | LN (LFB );
862
839
863
- data_src -> mem_lvl = PERF_MEM_LVL_NA ;
840
+ /* Don't set HIT with NA */
841
+ return PERF_MEM_S (LVL , NA ) | LN (NA );
864
842
}
865
843
866
844
static bool perf_ibs_cache_hit_st_valid (void )
@@ -950,7 +928,9 @@ static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data,
950
928
union ibs_op_data2 * op_data2 ,
951
929
union ibs_op_data3 * op_data3 )
952
930
{
953
- perf_ibs_get_mem_lvl (op_data2 , op_data3 , data );
931
+ union perf_mem_data_src * data_src = & data -> data_src ;
932
+
933
+ data_src -> val |= perf_ibs_get_mem_lvl (op_data2 , op_data3 , data );
954
934
perf_ibs_get_mem_snoop (op_data2 , data );
955
935
perf_ibs_get_tlb_lvl (op_data3 , data );
956
936
perf_ibs_get_mem_lock (op_data3 , data );
0 commit comments