@@ -156,8 +156,8 @@ perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
156
156
* count to the generic event atomically:
157
157
*/
158
158
prev_raw_count = local64_read (& hwc -> prev_count );
159
- if (local64_cmpxchg (& hwc -> prev_count , prev_raw_count ,
160
- new_raw_count ) != prev_raw_count )
159
+ if (! local64_try_cmpxchg (& hwc -> prev_count ,
160
+ & prev_raw_count , new_raw_count ) )
161
161
return 0 ;
162
162
163
163
/*
@@ -247,11 +247,33 @@ int forward_event_to_ibs(struct perf_event *event)
247
247
return - ENOENT ;
248
248
}
249
249
250
+ /*
251
+ * Grouping of IBS events is not possible since IBS can have only
252
+ * one event active at any point in time.
253
+ */
254
+ static int validate_group (struct perf_event * event )
255
+ {
256
+ struct perf_event * sibling ;
257
+
258
+ if (event -> group_leader == event )
259
+ return 0 ;
260
+
261
+ if (event -> group_leader -> pmu == event -> pmu )
262
+ return - EINVAL ;
263
+
264
+ for_each_sibling_event (sibling , event -> group_leader ) {
265
+ if (sibling -> pmu == event -> pmu )
266
+ return - EINVAL ;
267
+ }
268
+ return 0 ;
269
+ }
270
+
250
271
static int perf_ibs_init (struct perf_event * event )
251
272
{
252
273
struct hw_perf_event * hwc = & event -> hw ;
253
274
struct perf_ibs * perf_ibs ;
254
275
u64 max_cnt , config ;
276
+ int ret ;
255
277
256
278
perf_ibs = get_ibs_pmu (event -> attr .type );
257
279
if (!perf_ibs )
@@ -265,6 +287,10 @@ static int perf_ibs_init(struct perf_event *event)
265
287
if (config & ~perf_ibs -> config_mask )
266
288
return - EINVAL ;
267
289
290
+ ret = validate_group (event );
291
+ if (ret )
292
+ return ret ;
293
+
268
294
if (hwc -> sample_period ) {
269
295
if (config & perf_ibs -> cnt_mask )
270
296
/* raw max_cnt may not be set */
@@ -702,38 +728,63 @@ static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2)
702
728
return op_data2 -> data_src_lo ;
703
729
}
704
730
705
- static void perf_ibs_get_mem_lvl (union ibs_op_data2 * op_data2 ,
706
- union ibs_op_data3 * op_data3 ,
707
- struct perf_sample_data * data )
731
+ #define L (x ) (PERF_MEM_S(LVL, x) | PERF_MEM_S(LVL, HIT))
732
+ #define LN (x ) PERF_MEM_S(LVLNUM, x)
733
+ #define REM PERF_MEM_S(REMOTE, REMOTE)
734
+ #define HOPS (x ) PERF_MEM_S(HOPS, x)
735
+
736
+ static u64 g_data_src [8 ] = {
737
+ [IBS_DATA_SRC_LOC_CACHE ] = L (L3 ) | L (REM_CCE1 ) | LN (ANY_CACHE ) | HOPS (0 ),
738
+ [IBS_DATA_SRC_DRAM ] = L (LOC_RAM ) | LN (RAM ),
739
+ [IBS_DATA_SRC_REM_CACHE ] = L (REM_CCE2 ) | LN (ANY_CACHE ) | REM | HOPS (1 ),
740
+ [IBS_DATA_SRC_IO ] = L (IO ) | LN (IO ),
741
+ };
742
+
743
+ #define RMT_NODE_BITS (1 << IBS_DATA_SRC_DRAM)
744
+ #define RMT_NODE_APPLICABLE (x ) (RMT_NODE_BITS & (1 << x))
745
+
746
+ static u64 g_zen4_data_src [32 ] = {
747
+ [IBS_DATA_SRC_EXT_LOC_CACHE ] = L (L3 ) | LN (L3 ),
748
+ [IBS_DATA_SRC_EXT_NEAR_CCX_CACHE ] = L (REM_CCE1 ) | LN (ANY_CACHE ) | REM | HOPS (0 ),
749
+ [IBS_DATA_SRC_EXT_DRAM ] = L (LOC_RAM ) | LN (RAM ),
750
+ [IBS_DATA_SRC_EXT_FAR_CCX_CACHE ] = L (REM_CCE2 ) | LN (ANY_CACHE ) | REM | HOPS (1 ),
751
+ [IBS_DATA_SRC_EXT_PMEM ] = LN (PMEM ),
752
+ [IBS_DATA_SRC_EXT_IO ] = L (IO ) | LN (IO ),
753
+ [IBS_DATA_SRC_EXT_EXT_MEM ] = LN (CXL ),
754
+ };
755
+
756
+ #define ZEN4_RMT_NODE_BITS ((1 << IBS_DATA_SRC_EXT_DRAM) | \
757
+ (1 << IBS_DATA_SRC_EXT_PMEM) | \
758
+ (1 << IBS_DATA_SRC_EXT_EXT_MEM))
759
+ #define ZEN4_RMT_NODE_APPLICABLE (x ) (ZEN4_RMT_NODE_BITS & (1 << x))
760
+
761
+ static __u64 perf_ibs_get_mem_lvl (union ibs_op_data2 * op_data2 ,
762
+ union ibs_op_data3 * op_data3 ,
763
+ struct perf_sample_data * data )
708
764
{
709
765
union perf_mem_data_src * data_src = & data -> data_src ;
710
766
u8 ibs_data_src = perf_ibs_data_src (op_data2 );
711
767
712
768
data_src -> mem_lvl = 0 ;
769
+ data_src -> mem_lvl_num = 0 ;
713
770
714
771
/*
715
772
* DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached
716
773
* memory accesses. So, check DcUcMemAcc bit early.
717
774
*/
718
- if (op_data3 -> dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO ) {
719
- data_src -> mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT ;
720
- return ;
721
- }
775
+ if (op_data3 -> dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO )
776
+ return L (UNC ) | LN (UNC );
722
777
723
778
/* L1 Hit */
724
- if (op_data3 -> dc_miss == 0 ) {
725
- data_src -> mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT ;
726
- return ;
727
- }
779
+ if (op_data3 -> dc_miss == 0 )
780
+ return L (L1 ) | LN (L1 );
728
781
729
782
/* L2 Hit */
730
783
if (op_data3 -> l2_miss == 0 ) {
731
784
/* Erratum #1293 */
732
785
if (boot_cpu_data .x86 != 0x19 || boot_cpu_data .x86_model > 0xF ||
733
- !(op_data3 -> sw_pf || op_data3 -> dc_miss_no_mab_alloc )) {
734
- data_src -> mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT ;
735
- return ;
736
- }
786
+ !(op_data3 -> sw_pf || op_data3 -> dc_miss_no_mab_alloc ))
787
+ return L (L2 ) | LN (L2 );
737
788
}
738
789
739
790
/*
@@ -743,82 +794,36 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
743
794
if (data_src -> mem_op != PERF_MEM_OP_LOAD )
744
795
goto check_mab ;
745
796
746
- /* L3 Hit */
747
797
if (ibs_caps & IBS_CAPS_ZEN4 ) {
748
- if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE ) {
749
- data_src -> mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT ;
750
- return ;
751
- }
752
- } else {
753
- if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE ) {
754
- data_src -> mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_REM_CCE1 |
755
- PERF_MEM_LVL_HIT ;
756
- return ;
757
- }
758
- }
798
+ u64 val = g_zen4_data_src [ibs_data_src ];
759
799
760
- /* A peer cache in a near CCX */
761
- if (ibs_caps & IBS_CAPS_ZEN4 &&
762
- ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE ) {
763
- data_src -> mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT ;
764
- return ;
765
- }
800
+ if (!val )
801
+ goto check_mab ;
766
802
767
- /* A peer cache in a far CCX */
768
- if (ibs_caps & IBS_CAPS_ZEN4 ) {
769
- if (ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE ) {
770
- data_src -> mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT ;
771
- return ;
803
+ /* HOPS_1 because IBS doesn't provide remote socket detail */
804
+ if (op_data2 -> rmt_node && ZEN4_RMT_NODE_APPLICABLE (ibs_data_src )) {
805
+ if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM )
806
+ val = L (REM_RAM1 ) | LN (RAM ) | REM | HOPS (1 );
807
+ else
808
+ val |= REM | HOPS (1 );
772
809
}
773
- } else {
774
- if (ibs_data_src == IBS_DATA_SRC_REM_CACHE ) {
775
- data_src -> mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT ;
776
- return ;
777
- }
778
- }
779
810
780
- /* DRAM */
781
- if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM ) {
782
- if (op_data2 -> rmt_node == 0 )
783
- data_src -> mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT ;
784
- else
785
- data_src -> mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT ;
786
- return ;
787
- }
811
+ return val ;
812
+ } else {
813
+ u64 val = g_data_src [ibs_data_src ];
788
814
789
- /* PMEM */
790
- if (ibs_caps & IBS_CAPS_ZEN4 && ibs_data_src == IBS_DATA_SRC_EXT_PMEM ) {
791
- data_src -> mem_lvl_num = PERF_MEM_LVLNUM_PMEM ;
792
- if (op_data2 -> rmt_node ) {
793
- data_src -> mem_remote = PERF_MEM_REMOTE_REMOTE ;
794
- /* IBS doesn't provide Remote socket detail */
795
- data_src -> mem_hops = PERF_MEM_HOPS_1 ;
796
- }
797
- return ;
798
- }
815
+ if (!val )
816
+ goto check_mab ;
799
817
800
- /* Extension Memory */
801
- if (ibs_caps & IBS_CAPS_ZEN4 &&
802
- ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM ) {
803
- data_src -> mem_lvl_num = PERF_MEM_LVLNUM_CXL ;
804
- if (op_data2 -> rmt_node ) {
805
- data_src -> mem_remote = PERF_MEM_REMOTE_REMOTE ;
806
- /* IBS doesn't provide Remote socket detail */
807
- data_src -> mem_hops = PERF_MEM_HOPS_1 ;
818
+ /* HOPS_1 because IBS doesn't provide remote socket detail */
819
+ if (op_data2 -> rmt_node && RMT_NODE_APPLICABLE (ibs_data_src )) {
820
+ if (ibs_data_src == IBS_DATA_SRC_DRAM )
821
+ val = L (REM_RAM1 ) | LN (RAM ) | REM | HOPS (1 );
822
+ else
823
+ val |= REM | HOPS (1 );
808
824
}
809
- return ;
810
- }
811
825
812
- /* IO */
813
- if (ibs_data_src == IBS_DATA_SRC_EXT_IO ) {
814
- data_src -> mem_lvl = PERF_MEM_LVL_IO ;
815
- data_src -> mem_lvl_num = PERF_MEM_LVLNUM_IO ;
816
- if (op_data2 -> rmt_node ) {
817
- data_src -> mem_remote = PERF_MEM_REMOTE_REMOTE ;
818
- /* IBS doesn't provide Remote socket detail */
819
- data_src -> mem_hops = PERF_MEM_HOPS_1 ;
820
- }
821
- return ;
826
+ return val ;
822
827
}
823
828
824
829
check_mab :
@@ -829,12 +834,11 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
829
834
* DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set
830
835
* MAB only when IBS fails to provide DataSrc.
831
836
*/
832
- if (op_data3 -> dc_miss_no_mab_alloc ) {
833
- data_src -> mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT ;
834
- return ;
835
- }
837
+ if (op_data3 -> dc_miss_no_mab_alloc )
838
+ return L (LFB ) | LN (LFB );
836
839
837
- data_src -> mem_lvl = PERF_MEM_LVL_NA ;
840
+ /* Don't set HIT with NA */
841
+ return PERF_MEM_S (LVL , NA ) | LN (NA );
838
842
}
839
843
840
844
static bool perf_ibs_cache_hit_st_valid (void )
@@ -924,7 +928,9 @@ static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data,
924
928
union ibs_op_data2 * op_data2 ,
925
929
union ibs_op_data3 * op_data3 )
926
930
{
927
- perf_ibs_get_mem_lvl (op_data2 , op_data3 , data );
931
+ union perf_mem_data_src * data_src = & data -> data_src ;
932
+
933
+ data_src -> val |= perf_ibs_get_mem_lvl (op_data2 , op_data3 , data );
928
934
perf_ibs_get_mem_snoop (op_data2 , data );
929
935
perf_ibs_get_tlb_lvl (op_data3 , data );
930
936
perf_ibs_get_mem_lock (op_data3 , data );
0 commit comments