@@ -29,6 +29,9 @@
 struct x86_pmu_capability __read_mostly kvm_pmu_cap;
 EXPORT_SYMBOL_GPL(kvm_pmu_cap);
 
+struct kvm_pmu_emulated_event_selectors __read_mostly kvm_pmu_eventsel;
+EXPORT_SYMBOL_GPL(kvm_pmu_eventsel);
+
 /* Precise Distribution of Instructions Retired (PDIR) */
 static const struct x86_cpu_id vmx_pebs_pdir_cpu[] = {
         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, NULL),
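The new kvm_pmu_eventsel snapshot holds the raw event selectors that KVM emulates in software and is consumed by kvm_pmu_trigger_event() further down. A minimal sketch of the structure, assuming the two selectors used by the emulation paths (field names are an assumption; the actual definition lives in pmu.h):

/* Sketch only: assumed layout of the snapshot declared above. */
struct kvm_pmu_emulated_event_selectors {
        u64 INSTRUCTIONS_RETIRED;
        u64 BRANCH_INSTRUCTIONS_RETIRED;
};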
@@ -67,7 +70,7 @@ static const struct x86_cpu_id vmx_pebs_pdist_cpu[] = {
  * all perf counters (both gp and fixed). The mapping relationship
  * between pmc and perf counters is as the following:
  * * Intel: [0 .. KVM_INTEL_PMC_MAX_GENERIC-1] <=> gp counters
- *          [INTEL_PMC_IDX_FIXED .. INTEL_PMC_IDX_FIXED + 2] <=> fixed
+ *          [KVM_FIXED_PMC_BASE_IDX .. KVM_FIXED_PMC_BASE_IDX + 2] <=> fixed
  * * AMD:   [0 .. AMD64_NUM_COUNTERS-1] and, for families 15H
  *          and later, [0 .. AMD64_NUM_COUNTERS_CORE-1] <=> gp counters
  */
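As a worked example of the mapping above, assuming KVM_FIXED_PMC_BASE_IDX keeps the value of INTEL_PMC_IDX_FIXED (32) from perf_event.h:

/* Illustration only, not part of the patch. */
/*   Intel gp counter 2     -> pmc->idx == 2                                */
/*   Intel fixed counter 1  -> pmc->idx == KVM_FIXED_PMC_BASE_IDX + 1 == 33 */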
@@ -411,7 +414,7 @@ static bool is_gp_event_allowed(struct kvm_x86_pmu_event_filter *f,
 static bool is_fixed_event_allowed(struct kvm_x86_pmu_event_filter *filter,
                                    int idx)
 {
-        int fixed_idx = idx - INTEL_PMC_IDX_FIXED;
+        int fixed_idx = idx - KVM_FIXED_PMC_BASE_IDX;
 
         if (filter->action == KVM_PMU_EVENT_DENY &&
             test_bit(fixed_idx, (ulong *)&filter->fixed_counter_bitmap))
@@ -441,11 +444,10 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
 static bool pmc_event_is_allowed(struct kvm_pmc *pmc)
 {
         return pmc_is_globally_enabled(pmc) && pmc_speculative_in_use(pmc) &&
-               static_call(kvm_x86_pmu_hw_event_available)(pmc) &&
                check_pmu_event_filter(pmc);
 }
 
-static void reprogram_counter(struct kvm_pmc *pmc)
+static int reprogram_counter(struct kvm_pmc *pmc)
 {
         struct kvm_pmu *pmu = pmc_to_pmu(pmc);
         u64 eventsel = pmc->eventsel;
@@ -456,7 +458,7 @@ static void reprogram_counter(struct kvm_pmc *pmc)
         emulate_overflow = pmc_pause_counter(pmc);
 
         if (!pmc_event_is_allowed(pmc))
-                goto reprogram_complete;
+                return 0;
 
         if (emulate_overflow)
                 __kvm_perf_overflow(pmc, false);
@@ -466,7 +468,7 @@ static void reprogram_counter(struct kvm_pmc *pmc)
 
         if (pmc_is_fixed(pmc)) {
                 fixed_ctr_ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl,
-                                                  pmc->idx - INTEL_PMC_IDX_FIXED);
+                                                  pmc->idx - KVM_FIXED_PMC_BASE_IDX);
                 if (fixed_ctr_ctrl & 0x1)
                         eventsel |= ARCH_PERFMON_EVENTSEL_OS;
                 if (fixed_ctr_ctrl & 0x2)
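For context, fixed_ctrl_field() pulls the per-counter control nibble out of the guest's IA32_FIXED_CTR_CTRL value, whose low bits the hunk above translates into the OS/USR eventsel bits. A rough sketch, assuming the long-standing four-bits-per-counter layout (the real macro is defined in pmu.h):

/* Sketch, not the patch: each fixed counter owns 4 control bits, where
 * bit 0 (0x1) enables ring-0 counting and bit 1 (0x2) enables ring-3. */
#define fixed_ctrl_field(ctrl_reg, idx)  (((ctrl_reg) >> ((idx) * 4)) & 0xf)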
@@ -477,43 +479,45 @@ static void reprogram_counter(struct kvm_pmc *pmc)
         }
 
         if (pmc->current_config == new_config && pmc_resume_counter(pmc))
-                goto reprogram_complete;
+                return 0;
 
         pmc_release_perf_event(pmc);
 
         pmc->current_config = new_config;
 
-        /*
-         * If reprogramming fails, e.g. due to contention, leave the counter's
-         * regprogram bit set, i.e. opportunistically try again on the next PMU
-         * refresh.  Don't make a new request as doing so can stall the guest
-         * if reprogramming repeatedly fails.
-         */
-        if (pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
-                                  (eventsel & pmu->raw_event_mask),
-                                  !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
-                                  !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
-                                  eventsel & ARCH_PERFMON_EVENTSEL_INT))
-                return;
-
-reprogram_complete:
-        clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
+        return pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
+                                     (eventsel & pmu->raw_event_mask),
+                                     !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
+                                     !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
+                                     eventsel & ARCH_PERFMON_EVENTSEL_INT);
 }
 
 void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
 {
+        DECLARE_BITMAP(bitmap, X86_PMC_IDX_MAX);
         struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+        struct kvm_pmc *pmc;
         int bit;
 
-        for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) {
-                struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit);
+        bitmap_copy(bitmap, pmu->reprogram_pmi, X86_PMC_IDX_MAX);
 
-                if (unlikely(!pmc)) {
-                        clear_bit(bit, pmu->reprogram_pmi);
-                        continue;
-                }
+        /*
+         * The reprogramming bitmap can be written asynchronously by something
+         * other than the task that holds vcpu->mutex, take care to clear only
+         * the bits that will actually be processed.
+         */
+        BUILD_BUG_ON(sizeof(bitmap) != sizeof(atomic64_t));
+        atomic64_andnot(*(s64 *)bitmap, &pmu->__reprogram_pmi);
 
-                reprogram_counter(pmc);
+        kvm_for_each_pmc(pmu, pmc, bit, bitmap) {
+                /*
+                 * If reprogramming fails, e.g. due to contention, re-set the
+                 * reprogram bit, i.e. opportunistically try again on the
+                 * next PMU refresh.  Don't make a new request as doing so can
+                 * stall the guest if reprogramming repeatedly fails.
+                 */
+                if (reprogram_counter(pmc))
+                        set_bit(pmc->idx, pmu->reprogram_pmi);
         }
 
         /*
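For reference, kvm_for_each_pmc() is the iteration helper this series introduces to replace the open-coded index-to-PMC loops. A minimal sketch of what it presumably expands to, assuming a common kvm_pmc_idx_to_pmc() lookup (the real macro lives in pmu.h):

/* Sketch only: iterate a PMC bitmap, skipping indices with no backing PMC. */
#define kvm_for_each_pmc(pmu, pmc, i, bitmap)                           \
        for_each_set_bit(i, bitmap, X86_PMC_IDX_MAX)                    \
                if (!(pmc = kvm_pmc_idx_to_pmc(pmu, i)))                \
                        continue;                                       \
                else

The bitmap_copy() plus atomic64_andnot() pair above clears only the bits captured in the local snapshot, so any bit set asynchronously after the copy survives for the next pass through kvm_pmu_handle_event().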
@@ -525,10 +529,20 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
         kvm_pmu_cleanup(vcpu);
 }
 
-/* check if idx is a valid index to access PMU */
-bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
+int kvm_pmu_check_rdpmc_early(struct kvm_vcpu *vcpu, unsigned int idx)
 {
-        return static_call(kvm_x86_pmu_is_valid_rdpmc_ecx)(vcpu, idx);
+        /*
+         * On Intel, VMX interception has priority over RDPMC exceptions that
+         * aren't already handled by the emulator, i.e. there are no additional
+         * checks needed for Intel PMUs.
+         *
+         * On AMD, _all_ exceptions on RDPMC have priority over SVM intercepts,
+         * i.e. an invalid PMC results in a #GP, not #VMEXIT.
+         */
+        if (!kvm_pmu_ops.check_rdpmc_early)
+                return 0;
+
+        return static_call(kvm_x86_pmu_check_rdpmc_early)(vcpu, idx);
 }
 
 bool is_vmware_backdoor_pmc(u32 pmc_idx)
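A hedged sketch of how a caller on the RDPMC emulation path might consume the early check; the function and variable names here are illustrative, not the actual emulator hook:

/* Illustrative caller, not part of the patch: on AMD, a bad index must
 * raise #GP before any intercept-style handling, so the early check runs
 * first and a non-zero return becomes a fault. */
static int example_emulate_rdpmc(struct kvm_vcpu *vcpu, unsigned int idx)
{
        u64 data;

        if (kvm_pmu_check_rdpmc_early(vcpu, idx))
                return 1;       /* caller injects #GP */

        return kvm_pmu_rdpmc(vcpu, idx, &data);
}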
@@ -567,10 +581,9 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
 
 int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
 {
-        bool fast_mode = idx & (1u << 31);
         struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
         struct kvm_pmc *pmc;
-        u64 mask = fast_mode ? ~0u : ~0ull;
+        u64 mask = ~0ull;
 
         if (!pmu->version)
                 return 1;
@@ -716,11 +729,7 @@ static void kvm_pmu_reset(struct kvm_vcpu *vcpu)
 
         bitmap_zero(pmu->reprogram_pmi, X86_PMC_IDX_MAX);
 
-        for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
-                pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
-                if (!pmc)
-                        continue;
-
+        kvm_for_each_pmc(pmu, pmc, i, pmu->all_valid_pmc_idx) {
                 pmc_stop_counter(pmc);
                 pmc->counter = 0;
                 pmc->emulated_counter = 0;
@@ -741,6 +750,8 @@ static void kvm_pmu_reset(struct kvm_vcpu *vcpu)
  */
 void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
 {
+        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+
         if (KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm))
                 return;
 
@@ -750,8 +761,22 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
          */
         kvm_pmu_reset(vcpu);
 
-        bitmap_zero(vcpu_to_pmu(vcpu)->all_valid_pmc_idx, X86_PMC_IDX_MAX);
-        static_call(kvm_x86_pmu_refresh)(vcpu);
+        pmu->version = 0;
+        pmu->nr_arch_gp_counters = 0;
+        pmu->nr_arch_fixed_counters = 0;
+        pmu->counter_bitmask[KVM_PMC_GP] = 0;
+        pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
+        pmu->reserved_bits = 0xffffffff00200000ull;
+        pmu->raw_event_mask = X86_RAW_EVENT_MASK;
+        pmu->global_ctrl_mask = ~0ull;
+        pmu->global_status_mask = ~0ull;
+        pmu->fixed_ctr_ctrl_mask = ~0ull;
+        pmu->pebs_enable_mask = ~0ull;
+        pmu->pebs_data_cfg_mask = ~0ull;
+        bitmap_zero(pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
+
+        if (vcpu->kvm->arch.enable_pmu)
+                static_call(kvm_x86_pmu_refresh)(vcpu);
 }
 
 void kvm_pmu_init(struct kvm_vcpu *vcpu)
@@ -776,10 +801,8 @@ void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
         bitmap_andnot(bitmask, pmu->all_valid_pmc_idx,
                       pmu->pmc_in_use, X86_PMC_IDX_MAX);
 
-        for_each_set_bit(i, bitmask, X86_PMC_IDX_MAX) {
-                pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
-
-                if (pmc && pmc->perf_event && !pmc_speculative_in_use(pmc))
+        kvm_for_each_pmc(pmu, pmc, i, bitmask) {
+                if (pmc->perf_event && !pmc_speculative_in_use(pmc))
                         pmc_stop_counter(pmc);
         }
 
@@ -799,13 +822,6 @@ static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
         kvm_pmu_request_counter_reprogram(pmc);
 }
 
-static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
-                                             unsigned int perf_hw_id)
-{
-        return !((pmc->eventsel ^ perf_get_hw_event_config(perf_hw_id)) &
-                AMD64_RAW_EVENT_MASK_NB);
-}
-
 static inline bool cpl_is_matched(struct kvm_pmc *pmc)
 {
         bool select_os, select_user;
@@ -817,29 +833,56 @@ static inline bool cpl_is_matched(struct kvm_pmc *pmc)
                 select_user = config & ARCH_PERFMON_EVENTSEL_USR;
         } else {
                 config = fixed_ctrl_field(pmc_to_pmu(pmc)->fixed_ctr_ctrl,
-                                          pmc->idx - INTEL_PMC_IDX_FIXED);
+                                          pmc->idx - KVM_FIXED_PMC_BASE_IDX);
                 select_os = config & 0x1;
                 select_user = config & 0x2;
         }
 
+        /*
+         * Skip the CPL lookup, which isn't free on Intel, if the result will
+         * be the same regardless of the CPL.
+         */
+        if (select_os == select_user)
+                return select_os;
+
         return (static_call(kvm_x86_get_cpl)(pmc->vcpu) == 0) ? select_os : select_user;
 }
 
-void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
+void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel)
 {
+        DECLARE_BITMAP(bitmap, X86_PMC_IDX_MAX);
         struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
         struct kvm_pmc *pmc;
         int i;
 
-        for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
-                pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
+        BUILD_BUG_ON(sizeof(pmu->global_ctrl) * BITS_PER_BYTE != X86_PMC_IDX_MAX);
 
-                if (!pmc || !pmc_event_is_allowed(pmc))
+        if (!kvm_pmu_has_perf_global_ctrl(pmu))
+                bitmap_copy(bitmap, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
+        else if (!bitmap_and(bitmap, pmu->all_valid_pmc_idx,
+                             (unsigned long *)&pmu->global_ctrl, X86_PMC_IDX_MAX))
+                return;
+
+        kvm_for_each_pmc(pmu, pmc, i, bitmap) {
+                /*
+                 * Ignore checks for edge detect (all events currently emulated
+                 * by KVM are always rising edges), pin control (unsupported
+                 * by modern CPUs), and counter mask and its invert flag (KVM
+                 * doesn't emulate multiple events in a single clock cycle).
+                 *
+                 * Note, the uppermost nibble of AMD's mask overlaps Intel's
+                 * IN_TX (bit 32) and IN_TXCP (bit 33), as well as two reserved
+                 * bits (bits 35:34).  Checking the "in HLE/RTM transaction"
+                 * flags is correct as the vCPU can't be in a transaction if
+                 * KVM is emulating an instruction.  Checking the reserved bits
+                 * might be wrong if they are defined in the future, but so
+                 * could ignoring them, so do the simple thing for now.
+                 */
+                if (((pmc->eventsel ^ eventsel) & AMD64_RAW_EVENT_MASK_NB) ||
+                    !pmc_event_is_allowed(pmc) || !cpl_is_matched(pmc))
                         continue;
 
-                /* Ignore checks for edge detect, pin control, invert and CMASK bits */
-                if (eventsel_match_perf_hw_id(pmc, perf_hw_id) && cpl_is_matched(pmc))
-                        kvm_pmu_incr_counter(pmc);
+                kvm_pmu_incr_counter(pmc);
         }
 }
 EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event);
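With the signature change, callers now pass a raw event selector rather than a generic perf_hw_id. A minimal sketch of the expected call site, assuming kvm_pmu_eventsel is populated with the architectural encodings at module init (the exact call sites live elsewhere, e.g. in x86.c):

/* Sketch of a caller after this change, e.g. on emulated instruction
 * retirement; illustrative only. */
static void example_emulated_insn_retired(struct kvm_vcpu *vcpu)
{
        kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED);
}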