Commit a35747c

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:

 "ARM:

   - Plug a race in the stage-2 mapping code where the IPA and the PA
     would end up being out of sync

   - Make better use of the bitmap API (bitmap_zero, bitmap_zalloc...)

   - FP/SVE/SME documentation update, in the hope that this field
     becomes clearer...

   - Add workaround for Apple SEIS brokenness to a new SoC

   - Random comment fixes

  x86:

   - add MSR_IA32_TSX_CTRL into msrs_to_save

   - fixes for XCR0 handling in SGX enclaves

  Generic:

   - Fix vcpu_array[0] races

   - Fix race between starting a VM and 'reboot -f'"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: VMX: add MSR_IA32_TSX_CTRL into msrs_to_save
  KVM: x86: Don't adjust guest's CPUID.0x12.1 (allowed SGX enclave XFRM)
  KVM: VMX: Don't rely _only_ on CPUID to enforce XCR0 restrictions for ECREATE
  KVM: Fix vcpu_array[0] races
  KVM: VMX: Fix header file dependency of asm/vmx.h
  KVM: Don't enable hardware after a restart/shutdown is initiated
  KVM: Use syscore_ops instead of reboot_notifier to hook restart/shutdown
  KVM: arm64: vgic: Add Apple M2 PRO/MAX cpus to the list of broken SEIS implementations
  KVM: arm64: Clarify host SME state management
  KVM: arm64: Restructure check for SVE support in FP trap handler
  KVM: arm64: Document check for TIF_FOREIGN_FPSTATE
  KVM: arm64: Fix repeated words in comments
  KVM: arm64: Constify start/end/phys fields of the pgtable walker data
  KVM: arm64: Infer PA offset from VA in hyp map walker
  KVM: arm64: Infer the PA offset from IPA in stage-2 map walker
  KVM: arm64: Use the bitmap API to allocate bitmaps
  KVM: arm64: Slightly optimize flush_context()
2 parents c47d122 + b9846a6 commit a35747c

13 files changed: +129 -66 lines

arch/arm64/include/asm/cputype.h

Lines changed: 8 additions & 0 deletions

@@ -126,6 +126,10 @@
 #define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029
 #define APPLE_CPU_PART_M2_BLIZZARD      0x032
 #define APPLE_CPU_PART_M2_AVALANCHE     0x033
+#define APPLE_CPU_PART_M2_BLIZZARD_PRO  0x034
+#define APPLE_CPU_PART_M2_AVALANCHE_PRO 0x035
+#define APPLE_CPU_PART_M2_BLIZZARD_MAX  0x038
+#define APPLE_CPU_PART_M2_AVALANCHE_MAX 0x039
 
 #define AMPERE_CPU_PART_AMPERE1         0xAC3
 
@@ -181,6 +185,10 @@
 #define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX)
 #define MIDR_APPLE_M2_BLIZZARD MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD)
 #define MIDR_APPLE_M2_AVALANCHE MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE)
+#define MIDR_APPLE_M2_BLIZZARD_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_PRO)
+#define MIDR_APPLE_M2_AVALANCHE_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_PRO)
+#define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX)
+#define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX)
 #define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)
 
 /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
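
Note: MIDR_CPU_MODEL packs the implementer ID and part number (plus a fixed architecture nibble) into a single MIDR_EL1 value. The sketch below mirrors that composition in standalone C; the shift constants follow the architectural MIDR layout and the macro body is assumed to match the kernel's, so treat it as illustrative rather than authoritative.

/* Hedged sketch: composing a MIDR value the way MIDR_CPU_MODEL does. */
#include <stdint.h>
#include <stdio.h>

#define MIDR_PARTNUM_SHIFT      4
#define MIDR_ARCHITECTURE_SHIFT 16
#define MIDR_IMPLEMENTOR_SHIFT  24

/* Assumed to mirror the kernel macro: implementer, architecture (0xf), part. */
#define MIDR_CPU_MODEL(imp, partnum)                    \
        (((imp) << MIDR_IMPLEMENTOR_SHIFT) |            \
         (0xfU << MIDR_ARCHITECTURE_SHIFT) |            \
         ((partnum) << MIDR_PARTNUM_SHIFT))

#define ARM_CPU_IMP_APPLE               0x61
#define APPLE_CPU_PART_M2_BLIZZARD_PRO  0x034

int main(void)
{
        uint32_t midr = MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE,
                                       APPLE_CPU_PART_M2_BLIZZARD_PRO);

        /* Implementer 0x61 in [31:24], part 0x034 in [15:4] -> 0x610f0340. */
        printf("MIDR_APPLE_M2_BLIZZARD_PRO = 0x%08x\n", midr);
        return 0;
}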

arch/arm64/include/asm/kvm_pgtable.h

Lines changed: 1 addition & 0 deletions

@@ -209,6 +209,7 @@ struct kvm_pgtable_visit_ctx {
         kvm_pte_t old;
         void *arg;
         struct kvm_pgtable_mm_ops *mm_ops;
+        u64 start;
         u64 addr;
         u64 end;
         u32 level;

arch/arm64/kvm/fpsimd.c

Lines changed: 17 additions & 9 deletions

@@ -81,26 +81,34 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
 
         fpsimd_kvm_prepare();
 
+        /*
+         * We will check TIF_FOREIGN_FPSTATE just before entering the
+         * guest in kvm_arch_vcpu_ctxflush_fp() and override this to
+         * FP_STATE_FREE if the flag is set.
+         */
         vcpu->arch.fp_state = FP_STATE_HOST_OWNED;
 
         vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
         if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
                 vcpu_set_flag(vcpu, HOST_SVE_ENABLED);
 
-        /*
-         * We don't currently support SME guests but if we leave
-         * things in streaming mode then when the guest starts running
-         * FPSIMD or SVE code it may generate SME traps so as a
-         * special case if we are in streaming mode we force the host
-         * state to be saved now and exit streaming mode so that we
-         * don't have to handle any SME traps for valid guest
-         * operations. Do this for ZA as well for now for simplicity.
-         */
         if (system_supports_sme()) {
                 vcpu_clear_flag(vcpu, HOST_SME_ENABLED);
                 if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
                         vcpu_set_flag(vcpu, HOST_SME_ENABLED);
 
+                /*
+                 * If PSTATE.SM is enabled then save any pending FP
+                 * state and disable PSTATE.SM. If we leave PSTATE.SM
+                 * enabled and the guest does not enable SME via
+                 * CPACR_EL1.SMEN then operations that should be valid
+                 * may generate SME traps from EL1 to EL1 which we
+                 * can't intercept and which would confuse the guest.
+                 *
+                 * Do the same for PSTATE.ZA in the case where there
+                 * is state in the registers which has not already
+                 * been saved; this is very unlikely to happen.
+                 */
                 if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) {
                         vcpu->arch.fp_state = FP_STATE_FREE;
                         fpsimd_save_and_flush_cpu_state();
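
Note: the guard in that hunk reduces to a bit test on SVCR. Below is a minimal standalone sketch of the predicate, assuming only the architectural SVCR layout (SM is bit 0, ZA is bit 1); the helper name must_flush_sme_state is hypothetical, not a kernel function.

/* Hedged sketch of the SVCR test guarding the eager host SME state flush. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SVCR_SM_MASK    (1UL << 0)      /* PSTATE.SM: streaming mode active */
#define SVCR_ZA_MASK    (1UL << 1)      /* PSTATE.ZA: ZA storage live */

static bool must_flush_sme_state(uint64_t svcr)
{
        /* Either kind of live SME state forces an early save + flush. */
        return svcr & (SVCR_SM_MASK | SVCR_ZA_MASK);
}

int main(void)
{
        printf("%d %d %d\n",
               must_flush_sme_state(0),                 /* 0: nothing live */
               must_flush_sme_state(SVCR_SM_MASK),      /* 1: streaming mode */
               must_flush_sme_state(SVCR_ZA_MASK));     /* 1: ZA live */
        return 0;
}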

arch/arm64/kvm/hyp/include/hyp/switch.h

Lines changed: 10 additions & 2 deletions

@@ -177,9 +177,17 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
         sve_guest = vcpu_has_sve(vcpu);
         esr_ec = kvm_vcpu_trap_get_class(vcpu);
 
-        /* Don't handle SVE traps for non-SVE vcpus here: */
-        if (!sve_guest && esr_ec != ESR_ELx_EC_FP_ASIMD)
+        /* Only handle traps the vCPU can support here: */
+        switch (esr_ec) {
+        case ESR_ELx_EC_FP_ASIMD:
+                break;
+        case ESR_ELx_EC_SVE:
+                if (!sve_guest)
+                        return false;
+                break;
+        default:
                 return false;
+        }
 
         /* Valid trap. Switch the context: */

arch/arm64/kvm/hyp/pgtable.c

Lines changed: 32 additions & 9 deletions

@@ -58,8 +58,9 @@
 struct kvm_pgtable_walk_data {
         struct kvm_pgtable_walker       *walker;
 
+        const u64                       start;
         u64                             addr;
-        u64                             end;
+        const u64                       end;
 };
 
 static bool kvm_phys_is_valid(u64 phys)
@@ -201,6 +202,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
                 .old    = READ_ONCE(*ptep),
                 .arg    = data->walker->arg,
                 .mm_ops = mm_ops,
+                .start  = data->start,
                 .addr   = data->addr,
                 .end    = data->end,
                 .level  = level,
@@ -293,6 +295,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
                      struct kvm_pgtable_walker *walker)
 {
         struct kvm_pgtable_walk_data walk_data = {
+                .start  = ALIGN_DOWN(addr, PAGE_SIZE),
                 .addr   = ALIGN_DOWN(addr, PAGE_SIZE),
                 .end    = PAGE_ALIGN(walk_data.addr + size),
                 .walker = walker,
@@ -349,7 +352,7 @@ int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
 }
 
 struct hyp_map_data {
-        u64                             phys;
+        const u64                       phys;
         kvm_pte_t                       attr;
 };
 
@@ -407,13 +410,12 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
 static bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
                                     struct hyp_map_data *data)
 {
+        u64 phys = data->phys + (ctx->addr - ctx->start);
         kvm_pte_t new;
-        u64 granule = kvm_granule_size(ctx->level), phys = data->phys;
 
         if (!kvm_block_mapping_supported(ctx, phys))
                 return false;
 
-        data->phys += granule;
         new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level);
         if (ctx->old == new)
                 return true;
@@ -576,7 +578,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
 }
 
 struct stage2_map_data {
-        u64                             phys;
+        const u64                       phys;
         kvm_pte_t                       attr;
         u8                              owner_id;
 
@@ -794,20 +796,43 @@ static bool stage2_pte_executable(kvm_pte_t pte)
         return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
 }
 
+static u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx,
+                                       const struct stage2_map_data *data)
+{
+        u64 phys = data->phys;
+
+        /*
+         * Stage-2 walks to update ownership data are communicated to the map
+         * walker using an invalid PA. Avoid offsetting an already invalid PA,
+         * which could overflow and make the address valid again.
+         */
+        if (!kvm_phys_is_valid(phys))
+                return phys;
+
+        /*
+         * Otherwise, work out the correct PA based on how far the walk has
+         * gotten.
+         */
+        return phys + (ctx->addr - ctx->start);
+}
+
 static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx,
                                         struct stage2_map_data *data)
 {
+        u64 phys = stage2_map_walker_phys_addr(ctx, data);
+
         if (data->force_pte && (ctx->level < (KVM_PGTABLE_MAX_LEVELS - 1)))
                 return false;
 
-        return kvm_block_mapping_supported(ctx, data->phys);
+        return kvm_block_mapping_supported(ctx, phys);
 }
 
 static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
                                       struct stage2_map_data *data)
 {
         kvm_pte_t new;
-        u64 granule = kvm_granule_size(ctx->level), phys = data->phys;
+        u64 phys = stage2_map_walker_phys_addr(ctx, data);
+        u64 granule = kvm_granule_size(ctx->level);
         struct kvm_pgtable *pgt = data->mmu->pgt;
         struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
 
@@ -841,8 +866,6 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 
         stage2_make_pte(ctx, new);
 
-        if (kvm_phys_is_valid(phys))
-                data->phys += granule;
         return 0;
 }
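
Note: the essence of both map-walker changes is that the PA is now derived from walk progress — (ctx->addr - ctx->start) added to an immutable base — instead of being incremented per leaf, so a retried walk recomputes the same PA and the IPA and PA can no longer drift out of sync. A minimal standalone sketch of that arithmetic follows; visit_ctx and walker_phys_addr are simplified, hypothetical stand-ins for the kernel types.

/* Hedged sketch: inferring the PA from walk progress instead of mutating it. */
#include <stdint.h>
#include <stdio.h>

struct visit_ctx {
        uint64_t start; /* IPA where the whole walk began */
        uint64_t addr;  /* IPA the walker is currently visiting */
};

/* PA = base PA + distance walked. A retry over the same range recomputes
 * the same PA from the same inputs, with no mutable state to get stale. */
static uint64_t walker_phys_addr(const struct visit_ctx *ctx, uint64_t base_phys)
{
        return base_phys + (ctx->addr - ctx->start);
}

int main(void)
{
        struct visit_ctx ctx = { .start = 0x40000000, .addr = 0x40203000 };

        /* Mapping IPA 0x40000000.. to PA 0x80000000..: visiting IPA
         * 0x40203000 must resolve to PA 0x80203000. */
        printf("PA = 0x%llx\n",
               (unsigned long long)walker_phys_addr(&ctx, 0x80000000ULL));
        return 0;
}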

arch/arm64/kvm/inject_fault.c

Lines changed: 1 addition & 1 deletion

@@ -204,7 +204,7 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu)
          * Size Fault at level 0, as if exceeding PARange.
          *
          * Non-LPAE guests will only get the external abort, as there
-         * is no way to to describe the ASF.
+         * is no way to describe the ASF.
          */
         if (vcpu_el1_is_32bit(vcpu) &&
             !(vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE))

arch/arm64/kvm/vgic/vgic-v3.c

Lines changed: 4 additions & 0 deletions

@@ -616,6 +616,10 @@ static const struct midr_range broken_seis[] = {
         MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
         MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
         MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
+        MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO),
+        MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO),
+        MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX),
+        MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX),
         {},
 };
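
Note: a MIDR_ALL_VERSIONS entry matches a CPU model regardless of its variant and revision fields. The standalone sketch below shows the masked comparison that such matching effectively boils down to; midr_matches_model is a hypothetical simplification, not the kernel's range-based matcher.

/* Hedged sketch: matching a MIDR while ignoring variant and revision. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MIDR_REVISION_MASK      0x0000000f      /* bits [3:0] */
#define MIDR_VARIANT_MASK       0x00f00000      /* bits [23:20] */

static bool midr_matches_model(uint32_t midr, uint32_t model)
{
        uint32_t mask = ~(MIDR_REVISION_MASK | MIDR_VARIANT_MASK);

        return (midr & mask) == (model & mask);
}

int main(void)
{
        uint32_t model = 0x610f0340;    /* MIDR_APPLE_M2_BLIZZARD_PRO, as above */

        /* Any variant/revision of the same implementer+part matches. */
        printf("%d\n", midr_matches_model(0x611f0341, model)); /* 1 */
        printf("%d\n", midr_matches_model(0x610f0330, model)); /* 0: other part */
        return 0;
}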

arch/arm64/kvm/vmid.c

Lines changed: 3 additions & 4 deletions

@@ -47,7 +47,7 @@ static void flush_context(void)
         int cpu;
         u64 vmid;
 
-        bitmap_clear(vmid_map, 0, NUM_USER_VMIDS);
+        bitmap_zero(vmid_map, NUM_USER_VMIDS);
 
         for_each_possible_cpu(cpu) {
                 vmid = atomic64_xchg_relaxed(&per_cpu(active_vmids, cpu), 0);
@@ -182,8 +182,7 @@ int __init kvm_arm_vmid_alloc_init(void)
          */
         WARN_ON(NUM_USER_VMIDS - 1 <= num_possible_cpus());
         atomic64_set(&vmid_generation, VMID_FIRST_VERSION);
-        vmid_map = kcalloc(BITS_TO_LONGS(NUM_USER_VMIDS),
-                           sizeof(*vmid_map), GFP_KERNEL);
+        vmid_map = bitmap_zalloc(NUM_USER_VMIDS, GFP_KERNEL);
         if (!vmid_map)
                 return -ENOMEM;
 
@@ -192,5 +191,5 @@ int __init kvm_arm_vmid_alloc_init(void)
 
 void __init kvm_arm_vmid_alloc_free(void)
 {
-        kfree(vmid_map);
+        bitmap_free(vmid_map);
 }
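
Note: the pattern adopted here is to size the allocation in bits with bitmap_zalloc(), clear in bulk with bitmap_zero(), and release with bitmap_free(), rather than open-coding kcalloc(BITS_TO_LONGS(...), ...). A minimal, hypothetical kernel-module sketch of the same lifecycle follows; NR_IDS and id_map are illustrative stand-ins for NUM_USER_VMIDS and vmid_map.

/* Hedged sketch: the bitmap API lifecycle, as an illustrative module. */
#include <linux/bitmap.h>
#include <linux/gfp.h>
#include <linux/module.h>

#define NR_IDS 256      /* stand-in for NUM_USER_VMIDS */

static unsigned long *id_map;

static int __init id_map_init(void)
{
        /* One allocation sized in bits, zero-initialised. */
        id_map = bitmap_zalloc(NR_IDS, GFP_KERNEL);
        if (!id_map)
                return -ENOMEM;

        set_bit(0, id_map);             /* reserve ID 0 */
        bitmap_zero(id_map, NR_IDS);    /* full reset, as in flush_context() */
        return 0;
}

static void __exit id_map_exit(void)
{
        bitmap_free(id_map);    /* pairs with bitmap_zalloc(), not kfree() */
}

module_init(id_map_init);
module_exit(id_map_exit);
MODULE_LICENSE("GPL");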

arch/x86/include/asm/vmx.h

Lines changed: 2 additions & 0 deletions

@@ -13,7 +13,9 @@
 
 
 #include <linux/bitops.h>
+#include <linux/bug.h>
 #include <linux/types.h>
+
 #include <uapi/asm/vmx.h>
 #include <asm/vmxfeatures.h>

arch/x86/kvm/cpuid.c

Lines changed: 0 additions & 16 deletions

@@ -253,7 +253,6 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
                                        int nent)
 {
         struct kvm_cpuid_entry2 *best;
-        u64 guest_supported_xcr0 = cpuid_get_supported_xcr0(entries, nent);
 
         best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
         if (best) {
@@ -292,21 +291,6 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
                                            vcpu->arch.ia32_misc_enable_msr &
                                            MSR_IA32_MISC_ENABLE_MWAIT);
         }
-
-        /*
-         * Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate
-         * the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's
-         * requested XCR0 value. The enclave's XFRM must be a subset of XCRO
-         * at the time of EENTER, thus adjust the allowed XFRM by the guest's
-         * supported XCR0. Similar to XCR0 handling, FP and SSE are forced to
-         * '1' even on CPUs that don't support XSAVE.
-         */
-        best = cpuid_entry2_find(entries, nent, 0x12, 0x1);
-        if (best) {
-                best->ecx &= guest_supported_xcr0 & 0xffffffff;
-                best->edx &= guest_supported_xcr0 >> 32;
-                best->ecx |= XFEATURE_MASK_FPSSE;
-        }
 }
 
 void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
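
Note: with the CPUID massaging removed, the companion fix in this pull ("Don't rely _only_ on CPUID to enforce XCR0 restrictions for ECREATE") enforces the XFRM rules when the enclave is created instead. The standalone sketch below shows the kind of check involved; xfrm_is_valid and its exact conditions are illustrative assumptions, not the kernel's helper.

/* Hedged sketch: validating an enclave's XFRM at ECREATE time. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define XFEATURE_MASK_FP        (1ULL << 0)
#define XFEATURE_MASK_SSE       (1ULL << 1)
#define XFEATURE_MASK_FPSSE     (XFEATURE_MASK_FP | XFEATURE_MASK_SSE)

static bool xfrm_is_valid(uint64_t xfrm, uint64_t guest_supported_xcr0)
{
        /* XFRM must be a subset of what the guest may set in XCR0... */
        if (xfrm & ~(guest_supported_xcr0 | XFEATURE_MASK_FPSSE))
                return false;
        /* ...and, like XCR0 itself, must always include FP and SSE. */
        return (xfrm & XFEATURE_MASK_FPSSE) == XFEATURE_MASK_FPSSE;
}

int main(void)
{
        uint64_t supported = XFEATURE_MASK_FPSSE | (1ULL << 2); /* +AVX */

        printf("%d\n", xfrm_is_valid(XFEATURE_MASK_FPSSE, supported)); /* 1 */
        printf("%d\n", xfrm_is_valid(XFEATURE_MASK_FP, supported));    /* 0 */
        printf("%d\n", xfrm_is_valid((1ULL << 9) | 3, supported));     /* 0 */
        return 0;
}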
