@@ -19,6 +19,7 @@
 #include <linux/misc_cgroup.h>
 #include <linux/processor.h>
 #include <linux/trace_events.h>
+#include <uapi/linux/sev-guest.h>
 
 #include <asm/pkru.h>
 #include <asm/trapnr.h>
@@ -326,6 +327,78 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
 	sev_decommission(handle);
 }
 
+/*
+ * This sets up bounce buffers/firmware pages to handle SNP Guest Request
+ * messages (e.g. attestation requests). See "SNP Guest Request" in the GHCB
+ * 2.0 specification for more details.
+ *
+ * Technically, when an SNP Guest Request is issued, the guest will provide its
+ * own request/response pages, which could in theory be passed along directly
+ * to firmware rather than using bounce pages. However, these pages would need
+ * special care:
+ *
+ * - Both pages are from shared guest memory, so they need to be protected
+ *   from migration/etc. occurring while firmware reads/writes to them. At a
+ *   minimum, this requires elevating the ref counts and potentially needing
+ *   an explicit pinning of the memory. This places additional restrictions
+ *   on what type of memory backends userspace can use for shared guest
+ *   memory since there is some reliance on using refcounted pages.
+ *
+ * - The response page needs to be switched to Firmware-owned[1] state
+ *   before the firmware can write to it, which can lead to potential
+ *   host RMP #PFs if the guest is misbehaved and hands the host a
+ *   guest page that KVM might write to for other reasons (e.g. virtio
+ *   buffers/etc.).
+ *
+ * Both of these issues can be avoided completely by using separately-allocated
+ * bounce pages for both the request/response pages and passing those to
+ * firmware instead. So that's what is being set up here.
+ *
+ * Guest requests rely on message sequence numbers to ensure requests are
+ * issued to firmware in the order the guest issues them, so concurrent guest
+ * requests generally shouldn't happen. But a misbehaved guest could issue
+ * concurrent guest requests in theory, so a mutex is used to serialize
+ * access to the bounce buffers.
+ *
+ * [1] See the "Page States" section of the SEV-SNP Firmware ABI for more
+ *     details on Firmware-owned pages, along with "RMP and VMPL Access Checks"
+ *     in the APM for details on the related RMP restrictions.
+ */
+static int snp_guest_req_init(struct kvm *kvm)
+{
+	struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
+	struct page *req_page;
+
+	req_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+	if (!req_page)
+		return -ENOMEM;
+
+	sev->guest_resp_buf = snp_alloc_firmware_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+	if (!sev->guest_resp_buf) {
+		__free_page(req_page);
+		return -EIO;
+	}
+
+	sev->guest_req_buf = page_address(req_page);
+	mutex_init(&sev->guest_req_mutex);
+
+	return 0;
+}
+
+static void snp_guest_req_cleanup(struct kvm *kvm)
+{
+	struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
+
+	if (sev->guest_resp_buf)
+		snp_free_firmware_page(sev->guest_resp_buf);
+
+	if (sev->guest_req_buf)
+		__free_page(virt_to_page(sev->guest_req_buf));
+
+	sev->guest_req_buf = NULL;
+	sev->guest_resp_buf = NULL;
+}
+
 static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
			    struct kvm_sev_init *data,
			    unsigned long vm_type)
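For reference, the per-VM state that snp_guest_req_init() and snp_guest_req_cleanup() manage is added to struct kvm_sev_info elsewhere in this series (in arch/x86/kvm/svm/svm.h). A minimal sketch of the assumed fields, shown here only for context:

	/* Sketch of the struct kvm_sev_info additions this hunk relies on. */
	void *guest_req_buf;		/* bounce page for request messages (normal kernel page) */
	void *guest_resp_buf;		/* bounce page for responses (firmware page) */
	struct mutex guest_req_mutex;	/* serializes use of the bounce pages */

Note the asymmetry in the allocations above: the request buffer is an ordinary zeroed kernel page, since firmware only reads it, while the response buffer comes from snp_alloc_firmware_page() so it can safely be placed in (and later reclaimed from) the Firmware-owned RMP state that firmware requires before writing the response.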
@@ -376,6 +449,10 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
 	if (ret)
 		goto e_free;
 
+	/* This needs to happen after SEV/SNP firmware initialization. */
+	ret = (vm_type == KVM_X86_SNP_VM) ? snp_guest_req_init(kvm) : 0;
+	if (ret)
+		goto e_free;
+
 	INIT_LIST_HEAD(&sev->regions_list);
 	INIT_LIST_HEAD(&sev->mirror_vms);
 	sev->need_init = false;
@@ -2850,6 +2927,8 @@ void sev_vm_destroy(struct kvm *kvm)
 	}
 
 	if (sev_snp_guest(kvm)) {
+		snp_guest_req_cleanup(kvm);
+
 		/*
 		 * Decommission handles unbinding of the ASID. If it fails for
 		 * some unexpected reason, just leak the ASID.
@@ -3321,6 +3400,14 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 		if (!sev_snp_guest(vcpu->kvm) || !kvm_ghcb_sw_scratch_is_valid(svm))
 			goto vmgexit_err;
 		break;
+	case SVM_VMGEXIT_GUEST_REQUEST:
+	case SVM_VMGEXIT_EXT_GUEST_REQUEST:
+		if (!sev_snp_guest(vcpu->kvm) ||
+		    !PAGE_ALIGNED(control->exit_info_1) ||
+		    !PAGE_ALIGNED(control->exit_info_2) ||
+		    control->exit_info_1 == control->exit_info_2)
+			goto vmgexit_err;
+		break;
 	default:
 		reason = GHCB_ERR_INVALID_EVENT;
 		goto vmgexit_err;
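For both of these NAE events, the GHCB 2.0 spec defines SW_EXITINFO1 as the GPA of the guest's request page and SW_EXITINFO2 as the GPA of its response page, which is exactly what the new checks police. Restated as a standalone predicate (a hypothetical helper for illustration, not part of the patch):

	/* Both GPAs must be page-aligned and must not alias each other. */
	static bool snp_guest_req_gpas_valid(gpa_t req_gpa, gpa_t resp_gpa)
	{
		return PAGE_ALIGNED(req_gpa) && PAGE_ALIGNED(resp_gpa) &&
		       req_gpa != resp_gpa;
	}

Doing this validation at the GHCB layer keeps the actual request handlers below free of guest-triggerable corner cases.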
@@ -3939,6 +4026,103 @@ static int sev_snp_ap_creation(struct vcpu_svm *svm)
 	return ret;
 }
 
+static int snp_handle_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa)
+{
+	struct sev_data_snp_guest_request data = {0};
+	struct kvm *kvm = svm->vcpu.kvm;
+	struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
+	sev_ret_code fw_err = 0;
+	int ret;
+
+	if (!sev_snp_guest(kvm))
+		return -EINVAL;
+
+	mutex_lock(&sev->guest_req_mutex);
+
+	if (kvm_read_guest(kvm, req_gpa, sev->guest_req_buf, PAGE_SIZE)) {
+		ret = -EIO;
+		goto out_unlock;
+	}
+
+	data.gctx_paddr = __psp_pa(sev->snp_context);
+	data.req_paddr = __psp_pa(sev->guest_req_buf);
+	data.res_paddr = __psp_pa(sev->guest_resp_buf);
+
+	/*
+	 * Firmware failures are propagated on to the guest, but any other
+	 * failure condition along the way should be reported to userspace,
+	 * e.g. if the PSP is dead and commands are timing out.
+	 */
+	ret = sev_issue_cmd(kvm, SEV_CMD_SNP_GUEST_REQUEST, &data, &fw_err);
+	if (ret && !fw_err)
+		goto out_unlock;
+
+	if (kvm_write_guest(kvm, resp_gpa, sev->guest_resp_buf, PAGE_SIZE)) {
+		ret = -EIO;
+		goto out_unlock;
+	}
+
+	ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, SNP_GUEST_ERR(0, fw_err));
+
+	ret = 1; /* resume guest */
+
+out_unlock:
+	mutex_unlock(&sev->guest_req_mutex);
+	return ret;
+}
+
+static int snp_handle_ext_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa)
+{
+	struct kvm *kvm = svm->vcpu.kvm;
+	u8 msg_type;
+
+	if (!sev_snp_guest(kvm))
+		return -EINVAL;
+
+	if (kvm_read_guest(kvm, req_gpa + offsetof(struct snp_guest_msg_hdr, msg_type),
+			   &msg_type, 1))
+		return -EIO;
+
+	/*
+	 * As per the GHCB spec, requests of type MSG_REPORT_REQ also allow for
+	 * additional certificate data to be provided alongside the attestation
+	 * report via the guest-provided data pages indicated by RAX/RBX. The
+	 * certificate data is optional and requires additional KVM enablement
+	 * to provide an interface for userspace to provide it, but KVM still
+	 * needs to be able to handle extended guest requests either way. So
+	 * provide a stub implementation that will always return an empty
+	 * certificate table in the guest-provided data pages.
+	 */
+	if (msg_type == SNP_MSG_REPORT_REQ) {
+		struct kvm_vcpu *vcpu = &svm->vcpu;
+		u64 data_npages;
+		gpa_t data_gpa;
+
+		if (!kvm_ghcb_rax_is_valid(svm) || !kvm_ghcb_rbx_is_valid(svm))
+			goto request_invalid;
+
+		data_gpa = vcpu->arch.regs[VCPU_REGS_RAX];
+		data_npages = vcpu->arch.regs[VCPU_REGS_RBX];
+
+		if (!PAGE_ALIGNED(data_gpa))
+			goto request_invalid;
+
+		/*
+		 * As per the GHCB spec (see "SNP Extended Guest Request"), the
+		 * certificate table is terminated by 24 bytes of zeroes.
+		 */
+		if (data_npages && kvm_clear_guest(kvm, data_gpa, 24))
+			return -EIO;
+	}
+
+	return snp_handle_guest_req(svm, req_gpa, resp_gpa);
+
+request_invalid:
+	ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2);
+	ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_INPUT);
+	return 1; /* resume guest */
+}
+
 static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
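The value written back through SW_EXITINFO2 packs two error spaces into a single 64-bit field, per "SNP Guest Request" in the GHCB spec: a VMM-level error code in the upper 32 bits and the firmware's return code in the lower 32. SNP_GUEST_ERR() comes from the newly included <uapi/linux/sev-guest.h>; its shape is roughly the following (see the header for the authoritative definitions):

	/* bits 63:32 = VMM error code, bits 31:0 = firmware error code */
	#define SNP_GUEST_VMM_ERR_SHIFT		32
	#define SNP_GUEST_VMM_ERR(x)		(((u64)(x)) << SNP_GUEST_VMM_ERR_SHIFT)
	#define SNP_GUEST_ERR(vmm_err, fw_err)	(SNP_GUEST_VMM_ERR(vmm_err) | \
						 ((u64)(fw_err) & GENMASK_ULL(31, 0)))

snp_handle_guest_req() passes a VMM error of 0, so the guest sees only the firmware's return code (0 on success), while host-side failures such as -EIO never reach the guest and are instead reported to userspace.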
@@ -4213,6 +4397,12 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
 
 		ret = 1;
 		break;
+	case SVM_VMGEXIT_GUEST_REQUEST:
+		ret = snp_handle_guest_req(svm, control->exit_info_1, control->exit_info_2);
+		break;
+	case SVM_VMGEXIT_EXT_GUEST_REQUEST:
+		ret = snp_handle_ext_guest_req(svm, control->exit_info_1, control->exit_info_2);
+		break;
 	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
 		vcpu_unimpl(vcpu,
 			    "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
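One detail worth spelling out from snp_handle_ext_guest_req() above: the 24 bytes that get zeroed to produce an "empty certificate table" correspond to the size of a single certificate-table entry as laid out in the GHCB spec, where an all-zero entry terminates the table. A sketch of that layout (the struct name is illustrative, not something this patch defines):

	/*
	 * Certificate table entry, per "SNP Extended Guest Request" in the
	 * GHCB spec. The table lives at the start of the guest-provided
	 * data pages and is terminated by an all-zero entry, so clearing
	 * sizeof() == 24 bytes is enough to report "no certificates".
	 */
	struct snp_cert_table_entry {
		u8 guid[16];	/* identifies the certificate (e.g. VCEK/ASK/ARK) */
		u32 offset;	/* byte offset of the cert blob within the data pages */
		u32 length;	/* length of the cert blob in bytes */
	} __packed;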