
Commit c846b45

yamahata authored and bonzini committed
KVM: TDX: Add an ioctl to create initial guest memory
Add a new ioctl for the user space VMM to initialize guest memory with the specified memory contents.

Because TDX protects the guest's memory, the creation of the initial guest memory requires a dedicated TDX module API, TDH.MEM.PAGE.ADD(), instead of directly copying the memory contents into the guest's memory, as is done for the default VM type.

Define a new subcommand, KVM_TDX_INIT_MEM_REGION, of the vCPU-scoped KVM_MEMORY_ENCRYPT_OP. Check if the GFN is already pre-allocated, assign the guest page in the Secure-EPT, copy the initial memory contents into the guest memory, and encrypt the guest memory. Optionally, extend the memory measurement of the TDX guest.

The ioctl uses the vCPU file descriptor because of the TDX module's requirement that the memory be added to the S-EPT (via TDH.MEM.SEPT.ADD) prior to initialization (TDH.MEM.PAGE.ADD). Accessing the MMU in turn requires a vCPU file descriptor, just as for KVM_PRE_FAULT_MEMORY. In fact, the post-populate callback is able to reuse the same logic used by KVM_PRE_FAULT_MEMORY, so that userspace can do everything with a single ioctl.

Note that this is the only way to invoke TDH.MEM.SEPT.ADD before the TD is finalized, as userspace cannot use KVM_PRE_FAULT_MEMORY at that point. This ensures that there cannot be pages in the S-EPT awaiting TDH.MEM.PAGE.ADD, which would be treated incorrectly as spurious by tdp_mmu_map_handle_target_level() (KVM would see the SPTE as PRESENT, but the corresponding S-EPT entry would be !PRESENT).

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Co-developed-by: Yan Zhao <yan.y.zhao@intel.com>
Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
---
- KVM_BUG_ON() for kvm_tdx->nr_premapped (Paolo)
- Use tdx_operand_busy()
- Merge first patch in SEPT SEAMCALL retry series into this base (Paolo)

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
1 parent b832317 commit c846b45

File tree: 2 files changed, +153 -0 lines changed


arch/x86/include/uapi/asm/kvm.h

Lines changed: 9 additions & 0 deletions
@@ -932,6 +932,7 @@ enum kvm_tdx_cmd_id {
         KVM_TDX_CAPABILITIES = 0,
         KVM_TDX_INIT_VM,
         KVM_TDX_INIT_VCPU,
+        KVM_TDX_INIT_MEM_REGION,
         KVM_TDX_GET_CPUID,
 
         KVM_TDX_CMD_NR_MAX,
@@ -987,4 +988,12 @@ struct kvm_tdx_init_vm {
         struct kvm_cpuid2 cpuid;
 };
 
+#define KVM_TDX_MEASURE_MEMORY_REGION        _BITULL(0)
+
+struct kvm_tdx_init_mem_region {
+        __u64 source_addr;
+        __u64 gpa;
+        __u64 nr_pages;
+};
+
 #endif /* _ASM_X86_KVM_H */
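
For illustration, here is a minimal userspace sketch of how a VMM might drive the new subcommand with this struct. The wrapper name and the vcpu_fd/src_hva/gpa/nr_pages values are hypothetical placeholders; struct kvm_tdx_cmd and KVM_MEMORY_ENCRYPT_OP are the existing vCPU-scoped TDX command plumbing:

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /*
     * Hedged sketch: add nr_pages of initial guest memory starting at gpa,
     * sourced from the page-aligned host address src_hva, and extend the TD
     * measurement over it.  All parameter values are VMM-chosen placeholders.
     */
    static int tdx_init_mem_region(int vcpu_fd, void *src_hva, __u64 gpa,
                                   __u64 nr_pages)
    {
            struct kvm_tdx_init_mem_region region = {
                    .source_addr = (__u64)(unsigned long)src_hva,
                    .gpa = gpa,
                    .nr_pages = nr_pages,
            };
            struct kvm_tdx_cmd cmd = {
                    .id = KVM_TDX_INIT_MEM_REGION,
                    .flags = KVM_TDX_MEASURE_MEMORY_REGION,  /* optional */
                    .data = (__u64)(unsigned long)&region,
            };

            /* vCPU-scoped: the fd is a vCPU fd, not the VM fd. */
            return ioctl(vcpu_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
    }

On interruption the kernel copies the partially advanced region back through cmd.data; the sketch after the tdx.c diff below uses that to resume.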

arch/x86/kvm/vmx/tdx.c

Lines changed: 144 additions & 0 deletions
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/cleanup.h>
 #include <linux/cpu.h>
 #include <asm/cpufeature.h>
 #include <linux/misc_cgroup.h>
@@ -10,6 +11,7 @@
 #include "tdx.h"
 #include "vmx.h"
 #include "mmu/spte.h"
+#include "common.h"
 
 #pragma GCC poison to_vmx
 
@@ -1606,6 +1608,145 @@ static int tdx_vcpu_init(struct kvm_vcpu *vcpu, struct kvm_tdx_cmd *cmd)
         return 0;
 }
 
+struct tdx_gmem_post_populate_arg {
+        struct kvm_vcpu *vcpu;
+        __u32 flags;
+};
+
+static int tdx_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
+                                  void __user *src, int order, void *_arg)
+{
+        u64 error_code = PFERR_GUEST_FINAL_MASK | PFERR_PRIVATE_ACCESS;
+        struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+        struct tdx_gmem_post_populate_arg *arg = _arg;
+        struct kvm_vcpu *vcpu = arg->vcpu;
+        gpa_t gpa = gfn_to_gpa(gfn);
+        u8 level = PG_LEVEL_4K;
+        struct page *src_page;
+        int ret, i;
+        u64 err, entry, level_state;
+
+        /*
+         * Get the source page if it has been faulted in. Return failure if the
+         * source page has been swapped out or unmapped in primary memory.
+         */
+        ret = get_user_pages_fast((unsigned long)src, 1, 0, &src_page);
+        if (ret < 0)
+                return ret;
+        if (ret != 1)
+                return -ENOMEM;
+
+        ret = kvm_tdp_map_page(vcpu, gpa, error_code, &level);
+        if (ret < 0)
+                goto out;
+
+        /*
+         * The private mem cannot be zapped after kvm_tdp_map_page()
+         * because all paths are covered by slots_lock and the
+         * filemap invalidate lock. Check that they are indeed enough.
+         */
+        if (IS_ENABLED(CONFIG_KVM_PROVE_MMU)) {
+                scoped_guard(read_lock, &kvm->mmu_lock) {
+                        if (KVM_BUG_ON(!kvm_tdp_mmu_gpa_is_mapped(vcpu, gpa), kvm)) {
+                                ret = -EIO;
+                                goto out;
+                        }
+                }
+        }
+
+        ret = 0;
+        err = tdh_mem_page_add(&kvm_tdx->td, gpa, pfn_to_page(pfn),
+                               src_page, &entry, &level_state);
+        if (err) {
+                ret = unlikely(tdx_operand_busy(err)) ? -EBUSY : -EIO;
+                goto out;
+        }
+
+        if (arg->flags & KVM_TDX_MEASURE_MEMORY_REGION) {
+                for (i = 0; i < PAGE_SIZE; i += TDX_EXTENDMR_CHUNKSIZE) {
+                        err = tdh_mr_extend(&kvm_tdx->td, gpa + i, &entry,
+                                            &level_state);
+                        if (err) {
+                                ret = -EIO;
+                                break;
+                        }
+                }
+        }
+
+out:
+        put_page(src_page);
+        return ret;
+}
+
+static int tdx_vcpu_init_mem_region(struct kvm_vcpu *vcpu, struct kvm_tdx_cmd *cmd)
+{
+        struct vcpu_tdx *tdx = to_tdx(vcpu);
+        struct kvm *kvm = vcpu->kvm;
+        struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+        struct kvm_tdx_init_mem_region region;
+        struct tdx_gmem_post_populate_arg arg;
+        long gmem_ret;
+        int ret;
+
+        if (tdx->state != VCPU_TD_STATE_INITIALIZED)
+                return -EINVAL;
+
+        guard(mutex)(&kvm->slots_lock);
+
+        /* Once TD is finalized, the initial guest memory is fixed. */
+        if (kvm_tdx->state == TD_STATE_RUNNABLE)
+                return -EINVAL;
+
+        if (cmd->flags & ~KVM_TDX_MEASURE_MEMORY_REGION)
+                return -EINVAL;
+
+        if (copy_from_user(&region, u64_to_user_ptr(cmd->data), sizeof(region)))
+                return -EFAULT;
+
+        if (!PAGE_ALIGNED(region.source_addr) || !PAGE_ALIGNED(region.gpa) ||
+            !region.nr_pages ||
+            region.gpa + (region.nr_pages << PAGE_SHIFT) <= region.gpa ||
+            !vt_is_tdx_private_gpa(kvm, region.gpa) ||
+            !vt_is_tdx_private_gpa(kvm, region.gpa + (region.nr_pages << PAGE_SHIFT) - 1))
+                return -EINVAL;
+
+        kvm_mmu_reload(vcpu);
+        ret = 0;
+        while (region.nr_pages) {
+                if (signal_pending(current)) {
+                        ret = -EINTR;
+                        break;
+                }
+
+                arg = (struct tdx_gmem_post_populate_arg) {
+                        .vcpu = vcpu,
+                        .flags = cmd->flags,
+                };
+                gmem_ret = kvm_gmem_populate(kvm, gpa_to_gfn(region.gpa),
+                                             u64_to_user_ptr(region.source_addr),
+                                             1, tdx_gmem_post_populate, &arg);
+                if (gmem_ret < 0) {
+                        ret = gmem_ret;
+                        break;
+                }
+
+                if (gmem_ret != 1) {
+                        ret = -EIO;
+                        break;
+                }
+
+                region.source_addr += PAGE_SIZE;
+                region.gpa += PAGE_SIZE;
+                region.nr_pages--;
+
+                cond_resched();
+        }
+
+        if (copy_to_user(u64_to_user_ptr(cmd->data), &region, sizeof(region)))
+                ret = -EFAULT;
+        return ret;
+}
+
 int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp)
 {
         struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
@@ -1625,6 +1766,9 @@ int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp)
         case KVM_TDX_INIT_VCPU:
                 ret = tdx_vcpu_init(vcpu, &cmd);
                 break;
+        case KVM_TDX_INIT_MEM_REGION:
+                ret = tdx_vcpu_init_mem_region(vcpu, &cmd);
+                break;
         case KVM_TDX_GET_CPUID:
                 ret = tdx_vcpu_get_cpuid(vcpu, &cmd);
                 break;
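
Because tdx_vcpu_init_mem_region() copies the advanced region back to userspace before returning, the ioctl appears restartable: after -EINTR (pending signal) or -EBUSY (TDX-module contention reported by tdx_operand_busy()), source_addr/gpa/nr_pages already describe the first page not yet added. A hedged sketch of a resuming caller (the wrapper name is hypothetical, and blanket retry-on-EBUSY is an assumption, not something this patch guarantees):

    #include <errno.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /*
     * Hedged sketch: retry KVM_TDX_INIT_MEM_REGION until the whole region is
     * added or a hard error occurs.  The kernel advances *region in place,
     * so each retry resumes where the previous attempt stopped.
     */
    static int tdx_init_mem_region_resume(int vcpu_fd,
                                          struct kvm_tdx_init_mem_region *region,
                                          __u32 flags)
    {
            struct kvm_tdx_cmd cmd = {
                    .id = KVM_TDX_INIT_MEM_REGION,
                    .flags = flags,
                    .data = (__u64)(unsigned long)region,
            };
            int ret;

            do {
                    ret = ioctl(vcpu_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
            } while (ret < 0 && (errno == EINTR || errno == EBUSY));

            return ret;
    }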
