Skip to content

Commit 0c9782e

Browse files
kaccardi authored and hansendc committed
x86/sgx: Set active memcg prior to shmem allocation
When the system runs out of enclave memory, SGX can reclaim EPC pages by swapping to normal RAM. These backing pages are allocated via a per-enclave shared memory area. Since SGX allows unlimited overcommit of EPC memory, the reclaimer thread can allocate a large number of backing RAM pages in response to EPC memory pressure. When the shared memory backing RAM allocation occurs in the reclaimer thread context, the shared memory is charged to the root memory control group, and the shmem usage of the enclave is not properly accounted for, making cgroups ineffective at limiting the amount of RAM an enclave can consume. For example, when using a cgroup to launch a set of test enclaves, the kernel does not properly account for 50% - 75% of shmem page allocations on average. In the worst case, when nearly all allocations occur in the reclaimer thread, the kernel charges less than one percent of the shmem used by the enclave to the correct cgroup. SGX stores a list of mm_structs that are associated with an enclave. Pick one of them during reclaim and charge that mm's memcg with the shmem allocation. The one that gets picked is arbitrary, but this list almost always only has one mm. The cases where there is more than one mm with different memcgs are not worth considering. Create a new function - sgx_encl_alloc_backing(). This function is used whenever a new backing storage page needs to be allocated. Previously the same function was used for page allocation as well as retrieving a previously allocated page. Prior to backing page allocation, if there is an mm_struct associated with the enclave that is requesting the allocation, that mm's memory control group is set as the active memory control group.
[ dhansen: - fix merge conflict with ELDU fixes - check against actual ksgxd_tsk, not ->mm ] Cc: stable@vger.kernel.org Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com> Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> Reviewed-by: Shakeel Butt <shakeelb@google.com> Acked-by: Roman Gushchin <roman.gushchin@linux.dev> Link: https://lkml.kernel.org/r/20220520174248.4918-1-kristen@linux.intel.com
1 parent 17d8e3d commit 0c9782e

File tree

3 files changed

+115
-6
lines changed

3 files changed

+115
-6
lines changed

arch/x86/kernel/cpu/sgx/encl.c

Lines changed: 103 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
152152

153153
page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
154154

155-
ret = sgx_encl_get_backing(encl, page_index, &b);
155+
ret = sgx_encl_lookup_backing(encl, page_index, &b);
156156
if (ret)
157157
return ret;
158158

@@ -718,7 +718,7 @@ static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl,
718718
* 0 on success,
719719
* -errno otherwise.
720720
*/
721-
int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
721+
static int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
722722
struct sgx_backing *backing)
723723
{
724724
pgoff_t page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
@@ -743,6 +743,107 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
743743
return 0;
744744
}
745745

746+
/*
747+
* When called from ksgxd, returns the mem_cgroup of a struct mm stored
748+
* in the enclave's mm_list. When not called from ksgxd, just returns
749+
* the mem_cgroup of the current task.
750+
*/
751+
static struct mem_cgroup *sgx_encl_get_mem_cgroup(struct sgx_encl *encl)
752+
{
753+
struct mem_cgroup *memcg = NULL;
754+
struct sgx_encl_mm *encl_mm;
755+
int idx;
756+
757+
/*
758+
* If called from normal task context, return the mem_cgroup
759+
* of the current task's mm. The remainder of the handling is for
760+
* ksgxd.
761+
*/
762+
if (!current_is_ksgxd())
763+
return get_mem_cgroup_from_mm(current->mm);
764+
765+
/*
766+
* Search the enclave's mm_list to find an mm associated with
767+
* this enclave to charge the allocation to.
768+
*/
769+
idx = srcu_read_lock(&encl->srcu);
770+
771+
list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
772+
if (!mmget_not_zero(encl_mm->mm))
773+
continue;
774+
775+
memcg = get_mem_cgroup_from_mm(encl_mm->mm);
776+
777+
mmput_async(encl_mm->mm);
778+
779+
break;
780+
}
781+
782+
srcu_read_unlock(&encl->srcu, idx);
783+
784+
/*
785+
* In the rare case that there isn't an mm associated with
786+
* the enclave, set memcg to the current active mem_cgroup.
787+
* This will be the root mem_cgroup if there is no active
788+
* mem_cgroup.
789+
*/
790+
if (!memcg)
791+
return get_mem_cgroup_from_mm(NULL);
792+
793+
return memcg;
794+
}
795+
796+
/**
797+
* sgx_encl_alloc_backing() - allocate a new backing storage page
798+
* @encl: an enclave pointer
799+
* @page_index: enclave page index
800+
* @backing: data for accessing backing storage for the page
801+
*
802+
* When called from ksgxd, sets the active memcg from one of the
803+
* mms in the enclave's mm_list prior to any backing page allocation,
804+
* in order to ensure that shmem page allocations are charged to the
805+
* enclave.
806+
*
807+
* Return:
808+
* 0 on success,
809+
* -errno otherwise.
810+
*/
811+
int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index,
812+
struct sgx_backing *backing)
813+
{
814+
struct mem_cgroup *encl_memcg = sgx_encl_get_mem_cgroup(encl);
815+
struct mem_cgroup *memcg = set_active_memcg(encl_memcg);
816+
int ret;
817+
818+
ret = sgx_encl_get_backing(encl, page_index, backing);
819+
820+
set_active_memcg(memcg);
821+
mem_cgroup_put(encl_memcg);
822+
823+
return ret;
824+
}
825+
826+
/**
827+
* sgx_encl_lookup_backing() - retrieve an existing backing storage page
828+
* @encl: an enclave pointer
829+
* @page_index: enclave page index
830+
* @backing: data for accessing backing storage for the page
831+
*
832+
* Retrieve a backing page for loading data back into an EPC page with ELDU.
833+
* It is the caller's responsibility to ensure that it is appropriate to use
834+
* sgx_encl_lookup_backing() rather than sgx_encl_alloc_backing(). If lookup is
835+
* not used correctly, this will cause an allocation which is not accounted for.
836+
*
837+
* Return:
838+
* 0 on success,
839+
* -errno otherwise.
840+
*/
841+
int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
842+
struct sgx_backing *backing)
843+
{
844+
return sgx_encl_get_backing(encl, page_index, backing);
845+
}
846+
746847
/**
747848
* sgx_encl_put_backing() - Unpin the backing storage
748849
* @backing: data for accessing backing storage for the page

arch/x86/kernel/cpu/sgx/encl.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,13 @@ static inline int sgx_encl_find(struct mm_struct *mm, unsigned long addr,
103103
int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
104104
unsigned long end, unsigned long vm_flags);
105105

106+
bool current_is_ksgxd(void);
106107
void sgx_encl_release(struct kref *ref);
107108
int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm);
108-
int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
109-
struct sgx_backing *backing);
109+
int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
110+
struct sgx_backing *backing);
111+
int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index,
112+
struct sgx_backing *backing);
110113
void sgx_encl_put_backing(struct sgx_backing *backing);
111114
int sgx_encl_test_and_clear_young(struct mm_struct *mm,
112115
struct sgx_encl_page *page);

arch/x86/kernel/cpu/sgx/main.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
313313
sgx_encl_put_backing(backing);
314314

315315
if (!encl->secs_child_cnt && test_bit(SGX_ENCL_INITIALIZED, &encl->flags)) {
316-
ret = sgx_encl_get_backing(encl, PFN_DOWN(encl->size),
316+
ret = sgx_encl_alloc_backing(encl, PFN_DOWN(encl->size),
317317
&secs_backing);
318318
if (ret)
319319
goto out;
@@ -384,7 +384,7 @@ static void sgx_reclaim_pages(void)
384384
page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base);
385385

386386
mutex_lock(&encl_page->encl->lock);
387-
ret = sgx_encl_get_backing(encl_page->encl, page_index, &backing[i]);
387+
ret = sgx_encl_alloc_backing(encl_page->encl, page_index, &backing[i]);
388388
if (ret) {
389389
mutex_unlock(&encl_page->encl->lock);
390390
goto skip;
@@ -475,6 +475,11 @@ static bool __init sgx_page_reclaimer_init(void)
475475
return true;
476476
}
477477

478+
bool current_is_ksgxd(void)
479+
{
480+
return current == ksgxd_tsk;
481+
}
482+
478483
static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
479484
{
480485
struct sgx_numa_node *node = &sgx_numa_nodes[nid];

0 commit comments

Comments
 (0)