Skip to content

Commit 2472627

Browse files
Eric DeVolderakpm00
authored andcommitted
crash: add generic infrastructure for crash hotplug support
To support crash hotplug, a mechanism is needed to update the crash elfcorehdr upon CPU or memory changes (e.g. hot un/plug or off/onlining). The crash elfcorehdr describes the CPUs and memory to be written into the vmcore. To track CPU changes, callbacks are registered with the cpuhp mechanism via cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN). The crash hotplug elfcorehdr update has no explicit ordering requirement (relative to other cpuhp states), so it meets the criteria for utilizing CPUHP_BP_PREPARE_DYN. CPUHP_BP_PREPARE_DYN is a dynamic state and avoids the need to introduce a new state for crash hotplug. Also, CPUHP_BP_PREPARE_DYN is the last state in the PREPARE group, just prior to the STARTING group, which is very close to the CPU starting up in a plug/online situation, or stopping in an unplug/offline situation. This minimizes the window of time during an actual plug/online or unplug/offline situation in which the elfcorehdr would be inaccurate. Note that for a CPU being unplugged or offlined, the CPU will still be present in the list of CPUs generated by crash_prepare_elf64_headers(). However, there is no need to explicitly omit the CPU; see the justification in 'crash: change crash_prepare_elf64_headers() to for_each_possible_cpu()'. To track memory changes, a notifier is registered to capture the memblock MEM_ONLINE and MEM_OFFLINE events via register_memory_notifier(). The CPU callbacks and memory notifiers invoke crash_handle_hotplug_event(), which performs needed tasks and then dispatches the event to the architecture-specific arch_crash_handle_hotplug_event() to update the elfcorehdr with the current state of CPUs and memory. During the process, the kexec_lock is held.
Link: https://lkml.kernel.org/r/20230814214446.6659-3-eric.devolder@oracle.com Signed-off-by: Eric DeVolder <eric.devolder@oracle.com> Reviewed-by: Sourabh Jain <sourabhjain@linux.ibm.com> Acked-by: Hari Bathini <hbathini@linux.ibm.com> Acked-by: Baoquan He <bhe@redhat.com> Cc: Akhil Raj <lf32.dev@gmail.com> Cc: Bjorn Helgaas <bhelgaas@google.com> Cc: Borislav Petkov (AMD) <bp@alien8.de> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Dave Young <dyoung@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Mimi Zohar <zohar@linux.ibm.com> Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: "Rafael J. Wysocki" <rafael@kernel.org> Cc: Sean Christopherson <seanjc@google.com> Cc: Takashi Iwai <tiwai@suse.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Thomas Weißschuh <linux@weissschuh.net> Cc: Valentin Schneider <vschneid@redhat.com> Cc: Vivek Goyal <vgoyal@redhat.com> Cc: Vlastimil Babka <vbabka@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent 6f991cc commit 2472627

File tree

5 files changed

+197
-0
lines changed

5 files changed

+197
-0
lines changed

include/linux/crash_core.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,4 +104,11 @@ extern int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_ma
104104
struct kimage;
105105
struct kexec_segment;
106106

107+
/*
 * Hotplug action codes handed to crash_handle_hotplug_event() and recorded
 * in kimage->hp_action while an elfcorehdr update is in progress.
 * KEXEC_CRASH_HP_NONE means no hotplug event is being handled.
 */
#define KEXEC_CRASH_HP_NONE			0
#define KEXEC_CRASH_HP_ADD_CPU			1
#define KEXEC_CRASH_HP_REMOVE_CPU		2
#define KEXEC_CRASH_HP_ADD_MEMORY		3
#define KEXEC_CRASH_HP_REMOVE_MEMORY		4
/*
 * Placeholder CPU number passed for events that do not concern a CPU.
 * Parenthesized so the negative constant expands as a single operand.
 */
#define KEXEC_CRASH_HP_INVALID_CPU		(-1U)
113+
107114
#endif /* LINUX_CRASH_CORE_H */

include/linux/kexec.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ extern note_buf_t __percpu *crash_notes;
3333
#include <linux/compat.h>
3434
#include <linux/ioport.h>
3535
#include <linux/module.h>
36+
#include <linux/highmem.h>
3637
#include <asm/kexec.h>
3738

3839
/* Verify architecture specific macros are defined */
@@ -345,6 +346,12 @@ struct kimage {
345346
struct purgatory_info purgatory_info;
346347
#endif
347348

349+
#ifdef CONFIG_CRASH_HOTPLUG
350+
int hp_action;
351+
int elfcorehdr_index;
352+
bool elfcorehdr_updated;
353+
#endif
354+
348355
#ifdef CONFIG_IMA_KEXEC
349356
/* Virtual address of IMA measurement buffer for kexec syscall */
350357
void *ima_buffer;
@@ -475,6 +482,10 @@ static inline int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, g
475482
static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { }
476483
#endif
477484

485+
/*
 * No-op default for arch_crash_handle_hotplug_event(), compiled only when no
 * macro of that name has been defined beforehand (presumably by the
 * architecture's <asm/kexec.h>, which is included earlier in this header --
 * TODO confirm). With this default the image passed in is left untouched.
 */
#ifndef arch_crash_handle_hotplug_event
486+
static inline void arch_crash_handle_hotplug_event(struct kimage *image) { }
487+
#endif
488+
478489
#else /* !CONFIG_KEXEC_CORE */
479490
struct pt_regs;
480491
struct task_struct;

kernel/Kconfig.kexec

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,4 +112,35 @@ config CRASH_DUMP
112112
For s390, this option also enables zfcpdump.
113113
See also <file:Documentation/s390/zfcpdump.rst>
114114

115+
config CRASH_HOTPLUG
116+
bool "Update the crash elfcorehdr on system configuration changes"
117+
default y
118+
depends on CRASH_DUMP && (HOTPLUG_CPU || MEMORY_HOTPLUG)
119+
depends on ARCH_SUPPORTS_CRASH_HOTPLUG
120+
help
121+
Enable direct update to the crash elfcorehdr (which contains
122+
the list of CPUs and memory regions to be dumped upon a crash)
123+
in response to hot plug/unplug or online/offline of CPUs or
124+
memory. This is a much more advanced approach than userspace
125+
attempting that.
126+
127+
If unsure, say Y.
128+
129+
config CRASH_MAX_MEMORY_RANGES
130+
int "Specify the maximum number of memory regions for the elfcorehdr"
131+
default 8192
132+
depends on CRASH_HOTPLUG
133+
help
134+
For the kexec_file_load() syscall path, specify the maximum number of
135+
memory regions that the elfcorehdr buffer/segment can accommodate.
136+
These regions are obtained via walk_system_ram_res(); eg. the
137+
'System RAM' entries in /proc/iomem.
138+
This value is combined with NR_CPUS_DEFAULT and multiplied by
139+
sizeof(Elf64_Phdr) to determine the final elfcorehdr memory buffer/
140+
segment size.
141+
The value 8192, for example, covers a (sparsely populated) 1TiB system
142+
consisting of 128MiB memblocks, while resulting in an elfcorehdr
143+
memory buffer/segment size under 1MiB. This represents a sane choice
144+
to accommodate both baremetal and virtual machine configurations.
145+
115146
endmenu

kernel/crash_core.c

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,16 @@
1111
#include <linux/vmalloc.h>
1212
#include <linux/sizes.h>
1313
#include <linux/kexec.h>
14+
#include <linux/memory.h>
15+
#include <linux/cpuhotplug.h>
1416

1517
#include <asm/page.h>
1618
#include <asm/sections.h>
1719

1820
#include <crypto/sha1.h>
1921

2022
#include "kallsyms_internal.h"
23+
#include "kexec_internal.h"
2124

2225
/* Per cpu memory for storing cpu states in case of system crash. */
2326
note_buf_t __percpu *crash_notes;
@@ -733,3 +736,142 @@ static int __init crash_notes_memory_init(void)
733736
return 0;
734737
}
735738
subsys_initcall(crash_notes_memory_init);
739+
740+
#ifdef CONFIG_CRASH_HOTPLUG
741+
#undef pr_fmt
742+
#define pr_fmt(fmt) "crash hp: " fmt
743+
/*
744+
* To accurately reflect hot un/plug changes of cpu and memory resources
745+
* (including onlining and offlining of those resources), the elfcorehdr
746+
* (which is passed to the crash kernel via the elfcorehdr= parameter)
747+
* must be updated with the new list of CPUs and memories.
748+
*
749+
* In order to make changes to elfcorehdr, two conditions are needed:
750+
* First, the segment containing the elfcorehdr must be large enough
751+
* to permit a growing number of resources; the elfcorehdr memory size
752+
* is based on NR_CPUS_DEFAULT and CRASH_MAX_MEMORY_RANGES.
753+
* Second, purgatory must explicitly exclude the elfcorehdr from the
754+
* list of segments it checks (since the elfcorehdr changes and thus
755+
* would require an update to purgatory itself to update the digest).
756+
*/
757+
static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
758+
{
759+
struct kimage *image;
760+
761+
/* Obtain lock while changing crash information */
762+
if (!kexec_trylock()) {
763+
pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n");
764+
return;
765+
}
766+
767+
/* Check kdump is not loaded */
768+
if (!kexec_crash_image)
769+
goto out;
770+
771+
image = kexec_crash_image;
772+
773+
if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
774+
hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
775+
pr_debug("hp_action %u, cpu %u\n", hp_action, cpu);
776+
else
777+
pr_debug("hp_action %u\n", hp_action);
778+
779+
/*
780+
* The elfcorehdr_index is set to -1 when the struct kimage
781+
* is allocated. Find the segment containing the elfcorehdr,
782+
* if not already found.
783+
*/
784+
if (image->elfcorehdr_index < 0) {
785+
unsigned long mem;
786+
unsigned char *ptr;
787+
unsigned int n;
788+
789+
for (n = 0; n < image->nr_segments; n++) {
790+
mem = image->segment[n].mem;
791+
ptr = kmap_local_page(pfn_to_page(mem >> PAGE_SHIFT));
792+
if (ptr) {
793+
/* The segment containing elfcorehdr */
794+
if (memcmp(ptr, ELFMAG, SELFMAG) == 0)
795+
image->elfcorehdr_index = (int)n;
796+
kunmap_local(ptr);
797+
}
798+
}
799+
}
800+
801+
if (image->elfcorehdr_index < 0) {
802+
pr_err("unable to locate elfcorehdr segment");
803+
goto out;
804+
}
805+
806+
/* Needed in order for the segments to be updated */
807+
arch_kexec_unprotect_crashkres();
808+
809+
/* Differentiate between normal load and hotplug update */
810+
image->hp_action = hp_action;
811+
812+
/* Now invoke arch-specific update handler */
813+
arch_crash_handle_hotplug_event(image);
814+
815+
/* No longer handling a hotplug event */
816+
image->hp_action = KEXEC_CRASH_HP_NONE;
817+
image->elfcorehdr_updated = true;
818+
819+
/* Change back to read-only */
820+
arch_kexec_protect_crashkres();
821+
822+
/* Errors in the callback is not a reason to rollback state */
823+
out:
824+
/* Release lock now that update complete */
825+
kexec_unlock();
826+
}
827+
828+
static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)
829+
{
830+
switch (val) {
831+
case MEM_ONLINE:
832+
crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY,
833+
KEXEC_CRASH_HP_INVALID_CPU);
834+
break;
835+
836+
case MEM_OFFLINE:
837+
crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY,
838+
KEXEC_CRASH_HP_INVALID_CPU);
839+
break;
840+
}
841+
return NOTIFY_OK;
842+
}
843+
844+
static struct notifier_block crash_memhp_nb = {
845+
.notifier_call = crash_memhp_notifier,
846+
.priority = 0
847+
};
848+
849+
static int crash_cpuhp_online(unsigned int cpu)
850+
{
851+
crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu);
852+
return 0;
853+
}
854+
855+
static int crash_cpuhp_offline(unsigned int cpu)
856+
{
857+
crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu);
858+
return 0;
859+
}
860+
861+
/*
 * crash_hotplug_init() - hook crash elfcorehdr updates into hotplug events.
 *
 * Registers crash_memhp_nb as a memory notifier when CONFIG_MEMORY_HOTPLUG
 * is enabled, and installs crash_cpuhp_online()/crash_cpuhp_offline() on the
 * CPUHP_BP_PREPARE_DYN state when CONFIG_HOTPLUG_CPU is enabled.
 *
 * Return: 0 on success, or the negative error code reported by
 * cpuhp_setup_state_nocalls().
 */
static int __init crash_hotplug_init(void)
{
	int result = 0;

	if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
		register_memory_notifier(&crash_memhp_nb);

	if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
		result = cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN,
			"crash/cpuhp", crash_cpuhp_online, crash_cpuhp_offline);
		/*
		 * For a dynamic state, success is reported as the allocated
		 * (positive) state number. An initcall should report success
		 * as 0, so only propagate genuine (negative) errors.
		 */
		if (result > 0)
			result = 0;
	}

	return result;
}
875+
876+
subsys_initcall(crash_hotplug_init);
877+
#endif

kernel/kexec_core.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,12 @@ struct kimage *do_kimage_alloc_init(void)
274274
/* Initialize the list of unusable pages */
275275
INIT_LIST_HEAD(&image->unusable_pages);
276276

277+
#ifdef CONFIG_CRASH_HOTPLUG
278+
image->hp_action = KEXEC_CRASH_HP_NONE;
279+
image->elfcorehdr_index = -1;
280+
image->elfcorehdr_updated = false;
281+
#endif
282+
277283
return image;
278284
}
279285

0 commit comments

Comments
 (0)