@@ -10,6 +10,7 @@
 #include <linux/utsname.h>
 #include <linux/vmalloc.h>
 #include <linux/sizes.h>
+#include <linux/kexec.h>
 
 #include <asm/page.h>
 #include <asm/sections.h>
@@ -18,6 +19,9 @@
 
 #include "kallsyms_internal.h"
 
+/* Per cpu memory for storing cpu states in case of system crash. */
+note_buf_t __percpu *crash_notes;
+
 /* vmcoreinfo stuff */
 unsigned char *vmcoreinfo_data;
 size_t vmcoreinfo_size;
@@ -314,6 +318,187 @@ static int __init parse_crashkernel_dummy(char *arg)
 }
 early_param("crashkernel", parse_crashkernel_dummy);
 
+int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
+			  void **addr, unsigned long *sz)
+{
+	Elf64_Ehdr *ehdr;
+	Elf64_Phdr *phdr;
+	unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz;
+	unsigned char *buf;
+	unsigned int cpu, i;
+	unsigned long long notes_addr;
+	unsigned long mstart, mend;
+
+	/* extra phdr for vmcoreinfo ELF note */
+	nr_phdr = nr_cpus + 1;
+	nr_phdr += mem->nr_ranges;
+
+	/*
+	 * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping
+	 * area (for example, ffffffff80000000 - ffffffffa0000000 on x86_64).
+	 * I think this is required by tools like gdb. So same physical
+	 * memory will be mapped in two ELF headers. One will contain kernel
+	 * text virtual addresses and other will have __va(physical) addresses.
+	 */
+
+	nr_phdr++;
+	elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr);
+	elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN);
+
+	buf = vzalloc(elf_sz);
+	if (!buf)
+		return -ENOMEM;
+
+	ehdr = (Elf64_Ehdr *)buf;
+	phdr = (Elf64_Phdr *)(ehdr + 1);
+	memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
+	ehdr->e_ident[EI_CLASS] = ELFCLASS64;
+	ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
+	ehdr->e_ident[EI_VERSION] = EV_CURRENT;
+	ehdr->e_ident[EI_OSABI] = ELF_OSABI;
+	memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD);
+	ehdr->e_type = ET_CORE;
+	ehdr->e_machine = ELF_ARCH;
+	ehdr->e_version = EV_CURRENT;
+	ehdr->e_phoff = sizeof(Elf64_Ehdr);
+	ehdr->e_ehsize = sizeof(Elf64_Ehdr);
+	ehdr->e_phentsize = sizeof(Elf64_Phdr);
+
+	/* Prepare one phdr of type PT_NOTE for each present CPU */
+	for_each_present_cpu(cpu) {
+		phdr->p_type = PT_NOTE;
+		notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu));
+		phdr->p_offset = phdr->p_paddr = notes_addr;
+		phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t);
+		(ehdr->e_phnum)++;
+		phdr++;
+	}
+
+	/* Prepare one PT_NOTE header for vmcoreinfo */
+	phdr->p_type = PT_NOTE;
+	phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
+	phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE;
+	(ehdr->e_phnum)++;
+	phdr++;
+
+	/* Prepare PT_LOAD type program header for kernel text region */
+	if (need_kernel_map) {
+		phdr->p_type = PT_LOAD;
+		phdr->p_flags = PF_R|PF_W|PF_X;
+		phdr->p_vaddr = (unsigned long) _text;
+		phdr->p_filesz = phdr->p_memsz = _end - _text;
+		phdr->p_offset = phdr->p_paddr = __pa_symbol(_text);
+		ehdr->e_phnum++;
+		phdr++;
+	}
+
+	/* Go through all the ranges in mem->ranges[] and prepare phdr */
+	for (i = 0; i < mem->nr_ranges; i++) {
+		mstart = mem->ranges[i].start;
+		mend = mem->ranges[i].end;
+
+		phdr->p_type = PT_LOAD;
+		phdr->p_flags = PF_R|PF_W|PF_X;
+		phdr->p_offset = mstart;
+
+		phdr->p_paddr = mstart;
+		phdr->p_vaddr = (unsigned long) __va(mstart);
+		phdr->p_filesz = phdr->p_memsz = mend - mstart + 1;
+		phdr->p_align = 0;
+		ehdr->e_phnum++;
+		pr_debug("Crash PT_LOAD ELF header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
+			 phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz,
+			 ehdr->e_phnum, phdr->p_offset);
+		phdr++;
+	}
+
+	*addr = buf;
+	*sz = elf_sz;
+	return 0;
+}
+
+int crash_exclude_mem_range(struct crash_mem *mem,
+			    unsigned long long mstart, unsigned long long mend)
+{
+	int i, j;
+	unsigned long long start, end, p_start, p_end;
+	struct range temp_range = {0, 0};
+
+	for (i = 0; i < mem->nr_ranges; i++) {
+		start = mem->ranges[i].start;
+		end = mem->ranges[i].end;
+		p_start = mstart;
+		p_end = mend;
+
+		if (mstart > end || mend < start)
+			continue;
+
+		/* Truncate any area outside of range */
+		if (mstart < start)
+			p_start = start;
+		if (mend > end)
+			p_end = end;
+
+		/* Found completely overlapping range */
+		if (p_start == start && p_end == end) {
+			mem->ranges[i].start = 0;
+			mem->ranges[i].end = 0;
+			if (i < mem->nr_ranges - 1) {
+				/* Shift rest of the ranges to left */
+				for (j = i; j < mem->nr_ranges - 1; j++) {
+					mem->ranges[j].start =
+						mem->ranges[j+1].start;
+					mem->ranges[j].end =
+						mem->ranges[j+1].end;
+				}
+
+				/*
+				 * Continue to check if there are another overlapping ranges
+				 * from the current position because of shifting the above
+				 * mem ranges.
+				 */
+				i--;
+				mem->nr_ranges--;
+				continue;
+			}
+			mem->nr_ranges--;
+			return 0;
+		}
+
+		if (p_start > start && p_end < end) {
+			/* Split original range */
+			mem->ranges[i].end = p_start - 1;
+			temp_range.start = p_end + 1;
+			temp_range.end = end;
+		} else if (p_start != start)
+			mem->ranges[i].end = p_start - 1;
+		else
+			mem->ranges[i].start = p_end + 1;
+		break;
+	}
+
+	/* If a split happened, add the split to array */
+	if (!temp_range.end)
+		return 0;
+
+	/* Split happened */
+	if (i == mem->max_nr_ranges - 1)
+		return -ENOMEM;
+
+	/* Location where new range should go */
+	j = i + 1;
+	if (j < mem->nr_ranges) {
+		/* Move over all ranges one slot towards the end */
+		for (i = mem->nr_ranges - 1; i >= j; i--)
+			mem->ranges[i + 1] = mem->ranges[i];
+	}
+
+	mem->ranges[j].start = temp_range.start;
+	mem->ranges[j].end = temp_range.end;
+	mem->nr_ranges++;
+	return 0;
+}
+
 Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
 			  void *data, size_t data_len)
 {
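The header layout that crash_prepare_elf64_headers() produces can be reproduced in ordinary userspace. The sketch below is illustrative only: it builds one ET_CORE header with a single PT_LOAD entry, EM_X86_64 stands in for the arch-defined ELF_ARCH, and the output file name and address range are invented.

#include <elf.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	size_t sz = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr);
	unsigned char *buf = calloc(1, sz);
	Elf64_Ehdr *ehdr;
	Elf64_Phdr *phdr;
	FILE *f;

	if (!buf)
		return 1;
	ehdr = (Elf64_Ehdr *)buf;
	phdr = (Elf64_Phdr *)(ehdr + 1);

	/* Same identification fields the kernel function fills in. */
	memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
	ehdr->e_ident[EI_CLASS]   = ELFCLASS64;
	ehdr->e_ident[EI_DATA]    = ELFDATA2LSB;
	ehdr->e_ident[EI_VERSION] = EV_CURRENT;
	ehdr->e_type      = ET_CORE;
	ehdr->e_machine   = EM_X86_64;		/* stand-in for ELF_ARCH */
	ehdr->e_version   = EV_CURRENT;
	ehdr->e_phoff     = sizeof(Elf64_Ehdr);
	ehdr->e_ehsize    = sizeof(Elf64_Ehdr);
	ehdr->e_phentsize = sizeof(Elf64_Phdr);
	ehdr->e_phnum     = 1;

	/* One PT_LOAD for an invented physical range [0x100000, 0x1fffff]. */
	phdr->p_type   = PT_LOAD;
	phdr->p_flags  = PF_R | PF_W | PF_X;
	phdr->p_offset = phdr->p_paddr = 0x100000;
	phdr->p_vaddr  = 0x100000;	/* the kernel uses __va(paddr) here */
	phdr->p_filesz = phdr->p_memsz = 0x100000;

	f = fopen("fake-core.bin", "wb");
	if (!f)
		return 1;
	fwrite(buf, 1, sz, f);
	fclose(f);
	free(buf);
	printf("wrote %zu bytes; try: readelf -l fake-core.bin\n", sz);
	return 0;
}

Running it and then inspecting fake-core.bin with readelf -l shows a single LOAD segment whose file offset equals its physical address, which is the same trick the kernel relies on so that a read of /proc/vmcore at a given offset reaches the corresponding physical memory.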
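crash_exclude_mem_range() is what carves reserved regions (such as the crash kernel's own memory) out of the ranges those PT_LOAD headers describe. Its splitting behaviour can be exercised with a simplified userspace re-statement of the algorithm. This is a sketch, not the kernel code: the struct names merely mirror the kernel's struct range and struct crash_mem, it keeps scanning after a partial overlap where the kernel version breaks out of the loop, and it returns -1 where the kernel returns -ENOMEM.

#include <stdio.h>

struct range { unsigned long long start, end; };
struct crash_mem {
	int max_nr_ranges, nr_ranges;
	struct range ranges[8];
};

/* Simplified re-statement of crash_exclude_mem_range() for illustration. */
static int exclude(struct crash_mem *mem, unsigned long long mstart,
		   unsigned long long mend)
{
	int i, j;

	for (i = 0; i < mem->nr_ranges; i++) {
		struct range *r = &mem->ranges[i];

		if (mstart > r->end || mend < r->start)
			continue;			/* no overlap */

		if (mstart <= r->start && mend >= r->end) {
			/* Fully covered: drop the range, re-check this slot. */
			for (j = i; j < mem->nr_ranges - 1; j++)
				mem->ranges[j] = mem->ranges[j + 1];
			mem->nr_ranges--;
			i--;
		} else if (mstart > r->start && mend < r->end) {
			/* Hole in the middle: split into two ranges. */
			if (mem->nr_ranges == mem->max_nr_ranges)
				return -1;	/* kernel returns -ENOMEM */
			for (j = mem->nr_ranges; j > i + 1; j--)
				mem->ranges[j] = mem->ranges[j - 1];
			mem->ranges[i + 1].start = mend + 1;
			mem->ranges[i + 1].end = r->end;
			r->end = mstart - 1;
			mem->nr_ranges++;
			return 0;
		} else if (mstart > r->start) {
			r->end = mstart - 1;	/* trim the tail */
		} else {
			r->start = mend + 1;	/* trim the head */
		}
	}
	return 0;
}

int main(void)
{
	struct crash_mem m = {
		.max_nr_ranges = 8,
		.nr_ranges = 1,
		.ranges = { { 0x1000, 0x9fff } },
	};
	int i;

	/* Carve [0x4000, 0x5fff] out of [0x1000, 0x9fff]: expect a split. */
	exclude(&m, 0x4000, 0x5fff);
	for (i = 0; i < m.nr_ranges; i++)
		printf("range %d: [%#llx-%#llx]\n", i,
		       m.ranges[i].start, m.ranges[i].end);
	return 0;
}

With these inputs it prints [0x1000-0x3fff] and [0x6000-0x9fff]: the excluded hole becomes a gap between two ranges, which is also why the operation can fail when the ranges array has no room for one more entry.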
@@ -515,3 +700,36 @@ static int __init crash_save_vmcoreinfo_init(void)
 }
 
 subsys_initcall(crash_save_vmcoreinfo_init);
+
+static int __init crash_notes_memory_init(void)
+{
+	/* Allocate memory for saving cpu registers. */
+	size_t size, align;
+
+	/*
+	 * crash_notes could be allocated across 2 vmalloc pages when percpu
+	 * is vmalloc based . vmalloc doesn't guarantee 2 continuous vmalloc
+	 * pages are also on 2 continuous physical pages. In this case the
+	 * 2nd part of crash_notes in 2nd page could be lost since only the
+	 * starting address and size of crash_notes are exported through sysfs.
+	 * Here round up the size of crash_notes to the nearest power of two
+	 * and pass it to __alloc_percpu as align value. This can make sure
+	 * crash_notes is allocated inside one physical page.
+	 */
+	size = sizeof(note_buf_t);
+	align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE);
+
+	/*
+	 * Break compile if size is bigger than PAGE_SIZE since crash_notes
+	 * definitely will be in 2 pages with that.
+	 */
+	BUILD_BUG_ON(size > PAGE_SIZE);
+
+	crash_notes = __alloc_percpu(size, align);
+	if (!crash_notes) {
+		pr_warn("Memory allocation for saving cpu register states failed\n");
+		return -ENOMEM;
+	}
+	return 0;
+}
+subsys_initcall(crash_notes_memory_init);
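The page-crossing argument in the comment above can be sanity-checked in userspace. In this sketch PAGE_SIZE is hard-coded to 4096 and 1104 is an invented stand-in for sizeof(note_buf_t), whose real value is arch-dependent:

#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Round v up to the next power of two (v must be > 0). */
static unsigned long roundup_pow_of_two(unsigned long v)
{
	unsigned long p = 1;

	while (p < v)
		p <<= 1;
	return p;
}

int main(void)
{
	unsigned long size = 1104;	/* stand-in for sizeof(note_buf_t) */
	unsigned long align = roundup_pow_of_two(size);

	if (align > PAGE_SIZE)
		align = PAGE_SIZE;

	/*
	 * With start % align == 0, align a power of two <= PAGE_SIZE, and
	 * size <= align, the buffer's offset within its page is a multiple
	 * of align, so offset + size <= offset + align <= PAGE_SIZE: the
	 * buffer cannot straddle a page boundary.
	 */
	printf("size=%lu -> align=%lu (fits in one %lu-byte page: %s)\n",
	       size, align, PAGE_SIZE, size <= align ? "yes" : "no");
	return 0;
}

If sizeof(note_buf_t) ever exceeded PAGE_SIZE, no alignment could keep the buffer inside one page, which is exactly the case the BUILD_BUG_ON rejects at compile time.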