|  | /* | 
|  | * Hypervisor-assisted dump | 
|  | * | 
|  | * Linas Vepstas, Manish Ahuja 2008 | 
|  | * Copyright 2008 IBM Corp. | 
|  | * | 
|  | *      This program is free software; you can redistribute it and/or | 
|  | *      modify it under the terms of the GNU General Public License | 
|  | *      as published by the Free Software Foundation; either version | 
|  | *      2 of the License, or (at your option) any later version. | 
|  | * | 
|  | */ | 
|  |  | 
|  | #include <linux/gfp.h> | 
|  | #include <linux/init.h> | 
|  | #include <linux/kobject.h> | 
|  | #include <linux/mm.h> | 
|  | #include <linux/of.h> | 
|  | #include <linux/pfn.h> | 
|  | #include <linux/swap.h> | 
|  | #include <linux/sysfs.h> | 
|  |  | 
|  | #include <asm/page.h> | 
|  | #include <asm/phyp_dump.h> | 
|  | #include <asm/machdep.h> | 
|  | #include <asm/prom.h> | 
|  | #include <asm/rtas.h> | 
|  |  | 
|  | /* State shared between early boot (device-tree scan and cmdline parsing below) and the late-boot setup in phyp_dump_setup() */ | 
|  | static struct phyp_dump phyp_dump_vars; | 
|  | struct phyp_dump *phyp_dump_info = &phyp_dump_vars; /* non-static handle to the state above; every function in this file goes through it */ | 
|  |  | 
|  | static int ibm_configure_kernel_dump; /* RTAS token for "ibm,configure-kernel-dump"; looked up in phyp_dump_setup() */ | 
|  | /* ------------------------------------------------- */ | 
|  | /* RTAS interfaces to declare the dump regions */ | 
|  |  | 
|  | struct dump_section { /* one section descriptor inside struct phyp_dump_header; passed to RTAS, so do not reorder or resize fields */ | 
|  | u32 dump_flags; /* init_dump_header() sets DUMP_REQUEST_FLAG here */ | 
|  | u16 source_type; /* one of the DUMP_SOURCE_* values */ | 
|  | u16 error_flags; /* never written in this file, only printed; presumably reported by firmware - TODO confirm */ | 
|  | u64 source_address; /* 0 for the CPU and HPTE sections, PHYP_DUMP_RMR_START for the kernel section */ | 
|  | u64 source_length; /* number of bytes in the section */ | 
|  | u64 length_copied; /* never written in this file; presumably filled in by the hypervisor - TODO confirm */ | 
|  | u64 destination_address; /* where the section is saved: offset from init_dump_header() plus the base added at register/invalidate time */ | 
|  | }; | 
|  |  | 
|  | struct phyp_dump_header { /* header handed to the "ibm,configure-kernel-dump" RTAS call; layout must not change */ | 
|  | u32 version; /* DUMP_HEADER_VERSION */ | 
|  | u16 num_of_sections; /* NUM_DUMP_SECTIONS */ | 
|  | u16 status; /* phyp_dump_setup() tests DUMP_ERROR_FLAG in the header found in the device tree */ | 
|  |  | 
|  | u32 first_offset_section; /* byte offset of cpu_data within this struct */ | 
|  | u32 dump_disk_section; /* the dump-disk fields below are all set to 0 by init_dump_header() */ | 
|  | u64 block_num_dd; | 
|  | u64 num_of_blocks_dd; | 
|  | u32 offset_dd; | 
|  | u32 maxtime_to_auto; /* init_dump_header() sets 0, which it labels "disabled" */ | 
|  | /* No dump disk path string used */ | 
|  |  | 
|  | struct dump_section cpu_data; /* source_type DUMP_SOURCE_CPU */ | 
|  | struct dump_section hpte_data; /* source_type DUMP_SOURCE_HPTE */ | 
|  | struct dump_section kernel_data; /* source_type DUMP_SOURCE_RMO: the low kernel region */ | 
|  | }; | 
|  |  | 
|  | /* The dump header *must be* in low memory, so it is a static object placed in .bss */ | 
|  | static struct phyp_dump_header phdr; | 
|  |  | 
|  | #define NUM_DUMP_SECTIONS	3 /* cpu_data, hpte_data, kernel_data */ | 
|  | #define DUMP_HEADER_VERSION	0x1 | 
|  | #define DUMP_REQUEST_FLAG	0x1 /* dump_flags value set on every section */ | 
|  | #define DUMP_SOURCE_CPU		0x0001 /* source_type values */ | 
|  | #define DUMP_SOURCE_HPTE	0x0002 | 
|  | #define DUMP_SOURCE_RMO		0x0011 | 
|  | #define DUMP_ERROR_FLAG		0x2000 /* bit tested in phyp_dump_header.status by phyp_dump_setup() */ | 
|  | #define DUMP_TRIGGERED		0x4000 /* not referenced in this file; presumably a status bit - TODO confirm */ | 
|  | #define DUMP_PERFORMED		0x8000 /* not referenced in this file; presumably a status bit - TODO confirm */ | 
|  |  | 
|  |  | 
|  | /** | 
|  | * init_dump_header() - initialize the header declaring a dump | 
|  | * Returns: length of dump save area. | 
|  | * | 
|  | * When the hypervisor saves crashed state, it needs to put | 
|  | * it somewhere. The dump header tells the hypervisor where | 
|  | * the data can be saved. | 
|  | */ | 
|  | static unsigned long init_dump_header(struct phyp_dump_header *ph) | 
|  | { | 
|  | unsigned long addr_offset = 0; | 
|  |  | 
|  | /* Set up the dump header */ | 
|  | ph->version = DUMP_HEADER_VERSION; | 
|  | ph->num_of_sections = NUM_DUMP_SECTIONS; | 
|  | ph->status = 0; | 
|  |  | 
|  | ph->first_offset_section = | 
|  | (u32)offsetof(struct phyp_dump_header, cpu_data); | 
|  | ph->dump_disk_section = 0; | 
|  | ph->block_num_dd = 0; | 
|  | ph->num_of_blocks_dd = 0; | 
|  | ph->offset_dd = 0; | 
|  |  | 
|  | ph->maxtime_to_auto = 0; /* disabled */ | 
|  |  | 
|  | /* The first two sections are mandatory */ | 
|  | ph->cpu_data.dump_flags = DUMP_REQUEST_FLAG; | 
|  | ph->cpu_data.source_type = DUMP_SOURCE_CPU; | 
|  | ph->cpu_data.source_address = 0; | 
|  | ph->cpu_data.source_length = phyp_dump_info->cpu_state_size; | 
|  | ph->cpu_data.destination_address = addr_offset; | 
|  | addr_offset += phyp_dump_info->cpu_state_size; | 
|  |  | 
|  | ph->hpte_data.dump_flags = DUMP_REQUEST_FLAG; | 
|  | ph->hpte_data.source_type = DUMP_SOURCE_HPTE; | 
|  | ph->hpte_data.source_address = 0; | 
|  | ph->hpte_data.source_length = phyp_dump_info->hpte_region_size; | 
|  | ph->hpte_data.destination_address = addr_offset; | 
|  | addr_offset += phyp_dump_info->hpte_region_size; | 
|  |  | 
|  | /* This section describes the low kernel region */ | 
|  | ph->kernel_data.dump_flags = DUMP_REQUEST_FLAG; | 
|  | ph->kernel_data.source_type = DUMP_SOURCE_RMO; | 
|  | ph->kernel_data.source_address = PHYP_DUMP_RMR_START; | 
|  | ph->kernel_data.source_length = PHYP_DUMP_RMR_END; | 
|  | ph->kernel_data.destination_address = addr_offset; | 
|  | addr_offset += ph->kernel_data.source_length; | 
|  |  | 
|  | return addr_offset; | 
|  | } | 
|  |  | 
/*
 * Debug aid: log every field of a dump header with printk(KERN_INFO).
 * Compiles to an empty function unless DEBUG is defined. A NULL header
 * is silently ignored.
 */
static void print_dump_header(const struct phyp_dump_header *ph)
{
#ifdef DEBUG
	const struct dump_section *cpu;
	const struct dump_section *hpte;
	const struct dump_section *srsd;

	if (!ph)
		return;

	cpu = &ph->cpu_data;
	hpte = &ph->hpte_data;
	srsd = &ph->kernel_data;

	/* Fixed header fields */
	printk(KERN_INFO "dump header:\n");
	printk(KERN_INFO "version = %d\n", ph->version);
	printk(KERN_INFO "Sections = %d\n", ph->num_of_sections);
	printk(KERN_INFO "Status = 0x%x\n", ph->status);

	/* No dump disk is used, so these should all read as 0 */
	printk(KERN_INFO "Offset to first section 0x%x\n",
		ph->first_offset_section);
	printk(KERN_INFO "dump disk sections should be zero\n");
	printk(KERN_INFO "dump disk section = %d\n", ph->dump_disk_section);
	printk(KERN_INFO "block num = %lld\n", ph->block_num_dd);
	printk(KERN_INFO "number of blocks = %lld\n", ph->num_of_blocks_dd);
	printk(KERN_INFO "dump disk offset = %d\n", ph->offset_dd);
	printk(KERN_INFO "Max auto time= %d\n", ph->maxtime_to_auto);

	/* CPU state section */
	printk(KERN_INFO " CPU AREA\n");
	printk(KERN_INFO "cpu dump_flags =%d\n", cpu->dump_flags);
	printk(KERN_INFO "cpu source_type =%d\n", cpu->source_type);
	printk(KERN_INFO "cpu error_flags =%d\n", cpu->error_flags);
	printk(KERN_INFO "cpu source_address =%llx\n", cpu->source_address);
	printk(KERN_INFO "cpu source_length =%llx\n", cpu->source_length);
	printk(KERN_INFO "cpu length_copied =%llx\n", cpu->length_copied);

	/* Hashed page table section */
	printk(KERN_INFO " HPTE AREA\n");
	printk(KERN_INFO "HPTE dump_flags =%d\n", hpte->dump_flags);
	printk(KERN_INFO "HPTE source_type =%d\n", hpte->source_type);
	printk(KERN_INFO "HPTE error_flags =%d\n", hpte->error_flags);
	printk(KERN_INFO "HPTE source_address =%llx\n", hpte->source_address);
	printk(KERN_INFO "HPTE source_length =%llx\n", hpte->source_length);
	printk(KERN_INFO "HPTE length_copied =%llx\n", hpte->length_copied);

	/* Low kernel region section */
	printk(KERN_INFO " SRSD AREA\n");
	printk(KERN_INFO "SRSD dump_flags =%d\n", srsd->dump_flags);
	printk(KERN_INFO "SRSD source_type =%d\n", srsd->source_type);
	printk(KERN_INFO "SRSD error_flags =%d\n", srsd->error_flags);
	printk(KERN_INFO "SRSD source_address =%llx\n", srsd->source_address);
	printk(KERN_INFO "SRSD source_length =%llx\n", srsd->source_length);
	printk(KERN_INFO "SRSD length_copied =%llx\n", srsd->length_copied);
#endif
}
|  |  | 
|  | static ssize_t show_phyp_dump_active(struct kobject *kobj, | 
|  | struct kobj_attribute *attr, char *buf) | 
|  | { | 
|  |  | 
|  | /* create filesystem entry so kdump is phyp-dump aware */ | 
|  | return sprintf(buf, "%lx\n", phyp_dump_info->phyp_dump_at_boot); | 
|  | } | 
|  |  | 
|  | static struct kobj_attribute pdl = __ATTR(phyp_dump_active, 0600, | 
|  | show_phyp_dump_active, | 
|  | NULL); | 
|  |  | 
|  | static void register_dump_area(struct phyp_dump_header *ph, unsigned long addr) | 
|  | { | 
|  | int rc; | 
|  |  | 
|  | /* Add addr value if not initialized before */ | 
|  | if (ph->cpu_data.destination_address == 0) { | 
|  | ph->cpu_data.destination_address += addr; | 
|  | ph->hpte_data.destination_address += addr; | 
|  | ph->kernel_data.destination_address += addr; | 
|  | } | 
|  |  | 
|  | /* ToDo Invalidate kdump and free memory range. */ | 
|  |  | 
|  | do { | 
|  | rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL, | 
|  | 1, ph, sizeof(struct phyp_dump_header)); | 
|  | } while (rtas_busy_delay(rc)); | 
|  |  | 
|  | if (rc) { | 
|  | printk(KERN_ERR "phyp-dump: unexpected error (%d) on " | 
|  | "register\n", rc); | 
|  | print_dump_header(ph); | 
|  | return; | 
|  | } | 
|  |  | 
|  | rc = sysfs_create_file(kernel_kobj, &pdl.attr); | 
|  | if (rc) | 
|  | printk(KERN_ERR "phyp-dump: unable to create sysfs" | 
|  | " file (%d)\n", rc); | 
|  | } | 
|  |  | 
|  | static | 
|  | void invalidate_last_dump(struct phyp_dump_header *ph, unsigned long addr) | 
|  | { | 
|  | int rc; | 
|  |  | 
|  | /* Add addr value if not initialized before */ | 
|  | if (ph->cpu_data.destination_address == 0) { | 
|  | ph->cpu_data.destination_address += addr; | 
|  | ph->hpte_data.destination_address += addr; | 
|  | ph->kernel_data.destination_address += addr; | 
|  | } | 
|  |  | 
|  | do { | 
|  | rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL, | 
|  | 2, ph, sizeof(struct phyp_dump_header)); | 
|  | } while (rtas_busy_delay(rc)); | 
|  |  | 
|  | if (rc) { | 
|  | printk(KERN_ERR "phyp-dump: unexpected error (%d) " | 
|  | "on invalidate\n", rc); | 
|  | print_dump_header(ph); | 
|  | } | 
|  | } | 
|  |  | 
|  | /* ------------------------------------------------- */ | 
|  | /** | 
|  | * release_memory_range -- release memory previously memblock_reserved | 
|  | * @start_pfn: starting physical frame number | 
|  | * @nr_pages: number of pages to free. | 
|  | * | 
|  | * This routine will release memory that had been previously | 
|  | * memblock_reserved in early boot. The released memory becomes | 
|  | * available for genreal use. | 
|  | */ | 
|  | static void release_memory_range(unsigned long start_pfn, | 
|  | unsigned long nr_pages) | 
|  | { | 
|  | struct page *rpage; | 
|  | unsigned long end_pfn; | 
|  | long i; | 
|  |  | 
|  | end_pfn = start_pfn + nr_pages; | 
|  |  | 
|  | for (i = start_pfn; i <= end_pfn; i++) { | 
|  | rpage = pfn_to_page(i); | 
|  | if (PageReserved(rpage)) { | 
|  | ClearPageReserved(rpage); | 
|  | init_page_count(rpage); | 
|  | __free_page(rpage); | 
|  | totalram_pages++; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | /** | 
|  | * track_freed_range -- Counts the range being freed. | 
|  | * Once the counter goes to zero, it re-registers dump for | 
|  | * future use. | 
|  | */ | 
|  | static void | 
|  | track_freed_range(unsigned long addr, unsigned long length) | 
|  | { | 
|  | static unsigned long scratch_area_size, reserved_area_size; | 
|  |  | 
|  | if (addr < phyp_dump_info->init_reserve_start) | 
|  | return; | 
|  |  | 
|  | if ((addr >= phyp_dump_info->init_reserve_start) && | 
|  | (addr <= phyp_dump_info->init_reserve_start + | 
|  | phyp_dump_info->init_reserve_size)) | 
|  | reserved_area_size += length; | 
|  |  | 
|  | if ((addr >= phyp_dump_info->reserved_scratch_addr) && | 
|  | (addr <= phyp_dump_info->reserved_scratch_addr + | 
|  | phyp_dump_info->reserved_scratch_size)) | 
|  | scratch_area_size += length; | 
|  |  | 
|  | if ((reserved_area_size == phyp_dump_info->init_reserve_size) && | 
|  | (scratch_area_size == phyp_dump_info->reserved_scratch_size)) { | 
|  |  | 
|  | invalidate_last_dump(&phdr, | 
|  | phyp_dump_info->reserved_scratch_addr); | 
|  | register_dump_area(&phdr, | 
|  | phyp_dump_info->reserved_scratch_addr); | 
|  | } | 
|  | } | 
|  |  | 
|  | /* ------------------------------------------------- */ | 
|  | /** | 
|  | * sysfs_release_region -- sysfs interface to release memory range. | 
|  | * | 
|  | * Usage: | 
|  | *   "echo <start addr> <length> > /sys/kernel/release_region" | 
|  | * | 
|  | * Example: | 
|  | *   "echo 0x40000000 0x10000000 > /sys/kernel/release_region" | 
|  | * | 
|  | * will release 256MB starting at 1GB. | 
|  | */ | 
|  | static ssize_t store_release_region(struct kobject *kobj, | 
|  | struct kobj_attribute *attr, | 
|  | const char *buf, size_t count) | 
|  | { | 
|  | unsigned long start_addr, length, end_addr; | 
|  | unsigned long start_pfn, nr_pages; | 
|  | ssize_t ret; | 
|  |  | 
|  | ret = sscanf(buf, "%lx %lx", &start_addr, &length); | 
|  | if (ret != 2) | 
|  | return -EINVAL; | 
|  |  | 
|  | track_freed_range(start_addr, length); | 
|  |  | 
|  | /* Range-check - don't free any reserved memory that | 
|  | * wasn't reserved for phyp-dump */ | 
|  | if (start_addr < phyp_dump_info->init_reserve_start) | 
|  | start_addr = phyp_dump_info->init_reserve_start; | 
|  |  | 
|  | end_addr = phyp_dump_info->init_reserve_start + | 
|  | phyp_dump_info->init_reserve_size; | 
|  | if (start_addr+length > end_addr) | 
|  | length = end_addr - start_addr; | 
|  |  | 
|  | /* Release the region of memory assed in by user */ | 
|  | start_pfn = PFN_DOWN(start_addr); | 
|  | nr_pages = PFN_DOWN(length); | 
|  | release_memory_range(start_pfn, nr_pages); | 
|  |  | 
|  | return count; | 
|  | } | 
|  |  | 
|  | static ssize_t show_release_region(struct kobject *kobj, | 
|  | struct kobj_attribute *attr, char *buf) | 
|  | { | 
|  | u64 second_addr_range; | 
|  |  | 
|  | /* total reserved size - start of scratch area */ | 
|  | second_addr_range = phyp_dump_info->init_reserve_size - | 
|  | phyp_dump_info->reserved_scratch_size; | 
|  | return sprintf(buf, "CPU:0x%llx-0x%llx: HPTE:0x%llx-0x%llx:" | 
|  | " DUMP:0x%llx-0x%llx, 0x%lx-0x%llx:\n", | 
|  | phdr.cpu_data.destination_address, | 
|  | phdr.cpu_data.length_copied, | 
|  | phdr.hpte_data.destination_address, | 
|  | phdr.hpte_data.length_copied, | 
|  | phdr.kernel_data.destination_address, | 
|  | phdr.kernel_data.length_copied, | 
|  | phyp_dump_info->init_reserve_start, | 
|  | second_addr_range); | 
|  | } | 
|  |  | 
|  | static struct kobj_attribute rr = __ATTR(release_region, 0600, | 
|  | show_release_region, | 
|  | store_release_region); | 
|  |  | 
|  | static int __init phyp_dump_setup(void) | 
|  | { | 
|  | struct device_node *rtas; | 
|  | const struct phyp_dump_header *dump_header = NULL; | 
|  | unsigned long dump_area_start; | 
|  | unsigned long dump_area_length; | 
|  | int header_len = 0; | 
|  | int rc; | 
|  |  | 
|  | /* If no memory was reserved in early boot, there is nothing to do */ | 
|  | if (phyp_dump_info->init_reserve_size == 0) | 
|  | return 0; | 
|  |  | 
|  | /* Return if phyp dump not supported */ | 
|  | if (!phyp_dump_info->phyp_dump_configured) | 
|  | return -ENOSYS; | 
|  |  | 
|  | /* Is there dump data waiting for us? If there isn't, | 
|  | * then register a new dump area, and release all of | 
|  | * the rest of the reserved ram. | 
|  | * | 
|  | * The /rtas/ibm,kernel-dump rtas node is present only | 
|  | * if there is dump data waiting for us. | 
|  | */ | 
|  | rtas = of_find_node_by_path("/rtas"); | 
|  | if (rtas) { | 
|  | dump_header = of_get_property(rtas, "ibm,kernel-dump", | 
|  | &header_len); | 
|  | of_node_put(rtas); | 
|  | } | 
|  |  | 
|  | ibm_configure_kernel_dump = rtas_token("ibm,configure-kernel-dump"); | 
|  |  | 
|  | print_dump_header(dump_header); | 
|  | dump_area_length = init_dump_header(&phdr); | 
|  | /* align down */ | 
|  | dump_area_start = phyp_dump_info->init_reserve_start & PAGE_MASK; | 
|  |  | 
|  | if (dump_header == NULL) { | 
|  | register_dump_area(&phdr, dump_area_start); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* re-register the dump area, if old dump was invalid */ | 
|  | if ((dump_header) && (dump_header->status & DUMP_ERROR_FLAG)) { | 
|  | invalidate_last_dump(&phdr, dump_area_start); | 
|  | register_dump_area(&phdr, dump_area_start); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | if (dump_header) { | 
|  | phyp_dump_info->reserved_scratch_addr = | 
|  | dump_header->cpu_data.destination_address; | 
|  | phyp_dump_info->reserved_scratch_size = | 
|  | dump_header->cpu_data.source_length + | 
|  | dump_header->hpte_data.source_length + | 
|  | dump_header->kernel_data.source_length; | 
|  | } | 
|  |  | 
|  | /* Should we create a dump_subsys, analogous to s390/ipl.c ? */ | 
|  | rc = sysfs_create_file(kernel_kobj, &rr.attr); | 
|  | if (rc) | 
|  | printk(KERN_ERR "phyp-dump: unable to create sysfs file (%d)\n", | 
|  | rc); | 
|  |  | 
|  | /* ToDo: re-register the dump area, for next time. */ | 
|  | return 0; | 
|  | } | 
|  | machine_subsys_initcall(pseries, phyp_dump_setup); | 
|  |  | 
|  | int __init early_init_dt_scan_phyp_dump(unsigned long node, | 
|  | const char *uname, int depth, void *data) | 
|  | { | 
|  | const unsigned int *sizes; | 
|  |  | 
|  | phyp_dump_info->phyp_dump_configured = 0; | 
|  | phyp_dump_info->phyp_dump_is_active = 0; | 
|  |  | 
|  | if (depth != 1 || strcmp(uname, "rtas") != 0) | 
|  | return 0; | 
|  |  | 
|  | if (of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL)) | 
|  | phyp_dump_info->phyp_dump_configured++; | 
|  |  | 
|  | if (of_get_flat_dt_prop(node, "ibm,dump-kernel", NULL)) | 
|  | phyp_dump_info->phyp_dump_is_active++; | 
|  |  | 
|  | sizes = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes", | 
|  | NULL); | 
|  | if (!sizes) | 
|  | return 0; | 
|  |  | 
|  | if (sizes[0] == 1) | 
|  | phyp_dump_info->cpu_state_size = *((unsigned long *)&sizes[1]); | 
|  |  | 
|  | if (sizes[3] == 2) | 
|  | phyp_dump_info->hpte_region_size = | 
|  | *((unsigned long *)&sizes[4]); | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | /* Look for phyp_dump= cmdline option */ | 
|  | static int __init early_phyp_dump_enabled(char *p) | 
|  | { | 
|  | phyp_dump_info->phyp_dump_at_boot = 1; | 
|  |  | 
|  | if (!p) | 
|  | return 0; | 
|  |  | 
|  | if (strncmp(p, "1", 1) == 0) | 
|  | phyp_dump_info->phyp_dump_at_boot = 1; | 
|  | else if (strncmp(p, "0", 1) == 0) | 
|  | phyp_dump_info->phyp_dump_at_boot = 0; | 
|  |  | 
|  | return 0; | 
|  | } | 
|  | early_param("phyp_dump", early_phyp_dump_enabled); | 
|  |  | 
|  | /* Look for phyp_dump_reserve_size= cmdline option */ | 
|  | static int __init early_phyp_dump_reserve_size(char *p) | 
|  | { | 
|  | if (p) | 
|  | phyp_dump_info->reserve_bootvar = memparse(p, &p); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  | early_param("phyp_dump_reserve_size", early_phyp_dump_reserve_size); |