Commit 972d5e7e authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'x86-efi-kexec-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 EFI changes from Ingo Molnar:
 "This consists of two main parts:

   - New static EFI runtime services virtual mapping layout which is
     groundwork for kexec support on EFI (Borislav Petkov)

   - EFI kexec support itself (Dave Young)"

* 'x86-efi-kexec-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
  x86/efi: parse_efi_setup() build fix
  x86: ksysfs.c build fix
  x86/efi: Delete superfluous global variables
  x86: Reserve setup_data ranges late after parsing memmap cmdline
  x86: Export x86 boot_params to sysfs
  x86: Add xloadflags bit for EFI runtime support on kexec
  x86/efi: Pass necessary EFI data for kexec via setup_data
  efi: Export EFI runtime memory mapping to sysfs
  efi: Export more EFI table variables to sysfs
  x86/efi: Cleanup efi_enter_virtual_mode() function
  x86/efi: Fix off-by-one bug in EFI Boot Services reservation
  x86/efi: Add a wrapper function efi_map_region_fixed()
  x86/efi: Remove unused variables in __map_region()
  x86/efi: Check krealloc return value
  x86/efi: Runtime services virtual mapping
  x86/mm/cpa: Map in an arbitrary pgd
  x86/mm/pageattr: Add last levels of error path
  x86/mm/pageattr: Add a PUD error unwinding path
  x86/mm/pageattr: Add a PTE pagetable populating function
  x86/mm/pageattr: Add a PMD pagetable populating function
  ...
parents 5d4863e4 ef0b8b9a
What: /sys/firmware/efi/fw_vendor
Date: December 2013
Contact: Dave Young <dyoung@redhat.com>
Description: It shows the physical address of firmware vendor field in the
EFI system table.
Users: Kexec
What: /sys/firmware/efi/runtime
Date: December 2013
Contact: Dave Young <dyoung@redhat.com>
Description: It shows the physical address of runtime service table entry in
the EFI system table.
Users: Kexec
What: /sys/firmware/efi/config_table
Date: December 2013
Contact: Dave Young <dyoung@redhat.com>
Description: It shows the physical address of config table entry in the EFI
system table.
Users: Kexec
What: /sys/firmware/efi/runtime-map/
Date: December 2013
Contact: Dave Young <dyoung@redhat.com>
Description: Switching efi runtime services to virtual mode requires
that all efi memory ranges which have the runtime attribute
bit set to be mapped to virtual addresses.
The efi runtime services can only be switched to virtual
mode once without rebooting. The kexec kernel must maintain
the same physical to virtual address mappings as the first
kernel. The mappings are exported to sysfs so userspace tools
can reassemble them and pass them into the kexec kernel.
/sys/firmware/efi/runtime-map/ is the directory the kernel
exports that information in.
subdirectories are named with the number of the memory range:
/sys/firmware/efi/runtime-map/0
/sys/firmware/efi/runtime-map/1
/sys/firmware/efi/runtime-map/2
/sys/firmware/efi/runtime-map/3
...
Each subdirectory contains five files:
attribute : The attributes of the memory range.
num_pages : The size of the memory range in pages.
phys_addr : The physical address of the memory range.
type : The type of the memory range.
virt_addr : The virtual address of the memory range.
Above values are all hexadecimal numbers with the '0x' prefix.
Users: Kexec
What: /sys/kernel/boot_params
Date: December 2013
Contact: Dave Young <dyoung@redhat.com>
Description: The /sys/kernel/boot_params directory contains two
files: "data" and "version" and one subdirectory "setup_data".
It is used to export the kernel boot parameters of an x86
platform to userspace for kexec and debugging purpose.
If there's no setup_data in boot_params the subdirectory will
not be created.
"data" file is the binary representation of struct boot_params.
"version" file is the string representation of boot
protocol version.
"setup_data" subdirectory contains the setup_data data
structure in boot_params. setup_data is maintained in kernel
as a link list. In "setup_data" subdirectory there's one
subdirectory for each link list node named with the number
of the list nodes. The list node subdirectory contains two
files "type" and "data". "type" file is the string
representation of setup_data type. "data" file is the binary
representation of setup_data payload.
The whole boot_params directory structure is like below:
/sys/kernel/boot_params
|__ data
|__ setup_data
| |__ 0
| | |__ data
| | |__ type
| |__ 1
| |__ data
| |__ type
|__ version
Users: Kexec
......@@ -899,6 +899,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
edd= [EDD]
Format: {"off" | "on" | "skip[mbr]"}
efi= [EFI]
Format: { "old_map" }
old_map [X86-64]: switch to the old ioremap-based EFI
runtime services mapping. 32-bit still uses this one by
default.
efi_no_storage_paranoia [EFI; X86]
Using this parameter you can use more than 50% of
your efi variable storage. Use this parameter only if
......
......@@ -608,6 +608,9 @@ Protocol: 2.12+
- If 1, the kernel supports the 64-bit EFI handoff entry point
given at handover_offset + 0x200.
Bit 4 (read): XLF_EFI_KEXEC
- If 1, the kernel supports kexec EFI boot with EFI runtime support.
Field name: cmdline_size
Type: read
Offset/size: 0x238/4
......
......@@ -28,4 +28,11 @@ reference.
Current X86-64 implementations only support 40 bits of address space,
but we support up to 46 bits. This expands into MBZ space in the page tables.
->trampoline_pgd:
We map EFI runtime services in the aforementioned PGD in the virtual
range of 64Gb (arbitrarily set, can be raised if needed)
0xffffffef00000000 - 0xffffffff00000000
-Andi Kleen, Jul 2004
......@@ -391,7 +391,14 @@ xloadflags:
#else
# define XLF23 0
#endif
.word XLF0 | XLF1 | XLF23
#if defined(CONFIG_X86_64) && defined(CONFIG_EFI) && defined(CONFIG_KEXEC)
# define XLF4 XLF_EFI_KEXEC
#else
# define XLF4 0
#endif
.word XLF0 | XLF1 | XLF23 | XLF4
cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
#added with boot protocol
......
#ifndef _ASM_X86_EFI_H
#define _ASM_X86_EFI_H
/*
* We map the EFI regions needed for runtime services non-contiguously,
* with preserved alignment on virtual addresses starting from -4G down
* for a total max space of 64G. This way, we provide for stable runtime
* services addresses across kernels so that a kexec'd kernel can still
* use them.
*
* This is the main reason why we're doing stable VA mappings for RT
* services.
*
* This flag is used in conjuction with a chicken bit called
* "efi=old_map" which can be used as a fallback to the old runtime
* services mapping method in case there's some b0rkage with a
* particular EFI implementation (haha, it is hard to hold up the
* sarcasm here...).
*/
#define EFI_OLD_MEMMAP EFI_ARCH_1
#ifdef CONFIG_X86_32
#define EFI_LOADER_SIGNATURE "EL32"
......@@ -69,24 +87,31 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3), \
(u64)(a4), (u64)(a5), (u64)(a6))
#define _efi_call_virtX(x, f, ...) \
({ \
efi_status_t __s; \
\
efi_sync_low_kernel_mappings(); \
preempt_disable(); \
__s = efi_call##x((void *)efi.systab->runtime->f, __VA_ARGS__); \
preempt_enable(); \
__s; \
})
#define efi_call_virt0(f) \
efi_call0((efi.systab->runtime->f))
#define efi_call_virt1(f, a1) \
efi_call1((efi.systab->runtime->f), (u64)(a1))
#define efi_call_virt2(f, a1, a2) \
efi_call2((efi.systab->runtime->f), (u64)(a1), (u64)(a2))
#define efi_call_virt3(f, a1, a2, a3) \
efi_call3((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
(u64)(a3))
#define efi_call_virt4(f, a1, a2, a3, a4) \
efi_call4((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
(u64)(a3), (u64)(a4))
#define efi_call_virt5(f, a1, a2, a3, a4, a5) \
efi_call5((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
(u64)(a3), (u64)(a4), (u64)(a5))
#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \
efi_call6((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
(u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
_efi_call_virtX(0, f)
#define efi_call_virt1(f, a1) \
_efi_call_virtX(1, f, (u64)(a1))
#define efi_call_virt2(f, a1, a2) \
_efi_call_virtX(2, f, (u64)(a1), (u64)(a2))
#define efi_call_virt3(f, a1, a2, a3) \
_efi_call_virtX(3, f, (u64)(a1), (u64)(a2), (u64)(a3))
#define efi_call_virt4(f, a1, a2, a3, a4) \
_efi_call_virtX(4, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4))
#define efi_call_virt5(f, a1, a2, a3, a4, a5) \
_efi_call_virtX(5, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5))
#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \
_efi_call_virtX(6, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
u32 type, u64 attribute);
......@@ -95,12 +120,28 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
extern int add_efi_memmap;
extern unsigned long x86_efi_facility;
extern struct efi_scratch efi_scratch;
extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
extern int efi_memblock_x86_reserve_range(void);
extern void efi_call_phys_prelog(void);
extern void efi_call_phys_epilog(void);
extern void efi_unmap_memmap(void);
extern void efi_memory_uc(u64 addr, unsigned long size);
extern void __init efi_map_region(efi_memory_desc_t *md);
extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
extern void efi_sync_low_kernel_mappings(void);
extern void efi_setup_page_tables(void);
extern void __init old_map_region(efi_memory_desc_t *md);
struct efi_setup_data {
u64 fw_vendor;
u64 runtime;
u64 tables;
u64 smbios;
u64 reserved[8];
};
extern u64 efi_setup;
#ifdef CONFIG_EFI
......@@ -110,7 +151,7 @@ static inline bool efi_is_native(void)
}
extern struct console early_efi_console;
extern void parse_efi_setup(u64 phys_addr, u32 data_len);
#else
/*
* IF EFI is not configured, have the EFI calls return -ENOSYS.
......@@ -122,6 +163,7 @@ extern struct console early_efi_console;
#define efi_call4(_f, _a1, _a2, _a3, _a4) (-ENOSYS)
#define efi_call5(_f, _a1, _a2, _a3, _a4, _a5) (-ENOSYS)
#define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6) (-ENOSYS)
static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}
#endif /* CONFIG_EFI */
#endif /* _ASM_X86_EFI_H */
......@@ -382,7 +382,8 @@ static inline void update_page_count(int level, unsigned long pages) { }
*/
extern pte_t *lookup_address(unsigned long address, unsigned int *level);
extern phys_addr_t slow_virt_to_phys(void *__address);
extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
unsigned numpages, unsigned long page_flags);
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_PGTABLE_DEFS_H */
......@@ -6,6 +6,7 @@
#define SETUP_E820_EXT 1
#define SETUP_DTB 2
#define SETUP_PCI 3
#define SETUP_EFI 4
/* ram_size flags */
#define RAMDISK_IMAGE_START_MASK 0x07FF
......@@ -23,6 +24,7 @@
#define XLF_CAN_BE_LOADED_ABOVE_4G (1<<1)
#define XLF_EFI_HANDOVER_32 (1<<2)
#define XLF_EFI_HANDOVER_64 (1<<3)
#define XLF_EFI_KEXEC (1<<4)
#ifndef __ASSEMBLY__
......
......@@ -29,6 +29,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
obj-y += syscall_$(BITS).o
obj-$(CONFIG_X86_64) += vsyscall_64.o
obj-$(CONFIG_X86_64) += vsyscall_emu_64.o
obj-$(CONFIG_SYSFS) += ksysfs.o
obj-y += bootflag.o e820.o
obj-y += pci-dma.o quirks.o topology.o kdebugfs.o
obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
......
/*
* Architecture specific sysfs attributes in /sys/kernel
*
* Copyright (C) 2007, Intel Corp.
* Huang Ying <ying.huang@intel.com>
* Copyright (C) 2013, 2013 Red Hat, Inc.
* Dave Young <dyoung@redhat.com>
*
* This file is released under the GPLv2
*/
#include <linux/kobject.h>
#include <linux/string.h>
#include <linux/sysfs.h>
#include <linux/init.h>
#include <linux/stat.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <asm/io.h>
#include <asm/setup.h>
static ssize_t version_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
return sprintf(buf, "0x%04x\n", boot_params.hdr.version);
}
static struct kobj_attribute boot_params_version_attr = __ATTR_RO(version);
static ssize_t boot_params_data_read(struct file *fp, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t count)
{
memcpy(buf, (void *)&boot_params + off, count);
return count;
}
static struct bin_attribute boot_params_data_attr = {
.attr = {
.name = "data",
.mode = S_IRUGO,
},
.read = boot_params_data_read,
.size = sizeof(boot_params),
};
static struct attribute *boot_params_version_attrs[] = {
&boot_params_version_attr.attr,
NULL,
};
static struct bin_attribute *boot_params_data_attrs[] = {
&boot_params_data_attr,
NULL,
};
static struct attribute_group boot_params_attr_group = {
.attrs = boot_params_version_attrs,
.bin_attrs = boot_params_data_attrs,
};
static int kobj_to_setup_data_nr(struct kobject *kobj, int *nr)
{
const char *name;
name = kobject_name(kobj);
return kstrtoint(name, 10, nr);
}
static int get_setup_data_paddr(int nr, u64 *paddr)
{
int i = 0;
struct setup_data *data;
u64 pa_data = boot_params.hdr.setup_data;
while (pa_data) {
if (nr == i) {
*paddr = pa_data;
return 0;
}
data = ioremap_cache(pa_data, sizeof(*data));
if (!data)
return -ENOMEM;
pa_data = data->next;
iounmap(data);
i++;
}
return -EINVAL;
}
static int __init get_setup_data_size(int nr, size_t *size)
{
int i = 0;
struct setup_data *data;
u64 pa_data = boot_params.hdr.setup_data;
while (pa_data) {
data = ioremap_cache(pa_data, sizeof(*data));
if (!data)
return -ENOMEM;
if (nr == i) {
*size = data->len;
iounmap(data);
return 0;
}
pa_data = data->next;
iounmap(data);
i++;
}
return -EINVAL;
}
static ssize_t type_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
int nr, ret;
u64 paddr;
struct setup_data *data;
ret = kobj_to_setup_data_nr(kobj, &nr);
if (ret)
return ret;
ret = get_setup_data_paddr(nr, &paddr);
if (ret)
return ret;
data = ioremap_cache(paddr, sizeof(*data));
if (!data)
return -ENOMEM;
ret = sprintf(buf, "0x%x\n", data->type);
iounmap(data);
return ret;
}
static ssize_t setup_data_data_read(struct file *fp,
struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buf,
loff_t off, size_t count)
{
int nr, ret = 0;
u64 paddr;
struct setup_data *data;
void *p;
ret = kobj_to_setup_data_nr(kobj, &nr);
if (ret)
return ret;
ret = get_setup_data_paddr(nr, &paddr);
if (ret)
return ret;
data = ioremap_cache(paddr, sizeof(*data));
if (!data)
return -ENOMEM;
if (off > data->len) {
ret = -EINVAL;
goto out;
}
if (count > data->len - off)
count = data->len - off;
if (!count)
goto out;
ret = count;
p = ioremap_cache(paddr + sizeof(*data), data->len);
if (!p) {
ret = -ENOMEM;
goto out;
}
memcpy(buf, p + off, count);
iounmap(p);
out:
iounmap(data);
return ret;
}
static struct kobj_attribute type_attr = __ATTR_RO(type);
static struct bin_attribute data_attr = {
.attr = {
.name = "data",
.mode = S_IRUGO,
},
.read = setup_data_data_read,
};
static struct attribute *setup_data_type_attrs[] = {
&type_attr.attr,
NULL,
};
static struct bin_attribute *setup_data_data_attrs[] = {
&data_attr,
NULL,
};
static struct attribute_group setup_data_attr_group = {
.attrs = setup_data_type_attrs,
.bin_attrs = setup_data_data_attrs,
};
static int __init create_setup_data_node(struct kobject *parent,
struct kobject **kobjp, int nr)
{
int ret = 0;
size_t size;
struct kobject *kobj;
char name[16]; /* should be enough for setup_data nodes numbers */
snprintf(name, 16, "%d", nr);
kobj = kobject_create_and_add(name, parent);
if (!kobj)
return -ENOMEM;
ret = get_setup_data_size(nr, &size);
if (ret)
goto out_kobj;
data_attr.size = size;
ret = sysfs_create_group(kobj, &setup_data_attr_group);
if (ret)
goto out_kobj;
*kobjp = kobj;
return 0;
out_kobj:
kobject_put(kobj);
return ret;
}
static void __init cleanup_setup_data_node(struct kobject *kobj)
{
sysfs_remove_group(kobj, &setup_data_attr_group);
kobject_put(kobj);
}
static int __init get_setup_data_total_num(u64 pa_data, int *nr)
{
int ret = 0;
struct setup_data *data;
*nr = 0;
while (pa_data) {
*nr += 1;
data = ioremap_cache(pa_data, sizeof(*data));
if (!data) {
ret = -ENOMEM;
goto out;
}
pa_data = data->next;
iounmap(data);
}
out:
return ret;
}
static int __init create_setup_data_nodes(struct kobject *parent)
{
struct kobject *setup_data_kobj, **kobjp;
u64 pa_data;
int i, j, nr, ret = 0;
pa_data = boot_params.hdr.setup_data;
if (!pa_data)
return 0;
setup_data_kobj = kobject_create_and_add("setup_data", parent);
if (!setup_data_kobj) {
ret = -ENOMEM;
goto out;
}
ret = get_setup_data_total_num(pa_data, &nr);
if (ret)
goto out_setup_data_kobj;
kobjp = kmalloc(sizeof(*kobjp) * nr, GFP_KERNEL);
if (!kobjp) {
ret = -ENOMEM;
goto out_setup_data_kobj;
}
for (i = 0; i < nr; i++) {
ret = create_setup_data_node(setup_data_kobj, kobjp + i, i);
if (ret)
goto out_clean_nodes;
}
kfree(kobjp);
return 0;
out_clean_nodes:
for (j = i - 1; j > 0; j--)
cleanup_setup_data_node(*(kobjp + j));
kfree(kobjp);
out_setup_data_kobj:
kobject_put(setup_data_kobj);
out:
return ret;
}
static int __init boot_params_ksysfs_init(void)
{
int ret;
struct kobject *boot_params_kobj;
boot_params_kobj = kobject_create_and_add("boot_params",
kernel_kobj);