From 3a25e46c99e9c8b294b89df7a13a4638bf722af8 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Thu, 3 Oct 2019 12:12:09 +0100 Subject: [PATCH 01/48] docs/arm64: elf_hwcaps: sort the HWCAP{, 2} documentation by ascending value Part of the hardware capabilities documented in elf_hwcap.rst are ordered following the definition in the header arch/arm64/include/uapi/asm/hwcap.h but others seems to be documented in random order. To make easier to match against the definition in the header, they are now sorted in the same order as they are defined in header. I.e., HWCAP first by ascending value, and then HWCAP2 in the similar fashion. Acked-by: Will Deacon Signed-off-by: Julien Grall Signed-off-by: Catalin Marinas --- Documentation/arm64/elf_hwcaps.rst | 64 +++++++++++++++--------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst index 91f79529c58c..9ee7f8ff1fae 100644 --- a/Documentation/arm64/elf_hwcaps.rst +++ b/Documentation/arm64/elf_hwcaps.rst @@ -119,10 +119,6 @@ HWCAP_LRCPC HWCAP_DCPOP Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0001. -HWCAP2_DCPODP - - Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010. - HWCAP_SHA3 Functionality implied by ID_AA64ISAR0_EL1.SHA3 == 0b0001. @@ -141,6 +137,38 @@ HWCAP_SHA512 HWCAP_SVE Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001. +HWCAP_ASIMDFHM + Functionality implied by ID_AA64ISAR0_EL1.FHM == 0b0001. + +HWCAP_DIT + Functionality implied by ID_AA64PFR0_EL1.DIT == 0b0001. + +HWCAP_USCAT + Functionality implied by ID_AA64MMFR2_EL1.AT == 0b0001. + +HWCAP_ILRCPC + Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0010. + +HWCAP_FLAGM + Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0001. + +HWCAP_SSBS + Functionality implied by ID_AA64PFR1_EL1.SSBS == 0b0010. 
+ +HWCAP_PACA + Functionality implied by ID_AA64ISAR1_EL1.APA == 0b0001 or + ID_AA64ISAR1_EL1.API == 0b0001, as described by + Documentation/arm64/pointer-authentication.rst. + +HWCAP_PACG + Functionality implied by ID_AA64ISAR1_EL1.GPA == 0b0001 or + ID_AA64ISAR1_EL1.GPI == 0b0001, as described by + Documentation/arm64/pointer-authentication.rst. + +HWCAP2_DCPODP + + Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010. + HWCAP2_SVE2 Functionality implied by ID_AA64ZFR0_EL1.SVEVer == 0b0001. @@ -165,38 +193,10 @@ HWCAP2_SVESM4 Functionality implied by ID_AA64ZFR0_EL1.SM4 == 0b0001. -HWCAP_ASIMDFHM - Functionality implied by ID_AA64ISAR0_EL1.FHM == 0b0001. - -HWCAP_DIT - Functionality implied by ID_AA64PFR0_EL1.DIT == 0b0001. - -HWCAP_USCAT - Functionality implied by ID_AA64MMFR2_EL1.AT == 0b0001. - -HWCAP_ILRCPC - Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0010. - -HWCAP_FLAGM - Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0001. - HWCAP2_FLAGM2 Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0010. -HWCAP_SSBS - Functionality implied by ID_AA64PFR1_EL1.SSBS == 0b0010. - -HWCAP_PACA - Functionality implied by ID_AA64ISAR1_EL1.APA == 0b0001 or - ID_AA64ISAR1_EL1.API == 0b0001, as described by - Documentation/arm64/pointer-authentication.rst. - -HWCAP_PACG - Functionality implied by ID_AA64ISAR1_EL1.GPA == 0b0001 or - ID_AA64ISAR1_EL1.GPI == 0b0001, as described by - Documentation/arm64/pointer-authentication.rst. - HWCAP2_FRINT Functionality implied by ID_AA64ISAR1_EL1.FRINTTS == 0b0001. From 0f6e4c40164d4283b8bbe4ed80bf54424b756bc7 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Thu, 3 Oct 2019 12:12:10 +0100 Subject: [PATCH 02/48] docs/arm64: elf_hwcaps: Document HWCAP_SB All the hardware capabilities but HWCAP_SB is not documented in elf_hwcaps.rst. So document it. 
Acked-by: Will Deacon Signed-off-by: Julien Grall Signed-off-by: Catalin Marinas --- Documentation/arm64/elf_hwcaps.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst index 9ee7f8ff1fae..7fa3d215ae6a 100644 --- a/Documentation/arm64/elf_hwcaps.rst +++ b/Documentation/arm64/elf_hwcaps.rst @@ -155,6 +155,9 @@ HWCAP_FLAGM HWCAP_SSBS Functionality implied by ID_AA64PFR1_EL1.SSBS == 0b0010. +HWCAP_SB + Functionality implied by ID_AA64ISAR1_EL1.SB == 0b0001. + HWCAP_PACA Functionality implied by ID_AA64ISAR1_EL1.APA == 0b0001 or ID_AA64ISAR1_EL1.API == 0b0001, as described by Documentation/arm64/pointer-authentication.rst. From a8613e7070e771cea90d93eb1e8397246883065a Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Thu, 3 Oct 2019 12:12:11 +0100 Subject: [PATCH 03/48] docs/arm64: cpu-feature-registers: Documents missing visible fields A couple of fields visible to userspace are not described in the documentation. So update it. Acked-by: Will Deacon Signed-off-by: Julien Grall Signed-off-by: Catalin Marinas --- Documentation/arm64/cpu-feature-registers.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arm64/cpu-feature-registers.rst index 2955287e9acc..ffcf4e2c71ef 100644 --- a/Documentation/arm64/cpu-feature-registers.rst +++ b/Documentation/arm64/cpu-feature-registers.rst @@ -193,6 +193,10 @@ infrastructure: +------------------------------+---------+---------+ | Name | bits | visible | +------------------------------+---------+---------+ + | SB | [39-36] | y | + +------------------------------+---------+---------+ + | FRINTTS | [35-32] | y | + +------------------------------+---------+---------+ | GPI | [31-28] | y | +------------------------------+---------+---------+ | GPA | [27-24] | y | From ce87de45b3243d7023e8a4a76ba004002a7ec087 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 13 Sep 2019 13:55:50 +0100 Subject: [PATCH 04/48] arm64: simplify syscall wrapper 
ifdeffery Back in commit: 4378a7d4be30ec69 ("arm64: implement syscall wrappers") ... I implemented the arm64 syscall wrapper glue following the approach taken on x86. While doing so, I also copied across some ifdeffery that isn't necessary on arm64. On arm64 we don't share any of the native wrappers with compat tasks, and unlike x86 we don't have alternative implementations of SYSCALL_DEFINE0(), COND_SYSCALL(), or SYS_NI() defined when AArch32 compat support is enabled. Thus we don't need to prevent multiple definitions of these macros, and can remove the #ifndef ... #endif guards protecting them. If any of these had been previously defined elsewhere, syscalls are unlikely to work correctly, and we'd want the compiler to warn about the multiple definitions. Acked-by: Will Deacon Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/syscall_wrapper.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h index 06d880b3526c..b383b4802a7b 100644 --- a/arch/arm64/include/asm/syscall_wrapper.h +++ b/arch/arm64/include/asm/syscall_wrapper.h @@ -66,24 +66,18 @@ struct pt_regs; } \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) -#ifndef SYSCALL_DEFINE0 #define SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused); \ ALLOW_ERROR_INJECTION(__arm64_sys_##sname, ERRNO); \ asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused) -#endif -#ifndef COND_SYSCALL #define COND_SYSCALL(name) \ asmlinkage long __weak __arm64_sys_##name(const struct pt_regs *regs) \ { \ return sys_ni_syscall(); \ } -#endif -#ifndef SYS_NI #define SYS_NI(name) SYSCALL_ALIAS(__arm64_sys_##name, sys_ni_posix_timers); -#endif #endif /* __ASM_SYSCALL_WRAPPER_H */ From 6b7fe77c334ae59fed9500140e08f4f896b36871 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 9 Aug 2019 14:22:40 +0100 
Subject: [PATCH 05/48] arm/arm64: smccc/psci: add arm_smccc_1_1_get_conduit() SMCCC callers are currently amassing a collection of enums for the SMCCC conduit, and are having to dig into the PSCI driver's internals in order to figure out what to do. Let's clean this up, with common SMCCC_CONDUIT_* definitions, and an arm_smccc_1_1_get_conduit() helper that abstracts the PSCI driver's internal state. We can kill off the PSCI_CONDUIT_* definitions once we've migrated users over to the new interface. Signed-off-by: Mark Rutland Acked-by: Lorenzo Pieralisi Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- drivers/firmware/psci/psci.c | 15 +++++++++++++++ include/linux/arm-smccc.h | 16 ++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index 84f4ff351c62..eb797081d159 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -57,6 +57,21 @@ struct psci_operations psci_ops = { .smccc_version = SMCCC_VERSION_1_0, }; +enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void) +{ + if (psci_ops.smccc_version < SMCCC_VERSION_1_1) + return SMCCC_CONDUIT_NONE; + + switch (psci_ops.conduit) { + case PSCI_CONDUIT_SMC: + return SMCCC_CONDUIT_SMC; + case PSCI_CONDUIT_HVC: + return SMCCC_CONDUIT_HVC; + default: + return SMCCC_CONDUIT_NONE; + } +} + typedef unsigned long (psci_fn)(unsigned long, unsigned long, unsigned long, unsigned long); static psci_fn *invoke_psci_fn; diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 080012a6f025..df01a8579034 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -80,6 +80,22 @@ #include #include + +enum arm_smccc_conduit { + SMCCC_CONDUIT_NONE, + SMCCC_CONDUIT_SMC, + SMCCC_CONDUIT_HVC, +}; + +/** + * arm_smccc_1_1_get_conduit() + * + * Returns the conduit to be used for SMCCCv1.1 or later. + * + * When SMCCCv1.1 is not present, returns SMCCC_CONDUIT_NONE. 
+ */ +enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void); + /** * struct arm_smccc_res - Result from SMC/HVC call * @a0-a3 result values from registers 0 to 3 From c98bd29917281a5023f71a3148f538ad2709c0f0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 9 Aug 2019 14:22:41 +0100 Subject: [PATCH 06/48] arm64: errata: use arm_smccc_1_1_get_conduit() Now that we have arm_smccc_1_1_get_conduit(), we can hide the PSCI implementation details from the arm64 cpu errata code, so let's do so. As arm_smccc_1_1_get_conduit() implicitly checks that the SMCCC version is at least SMCCC_VERSION_1_1, we no longer need to check this explicitly where switch statements have a default case, e.g. in has_ssbd_mitigation(). There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Lorenzo Pieralisi Cc: Will Deacon Cc: Marc Zyngier Cc: Suzuki K Poulose Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpu_errata.c | 37 +++++++++++----------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index f593f4cffc0d..9c0b011eee20 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -6,7 +6,6 @@ */ #include -#include #include #include #include @@ -166,9 +165,7 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn, } #endif /* CONFIG_KVM_INDIRECT_VECTORS */ -#include #include -#include static void call_smc_arch_workaround_1(void) { @@ -212,11 +209,8 @@ static int detect_harden_bp_fw(void) struct arm_smccc_res res; u32 midr = read_cpuid_id(); - if (psci_ops.smccc_version == SMCCC_VERSION_1_0) - return -1; - - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); switch ((int)res.a0) { @@ -234,7 +228,7 @@ static int detect_harden_bp_fw(void) } break; - case PSCI_CONDUIT_SMC: + 
case SMCCC_CONDUIT_SMC: arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); switch ((int)res.a0) { @@ -308,11 +302,11 @@ void __init arm64_update_smccc_conduit(struct alt_instr *alt, BUG_ON(nr_inst != 1); - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: insn = aarch64_insn_get_hvc_value(); break; - case PSCI_CONDUIT_SMC: + case SMCCC_CONDUIT_SMC: insn = aarch64_insn_get_smc_value(); break; default: @@ -351,12 +345,12 @@ void arm64_set_ssbd_mitigation(bool state) return; } - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); break; - case PSCI_CONDUIT_SMC: + case SMCCC_CONDUIT_SMC: arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); break; @@ -390,20 +384,13 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, goto out_printmsg; } - if (psci_ops.smccc_version == SMCCC_VERSION_1_0) { - ssbd_state = ARM64_SSBD_UNKNOWN; - if (!this_cpu_safe) - __ssb_safe = false; - return false; - } - - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_2, &res); break; - case PSCI_CONDUIT_SMC: + case SMCCC_CONDUIT_SMC: arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_2, &res); break; From 6848253ddeae9fa44680bab6212599283f9d4ef2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 9 Aug 2019 14:22:42 +0100 Subject: [PATCH 07/48] arm: spectre-v2: use arm_smccc_1_1_get_conduit() Now that we have arm_smccc_1_1_get_conduit(), we can hide the PSCI implementation details from the arm spectre-v2 code, so let's do so. 
As arm_smccc_1_1_get_conduit() implicitly checks that the SMCCC version is at least SMCCC_VERSION_1_1, we no longer need to check this explicitly where switch statements have a default case. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Russell King Signed-off-by: Catalin Marinas --- arch/arm/mm/proc-v7-bugs.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index 9a07916af8dd..54d87506d3b5 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include #include #include @@ -75,11 +74,8 @@ static void cpu_v7_spectre_init(void) case ARM_CPU_PART_CORTEX_A72: { struct arm_smccc_res res; - if (psci_ops.smccc_version == SMCCC_VERSION_1_0) - break; - - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 != 0) @@ -90,7 +86,7 @@ static void cpu_v7_spectre_init(void) spectre_v2_method = "hypervisor"; break; - case PSCI_CONDUIT_SMC: + case SMCCC_CONDUIT_SMC: arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 != 0) From a5520eac4d2dafb7a48c1b0f1c486afcebd6fe0d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 9 Aug 2019 14:22:43 +0100 Subject: [PATCH 08/48] firmware/psci: use common SMCCC_CONDUIT_* Now that we have common SMCCC_CONDUIT_* definitions, migrate the PSCI code over to them, and kill off the old PSCI_CONDUIT_* definitions. There should be no functional change as a result of this patch. 
Signed-off-by: Mark Rutland Acked-by: Lorenzo Pieralisi Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- drivers/firmware/psci/psci.c | 25 +++++++++---------------- include/linux/psci.h | 9 ++------- 2 files changed, 11 insertions(+), 23 deletions(-) diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index eb797081d159..b3b6c15e7b36 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -53,7 +53,7 @@ bool psci_tos_resident_on(int cpu) } struct psci_operations psci_ops = { - .conduit = PSCI_CONDUIT_NONE, + .conduit = SMCCC_CONDUIT_NONE, .smccc_version = SMCCC_VERSION_1_0, }; @@ -62,14 +62,7 @@ enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void) if (psci_ops.smccc_version < SMCCC_VERSION_1_1) return SMCCC_CONDUIT_NONE; - switch (psci_ops.conduit) { - case PSCI_CONDUIT_SMC: - return SMCCC_CONDUIT_SMC; - case PSCI_CONDUIT_HVC: - return SMCCC_CONDUIT_HVC; - default: - return SMCCC_CONDUIT_NONE; - } + return psci_ops.conduit; } typedef unsigned long (psci_fn)(unsigned long, unsigned long, @@ -227,13 +220,13 @@ static unsigned long psci_migrate_info_up_cpu(void) 0, 0, 0); } -static void set_conduit(enum psci_conduit conduit) +static void set_conduit(enum arm_smccc_conduit conduit) { switch (conduit) { - case PSCI_CONDUIT_HVC: + case SMCCC_CONDUIT_HVC: invoke_psci_fn = __invoke_psci_fn_hvc; break; - case PSCI_CONDUIT_SMC: + case SMCCC_CONDUIT_SMC: invoke_psci_fn = __invoke_psci_fn_smc; break; default: @@ -255,9 +248,9 @@ static int get_set_conduit_method(struct device_node *np) } if (!strcmp("hvc", method)) { - set_conduit(PSCI_CONDUIT_HVC); + set_conduit(SMCCC_CONDUIT_HVC); } else if (!strcmp("smc", method)) { - set_conduit(PSCI_CONDUIT_SMC); + set_conduit(SMCCC_CONDUIT_SMC); } else { pr_warn("invalid \"method\" property: %s\n", method); return -EINVAL; @@ -598,9 +591,9 @@ int __init psci_acpi_init(void) pr_info("probing for conduit method from ACPI.\n"); if (acpi_psci_use_hvc()) - set_conduit(PSCI_CONDUIT_HVC); + 
set_conduit(SMCCC_CONDUIT_HVC); else - set_conduit(PSCI_CONDUIT_SMC); + set_conduit(SMCCC_CONDUIT_SMC); return psci_probe(); } diff --git a/include/linux/psci.h b/include/linux/psci.h index e2bacc6fd2f2..ebe0a881d13d 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -7,6 +7,7 @@ #ifndef __LINUX_PSCI_H #define __LINUX_PSCI_H +#include #include #include @@ -18,12 +19,6 @@ bool psci_tos_resident_on(int cpu); int psci_cpu_suspend_enter(u32 state); bool psci_power_state_is_valid(u32 state); -enum psci_conduit { - PSCI_CONDUIT_NONE, - PSCI_CONDUIT_SMC, - PSCI_CONDUIT_HVC, -}; - enum smccc_version { SMCCC_VERSION_1_0, SMCCC_VERSION_1_1, @@ -38,7 +33,7 @@ struct psci_operations { int (*affinity_info)(unsigned long target_affinity, unsigned long lowest_affinity_level); int (*migrate_info_type)(void); - enum psci_conduit conduit; + enum arm_smccc_conduit conduit; enum smccc_version smccc_version; }; From e6ea46511b1ae8c4491904c79411fcd29139af14 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 9 Aug 2019 14:22:44 +0100 Subject: [PATCH 09/48] firmware: arm_sdei: use common SMCCC_CONDUIT_* Now that we have common definitions for SMCCC conduits, move the SDEI code over to them, and remove the SDEI-specific definitions. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Acked-by: Lorenzo Pieralisi Acked-by: James Morse Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/sdei.c | 3 ++- drivers/firmware/arm_sdei.c | 12 ++++++------ include/linux/arm_sdei.h | 6 ------ 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index ea94cf8f9dc6..d6259dac62b6 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -2,6 +2,7 @@ // Copyright (C) 2017 Arm Ltd. 
#define pr_fmt(fmt) "sdei: " fmt +#include #include #include #include @@ -161,7 +162,7 @@ unsigned long sdei_arch_get_entry_point(int conduit) return 0; } - sdei_exit_mode = (conduit == CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC; + sdei_exit_mode = (conduit == SMCCC_CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC; #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 if (arm64_kernel_unmapped_at_el0()) { diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index 9cd70d1a5622..a479023fa036 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -967,29 +967,29 @@ static int sdei_get_conduit(struct platform_device *pdev) if (np) { if (of_property_read_string(np, "method", &method)) { pr_warn("missing \"method\" property\n"); - return CONDUIT_INVALID; + return SMCCC_CONDUIT_NONE; } if (!strcmp("hvc", method)) { sdei_firmware_call = &sdei_smccc_hvc; - return CONDUIT_HVC; + return SMCCC_CONDUIT_HVC; } else if (!strcmp("smc", method)) { sdei_firmware_call = &sdei_smccc_smc; - return CONDUIT_SMC; + return SMCCC_CONDUIT_SMC; } pr_warn("invalid \"method\" property: %s\n", method); } else if (IS_ENABLED(CONFIG_ACPI) && !acpi_disabled) { if (acpi_psci_use_hvc()) { sdei_firmware_call = &sdei_smccc_hvc; - return CONDUIT_HVC; + return SMCCC_CONDUIT_HVC; } else { sdei_firmware_call = &sdei_smccc_smc; - return CONDUIT_SMC; + return SMCCC_CONDUIT_SMC; } } - return CONDUIT_INVALID; + return SMCCC_CONDUIT_NONE; } static int sdei_probe(struct platform_device *pdev) diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h index 3305ea7f9dc7..0a241c5c911d 100644 --- a/include/linux/arm_sdei.h +++ b/include/linux/arm_sdei.h @@ -5,12 +5,6 @@ #include -enum sdei_conduit_types { - CONDUIT_INVALID = 0, - CONDUIT_SMC, - CONDUIT_HVC, -}; - #include #ifdef CONFIG_ARM_SDE_INTERFACE From ae970dc096b2d39f65f2e18d142e3978dc9ee1c7 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Wed, 11 Sep 2019 20:25:43 +0200 Subject: [PATCH 10/48] arm64: mm: use 
arm64_dma_phys_limit instead of calling max_zone_dma_phys() By the time we call zone_sizes_init() arm64_dma_phys_limit already contains the result of max_zone_dma_phys(). We use the variable instead of calling the function directly to save some precious cpu time. Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 45c00a54909c..098c0f5bedf6 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -187,7 +187,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; #ifdef CONFIG_ZONE_DMA32 - max_zone_pfns[ZONE_DMA32] = PFN_DOWN(max_zone_dma_phys()); + max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma_phys_limit); #endif max_zone_pfns[ZONE_NORMAL] = max; From a573cdd7973dedd87e62196c400332896bb236c8 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Wed, 11 Sep 2019 20:25:44 +0200 Subject: [PATCH 11/48] arm64: rename variables used to calculate ZONE_DMA32's size Let the name indicate that they are used to calculate ZONE_DMA32's size as opposed to ZONE_DMA. 
Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 098c0f5bedf6..8e9bc64c5878 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -56,7 +56,7 @@ EXPORT_SYMBOL(physvirt_offset); struct page *vmemmap __ro_after_init; EXPORT_SYMBOL(vmemmap); -phys_addr_t arm64_dma_phys_limit __ro_after_init; +phys_addr_t arm64_dma32_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE /* @@ -174,7 +174,7 @@ static void __init reserve_elfcorehdr(void) * currently assumes that for memory starting above 4G, 32-bit devices will * use a DMA offset. */ -static phys_addr_t __init max_zone_dma_phys(void) +static phys_addr_t __init max_zone_dma32_phys(void) { phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32); return min(offset + (1ULL << 32), memblock_end_of_DRAM()); @@ -187,7 +187,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; #ifdef CONFIG_ZONE_DMA32 - max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma_phys_limit); + max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma32_phys_limit); #endif max_zone_pfns[ZONE_NORMAL] = max; @@ -200,16 +200,16 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) { struct memblock_region *reg; unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES]; - unsigned long max_dma = min; + unsigned long max_dma32 = min; memset(zone_size, 0, sizeof(zone_size)); /* 4GB maximum for 32-bit only capable devices */ #ifdef CONFIG_ZONE_DMA32 - max_dma = PFN_DOWN(arm64_dma_phys_limit); - zone_size[ZONE_DMA32] = max_dma - min; + max_dma32 = PFN_DOWN(arm64_dma32_phys_limit); + zone_size[ZONE_DMA32] = max_dma32 - min; #endif - zone_size[ZONE_NORMAL] = max - max_dma; + zone_size[ZONE_NORMAL] = max - max_dma32; memcpy(zhole_size, 
zone_size, sizeof(zhole_size)); @@ -221,14 +221,14 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) continue; #ifdef CONFIG_ZONE_DMA32 - if (start < max_dma) { - unsigned long dma_end = min(end, max_dma); + if (start < max_dma32) { + unsigned long dma_end = min(end, max_dma32); zhole_size[ZONE_DMA32] -= dma_end - start; } #endif - if (end > max_dma) { + if (end > max_dma32) { unsigned long normal_end = min(end, max); - unsigned long normal_start = max(start, max_dma); + unsigned long normal_start = max(start, max_dma32); zhole_size[ZONE_NORMAL] -= normal_end - normal_start; } } @@ -420,9 +420,9 @@ void __init arm64_memblock_init(void) /* 4GB maximum for 32-bit only capable devices */ if (IS_ENABLED(CONFIG_ZONE_DMA32)) - arm64_dma_phys_limit = max_zone_dma_phys(); + arm64_dma32_phys_limit = max_zone_dma32_phys(); else - arm64_dma_phys_limit = PHYS_MASK + 1; + arm64_dma32_phys_limit = PHYS_MASK + 1; reserve_crashkernel(); @@ -430,7 +430,7 @@ void __init arm64_memblock_init(void) high_memory = __va(memblock_end_of_DRAM() - 1) + 1; - dma_contiguous_reserve(arm64_dma_phys_limit); + dma_contiguous_reserve(arm64_dma32_phys_limit); } void __init bootmem_init(void) @@ -534,7 +534,7 @@ static void __init free_unused_memmap(void) void __init mem_init(void) { if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) + max_pfn > (arm64_dma32_phys_limit >> PAGE_SHIFT)) swiotlb_init(1); else swiotlb_force = SWIOTLB_NO_FORCE; From 1a8e1cef7603e218339ac63cb3178b25554524e5 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Wed, 11 Sep 2019 20:25:45 +0200 Subject: [PATCH 12/48] arm64: use both ZONE_DMA and ZONE_DMA32 So far all arm64 devices have supported 32 bit DMA masks for their peripherals. This is not true anymore for the Raspberry Pi 4 as most of its peripherals can only address the first GB of memory on a total of up to 4 GB. 
This goes against ZONE_DMA32's intent, as it's expected for ZONE_DMA32 to be addressable with a 32 bit mask. So it was decided to re-introduce ZONE_DMA in arm64. ZONE_DMA will contain the lower 1G of memory, which is currently the memory area addressable by any peripheral on an arm64 device. ZONE_DMA32 will contain the rest of the 32 bit addressable memory. Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 4 +++ arch/arm64/include/asm/page.h | 2 ++ arch/arm64/mm/init.c | 54 +++++++++++++++++++++++++---------- 3 files changed, 45 insertions(+), 15 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 950a56b71ff0..1b6ea5a9d1a6 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -266,6 +266,10 @@ config GENERIC_CSUM config GENERIC_CALIBRATE_DELAY def_bool y +config ZONE_DMA + bool "Support DMA zone" if EXPERT + default y + config ZONE_DMA32 bool "Support DMA32 zone" if EXPERT default y diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index d39ddb258a04..7b8c98830101 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -38,4 +38,6 @@ extern int pfn_valid(unsigned long); #include +#define ARCH_ZONE_DMA_BITS 30 + #endif diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 8e9bc64c5878..44f07fdf7a59 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -56,6 +56,13 @@ EXPORT_SYMBOL(physvirt_offset); struct page *vmemmap __ro_after_init; EXPORT_SYMBOL(vmemmap); +/* + * We create both ZONE_DMA and ZONE_DMA32. ZONE_DMA covers the first 1G of + * memory as some devices, namely the Raspberry Pi 4, have peripherals with + * this limited view of the memory. ZONE_DMA32 will cover the rest of the 32 + * bit addressable memory area. 
+ */ +phys_addr_t arm64_dma_phys_limit __ro_after_init; phys_addr_t arm64_dma32_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE @@ -169,15 +176,16 @@ static void __init reserve_elfcorehdr(void) { } #endif /* CONFIG_CRASH_DUMP */ + /* - * Return the maximum physical address for ZONE_DMA32 (DMA_BIT_MASK(32)). It - * currently assumes that for memory starting above 4G, 32-bit devices will - * use a DMA offset. + * Return the maximum physical address for a zone with a given address size + * limit. It currently assumes that for memory starting above 4G, 32-bit + * devices will use a DMA offset. */ -static phys_addr_t __init max_zone_dma32_phys(void) +static phys_addr_t __init max_zone_phys(unsigned int zone_bits) { - phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32); - return min(offset + (1ULL << 32), memblock_end_of_DRAM()); + phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, zone_bits); + return min(offset + (1ULL << zone_bits), memblock_end_of_DRAM()); } #ifdef CONFIG_NUMA @@ -186,6 +194,9 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) { unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; +#ifdef CONFIG_ZONE_DMA + max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit); +#endif #ifdef CONFIG_ZONE_DMA32 max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma32_phys_limit); #endif @@ -201,13 +212,18 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) struct memblock_region *reg; unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES]; unsigned long max_dma32 = min; + unsigned long max_dma = min; memset(zone_size, 0, sizeof(zone_size)); - /* 4GB maximum for 32-bit only capable devices */ +#ifdef CONFIG_ZONE_DMA + max_dma = PFN_DOWN(arm64_dma_phys_limit); + zone_size[ZONE_DMA] = max_dma - min; + max_dma32 = max_dma; +#endif #ifdef CONFIG_ZONE_DMA32 max_dma32 = PFN_DOWN(arm64_dma32_phys_limit); - zone_size[ZONE_DMA32] = max_dma32 - min; + zone_size[ZONE_DMA32] = max_dma32 - max_dma; 
#endif zone_size[ZONE_NORMAL] = max - max_dma32; @@ -219,11 +235,17 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) if (start >= max) continue; - +#ifdef CONFIG_ZONE_DMA + if (start < max_dma) { + unsigned long dma_end = min_not_zero(end, max_dma); + zhole_size[ZONE_DMA] -= dma_end - start; + } +#endif #ifdef CONFIG_ZONE_DMA32 if (start < max_dma32) { - unsigned long dma_end = min(end, max_dma32); - zhole_size[ZONE_DMA32] -= dma_end - start; + unsigned long dma32_end = min(end, max_dma32); + unsigned long dma32_start = max(start, max_dma); + zhole_size[ZONE_DMA32] -= dma32_end - dma32_start; } #endif if (end > max_dma32) { @@ -418,9 +440,11 @@ void __init arm64_memblock_init(void) early_init_fdt_scan_reserved_mem(); - /* 4GB maximum for 32-bit only capable devices */ + if (IS_ENABLED(CONFIG_ZONE_DMA)) + arm64_dma_phys_limit = max_zone_phys(ARCH_ZONE_DMA_BITS); + if (IS_ENABLED(CONFIG_ZONE_DMA32)) - arm64_dma32_phys_limit = max_zone_dma32_phys(); + arm64_dma32_phys_limit = max_zone_phys(32); else arm64_dma32_phys_limit = PHYS_MASK + 1; @@ -430,7 +454,7 @@ void __init arm64_memblock_init(void) high_memory = __va(memblock_end_of_DRAM() - 1) + 1; - dma_contiguous_reserve(arm64_dma32_phys_limit); + dma_contiguous_reserve(arm64_dma_phys_limit ? : arm64_dma32_phys_limit); } void __init bootmem_init(void) @@ -534,7 +558,7 @@ static void __init free_unused_memmap(void) void __init mem_init(void) { if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > (arm64_dma32_phys_limit >> PAGE_SHIFT)) + max_pfn > PFN_DOWN(arm64_dma_phys_limit ? : arm64_dma32_phys_limit)) swiotlb_init(1); else swiotlb_force = SWIOTLB_NO_FORCE; From 734f9246e791d8da278957b2c326d7709b2a97c0 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Wed, 11 Sep 2019 20:25:46 +0200 Subject: [PATCH 13/48] mm: refresh ZONE_DMA and ZONE_DMA32 comments in 'enum zone_type' These zones usage has evolved with time and the comments were outdated. 
This joins both ZONE_DMA and ZONE_DMA32 explanation and gives up to date examples on how they are used on different architectures. Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Christoph Hellwig Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- include/linux/mmzone.h | 45 ++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index bda20282746b..b0a36d1580b6 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -359,33 +359,40 @@ struct per_cpu_nodestat { #endif /* !__GENERATING_BOUNDS.H */ enum zone_type { -#ifdef CONFIG_ZONE_DMA /* - * ZONE_DMA is used when there are devices that are not able - * to do DMA to all of addressable memory (ZONE_NORMAL). Then we - * carve out the portion of memory that is needed for these devices. - * The range is arch specific. + * ZONE_DMA and ZONE_DMA32 are used when there are peripherals not able + * to DMA to all of the addressable memory (ZONE_NORMAL). + * On architectures where this area covers the whole 32 bit address + * space ZONE_DMA32 is used. ZONE_DMA is left for the ones with smaller + * DMA addressing constraints. This distinction is important as a 32bit + * DMA mask is assumed when ZONE_DMA32 is defined. Some 64-bit + * platforms may need both zones as they support peripherals with + * different DMA addressing limitations. * - * Some examples + * Some examples: * - * Architecture Limit - * --------------------------- - * parisc, ia64, sparc <4G - * s390, powerpc <2G - * arm Various - * alpha Unlimited or 0-16MB. + * - i386 and x86_64 have a fixed 16M ZONE_DMA and ZONE_DMA32 for the + * rest of the lower 4G. * - * i386, x86_64 and multiple other arches - * <16M. + * - arm only uses ZONE_DMA, the size, up to 4G, may vary depending on + * the specific device. + * + * - arm64 has a fixed 1G ZONE_DMA and ZONE_DMA32 for the rest of the + * lower 4G. 
+ * + * - powerpc only uses ZONE_DMA, the size, up to 2G, may vary + * depending on the specific device. + * + * - s390 uses ZONE_DMA fixed to the lower 2G. + * + * - ia64 and riscv only use ZONE_DMA32. + * + * - parisc uses neither. */ +#ifdef CONFIG_ZONE_DMA ZONE_DMA, #endif #ifdef CONFIG_ZONE_DMA32 - /* - * x86_64 needs two ZONE_DMAs because it supports devices that are - * only able to do DMA to the lower 16M but also 32 bit devices that - * can only do DMA areas below 4G. - */ ZONE_DMA32, #endif /* From f226650494c6aa87526d12135b7de8b8c074f3de Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 2 Oct 2019 10:06:12 +0100 Subject: [PATCH 14/48] arm64: Relax ICC_PMR_EL1 accesses when ICC_CTLR_EL1.PMHE is clear The GICv3 architecture specification is incredibly misleading when it comes to PMR and the requirement for a DSB. It turns out that this DSB is only required if the CPU interface sends an Upstream Control message to the redistributor in order to update the RD's view of PMR. This message is only sent when ICC_CTLR_EL1.PMHE is set, which isn't the case in Linux. It can still be set from EL3, so some special care is required. But the upshot is that in the (hopefully large) majority of the cases, we can drop the DSB altogether. This relies on a new static key being set if the boot CPU has PMHE set. The drawback is that this static key has to be exported to modules.
Cc: Will Deacon Cc: James Morse Cc: Julien Thierry Cc: Suzuki K Poulose Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/barrier.h | 12 ++++++++++++ arch/arm64/include/asm/daifflags.h | 3 ++- arch/arm64/include/asm/irqflags.h | 19 ++++++++++--------- arch/arm64/include/asm/kvm_host.h | 3 +-- arch/arm64/kernel/entry.S | 6 ++++-- arch/arm64/kvm/hyp/switch.c | 4 ++-- drivers/irqchip/irq-gic-v3.c | 20 ++++++++++++++++++++ include/linux/irqchip/arm-gic-v3.h | 2 ++ 8 files changed, 53 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index e0e2b1946f42..7d9cc5ec4971 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -29,6 +29,18 @@ SB_BARRIER_INSN"nop\n", \ ARM64_HAS_SB)) +#ifdef CONFIG_ARM64_PSEUDO_NMI +#define pmr_sync() \ + do { \ + extern struct static_key_false gic_pmr_sync; \ + \ + if (static_branch_unlikely(&gic_pmr_sync)) \ + dsb(sy); \ + } while(0) +#else +#define pmr_sync() do {} while (0) +#endif + #define mb() dsb(sy) #define rmb() dsb(ld) #define wmb() dsb(st) diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 063c964af705..53cd5fab79a8 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -8,6 +8,7 @@ #include #include +#include #include #define DAIF_PROCCTX 0 @@ -65,7 +66,7 @@ static inline void local_daif_restore(unsigned long flags) if (system_uses_irq_prio_masking()) { gic_write_pmr(GIC_PRIO_IRQON); - dsb(sy); + pmr_sync(); } } else if (system_uses_irq_prio_masking()) { u64 pmr; diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index 1a59f0ed1ae3..aa4b6521ef14 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -6,6 +6,7 @@ #define __ASM_IRQFLAGS_H #include +#include #include #include @@ -34,14 +35,14 @@ static inline void arch_local_irq_enable(void) } asm 
volatile(ALTERNATIVE( - "msr daifclr, #2 // arch_local_irq_enable\n" - "nop", - __msr_s(SYS_ICC_PMR_EL1, "%0") - "dsb sy", + "msr daifclr, #2 // arch_local_irq_enable", + __msr_s(SYS_ICC_PMR_EL1, "%0"), ARM64_HAS_IRQ_PRIO_MASKING) : : "r" ((unsigned long) GIC_PRIO_IRQON) : "memory"); + + pmr_sync(); } static inline void arch_local_irq_disable(void) @@ -116,14 +117,14 @@ static inline unsigned long arch_local_irq_save(void) static inline void arch_local_irq_restore(unsigned long flags) { asm volatile(ALTERNATIVE( - "msr daif, %0\n" - "nop", - __msr_s(SYS_ICC_PMR_EL1, "%0") - "dsb sy", - ARM64_HAS_IRQ_PRIO_MASKING) + "msr daif, %0", + __msr_s(SYS_ICC_PMR_EL1, "%0"), + ARM64_HAS_IRQ_PRIO_MASKING) : : "r" (flags) : "memory"); + + pmr_sync(); } #endif /* __ASM_IRQFLAGS_H */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f656169db8c3..5ecb091c8576 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -600,8 +600,7 @@ static inline void kvm_arm_vhe_guest_enter(void) * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU. 
*/ - if (system_uses_irq_prio_masking()) - dsb(sy); + pmr_sync(); } static inline void kvm_arm_vhe_guest_exit(void) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index e304fe04b098..0a44f21bf087 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -269,8 +269,10 @@ alternative_else_nop_endif alternative_if ARM64_HAS_IRQ_PRIO_MASKING ldr x20, [sp, #S_PMR_SAVE] msr_s SYS_ICC_PMR_EL1, x20 - /* Ensure priority change is seen by redistributor */ - dsb sy + mrs_s x21, SYS_ICC_CTLR_EL1 + tbz x21, #6, .L__skip_pmr_sync\@ // Check for ICC_CTLR_EL1.PMHE + dsb sy // Ensure priority change is seen by redistributor +.L__skip_pmr_sync\@: alternative_else_nop_endif ldp x21, x22, [sp, #S_PC] // load ELR, SPSR diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 3d3815020e36..402f18664f25 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -12,7 +12,7 @@ #include -#include +#include #include #include #include @@ -592,7 +592,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) */ if (system_uses_irq_prio_masking()) { gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); - dsb(sy); + pmr_sync(); } vcpu = kern_hyp_va(vcpu); diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 422664ac5f53..0abc5a13adaa 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -87,6 +87,15 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); */ static DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis); +/* + * Global static key controlling whether an update to PMR allowing more + * interrupts requires to be propagated to the redistributor (DSB SY). + * And this needs to be exported for modules to be able to enable + * interrupts... 
+ */ +DEFINE_STATIC_KEY_FALSE(gic_pmr_sync); +EXPORT_SYMBOL(gic_pmr_sync); + /* ppi_nmi_refs[n] == number of cpus having ppi[n + 16] set as NMI */ static refcount_t *ppi_nmi_refs; @@ -1502,6 +1511,17 @@ static void gic_enable_nmi_support(void) for (i = 0; i < gic_data.ppi_nr; i++) refcount_set(&ppi_nmi_refs[i], 0); + /* + * Linux itself doesn't use 1:N distribution, so has no need to + * set PMHE. The only reason to have it set is if EL3 requires it + * (and we can't change it). + */ + if (gic_read_ctlr() & ICC_CTLR_EL1_PMHE_MASK) + static_branch_enable(&gic_pmr_sync); + + pr_info("%s ICC_PMR_EL1 synchronisation\n", + static_branch_unlikely(&gic_pmr_sync) ? "Forcing" : "Relaxing"); + static_branch_enable(&supports_pseudo_nmis); if (static_branch_likely(&supports_deactivate_key)) diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 5cc10cf7cb3e..a0bde9e12efa 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -487,6 +487,8 @@ #define ICC_CTLR_EL1_EOImode_MASK (1 << ICC_CTLR_EL1_EOImode_SHIFT) #define ICC_CTLR_EL1_CBPR_SHIFT 0 #define ICC_CTLR_EL1_CBPR_MASK (1 << ICC_CTLR_EL1_CBPR_SHIFT) +#define ICC_CTLR_EL1_PMHE_SHIFT 6 +#define ICC_CTLR_EL1_PMHE_MASK (1 << ICC_CTLR_EL1_PMHE_SHIFT) #define ICC_CTLR_EL1_PRI_BITS_SHIFT 8 #define ICC_CTLR_EL1_PRI_BITS_MASK (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT) #define ICC_CTLR_EL1_ID_BITS_SHIFT 11 From 7e3a57fa6ca831fa232a7cd4659eaed674236810 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 2 Oct 2019 10:06:13 +0100 Subject: [PATCH 15/48] arm64: Document ICC_CTLR_EL3.PMHE setting requirements It goes without saying, but better saying it: the kernel expects ICC_CTLR_EL3.PMHE to have the same value across all CPUs, and for that setting not to change during the lifetime of the kernel. 
Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- Documentation/arm64/booting.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst index d3f3a60fbf25..5d78a6f5b0ae 100644 --- a/Documentation/arm64/booting.rst +++ b/Documentation/arm64/booting.rst @@ -213,6 +213,9 @@ Before jumping into the kernel, the following conditions must be met: - ICC_SRE_EL3.Enable (bit 3) must be initialiased to 0b1. - ICC_SRE_EL3.SRE (bit 0) must be initialised to 0b1. + - ICC_CTLR_EL3.PMHE (bit 6) must be set to the same value across + all CPUs the kernel is executing on, and must stay constant + for the lifetime of the kernel. - If the kernel is entered at EL1: From 899ee4afe5eb262236717188ccdaa0192c00dc5a Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sat, 28 Sep 2019 11:02:26 +0300 Subject: [PATCH 16/48] arm64: use generic free_initrd_mem() arm64 calls memblock_free() for the initrd area in its implementation of free_initrd_mem(), but this call has no actual effect that late in the boot process. By the time initrd is freed, all the reserved memory is managed by the page allocator and the memblock.reserved is unused, so the only purpose of the memblock_free() call is to keep track of initrd memory for debugging and accounting. Without the memblock_free() call the only difference between arm64 and the generic versions of free_initrd_mem() is the memory poisoning. Move memblock_free() call to the generic code, enable it there for the architectures that define ARCH_KEEP_MEMBLOCK and use the generic implementation of free_initrd_mem() on arm64. 
Tested-by: Anshuman Khandual #arm64 Reviewed-by: Anshuman Khandual Acked-by: Will Deacon Signed-off-by: Mike Rapoport Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 12 ------------ init/initramfs.c | 8 ++++++++ 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 45c00a54909c..87a0e3b6c146 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -580,18 +580,6 @@ void free_initmem(void) unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin)); } -#ifdef CONFIG_BLK_DEV_INITRD -void __init free_initrd_mem(unsigned long start, unsigned long end) -{ - unsigned long aligned_start, aligned_end; - - aligned_start = __virt_to_phys(start) & PAGE_MASK; - aligned_end = PAGE_ALIGN(__virt_to_phys(end)); - memblock_free(aligned_start, aligned_end - aligned_start); - free_reserved_area((void *)start, (void *)end, 0, "initrd"); -} -#endif - /* * Dump out memory limit information on panic. */ diff --git a/init/initramfs.c b/init/initramfs.c index c47dad0884f7..8ec1be4d7d51 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -10,6 +10,7 @@ #include #include #include +#include static ssize_t __init xwrite(int fd, const char *p, size_t count) { @@ -529,6 +530,13 @@ extern unsigned long __initramfs_size; void __weak free_initrd_mem(unsigned long start, unsigned long end) { +#ifdef CONFIG_ARCH_KEEP_MEMBLOCK + unsigned long aligned_start = ALIGN_DOWN(start, PAGE_SIZE); + unsigned long aligned_end = ALIGN(end, PAGE_SIZE); + + memblock_free(__pa(aligned_start), aligned_end - aligned_start); +#endif + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, "initrd"); } From 6ec939f8b809cb06ba7802e17ef7024d1bc0ee84 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 4 Oct 2019 09:53:58 +0530 Subject: [PATCH 17/48] arm64/mm: Poison initmem while freeing with free_reserved_area() Platform implementation for free_initmem() should poison the memory while freeing it up. 
Hence pass across POISON_FREE_INITMEM while calling into free_reserved_area(). The same is being followed in the generic fallback for free_initmem() and some other platforms overriding it. Cc: Mark Rutland Cc: linux-kernel@vger.kernel.org Reviewed-by: Steven Price Acked-by: Will Deacon Signed-off-by: Anshuman Khandual Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 87a0e3b6c146..7c225d0132b8 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -571,7 +571,7 @@ void free_initmem(void) { free_reserved_area(lm_alias(__init_begin), lm_alias(__init_end), - 0, "unused kernel"); + POISON_FREE_INITMEM, "unused kernel"); /* * Unmap the __init region but leave the VM area in place. This * prevents the region from being reused for kernel modules, which From 4399d430700d3974ed6c5a1b1380bc6527f17e99 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 16 Oct 2019 07:47:14 -0700 Subject: [PATCH 18/48] arm64: mm: Fix unused variable warning in zone_sizes_init When building arm64 allnoconfig, CONFIG_ZONE_DMA and CONFIG_ZONE_DMA32 get disabled so there is a warning about max_dma being unused. ../arch/arm64/mm/init.c:215:16: warning: unused variable 'max_dma' [-Wunused-variable] unsigned long max_dma = min; ^ 1 warning generated. Add __maybe_unused to make this clear to the compiler. 
Fixes: 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32") Reviewed-by: Nicolas Saenz Julienne Signed-off-by: Nathan Chancellor Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 44f07fdf7a59..71b45c58218b 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -212,7 +212,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) struct memblock_region *reg; unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES]; unsigned long max_dma32 = min; - unsigned long max_dma = min; + unsigned long __maybe_unused max_dma = min; memset(zone_size, 0, sizeof(zone_size)); From 47d7b15b88f96a90694cfc607d0717d62dff6c45 Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 11 Oct 2019 22:09:36 +0800 Subject: [PATCH 19/48] arm64: cpufeature: introduce helper cpu_has_hw_af() We unconditionally set the HW_AFDBM capability and only enable it on CPUs which really have the feature. But sometimes we need to know whether this cpu has the capability of HW AF. So decouple AF from DBM by a new helper cpu_has_hw_af(). If later we noticed a potential performance issue on this path, we can turn it into a static label as with other CPU features. 
Signed-off-by: Jia He Suggested-by: Suzuki Poulose Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 9cde5d2e768f..4261d55e8506 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -659,6 +659,20 @@ static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange) default: return CONFIG_ARM64_PA_BITS; } } + +/* Check whether hardware update of the Access flag is supported */ +static inline bool cpu_has_hw_af(void) +{ + u64 mmfr1; + + if (!IS_ENABLED(CONFIG_ARM64_HW_AFDBM)) + return false; + + mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); + return cpuid_feature_extract_unsigned_field(mmfr1, + ID_AA64MMFR1_HADBS_SHIFT); +} + #endif /* __ASSEMBLY__ */ #endif From 6af31226d0394691f5562eca0134262bb935fa9c Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 11 Oct 2019 22:09:37 +0800 Subject: [PATCH 20/48] arm64: mm: implement arch_faults_on_old_pte() on arm64 On arm64 without hardware Access Flag, copying from user will fail because the pte is old and cannot be marked young. So we always end up with zeroed page after fork() + CoW for pfn mappings. We don't always have a hardware-managed Access Flag on arm64. Hence implement arch_faults_on_old_pte on arm64 to indicate that it might cause page fault when accessing old pte. 
Signed-off-by: Jia He Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 7576df00eb50..e96fb82f62de 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -885,6 +885,20 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, #define phys_to_ttbr(addr) (addr) #endif +/* + * On arm64 without hardware Access Flag, copying from user will fail because + * the pte is old and cannot be marked young. So we always end up with zeroed + * page after fork() + CoW for pfn mappings. We don't always have a + * hardware-managed access flag on arm64. + */ +static inline bool arch_faults_on_old_pte(void) +{ + WARN_ON(preemptible()); + + return !cpu_has_hw_af(); +} +#define arch_faults_on_old_pte arch_faults_on_old_pte + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_PGTABLE_H */ From f2c4e5970cece75a895fcc45f0cd66b5a5ec0819 Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 11 Oct 2019 22:09:38 +0800 Subject: [PATCH 21/48] x86/mm: implement arch_faults_on_old_pte() stub on x86 arch_faults_on_old_pte is a helper to indicate that it might cause page fault when accessing old pte. But on x86, there is feature to setting pte access flag by hardware. Hence implement an overriding stub which always returns false. 
Signed-off-by: Jia He Suggested-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/x86/include/asm/pgtable.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 0bc530c4eb13..ad97dc155195 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1463,6 +1463,12 @@ static inline bool arch_has_pfn_modify_check(void) return boot_cpu_has_bug(X86_BUG_L1TF); } +#define arch_faults_on_old_pte arch_faults_on_old_pte +static inline bool arch_faults_on_old_pte(void) +{ + return false; +} + #include #endif /* __ASSEMBLY__ */ From 83d116c53058d505ddef051e90ab27f57015b025 Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 11 Oct 2019 22:09:39 +0800 Subject: [PATCH 22/48] mm: fix double page fault on arm64 if PTE_AF is cleared When we tested pmdk unit test [1] vmmalloc_fork TEST3 on arm64 guest, there will be a double page fault in __copy_from_user_inatomic of cow_user_page. To reproduce the bug, the cmd is as follows after you deployed everything: make -C src/test/vmmalloc_fork/ TEST_TIME=60m check Below call trace is from arm64 do_page_fault for debugging purpose: [ 110.016195] Call trace: [ 110.016826] do_page_fault+0x5a4/0x690 [ 110.017812] do_mem_abort+0x50/0xb0 [ 110.018726] el1_da+0x20/0xc4 [ 110.019492] __arch_copy_from_user+0x180/0x280 [ 110.020646] do_wp_page+0xb0/0x860 [ 110.021517] __handle_mm_fault+0x994/0x1338 [ 110.022606] handle_mm_fault+0xe8/0x180 [ 110.023584] do_page_fault+0x240/0x690 [ 110.024535] do_mem_abort+0x50/0xb0 [ 110.025423] el0_da+0x20/0x24 The pte info before __copy_from_user_inatomic is (PTE_AF is cleared): [ffff9b007000] pgd=000000023d4f8003, pud=000000023da9b003, pmd=000000023d4b3003, pte=360000298607bd3 As told by Catalin: "On arm64 without hardware Access Flag, copying from user will fail because the pte is old and cannot be marked young. So we always end up with zeroed page after fork() + CoW for pfn mappings. 
we don't always have a hardware-managed access flag on arm64." This patch fixes it by calling pte_mkyoung. Also, the parameter is changed because vmf should be passed to cow_user_page() Add a WARN_ON_ONCE when __copy_from_user_inatomic() returns error in case there can be some obscure use-case (by Kirill). [1] https://github.com/pmem/pmdk/tree/master/src/test/vmmalloc_fork Signed-off-by: Jia He Reported-by: Yibo Cai Reviewed-by: Catalin Marinas Acked-by: Kirill A. Shutemov Signed-off-by: Catalin Marinas --- mm/memory.c | 104 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 89 insertions(+), 15 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index b1ca51a079f2..b6a5d6a08438 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -118,6 +118,18 @@ int randomize_va_space __read_mostly = 2; #endif +#ifndef arch_faults_on_old_pte +static inline bool arch_faults_on_old_pte(void) +{ + /* + * Those arches which don't have hw access flag feature need to + * implement their own helper. By default, "true" means pagefault + * will be hit on old pte. + */ + return true; +} +#endif + static int __init disable_randmaps(char *s) { randomize_va_space = 0; @@ -2145,32 +2157,82 @@ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd, return same; } -static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma) +static inline bool cow_user_page(struct page *dst, struct page *src, + struct vm_fault *vmf) { + bool ret; + void *kaddr; + void __user *uaddr; + bool force_mkyoung; + struct vm_area_struct *vma = vmf->vma; + struct mm_struct *mm = vma->vm_mm; + unsigned long addr = vmf->address; + debug_dma_assert_idle(src); + if (likely(src)) { + copy_user_highpage(dst, src, addr, vma); + return true; + } + /* * If the source page was a PFN mapping, we don't have * a "struct page" for it. We do a best-effort copy by * just copying from the original user address. If that * fails, we just zero-fill it. 
Live with it. */ - if (unlikely(!src)) { - void *kaddr = kmap_atomic(dst); - void __user *uaddr = (void __user *)(va & PAGE_MASK); + kaddr = kmap_atomic(dst); + uaddr = (void __user *)(addr & PAGE_MASK); + /* + * On architectures with software "accessed" bits, we would + * take a double page fault, so mark it accessed here. + */ + force_mkyoung = arch_faults_on_old_pte() && !pte_young(vmf->orig_pte); + if (force_mkyoung) { + pte_t entry; + + vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); + if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { + /* + * Other thread has already handled the fault + * and we don't need to do anything. If it's + * not the case, the fault will be triggered + * again on the same address. + */ + ret = false; + goto pte_unlock; + } + + entry = pte_mkyoung(vmf->orig_pte); + if (ptep_set_access_flags(vma, addr, vmf->pte, entry, 0)) + update_mmu_cache(vma, addr, vmf->pte); + } + + /* + * This really shouldn't fail, because the page is there + * in the page tables. But it might just be unreadable, + * in which case we just give up and fill the result with + * zeroes. + */ + if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) { /* - * This really shouldn't fail, because the page is there - * in the page tables. But it might just be unreadable, - * in which case we just give up and fill the result with - * zeroes. 
+ * Give a warn in case there can be some obscure + * use-case */ - if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) - clear_page(kaddr); - kunmap_atomic(kaddr); - flush_dcache_page(dst); - } else - copy_user_highpage(dst, src, va, vma); + WARN_ON_ONCE(1); + clear_page(kaddr); + } + + ret = true; + +pte_unlock: + if (force_mkyoung) + pte_unmap_unlock(vmf->pte, vmf->ptl); + kunmap_atomic(kaddr); + flush_dcache_page(dst); + + return ret; } static gfp_t __get_fault_gfp_mask(struct vm_area_struct *vma) @@ -2327,7 +2389,19 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) vmf->address); if (!new_page) goto oom; - cow_user_page(new_page, old_page, vmf->address, vma); + + if (!cow_user_page(new_page, old_page, vmf)) { + /* + * COW failed, if the fault was solved by other, + * it's fine. If not, userspace would re-fault on + * the same address and we will handle the fault + * from the second attempt. + */ + put_page(new_page); + if (old_page) + put_page(old_page); + return 0; + } } if (mem_cgroup_try_charge_delay(new_page, mm, GFP_KERNEL, &memcg, false)) From 7db3e57e6a95435cef5b33f2a90efcac5ce577da Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Fri, 25 Oct 2019 14:32:06 +0800 Subject: [PATCH 23/48] arm64: cpufeature: Fix typos in comment Fix up one typos: CTR_E0 -> CTR_EL0 Cc: Will Deacon Acked-by: Suzuki K Poulose Signed-off-by: Shaokun Zhang Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpuinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 05933c065732..56bba746da1c 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -329,7 +329,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_cntfrq = arch_timer_get_cntfrq(); /* * Use the effective value of the CTR_EL0 than the raw value - * exposed by the CPU. CTR_E0.IDC field value must be interpreted + * exposed by the CPU. 
CTR_EL0.IDC field value must be interpreted * with the CLIDR_EL1 fields to avoid triggering false warnings * when there is a mismatch across the CPUs. Keep track of the * effective value of the CTR_EL0 in our internal records for From ade12b8631d91b9c2849facb0a1dc3af317ecbb3 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 23 Oct 2019 18:52:22 +0100 Subject: [PATCH 24/48] arm64: docs: cpu-feature-registers: Document ID_AA64PFR1_EL1 Commit d71be2b6c0e1 ("arm64: cpufeature: Detect SSBS and advertise to userspace") exposes ID_AA64PFR1_EL1 to userspace, but didn't update the documentation to match. Add it. Acked-by: Will Deacon Signed-off-by: Dave Martin Signed-off-by: Mark Brown Signed-off-by: Catalin Marinas --- Documentation/arm64/cpu-feature-registers.rst | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arm64/cpu-feature-registers.rst index 2955287e9acc..b86828f86e39 100644 --- a/Documentation/arm64/cpu-feature-registers.rst +++ b/Documentation/arm64/cpu-feature-registers.rst @@ -168,8 +168,15 @@ infrastructure: +------------------------------+---------+---------+ - 3) MIDR_EL1 - Main ID Register + 3) ID_AA64PFR1_EL1 - Processor Feature Register 1 + +------------------------------+---------+---------+ + | Name | bits | visible | + +------------------------------+---------+---------+ + | SSBS | [7-4] | y | + +------------------------------+---------+---------+ + + 4) MIDR_EL1 - Main ID Register +------------------------------+---------+---------+ | Name | bits | visible | +------------------------------+---------+---------+ @@ -188,7 +195,7 @@ infrastructure: as available on the CPU where it is fetched and is not a system wide safe value. 
- 4) ID_AA64ISAR1_EL1 - Instruction set attribute register 1 + 5) ID_AA64ISAR1_EL1 - Instruction set attribute register 1 +------------------------------+---------+---------+ | Name | bits | visible | @@ -210,7 +217,7 @@ infrastructure: | DPB | [3-0] | y | +------------------------------+---------+---------+ - 5) ID_AA64MMFR2_EL1 - Memory model feature register 2 + 6) ID_AA64MMFR2_EL1 - Memory model feature register 2 +------------------------------+---------+---------+ | Name | bits | visible | @@ -218,7 +225,7 @@ infrastructure: | AT | [35-32] | y | +------------------------------+---------+---------+ - 6) ID_AA64ZFR0_EL1 - SVE feature ID register 0 + 7) ID_AA64ZFR0_EL1 - SVE feature ID register 0 +------------------------------+---------+---------+ | Name | bits | visible | From a5315819c5e7e50b2b457b60aaf2cc61d76888a2 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 24 Oct 2019 13:01:43 +0100 Subject: [PATCH 25/48] arm64: pgtable: Correct typo in comment vmmemmap -> vmemmap Signed-off-by: Mark Brown Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 7576df00eb50..4438a23f969c 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -17,7 +17,7 @@ * VMALLOC range. * * VMALLOC_START: beginning of the kernel vmalloc space - * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space + * VMALLOC_END: extends to the available space below vmemmap, PCI I/O space * and fixed mappings */ #define VMALLOC_START (MODULES_END) From bfcef4ab1d7ee8921bc322109b1692036cc6cbe0 Mon Sep 17 00:00:00 2001 From: Yunfeng Ye Date: Mon, 21 Oct 2019 19:31:21 +0800 Subject: [PATCH 26/48] arm64: psci: Reduce the waiting time for cpu_psci_cpu_kill() In cases like suspend-to-disk and suspend-to-ram, a large number of CPU cores need to be shut down. 
At present, the CPU hotplug operation is serialised, and the CPU cores can only be shut down one by one. In this process, if PSCI affinity_info() does not return LEVEL_OFF quickly, cpu_psci_cpu_kill() needs to wait for 10ms. If hundreds of CPU cores need to be shut down, it will take a long time. Normally, there is no need to wait 10ms in cpu_psci_cpu_kill(). So change the wait interval from 10 ms to max 1 ms and use usleep_range() instead of msleep() for more accurate timer. In addition, reducing the time interval will increase the messages output, so remove the "Retry ..." message, instead, track time and output to the successful message. Signed-off-by: Yunfeng Ye Reviewed-by: Sudeep Holla Signed-off-by: Catalin Marinas --- arch/arm64/kernel/psci.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index c9f72b2665f1..43ae4e0c968f 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -81,7 +81,8 @@ static void cpu_psci_cpu_die(unsigned int cpu) static int cpu_psci_cpu_kill(unsigned int cpu) { - int err, i; + int err; + unsigned long start, end; if (!psci_ops.affinity_info) return 0; @@ -91,16 +92,18 @@ static int cpu_psci_cpu_kill(unsigned int cpu) * while it is dying. So, try again a few times.
*/ - for (i = 0; i < 10; i++) { + start = jiffies; + end = start + msecs_to_jiffies(100); + do { err = psci_ops.affinity_info(cpu_logical_map(cpu), 0); if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) { - pr_info("CPU%d killed.\n", cpu); + pr_info("CPU%d killed (polled %d ms)\n", cpu, + jiffies_to_msecs(jiffies - start)); return 0; } - msleep(10); - pr_info("Retrying again to check for CPU kill\n"); - } + usleep_range(100, 1000); + } while (time_before(jiffies, end)); pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n", cpu, err); From 4686da5140c18c84ca01a8ab994571d832c63398 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 28 Oct 2019 16:45:07 +0000 Subject: [PATCH 27/48] arm64: Make arm64_dma32_phys_limit static This variable is only used in the arch/arm64/mm/init.c file for ZONE_DMA32 initialisation, no need to expose it. Reported-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 71b45c58218b..39fc69873b18 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -63,7 +63,7 @@ EXPORT_SYMBOL(vmemmap); * bit addressable memory area. */ phys_addr_t arm64_dma_phys_limit __ro_after_init; -phys_addr_t arm64_dma32_phys_limit __ro_after_init; +static phys_addr_t arm64_dma32_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE /* From e44ec4a35dbdf3f3fe772f176fab3b8be7e02b0f Mon Sep 17 00:00:00 2001 From: Xiang Zheng Date: Tue, 29 Oct 2019 20:41:31 +0800 Subject: [PATCH 28/48] arm64: print additional fault message when executing non-exec memory When attempting to execute non-executable memory, the fault message shows: Unable to handle kernel read from unreadable memory at virtual address ffff802dac469000 This may confuse someone, so add a new fault message for instruction abort.
Acked-by: Will Deacon Signed-off-by: Xiang Zheng Signed-off-by: Catalin Marinas --- arch/arm64/mm/fault.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 855f2a7954e6..d46a2bb90f54 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -314,6 +314,8 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, if (is_el1_permission_fault(addr, esr, regs)) { if (esr & ESR_ELx_WNR) msg = "write to read-only memory"; + else if (is_el1_instruction_abort(esr)) + msg = "execute from non-executable memory"; else msg = "read from unreadable memory"; } else if (addr < PAGE_SIZE) { From 8b5369ea580964dbc982781bfb9fb93459fc5e8d Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Mon, 14 Oct 2019 20:31:03 +0200 Subject: [PATCH 29/48] dma/direct: turn ARCH_ZONE_DMA_BITS into a variable Some architectures, notably ARM, are interested in tweaking this depending on their runtime DMA addressing limitations. Acked-by: Christoph Hellwig Signed-off-by: Nicolas Saenz Julienne Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/page.h | 2 -- arch/arm64/mm/init.c | 9 +++++++-- arch/powerpc/include/asm/page.h | 9 --------- arch/powerpc/mm/mem.c | 20 +++++++++++++++----- arch/s390/include/asm/page.h | 2 -- arch/s390/mm/init.c | 1 + include/linux/dma-direct.h | 2 ++ kernel/dma/direct.c | 13 ++++++------- 8 files changed, 31 insertions(+), 27 deletions(-) diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 7b8c98830101..d39ddb258a04 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -38,6 +38,4 @@ extern int pfn_valid(unsigned long); #include -#define ARCH_ZONE_DMA_BITS 30 - #endif diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 39fc69873b18..35f27b839101 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,8 @@ #include 
#include +#define ARM64_ZONE_DMA_BITS 30 + /* * We need to be able to catch inadvertent references to memstart_addr * that occur (potentially in generic code) before arm64_memblock_init() @@ -440,8 +443,10 @@ void __init arm64_memblock_init(void) early_init_fdt_scan_reserved_mem(); - if (IS_ENABLED(CONFIG_ZONE_DMA)) - arm64_dma_phys_limit = max_zone_phys(ARCH_ZONE_DMA_BITS); + if (IS_ENABLED(CONFIG_ZONE_DMA)) { + zone_dma_bits = ARM64_ZONE_DMA_BITS; + arm64_dma_phys_limit = max_zone_phys(ARM64_ZONE_DMA_BITS); + } if (IS_ENABLED(CONFIG_ZONE_DMA32)) arm64_dma32_phys_limit = max_zone_phys(32); diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index c8bb14ff4713..f6c562acc3f8 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -329,13 +329,4 @@ struct vm_area_struct; #endif /* __ASSEMBLY__ */ #include -/* - * Allow 30-bit DMA for very limited Broadcom wifi chips on many powerbooks. - */ -#ifdef CONFIG_PPC32 -#define ARCH_ZONE_DMA_BITS 30 -#else -#define ARCH_ZONE_DMA_BITS 31 -#endif - #endif /* _ASM_POWERPC_PAGE_H */ diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index be941d382c8d..c95b7fe9f298 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -201,10 +202,10 @@ static int __init mark_nonram_nosave(void) * everything else. GFP_DMA32 page allocations automatically fall back to * ZONE_DMA. * - * By using 31-bit unconditionally, we can exploit ARCH_ZONE_DMA_BITS to - * inform the generic DMA mapping code. 32-bit only devices (if not handled - * by an IOMMU anyway) will take a first dip into ZONE_NORMAL and get - * otherwise served by ZONE_DMA. + * By using 31-bit unconditionally, we can exploit zone_dma_bits to inform the + * generic DMA mapping code. 32-bit only devices (if not handled by an IOMMU + * anyway) will take a first dip into ZONE_NORMAL and get otherwise served by + * ZONE_DMA. 
*/ static unsigned long max_zone_pfns[MAX_NR_ZONES]; @@ -237,9 +238,18 @@ void __init paging_init(void) printk(KERN_DEBUG "Memory hole size: %ldMB\n", (long int)((top_of_ram - total_ram) >> 20)); + /* + * Allow 30-bit DMA for very limited Broadcom wifi chips on many + * powerbooks. + */ + if (IS_ENABLED(CONFIG_PPC32)) + zone_dma_bits = 30; + else + zone_dma_bits = 31; + #ifdef CONFIG_ZONE_DMA max_zone_pfns[ZONE_DMA] = min(max_low_pfn, - 1UL << (ARCH_ZONE_DMA_BITS - PAGE_SHIFT)); + 1UL << (zone_dma_bits - PAGE_SHIFT)); #endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; #ifdef CONFIG_HIGHMEM diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 823578c6b9e2..a4d38092530a 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -177,8 +177,6 @@ static inline int devmem_is_allowed(unsigned long pfn) #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -#define ARCH_ZONE_DMA_BITS 31 - #include #include diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index a124f19f7b3c..f0ce22220565 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -118,6 +118,7 @@ void __init paging_init(void) sparse_memory_present_with_active_regions(MAX_NUMNODES); sparse_init(); + zone_dma_bits = 31; memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS); max_zone_pfns[ZONE_NORMAL] = max_low_pfn; diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index adf993a3bd58..d03af3605460 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -5,6 +5,8 @@ #include #include +extern unsigned int zone_dma_bits; + #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA #include #else diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 8402b29c280f..0b67c04e531b 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -16,12 +16,11 @@ #include /* - * Most architectures use ZONE_DMA for the first 16 Megabytes, but - * some use it for 
entirely different regions: + * Most architectures use ZONE_DMA for the first 16 Megabytes, but some use it + * it for entirely different regions. In that case the arch code needs to + * override the variable below for dma-direct to work properly. */ -#ifndef ARCH_ZONE_DMA_BITS -#define ARCH_ZONE_DMA_BITS 24 -#endif +unsigned int zone_dma_bits __ro_after_init = 24; static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size) { @@ -69,7 +68,7 @@ static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, * Note that GFP_DMA32 and GFP_DMA are no ops without the corresponding * zones. */ - if (*phys_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) + if (*phys_mask <= DMA_BIT_MASK(zone_dma_bits)) return GFP_DMA; if (*phys_mask <= DMA_BIT_MASK(32)) return GFP_DMA32; @@ -395,7 +394,7 @@ int dma_direct_supported(struct device *dev, u64 mask) u64 min_mask; if (IS_ENABLED(CONFIG_ZONE_DMA)) - min_mask = DMA_BIT_MASK(ARCH_ZONE_DMA_BITS); + min_mask = DMA_BIT_MASK(zone_dma_bits); else min_mask = DMA_BIT_MASK(32); From 478016c3839d53bd4c89af1f095195be543fa1a3 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Fri, 1 Nov 2019 15:20:22 +0000 Subject: [PATCH 30/48] docs/arm64: cpu-feature-registers: Rewrite bitfields that don't follow [e, s] Commit "docs/arm64: cpu-feature-registers: Documents missing visible fields" added bitfields following the convention [s, e]. However, the documentation is following [e, s] and so does the Arm ARM. Rewrite the bitfields to match the format [e, s].
Fixes: a8613e7070e7 ("docs/arm64: cpu-feature-registers: Documents missing visible fields") Signed-off-by: Julien Grall Signed-off-by: Catalin Marinas --- Documentation/arm64/cpu-feature-registers.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arm64/cpu-feature-registers.rst index ffcf4e2c71ef..7c40e4581bae 100644 --- a/Documentation/arm64/cpu-feature-registers.rst +++ b/Documentation/arm64/cpu-feature-registers.rst @@ -193,9 +193,9 @@ infrastructure: +------------------------------+---------+---------+ | Name | bits | visible | +------------------------------+---------+---------+ - | SB | [36-39] | y | + | SB | [39-36] | y | +------------------------------+---------+---------+ - | FRINTTS | [32-35] | y | + | FRINTTS | [35-32] | y | +------------------------------+---------+---------+ | GPI | [31-28] | y | +------------------------------+---------+---------+ From 32d1870877ba7675c642e903e5ef71c82a245325 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 3 Nov 2019 21:35:58 +0900 Subject: [PATCH 31/48] arm64: mm: simplify the page end calculation in __create_pgd_mapping() Calculate the page-aligned end address more simply. The local variable, "length" is unneeded. 
Reviewed-by: Mark Rutland Signed-off-by: Masahiro Yamada Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 60c929f3683b..a9f541912289 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -338,7 +338,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, phys_addr_t (*pgtable_alloc)(int), int flags) { - unsigned long addr, length, end, next; + unsigned long addr, end, next; pgd_t *pgdp = pgd_offset_raw(pgdir, virt); /* @@ -350,9 +350,8 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, phys &= PAGE_MASK; addr = virt & PAGE_MASK; - length = PAGE_ALIGN(size + (virt & ~PAGE_MASK)); + end = PAGE_ALIGN(virt + size); - end = addr + length; do { next = pgd_addr_end(addr, end); alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc, From 218564b164ad9d283d3cb3d5367705726123a610 Mon Sep 17 00:00:00 2001 From: Bhupesh Sharma Date: Tue, 5 Nov 2019 03:26:46 +0530 Subject: [PATCH 32/48] arm64: mm: Remove MAX_USER_VA_BITS definition commit 9b31cf493ffa ("arm64: mm: Introduce MAX_USER_VA_BITS definition") introduced the MAX_USER_VA_BITS definition, which was used to support the arm64 mm use-cases where the user-space could use 52-bit virtual addresses whereas the kernel-space could still use a maximum of 48-bit virtual addressing. But, now with commit b6d00d47e81a ("arm64: mm: Introduce 52-bit Kernel VAs"), we removed the 52-bit user/48-bit kernel kconfig option and hence there is no longer any scenario where user VA != kernel VA size (even with CONFIG_ARM64_FORCE_52BIT enabled, the same is true). Hence we can do away with the MAX_USER_VA_BITS macro as it is equal to VA_BITS (maximum VA space size) in all possible use-cases.
Note that even though the 'vabits_actual' value would be 48 for arm64 hardware which doesn't support the LVA-8.2 extension (even when CONFIG_ARM64_VA_BITS_52 is enabled), VA_BITS would still be set to a value of 52. Hence this change would be safe in all possible VA address space combinations. Cc: James Morse Cc: Will Deacon Cc: Steve Capper Cc: Ard Biesheuvel Cc: linux-kernel@vger.kernel.org Cc: kexec@lists.infradead.org Reviewed-by: Mark Rutland Signed-off-by: Bhupesh Sharma Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/memory.h | 6 ------ arch/arm64/include/asm/pgtable-hwdef.h | 2 +- arch/arm64/include/asm/processor.h | 2 +- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index b61b50bf68b1..4867e58dbc9c 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -69,12 +69,6 @@ #define KERNEL_START _text #define KERNEL_END _end -#ifdef CONFIG_ARM64_VA_BITS_52 -#define MAX_USER_VA_BITS 52 -#else -#define MAX_USER_VA_BITS VA_BITS -#endif - /* * Generic and tag-based KASAN require 1/8th and 1/16th of the kernel virtual * address space for the shadow region respectively. They can bloat the stack diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 3df60f97da1f..d9fbd433cc17 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -69,7 +69,7 @@ #define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS) #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#define PTRS_PER_PGD (1 << (MAX_USER_VA_BITS - PGDIR_SHIFT)) +#define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT)) /* * Section address mask and size definitions.
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 5623685c7d13..586fcd4b1965 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -9,7 +9,7 @@ #define __ASM_PROCESSOR_H #define KERNEL_DS UL(-1) -#define USER_DS ((UL(1) << MAX_USER_VA_BITS) - 1) +#define USER_DS ((UL(1) << VA_BITS) - 1) /* * On arm64 systems, unaligned accesses by the CPU are cheap, and so there is From 918e1946c8ac2c0473eefc1dc910780178490e95 Mon Sep 17 00:00:00 2001 From: Rich Wiley Date: Tue, 5 Nov 2019 10:45:10 -0800 Subject: [PATCH 33/48] arm64: kpti: Add NVIDIA's Carmel core to the KPTI whitelist NVIDIA Carmel CPUs don't implement ID_AA64PFR0_EL1.CSV3 but aren't susceptible to Meltdown, so add Carmel to kpti_safe_list[]. Signed-off-by: Rich Wiley Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index cabebf1a7976..b3eea965c930 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -977,6 +977,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), + MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL), { /* sentinel */ } }; char const *str = "kpti command line option"; From bff3b04460a80f425442fe8e5c6ee8c3ebef611f Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Thu, 7 Nov 2019 10:56:11 +0100 Subject: [PATCH 34/48] arm64: mm: reserve CMA and crashkernel in ZONE_DMA32 With the introduction of ZONE_DMA in arm64 we moved the default CMA and crashkernel reservation into that area. This caused a regression on big machines that need big CMA and crashkernel reservations. Note that ZONE_DMA is only 1GB big. Restore the previous behavior as the wide majority of devices are OK with reserving these in ZONE_DMA32. 
The ones that need them in ZONE_DMA will configure it explicitly. Fixes: 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32") Reported-by: Qian Cai Signed-off-by: Nicolas Saenz Julienne Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 35f27b839101..d933589c48e8 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -91,7 +91,7 @@ static void __init reserve_crashkernel(void) if (crash_base == 0) { /* Current arm64 boot protocol requires 2MB alignment */ - crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT, + crash_base = memblock_find_in_range(0, arm64_dma32_phys_limit, crash_size, SZ_2M); if (crash_base == 0) { pr_warn("cannot allocate crashkernel (size:0x%llx)\n", @@ -459,7 +459,7 @@ void __init arm64_memblock_init(void) high_memory = __va(memblock_end_of_DRAM() - 1) + 1; - dma_contiguous_reserve(arm64_dma_phys_limit ? : arm64_dma32_phys_limit); + dma_contiguous_reserve(arm64_dma32_phys_limit); } void __init bootmem_init(void) From 313a4db7f3387608a3ab4531ac8c0509a3d7617f Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 25 Oct 2019 18:57:06 +0100 Subject: [PATCH 35/48] kselftest: arm64: extend toplevel skeleton Makefile Modify KSFT arm64 toplevel Makefile to maintain arm64 kselftests organized by subsystem, keeping them into distinct subdirectories under arm64 custom KSFT directory: tools/testing/selftests/arm64/ Add to such toplevel Makefile a mechanism to guess the effective location of Kernel headers as installed by KSFT framework. Fit existing arm64 tags kselftest into this new schema moving them into their own subdirectory (arm64/tags). 
Reviewed-by: Dave Martin Signed-off-by: Cristian Marussi Signed-off-by: Catalin Marinas --- tools/testing/selftests/Makefile | 1 + tools/testing/selftests/arm64/Makefile | 64 +++++++++++++++++-- tools/testing/selftests/arm64/README | 25 ++++++++ .../selftests/arm64/{ => tags}/.gitignore | 0 tools/testing/selftests/arm64/tags/Makefile | 7 ++ .../arm64/{ => tags}/run_tags_test.sh | 0 .../selftests/arm64/{ => tags}/tags_test.c | 0 7 files changed, 92 insertions(+), 5 deletions(-) create mode 100644 tools/testing/selftests/arm64/README rename tools/testing/selftests/arm64/{ => tags}/.gitignore (100%) create mode 100644 tools/testing/selftests/arm64/tags/Makefile rename tools/testing/selftests/arm64/{ => tags}/run_tags_test.sh (100%) rename tools/testing/selftests/arm64/{ => tags}/tags_test.c (100%) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 4cdbae6f4e61..a740198e07d7 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 TARGETS = android +TARGETS += arm64 TARGETS += bpf TARGETS += breakpoints TARGETS += capabilities diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index f9f79fb272f0..cd27ca689224 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -1,12 +1,66 @@ # SPDX-License-Identifier: GPL-2.0 -# ARCH can be overridden by the user for cross compiling +# When ARCH not overridden for crosscompiling, lookup machine ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),aarch64 arm64)) -CFLAGS += -I../../../../usr/include/ -TEST_GEN_PROGS := tags_test -TEST_PROGS := run_tags_test.sh +ARM64_SUBTARGETS ?= tags +else +ARM64_SUBTARGETS := endif -include ../lib.mk +CFLAGS := -Wall -O2 -g + +# A proper top_srcdir is needed by KSFT(lib.mk) +top_srcdir = $(realpath ../../../../) + +# Additional include paths needed by kselftest.h and local 
headers +CFLAGS += -I$(top_srcdir)/tools/testing/selftests/ + +# Guessing where the Kernel headers could have been installed +# depending on ENV config +ifeq ($(KBUILD_OUTPUT),) +khdr_dir = $(top_srcdir)/usr/include +else +# the KSFT preferred location when KBUILD_OUTPUT is set +khdr_dir = $(KBUILD_OUTPUT)/kselftest/usr/include +endif + +CFLAGS += -I$(khdr_dir) + +export CFLAGS +export top_srcdir + +all: + @for DIR in $(ARM64_SUBTARGETS); do \ + BUILD_TARGET=$(OUTPUT)/$$DIR; \ + mkdir -p $$BUILD_TARGET; \ + make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \ + done + +install: all + @for DIR in $(ARM64_SUBTARGETS); do \ + BUILD_TARGET=$(OUTPUT)/$$DIR; \ + make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \ + done + +run_tests: all + @for DIR in $(ARM64_SUBTARGETS); do \ + BUILD_TARGET=$(OUTPUT)/$$DIR; \ + make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \ + done + +# Avoid any output on non arm64 on emit_tests +emit_tests: all + @for DIR in $(ARM64_SUBTARGETS); do \ + BUILD_TARGET=$(OUTPUT)/$$DIR; \ + make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \ + done + +clean: + @for DIR in $(ARM64_SUBTARGETS); do \ + BUILD_TARGET=$(OUTPUT)/$$DIR; \ + make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \ + done + +.PHONY: all clean install run_tests emit_tests diff --git a/tools/testing/selftests/arm64/README b/tools/testing/selftests/arm64/README new file mode 100644 index 000000000000..a1badd882102 --- /dev/null +++ b/tools/testing/selftests/arm64/README @@ -0,0 +1,25 @@ +KSelfTest ARM64 +=============== + +- These tests are arm64 specific and so not built or run but just skipped + completely when env-variable ARCH is found to be different than 'arm64' + and `uname -m` reports other than 'aarch64'. 
+ +- Holding true the above, ARM64 KSFT tests can be run within the KSelfTest + framework using standard Linux top-level-makefile targets: + + $ make TARGETS=arm64 kselftest-clean + $ make TARGETS=arm64 kselftest + + or + + $ make -C tools/testing/selftests TARGETS=arm64 \ + INSTALL_PATH= install + + or, alternatively, only specific arm64/ subtargets can be picked: + + $ make -C tools/testing/selftests TARGETS=arm64 ARM64_SUBTARGETS="tags signal" \ + INSTALL_PATH= install + + Further details on building and running KFST can be found in: + Documentation/dev-tools/kselftest.rst diff --git a/tools/testing/selftests/arm64/.gitignore b/tools/testing/selftests/arm64/tags/.gitignore similarity index 100% rename from tools/testing/selftests/arm64/.gitignore rename to tools/testing/selftests/arm64/tags/.gitignore diff --git a/tools/testing/selftests/arm64/tags/Makefile b/tools/testing/selftests/arm64/tags/Makefile new file mode 100644 index 000000000000..41cb75070511 --- /dev/null +++ b/tools/testing/selftests/arm64/tags/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += -I../../../../../usr/include/ +TEST_GEN_PROGS := tags_test +TEST_PROGS := run_tags_test.sh + +include ../../lib.mk diff --git a/tools/testing/selftests/arm64/run_tags_test.sh b/tools/testing/selftests/arm64/tags/run_tags_test.sh similarity index 100% rename from tools/testing/selftests/arm64/run_tags_test.sh rename to tools/testing/selftests/arm64/tags/run_tags_test.sh diff --git a/tools/testing/selftests/arm64/tags_test.c b/tools/testing/selftests/arm64/tags/tags_test.c similarity index 100% rename from tools/testing/selftests/arm64/tags_test.c rename to tools/testing/selftests/arm64/tags/tags_test.c From f96bf43403165e4478942b0998931b14621ec207 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 25 Oct 2019 18:57:07 +0100 Subject: [PATCH 36/48] kselftest: arm64: mangle_pstate_invalid_compat_toggle and common utils Add some arm64/signal specific boilerplate and utility code to 
help further testcases' development. Introduce also one simple testcase mangle_pstate_invalid_compat_toggle and some related helpers: it is a simple mangle testcase which messes with the ucontext_t from within the signal handler, trying to toggle PSTATE state bits to switch the system between 32bit/64bit execution state. Expects SIGSEGV on test PASS. Reviewed-by: Dave Martin Signed-off-by: Cristian Marussi Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/Makefile | 2 +- .../testing/selftests/arm64/signal/.gitignore | 3 + tools/testing/selftests/arm64/signal/Makefile | 32 ++ tools/testing/selftests/arm64/signal/README | 59 ++++ .../selftests/arm64/signal/test_signals.c | 29 ++ .../selftests/arm64/signal/test_signals.h | 93 ++++++ .../arm64/signal/test_signals_utils.c | 283 ++++++++++++++++++ .../arm64/signal/test_signals_utils.h | 19 ++ .../mangle_pstate_invalid_compat_toggle.c | 31 ++ .../arm64/signal/testcases/testcases.c | 150 ++++++++++ .../arm64/signal/testcases/testcases.h | 100 +++++++ 11 files changed, 800 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/arm64/signal/.gitignore create mode 100644 tools/testing/selftests/arm64/signal/Makefile create mode 100644 tools/testing/selftests/arm64/signal/README create mode 100644 tools/testing/selftests/arm64/signal/test_signals.c create mode 100644 tools/testing/selftests/arm64/signal/test_signals.h create mode 100644 tools/testing/selftests/arm64/signal/test_signals_utils.c create mode 100644 tools/testing/selftests/arm64/signal/test_signals_utils.h create mode 100644 tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_compat_toggle.c create mode 100644 tools/testing/selftests/arm64/signal/testcases/testcases.c create mode 100644 tools/testing/selftests/arm64/signal/testcases/testcases.h diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index cd27ca689224..93b567d23c8b 100644 --- 
a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -4,7 +4,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),aarch64 arm64)) -ARM64_SUBTARGETS ?= tags +ARM64_SUBTARGETS ?= tags signal else ARM64_SUBTARGETS := endif diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore new file mode 100644 index 000000000000..3c5b4e8ff894 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/.gitignore @@ -0,0 +1,3 @@ +mangle_* +fake_sigreturn_* +!*.[ch] diff --git a/tools/testing/selftests/arm64/signal/Makefile b/tools/testing/selftests/arm64/signal/Makefile new file mode 100644 index 000000000000..f78f5190e3d4 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/Makefile @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2019 ARM Limited + +# Additional include paths needed by kselftest.h and local headers +CFLAGS += -D_GNU_SOURCE -std=gnu99 -I. + +SRCS := $(filter-out testcases/testcases.c,$(wildcard testcases/*.c)) +PROGS := $(patsubst %.c,%,$(SRCS)) + +# Generated binaries to be installed by top KSFT script +TEST_GEN_PROGS := $(notdir $(PROGS)) + +# Get Kernel headers installed and use them. 
+KSFT_KHDR_INSTALL := 1 + +# Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list +# to account for any OUTPUT target-dirs optionally provided by +# the toplevel makefile +include ../../lib.mk + +$(TEST_GEN_PROGS): $(PROGS) + cp $(PROGS) $(OUTPUT)/ + +clean: + $(CLEAN) + rm -f $(PROGS) + +# Common test-unit targets to build common-layout test-cases executables +# Needs secondary expansion to properly include the testcase c-file in pre-reqs +.SECONDEXPANSION: +$(PROGS): test_signals.c test_signals_utils.c testcases/testcases.c $$@.c test_signals.h test_signals_utils.h testcases/testcases.h + $(CC) $(CFLAGS) $^ -o $@ diff --git a/tools/testing/selftests/arm64/signal/README b/tools/testing/selftests/arm64/signal/README new file mode 100644 index 000000000000..967a531b245c --- /dev/null +++ b/tools/testing/selftests/arm64/signal/README @@ -0,0 +1,59 @@ +KSelfTest arm64/signal/ +======================= + +Signals Tests ++++++++++++++ + +- Tests are built around a common main compilation unit: such shared main + enforces a standard sequence of operations needed to perform a single + signal-test (setup/trigger/run/result/cleanup) + +- The above mentioned ops are configurable on a test-by-test basis: each test + is described (and configured) using the descriptor signals.h::struct tdescr + +- Each signal testcase is compiled into its own executable: a separate + executable is used for each test since many tests complete successfully + by receiving some kind of fatal signal from the Kernel, so it's safer + to run each test unit in its own standalone process, so as to start each + test from a clean slate. 
+ +- New tests can be simply defined in testcases/ dir providing a proper struct + tdescr overriding all the defaults we wish to change (as of now providing a + custom run method is mandatory though) + +- Signals' test-cases hereafter defined belong currently to two + principal families: + + - 'mangle_' tests: a real signal (SIGUSR1) is raised and used as a trigger + and then the test case code modifies the signal frame from inside the + signal handler itself. + + - 'fake_sigreturn_' tests: a brand new custom artificial sigframe structure + is placed on the stack and a sigreturn syscall is called to simulate a + real signal return. This kind of tests does not use a trigger usually and + they are just fired using some simple included assembly trampoline code. + + - Most of these tests are successfully passing if the process gets killed by + some fatal signal: usually SIGSEGV or SIGBUS. Since while writing this + kind of tests it is extremely easy in fact to end-up injecting other + unrelated SEGV bugs in the testcases, it becomes extremely tricky to + be really sure that the tests are really addressing what they are meant + to address and they are not instead falling apart due to unplanned bugs + in the test code. + In order to alleviate the misery of the life of such test-developer, a few + helpers are provided: + + - a couple of ASSERT_BAD/GOOD_CONTEXT() macros to easily parse a ucontext_t + and verify if it is indeed GOOD or BAD (depending on what we were + expecting), using the same logic/perspective as in the arm64 Kernel signals + routines. + + - a sanity mechanism to be used in 'fake_sigreturn_'-alike tests: enabled by + default it takes care to verify that the test-execution had at least + successfully progressed up to the stage of triggering the fake sigreturn + call. 
+ + In both cases test results are expected in terms of: + - some fatal signal sent by the Kernel to the test process + or + - analyzing some final regs state diff --git a/tools/testing/selftests/arm64/signal/test_signals.c b/tools/testing/selftests/arm64/signal/test_signals.c new file mode 100644 index 000000000000..cb970346b280 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/test_signals.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Generic test wrapper for arm64 signal tests. + * + * Each test provides its own tde struct tdescr descriptor to link with + * this wrapper. Framework provides common helpers. + */ +#include + +#include "test_signals.h" +#include "test_signals_utils.h" + +struct tdescr *current; + +int main(int argc, char *argv[]) +{ + current = &tde; + + ksft_print_msg("%s :: %s\n", current->name, current->descr); + if (test_setup(current)) { + test_run(current); + test_result(current); + test_cleanup(current); + } + + return current->pass ? KSFT_PASS : KSFT_FAIL; +} diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h new file mode 100644 index 000000000000..d730e9041da9 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/test_signals.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019 ARM Limited */ + +#ifndef __TEST_SIGNALS_H__ +#define __TEST_SIGNALS_H__ + +#include +#include +#include + +/* + * Using ARCH specific and sanitized Kernel headers installed by KSFT + * framework since we asked for it by setting flag KSFT_KHDR_INSTALL + * in our Makefile. + */ +#include +#include + +#define __stringify_1(x...) #x +#define __stringify(x...) 
__stringify_1(x) + +#define get_regval(regname, out) \ +{ \ + asm volatile("mrs %0, " __stringify(regname) \ + : "=r" (out) \ + : \ + : "memory"); \ +} + +/* + * Feature flags used in tdescr.feats_required to specify + * any feature required by the test + */ +enum { + FSSBS_BIT, + FMAX_END +}; + +#define FEAT_SSBS (1UL << FSSBS_BIT) + +/* + * A descriptor used to describe and configure a test case. + * Fields with a non-trivial meaning are described inline in the following. + */ +struct tdescr { + /* KEEP THIS FIELD FIRST for easier lookup from assembly */ + void *token; + /* when disabled token based sanity checking is skipped in handler */ + bool sanity_disabled; + /* just a name for the test-case; mandatory field */ + char *name; + char *descr; + unsigned long feats_required; + /* bitmask of effectively supported feats: populated at run-time */ + unsigned long feats_supported; + bool initialized; + unsigned int minsigstksz; + /* signum used as a test trigger. Zero if no trigger-signal is used */ + int sig_trig; + /* + * signum considered as a successful test completion. + * Zero when no signal is expected on success + */ + int sig_ok; + /* signum expected on unsupported CPU features. 
*/ + int sig_unsupp; + /* a timeout in second for test completion */ + unsigned int timeout; + bool triggered; + bool pass; + /* optional sa_flags for the installed handler */ + int sa_flags; + ucontext_t saved_uc; + /* optional test private data */ + void *priv; + + /* a custom setup function to be called before test starts */ + int (*setup)(struct tdescr *td); + /* a custom cleanup function called before test exits */ + void (*cleanup)(struct tdescr *td); + /* an optional function to be used as a trigger for test starting */ + int (*trigger)(struct tdescr *td); + /* + * the actual test-core: invoked differently depending on the + * presence of the trigger function above; this is mandatory + */ + int (*run)(struct tdescr *td, siginfo_t *si, ucontext_t *uc); + /* an optional function for custom results' processing */ + void (*check_result)(struct tdescr *td); +}; + +extern struct tdescr tde; +#endif diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c new file mode 100644 index 000000000000..fbce41750590 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c @@ -0,0 +1,283 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2019 ARM Limited */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test_signals.h" +#include "test_signals_utils.h" +#include "testcases/testcases.h" + +extern struct tdescr *current; + +static char const *const feats_names[FMAX_END] = { + " SSBS ", +}; + +#define MAX_FEATS_SZ 128 +static char feats_string[MAX_FEATS_SZ]; + +static inline char *feats_to_string(unsigned long feats) +{ + size_t flen = MAX_FEATS_SZ - 1; + + for (int i = 0; i < FMAX_END; i++) { + if (feats & (1UL << i)) { + size_t tlen = strlen(feats_names[i]); + + assert(flen > tlen); + flen -= tlen; + strncat(feats_string, feats_names[i], flen); + } + } + + return feats_string; +} + +static void unblock_signal(int 
signum) +{ + sigset_t sset; + + sigemptyset(&sset); + sigaddset(&sset, signum); + sigprocmask(SIG_UNBLOCK, &sset, NULL); +} + +static void default_result(struct tdescr *td, bool force_exit) +{ + if (td->pass) + fprintf(stderr, "==>> completed. PASS(1)\n"); + else + fprintf(stdout, "==>> completed. FAIL(0)\n"); + if (force_exit) + exit(td->pass ? EXIT_SUCCESS : EXIT_FAILURE); +} + +/* + * The following handle_signal_* helpers are used by main default_handler + * and are meant to return true when signal is handled successfully: + * when false is returned instead, it means that the signal was somehow + * unexpected in that context and it was NOT handled; default_handler will + * take care of such unexpected situations. + */ + +static bool handle_signal_unsupported(struct tdescr *td, + siginfo_t *si, void *uc) +{ + if (feats_ok(td)) + return false; + + /* Mangling PC to avoid loops on original SIGILL */ + ((ucontext_t *)uc)->uc_mcontext.pc += 4; + + if (!td->initialized) { + fprintf(stderr, + "Got SIG_UNSUPP @test_init. Ignore.\n"); + } else { + fprintf(stderr, + "-- RX SIG_UNSUPP on unsupported feat...OK\n"); + td->pass = 1; + default_result(current, 1); + } + + return true; +} + +static bool handle_signal_trigger(struct tdescr *td, + siginfo_t *si, void *uc) +{ + td->triggered = 1; + /* ->run was asserted NON-NULL in test_setup() already */ + td->run(td, si, uc); + + return true; +} + +static bool handle_signal_ok(struct tdescr *td, + siginfo_t *si, void *uc) +{ + /* + * it's a bug in the test code when this assert fail: + * if sig_trig was defined, it must have been used before getting here. 
+ */ + assert(!td->sig_trig || td->triggered); + fprintf(stderr, + "SIG_OK -- SP:0x%llX si_addr@:%p si_code:%d token@:%p offset:%ld\n", + ((ucontext_t *)uc)->uc_mcontext.sp, + si->si_addr, si->si_code, td->token, td->token - si->si_addr); + /* + * fake_sigreturn tests, which have sanity_enabled=1, set, at the very + * last time, the token field to the SP address used to place the fake + * sigframe: so token==0 means we never made it to the end, + * segfaulting well-before, and the test is possibly broken. + */ + if (!td->sanity_disabled && !td->token) { + fprintf(stdout, + "current->token ZEROED...test is probably broken!\n"); + abort(); + } + /* + * Trying to narrow down the SEGV to the ones generated by Kernel itself + * via arm64_notify_segfault(). This is a best-effort check anyway, and + * the si_code check may need to change if this aspect of the kernel + * ABI changes. + */ + if (td->sig_ok == SIGSEGV && si->si_code != SEGV_ACCERR) { + fprintf(stdout, + "si_code != SEGV_ACCERR...test is probably broken!\n"); + abort(); + } + td->pass = 1; + /* + * Some tests can lead to SEGV loops: in such a case we want to + * terminate immediately exiting straight away; some others are not + * supposed to outlive the signal handler code, due to the content of + * the fake sigframe which caused the signal itself. 
+ */ + default_result(current, 1); + + return true; +} + +static void default_handler(int signum, siginfo_t *si, void *uc) +{ + if (current->sig_unsupp && signum == current->sig_unsupp && + handle_signal_unsupported(current, si, uc)) { + fprintf(stderr, "Handled SIG_UNSUPP\n"); + } else if (current->sig_trig && signum == current->sig_trig && + handle_signal_trigger(current, si, uc)) { + fprintf(stderr, "Handled SIG_TRIG\n"); + } else if (current->sig_ok && signum == current->sig_ok && + handle_signal_ok(current, si, uc)) { + fprintf(stderr, "Handled SIG_OK\n"); + } else { + if (signum == SIGALRM && current->timeout) { + fprintf(stderr, "-- Timeout !\n"); + } else { + fprintf(stderr, + "-- RX UNEXPECTED SIGNAL: %d\n", signum); + } + default_result(current, 1); + } +} + +static int default_setup(struct tdescr *td) +{ + struct sigaction sa; + + sa.sa_sigaction = default_handler; + sa.sa_flags = SA_SIGINFO | SA_RESTART; + sa.sa_flags |= td->sa_flags; + sigemptyset(&sa.sa_mask); + /* uncatchable signals naturally skipped ... */ + for (int sig = 1; sig < 32; sig++) + sigaction(sig, &sa, NULL); + /* + * RT Signals default disposition is Term but they cannot be + * generated by the Kernel in response to our tests; so just catch + * them all and report them as UNEXPECTED signals. 
+ */ + for (int sig = SIGRTMIN; sig <= SIGRTMAX; sig++) + sigaction(sig, &sa, NULL); + + /* just in case...unblock explicitly all we need */ + if (td->sig_trig) + unblock_signal(td->sig_trig); + if (td->sig_ok) + unblock_signal(td->sig_ok); + if (td->sig_unsupp) + unblock_signal(td->sig_unsupp); + + if (td->timeout) { + unblock_signal(SIGALRM); + alarm(td->timeout); + } + fprintf(stderr, "Registered handlers for all signals.\n"); + + return 1; +} + +static inline int default_trigger(struct tdescr *td) +{ + return !raise(td->sig_trig); +} + +static int test_init(struct tdescr *td) +{ + td->minsigstksz = getauxval(AT_MINSIGSTKSZ); + if (!td->minsigstksz) + td->minsigstksz = MINSIGSTKSZ; + fprintf(stderr, "Detected MINSTKSIGSZ:%d\n", td->minsigstksz); + + if (td->feats_required) { + td->feats_supported = 0; + /* + * Checking for CPU required features using both the + * auxval and the arm64 MRS Emulation to read sysregs. + */ + if (getauxval(AT_HWCAP) & HWCAP_SSBS) + td->feats_supported |= FEAT_SSBS; + if (feats_ok(td)) + fprintf(stderr, + "Required Features: [%s] supported\n", + feats_to_string(td->feats_required & + td->feats_supported)); + else + fprintf(stderr, + "Required Features: [%s] NOT supported\n", + feats_to_string(td->feats_required & + ~td->feats_supported)); + } + + td->initialized = 1; + return 1; +} + +int test_setup(struct tdescr *td) +{ + /* assert core invariants symptom of a rotten testcase */ + assert(current); + assert(td); + assert(td->name); + assert(td->run); + + if (!test_init(td)) + return 0; + + if (td->setup) + return td->setup(td); + else + return default_setup(td); +} + +int test_run(struct tdescr *td) +{ + if (td->sig_trig) { + if (td->trigger) + return td->trigger(td); + else + return default_trigger(td); + } else { + return td->run(td, NULL, NULL); + } +} + +void test_result(struct tdescr *td) +{ + if (td->check_result) + td->check_result(td); + default_result(td, 0); +} + +void test_cleanup(struct tdescr *td) +{ + if (td->cleanup) + 
td->cleanup(td); +} diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h new file mode 100644 index 000000000000..47a7592b7c53 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019 ARM Limited */ + +#ifndef __TEST_SIGNALS_UTILS_H__ +#define __TEST_SIGNALS_UTILS_H__ + +#include "test_signals.h" + +int test_setup(struct tdescr *td); +void test_cleanup(struct tdescr *td); +int test_run(struct tdescr *td); +void test_result(struct tdescr *td); + +static inline bool feats_ok(struct tdescr *td) +{ + return (td->feats_required & td->feats_supported) == td->feats_required; +} + +#endif diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_compat_toggle.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_compat_toggle.c new file mode 100644 index 000000000000..2cb118b0ba05 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_compat_toggle.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Try to mangle the ucontext from inside a signal handler, toggling + * the execution state bit: this attempt must be spotted by Kernel and + * the test case is expected to be terminated via SEGV. 
+ */ + +#include "test_signals_utils.h" +#include "testcases.h" + +static int mangle_invalid_pstate_run(struct tdescr *td, siginfo_t *si, + ucontext_t *uc) +{ + ASSERT_GOOD_CONTEXT(uc); + + /* This config should trigger a SIGSEGV by Kernel */ + uc->uc_mcontext.pstate ^= PSR_MODE32_BIT; + + return 1; +} + +struct tdescr tde = { + .sanity_disabled = true, + .name = "MANGLE_PSTATE_INVALID_STATE_TOGGLE", + .descr = "Mangling uc_mcontext with INVALID STATE_TOGGLE", + .sig_trig = SIGUSR1, + .sig_ok = SIGSEGV, + .run = mangle_invalid_pstate_run, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c new file mode 100644 index 000000000000..1914a01222a1 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2019 ARM Limited */ +#include "testcases.h" + +struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic, + size_t resv_sz, size_t *offset) +{ + size_t offs = 0; + struct _aarch64_ctx *found = NULL; + + if (!head || resv_sz < HDR_SZ) + return found; + + while (offs <= resv_sz - HDR_SZ && + head->magic != magic && head->magic) { + offs += head->size; + head = GET_RESV_NEXT_HEAD(head); + } + if (head->magic == magic) { + found = head; + if (offset) + *offset = offs; + } + + return found; +} + +bool validate_extra_context(struct extra_context *extra, char **err) +{ + struct _aarch64_ctx *term; + + if (!extra || !err) + return false; + + fprintf(stderr, "Validating EXTRA...\n"); + term = GET_RESV_NEXT_HEAD(extra); + if (!term || term->magic || term->size) { + *err = "Missing terminator after EXTRA context"; + return false; + } + if (extra->datap & 0x0fUL) + *err = "Extra DATAP misaligned"; + else if (extra->size & 0x0fUL) + *err = "Extra SIZE misaligned"; + else if (extra->datap != (uint64_t)term + sizeof(*term)) + *err = "Extra DATAP misplaced (not contiguos)"; + if (*err) + return 
false; + + return true; +} + +bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) +{ + bool terminated = false; + size_t offs = 0; + int flags = 0; + struct extra_context *extra = NULL; + struct _aarch64_ctx *head = + (struct _aarch64_ctx *)uc->uc_mcontext.__reserved; + + if (!err) + return false; + /* Walk till the end terminator verifying __reserved contents */ + while (head && !terminated && offs < resv_sz) { + if ((uint64_t)head & 0x0fUL) { + *err = "Misaligned HEAD"; + return false; + } + + switch (head->magic) { + case 0: + if (head->size) + *err = "Bad size for terminator"; + else + terminated = true; + break; + case FPSIMD_MAGIC: + if (flags & FPSIMD_CTX) + *err = "Multiple FPSIMD_MAGIC"; + else if (head->size != + sizeof(struct fpsimd_context)) + *err = "Bad size for fpsimd_context"; + flags |= FPSIMD_CTX; + break; + case ESR_MAGIC: + if (head->size != sizeof(struct esr_context)) + *err = "Bad size for esr_context"; + break; + case SVE_MAGIC: + if (flags & SVE_CTX) + *err = "Multiple SVE_MAGIC"; + else if (head->size != + sizeof(struct sve_context)) + *err = "Bad size for sve_context"; + flags |= SVE_CTX; + break; + case EXTRA_MAGIC: + if (flags & EXTRA_CTX) + *err = "Multiple EXTRA_MAGIC"; + else if (head->size != + sizeof(struct extra_context)) + *err = "Bad size for extra_context"; + flags |= EXTRA_CTX; + extra = (struct extra_context *)head; + break; + case KSFT_BAD_MAGIC: + /* + * This is a BAD magic header defined + * artificially by a testcase and surely + * unknown to the Kernel parse_user_sigframe(). + * It MUST cause a Kernel induced SEGV + */ + *err = "BAD MAGIC !"; + break; + default: + /* + * A still unknown Magic: potentially freshly added + * to the Kernel code and still unknown to the + * tests. 
+ */ + fprintf(stdout, + "SKIP Unknown MAGIC: 0x%X - Is KSFT arm64/signal up to date ?\n", + head->magic); + break; + } + + if (*err) + return false; + + offs += head->size; + if (resv_sz < offs + sizeof(*head)) { + *err = "HEAD Overrun"; + return false; + } + + if (flags & EXTRA_CTX) + if (!validate_extra_context(extra, err)) + return false; + + head = GET_RESV_NEXT_HEAD(head); + } + + if (terminated && !(flags & FPSIMD_CTX)) { + *err = "Missing FPSIMD"; + return false; + } + + return true; +} diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h new file mode 100644 index 000000000000..04987f7870bc --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019 ARM Limited */ +#ifndef __TESTCASES_H__ +#define __TESTCASES_H__ + +#include +#include +#include +#include +#include +#include +#include + +/* Architecture specific sigframe definitions */ +#include + +#define FPSIMD_CTX (1 << 0) +#define SVE_CTX (1 << 1) +#define EXTRA_CTX (1 << 2) + +#define KSFT_BAD_MAGIC 0xdeadbeef + +#define HDR_SZ \ + sizeof(struct _aarch64_ctx) + +#define GET_SF_RESV_HEAD(sf) \ + (struct _aarch64_ctx *)(&(sf).uc.uc_mcontext.__reserved) + +#define GET_SF_RESV_SIZE(sf) \ + sizeof((sf).uc.uc_mcontext.__reserved) + +#define GET_UCP_RESV_SIZE(ucp) \ + sizeof((ucp)->uc_mcontext.__reserved) + +#define ASSERT_BAD_CONTEXT(uc) do { \ + char *err = NULL; \ + if (!validate_reserved((uc), GET_UCP_RESV_SIZE((uc)), &err)) { \ + if (err) \ + fprintf(stderr, \ + "Using badly built context - ERR: %s\n",\ + err); \ + } else { \ + abort(); \ + } \ +} while (0) + +#define ASSERT_GOOD_CONTEXT(uc) do { \ + char *err = NULL; \ + if (!validate_reserved((uc), GET_UCP_RESV_SIZE((uc)), &err)) { \ + if (err) \ + fprintf(stderr, \ + "Detected BAD context - ERR: %s\n", err);\ + abort(); \ + } else { \ + fprintf(stderr, "uc context 
validated.\n"); \ + } \ +} while (0) + +/* + * A simple record-walker for __reserved area: it walks through assuming + * only to find a proper struct __aarch64_ctx header descriptor. + * + * Instead it makes no assumptions on the content and ordering of the + * records, any needed bounds checking must be enforced by the caller + * if wanted: this way can be used by caller on any maliciously built bad + * contexts. + * + * head->size accounts both for payload and header _aarch64_ctx size ! + */ +#define GET_RESV_NEXT_HEAD(h) \ + (struct _aarch64_ctx *)((char *)(h) + (h)->size) + +struct fake_sigframe { + siginfo_t info; + ucontext_t uc; +}; + + +bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err); + +bool validate_extra_context(struct extra_context *extra, char **err); + +struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic, + size_t resv_sz, size_t *offset); + +static inline struct _aarch64_ctx *get_terminator(struct _aarch64_ctx *head, + size_t resv_sz, + size_t *offset) +{ + return get_header(head, 0, resv_sz, offset); +} + +static inline void write_terminator_record(struct _aarch64_ctx *tail) +{ + if (tail) { + tail->magic = 0; + tail->size = 0; + } +} +#endif From 0fc89f08df8cf9878eb0a1a957f5948f831fbb8c Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 25 Oct 2019 18:57:08 +0100 Subject: [PATCH 37/48] kselftest: arm64: mangle_pstate_invalid_daif_bits Add a simple mangle testcase which messes with the ucontext_t from within the signal handler, trying to set PSTATE DAIF bits to an invalid value (masking everything). Expects SIGSEGV on test PASS. 
Reviewed-by: Dave Martin Signed-off-by: Cristian Marussi Signed-off-by: Catalin Marinas --- .../mangle_pstate_invalid_daif_bits.c | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_daif_bits.c diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_daif_bits.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_daif_bits.c new file mode 100644 index 000000000000..434b82597007 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_daif_bits.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Try to mangle the ucontext from inside a signal handler, mangling the + * DAIF bits in an illegal manner: this attempt must be spotted by Kernel + * and the test case is expected to be terminated via SEGV. + * + */ + +#include "test_signals_utils.h" +#include "testcases.h" + +static int mangle_invalid_pstate_run(struct tdescr *td, siginfo_t *si, + ucontext_t *uc) +{ + ASSERT_GOOD_CONTEXT(uc); + + /* + * This config should trigger a SIGSEGV by Kernel when it checks + * the sigframe consistency in valid_user_regs() routine. 
+ */ + uc->uc_mcontext.pstate |= PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT; + + return 1; +} + +struct tdescr tde = { + .sanity_disabled = true, + .name = "MANGLE_PSTATE_INVALID_DAIF_BITS", + .descr = "Mangling uc_mcontext with INVALID DAIF_BITS", + .sig_trig = SIGUSR1, + .sig_ok = SIGSEGV, + .run = mangle_invalid_pstate_run, +}; From c2820987047c08d813396b2b1d8ac2894ba0bd5f Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 25 Oct 2019 18:57:09 +0100 Subject: [PATCH 38/48] kselftest: arm64: mangle_pstate_invalid_mode_el[123][ht] Add 6 simple mangle testcases that mess with the ucontext_t from within the signal handler, trying to toggle PSTATE mode bits to trick the system into switching to EL1/EL2/EL3 using both SP_EL0(t) and SP_ELx(h). Expects SIGSEGV on test PASS. Reviewed-by: Dave Martin Signed-off-by: Cristian Marussi Signed-off-by: Catalin Marinas --- .../mangle_pstate_invalid_mode_el1h.c | 15 ++++++++++ .../mangle_pstate_invalid_mode_el1t.c | 15 ++++++++++ .../mangle_pstate_invalid_mode_el2h.c | 15 ++++++++++ .../mangle_pstate_invalid_mode_el2t.c | 15 ++++++++++ .../mangle_pstate_invalid_mode_el3h.c | 15 ++++++++++ .../mangle_pstate_invalid_mode_el3t.c | 15 ++++++++++ .../mangle_pstate_invalid_mode_template.h | 28 +++++++++++++++++++ 7 files changed, 118 insertions(+) create mode 100644 tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1h.c create mode 100644 tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1t.c create mode 100644 tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2h.c create mode 100644 tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2t.c create mode 100644 tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3h.c create mode 100644 tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3t.c create mode 100644 
tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_template.h diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1h.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1h.c new file mode 100644 index 000000000000..95f821abdf46 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1h.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Try to mangle the ucontext from inside a signal handler, toggling + * the mode bit to escalate exception level: this attempt must be spotted + * by Kernel and the test case is expected to be terminated via SEGV. + */ + +#include "test_signals_utils.h" +#include "testcases.h" + +#include "mangle_pstate_invalid_mode_template.h" + +DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(1h); diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1t.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1t.c new file mode 100644 index 000000000000..cc222d8a618a --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1t.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Try to mangle the ucontext from inside a signal handler, toggling + * the mode bit to escalate exception level: this attempt must be spotted + * by Kernel and the test case is expected to be terminated via SEGV. 
+ */ + +#include "test_signals_utils.h" +#include "testcases.h" + +#include "mangle_pstate_invalid_mode_template.h" + +DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(1t); diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2h.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2h.c new file mode 100644 index 000000000000..2188add7d28c --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2h.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Try to mangle the ucontext from inside a signal handler, toggling + * the mode bit to escalate exception level: this attempt must be spotted + * by Kernel and the test case is expected to be terminated via SEGV. + */ + +#include "test_signals_utils.h" +#include "testcases.h" + +#include "mangle_pstate_invalid_mode_template.h" + +DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(2h); diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2t.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2t.c new file mode 100644 index 000000000000..df32dd5a479c --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2t.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Try to mangle the ucontext from inside a signal handler, toggling + * the mode bit to escalate exception level: this attempt must be spotted + * by Kernel and the test case is expected to be terminated via SEGV. 
+ */ + +#include "test_signals_utils.h" +#include "testcases.h" + +#include "mangle_pstate_invalid_mode_template.h" + +DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(2t); diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3h.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3h.c new file mode 100644 index 000000000000..9e6829b7e5db --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3h.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Try to mangle the ucontext from inside a signal handler, toggling + * the mode bit to escalate exception level: this attempt must be spotted + * by Kernel and the test case is expected to be terminated via SEGV. + */ + +#include "test_signals_utils.h" +#include "testcases.h" + +#include "mangle_pstate_invalid_mode_template.h" + +DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(3h); diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3t.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3t.c new file mode 100644 index 000000000000..5685a4f10d06 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3t.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Try to mangle the ucontext from inside a signal handler, toggling + * the mode bit to escalate exception level: this attempt must be spotted + * by Kernel and the test case is expected to be terminated via SEGV. 
+ */ + +#include "test_signals_utils.h" +#include "testcases.h" + +#include "mangle_pstate_invalid_mode_template.h" + +DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(3t); diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_template.h b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_template.h new file mode 100644 index 000000000000..f5bf1804d858 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_template.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 ARM Limited + * + * Utility macro to ease definition of testcases toggling mode EL + */ + +#define DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(_mode) \ + \ +static int mangle_invalid_pstate_run(struct tdescr *td, siginfo_t *si, \ + ucontext_t *uc) \ +{ \ + ASSERT_GOOD_CONTEXT(uc); \ + \ + uc->uc_mcontext.pstate &= ~PSR_MODE_MASK; \ + uc->uc_mcontext.pstate |= PSR_MODE_EL ## _mode; \ + \ + return 1; \ +} \ + \ +struct tdescr tde = { \ + .sanity_disabled = true, \ + .name = "MANGLE_PSTATE_INVALID_MODE_EL"#_mode, \ + .descr = "Mangling uc_mcontext INVALID MODE EL"#_mode, \ + .sig_trig = SIGUSR1, \ + .sig_ok = SIGSEGV, \ + .run = mangle_invalid_pstate_run, \ +} From 837387a2cbc719667822856beabac127921a36c4 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 25 Oct 2019 18:57:10 +0100 Subject: [PATCH 39/48] kselftest: arm64: extend test_init functionalities Extend signal testing framework to allow the definition of a custom per test initialization function to be run at the end of the common test_init after test setup phase has completed and before test-run routine. This custom per-test initialization function also enables the test writer to decide on its own when forcibly skip the test itself using standard KSFT mechanism. 
Reviewed-by: Dave Martin Signed-off-by: Cristian Marussi Signed-off-by: Catalin Marinas --- .../selftests/arm64/signal/test_signals.c | 6 ++-- .../selftests/arm64/signal/test_signals.h | 7 +++-- .../arm64/signal/test_signals_utils.c | 30 ++++++++++++++----- .../arm64/signal/test_signals_utils.h | 1 + 4 files changed, 31 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/arm64/signal/test_signals.c b/tools/testing/selftests/arm64/signal/test_signals.c index cb970346b280..416b1ff43199 100644 --- a/tools/testing/selftests/arm64/signal/test_signals.c +++ b/tools/testing/selftests/arm64/signal/test_signals.c @@ -19,11 +19,11 @@ int main(int argc, char *argv[]) current = &tde; ksft_print_msg("%s :: %s\n", current->name, current->descr); - if (test_setup(current)) { + if (test_setup(current) && test_init(current)) { test_run(current); - test_result(current); test_cleanup(current); } + test_result(current); - return current->pass ? KSFT_PASS : KSFT_FAIL; + return current->result; } diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h index d730e9041da9..c431e7b3e46c 100644 --- a/tools/testing/selftests/arm64/signal/test_signals.h +++ b/tools/testing/selftests/arm64/signal/test_signals.h @@ -68,17 +68,20 @@ struct tdescr { unsigned int timeout; bool triggered; bool pass; + unsigned int result; /* optional sa_flags for the installed handler */ int sa_flags; ucontext_t saved_uc; /* optional test private data */ void *priv; - /* a custom setup function to be called before test starts */ + /* a custom setup: called alternatively to default_setup */ int (*setup)(struct tdescr *td); + /* a custom init: called by default test init after test_setup */ + bool (*init)(struct tdescr *td); /* a custom cleanup function called before test exits */ void (*cleanup)(struct tdescr *td); - /* an optional function to be used as a trigger for test starting */ + /* an optional function to be used as a trigger for 
starting test */ int (*trigger)(struct tdescr *td); /* * the actual test-core: invoked differently depending on the diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c index fbce41750590..76eaa505a789 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.c +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c @@ -11,6 +11,8 @@ #include #include +#include + #include "test_signals.h" #include "test_signals_utils.h" #include "testcases/testcases.h" @@ -52,12 +54,18 @@ static void unblock_signal(int signum) static void default_result(struct tdescr *td, bool force_exit) { - if (td->pass) + if (td->result == KSFT_SKIP) { + fprintf(stderr, "==>> completed. SKIP.\n"); + } else if (td->pass) { fprintf(stderr, "==>> completed. PASS(1)\n"); - else + td->result = KSFT_PASS; + } else { fprintf(stdout, "==>> completed. FAIL(0)\n"); + td->result = KSFT_FAIL; + } + if (force_exit) - exit(td->pass ? EXIT_SUCCESS : EXIT_FAILURE); + exit(td->result); } /* @@ -209,7 +217,7 @@ static inline int default_trigger(struct tdescr *td) return !raise(td->sig_trig); } -static int test_init(struct tdescr *td) +int test_init(struct tdescr *td) { td->minsigstksz = getauxval(AT_MINSIGSTKSZ); if (!td->minsigstksz) @@ -236,7 +244,14 @@ static int test_init(struct tdescr *td) ~td->feats_supported)); } + /* Perform test specific additional initialization */ + if (td->init && !td->init(td)) { + fprintf(stderr, "FAILED Testcase initialization.\n"); + return 0; + } td->initialized = 1; + fprintf(stderr, "Testcase initialized.\n"); + return 1; } @@ -248,9 +263,8 @@ int test_setup(struct tdescr *td) assert(td->name); assert(td->run); - if (!test_init(td)) - return 0; - + /* Default result is FAIL if test setup fails */ + td->result = KSFT_FAIL; if (td->setup) return td->setup(td); else @@ -271,7 +285,7 @@ int test_run(struct tdescr *td) void test_result(struct tdescr *td) { - if (td->check_result) + if 
(td->initialized && td->result != KSFT_SKIP && td->check_result) td->check_result(td); default_result(td, 0); } diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h index 47a7592b7c53..5e3a2b7aaa8b 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.h +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h @@ -6,6 +6,7 @@ #include "test_signals.h" +int test_init(struct tdescr *td); int test_setup(struct tdescr *td); void test_cleanup(struct tdescr *td); int test_run(struct tdescr *td); From 34306b05d3106447c87d29d262e824ca4a30c569 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 25 Oct 2019 18:57:11 +0100 Subject: [PATCH 40/48] kselftest: arm64: add helper get_current_context Introduce a new common utility function get_current_context() which can be used to grab a ucontext without the help of libc, and also to detect if such ucontext has been successfully used by placing it on the stack as a fake sigframe. 
Reviewed-by: Dave Martin Signed-off-by: Cristian Marussi Signed-off-by: Catalin Marinas --- .../selftests/arm64/signal/test_signals.h | 6 +- .../arm64/signal/test_signals_utils.c | 31 ++++++ .../arm64/signal/test_signals_utils.h | 98 +++++++++++++++++++ 3 files changed, 134 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h index c431e7b3e46c..f96baf1cef1a 100644 --- a/tools/testing/selftests/arm64/signal/test_signals.h +++ b/tools/testing/selftests/arm64/signal/test_signals.h @@ -72,8 +72,12 @@ struct tdescr { /* optional sa_flags for the installed handler */ int sa_flags; ucontext_t saved_uc; + /* used by get_current_ctx() */ + size_t live_sz; + ucontext_t *live_uc; + volatile sig_atomic_t live_uc_valid; /* optional test private data */ - void *priv; + void *priv; /* a custom setup: called alternatively to default_setup */ int (*setup)(struct tdescr *td); diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c index 76eaa505a789..2de6e5ed5e25 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.c +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c @@ -11,14 +11,19 @@ #include #include +#include + #include #include "test_signals.h" #include "test_signals_utils.h" #include "testcases/testcases.h" + extern struct tdescr *current; +static int sig_copyctx = SIGTRAP; + static char const *const feats_names[FMAX_END] = { " SSBS ", }; @@ -154,6 +159,20 @@ static bool handle_signal_ok(struct tdescr *td, return true; } +static bool handle_signal_copyctx(struct tdescr *td, + siginfo_t *si, void *uc) +{ + /* Mangling PC to avoid loops on original BRK instr */ + ((ucontext_t *)uc)->uc_mcontext.pc += 4; + memcpy(td->live_uc, uc, td->live_sz); + ASSERT_GOOD_CONTEXT(td->live_uc); + td->live_uc_valid = 1; + fprintf(stderr, + "GOOD CONTEXT grabbed from sig_copyctx handler\n"); + + 
return true; +} + static void default_handler(int signum, siginfo_t *si, void *uc) { if (current->sig_unsupp && signum == current->sig_unsupp && @@ -165,6 +184,9 @@ static void default_handler(int signum, siginfo_t *si, void *uc) } else if (current->sig_ok && signum == current->sig_ok && handle_signal_ok(current, si, uc)) { fprintf(stderr, "Handled SIG_OK\n"); + } else if (signum == sig_copyctx && current->live_uc && + handle_signal_copyctx(current, si, uc)) { + fprintf(stderr, "Handled SIG_COPYCTX\n"); } else { if (signum == SIGALRM && current->timeout) { fprintf(stderr, "-- Timeout !\n"); @@ -219,6 +241,15 @@ static inline int default_trigger(struct tdescr *td) int test_init(struct tdescr *td) { + if (td->sig_trig == sig_copyctx) { + fprintf(stdout, + "Signal %d is RESERVED, cannot be used as a trigger. Aborting\n", + sig_copyctx); + return 0; + } + /* just in case */ + unblock_signal(sig_copyctx); + td->minsigstksz = getauxval(AT_MINSIGSTKSZ); if (!td->minsigstksz) td->minsigstksz = MINSIGSTKSZ; diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h index 5e3a2b7aaa8b..fd67b1f23c41 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.h +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h @@ -4,6 +4,10 @@ #ifndef __TEST_SIGNALS_UTILS_H__ #define __TEST_SIGNALS_UTILS_H__ +#include +#include +#include + #include "test_signals.h" int test_init(struct tdescr *td); @@ -17,4 +21,98 @@ static inline bool feats_ok(struct tdescr *td) return (td->feats_required & td->feats_supported) == td->feats_required; } +/* + * Obtaining a valid and full-blown ucontext_t from userspace is tricky: + * libc getcontext does() not save all the regs and messes with some of + * them (pstate value in particular is not reliable). 
+ * + * Here we use a service signal to grab the ucontext_t from inside a + * dedicated signal handler, since there, it is populated by Kernel + * itself in setup_sigframe(). The grabbed context is then stored and + * made available in td->live_uc. + * + * As service-signal is used a SIGTRAP induced by a 'brk' instruction, + * because here we have to avoid syscalls to trigger the signal since + * they would cause any SVE sigframe content (if any) to be removed. + * + * Anyway this function really serves a dual purpose: + * + * 1. grab a valid sigcontext into td->live_uc for result analysis: in + * such case it returns 1. + * + * 2. detect if, somehow, a previously grabbed live_uc context has been + * used actively with a sigreturn: in such a case the execution would have + * magically resumed in the middle of this function itself (seen_already==1): + * in such a case return 0, since in fact we have not just simply grabbed + * the context. + * + * This latter case is useful to detect when a fake_sigreturn test-case has + * unexpectedly survived without hitting a SEGV. + * + * Note that the case of runtime dynamically sized sigframes (like in SVE + * context) is still NOT addressed: sigframe size is supposed to be fixed + * at sizeof(ucontext_t). + */ +static __always_inline bool get_current_context(struct tdescr *td, + ucontext_t *dest_uc) +{ + static volatile bool seen_already; + + assert(td && dest_uc); + /* it's a genuine invocation..reinit */ + seen_already = 0; + td->live_uc_valid = 0; + td->live_sz = sizeof(*dest_uc); + memset(dest_uc, 0x00, td->live_sz); + td->live_uc = dest_uc; + /* + * Grab ucontext_t triggering a SIGTRAP. 
+ * + * Note that: + * - live_uc_valid is declared volatile sig_atomic_t in + * struct tdescr since it will be changed inside the + * sig_copyctx handler + * - the additional 'memory' clobber is there to avoid possible + * compiler's assumption on live_uc_valid and the content + * pointed by dest_uc, which are all changed inside the signal + * handler + * - BRK causes a debug exception which is handled by the Kernel + * and finally causes the SIGTRAP signal to be delivered to this + * test thread. Since such delivery happens on the ret_to_user() + * /do_notify_resume() debug exception return-path, we are sure + * that the registered SIGTRAP handler has been run to completion + * before the execution path is restored here: as a consequence + * we can be sure that the volatile sig_atomic_t live_uc_valid + * carries a meaningful result. Being in a single thread context + * we'll also be sure that any access to memory modified by the + * handler (namely ucontext_t) will be visible once returned. + * - note that since we are using a breakpoint instruction here + * to cause a SIGTRAP, the ucontext_t grabbed from the signal + * handler would naturally contain a PC pointing exactly to this + * BRK line, which means that, on return from the signal handler, + * or if we place the ucontext_t on the stack to fake a sigreturn, + * we'll end up in an infinite loop of BRK-SIGTRAP-handler. + * For this reason we take care to artificially move forward the + * PC to the next instruction while inside the signal handler. 
+ */ + asm volatile ("brk #666" + : "+m" (*dest_uc) + : + : "memory"); + + /* + * If we get here with seen_already==1 it implies the td->live_uc + * context has been used to get back here....this probably means + * a test has failed to cause a SEGV...anyway live_uc does not + * point to a just acquired copy of ucontext_t...so return 0 + */ + if (seen_already) { + fprintf(stdout, + "Unexpected successful sigreturn detected: live_uc is stale !\n"); + return 0; + } + seen_already = 1; + + return td->live_uc_valid; +} #endif From 6c2aa4284513585e9cc0c25b125ab4d57ef4ce76 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 25 Oct 2019 18:57:12 +0100 Subject: [PATCH 41/48] kselftest: arm64: fake_sigreturn_bad_magic Add a simple fake_sigreturn testcase which builds a ucontext_t with a bad magic header and place it onto the stack. Expects a SIGSEGV on test PASS. Introduce a common utility assembly trampoline function to invoke a sigreturn while placing the provided sigframe at wanted alignment and also an helper to make space when needed inside the sigframe reserved area. 
Reviewed-by: Dave Martin Signed-off-by: Cristian Marussi Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/signal/Makefile | 2 +- .../testing/selftests/arm64/signal/signals.S | 64 +++++++++++++++++++ .../arm64/signal/test_signals_utils.h | 2 + .../testcases/fake_sigreturn_bad_magic.c | 52 +++++++++++++++ .../arm64/signal/testcases/testcases.c | 46 +++++++++++++ .../arm64/signal/testcases/testcases.h | 4 ++ 6 files changed, 169 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/arm64/signal/signals.S create mode 100644 tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c diff --git a/tools/testing/selftests/arm64/signal/Makefile b/tools/testing/selftests/arm64/signal/Makefile index f78f5190e3d4..b497cfea4643 100644 --- a/tools/testing/selftests/arm64/signal/Makefile +++ b/tools/testing/selftests/arm64/signal/Makefile @@ -28,5 +28,5 @@ clean: # Common test-unit targets to build common-layout test-cases executables # Needs secondary expansion to properly include the testcase c-file in pre-reqs .SECONDEXPANSION: -$(PROGS): test_signals.c test_signals_utils.c testcases/testcases.c $$@.c test_signals.h test_signals_utils.h testcases/testcases.h +$(PROGS): test_signals.c test_signals_utils.c testcases/testcases.c signals.S $$@.c test_signals.h test_signals_utils.h testcases/testcases.h $(CC) $(CFLAGS) $^ -o $@ diff --git a/tools/testing/selftests/arm64/signal/signals.S b/tools/testing/selftests/arm64/signal/signals.S new file mode 100644 index 000000000000..9f8c1aefc3b9 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/signals.S @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019 ARM Limited */ + +#include + +.section .rodata, "a" +call_fmt: + .asciz "Calling sigreturn with fake sigframe sized:%zd at SP @%08lX\n" + +.text + +.globl fake_sigreturn + +/* fake_sigreturn x0:&sigframe, x1:sigframe_size, x2:misalign_bytes */ +fake_sigreturn: + stp x29, x30, [sp, #-16]! 
+ mov x29, sp + + mov x20, x0 + mov x21, x1 + mov x22, x2 + + /* create space on the stack for fake sigframe 16 bytes-aligned */ + add x0, x21, x22 + add x0, x0, #15 + bic x0, x0, #15 /* round_up(sigframe_size + misalign_bytes, 16) */ + sub sp, sp, x0 + add x23, sp, x22 /* new sigframe base with misalignment if any */ + + ldr x0, =call_fmt + mov x1, x21 + mov x2, x23 + bl printf + + /* memcpy the provided content, while still keeping SP aligned */ + mov x0, x23 + mov x1, x20 + mov x2, x21 + bl memcpy + + /* + * Here saving a last minute SP to current->token acts as a marker: + * if we got here, we are successfully faking a sigreturn; in other + * words we are sure no bad fatal signal has been raised till now + * for unrelated reasons, so we should consider the possibly observed + * fatal signal like SEGV coming from Kernel restore_sigframe() and + * triggered as expected from our test-case. + * For simplicity this assumes that current field 'token' is laid out + * as first in struct tdescr + */ + ldr x0, current + str x23, [x0] + /* finally move SP to misaligned address...if any requested */ + mov sp, x23 + + mov x8, #__NR_rt_sigreturn + svc #0 + + /* + * Above sigreturn should not return...looping here leads to a timeout + * and ensures proper and clean test failure, instead of jumping around + * on a potentially corrupted stack. + */ + b . 
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h index fd67b1f23c41..6772b5c8d274 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.h +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h @@ -115,4 +115,6 @@ static __always_inline bool get_current_context(struct tdescr *td, return td->live_uc_valid; } + +int fake_sigreturn(void *sigframe, size_t sz, int misalign_bytes); #endif diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c new file mode 100644 index 000000000000..8dc600a7d4fd --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Place a fake sigframe on the stack including a BAD Unknown magic + * record: on sigreturn Kernel must spot this attempt and the test + * case is expected to be terminated via SEGV. + */ + +#include +#include + +#include "test_signals_utils.h" +#include "testcases.h" + +struct fake_sigframe sf; + +static int fake_sigreturn_bad_magic_run(struct tdescr *td, + siginfo_t *si, ucontext_t *uc) +{ + struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head; + + /* just to fill the ucontext_t with something real */ + if (!get_current_context(td, &sf.uc)) + return 1; + + /* need at least 2*HDR_SZ space: KSFT_BAD_MAGIC + terminator. 
*/ + head = get_starting_head(shead, HDR_SZ * 2, GET_SF_RESV_SIZE(sf), NULL); + if (!head) + return 0; + + /* + * use a well known NON existent bad magic...something + * we should pretty sure won't be ever defined in Kernel + */ + head->magic = KSFT_BAD_MAGIC; + head->size = HDR_SZ; + write_terminator_record(GET_RESV_NEXT_HEAD(head)); + + ASSERT_BAD_CONTEXT(&sf.uc); + fake_sigreturn(&sf, sizeof(sf), 0); + + return 1; +} + +struct tdescr tde = { + .name = "FAKE_SIGRETURN_BAD_MAGIC", + .descr = "Trigger a sigreturn with a sigframe with a bad magic", + .sig_ok = SIGSEGV, + .timeout = 3, + .run = fake_sigreturn_bad_magic_run, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c index 1914a01222a1..e3521949b800 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.c +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c @@ -148,3 +148,49 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) return true; } + +/* + * This function walks through the records inside the provided reserved area + * trying to find enough space to fit @need_sz bytes: if not enough space is + * available and an extra_context record is present, it throws away the + * extra_context record. + * + * It returns a pointer to a new header where it is possible to start storing + * our need_sz bytes. + * + * @shead: points to the start of reserved area + * @need_sz: needed bytes + * @resv_sz: reserved area size in bytes + * @offset: if not null, this will be filled with the offset of the return + * head pointer from @shead + * + * @return: pointer to a new head where to start storing need_sz bytes, or + * NULL if space could not be made available. 
+ */ +struct _aarch64_ctx *get_starting_head(struct _aarch64_ctx *shead, + size_t need_sz, size_t resv_sz, + size_t *offset) +{ + size_t offs = 0; + struct _aarch64_ctx *head; + + head = get_terminator(shead, resv_sz, &offs); + /* not found a terminator...no need to update offset if any */ + if (!head) + return head; + if (resv_sz - offs < need_sz) { + fprintf(stderr, "Low on space:%zd. Discarding extra_context.\n", + resv_sz - offs); + head = get_header(shead, EXTRA_MAGIC, resv_sz, &offs); + if (!head || resv_sz - offs < need_sz) { + fprintf(stderr, + "Failed to reclaim space on sigframe.\n"); + return NULL; + } + } + + fprintf(stderr, "Available space:%zd\n", resv_sz - offs); + if (offset) + *offset = offs; + return head; +} diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h index 04987f7870bc..ad884c135314 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.h +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h @@ -97,4 +97,8 @@ static inline void write_terminator_record(struct _aarch64_ctx *tail) tail->size = 0; } } + +struct _aarch64_ctx *get_starting_head(struct _aarch64_ctx *shead, + size_t need_sz, size_t resv_sz, + size_t *offset); #endif From 4c94a0ba02b20068a8e3f80b471dfcedb5c8800a Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 25 Oct 2019 18:57:13 +0100 Subject: [PATCH 42/48] kselftest: arm64: fake_sigreturn_bad_size_for_magic0 Add a simple fake_sigreturn testcase which builds a ucontext_t with a badly sized terminator record and place it onto the stack. Expects a SIGSEGV on test PASS. 
Reviewed-by: Dave Martin Signed-off-by: Cristian Marussi Signed-off-by: Catalin Marinas --- .../fake_sigreturn_bad_size_for_magic0.c | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c new file mode 100644 index 000000000000..a44b88bfc81a --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Place a fake sigframe on the stack including a badly sized terminator + * record: on sigreturn Kernel must spot this attempt and the test case + * is expected to be terminated via SEGV. + */ + +#include +#include + +#include "test_signals_utils.h" +#include "testcases.h" + +struct fake_sigframe sf; + +static int fake_sigreturn_bad_size_for_magic0_run(struct tdescr *td, + siginfo_t *si, ucontext_t *uc) +{ + struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head; + + /* just to fill the ucontext_t with something real */ + if (!get_current_context(td, &sf.uc)) + return 1; + + /* at least HDR_SZ for the badly sized terminator. 
*/ + head = get_starting_head(shead, HDR_SZ, GET_SF_RESV_SIZE(sf), NULL); + if (!head) + return 0; + + head->magic = 0; + head->size = HDR_SZ; + ASSERT_BAD_CONTEXT(&sf.uc); + fake_sigreturn(&sf, sizeof(sf), 0); + + return 1; +} + +struct tdescr tde = { + .name = "FAKE_SIGRETURN_BAD_SIZE_FOR_TERMINATOR", + .descr = "Trigger a sigreturn using non-zero size terminator", + .sig_ok = SIGSEGV, + .timeout = 3, + .run = fake_sigreturn_bad_size_for_magic0_run, +}; From 8aa9d08fcb5368777f64d4a7c94899159249f0a0 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 25 Oct 2019 18:57:14 +0100 Subject: [PATCH 43/48] kselftest: arm64: fake_sigreturn_missing_fpsimd Add a simple fake_sigreturn testcase which builds a ucontext_t without the required fpsimd_context and place it onto the stack. Expects a SIGSEGV on test PASS. Reviewed-by: Dave Martin Signed-off-by: Cristian Marussi Signed-off-by: Catalin Marinas --- .../testcases/fake_sigreturn_missing_fpsimd.c | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c new file mode 100644 index 000000000000..08ecd8073a1a --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Place a fake sigframe on the stack missing the mandatory FPSIMD + * record: on sigreturn Kernel must spot this attempt and the test + * case is expected to be terminated via SEGV. 
+ */ + +#include +#include +#include + +#include "test_signals_utils.h" +#include "testcases.h" + +struct fake_sigframe sf; + +static int fake_sigreturn_missing_fpsimd_run(struct tdescr *td, + siginfo_t *si, ucontext_t *uc) +{ + size_t resv_sz, offset; + struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf); + + /* just to fill the ucontext_t with something real */ + if (!get_current_context(td, &sf.uc)) + return 1; + + resv_sz = GET_SF_RESV_SIZE(sf); + head = get_header(head, FPSIMD_MAGIC, resv_sz, &offset); + if (head && resv_sz - offset >= HDR_SZ) { + fprintf(stderr, "Mangling template header. Spare space:%zd\n", + resv_sz - offset); + /* Just overwrite fpsimd_context */ + write_terminator_record(head); + + ASSERT_BAD_CONTEXT(&sf.uc); + fake_sigreturn(&sf, sizeof(sf), 0); + } + + return 1; +} + +struct tdescr tde = { + .name = "FAKE_SIGRETURN_MISSING_FPSIMD", + .descr = "Triggers a sigreturn with a missing fpsimd_context", + .sig_ok = SIGSEGV, + .timeout = 3, + .run = fake_sigreturn_missing_fpsimd_run, +}; From 46185cd1241b46c94933752137cde18ecfaf3766 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 25 Oct 2019 18:57:15 +0100 Subject: [PATCH 44/48] kselftest: arm64: fake_sigreturn_duplicated_fpsimd Add a simple fake_sigreturn testcase which builds a ucontext_t with an anomalous additional fpsimd_context and places it onto the stack. Expects a SIGSEGV on test PASS. 
Reviewed-by: Dave Martin Signed-off-by: Cristian Marussi Signed-off-by: Catalin Marinas --- .../fake_sigreturn_duplicated_fpsimd.c | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c new file mode 100644 index 000000000000..afe8915f0998 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Place a fake sigframe on the stack including an additional FPSIMD + * record: on sigreturn Kernel must spot this attempt and the test + * case is expected to be terminated via SEGV. + */ + +#include +#include + +#include "test_signals_utils.h" +#include "testcases.h" + +struct fake_sigframe sf; + +static int fake_sigreturn_duplicated_fpsimd_run(struct tdescr *td, + siginfo_t *si, ucontext_t *uc) +{ + struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head; + + /* just to fill the ucontext_t with something real */ + if (!get_current_context(td, &sf.uc)) + return 1; + + head = get_starting_head(shead, sizeof(struct fpsimd_context) + HDR_SZ, + GET_SF_RESV_SIZE(sf), NULL); + if (!head) + return 0; + + /* Add a spurious fpsimd_context */ + head->magic = FPSIMD_MAGIC; + head->size = sizeof(struct fpsimd_context); + /* and terminate */ + write_terminator_record(GET_RESV_NEXT_HEAD(head)); + + ASSERT_BAD_CONTEXT(&sf.uc); + fake_sigreturn(&sf, sizeof(sf), 0); + + return 1; +} + +struct tdescr tde = { + .name = "FAKE_SIGRETURN_DUPLICATED_FPSIMD", + .descr = "Triggers a sigreturn including two fpsimd_context", + .sig_ok = SIGSEGV, + .timeout = 3, + .run = fake_sigreturn_duplicated_fpsimd_run, +}; From 49978aa8f079633192928cb966dcde39a3ff0747 Mon Sep 17 00:00:00 
2001 From: Cristian Marussi Date: Fri, 25 Oct 2019 18:57:16 +0100 Subject: [PATCH 45/48] kselftest: arm64: fake_sigreturn_bad_size Add a simple fake_sigreturn testcase which builds a ucontext_t with a badly sized header that causes an overrun in the __reserved area and places it onto the stack. Expects a SIGSEGV on test PASS. Reviewed-by: Dave Martin Signed-off-by: Cristian Marussi Signed-off-by: Catalin Marinas --- .../testcases/fake_sigreturn_bad_size.c | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c new file mode 100644 index 000000000000..b3c362100666 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Place a fake sigframe on the stack including a bad record overflowing + * the __reserved space: on sigreturn Kernel must spot this attempt and + * the test case is expected to be terminated via SEGV. 
+ */ + +#include +#include + +#include "test_signals_utils.h" +#include "testcases.h" + +struct fake_sigframe sf; + +#define MIN_SZ_ALIGN 16 + +static int fake_sigreturn_bad_size_run(struct tdescr *td, + siginfo_t *si, ucontext_t *uc) +{ + size_t resv_sz, need_sz, offset; + struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head; + + /* just to fill the ucontext_t with something real */ + if (!get_current_context(td, &sf.uc)) + return 1; + + resv_sz = GET_SF_RESV_SIZE(sf); + /* at least HDR_SZ + bad sized esr_context needed */ + need_sz = sizeof(struct esr_context) + HDR_SZ; + head = get_starting_head(shead, need_sz, resv_sz, &offset); + if (!head) + return 0; + + /* + * Use an esr_context to build a fake header with a + * size greater than the free __reserved area minus HDR_SZ; + * using ESR_MAGIC here since it is not checked for size nor + * is limited to one instance. + * + * At first inject an additional normal esr_context + */ + head->magic = ESR_MAGIC; + head->size = sizeof(struct esr_context); + /* and terminate properly */ + write_terminator_record(GET_RESV_NEXT_HEAD(head)); + ASSERT_GOOD_CONTEXT(&sf.uc); + + /* + * now mess with fake esr_context size: leaving less space than + * needed while keeping size value 16-aligned + * + * It must trigger a SEGV from Kernel on: + * + * resv_sz - offset < sizeof(*head) + */ + /* at first set the maximum good 16-aligned size */ + head->size = (resv_sz - offset - need_sz + MIN_SZ_ALIGN) & ~0xfUL; + /* plus a bit more of 16-aligned sized stuff */ + head->size += MIN_SZ_ALIGN; + /* and terminate properly */ + write_terminator_record(GET_RESV_NEXT_HEAD(head)); + ASSERT_BAD_CONTEXT(&sf.uc); + fake_sigreturn(&sf, sizeof(sf), 0); + + return 1; +} + +struct tdescr tde = { + .name = "FAKE_SIGRETURN_BAD_SIZE", + .descr = "Triggers a sigreturn with a overrun __reserved area", + .sig_ok = SIGSEGV, + .timeout = 3, + .run = fake_sigreturn_bad_size_run, +}; From 3f484ce3750f7a29c3be806e891de99aa5c4ca43 Mon Sep 17 00:00:00 2001 From: 
Cristian Marussi Date: Fri, 25 Oct 2019 18:57:17 +0100 Subject: [PATCH 46/48] kselftest: arm64: fake_sigreturn_misaligned_sp Add a simple fake_sigreturn testcase which places a valid sigframe on a non-16 bytes aligned SP. Expects a SIGSEGV on test PASS. Reviewed-by: Dave Martin Signed-off-by: Cristian Marussi Signed-off-by: Catalin Marinas --- .../testcases/fake_sigreturn_misaligned_sp.c | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c new file mode 100644 index 000000000000..1e089e66f9f3 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Place a fake sigframe on the stack at a misaligned SP: on sigreturn + * Kernel must spot this attempt and the test case is expected to be + * terminated via SEGV. 
+ */ + +#include +#include + +#include "test_signals_utils.h" +#include "testcases.h" + +struct fake_sigframe sf; + +static int fake_sigreturn_misaligned_run(struct tdescr *td, + siginfo_t *si, ucontext_t *uc) +{ + /* just to fill the ucontext_t with something real */ + if (!get_current_context(td, &sf.uc)) + return 1; + + /* Forcing sigframe on misaligned SP (16 + 3) */ + fake_sigreturn(&sf, sizeof(sf), 3); + + return 1; +} + +struct tdescr tde = { + .name = "FAKE_SIGRETURN_MISALIGNED_SP", + .descr = "Triggers a sigreturn with a misaligned sigframe", + .sig_ok = SIGSEGV, + .timeout = 3, + .run = fake_sigreturn_misaligned_run, +}; From 294a9ddde6cdbf931a28b8c8c928d3f799b61cb5 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 8 Nov 2019 17:12:43 +0000 Subject: [PATCH 47/48] arm64: kaslr: Announce KASLR status on boot Currently the KASLR code is silent at boot unless it forces on KPTI in which case a message will be printed for that. This can lead to users incorrectly believing their system has the feature enabled when it in fact does not, and if they notice the problem the lack of any diagnostics makes it harder to understand the problem. Add an initcall which prints a message showing the status of KASLR during boot to make the status clear. This is particularly useful in cases where we don't have a seed. It seems to be a relatively common error for system integrators and administrators to enable KASLR in their configuration but not provide the seed at runtime, often due to seed provisioning breaking at some later point after it is initially enabled and verified. 
Signed-off-by: Mark Brown Acked-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/kernel/kaslr.c | 41 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 416f537bf614..0039dc50e556 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -19,6 +19,14 @@ #include #include +enum kaslr_status { + KASLR_ENABLED, + KASLR_DISABLED_CMDLINE, + KASLR_DISABLED_NO_SEED, + KASLR_DISABLED_FDT_REMAP, +}; + +enum kaslr_status __ro_after_init kaslr_status; u64 __ro_after_init module_alloc_base; u16 __initdata memstart_offset_seed; @@ -91,15 +99,19 @@ u64 __init kaslr_early_init(u64 dt_phys) */ early_fixmap_init(); fdt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL); - if (!fdt) + if (!fdt) { + kaslr_status = KASLR_DISABLED_FDT_REMAP; return 0; + } /* * Retrieve (and wipe) the seed from the FDT */ seed = get_kaslr_seed(fdt); - if (!seed) + if (!seed) { + kaslr_status = KASLR_DISABLED_NO_SEED; return 0; + } /* * Check if 'nokaslr' appears on the command line, and @@ -107,8 +119,10 @@ u64 __init kaslr_early_init(u64 dt_phys) */ cmdline = kaslr_get_cmdline(fdt); str = strstr(cmdline, "nokaslr"); - if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) + if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) { + kaslr_status = KASLR_DISABLED_CMDLINE; return 0; + } /* * OK, so we are proceeding with KASLR enabled. 
Calculate a suitable @@ -170,3 +184,24 @@ u64 __init kaslr_early_init(u64 dt_phys) return offset; } + +static int __init kaslr_init(void) +{ + switch (kaslr_status) { + case KASLR_ENABLED: + pr_info("KASLR enabled\n"); + break; + case KASLR_DISABLED_CMDLINE: + pr_info("KASLR disabled on command line\n"); + break; + case KASLR_DISABLED_NO_SEED: + pr_warn("KASLR disabled due to lack of seed\n"); + break; + case KASLR_DISABLED_FDT_REMAP: + pr_warn("KASLR disabled due to FDT remapping failure\n"); + break; + } + + return 0; +} +core_initcall(kaslr_init) From 2203e1adb936a92ab2fd8f705e888af322462736 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 8 Nov 2019 17:12:44 +0000 Subject: [PATCH 48/48] arm64: kaslr: Check command line before looking for a seed Now that we print diagnostics at boot the reason why we do not initialise KASLR matters. Currently we check for a seed before we check if the user has explicitly disabled KASLR on the command line which will result in misleading diagnostics so reverse the order of those checks. We still parse the seed from the DT early so that if the user has both provided a seed and disabled KASLR on the command line we still mask the seed on the command line. 
Signed-off-by: Mark Brown Acked-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/kernel/kaslr.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 0039dc50e556..2a11a962e571 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -26,7 +26,7 @@ enum kaslr_status { KASLR_DISABLED_FDT_REMAP, }; -enum kaslr_status __ro_after_init kaslr_status; +static enum kaslr_status __initdata kaslr_status; u64 __ro_after_init module_alloc_base; u16 __initdata memstart_offset_seed; @@ -108,10 +108,6 @@ u64 __init kaslr_early_init(u64 dt_phys) * Retrieve (and wipe) the seed from the FDT */ seed = get_kaslr_seed(fdt); - if (!seed) { - kaslr_status = KASLR_DISABLED_NO_SEED; - return 0; - } /* * Check if 'nokaslr' appears on the command line, and @@ -124,6 +120,11 @@ u64 __init kaslr_early_init(u64 dt_phys) return 0; } + if (!seed) { + kaslr_status = KASLR_DISABLED_NO_SEED; + return 0; + } + /* * OK, so we are proceeding with KASLR enabled. Calculate a suitable * kernel image offset from the seed. Let's place the kernel in the