|  | // SPDX-License-Identifier: GPL-2.0+ | 
|  | /* | 
|  | * PowerPC Memory Protection Keys management | 
|  | * | 
|  | * Copyright 2017, Ram Pai, IBM Corporation. | 
|  | */ | 
|  |  | 
|  | #include <asm/mman.h> | 
|  | #include <asm/mmu_context.h> | 
|  | #include <asm/mmu.h> | 
|  | #include <asm/setup.h> | 
|  | #include <asm/smp.h> | 
|  | #include <asm/firmware.h> | 
|  |  | 
|  | #include <linux/pkeys.h> | 
|  | #include <linux/of_fdt.h> | 
|  |  | 
|  |  | 
|  | int  num_pkey;		/* Max number of pkeys supported */ | 
|  | /* | 
|  | *  Keys marked in the reservation list cannot be allocated by  userspace | 
|  | */ | 
|  | u32 reserved_allocation_mask __ro_after_init; | 
|  |  | 
|  | /* Bits set for the initially allocated keys */ | 
|  | static u32 initial_allocation_mask __ro_after_init; | 
|  |  | 
|  | /* | 
|  | * Even if we allocate keys with sys_pkey_alloc(), we need to make sure | 
|  | * other thread still find the access denied using the same keys. | 
|  | */ | 
|  | u64 default_amr __ro_after_init  = ~0x0UL; | 
|  | u64 default_iamr __ro_after_init = 0x5555555555555555UL; | 
|  | u64 default_uamor __ro_after_init; | 
|  | EXPORT_SYMBOL(default_amr); | 
|  | /* | 
|  | * Key used to implement PROT_EXEC mmap. Denies READ/WRITE | 
|  | * We pick key 2 because 0 is special key and 1 is reserved as per ISA. | 
|  | */ | 
|  | static int execute_only_key = 2; | 
|  | static bool pkey_execute_disable_supported; | 
|  |  | 
|  |  | 
/*
 * Each key owns a two-bit field in the AMR (read-deny and write-deny bits).
 * The IAMR uses the same field position but only the execute-deny bit.
 */
#define AMR_BITS_PER_PKEY 2
#define AMR_RD_BIT 0x1UL
#define AMR_WR_BIT 0x2UL
#define IAMR_EX_BIT 0x1UL
#define PKEY_REG_BITS (sizeof(u64) * 8)
/*
 * Bit offset of @pkey's field inside a 64-bit key register. Key 0 maps to
 * the most significant field, so the shift decreases as the key number
 * grows. The argument is parenthesised so that compound expressions
 * (e.g. a conditional expression) expand as a single operand.
 */
#define pkeyshift(pkey) (PKEY_REG_BITS - (((pkey) + 1) * AMR_BITS_PER_PKEY))
|  |  | 
|  | static int __init dt_scan_storage_keys(unsigned long node, | 
|  | const char *uname, int depth, | 
|  | void *data) | 
|  | { | 
|  | const char *type = of_get_flat_dt_prop(node, "device_type", NULL); | 
|  | const __be32 *prop; | 
|  | int *pkeys_total = (int *) data; | 
|  |  | 
|  | /* We are scanning "cpu" nodes only */ | 
|  | if (type == NULL || strcmp(type, "cpu") != 0) | 
|  | return 0; | 
|  |  | 
|  | prop = of_get_flat_dt_prop(node, "ibm,processor-storage-keys", NULL); | 
|  | if (!prop) | 
|  | return 0; | 
|  | *pkeys_total = be32_to_cpu(prop[0]); | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | static int __init scan_pkey_feature(void) | 
|  | { | 
|  | int ret; | 
|  | int pkeys_total = 0; | 
|  |  | 
|  | /* | 
|  | * Pkey is not supported with Radix translation. | 
|  | */ | 
|  | if (early_radix_enabled()) | 
|  | return 0; | 
|  |  | 
|  | ret = of_scan_flat_dt(dt_scan_storage_keys, &pkeys_total); | 
|  | if (ret == 0) { | 
|  | /* | 
|  | * Let's assume 32 pkeys on P8/P9 bare metal, if its not defined by device | 
|  | * tree. We make this exception since some version of skiboot forgot to | 
|  | * expose this property on power8/9. | 
|  | */ | 
|  | if (!firmware_has_feature(FW_FEATURE_LPAR)) { | 
|  | unsigned long pvr = mfspr(SPRN_PVR); | 
|  |  | 
|  | if (PVR_VER(pvr) == PVR_POWER8 || PVR_VER(pvr) == PVR_POWER8E || | 
|  | PVR_VER(pvr) == PVR_POWER8NVL || PVR_VER(pvr) == PVR_POWER9 || | 
|  | PVR_VER(pvr) == PVR_HX_C2000) | 
|  | pkeys_total = 32; | 
|  | } | 
|  | } | 
|  |  | 
|  | #ifdef CONFIG_PPC_MEM_KEYS | 
|  | /* | 
|  | * Adjust the upper limit, based on the number of bits supported by | 
|  | * arch-neutral code. | 
|  | */ | 
|  | pkeys_total = min_t(int, pkeys_total, | 
|  | ((ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + 1)); | 
|  | #endif | 
|  | return pkeys_total; | 
|  | } | 
|  |  | 
|  | void __init pkey_early_init_devtree(void) | 
|  | { | 
|  | int pkeys_total, i; | 
|  |  | 
|  | #ifdef CONFIG_PPC_MEM_KEYS | 
|  | /* | 
|  | * We define PKEY_DISABLE_EXECUTE in addition to the arch-neutral | 
|  | * generic defines for PKEY_DISABLE_ACCESS and PKEY_DISABLE_WRITE. | 
|  | * Ensure that the bits a distinct. | 
|  | */ | 
|  | BUILD_BUG_ON(PKEY_DISABLE_EXECUTE & | 
|  | (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); | 
|  |  | 
|  | /* | 
|  | * pkey_to_vmflag_bits() assumes that the pkey bits are contiguous | 
|  | * in the vmaflag. Make sure that is really the case. | 
|  | */ | 
|  | BUILD_BUG_ON(__builtin_clzl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + | 
|  | __builtin_popcountl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) | 
|  | != (sizeof(u64) * BITS_PER_BYTE)); | 
|  | #endif | 
|  | /* | 
|  | * Only P7 and above supports SPRN_AMR update with MSR[PR] = 1 | 
|  | */ | 
|  | if (!early_cpu_has_feature(CPU_FTR_ARCH_206)) | 
|  | return; | 
|  |  | 
|  | /* scan the device tree for pkey feature */ | 
|  | pkeys_total = scan_pkey_feature(); | 
|  | if (!pkeys_total) | 
|  | goto out; | 
|  |  | 
|  | /* Allow all keys to be modified by default */ | 
|  | default_uamor = ~0x0UL; | 
|  |  | 
|  | cur_cpu_spec->mmu_features |= MMU_FTR_PKEY; | 
|  |  | 
|  | /* | 
|  | * The device tree cannot be relied to indicate support for | 
|  | * execute_disable support. Instead we use a PVR check. | 
|  | */ | 
|  | if (pvr_version_is(PVR_POWER7) || pvr_version_is(PVR_POWER7p)) | 
|  | pkey_execute_disable_supported = false; | 
|  | else | 
|  | pkey_execute_disable_supported = true; | 
|  |  | 
|  | #ifdef CONFIG_PPC_4K_PAGES | 
|  | /* | 
|  | * The OS can manage only 8 pkeys due to its inability to represent them | 
|  | * in the Linux 4K PTE. Mark all other keys reserved. | 
|  | */ | 
|  | num_pkey = min(8, pkeys_total); | 
|  | #else | 
|  | num_pkey = pkeys_total; | 
|  | #endif | 
|  |  | 
|  | if (unlikely(num_pkey <= execute_only_key) || !pkey_execute_disable_supported) { | 
|  | /* | 
|  | * Insufficient number of keys to support | 
|  | * execute only key. Mark it unavailable. | 
|  | */ | 
|  | execute_only_key = -1; | 
|  | } else { | 
|  | /* | 
|  | * Mark the execute_only_pkey as not available for | 
|  | * user allocation via pkey_alloc. | 
|  | */ | 
|  | reserved_allocation_mask |= (0x1 << execute_only_key); | 
|  |  | 
|  | /* | 
|  | * Deny READ/WRITE for execute_only_key. | 
|  | * Allow execute in IAMR. | 
|  | */ | 
|  | default_amr  |= (0x3ul << pkeyshift(execute_only_key)); | 
|  | default_iamr &= ~(0x1ul << pkeyshift(execute_only_key)); | 
|  |  | 
|  | /* | 
|  | * Clear the uamor bits for this key. | 
|  | */ | 
|  | default_uamor &= ~(0x3ul << pkeyshift(execute_only_key)); | 
|  | } | 
|  |  | 
|  | if (unlikely(num_pkey <= 3)) { | 
|  | /* | 
|  | * Insufficient number of keys to support | 
|  | * KUAP/KUEP feature. | 
|  | */ | 
|  | disable_kuep = true; | 
|  | disable_kuap = true; | 
|  | WARN(1, "Disabling kernel user protection due to low (%d) max supported keys\n", num_pkey); | 
|  | } else { | 
|  | /*  handle key which is used by kernel for KAUP */ | 
|  | reserved_allocation_mask |= (0x1 << 3); | 
|  | /* | 
|  | * Mark access for kup_key in default amr so that | 
|  | * we continue to operate with that AMR in | 
|  | * copy_to/from_user(). | 
|  | */ | 
|  | default_amr   &= ~(0x3ul << pkeyshift(3)); | 
|  | default_iamr  &= ~(0x1ul << pkeyshift(3)); | 
|  | default_uamor &= ~(0x3ul << pkeyshift(3)); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Allow access for only key 0. And prevent any other modification. | 
|  | */ | 
|  | default_amr   &= ~(0x3ul << pkeyshift(0)); | 
|  | default_iamr  &= ~(0x1ul << pkeyshift(0)); | 
|  | default_uamor &= ~(0x3ul << pkeyshift(0)); | 
|  | /* | 
|  | * key 0 is special in that we want to consider it an allocated | 
|  | * key which is preallocated. We don't allow changing AMR bits | 
|  | * w.r.t key 0. But one can pkey_free(key0) | 
|  | */ | 
|  | initial_allocation_mask |= (0x1 << 0); | 
|  |  | 
|  | /* | 
|  | * key 1 is recommended not to be used. PowerISA(3.0) page 1015, | 
|  | * programming note. | 
|  | */ | 
|  | reserved_allocation_mask |= (0x1 << 1); | 
|  | default_uamor &= ~(0x3ul << pkeyshift(1)); | 
|  |  | 
|  | /* | 
|  | * Prevent the usage of OS reserved keys. Update UAMOR | 
|  | * for those keys. Also mark the rest of the bits in the | 
|  | * 32 bit mask as reserved. | 
|  | */ | 
|  | for (i = num_pkey; i < 32 ; i++) { | 
|  | reserved_allocation_mask |= (0x1 << i); | 
|  | default_uamor &= ~(0x3ul << pkeyshift(i)); | 
|  | } | 
|  | /* | 
|  | * Prevent the allocation of reserved keys too. | 
|  | */ | 
|  | initial_allocation_mask |= reserved_allocation_mask; | 
|  |  | 
|  | pr_info("Enabling pkeys with max key count %d\n", num_pkey); | 
|  | out: | 
|  | /* | 
|  | * Setup uamor on boot cpu | 
|  | */ | 
|  | mtspr(SPRN_UAMOR, default_uamor); | 
|  |  | 
|  | return; | 
|  | } | 
|  |  | 
|  | #ifdef CONFIG_PPC_KUEP | 
|  | void setup_kuep(bool disabled) | 
|  | { | 
|  | if (disabled) | 
|  | return; | 
|  | /* | 
|  | * On hash if PKEY feature is not enabled, disable KUAP too. | 
|  | */ | 
|  | if (!early_radix_enabled() && !early_mmu_has_feature(MMU_FTR_PKEY)) | 
|  | return; | 
|  |  | 
|  | if (smp_processor_id() == boot_cpuid) { | 
|  | pr_info("Activating Kernel Userspace Execution Prevention\n"); | 
|  | cur_cpu_spec->mmu_features |= MMU_FTR_BOOK3S_KUEP; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Radix always uses key0 of the IAMR to determine if an access is | 
|  | * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction | 
|  | * fetch. | 
|  | */ | 
|  | mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED); | 
|  | isync(); | 
|  | } | 
|  | #endif | 
|  |  | 
|  | #ifdef CONFIG_PPC_KUAP | 
|  | void setup_kuap(bool disabled) | 
|  | { | 
|  | if (disabled) | 
|  | return; | 
|  | /* | 
|  | * On hash if PKEY feature is not enabled, disable KUAP too. | 
|  | */ | 
|  | if (!early_radix_enabled() && !early_mmu_has_feature(MMU_FTR_PKEY)) | 
|  | return; | 
|  |  | 
|  | if (smp_processor_id() == boot_cpuid) { | 
|  | pr_info("Activating Kernel Userspace Access Prevention\n"); | 
|  | cur_cpu_spec->mmu_features |= MMU_FTR_KUAP; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Set the default kernel AMR values on all cpus. | 
|  | */ | 
|  | mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); | 
|  | isync(); | 
|  | } | 
|  | #endif | 
|  |  | 
|  | #ifdef CONFIG_PPC_MEM_KEYS | 
|  | void pkey_mm_init(struct mm_struct *mm) | 
|  | { | 
|  | if (!mmu_has_feature(MMU_FTR_PKEY)) | 
|  | return; | 
|  | mm_pkey_allocation_map(mm) = initial_allocation_mask; | 
|  | mm->context.execute_only_pkey = execute_only_key; | 
|  | } | 
|  |  | 
|  | static inline void init_amr(int pkey, u8 init_bits) | 
|  | { | 
|  | u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey)); | 
|  | u64 old_amr = current_thread_amr() & ~((u64)(0x3ul) << pkeyshift(pkey)); | 
|  |  | 
|  | current->thread.regs->amr = old_amr | new_amr_bits; | 
|  | } | 
|  |  | 
|  | static inline void init_iamr(int pkey, u8 init_bits) | 
|  | { | 
|  | u64 new_iamr_bits = (((u64)init_bits & 0x1UL) << pkeyshift(pkey)); | 
|  | u64 old_iamr = current_thread_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey)); | 
|  |  | 
|  | if (!likely(pkey_execute_disable_supported)) | 
|  | return; | 
|  |  | 
|  | current->thread.regs->iamr = old_iamr | new_iamr_bits; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Set the access rights in AMR IAMR and UAMOR registers for @pkey to that | 
|  | * specified in @init_val. | 
|  | */ | 
|  | int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, | 
|  | unsigned long init_val) | 
|  | { | 
|  | u64 new_amr_bits = 0x0ul; | 
|  | u64 new_iamr_bits = 0x0ul; | 
|  | u64 pkey_bits, uamor_pkey_bits; | 
|  |  | 
|  | /* | 
|  | * Check whether the key is disabled by UAMOR. | 
|  | */ | 
|  | pkey_bits = 0x3ul << pkeyshift(pkey); | 
|  | uamor_pkey_bits = (default_uamor & pkey_bits); | 
|  |  | 
|  | /* | 
|  | * Both the bits in UAMOR corresponding to the key should be set | 
|  | */ | 
|  | if (uamor_pkey_bits != pkey_bits) | 
|  | return -EINVAL; | 
|  |  | 
|  | if (init_val & PKEY_DISABLE_EXECUTE) { | 
|  | if (!pkey_execute_disable_supported) | 
|  | return -EINVAL; | 
|  | new_iamr_bits |= IAMR_EX_BIT; | 
|  | } | 
|  | init_iamr(pkey, new_iamr_bits); | 
|  |  | 
|  | /* Set the bits we need in AMR: */ | 
|  | if (init_val & PKEY_DISABLE_ACCESS) | 
|  | new_amr_bits |= AMR_RD_BIT | AMR_WR_BIT; | 
|  | else if (init_val & PKEY_DISABLE_WRITE) | 
|  | new_amr_bits |= AMR_WR_BIT; | 
|  |  | 
|  | init_amr(pkey, new_amr_bits); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int execute_only_pkey(struct mm_struct *mm) | 
|  | { | 
|  | return mm->context.execute_only_pkey; | 
|  | } | 
|  |  | 
|  | static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma) | 
|  | { | 
|  | /* Do this check first since the vm_flags should be hot */ | 
|  | if ((vma->vm_flags & VM_ACCESS_FLAGS) != VM_EXEC) | 
|  | return false; | 
|  |  | 
|  | return (vma_pkey(vma) == vma->vm_mm->context.execute_only_pkey); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * This should only be called for *plain* mprotect calls. | 
|  | */ | 
|  | int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, | 
|  | int pkey) | 
|  | { | 
|  | /* | 
|  | * If the currently associated pkey is execute-only, but the requested | 
|  | * protection is not execute-only, move it back to the default pkey. | 
|  | */ | 
|  | if (vma_is_pkey_exec_only(vma) && (prot != PROT_EXEC)) | 
|  | return 0; | 
|  |  | 
|  | /* | 
|  | * The requested protection is execute-only. Hence let's use an | 
|  | * execute-only pkey. | 
|  | */ | 
|  | if (prot == PROT_EXEC) { | 
|  | pkey = execute_only_pkey(vma->vm_mm); | 
|  | if (pkey > 0) | 
|  | return pkey; | 
|  | } | 
|  |  | 
|  | /* Nothing to override. */ | 
|  | return vma_pkey(vma); | 
|  | } | 
|  |  | 
|  | static bool pkey_access_permitted(int pkey, bool write, bool execute) | 
|  | { | 
|  | int pkey_shift; | 
|  | u64 amr; | 
|  |  | 
|  | pkey_shift = pkeyshift(pkey); | 
|  | if (execute) | 
|  | return !(current_thread_iamr() & (IAMR_EX_BIT << pkey_shift)); | 
|  |  | 
|  | amr = current_thread_amr(); | 
|  | if (write) | 
|  | return !(amr & (AMR_WR_BIT << pkey_shift)); | 
|  |  | 
|  | return !(amr & (AMR_RD_BIT << pkey_shift)); | 
|  | } | 
|  |  | 
|  | bool arch_pte_access_permitted(u64 pte, bool write, bool execute) | 
|  | { | 
|  | if (!mmu_has_feature(MMU_FTR_PKEY)) | 
|  | return true; | 
|  |  | 
|  | return pkey_access_permitted(pte_to_pkey_bits(pte), write, execute); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * We only want to enforce protection keys on the current thread because we | 
|  | * effectively have no access to AMR/IAMR for other threads or any way to tell | 
|  | * which AMR/IAMR in a threaded process we could use. | 
|  | * | 
|  | * So do not enforce things if the VMA is not from the current mm, or if we are | 
|  | * in a kernel thread. | 
|  | */ | 
|  | bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, | 
|  | bool execute, bool foreign) | 
|  | { | 
|  | if (!mmu_has_feature(MMU_FTR_PKEY)) | 
|  | return true; | 
|  | /* | 
|  | * Do not enforce our key-permissions on a foreign vma. | 
|  | */ | 
|  | if (foreign || vma_is_foreign(vma)) | 
|  | return true; | 
|  |  | 
|  | return pkey_access_permitted(vma_pkey(vma), write, execute); | 
|  | } | 
|  |  | 
|  | void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm) | 
|  | { | 
|  | if (!mmu_has_feature(MMU_FTR_PKEY)) | 
|  | return; | 
|  |  | 
|  | /* Duplicate the oldmm pkey state in mm: */ | 
|  | mm_pkey_allocation_map(mm) = mm_pkey_allocation_map(oldmm); | 
|  | mm->context.execute_only_pkey = oldmm->context.execute_only_pkey; | 
|  | } | 
|  |  | 
|  | #endif /* CONFIG_PPC_MEM_KEYS */ |