| // SPDX-License-Identifier: GPL-2.0 |
| /* |
| * Copyright (C) 2020 - Google LLC |
| * Author: Quentin Perret <qperret@google.com> |
| */ |
| |
| #include <linux/io.h> |
| #include <linux/kvm_host.h> |
| #include <linux/memblock.h> |
| #include <linux/mm.h> |
| #include <linux/mutex.h> |
| #include <linux/of_fdt.h> |
| #include <linux/of_reserved_mem.h> |
| #include <linux/sort.h> |
| |
| #include <asm/kvm_pkvm.h> |
| |
| #include "hyp_constants.h" |
| |
| static struct reserved_mem *pkvm_firmware_mem; |
| static phys_addr_t *pvmfw_base = &kvm_nvhe_sym(pvmfw_base); |
| static phys_addr_t *pvmfw_size = &kvm_nvhe_sym(pvmfw_size); |
| |
| static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory); |
| static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr); |
| |
| phys_addr_t hyp_mem_base; |
| phys_addr_t hyp_mem_size; |
| |
| static int cmp_hyp_memblock(const void *p1, const void *p2) |
| { |
| const struct memblock_region *r1 = p1; |
| const struct memblock_region *r2 = p2; |
| |
| return r1->base < r2->base ? -1 : (r1->base > r2->base); |
| } |
| |
| static void __init sort_memblock_regions(void) |
| { |
| sort(hyp_memory, |
| *hyp_memblock_nr_ptr, |
| sizeof(struct memblock_region), |
| cmp_hyp_memblock, |
| NULL); |
| } |
| |
| static int __init register_memblock_regions(void) |
| { |
| struct memblock_region *reg; |
| |
| for_each_mem_region(reg) { |
| if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS) |
| return -ENOMEM; |
| |
| hyp_memory[*hyp_memblock_nr_ptr] = *reg; |
| (*hyp_memblock_nr_ptr)++; |
| } |
| sort_memblock_regions(); |
| |
| return 0; |
| } |
| |
| void __init kvm_hyp_reserve(void) |
| { |
| u64 hyp_mem_pages = 0; |
| int ret; |
| |
| if (!is_hyp_mode_available() || is_kernel_in_hyp_mode()) |
| return; |
| |
| if (kvm_get_mode() != KVM_MODE_PROTECTED) |
| return; |
| |
| ret = register_memblock_regions(); |
| if (ret) { |
| *hyp_memblock_nr_ptr = 0; |
| kvm_err("Failed to register hyp memblocks: %d\n", ret); |
| return; |
| } |
| |
| hyp_mem_pages += hyp_s1_pgtable_pages(); |
| hyp_mem_pages += host_s2_pgtable_pages(); |
| hyp_mem_pages += hyp_shadow_table_pages(KVM_SHADOW_VM_SIZE); |
| hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE); |
| hyp_mem_pages += hyp_ffa_proxy_pages(); |
| |
| /* |
| * Try to allocate a PMD-aligned region to reduce TLB pressure once |
| * this is unmapped from the host stage-2, and fallback to PAGE_SIZE. |
| */ |
| hyp_mem_size = hyp_mem_pages << PAGE_SHIFT; |
| hyp_mem_base = memblock_find_in_range(0, memblock_end_of_DRAM(), |
| ALIGN(hyp_mem_size, PMD_SIZE), |
| PMD_SIZE); |
| if (!hyp_mem_base) |
| hyp_mem_base = memblock_find_in_range(0, memblock_end_of_DRAM(), |
| hyp_mem_size, PAGE_SIZE); |
| else |
| hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE); |
| |
| if (!hyp_mem_base) { |
| kvm_err("Failed to reserve hyp memory\n"); |
| return; |
| } |
| memblock_reserve(hyp_mem_base, hyp_mem_size); |
| |
| kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20, |
| hyp_mem_base); |
| } |
| |
| /* |
| * Updates the state of the host's version of the vcpu state. |
| */ |
| static void update_vcpu_state(struct kvm_vcpu *vcpu, int shadow_handle) |
| { |
| vcpu->arch.pkvm.shadow_handle = shadow_handle; |
| } |
| |
| /* |
| * Allocates and donates memory for EL2 shadow structs. |
| * |
| * Allocates space for the shadow state, which includes the shadow vm as well as |
| * the shadow vcpu states. |
| * |
| * Stores an opaque handler in the kvm struct for future reference. |
| * |
| * Return 0 on success, negative error code on failure. |
| */ |
| static int __create_el2_shadow(struct kvm *kvm) |
| { |
| struct kvm_vcpu *vcpu, **vcpu_array; |
| size_t pgd_sz, shadow_sz; |
| void *pgd, *shadow_addr; |
| unsigned long idx; |
| int shadow_handle; |
| int ret, i; |
| |
| if (kvm->created_vcpus < 1) |
| return -EINVAL; |
| |
| pgd_sz = kvm_pgtable_stage2_pgd_size(kvm->arch.vtcr); |
| /* |
| * The PGD pages will be reclaimed using a hyp_memcache which implies |
| * page granularity. So, use alloc_pages_exact() to get individual |
| * refcounts. |
| */ |
| pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT); |
| if (!pgd) |
| return -ENOMEM; |
| |
| /* Allocate memory to donate to hyp for the kvm and vcpu state. */ |
| shadow_sz = PAGE_ALIGN(KVM_SHADOW_VM_SIZE + |
| SHADOW_VCPU_STATE_SIZE * kvm->created_vcpus); |
| shadow_addr = alloc_pages_exact(shadow_sz, GFP_KERNEL_ACCOUNT); |
| if (!shadow_addr) { |
| ret = -ENOMEM; |
| goto free_pgd; |
| } |
| |
| /* Stash the vcpu pointers into the PGD */ |
| BUILD_BUG_ON(KVM_MAX_VCPUS > (PAGE_SIZE / sizeof(u64))); |
| vcpu_array = pgd; |
| kvm_for_each_vcpu(idx, vcpu, kvm) |
| vcpu_array[idx] = vcpu; |
| |
| /* Donate the shadow memory to hyp and let hyp initialize it. */ |
| ret = kvm_call_hyp_nvhe(__pkvm_init_shadow, kvm, shadow_addr, shadow_sz, |
| pgd); |
| if (ret < 0) |
| goto free_shadow; |
| |
| shadow_handle = ret; |
| |
| /* Store the shadow handle given by hyp for future call reference. */ |
| kvm->arch.pkvm.shadow_handle = shadow_handle; |
| |
| /* Adjust host's vcpu state as it doesn't control it anymore. */ |
| for (i = 0; i < kvm->created_vcpus; i++) |
| update_vcpu_state(kvm->vcpus[i], shadow_handle); |
| |
| return 0; |
| |
| free_shadow: |
| free_pages_exact(shadow_addr, shadow_sz); |
| free_pgd: |
| free_pages_exact(pgd, pgd_sz); |
| return ret; |
| } |
| |
| int create_el2_shadow(struct kvm *kvm) |
| { |
| int ret = 0; |
| |
| mutex_lock(&kvm->arch.pkvm.shadow_lock); |
| if (!kvm->arch.pkvm.shadow_handle) |
| ret = __create_el2_shadow(kvm); |
| mutex_unlock(&kvm->arch.pkvm.shadow_lock); |
| |
| return ret; |
| } |
| |
| void kvm_shadow_destroy(struct kvm *kvm) |
| { |
| struct kvm_pinned_page *ppage, *tmp; |
| struct mm_struct *mm = current->mm; |
| struct list_head *ppages; |
| |
| if (kvm->arch.pkvm.shadow_handle) |
| WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_shadow, |
| kvm->arch.pkvm.shadow_handle)); |
| |
| free_hyp_memcache(&kvm->arch.pkvm.teardown_mc); |
| |
| ppages = &kvm->arch.pkvm.pinned_pages; |
| list_for_each_entry_safe(ppage, tmp, ppages, link) { |
| WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_reclaim_page, |
| page_to_pfn(ppage->page))); |
| cond_resched(); |
| |
| account_locked_vm(mm, 1, false); |
| unpin_user_pages_dirty_lock(&ppage->page, 1, true); |
| list_del(&ppage->link); |
| kfree(ppage); |
| } |
| } |
| |
| static int __init pkvm_firmware_rmem_err(struct reserved_mem *rmem, |
| const char *reason) |
| { |
| phys_addr_t end = rmem->base + rmem->size; |
| |
| kvm_err("Ignoring pkvm guest firmware memory reservation [%pa - %pa]: %s\n", |
| &rmem->base, &end, reason); |
| return -EINVAL; |
| } |
| |
| static int __init pkvm_firmware_rmem_init(struct reserved_mem *rmem) |
| { |
| unsigned long node = rmem->fdt_node; |
| |
| if (pkvm_firmware_mem) |
| return pkvm_firmware_rmem_err(rmem, "duplicate reservation"); |
| |
| if (!of_get_flat_dt_prop(node, "no-map", NULL)) |
| return pkvm_firmware_rmem_err(rmem, "missing \"no-map\" property"); |
| |
| if (of_get_flat_dt_prop(node, "reusable", NULL)) |
| return pkvm_firmware_rmem_err(rmem, "\"reusable\" property unsupported"); |
| |
| if (!PAGE_ALIGNED(rmem->base)) |
| return pkvm_firmware_rmem_err(rmem, "base is not page-aligned"); |
| |
| if (!PAGE_ALIGNED(rmem->size)) |
| return pkvm_firmware_rmem_err(rmem, "size is not page-aligned"); |
| |
| *pvmfw_size = rmem->size; |
| *pvmfw_base = rmem->base; |
| pkvm_firmware_mem = rmem; |
| return 0; |
| } |
| RESERVEDMEM_OF_DECLARE(pkvm_firmware, "linux,pkvm-guest-firmware-memory", |
| pkvm_firmware_rmem_init); |
| |
| static int __init pkvm_firmware_rmem_clear(void) |
| { |
| void *addr; |
| phys_addr_t size; |
| |
| if (likely(!pkvm_firmware_mem) || is_protected_kvm_enabled()) |
| return 0; |
| |
| kvm_info("Clearing unused pKVM firmware memory\n"); |
| size = pkvm_firmware_mem->size; |
| addr = memremap(pkvm_firmware_mem->base, size, MEMREMAP_WB); |
| if (!addr) |
| return -EINVAL; |
| |
| memset(addr, 0, size); |
| dcache_clean_poc((unsigned long)addr, (unsigned long)addr + size); |
| memunmap(addr); |
| return 0; |
| } |
| device_initcall_sync(pkvm_firmware_rmem_clear); |
| |
| static int pkvm_vm_ioctl_set_fw_ipa(struct kvm *kvm, u64 ipa) |
| { |
| int ret = 0; |
| |
| if (!pkvm_firmware_mem) |
| return -EINVAL; |
| |
| mutex_lock(&kvm->arch.pkvm.shadow_lock); |
| if (kvm->arch.pkvm.shadow_handle) { |
| ret = -EBUSY; |
| goto out_unlock; |
| } |
| |
| kvm->arch.pkvm.pvmfw_load_addr = ipa; |
| out_unlock: |
| mutex_unlock(&kvm->arch.pkvm.shadow_lock); |
| return ret; |
| } |
| |
| static int pkvm_vm_ioctl_info(struct kvm *kvm, |
| struct kvm_protected_vm_info __user *info) |
| { |
| struct kvm_protected_vm_info kinfo = { |
| .firmware_size = pkvm_firmware_mem ? |
| pkvm_firmware_mem->size : |
| 0, |
| }; |
| |
| return copy_to_user(info, &kinfo, sizeof(kinfo)) ? -EFAULT : 0; |
| } |
| |
| int kvm_arm_vm_ioctl_pkvm(struct kvm *kvm, struct kvm_enable_cap *cap) |
| { |
| if (cap->args[1] || cap->args[2] || cap->args[3]) |
| return -EINVAL; |
| |
| switch (cap->flags) { |
| case KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FW_IPA: |
| return pkvm_vm_ioctl_set_fw_ipa(kvm, cap->args[0]); |
| case KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO: |
| return pkvm_vm_ioctl_info(kvm, (void __force __user *)cap->args[0]); |
| default: |
| return -EINVAL; |
| } |
| |
| return 0; |
| } |
| |
| int kvm_init_pvm(struct kvm *kvm, unsigned long type) |
| { |
| mutex_init(&kvm->arch.pkvm.shadow_lock); |
| kvm->arch.pkvm.pvmfw_load_addr = PVMFW_INVALID_LOAD_ADDR; |
| |
| if (!(type & KVM_VM_TYPE_ARM_PROTECTED)) |
| return 0; |
| |
| if (!is_protected_kvm_enabled()) |
| return -EINVAL; |
| |
| kvm->arch.pkvm.enabled = true; |
| return 0; |
| } |