| // SPDX-License-Identifier: GPL-2.0 |
| /* |
| * Copyright (C) 2024, Intel, Inc |
| * |
| * Author: |
| * Isaku Yamahata <isaku.yamahata at gmail.com> |
| */ |
| #include <linux/sizes.h> |
| |
| #include <test_util.h> |
| #include <kvm_util.h> |
| #include <processor.h> |
| #include <pthread.h> |
| |
/*
 * Geometry of the test region.  The values are arbitrary: the region covers
 * 2M plus one additional page, and TEST_NPAGES is that size in PAGE_SIZE
 * units.
 */
#define TEST_SIZE	(SZ_2M + PAGE_SIZE)
#define TEST_NPAGES	(TEST_SIZE / PAGE_SIZE)

/* Memslot ID used for the test region (arbitrary). */
#define TEST_SLOT	10
| |
| static void guest_code(uint64_t base_gva) |
| { |
| volatile uint64_t val __used; |
| int i; |
| |
| for (i = 0; i < TEST_NPAGES; i++) { |
| uint64_t *src = (uint64_t *)(base_gva + i * PAGE_SIZE); |
| |
| val = *src; |
| } |
| |
| GUEST_DONE(); |
| } |
| |
| struct slot_worker_data { |
| struct kvm_vm *vm; |
| u64 gpa; |
| uint32_t flags; |
| bool worker_ready; |
| bool prefault_ready; |
| bool recreate_slot; |
| }; |
| |
| static void *delete_slot_worker(void *__data) |
| { |
| struct slot_worker_data *data = __data; |
| struct kvm_vm *vm = data->vm; |
| |
| WRITE_ONCE(data->worker_ready, true); |
| |
| while (!READ_ONCE(data->prefault_ready)) |
| cpu_relax(); |
| |
| vm_mem_region_delete(vm, TEST_SLOT); |
| |
| while (!READ_ONCE(data->recreate_slot)) |
| cpu_relax(); |
| |
| vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, data->gpa, |
| TEST_SLOT, TEST_NPAGES, data->flags); |
| |
| return NULL; |
| } |
| |
| static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 base_gpa, u64 offset, |
| u64 size, u64 expected_left, bool private) |
| { |
| struct kvm_pre_fault_memory range = { |
| .gpa = base_gpa + offset, |
| .size = size, |
| .flags = 0, |
| }; |
| struct slot_worker_data data = { |
| .vm = vcpu->vm, |
| .gpa = base_gpa, |
| .flags = private ? KVM_MEM_GUEST_MEMFD : 0, |
| }; |
| bool slot_recreated = false; |
| pthread_t slot_worker; |
| int ret, save_errno; |
| u64 prev; |
| |
| /* |
| * Concurrently delete (and recreate) the slot to test KVM's handling |
| * of a racing memslot deletion with prefaulting. |
| */ |
| pthread_create(&slot_worker, NULL, delete_slot_worker, &data); |
| |
| while (!READ_ONCE(data.worker_ready)) |
| cpu_relax(); |
| |
| WRITE_ONCE(data.prefault_ready, true); |
| |
| for (;;) { |
| prev = range.size; |
| ret = __vcpu_ioctl(vcpu, KVM_PRE_FAULT_MEMORY, &range); |
| save_errno = errno; |
| TEST_ASSERT((range.size < prev) ^ (ret < 0), |
| "%sexpecting range.size to change on %s", |
| ret < 0 ? "not " : "", |
| ret < 0 ? "failure" : "success"); |
| |
| /* |
| * Immediately retry prefaulting if KVM was interrupted by an |
| * unrelated signal/event. |
| */ |
| if (ret < 0 && save_errno == EINTR) |
| continue; |
| |
| /* |
| * Tell the worker to recreate the slot in order to complete |
| * prefaulting (if prefault didn't already succeed before the |
| * slot was deleted) and/or to prepare for the next testcase. |
| * Wait for the worker to exit so that the next invocation of |
| * prefaulting is guaranteed to complete (assuming no KVM bugs). |
| */ |
| if (!slot_recreated) { |
| WRITE_ONCE(data.recreate_slot, true); |
| pthread_join(slot_worker, NULL); |
| slot_recreated = true; |
| |
| /* |
| * Retry prefaulting to get a stable result, i.e. to |
| * avoid seeing random EAGAIN failures. Don't retry if |
| * prefaulting already succeeded, as KVM disallows |
| * prefaulting with size=0, i.e. blindly retrying would |
| * result in test failures due to EINVAL. KVM should |
| * always return success if all bytes are prefaulted, |
| * i.e. there is no need to guard against EAGAIN being |
| * returned. |
| */ |
| if (range.size) |
| continue; |
| } |
| |
| /* |
| * All done if there are no remaining bytes to prefault, or if |
| * prefaulting failed (EINTR was handled above, and EAGAIN due |
| * to prefaulting a memslot that's being actively deleted should |
| * be impossible since the memslot has already been recreated). |
| */ |
| if (!range.size || ret < 0) |
| break; |
| } |
| |
| TEST_ASSERT(range.size == expected_left, |
| "Completed with %llu bytes left, expected %lu", |
| range.size, expected_left); |
| |
| /* |
| * Assert success if prefaulting the entire range should succeed, i.e. |
| * complete with no bytes remaining. Otherwise prefaulting should have |
| * failed due to ENOENT (due to RET_PF_EMULATE for emulated MMIO when |
| * no memslot exists). |
| */ |
| if (!expected_left) |
| TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_PRE_FAULT_MEMORY, ret, vcpu->vm); |
| else |
| TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT, |
| KVM_PRE_FAULT_MEMORY, ret, vcpu->vm); |
| } |
| |
| static void __test_pre_fault_memory(unsigned long vm_type, bool private) |
| { |
| uint64_t gpa, gva, alignment, guest_page_size; |
| const struct vm_shape shape = { |
| .mode = VM_MODE_DEFAULT, |
| .type = vm_type, |
| }; |
| struct kvm_vcpu *vcpu; |
| struct kvm_run *run; |
| struct kvm_vm *vm; |
| struct ucall uc; |
| |
| vm = vm_create_shape_with_one_vcpu(shape, &vcpu, guest_code); |
| |
| alignment = guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; |
| gpa = (vm->max_gfn - TEST_NPAGES) * guest_page_size; |
| #ifdef __s390x__ |
| alignment = max(0x100000UL, guest_page_size); |
| #else |
| alignment = SZ_2M; |
| #endif |
| gpa = align_down(gpa, alignment); |
| gva = gpa & ((1ULL << (vm->va_bits - 1)) - 1); |
| |
| vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa, TEST_SLOT, |
| TEST_NPAGES, private ? KVM_MEM_GUEST_MEMFD : 0); |
| virt_map(vm, gva, gpa, TEST_NPAGES); |
| |
| if (private) |
| vm_mem_set_private(vm, gpa, TEST_SIZE); |
| |
| pre_fault_memory(vcpu, gpa, 0, SZ_2M, 0, private); |
| pre_fault_memory(vcpu, gpa, SZ_2M, PAGE_SIZE * 2, PAGE_SIZE, private); |
| pre_fault_memory(vcpu, gpa, TEST_SIZE, PAGE_SIZE, PAGE_SIZE, private); |
| |
| vcpu_args_set(vcpu, 1, gva); |
| vcpu_run(vcpu); |
| |
| run = vcpu->run; |
| TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, |
| "Wanted KVM_EXIT_IO, got exit reason: %u (%s)", |
| run->exit_reason, exit_reason_str(run->exit_reason)); |
| |
| switch (get_ucall(vcpu, &uc)) { |
| case UCALL_ABORT: |
| REPORT_GUEST_ASSERT(uc); |
| break; |
| case UCALL_DONE: |
| break; |
| default: |
| TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); |
| break; |
| } |
| |
| kvm_vm_free(vm); |
| } |
| |
| static void test_pre_fault_memory(unsigned long vm_type, bool private) |
| { |
| if (vm_type && !(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(vm_type))) { |
| pr_info("Skipping tests for vm_type 0x%lx\n", vm_type); |
| return; |
| } |
| |
| __test_pre_fault_memory(vm_type, private); |
| } |
| |
| int main(int argc, char *argv[]) |
| { |
| TEST_REQUIRE(kvm_check_cap(KVM_CAP_PRE_FAULT_MEMORY)); |
| |
| test_pre_fault_memory(0, false); |
| #ifdef __x86_64__ |
| test_pre_fault_memory(KVM_X86_SW_PROTECTED_VM, false); |
| test_pre_fault_memory(KVM_X86_SW_PROTECTED_VM, true); |
| #endif |
| return 0; |
| } |