| // SPDX-License-Identifier: GPL-2.0-only |
| /* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES |
| */ |
| #include "iommufd_private.h" |
| |
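| /* |
| * Object destroy callback for IOMMUFD_OBJ_VIOMMU: let the driver clean up via |
| * ops->destroy(), then drop the reference on the parent HWPT and release the |
| * vdevs xarray. |
| */ |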
| void iommufd_viommu_destroy(struct iommufd_object *obj) |
| { |
| struct iommufd_viommu *viommu = |
| container_of(obj, struct iommufd_viommu, obj); |
| |
| if (viommu->ops && viommu->ops->destroy) |
| viommu->ops->destroy(viommu); |
| refcount_dec(&viommu->hwpt->common.obj.users); |
| xa_destroy(&viommu->vdevs); |
| } |
| |
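| /* |
| * IOMMU_VIOMMU_ALLOC: allocate a driver-sized vIOMMU object on top of a |
| * nesting-parent paging HWPT and have the driver initialize it through |
| * ops->viommu_init(). |
| */ |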
| int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd) |
| { |
| struct iommu_viommu_alloc *cmd = ucmd->cmd; |
| const struct iommu_user_data user_data = { |
| .type = cmd->type, |
| .uptr = u64_to_user_ptr(cmd->data_uptr), |
| .len = cmd->data_len, |
| }; |
| struct iommufd_hwpt_paging *hwpt_paging; |
| struct iommufd_viommu *viommu; |
| struct iommufd_device *idev; |
| const struct iommu_ops *ops; |
| size_t viommu_size; |
| int rc; |
| |
| if (cmd->flags || cmd->type == IOMMU_VIOMMU_TYPE_DEFAULT) |
| return -EOPNOTSUPP; |
| |
| idev = iommufd_get_device(ucmd, cmd->dev_id); |
| if (IS_ERR(idev)) |
| return PTR_ERR(idev); |
| |
| ops = dev_iommu_ops(idev->dev); |
| if (!ops->get_viommu_size || !ops->viommu_init) { |
| rc = -EOPNOTSUPP; |
| goto out_put_idev; |
| } |
| |
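| /* |
| * Drivers typically embed the core structure at the start of their own, |
| * e.g. (hypothetical driver struct): |
| * struct my_viommu { struct iommufd_viommu core; ... }; |
| * and return the full driver structure size from ops->get_viommu_size(). |
| */ |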
| viommu_size = ops->get_viommu_size(idev->dev, cmd->type); |
| if (!viommu_size) { |
| rc = -EOPNOTSUPP; |
| goto out_put_idev; |
| } |
| |
| /* |
| * It is a driver bug to provide a viommu_size smaller than the core |
| * vIOMMU structure size. |
| */ |
| if (WARN_ON_ONCE(viommu_size < sizeof(*viommu))) { |
| rc = -EOPNOTSUPP; |
| goto out_put_idev; |
| } |
| |
| hwpt_paging = iommufd_get_hwpt_paging(ucmd, cmd->hwpt_id); |
| if (IS_ERR(hwpt_paging)) { |
| rc = PTR_ERR(hwpt_paging); |
| goto out_put_idev; |
| } |
| |
| if (!hwpt_paging->nest_parent) { |
| rc = -EINVAL; |
| goto out_put_hwpt; |
| } |
| |
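| /* |
| * The allocation is tied to the ucmd: the core finalizes the object on |
| * ioctl success and aborts/destroys it on failure, so the error paths |
| * below do not undo the viommu setup explicitly. |
| */ |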
| viommu = (struct iommufd_viommu *)_iommufd_object_alloc_ucmd( |
| ucmd, viommu_size, IOMMUFD_OBJ_VIOMMU); |
| if (IS_ERR(viommu)) { |
| rc = PTR_ERR(viommu); |
| goto out_put_hwpt; |
| } |
| |
| xa_init(&viommu->vdevs); |
| viommu->type = cmd->type; |
| viommu->ictx = ucmd->ictx; |
| viommu->hwpt = hwpt_paging; |
| refcount_inc(&viommu->hwpt->common.obj.users); |
| INIT_LIST_HEAD(&viommu->veventqs); |
| init_rwsem(&viommu->veventqs_rwsem); |
| /* |
| * A physical IOMMU is typically not hot-unpluggable. If a pluggable |
| * IOMMU instance exists, it is responsible for its own refcounting. |
| */ |
| viommu->iommu_dev = __iommu_get_iommu_dev(idev->dev); |
| |
| rc = ops->viommu_init(viommu, hwpt_paging->common.domain, |
| user_data.len ? &user_data : NULL); |
| if (rc) |
| goto out_put_hwpt; |
| |
| /* It is a driver bug if viommu_init() did not fill viommu->ops */ |
| if (WARN_ON_ONCE(!viommu->ops)) { |
| rc = -EOPNOTSUPP; |
| goto out_put_hwpt; |
| } |
| |
| cmd->out_viommu_id = viommu->obj.id; |
| rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); |
| |
| out_put_hwpt: |
| iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj); |
| out_put_idev: |
| iommufd_put_object(ucmd->ictx, &idev->obj); |
| return rc; |
| } |
| |
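| /* |
| * Tear down a vDEVICE with the idev's igroup lock held. Used by both the |
| * object abort path and iommufd_vdevice_destroy(). |
| */ |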
| void iommufd_vdevice_abort(struct iommufd_object *obj) |
| { |
| struct iommufd_vdevice *vdev = |
| container_of(obj, struct iommufd_vdevice, obj); |
| struct iommufd_viommu *viommu = vdev->viommu; |
| struct iommufd_device *idev = vdev->idev; |
| |
| lockdep_assert_held(&idev->igroup->lock); |
| |
| if (vdev->destroy) |
| vdev->destroy(vdev); |
| /* |
| * It is fine for xa_cmpxchg() to fail here: the entry was never |
| * installed if the allocation path's xa_cmpxchg() failed. |
| */ |
| xa_cmpxchg(&viommu->vdevs, vdev->virt_id, vdev, NULL, GFP_KERNEL); |
| refcount_dec(&viommu->obj.users); |
| idev->vdev = NULL; |
| } |
| |
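| /* |
| * Object destroy callback for IOMMUFD_OBJ_VDEVICE: take the igroup lock that |
| * iommufd_vdevice_abort() asserts, then drop the idev reference held since |
| * allocation. |
| */ |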
| void iommufd_vdevice_destroy(struct iommufd_object *obj) |
| { |
| struct iommufd_vdevice *vdev = |
| container_of(obj, struct iommufd_vdevice, obj); |
| struct iommufd_device *idev = vdev->idev; |
| struct iommufd_ctx *ictx = idev->ictx; |
| |
| mutex_lock(&idev->igroup->lock); |
| iommufd_vdevice_abort(obj); |
| mutex_unlock(&idev->igroup->lock); |
| iommufd_put_object(ictx, &idev->obj); |
| } |
| |
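| /* |
| * IOMMU_VDEVICE_ALLOC: bind an iommufd_device to a vIOMMU under a guest |
| * virtual ID. The idev's igroup lock serializes this against the idev |
| * destruction path. |
| */ |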
| int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd) |
| { |
| struct iommu_vdevice_alloc *cmd = ucmd->cmd; |
| struct iommufd_vdevice *vdev, *curr; |
| size_t vdev_size = sizeof(*vdev); |
| struct iommufd_viommu *viommu; |
| struct iommufd_device *idev; |
| u64 virt_id = cmd->virt_id; |
| int rc = 0; |
| |
| /* virt_id indexes an xarray */ |
| if (virt_id > ULONG_MAX) |
| return -EINVAL; |
| |
| viommu = iommufd_get_viommu(ucmd, cmd->viommu_id); |
| if (IS_ERR(viommu)) |
| return PTR_ERR(viommu); |
| |
| idev = iommufd_get_device(ucmd, cmd->dev_id); |
| if (IS_ERR(idev)) { |
| rc = PTR_ERR(idev); |
| goto out_put_viommu; |
| } |
| |
| if (viommu->iommu_dev != __iommu_get_iommu_dev(idev->dev)) { |
| rc = -EINVAL; |
| goto out_put_idev; |
| } |
| |
| mutex_lock(&idev->igroup->lock); |
| if (idev->destroying) { |
| rc = -ENOENT; |
| goto out_unlock_igroup; |
| } |
| |
| if (idev->vdev) { |
| rc = -EEXIST; |
| goto out_unlock_igroup; |
| } |
| |
| if (viommu->ops && viommu->ops->vdevice_size) { |
| /* |
| * It is a driver bug to: |
| * - provide an ops->vdevice_size smaller than the core structure size |
| * - provide ops->vdevice_size without a pairing ops->vdevice_init op |
| */ |
| if (WARN_ON_ONCE(viommu->ops->vdevice_size < vdev_size || |
| !viommu->ops->vdevice_init)) { |
| rc = -EOPNOTSUPP; |
| goto out_unlock_igroup; |
| } |
| vdev_size = viommu->ops->vdevice_size; |
| } |
| |
| vdev = (struct iommufd_vdevice *)_iommufd_object_alloc( |
| ucmd->ictx, vdev_size, IOMMUFD_OBJ_VDEVICE); |
| if (IS_ERR(vdev)) { |
| rc = PTR_ERR(vdev); |
| goto out_unlock_igroup; |
| } |
| |
| vdev->virt_id = virt_id; |
| vdev->viommu = viommu; |
| refcount_inc(&viommu->obj.users); |
| /* |
| * A wait_cnt reference is held on the idev for as long as we hold this |
| * pointer. iommufd_device_pre_destroy() will revoke it before the real |
| * destruction of the idev. |
| */ |
| vdev->idev = idev; |
| |
| /* |
| * iommufd_device_destroy() waits until idev->vdev is NULL before |
| * freeing the idev, which only happens once the vdev has finished its |
| * destruction. |
| */ |
| idev->vdev = vdev; |
| |
| curr = xa_cmpxchg(&viommu->vdevs, virt_id, NULL, vdev, GFP_KERNEL); |
| if (curr) { |
| rc = xa_err(curr) ?: -EEXIST; |
| goto out_abort; |
| } |
| |
| if (viommu->ops && viommu->ops->vdevice_init) { |
| rc = viommu->ops->vdevice_init(vdev); |
| if (rc) |
| goto out_abort; |
| } |
| |
| cmd->out_vdevice_id = vdev->obj.id; |
| rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); |
| if (rc) |
| goto out_abort; |
| iommufd_object_finalize(ucmd->ictx, &vdev->obj); |
| goto out_unlock_igroup; |
| |
| out_abort: |
| iommufd_object_abort_and_destroy(ucmd->ictx, &vdev->obj); |
| out_unlock_igroup: |
| mutex_unlock(&idev->igroup->lock); |
| out_put_idev: |
| if (rc) |
| iommufd_put_object(ucmd->ictx, &idev->obj); |
| out_put_viommu: |
| iommufd_put_object(ucmd->ictx, &viommu->obj); |
| return rc; |
| } |
| |
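| /* Undo iommufd_hw_queue_alloc_phys(): unpin, detach and destroy the access */ |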
| static void iommufd_hw_queue_destroy_access(struct iommufd_ctx *ictx, |
| struct iommufd_access *access, |
| u64 base_iova, size_t length) |
| { |
| u64 aligned_iova = PAGE_ALIGN_DOWN(base_iova); |
| u64 offset = base_iova - aligned_iova; |
| |
| iommufd_access_unpin_pages(access, aligned_iova, |
| PAGE_ALIGN(length + offset)); |
| iommufd_access_detach_internal(access); |
| iommufd_access_destroy_internal(ictx, access); |
| } |
| |
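| /* |
| * Object destroy callback for IOMMUFD_OBJ_HW_QUEUE: let the driver release its |
| * state first, then unwind the pinned access and the vIOMMU reference. |
| */ |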
| void iommufd_hw_queue_destroy(struct iommufd_object *obj) |
| { |
| struct iommufd_hw_queue *hw_queue = |
| container_of(obj, struct iommufd_hw_queue, obj); |
| |
| if (hw_queue->destroy) |
| hw_queue->destroy(hw_queue); |
| if (hw_queue->access) |
| iommufd_hw_queue_destroy_access(hw_queue->viommu->ictx, |
| hw_queue->access, |
| hw_queue->base_addr, |
| hw_queue->length); |
| if (hw_queue->viommu) |
| refcount_dec(&hw_queue->viommu->obj.users); |
| } |
| |
| /* |
| * When the HW accesses the guest queue via physical addresses, the underlying |
| * physical pages of the guest queue must be contiguous. Also, since |
| * IOMMUFD_CMD_IOAS_UNMAP could otherwise remove the mappings of the guest |
| * queue from the nesting parent iopt while the HW is still accessing the |
| * guest queue memory physically, such a HW queue must hold an access that |
| * pins the underlying pages to prevent the unmap from happening. |
| */ |
| static struct iommufd_access * |
| iommufd_hw_queue_alloc_phys(struct iommu_hw_queue_alloc *cmd, |
| struct iommufd_viommu *viommu, phys_addr_t *base_pa) |
| { |
| u64 aligned_iova = PAGE_ALIGN_DOWN(cmd->nesting_parent_iova); |
| u64 offset = cmd->nesting_parent_iova - aligned_iova; |
| struct iommufd_access *access; |
| struct page **pages; |
| size_t max_npages; |
| size_t length; |
| size_t i; |
| int rc; |
| |
| /* max_npages = DIV_ROUND_UP(offset + cmd->length, PAGE_SIZE) */ |
| if (check_add_overflow(offset, cmd->length, &length)) |
| return ERR_PTR(-ERANGE); |
| if (check_add_overflow(length, PAGE_SIZE - 1, &length)) |
| return ERR_PTR(-ERANGE); |
| max_npages = length / PAGE_SIZE; |
| /* length needs to be page aligned too */ |
| length = max_npages * PAGE_SIZE; |
| |
| /* |
| * Use kvcalloc() to avoid memory fragmentation for a large page array. |
| * Set __GFP_NOWARN to avoid syzkaller blowups. |
| */ |
| pages = kvcalloc(max_npages, sizeof(*pages), GFP_KERNEL | __GFP_NOWARN); |
| if (!pages) |
| return ERR_PTR(-ENOMEM); |
| |
| access = iommufd_access_create_internal(viommu->ictx); |
| if (IS_ERR(access)) { |
| rc = PTR_ERR(access); |
| goto out_free; |
| } |
| |
| rc = iommufd_access_attach_internal(access, viommu->hwpt->ioas); |
| if (rc) |
| goto out_destroy; |
| |
| rc = iommufd_access_pin_pages(access, aligned_iova, length, pages, 0); |
| if (rc) |
| goto out_detach; |
| |
| /* Validate that the underlying physical pages are contiguous */ |
| for (i = 1; i < max_npages; i++) { |
| if (page_to_pfn(pages[i]) == page_to_pfn(pages[i - 1]) + 1) |
| continue; |
| rc = -EFAULT; |
| goto out_unpin; |
| } |
| |
| *base_pa = (page_to_pfn(pages[0]) << PAGE_SHIFT) + offset; |
| kvfree(pages); |
| return access; |
| |
| out_unpin: |
| iommufd_access_unpin_pages(access, aligned_iova, length); |
| out_detach: |
| iommufd_access_detach_internal(access); |
| out_destroy: |
| iommufd_access_destroy_internal(viommu->ictx, access); |
| out_free: |
| kvfree(pages); |
| return ERR_PTR(rc); |
| } |
| |
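| /* |
| * IOMMU_HW_QUEUE_ALLOC: allocate a HW queue object backed by physically |
| * contiguous guest queue memory, pinned via an internal access, and hand it |
| * to the driver through ops->hw_queue_init_phys(). |
| */ |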
| int iommufd_hw_queue_alloc_ioctl(struct iommufd_ucmd *ucmd) |
| { |
| struct iommu_hw_queue_alloc *cmd = ucmd->cmd; |
| struct iommufd_hw_queue *hw_queue; |
| struct iommufd_viommu *viommu; |
| struct iommufd_access *access; |
| size_t hw_queue_size; |
| phys_addr_t base_pa; |
| u64 last; |
| int rc; |
| |
| if (cmd->flags || cmd->type == IOMMU_HW_QUEUE_TYPE_DEFAULT) |
| return -EOPNOTSUPP; |
| if (!cmd->length) |
| return -EINVAL; |
| if (check_add_overflow(cmd->nesting_parent_iova, cmd->length - 1, |
| &last)) |
| return -EOVERFLOW; |
| |
| viommu = iommufd_get_viommu(ucmd, cmd->viommu_id); |
| if (IS_ERR(viommu)) |
| return PTR_ERR(viommu); |
| |
| if (!viommu->ops || !viommu->ops->get_hw_queue_size || |
| !viommu->ops->hw_queue_init_phys) { |
| rc = -EOPNOTSUPP; |
| goto out_put_viommu; |
| } |
| |
| hw_queue_size = viommu->ops->get_hw_queue_size(viommu, cmd->type); |
| if (!hw_queue_size) { |
| rc = -EOPNOTSUPP; |
| goto out_put_viommu; |
| } |
| |
| /* |
| * It is a driver bug to provide a hw_queue_size smaller than the core |
| * HW queue structure size. |
| */ |
| if (WARN_ON_ONCE(hw_queue_size < sizeof(*hw_queue))) { |
| rc = -EOPNOTSUPP; |
| goto out_put_viommu; |
| } |
| |
| hw_queue = (struct iommufd_hw_queue *)_iommufd_object_alloc_ucmd( |
| ucmd, hw_queue_size, IOMMUFD_OBJ_HW_QUEUE); |
| if (IS_ERR(hw_queue)) { |
| rc = PTR_ERR(hw_queue); |
| goto out_put_viommu; |
| } |
| |
| access = iommufd_hw_queue_alloc_phys(cmd, viommu, &base_pa); |
| if (IS_ERR(access)) { |
| rc = PTR_ERR(access); |
| goto out_put_viommu; |
| } |
| |
| hw_queue->viommu = viommu; |
| refcount_inc(&viommu->obj.users); |
| hw_queue->access = access; |
| hw_queue->type = cmd->type; |
| hw_queue->length = cmd->length; |
| hw_queue->base_addr = cmd->nesting_parent_iova; |
| |
| rc = viommu->ops->hw_queue_init_phys(hw_queue, cmd->index, base_pa); |
| if (rc) |
| goto out_put_viommu; |
| |
| cmd->out_hw_queue_id = hw_queue->obj.id; |
| rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); |
| |
| out_put_viommu: |
| iommufd_put_object(ucmd->ictx, &viommu->obj); |
| return rc; |
| } |