| // SPDX-License-Identifier: GPL-2.0-only OR MIT |
| /* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */ |
| /* Copyright 2025 Arm, Ltd. */ |
| |
| #include <linux/bitfield.h> |
| #include <linux/genalloc.h> |
| #include <linux/interrupt.h> |
| #include <linux/iopoll.h> |
| #include <linux/platform_device.h> |
| #include <linux/pm_runtime.h> |
| |
| #include <drm/drm_file.h> |
| #include <drm/drm_gem.h> |
| #include <drm/drm_gem_dma_helper.h> |
| #include <drm/drm_print.h> |
| #include <drm/ethosu_accel.h> |
| |
| #include "ethosu_device.h" |
| #include "ethosu_drv.h" |
| #include "ethosu_gem.h" |
| #include "ethosu_job.h" |
| |
| #define JOB_TIMEOUT_MS 500 |
| |
| static struct ethosu_job *to_ethosu_job(struct drm_sched_job *sched_job) |
| { |
| return container_of(sched_job, struct ethosu_job, base); |
| } |
| |
| static const char *ethosu_fence_get_driver_name(struct dma_fence *fence) |
| { |
| return "ethosu"; |
| } |
| |
| static const char *ethosu_fence_get_timeline_name(struct dma_fence *fence) |
| { |
| return "ethosu-npu"; |
| } |
| |
| static const struct dma_fence_ops ethosu_fence_ops = { |
| .get_driver_name = ethosu_fence_get_driver_name, |
| .get_timeline_name = ethosu_fence_get_timeline_name, |
| }; |
| |
| static void ethosu_job_hw_submit(struct ethosu_device *dev, struct ethosu_job *job) |
| { |
| struct drm_gem_dma_object *cmd_bo = to_drm_gem_dma_obj(job->cmd_bo); |
| struct ethosu_validated_cmdstream_info *cmd_info = to_ethosu_bo(job->cmd_bo)->info; |
| |
| for (int i = 0; i < job->region_cnt; i++) { |
| struct drm_gem_dma_object *bo; |
| int region = job->region_bo_num[i]; |
| |
| bo = to_drm_gem_dma_obj(job->region_bo[i]); |
| writel_relaxed(lower_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP(region)); |
| writel_relaxed(upper_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP_HI(region)); |
| dev_dbg(dev->base.dev, "Region %d base addr = %pad\n", region, &bo->dma_addr); |
| } |
| |
| if (job->sram_size) { |
| writel_relaxed(lower_32_bits(dev->sramphys), |
| dev->regs + NPU_REG_BASEP(ETHOSU_SRAM_REGION)); |
| writel_relaxed(upper_32_bits(dev->sramphys), |
| dev->regs + NPU_REG_BASEP_HI(ETHOSU_SRAM_REGION)); |
| dev_dbg(dev->base.dev, "Region %d base addr = %pad (SRAM)\n", |
| ETHOSU_SRAM_REGION, &dev->sramphys); |
| } |
| |
| writel_relaxed(lower_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE); |
| writel_relaxed(upper_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE_HI); |
| writel_relaxed(cmd_info->cmd_size, dev->regs + NPU_REG_QSIZE); |
| |
| writel(CMD_TRANSITION_TO_RUN, dev->regs + NPU_REG_CMD); |
| |
| dev_dbg(dev->base.dev, |
| "Submitted cmd at %pad to core\n", &cmd_bo->dma_addr); |
| } |
| |
| static int ethosu_acquire_object_fences(struct ethosu_job *job) |
| { |
| int i, ret; |
| struct drm_gem_object **bos = job->region_bo; |
| struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(job->cmd_bo)->info; |
| |
| for (i = 0; i < job->region_cnt; i++) { |
| bool is_write; |
| |
| if (!bos[i]) |
| break; |
| |
| ret = dma_resv_reserve_fences(bos[i]->resv, 1); |
| if (ret) |
| return ret; |
| |
| is_write = info->output_region[job->region_bo_num[i]]; |
| ret = drm_sched_job_add_implicit_dependencies(&job->base, bos[i], |
| is_write); |
| if (ret) |
| return ret; |
| } |
| |
| return 0; |
| } |
| |
| static void ethosu_attach_object_fences(struct ethosu_job *job) |
| { |
| int i; |
| struct dma_fence *fence = job->inference_done_fence; |
| struct drm_gem_object **bos = job->region_bo; |
| struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(job->cmd_bo)->info; |
| |
| for (i = 0; i < job->region_cnt; i++) |
| if (info->output_region[job->region_bo_num[i]]) |
| dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE); |
| } |
| |
| static int ethosu_job_push(struct ethosu_job *job) |
| { |
| struct ww_acquire_ctx acquire_ctx; |
| int ret; |
| |
| ret = drm_gem_lock_reservations(job->region_bo, job->region_cnt, &acquire_ctx); |
| if (ret) |
| return ret; |
| |
| ret = ethosu_acquire_object_fences(job); |
| if (ret) |
| goto out; |
| |
| ret = pm_runtime_resume_and_get(job->dev->base.dev); |
| if (!ret) { |
| guard(mutex)(&job->dev->sched_lock); |
| |
| drm_sched_job_arm(&job->base); |
| job->inference_done_fence = dma_fence_get(&job->base.s_fence->finished); |
| kref_get(&job->refcount); /* put by scheduler job completion */ |
| drm_sched_entity_push_job(&job->base); |
| ethosu_attach_object_fences(job); |
| } |
| |
| out: |
| drm_gem_unlock_reservations(job->region_bo, job->region_cnt, &acquire_ctx); |
| return ret; |
| } |
| |
| static void ethosu_job_err_cleanup(struct ethosu_job *job) |
| { |
| unsigned int i; |
| |
| for (i = 0; i < job->region_cnt; i++) |
| drm_gem_object_put(job->region_bo[i]); |
| |
| drm_gem_object_put(job->cmd_bo); |
| |
| kfree(job); |
| } |
| |
| static void ethosu_job_cleanup(struct kref *ref) |
| { |
| struct ethosu_job *job = container_of(ref, struct ethosu_job, |
| refcount); |
| |
| pm_runtime_put_autosuspend(job->dev->base.dev); |
| |
| dma_fence_put(job->done_fence); |
| dma_fence_put(job->inference_done_fence); |
| |
| ethosu_job_err_cleanup(job); |
| } |
| |
| static void ethosu_job_put(struct ethosu_job *job) |
| { |
| kref_put(&job->refcount, ethosu_job_cleanup); |
| } |
| |
| static void ethosu_job_free(struct drm_sched_job *sched_job) |
| { |
| struct ethosu_job *job = to_ethosu_job(sched_job); |
| |
| drm_sched_job_cleanup(sched_job); |
| ethosu_job_put(job); |
| } |
| |
| static struct dma_fence *ethosu_job_run(struct drm_sched_job *sched_job) |
| { |
| struct ethosu_job *job = to_ethosu_job(sched_job); |
| struct ethosu_device *dev = job->dev; |
| struct dma_fence *fence = job->done_fence; |
| |
| if (unlikely(job->base.s_fence->finished.error)) |
| return NULL; |
| |
| dma_fence_init(fence, ðosu_fence_ops, &dev->fence_lock, |
| dev->fence_context, ++dev->emit_seqno); |
| dma_fence_get(fence); |
| |
| scoped_guard(mutex, &dev->job_lock) { |
| dev->in_flight_job = job; |
| ethosu_job_hw_submit(dev, job); |
| } |
| |
| return fence; |
| } |
| |
| static void ethosu_job_handle_irq(struct ethosu_device *dev) |
| { |
| u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS); |
| |
| if (status & (STATUS_BUS_STATUS | STATUS_CMD_PARSE_ERR)) { |
| dev_err(dev->base.dev, "Error IRQ - %x\n", status); |
| drm_sched_fault(&dev->sched); |
| return; |
| } |
| |
| scoped_guard(mutex, &dev->job_lock) { |
| if (dev->in_flight_job) { |
| dma_fence_signal(dev->in_flight_job->done_fence); |
| dev->in_flight_job = NULL; |
| } |
| } |
| } |
| |
| static irqreturn_t ethosu_job_irq_handler_thread(int irq, void *data) |
| { |
| struct ethosu_device *dev = data; |
| |
| ethosu_job_handle_irq(dev); |
| |
| return IRQ_HANDLED; |
| } |
| |
| static irqreturn_t ethosu_job_irq_handler(int irq, void *data) |
| { |
| struct ethosu_device *dev = data; |
| u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS); |
| |
| if (!(status & STATUS_IRQ_RAISED)) |
| return IRQ_NONE; |
| |
| writel_relaxed(CMD_CLEAR_IRQ, dev->regs + NPU_REG_CMD); |
| return IRQ_WAKE_THREAD; |
| } |
| |
| static enum drm_gpu_sched_stat ethosu_job_timedout(struct drm_sched_job *bad) |
| { |
| struct ethosu_job *job = to_ethosu_job(bad); |
| struct ethosu_device *dev = job->dev; |
| bool running; |
| u32 *bocmds = to_drm_gem_dma_obj(job->cmd_bo)->vaddr; |
| u32 cmdaddr; |
| |
| cmdaddr = readl_relaxed(dev->regs + NPU_REG_QREAD); |
| running = FIELD_GET(STATUS_STATE_RUNNING, readl_relaxed(dev->regs + NPU_REG_STATUS)); |
| |
| if (running) { |
| int ret; |
| u32 reg; |
| |
| ret = readl_relaxed_poll_timeout(dev->regs + NPU_REG_QREAD, |
| reg, |
| reg != cmdaddr, |
| USEC_PER_MSEC, 100 * USEC_PER_MSEC); |
| |
| /* If still running and progress is being made, just return */ |
| if (!ret) |
| return DRM_GPU_SCHED_STAT_NO_HANG; |
| } |
| |
| dev_err(dev->base.dev, "NPU sched timed out: NPU %s, cmdstream offset 0x%x: 0x%x\n", |
| running ? "running" : "stopped", |
| cmdaddr, bocmds[cmdaddr / 4]); |
| |
| drm_sched_stop(&dev->sched, bad); |
| |
| scoped_guard(mutex, &dev->job_lock) |
| dev->in_flight_job = NULL; |
| |
| /* Proceed with reset now. */ |
| pm_runtime_force_suspend(dev->base.dev); |
| pm_runtime_force_resume(dev->base.dev); |
| |
| /* Restart the scheduler */ |
| drm_sched_start(&dev->sched, 0); |
| |
| return DRM_GPU_SCHED_STAT_RESET; |
| } |
| |
| static const struct drm_sched_backend_ops ethosu_sched_ops = { |
| .run_job = ethosu_job_run, |
| .timedout_job = ethosu_job_timedout, |
| .free_job = ethosu_job_free |
| }; |
| |
| int ethosu_job_init(struct ethosu_device *edev) |
| { |
| struct device *dev = edev->base.dev; |
| struct drm_sched_init_args args = { |
| .ops = ðosu_sched_ops, |
| .num_rqs = DRM_SCHED_PRIORITY_COUNT, |
| .credit_limit = 1, |
| .timeout = msecs_to_jiffies(JOB_TIMEOUT_MS), |
| .name = dev_name(dev), |
| .dev = dev, |
| }; |
| int ret; |
| |
| spin_lock_init(&edev->fence_lock); |
| ret = devm_mutex_init(dev, &edev->job_lock); |
| if (ret) |
| return ret; |
| ret = devm_mutex_init(dev, &edev->sched_lock); |
| if (ret) |
| return ret; |
| |
| edev->irq = platform_get_irq(to_platform_device(dev), 0); |
| if (edev->irq < 0) |
| return edev->irq; |
| |
| ret = devm_request_threaded_irq(dev, edev->irq, |
| ethosu_job_irq_handler, |
| ethosu_job_irq_handler_thread, |
| IRQF_SHARED, KBUILD_MODNAME, |
| edev); |
| if (ret) { |
| dev_err(dev, "failed to request irq\n"); |
| return ret; |
| } |
| |
| edev->fence_context = dma_fence_context_alloc(1); |
| |
| ret = drm_sched_init(&edev->sched, &args); |
| if (ret) { |
| dev_err(dev, "Failed to create scheduler: %d\n", ret); |
| goto err_sched; |
| } |
| |
| return 0; |
| |
| err_sched: |
| drm_sched_fini(&edev->sched); |
| return ret; |
| } |
| |
| void ethosu_job_fini(struct ethosu_device *dev) |
| { |
| drm_sched_fini(&dev->sched); |
| } |
| |
| int ethosu_job_open(struct ethosu_file_priv *ethosu_priv) |
| { |
| struct ethosu_device *dev = ethosu_priv->edev; |
| struct drm_gpu_scheduler *sched = &dev->sched; |
| int ret; |
| |
| ret = drm_sched_entity_init(ðosu_priv->sched_entity, |
| DRM_SCHED_PRIORITY_NORMAL, |
| &sched, 1, NULL); |
| return WARN_ON(ret); |
| } |
| |
| void ethosu_job_close(struct ethosu_file_priv *ethosu_priv) |
| { |
| struct drm_sched_entity *entity = ðosu_priv->sched_entity; |
| |
| drm_sched_entity_destroy(entity); |
| } |
| |
| static int ethosu_ioctl_submit_job(struct drm_device *dev, struct drm_file *file, |
| struct drm_ethosu_job *job) |
| { |
| struct ethosu_device *edev = to_ethosu_device(dev); |
| struct ethosu_file_priv *file_priv = file->driver_priv; |
| struct ethosu_job *ejob = NULL; |
| struct ethosu_validated_cmdstream_info *cmd_info; |
| int ret = 0; |
| |
| /* BO region 2 is reserved if SRAM is used */ |
| if (job->region_bo_handles[ETHOSU_SRAM_REGION] && job->sram_size) |
| return -EINVAL; |
| |
| if (edev->npu_info.sram_size < job->sram_size) |
| return -EINVAL; |
| |
| ejob = kzalloc(sizeof(*ejob), GFP_KERNEL); |
| if (!ejob) |
| return -ENOMEM; |
| |
| kref_init(&ejob->refcount); |
| |
| ejob->dev = edev; |
| ejob->sram_size = job->sram_size; |
| |
| ejob->done_fence = kzalloc(sizeof(*ejob->done_fence), GFP_KERNEL); |
| if (!ejob->done_fence) { |
| ret = -ENOMEM; |
| goto out_cleanup_job; |
| } |
| |
| ret = drm_sched_job_init(&ejob->base, |
| &file_priv->sched_entity, |
| 1, NULL, file->client_id); |
| if (ret) |
| goto out_put_job; |
| |
| ejob->cmd_bo = drm_gem_object_lookup(file, job->cmd_bo); |
| if (!ejob->cmd_bo) { |
| ret = -ENOENT; |
| goto out_cleanup_job; |
| } |
| cmd_info = to_ethosu_bo(ejob->cmd_bo)->info; |
| if (!cmd_info) { |
| ret = -EINVAL; |
| goto out_cleanup_job; |
| } |
| |
| for (int i = 0; i < NPU_BASEP_REGION_MAX; i++) { |
| struct drm_gem_object *gem; |
| |
| /* Can only omit a BO handle if the region is not used or used for SRAM */ |
| if (!job->region_bo_handles[i] && |
| (!cmd_info->region_size[i] || (i == ETHOSU_SRAM_REGION && job->sram_size))) |
| continue; |
| |
| if (job->region_bo_handles[i] && !cmd_info->region_size[i]) { |
| dev_err(dev->dev, |
| "Cmdstream BO handle %d set for unused region %d\n", |
| job->region_bo_handles[i], i); |
| ret = -EINVAL; |
| goto out_cleanup_job; |
| } |
| |
| gem = drm_gem_object_lookup(file, job->region_bo_handles[i]); |
| if (!gem) { |
| dev_err(dev->dev, |
| "Invalid BO handle %d for region %d\n", |
| job->region_bo_handles[i], i); |
| ret = -ENOENT; |
| goto out_cleanup_job; |
| } |
| |
| ejob->region_bo[ejob->region_cnt] = gem; |
| ejob->region_bo_num[ejob->region_cnt] = i; |
| ejob->region_cnt++; |
| |
| if (to_ethosu_bo(gem)->info) { |
| dev_err(dev->dev, |
| "Cmdstream BO handle %d used for region %d\n", |
| job->region_bo_handles[i], i); |
| ret = -EINVAL; |
| goto out_cleanup_job; |
| } |
| |
| /* Verify the command stream doesn't have accesses outside the BO */ |
| if (cmd_info->region_size[i] > gem->size) { |
| dev_err(dev->dev, |
| "cmd stream region %d size greater than BO size (%llu > %zu)\n", |
| i, cmd_info->region_size[i], gem->size); |
| ret = -EOVERFLOW; |
| goto out_cleanup_job; |
| } |
| } |
| ret = ethosu_job_push(ejob); |
| if (!ret) { |
| ethosu_job_put(ejob); |
| return 0; |
| } |
| |
| out_cleanup_job: |
| if (ret) |
| drm_sched_job_cleanup(&ejob->base); |
| out_put_job: |
| ethosu_job_err_cleanup(ejob); |
| |
| return ret; |
| } |
| |
| int ethosu_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file) |
| { |
| struct drm_ethosu_submit *args = data; |
| int ret = 0; |
| unsigned int i = 0; |
| |
| if (args->pad) { |
| drm_dbg(dev, "Reserved field in drm_ethosu_submit struct should be 0.\n"); |
| return -EINVAL; |
| } |
| |
| struct drm_ethosu_job __free(kvfree) *jobs = |
| kvmalloc_array(args->job_count, sizeof(*jobs), GFP_KERNEL); |
| if (!jobs) |
| return -ENOMEM; |
| |
| if (copy_from_user(jobs, |
| (void __user *)(uintptr_t)args->jobs, |
| args->job_count * sizeof(*jobs))) { |
| drm_dbg(dev, "Failed to copy incoming job array\n"); |
| return -EFAULT; |
| } |
| |
| for (i = 0; i < args->job_count; i++) { |
| ret = ethosu_ioctl_submit_job(dev, file, &jobs[i]); |
| if (ret) |
| return ret; |
| } |
| |
| return 0; |
| } |