|  | // SPDX-License-Identifier: GPL-2.0 OR MIT | 
|  | /* | 
|  | * Copyright 2014-2022 Advanced Micro Devices, Inc. | 
|  | * | 
|  | * Permission is hereby granted, free of charge, to any person obtaining a | 
|  | * copy of this software and associated documentation files (the "Software"), | 
|  | * to deal in the Software without restriction, including without limitation | 
|  | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | 
|  | * and/or sell copies of the Software, and to permit persons to whom the | 
|  | * Software is furnished to do so, subject to the following conditions: | 
|  | * | 
|  | * The above copyright notice and this permission notice shall be included in | 
|  | * all copies or substantial portions of the Software. | 
|  | * | 
|  | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
|  | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
|  | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL | 
|  | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | 
|  | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | 
|  | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | 
|  | * OTHER DEALINGS IN THE SOFTWARE. | 
|  | * | 
|  | */ | 
|  |  | 
|  | #include <linux/slab.h> | 
|  | #include <linux/mutex.h> | 
|  | #include "kfd_device_queue_manager.h" | 
|  | #include "kfd_kernel_queue.h" | 
|  | #include "kfd_priv.h" | 
|  |  | 
/*
 * Advance the runlist IB write pointer.
 *
 * @wptr is kept in dwords, while @increment_bytes and @buffer_size_bytes
 * are byte counts. A warning is raised when the new position lies past
 * the end of the buffer; the pointer is updated regardless.
 */
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
				unsigned int buffer_size_bytes)
{
	unsigned int next_dw = increment_bytes / sizeof(uint32_t) + *wptr;

	WARN(buffer_size_bytes < (next_dw * sizeof(uint32_t)),
	     "Runlist IB overflow");
	*wptr = next_dw;
}
|  |  | 
|  | static void pm_calc_rlib_size(struct packet_manager *pm, | 
|  | unsigned int *rlib_size, | 
|  | bool *over_subscription) | 
|  | { | 
|  | unsigned int process_count, queue_count, compute_queue_count, gws_queue_count; | 
|  | unsigned int map_queue_size; | 
|  | unsigned int max_proc_per_quantum = 1; | 
|  | struct kfd_dev *dev = pm->dqm->dev; | 
|  |  | 
|  | process_count = pm->dqm->processes_count; | 
|  | queue_count = pm->dqm->active_queue_count; | 
|  | compute_queue_count = pm->dqm->active_cp_queue_count; | 
|  | gws_queue_count = pm->dqm->gws_queue_count; | 
|  |  | 
|  | /* check if there is over subscription | 
|  | * Note: the arbitration between the number of VMIDs and | 
|  | * hws_max_conc_proc has been done in | 
|  | * kgd2kfd_device_init(). | 
|  | */ | 
|  | *over_subscription = false; | 
|  |  | 
|  | if (dev->max_proc_per_quantum > 1) | 
|  | max_proc_per_quantum = dev->max_proc_per_quantum; | 
|  |  | 
|  | if ((process_count > max_proc_per_quantum) || | 
|  | compute_queue_count > get_cp_queues_num(pm->dqm) || | 
|  | gws_queue_count > 1) { | 
|  | *over_subscription = true; | 
|  | pr_debug("Over subscribed runlist\n"); | 
|  | } | 
|  |  | 
|  | map_queue_size = pm->pmf->map_queues_size; | 
|  | /* calculate run list ib allocation size */ | 
|  | *rlib_size = process_count * pm->pmf->map_process_size + | 
|  | queue_count * map_queue_size; | 
|  |  | 
|  | /* | 
|  | * Increase the allocation size in case we need a chained run list | 
|  | * when over subscription | 
|  | */ | 
|  | if (*over_subscription) | 
|  | *rlib_size += pm->pmf->runlist_size; | 
|  |  | 
|  | pr_debug("runlist ib size %d\n", *rlib_size); | 
|  | } | 
|  |  | 
|  | static int pm_allocate_runlist_ib(struct packet_manager *pm, | 
|  | unsigned int **rl_buffer, | 
|  | uint64_t *rl_gpu_buffer, | 
|  | unsigned int *rl_buffer_size, | 
|  | bool *is_over_subscription) | 
|  | { | 
|  | int retval; | 
|  |  | 
|  | if (WARN_ON(pm->allocated)) | 
|  | return -EINVAL; | 
|  |  | 
|  | pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription); | 
|  |  | 
|  | mutex_lock(&pm->lock); | 
|  |  | 
|  | retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size, | 
|  | &pm->ib_buffer_obj); | 
|  |  | 
|  | if (retval) { | 
|  | pr_err("Failed to allocate runlist IB\n"); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | *(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr; | 
|  | *rl_gpu_buffer = pm->ib_buffer_obj->gpu_addr; | 
|  |  | 
|  | memset(*rl_buffer, 0, *rl_buffer_size); | 
|  | pm->allocated = true; | 
|  |  | 
|  | out: | 
|  | mutex_unlock(&pm->lock); | 
|  | return retval; | 
|  | } | 
|  |  | 
|  | static int pm_create_runlist_ib(struct packet_manager *pm, | 
|  | struct list_head *queues, | 
|  | uint64_t *rl_gpu_addr, | 
|  | size_t *rl_size_bytes) | 
|  | { | 
|  | unsigned int alloc_size_bytes; | 
|  | unsigned int *rl_buffer, rl_wptr, i; | 
|  | int retval, processes_mapped; | 
|  | struct device_process_node *cur; | 
|  | struct qcm_process_device *qpd; | 
|  | struct queue *q; | 
|  | struct kernel_queue *kq; | 
|  | bool is_over_subscription; | 
|  |  | 
|  | rl_wptr = retval = processes_mapped = 0; | 
|  |  | 
|  | retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr, | 
|  | &alloc_size_bytes, &is_over_subscription); | 
|  | if (retval) | 
|  | return retval; | 
|  |  | 
|  | *rl_size_bytes = alloc_size_bytes; | 
|  | pm->ib_size_bytes = alloc_size_bytes; | 
|  |  | 
|  | pr_debug("Building runlist ib process count: %d queues count %d\n", | 
|  | pm->dqm->processes_count, pm->dqm->active_queue_count); | 
|  |  | 
|  | /* build the run list ib packet */ | 
|  | list_for_each_entry(cur, queues, list) { | 
|  | qpd = cur->qpd; | 
|  | /* build map process packet */ | 
|  | if (processes_mapped >= pm->dqm->processes_count) { | 
|  | pr_debug("Not enough space left in runlist IB\n"); | 
|  | pm_release_ib(pm); | 
|  | return -ENOMEM; | 
|  | } | 
|  |  | 
|  | retval = pm->pmf->map_process(pm, &rl_buffer[rl_wptr], qpd); | 
|  | if (retval) | 
|  | return retval; | 
|  |  | 
|  | processes_mapped++; | 
|  | inc_wptr(&rl_wptr, pm->pmf->map_process_size, | 
|  | alloc_size_bytes); | 
|  |  | 
|  | list_for_each_entry(kq, &qpd->priv_queue_list, list) { | 
|  | if (!kq->queue->properties.is_active) | 
|  | continue; | 
|  |  | 
|  | pr_debug("static_queue, mapping kernel q %d, is debug status %d\n", | 
|  | kq->queue->queue, qpd->is_debug); | 
|  |  | 
|  | retval = pm->pmf->map_queues(pm, | 
|  | &rl_buffer[rl_wptr], | 
|  | kq->queue, | 
|  | qpd->is_debug); | 
|  | if (retval) | 
|  | return retval; | 
|  |  | 
|  | inc_wptr(&rl_wptr, | 
|  | pm->pmf->map_queues_size, | 
|  | alloc_size_bytes); | 
|  | } | 
|  |  | 
|  | list_for_each_entry(q, &qpd->queues_list, list) { | 
|  | if (!q->properties.is_active) | 
|  | continue; | 
|  |  | 
|  | pr_debug("static_queue, mapping user queue %d, is debug status %d\n", | 
|  | q->queue, qpd->is_debug); | 
|  |  | 
|  | retval = pm->pmf->map_queues(pm, | 
|  | &rl_buffer[rl_wptr], | 
|  | q, | 
|  | qpd->is_debug); | 
|  |  | 
|  | if (retval) | 
|  | return retval; | 
|  |  | 
|  | inc_wptr(&rl_wptr, | 
|  | pm->pmf->map_queues_size, | 
|  | alloc_size_bytes); | 
|  | } | 
|  | } | 
|  |  | 
|  | pr_debug("Finished map process and queues to runlist\n"); | 
|  |  | 
|  | if (is_over_subscription) { | 
|  | if (!pm->is_over_subscription) | 
|  | pr_warn("Runlist is getting oversubscribed. Expect reduced ROCm performance.\n"); | 
|  | retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr], | 
|  | *rl_gpu_addr, | 
|  | alloc_size_bytes / sizeof(uint32_t), | 
|  | true); | 
|  | } | 
|  | pm->is_over_subscription = is_over_subscription; | 
|  |  | 
|  | for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++) | 
|  | pr_debug("0x%2X ", rl_buffer[i]); | 
|  | pr_debug("\n"); | 
|  |  | 
|  | return retval; | 
|  | } | 
|  |  | 
|  | int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) | 
|  | { | 
|  | switch (dqm->dev->adev->asic_type) { | 
|  | case CHIP_KAVERI: | 
|  | case CHIP_HAWAII: | 
|  | /* PM4 packet structures on CIK are the same as on VI */ | 
|  | case CHIP_CARRIZO: | 
|  | case CHIP_TONGA: | 
|  | case CHIP_FIJI: | 
|  | case CHIP_POLARIS10: | 
|  | case CHIP_POLARIS11: | 
|  | case CHIP_POLARIS12: | 
|  | case CHIP_VEGAM: | 
|  | pm->pmf = &kfd_vi_pm_funcs; | 
|  | break; | 
|  | default: | 
|  | if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2)) | 
|  | pm->pmf = &kfd_aldebaran_pm_funcs; | 
|  | else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1)) | 
|  | pm->pmf = &kfd_v9_pm_funcs; | 
|  | else { | 
|  | WARN(1, "Unexpected ASIC family %u", | 
|  | dqm->dev->adev->asic_type); | 
|  | return -EINVAL; | 
|  | } | 
|  | } | 
|  |  | 
|  | pm->dqm = dqm; | 
|  | mutex_init(&pm->lock); | 
|  | pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ); | 
|  | if (!pm->priv_queue) { | 
|  | mutex_destroy(&pm->lock); | 
|  | return -ENOMEM; | 
|  | } | 
|  | pm->allocated = false; | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | void pm_uninit(struct packet_manager *pm, bool hanging) | 
|  | { | 
|  | mutex_destroy(&pm->lock); | 
|  | kernel_queue_uninit(pm->priv_queue, hanging); | 
|  | pm->priv_queue = NULL; | 
|  | } | 
|  |  | 
|  | int pm_send_set_resources(struct packet_manager *pm, | 
|  | struct scheduling_resources *res) | 
|  | { | 
|  | uint32_t *buffer, size; | 
|  | int retval = 0; | 
|  |  | 
|  | size = pm->pmf->set_resources_size; | 
|  | mutex_lock(&pm->lock); | 
|  | kq_acquire_packet_buffer(pm->priv_queue, | 
|  | size / sizeof(uint32_t), | 
|  | (unsigned int **)&buffer); | 
|  | if (!buffer) { | 
|  | pr_err("Failed to allocate buffer on kernel queue\n"); | 
|  | retval = -ENOMEM; | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | retval = pm->pmf->set_resources(pm, buffer, res); | 
|  | if (!retval) | 
|  | kq_submit_packet(pm->priv_queue); | 
|  | else | 
|  | kq_rollback_packet(pm->priv_queue); | 
|  |  | 
|  | out: | 
|  | mutex_unlock(&pm->lock); | 
|  |  | 
|  | return retval; | 
|  | } | 
|  |  | 
|  | int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) | 
|  | { | 
|  | uint64_t rl_gpu_ib_addr; | 
|  | uint32_t *rl_buffer; | 
|  | size_t rl_ib_size, packet_size_dwords; | 
|  | int retval; | 
|  |  | 
|  | retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr, | 
|  | &rl_ib_size); | 
|  | if (retval) | 
|  | goto fail_create_runlist_ib; | 
|  |  | 
|  | pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr); | 
|  |  | 
|  | packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t); | 
|  | mutex_lock(&pm->lock); | 
|  |  | 
|  | retval = kq_acquire_packet_buffer(pm->priv_queue, | 
|  | packet_size_dwords, &rl_buffer); | 
|  | if (retval) | 
|  | goto fail_acquire_packet_buffer; | 
|  |  | 
|  | retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr, | 
|  | rl_ib_size / sizeof(uint32_t), false); | 
|  | if (retval) | 
|  | goto fail_create_runlist; | 
|  |  | 
|  | kq_submit_packet(pm->priv_queue); | 
|  |  | 
|  | mutex_unlock(&pm->lock); | 
|  |  | 
|  | return retval; | 
|  |  | 
|  | fail_create_runlist: | 
|  | kq_rollback_packet(pm->priv_queue); | 
|  | fail_acquire_packet_buffer: | 
|  | mutex_unlock(&pm->lock); | 
|  | fail_create_runlist_ib: | 
|  | pm_release_ib(pm); | 
|  | return retval; | 
|  | } | 
|  |  | 
|  | int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, | 
|  | uint64_t fence_value) | 
|  | { | 
|  | uint32_t *buffer, size; | 
|  | int retval = 0; | 
|  |  | 
|  | if (WARN_ON(!fence_address)) | 
|  | return -EFAULT; | 
|  |  | 
|  | size = pm->pmf->query_status_size; | 
|  | mutex_lock(&pm->lock); | 
|  | kq_acquire_packet_buffer(pm->priv_queue, | 
|  | size / sizeof(uint32_t), (unsigned int **)&buffer); | 
|  | if (!buffer) { | 
|  | pr_err("Failed to allocate buffer on kernel queue\n"); | 
|  | retval = -ENOMEM; | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value); | 
|  | if (!retval) | 
|  | kq_submit_packet(pm->priv_queue); | 
|  | else | 
|  | kq_rollback_packet(pm->priv_queue); | 
|  |  | 
|  | out: | 
|  | mutex_unlock(&pm->lock); | 
|  | return retval; | 
|  | } | 
|  |  | 
|  | int pm_send_unmap_queue(struct packet_manager *pm, | 
|  | enum kfd_unmap_queues_filter filter, | 
|  | uint32_t filter_param, bool reset) | 
|  | { | 
|  | uint32_t *buffer, size; | 
|  | int retval = 0; | 
|  |  | 
|  | size = pm->pmf->unmap_queues_size; | 
|  | mutex_lock(&pm->lock); | 
|  | kq_acquire_packet_buffer(pm->priv_queue, | 
|  | size / sizeof(uint32_t), (unsigned int **)&buffer); | 
|  | if (!buffer) { | 
|  | pr_err("Failed to allocate buffer on kernel queue\n"); | 
|  | retval = -ENOMEM; | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | retval = pm->pmf->unmap_queues(pm, buffer, filter, filter_param, reset); | 
|  | if (!retval) | 
|  | kq_submit_packet(pm->priv_queue); | 
|  | else | 
|  | kq_rollback_packet(pm->priv_queue); | 
|  |  | 
|  | out: | 
|  | mutex_unlock(&pm->lock); | 
|  | return retval; | 
|  | } | 
|  |  | 
|  | void pm_release_ib(struct packet_manager *pm) | 
|  | { | 
|  | mutex_lock(&pm->lock); | 
|  | if (pm->allocated) { | 
|  | kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj); | 
|  | pm->allocated = false; | 
|  | } | 
|  | mutex_unlock(&pm->lock); | 
|  | } | 
|  |  | 
|  | #if defined(CONFIG_DEBUG_FS) | 
|  |  | 
|  | int pm_debugfs_runlist(struct seq_file *m, void *data) | 
|  | { | 
|  | struct packet_manager *pm = data; | 
|  |  | 
|  | mutex_lock(&pm->lock); | 
|  |  | 
|  | if (!pm->allocated) { | 
|  | seq_puts(m, "  No active runlist\n"); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | seq_hex_dump(m, "  ", DUMP_PREFIX_OFFSET, 32, 4, | 
|  | pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false); | 
|  |  | 
|  | out: | 
|  | mutex_unlock(&pm->lock); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int pm_debugfs_hang_hws(struct packet_manager *pm) | 
|  | { | 
|  | uint32_t *buffer, size; | 
|  | int r = 0; | 
|  |  | 
|  | if (!pm->priv_queue) | 
|  | return -EAGAIN; | 
|  |  | 
|  | size = pm->pmf->query_status_size; | 
|  | mutex_lock(&pm->lock); | 
|  | kq_acquire_packet_buffer(pm->priv_queue, | 
|  | size / sizeof(uint32_t), (unsigned int **)&buffer); | 
|  | if (!buffer) { | 
|  | pr_err("Failed to allocate buffer on kernel queue\n"); | 
|  | r = -ENOMEM; | 
|  | goto out; | 
|  | } | 
|  | memset(buffer, 0x55, size); | 
|  | kq_submit_packet(pm->priv_queue); | 
|  |  | 
|  | pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.", | 
|  | buffer[0], buffer[1], buffer[2], buffer[3], | 
|  | buffer[4], buffer[5], buffer[6]); | 
|  | out: | 
|  | mutex_unlock(&pm->lock); | 
|  | return r; | 
|  | } | 
|  |  | 
|  |  | 
|  | #endif |