| // SPDX-License-Identifier: GPL-2.0-only |
| /* |
| * Benchmarking code execution time inside the kernel |
| * |
| * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer |
| */ |
| #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
| |
| #include <linux/module.h> |
| #include <linux/time.h> |
| |
| #include <linux/perf_event.h> /* perf_event_create_kernel_counter() */ |
| |
| /* For concurrency testing */ |
| #include <linux/completion.h> |
| #include <linux/sched.h> |
| #include <linux/workqueue.h> |
| #include <linux/kthread.h> |
| |
| #include "time_bench.h" |
| |
| static int verbose = 1; |
| |
| /** TSC (Time-Stamp Counter) based ** |
| * See: linux/time_bench.h |
| * tsc_start_clock() and tsc_stop_clock() |
| */ |
| |
| /** Wall-clock based ** |
| */ |
| |
| /** PMU (Performance Monitor Unit) based ** |
| */ |
| #define PERF_FORMAT \ |
| (PERF_FORMAT_GROUP | PERF_FORMAT_ID | PERF_FORMAT_TOTAL_TIME_ENABLED | \ |
| PERF_FORMAT_TOTAL_TIME_RUNNING) |
| |
| struct raw_perf_event { |
| uint64_t config; /* event */ |
| uint64_t config1; /* umask */ |
| struct perf_event *save; |
| char *desc; |
| }; |
| |
/* If HT (Hyper-Threading) is enabled, a maximum of 4 events (5 if one
 * is instructions retired) can be specified; if HT is disabled, a
 * maximum of 8 (9 if one is instructions retired) can be specified.
 *
 * From Table 19-1 "Architectural Performance Events" in the Intel 64
 * and IA-32 Architectures Software Developer's Manual, Volume 3:
 * System Programming Guide.
 */
| struct raw_perf_event perf_events[] = { |
| { 0x3c, 0x00, NULL, "Unhalted CPU Cycles" }, |
| { 0xc0, 0x00, NULL, "Instruction Retired" } |
| }; |
| |
| #define NUM_EVTS (ARRAY_SIZE(perf_events)) |
| |
| /* WARNING: PMU config is currently broken! |
| */ |
| bool time_bench_PMU_config(bool enable) |
| { |
| int i; |
| struct perf_event_attr perf_conf; |
| struct perf_event *perf_event; |
| int cpu; |
| |
| preempt_disable(); |
| cpu = smp_processor_id(); |
| pr_info("DEBUG: cpu:%d\n", cpu); |
| preempt_enable(); |
| |
| memset(&perf_conf, 0, sizeof(struct perf_event_attr)); |
| perf_conf.type = PERF_TYPE_RAW; |
| perf_conf.size = sizeof(struct perf_event_attr); |
| perf_conf.read_format = PERF_FORMAT; |
| perf_conf.pinned = 1; |
	perf_conf.exclude_user = 1; /* Exclude userspace events */
	perf_conf.exclude_kernel = 0; /* Include kernel events */
| |
| for (i = 0; i < NUM_EVTS; i++) { |
		/* Create the counter in disabled state; perf_event_enable()
		 * below starts it explicitly.
		 */
		perf_conf.disabled = 1;
| perf_conf.config = perf_events[i].config; |
| perf_conf.config1 = perf_events[i].config1; |
| if (verbose) |
| pr_info("%s() enable PMU counter: %s\n", |
| __func__, perf_events[i].desc); |
		perf_event = perf_event_create_kernel_counter(&perf_conf, cpu,
					      NULL /* task */,
					      NULL /* overflow_handler*/,
					      NULL /* context */);
		if (IS_ERR(perf_event)) {
			/* The API returns ERR_PTR() on failure, never NULL */
			pr_err("%s(): perf_event_create_kernel_counter() failed: %ld\n",
			       __func__, PTR_ERR(perf_event));
			continue;
		}
		perf_events[i].save = perf_event;
		pr_info("%s():DEBUG perf_event success\n", __func__);

		perf_event_enable(perf_event);
| } |
| |
| return true; |
| } |
| |
| /** Generic functions ** |
| */ |
| |
| /* Calculate stats, store results in record */ |
| bool time_bench_calc_stats(struct time_bench_record *rec) |
| { |
| #define NANOSEC_PER_SEC 1000000000 /* 10^9 */ |
| uint64_t ns_per_call_tmp_rem = 0; |
| uint32_t ns_per_call_remainder = 0; |
| uint64_t pmc_ipc_tmp_rem = 0; |
| uint32_t pmc_ipc_remainder = 0; |
| uint32_t pmc_ipc_div = 0; |
| uint32_t invoked_cnt_precision = 0; |
| uint32_t invoked_cnt = 0; /* 32-bit due to div_u64_rem() */ |
| |
| if (rec->flags & TIME_BENCH_LOOP) { |
| if (rec->invoked_cnt < 1000) { |
| pr_err("ERR: need more(>1000) loops(%llu) for timing\n", |
| rec->invoked_cnt); |
| return false; |
| } |
| if (rec->invoked_cnt > ((1ULL << 32) - 1)) { |
			/* div_u64_rem() only supports 32-bit divisors */
			pr_err("ERR: Invoke cnt(%llu) too big, overflows 32-bit\n",
			       rec->invoked_cnt);
| return false; |
| } |
| invoked_cnt = (uint32_t)rec->invoked_cnt; |
| } |
| |
| /* TSC (Time-Stamp Counter) records */ |
| if (rec->flags & TIME_BENCH_TSC) { |
| rec->tsc_interval = rec->tsc_stop - rec->tsc_start; |
| if (rec->tsc_interval == 0) { |
| pr_err("ABORT: timing took ZERO TSC time\n"); |
| return false; |
| } |
| /* Calculate stats */ |
| if (rec->flags & TIME_BENCH_LOOP) |
| rec->tsc_cycles = rec->tsc_interval / invoked_cnt; |
| else |
| rec->tsc_cycles = rec->tsc_interval; |
| } |
| |
| /* Wall-clock time calc */ |
| if (rec->flags & TIME_BENCH_WALLCLOCK) { |
| rec->time_start = rec->ts_start.tv_nsec + |
| (NANOSEC_PER_SEC * rec->ts_start.tv_sec); |
| rec->time_stop = rec->ts_stop.tv_nsec + |
| (NANOSEC_PER_SEC * rec->ts_stop.tv_sec); |
| rec->time_interval = rec->time_stop - rec->time_start; |
| if (rec->time_interval == 0) { |
| pr_err("ABORT: timing took ZERO wallclock time\n"); |
| return false; |
| } |
| /* Calculate stats */ |
		/*** Division in the kernel is tricky ***/
		/* Orig: time_sec = (time_interval / NANOSEC_PER_SEC); */
		/* Remainder is only correct because NANOSEC_PER_SEC is 10^9 */
| rec->time_sec = div_u64_rem(rec->time_interval, NANOSEC_PER_SEC, |
| &rec->time_sec_remainder); |
| //TODO: use existing struct timespec records instead of div? |
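		/* Worked example of the quotient/remainder trick below:
		 * time_interval=2500000 ns over invoked_cnt=1000000 gives
		 * quotient 2 and remainder 500000; the remainder divided
		 * by invoked_cnt/1000 = 1000 yields 500, printed together
		 * as "2.500 ns" per call.
		 */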
| |
| if (rec->flags & TIME_BENCH_LOOP) { |
			/*** Division in the kernel is tricky ***/
| /* Orig: ns = ((double)time_interval / invoked_cnt); */ |
| /* First get quotient */ |
| rec->ns_per_call_quotient = |
| div_u64_rem(rec->time_interval, invoked_cnt, |
| &ns_per_call_remainder); |
			/* Now get the .xxx decimals (truncated, not rounded) */
| ns_per_call_tmp_rem = ns_per_call_remainder; |
| invoked_cnt_precision = invoked_cnt / 1000; |
| if (invoked_cnt_precision > 0) { |
| rec->ns_per_call_decimal = |
| div_u64_rem(ns_per_call_tmp_rem, |
| invoked_cnt_precision, |
| &ns_per_call_remainder); |
| } |
| } |
| } |
| |
| /* Performance Monitor Unit (PMU) counters */ |
| if (rec->flags & TIME_BENCH_PMU) { |
| //FIXME: Overflow handling??? |
| rec->pmc_inst = rec->pmc_inst_stop - rec->pmc_inst_start; |
| rec->pmc_clk = rec->pmc_clk_stop - rec->pmc_clk_start; |
| |
| /* Calc Instruction Per Cycle (IPC) */ |
| /* First get quotient */ |
| rec->pmc_ipc_quotient = div_u64_rem(rec->pmc_inst, rec->pmc_clk, |
| &pmc_ipc_remainder); |
		/* Now get the .xxx decimals (truncated, not rounded) */
| pmc_ipc_tmp_rem = pmc_ipc_remainder; |
| pmc_ipc_div = rec->pmc_clk / 1000; |
| if (pmc_ipc_div > 0) { |
| rec->pmc_ipc_decimal = div_u64_rem(pmc_ipc_tmp_rem, |
| pmc_ipc_div, |
| &pmc_ipc_remainder); |
| } |
| } |
| |
| return true; |
| } |
| |
/* Generic function for invoking a loop function and calculating
 * execution time stats. The function being called/timed is assumed
 * to perform a tight loop and to update the timing record struct.
 */
| bool time_bench_loop(uint32_t loops, int step, char *txt, void *data, |
| int (*func)(struct time_bench_record *record, void *data)) |
| { |
| struct time_bench_record rec; |
| |
| /* Setup record */ |
| memset(&rec, 0, sizeof(rec)); /* zero func might not update all */ |
| rec.version_abi = 1; |
| rec.loops = loops; |
| rec.step = step; |
| rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC | TIME_BENCH_WALLCLOCK); |
| |
| /*** Loop function being timed ***/ |
| if (!func(&rec, data)) { |
| pr_err("ABORT: function being timed failed\n"); |
| return false; |
| } |
| |
| if (rec.invoked_cnt < loops) |
| pr_warn("WARNING: Invoke count(%llu) smaller than loops(%d)\n", |
| rec.invoked_cnt, loops); |
| |
| /* Calculate stats */ |
| time_bench_calc_stats(&rec); |
| |
| pr_info("Type:%s Per elem: %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n", |
| txt, rec.tsc_cycles, rec.ns_per_call_quotient, |
| rec.ns_per_call_decimal, rec.step, rec.time_sec, |
| rec.time_sec_remainder, rec.time_interval, rec.invoked_cnt, |
| rec.tsc_interval); |
| if (rec.flags & TIME_BENCH_PMU) |
| pr_info("Type:%s PMU inst/clock%llu/%llu = %llu.%03llu IPC (inst per cycle)\n", |
| txt, rec.pmc_inst, rec.pmc_clk, rec.pmc_ipc_quotient, |
| rec.pmc_ipc_decimal); |
| return true; |
| } |
| |
| /* Function getting invoked by kthread */ |
| static int invoke_test_on_cpu_func(void *private) |
| { |
| struct time_bench_cpu *cpu = private; |
| struct time_bench_sync *sync = cpu->sync; |
| cpumask_t newmask = CPU_MASK_NONE; |
| void *data = cpu->data; |
| |
| /* Restrict CPU */ |
| cpumask_set_cpu(cpu->rec.cpu, &newmask); |
| set_cpus_allowed_ptr(current, &newmask); |
| |
| /* Synchronize start of concurrency test */ |
| atomic_inc(&sync->nr_tests_running); |
| wait_for_completion(&sync->start_event); |
| |
| /* Start benchmark function */ |
| if (!cpu->bench_func(&cpu->rec, data)) { |
| pr_err("ERROR: function being timed failed on CPU:%d(%d)\n", |
| cpu->rec.cpu, smp_processor_id()); |
| } else { |
| if (verbose) |
| pr_info("SUCCESS: ran on CPU:%d(%d)\n", cpu->rec.cpu, |
| smp_processor_id()); |
| } |
| cpu->did_bench_run = true; |
| |
| /* End test */ |
| atomic_dec(&sync->nr_tests_running); |
| /* Wait for kthread_stop() telling us to stop */ |
| while (!kthread_should_stop()) { |
| set_current_state(TASK_INTERRUPTIBLE); |
| schedule(); |
| } |
| __set_current_state(TASK_RUNNING); |
| return 0; |
| } |
| |
| void time_bench_print_stats_cpumask(const char *desc, |
| struct time_bench_cpu *cpu_tasks, |
| const struct cpumask *mask) |
| { |
| uint64_t average = 0; |
| int cpu; |
| int step = 0; |
| struct sum { |
| uint64_t tsc_cycles; |
| int records; |
| } sum = { 0 }; |
| |
| /* Get stats */ |
| for_each_cpu(cpu, mask) { |
| struct time_bench_cpu *c = &cpu_tasks[cpu]; |
| struct time_bench_record *rec = &c->rec; |
| |
| /* Calculate stats */ |
| time_bench_calc_stats(rec); |
| |
| pr_info("Type:%s CPU(%d) %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n", |
| desc, cpu, rec->tsc_cycles, rec->ns_per_call_quotient, |
| rec->ns_per_call_decimal, rec->step, rec->time_sec, |
| rec->time_sec_remainder, rec->time_interval, |
| rec->invoked_cnt, rec->tsc_interval); |
| |
| /* Collect average */ |
| sum.records++; |
| sum.tsc_cycles += rec->tsc_cycles; |
| step = rec->step; |
| } |
| |
| if (sum.records) /* avoid div-by-zero */ |
| average = sum.tsc_cycles / sum.records; |
| pr_info("Sum Type:%s Average: %llu cycles(tsc) CPUs:%d step:%d\n", desc, |
| average, sum.records, step); |
| } |
| |
| void time_bench_run_concurrent(uint32_t loops, int step, void *data, |
			       const struct cpumask *mask, /* Supports masking out some CPUs */
| struct time_bench_sync *sync, |
| struct time_bench_cpu *cpu_tasks, |
| int (*func)(struct time_bench_record *record, void *data)) |
| { |
| int cpu, running = 0; |
| |
| if (verbose) // DEBUG |
| pr_warn("%s() Started on CPU:%d\n", __func__, |
| smp_processor_id()); |
| |
| /* Reset sync conditions */ |
| atomic_set(&sync->nr_tests_running, 0); |
| init_completion(&sync->start_event); |
| |
| /* Spawn off jobs on all CPUs */ |
| for_each_cpu(cpu, mask) { |
| struct time_bench_cpu *c = &cpu_tasks[cpu]; |
| |
| running++; |
| c->sync = sync; /* Send sync variable along */ |
| c->data = data; /* Send opaque along */ |
| |
| /* Init benchmark record */ |
| memset(&c->rec, 0, sizeof(struct time_bench_record)); |
| c->rec.version_abi = 1; |
| c->rec.loops = loops; |
| c->rec.step = step; |
| c->rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC | |
| TIME_BENCH_WALLCLOCK); |
| c->rec.cpu = cpu; |
| c->bench_func = func; |
| c->task = kthread_run(invoke_test_on_cpu_func, c, |
| "time_bench%d", cpu); |
| if (IS_ERR(c->task)) { |
| pr_err("%s(): Failed to start test func\n", __func__); |
| return; /* Argh, what about cleanup?! */ |
| } |
| } |
| |
| /* Wait until all processes are running */ |
| while (atomic_read(&sync->nr_tests_running) < running) { |
| set_current_state(TASK_UNINTERRUPTIBLE); |
| schedule_timeout(10); |
| } |
| /* Kick off all CPU concurrently on completion event */ |
| complete_all(&sync->start_event); |
| |
| /* Wait for CPUs to finish */ |
| while (atomic_read(&sync->nr_tests_running)) { |
| set_current_state(TASK_UNINTERRUPTIBLE); |
| schedule_timeout(10); |
| } |
| |
| /* Stop the kthreads */ |
| for_each_cpu(cpu, mask) { |
| struct time_bench_cpu *c = &cpu_tasks[cpu]; |
| |
| kthread_stop(c->task); |
| } |
| |
	if (verbose) // DEBUG - we often finish on another CPU
| pr_warn("%s() Finished on CPU:%d\n", __func__, |
| smp_processor_id()); |
| } |