| // SPDX-License-Identifier: GPL-2.0 |
| /* |
| * Memory bandwidth monitoring and allocation library |
| * |
| * Copyright (C) 2018 Intel Corporation |
| * |
| * Authors: |
| * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>, |
| * Fenghua Yu <fenghua.yu@intel.com> |
| */ |
| #include "resctrl.h" |
| |
/* Substring identifying iMC PMU directories below DYN_PMU_PATH. */
#define UNCORE_IMC			"uncore_imc"
/* File, relative to an iMC PMU directory, holding the read event config. */
#define READ_FILE_NAME			"events/cas_count_read"
/* Directory that is scanned for PMU devices. */
#define DYN_PMU_PATH			"/sys/bus/event_source/devices"
/* Multiplier applied to raw iMC read counts; numerically 1 / 16384. */
#define SCALE				0.00006103515625
/* Number of entries in the imc_counters_config[] array. */
#define MAX_IMCS			20
/* Upper bound on tokens split out of an event configuration string. */
#define MAX_TOKENS			5

/* printf() format taking: resctrl root, group name, two-digit domain ID. */
#define CON_MBM_LOCAL_BYTES_PATH	"%s/%s/mon_data/mon_L3_%02d/mbm_local_bytes"
| |
/*
 * Buffer that a perf counter read() is stored into. Which members the
 * kernel fills in depends on the PERF_FORMAT_* bits named next to each one.
 */
struct membw_read_format {
	/* Counter value of the event. */
	__u64 value;
	/* Present with PERF_FORMAT_TOTAL_TIME_ENABLED. */
	__u64 time_enabled;
	/* Present with PERF_FORMAT_TOTAL_TIME_RUNNING. */
	__u64 time_running;
	/* Present with PERF_FORMAT_ID. */
	__u64 id;
};
| |
| struct imc_counter_config { |
| __u32 type; |
| __u64 event; |
| __u64 umask; |
| struct perf_event_attr pe; |
| struct membw_read_format return_value; |
| int fd; |
| }; |
| |
| static char mbm_total_path[1024]; |
| static int imcs; |
| static struct imc_counter_config imc_counters_config[MAX_IMCS]; |
| static const struct resctrl_test *current_test; |
| |
| static void read_mem_bw_initialize_perf_event_attr(int i) |
| { |
| memset(&imc_counters_config[i].pe, 0, |
| sizeof(struct perf_event_attr)); |
| imc_counters_config[i].pe.type = imc_counters_config[i].type; |
| imc_counters_config[i].pe.size = sizeof(struct perf_event_attr); |
| imc_counters_config[i].pe.disabled = 1; |
| imc_counters_config[i].pe.inherit = 1; |
| imc_counters_config[i].pe.exclude_guest = 0; |
| imc_counters_config[i].pe.config = |
| imc_counters_config[i].umask << 8 | |
| imc_counters_config[i].event; |
| imc_counters_config[i].pe.sample_type = PERF_SAMPLE_IDENTIFIER; |
| imc_counters_config[i].pe.read_format = |
| PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; |
| } |
| |
| static void read_mem_bw_ioctl_perf_event_ioc_reset_enable(int i) |
| { |
| ioctl(imc_counters_config[i].fd, PERF_EVENT_IOC_RESET, 0); |
| ioctl(imc_counters_config[i].fd, PERF_EVENT_IOC_ENABLE, 0); |
| } |
| |
| static void read_mem_bw_ioctl_perf_event_ioc_disable(int i) |
| { |
| ioctl(imc_counters_config[i].fd, PERF_EVENT_IOC_DISABLE, 0); |
| } |
| |
| /* |
| * get_read_event_and_umask: Parse config into event and umask |
| * @cas_count_cfg: Config |
| * @count: iMC number |
| */ |
| static void get_read_event_and_umask(char *cas_count_cfg, int count) |
| { |
| char *token[MAX_TOKENS]; |
| int i = 0; |
| |
| token[0] = strtok(cas_count_cfg, "=,"); |
| |
| for (i = 1; i < MAX_TOKENS; i++) |
| token[i] = strtok(NULL, "=,"); |
| |
| for (i = 0; i < MAX_TOKENS - 1; i++) { |
| if (!token[i]) |
| break; |
| if (strcmp(token[i], "event") == 0) |
| imc_counters_config[count].event = strtol(token[i + 1], NULL, 16); |
| if (strcmp(token[i], "umask") == 0) |
| imc_counters_config[count].umask = strtol(token[i + 1], NULL, 16); |
| } |
| } |
| |
| static int open_perf_read_event(int i, int cpu_no) |
| { |
| imc_counters_config[i].fd = |
| perf_event_open(&imc_counters_config[i].pe, -1, cpu_no, -1, |
| PERF_FLAG_FD_CLOEXEC); |
| |
| if (imc_counters_config[i].fd == -1) { |
| fprintf(stderr, "Error opening leader %llx\n", |
| imc_counters_config[i].pe.config); |
| |
| return -1; |
| } |
| |
| return 0; |
| } |
| |
| /* Get type and config of an iMC counter's read event. */ |
| static int read_from_imc_dir(char *imc_dir, int count) |
| { |
| char cas_count_cfg[1024], imc_counter_cfg[1024], imc_counter_type[1024]; |
| FILE *fp; |
| |
| /* Get type of iMC counter */ |
| sprintf(imc_counter_type, "%s%s", imc_dir, "type"); |
| fp = fopen(imc_counter_type, "r"); |
| if (!fp) { |
| ksft_perror("Failed to open iMC counter type file"); |
| |
| return -1; |
| } |
| if (fscanf(fp, "%u", &imc_counters_config[count].type) <= 0) { |
| ksft_perror("Could not get iMC type"); |
| fclose(fp); |
| |
| return -1; |
| } |
| fclose(fp); |
| |
| /* Get read config */ |
| sprintf(imc_counter_cfg, "%s%s", imc_dir, READ_FILE_NAME); |
| fp = fopen(imc_counter_cfg, "r"); |
| if (!fp) { |
| ksft_perror("Failed to open iMC config file"); |
| |
| return -1; |
| } |
| if (fscanf(fp, "%1023s", cas_count_cfg) <= 0) { |
| ksft_perror("Could not get iMC cas count read"); |
| fclose(fp); |
| |
| return -1; |
| } |
| fclose(fp); |
| |
| get_read_event_and_umask(cas_count_cfg, count); |
| |
| return 0; |
| } |
| |
| /* |
| * A system can have 'n' number of iMC (Integrated Memory Controller) |
| * counters, get that 'n'. Discover the properties of the available |
| * counters in support of needed performance measurement via perf. |
| * For each iMC counter get it's type and config. Also obtain each |
| * counter's event and umask for the memory read events that will be |
| * measured. |
| * |
| * Enumerate all these details into an array of structures. |
| * |
| * Return: >= 0 on success. < 0 on failure. |
| */ |
| static int num_of_imcs(void) |
| { |
| char imc_dir[512], *temp; |
| unsigned int count = 0; |
| struct dirent *ep; |
| int ret; |
| DIR *dp; |
| |
| dp = opendir(DYN_PMU_PATH); |
| if (dp) { |
| while ((ep = readdir(dp))) { |
| temp = strstr(ep->d_name, UNCORE_IMC); |
| if (!temp) |
| continue; |
| |
| /* |
| * imc counters are named as "uncore_imc_<n>", hence |
| * increment the pointer to point to <n>. Note that |
| * sizeof(UNCORE_IMC) would count for null character as |
| * well and hence the last underscore character in |
| * uncore_imc'_' need not be counted. |
| */ |
| temp = temp + sizeof(UNCORE_IMC); |
| |
| /* |
| * Some directories under "DYN_PMU_PATH" could have |
| * names like "uncore_imc_free_running", hence, check if |
| * first character is a numerical digit or not. |
| */ |
| if (temp[0] >= '0' && temp[0] <= '9') { |
| sprintf(imc_dir, "%s/%s/", DYN_PMU_PATH, |
| ep->d_name); |
| ret = read_from_imc_dir(imc_dir, count); |
| if (ret) { |
| closedir(dp); |
| |
| return ret; |
| } |
| count++; |
| } |
| } |
| closedir(dp); |
| if (count == 0) { |
| ksft_print_msg("Unable to find iMC counters\n"); |
| |
| return -1; |
| } |
| } else { |
| ksft_perror("Unable to open PMU directory"); |
| |
| return -1; |
| } |
| |
| return count; |
| } |
| |
| int initialize_read_mem_bw_imc(void) |
| { |
| int imc; |
| |
| imcs = num_of_imcs(); |
| if (imcs <= 0) |
| return imcs; |
| |
| /* Initialize perf_event_attr structures for all iMC's */ |
| for (imc = 0; imc < imcs; imc++) |
| read_mem_bw_initialize_perf_event_attr(imc); |
| |
| return 0; |
| } |
| |
| static void perf_close_imc_read_mem_bw(void) |
| { |
| int mc; |
| |
| for (mc = 0; mc < imcs; mc++) { |
| if (imc_counters_config[mc].fd != -1) |
| close(imc_counters_config[mc].fd); |
| } |
| } |
| |
| /* |
| * perf_open_imc_read_mem_bw - Open perf fds for IMCs |
| * @cpu_no: CPU number that the benchmark PID is bound to |
| * |
| * Return: = 0 on success. < 0 on failure. |
| */ |
| static int perf_open_imc_read_mem_bw(int cpu_no) |
| { |
| int imc, ret; |
| |
| for (imc = 0; imc < imcs; imc++) |
| imc_counters_config[imc].fd = -1; |
| |
| for (imc = 0; imc < imcs; imc++) { |
| ret = open_perf_read_event(imc, cpu_no); |
| if (ret) |
| goto close_fds; |
| } |
| |
| return 0; |
| |
| close_fds: |
| perf_close_imc_read_mem_bw(); |
| return -1; |
| } |
| |
| /* |
| * do_imc_read_mem_bw_test - Perform memory bandwidth test |
| * |
| * Runs memory bandwidth test over one second period. Also, handles starting |
| * and stopping of the IMC perf counters around the test. |
| */ |
| static void do_imc_read_mem_bw_test(void) |
| { |
| int imc; |
| |
| for (imc = 0; imc < imcs; imc++) |
| read_mem_bw_ioctl_perf_event_ioc_reset_enable(imc); |
| |
| sleep(1); |
| |
| /* Stop counters after a second to get results. */ |
| for (imc = 0; imc < imcs; imc++) |
| read_mem_bw_ioctl_perf_event_ioc_disable(imc); |
| } |
| |
| /* |
| * get_read_mem_bw_imc - Memory read bandwidth as reported by iMC counters |
| * |
| * Memory read bandwidth utilized by a process on a socket can be calculated |
| * using iMC counters' read events. Perf events are used to read these |
| * counters. |
| * |
| * Return: = 0 on success. < 0 on failure. |
| */ |
| static int get_read_mem_bw_imc(float *bw_imc) |
| { |
| float reads = 0, of_mul_read = 1; |
| int imc; |
| |
| /* |
| * Log read event values from all iMC counters into |
| * struct imc_counter_config. |
| * Take overflow into consideration before calculating total bandwidth. |
| */ |
| for (imc = 0; imc < imcs; imc++) { |
| struct imc_counter_config *r = |
| &imc_counters_config[imc]; |
| |
| if (read(r->fd, &r->return_value, |
| sizeof(struct membw_read_format)) == -1) { |
| ksft_perror("Couldn't get read bandwidth through iMC"); |
| return -1; |
| } |
| |
| __u64 r_time_enabled = r->return_value.time_enabled; |
| __u64 r_time_running = r->return_value.time_running; |
| |
| if (r_time_enabled != r_time_running) |
| of_mul_read = (float)r_time_enabled / |
| (float)r_time_running; |
| |
| reads += r->return_value.value * of_mul_read * SCALE; |
| } |
| |
| *bw_imc = reads; |
| return 0; |
| } |
| |
| /* |
| * initialize_mem_bw_resctrl: Appropriately populate "mbm_total_path" |
| * @param: Parameters passed to resctrl_val() |
| * @domain_id: Domain ID (cache ID; for MB, L3 cache ID) |
| */ |
| void initialize_mem_bw_resctrl(const struct resctrl_val_param *param, |
| int domain_id) |
| { |
| sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH, |
| param->ctrlgrp, domain_id); |
| } |
| |
/*
 * Open the resctrl file from which MBM local bytes are read.
 *
 * Return: read-only stream on success, NULL (after reporting the error)
 * on failure.
 */
static FILE *open_mem_bw_resctrl(const char *mbm_bw_file)
{
	FILE *stream = fopen(mbm_bw_file, "r");

	if (stream)
		return stream;

	ksft_perror("Failed to open total memory bandwidth file");

	return NULL;
}
| |
/*
 * Parse MBM local bytes, as reported by resctrl FS, from an open stream.
 *
 * Return: 0 with the value stored in @mbm_total, -1 if nothing was parsed.
 */
static int get_mem_bw_resctrl(FILE *fp, unsigned long *mbm_total)
{
	if (fscanf(fp, "%lu\n", mbm_total) > 0)
		return 0;

	ksft_perror("Could not get MBM local bytes");
	return -1;
}
| |
| static pid_t bm_pid; |
| |
| void ctrlc_handler(int signum, siginfo_t *info, void *ptr) |
| { |
| /* Only kill child after bm_pid is set after fork() */ |
| if (bm_pid) |
| kill(bm_pid, SIGKILL); |
| umount_resctrlfs(); |
| if (current_test && current_test->cleanup) |
| current_test->cleanup(); |
| ksft_print_msg("Ending\n\n"); |
| |
| exit(EXIT_SUCCESS); |
| } |
| |
| /* |
| * Register CTRL-C handler for parent, as it has to kill |
| * child process before exiting. |
| */ |
| int signal_handler_register(const struct resctrl_test *test) |
| { |
| struct sigaction sigact = {}; |
| int ret = 0; |
| |
| bm_pid = 0; |
| |
| current_test = test; |
| sigact.sa_sigaction = ctrlc_handler; |
| sigemptyset(&sigact.sa_mask); |
| sigact.sa_flags = SA_SIGINFO; |
| if (sigaction(SIGINT, &sigact, NULL) || |
| sigaction(SIGTERM, &sigact, NULL) || |
| sigaction(SIGHUP, &sigact, NULL)) { |
| ksft_perror("sigaction"); |
| ret = -1; |
| } |
| return ret; |
| } |
| |
| /* |
| * Reset signal handler to SIG_DFL. |
| * Non-Value return because the caller should keep |
| * the error code of other path even if sigaction fails. |
| */ |
| void signal_handler_unregister(void) |
| { |
| struct sigaction sigact = {}; |
| |
| current_test = NULL; |
| sigact.sa_handler = SIG_DFL; |
| sigemptyset(&sigact.sa_mask); |
| if (sigaction(SIGINT, &sigact, NULL) || |
| sigaction(SIGTERM, &sigact, NULL) || |
| sigaction(SIGHUP, &sigact, NULL)) { |
| ksft_perror("sigaction"); |
| } |
| } |
| |
/*
 * print_results_bw:	log one memory bandwidth measurement
 * @filename:		file the result line is appended to; "stdio" and
 *			"stderr" both select printing via printf() instead
 * @bm_pid:		child pid that runs benchmark
 * @bw_imc:		perf imc counter value
 * @bw_resc:		memory bandwidth value
 *
 * Besides the two values, their absolute difference is logged.
 *
 * Return:		0 on success, < 0 on error.
 */
static int print_results_bw(char *filename, pid_t bm_pid, float bw_imc,
			    unsigned long bw_resc)
{
	unsigned long diff = fabs(bw_imc - bw_resc);
	int written;
	FILE *out;

	if (!strcmp(filename, "stdio") || !strcmp(filename, "stderr")) {
		printf("Pid: %d \t Mem_BW_iMC: %f \t ", (int)bm_pid, bw_imc);
		printf("Mem_BW_resc: %lu \t Difference: %lu\n", bw_resc, diff);

		return 0;
	}

	out = fopen(filename, "a");
	if (!out) {
		ksft_perror("Cannot open results file");

		return -1;
	}

	written = fprintf(out, "Pid: %d \t Mem_BW_iMC: %f \t Mem_BW_resc: %lu \t Difference: %lu\n",
			  (int)bm_pid, bw_imc, bw_resc, diff);
	if (written <= 0)
		ksft_print_msg("Could not log results\n");
	fclose(out);

	return written <= 0 ? -1 : 0;
}
| |
| /* |
| * measure_read_mem_bw - Measures read memory bandwidth numbers while benchmark runs |
| * @uparams: User supplied parameters |
| * @param: Parameters passed to resctrl_val() |
| * @bm_pid: PID that runs the benchmark |
| * |
| * Measure memory bandwidth from resctrl and from another source which is |
| * perf imc value or could be something else if perf imc event is not |
| * available. Compare the two values to validate resctrl value. It takes |
| * 1 sec to measure the data. |
| * resctrl does not distinguish between read and write operations so |
| * its data includes all memory operations. |
| */ |
| int measure_read_mem_bw(const struct user_params *uparams, |
| struct resctrl_val_param *param, pid_t bm_pid) |
| { |
| unsigned long bw_resc, bw_resc_start, bw_resc_end; |
| FILE *mem_bw_fp; |
| float bw_imc; |
| int ret; |
| |
| mem_bw_fp = open_mem_bw_resctrl(mbm_total_path); |
| if (!mem_bw_fp) |
| return -1; |
| |
| ret = perf_open_imc_read_mem_bw(uparams->cpu); |
| if (ret < 0) |
| goto close_fp; |
| |
| ret = get_mem_bw_resctrl(mem_bw_fp, &bw_resc_start); |
| if (ret < 0) |
| goto close_imc; |
| |
| rewind(mem_bw_fp); |
| |
| do_imc_read_mem_bw_test(); |
| |
| ret = get_mem_bw_resctrl(mem_bw_fp, &bw_resc_end); |
| if (ret < 0) |
| goto close_imc; |
| |
| ret = get_read_mem_bw_imc(&bw_imc); |
| if (ret < 0) |
| goto close_imc; |
| |
| perf_close_imc_read_mem_bw(); |
| fclose(mem_bw_fp); |
| |
| bw_resc = (bw_resc_end - bw_resc_start) / MB; |
| |
| return print_results_bw(param->filename, bm_pid, bw_imc, bw_resc); |
| |
| close_imc: |
| perf_close_imc_read_mem_bw(); |
| close_fp: |
| fclose(mem_bw_fp); |
| return ret; |
| } |
| |
| /* |
| * resctrl_val: execute benchmark and measure memory bandwidth on |
| * the benchmark |
| * @test: test information structure |
| * @uparams: user supplied parameters |
| * @param: parameters passed to resctrl_val() |
| * |
| * Return: 0 when the test was run, < 0 on error. |
| */ |
| int resctrl_val(const struct resctrl_test *test, |
| const struct user_params *uparams, |
| struct resctrl_val_param *param) |
| { |
| unsigned char *buf = NULL; |
| cpu_set_t old_affinity; |
| int domain_id; |
| int ret = 0; |
| pid_t ppid; |
| |
| if (strcmp(param->filename, "") == 0) |
| sprintf(param->filename, "stdio"); |
| |
| ret = get_domain_id(test->resource, uparams->cpu, &domain_id); |
| if (ret < 0) { |
| ksft_print_msg("Could not get domain ID\n"); |
| return ret; |
| } |
| |
| ppid = getpid(); |
| |
| /* Taskset test to specified CPU. */ |
| ret = taskset_benchmark(ppid, uparams->cpu, &old_affinity); |
| if (ret) |
| return ret; |
| |
| /* Write test to specified control & monitoring group in resctrl FS. */ |
| ret = write_bm_pid_to_resctrl(ppid, param->ctrlgrp, param->mongrp); |
| if (ret) |
| goto reset_affinity; |
| |
| if (param->init) { |
| ret = param->init(param, domain_id); |
| if (ret) |
| goto reset_affinity; |
| } |
| |
| /* |
| * If not running user provided benchmark, run the default |
| * "fill_buf". First phase of "fill_buf" is to prepare the |
| * buffer that the benchmark will operate on. No measurements |
| * are needed during this phase and prepared memory will be |
| * passed to next part of benchmark via copy-on-write thus |
| * no impact on the benchmark that relies on reading from |
| * memory only. |
| */ |
| if (param->fill_buf) { |
| buf = alloc_buffer(param->fill_buf->buf_size, |
| param->fill_buf->memflush); |
| if (!buf) { |
| ret = -ENOMEM; |
| goto reset_affinity; |
| } |
| } |
| |
| fflush(stdout); |
| bm_pid = fork(); |
| if (bm_pid == -1) { |
| ret = -errno; |
| ksft_perror("Unable to fork"); |
| goto free_buf; |
| } |
| |
| /* |
| * What needs to be measured runs in separate process until |
| * terminated. |
| */ |
| if (bm_pid == 0) { |
| if (param->fill_buf) |
| fill_cache_read(buf, param->fill_buf->buf_size, false); |
| else if (uparams->benchmark_cmd[0]) |
| execvp(uparams->benchmark_cmd[0], (char **)uparams->benchmark_cmd); |
| exit(EXIT_SUCCESS); |
| } |
| |
| ksft_print_msg("Benchmark PID: %d\n", (int)bm_pid); |
| |
| /* Give benchmark enough time to fully run. */ |
| sleep(1); |
| |
| /* Test runs until the callback setup() tells the test to stop. */ |
| while (1) { |
| ret = param->setup(test, uparams, param); |
| if (ret == END_OF_TESTS) { |
| ret = 0; |
| break; |
| } |
| if (ret < 0) |
| break; |
| |
| ret = param->measure(uparams, param, bm_pid); |
| if (ret) |
| break; |
| } |
| |
| kill(bm_pid, SIGKILL); |
| free_buf: |
| free(buf); |
| reset_affinity: |
| taskset_restore(ppid, &old_affinity); |
| return ret; |
| } |