| // SPDX-License-Identifier: GPL-2.0 |
| |
| #include <pthread.h> |
| #include <sys/shm.h> |
| #include <sys/mman.h> |
| #include <fcntl.h> |
| #include <stdbool.h> |
| #include <time.h> |
| #include <assert.h> |
| #include "logging.h" |
| #include "futextest.h" |
| #include "futex2test.h" |
| |
/* Short fixed-width aliases: unsigned first, then the signed one. */
typedef u_int32_t u32;
typedef u_int64_t u64;
typedef int32_t s32;
| |
| static unsigned int fflags = (FUTEX2_SIZE_U32 | FUTEX2_PRIVATE); |
| static int fnode = FUTEX_NO_NODE; |
| |
/*
 * Fairly stupid test-and-set lock with a waiter flag.
 *
 * Bits of the 32-bit futex value:
 *   N_LOCK    - the lock is currently held
 *   N_WAITERS - some thread found the lock held and flagged itself a waiter
 */
#define N_LOCK		0x1
#define N_WAITERS	0x1000
| |
| struct futex_numa_32 { |
| union { |
| u64 full; |
| struct { |
| u32 val; |
| u32 node; |
| }; |
| }; |
| }; |
| |
| void futex_numa_32_lock(struct futex_numa_32 *lock) |
| { |
| for (;;) { |
| struct futex_numa_32 new, old = { |
| .full = __atomic_load_n(&lock->full, __ATOMIC_RELAXED), |
| }; |
| |
| for (;;) { |
| new = old; |
| if (old.val == 0) { |
| /* no waiter, no lock -> first lock, set no-node */ |
| new.node = fnode; |
| } |
| if (old.val & N_LOCK) { |
| /* contention, set waiter */ |
| new.val |= N_WAITERS; |
| } |
| new.val |= N_LOCK; |
| |
| /* nothing changed, ready to block */ |
| if (old.full == new.full) |
| break; |
| |
| /* |
| * Use u64 cmpxchg to set the futex value and node in a |
| * consistent manner. |
| */ |
| if (__atomic_compare_exchange_n(&lock->full, |
| &old.full, new.full, |
| /* .weak */ false, |
| __ATOMIC_ACQUIRE, |
| __ATOMIC_RELAXED)) { |
| |
| /* if we just set N_LOCK, we own it */ |
| if (!(old.val & N_LOCK)) |
| return; |
| |
| /* go block */ |
| break; |
| } |
| } |
| |
| futex2_wait(lock, new.val, fflags, NULL, 0); |
| } |
| } |
| |
| void futex_numa_32_unlock(struct futex_numa_32 *lock) |
| { |
| u32 val = __atomic_sub_fetch(&lock->val, N_LOCK, __ATOMIC_RELEASE); |
| assert((s32)val >= 0); |
| if (val & N_WAITERS) { |
| int woken = futex2_wake(lock, 1, fflags); |
| assert(val == N_WAITERS); |
| if (!woken) { |
| __atomic_compare_exchange_n(&lock->val, &val, 0U, |
| false, __ATOMIC_RELAXED, |
| __ATOMIC_RELAXED); |
| } |
| } |
| } |
| |
/* Length, in nanoseconds, of each nanosleep() in threadfn(); set with -n. */
static long nanos = 50 * 1000;
| |
/* Per-thread context, shared between main() and one worker thread. */
struct thread_args {
	pthread_t tid;			/* filled in by pthread_create() */
	volatile int *done;		/* worker loops until this is non-zero */
	struct futex_numa_32 *lock;	/* the lock under test */
	int val;			/* loop iterations done by this thread */
	int *val1;			/* counter bumped under the lock */
	int *val2;			/* second counter, must match *val1 */
	int node;			/* last node value this thread printed */
};
| |
| static void *threadfn(void *_arg) |
| { |
| struct thread_args *args = _arg; |
| struct timespec ts = { |
| .tv_nsec = nanos, |
| }; |
| int node; |
| |
| while (!*args->done) { |
| |
| futex_numa_32_lock(args->lock); |
| args->val++; |
| |
| assert(*args->val1 == *args->val2); |
| (*args->val1)++; |
| nanosleep(&ts, NULL); |
| (*args->val2)++; |
| |
| node = args->lock->node; |
| futex_numa_32_unlock(args->lock); |
| |
| if (node != args->node) { |
| args->node = node; |
| printf("node: %d\n", node); |
| } |
| |
| nanosleep(&ts, NULL); |
| } |
| |
| return NULL; |
| } |
| |
| static void *contendfn(void *_arg) |
| { |
| struct thread_args *args = _arg; |
| |
| while (!*args->done) { |
| /* |
| * futex2_wait() will take hb-lock, verify *var == val and |
| * queue/abort. By knowingly setting val 'wrong' this will |
| * abort and thereby generate hb-lock contention. |
| */ |
| futex2_wait(&args->lock->val, ~0U, fflags, NULL, 0); |
| args->val++; |
| } |
| |
| return NULL; |
| } |
| |
| static volatile int done = 0; |
| static struct futex_numa_32 lock = { .val = 0, }; |
| static int val1, val2; |
| |
| int main(int argc, char *argv[]) |
| { |
| struct thread_args *tas[512], *cas[512]; |
| int c, t, threads = 2, contenders = 0; |
| int sleeps = 10; |
| int total = 0; |
| |
| while ((c = getopt(argc, argv, "c:t:s:n:N::")) != -1) { |
| switch (c) { |
| case 'c': |
| contenders = atoi(optarg); |
| break; |
| case 't': |
| threads = atoi(optarg); |
| break; |
| case 's': |
| sleeps = atoi(optarg); |
| break; |
| case 'n': |
| nanos = atoi(optarg); |
| break; |
| case 'N': |
| fflags |= FUTEX2_NUMA; |
| if (optarg) |
| fnode = atoi(optarg); |
| break; |
| default: |
| exit(1); |
| break; |
| } |
| } |
| |
| for (t = 0; t < contenders; t++) { |
| struct thread_args *args = calloc(1, sizeof(*args)); |
| if (!args) { |
| perror("thread_args"); |
| exit(-1); |
| } |
| |
| args->done = &done; |
| args->lock = &lock; |
| args->val1 = &val1; |
| args->val2 = &val2; |
| args->node = -1; |
| |
| if (pthread_create(&args->tid, NULL, contendfn, args)) { |
| perror("pthread_create"); |
| exit(-1); |
| } |
| |
| cas[t] = args; |
| } |
| |
| for (t = 0; t < threads; t++) { |
| struct thread_args *args = calloc(1, sizeof(*args)); |
| if (!args) { |
| perror("thread_args"); |
| exit(-1); |
| } |
| |
| args->done = &done; |
| args->lock = &lock; |
| args->val1 = &val1; |
| args->val2 = &val2; |
| args->node = -1; |
| |
| if (pthread_create(&args->tid, NULL, threadfn, args)) { |
| perror("pthread_create"); |
| exit(-1); |
| } |
| |
| tas[t] = args; |
| } |
| |
| sleep(sleeps); |
| |
| done = true; |
| |
| for (t = 0; t < threads; t++) { |
| struct thread_args *args = tas[t]; |
| |
| pthread_join(args->tid, NULL); |
| total += args->val; |
| // printf("tval: %d\n", args->val); |
| } |
| printf("total: %d\n", total); |
| |
| if (contenders) { |
| total = 0; |
| for (t = 0; t < contenders; t++) { |
| struct thread_args *args = cas[t]; |
| |
| pthread_join(args->tid, NULL); |
| total += args->val; |
| // printf("tval: %d\n", args->val); |
| } |
| printf("contenders: %d\n", total); |
| } |
| |
| return 0; |
| } |
| |