| #include "vmlinux.h" |
| #include "bpf_tracing_net.h" |
| #include <bpf/bpf_helpers.h> |
| #include <bpf/bpf_tracing.h> |
| #include "bpf_misc.h" |
| #include "bpf_kfuncs.h" |
| #include <errno.h> |
| |
| __u32 monitored_pid = 0; |
| |
| int nr_active; |
| int nr_snd; |
| int nr_passive; |
| int nr_sched; |
| int nr_txsw; |
| int nr_ack; |
| |
| struct sk_stg { |
| __u64 sendmsg_ns; /* record ts when sendmsg is called */ |
| }; |
| |
| struct sk_tskey { |
| u64 cookie; |
| u32 tskey; |
| }; |
| |
| struct delay_info { |
| u64 sendmsg_ns; /* record ts when sendmsg is called */ |
| u32 sched_delay; /* SCHED_CB - sendmsg_ns */ |
| u32 snd_sw_delay; /* SND_SW_CB - SCHED_CB */ |
| u32 ack_delay; /* ACK_CB - SND_SW_CB */ |
| }; |
| |
| struct { |
| __uint(type, BPF_MAP_TYPE_SK_STORAGE); |
| __uint(map_flags, BPF_F_NO_PREALLOC); |
| __type(key, int); |
| __type(value, struct sk_stg); |
| } sk_stg_map SEC(".maps"); |
| |
| struct { |
| __uint(type, BPF_MAP_TYPE_HASH); |
| __type(key, struct sk_tskey); |
| __type(value, struct delay_info); |
| __uint(max_entries, 1024); |
| } time_map SEC(".maps"); |
| |
| static u64 delay_tolerance_nsec = 10000000000; /* 10 second as an example */ |
| |
| extern int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops, u64 flags) __ksym; |
| |
| static int bpf_test_sockopt(void *ctx, const struct sock *sk, int expected) |
| { |
| int tmp, new = SK_BPF_CB_TX_TIMESTAMPING; |
| int opt = SK_BPF_CB_FLAGS; |
| int level = SOL_SOCKET; |
| |
| if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new)) != expected) |
| return 1; |
| |
| if (bpf_getsockopt(ctx, level, opt, &tmp, sizeof(tmp)) != expected || |
| (!expected && tmp != new)) |
| return 1; |
| |
| return 0; |
| } |
| |
/*
 * Returns true when bpf_setsockopt()/bpf_getsockopt() are NOT both rejected
 * with -EOPNOTSUPP, and false when they are rejected as expected.
 */
static bool bpf_test_access_sockopt(void *ctx, const struct sock *sk)
{
	return bpf_test_sockopt(ctx, sk, -EOPNOTSUPP) != 0;
}
| |
| static bool bpf_test_access_load_hdr_opt(struct bpf_sock_ops *skops) |
| { |
| u8 opt[3] = {0}; |
| int load_flags = 0; |
| int ret; |
| |
| ret = bpf_load_hdr_opt(skops, opt, sizeof(opt), load_flags); |
| if (ret != -EOPNOTSUPP) |
| return true; |
| |
| return false; |
| } |
| |
/*
 * Returns true when bpf_sock_ops_cb_flags_set() returns anything other than
 * -EOPNOTSUPP, and false when the helper is rejected as expected.
 */
static bool bpf_test_access_cb_flags_set(struct bpf_sock_ops *skops)
{
	int rc = bpf_sock_ops_cb_flags_set(skops, 0);

	return rc != -EOPNOTSUPP;
}
| |
/*
 * For safety reasons the timestamping callbacks are not allowed to call the
 * BPF helpers probed below. The probes run in order and stop at the first
 * one that is not rejected.
 *
 * Returns false when every probed helper is rejected with -EOPNOTSUPP (the
 * expected outcome), and true as soon as one of them is not.
 */
static bool bpf_test_access_bpf_calls(struct bpf_sock_ops *skops,
				      const struct sock *sk)
{
	return bpf_test_access_sockopt(skops, sk) ||
	       bpf_test_access_load_hdr_opt(skops) ||
	       bpf_test_access_cb_flags_set(skops);
}
| |
| static bool bpf_test_delay(struct bpf_sock_ops *skops, const struct sock *sk) |
| { |
| struct bpf_sock_ops_kern *skops_kern; |
| u64 timestamp = bpf_ktime_get_ns(); |
| struct skb_shared_info *shinfo; |
| struct delay_info dinfo = {0}; |
| struct sk_tskey key = {0}; |
| struct delay_info *val; |
| struct sk_buff *skb; |
| struct sk_stg *stg; |
| u64 prior_ts, delay; |
| |
| if (bpf_test_access_bpf_calls(skops, sk)) |
| return false; |
| |
| skops_kern = bpf_cast_to_kern_ctx(skops); |
| skb = skops_kern->skb; |
| shinfo = bpf_core_cast(skb->head + skb->end, struct skb_shared_info); |
| |
| key.cookie = bpf_get_socket_cookie(skops); |
| if (!key.cookie) |
| return false; |
| |
| if (skops->op == BPF_SOCK_OPS_TSTAMP_SENDMSG_CB) { |
| stg = bpf_sk_storage_get(&sk_stg_map, (void *)sk, 0, 0); |
| if (!stg) |
| return false; |
| dinfo.sendmsg_ns = stg->sendmsg_ns; |
| bpf_sock_ops_enable_tx_tstamp(skops_kern, 0); |
| key.tskey = shinfo->tskey; |
| if (!key.tskey) |
| return false; |
| bpf_map_update_elem(&time_map, &key, &dinfo, BPF_ANY); |
| return true; |
| } |
| |
| key.tskey = shinfo->tskey; |
| if (!key.tskey) |
| return false; |
| |
| val = bpf_map_lookup_elem(&time_map, &key); |
| if (!val) |
| return false; |
| |
| switch (skops->op) { |
| case BPF_SOCK_OPS_TSTAMP_SCHED_CB: |
| val->sched_delay = timestamp - val->sendmsg_ns; |
| delay = val->sched_delay; |
| break; |
| case BPF_SOCK_OPS_TSTAMP_SND_SW_CB: |
| prior_ts = val->sched_delay + val->sendmsg_ns; |
| val->snd_sw_delay = timestamp - prior_ts; |
| delay = val->snd_sw_delay; |
| break; |
| case BPF_SOCK_OPS_TSTAMP_ACK_CB: |
| prior_ts = val->snd_sw_delay + val->sched_delay + val->sendmsg_ns; |
| val->ack_delay = timestamp - prior_ts; |
| delay = val->ack_delay; |
| break; |
| } |
| |
| if (delay >= delay_tolerance_nsec) |
| return false; |
| |
| /* Since it's the last one, remove from the map after latency check */ |
| if (skops->op == BPF_SOCK_OPS_TSTAMP_ACK_CB) |
| bpf_map_delete_elem(&time_map, &key); |
| |
| return true; |
| } |
| |
| SEC("fentry/tcp_sendmsg_locked") |
| int BPF_PROG(trace_tcp_sendmsg_locked, struct sock *sk, struct msghdr *msg, |
| size_t size) |
| { |
| __u32 pid = bpf_get_current_pid_tgid() >> 32; |
| u64 timestamp = bpf_ktime_get_ns(); |
| u32 flag = sk->sk_bpf_cb_flags; |
| struct sk_stg *stg; |
| |
| if (pid != monitored_pid || !flag) |
| return 0; |
| |
| stg = bpf_sk_storage_get(&sk_stg_map, sk, 0, |
| BPF_SK_STORAGE_GET_F_CREATE); |
| if (!stg) |
| return 0; |
| |
| stg->sendmsg_ns = timestamp; |
| nr_snd += 1; |
| return 0; |
| } |
| |
| SEC("sockops") |
| int skops_sockopt(struct bpf_sock_ops *skops) |
| { |
| struct bpf_sock *bpf_sk = skops->sk; |
| const struct sock *sk; |
| |
| if (!bpf_sk) |
| return 1; |
| |
| sk = (struct sock *)bpf_skc_to_tcp_sock(bpf_sk); |
| if (!sk) |
| return 1; |
| |
| switch (skops->op) { |
| case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: |
| nr_active += !bpf_test_sockopt(skops, sk, 0); |
| break; |
| case BPF_SOCK_OPS_TSTAMP_SENDMSG_CB: |
| if (bpf_test_delay(skops, sk)) |
| nr_snd += 1; |
| break; |
| case BPF_SOCK_OPS_TSTAMP_SCHED_CB: |
| if (bpf_test_delay(skops, sk)) |
| nr_sched += 1; |
| break; |
| case BPF_SOCK_OPS_TSTAMP_SND_SW_CB: |
| if (bpf_test_delay(skops, sk)) |
| nr_txsw += 1; |
| break; |
| case BPF_SOCK_OPS_TSTAMP_ACK_CB: |
| if (bpf_test_delay(skops, sk)) |
| nr_ack += 1; |
| break; |
| } |
| |
| return 1; |
| } |
| |
| char _license[] SEC("license") = "GPL"; |