|  | #!/bin/bash | 
|  | # SPDX-License-Identifier: GPL-2.0 | 
|  | # | 
|  | # Copyright (c) 2019 Facebook | 
|  | # | 
|  | # This program is free software; you can redistribute it and/or | 
|  | # modify it under the terms of version 2 of the GNU General Public | 
|  | # License as published by the Free Software Foundation. | 
|  |  | 
#######################################
# Print the help text for this script and terminate the script.
# Globals:   name (read) - script path shown on the USAGE line
# Arguments: none
# Outputs:   help text on stdout
# Returns:   never returns; calls exit, so the exit status is that of the
#            command that printed the text
#######################################
Usage() {
  # Unquoted here-document so that $name is expanded.
  cat <<EOF
Script for testing HBM (Host Bandwidth Manager) framework.
It creates a cgroup to use for testing and loads a BPF program to limit
egress or ingress bandwidth. It then uses iperf3 or netperf to create
loads. The output is the goodput in Mbps (unless -D was used).

USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>]
             [-D] [-d=<delay>|--delay=<delay>] [--debug] [-E] [--edt]
             [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id>]
             [-l] [-N] [--no_cn] [-p=<port>|--port=<port>] [-P]
             [-q=<qdisc>] [-r=<rate>|--rate=<rate>] [-R]
             [-s=<server>|--server=<server>] [-S|--stats]
             [-t=<time>|--time=<time>] [-w] [cubic|dctcp]
  Where:
    out               egress (default)
    -b or --bpf       BPF program filename to load and attach.
                      Default is hbm_out_kern.o for egress.
    -c or --cc        TCP congestion control (cubic or dctcp)
    --debug           print BPF trace buffer
    -d or --delay     add a delay in ms using netem
    -D                In addition to the goodput in Mbps, it also outputs
                      other detailed information. This information is
                      test dependent (i.e. iperf3 or netperf).
    -E                enable ECN (not required for dctcp)
    --edt             use fq's Earliest Departure Time (requires fq)
    -f or --flows     number of concurrent flows (default=1)
    -i or --id        cgroup id (an integer, default is 1)
    -N                use netperf instead of iperf3
    --no_cn           Do not return CN notifications
    -l                do not limit flows using loopback
    -h                Help
    -p or --port      iperf3 port (default is 5201)
    -P                use an iperf3 instance for each flow
    -q                use the specified qdisc
    -r or --rate      rate in Mbps (default is 1Gbps)
    -R                Use TCP_RR for netperf. 1st flow has req
                      size of 10KB, rest of 1MB. Reply in all
                      cases is 1 byte.
                      More detailed output for each flow can be found
                      in the files netperf.<cg>.<flow>, where <cg> is the
                      cgroup id as specified with the -i flag, and <flow>
                      is the flow id starting at 1 and increasing by 1 for
                      each flow (as specified by -f).
    -s or --server    hostname of netperf server. Used to create netperf
                      test traffic between two hosts (default is within host)
                      netserver must be running on the host.
    -S or --stats     whether to update hbm stats (default is yes).
    -t or --time      duration of iperf3 in seconds (default=5)
    -w                Work conserving flag. cgroup can increase its
                      bandwidth beyond the rate limit specified
                      while there is available bandwidth. Current
                      implementation assumes there is only one NIC
                      (eth0), but can be extended to support multiple
                      NICs.
    cubic or dctcp    specify which TCP CC to use

EOF
  exit
}
|  |  | 
|  | #set -x | 
|  |  | 
# ---- Invocation context ----
name="$0"        # script path, shown on the USAGE line
args="$*"        # every command-line word joined by spaces; processArgs splits it again

# ---- Settings handed to the hbm program ----
dir="-o"         # direction flag for hbm
dir_name="out"   # direction name, used as the suffix of the hbm.<id>.<dir> files
id=1             # cgroup id (-i)
rate=1000        # rate limit in Mbps (-r)
dur=5            # test duration in seconds (-t)
prog=""          # BPF program filename (-b); empty passes nothing to hbm
flags=""         # additional hbm flags accumulated by processArgs
debug_flag=0     # 1 once --debug was given
do_stats=0       # 1 once -S/--stats was given

# ---- Traffic generation ----
flows=1          # number of concurrent flows (-f)
flow_cnt=1       # running flow index used by the loops further down
port=5201        # first iperf3 port (-p)
multi_iperf=0    # 1 = one iperf3 instance per flow (-P)
use_netperf=0    # 1 = netperf instead of iperf3 (-N)
rr=0             # 1 = netperf TCP_RR test (-R)
server=""        # netperf server host (-s); empty keeps traffic on this host

# ---- Network tuning ----
cc=x             # congestion control; "x" means none was requested
ecn=0            # 1 = enable ECN for the test (-E)
netem=0          # netem delay in ms (-d); 0 means no delay
qdisc=""         # qdisc to install (-q, or fq via --edt)

# ---- Reporting ----
details=0        # 1 = detailed output (-D)
|  |  | 
# Mount point of the BPF filesystem.
BPFFS=/sys/fs/bpf

#######################################
# Ensure a bpf filesystem is mounted at $BPFFS.
# The check looks for $BPFFS anywhere in the output of mount(8); when it is
# not found, a bpf filesystem is mounted there.
# Globals:   BPFFS (read)
# Outputs:   one status line on stdout
# Returns:   0 when already mounted, otherwise the status of the mount command
#######################################
function config_bpffs () {
  if ! mount | grep "$BPFFS" > /dev/null; then
    echo "bpffs not mounted. Mounting..."
    mount -t bpf none "$BPFFS"
    return
  fi
  echo "bpffs already mounted"
}
|  |  | 
#######################################
# Start the hbm program in the background.
# hbm.out is recreated with the command line on its first line, a line
# holding a single space, and then everything hbm writes to stdout/stderr.
# Globals:   dir, id, rate, dur, flags, dbg, prog (read)
# Outputs:   PID of the background hbm process on stdout
#######################################
function start_hbm () {
  local log_file=hbm.out

  rm -f "$log_file"
  {
    echo "./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog"
    echo " "
  } > "$log_file"

  # $flags, $dbg and $prog stay unquoted on purpose: they may hold several
  # words, or nothing at all, and must not turn into empty arguments.
  ./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog >> "$log_file" 2>&1 &
  echo $!
}
|  |  | 
#######################################
# Parse the command line and update the global settings accordingly.
# Options that carry a value use the form -x=<value> or --long=<value>.
# Globals:   args (read) - all command-line words in one space-separated
#            string; it is split on whitespace here, so values cannot
#            contain spaces
#            dir, dir_name, prog, cc, flags, debug_flag, netem, details, ecn,
#            qdisc, flows, id, use_netperf, port, multi_iperf, rate, rr,
#            server, do_stats, dur (written)
# Outputs:   for an unknown argument, "Unknown arg:<arg>" on stdout
# Returns:   does not return for -h/--help or an unknown argument, because
#            Usage is called in both cases
#######################################
processArgs () {
  local i
  # $args is deliberately unquoted: it is a flat string that has to be split
  # back into individual arguments.
  for i in $args ; do
    case $i in
    # Support for upcoming ingress rate limiting
    #in)         # support for upcoming ingress rate limiting
    #  dir="-i"
    #  dir_name="in"
    #  ;;
    out)
      dir="-o"
      dir_name="out"
      ;;
    -b=*|--bpf=*)
      prog="${i#*=}"
      ;;
    -c=*|--cc=*)
      cc="${i#*=}"
      ;;
    --no_cn)
      flags="$flags --no_cn"
      ;;
    --debug)
      flags="$flags -d"
      debug_flag=1
      ;;
    -d=*|--delay=*)
      netem="${i#*=}"
      ;;
    -D)
      details=1
      ;;
    -E)
      ecn=1
      ;;
    --edt)
      flags="$flags --edt"
      qdisc="fq"
      ;;
    -f=*|--flows=*)
      flows="${i#*=}"
      ;;
    -h|--help)
      # -h is documented in the usage text; without this arm it fell through
      # to the default case and was reported as an unknown argument.
      Usage
      ;;
    -i=*|--id=*)
      id="${i#*=}"
      ;;
    -l)
      flags="$flags -l"
      ;;
    -N)
      use_netperf=1
      ;;
    -p=*|--port=*)
      port="${i#*=}"
      ;;
    -P)
      multi_iperf=1
      ;;
    -q=*)
      qdisc="${i#*=}"
      ;;
    -r=*|--rate=*)
      rate="${i#*=}"
      ;;
    -R)
      rr=1
      ;;
    -s=*|--server=*)
      server="${i#*=}"
      ;;
    -S|--stats)
      flags="$flags -s"
      do_stats=1
      ;;
    -t=*|--time=*)
      dur="${i#*=}"
      ;;
    -w)
      flags="$flags -w"
      ;;
    cubic)
      cc=cubic
      ;;
    dctcp)
      cc=dctcp
      ;;
    *)
      echo "Unknown arg:$i"
      Usage
      ;;
    esac
  done
}
|  |  | 
# ---------------------------------------------------------------------------
# Test setup: parse options, start hbm, join the test cgroup, apply the
# requested sysctl / qdisc settings and start the background ping.
# ---------------------------------------------------------------------------
processArgs
config_bpffs

if [ "$debug_flag" -eq 1 ] ; then
  rm -f hbm_out.log
fi

# Start hbm in the background and pause briefly before using the cgroup
# below (presumably hbm needs that time to set it up - confirm against hbm).
# "sleep 0.1" is used instead of "usleep 100000" because usleep is not
# installed on many distributions.
hbm_pid=$(start_hbm)
sleep 0.1

host=$(hostname)
cg_base_dir=/sys/fs/cgroup/unified
cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id"

# Move this shell into the test cgroup; processes started from here on are
# children of this shell.
echo $$ >> "$cg_dir/cgroup.procs"

ulimit -l unlimited

# Remove output files left over from an earlier run.
rm -f ss.out
rm -f hbm.[0-9]*."$dir_name"

if [ "$ecn" -ne 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_ecn=1
fi

# For iperf3 runs the congestion control is selected system-wide; the current
# value is saved in cur_cc so that it can be restored after the test.
if [ "$use_netperf" -eq 0 ] ; then
  cur_cc=$(sysctl -n net.ipv4.tcp_congestion_control)
  if [ "$cc" != "x" ] ; then
    sysctl -w -q -n net.ipv4.tcp_congestion_control="$cc"
  fi
fi

# A netem delay (-d) is installed on lo and takes precedence over -q, whose
# qdisc would otherwise be installed on eth0.
if [ "$netem" -ne "0" ] ; then
  if [ "$qdisc" != "" ] ; then
    echo "WARNING: Ignoring -q options because -d option used"
  fi
  tc qdisc del dev lo root > /dev/null 2>&1
  tc qdisc add dev lo root netem delay "${netem}ms" > /dev/null 2>&1
elif [ "$qdisc" != "" ] ; then
  tc qdisc del dev eth0 root > /dev/null 2>&1
  tc qdisc add dev eth0 root "$qdisc" > /dev/null 2>&1
fi

n=0
# At one ping every 0.2s, dur * 5 pings span the test duration.
m=$((dur * 5))
hn="::1"
if [ "$use_netperf" -ne 0 ] ; then
  if [ "$server" != "" ] ; then
    hn=$server
  fi
fi

# Background ping; ping.out is parsed later for the average delay.
( ping6 -i 0.2 -c "$m" "$hn" > ping.out 2>&1 ) &
|  |  | 
# ---------------------------------------------------------------------------
# Traffic generation and reporting. One of three modes is used:
#   netperf (-N), a single iperf3 client with $flows parallel streams
#   (default), or one iperf3 client/server pair per flow (-P).
# ---------------------------------------------------------------------------

# Print the "Details for HBM" header and, when stats were requested (-S) and
# the stats file exists, the contents of hbm.<id>.<dir_name>.
_hbm_print_cgroup_details() {
  echo ""
  echo "Details for HBM in cgroup $id"
  if [ "$do_stats" -eq 1 ] && [ -e "hbm.$id.$dir_name" ] ; then
    cat "hbm.$id.$dir_name"
  fi
}

# Print the final result: with -D the average ping delay (taken from
# ping.out) and the aggregate goodput, otherwise only the goodput in $rate.
_hbm_print_summary() {
  local delay
  if [ "$details" -ne 0 ] ; then
    delay=$(grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$")
    echo "PING AVG DELAY:$delay"
    echo "AGGREGATE_GOODPUT:$rate"
  else
    echo "$rate"
  fi
}

if [ "$use_netperf" -ne 0 ] ; then
  # Remember whether a netserver was already running; one is started here
  # only for local tests, and the cleanup code checks begNetserverPid again.
  begNetserverPid=$(ps ax | grep netserver | grep --invert-match "grep" | \
    awk '{ print $1 }')
  if [ "$begNetserverPid" == "" ] && [ "$server" == "" ] ; then
    ( ./netserver > /dev/null 2>&1 ) &
    sleep 0.1
  fi

  flow_cnt=1
  if [ "$server" == "" ] ; then
    np_server=$host
  else
    np_server=$server
  fi
  if [ "$cc" == "x" ] ; then
    np_cc=""
  else
    np_cc="-K $cc,$cc"
  fi

  # Start one netperf per flow in the background, each writing to
  # netperf.<id>.<flow>. $np_cc stays unquoted: it is either empty or the
  # two words "-K <cc>,<cc>".
  replySize=1
  while [ "$flow_cnt" -le "$flows" ] ; do
    if [ "$rr" -ne 0 ] ; then
      reqSize=1M
      if [ "$flow_cnt" -eq 1 ] ; then
        reqSize=10K
      fi
      if [ "$dir" == "-i" ] ; then
        replySize=$reqSize
        reqSize=1
      fi
      ( ./netperf -H "$np_server" -l "$dur" -f m -j -t TCP_RR -- \
          -r "$reqSize,$replySize" $np_cc \
          -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,REMOTE_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,LOCAL_RECV_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE \
          > "netperf.$id.$flow_cnt" ) &
    else
      if [ "$dir" == "-i" ] ; then
        ( ./netperf -H "$np_server" -l "$dur" -f m -j -t TCP_RR -- \
            -r 1,10M $np_cc \
            -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REMOTE_TRANSPORT_RETRANS,REMOTE_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE \
            > "netperf.$id.$flow_cnt" ) &
      else
        ( ./netperf -H "$np_server" -l "$dur" -f m -j -t TCP_STREAM -- \
            $np_cc \
            -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE \
            > "netperf.$id.$flow_cnt" ) &
      fi
    fi
    flow_cnt=$((flow_cnt + 1))
  done

  # sleep for duration of test (plus some buffer)
  n=$((dur + 2))
  sleep "$n"

  # force graceful termination of netperf
  pids=$(pgrep netperf)
  for p in $pids ; do
    kill -SIGALRM "$p"
  done

  flow_cnt=1
  rate=0
  if [ "$details" -ne 0 ] ; then
    _hbm_print_cgroup_details
  fi

  # Add up the integer part of each flow's throughput.
  while [ "$flow_cnt" -le "$flows" ] ; do
    if [ "$dir" == "-i" ] ; then
      r=$(grep -o "REMOTE_SEND_THROUGHPUT=[0-9]*" "netperf.$id.$flow_cnt" | grep -o "[0-9]*")
    else
      r=$(grep -o "LOCAL_SEND_THROUGHPUT=[0-9]*" "netperf.$id.$flow_cnt" | grep -o "[0-9]*")
    fi
    echo "rate for flow $flow_cnt: $r"
    # A flow that produced no result counts as 0 instead of breaking the
    # arithmetic expansion.
    rate=$((rate + ${r:-0}))
    if [ "$details" -ne 0 ] ; then
      echo "-----"
      echo "Details for cgroup $id, flow $flow_cnt"
      cat "netperf.$id.$flow_cnt"
    fi
    flow_cnt=$((flow_cnt + 1))
  done

  if [ "$details" -ne 0 ] ; then
    echo ""
  fi
  _hbm_print_summary
elif [ "$multi_iperf" -eq 0 ] ; then
  # One-shot iperf3 server (-1) plus a single client with $flows streams.
  ( iperf3 -s -p "$port" -1 > /dev/null 2>&1 ) &
  sleep 0.1
  iperf3 -c "$host" -p "$port" -i 0 -P "$flows" -f m -t "$dur" > "iperf.$id"
  rates=$(grep receiver "iperf.$id" | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*")
  # $rates is unquoted on purpose: the values are joined onto one line so
  # that only the last one is kept.
  rate=$(echo $rates | grep -o "[0-9]*$")

  if [ "$details" -ne 0 ] ; then
    _hbm_print_cgroup_details
  fi
  _hbm_print_summary
else
  # One iperf3 server/client pair per flow, on consecutive ports; each
  # client stores its receiver rate in iperf3.<id>.<flow>.
  flow_cnt=1
  while [ "$flow_cnt" -le "$flows" ] ; do
    ( iperf3 -s -p "$port" -1 > /dev/null 2>&1 ) &
    ( iperf3 -c "$host" -p "$port" -i 0 -P 1 -f m -t "$dur" \
        | grep receiver | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*" \
        | grep -o "[0-9]*$" > "iperf3.$id.$flow_cnt" ) &
    port=$((port + 1))
    flow_cnt=$((flow_cnt + 1))
  done
  n=$((dur + 1))
  sleep "$n"

  flow_cnt=1
  rate=0
  if [ "$details" -ne 0 ] ; then
    _hbm_print_cgroup_details
  fi

  while [ "$flow_cnt" -le "$flows" ] ; do
    r=$(cat "iperf3.$id.$flow_cnt")
    #    echo "rate for flow $flow_cnt: $r"
    if [ "$details" -ne 0 ] ; then
      echo "Rate for cgroup $id, flow $flow_cnt LOCAL_SEND_THROUGHPUT=$r"
    fi
    # A flow that produced no result counts as 0.
    rate=$((rate + ${r:-0}))
    flow_cnt=$((flow_cnt + 1))
  done
  _hbm_print_summary
fi
|  |  | 
# ---------------------------------------------------------------------------
# Cleanup: restore sysctls, remove qdiscs, stop hbm, remove pinned BPF
# objects and stop the netserver this script started.
# ---------------------------------------------------------------------------

# Restore the congestion control saved before the test (iperf3 runs only).
# Nothing is written when no value was saved.
if [ "$use_netperf" -eq 0 ] && [ -n "$cur_cc" ] ; then
  sysctl -w -q -n net.ipv4.tcp_congestion_control="$cur_cc"
fi

# NOTE(review): this resets tcp_ecn to 0 rather than to the value it had
# before the test; the earlier value is not saved anywhere.
if [ "$ecn" -ne 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_ecn=0
fi

# Mirror the setup logic: with a netem delay only lo was modified; the eth0
# qdisc was installed only when no delay was requested.
if [ "$netem" -ne "0" ] ; then
  tc qdisc del dev lo root > /dev/null 2>&1
elif [ "$qdisc" != "" ] ; then
  tc qdisc del dev eth0 root > /dev/null 2>&1
fi
sleep 2

# Kill hbm if the PID recorded at start still belongs to an hbm process.
# Checking that PID directly also works while other hbm instances are
# running, which a comparison against every "hbm " entry in ps does not.
if [ -n "$hbm_pid" ] && \
   [ "$(ps -p "$hbm_pid" -o comm= 2> /dev/null)" = "hbm" ] ; then
  kill "$hbm_pid"
fi

sleep 1

# Detach any pinned BPF programs that may have lingered
# (${BPFFS:?} aborts instead of expanding to /hbm* if BPFFS is ever empty).
rm -rf "${BPFFS:?}"/hbm*

# Stop netserver only if this script started it: local test and no netserver
# was running beforehand.
if [ "$use_netperf" -ne 0 ] && [ "$server" == "" ] && \
   [ "$begNetserverPid" == "" ] ; then
  netserverPid=$(ps ax | grep netserver | grep --invert-match "grep" | awk '{ print $1 }')
  if [ "$netserverPid" != "" ] ; then
    # Unquoted on purpose: there may be several PIDs.
    kill $netserverPid
  fi
fi
exit