| #!/bin/bash | 
 | # SPDX-License-Identifier: GPL-2.0 | 
 | # | 
 | # Copyright (c) 2019 Facebook | 
 | # | 
 | # This program is free software; you can redistribute it and/or | 
 | # modify it under the terms of version 2 of the GNU General Public | 
 | # License as published by the Free Software Foundation. | 
 |  | 
 | Usage() { | 
 |   echo "Script for testing HBM (Host Bandwidth Manager) framework." | 
 |   echo "It creates a cgroup to use for testing and load a BPF program to limit" | 
 |   echo "egress or ingress bandwidht. It then uses iperf3 or netperf to create" | 
 |   echo "loads. The output is the goodput in Mbps (unless -D was used)." | 
 |   echo "" | 
 |   echo "USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>]" | 
 |   echo "             [-D] [-d=<delay>|--delay=<delay>] [--debug] [-E] [--edt]" | 
 |   echo "             [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id >]" | 
 |   echo "             [-l] [-N] [--no_cn] [-p=<port>|--port=<port>] [-P]" | 
 |   echo "             [-q=<qdisc>] [-R] [-s=<server>|--server=<server]" | 
 |   echo "             [-S|--stats] -t=<time>|--time=<time>] [-w] [cubic|dctcp]" | 
 |   echo "  Where:" | 
 |   echo "    out               egress (default)" | 
 |   echo "    -b or --bpf       BPF program filename to load and attach." | 
 |   echo "                      Default is hbm_out_kern.o for egress," | 
 |   echo "    -c or -cc         TCP congestion control (cubic or dctcp)" | 
 |   echo "    --debug           print BPF trace buffer" | 
 |   echo "    -d or --delay     add a delay in ms using netem" | 
 |   echo "    -D                In addition to the goodput in Mbps, it also outputs" | 
 |   echo "                      other detailed information. This information is" | 
 |   echo "                      test dependent (i.e. iperf3 or netperf)." | 
 |   echo "    -E                enable ECN (not required for dctcp)" | 
 |   echo "    --edt             use fq's Earliest Departure Time (requires fq)" | 
 |   echo "    -f or --flows     number of concurrent flows (default=1)" | 
 |   echo "    -i or --id        cgroup id (an integer, default is 1)" | 
 |   echo "    -N                use netperf instead of iperf3" | 
 |   echo "    --no_cn           Do not return CN notifications" | 
 |   echo "    -l                do not limit flows using loopback" | 
 |   echo "    -h                Help" | 
 |   echo "    -p or --port      iperf3 port (default is 5201)" | 
 |   echo "    -P                use an iperf3 instance for each flow" | 
 |   echo "    -q                use the specified qdisc" | 
 |   echo "    -r or --rate      rate in Mbps (default 1s 1Gbps)" | 
 |   echo "    -R                Use TCP_RR for netperf. 1st flow has req" | 
 |   echo "                      size of 10KB, rest of 1MB. Reply in all" | 
 |   echo "                      cases is 1 byte." | 
 |   echo "                      More detailed output for each flow can be found" | 
 |   echo "                      in the files netperf.<cg>.<flow>, where <cg> is the" | 
 |   echo "                      cgroup id as specified with the -i flag, and <flow>" | 
 |   echo "                      is the flow id starting at 1 and increasing by 1 for" | 
 |   echo "                      flow (as specified by -f)." | 
 |   echo "    -s or --server    hostname of netperf server. Used to create netperf" | 
 |   echo "                      test traffic between to hosts (default is within host)" | 
 |   echo "                      netserver must be running on the host." | 
 |   echo "    -S or --stats     whether to update hbm stats (default is yes)." | 
 |   echo "    -t or --time      duration of iperf3 in seconds (default=5)" | 
 |   echo "    -w                Work conserving flag. cgroup can increase its" | 
 |   echo "                      bandwidth beyond the rate limit specified" | 
 |   echo "                      while there is available bandwidth. Current" | 
 |   echo "                      implementation assumes there is only one NIC" | 
 |   echo "                      (eth0), but can be extended to support multiple" | 
 |   echo "                       NICs." | 
 |   echo "    cubic or dctcp    specify which TCP CC to use" | 
 |   echo " " | 
 |   exit | 
 | } | 
 |  | 
 | #set -x | 
 |  | 
 | debug_flag=0 | 
 | args="$@" | 
 | name="$0" | 
 | netem=0 | 
 | cc=x | 
 | dir="-o" | 
 | dir_name="out" | 
 | dur=5 | 
 | flows=1 | 
 | id=1 | 
 | prog="" | 
 | port=5201 | 
 | rate=1000 | 
 | multi_iperf=0 | 
 | flow_cnt=1 | 
 | use_netperf=0 | 
 | rr=0 | 
 | ecn=0 | 
 | details=0 | 
 | server="" | 
 | qdisc="" | 
 | flags="" | 
 | do_stats=0 | 
 |  | 
 | function start_hbm () { | 
 |   rm -f hbm.out | 
 |   echo "./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog" > hbm.out | 
 |   echo " " >> hbm.out | 
 |   ./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog >> hbm.out 2>&1  & | 
 |   echo $! | 
 | } | 
 |  | 
 | processArgs () { | 
 |   for i in $args ; do | 
 |     case $i in | 
 |     # Support for upcomming ingress rate limiting | 
 |     #in)         # support for upcoming ingress rate limiting | 
 |     #  dir="-i" | 
 |     #  dir_name="in" | 
 |     #  ;; | 
 |     out) | 
 |       dir="-o" | 
 |       dir_name="out" | 
 |       ;; | 
 |     -b=*|--bpf=*) | 
 |       prog="${i#*=}" | 
 |       ;; | 
 |     -c=*|--cc=*) | 
 |       cc="${i#*=}" | 
 |       ;; | 
 |     --no_cn) | 
 |       flags="$flags --no_cn" | 
 |       ;; | 
 |     --debug) | 
 |       flags="$flags -d" | 
 |       debug_flag=1 | 
 |       ;; | 
 |     -d=*|--delay=*) | 
 |       netem="${i#*=}" | 
 |       ;; | 
 |     -D) | 
 |       details=1 | 
 |       ;; | 
 |     -E) | 
 |       ecn=1 | 
 |       ;; | 
 |     --edt) | 
 |       flags="$flags --edt" | 
 |       qdisc="fq" | 
 |      ;; | 
 |     -f=*|--flows=*) | 
 |       flows="${i#*=}" | 
 |       ;; | 
 |     -i=*|--id=*) | 
 |       id="${i#*=}" | 
 |       ;; | 
 |     -l) | 
 |       flags="$flags -l" | 
 |       ;; | 
 |     -N) | 
 |       use_netperf=1 | 
 |       ;; | 
 |     -p=*|--port=*) | 
 |       port="${i#*=}" | 
 |       ;; | 
 |     -P) | 
 |       multi_iperf=1 | 
 |       ;; | 
 |     -q=*) | 
 |       qdisc="${i#*=}" | 
 |       ;; | 
 |     -r=*|--rate=*) | 
 |       rate="${i#*=}" | 
 |       ;; | 
 |     -R) | 
 |       rr=1 | 
 |       ;; | 
 |     -s=*|--server=*) | 
 |       server="${i#*=}" | 
 |       ;; | 
 |     -S|--stats) | 
 |       flags="$flags -s" | 
 |       do_stats=1 | 
 |       ;; | 
 |     -t=*|--time=*) | 
 |       dur="${i#*=}" | 
 |       ;; | 
 |     -w) | 
 |       flags="$flags -w" | 
 |       ;; | 
 |     cubic) | 
 |       cc=cubic | 
 |       ;; | 
 |     dctcp) | 
 |       cc=dctcp | 
 |       ;; | 
 |     *) | 
 |       echo "Unknown arg:$i" | 
 |       Usage | 
 |       ;; | 
 |     esac | 
 |   done | 
 | } | 
 |  | 
 | processArgs | 
 |  | 
 | if [ $debug_flag -eq 1 ] ; then | 
 |   rm -f hbm_out.log | 
 | fi | 
 |  | 
 | hbm_pid=$(start_hbm) | 
 | usleep 100000 | 
 |  | 
 | host=`hostname` | 
 | cg_base_dir=/sys/fs/cgroup | 
 | cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id" | 
 |  | 
 | echo $$ >> $cg_dir/cgroup.procs | 
 |  | 
 | ulimit -l unlimited | 
 |  | 
 | rm -f ss.out | 
 | rm -f hbm.[0-9]*.$dir_name | 
 | if [ $ecn -ne 0 ] ; then | 
 |   sysctl -w -q -n net.ipv4.tcp_ecn=1 | 
 | fi | 
 |  | 
 | if [ $use_netperf -eq 0 ] ; then | 
 |   cur_cc=`sysctl -n net.ipv4.tcp_congestion_control` | 
 |   if [ "$cc" != "x" ] ; then | 
 |     sysctl -w -q -n net.ipv4.tcp_congestion_control=$cc | 
 |   fi | 
 | fi | 
 |  | 
 | if [ "$netem" -ne "0" ] ; then | 
 |   if [ "$qdisc" != "" ] ; then | 
 |     echo "WARNING: Ignoring -q options because -d option used" | 
 |   fi | 
 |   tc qdisc del dev lo root > /dev/null 2>&1 | 
 |   tc qdisc add dev lo root netem delay $netem\ms > /dev/null 2>&1 | 
 | elif [ "$qdisc" != "" ] ; then | 
 |   tc qdisc del dev eth0 root > /dev/null 2>&1 | 
 |   tc qdisc add dev eth0 root $qdisc > /dev/null 2>&1 | 
 | fi | 
 |  | 
 | n=0 | 
 | m=$[$dur * 5] | 
 | hn="::1" | 
 | if [ $use_netperf -ne 0 ] ; then | 
 |   if [ "$server" != "" ] ; then | 
 |     hn=$server | 
 |   fi | 
 | fi | 
 |  | 
 | ( ping6 -i 0.2 -c $m $hn > ping.out 2>&1 ) & | 
 |  | 
 | if [ $use_netperf -ne 0 ] ; then | 
 |   begNetserverPid=`ps ax | grep netserver | grep --invert-match "grep" | \ | 
 |                    awk '{ print $1 }'` | 
 |   if [ "$begNetserverPid" == "" ] ; then | 
 |     if [ "$server" == "" ] ; then | 
 |       ( ./netserver > /dev/null 2>&1) & | 
 |       usleep 100000 | 
 |     fi | 
 |   fi | 
 |   flow_cnt=1 | 
 |   if [ "$server" == "" ] ; then | 
 |     np_server=$host | 
 |   else | 
 |     np_server=$server | 
 |   fi | 
 |   if [ "$cc" == "x" ] ; then | 
 |     np_cc="" | 
 |   else | 
 |     np_cc="-K $cc,$cc" | 
 |   fi | 
 |   replySize=1 | 
 |   while [ $flow_cnt -le $flows ] ; do | 
 |     if [ $rr -ne 0 ] ; then | 
 |       reqSize=1M | 
 |       if [ $flow_cnt -eq 1 ] ; then | 
 |         reqSize=10K | 
 |       fi | 
 |       if [ "$dir" == "-i" ] ; then | 
 |         replySize=$reqSize | 
 |         reqSize=1 | 
 |       fi | 
 |       ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR  -- -r $reqSize,$replySize $np_cc -k P50_lATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,REMOTE_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,LOCAL_RECV_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) & | 
 |     else | 
 |       if [ "$dir" == "-i" ] ; then | 
 |         ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r 1,10M $np_cc -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REMOTE_TRANSPORT_RETRANS,REMOTE_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) & | 
 |       else | 
 |         ( ./netperf -H $np_server -l $dur -f m -j -t TCP_STREAM -- $np_cc -k P50_lATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) & | 
 |       fi | 
 |     fi | 
 |     flow_cnt=$[flow_cnt+1] | 
 |   done | 
 |  | 
 | # sleep for duration of test (plus some buffer) | 
 |   n=$[dur+2] | 
 |   sleep $n | 
 |  | 
 | # force graceful termination of netperf | 
 |   pids=`pgrep netperf` | 
 |   for p in $pids ; do | 
 |     kill -SIGALRM $p | 
 |   done | 
 |  | 
 |   flow_cnt=1 | 
 |   rate=0 | 
 |   if [ $details -ne 0 ] ; then | 
 |     echo "" | 
 |     echo "Details for HBM in cgroup $id" | 
 |     if [ $do_stats -eq 1 ] ; then | 
 |       if [ -e hbm.$id.$dir_name ] ; then | 
 |         cat hbm.$id.$dir_name | 
 |       fi | 
 |     fi | 
 |   fi | 
 |   while [ $flow_cnt -le $flows ] ; do | 
 |     if [ "$dir" == "-i" ] ; then | 
 |       r=`cat netperf.$id.$flow_cnt | grep -o "REMOTE_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"` | 
 |     else | 
 |       r=`cat netperf.$id.$flow_cnt | grep -o "LOCAL_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"` | 
 |     fi | 
 |     echo "rate for flow $flow_cnt: $r" | 
 |     rate=$[rate+r] | 
 |     if [ $details -ne 0 ] ; then | 
 |       echo "-----" | 
 |       echo "Details for cgroup $id, flow $flow_cnt" | 
 |       cat netperf.$id.$flow_cnt | 
 |     fi | 
 |     flow_cnt=$[flow_cnt+1] | 
 |   done | 
 |   if [ $details -ne 0 ] ; then | 
 |     echo "" | 
 |     delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"` | 
 |     echo "PING AVG DELAY:$delay" | 
 |     echo "AGGREGATE_GOODPUT:$rate" | 
 |   else | 
 |     echo $rate | 
 |   fi | 
 | elif [ $multi_iperf -eq 0 ] ; then | 
 |   (iperf3 -s -p $port -1 > /dev/null 2>&1) & | 
 |   usleep 100000 | 
 |   iperf3 -c $host -p $port -i 0 -P $flows -f m -t $dur > iperf.$id | 
 |   rates=`grep receiver iperf.$id | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*"` | 
 |   rate=`echo $rates | grep -o "[0-9]*$"` | 
 |  | 
 |   if [ $details -ne 0 ] ; then | 
 |     echo "" | 
 |     echo "Details for HBM in cgroup $id" | 
 |     if [ $do_stats -eq 1 ] ; then | 
 |       if [ -e hbm.$id.$dir_name ] ; then | 
 |         cat hbm.$id.$dir_name | 
 |       fi | 
 |     fi | 
 |     delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"` | 
 |     echo "PING AVG DELAY:$delay" | 
 |     echo "AGGREGATE_GOODPUT:$rate" | 
 |   else | 
 |     echo $rate | 
 |   fi | 
 | else | 
 |   flow_cnt=1 | 
 |   while [ $flow_cnt -le $flows ] ; do | 
 |     (iperf3 -s -p $port -1 > /dev/null 2>&1) & | 
 |     ( iperf3 -c $host -p $port -i 0 -P 1 -f m -t $dur | grep receiver | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*" | grep -o "[0-9]*$" > iperf3.$id.$flow_cnt ) & | 
 |     port=$[port+1] | 
 |     flow_cnt=$[flow_cnt+1] | 
 |   done | 
 |   n=$[dur+1] | 
 |   sleep $n | 
 |   flow_cnt=1 | 
 |   rate=0 | 
 |   if [ $details -ne 0 ] ; then | 
 |     echo "" | 
 |     echo "Details for HBM in cgroup $id" | 
 |     if [ $do_stats -eq 1 ] ; then | 
 |       if [ -e hbm.$id.$dir_name ] ; then | 
 |         cat hbm.$id.$dir_name | 
 |       fi | 
 |     fi | 
 |   fi | 
 |  | 
 |   while [ $flow_cnt -le $flows ] ; do | 
 |     r=`cat iperf3.$id.$flow_cnt` | 
 | #    echo "rate for flow $flow_cnt: $r" | 
 |   if [ $details -ne 0 ] ; then | 
 |     echo "Rate for cgroup $id, flow $flow_cnt LOCAL_SEND_THROUGHPUT=$r" | 
 |   fi | 
 |     rate=$[rate+r] | 
 |     flow_cnt=$[flow_cnt+1] | 
 |   done | 
 |   if [ $details -ne 0 ] ; then | 
 |     delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"` | 
 |     echo "PING AVG DELAY:$delay" | 
 |     echo "AGGREGATE_GOODPUT:$rate" | 
 |   else | 
 |     echo $rate | 
 |   fi | 
 | fi | 
 |  | 
 | if [ $use_netperf -eq 0 ] ; then | 
 |   sysctl -w -q -n net.ipv4.tcp_congestion_control=$cur_cc | 
 | fi | 
 | if [ $ecn -ne 0 ] ; then | 
 |   sysctl -w -q -n net.ipv4.tcp_ecn=0 | 
 | fi | 
 | if [ "$netem" -ne "0" ] ; then | 
 |   tc qdisc del dev lo root > /dev/null 2>&1 | 
 | fi | 
 | if [ "$qdisc" != "" ] ; then | 
 |   tc qdisc del dev eth0 root > /dev/null 2>&1 | 
 | fi | 
 | sleep 2 | 
 |  | 
 | hbmPid=`ps ax | grep "hbm " | grep --invert-match "grep" | awk '{ print $1 }'` | 
 | if [ "$hbmPid" == "$hbm_pid" ] ; then | 
 |   kill $hbm_pid | 
 | fi | 
 |  | 
 | sleep 1 | 
 |  | 
 | # Detach any BPF programs that may have lingered | 
 | ttx=`bpftool cgroup tree | grep hbm` | 
 | v=2 | 
 | for x in $ttx ; do | 
 |     if [ "${x:0:36}" == "/sys/fs/cgroup/cgroup-test-work-dir/" ] ; then | 
 | 	cg=$x ; v=0 | 
 |     else | 
 | 	if [ $v -eq 0 ] ; then | 
 | 	    id=$x ; v=1 | 
 | 	else | 
 | 	    if [ $v -eq 1 ] ; then | 
 | 		type=$x ; bpftool cgroup detach $cg $type id $id | 
 | 		v=0 | 
 | 	    fi | 
 | 	fi | 
 |     fi | 
 | done | 
 |  | 
 | if [ $use_netperf -ne 0 ] ; then | 
 |   if [ "$server" == "" ] ; then | 
 |     if [ "$begNetserverPid" == "" ] ; then | 
 |       netserverPid=`ps ax | grep netserver | grep --invert-match "grep" | awk '{ print $1 }'` | 
 |       if [ "$netserverPid" != "" ] ; then | 
 |         kill $netserverPid | 
 |       fi | 
 |     fi | 
 |   fi | 
 | fi | 
 | exit |