|  | // SPDX-License-Identifier: GPL-2.0 | 
|  |  | 
/* net/sched/sch_etf.c  Earliest TxTime First queueing discipline.
 *
 * Authors:	Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
 *		Vinicius Costa Gomes <vinicius.gomes@intel.com>
 */
|  |  | 
|  | #include <linux/module.h> | 
|  | #include <linux/types.h> | 
|  | #include <linux/kernel.h> | 
|  | #include <linux/string.h> | 
|  | #include <linux/errno.h> | 
|  | #include <linux/errqueue.h> | 
|  | #include <linux/rbtree.h> | 
|  | #include <linux/skbuff.h> | 
|  | #include <linux/posix-timers.h> | 
|  | #include <net/netlink.h> | 
|  | #include <net/sch_generic.h> | 
|  | #include <net/pkt_sched.h> | 
|  | #include <net/sock.h> | 
|  |  | 
/* Flag tests on the 'flags' member of the object that 'x' points to (used in
 * this file on struct tc_etf_qopt).  Each macro expands to the masked flag
 * word, so the result is non-zero when the corresponding TC_ETF_* bit is set
 * and zero otherwise.
 */
#define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON)
#define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON)
#define SKIP_SOCK_CHECK_IS_SET(x) ((x)->flags & TC_ETF_SKIP_SOCK_CHECK)
|  |  | 
|  | struct etf_sched_data { | 
|  | bool offload; | 
|  | bool deadline_mode; | 
|  | bool skip_sock_check; | 
|  | int clockid; | 
|  | int queue; | 
|  | s32 delta; /* in ns */ | 
|  | ktime_t last; /* The txtime of the last skb sent to the netdevice. */ | 
|  | struct rb_root_cached head; | 
|  | struct qdisc_watchdog watchdog; | 
|  | ktime_t (*get_time)(void); | 
|  | }; | 
|  |  | 
|  | static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = { | 
|  | [TCA_ETF_PARMS]	= { .len = sizeof(struct tc_etf_qopt) }, | 
|  | }; | 
|  |  | 
|  | static inline int validate_input_params(struct tc_etf_qopt *qopt, | 
|  | struct netlink_ext_ack *extack) | 
|  | { | 
|  | /* Check if params comply to the following rules: | 
|  | *	* Clockid and delta must be valid. | 
|  | * | 
|  | *	* Dynamic clockids are not supported. | 
|  | * | 
|  | *	* Delta must be a positive integer. | 
|  | * | 
|  | * Also note that for the HW offload case, we must | 
|  | * expect that system clocks have been synchronized to PHC. | 
|  | */ | 
|  | if (qopt->clockid < 0) { | 
|  | NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported"); | 
|  | return -ENOTSUPP; | 
|  | } | 
|  |  | 
|  | if (qopt->clockid != CLOCK_TAI) { | 
|  | NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used"); | 
|  | return -EINVAL; | 
|  | } | 
|  |  | 
|  | if (qopt->delta < 0) { | 
|  | NL_SET_ERR_MSG(extack, "Delta must be positive"); | 
|  | return -EINVAL; | 
|  | } | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb) | 
|  | { | 
|  | struct etf_sched_data *q = qdisc_priv(sch); | 
|  | ktime_t txtime = nskb->tstamp; | 
|  | struct sock *sk = nskb->sk; | 
|  | ktime_t now; | 
|  |  | 
|  | if (q->skip_sock_check) | 
|  | goto skip; | 
|  |  | 
|  | if (!sk || !sk_fullsock(sk)) | 
|  | return false; | 
|  |  | 
|  | if (!sock_flag(sk, SOCK_TXTIME)) | 
|  | return false; | 
|  |  | 
|  | /* We don't perform crosstimestamping. | 
|  | * Drop if packet's clockid differs from qdisc's. | 
|  | */ | 
|  | if (sk->sk_clockid != q->clockid) | 
|  | return false; | 
|  |  | 
|  | if (sk->sk_txtime_deadline_mode != q->deadline_mode) | 
|  | return false; | 
|  |  | 
|  | skip: | 
|  | now = q->get_time(); | 
|  | if (ktime_before(txtime, now) || ktime_before(txtime, q->last)) | 
|  | return false; | 
|  |  | 
|  | return true; | 
|  | } | 
|  |  | 
|  | static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch) | 
|  | { | 
|  | struct etf_sched_data *q = qdisc_priv(sch); | 
|  | struct rb_node *p; | 
|  |  | 
|  | p = rb_first_cached(&q->head); | 
|  | if (!p) | 
|  | return NULL; | 
|  |  | 
|  | return rb_to_skb(p); | 
|  | } | 
|  |  | 
|  | static void reset_watchdog(struct Qdisc *sch) | 
|  | { | 
|  | struct etf_sched_data *q = qdisc_priv(sch); | 
|  | struct sk_buff *skb = etf_peek_timesortedlist(sch); | 
|  | ktime_t next; | 
|  |  | 
|  | if (!skb) { | 
|  | qdisc_watchdog_cancel(&q->watchdog); | 
|  | return; | 
|  | } | 
|  |  | 
|  | next = ktime_sub_ns(skb->tstamp, q->delta); | 
|  | qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next)); | 
|  | } | 
|  |  | 
|  | static void report_sock_error(struct sk_buff *skb, u32 err, u8 code) | 
|  | { | 
|  | struct sock_exterr_skb *serr; | 
|  | struct sk_buff *clone; | 
|  | ktime_t txtime = skb->tstamp; | 
|  | struct sock *sk = skb->sk; | 
|  |  | 
|  | if (!sk || !sk_fullsock(sk) || !(sk->sk_txtime_report_errors)) | 
|  | return; | 
|  |  | 
|  | clone = skb_clone(skb, GFP_ATOMIC); | 
|  | if (!clone) | 
|  | return; | 
|  |  | 
|  | serr = SKB_EXT_ERR(clone); | 
|  | serr->ee.ee_errno = err; | 
|  | serr->ee.ee_origin = SO_EE_ORIGIN_TXTIME; | 
|  | serr->ee.ee_type = 0; | 
|  | serr->ee.ee_code = code; | 
|  | serr->ee.ee_pad = 0; | 
|  | serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */ | 
|  | serr->ee.ee_info = txtime; /* low part of tstamp */ | 
|  |  | 
|  | if (sock_queue_err_skb(sk, clone)) | 
|  | kfree_skb(clone); | 
|  | } | 
|  |  | 
|  | static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch, | 
|  | struct sk_buff **to_free) | 
|  | { | 
|  | struct etf_sched_data *q = qdisc_priv(sch); | 
|  | struct rb_node **p = &q->head.rb_root.rb_node, *parent = NULL; | 
|  | ktime_t txtime = nskb->tstamp; | 
|  | bool leftmost = true; | 
|  |  | 
|  | if (!is_packet_valid(sch, nskb)) { | 
|  | report_sock_error(nskb, EINVAL, | 
|  | SO_EE_CODE_TXTIME_INVALID_PARAM); | 
|  | return qdisc_drop(nskb, sch, to_free); | 
|  | } | 
|  |  | 
|  | while (*p) { | 
|  | struct sk_buff *skb; | 
|  |  | 
|  | parent = *p; | 
|  | skb = rb_to_skb(parent); | 
|  | if (ktime_compare(txtime, skb->tstamp) >= 0) { | 
|  | p = &parent->rb_right; | 
|  | leftmost = false; | 
|  | } else { | 
|  | p = &parent->rb_left; | 
|  | } | 
|  | } | 
|  | rb_link_node(&nskb->rbnode, parent, p); | 
|  | rb_insert_color_cached(&nskb->rbnode, &q->head, leftmost); | 
|  |  | 
|  | qdisc_qstats_backlog_inc(sch, nskb); | 
|  | sch->q.qlen++; | 
|  |  | 
|  | /* Now we may need to re-arm the qdisc watchdog for the next packet. */ | 
|  | reset_watchdog(sch); | 
|  |  | 
|  | return NET_XMIT_SUCCESS; | 
|  | } | 
|  |  | 
|  | static void timesortedlist_drop(struct Qdisc *sch, struct sk_buff *skb, | 
|  | ktime_t now) | 
|  | { | 
|  | struct etf_sched_data *q = qdisc_priv(sch); | 
|  | struct sk_buff *to_free = NULL; | 
|  | struct sk_buff *tmp = NULL; | 
|  |  | 
|  | skb_rbtree_walk_from_safe(skb, tmp) { | 
|  | if (ktime_after(skb->tstamp, now)) | 
|  | break; | 
|  |  | 
|  | rb_erase_cached(&skb->rbnode, &q->head); | 
|  |  | 
|  | /* The rbnode field in the skb re-uses these fields, now that | 
|  | * we are done with the rbnode, reset them. | 
|  | */ | 
|  | skb->next = NULL; | 
|  | skb->prev = NULL; | 
|  | skb->dev = qdisc_dev(sch); | 
|  |  | 
|  | report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED); | 
|  |  | 
|  | qdisc_qstats_backlog_dec(sch, skb); | 
|  | qdisc_drop(skb, sch, &to_free); | 
|  | qdisc_qstats_overlimit(sch); | 
|  | sch->q.qlen--; | 
|  | } | 
|  |  | 
|  | kfree_skb_list(to_free); | 
|  | } | 
|  |  | 
|  | static void timesortedlist_remove(struct Qdisc *sch, struct sk_buff *skb) | 
|  | { | 
|  | struct etf_sched_data *q = qdisc_priv(sch); | 
|  |  | 
|  | rb_erase_cached(&skb->rbnode, &q->head); | 
|  |  | 
|  | /* The rbnode field in the skb re-uses these fields, now that | 
|  | * we are done with the rbnode, reset them. | 
|  | */ | 
|  | skb->next = NULL; | 
|  | skb->prev = NULL; | 
|  | skb->dev = qdisc_dev(sch); | 
|  |  | 
|  | qdisc_qstats_backlog_dec(sch, skb); | 
|  |  | 
|  | qdisc_bstats_update(sch, skb); | 
|  |  | 
|  | q->last = skb->tstamp; | 
|  |  | 
|  | sch->q.qlen--; | 
|  | } | 
|  |  | 
|  | static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch) | 
|  | { | 
|  | struct etf_sched_data *q = qdisc_priv(sch); | 
|  | struct sk_buff *skb; | 
|  | ktime_t now, next; | 
|  |  | 
|  | skb = etf_peek_timesortedlist(sch); | 
|  | if (!skb) | 
|  | return NULL; | 
|  |  | 
|  | now = q->get_time(); | 
|  |  | 
|  | /* Drop if packet has expired while in queue. */ | 
|  | if (ktime_before(skb->tstamp, now)) { | 
|  | timesortedlist_drop(sch, skb, now); | 
|  | skb = NULL; | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | /* When in deadline mode, dequeue as soon as possible and change the | 
|  | * txtime from deadline to (now + delta). | 
|  | */ | 
|  | if (q->deadline_mode) { | 
|  | timesortedlist_remove(sch, skb); | 
|  | skb->tstamp = now; | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | next = ktime_sub_ns(skb->tstamp, q->delta); | 
|  |  | 
|  | /* Dequeue only if now is within the [txtime - delta, txtime] range. */ | 
|  | if (ktime_after(now, next)) | 
|  | timesortedlist_remove(sch, skb); | 
|  | else | 
|  | skb = NULL; | 
|  |  | 
|  | out: | 
|  | /* Now we may need to re-arm the qdisc watchdog for the next packet. */ | 
|  | reset_watchdog(sch); | 
|  |  | 
|  | return skb; | 
|  | } | 
|  |  | 
|  | static void etf_disable_offload(struct net_device *dev, | 
|  | struct etf_sched_data *q) | 
|  | { | 
|  | struct tc_etf_qopt_offload etf = { }; | 
|  | const struct net_device_ops *ops; | 
|  | int err; | 
|  |  | 
|  | if (!q->offload) | 
|  | return; | 
|  |  | 
|  | ops = dev->netdev_ops; | 
|  | if (!ops->ndo_setup_tc) | 
|  | return; | 
|  |  | 
|  | etf.queue = q->queue; | 
|  | etf.enable = 0; | 
|  |  | 
|  | err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf); | 
|  | if (err < 0) | 
|  | pr_warn("Couldn't disable ETF offload for queue %d\n", | 
|  | etf.queue); | 
|  | } | 
|  |  | 
|  | static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q, | 
|  | struct netlink_ext_ack *extack) | 
|  | { | 
|  | const struct net_device_ops *ops = dev->netdev_ops; | 
|  | struct tc_etf_qopt_offload etf = { }; | 
|  | int err; | 
|  |  | 
|  | if (!ops->ndo_setup_tc) { | 
|  | NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload"); | 
|  | return -EOPNOTSUPP; | 
|  | } | 
|  |  | 
|  | etf.queue = q->queue; | 
|  | etf.enable = 1; | 
|  |  | 
|  | err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf); | 
|  | if (err < 0) { | 
|  | NL_SET_ERR_MSG(extack, "Specified device failed to setup ETF hardware offload"); | 
|  | return err; | 
|  | } | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int etf_init(struct Qdisc *sch, struct nlattr *opt, | 
|  | struct netlink_ext_ack *extack) | 
|  | { | 
|  | struct etf_sched_data *q = qdisc_priv(sch); | 
|  | struct net_device *dev = qdisc_dev(sch); | 
|  | struct nlattr *tb[TCA_ETF_MAX + 1]; | 
|  | struct tc_etf_qopt *qopt; | 
|  | int err; | 
|  |  | 
|  | if (!opt) { | 
|  | NL_SET_ERR_MSG(extack, | 
|  | "Missing ETF qdisc options which are mandatory"); | 
|  | return -EINVAL; | 
|  | } | 
|  |  | 
|  | err = nla_parse_nested_deprecated(tb, TCA_ETF_MAX, opt, etf_policy, | 
|  | extack); | 
|  | if (err < 0) | 
|  | return err; | 
|  |  | 
|  | if (!tb[TCA_ETF_PARMS]) { | 
|  | NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters"); | 
|  | return -EINVAL; | 
|  | } | 
|  |  | 
|  | qopt = nla_data(tb[TCA_ETF_PARMS]); | 
|  |  | 
|  | pr_debug("delta %d clockid %d offload %s deadline %s\n", | 
|  | qopt->delta, qopt->clockid, | 
|  | OFFLOAD_IS_ON(qopt) ? "on" : "off", | 
|  | DEADLINE_MODE_IS_ON(qopt) ? "on" : "off"); | 
|  |  | 
|  | err = validate_input_params(qopt, extack); | 
|  | if (err < 0) | 
|  | return err; | 
|  |  | 
|  | q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0); | 
|  |  | 
|  | if (OFFLOAD_IS_ON(qopt)) { | 
|  | err = etf_enable_offload(dev, q, extack); | 
|  | if (err < 0) | 
|  | return err; | 
|  | } | 
|  |  | 
|  | /* Everything went OK, save the parameters used. */ | 
|  | q->delta = qopt->delta; | 
|  | q->clockid = qopt->clockid; | 
|  | q->offload = OFFLOAD_IS_ON(qopt); | 
|  | q->deadline_mode = DEADLINE_MODE_IS_ON(qopt); | 
|  | q->skip_sock_check = SKIP_SOCK_CHECK_IS_SET(qopt); | 
|  |  | 
|  | switch (q->clockid) { | 
|  | case CLOCK_REALTIME: | 
|  | q->get_time = ktime_get_real; | 
|  | break; | 
|  | case CLOCK_MONOTONIC: | 
|  | q->get_time = ktime_get; | 
|  | break; | 
|  | case CLOCK_BOOTTIME: | 
|  | q->get_time = ktime_get_boottime; | 
|  | break; | 
|  | case CLOCK_TAI: | 
|  | q->get_time = ktime_get_clocktai; | 
|  | break; | 
|  | default: | 
|  | NL_SET_ERR_MSG(extack, "Clockid is not supported"); | 
|  | return -ENOTSUPP; | 
|  | } | 
|  |  | 
|  | qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static void timesortedlist_clear(struct Qdisc *sch) | 
|  | { | 
|  | struct etf_sched_data *q = qdisc_priv(sch); | 
|  | struct rb_node *p = rb_first_cached(&q->head); | 
|  |  | 
|  | while (p) { | 
|  | struct sk_buff *skb = rb_to_skb(p); | 
|  |  | 
|  | p = rb_next(p); | 
|  |  | 
|  | rb_erase_cached(&skb->rbnode, &q->head); | 
|  | rtnl_kfree_skbs(skb, skb); | 
|  | sch->q.qlen--; | 
|  | } | 
|  | } | 
|  |  | 
|  | static void etf_reset(struct Qdisc *sch) | 
|  | { | 
|  | struct etf_sched_data *q = qdisc_priv(sch); | 
|  |  | 
|  | /* Only cancel watchdog if it's been initialized. */ | 
|  | if (q->watchdog.qdisc == sch) | 
|  | qdisc_watchdog_cancel(&q->watchdog); | 
|  |  | 
|  | /* No matter which mode we are on, it's safe to clear both lists. */ | 
|  | timesortedlist_clear(sch); | 
|  | __qdisc_reset_queue(&sch->q); | 
|  |  | 
|  | q->last = 0; | 
|  | } | 
|  |  | 
|  | static void etf_destroy(struct Qdisc *sch) | 
|  | { | 
|  | struct etf_sched_data *q = qdisc_priv(sch); | 
|  | struct net_device *dev = qdisc_dev(sch); | 
|  |  | 
|  | /* Only cancel watchdog if it's been initialized. */ | 
|  | if (q->watchdog.qdisc == sch) | 
|  | qdisc_watchdog_cancel(&q->watchdog); | 
|  |  | 
|  | etf_disable_offload(dev, q); | 
|  | } | 
|  |  | 
|  | static int etf_dump(struct Qdisc *sch, struct sk_buff *skb) | 
|  | { | 
|  | struct etf_sched_data *q = qdisc_priv(sch); | 
|  | struct tc_etf_qopt opt = { }; | 
|  | struct nlattr *nest; | 
|  |  | 
|  | nest = nla_nest_start_noflag(skb, TCA_OPTIONS); | 
|  | if (!nest) | 
|  | goto nla_put_failure; | 
|  |  | 
|  | opt.delta = READ_ONCE(q->delta); | 
|  | opt.clockid = READ_ONCE(q->clockid); | 
|  | if (READ_ONCE(q->offload)) | 
|  | opt.flags |= TC_ETF_OFFLOAD_ON; | 
|  |  | 
|  | if (READ_ONCE(q->deadline_mode)) | 
|  | opt.flags |= TC_ETF_DEADLINE_MODE_ON; | 
|  |  | 
|  | if (READ_ONCE(q->skip_sock_check)) | 
|  | opt.flags |= TC_ETF_SKIP_SOCK_CHECK; | 
|  |  | 
|  | if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt)) | 
|  | goto nla_put_failure; | 
|  |  | 
|  | return nla_nest_end(skb, nest); | 
|  |  | 
|  | nla_put_failure: | 
|  | nla_nest_cancel(skb, nest); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | static struct Qdisc_ops etf_qdisc_ops __read_mostly = { | 
|  | .id		=	"etf", | 
|  | .priv_size	=	sizeof(struct etf_sched_data), | 
|  | .enqueue	=	etf_enqueue_timesortedlist, | 
|  | .dequeue	=	etf_dequeue_timesortedlist, | 
|  | .peek		=	etf_peek_timesortedlist, | 
|  | .init		=	etf_init, | 
|  | .reset		=	etf_reset, | 
|  | .destroy	=	etf_destroy, | 
|  | .dump		=	etf_dump, | 
|  | .owner		=	THIS_MODULE, | 
|  | }; | 
|  | MODULE_ALIAS_NET_SCH("etf"); | 
|  |  | 
|  | static int __init etf_module_init(void) | 
|  | { | 
|  | return register_qdisc(&etf_qdisc_ops); | 
|  | } | 
|  |  | 
|  | static void __exit etf_module_exit(void) | 
|  | { | 
|  | unregister_qdisc(&etf_qdisc_ops); | 
|  | } | 
|  | module_init(etf_module_init) | 
|  | module_exit(etf_module_exit) | 
|  | MODULE_LICENSE("GPL"); | 
|  | MODULE_DESCRIPTION("Earliest TxTime First (ETF) qdisc"); |