Merge tag 'wireless-2025-09-03' of https://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless
Johannes Berg says:
====================
Just a few updates:
- a set of buffer overflow fixes
- ath11k: a fix for GTK rekeying
- ath12k: a missed WiFi7 capability
* tag 'wireless-2025-09-03' of https://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless:
wifi: wilc1000: avoid buffer overflow in WID string configuration
wifi: cfg80211: sme: cap SSID length in __cfg80211_connect_result()
wifi: libertas: cap SSID len in lbs_associate()
wifi: cw1200: cap SSID length in cw1200_do_join()
wifi: ath11k: fix group data packet drops during rekey
wifi: ath12k: Set EMLSR support flag in MLO flags for EML-capable stations
====================
Link: https://patch.msgid.link/20250903075602.30263-4-johannes@sipsolutions.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
diff --git a/Documentation/networking/napi.rst b/Documentation/networking/napi.rst
index a15754a..7dd6036 100644
--- a/Documentation/networking/napi.rst
+++ b/Documentation/networking/napi.rst
@@ -433,9 +433,8 @@
Threaded NAPI is an operating mode that uses dedicated kernel
threads rather than software IRQ context for NAPI processing.
-The configuration is per netdevice and will affect all
-NAPI instances of that device. Each NAPI instance will spawn a separate
-thread (called ``napi/${ifc-name}-${napi-id}``).
+Each threaded NAPI instance will spawn a separate thread
+(called ``napi/${ifc-name}-${napi-id}``).
It is recommended to pin each kernel thread to a single CPU, the same
CPU as the CPU which services the interrupt. Note that the mapping
diff --git a/MAINTAINERS b/MAINTAINERS
index 09b34bb..1819c47 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -24252,6 +24252,12 @@
F: Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml
F: drivers/input/keyboard/sun4i-lradc-keys.c
+SUNDANCE NETWORK DRIVER
+M: Denis Kirjanov <dkirjanov@suse.de>
+L: netdev@vger.kernel.org
+S: Maintained
+F: drivers/net/ethernet/dlink/sundance.c
+
SUNPLUS ETHERNET DRIVER
M: Wells Lu <wellslutw@gmail.com>
L: netdev@vger.kernel.org
diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig
index e4bcdb6..2707ab1 100644
--- a/arch/mips/configs/mtx1_defconfig
+++ b/arch/mips/configs/mtx1_defconfig
@@ -273,6 +273,7 @@
CONFIG_ULI526X=m
CONFIG_PCMCIA_XIRCOM=m
CONFIG_DL2K=m
+CONFIG_SUNDANCE=m
CONFIG_PCMCIA_FMVJ18X=m
CONFIG_E100=m
CONFIG_E1000=m
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index bb35964..b082c1f 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -433,6 +433,7 @@
CONFIG_ULI526X=m
CONFIG_PCMCIA_XIRCOM=m
CONFIG_DL2K=m
+CONFIG_SUNDANCE=m
CONFIG_S2IO=m
CONFIG_FEC_MPC52xx=m
CONFIG_GIANFAR=m
diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c
index f7d8c3c..2fef082 100644
--- a/drivers/bluetooth/hci_vhci.c
+++ b/drivers/bluetooth/hci_vhci.c
@@ -380,6 +380,28 @@ static const struct file_operations force_devcoredump_fops = {
.write = force_devcd_write,
};
+static void vhci_debugfs_init(struct vhci_data *data)
+{
+ struct hci_dev *hdev = data->hdev;
+
+ debugfs_create_file("force_suspend", 0644, hdev->debugfs, data,
+ &force_suspend_fops);
+
+ debugfs_create_file("force_wakeup", 0644, hdev->debugfs, data,
+ &force_wakeup_fops);
+
+ if (IS_ENABLED(CONFIG_BT_MSFTEXT))
+ debugfs_create_file("msft_opcode", 0644, hdev->debugfs, data,
+ &msft_opcode_fops);
+
+ if (IS_ENABLED(CONFIG_BT_AOSPEXT))
+ debugfs_create_file("aosp_capable", 0644, hdev->debugfs, data,
+ &aosp_capable_fops);
+
+ debugfs_create_file("force_devcoredump", 0644, hdev->debugfs, data,
+ &force_devcoredump_fops);
+}
+
static int __vhci_create_device(struct vhci_data *data, __u8 opcode)
{
struct hci_dev *hdev;
@@ -434,22 +456,8 @@ static int __vhci_create_device(struct vhci_data *data, __u8 opcode)
return -EBUSY;
}
- debugfs_create_file("force_suspend", 0644, hdev->debugfs, data,
- &force_suspend_fops);
-
- debugfs_create_file("force_wakeup", 0644, hdev->debugfs, data,
- &force_wakeup_fops);
-
- if (IS_ENABLED(CONFIG_BT_MSFTEXT))
- debugfs_create_file("msft_opcode", 0644, hdev->debugfs, data,
- &msft_opcode_fops);
-
- if (IS_ENABLED(CONFIG_BT_AOSPEXT))
- debugfs_create_file("aosp_capable", 0644, hdev->debugfs, data,
- &aosp_capable_fops);
-
- debugfs_create_file("force_devcoredump", 0644, hdev->debugfs, data,
- &force_devcoredump_fops);
+ if (!IS_ERR_OR_NULL(hdev->debugfs))
+ vhci_debugfs_init(data);
hci_skb_pkt_type(skb) = HCI_VENDOR_PKT;
@@ -651,6 +659,21 @@ static int vhci_open(struct inode *inode, struct file *file)
return 0;
}
+static void vhci_debugfs_remove(struct hci_dev *hdev)
+{
+ debugfs_lookup_and_remove("force_suspend", hdev->debugfs);
+
+ debugfs_lookup_and_remove("force_wakeup", hdev->debugfs);
+
+ if (IS_ENABLED(CONFIG_BT_MSFTEXT))
+ debugfs_lookup_and_remove("msft_opcode", hdev->debugfs);
+
+ if (IS_ENABLED(CONFIG_BT_AOSPEXT))
+ debugfs_lookup_and_remove("aosp_capable", hdev->debugfs);
+
+ debugfs_lookup_and_remove("force_devcoredump", hdev->debugfs);
+}
+
static int vhci_release(struct inode *inode, struct file *file)
{
struct vhci_data *data = file->private_data;
@@ -662,6 +685,8 @@ static int vhci_release(struct inode *inode, struct file *file)
hdev = data->hdev;
if (hdev) {
+ if (!IS_ERR_OR_NULL(hdev->debugfs))
+ vhci_debugfs_remove(hdev);
hci_unregister_dev(hdev);
hci_free_dev(hdev);
}
diff --git a/drivers/isdn/mISDN/dsp_hwec.c b/drivers/isdn/mISDN/dsp_hwec.c
index 0b3f291..0cd216e 100644
--- a/drivers/isdn/mISDN/dsp_hwec.c
+++ b/drivers/isdn/mISDN/dsp_hwec.c
@@ -51,14 +51,14 @@ void dsp_hwec_enable(struct dsp *dsp, const char *arg)
goto _do;
{
- char *dup, *tok, *name, *val;
+ char *dup, *next, *tok, *name, *val;
int tmp;
- dup = kstrdup(arg, GFP_ATOMIC);
+ dup = next = kstrdup(arg, GFP_ATOMIC);
if (!dup)
return;
- while ((tok = strsep(&dup, ","))) {
+ while ((tok = strsep(&next, ","))) {
if (!strlen(tok))
continue;
name = strsep(&tok, "=");
diff --git a/drivers/net/dsa/mv88e6xxx/leds.c b/drivers/net/dsa/mv88e6xxx/leds.c
index 1c88bfa..ab3bc64 100644
--- a/drivers/net/dsa/mv88e6xxx/leds.c
+++ b/drivers/net/dsa/mv88e6xxx/leds.c
@@ -779,7 +779,8 @@ int mv88e6xxx_port_setup_leds(struct mv88e6xxx_chip *chip, int port)
continue;
if (led_num > 1) {
dev_err(dev, "invalid LED specified port %d\n", port);
- return -EINVAL;
+ ret = -EINVAL;
+ goto err_put_led;
}
if (led_num == 0)
@@ -823,17 +824,25 @@ int mv88e6xxx_port_setup_leds(struct mv88e6xxx_chip *chip, int port)
init_data.devname_mandatory = true;
init_data.devicename = kasprintf(GFP_KERNEL, "%s:0%d:0%d", chip->info->name,
port, led_num);
- if (!init_data.devicename)
- return -ENOMEM;
+ if (!init_data.devicename) {
+ ret = -ENOMEM;
+ goto err_put_led;
+ }
ret = devm_led_classdev_register_ext(dev, l, &init_data);
kfree(init_data.devicename);
if (ret) {
dev_err(dev, "Failed to init LED %d for port %d", led_num, port);
- return ret;
+ goto err_put_led;
}
}
+ fwnode_handle_put(leds);
return 0;
+
+err_put_led:
+ fwnode_handle_put(led);
+ fwnode_handle_put(leds);
+ return ret;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 31e3d82..0daa08c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -4397,7 +4397,7 @@ static void bnxt_alloc_one_rx_ring_netmem(struct bnxt *bp,
for (i = 0; i < bp->rx_agg_ring_size; i++) {
if (bnxt_alloc_rx_netmem(bp, rxr, prod, GFP_KERNEL)) {
netdev_warn(bp->dev, "init'ed rx ring %d with %d/%d pages only\n",
- ring_nr, i, bp->rx_ring_size);
+ ring_nr, i, bp->rx_agg_ring_size);
break;
}
prod = NEXT_RX_AGG(prod);
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 16d28a8..c769b7d 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -1223,12 +1223,13 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
{
struct macb *bp = queue->bp;
u16 queue_index = queue - bp->queues;
+ unsigned long flags;
unsigned int tail;
unsigned int head;
int packets = 0;
u32 bytes = 0;
- spin_lock(&queue->tx_ptr_lock);
+ spin_lock_irqsave(&queue->tx_ptr_lock, flags);
head = queue->tx_head;
for (tail = queue->tx_tail; tail != head && packets < budget; tail++) {
struct macb_tx_skb *tx_skb;
@@ -1291,7 +1292,7 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
CIRC_CNT(queue->tx_head, queue->tx_tail,
bp->tx_ring_size) <= MACB_TX_WAKEUP_THRESH(bp))
netif_wake_subqueue(bp->dev, queue_index);
- spin_unlock(&queue->tx_ptr_lock);
+ spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);
return packets;
}
@@ -1707,8 +1708,9 @@ static void macb_tx_restart(struct macb_queue *queue)
{
struct macb *bp = queue->bp;
unsigned int head_idx, tbqp;
+ unsigned long flags;
- spin_lock(&queue->tx_ptr_lock);
+ spin_lock_irqsave(&queue->tx_ptr_lock, flags);
if (queue->tx_head == queue->tx_tail)
goto out_tx_ptr_unlock;
@@ -1720,19 +1722,20 @@ static void macb_tx_restart(struct macb_queue *queue)
if (tbqp == head_idx)
goto out_tx_ptr_unlock;
- spin_lock_irq(&bp->lock);
+ spin_lock(&bp->lock);
macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
- spin_unlock_irq(&bp->lock);
+ spin_unlock(&bp->lock);
out_tx_ptr_unlock:
- spin_unlock(&queue->tx_ptr_lock);
+ spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);
}
static bool macb_tx_complete_pending(struct macb_queue *queue)
{
bool retval = false;
+ unsigned long flags;
- spin_lock(&queue->tx_ptr_lock);
+ spin_lock_irqsave(&queue->tx_ptr_lock, flags);
if (queue->tx_head != queue->tx_tail) {
/* Make hw descriptor updates visible to CPU */
rmb();
@@ -1740,7 +1743,7 @@ static bool macb_tx_complete_pending(struct macb_queue *queue)
if (macb_tx_desc(queue, queue->tx_tail)->ctrl & MACB_BIT(TX_USED))
retval = true;
}
- spin_unlock(&queue->tx_ptr_lock);
+ spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);
return retval;
}
@@ -2308,6 +2311,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
struct macb_queue *queue = &bp->queues[queue_index];
unsigned int desc_cnt, nr_frags, frag_size, f;
unsigned int hdrlen;
+ unsigned long flags;
bool is_lso;
netdev_tx_t ret = NETDEV_TX_OK;
@@ -2368,7 +2372,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
desc_cnt += DIV_ROUND_UP(frag_size, bp->max_tx_length);
}
- spin_lock_bh(&queue->tx_ptr_lock);
+ spin_lock_irqsave(&queue->tx_ptr_lock, flags);
/* This is a hard error, log it. */
if (CIRC_SPACE(queue->tx_head, queue->tx_tail,
@@ -2392,15 +2396,15 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
netdev_tx_sent_queue(netdev_get_tx_queue(bp->dev, queue_index),
skb->len);
- spin_lock_irq(&bp->lock);
+ spin_lock(&bp->lock);
macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
- spin_unlock_irq(&bp->lock);
+ spin_unlock(&bp->lock);
if (CIRC_SPACE(queue->tx_head, queue->tx_tail, bp->tx_ring_size) < 1)
netif_stop_subqueue(dev, queue_index);
unlock:
- spin_unlock_bh(&queue->tx_ptr_lock);
+ spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);
return ret;
}
diff --git a/drivers/net/ethernet/dlink/Kconfig b/drivers/net/ethernet/dlink/Kconfig
index e9e1365..0d77f84 100644
--- a/drivers/net/ethernet/dlink/Kconfig
+++ b/drivers/net/ethernet/dlink/Kconfig
@@ -32,4 +32,24 @@
To compile this driver as a module, choose M here: the
module will be called dl2k.
+config SUNDANCE
+ tristate "Sundance Alta support"
+ depends on PCI
+ select CRC32
+ select MII
+ help
+ This driver is for the Sundance "Alta" chip.
+ More specific information and updates are available from
+ <http://www.scyld.com/network/sundance.html>.
+
+config SUNDANCE_MMIO
+ bool "Use MMIO instead of PIO"
+ depends on SUNDANCE
+ help
+ Enable memory-mapped I/O for interaction with Sundance NIC registers.
+ Do NOT enable this by default, PIO (enabled when MMIO is disabled)
+ is known to solve bugs on certain chips.
+
+ If unsure, say N.
+
endif # NET_VENDOR_DLINK
diff --git a/drivers/net/ethernet/dlink/Makefile b/drivers/net/ethernet/dlink/Makefile
index 38c236eb..3ff503c 100644
--- a/drivers/net/ethernet/dlink/Makefile
+++ b/drivers/net/ethernet/dlink/Makefile
@@ -4,3 +4,4 @@
#
obj-$(CONFIG_DL2K) += dl2k.o
+obj-$(CONFIG_SUNDANCE) += sundance.o
diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c
new file mode 100644
index 0000000..277c50e
--- /dev/null
+++ b/drivers/net/ethernet/dlink/sundance.c
@@ -0,0 +1,1990 @@
+/* sundance.c: A Linux device driver for the Sundance ST201 "Alta". */
+/*
+ Written 1999-2000 by Donald Becker.
+
+ This software may be used and distributed according to the terms of
+ the GNU General Public License (GPL), incorporated herein by reference.
+ Drivers based on or derived from this code fall under the GPL and must
+ retain the authorship, copyright and license notice. This file is not
+ a complete program and may only be used when the entire operating
+ system is licensed under the GPL.
+
+ The author may be reached as becker@scyld.com, or C/O
+ Scyld Computing Corporation
+ 410 Severn Ave., Suite 210
+ Annapolis MD 21403
+
+ Support and updates available at
+ http://www.scyld.com/network/sundance.html
+ [link no longer provides useful info -jgarzik]
+ Archives of the mailing list are still available at
+ https://www.beowulf.org/pipermail/netdrivers/
+
+*/
+
+#define DRV_NAME "sundance"
+
+/* The user-configurable values.
+ These may be modified when a driver module is loaded.*/
+static int debug = 1; /* 1 normal messages, 0 quiet .. 7 verbose. */
+/* Maximum number of multicast addresses to filter (vs. rx-all-multicast).
+ Typical is a 64 element hash table based on the Ethernet CRC. */
+static const int multicast_filter_limit = 32;
+
+/* Set the copy breakpoint for the copy-only-tiny-frames scheme.
+ Setting to > 1518 effectively disables this feature.
+ This chip can receive into offset buffers, so the Alpha does not
+ need a copy-align. */
+static int rx_copybreak;
+static int flowctrl=1;
+
+/* media[] specifies the media type the NIC operates at.
+ autosense Autosensing active media.
+ 10mbps_hd 10Mbps half duplex.
+ 10mbps_fd 10Mbps full duplex.
+ 100mbps_hd 100Mbps half duplex.
+ 100mbps_fd 100Mbps full duplex.
+ 0 Autosensing active media.
+ 1 10Mbps half duplex.
+ 2 10Mbps full duplex.
+ 3 100Mbps half duplex.
+ 4 100Mbps full duplex.
+*/
+#define MAX_UNITS 8
+static char *media[MAX_UNITS];
+
+
+/* Operational parameters that are set at compile time. */
+
+/* Keep the ring sizes a power of two for compile efficiency.
+ The compiler will convert <unsigned>'%'<2^N> into a bit mask.
+ Making the Tx ring too large decreases the effectiveness of channel
+ bonding and packet priority, and more than 128 requires modifying the
+ Tx error recovery.
+ Large receive rings merely waste memory. */
+#define TX_RING_SIZE 32
+#define TX_QUEUE_LEN (TX_RING_SIZE - 1) /* Limit ring entries actually used. */
+#define RX_RING_SIZE 64
+#define RX_BUDGET 32
+#define TX_TOTAL_SIZE TX_RING_SIZE*sizeof(struct netdev_desc)
+#define RX_TOTAL_SIZE RX_RING_SIZE*sizeof(struct netdev_desc)
+
+/* Operational parameters that usually are not changed. */
+/* Time in jiffies before concluding the transmitter is hung. */
+#define TX_TIMEOUT (4*HZ)
+#define PKT_BUF_SZ 1536 /* Size of each temporary Rx buffer.*/
+
+/* Include files, designed to support most kernel versions 2.0.0 and later. */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/timer.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <linux/uaccess.h>
+#include <asm/processor.h> /* Processor type for cache alignment. */
+#include <asm/io.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/dma-mapping.h>
+#include <linux/crc32.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+
+MODULE_AUTHOR("Donald Becker <becker@scyld.com>");
+MODULE_DESCRIPTION("Sundance Alta Ethernet driver");
+MODULE_LICENSE("GPL");
+
+module_param(debug, int, 0);
+module_param(rx_copybreak, int, 0);
+module_param_array(media, charp, NULL, 0);
+module_param(flowctrl, int, 0);
+MODULE_PARM_DESC(debug, "Sundance Alta debug level (0-5)");
+MODULE_PARM_DESC(rx_copybreak, "Sundance Alta copy breakpoint for copy-only-tiny-frames");
+MODULE_PARM_DESC(flowctrl, "Sundance Alta flow control [0|1]");
+
+/*
+ Theory of Operation
+
+I. Board Compatibility
+
+This driver is designed for the Sundance Technologies "Alta" ST201 chip.
+
+II. Board-specific settings
+
+III. Driver operation
+
+IIIa. Ring buffers
+
+This driver uses two statically allocated fixed-size descriptor lists
+formed into rings by a branch from the final descriptor to the beginning of
+the list. The ring sizes are set at compile time by RX/TX_RING_SIZE.
+Some chips explicitly use only 2^N sized rings, while others use a
+'next descriptor' pointer that the driver forms into rings.
+
+IIIb/c. Transmit/Receive Structure
+
+This driver uses a zero-copy receive and transmit scheme.
+The driver allocates full frame size skbuffs for the Rx ring buffers at
+open() time and passes the skb->data field to the chip as receive data
+buffers. When an incoming frame is less than RX_COPYBREAK bytes long,
+a fresh skbuff is allocated and the frame is copied to the new skbuff.
+When the incoming frame is larger, the skbuff is passed directly up the
+protocol stack. Buffers consumed this way are replaced by newly allocated
+skbuffs in a later phase of receives.
+
+The RX_COPYBREAK value is chosen to trade-off the memory wasted by
+using a full-sized skbuff for small frames vs. the copying costs of larger
+frames. New boards are typically used in generously configured machines
+and the underfilled buffers have negligible impact compared to the benefit of
+a single allocation size, so the default value of zero results in never
+copying packets. When copying is done, the cost is usually mitigated by using
+a combined copy/checksum routine. Copying also preloads the cache, which is
+most useful with small frames.
+
+A subtle aspect of the operation is that the IP header at offset 14 in an
+ethernet frame isn't longword aligned for further processing.
+Unaligned buffers are permitted by the Sundance hardware, so
+frames are received into the skbuff at an offset of "+2", 16-byte aligning
+the IP header.
+
+IIId. Synchronization
+
+The driver runs as two independent, single-threaded flows of control. One
+is the send-packet routine, which enforces single-threaded use by the
+dev->tbusy flag. The other thread is the interrupt handler, which is single
+threaded by the hardware and interrupt handling software.
+
+The send packet thread has partial control over the Tx ring and 'dev->tbusy'
+flag. It sets the tbusy flag whenever it's queuing a Tx packet. If the next
+queue slot is empty, it clears the tbusy flag when finished otherwise it sets
+the 'lp->tx_full' flag.
+
+The interrupt handler has exclusive control over the Rx ring and records stats
+from the Tx ring. After reaping the stats, it marks the Tx queue entry as
+empty by incrementing the dirty_tx mark. Iff the 'lp->tx_full' flag is set, it
+clears both the tx_full and tbusy flags.
+
+IV. Notes
+
+IVb. References
+
+The Sundance ST201 datasheet, preliminary version.
+The Kendin KS8723 datasheet, preliminary version.
+The ICplus IP100 datasheet, preliminary version.
+http://www.scyld.com/expert/100mbps.html
+http://www.scyld.com/expert/NWay.html
+
+IVc. Errata
+
+*/
+
+/* Work-around for Kendin chip bugs. */
+#ifndef CONFIG_SUNDANCE_MMIO
+#define USE_IO_OPS 1
+#endif
+
+static const struct pci_device_id sundance_pci_tbl[] = {
+ { 0x1186, 0x1002, 0x1186, 0x1002, 0, 0, 0 },
+ { 0x1186, 0x1002, 0x1186, 0x1003, 0, 0, 1 },
+ { 0x1186, 0x1002, 0x1186, 0x1012, 0, 0, 2 },
+ { 0x1186, 0x1002, 0x1186, 0x1040, 0, 0, 3 },
+ { 0x1186, 0x1002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 4 },
+ { 0x13F0, 0x0201, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 5 },
+ { 0x13F0, 0x0200, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 6 },
+ { }
+};
+MODULE_DEVICE_TABLE(pci, sundance_pci_tbl);
+
+enum {
+ netdev_io_size = 128
+};
+
+struct pci_id_info {
+ const char *name;
+};
+static const struct pci_id_info pci_id_tbl[] = {
+ {"D-Link DFE-550TX FAST Ethernet Adapter"},
+ {"D-Link DFE-550FX 100Mbps Fiber-optics Adapter"},
+ {"D-Link DFE-580TX 4 port Server Adapter"},
+ {"D-Link DFE-530TXS FAST Ethernet Adapter"},
+ {"D-Link DL10050-based FAST Ethernet Adapter"},
+ {"Sundance Technology Alta"},
+ {"IC Plus Corporation IP100A FAST Ethernet Adapter"},
+ { } /* terminate list. */
+};
+
+/* This driver was written to use PCI memory space, however x86-oriented
+ hardware often uses I/O space accesses. */
+
+/* Offsets to the device registers.
+ Unlike software-only systems, device drivers interact with complex hardware.
+ It's not useful to define symbolic names for every register bit in the
+ device. The name can only partially document the semantics and make
+ the driver longer and more difficult to read.
+ In general, only the important configuration values or bits changed
+ multiple times should be defined symbolically.
+*/
+enum alta_offsets {
+ DMACtrl = 0x00,
+ TxListPtr = 0x04,
+ TxDMABurstThresh = 0x08,
+ TxDMAUrgentThresh = 0x09,
+ TxDMAPollPeriod = 0x0a,
+ RxDMAStatus = 0x0c,
+ RxListPtr = 0x10,
+ DebugCtrl0 = 0x1a,
+ DebugCtrl1 = 0x1c,
+ RxDMABurstThresh = 0x14,
+ RxDMAUrgentThresh = 0x15,
+ RxDMAPollPeriod = 0x16,
+ LEDCtrl = 0x1a,
+ ASICCtrl = 0x30,
+ EEData = 0x34,
+ EECtrl = 0x36,
+ FlashAddr = 0x40,
+ FlashData = 0x44,
+ WakeEvent = 0x45,
+ TxStatus = 0x46,
+ TxFrameId = 0x47,
+ DownCounter = 0x18,
+ IntrClear = 0x4a,
+ IntrEnable = 0x4c,
+ IntrStatus = 0x4e,
+ MACCtrl0 = 0x50,
+ MACCtrl1 = 0x52,
+ StationAddr = 0x54,
+ MaxFrameSize = 0x5A,
+ RxMode = 0x5c,
+ MIICtrl = 0x5e,
+ MulticastFilter0 = 0x60,
+ MulticastFilter1 = 0x64,
+ RxOctetsLow = 0x68,
+ RxOctetsHigh = 0x6a,
+ TxOctetsLow = 0x6c,
+ TxOctetsHigh = 0x6e,
+ TxFramesOK = 0x70,
+ RxFramesOK = 0x72,
+ StatsCarrierError = 0x74,
+ StatsLateColl = 0x75,
+ StatsMultiColl = 0x76,
+ StatsOneColl = 0x77,
+ StatsTxDefer = 0x78,
+ RxMissed = 0x79,
+ StatsTxXSDefer = 0x7a,
+ StatsTxAbort = 0x7b,
+ StatsBcastTx = 0x7c,
+ StatsBcastRx = 0x7d,
+ StatsMcastTx = 0x7e,
+ StatsMcastRx = 0x7f,
+ /* Aliased and bogus values! */
+ RxStatus = 0x0c,
+};
+
+#define ASIC_HI_WORD(x) ((x) + 2)
+
+enum ASICCtrl_HiWord_bit {
+ GlobalReset = 0x0001,
+ RxReset = 0x0002,
+ TxReset = 0x0004,
+ DMAReset = 0x0008,
+ FIFOReset = 0x0010,
+ NetworkReset = 0x0020,
+ HostReset = 0x0040,
+ ResetBusy = 0x0400,
+};
+
+/* Bits in the interrupt status/mask registers. */
+enum intr_status_bits {
+ IntrSummary=0x0001, IntrPCIErr=0x0002, IntrMACCtrl=0x0008,
+ IntrTxDone=0x0004, IntrRxDone=0x0010, IntrRxStart=0x0020,
+ IntrDrvRqst=0x0040,
+ StatsMax=0x0080, LinkChange=0x0100,
+ IntrTxDMADone=0x0200, IntrRxDMADone=0x0400,
+};
+
+/* Bits in the RxMode register. */
+enum rx_mode_bits {
+ AcceptAllIPMulti=0x20, AcceptMultiHash=0x10, AcceptAll=0x08,
+ AcceptBroadcast=0x04, AcceptMulticast=0x02, AcceptMyPhys=0x01,
+};
+/* Bits in MACCtrl. */
+enum mac_ctrl0_bits {
+ EnbFullDuplex=0x20, EnbRcvLargeFrame=0x40,
+ EnbFlowCtrl=0x100, EnbPassRxCRC=0x200,
+};
+enum mac_ctrl1_bits {
+ StatsEnable=0x0020, StatsDisable=0x0040, StatsEnabled=0x0080,
+ TxEnable=0x0100, TxDisable=0x0200, TxEnabled=0x0400,
+ RxEnable=0x0800, RxDisable=0x1000, RxEnabled=0x2000,
+};
+
+/* Bits in WakeEvent register. */
+enum wake_event_bits {
+ WakePktEnable = 0x01,
+ MagicPktEnable = 0x02,
+ LinkEventEnable = 0x04,
+ WolEnable = 0x80,
+};
+
+/* The Rx and Tx buffer descriptors. */
+/* Note that using only 32 bit fields simplifies conversion to big-endian
+ architectures. */
+struct netdev_desc {
+ __le32 next_desc;
+ __le32 status;
+ struct desc_frag { __le32 addr, length; } frag;
+};
+
+/* Bits in netdev_desc.status */
+enum desc_status_bits {
+ DescOwn=0x8000,
+ DescEndPacket=0x4000,
+ DescEndRing=0x2000,
+ LastFrag=0x80000000,
+ DescIntrOnTx=0x8000,
+ DescIntrOnDMADone=0x80000000,
+ DisableAlign = 0x00000001,
+};
+
+#define PRIV_ALIGN 15 /* Required alignment mask */
+/* Use __attribute__((aligned (L1_CACHE_BYTES))) to maintain alignment
+ within the structure. */
+#define MII_CNT 4
+struct netdev_private {
+ /* Descriptor rings first for alignment. */
+ struct netdev_desc *rx_ring;
+ struct netdev_desc *tx_ring;
+ struct sk_buff* rx_skbuff[RX_RING_SIZE];
+ struct sk_buff* tx_skbuff[TX_RING_SIZE];
+ dma_addr_t tx_ring_dma;
+ dma_addr_t rx_ring_dma;
+ struct timer_list timer; /* Media monitoring timer. */
+ struct net_device *ndev; /* backpointer */
+ /* ethtool extra stats */
+ struct {
+ u64 tx_multiple_collisions;
+ u64 tx_single_collisions;
+ u64 tx_late_collisions;
+ u64 tx_deferred;
+ u64 tx_deferred_excessive;
+ u64 tx_aborted;
+ u64 tx_bcasts;
+ u64 rx_bcasts;
+ u64 tx_mcasts;
+ u64 rx_mcasts;
+ } xstats;
+ /* Frequently used values: keep some adjacent for cache effect. */
+ spinlock_t lock;
+ int msg_enable;
+ int chip_id;
+ unsigned int cur_rx, dirty_rx; /* Producer/consumer ring indices */
+ unsigned int rx_buf_sz; /* Based on MTU+slack. */
+ struct netdev_desc *last_tx; /* Last Tx descriptor used. */
+ unsigned int cur_tx, dirty_tx;
+ /* These values are keep track of the transceiver/media in use. */
+ unsigned int flowctrl:1;
+ unsigned int default_port:4; /* Last dev->if_port value. */
+ unsigned int an_enable:1;
+ unsigned int speed;
+ unsigned int wol_enabled:1; /* Wake on LAN enabled */
+ struct tasklet_struct rx_tasklet;
+ struct tasklet_struct tx_tasklet;
+ int budget;
+ int cur_task;
+ /* Multicast and receive mode. */
+ spinlock_t mcastlock; /* SMP lock multicast updates. */
+ u16 mcast_filter[4];
+ /* MII transceiver section. */
+ struct mii_if_info mii_if;
+ int mii_preamble_required;
+ unsigned char phys[MII_CNT]; /* MII device addresses, only first one used. */
+ struct pci_dev *pci_dev;
+ void __iomem *base;
+ spinlock_t statlock;
+};
+
+/* The station address location in the EEPROM. */
+#define EEPROM_SA_OFFSET 0x10
+#define DEFAULT_INTR (IntrRxDMADone | IntrPCIErr | \
+ IntrDrvRqst | IntrTxDone | StatsMax | \
+ LinkChange)
+
+static int change_mtu(struct net_device *dev, int new_mtu);
+static int eeprom_read(void __iomem *ioaddr, int location);
+static int mdio_read(struct net_device *dev, int phy_id, int location);
+static void mdio_write(struct net_device *dev, int phy_id, int location, int value);
+static int mdio_wait_link(struct net_device *dev, int wait);
+static int netdev_open(struct net_device *dev);
+static void check_duplex(struct net_device *dev);
+static void netdev_timer(struct timer_list *t);
+static void tx_timeout(struct net_device *dev, unsigned int txqueue);
+static void init_ring(struct net_device *dev);
+static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev);
+static int reset_tx (struct net_device *dev);
+static irqreturn_t intr_handler(int irq, void *dev_instance);
+static void rx_poll(struct tasklet_struct *t);
+static void tx_poll(struct tasklet_struct *t);
+static void refill_rx (struct net_device *dev);
+static void netdev_error(struct net_device *dev, int intr_status);
+static void netdev_error(struct net_device *dev, int intr_status);
+static void set_rx_mode(struct net_device *dev);
+static int __set_mac_addr(struct net_device *dev);
+static int sundance_set_mac_addr(struct net_device *dev, void *data);
+static struct net_device_stats *get_stats(struct net_device *dev);
+static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+static int netdev_close(struct net_device *dev);
+static const struct ethtool_ops ethtool_ops;
+
+static void sundance_reset(struct net_device *dev, unsigned long reset_cmd)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ void __iomem *ioaddr = np->base + ASICCtrl;
+ int countdown;
+
+ /* ST201 documentation states ASICCtrl is a 32bit register */
+ iowrite32 (reset_cmd | ioread32 (ioaddr), ioaddr);
+ /* ST201 documentation states reset can take up to 1 ms */
+ countdown = 10 + 1;
+ while (ioread32 (ioaddr) & (ResetBusy << 16)) {
+ if (--countdown == 0) {
+ printk(KERN_WARNING "%s : reset not completed !!\n", dev->name);
+ break;
+ }
+ udelay(100);
+ }
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void sundance_poll_controller(struct net_device *dev)
+{
+ struct netdev_private *np = netdev_priv(dev);
+
+ disable_irq(np->pci_dev->irq);
+ intr_handler(np->pci_dev->irq, dev);
+ enable_irq(np->pci_dev->irq);
+}
+#endif
+
+static const struct net_device_ops netdev_ops = {
+ .ndo_open = netdev_open,
+ .ndo_stop = netdev_close,
+ .ndo_start_xmit = start_tx,
+ .ndo_get_stats = get_stats,
+ .ndo_set_rx_mode = set_rx_mode,
+ .ndo_eth_ioctl = netdev_ioctl,
+ .ndo_tx_timeout = tx_timeout,
+ .ndo_change_mtu = change_mtu,
+ .ndo_set_mac_address = sundance_set_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ .ndo_poll_controller = sundance_poll_controller,
+#endif
+};
+
+static int sundance_probe1(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ struct net_device *dev;
+ struct netdev_private *np;
+ static int card_idx;
+ int chip_idx = ent->driver_data;
+ int irq;
+ int i;
+ void __iomem *ioaddr;
+ u16 mii_ctl;
+ void *ring_space;
+ dma_addr_t ring_dma;
+#ifdef USE_IO_OPS
+ int bar = 0;
+#else
+ int bar = 1;
+#endif
+ int phy, phy_end, phy_idx = 0;
+ __le16 addr[ETH_ALEN / 2];
+
+ if (pci_enable_device(pdev))
+ return -EIO;
+ pci_set_master(pdev);
+
+ irq = pdev->irq;
+
+ dev = alloc_etherdev(sizeof(*np));
+ if (!dev)
+ return -ENOMEM;
+ SET_NETDEV_DEV(dev, &pdev->dev);
+
+ if (pci_request_regions(pdev, DRV_NAME))
+ goto err_out_netdev;
+
+ ioaddr = pci_iomap(pdev, bar, netdev_io_size);
+ if (!ioaddr)
+ goto err_out_res;
+
+ for (i = 0; i < 3; i++)
+ addr[i] =
+ cpu_to_le16(eeprom_read(ioaddr, i + EEPROM_SA_OFFSET));
+ eth_hw_addr_set(dev, (u8 *)addr);
+
+ np = netdev_priv(dev);
+ np->ndev = dev;
+ np->base = ioaddr;
+ np->pci_dev = pdev;
+ np->chip_id = chip_idx;
+ np->msg_enable = (1 << debug) - 1;
+ spin_lock_init(&np->lock);
+ spin_lock_init(&np->statlock);
+ tasklet_setup(&np->rx_tasklet, rx_poll);
+ tasklet_setup(&np->tx_tasklet, tx_poll);
+
+ ring_space = dma_alloc_coherent(&pdev->dev, TX_TOTAL_SIZE,
+ &ring_dma, GFP_KERNEL);
+ if (!ring_space)
+ goto err_out_cleardev;
+ np->tx_ring = (struct netdev_desc *)ring_space;
+ np->tx_ring_dma = ring_dma;
+
+ ring_space = dma_alloc_coherent(&pdev->dev, RX_TOTAL_SIZE,
+ &ring_dma, GFP_KERNEL);
+ if (!ring_space)
+ goto err_out_unmap_tx;
+ np->rx_ring = (struct netdev_desc *)ring_space;
+ np->rx_ring_dma = ring_dma;
+
+ np->mii_if.dev = dev;
+ np->mii_if.mdio_read = mdio_read;
+ np->mii_if.mdio_write = mdio_write;
+ np->mii_if.phy_id_mask = 0x1f;
+ np->mii_if.reg_num_mask = 0x1f;
+
+ /* The chip-specific entries in the device structure. */
+ dev->netdev_ops = &netdev_ops;
+ dev->ethtool_ops = ðtool_ops;
+ dev->watchdog_timeo = TX_TIMEOUT;
+
+ /* MTU range: 68 - 8191 */
+ dev->min_mtu = ETH_MIN_MTU;
+ dev->max_mtu = 8191;
+
+ pci_set_drvdata(pdev, dev);
+
+ i = register_netdev(dev);
+ if (i)
+ goto err_out_unmap_rx;
+
+ printk(KERN_INFO "%s: %s at %p, %pM, IRQ %d.\n",
+ dev->name, pci_id_tbl[chip_idx].name, ioaddr,
+ dev->dev_addr, irq);
+
+ np->phys[0] = 1; /* Default setting */
+ np->mii_preamble_required++;
+
+ /*
+ * It seems some phys doesn't deal well with address 0 being accessed
+ * first
+ */
+ if (sundance_pci_tbl[np->chip_id].device == 0x0200) {
+ phy = 0;
+ phy_end = 31;
+ } else {
+ phy = 1;
+ phy_end = 32; /* wraps to zero, due to 'phy & 0x1f' */
+ }
+ for (; phy <= phy_end && phy_idx < MII_CNT; phy++) {
+ int phyx = phy & 0x1f;
+ int mii_status = mdio_read(dev, phyx, MII_BMSR);
+ if (mii_status != 0xffff && mii_status != 0x0000) {
+ np->phys[phy_idx++] = phyx;
+ np->mii_if.advertising = mdio_read(dev, phyx, MII_ADVERTISE);
+ if ((mii_status & 0x0040) == 0)
+ np->mii_preamble_required++;
+ printk(KERN_INFO "%s: MII PHY found at address %d, status "
+ "0x%4.4x advertising %4.4x.\n",
+ dev->name, phyx, mii_status, np->mii_if.advertising);
+ }
+ }
+ np->mii_preamble_required--;
+
+ if (phy_idx == 0) {
+ printk(KERN_INFO "%s: No MII transceiver found, aborting. ASIC status %x\n",
+ dev->name, ioread32(ioaddr + ASICCtrl));
+ goto err_out_unregister;
+ }
+
+ np->mii_if.phy_id = np->phys[0];
+
+ /* Parse override configuration */
+ np->an_enable = 1;
+ if (card_idx < MAX_UNITS) {
+ if (media[card_idx] != NULL) {
+ np->an_enable = 0;
+ if (strcmp (media[card_idx], "100mbps_fd") == 0 ||
+ strcmp (media[card_idx], "4") == 0) {
+ np->speed = 100;
+ np->mii_if.full_duplex = 1;
+ } else if (strcmp (media[card_idx], "100mbps_hd") == 0 ||
+ strcmp (media[card_idx], "3") == 0) {
+ np->speed = 100;
+ np->mii_if.full_duplex = 0;
+ } else if (strcmp (media[card_idx], "10mbps_fd") == 0 ||
+ strcmp (media[card_idx], "2") == 0) {
+ np->speed = 10;
+ np->mii_if.full_duplex = 1;
+ } else if (strcmp (media[card_idx], "10mbps_hd") == 0 ||
+ strcmp (media[card_idx], "1") == 0) {
+ np->speed = 10;
+ np->mii_if.full_duplex = 0;
+ } else {
+ np->an_enable = 1;
+ }
+ }
+ if (flowctrl == 1)
+ np->flowctrl = 1;
+ }
+
+ /* Fibre PHY? */
+ if (ioread32 (ioaddr + ASICCtrl) & 0x80) {
+ /* Default 100Mbps Full */
+ if (np->an_enable) {
+ np->speed = 100;
+ np->mii_if.full_duplex = 1;
+ np->an_enable = 0;
+ }
+ }
+ /* Reset PHY */
+ mdio_write (dev, np->phys[0], MII_BMCR, BMCR_RESET);
+ mdelay (300);
+ /* If flow control enabled, we need to advertise it.*/
+ if (np->flowctrl)
+ mdio_write (dev, np->phys[0], MII_ADVERTISE, np->mii_if.advertising | 0x0400);
+ mdio_write (dev, np->phys[0], MII_BMCR, BMCR_ANENABLE|BMCR_ANRESTART);
+ /* Force media type */
+ if (!np->an_enable) {
+ mii_ctl = 0;
+ mii_ctl |= (np->speed == 100) ? BMCR_SPEED100 : 0;
+ mii_ctl |= (np->mii_if.full_duplex) ? BMCR_FULLDPLX : 0;
+ mdio_write (dev, np->phys[0], MII_BMCR, mii_ctl);
+ printk (KERN_INFO "Override speed=%d, %s duplex\n",
+ np->speed, np->mii_if.full_duplex ? "Full" : "Half");
+
+ }
+
+ /* Perhaps move the reset here? */
+ /* Reset the chip to erase previous misconfiguration. */
+ if (netif_msg_hw(np))
+ printk("ASIC Control is %x.\n", ioread32(ioaddr + ASICCtrl));
+ sundance_reset(dev, 0x00ff << 16);
+ if (netif_msg_hw(np))
+ printk("ASIC Control is now %x.\n", ioread32(ioaddr + ASICCtrl));
+
+ card_idx++;
+ return 0;
+
+err_out_unregister:
+ unregister_netdev(dev);
+err_out_unmap_rx:
+ dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE,
+ np->rx_ring, np->rx_ring_dma);
+err_out_unmap_tx:
+ dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE,
+ np->tx_ring, np->tx_ring_dma);
+err_out_cleardev:
+ pci_iounmap(pdev, ioaddr);
+err_out_res:
+ pci_release_regions(pdev);
+err_out_netdev:
+ free_netdev (dev);
+ return -ENODEV;
+}
+
+static int change_mtu(struct net_device *dev, int new_mtu)
+{
+ if (netif_running(dev))
+ return -EBUSY;
+ WRITE_ONCE(dev->mtu, new_mtu);
+ return 0;
+}
+
+#define eeprom_delay(ee_addr) ioread32(ee_addr)
+/* Read the EEPROM and MII Management Data I/O (MDIO) interfaces. */
+static int eeprom_read(void __iomem *ioaddr, int location)
+{
+ int boguscnt = 10000; /* Typical 1900 ticks. */
+ iowrite16(0x0200 | (location & 0xff), ioaddr + EECtrl);
+ do {
+ eeprom_delay(ioaddr + EECtrl);
+ if (! (ioread16(ioaddr + EECtrl) & 0x8000)) {
+ return ioread16(ioaddr + EEData);
+ }
+ } while (--boguscnt > 0);
+ return 0;
+}
+
+/* MII transceiver control section.
+ Read and write the MII registers using software-generated serial
+ MDIO protocol. See the MII specifications or DP83840A data sheet
+ for details.
+
+ The maximum data clock rate is 2.5 Mhz. The minimum timing is usually
+ met by back-to-back 33Mhz PCI cycles. */
+#define mdio_delay() ioread8(mdio_addr)
+
+enum mii_reg_bits {
+ MDIO_ShiftClk=0x0001, MDIO_Data=0x0002, MDIO_EnbOutput=0x0004,
+};
+#define MDIO_EnbIn (0)
+#define MDIO_WRITE0 (MDIO_EnbOutput)
+#define MDIO_WRITE1 (MDIO_Data | MDIO_EnbOutput)
+
+/* Generate the preamble required for initial synchronization and
+ a few older transceivers. */
+static void mdio_sync(void __iomem *mdio_addr)
+{
+ int bits = 32;
+
+ /* Establish sync by sending at least 32 logic ones. */
+ while (--bits >= 0) {
+ iowrite8(MDIO_WRITE1, mdio_addr);
+ mdio_delay();
+ iowrite8(MDIO_WRITE1 | MDIO_ShiftClk, mdio_addr);
+ mdio_delay();
+ }
+}
+
+static int mdio_read(struct net_device *dev, int phy_id, int location)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ void __iomem *mdio_addr = np->base + MIICtrl;
+ int mii_cmd = (0xf6 << 10) | (phy_id << 5) | location;
+ int i, retval = 0;
+
+ if (np->mii_preamble_required)
+ mdio_sync(mdio_addr);
+
+ /* Shift the read command bits out. */
+ for (i = 15; i >= 0; i--) {
+ int dataval = (mii_cmd & (1 << i)) ? MDIO_WRITE1 : MDIO_WRITE0;
+
+ iowrite8(dataval, mdio_addr);
+ mdio_delay();
+ iowrite8(dataval | MDIO_ShiftClk, mdio_addr);
+ mdio_delay();
+ }
+ /* Read the two transition, 16 data, and wire-idle bits. */
+ for (i = 19; i > 0; i--) {
+ iowrite8(MDIO_EnbIn, mdio_addr);
+ mdio_delay();
+ retval = (retval << 1) | ((ioread8(mdio_addr) & MDIO_Data) ? 1 : 0);
+ iowrite8(MDIO_EnbIn | MDIO_ShiftClk, mdio_addr);
+ mdio_delay();
+ }
+ return (retval>>1) & 0xffff;
+}
+
+static void mdio_write(struct net_device *dev, int phy_id, int location, int value)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ void __iomem *mdio_addr = np->base + MIICtrl;
+ int mii_cmd = (0x5002 << 16) | (phy_id << 23) | (location<<18) | value;
+ int i;
+
+ if (np->mii_preamble_required)
+ mdio_sync(mdio_addr);
+
+ /* Shift the command bits out. */
+ for (i = 31; i >= 0; i--) {
+ int dataval = (mii_cmd & (1 << i)) ? MDIO_WRITE1 : MDIO_WRITE0;
+
+ iowrite8(dataval, mdio_addr);
+ mdio_delay();
+ iowrite8(dataval | MDIO_ShiftClk, mdio_addr);
+ mdio_delay();
+ }
+ /* Clear out extra bits. */
+ for (i = 2; i > 0; i--) {
+ iowrite8(MDIO_EnbIn, mdio_addr);
+ mdio_delay();
+ iowrite8(MDIO_EnbIn | MDIO_ShiftClk, mdio_addr);
+ mdio_delay();
+ }
+}
+
+static int mdio_wait_link(struct net_device *dev, int wait)
+{
+ int bmsr;
+ int phy_id;
+ struct netdev_private *np;
+
+ np = netdev_priv(dev);
+ phy_id = np->phys[0];
+
+ do {
+ bmsr = mdio_read(dev, phy_id, MII_BMSR);
+ if (bmsr & 0x0004)
+ return 0;
+ mdelay(1);
+ } while (--wait > 0);
+ return -1;
+}
+
+static int netdev_open(struct net_device *dev)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ void __iomem *ioaddr = np->base;
+ const int irq = np->pci_dev->irq;
+ unsigned long flags;
+ int i;
+
+ sundance_reset(dev, 0x00ff << 16);
+
+ i = request_irq(irq, intr_handler, IRQF_SHARED, dev->name, dev);
+ if (i)
+ return i;
+
+ if (netif_msg_ifup(np))
+ printk(KERN_DEBUG "%s: netdev_open() irq %d\n", dev->name, irq);
+
+ init_ring(dev);
+
+ iowrite32(np->rx_ring_dma, ioaddr + RxListPtr);
+ /* The Tx list pointer is written as packets are queued. */
+
+ /* Initialize other registers. */
+ __set_mac_addr(dev);
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+ iowrite16(dev->mtu + 18, ioaddr + MaxFrameSize);
+#else
+ iowrite16(dev->mtu + 14, ioaddr + MaxFrameSize);
+#endif
+ if (dev->mtu > 2047)
+ iowrite32(ioread32(ioaddr + ASICCtrl) | 0x0C, ioaddr + ASICCtrl);
+
+ /* Configure the PCI bus bursts and FIFO thresholds. */
+
+ if (dev->if_port == 0)
+ dev->if_port = np->default_port;
+
+ spin_lock_init(&np->mcastlock);
+
+ set_rx_mode(dev);
+ iowrite16(0, ioaddr + IntrEnable);
+ iowrite16(0, ioaddr + DownCounter);
+ /* Set the chip to poll every N*320nsec. */
+ iowrite8(100, ioaddr + RxDMAPollPeriod);
+ iowrite8(127, ioaddr + TxDMAPollPeriod);
+ /* Fix DFE-580TX packet drop issue */
+ if (np->pci_dev->revision >= 0x14)
+ iowrite8(0x01, ioaddr + DebugCtrl1);
+ netif_start_queue(dev);
+
+ spin_lock_irqsave(&np->lock, flags);
+ reset_tx(dev);
+ spin_unlock_irqrestore(&np->lock, flags);
+
+ iowrite16 (StatsEnable | RxEnable | TxEnable, ioaddr + MACCtrl1);
+
+ /* Disable Wol */
+ iowrite8(ioread8(ioaddr + WakeEvent) | 0x00, ioaddr + WakeEvent);
+ np->wol_enabled = 0;
+
+ if (netif_msg_ifup(np))
+ printk(KERN_DEBUG "%s: Done netdev_open(), status: Rx %x Tx %x "
+ "MAC Control %x, %4.4x %4.4x.\n",
+ dev->name, ioread32(ioaddr + RxStatus), ioread8(ioaddr + TxStatus),
+ ioread32(ioaddr + MACCtrl0),
+ ioread16(ioaddr + MACCtrl1), ioread16(ioaddr + MACCtrl0));
+
+ /* Set the timer to check for link beat. */
+ timer_setup(&np->timer, netdev_timer, 0);
+ np->timer.expires = jiffies + 3*HZ;
+ add_timer(&np->timer);
+
+ /* Enable interrupts by setting the interrupt mask. */
+ iowrite16(DEFAULT_INTR, ioaddr + IntrEnable);
+
+ return 0;
+}
+
+static void check_duplex(struct net_device *dev)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ void __iomem *ioaddr = np->base;
+ int mii_lpa = mdio_read(dev, np->phys[0], MII_LPA);
+ int negotiated = mii_lpa & np->mii_if.advertising;
+ int duplex;
+
+ /* Force media */
+ if (!np->an_enable || mii_lpa == 0xffff) {
+ if (np->mii_if.full_duplex)
+ iowrite16 (ioread16 (ioaddr + MACCtrl0) | EnbFullDuplex,
+ ioaddr + MACCtrl0);
+ return;
+ }
+
+ /* Autonegotiation */
+ duplex = (negotiated & 0x0100) || (negotiated & 0x01C0) == 0x0040;
+ if (np->mii_if.full_duplex != duplex) {
+ np->mii_if.full_duplex = duplex;
+ if (netif_msg_link(np))
+ printk(KERN_INFO "%s: Setting %s-duplex based on MII #%d "
+ "negotiated capability %4.4x.\n", dev->name,
+ duplex ? "full" : "half", np->phys[0], negotiated);
+ iowrite16(ioread16(ioaddr + MACCtrl0) | (duplex ? 0x20 : 0), ioaddr + MACCtrl0);
+ }
+}
+
+static void netdev_timer(struct timer_list *t)
+{
+ struct netdev_private *np = timer_container_of(np, t, timer);
+ struct net_device *dev = np->mii_if.dev;
+ void __iomem *ioaddr = np->base;
+ int next_tick = 10*HZ;
+
+ if (netif_msg_timer(np)) {
+ printk(KERN_DEBUG "%s: Media selection timer tick, intr status %4.4x, "
+ "Tx %x Rx %x.\n",
+ dev->name, ioread16(ioaddr + IntrEnable),
+ ioread8(ioaddr + TxStatus), ioread32(ioaddr + RxStatus));
+ }
+ check_duplex(dev);
+ np->timer.expires = jiffies + next_tick;
+ add_timer(&np->timer);
+}
+
+static void tx_timeout(struct net_device *dev, unsigned int txqueue)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ void __iomem *ioaddr = np->base;
+ unsigned long flag;
+
+ netif_stop_queue(dev);
+ tasklet_disable_in_atomic(&np->tx_tasklet);
+ iowrite16(0, ioaddr + IntrEnable);
+ printk(KERN_WARNING "%s: Transmit timed out, TxStatus %2.2x "
+ "TxFrameId %2.2x,"
+ " resetting...\n", dev->name, ioread8(ioaddr + TxStatus),
+ ioread8(ioaddr + TxFrameId));
+
+ {
+ int i;
+ for (i=0; i<TX_RING_SIZE; i++) {
+ printk(KERN_DEBUG "%02x %08llx %08x %08x(%02x) %08x %08x\n", i,
+ (unsigned long long)(np->tx_ring_dma + i*sizeof(*np->tx_ring)),
+ le32_to_cpu(np->tx_ring[i].next_desc),
+ le32_to_cpu(np->tx_ring[i].status),
+ (le32_to_cpu(np->tx_ring[i].status) >> 2) & 0xff,
+ le32_to_cpu(np->tx_ring[i].frag.addr),
+ le32_to_cpu(np->tx_ring[i].frag.length));
+ }
+ printk(KERN_DEBUG "TxListPtr=%08x netif_queue_stopped=%d\n",
+ ioread32(np->base + TxListPtr),
+ netif_queue_stopped(dev));
+ printk(KERN_DEBUG "cur_tx=%d(%02x) dirty_tx=%d(%02x)\n",
+ np->cur_tx, np->cur_tx % TX_RING_SIZE,
+ np->dirty_tx, np->dirty_tx % TX_RING_SIZE);
+ printk(KERN_DEBUG "cur_rx=%d dirty_rx=%d\n", np->cur_rx, np->dirty_rx);
+ printk(KERN_DEBUG "cur_task=%d\n", np->cur_task);
+ }
+ spin_lock_irqsave(&np->lock, flag);
+
+ /* Stop and restart the chip's Tx processes . */
+ reset_tx(dev);
+ spin_unlock_irqrestore(&np->lock, flag);
+
+ dev->if_port = 0;
+
+ netif_trans_update(dev); /* prevent tx timeout */
+ dev->stats.tx_errors++;
+ if (np->cur_tx - np->dirty_tx < TX_QUEUE_LEN - 4) {
+ netif_wake_queue(dev);
+ }
+ iowrite16(DEFAULT_INTR, ioaddr + IntrEnable);
+ tasklet_enable(&np->tx_tasklet);
+}
+
+
+/* Initialize the Rx and Tx rings, along with various 'dev' bits. */
+static void init_ring(struct net_device *dev)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ int i;
+
+ np->cur_rx = np->cur_tx = 0;
+ np->dirty_rx = np->dirty_tx = 0;
+ np->cur_task = 0;
+
+ np->rx_buf_sz = (dev->mtu <= 1520 ? PKT_BUF_SZ : dev->mtu + 16);
+
+ /* Initialize all Rx descriptors. */
+ for (i = 0; i < RX_RING_SIZE; i++) {
+ np->rx_ring[i].next_desc = cpu_to_le32(np->rx_ring_dma +
+ ((i+1)%RX_RING_SIZE)*sizeof(*np->rx_ring));
+ np->rx_ring[i].status = 0;
+ np->rx_ring[i].frag.length = 0;
+ np->rx_skbuff[i] = NULL;
+ }
+
+ /* Fill in the Rx buffers. Handle allocation failure gracefully. */
+ for (i = 0; i < RX_RING_SIZE; i++) {
+ dma_addr_t addr;
+
+ struct sk_buff *skb =
+ netdev_alloc_skb(dev, np->rx_buf_sz + 2);
+ np->rx_skbuff[i] = skb;
+ if (skb == NULL)
+ break;
+ skb_reserve(skb, 2); /* 16 byte align the IP header. */
+ addr = dma_map_single(&np->pci_dev->dev, skb->data,
+ np->rx_buf_sz, DMA_FROM_DEVICE);
+ if (dma_mapping_error(&np->pci_dev->dev, addr)) {
+ dev_kfree_skb(skb);
+ np->rx_skbuff[i] = NULL;
+ break;
+ }
+ np->rx_ring[i].frag.addr = cpu_to_le32(addr);
+ np->rx_ring[i].frag.length = cpu_to_le32(np->rx_buf_sz | LastFrag);
+ }
+ np->dirty_rx = (unsigned int)(i - RX_RING_SIZE);
+
+ for (i = 0; i < TX_RING_SIZE; i++) {
+ np->tx_skbuff[i] = NULL;
+ np->tx_ring[i].status = 0;
+ }
+}
+
+static void tx_poll(struct tasklet_struct *t)
+{
+ struct netdev_private *np = from_tasklet(np, t, tx_tasklet);
+ unsigned head = np->cur_task % TX_RING_SIZE;
+ struct netdev_desc *txdesc =
+ &np->tx_ring[(np->cur_tx - 1) % TX_RING_SIZE];
+
+ /* Chain the next pointer */
+ for (; np->cur_tx - np->cur_task > 0; np->cur_task++) {
+ int entry = np->cur_task % TX_RING_SIZE;
+ txdesc = &np->tx_ring[entry];
+ if (np->last_tx) {
+ np->last_tx->next_desc = cpu_to_le32(np->tx_ring_dma +
+ entry*sizeof(struct netdev_desc));
+ }
+ np->last_tx = txdesc;
+ }
+ /* Indicate the latest descriptor of tx ring */
+ txdesc->status |= cpu_to_le32(DescIntrOnTx);
+
+ if (ioread32 (np->base + TxListPtr) == 0)
+ iowrite32 (np->tx_ring_dma + head * sizeof(struct netdev_desc),
+ np->base + TxListPtr);
+}
+
+static netdev_tx_t
+start_tx (struct sk_buff *skb, struct net_device *dev)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ struct netdev_desc *txdesc;
+ dma_addr_t addr;
+ unsigned entry;
+
+ /* Calculate the next Tx descriptor entry. */
+ entry = np->cur_tx % TX_RING_SIZE;
+ np->tx_skbuff[entry] = skb;
+ txdesc = &np->tx_ring[entry];
+
+ addr = dma_map_single(&np->pci_dev->dev, skb->data, skb->len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&np->pci_dev->dev, addr))
+ goto drop_frame;
+
+ txdesc->next_desc = 0;
+ txdesc->status = cpu_to_le32 ((entry << 2) | DisableAlign);
+ txdesc->frag.addr = cpu_to_le32(addr);
+ txdesc->frag.length = cpu_to_le32 (skb->len | LastFrag);
+
+ /* Increment cur_tx before tasklet_schedule() */
+ np->cur_tx++;
+ mb();
+ /* Schedule a tx_poll() task */
+ tasklet_schedule(&np->tx_tasklet);
+
+ /* On some architectures: explicitly flush cache lines here. */
+ if (np->cur_tx - np->dirty_tx < TX_QUEUE_LEN - 1 &&
+ !netif_queue_stopped(dev)) {
+ /* do nothing */
+ } else {
+ netif_stop_queue (dev);
+ }
+ if (netif_msg_tx_queued(np)) {
+ printk (KERN_DEBUG
+ "%s: Transmit frame #%d queued in slot %d.\n",
+ dev->name, np->cur_tx, entry);
+ }
+ return NETDEV_TX_OK;
+
+drop_frame:
+ dev_kfree_skb_any(skb);
+ np->tx_skbuff[entry] = NULL;
+ dev->stats.tx_dropped++;
+ return NETDEV_TX_OK;
+}
+
+/* Reset hardware tx and free all of tx buffers */
+static int
+reset_tx (struct net_device *dev)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ void __iomem *ioaddr = np->base;
+ struct sk_buff *skb;
+ int i;
+
+ /* Reset tx logic, TxListPtr will be cleaned */
+ iowrite16 (TxDisable, ioaddr + MACCtrl1);
+ sundance_reset(dev, (NetworkReset|FIFOReset|DMAReset|TxReset) << 16);
+
+ /* free all tx skbuff */
+ for (i = 0; i < TX_RING_SIZE; i++) {
+ np->tx_ring[i].next_desc = 0;
+
+ skb = np->tx_skbuff[i];
+ if (skb) {
+ dma_unmap_single(&np->pci_dev->dev,
+ le32_to_cpu(np->tx_ring[i].frag.addr),
+ skb->len, DMA_TO_DEVICE);
+ dev_kfree_skb_any(skb);
+ np->tx_skbuff[i] = NULL;
+ dev->stats.tx_dropped++;
+ }
+ }
+ np->cur_tx = np->dirty_tx = 0;
+ np->cur_task = 0;
+
+ np->last_tx = NULL;
+ iowrite8(127, ioaddr + TxDMAPollPeriod);
+
+ iowrite16 (StatsEnable | RxEnable | TxEnable, ioaddr + MACCtrl1);
+ return 0;
+}
+
+/* The interrupt handler cleans up after the Tx thread,
+ and schedule a Rx thread work */
+static irqreturn_t intr_handler(int irq, void *dev_instance)
+{
+ struct net_device *dev = (struct net_device *)dev_instance;
+ struct netdev_private *np = netdev_priv(dev);
+ void __iomem *ioaddr = np->base;
+ int hw_frame_id;
+ int tx_cnt;
+ int tx_status;
+ int handled = 0;
+ int i;
+
+ do {
+ int intr_status = ioread16(ioaddr + IntrStatus);
+ iowrite16(intr_status, ioaddr + IntrStatus);
+
+ if (netif_msg_intr(np))
+ printk(KERN_DEBUG "%s: Interrupt, status %4.4x.\n",
+ dev->name, intr_status);
+
+ if (!(intr_status & DEFAULT_INTR))
+ break;
+
+ handled = 1;
+
+ if (intr_status & (IntrRxDMADone)) {
+ iowrite16(DEFAULT_INTR & ~(IntrRxDone|IntrRxDMADone),
+ ioaddr + IntrEnable);
+ if (np->budget < 0)
+ np->budget = RX_BUDGET;
+ tasklet_schedule(&np->rx_tasklet);
+ }
+ if (intr_status & (IntrTxDone | IntrDrvRqst)) {
+ tx_status = ioread16 (ioaddr + TxStatus);
+ for (tx_cnt=32; tx_status & 0x80; --tx_cnt) {
+ if (netif_msg_tx_done(np))
+ printk
+ ("%s: Transmit status is %2.2x.\n",
+ dev->name, tx_status);
+ if (tx_status & 0x1e) {
+ if (netif_msg_tx_err(np))
+ printk("%s: Transmit error status %4.4x.\n",
+ dev->name, tx_status);
+ dev->stats.tx_errors++;
+ if (tx_status & 0x10)
+ dev->stats.tx_fifo_errors++;
+ if (tx_status & 0x08)
+ dev->stats.collisions++;
+ if (tx_status & 0x04)
+ dev->stats.tx_fifo_errors++;
+ if (tx_status & 0x02)
+ dev->stats.tx_window_errors++;
+
+ /*
+ ** This reset has been verified on
+ ** DFE-580TX boards ! phdm@macqel.be.
+ */
+ if (tx_status & 0x10) { /* TxUnderrun */
+ /* Restart Tx FIFO and transmitter */
+ sundance_reset(dev, (NetworkReset|FIFOReset|TxReset) << 16);
+ /* No need to reset the Tx pointer here */
+ }
+ /* Restart the Tx. Need to make sure tx enabled */
+ i = 10;
+ do {
+ iowrite16(ioread16(ioaddr + MACCtrl1) | TxEnable, ioaddr + MACCtrl1);
+ if (ioread16(ioaddr + MACCtrl1) & TxEnabled)
+ break;
+ mdelay(1);
+ } while (--i);
+ }
+ /* Yup, this is a documentation bug. It cost me *hours*. */
+ iowrite16 (0, ioaddr + TxStatus);
+ if (tx_cnt < 0) {
+ iowrite32(5000, ioaddr + DownCounter);
+ break;
+ }
+ tx_status = ioread16 (ioaddr + TxStatus);
+ }
+ hw_frame_id = (tx_status >> 8) & 0xff;
+ } else {
+ hw_frame_id = ioread8(ioaddr + TxFrameId);
+ }
+
+ if (np->pci_dev->revision >= 0x14) {
+ spin_lock(&np->lock);
+ for (; np->cur_tx - np->dirty_tx > 0; np->dirty_tx++) {
+ int entry = np->dirty_tx % TX_RING_SIZE;
+ struct sk_buff *skb;
+ int sw_frame_id;
+ sw_frame_id = (le32_to_cpu(
+ np->tx_ring[entry].status) >> 2) & 0xff;
+ if (sw_frame_id == hw_frame_id &&
+ !(le32_to_cpu(np->tx_ring[entry].status)
+ & 0x00010000))
+ break;
+ if (sw_frame_id == (hw_frame_id + 1) %
+ TX_RING_SIZE)
+ break;
+ skb = np->tx_skbuff[entry];
+ /* Free the original skb. */
+ dma_unmap_single(&np->pci_dev->dev,
+ le32_to_cpu(np->tx_ring[entry].frag.addr),
+ skb->len, DMA_TO_DEVICE);
+ dev_consume_skb_irq(np->tx_skbuff[entry]);
+ np->tx_skbuff[entry] = NULL;
+ np->tx_ring[entry].frag.addr = 0;
+ np->tx_ring[entry].frag.length = 0;
+ }
+ spin_unlock(&np->lock);
+ } else {
+ spin_lock(&np->lock);
+ for (; np->cur_tx - np->dirty_tx > 0; np->dirty_tx++) {
+ int entry = np->dirty_tx % TX_RING_SIZE;
+ struct sk_buff *skb;
+ if (!(le32_to_cpu(np->tx_ring[entry].status)
+ & 0x00010000))
+ break;
+ skb = np->tx_skbuff[entry];
+ /* Free the original skb. */
+ dma_unmap_single(&np->pci_dev->dev,
+ le32_to_cpu(np->tx_ring[entry].frag.addr),
+ skb->len, DMA_TO_DEVICE);
+ dev_consume_skb_irq(np->tx_skbuff[entry]);
+ np->tx_skbuff[entry] = NULL;
+ np->tx_ring[entry].frag.addr = 0;
+ np->tx_ring[entry].frag.length = 0;
+ }
+ spin_unlock(&np->lock);
+ }
+
+ if (netif_queue_stopped(dev) &&
+ np->cur_tx - np->dirty_tx < TX_QUEUE_LEN - 4) {
+ /* The ring is no longer full, clear busy flag. */
+ netif_wake_queue (dev);
+ }
+ /* Abnormal error summary/uncommon events handlers. */
+ if (intr_status & (IntrPCIErr | LinkChange | StatsMax))
+ netdev_error(dev, intr_status);
+ } while (0);
+ if (netif_msg_intr(np))
+ printk(KERN_DEBUG "%s: exiting interrupt, status=%#4.4x.\n",
+ dev->name, ioread16(ioaddr + IntrStatus));
+ return IRQ_RETVAL(handled);
+}
+
+static void rx_poll(struct tasklet_struct *t)
+{
+ struct netdev_private *np = from_tasklet(np, t, rx_tasklet);
+ struct net_device *dev = np->ndev;
+ int entry = np->cur_rx % RX_RING_SIZE;
+ int boguscnt = np->budget;
+ void __iomem *ioaddr = np->base;
+ int received = 0;
+
+ /* If EOP is set on the next entry, it's a new packet. Send it up. */
+ while (1) {
+ struct netdev_desc *desc = &(np->rx_ring[entry]);
+ u32 frame_status = le32_to_cpu(desc->status);
+ int pkt_len;
+
+ if (--boguscnt < 0) {
+ goto not_done;
+ }
+ if (!(frame_status & DescOwn))
+ break;
+ pkt_len = frame_status & 0x1fff; /* Chip omits the CRC. */
+ if (netif_msg_rx_status(np))
+ printk(KERN_DEBUG " netdev_rx() status was %8.8x.\n",
+ frame_status);
+ if (frame_status & 0x001f4000) {
+ /* There was a error. */
+ if (netif_msg_rx_err(np))
+ printk(KERN_DEBUG " netdev_rx() Rx error was %8.8x.\n",
+ frame_status);
+ dev->stats.rx_errors++;
+ if (frame_status & 0x00100000)
+ dev->stats.rx_length_errors++;
+ if (frame_status & 0x00010000)
+ dev->stats.rx_fifo_errors++;
+ if (frame_status & 0x00060000)
+ dev->stats.rx_frame_errors++;
+ if (frame_status & 0x00080000)
+ dev->stats.rx_crc_errors++;
+ if (frame_status & 0x00100000) {
+ printk(KERN_WARNING "%s: Oversized Ethernet frame,"
+ " status %8.8x.\n",
+ dev->name, frame_status);
+ }
+ } else {
+ struct sk_buff *skb;
+#ifndef final_version
+ if (netif_msg_rx_status(np))
+ printk(KERN_DEBUG " netdev_rx() normal Rx pkt length %d"
+ ", bogus_cnt %d.\n",
+ pkt_len, boguscnt);
+#endif
+ /* Check if the packet is long enough to accept without copying
+ to a minimally-sized skbuff. */
+ if (pkt_len < rx_copybreak &&
+ (skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) {
+ skb_reserve(skb, 2); /* 16 byte align the IP header */
+ dma_sync_single_for_cpu(&np->pci_dev->dev,
+ le32_to_cpu(desc->frag.addr),
+ np->rx_buf_sz, DMA_FROM_DEVICE);
+ skb_copy_to_linear_data(skb, np->rx_skbuff[entry]->data, pkt_len);
+ dma_sync_single_for_device(&np->pci_dev->dev,
+ le32_to_cpu(desc->frag.addr),
+ np->rx_buf_sz, DMA_FROM_DEVICE);
+ skb_put(skb, pkt_len);
+ } else {
+ dma_unmap_single(&np->pci_dev->dev,
+ le32_to_cpu(desc->frag.addr),
+ np->rx_buf_sz, DMA_FROM_DEVICE);
+ skb_put(skb = np->rx_skbuff[entry], pkt_len);
+ np->rx_skbuff[entry] = NULL;
+ }
+ skb->protocol = eth_type_trans(skb, dev);
+ /* Note: checksum -> skb->ip_summed = CHECKSUM_UNNECESSARY; */
+ netif_rx(skb);
+ }
+ entry = (entry + 1) % RX_RING_SIZE;
+ received++;
+ }
+ np->cur_rx = entry;
+ refill_rx (dev);
+ np->budget -= received;
+ iowrite16(DEFAULT_INTR, ioaddr + IntrEnable);
+ return;
+
+not_done:
+ np->cur_rx = entry;
+ refill_rx (dev);
+ if (!received)
+ received = 1;
+ np->budget -= received;
+ if (np->budget <= 0)
+ np->budget = RX_BUDGET;
+ tasklet_schedule(&np->rx_tasklet);
+}
+
+static void refill_rx (struct net_device *dev)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ int entry;
+
+ /* Refill the Rx ring buffers. */
+ for (;(np->cur_rx - np->dirty_rx + RX_RING_SIZE) % RX_RING_SIZE > 0;
+ np->dirty_rx = (np->dirty_rx + 1) % RX_RING_SIZE) {
+ struct sk_buff *skb;
+ dma_addr_t addr;
+
+ entry = np->dirty_rx % RX_RING_SIZE;
+ if (np->rx_skbuff[entry] == NULL) {
+ skb = netdev_alloc_skb(dev, np->rx_buf_sz + 2);
+ np->rx_skbuff[entry] = skb;
+ if (skb == NULL)
+ break; /* Better luck next round. */
+ skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */
+ addr = dma_map_single(&np->pci_dev->dev, skb->data,
+ np->rx_buf_sz, DMA_FROM_DEVICE);
+ if (dma_mapping_error(&np->pci_dev->dev, addr)) {
+ dev_kfree_skb_irq(skb);
+ np->rx_skbuff[entry] = NULL;
+ break;
+ }
+
+ np->rx_ring[entry].frag.addr = cpu_to_le32(addr);
+ }
+ /* Perhaps we need not reset this field. */
+ np->rx_ring[entry].frag.length =
+ cpu_to_le32(np->rx_buf_sz | LastFrag);
+ np->rx_ring[entry].status = 0;
+ }
+}
+static void netdev_error(struct net_device *dev, int intr_status)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ void __iomem *ioaddr = np->base;
+ u16 mii_ctl, mii_advertise, mii_lpa;
+ int speed;
+
+ if (intr_status & LinkChange) {
+ if (mdio_wait_link(dev, 10) == 0) {
+ printk(KERN_INFO "%s: Link up\n", dev->name);
+ if (np->an_enable) {
+ mii_advertise = mdio_read(dev, np->phys[0],
+ MII_ADVERTISE);
+ mii_lpa = mdio_read(dev, np->phys[0], MII_LPA);
+ mii_advertise &= mii_lpa;
+ printk(KERN_INFO "%s: Link changed: ",
+ dev->name);
+ if (mii_advertise & ADVERTISE_100FULL) {
+ np->speed = 100;
+ printk("100Mbps, full duplex\n");
+ } else if (mii_advertise & ADVERTISE_100HALF) {
+ np->speed = 100;
+ printk("100Mbps, half duplex\n");
+ } else if (mii_advertise & ADVERTISE_10FULL) {
+ np->speed = 10;
+ printk("10Mbps, full duplex\n");
+ } else if (mii_advertise & ADVERTISE_10HALF) {
+ np->speed = 10;
+ printk("10Mbps, half duplex\n");
+ } else
+ printk("\n");
+
+ } else {
+ mii_ctl = mdio_read(dev, np->phys[0], MII_BMCR);
+ speed = (mii_ctl & BMCR_SPEED100) ? 100 : 10;
+ np->speed = speed;
+ printk(KERN_INFO "%s: Link changed: %dMbps ,",
+ dev->name, speed);
+ printk("%s duplex.\n",
+ (mii_ctl & BMCR_FULLDPLX) ?
+ "full" : "half");
+ }
+ check_duplex(dev);
+ if (np->flowctrl && np->mii_if.full_duplex) {
+ iowrite16(ioread16(ioaddr + MulticastFilter1+2) | 0x0200,
+ ioaddr + MulticastFilter1+2);
+ iowrite16(ioread16(ioaddr + MACCtrl0) | EnbFlowCtrl,
+ ioaddr + MACCtrl0);
+ }
+ netif_carrier_on(dev);
+ } else {
+ printk(KERN_INFO "%s: Link down\n", dev->name);
+ netif_carrier_off(dev);
+ }
+ }
+ if (intr_status & StatsMax) {
+ get_stats(dev);
+ }
+ if (intr_status & IntrPCIErr) {
+ printk(KERN_ERR "%s: Something Wicked happened! %4.4x.\n",
+ dev->name, intr_status);
+ /* We must do a global reset of DMA to continue. */
+ }
+}
+
+static struct net_device_stats *get_stats(struct net_device *dev)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ void __iomem *ioaddr = np->base;
+ unsigned long flags;
+ u8 late_coll, single_coll, mult_coll;
+
+ spin_lock_irqsave(&np->statlock, flags);
+ /* The chip only need report frame silently dropped. */
+ dev->stats.rx_missed_errors += ioread8(ioaddr + RxMissed);
+ dev->stats.tx_packets += ioread16(ioaddr + TxFramesOK);
+ dev->stats.rx_packets += ioread16(ioaddr + RxFramesOK);
+ dev->stats.tx_carrier_errors += ioread8(ioaddr + StatsCarrierError);
+
+ mult_coll = ioread8(ioaddr + StatsMultiColl);
+ np->xstats.tx_multiple_collisions += mult_coll;
+ single_coll = ioread8(ioaddr + StatsOneColl);
+ np->xstats.tx_single_collisions += single_coll;
+ late_coll = ioread8(ioaddr + StatsLateColl);
+ np->xstats.tx_late_collisions += late_coll;
+ dev->stats.collisions += mult_coll
+ + single_coll
+ + late_coll;
+
+ np->xstats.tx_deferred += ioread8(ioaddr + StatsTxDefer);
+ np->xstats.tx_deferred_excessive += ioread8(ioaddr + StatsTxXSDefer);
+ np->xstats.tx_aborted += ioread8(ioaddr + StatsTxAbort);
+ np->xstats.tx_bcasts += ioread8(ioaddr + StatsBcastTx);
+ np->xstats.rx_bcasts += ioread8(ioaddr + StatsBcastRx);
+ np->xstats.tx_mcasts += ioread8(ioaddr + StatsMcastTx);
+ np->xstats.rx_mcasts += ioread8(ioaddr + StatsMcastRx);
+
+ dev->stats.tx_bytes += ioread16(ioaddr + TxOctetsLow);
+ dev->stats.tx_bytes += ioread16(ioaddr + TxOctetsHigh) << 16;
+ dev->stats.rx_bytes += ioread16(ioaddr + RxOctetsLow);
+ dev->stats.rx_bytes += ioread16(ioaddr + RxOctetsHigh) << 16;
+
+ spin_unlock_irqrestore(&np->statlock, flags);
+
+ return &dev->stats;
+}
+
+static void set_rx_mode(struct net_device *dev)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ void __iomem *ioaddr = np->base;
+ u16 mc_filter[4]; /* Multicast hash filter */
+ u32 rx_mode;
+ int i;
+
+ if (dev->flags & IFF_PROMISC) { /* Set promiscuous. */
+ memset(mc_filter, 0xff, sizeof(mc_filter));
+ rx_mode = AcceptBroadcast | AcceptMulticast | AcceptAll | AcceptMyPhys;
+ } else if ((netdev_mc_count(dev) > multicast_filter_limit) ||
+ (dev->flags & IFF_ALLMULTI)) {
+ /* Too many to match, or accept all multicasts. */
+ memset(mc_filter, 0xff, sizeof(mc_filter));
+ rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys;
+ } else if (!netdev_mc_empty(dev)) {
+ struct netdev_hw_addr *ha;
+ int bit;
+ int index;
+ int crc;
+ memset (mc_filter, 0, sizeof (mc_filter));
+ netdev_for_each_mc_addr(ha, dev) {
+ crc = ether_crc_le(ETH_ALEN, ha->addr);
+ for (index=0, bit=0; bit < 6; bit++, crc <<= 1)
+ if (crc & 0x80000000) index |= 1 << bit;
+ mc_filter[index/16] |= (1 << (index % 16));
+ }
+ rx_mode = AcceptBroadcast | AcceptMultiHash | AcceptMyPhys;
+ } else {
+ iowrite8(AcceptBroadcast | AcceptMyPhys, ioaddr + RxMode);
+ return;
+ }
+ if (np->mii_if.full_duplex && np->flowctrl)
+ mc_filter[3] |= 0x0200;
+
+ for (i = 0; i < 4; i++)
+ iowrite16(mc_filter[i], ioaddr + MulticastFilter0 + i*2);
+ iowrite8(rx_mode, ioaddr + RxMode);
+}
+
+static int __set_mac_addr(struct net_device *dev)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ u16 addr16;
+
+ addr16 = (dev->dev_addr[0] | (dev->dev_addr[1] << 8));
+ iowrite16(addr16, np->base + StationAddr);
+ addr16 = (dev->dev_addr[2] | (dev->dev_addr[3] << 8));
+ iowrite16(addr16, np->base + StationAddr+2);
+ addr16 = (dev->dev_addr[4] | (dev->dev_addr[5] << 8));
+ iowrite16(addr16, np->base + StationAddr+4);
+ return 0;
+}
+
+/* Invoked with rtnl_lock held */
+static int sundance_set_mac_addr(struct net_device *dev, void *data)
+{
+ const struct sockaddr *addr = data;
+
+ if (!is_valid_ether_addr(addr->sa_data))
+ return -EADDRNOTAVAIL;
+ eth_hw_addr_set(dev, addr->sa_data);
+ __set_mac_addr(dev);
+
+ return 0;
+}
+
+static const struct {
+ const char name[ETH_GSTRING_LEN];
+} sundance_stats[] = {
+ { "tx_multiple_collisions" },
+ { "tx_single_collisions" },
+ { "tx_late_collisions" },
+ { "tx_deferred" },
+ { "tx_deferred_excessive" },
+ { "tx_aborted" },
+ { "tx_bcasts" },
+ { "rx_bcasts" },
+ { "tx_mcasts" },
+ { "rx_mcasts" },
+};
+
+static int check_if_running(struct net_device *dev)
+{
+ if (!netif_running(dev))
+ return -EINVAL;
+ return 0;
+}
+
+static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ strscpy(info->driver, DRV_NAME, sizeof(info->driver));
+ strscpy(info->bus_info, pci_name(np->pci_dev), sizeof(info->bus_info));
+}
+
+static int get_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ spin_lock_irq(&np->lock);
+ mii_ethtool_get_link_ksettings(&np->mii_if, cmd);
+ spin_unlock_irq(&np->lock);
+ return 0;
+}
+
+static int set_link_ksettings(struct net_device *dev,
+ const struct ethtool_link_ksettings *cmd)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ int res;
+ spin_lock_irq(&np->lock);
+ res = mii_ethtool_set_link_ksettings(&np->mii_if, cmd);
+ spin_unlock_irq(&np->lock);
+ return res;
+}
+
+static int nway_reset(struct net_device *dev)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ return mii_nway_restart(&np->mii_if);
+}
+
+static u32 get_link(struct net_device *dev)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ return mii_link_ok(&np->mii_if);
+}
+
+static u32 get_msglevel(struct net_device *dev)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ return np->msg_enable;
+}
+
+static void set_msglevel(struct net_device *dev, u32 val)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ np->msg_enable = val;
+}
+
+static void get_strings(struct net_device *dev, u32 stringset,
+ u8 *data)
+{
+ if (stringset == ETH_SS_STATS)
+ memcpy(data, sundance_stats, sizeof(sundance_stats));
+}
+
+static int get_sset_count(struct net_device *dev, int sset)
+{
+ switch (sset) {
+ case ETH_SS_STATS:
+ return ARRAY_SIZE(sundance_stats);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ int i = 0;
+
+ get_stats(dev);
+ data[i++] = np->xstats.tx_multiple_collisions;
+ data[i++] = np->xstats.tx_single_collisions;
+ data[i++] = np->xstats.tx_late_collisions;
+ data[i++] = np->xstats.tx_deferred;
+ data[i++] = np->xstats.tx_deferred_excessive;
+ data[i++] = np->xstats.tx_aborted;
+ data[i++] = np->xstats.tx_bcasts;
+ data[i++] = np->xstats.rx_bcasts;
+ data[i++] = np->xstats.tx_mcasts;
+ data[i++] = np->xstats.rx_mcasts;
+}
+
+#ifdef CONFIG_PM
+
+static void sundance_get_wol(struct net_device *dev,
+ struct ethtool_wolinfo *wol)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ void __iomem *ioaddr = np->base;
+ u8 wol_bits;
+
+ wol->wolopts = 0;
+
+ wol->supported = (WAKE_PHY | WAKE_MAGIC);
+ if (!np->wol_enabled)
+ return;
+
+ wol_bits = ioread8(ioaddr + WakeEvent);
+ if (wol_bits & MagicPktEnable)
+ wol->wolopts |= WAKE_MAGIC;
+ if (wol_bits & LinkEventEnable)
+ wol->wolopts |= WAKE_PHY;
+}
+
+static int sundance_set_wol(struct net_device *dev,
+ struct ethtool_wolinfo *wol)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ void __iomem *ioaddr = np->base;
+ u8 wol_bits;
+
+ if (!device_can_wakeup(&np->pci_dev->dev))
+ return -EOPNOTSUPP;
+
+ np->wol_enabled = !!(wol->wolopts);
+ wol_bits = ioread8(ioaddr + WakeEvent);
+ wol_bits &= ~(WakePktEnable | MagicPktEnable |
+ LinkEventEnable | WolEnable);
+
+ if (np->wol_enabled) {
+ if (wol->wolopts & WAKE_MAGIC)
+ wol_bits |= (MagicPktEnable | WolEnable);
+ if (wol->wolopts & WAKE_PHY)
+ wol_bits |= (LinkEventEnable | WolEnable);
+ }
+ iowrite8(wol_bits, ioaddr + WakeEvent);
+
+ device_set_wakeup_enable(&np->pci_dev->dev, np->wol_enabled);
+
+ return 0;
+}
+#else
+#define sundance_get_wol NULL
+#define sundance_set_wol NULL
+#endif /* CONFIG_PM */
+
+static const struct ethtool_ops ethtool_ops = {
+ .begin = check_if_running,
+ .get_drvinfo = get_drvinfo,
+ .nway_reset = nway_reset,
+ .get_link = get_link,
+ .get_wol = sundance_get_wol,
+ .set_wol = sundance_set_wol,
+ .get_msglevel = get_msglevel,
+ .set_msglevel = set_msglevel,
+ .get_strings = get_strings,
+ .get_sset_count = get_sset_count,
+ .get_ethtool_stats = get_ethtool_stats,
+ .get_link_ksettings = get_link_ksettings,
+ .set_link_ksettings = set_link_ksettings,
+};
+
+static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ int rc;
+
+ if (!netif_running(dev))
+ return -EINVAL;
+
+ spin_lock_irq(&np->lock);
+ rc = generic_mii_ioctl(&np->mii_if, if_mii(rq), cmd, NULL);
+ spin_unlock_irq(&np->lock);
+
+ return rc;
+}
+
+static int netdev_close(struct net_device *dev)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ void __iomem *ioaddr = np->base;
+ struct sk_buff *skb;
+ int i;
+
+ /* Wait and kill tasklet */
+ tasklet_kill(&np->rx_tasklet);
+ tasklet_kill(&np->tx_tasklet);
+ np->cur_tx = 0;
+ np->dirty_tx = 0;
+ np->cur_task = 0;
+ np->last_tx = NULL;
+
+ netif_stop_queue(dev);
+
+ if (netif_msg_ifdown(np)) {
+ printk(KERN_DEBUG "%s: Shutting down ethercard, status was Tx %2.2x "
+ "Rx %4.4x Int %2.2x.\n",
+ dev->name, ioread8(ioaddr + TxStatus),
+ ioread32(ioaddr + RxStatus), ioread16(ioaddr + IntrStatus));
+ printk(KERN_DEBUG "%s: Queue pointers were Tx %d / %d, Rx %d / %d.\n",
+ dev->name, np->cur_tx, np->dirty_tx, np->cur_rx, np->dirty_rx);
+ }
+
+ /* Disable interrupts by clearing the interrupt mask. */
+ iowrite16(0x0000, ioaddr + IntrEnable);
+
+ /* Disable Rx and Tx DMA for safely release resource */
+ iowrite32(0x500, ioaddr + DMACtrl);
+
+ /* Stop the chip's Tx and Rx processes. */
+ iowrite16(TxDisable | RxDisable | StatsDisable, ioaddr + MACCtrl1);
+
+ for (i = 2000; i > 0; i--) {
+ if ((ioread32(ioaddr + DMACtrl) & 0xc000) == 0)
+ break;
+ mdelay(1);
+ }
+
+ iowrite16(GlobalReset | DMAReset | FIFOReset | NetworkReset,
+ ioaddr + ASIC_HI_WORD(ASICCtrl));
+
+ for (i = 2000; i > 0; i--) {
+ if ((ioread16(ioaddr + ASIC_HI_WORD(ASICCtrl)) & ResetBusy) == 0)
+ break;
+ mdelay(1);
+ }
+
+#ifdef __i386__
+ if (netif_msg_hw(np)) {
+ printk(KERN_DEBUG " Tx ring at %8.8x:\n",
+ (int)(np->tx_ring_dma));
+ for (i = 0; i < TX_RING_SIZE; i++)
+ printk(KERN_DEBUG " #%d desc. %4.4x %8.8x %8.8x.\n",
+ i, np->tx_ring[i].status, np->tx_ring[i].frag.addr,
+ np->tx_ring[i].frag.length);
+ printk(KERN_DEBUG " Rx ring %8.8x:\n",
+ (int)(np->rx_ring_dma));
+ for (i = 0; i < /*RX_RING_SIZE*/4 ; i++) {
+ printk(KERN_DEBUG " #%d desc. %4.4x %4.4x %8.8x\n",
+ i, np->rx_ring[i].status, np->rx_ring[i].frag.addr,
+ np->rx_ring[i].frag.length);
+ }
+ }
+#endif /* __i386__ debugging only */
+
+ free_irq(np->pci_dev->irq, dev);
+
+ timer_delete_sync(&np->timer);
+
+ /* Free all the skbuffs in the Rx queue. */
+ for (i = 0; i < RX_RING_SIZE; i++) {
+ np->rx_ring[i].status = 0;
+ skb = np->rx_skbuff[i];
+ if (skb) {
+ dma_unmap_single(&np->pci_dev->dev,
+ le32_to_cpu(np->rx_ring[i].frag.addr),
+ np->rx_buf_sz, DMA_FROM_DEVICE);
+ dev_kfree_skb(skb);
+ np->rx_skbuff[i] = NULL;
+ }
+ np->rx_ring[i].frag.addr = cpu_to_le32(0xBADF00D0); /* poison */
+ }
+ for (i = 0; i < TX_RING_SIZE; i++) {
+ np->tx_ring[i].next_desc = 0;
+ skb = np->tx_skbuff[i];
+ if (skb) {
+ dma_unmap_single(&np->pci_dev->dev,
+ le32_to_cpu(np->tx_ring[i].frag.addr),
+ skb->len, DMA_TO_DEVICE);
+ dev_kfree_skb(skb);
+ np->tx_skbuff[i] = NULL;
+ }
+ }
+
+ return 0;
+}
+
+static void sundance_remove1(struct pci_dev *pdev)
+{
+ struct net_device *dev = pci_get_drvdata(pdev);
+
+ if (dev) {
+ struct netdev_private *np = netdev_priv(dev);
+ unregister_netdev(dev);
+ dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE,
+ np->rx_ring, np->rx_ring_dma);
+ dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE,
+ np->tx_ring, np->tx_ring_dma);
+ pci_iounmap(pdev, np->base);
+ pci_release_regions(pdev);
+ free_netdev(dev);
+ }
+}
+
+static int __maybe_unused sundance_suspend(struct device *dev_d)
+{
+ struct net_device *dev = dev_get_drvdata(dev_d);
+ struct netdev_private *np = netdev_priv(dev);
+ void __iomem *ioaddr = np->base;
+
+ if (!netif_running(dev))
+ return 0;
+
+ netdev_close(dev);
+ netif_device_detach(dev);
+
+ if (np->wol_enabled) {
+ iowrite8(AcceptBroadcast | AcceptMyPhys, ioaddr + RxMode);
+ iowrite16(RxEnable, ioaddr + MACCtrl1);
+ }
+
+ device_set_wakeup_enable(dev_d, np->wol_enabled);
+
+ return 0;
+}
+
+static int __maybe_unused sundance_resume(struct device *dev_d)
+{
+ struct net_device *dev = dev_get_drvdata(dev_d);
+ int err = 0;
+
+ if (!netif_running(dev))
+ return 0;
+
+ err = netdev_open(dev);
+ if (err) {
+ printk(KERN_ERR "%s: Can't resume interface!\n",
+ dev->name);
+ goto out;
+ }
+
+ netif_device_attach(dev);
+
+out:
+ return err;
+}
+
+static SIMPLE_DEV_PM_OPS(sundance_pm_ops, sundance_suspend, sundance_resume);
+
+static struct pci_driver sundance_driver = {
+ .name = DRV_NAME,
+ .id_table = sundance_pci_tbl,
+ .probe = sundance_probe1,
+ .remove = sundance_remove1,
+ .driver.pm = &sundance_pm_ops,
+};
+
+module_pci_driver(sundance_driver);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 5a5fcde..e68997a 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1761,6 +1761,13 @@ static netdev_tx_t mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
bool gso = false;
int tx_num;
+ if (skb_vlan_tag_present(skb) &&
+ !eth_proto_is_802_3(eth_hdr(skb)->h_proto)) {
+ skb = __vlan_hwaccel_push_inside(skb);
+ if (!skb)
+ goto dropped;
+ }
+
/* normally we can rely on the stack not calling this more than once,
* however we have 2 queues running on the same ring so we need to lock
* the ring access
@@ -1806,8 +1813,9 @@ static netdev_tx_t mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
drop:
spin_unlock(ð->page_lock);
- stats->tx_dropped++;
dev_kfree_skb_any(skb);
+dropped:
+ stats->tx_dropped++;
return NETDEV_TX_OK;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 92a16dd..13666d5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -267,8 +267,10 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
pp.dma_dir = priv->dma_dir;
ring->pp = page_pool_create(&pp);
- if (!ring->pp)
+ if (IS_ERR(ring->pp)) {
+ err = PTR_ERR(ring->pp);
goto err_ring;
+ }
if (xdp_rxq_info_reg(&ring->xdp_rxq, priv->dev, queue_index, 0) < 0)
goto err_pp;
diff --git a/drivers/net/ethernet/microchip/lan865x/lan865x.c b/drivers/net/ethernet/microchip/lan865x/lan865x.c
index 84c41f1..79b800d 100644
--- a/drivers/net/ethernet/microchip/lan865x/lan865x.c
+++ b/drivers/net/ethernet/microchip/lan865x/lan865x.c
@@ -423,13 +423,16 @@ static void lan865x_remove(struct spi_device *spi)
free_netdev(priv->netdev);
}
-static const struct spi_device_id spidev_spi_ids[] = {
+static const struct spi_device_id lan865x_ids[] = {
{ .name = "lan8650" },
+ { .name = "lan8651" },
{},
};
+MODULE_DEVICE_TABLE(spi, lan865x_ids);
static const struct of_device_id lan865x_dt_ids[] = {
{ .compatible = "microchip,lan8650" },
+ { .compatible = "microchip,lan8651" },
{ /* Sentinel */ }
};
MODULE_DEVICE_TABLE(of, lan865x_dt_ids);
@@ -441,7 +444,7 @@ static struct spi_driver lan865x_driver = {
},
.probe = lan865x_probe,
.remove = lan865x_remove,
- .id_table = spidev_spi_ids,
+ .id_table = lan865x_ids,
};
module_spi_driver(lan865x_driver);
diff --git a/drivers/net/ethernet/oa_tc6.c b/drivers/net/ethernet/oa_tc6.c
index db200e4..91a906a 100644
--- a/drivers/net/ethernet/oa_tc6.c
+++ b/drivers/net/ethernet/oa_tc6.c
@@ -1249,7 +1249,8 @@ struct oa_tc6 *oa_tc6_init(struct spi_device *spi, struct net_device *netdev)
/* Set the SPI controller to pump at realtime priority */
tc6->spi->rt = true;
- spi_setup(tc6->spi);
+ if (spi_setup(tc6->spi) < 0)
+ return NULL;
tc6->spi_ctrl_tx_buf = devm_kzalloc(&tc6->spi->dev,
OA_TC6_CTRL_SPI_BUF_SIZE,
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index ecd6eca..8b2364f 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -1522,7 +1522,7 @@ static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common,
}
}
- if (single_port) {
+ if (single_port && num_tx) {
netif_txq = netdev_get_tx_queue(ndev, chn);
netdev_tx_completed_queue(netif_txq, num_tx, total_bytes);
am65_cpsw_nuss_tx_wake(tx_chn, ndev, netif_txq);
diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c
index a31d5d5..97e8888 100644
--- a/drivers/net/ethernet/xircom/xirc2ps_cs.c
+++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c
@@ -1576,7 +1576,7 @@ do_reset(struct net_device *dev, int full)
msleep(40); /* wait 40 msec to let it complete */
}
if (full_duplex)
- PutByte(XIRCREG1_ECR, GetByte(XIRCREG1_ECR | FullDuplex));
+ PutByte(XIRCREG1_ECR, GetByte(XIRCREG1_ECR) | FullDuplex);
} else { /* No MII */
SelectPage(0);
value = GetByte(XIRCREG_ESR); /* read the ESR */
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 4c75d1f..01329fe 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -1844,7 +1844,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
if (tb_sa[MACSEC_SA_ATTR_PN]) {
spin_lock_bh(&rx_sa->lock);
- rx_sa->next_pn = nla_get_u64(tb_sa[MACSEC_SA_ATTR_PN]);
+ rx_sa->next_pn = nla_get_uint(tb_sa[MACSEC_SA_ATTR_PN]);
spin_unlock_bh(&rx_sa->lock);
}
@@ -2086,7 +2086,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
}
spin_lock_bh(&tx_sa->lock);
- tx_sa->next_pn = nla_get_u64(tb_sa[MACSEC_SA_ATTR_PN]);
+ tx_sa->next_pn = nla_get_uint(tb_sa[MACSEC_SA_ATTR_PN]);
spin_unlock_bh(&tx_sa->lock);
if (tb_sa[MACSEC_SA_ATTR_ACTIVE])
@@ -2398,7 +2398,7 @@ static int macsec_upd_txsa(struct sk_buff *skb, struct genl_info *info)
spin_lock_bh(&tx_sa->lock);
prev_pn = tx_sa->next_pn_halves;
- tx_sa->next_pn = nla_get_u64(tb_sa[MACSEC_SA_ATTR_PN]);
+ tx_sa->next_pn = nla_get_uint(tb_sa[MACSEC_SA_ATTR_PN]);
spin_unlock_bh(&tx_sa->lock);
}
@@ -2496,7 +2496,7 @@ static int macsec_upd_rxsa(struct sk_buff *skb, struct genl_info *info)
spin_lock_bh(&rx_sa->lock);
prev_pn = rx_sa->next_pn_halves;
- rx_sa->next_pn = nla_get_u64(tb_sa[MACSEC_SA_ATTR_PN]);
+ rx_sa->next_pn = nla_get_uint(tb_sa[MACSEC_SA_ATTR_PN]);
spin_unlock_bh(&rx_sa->lock);
}
diff --git a/drivers/net/mctp/mctp-usb.c b/drivers/net/mctp/mctp-usb.c
index 775a386..36ccc53 100644
--- a/drivers/net/mctp/mctp-usb.c
+++ b/drivers/net/mctp/mctp-usb.c
@@ -183,6 +183,7 @@ static void mctp_usb_in_complete(struct urb *urb)
struct mctp_usb_hdr *hdr;
u8 pkt_len; /* length of MCTP packet, no USB header */
+ skb_reset_mac_header(skb);
hdr = skb_pull_data(skb, sizeof(*hdr));
if (!hdr)
break;
diff --git a/drivers/net/pcs/pcs-rzn1-miic.c b/drivers/net/pcs/pcs-rzn1-miic.c
index d79bb9b..ce73d94 100644
--- a/drivers/net/pcs/pcs-rzn1-miic.c
+++ b/drivers/net/pcs/pcs-rzn1-miic.c
@@ -19,7 +19,7 @@
#define MIIC_PRCMD 0x0
#define MIIC_ESID_CODE 0x4
-#define MIIC_MODCTRL 0x20
+#define MIIC_MODCTRL 0x8
#define MIIC_MODCTRL_SW_MODE GENMASK(4, 0)
#define MIIC_CONVCTRL(port) (0x100 + (port) * 4)
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index c7f867b..f1b57e3 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -1016,6 +1016,42 @@ static void phylink_pcs_an_restart(struct phylink *pl)
pl->pcs->ops->pcs_an_restart(pl->pcs);
}
+enum inband_type {
+ INBAND_NONE,
+ INBAND_CISCO_SGMII,
+ INBAND_BASEX,
+};
+
+static enum inband_type phylink_get_inband_type(phy_interface_t interface)
+{
+ switch (interface) {
+ case PHY_INTERFACE_MODE_SGMII:
+ case PHY_INTERFACE_MODE_QSGMII:
+ case PHY_INTERFACE_MODE_QUSGMII:
+ case PHY_INTERFACE_MODE_USXGMII:
+ case PHY_INTERFACE_MODE_10G_QXGMII:
+ /* These protocols are designed for use with a PHY which
+ * communicates its negotiation result back to the MAC via
+ * inband communication. Note: there exist PHYs that run
+ * with SGMII but do not send the inband data.
+ */
+ return INBAND_CISCO_SGMII;
+
+ case PHY_INTERFACE_MODE_1000BASEX:
+ case PHY_INTERFACE_MODE_2500BASEX:
+ /* 1000base-X is designed for use media-side for Fibre
+ * connections, and thus the Autoneg bit needs to be
+ * taken into account. We also do this for 2500base-X
+ * as well, but drivers may not support this, so may
+ * need to override this.
+ */
+ return INBAND_BASEX;
+
+ default:
+ return INBAND_NONE;
+ }
+}
+
/**
* phylink_pcs_neg_mode() - helper to determine PCS inband mode
* @pl: a pointer to a &struct phylink returned from phylink_create()
@@ -1043,46 +1079,19 @@ static void phylink_pcs_neg_mode(struct phylink *pl, struct phylink_pcs *pcs,
unsigned int pcs_ib_caps = 0;
unsigned int phy_ib_caps = 0;
unsigned int neg_mode, mode;
- enum {
- INBAND_CISCO_SGMII,
- INBAND_BASEX,
- } type;
+ enum inband_type type;
+
+ type = phylink_get_inband_type(interface);
+ if (type == INBAND_NONE) {
+ pl->pcs_neg_mode = PHYLINK_PCS_NEG_NONE;
+ pl->act_link_an_mode = pl->req_link_an_mode;
+ return;
+ }
mode = pl->req_link_an_mode;
pl->phy_ib_mode = 0;
- switch (interface) {
- case PHY_INTERFACE_MODE_SGMII:
- case PHY_INTERFACE_MODE_QSGMII:
- case PHY_INTERFACE_MODE_QUSGMII:
- case PHY_INTERFACE_MODE_USXGMII:
- case PHY_INTERFACE_MODE_10G_QXGMII:
- /* These protocols are designed for use with a PHY which
- * communicates its negotiation result back to the MAC via
- * inband communication. Note: there exist PHYs that run
- * with SGMII but do not send the inband data.
- */
- type = INBAND_CISCO_SGMII;
- break;
-
- case PHY_INTERFACE_MODE_1000BASEX:
- case PHY_INTERFACE_MODE_2500BASEX:
- /* 1000base-X is designed for use media-side for Fibre
- * connections, and thus the Autoneg bit needs to be
- * taken into account. We also do this for 2500base-X
- * as well, but drivers may not support this, so may
- * need to override this.
- */
- type = INBAND_BASEX;
- break;
-
- default:
- pl->pcs_neg_mode = PHYLINK_PCS_NEG_NONE;
- pl->act_link_an_mode = mode;
- return;
- }
-
if (pcs)
pcs_ib_caps = phylink_pcs_inband_caps(pcs, interface);
@@ -3625,6 +3634,7 @@ static int phylink_sfp_config_optical(struct phylink *pl)
{
__ETHTOOL_DECLARE_LINK_MODE_MASK(support);
struct phylink_link_state config;
+ enum inband_type inband_type;
phy_interface_t interface;
int ret;
@@ -3671,6 +3681,23 @@ static int phylink_sfp_config_optical(struct phylink *pl)
phylink_dbg(pl, "optical SFP: chosen %s interface\n",
phy_modes(interface));
+ inband_type = phylink_get_inband_type(interface);
+ if (inband_type == INBAND_NONE) {
+ /* If this is the sole interface, and there is no inband
+ * support, clear the advertising mask and Autoneg bit in
+ * the support mask. Otherwise, just clear the Autoneg bit
+ * in the advertising mask.
+ */
+ if (phy_interface_weight(pl->sfp_interfaces) == 1) {
+ linkmode_clear_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
+ pl->sfp_support);
+ linkmode_zero(config.advertising);
+ } else {
+ linkmode_clear_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
+ config.advertising);
+ }
+ }
+
if (!phylink_validate_pcs_inband_autoneg(pl, interface,
config.advertising)) {
phylink_err(pl, "autoneg setting not compatible with PCS");
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 5347c95..4cd1d6c 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -492,6 +492,9 @@ static const struct sfp_quirk sfp_quirks[] = {
SFP_QUIRK("ALCATELLUCENT", "3FE46541AA", sfp_quirk_2500basex,
sfp_fixup_nokia),
+ // FLYPRO SFP-10GT-CS-30M uses Rollball protocol to talk to the PHY.
+ SFP_QUIRK_F("FLYPRO", "SFP-10GT-CS-30M", sfp_fixup_rollball),
+
// Fiberstore SFP-10G-T doesn't identify as copper, uses the Rollball
// protocol to talk to the PHY and needs 4 sec wait before probing the
// PHY.
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index f32be2e..dab864b 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -1445,6 +1445,10 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev,
if (READ_ONCE(f->updated) != now)
WRITE_ONCE(f->updated, now);
+ /* Don't override an fdb with nexthop with a learnt entry */
+ if (rcu_access_pointer(f->nh))
+ return SKB_DROP_REASON_VXLAN_ENTRY_EXISTS;
+
if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip) &&
rdst->remote_ifindex == ifindex))
return SKB_NOT_DROPPED_YET;
@@ -1453,10 +1457,6 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev,
if (f->state & (NUD_PERMANENT | NUD_NOARP))
return SKB_DROP_REASON_VXLAN_ENTRY_EXISTS;
- /* Don't override an fdb with nexthop with a learnt entry */
- if (rcu_access_pointer(f->nh))
- return SKB_DROP_REASON_VXLAN_ENTRY_EXISTS;
-
if (net_ratelimit())
netdev_info(dev,
"%pM migrated from %pIS to %pIS\n",
@@ -1877,6 +1877,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
n = neigh_lookup(&arp_tbl, &tip, dev);
if (n) {
+ struct vxlan_rdst *rdst = NULL;
struct vxlan_fdb *f;
struct sk_buff *reply;
@@ -1887,7 +1888,9 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
rcu_read_lock();
f = vxlan_find_mac_tx(vxlan, n->ha, vni);
- if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
+ if (f)
+ rdst = first_remote_rcu(f);
+ if (rdst && vxlan_addr_any(&rdst->remote_ip)) {
/* bridge-local neighbor */
neigh_release(n);
rcu_read_unlock();
@@ -2044,6 +2047,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, dev);
if (n) {
+ struct vxlan_rdst *rdst = NULL;
struct vxlan_fdb *f;
struct sk_buff *reply;
@@ -2053,7 +2057,9 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
}
f = vxlan_find_mac_tx(vxlan, n->ha, vni);
- if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
+ if (f)
+ rdst = first_remote_rcu(f);
+ if (rdst && vxlan_addr_any(&rdst->remote_ip)) {
/* bridge-local neighbor */
neigh_release(n);
goto out;
diff --git a/drivers/net/vxlan/vxlan_private.h b/drivers/net/vxlan/vxlan_private.h
index 6c625fb..99fe772 100644
--- a/drivers/net/vxlan/vxlan_private.h
+++ b/drivers/net/vxlan/vxlan_private.h
@@ -61,9 +61,7 @@ static inline struct hlist_head *vs_head(struct net *net, __be16 port)
return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
}
-/* First remote destination for a forwarding entry.
- * Guaranteed to be non-NULL because remotes are never deleted.
- */
+/* First remote destination for a forwarding entry. */
static inline struct vxlan_rdst *first_remote_rcu(struct vxlan_fdb *fdb)
{
if (rcu_access_pointer(fdb->nh))
diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c
index d39073d..4e1286c 100644
--- a/drivers/ptp/ptp_ocp.c
+++ b/drivers/ptp/ptp_ocp.c
@@ -4557,8 +4557,7 @@ ptp_ocp_detach(struct ptp_ocp *bp)
ptp_ocp_debugfs_remove_device(bp);
ptp_ocp_detach_sysfs(bp);
ptp_ocp_attr_group_del(bp);
- if (timer_pending(&bp->watchdog))
- timer_delete_sync(&bp->watchdog);
+ timer_delete_sync(&bp->watchdog);
if (bp->ts0)
ptp_ocp_unregister_ext(bp->ts0);
if (bp->ts1)
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 4c2b8b6..bb45787 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -169,6 +169,11 @@ static inline bool phy_interface_empty(const unsigned long *intf)
return bitmap_empty(intf, PHY_INTERFACE_MODE_MAX);
}
+static inline unsigned int phy_interface_weight(const unsigned long *intf)
+{
+ return bitmap_weight(intf, PHY_INTERFACE_MODE_MAX);
+}
+
static inline void phy_interface_and(unsigned long *dst, const unsigned long *a,
const unsigned long *b)
{
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 9f56308..af97d07 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1687,7 +1687,12 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
coding_len = ntohs(coded_packet_tmp.coded_len);
- if (coding_len > skb->len)
+ /* ensure dst buffer is large enough (payload only) */
+ if (coding_len + h_size > skb->len)
+ return NULL;
+
+ /* ensure src buffer is large enough (payload only) */
+ if (coding_len + h_size > nc_packet->skb->len)
return NULL;
/* Here the magic is reversed:
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index f4257c4..814fb86 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1422,7 +1422,10 @@ static int l2cap_sock_release(struct socket *sock)
if (!sk)
return 0;
+ lock_sock_nested(sk, L2CAP_NESTING_PARENT);
l2cap_sock_cleanup_listen(sk);
+ release_sock(sk);
+
bt_sock_unlink(&l2cap_sk_list, sk);
err = l2cap_sock_shutdown(sock, SHUT_RDWR);
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 94cbe96..083e2fe 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -626,9 +626,6 @@ static unsigned int br_nf_local_in(void *priv,
break;
}
- ct = container_of(nfct, struct nf_conn, ct_general);
- WARN_ON_ONCE(!nf_ct_is_confirmed(ct));
-
return ret;
}
#endif
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 7d426a8..f112156 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -90,10 +90,12 @@ static void est_timer(struct timer_list *t)
rate = (b_packets - est->last_packets) << (10 - est->intvl_log);
rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log);
+ preempt_disable_nested();
write_seqcount_begin(&est->seq);
est->avbps += brate;
est->avpps += rate;
write_seqcount_end(&est->seq);
+ preempt_enable_nested();
est->last_bytes = b_bytes;
est->last_packets = b_packets;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 2ffe73e..c48c572 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -799,11 +799,12 @@ void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
struct sk_buff *cloned_skb = NULL;
struct ip_options opts = { 0 };
enum ip_conntrack_info ctinfo;
+ enum ip_conntrack_dir dir;
struct nf_conn *ct;
__be32 orig_ip;
ct = nf_ct_get(skb_in, &ctinfo);
- if (!ct || !(ct->status & IPS_SRC_NAT)) {
+ if (!ct || !(READ_ONCE(ct->status) & IPS_NAT_MASK)) {
__icmp_send(skb_in, type, code, info, &opts);
return;
}
@@ -818,7 +819,8 @@ void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
goto out;
orig_ip = ip_hdr(skb_in)->saddr;
- ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip;
+ dir = CTINFO2DIR(ctinfo);
+ ip_hdr(skb_in)->saddr = ct->tuplehash[dir].tuple.src.u3.ip;
__icmp_send(skb_in, type, code, info, &opts);
ip_hdr(skb_in)->saddr = orig_ip;
out:
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index d1ef964..a23eb87 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -494,10 +494,8 @@ static int ipv6_rpl_srh_rcv(struct sk_buff *skb)
idev = __in6_dev_get(skb->dev);
- accept_rpl_seg = net->ipv6.devconf_all->rpl_seg_enabled;
- if (accept_rpl_seg > idev->cnf.rpl_seg_enabled)
- accept_rpl_seg = idev->cnf.rpl_seg_enabled;
-
+ accept_rpl_seg = min(READ_ONCE(net->ipv6.devconf_all->rpl_seg_enabled),
+ READ_ONCE(idev->cnf.rpl_seg_enabled));
if (!accept_rpl_seg) {
kfree_skb(skb);
return -1;
diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
index 9e35748..233914b 100644
--- a/net/ipv6/ip6_icmp.c
+++ b/net/ipv6/ip6_icmp.c
@@ -54,11 +54,12 @@ void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
struct inet6_skb_parm parm = { 0 };
struct sk_buff *cloned_skb = NULL;
enum ip_conntrack_info ctinfo;
+ enum ip_conntrack_dir dir;
struct in6_addr orig_ip;
struct nf_conn *ct;
ct = nf_ct_get(skb_in, &ctinfo);
- if (!ct || !(ct->status & IPS_SRC_NAT)) {
+ if (!ct || !(READ_ONCE(ct->status) & IPS_NAT_MASK)) {
__icmpv6_send(skb_in, type, code, info, &parm);
return;
}
@@ -73,7 +74,8 @@ void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
goto out;
orig_ip = ipv6_hdr(skb_in)->saddr;
- ipv6_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.in6;
+ dir = CTINFO2DIR(ctinfo);
+ ipv6_hdr(skb_in)->saddr = ct->tuplehash[dir].tuple.src.u3.in6;
__icmpv6_send(skb_in, type, code, info, &parm);
ipv6_hdr(skb_in)->saddr = orig_ip;
out:
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7577e7e..e885629 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1431,17 +1431,17 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
ireq = inet_rsk(req);
if (sk_acceptq_is_full(sk))
- goto out_overflow;
+ goto exit_overflow;
if (!dst) {
dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
if (!dst)
- goto out;
+ goto exit;
}
newsk = tcp_create_openreq_child(sk, req, skb);
if (!newsk)
- goto out_nonewsk;
+ goto exit_nonewsk;
/*
* No need to charge this sock to the relevant IPv6 refcnt debug socks
@@ -1525,25 +1525,19 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
const union tcp_md5_addr *addr;
addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
- if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) {
- inet_csk_prepare_forced_close(newsk);
- tcp_done(newsk);
- goto out;
- }
+ if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key))
+ goto put_and_exit;
}
}
#endif
#ifdef CONFIG_TCP_AO
/* Copy over tcp_ao_info if any */
if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6))
- goto out; /* OOM */
+ goto put_and_exit; /* OOM */
#endif
- if (__inet_inherit_port(sk, newsk) < 0) {
- inet_csk_prepare_forced_close(newsk);
- tcp_done(newsk);
- goto out;
- }
+ if (__inet_inherit_port(sk, newsk) < 0)
+ goto put_and_exit;
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
&found_dup_sk);
if (*own_req) {
@@ -1570,13 +1564,17 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
return newsk;
-out_overflow:
+exit_overflow:
__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
-out_nonewsk:
+exit_nonewsk:
dst_release(dst);
-out:
+exit:
tcp_listendrop(sk);
return NULL;
+put_and_exit:
+ inet_csk_prepare_forced_close(newsk);
+ tcp_done(newsk);
+ goto exit;
}
INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
diff --git a/net/mctp/route.c b/net/mctp/route.c
index 2b2b958..4d314e0 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -378,6 +378,7 @@ static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key) {}
static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev) {}
#endif
+/* takes ownership of skb, both in success and failure cases */
static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
{
struct mctp_hdr *hdr = mctp_hdr(skb);
@@ -387,8 +388,10 @@ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
& MCTP_HDR_SEQ_MASK;
if (!key->reasm_head) {
- /* Since we're manipulating the shared frag_list, ensure it isn't
- * shared with any other SKBs.
+ /* Since we're manipulating the shared frag_list, ensure it
+ * isn't shared with any other SKBs. In the cloned case,
+ * this will free the skb; callers can no longer access it
+ * safely.
*/
key->reasm_head = skb_unshare(skb, GFP_ATOMIC);
if (!key->reasm_head)
@@ -402,10 +405,10 @@ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK;
if (this_seq != exp_seq)
- return -EINVAL;
+ goto err_free;
if (key->reasm_head->len + skb->len > mctp_message_maxlen)
- return -EINVAL;
+ goto err_free;
skb->next = NULL;
skb->sk = NULL;
@@ -419,6 +422,10 @@ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
key->reasm_head->truesize += skb->truesize;
return 0;
+
+err_free:
+ kfree_skb(skb);
+ return -EINVAL;
}
static int mctp_dst_input(struct mctp_dst *dst, struct sk_buff *skb)
@@ -532,18 +539,16 @@ static int mctp_dst_input(struct mctp_dst *dst, struct sk_buff *skb)
* key isn't observable yet
*/
mctp_frag_queue(key, skb);
+ skb = NULL;
/* if the key_add fails, we've raced with another
* SOM packet with the same src, dest and tag. There's
* no way to distinguish future packets, so all we
- * can do is drop; we'll free the skb on exit from
- * this function.
+ * can do is drop.
*/
rc = mctp_key_add(key, msk);
- if (!rc) {
+ if (!rc)
trace_mctp_key_acquire(key);
- skb = NULL;
- }
/* we don't need to release key->lock on exit, so
* clean up here and suppress the unlock via
@@ -561,8 +566,7 @@ static int mctp_dst_input(struct mctp_dst *dst, struct sk_buff *skb)
key = NULL;
} else {
rc = mctp_frag_queue(key, skb);
- if (!rc)
- skb = NULL;
+ skb = NULL;
}
}
@@ -572,17 +576,16 @@ static int mctp_dst_input(struct mctp_dst *dst, struct sk_buff *skb)
*/
/* we need to be continuing an existing reassembly... */
- if (!key->reasm_head)
+ if (!key->reasm_head) {
rc = -EINVAL;
- else
+ } else {
rc = mctp_frag_queue(key, skb);
+ skb = NULL;
+ }
if (rc)
goto out_unlock;
- /* we've queued; the queue owns the skb now */
- skb = NULL;
-
/* end of message? deliver to socket, and we're done with
* the reassembly/response key
*/
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 4ed5878..ceb48c3 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -368,7 +368,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
(cur->tuple.src.l3num == NFPROTO_UNSPEC ||
cur->tuple.src.l3num == me->tuple.src.l3num) &&
cur->tuple.dst.protonum == me->tuple.dst.protonum) {
- ret = -EEXIST;
+ ret = -EBUSY;
goto out;
}
}
@@ -379,7 +379,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) {
if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple,
&mask)) {
- ret = -EEXIST;
+ ret = -EBUSY;
goto out;
}
}
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 5382883..a42ef3f 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -742,6 +742,9 @@ bool smc_ib_is_sg_need_sync(struct smc_link *lnk,
unsigned int i;
bool ret = false;
+ if (!lnk->smcibdev->ibdev->dma_device)
+ return ret;
+
/* for now there is just one DMA address */
for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg,
buf_slot->sgt[lnk->link_idx].nents, i) {
diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py
index cd23af8..3e3a89a 100755
--- a/tools/testing/selftests/drivers/net/hw/csum.py
+++ b/tools/testing/selftests/drivers/net/hw/csum.py
@@ -17,7 +17,7 @@
ip_args = f"-{ipver} -S {cfg.remote_addr_v[ipver]} -D {cfg.addr_v[ipver]}"
rx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -n 100 {ip_args} -r 1 -R {extra_args}"
- tx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -n 100 {ip_args} -r 1 -T {extra_args}"
+ tx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -n 100 {ip_args} -r 1 -T {extra_args}"
with bkg(rx_cmd, exit_wait=True):
wait_port_listen(34000, proto="udp")
@@ -37,7 +37,7 @@
if extra_args != "-U -Z":
extra_args += " -r 1"
- rx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -L 1 -n 100 {ip_args} -R {extra_args}"
+ rx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -L 1 -n 100 {ip_args} -R {extra_args}"
tx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -L 1 -n 100 {ip_args} -T {extra_args}"
with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index b31a71f..c7e03e1 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -99,6 +99,7 @@
TEST_GEN_PROGS += bind_timewait
TEST_PROGS += test_vxlan_mdb.sh
TEST_PROGS += test_bridge_neigh_suppress.sh
+TEST_PROGS += test_vxlan_nh.sh
TEST_PROGS += test_vxlan_nolocalbypass.sh
TEST_PROGS += test_bridge_backup_port.sh
TEST_PROGS += test_neigh.sh
diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
index a4ee549..45832df 100755
--- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
@@ -20,6 +20,7 @@
SOCAT_TIMEOUT=60
nsin=""
+nsin_small=""
ns1out=""
ns2out=""
@@ -36,7 +37,7 @@
cleanup_all_ns
- rm -f "$nsin" "$ns1out" "$ns2out"
+ rm -f "$nsin" "$nsin_small" "$ns1out" "$ns2out"
[ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns="$log_netns"
}
@@ -72,6 +73,7 @@
rmtu=2000
filesize=$((2 * 1024 * 1024))
+filesize_small=$((filesize / 16))
usage(){
echo "nft_flowtable.sh [OPTIONS]"
@@ -89,7 +91,10 @@
o) omtu=$OPTARG;;
l) lmtu=$OPTARG;;
r) rmtu=$OPTARG;;
- s) filesize=$OPTARG;;
+ s)
+ filesize=$OPTARG
+ filesize_small=$((OPTARG / 16))
+ ;;
*) usage;;
esac
done
@@ -215,6 +220,7 @@
fi
nsin=$(mktemp)
+nsin_small=$(mktemp)
ns1out=$(mktemp)
ns2out=$(mktemp)
@@ -265,6 +271,7 @@
check_dscp()
{
local what=$1
+ local pmtud="$2"
local ok=1
local counter
@@ -277,37 +284,39 @@
local pc4z=${counter%*bytes*}
local pc4z=${pc4z#*packets}
+ local failmsg="FAIL: pmtu $pmtu: $what counters do not match, expected"
+
case "$what" in
"dscp_none")
if [ "$pc4" -gt 0 ] || [ "$pc4z" -eq 0 ]; then
- echo "FAIL: dscp counters do not match, expected dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2
+ echo "$failmsg dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
"dscp_fwd")
if [ "$pc4" -eq 0 ] || [ "$pc4z" -eq 0 ]; then
- echo "FAIL: dscp counters do not match, expected dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2
+ echo "$failmsg dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
"dscp_ingress")
if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
- echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
+ echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
"dscp_egress")
if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
- echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
+ echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
*)
- echo "FAIL: Unknown DSCP check" 1>&2
+ echo "$failmsg: Unknown DSCP check" 1>&2
ret=1
ok=0
esac
@@ -319,9 +328,9 @@
check_transfer()
{
- in=$1
- out=$2
- what=$3
+ local in=$1
+ local out=$2
+ local what=$3
if ! cmp "$in" "$out" > /dev/null 2>&1; then
echo "FAIL: file mismatch for $what" 1>&2
@@ -342,25 +351,39 @@
{
local nsa=$1
local nsb=$2
- local dstip=$3
- local dstport=$4
+ local pmtu=$3
+ local dstip=$4
+ local dstport=$5
local lret=0
+ local socatc
+ local socatl
+ local infile="$nsin"
- timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$nsin" > "$ns2out" &
+ if [ $pmtu -eq 0 ]; then
+ infile="$nsin_small"
+ fi
+
+ timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$infile" > "$ns2out" &
lpid=$!
busywait 1000 listener_ready
- timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$nsin" > "$ns1out"
+ timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$infile" > "$ns1out"
+ socatc=$?
wait $lpid
+ socatl=$?
- if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then
+ if [ $socatl -ne 0 ] || [ $socatc -ne 0 ];then
+ rc=1
+ fi
+
+ if ! check_transfer "$infile" "$ns2out" "ns1 -> ns2"; then
lret=1
ret=1
fi
- if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then
+ if ! check_transfer "$infile" "$ns1out" "ns1 <- ns2"; then
lret=1
ret=1
fi
@@ -370,14 +393,16 @@
test_tcp_forwarding()
{
- test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+ local pmtu="$3"
+
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345
return $?
}
test_tcp_forwarding_set_dscp()
{
- check_dscp "dscp_none"
+ local pmtu="$3"
ip netns exec "$nsr1" nft -f - <<EOF
table netdev dscpmangle {
@@ -388,8 +413,8 @@
}
EOF
if [ $? -eq 0 ]; then
- test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
- check_dscp "dscp_ingress"
+ test_tcp_forwarding_ip "$1" "$2" "$3" 10.0.2.99 12345
+ check_dscp "dscp_ingress" "$pmtu"
ip netns exec "$nsr1" nft delete table netdev dscpmangle
else
@@ -405,10 +430,10 @@
}
EOF
if [ $? -eq 0 ]; then
- test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
- check_dscp "dscp_egress"
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345
+ check_dscp "dscp_egress" "$pmtu"
- ip netns exec "$nsr1" nft flush table netdev dscpmangle
+ ip netns exec "$nsr1" nft delete table netdev dscpmangle
else
echo "SKIP: Could not load netdev:egress for veth1"
fi
@@ -416,48 +441,53 @@
# partial. If flowtable really works, then both dscp-is-0 and dscp-is-cs3
# counters should have seen packets (before and after ft offload kicks in).
ip netns exec "$nsr1" nft -a insert rule inet filter forward ip dscp set cs3
- test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
- check_dscp "dscp_fwd"
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345
+ check_dscp "dscp_fwd" "$pmtu"
}
test_tcp_forwarding_nat()
{
+ local nsa="$1"
+ local nsb="$2"
+ local pmtu="$3"
+ local what="$4"
local lret
- local pmtu
- test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+ [ "$pmtu" -eq 0 ] && what="$what (pmtu disabled)"
+
+ test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 10.0.2.99 12345
lret=$?
- pmtu=$3
- what=$4
-
if [ "$lret" -eq 0 ] ; then
if [ "$pmtu" -eq 1 ] ;then
- check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what"
+ check_counters "flow offload for ns1/ns2 with masquerade $what"
else
echo "PASS: flow offload for ns1/ns2 with masquerade $what"
fi
- test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.6.6.6 1666
lret=$?
if [ "$pmtu" -eq 1 ] ;then
- check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what"
+ check_counters "flow offload for ns1/ns2 with dnat $what"
elif [ "$lret" -eq 0 ] ; then
echo "PASS: flow offload for ns1/ns2 with dnat $what"
fi
+ else
+ echo "FAIL: flow offload for ns1/ns2 with dnat $what"
fi
return $lret
}
make_file "$nsin" "$filesize"
+make_file "$nsin_small" "$filesize_small"
# First test:
# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
# Due to MTU mismatch in both directions, all packets (except small packets like pure
# acks) have to be handled by normal forwarding path. Therefore, packet counters
# are not checked.
-if test_tcp_forwarding "$ns1" "$ns2"; then
+if test_tcp_forwarding "$ns1" "$ns2" 0; then
echo "PASS: flow offloaded for ns1/ns2"
else
echo "FAIL: flow offload for ns1/ns2:" 1>&2
@@ -489,8 +519,9 @@
}
EOF
+check_dscp "dscp_none" "0"
if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 ""; then
- echo "FAIL: flow offload for ns1/ns2 with dscp update" 1>&2
+ echo "FAIL: flow offload for ns1/ns2 with dscp update and no pmtu discovery" 1>&2
exit 0
fi
@@ -513,6 +544,14 @@
# For earlier tests (large mtus), packets cannot be handled via flowtable
# (except pure acks and other small packets).
ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null
+ip netns exec "$ns2" nft reset counters table inet filter >/dev/null
+
+if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 1 ""; then
+ echo "FAIL: flow offload for ns1/ns2 with dscp update and pmtu discovery" 1>&2
+ exit 0
+fi
+
+ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null
if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then
echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
@@ -644,7 +683,7 @@
ip -net "$ns2" route add default via 10.0.2.1
ip -net "$ns2" route add default via dead:2::1
-if test_tcp_forwarding "$ns1" "$ns2"; then
+if test_tcp_forwarding "$ns1" "$ns2" 1; then
check_counters "ipsec tunnel mode for ns1/ns2"
else
echo "FAIL: ipsec tunnel mode for ns1/ns2"
@@ -668,7 +707,7 @@
fi
echo "re-run with random mtus and file size: -o $o -l $l -r $r -s $filesize"
- $0 -o "$o" -l "$l" -r "$r" -s "$filesize"
+ $0 -o "$o" -l "$l" -r "$r" -s "$filesize" || ret=1
fi
exit $ret
diff --git a/tools/testing/selftests/net/test_vxlan_nh.sh b/tools/testing/selftests/net/test_vxlan_nh.sh
new file mode 100755
index 0000000..20f3369
--- /dev/null
+++ b/tools/testing/selftests/net/test_vxlan_nh.sh
@@ -0,0 +1,223 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+TESTS="
+ basic_tx_ipv4
+ basic_tx_ipv6
+ learning
+ proxy_ipv4
+ proxy_ipv6
+"
+VERBOSE=0
+
+################################################################################
+# Utilities
+
+run_cmd()
+{
+ local cmd="$1"
+ local out
+ local stderr="2>/dev/null"
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ stderr=
+ fi
+
+ out=$(eval "$cmd" "$stderr")
+ rc=$?
+ if [ "$VERBOSE" -eq 1 ] && [ -n "$out" ]; then
+ echo " $out"
+ fi
+
+ return $rc
+}
+
+################################################################################
+# Cleanup
+
+exit_cleanup_all()
+{
+ cleanup_all_ns
+ exit "${EXIT_STATUS}"
+}
+
+################################################################################
+# Tests
+
+nh_stats_get()
+{
+ ip -n "$ns1" -s -j nexthop show id 10 | jq ".[][\"group_stats\"][][\"packets\"]"
+}
+
+tc_stats_get()
+{
+ tc_rule_handle_stats_get "dev dummy1 egress" 101 ".packets" "-n $ns1"
+}
+
+basic_tx_common()
+{
+ local af_str=$1; shift
+ local proto=$1; shift
+ local local_addr=$1; shift
+ local plen=$1; shift
+ local remote_addr=$1; shift
+
+ RET=0
+
+ # Test basic Tx functionality. Check that stats are incremented on
+ # both the FDB nexthop group and the egress device.
+
+ run_cmd "ip -n $ns1 link add name dummy1 up type dummy"
+ run_cmd "ip -n $ns1 route add $remote_addr/$plen dev dummy1"
+ run_cmd "tc -n $ns1 qdisc add dev dummy1 clsact"
+ run_cmd "tc -n $ns1 filter add dev dummy1 egress proto $proto pref 1 handle 101 flower ip_proto udp dst_ip $remote_addr dst_port 4789 action pass"
+
+ run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo"
+
+ run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb"
+ run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb"
+
+ run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789"
+ run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10"
+
+ run_cmd "ip netns exec $ns1 mausezahn vx0 -a own -b 00:11:22:33:44:55 -c 1 -q"
+
+ busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" nh_stats_get > /dev/null
+ check_err $? "FDB nexthop group stats did not increase"
+
+ busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" tc_stats_get > /dev/null
+ check_err $? "tc filter stats did not increase"
+
+ log_test "VXLAN FDB nexthop: $af_str basic Tx"
+}
+
+basic_tx_ipv4()
+{
+ basic_tx_common "IPv4" ipv4 192.0.2.1 32 192.0.2.2
+}
+
+basic_tx_ipv6()
+{
+ basic_tx_common "IPv6" ipv6 2001:db8:1::1 128 2001:db8:1::2
+}
+
+learning()
+{
+ RET=0
+
+ # When learning is enabled on the VXLAN device, an incoming packet
+ # might try to refresh an FDB entry that points to an FDB nexthop group
+ # instead of an ordinary remote destination. Check that the kernel does
+ # not crash in this situation.
+
+ run_cmd "ip -n $ns1 address add 192.0.2.1/32 dev lo"
+ run_cmd "ip -n $ns1 address add 192.0.2.2/32 dev lo"
+
+ run_cmd "ip -n $ns1 nexthop add id 1 via 192.0.2.3 fdb"
+ run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb"
+
+ run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local 192.0.2.1 dstport 12345 localbypass"
+ run_cmd "ip -n $ns1 link add name vx1 up type vxlan id 10020 local 192.0.2.2 dstport 54321 learning"
+
+ run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static dst 192.0.2.2 port 54321 vni 10020"
+ run_cmd "bridge -n $ns1 fdb add 00:aa:bb:cc:dd:ee dev vx1 self static nhid 10"
+
+ run_cmd "ip netns exec $ns1 mausezahn vx0 -a 00:aa:bb:cc:dd:ee -b 00:11:22:33:44:55 -c 1 -q"
+
+ log_test "VXLAN FDB nexthop: learning"
+}
+
+proxy_common()
+{
+ local af_str=$1; shift
+ local local_addr=$1; shift
+ local plen=$1; shift
+ local remote_addr=$1; shift
+ local neigh_addr=$1; shift
+ local ping_cmd=$1; shift
+
+ RET=0
+
+ # When the "proxy" option is enabled on the VXLAN device, the device
+ # will suppress ARP requests and IPv6 Neighbor Solicitation messages if
+ # it is able to reply on behalf of the remote host. That is, if a
+ # matching and valid neighbor entry is configured on the VXLAN device
+ # whose MAC address is not behind the "any" remote (0.0.0.0 / ::). The
+ # FDB entry for the neighbor's MAC address might point to an FDB
+ # nexthop group instead of an ordinary remote destination. Check that
+ # the kernel does not crash in this situation.
+
+ run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo"
+
+ run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb"
+ run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb"
+
+ run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789 proxy"
+
+ run_cmd "ip -n $ns1 neigh add $neigh_addr lladdr 00:11:22:33:44:55 nud perm dev vx0"
+
+ run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10"
+
+ run_cmd "ip netns exec $ns1 $ping_cmd"
+
+ log_test "VXLAN FDB nexthop: $af_str proxy"
+}
+
+proxy_ipv4()
+{
+ proxy_common "IPv4" 192.0.2.1 32 192.0.2.2 192.0.2.3 \
+ "arping -b -c 1 -s 192.0.2.1 -I vx0 192.0.2.3"
+}
+
+proxy_ipv6()
+{
+ proxy_common "IPv6" 2001:db8:1::1 128 2001:db8:1::2 2001:db8:1::3 \
+ "ndisc6 -r 1 -s 2001:db8:1::1 -w 1 2001:db8:1::3 vx0"
+}
+
+################################################################################
+# Usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+ -p Pause on fail
+ -v Verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# Main
+
+while getopts ":t:pvh" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=$((VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+require_command mausezahn
+require_command arping
+require_command ndisc6
+require_command jq
+
+if ! ip nexthop help 2>&1 | grep -q "stats"; then
+ echo "SKIP: iproute2 ip too old, missing nexthop stats support"
+ exit "$ksft_skip"
+fi
+
+trap exit_cleanup_all EXIT
+
+for t in $TESTS
+do
+ setup_ns ns1; $t; cleanup_all_ns;
+done