网络设备驱动是 Linux 系统中最核心的驱动之一,负责管理网卡等网络硬件设备,是实现网络通信的基础。无论是服务器、嵌入式设备还是物联网网关,网络驱动都是不可或缺的组件。
网络设备驱动在系统中的位置:

根据架构图,网络设备驱动分为四个层次:

2.2 各层职责详解
dev_queue_xmit()、netif_receive_skb() | |||
struct net_device | |||
ndo_start_xmit | |||


struct net_device 是网络设备驱动的核心数据结构,代表一个网络接口:
structnet_device {char name[IFNAMSIZ]; // 设备名称(如 eth0)unsignedint flags; // 设备标志(IFF_UP等)unsignedint priv_flags; // 私有标志unsigned short type; // 设备类型(ARPHRD_ETHER)unsigned short hard_header_len; // 硬件头部长度unsignedint mtu; // 最大传输单元unsignedchar addr_len; // MAC地址长度unsignedchar perm_addr[MAX_ADDR_LEN]; // 永久MAC地址unsignedchar dev_addr[MAX_ADDR_LEN]; // 当前MAC地址/* 接收队列 */structnetdev_queue __rcu *rx_queue;/* 发送队列 */structnetdev_queue *tx_queue;unsignedint num_tx_queues;unsignedint real_num_tx_queues;/* 操作函数集 */conststructnet_device_ops *netdev_ops;conststructethtool_ops *ethtool_ops;conststructheader_ops *header_ops;/* 统计信息 */structrtnl_link_stats64 __percpu *stats64;/* NAPI相关 */structnapi_structnapi;unsignedint gro_flush_timeout;/* 队列状态 */enumnetdev_queue_state_t state;/* 私有数据 */void *priv;/* 设备引用计数 */refcount_t refcnt;/* 设备链表 */structlist_headdev_list;/* 网络命名空间 */structnet *nd_net;/* 队列控制 */structQdisc *qdisc;structQdisc *qdisc_sleeping;/* 硬件特性 */netdev_features_t features;netdev_features_t hw_features;netdev_features_t wanted_features;/* 定时器 */structtimer_listwatchdog_timer;/* MTU变更回调 */int (*change_mtu)(struct net_device *, int);/* 设备状态变更回调 */void (*destructor)(struct net_device *);/* DMA掩码 */ u64 dma_mask;/* 中断号 */unsignedint irq;/* 总线私有数据 */structdevicedev;/* 设备索引 */int ifindex;/* 网络设备类型 */enum net_device_type dev_type;/* 最大帧长度 */unsignedint max_mtu;unsignedint min_mtu;/* VLAN相关 */structnet_device *master;structlist_headvlans;/* 时间戳 */structhwtstamp_config *tstamp_config;/* 设备统计 */structpcpu_lstats __percpu *lstats;/* 接收缓冲区配置 */unsignedint rx_queue_len;/* 网络设备组 */unsignedint group;/* 设备状态 */unsignedlong state;/* 设备唤醒标志 */bool needs_free_netdev;/* 设备注册标志 */bool registered;/* 设备关闭标志 */bool shutdown;};关键字段解析:
name | ||
flags | ||
mtu | ||
dev_addr | ||
netdev_ops | ||
napi | ||
features | ||
priv |
structnet_device_ops {int (*ndo_init)(struct net_device *dev);void (*ndo_uninit)(struct net_device *dev);netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb, struct net_device *dev);int (*ndo_open)(struct net_device *dev);int (*ndo_stop)(struct net_device *dev);int (*ndo_change_mtu)(struct net_device *dev, int new_mtu);int (*ndo_set_mac_address)(struct net_device *dev, void *addr);int (*ndo_validate_addr)(struct net_device *dev);int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd);netdev_features_t (*ndo_features_check)(struct sk_buff *skb, struct net_device *dev,netdev_features_t features);void (*ndo_tx_timeout)(struct net_device *dev, unsignedint txqueue);void (*ndo_get_stats64)(struct net_device *dev, struct rtnl_link_stats64 *stats);int (*ndo_set_features)(struct net_device *dev, netdev_features_t features);int (*ndo_fix_features)(struct net_device *dev);int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8 *mac);int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos, __be16 vlan_proto);int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool enable);int (*ndo_get_vf_config)(struct net_device *dev, int vf, struct ifla_vf_info *info);int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate,int max_tx_rate);int (*ndo_bpf)(struct net_device *dev, struct netdev_bpf *bpf);int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier);int (*ndo_neigh_setup)(struct net_device *dev, struct neigh_parms *);void (*ndo_netpoll_setup)(struct net_device *dev, struct netpoll_info *ni);void (*ndo_netpoll_cleanup)(struct net_device *dev);int (*ndo_direct_xmit)(struct net_device *dev, struct sk_buff *skb);};核心函数说明:
ndo_init | ||
ndo_open | ||
ndo_stop | ||
ndo_start_xmit | ||
ndo_tx_timeout | ||
ndo_get_stats64 | ||
ndo_change_mtu | ||
ndo_set_mac_address |
NAPI(New API)是 Linux 2.6 引入的网络中断处理机制,用于减少中断频率,提高网络吞吐量:
structnapi_struct {/* 轮询函数 */int (*poll)(struct napi_struct *, int);/* 设备上下文 */structnet_device *dev;/* 软中断处理 */structlist_headdev_list;structlist_headpoll_list;/* 权重 */unsignedint weight;/* 预算 */unsignedint budget;/* 状态标志 */unsignedint napi_state;/* 唤醒标志 */unsignedint wakeup_pending;/* 队列状态 */structsk_buff_headrx_queue;/* 工作队列 */structwork_structwork;/* 定时器 */structtimer_listtimer;/* 统计信息 */unsignedlong rx_packets;unsignedlong rx_bytes;/* 延迟处理 */unsignedint gro_count;/* 时间戳 */unsignedlong poll_start;};NAPI 工作原理:

struct sk_buff 是网络数据包在内核中的表示形式:
structsk_buff {/* 缓冲区头部指针 */union {structsk_buff *next;structrcu_headrcu; };/* 数据指针 */unsignedchar *head; // 缓冲区起始unsignedchar *data; // 当前数据起始unsignedchar *tail; // 当前数据末尾unsignedchar *end; // 缓冲区末尾/* 长度信息 */unsignedint len; // 数据长度unsignedint data_len; // 数据部分长度 __u16 mac_len; // MAC头部长度 __u16 hdr_len; // 协议头部长度/* 协议信息 */ __be16 protocol; // 协议类型/* 设备信息 */structnet_device *dev;// 所属设备/* 网络命名空间 */structnet *sk_net;/* 时间戳 */skb_mstamp_t tstamp;/* 校验和 */ __wsum csum; __u32 csum_start; __u32 csum_offset;/* 标志位 */unsignedint flags;/* 队列信息 */structlist_headlist;structsk_buff_head *list_head;/* 引用计数 */refcount_t users;/* 内存分配器 */structkmem_cache *destructor_cache;/* 私有数据 */void (*destructor)(struct sk_buff *skb);/* 网络层头部 */structiphdr *ip_hdr;structtcphdr *tcp_hdr;/* 传输特性 */netdev_features_t features;netdev_features_t skb_features;/* GRO相关 */structsk_buff *next_frag;structsk_buff *frag_list;/* 分片信息 */structskb_shared_info *shinfo;/* 优先级 */ __u32 priority;/* 流量控制 */structQdisc *qdisc;/* 路由信息 */structdst_entry *dst;/* 安全信息 */structsec_path *sp;/* 加密信息 */structcrypto_skb_info *crypto;};// 分配网络设备struct net_device *alloc_netdev(int sizeof_priv, constchar *name,void (*setup)(struct net_device *));// 注册网络设备intregister_netdev(struct net_device *dev);// 注销网络设备voidunregister_netdev(struct net_device *dev);// 释放网络设备voidfree_netdev(struct net_device *dev);使用示例:
staticvoidmynet_setup(struct net_device *dev){ ether_setup(dev); // 初始化以太网设备 dev->netdev_ops = &mynet_ops; dev->ethtool_ops = &mynet_ethtool_ops; dev->flags |= IFF_NOARP;}staticint __init mynet_init(void){structnet_device *dev; dev = alloc_netdev(sizeof(struct mynet_priv), "mynet%d", mynet_setup);if (!dev)return -ENOMEM;return register_netdev(dev);}staticvoid __exit mynet_exit(void){ unregister_netdev(dev); free_netdev(dev);}// 初始化NAPIvoidnapi_init(struct napi_struct *napi, int (*poll)(struct napi_struct *, int));// 调度NAPIvoidnapi_schedule(struct napi_struct *napi);// 启用NAPIvoidnapi_enable(struct napi_struct *napi);// 禁用NAPIvoidnapi_disable(struct napi_struct *napi);// 完成NAPI轮询voidnapi_complete(struct napi_struct *napi);// 完成NAPI轮询(扩展版)voidnapi_complete_done(struct napi_struct *napi, int work_done);// 发送数据包到网络层intdev_queue_xmit(struct sk_buff *skb);// 接收数据包intnetif_receive_skb(struct sk_buff *skb);// 批量接收数据包intnetif_receive_skb_list(struct list_head *head);// 通知发送完成voiddev_kfree_skb(struct sk_buff *skb);// 分配skbstruct sk_buff *dev_alloc_skb(unsignedint length);// 克隆skbstruct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask);// 设置设备标志voidset_bit(int nr, volatileunsignedlong *addr);voidclear_bit(int nr, volatileunsignedlong *addr);// 检查设备是否启动staticinlineboolnetif_running(const struct net_device *dev);// 检查设备是否可发送staticinlineboolnetif_xmit_stopped(const struct net_device *dev);// 唤醒发送队列voidnetif_wake_queue(struct net_device *dev);// 停止发送队列voidnetif_stop_queue(struct net_device *dev);// 更新链路状态voidnetif_carrier_on(struct net_device *dev);voidnetif_carrier_off(struct net_device *dev);// 更新设备统计voiddev_lstats_add(struct net_device *dev, unsignedint len);#include<linux/netdevice.h>#include<linux/etherdevice.h>#include<linux/module.h>#include<linux/platform_device.h>/* 驱动私有数据 */structmynet_priv {structnet_device *dev;structnapi_structnapi;structsk_buff_headrx_queue;spinlock_t lock;/* 硬件相关 */void __iomem *regs;int irq;};/* NAPI轮询函数 */staticintmynet_poll(struct 
napi_struct *napi, int budget){structmynet_priv *priv = container_of(napi, structmynet_priv, napi);structsk_buff *skb;int work_done = 0;while (work_done < budget) {/* 检查是否有接收的数据 */if (skb_queue_empty(&priv->rx_queue))break; skb = skb_dequeue(&priv->rx_queue);if (!skb)break;/* 传递给协议栈 */ netif_receive_skb(skb); work_done++; }/* 如果没有更多数据,重新启用中断 */if (work_done < budget) { napi_complete(napi);/* 启用硬件中断 */ }return work_done;}/* 发送函数 */staticnetdev_tx_tmynet_start_xmit(struct sk_buff *skb, struct net_device *dev){structmynet_priv *priv = netdev_priv(dev);/* 获取数据 */unsignedchar *data = skb->data;unsignedint len = skb->len;/* 发送数据到硬件 */// ... 硬件操作 .../* 释放skb */ dev_kfree_skb(skb);return NETDEV_TX_OK;}/* 打开设备 */staticintmynet_open(struct net_device *dev){structmynet_priv *priv = netdev_priv(dev);/* 启用NAPI */ napi_enable(&priv->napi);/* 注册中断 */// request_irq(priv->irq, mynet_interrupt, ...);/* 启动硬件 */// ...return0;}/* 关闭设备 */staticintmynet_stop(struct net_device *dev){structmynet_priv *priv = netdev_priv(dev);/* 禁用NAPI */ napi_disable(&priv->napi);/* 释放中断 */// free_irq(priv->irq, priv);/* 停止硬件 */// ...return0;}/* 设备操作函数集 */staticconststructnet_device_opsmynet_netdev_ops = { .ndo_open = mynet_open, .ndo_stop = mynet_stop, .ndo_start_xmit = mynet_start_xmit, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr,};/* 设备初始化 */staticvoidmynet_setup(struct net_device *dev){structmynet_priv *priv = netdev_priv(dev);/* 初始化以太网设备 */ ether_setup(dev);/* 设置操作函数 */ dev->netdev_ops = &mynet_netdev_ops;/* 设置设备特性 */ dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;/* 初始化NAPI */ napi_init(&priv->napi, mynet_poll); priv->napi.dev = dev;/* 初始化接收队列 */ skb_queue_head_init(&priv->rx_queue);/* 初始化锁 */ spin_lock_init(&priv->lock);}/* Probe函数 */staticintmynet_probe(struct platform_device *pdev){structmynet_priv *priv;structnet_device *dev;structresource *res;int err;/* 分配网络设备 */ dev = alloc_netdev(sizeof(*priv), "mynet%d", NET_NAME_UNKNOWN, mynet_setup);if 
(!dev)return -ENOMEM; priv = netdev_priv(dev); priv->dev = dev;/* 获取寄存器资源 */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); priv->regs = devm_ioremap_resource(&pdev->dev, res);if (IS_ERR(priv->regs)) { err = PTR_ERR(priv->regs);goto out_free_dev; }/* 获取中断 */ priv->irq = platform_get_irq(pdev, 0);if (priv->irq < 0) { err = priv->irq;goto out_free_dev; }/* 设置私有数据 */ platform_set_drvdata(pdev, priv);/* 注册网络设备 */ err = register_netdev(dev);if (err)goto out_free_dev;return0;out_free_dev: free_netdev(dev);return err;}/* Remove函数 */staticintmynet_remove(struct platform_device *pdev){structmynet_priv *priv = platform_get_drvdata(pdev); unregister_netdev(priv->dev); free_netdev(priv->dev);return0;}/* 设备树匹配 */staticconststructof_device_idmynet_match[] = { { .compatible = "vendor,mynet" }, { /* Sentinel */ }};MODULE_DEVICE_TABLE(of, mynet_match);/* 平台驱动 */staticstructplatform_drivermynet_driver = { .probe = mynet_probe, .remove = mynet_remove, .driver = { .name = "mynet", .of_match_table = mynet_match, },};module_platform_driver(mynet_driver);MODULE_LICENSE("GPL");MODULE_DESCRIPTION("My Network Driver");MODULE_AUTHOR("WindRunner1");/* 中断处理函数 */staticirqreturn_tmynet_interrupt(int irq, void *dev_id){structmynet_priv *priv = dev_id; u32 status;/* 读取中断状态 */ status = readl(priv->regs + INTERRUPT_STATUS);/* 清除中断 */ writel(status, priv->regs + INTERRUPT_STATUS);/* 处理接收中断 */if (status & RX_INTERRUPT) {/* 读取数据包到skb */structsk_buff *skb = dev_alloc_skb(MAX_PACKET_SIZE);if (skb) {/* 从硬件读取数据 */// memcpy(skb_put(skb, len), rx_buffer, len);/* 设置协议类型 */ skb->protocol = eth_type_trans(skb, priv->dev);/* 加入接收队列 */ spin_lock(&priv->lock); skb_queue_tail(&priv->rx_queue, skb); spin_unlock(&priv->lock);/* 调度NAPI */ napi_schedule(&priv->napi); } }/* 处理发送完成中断 */if (status & TX_INTERRUPT) {/* 释放发送缓冲区 */// ... 
}return IRQ_HANDLED;}staticvoidmynet_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats){structmynet_priv *priv = netdev_priv(dev);/* 读取硬件统计寄存器 */// stats->rx_packets = readl(priv->regs + RX_PACKETS);// stats->tx_packets = readl(priv->regs + TX_PACKETS);// stats->rx_bytes = readl(priv->regs + RX_BYTES);// stats->tx_bytes = readl(priv->regs + TX_BYTES);}netdev_priv undeclared | #include <linux/netdevice.h> | |
struct sk_buff_head undefined | #include <linux/skbuff.h> | |
napi_schedule undefined | napi_schedule_irqoff | |
NETDEV_TX_OK undeclared | #include <linux/netdevice.h> |
# 现象
dmesg | grep "mynet"
# 显示: mynet: probe failed
# 排查步骤
1. 检查设备树节点
2. 检查资源是否正确获取
3. 检查 register_netdev 返回值

# 现象
ping 192.168.1.1
# 显示: Destination Host Unreachable
# 排查步骤
1. 检查设备是否启动: ifconfig mynet0
2. 检查发送队列是否停止: cat /sys/class/net/mynet0/queues/tx-0/state
3. 检查硬件是否正常

# 现象
网络吞吐量远低于预期
# 优化建议
1. 确保启用NAPI
2. 调整NAPI预算(内核默认值为 300,吞吐不足时应适当调大): echo 600 > /proc/sys/net/core/netdev_budget
3. 启用TSO/GRO: ethtool -K mynet0 tso on gro on

# 查看设备信息
ip link show
# 查看设备统计
cat /proc/net/dev
# 查看中断统计
cat /proc/interrupts | grep mynet
# 查看NAPI状态
cat /sys/class/net/mynet0/napi_defer_hard_irqs
# 使用ethtool查看设备信息
ethtool mynet0
# 使用tcpdump抓包
tcpdump -i mynet0

net_device、net_device_ops、napi_struct、sk_buff
发送路径(dev_queue_xmit → ndo_start_xmit)、接收路径(中断 → NAPI → netif_receive_skb)
高级特性
虚拟化网络
性能优化
参考资料:
drivers/net/、Documentation/networking/