本文分析nss dp有线驱动是如何进行skb tx的。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 | /* * dma_desc_tx * Tx DMA Descriptor Structure * * Enhanced descriptor format for transmit. */ struct dma_desc_tx { uint32_t status; /* Status */ uint32_t length; /* Buffer 1 and Buffer 2 length */ uint32_t buffer1; /* Network Buffer 1 pointer (DMA-able) */ uint32_t buffer2; /* Network Buffer 2 pointer (DMA-able) */ uint32_t reserved1; /* Reserved word */ uint32_t reserved2; /* Reserved word */ uint32_t timestamplow; /* Lower 32 bits of the 64 bit timestamp value */ uint32_t timestamphigh; /* Higher 32 bits of the 64 bit timestamp value */ uint32_t padding[8]; /* Pad 32 byte to align to 64B cacheline size */ }; struct syn_dp_info_tx { struct napi_struct napi_tx; /* Tx NAPI */ void __iomem *mac_base; /* MAC base for register read/write */ struct dma_desc_tx *tx_desc; /* start address of TX descriptors ring or chain, this is used by the driver */ uint32_t busy_tx_desc_cnt; /* Number of Tx Descriptors owned by DMA at any given time */ uint32_t tx_comp_idx; /* index of the tx descriptor owned by DMA */ uint32_t tx_idx; /* index of the tx descriptor next available with driver */ struct syn_dp_tx_buf tx_buf_pool[SYN_DP_TX_DESC_SIZE]; /* Tx skb pool helping TX DMA descriptors */ struct nss_dp_hal_gmac_stats_tx tx_stats; /* GMAC driver Tx statistics */ struct net_device *netdev; /* Net-device corresponding to the GMAC */ struct device *dev; /* Platform device corresponding to the GMAC */ struct sk_buff *skb_free_list[SYN_DP_NAPI_BUDGET_TX]; /* Array to hold SKBs before free during Tx completion */ size_t shinfo_addr_virt[SYN_DP_NAPI_BUDGET_TX]; /* Array to hold SKB end pointer to be prefetched during Tx completion */ }; /* * syn_dp_info * Synopysys GMAC Dataplane information */ struct syn_dp_info { struct syn_dp_info_rx dp_info_rx; struct syn_dp_info_tx dp_info_tx; void __iomem *mac_base; dma_addr_t rx_desc_dma_addr; 
dma_addr_t tx_desc_dma_addr; int napi_added; }; |
初始化函数
static int syn_dp_cfg_tx_setup_desc_queue(struct syn_dp_info *dev_info)
tx ring的个数由下述宏定义
#define SYN_DP_TX_DESC_SIZE 1024 /* Tx Descriptors needed in the descriptor pool/queue */
首先使用dma_alloc_coherent()分配SYN_DP_TX_DESC_SIZE个dma_desc_tx结构体,函数返回虚拟地址(first_desc),并通过dma_addr参数返回对应的dma地址。
first_desc = dma_alloc_coherent(tx_info->dev, sizeof(struct dma_desc_tx) * SYN_DP_TX_DESC_SIZE, &dma_addr, GFP_KERNEL);
将first_desc存储到 dp_info_tx结构体的 tx_desc。
将dma_addr存储到 syn_dp_info结构体的 tx_desc_dma_addr。
这样tx ring的虚拟地址和物理dma地址,都存储起来了。
初始化tx ring
将所有desc的成员都清0,最后一个desc的status设置为DESC_TX_DESC_END_OF_RING。
将dp_info_tx结构体的,tx_comp_idx, tx_idx, busy_tx_desc_cnt 置0
将tx ring的dma地址写入gmac寄存器,使其生效
syn_init_tx_desc_base(dev_info->mac_base, dev_info->tx_desc_dma_addr);
-》hal_write_reg(mac_base, SYN_DMA_TX_DESCRIPTOR_LIST_ADDRESS, desc_dma);
注册一个tx napi,处理函数syn_dp_napi_poll_tx()
netif_napi_add(netdev, &tx_info->napi_tx, syn_dp_napi_poll_tx, SYN_DP_NAPI_BUDGET_TX);
我们知道napi最初是为rx设计的。但是nss dp在tx方向也使用了napi,用来在发送完成后释放skb。
请求中断,中断处理函数 syn_dp_handle_irq()
err = request_irq(netdev->irq, syn_dp_handle_irq, 0, "nss-dp-gmac", &gmac_dev->dp_info.syn_info);
这里rx和tx共享中断。
对接协议栈的xmit函数是 nss_dp_xmit()
static const struct net_device_ops nss_dp_netdev_ops = {
.ndo_open = nss_dp_open,
.ndo_stop = nss_dp_close,
.ndo_start_xmit = nss_dp_xmit,
nss_dp_xmit()
它只是一个简单的封装,调用data plane的xmit函数
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 | /* * nss_dp_xmit() */ static netdev_tx_t nss_dp_xmit( struct sk_buff *skb, struct net_device *netdev) { struct nss_dp_dev *dp_priv; if (!skb || !netdev) return NETDEV_TX_OK; dp_priv = ( struct nss_dp_dev *)netdev_priv(netdev); netdev_dbg(netdev, "Tx packet, len %d\n" , skb->len); return dp_priv->data_plane_ops->xmit(dp_priv->dpc, skb); } |
syn_dp_if_xmit()
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 | struct nss_dp_data_plane_ops nss_dp_gmac_ops = { .init = syn_dp_if_init, .open = syn_dp_if_open, .close = syn_dp_if_close, .link_state = syn_dp_if_link_state, .mac_addr = syn_dp_if_mac_addr, .change_mtu = syn_dp_if_change_mtu, .xmit = syn_dp_if_xmit, /* * syn_dp_if_xmit() * Dataplane method to transmit the packet */ static netdev_tx_t syn_dp_if_xmit( struct nss_dp_data_plane_ctx *dpc, struct sk_buff *skb) { struct net_device *netdev = dpc->dev; struct nss_dp_dev *gmac_dev = ( struct nss_dp_dev *)netdev_priv(netdev); struct syn_dp_info_tx *tx_info = &gmac_dev->dp_info.syn_info.dp_info_tx; uint16_t ret; ret = syn_dp_tx(tx_info, skb); if (likely(!ret)) { return NETDEV_TX_OK; } /* * Handle the scenario when descriptors are not enough. * Only one DMA channel is supported to assume queue 0. */ if (likely(ret == NETDEV_TX_BUSY)) { /* * Stop the queue if the queue stop is not disabled and return * NETDEV_TX_BUSY. Packet will be requeued or dropped by the caller. * Queue will be re-enabled from Tx Complete. */ if (likely(!dp_global_ctx.tx_requeue_stop)) { netdev_dbg(netdev, "Stopping tx queue due to lack of tx descriptors" ); atomic64_inc((atomic64_t *)&tx_info->tx_stats.tx_packets_requeued); netif_stop_queue(netdev); return NETDEV_TX_BUSY; } } netdev_dbg(netdev, "Drop packet due to no Tx descriptor or invalid pkt" ); atomic64_inc((atomic64_t *)&tx_info->tx_stats.tx_dropped); dev_kfree_skb_any(skb); return NETDEV_TX_OK; } |
调用syn_dp_tx()进行skb的tx。如果tx ring中没有空闲的desc(返回NETDEV_TX_BUSY),且tx_requeue_stop未置位,则调用netif_stop_queue(netdev)暂时停掉queue,等有足够的空闲desc后再重新唤醒;否则直接丢弃该skb。
syn_dp_tx()
nss dp支持 scatter gather (sg), 这个后续再分析,我们这里只是学习如何处理tx ring的。
如果是非线性的skb,调用syn_dp_tx_sg()进行发送
/*
* Check if it's a Scatter Gather packet
*/
if (unlikely(skb_is_nonlinear(skb))) {
return syn_dp_tx_sg(tx_info, skb);
}
对于线性skb, 按下面的方式处理
判断是否至少有1个空闲的tx desc,如果没有,则返回NETDEV_TX_BUSY
/*
* Linear skb processing
*/
if (unlikely((SYN_DP_TX_DESC_SIZE - atomic_read((atomic_t *)&tx_info->busy_tx_desc_cnt)) < 1)) {
atomic64_inc((atomic64_t *)&tx_info->tx_stats.tx_desc_not_avail);
netdev_dbg(netdev, "Not enough descriptors available");
return NETDEV_TX_BUSY;
}
接着,得到skb数据的dma地址,并对skb数据区进行cache clean处理,使dma能读到最新的数据
dma_addr = (dma_addr_t)virt_to_phys(skb->data);
dmac_clean_range_no_dsb((void *)skb->data, (void *)(skb->data + skb->len));
然后将skb放到dma tx ring中,调用syn_dp_tx_set_desc():
/*
* Queue packet to the GMAC rings
*/
syn_dp_tx_set_desc(tx_info, dma_addr, skb, (skb->ip_summed == CHECKSUM_PARTIAL),
(DESC_TX_LAST | DESC_TX_FIRST | DESC_TX_INT_ENABLE | DESC_OWN_BY_DMA));
syn_dp_tx_set_desc()
tx_desc存储了第一个desc。
tx_idx 存储了下一个用于tx的 tx dma desc的索引,它初始化为0,每tx一个就增1,增到最后一个后,又回绕到0。
因此可以得到要tx的dma desc:
struct dma_desc_tx *txdesc = tx_info->tx_desc + tx_idx;
设置txdesc的length和buffer,分别为skb的长度和skb的data的dma地址。
将skb信息存储到tx_buf_pool[]数组,这样释放的时候,可以找到skb
tx_info->tx_buf_pool[tx_idx].skb = skb;
tx_info->tx_buf_pool[tx_idx].len = length;
tx_info->tx_buf_pool[tx_idx].desc_count = 1;
tx_info->tx_buf_pool[tx_idx].shinfo_addr_virt = (size_t)skb->end;
最后修改dma desc的status,修改完status后,这个dma就属于硬件了。硬件可以进行tx.
注意修改status的时候,要先进行write memory barrier,这样保证status写入之前,buffer和length已经写入了。这是barrier的常规用法。
/*
* Ensure all write completed before setting own by dma bit so when gmac
* HW takeover this descriptor, all the fields are filled correctly
*/
wmb();
txdesc->status = (status | ((offload_needed) ? DESC_TX_CIS_TCP_PSEUDO_CS : 0) | ((tx_idx == (SYN_DP_TX_DESC_SIZE - 1)) ? DESC_TX_DESC_END_OF_RING : 0));
写硬件寄存器,开启DMA tx
syn_resume_dma_tx(tx_info->mac_base);
增大pending tx个数,busy_tx_desc_cnt指的是已写入tx ring、但还没有释放的desc个数。
atomic_inc((atomic_t *)&tx_info->busy_tx_desc_cnt);
tx交给硬件后,硬件进行发送处理,发送完毕后,触发中断。
中断处理函数syn_dp_handle_irq()
读取dma status
status = hal_read_relaxed_reg(mac_base, SYN_DMA_STATUS);
处理tx status
1 2 3 4 5 6 7 8 | /* * Schedule Tx napi if Tx complete interrupt is triggered. */ if (status & SYN_DMA_INT_TX_COMPLETED) { syn_clear_tx_dma_status(mac_base); syn_disable_tx_dma_interrupt(mac_base); napi_schedule(&dp_info->dp_info_tx.napi_tx); } |
syn_clear_tx_dma_status()
这个是清掉当前的tx dma 状态,不清掉会持续触发中断。
syn_disable_tx_dma_interrupt()
这个是禁止后续新的tx dma中断
然后调用tx napi
syn_dp_napi_poll_tx()
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 | /* * syn_dp_napi_poll_tx() * Scheduled by napi to process TX. */ static int syn_dp_napi_poll_tx( struct napi_struct *napi, int budget) { struct syn_dp_info_tx *tx_info = ( struct syn_dp_info_tx *)napi; void __iomem *mac_base = tx_info->mac_base; int work_done; work_done = syn_dp_tx_complete(tx_info, budget); if (unlikely(work_done < budget)) { napi_complete(napi); syn_enable_tx_dma_interrupt(mac_base); } return work_done; } |
调用 syn_dp_tx_complete() 进行tx desc回收,以及对应的skb释放。如果work_done少于budget,表明所有已完成的desc都已经回收了,此时完成napi并重新开启中断;否则表明还没处理完,内核将继续调用napi。
syn_dp_tx_complete()
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 | int syn_dp_tx_complete( struct syn_dp_info_tx *tx_info, int budget) { uint32_t status; struct dma_desc_tx *desc = NULL; struct sk_buff *skb; uint32_t tx_skb_index, len; uint32_t tx_packets = 0, total_len = 0; uint32_t num_desc = 0; uint32_t count = 0; struct syn_dp_tx_buf *tx_buf; struct netdev_queue *nq; int wake_free_task = 0; while (tx_info->tx_comp_idx < tx_info->tx_idx) { tx_skb_index = tx_info->tx_comp_idx & SYN_DP_TX_DESC_MAX_INDEX; prefetch(( void *)tx_info->tx_buf_pool[tx_skb_index].shinfo_addr_virt); desc = tx_info->tx_desc + tx_skb_index; status = desc->status; if (unlikely(syn_dp_gmac_is_tx_desc_owned_by_dma(status))) { /* * Descriptor still held by gmac dma, so we are done. */ break ; } /* * If fragments were transmitted in descriptor, * calculate the number of descriptors used by * the fragments in order to free it. */ tx_buf = &tx_info->tx_buf_pool[tx_skb_index]; num_desc = tx_buf->desc_count; skb = tx_buf->skb; syn_dp_tx_clear_buf_entry(tx_info, tx_skb_index); if (likely(status & DESC_TX_LAST)) { if (likely(!(status & DESC_TX_ERROR))) { /* * No error, record tx pkts/bytes and collision. */ tx_packets++; total_len += len; } else { /* * Some error happened, collect error statistics. 
*/ syn_dp_tx_error_cnt(tx_info, status); } } smp_wmb(); tx_info->tx_comp_idx += num_desc; if (nss_skb_free_thread()) { if (kfifo_put(&tx_info->free_fifo, skb) > 0) { wake_free_task = 1; } else { dev_kfree_skb_any(skb); } } else napi_consume_skb(skb, budget); count++; if (count >= budget) break ; } if (wake_free_task) { wake_up_interruptible(&tx_info->txwq); } atomic64_add(tx_packets, (atomic64_t *)&tx_info->tx_stats.tx_packets); atomic64_add(total_len, (atomic64_t *)&tx_info->tx_stats.tx_bytes); nq = netdev_get_tx_queue(tx_info->netdev, SYN_DP_QUEUE_INDEX); /* * Wake up queue if stopped earlier due to lack of descriptors */ if (unlikely(netif_tx_queue_stopped(nq)) && netif_carrier_ok(tx_info->netdev)) { netif_wake_queue(tx_info->netdev); } return count; } |
这是一个ring,读取当前dma desc,如果不是被dma拥有,说明dma完成了发送,就可以释放skb. 然后继续下一个检查,最多检查budget个。