流式DMA映射:缓冲区通常来自上层,例如通过 kmalloc、__get_free_pages 等接口申请的内存,这种情况下一般使用流式DMA映射。流式DMA映射大多需要对cache进行使无效(invalidate)或清除(clean)操作,以解决cache一致性问题,接口较为复杂。
XDMA 使用流式DMA映射:
dma_map_single(dev, addr, size, direction);
dma_unmap_single(dev, dma_handle, size, direction);
如果设备要求较大的DMA缓冲区,在支持SG模式时,可申请不连续的DMA缓冲区进行映射。
example:参考 https://github.com/Xilinx/open-nic-driver
这里以网卡发送 skb 缓冲区时所做的映射为例:
/**
 * onic_xmit_frame - map one skb for DMA and post it on its TX ring
 * @skb: packet to transmit; the TX queue is chosen by skb->queue_mapping
 * @dev: network device the packet is sent on
 *
 * The skb payload is mapped with a streaming DMA mapping (DMA_TO_DEVICE),
 * a descriptor is written at ring->next_to_use, and the skb/mapping are
 * recorded in q->buffer[] so that onic_tx_clean() can unmap and free them
 * later.
 *
 * Return: NETDEV_TX_BUSY when the ring is full (the skb is left untouched),
 * NETDEV_TX_OK otherwise. On NETDEV_TX_OK the skb has either been queued or
 * dropped and must not be used by the caller any more.
 */
netdev_tx_t onic_xmit_frame(struct sk_buff *skb, struct net_device *dev)
{
	struct onic_private *priv = netdev_priv(dev);
	struct onic_tx_queue *q;
	struct onic_ring *ring;
	struct qdma_h2c_st_desc desc;
	u16 qid = skb->queue_mapping;
	dma_addr_t dma_addr;
	u8 *desc_ptr;
	int rv;
	bool debug = 0;

	q = priv->tx_queue[qid];
	ring = &q->ring;

	/* Release buffers up to the write-back index before checking for room. */
	onic_tx_clean(q);

	if (onic_ring_full(ring)) {
		if (debug)
			netdev_info(dev, "ring is full");
		return NETDEV_TX_BUSY;
	}

	/* minimum Ethernet packet length is 60 */
	rv = skb_put_padto(skb, ETH_ZLEN);
	if (rv < 0) {
		/*
		 * skb_put_padto() frees the skb itself when padding fails, so
		 * the skb must not be touched again: account for the drop and
		 * report the packet as consumed.
		 */
		priv->netdev_stats.tx_dropped++;
		priv->netdev_stats.tx_errors++;
		return NETDEV_TX_OK;
	}

	dma_addr = dma_map_single(&priv->pdev->dev, skb->data, skb->len,
				  DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(&priv->pdev->dev, dma_addr))) {
		/* Mapping failed: drop the packet rather than asking for a retry. */
		dev_kfree_skb(skb);
		priv->netdev_stats.tx_dropped++;
		priv->netdev_stats.tx_errors++;
		return NETDEV_TX_OK;
	}

	/* Build the descriptor in the slot at next_to_use. */
	desc_ptr = ring->desc + QDMA_H2C_ST_DESC_SIZE * ring->next_to_use;
	desc.len = skb->len;
	desc.src_addr = dma_addr;
	desc.metadata = skb->len;
	qdma_pack_h2c_st_desc(desc_ptr, &desc);

	/* Remember what has to be unmapped and freed once this slot is cleaned. */
	q->buffer[ring->next_to_use].skb = skb;
	q->buffer[ring->next_to_use].dma_addr = dma_addr;
	q->buffer[ring->next_to_use].len = skb->len;

	priv->netdev_stats.tx_packets++;
	priv->netdev_stats.tx_bytes += skb->len;

	onic_ring_increment_head(ring);

	/*
	 * Only publish the new head when the ring is full or the stack has no
	 * further packets queued right behind this one. The "more packets
	 * pending" hint is read differently depending on the kernel version.
	 * NOTE(review): the RHEL < 8.1 branch assumes skb->xmit_more exists on
	 * those kernels - confirm against the supported RHEL releases.
	 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 3, 0)
	if (onic_ring_full(ring) || !netdev_xmit_more()) {
#elif defined(RHEL_RELEASE_CODE)
#if RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(8, 1)
	if (onic_ring_full(ring) || !netdev_xmit_more()) {
#else
	if (onic_ring_full(ring) || !skb->xmit_more) {
#endif
#else
	if (onic_ring_full(ring) || !skb->xmit_more) {
#endif
		/* Order the descriptor writes before the head index update. */
		wmb();
		onic_set_tx_head(priv->hw.qdma, qid, ring->next_to_use);
	}

	return NETDEV_TX_OK;
}
/**
 * onic_tx_clean - unmap and free TX buffers up to the write-back index
 * @q: TX queue to clean
 *
 * Bit 0 of q->state acts as a busy flag: if it is already set the function
 * returns immediately, otherwise it is held for the duration of the clean-up
 * and cleared on exit. The number of slots to release is the distance from
 * ring->next_to_clean to the cidx unpacked from ring->wb, with wrap-around
 * corrected by onic_ring_get_real_count().
 */
static void onic_tx_clean(struct onic_tx_queue *q)
{
	struct onic_private *priv = netdev_priv(q->netdev);
	struct onic_ring *ring = &q->ring;
	struct qdma_wb_stat wb;
	int pending;

	/* Somebody else is already cleaning this queue. */
	if (test_and_set_bit(0, q->state))
		return;

	qdma_unpack_wb_stat(&wb, ring->wb);

	/* Nothing new since the last pass. */
	if (wb.cidx == ring->next_to_clean)
		goto out;

	pending = wb.cidx - ring->next_to_clean;
	if (pending < 0)
		pending += onic_ring_get_real_count(ring);

	while (pending-- > 0) {
		struct onic_tx_buffer *buf = &q->buffer[ring->next_to_clean];

		/* Tear down the streaming mapping created at transmit time. */
		dma_unmap_single(&priv->pdev->dev, buf->dma_addr, buf->len,
				 DMA_TO_DEVICE);
		dev_kfree_skb_any(buf->skb);

		/* Presumably advances ring->next_to_clean to the next slot. */
		onic_ring_increment_tail(ring);
	}

out:
	clear_bit(0, q->state);
}
一致性DMA映射:申请的缓存区能够使用cache,并且保持cache一致性。一致性映射具有很长的生命周期,在这段时间内占用的映射寄存器,即使不使用也不会释放。生命周期为该驱动的生命周期。
主要用到的函数:
void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle,gfp_t gfp);
void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,dma_addr_t handle);
dma_alloc_coherent() 的返回值为申请到的DMA缓冲区的虚拟地址;此外,该函数还通过参数 handle 返回DMA缓冲区的总线地址。handle 的类型为 dma_addr_t,代表的是总线地址。dma_free_coherent() 用于释放该缓冲区,没有返回值。
—
关于页面的映射,可以参考xilinx的XDMA或QDMA驱动:https://github.com/Xilinx/dma_ip_drivers