2017年10月12日 星期四

DPDK KNI interface

DPDK 採用 bypass kernel network stack 的機制,因此被 DPDK 接管的網路介面無法用 ifconfig 看到,也無法直接使用 tcpdump 來 debug。
DPDK 應用程式必須透過 Kernel Network Interface (KNI) 建立虛擬網路介面 (vEthX),
才可以使用一般的 Linux kernel TCP/IP stack。

The benefits of using the DPDK KNI are:
➠ Faster than existing Linux TUN/TAP interfaces (by eliminating system calls and copy_to_user()/copy_from_user() operations).
➠ Allows management of DPDK ports using standard Linux net tools such as ethtool, ifconfig and tcpdump.
➠ Allows an interface with the kernel network stack.

Elements of KNI in DPDK



Kernel Module (rte_kni.ko)
CONFIG_RTE_KNI_KMOD
lib/librte_eal/linuxapp/kni
lib/librte_eal/linuxapp/kni/ethtool
├── compat.h
├── ethtool
├── kni_dev.h
├── kni_ethtool.c
├── kni_fifo.h
├── kni_misc.c
└── kni_net.c
//private information for a kni device
struct kni_dev
struct kni_net

//
struct rte_kni_request
struct rte_kni_fifo
struct rte_kni_mbuf
struct rte_kni_device_info
Static Library (librte_kni.a)
CONFIG_RTE_LIBRTE_KNI
lib/librte_kni
├── rte_kni.c
├── rte_kni_fifo.h
└── rte_kni.h
// KNI context
struct rte_kni

//Structure for configuring KNI device.
struct rte_kni_conf

//Structure which has the function pointers for KNI interface.
struct rte_kni_ops

//KNI memzone pool slot
struct rte_kni_memzone_slot

//KNI memzone pool
struct rte_kni_memzone_pool

//
struct rte_kni_fifo
struct rte_kni_device_info
struct rte_mempool
struct rte_memzone
struct rte_mbuf
rte_kni.c
rte_kni_init()
librte_eal (librte_eal.a)
lib/librte_eal/common/include
rte_memzone.h
//A structure describing a memzone, which is a contiguous portion of physical memory identified by a name.
struct rte_memzone
linuxapp/eal/
eal.c
//Launch threads, called at application init().
rte_eal_init()
common/
eal_common_launch.c
rte_eal_mp_remote_launch()
librte_mempool (librte_mempool.a)
lib/librte_mempool
rte_mempool.h
//The RTE mempool structure.
struct rte_mempool
librte_mbuf (librte_mbuf.a)
lib/librte_mbuf
rte_mbuf.h
//The generic rte_mbuf, containing a packet mbuf.
struct rte_mbuf

rte_pktmbuf_alloc()
rte_mbuf.c
//helper to create a mbuf pool
rte_pktmbuf_pool_create()
//Free a packet mbuf back into its original mempool.
rte_pktmbuf_free()
librte_ether (librte_ethdev.a)
lib/librte_ether
rte_ethdev.h
//A structure used to configure an Ethernet port. Depending upon the RX multi-queue mode, extra advanced configuration settings may be needed.
struct rte_eth_conf

//A structure used to retrieve link-level information of an Ethernet port.
struct rte_eth_link

//Ethernet device information
struct rte_eth_dev_info

rte_eth_rx_burst()
rte_ethdev.c
rte_eth_dev_count()
rte_eth_dev_configure()
rte_eth_dev_socket_id()
rte_eth_rx_queue_setup()
rte_eth_tx_queue_setup()
rte_eth_promiscuous_enable()
rte_eth_link_get_nowait()
rte_eth_dev_start()
rte_eth_dev_stop()
rte_eth_dev_info_get()
Drivers (librte_pmd_kni.a)
CONFIG_RTE_LIBRTE_PMD_KNI
drivers/net/kni
└── rte_eth_kni.c
struct eth_kni_args
struct pmd_queue_stats
struct pmd_queue
Others (rte_kni.ko / librte_kni.a)
lib/librte_eal/linuxapp/eal/include/exec-env
└── rte_kni_common.h
struct
//Structure for KNI request.
  rte_kni_request
//Fifo struct mapped in a shared memory.
  rte_kni_fifo
//The kernel image of the rte_mbuf struct
  rte_kni_mbuf
//Struct used to create a KNI device.
//Passed to the kernel in IOCTL call
  rte_kni_device_info
DPDK Application (kni)
CONFIG_RTE_LIBRTE_KNI
examples/kni
└── main.c
//Structure of port parameters
struct kni_port_params

//Structure type for recording kni interface specific stats
struct kni_interface_stats

//
struct rte_mempool
struct rte_eth_conf
struct rte_eth_link
struct rte_eth_dev_info
struct rte_mbuf
struct rte_kni_conf
struct rte_kni_ops

Kernel Module
# wget http://fast.dpdk.org/rel/dpdk-17.05.2.tar.xz
# tar -Jxvf dpdk-17.05.2.tar.xz
# cd dpdk-stable-17.05.2/
# make config T=x86_64-native-linuxapp-gcc
Configuration done
# make install T=x86_64-native-linuxapp-gcc
# insmod ./x86_64-native-linuxapp-gcc/kmod/rte_kni.ko kthread_mode=multiple
DPDK Application (kni)
# export RTE_SDK=`pwd`
# export RTE_TARGET=x86_64-native-linuxapp-gcc
# make -C examples/kni
--config="(port,lcore_rx,lcore_tx[,lcore_kthread,...])[,(port,lcore_rx,lcore_tx[,lcore_kthread,...])]":
# ./examples/kni/build/app/kni -c 0xFFFFF -n 4 -- -P -p 0x3 --config="(0,0,1),(1,2,3)"

ifconfig -a
vEth0     Link encap:Ethernet  HWaddr 52:3c:fd:56:18:63
          BROADCAST MULTICAST  MTU:1500  Metric:1
          RX packets:29 errors:0 dropped:3 overruns:0 frame:0
          TX packets:0 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:1000
          RX bytes:1993 (1.9 KB)  TX bytes:0 (0.0 B)

vEth1     Link encap:Ethernet  HWaddr 56:54:a3:50:02:0c
          BROADCAST MULTICAST  MTU:1500  Metric:1
          RX packets:23 errors:0 dropped:3 overruns:0 frame:0
          TX packets:0 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:1000
          RX bytes:1577 (1.5 KB)  TX bytes:0 (0.0 B)
ifconfig vEth0 up; ifconfig vEth1 up
top
...
%Cpu0  :100.0 us,  0.0 sy,  0.0 ni,  0.0 id,  0.0 wa,  0.0 hi,  0.0 si,  0.0 st
%Cpu1  :100.0 us,  0.0 sy,  0.0 ni,  0.0 id,  0.0 wa,  0.0 hi,  0.0 si,  0.0 st
%Cpu2  :100.0 us,  0.0 sy,  0.0 ni,  0.0 id,  0.0 wa,  0.0 hi,  0.0 si,  0.0 st
%Cpu3  :100.0 us,  0.0 sy,  0.0 ni,  0.0 id,  0.0 wa,  0.0 hi,  0.0 si,  0.0 st
   PID USER      PR  NI    VIRT    RES    SHR S  %CPU %MEM     TIME+ COMMAND
 77109 root      20   0 36.178g   6220   3408 R 400.0  0.0 575:00.07 kni
 77128 root      20   0       0      0      0 S   1.0  0.0   1:17.20 kni_vEth0
 77136 root      20   0       0      0      0 S   0.7  0.0   1:16.38 kni_vEth1
tcpdump -i vEth0

Test 
    ➠ test/test/ 
./test/test/test_kni.c


DPDK KNI Kernel Module (rte_kni.ko)

# modinfo rte_kni.ko
filename:       /home/bh0322/workspace/dpdk-stable-17.05.2/./build/kmod/rte_kni.ko
description:    Kernel Module for managing kni devices
author:          Intel Corporation
license:         Dual BSD/GPL
srcversion:     C1BCE1852D37B5F833BB878
depends:
vermagic:       3.16.0-30-generic SMP mod_unload modversions
parm:           lo_mode:KNI loopback mode (default=lo_mode_none):
    lo_mode_none        Kernel loopback disabled
    lo_mode_fifo          Enable kernel loopback with fifo
    lo_mode_fifo_skb    Enable kernel loopback with fifo and skb buffer
 (charp)
parm:           kthread_mode:Kernel thread mode (default=single):
    single     Single kernel thread mode enabled.
    multiple  Multiple kernel thread mode enabled.

➠ Transmit a packet (called by the kernel)
static int
kni_net_tx(struct sk_buff *skb, struct net_device *dev)
(1) Dequeue an mbuf from alloc_q and copy the skb data into it
(2) Enqueue the mbuf into tx_q
(3) Free the skb and update statistics

➠ Struct used to create a KNI device. Passed to the kernel in IOCTL call
lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
struct rte_kni_device_info
(1) The interface name.
(2) Physical addresses of the corresponding memzones for the relevant FIFOs.
(3) Mbuf mempool details, both physical and virtual (to calculate the offset for mbuf pointers).
(4) PCI information.
(5) Core affinity (force_bind, core_id).

# ls -al /dev/kni
crw------- 1 root root 10, 57 Sep 26 12:15 /dev/kni
# cat /proc/misc
57 kni  //minor number

# dmidecode -t memory | grep Size

Alternative Solutions

➠ Tun/Tap  
➠ Tap PMD (a patch was recently submitted)  
➠ af_packet  
➠ virtio-user + vhost-net  
➠ Bifurcated driver

System Environment

Server Platform: Dell PowerEdge R630
CPU: Intel(R) Xeon(R) CPU E5-2660 v3 @ 2.60GHz Number of cores 20
Memory: Total 256 GB over 24 channels @ 2133 MHz
NICs: 2x Intel® 82599ES 10-Gigabit SFI/SFP+ Network Connection
Driver ixgbe DPDK PMD
Operating System: Ubuntu 14.04.2 LTS
Linux kernel version: 3.16.0-30-generic
GCC version: Ubuntu 4.8.4-2ubuntu1~14.04.3
DPDK version: 17.05.2


參考資料

2 則留言: