我有一台 Supermicro 2027TR-HTFRF 服务器,用作 Xen 服务器。
我想在 ConnectX-3 卡上启用 SR-IOV,这样就可以把 InfiniBand 暴露给虚拟机。
按照这个方法,我设法让 I350 以太网公开 VF。
随后我添加了 /etc/modprobe.d/mlx4_core.conf 文件,内容如下:
options max_vfs=4
尽管 max_vfs 是 mlx4_core 模块的有效参数,但系统并没有为 Mellanox 设备创建虚拟功能(VF)。
我得到的错误是
mlx4_core: `4' invalid for parameter `max_vfs'
我也尝试了值 1,但出现了类似的错误
mlx4_core: `1' invalid for parameter `max_vfs'
lspci -v 的输出
06:00.0 Network controller: Mellanox Technologies MT27500 Family [ConnectX-3]
Subsystem: Mellanox Technologies Device 0051
Flags: bus master, fast devsel, latency 0, IRQ 11
Memory at df900000 (64-bit, non-prefetchable) [size=1M]
Memory at de000000 (64-bit, prefetchable) [size=8M]
Capabilities: [40] Power Management version 3
Capabilities: [48] Vital Product Data
Capabilities: [9c] MSI-X: Enable- Count=128 Masked-
Capabilities: [60] Express Endpoint, MSI 00
Capabilities: [100] Alternative Routing-ID Interpretation (ARI)
Capabilities: [148] Device Serial Number 00-25-90-ff-ff-17-57-24
Capabilities: [18c] #19
Kernel modules: mlx4_core
上面的 Capabilities 列表中没有出现 SR-IOV 能力项,因此看起来这张卡上的 SR-IOV 不可用。
然后我发现了这个帖子: http://community.mellanox.com/thread/1073
但我用的是 ubuntu 12.04。
接下来我使用 apt-get 安装了 mstflint。
然后我尝试用它来识别这张卡的具体型号。
mstflint -d 06:00.0 q
不幸的是,mstflint 崩溃了。
Warning: memory access to device 06:00.0 failed: Input/output error.
Warning: Fallback on IO: much slower, and unsafe if device in use.
*** buffer overflow detected ***: mstflint terminated
======= Backtrace: =========
/lib/x86_64-linux-gnu/libc.so.6(__fortify_fail+0x37)[0x7f66d91ac817]
一些 strace 输出:
strace mstflint -d 06:00.0 q
...
open("/proc/bus/pci/06/00.0", O_RDWR|O_DSYNC) = 3
ioctl(3, IIOCNETDIF, 0xdf900000) = 0
mmap(NULL, 1048576, PROT_READ|PROT_WRITE, MAP_SHARED, 3, 0xdf900000) = 0x7f0e0a200000
munmap(0x7f0e0a200000, 65536) = 0
close(3) = 0
open("/dev/mem", O_RDWR|O_DSYNC) = 3
mmap(NULL, 1048576, PROT_READ|PROT_WRITE, MAP_SHARED, 3, 0xdf900000) = 0x7f0e092f2000
munmap(0x7f0e092f2000, 65536) = 0
close(3) = 0
write(2, "Warning: memory access to device"..., 136Warning: memory access to device 06:00.0 failed: Input/output error.
Warning: Fallback on IO: much slower, and unsafe if device in use.
) = 136
open("/sys/bus/pci/devices/0000:06:00.0/config", O_RDWR|O_DSYNC) = 3
pwrite(3, "\375\374\373\372", 4, 88) = 4
pread(3, "\375\374\373\372", 4, 88) = 4
close(3) = 0
open("/dev/tty", O_RDWR|O_NOCTTY|O_NONBLOCK) = 3
writev(3, [{"*** ", 4}, {"buffer overflow detected", 24}, {" ***: ", 6}, {"mstflint", 8}, {" terminated\n", 12}], 5*** buffer overflow detected ***: mstflint terminated
) = 54
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f0e0a3f9000
write(3, "======= Backtrace: =========\n", 29======= Backtrace: =========
) = 29
writev(3, [{"/lib/x86_64-linux-gnu/libc.so.6", 31}, {"(", 1}, {"__fortify_fail", 14}, {"+0x", 3}, {"37", 2}, {")", 1}, {"[0x", 3}, {"7f0e094fc817", 12}, {"]\n", 2}], 9/lib/x86_64-linux-gnu/libc.so.6(__fortify_fail+0x37)[0x7f0e094fc817]
) = 69
writev(3, [{"/lib/x86_64-linux-gnu/libc.so.6", 31}, {"(", 1}, {"+0x", 3}, {"109710", 6}, {")", 1}, {"[0x", 3}, {"7f0e094fb710", 12}, {"]\n", 2}], 8/lib/x86_64-linux-gnu/libc.so.6(+0x109710)[0x7f0e094fb710]
) = 59
writev(3, [{"/lib/x86_64-linux-gnu/libc.so.6", 31}, {"(", 1}, {"+0x", 3}, {"108b79", 6}, {")", 1}, {"[0x", 3}, {"7f0e094fab79", 12}, {"]\n", 2}], 8/lib/x86_64-linux-gnu/libc.so.6(+0x108b79)[0x7f0e094fab79]
) = 59
writev(3, [{"/lib/x86_64-linux-gnu/libc.so.6", 31}, {"(", 1}, {"_IO_default_xsputn", 18}, {"+0x", 3}, {"dd", 2}, {")", 1}, {"[0x", 3}, {"7f0e0946e13d", 12}, {"]\n", 2}], 9/lib/x86_64-linux-gnu/libc.so.6(_IO_default_xsputn+0xdd)[0x7f0e0946e13d]
) = 73
writev(3, [{"/lib/x86_64-linux-gnu/libc.so.6", 31}, {"(", 1}, {"_IO_vfprintf", 12}, {"+0x", 3}, {"1ae7", 4}, {")", 1}, {"[0x", 3}, {"7f0e0943c4a7", 12}, {"]\n", 2}], 9/lib/x86_64-linux-gnu/libc.so.6(_IO_vfprintf+0x1ae7)[0x7f0e0943c4a7]
) = 69
writev(3, [{"/lib/x86_64-linux-gnu/libc.so.6", 31}, {"(", 1}, {"__vsprintf_chk", 14}, {"+0x", 3}, {"94", 2}, {")", 1}, {"[0x", 3}, {"7f0e094fac14", 12}, {"]\n", 2}], 9/lib/x86_64-linux-gnu/libc.so.6(__vsprintf_chk+0x94)[0x7f0e094fac14]
) = 69
writev(3, [{"/lib/x86_64-linux-gnu/libc.so.6", 31}, {"(", 1}, {"__sprintf_chk", 13}, {"+0x", 3}, {"7d", 2}, {")", 1}, {"[0x", 3}, {"7f0e094fab5d", 12}, {"]\n", 2}], 9/lib/x86_64-linux-gnu/libc.so.6(__sprintf_chk+0x7d)[0x7f0e094fab5d]
) = 68
writev(3, [{"mstflint", 8}, {"[0x", 3}, {"40de46", 6}, {"]\n", 2}], 4mstflint[0x40de46]
) = 19
writev(3, [{"mstflint", 8}, {"[0x", 3}, {"40ee56", 6}, {"]\n", 2}], 4mstflint[0x40ee56]
) = 19
writev(3, [{"mstflint", 8}, {"[0x", 3}, {"406475", 6}, {"]\n", 2}], 4mstflint[0x406475]
) = 19
writev(3, [{"mstflint", 8}, {"[0x", 3}, {"408d00", 6}, {"]\n", 2}], 4mstflint[0x408d00]
) = 19
writev(3, [{"/lib/x86_64-linux-gnu/libc.so.6", 31}, {"(", 1}, {"__libc_start_main", 17}, {"+0x", 3}, {"ed", 2}, {")", 1}, {"[0x", 3}, {"7f0e0941376d", 12}, {"]\n", 2}], 9/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xed)[0x7f0e0941376d]
) = 72
writev(3, [{"mstflint", 8}, {"[0x", 3}, {"401af9", 6}, {"]\n", 2}], 4mstflint[0x401af9]
) = 19
write(3, "======= Memory map: ========\n", 29======= Memory map: ========
) = 29
open("/proc/self/maps", O_RDONLY) = 4
read(4, "00400000-0041a000 r-xp 00000000 "..., 1024) = 1024
write(3, "00400000-0041a000 r-xp 00000000 "..., 102400400000-0041a000 r-xp 00000000 08:01 220 /usr/bin/mstflint
00619000-0061a000 r--p 00019000 08:01 220 /usr/bin/mstflint
0061a000-0061b000 rw-p 0001a000 08:01 220 /usr/bin/mstflint
024a3000-024c4000 rw-p 00000000 00:00 0 [heap]
7f0e09302000-7f0e093f2000 rw-s df910000 00:05 1028 /dev/mem
7f0e093f2000-7f0e095a7000 r-xp 00000000 08:01 135350 /lib/x86_64-linux-gnu/libc-2.15.so
7f0e095a7000-7f0e097a6000 ---p 001b5000 08:01 135350 /lib/x86_64-linux-gnu/libc-2.15.so
7f0e097a6000-7f0e097aa000 r--p 001b4000 08:01 135350 /lib/x86_64-linux-gnu/libc-2.15.so
7f0e097aa000-7f0e097ac000 rw-p 001b8000 08:01 135350 /lib/x86_64-linux-gnu/libc-2.15.so
7f0e097ac000-7f0e097b1000 rw-p 00000000 00:00 0
7f0e097b1000-7f0e097c6000 r-xp 00000000 08:01 131116 /lib/x86_64-linux-gnu/libgcc_s.so.1) = 1024
read(4, "\n7f0e097c6000-7f0e099c5000 ---p "..., 1024) = 1024
write(3, "\n7f0e097c6000-7f0e099c5000 ---p "..., 1024
7f0e097c6000-7f0e099c5000 ---p 00015000 08:01 131116 /lib/x86_64-linux-gnu/libgcc_s.so.1
7f0e099c5000-7f0e099c6000 r--p 00014000 08:01 131116 /lib/x86_64-linux-gnu/libgcc_s.so.1
7f0e099c6000-7f0e099c7000 rw-p 00015000 08:01 131116 /lib/x86_64-linux-gnu/libgcc_s.so.1
7f0e099c7000-7f0e09ac2000 r-xp 00000000 08:01 135361 /lib/x86_64-linux-gnu/libm-2.15.so
7f0e09ac2000-7f0e09cc1000 ---p 000fb000 08:01 135361 /lib/x86_64-linux-gnu/libm-2.15.so
7f0e09cc1000-7f0e09cc2000 r--p 000fa000 08:01 135361 /lib/x86_64-linux-gnu/libm-2.15.so
7f0e09cc2000-7f0e09cc3000 rw-p 000fb000 08:01 135361 /lib/x86_64-linux-gnu/libm-2.15.so
7f0e09cc3000-7f0e09da5000 r-xp 00000000 08:01 30579 /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.16
7f0e09da5000-7f0e09fa4000 ---p 000e2000 08:01 30579 /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.16
7f0e09fa4000-7f0e09fac000 ) = 1024
read(4, "r--p 000e1000 08:01 30579 "..., 1024) = 1024
write(3, "r--p 000e1000 08:01 30579 "..., 1024r--p 000e1000 08:01 30579 /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.16
7f0e09fac000-7f0e09fae000 rw-p 000e9000 08:01 30579 /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.16
7f0e09fae000-7f0e09fc3000 rw-p 00000000 00:00 0
7f0e09fc3000-7f0e09fd9000 r-xp 00000000 08:01 131300 /lib/x86_64-linux-gnu/libz.so.1.2.3.4
7f0e09fd9000-7f0e0a1d8000 ---p 00016000 08:01 131300 /lib/x86_64-linux-gnu/libz.so.1.2.3.4
7f0e0a1d8000-7f0e0a1d9000 r--p 00015000 08:01 131300 /lib/x86_64-linux-gnu/libz.so.1.2.3.4
7f0e0a1d9000-7f0e0a1da000 rw-p 00016000 08:01 131300 /lib/x86_64-linux-gnu/libz.so.1.2.3.4
7f0e0a1da000-7f0e0a1fc000 r-xp 00000000 08:01 135367 /lib/x86_64-linux-gnu/ld-2.15.so
7f0e0a210000-7f0e0a300000 rw-s df910000 00:03 4026533088 /proc/bus/pci/06/00.0
7f0e0a300000-7f0e0a3f0000 rw-s df910000 00:0e 19851 /sys/devices/pci0000:00/0000:00:03.2/0000:06:) = 1024
read(4, "00.0/resource0\n7f0e0a3f0000-7f0e"..., 1024) = 570
write(3, "00.0/resource0\n7f0e0a3f0000-7f0e"..., 57000.0/resource0
7f0e0a3f0000-7f0e0a3f5000 rw-p 00000000 00:00 0
7f0e0a3f9000-7f0e0a3fc000 rw-p 00000000 00:00 0
7f0e0a3fc000-7f0e0a3fd000 r--p 00022000 08:01 135367 /lib/x86_64-linux-gnu/ld-2.15.so
7f0e0a3fd000-7f0e0a3ff000 rw-p 00023000 08:01 135367 /lib/x86_64-linux-gnu/ld-2.15.so
7fffdd53f000-7fffdd560000 rw-p 00000000 00:00 0 [stack]
7fffdd5ff000-7fffdd600000 r-xp 00000000 00:00 0 [vdso]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]
) = 570
read(4, "", 1024) = 0
close(4) = 0
rt_sigprocmask(SIG_UNBLOCK, [ABRT], NULL, 8) = 0
gettid() = 4122
tgkill(4122, 4122, SIGABRT) = 0
--- SIGABRT (Aborted) @ 0 (0) ---
+++ killed by SIGABRT (core dumped) +++
Aborted (core dumped)
这时我有点陷入困境。
更新:我设法从已启用 SR-IOV 的 SuperMicro 获取固件更新。
此外,我还需要把 options max_vfs= 改为 options num_vfs=,参数名称似乎略有不同。这样就足以让它在 Ubuntu 下运行了。
安装固件时我使用了这个方法:http://www.panticz.de/mellanox 。不过,在调用 mst start 之前,我还必须对每个模块手动执行 insmod;modprobe 不起作用。
答案1
首先检查一些显而易见的东西:
- 在 BIOS 设置中,确保英特尔 VT-d 和英特尔输入/输出技术均已启用。
- 将该卡插入不同的 PCI 插槽进行尝试。
- 联系软件包维护者,报告该工具(mstflint)的这个崩溃错误。