我的服务器已经正常运行了 173 天,之后开始出现下面这种情况。将 CentOS 6.6 内核升级到最新版本也无济于事。
重新启动大约需要 10 分钟,然后同样的问题又会重新出现。
我运行的是一台专用服务器,配置如下:Intel Core i7 980X、MSI X58-E Pro 主板、2 块 Intel SSD 240 GB(软件 RAID 0)+ 2 块 WD RE4 3 TB(软件 RAID 0)。
我已经更换了除 SSD 驱动器以外的所有硬件,但问题仍然存在 ;(
INFO: task qmail-queue:7091 blocked for more than 120 seconds.
Not tainted 2.6.32-504.30.3.el6.x86_64 #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
qmail-queue D 0000000000000003 0 7091 7090 0x00000080
ffff88031b30fdc8 0000000000000082 ffff88031b30fd90 ffff88031b30fd8c
000000000000000e ffff88063fc24780 000000a2c09d83a0 ffff8800282f58c0
0000000000000400 00000001000618c3 ffff8806333d1068 ffff88031b30ffd8
Call Trace:
[<ffffffffa02255a5>] jbd2_log_wait_commit+0xc5/0x140 [jbd2]
[<ffffffff8109ec20>] ? autoremove_wake_function+0x0/0x40
[<ffffffffa0225938>] jbd2_complete_transaction+0x68/0xb0 [jbd2]
[<ffffffffa0256061>] ext4_sync_file+0x121/0x1d0 [ext4]
[<ffffffff811c0c51>] vfs_fsync_range+0xa1/0x100
[<ffffffff811c0d1d>] vfs_fsync+0x1d/0x20
[<ffffffff811c0d5e>] do_fsync+0x3e/0x60
[<ffffffff811c0db0>] sys_fsync+0x10/0x20
[<ffffffff8100b0d2>] system_call_fastpath+0x16/0x1b
INFO: task qmail-send:1606 blocked for more than 120 seconds.
Not tainted 2.6.32-504.30.3.el6.x86_64 #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
qmail-send D 0000000000000003 0 1606 1594 0x00000080
ffff8806327b9dc8 0000000000000086 ffff8806327b9e70 0000000000000001
000000000000000e ffff8806327b9de8 ffff8806327b9d88 ffffffff81123f80
ffff88036fa7c220 ffff8806327b9de8 ffff88062fc31ad8 ffff8806327b9fd8
Call Trace:
[<ffffffff81123f80>] ? find_get_pages_tag+0x40/0x130
[<ffffffff8109ef4e>] ? prepare_to_wait+0x4e/0x80
[<ffffffffa02255a5>] jbd2_log_wait_commit+0xc5/0x140 [jbd2]
[<ffffffff8109ec20>] ? autoremove_wake_function+0x0/0x40
[<ffffffffa0225938>] jbd2_complete_transaction+0x68/0xb0 [jbd2]
[<ffffffffa0256061>] ext4_sync_file+0x121/0x1d0 [ext4]
[<ffffffff811c0c51>] vfs_fsync_range+0xa1/0x100
[<ffffffff811c0d1d>] vfs_fsync+0x1d/0x20
[<ffffffff811c0d5e>] do_fsync+0x3e/0x60
[<ffffffff811c0db0>] sys_fsync+0x10/0x20
[<ffffffff8100b0d2>] system_call_fastpath+0x16/0x1b
INFO: task nginx:4141 blocked for more than 120 seconds.
Not tainted 2.6.32-504.30.3.el6.x86_64 #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
nginx D 0000000000000003 0 4141 4140 0x00000080
ffff880633079be8 0000000000000082 0000000000000000 ffff880632a50ae8
0000000000000000 ffff8800282b5928 000000bc7bda8648 0000000000000003
ffff880633079b88 000000010007c912 ffff88063214c5f8 ffff880633079fd8
Call Trace:
[<ffffffffa021e08a>] start_this_handle+0x25a/0x480 [jbd2]
[<ffffffff8109ec20>] ? autoremove_wake_function+0x0/0x40
[<ffffffffa021e495>] jbd2_journal_start+0xb5/0x100 [jbd2]
[<ffffffff811f1eac>] ? dqput+0x5c/0x200
[<ffffffffa0272e66>] ext4_journal_start_sb+0x56/0xe0 [ext4]
[<ffffffffa02663e7>] ext4_create+0x77/0x150 [ext4]
[<ffffffff8119d9e6>] vfs_create+0xe6/0x110
[<ffffffff811a186e>] do_filp_open+0xa8e/0xd20
[<ffffffff81193cd4>] ? cp_new_stat+0xe4/0x100
[<ffffffff8129943a>] ? strncpy_from_user+0x4a/0x90
[<ffffffff811ae392>] ? alloc_fd+0x92/0x160
[<ffffffff8118b157>] do_sys_open+0x67/0x130
[<ffffffff8118b260>] sys_open+0x20/0x30
[<ffffffff8100b0d2>] system_call_fastpath+0x16/0x1b
INFO: task nginx:4147 blocked for more than 120 seconds.
Not tainted 2.6.32-504.30.3.el6.x86_64 #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
nginx D 0000000000000001 0 4147 4140 0x00000080
ffff880633395be8 0000000000000086 ffff880633395b48 0000000000000000
0000000000000000 ffff880633395cc4 0000000000000000 ffff880458b6b4e8
ffff880633395c18 ffffffffa025d3de ffff88063214dad8 ffff880633395fd8
Call Trace:
[<ffffffffa025d3de>] ? ext4_getblk+0xee/0x1f0 [ext4]
[<ffffffff8109ef4e>] ? prepare_to_wait+0x4e/0x80
[<ffffffffa021e08a>] start_this_handle+0x25a/0x480 [jbd2]
[<ffffffff8109ec20>] ? autoremove_wake_function+0x0/0x40
[<ffffffffa021e495>] jbd2_journal_start+0xb5/0x100 [jbd2]
[<ffffffff811f1eac>] ? dqput+0x5c/0x200
[<ffffffffa0272e66>] ext4_journal_start_sb+0x56/0xe0 [ext4]
[<ffffffffa02663e7>] ext4_create+0x77/0x150 [ext4]
[<ffffffff8119d9e6>] vfs_create+0xe6/0x110
[<ffffffff811a186e>] do_filp_open+0xa8e/0xd20
[<ffffffff81193cd4>] ? cp_new_stat+0xe4/0x100
[<ffffffff8129943a>] ? strncpy_from_user+0x4a/0x90
[<ffffffff811ae392>] ? alloc_fd+0x92/0x160
[<ffffffff8118b157>] do_sys_open+0x67/0x130
[<ffffffff8118b260>] sys_open+0x20/0x30
[<ffffffff8100b0d2>] system_call_fastpath+0x16/0x1b
INFO: task nginx:4149 blocked for more than 120 seconds.
Not tainted 2.6.32-504.30.3.el6.x86_64 #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
nginx D 0000000000000003 0 4149 4140 0x00000080
ffff8805efc7bbe8 0000000000000086 ffff8805efc7bb48 0000000000000000
0000000000000000 ffff8805efc7bcc4 0000000000000000 ffff88044b34d4e8
ffff8805efc7bc18 ffffffffa025d3de ffff8805d117c5f8 ffff8805efc7bfd8
Call Trace:
[<ffffffffa025d3de>] ? ext4_getblk+0xee/0x1f0 [ext4]
[<ffffffff8109ef4e>] ? prepare_to_wait+0x4e/0x80
[<ffffffffa021e08a>] start_this_handle+0x25a/0x480 [jbd2]
[<ffffffff8109ec20>] ? autoremove_wake_function+0x0/0x40
[<ffffffffa021e495>] jbd2_journal_start+0xb5/0x100 [jbd2]
[<ffffffff811f1eac>] ? dqput+0x5c/0x200
[<ffffffffa0272e66>] ext4_journal_start_sb+0x56/0xe0 [ext4]
[<ffffffffa02663e7>] ext4_create+0x77/0x150 [ext4]
[<ffffffff8119d9e6>] vfs_create+0xe6/0x110
[<ffffffff811a186e>] do_filp_open+0xa8e/0xd20
[<ffffffff81193cd4>] ? cp_new_stat+0xe4/0x100
[<ffffffff8129943a>] ? strncpy_from_user+0x4a/0x90
[<ffffffff811ae392>] ? alloc_fd+0x92/0x160
[<ffffffff8118b157>] do_sys_open+0x67/0x130
[<ffffffff8118b260>] sys_open+0x20/0x30
[<ffffffff8100b0d2>] system_call_fastpath+0x16/0x1b
INFO: task nginx:4155 blocked for more than 120 seconds.
Not tainted 2.6.32-504.30.3.el6.x86_64 #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
nginx D 0000000000000006 0 4155 4140 0x00000080
ffff8805d2a0fd18 0000000000000082 0000000000000000 ffff8805a643b840
ffff8805d2a0fc88 ffff8805d2a0fe08 000000b880966530 ffffffff8122ec0f
ffff8805d2a0fd68 0000000100078618 ffff88063318bad8 ffff8805d2a0ffd8
Call Trace:
[<ffffffff8122ec0f>] ? security_inode_permission+0x1f/0x30
[<ffffffff8152b486>] __mutex_lock_slowpath+0x96/0x210
[<ffffffff8152afab>] mutex_lock+0x2b/0x50
[<ffffffff811a10b6>] do_filp_open+0x2d6/0xd20
[<ffffffff81193cd4>] ? cp_new_stat+0xe4/0x100
[<ffffffff8129943a>] ? strncpy_from_user+0x4a/0x90
[<ffffffff811ae392>] ? alloc_fd+0x92/0x160
[<ffffffff8118b157>] do_sys_open+0x67/0x130
[<ffffffff8118b260>] sys_open+0x20/0x30
[<ffffffff8100b0d2>] system_call_fastpath+0x16/0x1b
INFO: task nginx:4160 blocked for more than 120 seconds.
Not tainted 2.6.32-504.30.3.el6.x86_64 #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
nginx D 0000000000000006 0 4160 4140 0x00000080
ffff8805efcd3be8 0000000000000086 ffff8805efcd3bb0 ffff8805efcd3bac
0000000000000000 ffff88063fc24d80 000000b7c632b48c ffff8800282158c0
00000000000007ff 00000001000779f0 ffff880632ddc5f8 ffff8805efcd3fd8
Call Trace:
[<ffffffffa021e08a>] start_this_handle+0x25a/0x480 [jbd2]
[<ffffffff8109ec20>] ? autoremove_wake_function+0x0/0x40
[<ffffffffa021e495>] jbd2_journal_start+0xb5/0x100 [jbd2]
[<ffffffff811f1eac>] ? dqput+0x5c/0x200
[<ffffffffa0272e66>] ext4_journal_start_sb+0x56/0xe0 [ext4]
[<ffffffffa02663e7>] ext4_create+0x77/0x150 [ext4]
[<ffffffff8119d9e6>] vfs_create+0xe6/0x110
[<ffffffff811a186e>] do_filp_open+0xa8e/0xd20
[<ffffffff81193cd4>] ? cp_new_stat+0xe4/0x100
[<ffffffff8129943a>] ? strncpy_from_user+0x4a/0x90
[<ffffffff811ae392>] ? alloc_fd+0x92/0x160
[<ffffffff8118b157>] do_sys_open+0x67/0x130
[<ffffffff8118b260>] sys_open+0x20/0x30
[<ffffffff8100b0d2>] system_call_fastpath+0x16/0x1b
INFO: task nginx:4164 blocked for more than 120 seconds.
Not tainted 2.6.32-504.30.3.el6.x86_64 #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
nginx D 0000000000000003 0 4164 4140 0x00000080
ffff880633055be8 0000000000000082 ffff880633055bb0 ffff880633055bac
0000000000000000 ffff88063fc24780 000000be5103f4f7 ffff8800282158c0
00000000000005ff 000000010007e7db ffff88062fd63ad8 ffff880633055fd8
Call Trace:
[<ffffffffa021e08a>] start_this_handle+0x25a/0x480 [jbd2]
[<ffffffff8109ec20>] ? autoremove_wake_function+0x0/0x40
[<ffffffffa021e495>] jbd2_journal_start+0xb5/0x100 [jbd2]
[<ffffffff811f1eac>] ? dqput+0x5c/0x200
[<ffffffffa0272e66>] ext4_journal_start_sb+0x56/0xe0 [ext4]
[<ffffffffa02663e7>] ext4_create+0x77/0x150 [ext4]
[<ffffffff8119d9e6>] vfs_create+0xe6/0x110
[<ffffffff811a186e>] do_filp_open+0xa8e/0xd20
[<ffffffff81193cd4>] ? cp_new_stat+0xe4/0x100
[<ffffffff8129943a>] ? strncpy_from_user+0x4a/0x90
[<ffffffff811ae392>] ? alloc_fd+0x92/0x160
[<ffffffff8118b157>] do_sys_open+0x67/0x130
[<ffffffff8118b260>] sys_open+0x20/0x30
[<ffffffff8100b0d2>] system_call_fastpath+0x16/0x1b
对一些简单命令(如 userdel username)执行 strace,输出如下:
open("/etc/group", O_RDONLY|O_CLOEXEC) = 10
fstat(10, {st_mode=S_IFREG|0644, st_size=4253, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fb3b4957000
read(10, "root:x:0:root\nbin:x:1:root,bin,d"..., 4096) = 4096
close(10) = 0
munmap(0x7fb3b4957000, 4096) = 0
open("/etc/passwd", O_RDONLY|O_CLOEXEC) = 10
fstat(10, {st_mode=S_IFREG|0644, st_size=16685, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fb3b4957000
lseek(10, 0, SEEK_CUR) = 0
read(10, "root:x:0:0:root:/root:/bin/bash\n"..., 4096) = 4096
read(10, "stem User for edzia-r:/home/edzi"..., 4096) = 4096
read(10, "stros:/home/stros/:/sbin/nologin"..., 4096) = 4096
read(10, "x:7923:7923:System User for neox"..., 4096) = 4096
read(10, "me:x:8042:8042:System User for s"..., 4096) = 301
read(10, "", 4096) = 0
close(10) = 0
munmap(0x7fb3b4957000, 4096) = 0
ioctl(0, SNDCTL_TMR_TIMEBASE or TCGETS, {B38400 opost isig icanon echo ...}) = 0
readlink("/proc/self/fd/0", "/dev/pts/0"..., 31) = 10
lstat("/dev/pts/0", {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 0), ...}) = 0
sendto(3, "x\0\0\0]\4\5\0\2\0\0\0\0\0\0\0op=deleting grou"..., 120, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 120
poll([{fd=3, events=POLLIN}], 1, 500) = 1 ([{fd=3, revents=POLLIN}])
recvfrom(3, "$\0\0\0\2\0\0\0\2\0\0\0YR\0\0\0\0\0\0x\0\0\0]\4\5\0\2\0\0\0"..., 8988, MSG_PEEK|MSG_DONTWAIT, {sa_family=AF_NETLINK, pid=0, groups=00000000}, [12]) = 36
recvfrom(3, "$\0\0\0\2\0\0\0\2\0\0\0YR\0\0\0\0\0\0x\0\0\0]\4\5\0\2\0\0\0"..., 8988, MSG_DONTWAIT, {sa_family=AF_NETLINK, pid=0, groups=00000000}, [12]) = 36
sendto(9, "<86>Jul 19 13:16:04 userdel[2108"..., 77, MSG_NOSIGNAL, NULL, 0) = 77
fstat(5, {st_mode=S_IFREG|0644, st_size=16685, ...}) = 0
fstat(5, {st_mode=S_IFREG|0644, st_size=16685, ...}) = 0
umask(077) = 022
open("/etc/passwd-", O_WRONLY|O_CREAT|O_TRUNC, 0666) = 10
umask(022) = 077
lseek(5, 0, SEEK_SET) = 0
read(5, "root:x:0:0:root:/root:/bin/bash\n"..., 4096) = 4096
fstat(10, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fb3b4957000
read(5, "stem User for edzia-r:/home/edzi"..., 4096) = 4096
write(10, "root:x:0:0:root:/root:/bin/bash\n"..., 4096) = 4096
read(5, "stros:/home/stros/:/sbin/nologin"..., 4096) = 4096
write(10, "stem User for edzia-r:/home/edzi"..., 4096) = 4096
read(5, "x:7923:7923:System User for neox"..., 4096) = 4096
write(10, "stros:/home/stros/:/sbin/nologin"..., 4096) = 4096
read(5, "me:x:8042:8042:System User for s"..., 4096) = 301
write(10, "x:7923:7923:System User for neox"..., 4096) = 4096
read(5, "", 4096) = 0
write(10, "me:x:8042:8042:System User for s"..., 301) = 301
fsync(10