使用 Oracle Linux 8.6 时,我们遇到了 timedatectl 问题,但是仅有的当它由 systemd(239-58.0.1.el8_6.3) 在 .service 定义文件中运行ExecStartPre
并且 SELinux 设置为时Enforcing
。
重要的!: 命令timedatectl show
作品运行时:
- 由 shell 中的用户
- 如果我们在脚本中运行它,
ExecStart
而不是ExecStartPre
ExecStartPre
如果我们直接在以下行中运行它:ExecStartPre=timedatectl show
最小复制:
/测试.sh:
#!/usr/bin/bash
timedatectl show
/usr/lib/systemd/system/test.service
[Unit]
Description=Test Service
[Service]
Type=simple
TimeoutStopSec=60s #This is necessary as this is a timeout issue
SyslogIdentifier=test
ExecStartPre=+/usr/bin/bash /test.sh
ExecStart=echo "Done!"
使用这些文件,运行命令systemctl start test
结果:
服务启动失败。Journalctl 显示"Failed to parse bus message: Connection timed out"
strace 日志:
我们设法收集了故障期间的 strace 输出。相关部分(依赖项加载之后):
access("/etc/selinux/config", F_OK) = 0
openat(AT_FDCWD, "/proc/sys/crypto/fips_enabled", O_RDONLY) = 3
read(3, "0\n", 2) = 2
close(3) = 0
access("/etc/system-fips", F_OK) = -1 ENOENT (No such file or directory)
access("/etc/system-fips", F_OK) = -1 ENOENT (No such file or directory)
access("/etc/gcrypt/fips_enabled", F_OK) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/proc/sys/crypto/fips_enabled", O_RDONLY) = 3
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
read(3, "0\n", 1024) = 2
close(3) = 0
prctl(PR_CAPBSET_READ, CAP_MAC_OVERRIDE) = 1
prctl(PR_CAPBSET_READ, 0x30 /* CAP_??? */) = -1 EINVAL (Invalid argument)
prctl(PR_CAPBSET_READ, CAP_CHECKPOINT_RESTORE) = -1 EINVAL (Invalid argument)
prctl(PR_CAPBSET_READ, CAP_BLOCK_SUSPEND) = 1
prctl(PR_CAPBSET_READ, CAP_PERFMON) = -1 EINVAL (Invalid argument)
prctl(PR_CAPBSET_READ, CAP_AUDIT_READ) = 1
openat(AT_FDCWD, "/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=217800224, ...}) = 0
mmap(NULL, 217800224, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f3238c29000
close(3) = 0
openat(AT_FDCWD, "/proc/self/stat", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
read(3, "14656 (timedatectl) R 14653 1463"..., 1024) = 313
close(3) = 0
access("/proc/vz", F_OK) = -1 ENOENT (No such file or directory)
getpid() = 14656
openat(AT_FDCWD, "/run/systemd/container", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/proc/1/environ", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0400, st_size=0, ...}) = 0
read(3, "HOME=/\0TERM=linux\0BOOT_IMAGE=(hd"..., 1024) = 99
read(3, "", 1024) = 0
close(3) = 0
openat(AT_FDCWD, "/proc/1/sched", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=0, ...}) = 0
read(3, "systemd (1, #threads: 1)\n-------"..., 1024) = 1024
close(3) = 0
openat(AT_FDCWD, "/proc/cmdline", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
read(3, "BOOT_IMAGE=(hd0,msdos1)/vmlinuz-"..., 1024) = 183
close(3) = 0
openat(AT_FDCWD, "/", O_RDONLY|O_NOCTTY|O_CLOEXEC|O_PATH) = 3
fstatfs(3, {f_type=XFS_SB_MAGIC, f_bsize=4096, f_blocks=18341120, f_bfree=16244361, f_bavail=16244361, f_files=36700160, f_ffree=36548894, f_fsid={val=[0xfc00, 0]}, f_namelen=255, f_frsize=4096, f_flags=ST_VALID|ST_RELATIME}) = 0
close(3) = 0
access("/etc/initrd-release", F_OK) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/run/systemd/system/", {st_mode=S_IFDIR|0755, st_size=40, ...}, AT_SYMLINK_NOFOLLOW) = 0
socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0) = 3
getsockopt(3, SOL_SOCKET, SO_RCVBUF, [212992], [4]) = 0
setsockopt(3, SOL_SOCKET, SO_RCVBUFFORCE, [8388608], 4) = 0
getsockopt(3, SOL_SOCKET, SO_SNDBUF, [212992], [4]) = 0
setsockopt(3, SOL_SOCKET, SO_SNDBUFFORCE, [8388608], 4) = 0
connect(3, {sa_family=AF_UNIX, sun_path="/run/dbus/system_bus_socket"}, 29) = 0
getsockopt(3, SOL_SOCKET, SO_PEERCRED, {pid=1, uid=0, gid=0}, [12]) = 0
getsockopt(3, SOL_SOCKET, SO_PEERSEC, "system_u:system_r:system_dbusd_t"..., [64 => 36]) = 0
getsockopt(3, SOL_SOCKET, SO_PEERGROUPS, 0x55ed61e47c60, [256 => 0]) = 0
fstat(3, {st_mode=S_IFSOCK|0777, st_size=0, ...}) = 0
getsockopt(3, SOL_SOCKET, SO_ACCEPTCONN, [0], [4]) = 0
getsockname(3, {sa_family=AF_UNIX}, [128 => 2]) = 0
sendmsg(3, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="\0AUTH EXTERNAL\r\nDATA\r\n", iov_len=22}, {iov_base="NEGOTIATE_UNIX_FD\r\n", iov_len=19}, {iov_base="BEGIN\r\n", iov_len=7}], msg_iovlen=3, msg_controllen=0, msg_flags=0}, MSG_DONTWAIT|MSG_NOSIGNAL) = 48
gettid() = 14656
getrandom("\x26\x61\x8e\x0f\x4d\x7f\x4e\x6f\x8f\x4f\xfa\x3e\x40\xa5\xe0\x25", 16, GRND_NONBLOCK) = 16
gettid() = 14656
recvmsg(3, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="DATA\r\nOK a4090baa394dcf0c365870c"..., iov_len=256}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_CMSG_CLOEXEC}, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) = 58
sendmsg(3, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="l\1\0\1\0\0\0\0\1\0\0\0m\0\0\0\1\1o\0\25\0\0\0/org/fre"..., iov_len=128}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, MSG_DONTWAIT|MSG_NOSIGNAL) = 128
recvmsg(3, {msg_namelen=0}, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) = -1 EAGAIN (Resource temporarily unavailable)
ppoll([{fd=3, events=POLLIN}], 1, {tv_sec=24, tv_nsec=999768000}, NULL, 8) = 1 ([{fd=3, revents=POLLIN}], left {tv_sec=24, tv_nsec=999316712})
recvmsg(3, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="l\2\1\1\f\0\0\0\1\0\0\0=\0\0\0\6\1s\0\7\0\0\0", iov_len=24}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_CMSG_CLOEXEC}, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) = 24
recvmsg(3, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base=":1.4092\0\5\1u\0\1\0\0\0\10\1g\0\1s\0\0\7\1s\0\24\0\0\0"..., iov_len=68}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_CMSG_CLOEXEC}, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) = 68
sendmsg(3, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="l\1\0\1\5\0\0\0\2\0\0\0\217\0\0\0\1\1o\0\32\0\0\0/org/fre"..., iov_len=160}, {iov_base="\0\0\0\0\0", iov_len=5}], msg_iovlen=2, msg_controllen=0, msg_flags=0}, MSG_DONTWAIT|MSG_NOSIGNAL) = 165
recvmsg(3, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="l\4\1\1\f\0\0\0\2\0\0\0\215\0\0\0\1\1o\0\25\0\0\0", iov_len=24}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_CMSG_CLOEXEC}, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) = 24
recvmsg(3, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="/org/freedesktop/DBus\0\0\0\2\1s\0\24\0\0\0"..., iov_len=148}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_CMSG_CLOEXEC}, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) = 148
recvmsg(3, {msg_namelen=0}, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) = -1 EAGAIN (Resource temporarily unavailable)
ppoll([{fd=3, events=POLLIN}], 1, {tv_sec=24, tv_nsec=999676000}, NULL, 8) = 0 (Timeout)
openat(AT_FDCWD, "/usr/share/locale/locale.alias", O_RDONLY|O_CLOEXEC) = 4
fstat(4, {st_mode=S_IFREG|0644, st_size=2997, ...}) = 0
read(4, "# Locale name alias data base.\n#"..., 4096) = 2997
read(4, "", 4096) = 0
close(4) = 0
openat(AT_FDCWD, "/usr/share/locale/en_US.UTF-8/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/share/locale/en_US.utf8/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/share/locale/en_US/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/share/locale/en.UTF-8/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/share/locale/en.utf8/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/share/locale/en/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
writev(2, [{iov_base="Failed to parse bus message: Con"..., iov_len=49}, {iov_base="\n", iov_len=1}], 2) = 50
close(3) = 0
exit_group(1) = ?
+++ exited with 1 +++
记录发现:
从日志中我似乎可以清楚地看到,通过始终timedatectl
存在的命令,确实成功地从服务器获取了信息。但是, fd 3 上的套接字似乎没有正确关闭。使用时间戳运行表明,这是唯一花费大量时间的命令,并且命令在超时发生之前就已经完成了。recvmsg
ppol
strace
ppol
recvmsg
审计日志:
我在审计日志中发现与故障相符的以下内容:
type=USER_ACCT msg=audit(1660774510.696:6426767): pid=1298671 uid=0 auid=4294967295 ses=4294967295 subj=system_u:system_r:initrc_t:s0 msg='op=PAM:accounting grantors=pam_unix acct="root" exe="/usr/bin/sudo" hostname=? addr=? terminal=? res=success'UID="root" AUID="unset"
type=USER_CMD msg=audit(1660774510.697:6426768): pid=1298671 uid=0 auid=4294967295 ses=4294967295 subj=system_u:system_r:initrc_t:s0 msg='cwd="/" cmd=74696D656461746563746C2073686F77 exe="/usr/bin/sudo" terminal=? res=success'UID="root" AUID="unset"
type=CRED_REFR msg=audit(1660774510.697:6426769): pid=1298671 uid=0 auid=4294967295 ses=4294967295 subj=system_u:system_r:initrc_t:s0 msg='op=PAM:setcred grantors=pam_env,pam_unix acct="root" exe="/usr/bin/sudo" hostname=? addr=? terminal=? res=success'UID="root" AUID="unset"
type=USER_START msg=audit(1660774510.706:6426770): pid=1298671 uid=0 auid=4294967295 ses=4294967295 subj=system_u:system_r:initrc_t:s0 msg='op=PAM:session_open grantors=pam_keyinit,pam_limits,pam_systemd,pam_unix acct="root" exe="/usr/bin/sudo" hostname=? addr=? terminal=? res=success'UID="root" AUID="unset"
type=SERVICE_START msg=audit(1660774510.780:6426771): pid=1 uid=0 auid=4294967295 ses=4294967295 subj=system_u:system_r:init_t:s0 msg='unit=systemd-timedated comm="systemd" exe="/usr/lib/systemd/systemd" hostname=? addr=? terminal=? res=success'UID="root" AUID="unset"
type=USER_AVC msg=audit(1660774510.783:6426772): pid=966 uid=81 auid=4294967295 ses=4294967295 subj=system_u:system_r:system_dbusd_t:s0-s0:c0.c1023 msg='avc: denied { send_msg } for msgtype=method_return dest=:1.3078967 spid=1298674 tpid=1298673 scontext=system_u:system_r:systemd_timedated_t:s0 tcontext=system_u:system_r:initrc_t:s0 tclass=dbus permissive=0 exe="/usr/bin/dbus-daemon" sauid=81 hostname=? addr=? terminal=?'UID="dbus" AUID="unset" SAUID="dbus"
变化:
发生了以下变化没有效果(错误继续)
- 在测试文件中添加用户/组定义,并以用户和 root 身份运行 ExecStartPre 命令
- 使用 /usr/bin/sh 运行脚本
- 使用 /usr/bin/zsh 运行脚本
timedatectl show
在 sudo 下运行命令
问题:
- 什么可能导致
ppol
这种情况挂起? recvmsg
当命令似乎已经返回了所有预期的内容时,我们如何确定它实际上在等待什么?- 除了将其设置为以外,我们还能对 SELinux 做其他什么事情来修复此问题吗
Permissive
?
答案1
问完这个问题后不久,我就找到了我自己问题的答案(不过,如果有人能告诉我,我还是想知道为什么会出现这种情况,ExecStartPre
但不会)。ExecStart
问题是,默认情况下,SELinux 是阻止的initrc_t:dbus send_msg
。绕过此问题的最简单方法是将 SELinux 设置为宽容,但这显然远非理想。相反,我们需要按照以下步骤创建策略。
- 安装
policycoreutils-devel
方式:dnf install -y policycoreutils-devel
- 在 audit.log 文件中找到审计消息编号后(请参阅问题“1660774510”中的 audit.log 代码片段),将相应的行输入到实用
audit2allow
程序中以生成策略文件:cat /var/log/audit/audit.log | grep 1660774510 | audit2allow -M newPolicy
- 用于
semodule
应用新策略:semodule -i newPolicy.pp
笔记:
为了激励其他可能更详细的答案(希望解释 ExecStartPre 与 ExecStart 的事情),我暂时不会接受我自己的答案,并会在一段时间后接受最好的答案。