我有一个使用 Pacemaker + DRBD 的主动/被动高可用性集群。发生裂脑后,我使用自动裂脑恢复来恢复系统。
问题在于,由于文件系统资源由 CRM 管理,裂脑恢复后它不会被挂载。裂脑恢复后什么都没有挂载!
看来我应该使用 stonith 设备,但我的老板要求我在不使用任何额外设备的情况下解决这个问题!我如何才能将裂脑情况通知给 Pacemaker,以便它重新挂载文件系统资源?
编辑:我的 crm 配置
node drbd3
node drbd4
primitive apache ocf:heartbeat:apache \
params configfile="/etc/apache2/apache2.conf" httpd="/usr/sbin/apache2" \
op monitor interval="5s"
primitive drbd_disk ocf:linbit:drbd \
params drbd_resource="r0" \
op monitor interval="15s"
primitive fs_drbd ocf:heartbeat:Filesystem \
params device="/dev/drbd/by-res/r0" directory="/mnt" fstype="ext4"
primitive ip1 ocf:heartbeat:IPaddr2 \
params ip="10.8.0.95" nic="eth0"
group group1 ip1 apache
ms ms_drbd drbd_disk \
meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
colocation mnt_on_master inf: fs_drbd ms_drbd:Master
order apache_after_ip inf: ip1:start apache:start
order mount_after_drbd inf: ms_drbd:promote fs_drbd:start
property $id="cib-bootstrap-options" \
dc-version="1.0.8-042548a451fce8400660f6031f4da6f0223dd5dd" \
cluster-infrastructure="openais" \
expected-quorum-votes="2" \
stonith-enabled="false" \
no-quorum-policy="ignore"
以及我的 DRBD 配置:
global { usage-count no; }
common { syncer { rate 100M; } }
resource r0 {
protocol C;
startup {
wfc-timeout 1;
degr-wfc-timeout 1;
}
# disk{
# fencing resource-only;
# }
handlers {
#split-brain "/usr/lib/drbd/notify-split-brain.sh root";
# fence-peer "/usr/lib/drbd/crm-fence-peer.sh";
# after-resync-target "/usr/lib/drbd/crm-unfence-peer.sh";
}
net {
cram-hmac-alg sha1;
shared-secret "test";
# allow-two-primaries;
after-sb-0pri discard-zero-changes;
after-sb-1pri discard-secondary;
after-sb-2pri call-pri-lost-after-sb;
}
on drbd3 {
device /dev/drbd0;
disk /dev/sda1;
address 10.8.0.93:7788;
meta-disk internal;
}
on drbd4 {
device /dev/drbd0;
disk /dev/sda1;
address 10.8.0.94:7788;
meta-disk internal;
}
}
#include "drbd.d/global_common.conf";
#include "drbd.d/*.res";
~
答案1
对于上述情况,您可以在 /etc/rc.local 中添加以下几行来解决问题:
#drbdadm secondary resource-name<resource>
#drbdadm disconnect resource-name<resource>
#drbdadm -- --discard-my-data connect resource-name<resource>
将上述命令添加到 NODE-1[主/活动节点]的 /etc/rc.local 文件中。
drbdadm connect resource-name<resource>
将上述命令添加到 NODE-2[辅助/被动节点]的 /etc/rc.local 文件中。