我按照此处的指示(https://github.com/kubernetes/kubeadm/blob/main/docs/ha-considerations.md#keepalived-and-haproxy )尝试启动第一个控制平面节点,但出现此错误:kubelet is not running or is unhealthy。
并且在 journalctl -xeu kubelet 的输出中我收到错误:dial tcp: lookup vip.mycluster.local: Temporary failure in name resolution。
只有在使用 kubeadm init --control-plane-endpoint vip.mycluster.local 初始化时才会发生这种情况。
如果我仅使用 kubeadm init 初始化集群,
则不会出现任何错误。
journalctl -xeu kubelet
的输出:
Nov 27 15:31:44 k8s-eu-1-control-plane-node-1 kubelet[33417]: E1127 15:31:44.505821 33417 controller.go:146] "Failed to ensure lease exists, will retry" err="Get \"https://vip.mycluster.local:6445/apis/coordination.k8s.io/v1/namespaces/kube-node-lease/leases/k8s-eu-1-control-plane-node-1?timeout=10s\": dial tcp: lookup vip.mycluster.local: Temporary failure in name resolution" interval="7s"
Nov 27 15:31:44 k8s-eu-1-control-plane-node-1 kubelet[33417]: I1127 15:31:44.663579 33417 kubelet_node_status.go:70] "Attempting to register node" node="k8s-eu-1-control-plane-node-1"
Nov 27 15:31:44 k8s-eu-1-control-plane-node-1 kubelet[33417]: E1127 15:31:44.664815 33417 kubelet_node_status.go:92] "Unable to register node with API server" err="Post \"https://vip.mycluster.local:6445/api/v1/nodes\": dial tcp: lookup vip.mycluster.local: Temporary failure in name resolution" node="k8s-eu-1-control-plane-node-1"
Nov 27 15:31:44 k8s-eu-1-control-plane-node-1 kubelet[33417]: E1127 15:31:44.957274 33417 eviction_manager.go:258] "Eviction manager: failed to get summary stats" err="failed to get node info: node \"k8s-eu-1-control-plane-node-1\" not found"
这是 /etc/haproxy/haproxy.cfg
:
root@k8s-eu-1-control-plane-node-1:~# cat /etc/haproxy/haproxy.cfg
# https://github.com/kubernetes/kubeadm/blob/main/docs/ha-considerations.md#haproxy-configuration
# /etc/haproxy/haproxy.cfg
#---------------------------------------------------------------------
# Global settings
#---------------------------------------------------------------------
global
log /dev/log local0
log /dev/log local1 notice
daemon
#---------------------------------------------------------------------
# common defaults that all the 'listen' and 'backend' sections will
# use if not designated in their block
#---------------------------------------------------------------------
defaults
mode http
log global
option httplog
option dontlognull
option http-server-close
option forwardfor except 127.0.0.0/8
option redispatch
retries 1
timeout http-request 10s
timeout queue 20s
timeout connect 5s
timeout client 20s
timeout server 20s
timeout http-keep-alive 10s
timeout check 10s
#---------------------------------------------------------------------
# apiserver frontend which proxys to the control plane nodes
#---------------------------------------------------------------------
frontend apiserver
bind *:6445
mode tcp
option tcplog
default_backend apiserverbackend
#---------------------------------------------------------------------
# round robin balancing for apiserver
#---------------------------------------------------------------------
# https://github.com/kubernetes/kubeadm/blob/main/docs/ha-considerations.md#bootstrap-the-cluster
backend apiserverbackend
option httpchk GET /healthz
http-check expect status 200
mode tcp
option ssl-hello-chk
balance roundrobin
server k8s-eu-1-control-plane-node-1 aa.aaa.aaa.aa:8443 check
server k8s-eu-1-control-plane-node-2 bb.bbb.bbb.bbb:8443 check
server k8s-eu-1-control-plane-node-3 cc.ccc.ccc.ccc:8443 check
# [...]
keepalived
:
root@k8s-eu-1-control-plane-node-1:~# ls -lah /etc/keepalived/
total 16K
drwxr-xr-x 2 root root 4.0K Nov 27 14:06 .
drwxr-xr-x 87 root root 4.0K Nov 27 11:32 ..
-rw-r--r-- 1 root root 672 Nov 27 13:56 check_apiserver.sh
-rw-r--r-- 1 root root 704 Nov 27 11:43 keepalived.conf
root@k8s-eu-1-control-plane-node-1:~# cat /etc/keepalived/keepalived.conf
# https://github.com/kubernetes/kubeadm/blob/main/docs/ha-considerations.md#keepalived-configuration
# https://www.server-world.info/en/note?os=Ubuntu_22.04&p=keepalived&f=1
! /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
enable_script_security
}
vrrp_script check_apiserver {
script "/etc/keepalived/check_apiserver.sh"
interval 3
weight -2
fall 10
rise 2
}
vrrp_instance VI_1 {
state MASTER
interface eth0
virtual_router_id 51
priority 101
authentication {
auth_type PASS
auth_pass 42
}
virtual_ipaddress {
10.0.0.30
}
track_script {
check_apiserver
}
}
root@k8s-eu-1-control-plane-node-1:~# cat /etc/keepalived/keepalived.conf
# https://github.com/kubernetes/kubeadm/blob/main/docs/ha-considerations.md#keepalived-configuration
# https://www.server-world.info/en/note?os=Ubuntu_22.04&p=keepalived&f=1
! /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
enable_script_security
}
vrrp_script check_apiserver {
script "/etc/keepalived/check_apiserver.sh"
interval 3
weight -2
fall 10
rise 2
}
vrrp_instance VI_1 {
state MASTER
interface eth0
virtual_router_id 51
priority 101
authentication {
auth_type PASS
auth_pass 42
}
virtual_ipaddress {
10.0.0.30
}
track_script {
check_apiserver
}
}
root@k8s-eu-1-control-plane-node-1:~# cat /etc/keepalived/check_apiserver.sh
#!/bin/sh
# https://github.com/kubernetes/kubeadm/blob/main/docs/ha-considerations.md#keepalived-configuration
# https://www.server-world.info/en/note?os=Ubuntu_22.04&p=keepalived&f=1
#
# Health-check script: exits with status 1 (and a message on stderr) when the
# API server endpoint does not answer, first via localhost and then, if this
# node currently holds the virtual IP, via the virtual IP itself.
# NOTE: the shebang has to be the very first line of the file to take effect.

# Print the given message on stderr, prefixed with "***", and exit with status 1.
errorExit() {
    echo "*** $*" 1>&2
    exit 1
}

APISERVER_DEST_PORT=6445
APISERVER_VIP=10.0.0.30

# The URL must be host:port. With "localhost/:PORT" curl would contact the
# default HTTPS port and request the path "/:PORT/" instead.
curl --silent --max-time 2 --insecure "https://localhost:${APISERVER_DEST_PORT}/" -o /dev/null || errorExit "Error GET https://localhost:${APISERVER_DEST_PORT}/"

# Probe the VIP only when it is configured on one of this node's interfaces.
# -F treats the dots literally; -w avoids matching a longer address that
# merely contains the VIP as a substring.
if ip addr | grep -qwF -- "${APISERVER_VIP}"; then
    curl --silent --max-time 2 --insecure "https://${APISERVER_VIP}:${APISERVER_DEST_PORT}/" -o /dev/null || errorExit "Error GET https://${APISERVER_VIP}:${APISERVER_DEST_PORT}/"
fi
/etc/kubernetes/manifests/haproxy.yaml
:
# https://github.com/kubernetes/kubeadm/blob/main/docs/ha-considerations.md#option-1-run-the-services-on-the-operating-system
apiVersion: v1
kind: Pod
metadata:
name: haproxy
namespace: kube-system
spec:
containers:
- image: haproxy:2.1.4
name: haproxy
livenessProbe:
failureThreshold: 8
httpGet:
host: localhost
path: /healthz
port: 6445
scheme: HTTPS
volumeMounts:
- mountPath: /usr/local/etc/haproxy/haproxy.cfg
name: haproxyconf
readOnly: true
hostNetwork: true
volumes:
- hostPath:
path: /etc/haproxy/haproxy.cfg
type: FileOrCreate
name: haproxyconf
status: {}
/etc/kubernetes/manifests/keepalived.yaml
:
# https://github.com/kubernetes/kubeadm/blob/main/docs/ha-considerations.md#option-2-run-the-services-as-static-pods
apiVersion: v1
kind: Pod
metadata:
creationTimestamp: null
name: keepalived
namespace: kube-system
spec:
containers:
- image: osixia/keepalived:2.0.17
name: keepalived
resources: {}
securityContext:
capabilities:
add:
- NET_ADMIN
- NET_BROADCAST
- NET_RAW
volumeMounts:
- mountPath: /usr/local/etc/keepalived/keepalived.conf
name: config
- mountPath: /etc/keepalived/check_apiserver.sh
name: check
hostNetwork: true
volumes:
- hostPath:
path: /etc/keepalived/keepalived.conf
name: config
- hostPath:
path: /etc/keepalived/check_apiserver.sh
name: check
DNS Address
这是其中一次初始化尝试的输出(k8s-eu-1-control-plane-node-1 = aa.aaa.aaa.aa)
:
root@k8s-eu-1-control-plane-node-1:~# sudo kubeadm init --control-plane-endpoint "aa.aaa.aaa.aa:6445" --upload-certs
[init] Using Kubernetes version: v1.28.4
[preflight] Running pre-flight checks
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
W1127 16:47:19.353319 37423 checks.go:835] detected that the sandbox image "registry.k8s.io/pause:3.6" of the container runtime is inconsistent with that used by kubeadm. It is recommended that using "registry.k8s.io/pause:3.9" as the CRI sandbox image.
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "ca" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [k8s-eu-1-control-plane-node-1 kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.96.0.1 aa.aaa.aaa.aa]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "front-proxy-ca" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Generating "etcd/ca" certificate and key
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [k8s-eu-1-control-plane-node-1 localhost] and IPs [aa.aaa.aaa.aa 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [k8s-eu-1-control-plane-node-1 localhost] and IPs [aa.aaa.aaa.aa 127.0.0.1 ::1]
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "sa" key and public key
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
W1127 16:47:21.033530 37423 endpoint.go:57] [endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "admin.conf" kubeconfig file
W1127 16:47:21.301458 37423 endpoint.go:57] [endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "kubelet.conf" kubeconfig file
W1127 16:47:21.464421 37423 endpoint.go:57] [endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
W1127 16:47:21.572446 37423 endpoint.go:57] [endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[etcd] Creating static Pod manifest for local etcd in "/etc/kubernetes/manifests"
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Starting the kubelet
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests". This can take up to 4m0s
[kubelet-check] Initial timeout of 40s passed.
Unfortunately, an error has occurred:
timed out waiting for the condition
This error is likely caused by:
- The kubelet is not running
- The kubelet is unhealthy due to a misconfiguration of the node in some way (required cgroups disabled)
If you are on a systemd-powered system, you can try to troubleshoot the error with the following commands:
- 'systemctl status kubelet'
- 'journalctl -xeu kubelet'
Additionally, a control plane component may have crashed or exited when started by the container runtime.
To troubleshoot, list all containers using your preferred container runtimes CLI.
Here is one example how you may list all running Kubernetes containers by using crictl:
- 'crictl --runtime-endpoint unix:///var/run/containerd/containerd.sock ps -a | grep kube | grep -v pause'
Once you have found the failing container, you can inspect its logs with:
- 'crictl --runtime-endpoint unix:///var/run/containerd/containerd.sock logs CONTAINERID'
error execution phase wait-control-plane: couldn't initialize a Kubernetes cluster
To see the stack trace of this error execute with --v=5 or higher
寻找提示