我做了 etcd 备份,然后在同一个集群上进行了恢复。恢复之后遇到了下面的问题:我可以列出资源,但无法创建或删除资源。这是一个用 kubeadm 搭建的集群,包含 1 个主节点(master)和 2 个工作节点(worker)。这个集群已经运行了将近 8 个月,之前从未出现过任何问题。任何建议都将不胜感激 :)
kubectl version
Client Version: version.Info{Major:"1", Minor:"21", GitVersion:"v1.21.0", GitCommit:"cb303e613a121a29364f75cc67d3d580833a7479", GitTreeState:"clean", BuildDate:"2021-04-08T16:31:21Z", GoVersion:"go1.16.1", Compiler:"gc", Platform:"linux/amd64"}
kubectl get pod
NAME READY STATUS RESTARTS AGE
mongodb-deployment-79c8fcfd4-krst4 1/1 Running 1 7d8h
nginx-deployment-5b5b7764d-6x457 2/2 Running 2 7d5h
nginx-deployment-5b5b7764d-rxfhn 2/2 Running 2 7d5h
nginx-deployment-5b5b7764d-zw7v8 2/2 Running 2 7d5h
pod-with-toleration 1/1 Running 5 26d
kubectl delete pod pod-with-toleration
Error from server: etcdserver: request timed out
sudo ETCDCTL_API=3 etcdctl member list \
> --cert=/etc/kubernetes/pki/etcd/server.crt \
> --key=/etc/kubernetes/pki/etcd/server.key \
> --cacert=/etc/kubernetes/pki/etcd/ca.crt
a26af52927f3b0b7, started, master, https://172.31.4.108:2380, https://172.31.4.108:2379
kubectl get pod -n kube-system
NAME READY STATUS RESTARTS AGE
coredns-558bd4d5db-j7fk6 1/1 Running 57 233d
coredns-558bd4d5db-kdkbb 1/1 Running 57 233d
etcd-master 1/1 Running 57 233d
kube-apiserver-master 1/1 Running 60 216d
kube-controller-manager-master 1/1 Running 58 233d
kube-proxy-2kpwp 1/1 Running 57 233d
kube-proxy-q54dh 1/1 Running 49 223d
kube-proxy-xc9rx 1/1 Running 49 223d
kube-scheduler-master 1/1 Running 58 233d
weave-net-d2tf8 2/2 Running 123 224d
weave-net-lxt7m 2/2 Running 108 223d
weave-net-w4mv2 2/2 Running 103 223d
kubectl logs -n kube-system etcd-master
2022-07-02 19:25:37.798497 W | etcdserver: failed to revoke 30b7819a6ceffaa1 ("etcdserver: request timed out")
2022-07-02 19:25:39.312238 I | etcdserver/api/etcdhttp: /health OK (status code 200)
WARNING: 2022/07/02 19:25:40 grpc: Server.processUnaryRPC failed to write status: connection error: desc = "transport is closing"
2022-07-02 19:25:41.798974 W | etcdserver: failed to revoke 30b7819ebd239fac ("etcdserver: request timed out")
2022-07-02 19:25:43.363679 I | embed: rejected connection from "172.31.24.138:33716" (error "tls: first record does not look like a TLS handshake", ServerName "")
WARNING: 2022/07/02 19:25:44 grpc: Server.processUnaryRPC failed to write status: connection error: desc = "transport is closing"
WARNING: 2022/07/02 19:25:44 grpc: Server.processUnaryRPC failed to write status: connection error: desc = "transport is closing"
2022-07-02 19:25:44.797749 W | etcdserver: failed to revoke 30b7819ebd239292 ("etcdserver: request timed out")
2022-07-02 19:25:44.797781 W | etcdserver: failed to revoke 30b7819a6cefea2b ("etcdserver: request timed out")
sudo vim /etc/kubernetes/manifests/etcd.yaml
# Pod manifest for the single etcd member named "master"
# (file: /etc/kubernetes/manifests/etcd.yaml).
# Nesting restored with 2-space indentation; keys and values are unchanged.
apiVersion: v1
kind: Pod
metadata:
  annotations:
    kubeadm.kubernetes.io/etcd.advertise-client-urls: https://172.31.4.108:2379
  creationTimestamp: null
  labels:
    component: etcd
    tier: control-plane
  name: etcd
  namespace: kube-system
spec:
  containers:
    - command:
        - etcd
        - --advertise-client-urls=https://172.31.4.108:2379
        - --cert-file=/etc/kubernetes/pki/etcd/server.crt
        - --client-cert-auth=true
        # Must stay in sync with the etcd-data volumeMount/hostPath below.
        - --data-dir=/var/lib/etcd
        - --initial-advertise-peer-urls=https://172.31.4.108:2380
        # Single-member cluster: only "master" is listed.
        - --initial-cluster=master=https://172.31.4.108:2380
        - --key-file=/etc/kubernetes/pki/etcd/server.key
        # Client API is served over HTTPS on loopback and on the node address.
        - --listen-client-urls=https://127.0.0.1:2379,https://172.31.4.108:2379
        # Metrics/health endpoint is plain HTTP on loopback; the probes below use it.
        - --listen-metrics-urls=http://127.0.0.1:2381
        - --listen-peer-urls=https://172.31.4.108:2380
        - --name=master
        - --peer-cert-file=/etc/kubernetes/pki/etcd/peer.crt
        - --peer-client-cert-auth=true
        - --peer-key-file=/etc/kubernetes/pki/etcd/peer.key
        - --peer-trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt
        - --snapshot-count=10000
        - --trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt
      image: k8s.gcr.io/etcd:3.4.13-0
      imagePullPolicy: IfNotPresent
      # Health is checked against the HTTP metrics listener (port 2381).
      livenessProbe:
        failureThreshold: 8
        httpGet:
          host: 127.0.0.1
          path: /health
          port: 2381
          scheme: HTTP
        initialDelaySeconds: 10
        periodSeconds: 10
        timeoutSeconds: 15
      name: etcd
      resources:
        requests:
          cpu: 100m
          ephemeral-storage: 100Mi
          memory: 100Mi
      # Same endpoint as the liveness probe, with a higher failure threshold.
      startupProbe:
        failureThreshold: 24
        httpGet:
          host: 127.0.0.1
          path: /health
          port: 2381
          scheme: HTTP
        initialDelaySeconds: 10
        periodSeconds: 10
        timeoutSeconds: 15
      volumeMounts:
        - mountPath: /var/lib/etcd
          name: etcd-data
        - mountPath: /etc/kubernetes/pki/etcd
          name: etcd-certs
  # The pod shares the host's network namespace, so the listen URLs above
  # bind directly on the node.
  hostNetwork: true
  priorityClassName: system-node-critical
  volumes:
    # Host directory holding the etcd certificates and keys referenced above.
    - hostPath:
        path: /etc/kubernetes/pki/etcd
        type: DirectoryOrCreate
      name: etcd-certs
    # Host directory backing --data-dir.
    - hostPath:
        path: /var/lib/etcd
        type: DirectoryOrCreate
      name: etcd-data
status: {}
ls -l /var/lib
total 180
drwxr-xr-x 4 root root 4096 Oct 21 2021 AccountsService
drwxr-xr-x 2 root root 4096 Nov 6 2021 PackageKit
drwxr-x--- 3 root root 4096 Nov 6 2021 amazon
drwxr-xr-x 3 root root 4096 Nov 7 2021 apport
drwxr-xr-x 5 root root 4096 Jun 26 07:01 apt
drwxr-xr-x 2 root root 4096 Sep 10 2020 boltd
drwxr-xr-x 8 root root 4096 Jul 2 17:59 cloud
drwx------ 3 root root 4096 Nov 20 2021 cni
drwxr-xr-x 2 root root 4096 Jul 2 19:29 command-not-found
drwx--x--x 12 root root 4096 Mar 28 14:30 containerd
drwxr-xr-x 2 root root 4096 Nov 6 2021 dbus
drwxr-xr-x 2 root root 4096 Apr 10 2020 dhcp
drwxr-xr-x 3 root root 4096 Nov 27 2021 dockershim
drwxr-xr-x 7 root root 4096 Jun 26 07:01 dpkg
drwx------ 3 root root 4096 Jul 2 18:00 etcd
drwxr-xr-x 7 root root 4096 Mar 29 04:21 fwupd