服务器错误:etcdserver:请求超时 - etcd 备份和恢复后出错

服务器错误:etcdserver:请求超时 - etcd 备份和恢复后出错

我做了 etcd 备份,然后在同一个集群上进行了恢复,之后就遇到了下面这些问题:我可以列出资源,但无法创建或删除资源。这是一个使用 kubeadm 搭建的集群,包含 1 个主节点(master)和 2 个工作节点(worker)。这个集群我已经运行了将近 8 个月,之前从未出现过任何问题。任何建议都将不胜感激 :)

kubectl version
Client Version: version.Info{Major:"1", Minor:"21", GitVersion:"v1.21.0", GitCommit:"cb303e613a121a29364f75cc67d3d580833a7479", GitTreeState:"clean", BuildDate:"2021-04-08T16:31:21Z", GoVersion:"go1.16.1", Compiler:"gc", Platform:"linux/amd64"}


kubectl get pod
NAME                                                            READY   STATUS    RESTARTS   AGE
mongodb-deployment-79c8fcfd4-krst4                              1/1     Running   1          7d8h
nginx-deployment-5b5b7764d-6x457                                2/2     Running   2          7d5h
nginx-deployment-5b5b7764d-rxfhn                                2/2     Running   2          7d5h
nginx-deployment-5b5b7764d-zw7v8                                2/2     Running   2          7d5h
pod-with-toleration                                             1/1     Running   5          26d


kubectl delete pod pod-with-toleration
Error from server: etcdserver: request timed out

sudo ETCDCTL_API=3 etcdctl member list \
> --cert=/etc/kubernetes/pki/etcd/server.crt \
>   --key=/etc/kubernetes/pki/etcd/server.key \
>   --cacert=/etc/kubernetes/pki/etcd/ca.crt
a26af52927f3b0b7, started, master, https://172.31.4.108:2380, https://172.31.4.108:2379


kubectl get pod -n kube-system
NAME                             READY   STATUS    RESTARTS   AGE
coredns-558bd4d5db-j7fk6         1/1     Running   57         233d
coredns-558bd4d5db-kdkbb         1/1     Running   57         233d
etcd-master                      1/1     Running   57         233d
kube-apiserver-master            1/1     Running   60         216d
kube-controller-manager-master   1/1     Running   58         233d
kube-proxy-2kpwp                 1/1     Running   57         233d
kube-proxy-q54dh                 1/1     Running   49         223d
kube-proxy-xc9rx                 1/1     Running   49         223d
kube-scheduler-master            1/1     Running   58         233d
weave-net-d2tf8                  2/2     Running   123        224d
weave-net-lxt7m                  2/2     Running   108        223d
weave-net-w4mv2                  2/2     Running   103        223d

kubectl logs -n kube-system etcd-master
2022-07-02 19:25:37.798497 W | etcdserver: failed to revoke 30b7819a6ceffaa1 ("etcdserver: request timed out")
2022-07-02 19:25:39.312238 I | etcdserver/api/etcdhttp: /health OK (status code 200)
WARNING: 2022/07/02 19:25:40 grpc: Server.processUnaryRPC failed to write status: connection error: desc = "transport is closing"
2022-07-02 19:25:41.798974 W | etcdserver: failed to revoke 30b7819ebd239fac ("etcdserver: request timed out")
2022-07-02 19:25:43.363679 I | embed: rejected connection from "172.31.24.138:33716" (error "tls: first record does not look like a TLS handshake", ServerName "")
WARNING: 2022/07/02 19:25:44 grpc: Server.processUnaryRPC failed to write status: connection error: desc = "transport is closing"
WARNING: 2022/07/02 19:25:44 grpc: Server.processUnaryRPC failed to write status: connection error: desc = "transport is closing"
2022-07-02 19:25:44.797749 W | etcdserver: failed to revoke 30b7819ebd239292 ("etcdserver: request timed out")
2022-07-02 19:25:44.797781 W | etcdserver: failed to revoke 30b7819a6cefea2b ("etcdserver: request timed out")

sudo vim /etc/kubernetes/manifests/etcd.yaml

apiVersion: v1
kind: Pod
metadata:
  annotations:
    kubeadm.kubernetes.io/etcd.advertise-client-urls: https://172.31.4.108:2379
  creationTimestamp: null
  labels:
    component: etcd
    tier: control-plane
  name: etcd
  namespace: kube-system
spec:
  containers:
  - command:
    - etcd
    - --advertise-client-urls=https://172.31.4.108:2379
    - --cert-file=/etc/kubernetes/pki/etcd/server.crt
    - --client-cert-auth=true
    - --data-dir=/var/lib/etcd
    - --initial-advertise-peer-urls=https://172.31.4.108:2380
    - --initial-cluster=master=https://172.31.4.108:2380
    - --key-file=/etc/kubernetes/pki/etcd/server.key
    - --listen-client-urls=https://127.0.0.1:2379,https://172.31.4.108:2379
    - --listen-metrics-urls=http://127.0.0.1:2381
    - --listen-peer-urls=https://172.31.4.108:2380
    - --name=master
    - --peer-cert-file=/etc/kubernetes/pki/etcd/peer.crt
    - --peer-client-cert-auth=true
    - --peer-key-file=/etc/kubernetes/pki/etcd/peer.key
    - --peer-trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt
    - --snapshot-count=10000
    - --trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt
    image: k8s.gcr.io/etcd:3.4.13-0
    imagePullPolicy: IfNotPresent
    livenessProbe:
      failureThreshold: 8
      httpGet:
        host: 127.0.0.1
        path: /health
        port: 2381
        scheme: HTTP
      initialDelaySeconds: 10
      periodSeconds: 10
      timeoutSeconds: 15
    name: etcd
    resources:
      requests:
        cpu: 100m
        ephemeral-storage: 100Mi
        memory: 100Mi
    startupProbe:
      failureThreshold: 24
      httpGet:
        host: 127.0.0.1
        path: /health
        port: 2381
        scheme: HTTP
      initialDelaySeconds: 10
      periodSeconds: 10
      timeoutSeconds: 15
    volumeMounts:
    - mountPath: /var/lib/etcd
      name: etcd-data
    - mountPath: /etc/kubernetes/pki/etcd
      name: etcd-certs
  hostNetwork: true
  priorityClassName: system-node-critical
  volumes:
  - hostPath:
      path: /etc/kubernetes/pki/etcd
      type: DirectoryOrCreate
    name: etcd-certs
  - hostPath:
      path: /var/lib/etcd
      type: DirectoryOrCreate
    name: etcd-data
status: {}

ls -l /var/lib
total 180
drwxr-xr-x  4 root      root      4096 Oct 21  2021 AccountsService
drwxr-xr-x  2 root      root      4096 Nov  6  2021 PackageKit
drwxr-x---  3 root      root      4096 Nov  6  2021 amazon
drwxr-xr-x  3 root      root      4096 Nov  7  2021 apport
drwxr-xr-x  5 root      root      4096 Jun 26 07:01 apt
drwxr-xr-x  2 root      root      4096 Sep 10  2020 boltd
drwxr-xr-x  8 root      root      4096 Jul  2 17:59 cloud
drwx------  3 root      root      4096 Nov 20  2021 cni
drwxr-xr-x  2 root      root      4096 Jul  2 19:29 command-not-found
drwx--x--x 12 root      root      4096 Mar 28 14:30 containerd
drwxr-xr-x  2 root      root      4096 Nov  6  2021 dbus
drwxr-xr-x  2 root      root      4096 Apr 10  2020 dhcp
drwxr-xr-x  3 root      root      4096 Nov 27  2021 dockershim
drwxr-xr-x  7 root      root      4096 Jun 26 07:01 dpkg
drwx------  3 root      root      4096 Jul  2 18:00 etcd
drwxr-xr-x  7 root      root      4096 Mar 29 04:21 fwupd




相关内容