在 Amazon 提供的 ECS CloudFormation 模板中,自动扩展健康检查失败。

在 Amazon 提供的 ECS CloudFormation 模板中,自动扩展健康检查失败。

我正在尝试以 AWS 提供的 CloudFormation ECS 服务模板(见此处)作为指导,建立一个新的 ECS 集群。我的 ECS 实例在 AutoScaling 组内启动,但随后无法通过健康检查,并且总是被终止。

输出实际上并没有告诉我哪些检查失败或为什么失败。

我使用的 CloudFormation 代码与 AWS 文档中提供的常用代码基本相同。我添加了一个具有更广泛权限的安全组(这样我可以在迭代过程中通过 SSH 进入),并将 AMI 更新为 us-east-1 中针对 ECS 优化的 Amazon Linux 的最新版本。

当前模板:

{
  "AWSTemplateFormatVersion" : "2010-09-09",
  "Description": "Deploys PoC ECS infrastructure.",  

  "Parameters" : {
    "KeyName": {
      "Description": "Name of an existing EC2 KeyPair to enable SSH access to the Elastic Beanstalk and Bastion hosts",
      "Type": "String",
      "MinLength": "1",
      "MaxLength": "255",
      "AllowedPattern": "[\\x20-\\x7E]*",
      "ConstraintDescription": "can contain only ASCII characters.",
      "Default": "smx-test-key"
    },
    "SubnetID": {
      "Type": "List<AWS::EC2::Subnet::Id>",
      "Description": "Select a default subnet ID."
    },    
    "DesiredCapacity": {
      "Type": "Number",
      "Default" : "1",
      "Description": "Number of instances to launch in your ECS cluster."
    },    
    "MaxSize": {
      "Type": "Number",
      "Default" : "1",
      "Description": "Maximum number of instances that can be launched in your ECS cluster."
    },
    "ECSInstanceType": {
      "Description": "The type of instance to use for ECS app servers",
      "Type": "String",
      "Default": "t2.micro",
      "AllowedValues": ["t2.micro", "t2.small", "t2.medium", "t2.large", "m3.medium", "m3.large", "m3.xlarge" ]
    },
    "SSHLocation" : {
      "Description" : " The IP address range that can be used to SSH to the EC2 instances.",
      "Type": "String",
      "MinLength": "9",
      "MaxLength": "18",
      "Default": "0.0.0.0/0",
      "AllowedPattern": "(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})/(\\d{1,2})",
      "ConstraintDescription": "must be a valid IP CIDR range of the form x.x.x.x/x."
    }
  },

  "Mappings" : {
    "AWSRegionToAMI" : {
      "us-east-1"      : { "AMIID" : "ami-5d1b984a" }
    }
  },

  "Resources" : {

    "ECSCluster": {
      "Type": "AWS::ECS::Cluster"
    },

    "taskdefinition": {
      "Type": "AWS::ECS::TaskDefinition",
      "Properties" : {
        "ContainerDefinitions" : [
          {
            "Name": "simple-app",
            "Cpu": "10",
            "Essential": "true",
            "Image":"httpd:2.4",
            "Memory":"300",
            "MountPoints": [{
              "ContainerPath": "/usr/local/apache2/htdocs",
              "SourceVolume": "my-vol"
            }],
            "PortMappings": [
              { "HostPort": 80, "ContainerPort": 80 }
            ]
          },
          {
            "Name": "busybox",
            "Cpu": 10,
            "Command": [
              "/bin/sh -c \"while true; do echo '<html> <head> <title>Amazon ECS Sample App</title> <style>body {margin-top: 40px; background-color: #333;} </style> </head><body> <div style=color:white;text-align:center> <h1>Amazon ECS Sample App</h1> <h2>Congratulations!</h2> <p>Your application is now running on a container in Amazon ECS.</p>' > top; /bin/date > date ; echo '</div></body></html>' > bottom; cat top date bottom > /usr/local/apache2/htdocs/index.html ; sleep 1; done\""
            ],      
            "EntryPoint": [ "sh", "-c"],
            "Essential": false,
            "Image": "busybox",
            "Memory": 200,
            "VolumesFrom": [
              {
                "SourceContainer": "simple-app"
              }
            ]
          }
        ],
        "Volumes": [
          { "Name": "my-vol" }
        ]
      }
    },    

    "EcsElasticLoadBalancer" : {
      "Type" : "AWS::ElasticLoadBalancing::LoadBalancer",
      "Properties" : {
        "Subnets" : { "Ref" : "SubnetID" },
        "Listeners" : [ {
          "LoadBalancerPort" : "80",
          "InstancePort" : "80",
          "Protocol" : "HTTP"
        } ],
        "HealthCheck" : {
          "Target" : "HTTP:80/",
          "HealthyThreshold" : "2",
          "UnhealthyThreshold" : "10",
          "Interval" : "30",
          "Timeout" : "5"
        }
      }
    }, 

    "ECSAutoScalingGroup" : {
      "Type" : "AWS::AutoScaling::AutoScalingGroup",
      "Properties" : {
        "VPCZoneIdentifier" : { "Ref" : "SubnetID" },
        "LaunchConfigurationName" : { "Ref" : "ContainerInstances" },
        "MinSize" : "1",
        "MaxSize" : { "Ref" : "MaxSize" },
        "DesiredCapacity" : { "Ref" : "DesiredCapacity" }
      },
      "CreationPolicy" : {
        "ResourceSignal" : {
          "Timeout" : "PT60M"
        }
      },
      "UpdatePolicy": {
        "AutoScalingRollingUpdate": {
          "MinInstancesInService": "1",
          "MaxBatchSize": "1",
          "PauseTime" : "PT60M",
          "WaitOnResourceSignals": "true"
        }
      }
    },   

    "ContainerInstances": {
      "Type": "AWS::AutoScaling::LaunchConfiguration",
      "Metadata" : {
        "AWS::CloudFormation::Init" : {
          "config" : {
            "commands" : {
              "01_add_instance_to_cluster" : {
                "command" : { "Fn::Join": [ "", [ "#!/bin/bash\n", "echo ECS_CLUSTER=", { "Ref": "ECSCluster" }, " >> /etc/ecs/ecs.config" ] ] }
              }
            },
            "files" : {
              "/etc/cfn/cfn-hup.conf" : {
                "content" : { "Fn::Join" : ["", [
                  "[main]\n",
                  "stack=", { "Ref" : "AWS::StackId" }, "\n",
                  "region=", { "Ref" : "AWS::Region" }, "\n"
                ]]},
                "mode"    : "000400",
                "owner"   : "root",
                "group"   : "root"
              },
              "/etc/cfn/hooks.d/cfn-auto-reloader.conf" : {
                "content": { "Fn::Join" : ["", [
                  "[cfn-auto-reloader-hook]\n",
                  "triggers=post.update\n",
                  "path=Resources.ContainerInstances.Metadata.AWS::CloudFormation::Init\n",
                  "action=/opt/aws/bin/cfn-init -v ",
                  "         --stack ", { "Ref" : "AWS::StackName" },
                  "         --resource ContainerInstances ",
                  "         --region ", { "Ref" : "AWS::Region" }, "\n",
                  "runas=root\n"
                ]]}
              }
            },
            "services" : {
              "sysvinit" : {
                "cfn-hup" : { "enabled" : "true", "ensureRunning" : "true", "files" : ["/etc/cfn/cfn-hup.conf", "/etc/cfn/hooks.d/cfn-auto-reloader.conf"] }
              }
            }
          }
        }
      },
      "Properties": {
        "ImageId" : { "Fn::FindInMap" : [ "AWSRegionToAMI", { "Ref" : "AWS::Region" }, "AMIID" ] },
        "InstanceType"   : { "Ref" : "ECSInstanceType" },
        "IamInstanceProfile": { "Ref": "EC2InstanceProfile" },
        "KeyName"        : { "Ref" : "KeyName" },
        "SecurityGroups": { "Ref" : "ECSSecurityGroup" },
        "UserData"       : { "Fn::Base64" : { "Fn::Join" : ["", [
             "#!/bin/bash -xe\n",
             "yum install -y aws-cfn-bootstrap\n",

             "/opt/aws/bin/cfn-init -v ",
             "         --stack ", { "Ref" : "AWS::StackName" },
             "         --resource ContainerInstances ",
             "         --region ", { "Ref" : "AWS::Region" }, "\n",

             "/opt/aws/bin/cfn-signal -e $? ",
             "         --stack ", { "Ref" : "AWS::StackName" },
             "         --resource ECSAutoScalingGroup ",
             "         --region ", { "Ref" : "AWS::Region" }, "\n"
        ]]}},
        "Tags" : [ {"Key" : "Name", "Value" : "ECS autoscaling instance"} ]
      }
    },

    "service": {
      "Type": "AWS::ECS::Service",
      "DependsOn": ["ECSAutoScalingGroup"],
      "Properties" : {
        "Cluster": {"Ref": "ECSCluster"},
        "DesiredCount": "1",
        "LoadBalancers": [
          {
            "ContainerName": "simple-app",
            "ContainerPort": "80",
            "LoadBalancerName" : { "Ref" : "EcsElasticLoadBalancer" }
          }
        ],
        "Role" : {"Ref":"ECSServiceRole"},
        "TaskDefinition" : {"Ref":"taskdefinition"}
      }
    },

    "ECSServiceRole": {
      "Type": "AWS::IAM::Role",
      "Properties": {
        "AssumeRolePolicyDocument": {
          "Statement": [
            {
              "Effect": "Allow",
              "Principal": {
                "Service": [
                  "ecs.amazonaws.com"
                ]
              },
              "Action": [
                "sts:AssumeRole"
              ]
            }
          ]
        },
        "Path": "/",
        "Policies": [
          {
            "PolicyName": "ecs-service",
            "PolicyDocument": {
              "Statement": [
                {
                  "Effect": "Allow",
                  "Action": [
                    "elasticloadbalancing:Describe*",
                    "elasticloadbalancing:DeregisterInstancesFromLoadBalancer",
                    "elasticloadbalancing:RegisterInstancesWithLoadBalancer",
                    "ec2:Describe*",
                    "ec2:AuthorizeSecurityGroupIngress"
                  ],
                  "Resource": "*"
                }
              ]
            }
          }
        ]
      }
    },

    "EC2Role": {
      "Type": "AWS::IAM::Role",
      "Properties": {
        "AssumeRolePolicyDocument": {
          "Statement": [
            {
              "Effect": "Allow",
              "Principal": {
                "Service": [
                  "ec2.amazonaws.com"
                ]
              },
              "Action": [
                "sts:AssumeRole"
              ]
            }
          ]
        },
        "Path": "/",
        "Policies": [
          {
            "PolicyName": "ecs-service",
            "PolicyDocument": {
              "Statement": [
                {
                  "Effect": "Allow",
                  "Action": [
                    "ecs:CreateCluster",
                    "ecs:DeregisterContainerInstance",
                    "ecs:DiscoverPollEndpoint",
                    "ecs:Poll",
                    "ecs:RegisterContainerInstance",
                    "ecs:StartTelemetrySession",
                    "ecs:Submit*",
                    "logs:CreateLogStream",
                    "logs:PutLogEvents"
                  ],
                  "Resource": "*"
                }
              ]
            }
          }
        ]
      }
    },

    "EC2InstanceProfile": {
      "Type": "AWS::IAM::InstanceProfile",
      "Properties": {
        "Path": "/",
        "Roles": [ { "Ref": "EC2Role" } ]
      }
    }

    "ECSSecurityGroup" : {
      "Type" : "AWS::EC2::SecurityGroup",
      "Properties" : {
        "GroupDescription" : "Fortigate recommended settings.  See marketplace for docs.",
        "VpcId" : { "Ref" : "VPC" },
        "SecurityGroupIngress" : [
           { "IpProtocol" : "tcp", "FromPort" : "22", "ToPort" : "22", "CidrIp" : "0.0.0.0/0" },
           { "IpProtocol" : "tcp", "FromPort" : "80", "ToPort" : "80", "CidrIp" : "0.0.0.0/0" },
           { "IpProtocol" : "icmp", "FromPort" : "-1",  "ToPort" : "-1",  "CidrIp" : {"Fn::GetAtt" : [ "VPC" , "CidrBlock" ]}}
        ],
        "Tags" : [ {"Key" : "Name", "Value" : "ECS Security Group"} ]
      }
    }
  },

  "Outputs" : {
    "ecsservice" : {
      "Value" : { "Ref" : "service" }
    },
    "ecscluster" : {
      "Value" : { "Ref" : "ECSCluster" }
    },
    "taskdef" : {
      "Value" : { "Ref" : "taskdefinition" }
    }
  }
}

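当我创建堆栈时,AutoScaling 组之前的所有资源都已创建完成。AutoScaling 组已创建,实例也已启动;但随后运行状况检查失败,实例被终止,堆栈回滚。CloudFormation 显示 AutoScaling 组创建失败,错误信息为:"Received 0 SUCCESS signal(s) out of 1. Unable to satisfy 100% MinSuccessfulInstancesPercent requirement"(应收到 1 个 SUCCESS 信号,实际收到 0 个,无法满足 100% 的 MinSuccessfulInstancesPercent 要求)。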

迄今为止采取的故障排除步骤:

  • 将 AMI 映射回滚到 AWS 示例模板中列出的版本,结果仍然失败,所以问题不在这里。
  • 将堆栈设置为更长的超时时间(60 分钟)以便通过 SSH 进入并开始探索实例。
  • 看起来 AWS::AutoScaling::LaunchConfiguration 资源中定义的 CloudFormation Init(cfn-init)并没有运行:/etc/ecs/ecs.config 不存在,其他文件也缺失或缺少配置。但在 CloudFormation 控制台的事件日志中,LaunchConfiguration 资源显示已创建完成。

几个问题:

  • 在我们解决这个问题之前,我想我应该问一下是否有一个更新的、经过验证的工作模板发布在某处?
  • “ContainerInstances”资源的 AWS::CloudFormation::Init 代码看起来正常吗?
  • AutoScaling 组本身看起来还好吗?
  • 我怎样才能更好地排查这类配置问题?

答案1

使用此用户数据:

"UserData": { "Fn::Base64": { "Fn::Join": ["", [
  "#!/bin/bash -xe\n",
  "sudo mkdir /etc/ecs/\n",
  "sudo chmod 777 /etc/ecs/\n",
  "echo ECS_CLUSTER=", { "Ref": "ECSCluster" }, " >> /etc/ecs/ecs.config\n",
  "yum install -y aws-cfn-bootstrap\n",
  "/opt/aws/bin/cfn-signal -e $? ",
  "         --stack ", { "Ref": "AWS::StackName" },
  "         --resource ECSAutoScalingGroup ",
  "         --region ", { "Ref": "AWS::Region" }, "\n"
]]}}

并将 "cloudformation:SignalResource" 添加到 EC2Role 的策略文档(PolicyDocument)的 Action 列表中。

相关内容