使用 proxy_pass 配置 nginx,将爬虫(crawler)请求交给 nodejs 处理

使用 proxy_pass 配置 nginx,将爬虫(crawler)请求交给 nodejs 处理

我的配置有一个问题:除了用于爬虫(crawler)的 proxy_pass 之外,其他一切都正常,但爬虫流量从未到达那台服务器。有谁知道如何让同一个 location / { } 块中的两个 proxy_pass 同时工作?

这是我的配置:

# Upstream group "play_backend": the two application servers, both on
# port 1333, that the `location /` block of the HTTPS server proxies to.
upstream play_backend {
    server web01.local:1333;
    server web02.local:1333;
}

# Plain-HTTP (port 80) virtual host for website.com: answers every request
# with a permanent (301) redirect to the same URI on https://website.com.
server {
    server_name website.com;
    listen 80;
    return 301 https://website.com$request_uri;
}

# HTTPS (port 443) virtual host for website.com. Normal traffic is proxied
# to the play_backend upstream group; crawler user agents are meant to be
# proxied to a separate host (this is the part the question is about).
server {
    server_name website.com;
    listen 443;
    # Send the bare domain to the www host. $request_uri already carries the
    # query string, so the trailing "?" stops nginx from appending it again.
    if ($host = website.com) {
        rewrite ^(.*) https://www.website.com:443$request_uri? permanent;
    }

    # NOTE(review): the standalone "ssl on;" directive is obsolete in newer
    # nginx releases ("listen 443 ssl;" replaces it). Left as posted.
    ssl on;
    ssl_certificate /xxx/fullchain.pem;
    ssl_certificate_key /xxx/privkey.pem;

    # HSTS
    add_header Strict-Transport-Security "max-age=63072000; includeSubdomains; preload";

    ## Block http user agent - wget ##
    # Case-insensitive match on the User-Agent header.
    if ($http_user_agent ~* (Wget) ) {
        return 403;
    }

    ## Block Software download user agents ##
    # The regex must not contain whitespace: "libwww-perl" is a single
    # alternative (the posted text had it split by stray spaces, which
    # breaks the `if` condition).
    if ($http_user_agent ~* LWP::Simple|BBBike|wget|libwww-perl|wordpress|WordPress/3.9.2|WordPress/4.0) {
        return 403;
    }

    location / {
        # Default destination: the play_backend upstream group, with the
        # original host, client address and scheme passed along as headers.
        proxy_pass http://play_backend;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Send scrapers/crawlers to test.website.com (192.168.10.2:80).
        # This is a second proxy_pass in the same location, selected by a
        # case-insensitive User-Agent match; it is the construct the
        # question reports as never receiving traffic.
        if ($http_user_agent ~* (googlebot|yahoo|bingbot|baiduspider|yandex|yeti|yodaobot|gigabot|ia_archiver|facebookexternalhit|twitterbot|developers\.google\.com)) {
            proxy_pass http://192.168.10.2:80;
        }
    }

    # Separate prefix location proxied to its own backend, forwarding the
    # same set of request headers as `location /`.
    location /xxxx2 {
        proxy_pass http://xxx.xxx.xxx;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
    }
}

答案1

我现在已经找到了一个确实可行的解决方案,想与大家分享。剩下的唯一问题是如何将客户端的真实 IP 传递给上游服务器。

# Upstream group for regular (non-crawler) traffic: two servers on port 1333.
# NOTE(review): hostnames here are "webb01"/"webb02", while the question
# uses "web01"/"web02" -- confirm which spelling is intended.
upstream play_backend {
    server webb01.local:1333;
    server webb02.local:1333;
}

# Upstream group for crawler/bot traffic: a single server on port 8080.
upstream crawler {
    server crawler.local:8080;
}

# Pick the upstream group name from the User-Agent header: agents matching
# the case-insensitive crawler regex get "crawler", everything else gets
# "play_backend". The result is exposed as $backend.
map $http_user_agent $backend {
    default "play_backend";
    # The regex is unanchored, so the "/1.1" and "/1.2" alternatives are
    # already covered by the plain "facebookexternalhit" alternative.
    ~*(facebot|facebookexternalhit/1.1|facebookexternalhit/1.2|googlebot|yahoo|bingbot|baiduspider|yandex|yeti|yodaobot|gigabot|ia_archiver|facebookexternalhit|twitterbot|developers\.google\.com) "crawler";
}

server {
    location / {
        # $backend holds an upstream group name ("play_backend" or
        # "crawler") chosen per request from the User-Agent header.
        proxy_pass http://$backend;

        # Pass the original host, client address and scheme to the upstream
        # as request headers (same set the question's `location /` uses), so
        # the backend can read the real client IP from X-Real-IP /
        # X-Forwarded-For instead of seeing only nginx's address.
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
    }
}

答案2

推荐使用 map 指令,而不是 if。

# Select the proxy destination from the User-Agent header and expose it as
# $backend: crawler agents (case-insensitive regex) go to xxx.xxx.xxx:80,
# everything else to the "play_backend" upstream group.
map $http_user_agent $backend {
     default play_backend;
     # NOTE(review): if xxx.xxx.xxx stands for a hostname rather than an IP
     # address or an upstream group name, proxy_pass with a variable will
     # need a `resolver` directive to look it up -- confirm.
     "~*(googlebot|yahoo|bingbot|baiduspider|yandex|yeti|yodaobot|gigabot|ia_archiver|facebookexternalhit|twitterbot|developers\.google\.com)"
             xxx.xxx.xxx:80;
}

# Minimal server fragment: every request under "/" is proxied to whatever
# destination $backend evaluates to for that request.
server {
    location / {
         proxy_pass http://$backend;
    }
}

答案3

您的配置其实很奇怪:没有处理 www.website.com 的 server 块,因此对该域名的请求会由 nginx 的默认服务器处理。

这与原始问题无关,但既然您要求提供从 website.com 重定向到 https://www.website.com 的示例,下面就是:

# Plain-HTTP virtual host for both the bare and the www domain: permanently
# redirects to the HTTPS www site. $request_uri keeps the requested path
# and query string, so deep links are not all sent to the home page.
server {
    listen 80;
    server_name website.com www.website.com;
    return 301 https://www.website.com$request_uri;
}

# Port-443 virtual host for the bare domain: permanently redirects to the
# www host. $request_uri keeps the requested path and query string.
# NOTE(review): the TLS settings (ssl on the listen socket, certificate and
# key) are omitted from this abbreviated example; they are still needed for
# the redirect to be served over HTTPS.
server {
    listen 443;
    server_name website.com;
    return 301 https://www.website.com$request_uri;
}

# Port-443 virtual host for www.website.com: the one that actually serves
# the site. The dotted line below is a placeholder for the real directives
# (TLS settings, locations, proxying), not literal nginx syntax.
server {
    listen 443;
    server_name www.website.com;
    .... main configuration for the website ....
}

相关内容