Server serves old cached/in-memory data to the LinkChecker user agent

I'm using the Perl script below to fill the server's cache/memory by visiting all links with the wget command and the LinkChecker user agent, so I can check for broken links. When I run it, it keeps fetching website data from long ago, so it ends up filling the cache/memory with old data.

Does this have anything to do with the LinkChecker user agent, or with something else? I don't understand where the old data comes from, because I have restarted the server several times.
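A minimal sketch of how the response headers for one of these pages could be inspected with the same LinkChecker user agent (the test URL is hypothetical; which cache headers show up, such as Age, X-Cache or Via, depends on whatever proxy or cache sits in front of the server):

#!/usr/bin/perl
# Sketch: fetch one page with the LinkChecker user agent and print the
# response headers, to see whether an intermediate cache is answering.
use strict;
use warnings;

my $url = "http://www.website1.nl/c/example.html";   # hypothetical test page

# -S/--server-response prints the HTTP response headers (to stderr),
# --spider checks the URL without downloading the body.
my $headers = `wget -S --spider --user-agent="LinkChecker" "$url" 2>&1`;

for my $line (split /\n/, $headers) {
    print "$line\n" if $line =~ /^\s*(HTTP\/|Age:|X-Cache|Via:|Date:|Last-Modified:|Expires:)/i;
}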

#!/usr/bin/perl
# Warm the server cache: request every archived /c/*.html page via wget,
# using the "LinkChecker" user agent.
use DBI;          
use Thread;  
use Time::HiRes;
use POSIX;
use Date::Calc qw(:all);  
use Time::Local;            
use Time::localtime;

while (1) {    # repeat the cache warm-up pass indefinitely
    local $now = time;
    $dbDb = "webshops_db";
    $dBUser = "user";
    $dbPass = "password";
    $dbHost = "localhost";                                                                                                                                          
    $query = "SELECT page, websitesId FROM `visitorsPagesArchive` WHERE websitesId > 0 AND LEFT(`page`, 3) = '/c/'  AND RIGHT(`page`, 
5) = '.html' GROUP BY 
`page`, `websitesId` ORDER BY 
LENGTH(`page`) LIMIT 
500000";
    print "Connecting to DB\n";
    $dbh = DBI->connect("DBI:mysql:$dbDb:$dbHost", $dBUser, $dbPass)
        or die "Cannot connect to DB: $DBI::errstr\n";
    print "Connected to DB\n";
    print "Prepare Query\n";
    $sqlQuery  = $dbh->prepare($query);

    print "Excecute Query \n";                          
    $rv = $sqlQuery->execute
    or print "can't execute the query: $sqlQuery->errstr";
    print "Executed\n";
    $rowsCount = $sqlQuery->rows;                 
    print "Rows Counted \n";
    print "Starting Bot with $rowsCount Rijen, verdeeld over 10 threads\n";
    $i = 0;
    while (@row= $sqlQuery->fetchrow_array()) {
        my $page = $row[0];
        my $websitesId = $row[1];
        my $load =  get_load_average();

        if ($websitesId == 1) {
            $url = "http://www.website1.nl";
            $oup = `wget -b -o /dev/null -O /dev/null  --user-agent="LinkChecker" "$url$page"`;
        } 
        elsif ($websitesId == 2) {
            $url = "http://www.website2.nl";
            $oup = `wget -b -o /dev/null -O /dev/null  --user-agent="LinkChecker" "$url$page"`;
        }

        elsif ($websitesId == 5) {
            $url = "http://www.website3.nl";
            $oup = `wget -b -o /dev/null -O /dev/null --user-agent="LinkChecker" "$url$page"`;
        }
        elsif ($websitesId == 6) {
            $url = "http://www.website4.be";
            $oup = `wget -b -o /dev/null -O /dev/null --user-agent="LinkChecker" "$url$page"`;
        }


        # Sleep 17 ms between requests
        Time::HiRes::sleep(0.017);
        $tmp = time - $now;

        $percentage = ceil($i / $rowsCount * 100);
        print "$url$page ($tmp seconds / $percentage %)\n";
        $i++;

        if ($load > 20) {
            $tmp = time - $now;
            print "Sleeping for a while, load is $load ($tmp Seconds) ($percentage %)\n";
            sleep(20);
        }


        # Throttle when too many wget processes are still running;
        # the [w]get pattern keeps the grep itself out of the count.
        if (`ps aux | grep [w]get | wc -l` > 160) {
            print "Busy Busy Busy.. Sleeping 10 seconds.. zzzzzzzzzz\n";
            sleep (10);
        }

        # Pause during the Thursday-night backup window
        # (Date::Calc's Day_of_Week: 1 = Monday, so 4 = Thursday).
        if (localtime->hour() == 0 && Day_of_Week(localtime->year() + 1900, localtime->mon() + 1, localtime->mday()) == 4) {
            print "Thursday night, backup time.. sleeping zzzz\n";
            sleep(28800);
        }
    }                    

    $rc = $sqlQuery->finish;        

    # Return the 1-minute load average from /proc/loadavg.
    sub get_load_average {
        open(my $load_fh, '<', '/proc/loadavg') or die "Unable to get server load\n";
        my $load_avg = <$load_fh>;
        close $load_fh;
        my ($one_min_avg) = split /\s/, $load_avg;
        return $one_min_avg;
    }
}

## $oup = `perl /root/crons/test.pl`;
exit;
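If the old data does come from an intermediate cache, a possible variant of the wget call is to add --no-cache, which makes wget send "Pragma: no-cache" and "Cache-Control: no-cache" request headers so the cache is asked for a fresh copy (whether it honours them depends on how the cache is configured). A self-contained sketch with hypothetical values:

#!/usr/bin/perl
# Sketch: same style of wget call as in the script above, plus --no-cache so
# the request asks any intermediate cache for a fresh copy instead of a stored one.
use strict;
use warnings;

my $url  = "http://www.website1.nl";   # hypothetical site for websitesId 1
my $page = "/c/example.html";          # hypothetical page path

my $oup = `wget -b -o /dev/null -O /dev/null --no-cache --user-agent="LinkChecker" "$url$page"`;
print $oup;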
