我正在使用下面的 Perl 脚本,通过 wget 命令(用户代理设置为 LinkChecker)访问所有链接,以填充服务器的缓存/内存,同时检查是否有失效的链接。当我运行它时,它会不断获取很久以前的网站数据,因此缓存/内存里填充的都是旧数据。
这和 LinkChecker 用户代理(User-Agent)或其他什么因素有关系吗?我不明白这些旧数据来自哪里,因为我已经重启过服务器好几次了。
#!/usr/bin/perl
use DBI;
use Thread;
use Time::HiRes;
use POSIX;
use Date::Calc qw(:all);
use Time::Local;
use Time::localtime;
use strict;
use warnings;

# Cache-warming bot.
#
# Repeatedly reads a list of (page, websitesId) pairs from the database and
# requests each page with a background wget, throttling itself on system
# load, on the number of running wget processes, and during a weekly window.

# Database connection settings.
my $db_name = "webshops_db";
my $db_user = "user";
my $db_pass = "password";
my $db_host = "localhost";

# Seconds to wait before reconnecting after a failed query, so a broken
# query does not turn the outer loop into a tight reconnect loop.
my $retry_delay = 60;

# Base URL for every websitesId this bot knows how to request.
my %base_url_for = (
    1 => "http://www.website1.nl",
    2 => "http://www.website2.nl",
    5 => "http://www.website3.nl",
    6 => "http://www.website4.be",
);

# NOTE: the URL list is every '/c/....html' page stored in
# visitorsPagesArchive. The query has no date filter, so any page recorded in
# that table, however long ago, is requested again on every pass.
my $query = q{
    SELECT page, websitesId
    FROM `visitorsPagesArchive`
    WHERE websitesId > 0
      AND LEFT(`page`, 3) = '/c/'
      AND RIGHT(`page`, 5) = '.html'
    GROUP BY `page`, `websitesId`
    ORDER BY LENGTH(`page`)
    LIMIT 500000
};

# Return the first field of /proc/loadavg (the one-minute load average).
# Dies if the file cannot be opened.
sub get_load_average {
    open(my $load_fh, '<', '/proc/loadavg')
        or die "Unable to get server load \n";
    my $load_line = <$load_fh>;
    close $load_fh;
    my ($one_min_avg) = split /\s/, $load_line;
    return $one_min_avg;
}

# Start a background wget for $target, discarding the download and the log.
# The command is run in list form (no shell), because the page path comes
# from the database and must never be interpreted by a shell.
# Returns 1 when wget was started, 0 otherwise.
sub start_background_fetch {
    my ($target) = @_;
    my @cmd = (
        'wget', '-b', '-o', '/dev/null', '-O', '/dev/null',
        '--user-agent=LinkChecker', $target,
    );
    my $pid = open(my $wget_out, '-|', @cmd);
    if (!$pid) {
        warn "can't start wget for $target: $!\n";
        return 0;
    }
    # Swallow wget's "continuing in background" notice, as the original
    # backticks did, so it does not clutter the progress output.
    my @discarded = <$wget_out>;
    close $wget_out;
    return 1;
}

# Count the lines of `ps aux` that mention wget. The count includes the grep
# process itself, exactly as in the original threshold check.
sub count_wget_processes {
    my $ps_out = `ps aux | grep wget | wc -l`;
    return ($ps_out =~ /(\d+)/) ? $1 : 0;
}

while (1) {
    my $started_at = time;

    print "Connecting to DB\n";
    my $dbh = DBI->connect("DBI:mysql:$db_name:$db_host", $db_user, $db_pass)
        or die "can't connect to the database: $DBI::errstr\n";
    print "Connected to DB\n";

    print "Prepare Query\n";
    my $sth = $dbh->prepare($query);
    print "Excecute Query \n";
    if (!$sth->execute) {
        # A method call does not interpolate inside a string, so the error
        # text has to be concatenated.
        print "can't execute the query: " . $sth->errstr . "\n";
        $sth->finish;
        $dbh->disconnect;
        sleep($retry_delay);
        next;
    }
    print "Executed\n";

    my $row_count = $sth->rows;
    print "Rows Counted \n";
    print "Starting Bot with $row_count Rijen, verdeeld over 10 threads\n";

    my $done = 0;
    while (my ($page, $websites_id) = $sth->fetchrow_array()) {
        my $load = get_load_average();

        my $base_url = $base_url_for{$websites_id};
        if (!defined $base_url) {
            # No site configured for this id: nothing to request.
            print "Skipping $page: no base URL for websitesId $websites_id\n";
            $done++;
            next;
        }
        start_background_fetch($base_url . $page);

        # Pause about 17 ms between requests.
        Time::HiRes::sleep(0.017);

        my $elapsed = time - $started_at;
        # Guard the division: some drivers report a non-positive row count
        # for SELECT statements until all rows have been fetched.
        my $percentage = $row_count > 0 ? ceil($done / $row_count * 100) : 0;
        print "$base_url$page ($elapsed Seconds / $percentage %))\n";
        $done++;

        if ($load > 20) {
            $elapsed = time - $started_at;
            print "Sleeping for a while, load is $load ($elapsed Seconds) ($percentage %)\n";
            sleep(20);
        }

        if (count_wget_processes() > 160) {
            print "Busy Busy Busy.. Sleeping 10 seconds.. zzzzzzzzzz\n";
            sleep(10);
        }

        # Take one snapshot of the clock so hour and date cannot disagree
        # around midnight.
        my $clock = localtime();
        # NOTE(review): Date::Calc numbers Monday as 1, so 4 is Thursday and
        # this fires between 00:00 and 00:59 on Thursday. The message below is
        # left as written; confirm which night the backup really runs.
        if ($clock->hour() == 0
            && Day_of_Week($clock->year() + 1900, $clock->mon() + 1, $clock->mday()) == 4) {
            print "Thuesday night, backup-time.. Sleepin zzzz\n\r";
            sleep(28800);
        }
    }

    # Release the statement and the connection before the next pass opens a
    # new one.
    $sth->finish;
    $dbh->disconnect;
}
## $oup = `perl /root/crons/test.pl`;
exit;