version 1.1, 2025/06/27 16:20:30
|
version 1.3, 2025/06/30 02:18:44
|
Line 9
|
Line 9
|
# Licensed AGPL-3.0 |
# Licensed AGPL-3.0 |
# |
# |
# $Log$ |
# $Log$ |
|
# Revision 1.3 2025/06/30 02:18:44 snw |
|
# Updates |
|
# |
|
# Revision 1.2 2025/06/28 00:33:32 snw |
|
# Update locking |
|
# |
# Revision 1.1 2025/06/27 16:20:30 snw |
# Revision 1.1 2025/06/27 16:20:30 snw |
# Add blacklist |
# Add blacklist |
# |
# |
# |
# |
|
|
use strict;
use warnings;

use Getopt::Long;
use DBI;

# pandia blacklist: insert one domain into the `blacklist` table, then
# delete every row of `crawl_queue` and `url_fulltext` whose url_domain
# matches any row of `blacklist`.
#
# Options (each takes a string value):
#   --dbhost      MySQL server host
#   --dbname      database name
#   --dbusername  database user
#   --dbpw        database password
#   --domain      domain to blacklist (required)

# $dbhost and $dbname are interpolated into the DSN and progress messages,
# so they default to '' to stay warning-free when omitted.  $dbusername and
# $dbpw are deliberately left undef when omitted: DBI->connect distinguishes
# undef from '' for the credentials, and the original script passed undef.
my $dbhost = "";
my $dbname = "";
my $dbusername;
my $dbpw;
my $domain;

GetOptions("dbhost=s"     => \$dbhost,
           "dbname=s"     => \$dbname,
           "dbusername=s" => \$dbusername,
           "dbpw=s"       => \$dbpw,
           "domain=s"     => \$domain)
    or die("error in command line arguments");

# Without a domain the INSERT below would bind an undefined value.
die "pandia: --domain is required\n"
    unless defined $domain && length $domain;

# Prepare and execute one SQL statement on $dbh with optional bind values.
# Dies with the driver's error text if either step fails, so the caller
# never reports success for a statement that did not run.
# Returns the executed statement handle.
sub run_statement {
    my ($handle, $sql, @bind_values) = @_;

    my $statement = $handle->prepare($sql)
        or die "pandia: failed to prepare statement: " . $handle->errstr;
    $statement->execute(@bind_values)
        or die "pandia: failed to execute statement: " . $statement->errstr;

    return $statement;
}

# Unbuffer STDOUT so each "..." progress message shows before its step runs.
$| = 1;

print "pandia blacklist v0.0.1\n";
print " Copyright (C) 2025 Coherent Logic Development LLC\n\n";

print "pandia: connecting to $dbname database at $dbhost...";
my $dsn = "DBI:mysql:database=$dbname;host=$dbhost;port=3306;mysql_connect_timeout=5;";

# PrintError is off because every failure is reported through an explicit
# die that already carries the driver's error text.
my $dbh = DBI->connect($dsn, $dbusername, $dbpw, {RaiseError => 0, PrintError => 0});

# A method call is not interpolated inside a double-quoted string, so the
# error text is concatenated explicitly.
die "pandia: failed to connect to MySQL database: " . $DBI::errstr unless $dbh;
print "[OK]\n";

print "pandia: blacklisting domain $domain...";
run_statement($dbh, "INSERT INTO blacklist (url_domain) VALUES (?)", $domain);
print "[OK]\n";

print "pandia: removing blacklisted items from crawl queue...";
run_statement($dbh, "DELETE crawl_queue FROM crawl_queue JOIN blacklist ON crawl_queue.url_domain=blacklist.url_domain");
print "[OK]\n";

print "pandia: removing blacklisted items from index...";
run_statement($dbh, "DELETE url_fulltext FROM url_fulltext JOIN blacklist ON url_fulltext.url_domain=blacklist.url_domain");
print "[OK]\n";

$dbh->disconnect;
|
|
|
|