#!/usr/bin/env perl

#
# $Id: blacklist,v 1.3 2025/06/30 02:18:44 snw Exp $
#  Copyright (C) 2025 Coherent Logic Development LLC
#
# Author: Serena Willis
#
# Licensed AGPL-3.0
#
# $Log: blacklist,v $
# Revision 1.3  2025/06/30 02:18:44  snw
# Updates
#
# Revision 1.2  2025/06/28 00:33:32  snw
# Update locking
#
# Revision 1.1  2025/06/27 16:20:30  snw
# Add blacklist
#
#
# Purpose:
#   Adds one domain to the `blacklist` table, then deletes every row of
#   `crawl_queue` and `url_fulltext` whose url_domain appears in `blacklist`.
#
# Usage:
#   blacklist --dbhost=HOST --dbname=NAME --dbusername=USER --dbpw=PASSWORD \
#             --domain=DOMAIN
#
#   --domain and --dbname are required; --dbhost defaults to "localhost".
#
# Exit status:
#   Zero on success. Dies (non-zero) on bad arguments, on connection failure,
#   or as soon as any SQL statement fails.
#

use strict;
use warnings;

use Getopt::Long;
use DBI;

my ($dbhost, $dbname, $dbusername, $dbpw, $domain);

GetOptions("dbhost=s"     => \$dbhost,
           "dbname=s"     => \$dbname,
           "dbusername=s" => \$dbusername,
           "dbpw=s"       => \$dbpw,
           "domain=s"     => \$domain)
    or die("error in command line arguments");

# Refuse to run without the values the statements below depend on; an
# undefined domain would otherwise be bound as NULL in the INSERT.
die "pandia: --domain is required\n"
    unless defined $domain && length $domain;
die "pandia: --dbname is required\n"
    unless defined $dbname && length $dbname;
$dbhost = 'localhost' unless defined $dbhost && length $dbhost;

# Unbuffered STDOUT so each "..." progress prefix appears before its result.
$| = 1;

print "pandia blacklist v0.0.1\n";
print " Copyright (C) 2025 Coherent Logic Development LLC\n\n";

print "pandia: connecting to $dbname database at $dbhost...";

my $dsn = "DBI:mysql:database=$dbname;host=$dbhost;port=3306;mysql_connect_timeout=5;";

# PrintError is off because every failure is reported explicitly below;
# leaving it on would print each error twice.
my $dbh = DBI->connect($dsn, $dbusername, $dbpw,
                       {RaiseError => 0, PrintError => 0});
unless ($dbh) {
    print "[FAIL]\n";
    # A method call does not interpolate inside a string; use the
    # package variable DBI sets when connect() fails.
    die "pandia: failed to connect to MySQL database: $DBI::errstr\n";
}
print "[OK]\n";

run_step($dbh,
         "blacklisting domain $domain",
         "INSERT INTO blacklist (url_domain) VALUES (?)",
         $domain);

run_step($dbh,
         "removing blacklisted items from crawl queue",
         "DELETE crawl_queue FROM crawl_queue JOIN blacklist ON crawl_queue.url_domain=blacklist.url_domain");

run_step($dbh,
         "removing blacklisted items from index",
         "DELETE url_fulltext FROM url_fulltext JOIN blacklist ON url_fulltext.url_domain=blacklist.url_domain");

$dbh->disconnect;

# run_step($dbh, $label, $sql, @bind)
#
# Prints "pandia: $label...", runs $sql on $dbh with @bind as placeholder
# values, then prints "[OK]". If the statement fails, prints "[FAIL]" and
# dies with the handle's error string.
#
# Returns whatever $dbh->do returned on success (see the DBI documentation
# for its row-count conventions).
sub run_step {
    my ($handle, $label, $sql, @bind) = @_;

    print "pandia: $label...";

    my $rows = $handle->do($sql, undef, @bind);
    unless (defined $rows) {
        print "[FAIL]\n";
        die "pandia: $label failed: " . $handle->errstr . "\n";
    }

    print "[OK]\n";
    return $rows;
}