File:  [Coherent Logic Development] / pandia / blacklist
Revision 1.3: download - view: text, annotated - select for diffs
Mon Jun 30 02:18:44 2025 UTC (2 weeks, 6 days ago) by snw
Branches: MAIN
CVS tags: HEAD
Updates

#!/usr/bin/env perl

# 
# $Id: blacklist,v 1.3 2025/06/30 02:18:44 snw Exp $
#  Copyright (C) 2025 Coherent Logic Development LLC
#
# Author: Serena Willis <snw@coherent-logic.com>
#
# Licensed AGPL-3.0
#
# $Log: blacklist,v $
# Revision 1.3  2025/06/30 02:18:44  snw
# Updates
#
# Revision 1.2  2025/06/28 00:33:32  snw
# Update locking
#
# Revision 1.1  2025/06/27 16:20:30  snw
# Add blacklist
#
#

use Getopt::Long;
use DBI;

# Strictures are enabled here (pragmas are lexically scoped) so that every
# variable below must be declared and undefined values are reported.
use strict;
use warnings;

#
# Main script: add one domain to the `blacklist` table, then purge every
# blacklisted domain's rows from `crawl_queue` and `url_fulltext`.
#
# Command-line options:
#   --dbhost      MySQL server host (required)
#   --dbname      database name (required)
#   --dbusername  database user (optional; passed to DBI as-is)
#   --dbpw        database password (optional; passed to DBI as-is)
#   --domain      domain to blacklist (required)
#
# Exits non-zero (via die) on bad arguments, a failed connection, or any
# failed SQL statement, so "[OK]" is only printed for steps that succeeded.
#
my ($dbhost, $dbname, $dbusername, $dbpw, $domain);

GetOptions("dbhost=s"     => \$dbhost,
           "dbname=s"     => \$dbname,
           "dbusername=s" => \$dbusername,
           "dbpw=s"       => \$dbpw,
           "domain=s"     => \$domain)
    or die("error in command line arguments");

# Refuse to run without the values the statements below cannot work without;
# an undefined domain would otherwise be bound as NULL in the INSERT.
my %required = (dbhost => $dbhost, dbname => $dbname, domain => $domain);
for my $option (sort keys %required) {
    my $value = $required{$option};
    die "pandia:  missing required option --$option\n"
        unless defined $value && length $value;
}

$| = 1;    # unbuffered output so each "step..." appears before its "[OK]"
print "pandia blacklist v0.0.1\n";
print " Copyright (C) 2025 Coherent Logic Development LLC\n\n";

print "pandia:  connecting to $dbname database at $dbhost...";
my $dsn = "DBI:mysql:database=$dbname;host=$dbhost;port=3306;mysql_connect_timeout=5;";

# Connect with RaiseError off so a failed connect returns undef and we can
# report it with our own message; PrintError is off to avoid a duplicate.
my $dbh = DBI->connect($dsn, $dbusername, $dbpw, {RaiseError => 0, PrintError => 0});
die "pandia:  failed to connect to MySQL database: $DBI::errstr" unless $dbh;
print "[OK]\n";

# From here on any prepare/execute failure aborts the script instead of
# falling through to a misleading "[OK]".
$dbh->{RaiseError} = 1;

print "pandia:  blacklisting domain $domain...";
my $insert_sth = $dbh->prepare("INSERT INTO blacklist (url_domain) VALUES (?)");
$insert_sth->execute($domain);
print "[OK]\n";

# Both purges join against the whole blacklist table, so they remove rows
# for every blacklisted domain, not just the one added above.
print "pandia:  removing blacklisted items from crawl queue...";
my $queue_sth = $dbh->prepare("DELETE crawl_queue FROM crawl_queue JOIN blacklist ON crawl_queue.url_domain=blacklist.url_domain");
$queue_sth->execute();
print "[OK]\n";

print "pandia:  removing blacklisted items from index...";
my $index_sth = $dbh->prepare("DELETE url_fulltext FROM url_fulltext JOIN blacklist ON url_fulltext.url_domain=blacklist.url_domain");
$index_sth->execute();
print "[OK]\n";

$dbh->disconnect;



FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>