File:  [Coherent Logic Development] / pandia / blacklist
Revision 1.3: download - view: text, annotated - select for diffs
Mon Jun 30 02:18:44 2025 UTC (4 weeks, 6 days ago) by snw
Branches: MAIN
CVS tags: HEAD
Updates

    1: #!/usr/bin/env perl
    2: 
    3: # 
    4: # $Id: blacklist,v 1.3 2025/06/30 02:18:44 snw Exp $
    5: #  Copyright (C) 2025 Coherent Logic Development LLC
    6: #
    7: # Author: Serena Willis <snw@coherent-logic.com>
    8: #
    9: # Licensed AGPL-3.0
   10: #
   11: # $Log: blacklist,v $
   12: # Revision 1.3  2025/06/30 02:18:44  snw
   13: # Updates
   14: #
   15: # Revision 1.2  2025/06/28 00:33:32  snw
   16: # Update locking
   17: #
   18: # Revision 1.1  2025/06/27 16:20:30  snw
   19: # Add blacklist
   20: #
   21: #
   22: 
   23: use Getopt::Long;
   24: use DBI;
   25: 
   26: my $dbh = "";
   27: my $dsn = "";
   28: 
   29: GetOptions("dbhost=s" => \$dbhost,
   30:            "dbname=s" => \$dbname,
   31:            "dbusername=s" => \$dbusername,
   32:            "dbpw=s" => \$dbpw,
   33: 	   "domain=s" => \$domain)
   34:     or die("error in command line arguments");
   35: 
   36: $| = 1;
   37: print "pandia blacklist v0.0.1\n";
   38: print " Copyright (C) 2025 Coherent Logic Development LLC\n\n";
   39: 
   40: print "pandia:  connecting to $dbname database at $dbhost...";
   41: $dsn = "DBI:mysql:database=$dbname;host=$dbhost;port=3306;mysql_connect_timeout=5;";
   42: $dbh = DBI->connect($dsn, $dbusername, $dbpw, {RaiseError => 0, PrintError => 1});
   43: die "pandia:  failed to connect to MySQL database: DBI->errstr()" unless $dbh;
   44: print "[OK]\n";
   45: 
   46: print "pandia:  blacklisting domain $domain...";
   47: my $sth = $dbh->prepare("INSERT INTO blacklist (url_domain) VALUES (?)");
   48: $sth->execute($domain);
   49: print "[OK]\n";
   50: 
   51: print "pandia:  removing blacklisted items from crawl queue...";
   52: my $sth = $dbh->prepare("DELETE crawl_queue FROM crawl_queue JOIN blacklist ON crawl_queue.url_domain=blacklist.url_domain");
   53: $sth->execute();
   54: print "[OK]\n";
   55: 
   56: print "pandia:  removing blacklisted items from index...";
   57: $sth = $dbh->prepare("DELETE url_fulltext FROM url_fulltext JOIN blacklist ON url_fulltext.url_domain=blacklist.url_domain");
   58: $sth->execute();
   59: print "[OK]\n";
   60: 
   61: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>