Annotation of pandia/blacklist, revision 1.3
1.1 snw 1: #!/usr/bin/env perl
2:
3: #
1.3 ! snw 4: # $Id: blacklist,v 1.2 2025/06/28 00:33:32 snw Exp $
1.1 snw 5: # Copyright (C) 2025 Coherent Logic Development LLC
6: #
7: # Author: Serena Willis <snw@coherent-logic.com>
8: #
9: # Licensed AGPL-3.0
10: #
1.2 snw 11: # $Log: blacklist,v $
1.3 ! snw 12: # Revision 1.2 2025/06/28 00:33:32 snw
! 13: # Update locking
! 14: #
1.2 snw 15: # Revision 1.1 2025/06/27 16:20:30 snw
16: # Add blacklist
17: #
1.1 snw 18: #
19:
20: use Getopt::Long;
1.2 snw 21: use DBI;
22:
use strict;
use warnings;

# Database handle and DSN string; both are assigned after the command
# line has been parsed.
my $dbh = "";
my $dsn = "";

# Command-line options.  $dbhost and $dbname default to the empty string
# because they are interpolated into the progress message and the DSN;
# $dbusername and $dbpw are left undefined when omitted and are handed to
# DBI->connect in that state.
my $dbhost = "";
my $dbname = "";
my $dbusername;
my $dbpw;
my $domain;

GetOptions("dbhost=s"     => \$dbhost,
           "dbname=s"     => \$dbname,
           "dbusername=s" => \$dbusername,
           "dbpw=s"       => \$dbpw,
           "domain=s"     => \$domain)
    or die("error in command line arguments");

# Without a domain there is nothing to blacklist; stop before touching the
# database rather than inserting an undefined value.
die "pandia: --domain is required\n"
    unless defined $domain && length $domain;
32:
# Turn on autoflush for STDOUT so each progress message, which is printed
# without a trailing newline, appears before the step it announces runs.
$| = 1;
print "pandia blacklist v0.0.1\n";
print " Copyright (C) 2025 Coherent Logic Development LLC\n\n";

print "pandia: connecting to $dbname database at $dbhost...";
$dsn = "DBI:mysql:database=$dbname;host=$dbhost;port=3306;mysql_connect_timeout=5;";
$dbh = DBI->connect($dsn, $dbusername, $dbpw, {RaiseError => 0, PrintError => 1});
# A method call is not interpolated inside a double-quoted string, so the
# connection error is reported through the $DBI::errstr variable instead.
die "pandia: failed to connect to MySQL database: $DBI::errstr" unless $dbh;
print "[OK]\n";
42:
# Record the domain in the blacklist table; the value is bound through a
# placeholder rather than interpolated into the SQL.
print "pandia: blacklisting domain $domain...";
my $sth = $dbh->prepare("INSERT INTO blacklist (url_domain) VALUES (?)")
    or die "pandia: failed to prepare blacklist insert: " . $dbh->errstr;
# The connection is opened with RaiseError disabled, so the return value
# must be checked here; otherwise "[OK]" would be printed after a failure.
$sth->execute($domain)
    or die "pandia: failed to blacklist domain $domain: " . $sth->errstr;
print "[OK]\n";
47:
# Delete every crawl_queue row whose url_domain matches a row in blacklist.
print "pandia: removing blacklisted items from crawl queue...";
# Reuse the statement handle variable declared earlier instead of declaring
# a second "my $sth" in the same scope.
$sth = $dbh->prepare("DELETE crawl_queue FROM crawl_queue JOIN blacklist ON crawl_queue.url_domain=blacklist.url_domain")
    or die "pandia: failed to prepare crawl queue cleanup: " . $dbh->errstr;
# RaiseError is disabled on the connection, so check the result explicitly
# before reporting success.
$sth->execute()
    or die "pandia: failed to remove blacklisted items from crawl queue: " . $sth->errstr;
print "[OK]\n";
52:
# Delete every url_fulltext row whose url_domain matches a row in blacklist.
print "pandia: removing blacklisted items from index...";
$sth = $dbh->prepare("DELETE url_fulltext FROM url_fulltext JOIN blacklist ON url_fulltext.url_domain=blacklist.url_domain")
    or die "pandia: failed to prepare index cleanup: " . $dbh->errstr;
# RaiseError is disabled on the connection, so check the result explicitly
# before reporting success.
$sth->execute()
    or die "pandia: failed to remove blacklisted items from index: " . $sth->errstr;
print "[OK]\n";

# Last database operation in the script: close the connection explicitly.
$dbh->disconnect;
! 57:
! 58:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>