version 1.2, 2025/06/27 02:14:47
|
version 1.3, 2025/06/27 16:20:30
|
Line 9
|
Line 9
|
# Licensed AGPL-3.0 |
# Licensed AGPL-3.0 |
# |
# |
# $Log$ |
# $Log$ |
|
# Revision 1.3 2025/06/27 16:20:30 snw |
|
# Add blacklist |
|
# |
# Revision 1.2 2025/06/27 02:14:47 snw |
# Revision 1.2 2025/06/27 02:14:47 snw |
# Initial operational capability |
# Initial operational capability |
# |
# |
Line 25 use Lingua::Stem;
|
Line 28 use Lingua::Stem;
|
use DBI; |
use DBI; |
use Data::Dumper; |
use Data::Dumper; |
use Try::Tiny; |
use Try::Tiny; |
|
use Fcntl qw(:flock); |
|
|
my $dbh = ""; |
my $dbh = ""; |
my $dsn = ""; |
my $dsn = ""; |
Line 33 $| = 1;
|
Line 37 $| = 1;
|
print "pandia indexer v0.0.1\n"; |
print "pandia indexer v0.0.1\n"; |
print " Copyright (C) 2025 Coherent Logic Development LLC\n\n"; |
print " Copyright (C) 2025 Coherent Logic Development LLC\n\n"; |
|
|
|
open my $file, ">", "pandia_indexer.lock" or die $!; |
|
flock $file, LOCK_EX|LOCK_NB or die "Unable to lock file $!"; |
|
|
GetOptions("dbhost=s" => \$dbhost, |
GetOptions("dbhost=s" => \$dbhost, |
"dbname=s" => \$dbname, |
"dbname=s" => \$dbname, |
"dbusername=s" => \$dbusername, |
"dbusername=s" => \$dbusername, |
Line 113 while (my $hashref = $sth->fetchrow_hash
|
Line 120 while (my $hashref = $sth->fetchrow_hash
|
$sthd->execute($url); |
$sthd->execute($url); |
|
|
my $sth = $dbh->prepare("INSERT INTO keywords (word, url, url_domain, word_count) VALUES (?, ?, ?, ?)"); |
my $sth = $dbh->prepare("INSERT INTO keywords (word, url, url_domain, word_count) VALUES (?, ?, ?, ?)"); |
my $sths = $dbh->prepare("SELECT word_count FROM keywords WHERE word=?"); |
my $sths = $dbh->prepare("SELECT word_count FROM keywords WHERE word=? AND url=?"); |
my $sthu = $dbh->prepare("UPDATE keywords SET word_count=word_count + 1 WHERE word=? AND url=?"); |
my $sthu = $dbh->prepare("UPDATE keywords SET word_count=word_count + 1 WHERE word=? AND url=?"); |
foreach my $word (@words) { |
foreach my $word (@words) { |
$word =~ s/[^\x00-\x7F]//g; |
$word =~ s/[^\x00-\x7F]//g; |
$sths->execute($word); |
$sths->execute($word, $url); |
|
|
if($sths->rows > 0) { |
if($sths->rows > 0) { |
$sthu->execute($word, $url); |
$sthu->execute($word, $url); |