version 1.2, 2025/06/25 19:38:48
|
version 1.3, 2025/06/27 02:14:47
|
Line 9
|
Line 9
|
# Licensed AGPL-3.0 |
# Licensed AGPL-3.0 |
# |
# |
# $Log$ |
# $Log$ |
|
# Revision 1.3 2025/06/27 02:14:47 snw |
|
# Initial operational capability |
|
# |
# Revision 1.2 2025/06/25 19:38:48 snw |
# Revision 1.2 2025/06/25 19:38:48 snw |
# Add indexer |
# Add indexer |
# |
# |
Line 114 sub crawl_url {
|
Line 117 sub crawl_url {
|
my $tree = HTML::TreeBuilder->new(); |
my $tree = HTML::TreeBuilder->new(); |
|
|
my $response = $http->get($url); |
my $response = $http->get($url); |
|
|
|
if(not $response->{success}) { |
|
print "pandia: http failure; skipping $url\n"; |
|
next; |
|
} |
|
|
|
if(exists $response->{redirects}) { |
|
print "pandia: redirects detected; skipping $url\n"; |
|
next; |
|
} |
|
|
$tree->parse($response->{content}); |
$tree->parse($response->{content}); |
|
|
my @links = $tree->find_by_tag_name('a'); |
my @links = $tree->find_by_tag_name('a'); |
Line 147 sub crawl_url {
|
Line 161 sub crawl_url {
|
} |
} |
|
|
$| = 1; |
$| = 1; |
|
|
print "pandia crawler v0.0.1\n"; |
print "pandia crawler v0.0.1\n"; |
print " Copyright (C) 2025 Coherent Logic Development LLC\n\n"; |
print " Copyright (C) 2025 Coherent Logic Development LLC\n\n"; |
|
|