Diff for /pandia/crawler between versions 1.2 and 1.3

version 1.2, 2025/06/25 19:38:48 | version 1.3, 2025/06/27 02:14:47
Line 9 Line 9
 # Licensed AGPL-3.0  # Licensed AGPL-3.0
 #  #
 # $Log$  # $Log$
   # Revision 1.3  2025/06/27 02:14:47  snw
   # Initial operational capability
   #
 # Revision 1.2  2025/06/25 19:38:48  snw  # Revision 1.2  2025/06/25 19:38:48  snw
 # Add indexer  # Add indexer
 #  #
Line 114  sub crawl_url { Line 117  sub crawl_url {
     my $tree = HTML::TreeBuilder->new();      my $tree = HTML::TreeBuilder->new();
   
     my $response = $http->get($url);      my $response = $http->get($url);
   
       if(not $response->{success}) {
           print "pandia:  http failure; skipping $url\n";
           next;
       }
           
       if(exists $response->{redirects}) {
           print "pandia:  redirects detected; skipping $url\n";
           next;
       }
   
     $tree->parse($response->{content});      $tree->parse($response->{content});
   
     my @links = $tree->find_by_tag_name('a');      my @links = $tree->find_by_tag_name('a');
Line 147  sub crawl_url { Line 161  sub crawl_url {
 }  }
   
 $| = 1;  $| = 1;
   
 print "pandia crawler v0.0.1\n";  print "pandia crawler v0.0.1\n";
 print " Copyright (C) 2025 Coherent Logic Development LLC\n\n";  print " Copyright (C) 2025 Coherent Logic Development LLC\n\n";
   

Legend: Removed from v.1.2 | changed lines | Added in v.1.3


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>