Diff for /pandia/crawler between versions 1.5 and 1.6

version 1.5, 2025/06/28 00:33:32 | version 1.6, 2025/07/01 19:20:47
Line 9 Line 9
 # Licensed AGPL-3.0  # Licensed AGPL-3.0
 #  #
 # $Log$  # $Log$
   # Revision 1.6  2025/07/01 19:20:47  snw
   # Stop crawling and indexing URLs containing page fragments
   #
 # Revision 1.5  2025/06/28 00:33:32  snw  # Revision 1.5  2025/06/28 00:33:32  snw
 # Update locking  # Update locking
 #  #
Line 55  my $invalid_scheme_skips = 0; Line 58  my $invalid_scheme_skips = 0;
 sub store_url {  sub store_url {
     my ($url, $parent) = @_;      my ($url, $parent) = @_;
   
       if (index($url, '#') != -1) {
           print "pandia:  URL contains a fragment; skipping\n";
           return;
       }
       
     if($url ne "" && length($url) <= 255 && substr($url, 0, 6) ne "mailto" && substr($url, 0, 4) eq "http") {             if($url ne "" && length($url) <= 255 && substr($url, 0, 6) ne "mailto" && substr($url, 0, 4) eq "http") {       
                   
         my $u = URI->new($url);          my $u = URI->new($url);

Removed from v.1.5  
Changed lines
  Added in v.1.6


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>