--- pandia/crawler 2025/06/28 00:33:32 1.5 +++ pandia/crawler 2025/07/01 19:20:47 1.6 @@ -1,7 +1,7 @@ #!/usr/bin/env perl # -# $Id: crawler,v 1.5 2025/06/28 00:33:32 snw Exp $ +# $Id: crawler,v 1.6 2025/07/01 19:20:47 snw Exp $ # Copyright (C) 2025 Coherent Logic Development LLC # # Author: Serena Willis @@ -9,6 +9,9 @@ # Licensed AGPL-3.0 # # $Log: crawler,v $ +# Revision 1.6 2025/07/01 19:20:47 snw +# Stop crawling and indexing URLs containing page fragments +# # Revision 1.5 2025/06/28 00:33:32 snw # Update locking # @@ -55,6 +58,11 @@ my $invalid_scheme_skips = 0; sub store_url { my ($url, $parent) = @_; + if (index($url, '#') != -1) { + print "pandia: URL contains a fragment; skipping\n"; + return; + } + if($url ne "" && length($url) <= 255 && substr($url, 0, 6) ne "mailto" && substr($url, 0, 4) eq "http") { my $u = URI->new($url);