--- pandia/Pandia.pm 2025/07/01 06:48:03 1.3 +++ pandia/Pandia.pm 2025/07/01 19:20:47 1.4 @@ -1,7 +1,7 @@ #!/usr/bin/env perl # -# $Id: Pandia.pm,v 1.3 2025/07/01 06:48:03 snw Exp $ +# $Id: Pandia.pm,v 1.4 2025/07/01 19:20:47 snw Exp $ # Copyright (C) 2025 Coherent Logic Development LLC # # Author: Serena Willis @@ -9,6 +9,9 @@ # Licensed AGPL-3.0 # # $Log: Pandia.pm,v $ +# Revision 1.4 2025/07/01 19:20:47 snw +# Stop crawling and indexing URLs containing page fragments +# # Revision 1.3 2025/07/01 06:48:03 snw # Updates # @@ -42,6 +45,11 @@ my $indices_waiting : shared; sub do_index { my ($url, $domain, $dsn, $dbuser, $dbpass, $reindex) = @_; + if (index($url, '#') != -1) { + print "pandia: URL contains a fragment; skipping\n"; + return; + } + print "pandia: thread connecting to MySQL database..."; my $dbh = DBI->connect($dsn, $dbuser, $dbpass, {RaiseError => 0, PrintError => 1});