diff options
Diffstat (limited to 'util/OLD/sitemap.pl')
-rw-r--r-- | util/OLD/sitemap.pl | 97 |
1 files changed, 97 insertions, 0 deletions
#!/usr/bin/perl

# util/OLD/sitemap.pl
#
# Writes a gzip-compressed XML sitemap (sitemaps.org 0.9 schema) to
# $sitemapfile. The sitemap lists a few fixed pages, the alphabetical browse
# pages, and one entry per visual novel, producer and release found in the
# database.

use strict;
use warnings;
no warnings 'once';  # presumably for @VNDB::DBLOGIN, which is named only once in this file
use DBI;
use POSIX; # for ceil();
use XML::Writer;
use PerlIO::gzip;
use DateTime;

# configuration
my $sitemapfile = '/www/vndb/www/sitemap.xml.gz';
my $baseurl = 'http://vndb.org';

# one-letter shorthand => value written to the <changefreq> element
my %chfr = qw( a always h hourly d daily w weekly m monthly y yearly n never );

# presumably defines @VNDB::DBLOGIN (the DBI connect arguments) -- verify in global.pl
require '/www/vndb/lib/global.pl';

my $sql = DBI->connect(@VNDB::DBLOGIN,
  { RaiseError => 1, PrintError => 0, AutoCommit => 1, pg_enable_utf8 => 1 });

my $urls = 0;  # number of <url> entries written so far
my $x;         # XML::Writer object, shared between sitemap() and _sm_add()

sitemap();
$sql->disconnect;


# sitemap()
#   Generates the complete sitemap file: opens $sitemapfile through the
#   :gzip layer, writes the <urlset> document and prints a short progress
#   report to STDOUT. Dies if the file cannot be opened or closed; database
#   errors die as well because the handle was created with RaiseError.
sub sitemap {
  print "Creating sitemap...\n";

  # open file and start writing
  open(my $IO, '>:gzip', $sitemapfile)
    or die "Cannot open $sitemapfile for writing: $!\n";
  $x = XML::Writer->new(OUTPUT => $IO, ENCODING => 'UTF-8', DATA_MODE => 1, DATA_INDENT => 1);
  $x->xmlDecl();
  $x->comment(q|NOTE: All URL's that require you to login or that may contain usernames are left out.|);
  $x->startTag('urlset', xmlns => 'http://www.sitemaps.org/schemas/sitemap/0.9');

  # some default pages
  for my $page (
    [ '/',    'd' ],
    [ '/faq', 'm' ],
  ) {
    _sm_add(@$page);
  }

  # some browse pages
  _sm_add('/v/'.$_, 'w') for ('a'..'z', 'all', 'cat');
  _sm_add('/p/'.$_, 'w') for ('a'..'z', 'all');

  # visual novels: lastmod is the "added" value of the latest revision
  my $q = $sql->prepare(q|
    SELECT v.id, c.added, v.rgraph
      FROM vn v
      JOIN vn_rev vr ON vr.id = v.latest
      JOIN changes c ON vr.id = c.id
  |);
  $q->execute;
  while(my $row = $q->fetchrow_arrayref) {
    my($id, $added, $rgraph) = @$row;
    _sm_add('/v'.$id, 'w', $added, 0.7);
#   _sm_add('/v'.$id.'/stats', 'w');
    # the /rg page is only listed when the rgraph column holds a true value
    _sm_add('/v'.$id.'/rg', 'w', $added) if $rgraph;
  }

  # producers
  $q = $sql->prepare(q|
    SELECT p.id, c.added
      FROM producers p
      JOIN producers_rev pr ON pr.id = p.latest
      JOIN changes c ON c.id = pr.id
  |);
  $q->execute;
  while(my $row = $q->fetchrow_arrayref) {
    _sm_add('/p'.$row->[0], 'w', $row->[1]);
  }

  # releases
  $q = $sql->prepare(q|
    SELECT r.id, c.added
      FROM releases r
      JOIN releases_rev rr ON rr.id = r.latest
      JOIN changes c ON c.id = rr.id
  |);
  $q->execute;
  while(my $row = $q->fetchrow_arrayref) {
    _sm_add('/r'.$row->[0], 'w', $row->[1], 0.3);
  }

  # and stop writing
  $x->endTag('urlset');
  $x->end();
  # buffered (compressed) output is flushed here, so write errors show up at close
  close($IO) or die "Cannot finish writing $sitemapfile: $!\n";
  printf "Sitemap created, %d urls added\n", $urls;
}


# _sm_add(loc, changefreq, lastmod, priority)
#   Writes one <url> element and increments the $urls counter.
#     loc         path that is appended to $baseurl (required)
#     changefreq  a key of %chfr, or a literal value that is written as-is;
#                 element is omitted when undef
#     lastmod     passed to DateTime->from_epoch and written as YYYY-MM-DD;
#                 element is omitted when undef
#     priority    written unchanged; element is omitted when undef
sub _sm_add {
  my($loc, $cf, $lastmod, $pri) = @_;
  $x->startTag('url');
  $x->dataElement('loc', $baseurl . $loc);
  $x->dataElement('changefreq', $chfr{$cf} || $cf) if defined $cf;
  $x->dataElement('lastmod', DateTime->from_epoch(epoch => $lastmod)->ymd) if defined $lastmod;
  $x->dataElement('priority', $pri) if defined $pri;
  $x->endTag('url');
  $urls++;
}