diff options
Diffstat (limited to 'lib/Multi/Sitemap.pm')
-rw-r--r-- | lib/Multi/Sitemap.pm | 154 |
1 files changed, 0 insertions, 154 deletions
diff --git a/lib/Multi/Sitemap.pm b/lib/Multi/Sitemap.pm deleted file mode 100644 index 85afae57..00000000 --- a/lib/Multi/Sitemap.pm +++ /dev/null @@ -1,154 +0,0 @@ - -# -# Multi::Sitemap - The sitemap generator -# - -package Multi::Sitemap; - -use strict; -use warnings; -use POE; -use XML::Writer; -use PerlIO::gzip; -use POSIX 'strftime'; -use Time::HiRes 'gettimeofday', 'tv_interval'; - - -sub spawn { - my $p = shift; - POE::Session->create( - package_states => [ - $p => [qw| _start shutdown check_age generate addquery addurl finish |], - ], - heap => { - output => $VNDB::ROOT.'/www/sitemap.xml.gz', - max_age => 24*3600, # seconds - check_delay => 3600, # seconds - @_, - } - ); -} - - -sub _start { - $_[KERNEL]->alias_set('sitemap'); - $_[KERNEL]->yield('check_age'); - $_[KERNEL]->sig(shutdown => 'shutdown'); -} - - -sub shutdown { - $_[KERNEL]->delay('check_age'); - $_[KERNEL]->alias_remove('sitemap'); -} - - -sub check_age { - # check the last modified time of the sitemap, and if it's older than max_age, regenerate it - $_[KERNEL]->yield('generate') if !-f $_[HEAP]{output} || (stat $_[HEAP]{output})[9] < time-$_[HEAP]{max_age}; - - # check sitemap again later - $_[KERNEL]->delay(check_age => $_[HEAP]{check_delay}); -} - - -sub generate { - $_[KERNEL]->call(core => log => '(Re)generating sitemap'); - - $_[HEAP]{urls} = 0; - $_[HEAP]{start} = [ gettimeofday ]; - - open($_[HEAP]{io}, '>:gzip', $_[HEAP]{output}) || die $1; - $_[HEAP]{xml} = new XML::Writer( - OUTPUT => $_[HEAP]{io}, - ENCODING => 'UTF-8', - DATA_MODE => 1, - DATA_INDENT => 1 - ); - $_[HEAP]{xml}->xmlDecl(); - $_[HEAP]{xml}->startTag('urlset', xmlns => 'http://www.sitemaps.org/schemas/sitemap/0.9'); - - # / - $_[KERNEL]->call(sitemap => addurl => '', 'daily'); - - # /d+ - /([0-9]+)$/ && $_[KERNEL]->call(sitemap => addurl => 'd'.$1, 'monthly', [stat $_]->[9]) - for (glob "$VNDB::ROOT/data/docs/*"); - - # /v/[browse] & /p/[browse] - $_[KERNEL]->call(sitemap => addurl => $_, 'weekly') - for (map { 'v/'.$_, 'p/'.$_ } 'a'..'z', 0, 'all'); - - # /v+ - $_[KERNEL]->post(pg => query => q| - SELECT v.id, extract('epoch' from c.added) as added - FROM vn v - JOIN vn_rev vr ON vr.id = v.latest - JOIN changes c ON vr.id = c.id - WHERE v.hidden = FALSE - ORDER BY v.id|, - undef, 'addquery', [ 'v', 0.7 ]); - - # /r+ - $_[KERNEL]->post(pg => query => q| - SELECT r.id, extract('epoch' from c.added) as added - FROM releases r - JOIN releases_rev rr ON rr.id = r.latest - JOIN changes c ON c.id = rr.id - WHERE r.hidden = FALSE - ORDER BY r.id|, - undef, 'addquery', [ 'r', 0.5 ]); - - # /p+ - $_[KERNEL]->post(pg => query => q| - SELECT p.id, extract('epoch' from c.added) as added - FROM producers p - JOIN producers_rev pr ON pr.id = p.latest - JOIN changes c ON c.id = pr.id - WHERE p.hidden = FALSE - ORDER BY p.id|, - undef, 'addquery', [ 'p', 0.3 ]); - - # /g+ - $_[KERNEL]->post(pg => query => q| - SELECT t.id, extract('epoch' from t.added) as added - FROM tags t - WHERE state = 2 - ORDER BY t.id|, - undef, 'addquery', [ 'g', 0.3, 1 ]); -} - - -sub addquery { # num, db-res, [ type, priority, finish ] - $_[KERNEL]->call(sitemap => addurl => $_[ARG2][0].$_->{id}, 'weekly', $_->{added}, $_[ARG2][1]) - for(@{$_[ARG1]}); - $_[KERNEL]->yield('finish') if $_[ARG2][2]; -} - - -sub finish { - $_[HEAP]{xml}->endTag('urlset'); - $_[HEAP]{xml}->end(); - close $_[HEAP]{io}; - - $_[KERNEL]->call(core => log => 'Wrote %d URLs (%.1f kB gzipped) to the sitemap in %.2f seconds', - $_[HEAP]{urls}, (-s $_[HEAP]{output})/1024, tv_interval($_[HEAP]{start})); - - delete @{$_[HEAP]}{qw| xml io start urls |}; -} - - -sub addurl { # loc, changefreq, lastmod, priority - $_[HEAP]{xml}->startTag('url'); - $_[HEAP]{xml}->dataElement(loc => $VNDB::S{url}.'/'.$_[ARG0]); - $_[HEAP]{xml}->dataElement(changefreq => $_[ARG1]) if defined $_[ARG1]; - $_[HEAP]{xml}->dataElement(lastmod => strftime('%Y-%m-%d', gmtime $_[ARG2])) if defined $_[ARG2]; - $_[HEAP]{xml}->dataElement(priority => $_[ARG3]) if defined $_[ARG3]; - $_[HEAP]{xml}->endTag('url'); - $_[HEAP]{urls}++; -} - - -1; - - |