summary | refs | log | tree | commit | diff
path: root/lib/Multi/Sitemap.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Multi/Sitemap.pm')
-rw-r--r-- lib/Multi/Sitemap.pm 154
1 files changed, 0 insertions, 154 deletions
diff --git a/lib/Multi/Sitemap.pm b/lib/Multi/Sitemap.pm
deleted file mode 100644
index 85afae57..00000000
--- a/lib/Multi/Sitemap.pm
+++ /dev/null
@@ -1,154 +0,0 @@
-
-#
-# Multi::Sitemap - The sitemap generator
-#
-
-package Multi::Sitemap;
-
-use strict;
-use warnings;
-use POE;
-use XML::Writer;
-use PerlIO::gzip;
-use POSIX 'strftime';
-use Time::HiRes 'gettimeofday', 'tv_interval';
-
-
-# Creates the POE session that runs the sitemap generator.
-#
-# Called as a class method: the invocant is used as the package that
-# provides the state handlers. Any remaining arguments are key/value pairs
-# placed in the session heap after the defaults, so they override them.
-# Returns whatever POE::Session->create() returns.
-sub spawn {
-  my ($package, @overrides) = @_;
-
-  # Defaults first, caller-supplied pairs last so the latter win.
-  my %heap = (
-    output      => $VNDB::ROOT.'/www/sitemap.xml.gz',
-    max_age     => 24*3600, # seconds
-    check_delay => 3600,    # seconds
-    @overrides,
-  );
-
-  my @states = qw| _start shutdown check_age generate addquery addurl finish |;
-
-  POE::Session->create(
-    package_states => [ $package => \@states ],
-    heap           => \%heap,
-  );
-}
-
-
-# POE _start handler: registers the 'sitemap' alias, queues the first
-# 'check_age' event and installs 'shutdown' as the handler for the
-# 'shutdown' signal.
-sub _start {
-  my $kernel = $_[KERNEL];
-
-  $kernel->alias_set('sitemap');
-  $kernel->yield('check_age');
-  $kernel->sig(shutdown => 'shutdown');
-}
-
-
-# Handler for the 'shutdown' signal: calls delay('check_age') without a
-# time (which, per the POE::Kernel documentation, clears the pending timer)
-# and then removes the 'sitemap' alias.
-sub shutdown {
-  my $kernel = $_[KERNEL];
-
-  $kernel->delay('check_age');
-  $kernel->alias_remove('sitemap');
-}
-
-
-# Periodic check: queues 'generate' when the sitemap file is missing or its
-# modification time is more than max_age seconds in the past, then
-# re-schedules itself after check_delay seconds.
-sub check_age {
-  my ($kernel, $heap) = @_[KERNEL, HEAP];
-  my $file = $heap->{output};
-
-  # The -f test comes first so stat is never evaluated for a missing file.
-  my $stale = !-f $file || (stat $file)[9] < time - $heap->{max_age};
-  $kernel->yield('generate') if $stale;
-
-  # look at the sitemap again later
-  $kernel->delay(check_age => $heap->{check_delay});
-}
-
-
-# POE event handler: (re)generates the gzipped sitemap at $_[HEAP]{output}.
-#
-# Resets the URL counter and the start time, opens the output file through
-# the :gzip layer, writes the XML declaration and the <urlset> start tag,
-# adds the static URLs synchronously through the 'addurl' state, and then
-# posts four queries to the 'pg' session, naming 'addquery' as the event
-# that receives each result together with [ type, priority, finish ].
-# Only the last query (tags) sets the finish flag, which makes 'addquery'
-# queue 'finish'.
-# NOTE(review): this presumably relies on 'pg' answering the queries in the
-# order they were posted - confirm against the pg session.
-#
-# Dies with the path and the OS error if the output file cannot be opened.
-sub generate {
-  my ($kernel, $heap) = @_[KERNEL, HEAP];
-
-  $kernel->call(core => log => '(Re)generating sitemap');
-
-  $heap->{urls} = 0;
-  $heap->{start} = [ gettimeofday ];
-
-  # Report $! (the OS error) together with the path; the previous code died
-  # with $1, which holds an unrelated regex capture.
-  open($heap->{io}, '>:gzip', $heap->{output})
-    or die "Cannot open $heap->{output} for writing: $!";
-  $heap->{xml} = XML::Writer->new(
-    OUTPUT => $heap->{io},
-    ENCODING => 'UTF-8',
-    DATA_MODE => 1,
-    DATA_INDENT => 1
-  );
-  $heap->{xml}->xmlDecl();
-  $heap->{xml}->startTag('urlset', xmlns => 'http://www.sitemaps.org/schemas/sitemap/0.9');
-
-  # /
-  $kernel->call(sitemap => addurl => '', 'daily');
-
-  # /d+ : one URL per file in data/docs whose name ends in digits, using
-  # the file's modification time as lastmod
-  for my $doc (glob "$VNDB::ROOT/data/docs/*") {
-    next if $doc !~ /([0-9]+)$/;
-    my $page = 'd'.$1;
-    # scalar assignment, so a failed stat gives undef rather than an empty list
-    my $mtime = (stat $doc)[9];
-    $kernel->call(sitemap => addurl => $page, 'monthly', $mtime);
-  }
-
-  # /v/[browse] & /p/[browse]
-  $kernel->call(sitemap => addurl => $_, 'weekly')
-    for (map { ('v/'.$_, 'p/'.$_) } 'a'..'z', 0, 'all');
-
-  # /v+
-  $kernel->post(pg => query => q|
-    SELECT v.id, extract('epoch' from c.added) as added
-    FROM vn v
-    JOIN vn_rev vr ON vr.id = v.latest
-    JOIN changes c ON vr.id = c.id
-    WHERE v.hidden = FALSE
-    ORDER BY v.id|,
-    undef, 'addquery', [ 'v', 0.7 ]);
-
-  # /r+
-  $kernel->post(pg => query => q|
-    SELECT r.id, extract('epoch' from c.added) as added
-    FROM releases r
-    JOIN releases_rev rr ON rr.id = r.latest
-    JOIN changes c ON c.id = rr.id
-    WHERE r.hidden = FALSE
-    ORDER BY r.id|,
-    undef, 'addquery', [ 'r', 0.5 ]);
-
-  # /p+
-  $kernel->post(pg => query => q|
-    SELECT p.id, extract('epoch' from c.added) as added
-    FROM producers p
-    JOIN producers_rev pr ON pr.id = p.latest
-    JOIN changes c ON c.id = pr.id
-    WHERE p.hidden = FALSE
-    ORDER BY p.id|,
-    undef, 'addquery', [ 'p', 0.3 ]);
-
-  # /g+ (last query: the trailing 1 is the finish flag read by 'addquery')
-  $kernel->post(pg => query => q|
-    SELECT t.id, extract('epoch' from t.added) as added
-    FROM tags t
-    WHERE state = 2
-    ORDER BY t.id|,
-    undef, 'addquery', [ 'g', 0.3, 1 ]);
-}
-
-
-# Receives a query result and adds one URL per row.
-#
-# ARG1 is an array ref of rows (hash refs with 'id' and 'added'), ARG2 is
-# [ type, priority, finish ]: the URL is type.id, 'added' is passed on as
-# lastmod, and a true finish flag queues the 'finish' event afterwards.
-sub addquery { # num, db-res, [ type, priority, finish ]
-  my ($kernel, $rows, $opts) = @_[KERNEL, ARG1, ARG2];
-  my ($type, $priority, $is_last) = @$opts;
-
-  for my $row (@$rows) {
-    $kernel->call(sitemap => addurl => $type.$row->{id}, 'weekly', $row->{added}, $priority);
-  }
-
-  $kernel->yield('finish') if $is_last;
-}
-
-
-# Completes the sitemap: closes the <urlset> element, ends the XML
-# document, closes the output handle, logs a summary (URL count, size of
-# the written file in kB, elapsed seconds) and removes the per-run keys
-# from the heap.
-sub finish {
-  my ($kernel, $heap) = @_[KERNEL, HEAP];
-
-  $heap->{xml}->endTag('urlset');
-  $heap->{xml}->end();
-
-  # Buffered output is flushed when the handle is closed, so a failure here
-  # means the file on disk may be incomplete. Report it instead of dropping
-  # it silently; the call uses the same core log form as the summary below.
-  close $heap->{io}
-    or $kernel->call(core => log => 'Error closing sitemap file %s: %s', $heap->{output}, "$!");
-
-  $kernel->call(core => log => 'Wrote %d URLs (%.1f kB gzipped) to the sitemap in %.2f seconds',
-    $heap->{urls}, (-s $heap->{output})/1024, tv_interval($heap->{start}));
-
-  delete @{$heap}{qw| xml io start urls |};
-}
-
-
-# Writes one <url> element to the sitemap and increments the URL counter.
-#
-# ARG0 is the location relative to $VNDB::S{url}; ARG1 (changefreq),
-# ARG2 (lastmod, passed to gmtime and written as YYYY-MM-DD) and
-# ARG3 (priority) are optional: each child element is only written when its
-# argument is defined.
-sub addurl { # loc, changefreq, lastmod, priority
-  my ($heap, $loc, $changefreq, $lastmod, $priority) = @_[HEAP, ARG0 .. ARG3];
-  my $xml = $heap->{xml};
-
-  # Optional children, in the order they are emitted; undef means "skip".
-  my @optional = (
-    [ changefreq => $changefreq ],
-    [ lastmod    => defined($lastmod) ? strftime('%Y-%m-%d', gmtime $lastmod) : undef ],
-    [ priority   => $priority ],
-  );
-
-  $xml->startTag('url');
-  $xml->dataElement(loc => $VNDB::S{url}.'/'.$loc);
-  for my $field (@optional) {
-    my ($tag, $value) = @$field;
-    $xml->dataElement($tag => $value) if defined $value;
-  }
-  $xml->endTag('url');
-
-  $heap->{urls}++;
-}
-
-
-1;
-
-