summaryrefslogtreecommitdiff
path: root/lib/Multi/Sitemap.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Multi/Sitemap.pm')
-rw-r--r--lib/Multi/Sitemap.pm156
1 files changed, 156 insertions, 0 deletions
diff --git a/lib/Multi/Sitemap.pm b/lib/Multi/Sitemap.pm
new file mode 100644
index 00000000..836fce5b
--- /dev/null
+++ b/lib/Multi/Sitemap.pm
@@ -0,0 +1,156 @@
+
+#
+# Multi::Sitemap - The sitemap generator
+#
+
+package Multi::Sitemap;
+
+use strict;
+use warnings;
+use POE;
+use XML::Writer;
+use PerlIO::gzip;
+use DateTime;
+
+
+# Creates the POE session that implements the sitemap generator.
+# Called as a class method; any extra key/value arguments are merged
+# into the session heap after the defaults, so they override 'output'
+# and 'baseurl' when given.
+sub spawn {
+  my ($class, @overrides) = @_;
+
+  # Event names handled by same-named subs in this package.
+  my @handlers = qw| _start cmd_sitemap staticpages vnpages releasepages producerpages finish addurl |;
+
+  # Defaults first, caller-supplied pairs last so the latter win.
+  my %heap = (
+    output => '/www/vndb/www/sitemap.xml.gz',
+    baseurl => 'http://vndb.org',
+    @overrides,
+  );
+
+  return POE::Session->create(
+    package_states => [ $class => \@handlers ],
+    heap => \%heap,
+  );
+}
+
+
+# Session start-up handler: names this session 'sitemap' and registers
+# the 'sitemap' command with the 'core' session so that it is routed
+# to cmd_sitemap.
+sub _start {
+  my $kernel = $_[KERNEL];
+
+  $kernel->alias_set('sitemap');
+  # TODO: add an event to run cmd_sitemap on a daily basis
+  $kernel->call(core => register => qr/^sitemap$/, 'cmd_sitemap');
+}
+
+
+# Entry point of a sitemap run. Remembers the command (ARG0) so it can
+# be reported back to 'core' in finish, resets the URL counter, opens
+# the gzip-compressed output file and writes the XML prologue and the
+# opening <urlset> tag. Dies if the output file cannot be opened.
+sub cmd_sitemap {
+  # Function order:
+  # cmd_sitemap
+  # staticpages
+  # vnpages
+  # releasepages
+  # producerpages
+  # finish
+
+  my ($kernel, $heap) = @_[KERNEL, HEAP];
+
+  $heap->{cmd} = $_[ARG0];
+  $heap->{urls} = 0;
+
+  # Report the OS error ($!) on failure; the regex capture variable $1
+  # holds nothing useful here.
+  open($heap->{io}, '>:gzip', $heap->{output})
+    or die "Cannot open $heap->{output} for writing: $!";
+
+  $heap->{xml} = XML::Writer->new(
+    OUTPUT => $heap->{io},
+    ENCODING => 'UTF-8',
+    DATA_MODE => 1,
+    DATA_INDENT => 1
+  );
+  $heap->{xml}->xmlDecl();
+  $heap->{xml}->comment(q|NOTE: All URL's that require you to login or that may contain usernames are left out.|);
+  $heap->{xml}->startTag('urlset', xmlns => 'http://www.sitemaps.org/schemas/sitemap/0.9');
+
+  # Continue with the first stage of the chain listed above.
+  $kernel->yield('staticpages');
+}
+
+
+# Stage 1: adds the fixed pages (front page, FAQ and the per-letter
+# visual novel / producer listings) and then hands over to vnpages.
+sub staticpages {
+  my $kernel = $_[KERNEL];
+
+  $kernel->call(core => log => 3, 'Adding static pages');
+
+  $kernel->call(sitemap => addurl => '', 'd');
+  $kernel->call(sitemap => addurl => 'faq', 'm');
+
+  # Browse listings: one page per letter plus the catch-all pages.
+  my @listings = (
+    (map { 'v/'.$_ } 'a'..'z'), 'v/all', 'v/cat',
+    (map { 'p/'.$_ } 'a'..'z'), 'p/all',
+  );
+  for my $page (@listings) {
+    $kernel->call(sitemap => addurl => $page, 'w');
+  }
+
+  $kernel->yield('vnpages');
+}
+
+
+# Stage 2: adds one URL per visual novel (and its '/rg' page when the
+# rgraph column is true), then hands over to releasepages.
+# The 'added' column is passed on as lastmod; addurl treats it as an
+# epoch timestamp.
+sub vnpages {
+  my $kernel = $_[KERNEL];
+
+  $kernel->call(core => log => 3, 'Adding visual novel pages');
+
+  my $sth = $Multi::SQL->prepare(q|
+ SELECT v.id, c.added, v.rgraph
+ FROM vn v
+ JOIN vn_rev vr ON vr.id = v.latest
+ JOIN changes c ON vr.id = c.id
+ |);
+  $sth->execute;
+
+  while (my $row = $sth->fetchrow_arrayref) {
+    my ($id, $added, $rgraph) = @$row;
+    $kernel->call(sitemap => addurl => 'v/'.$id, 'w', $added, 0.7);
+    $kernel->call(sitemap => addurl => 'v/'.$id.'/rg', 'w', $added, 0.7) if $rgraph;
+  }
+
+  $kernel->yield('releasepages');
+}
+
+
+# Stage 3: adds one URL per release (priority 0.3), then hands over to
+# producerpages. The 'added' column is passed on as lastmod.
+sub releasepages {
+  my $kernel = $_[KERNEL];
+
+  $kernel->call(core => log => 3, 'Adding release pages');
+
+  my $sth = $Multi::SQL->prepare(q|
+ SELECT r.id, c.added
+ FROM releases r
+ JOIN releases_rev rr ON rr.id = r.latest
+ JOIN changes c ON c.id = rr.id
+ |);
+  $sth->execute;
+
+  while (my $row = $sth->fetchrow_arrayref) {
+    my ($id, $added) = @$row;
+    $kernel->call(sitemap => addurl => 'r/'.$id, 'w', $added, 0.3);
+  }
+
+  $kernel->yield('producerpages');
+}
+
+
+# Stage 4: adds one URL per producer (no explicit priority), then
+# hands over to finish. The 'added' column is passed on as lastmod.
+sub producerpages {
+  my $kernel = $_[KERNEL];
+
+  $kernel->call(core => log => 3, 'Adding producer pages');
+
+  my $sth = $Multi::SQL->prepare(q|
+ SELECT p.id, c.added
+ FROM producers p
+ JOIN producers_rev pr ON pr.id = p.latest
+ JOIN changes c ON c.id = pr.id
+ |);
+  $sth->execute;
+
+  while (my $row = $sth->fetchrow_arrayref) {
+    my ($id, $added) = @$row;
+    $kernel->call(sitemap => addurl => 'p/'.$id, 'w', $added);
+  }
+
+  $kernel->yield('finish');
+}
+
+
+# Final stage: closes the <urlset> element, finishes the XML document,
+# closes the output file, logs the number of URLs written, tells
+# 'core' that the stored command is done and clears the per-run state
+# from the heap.
+sub finish {
+  my ($kernel, $heap) = @_[KERNEL, HEAP];
+
+  $heap->{xml}->endTag('urlset');
+  $heap->{xml}->end();
+
+  # Buffered output is flushed when a write handle is closed, so a
+  # write error may only surface here. Warn rather than die so that
+  # 'core' is still notified and the heap is still cleaned up.
+  close($heap->{io})
+    or warn "Failed to close $heap->{output}: $!";
+
+  $kernel->call(core => log => 2 => 'Wrote %d URLs in the sitemap', $heap->{urls});
+  $kernel->post(core => finish => $heap->{cmd});
+  delete @{$heap}{qw| xml io cmd urls |};
+}
+
+
+# Writes a single <url> element and bumps the URL counter.
+#   ARG0  loc        - path relative to the heap's baseurl
+#   ARG1  changefreq - written as-is when defined
+#   ARG2  lastmod    - epoch timestamp, written as a Y-M-D date when defined
+#   ARG3  priority   - written as-is when defined
+sub addurl { # loc, changefreq, lastmod, priority
+  my ($heap, $loc, $changefreq, $lastmod, $priority) = @_[HEAP, ARG0 .. ARG3];
+  my $xml = $heap->{xml};
+
+  $xml->startTag('url');
+  $xml->dataElement(loc => $heap->{baseurl}.'/'.$loc);
+
+  if (defined $changefreq) {
+    $xml->dataElement(changefreq => $changefreq);
+  }
+  if (defined $lastmod) {
+    my $day = DateTime->from_epoch(epoch => $lastmod)->ymd;
+    $xml->dataElement(lastmod => $day);
+  }
+  if (defined $priority) {
+    $xml->dataElement(priority => $priority);
+  }
+
+  $xml->endTag('url');
+  $heap->{urls}++;
+}
+
+
+1;
+
+