summaryrefslogtreecommitdiff
path: root/util/OLD/sitemap.pl
blob: 6fde2f8073c0222be069046029f0648266665a76 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/perl

# Where the gzip-compressed sitemap is written.
my $sitemapfile = '/www/vndb/www/sitemap.xml.gz';

# Prefix prepended to every site-relative path placed in a <loc> element.
my $baseurl = 'http://vndb.org';

# One-letter shorthand => value written to the <changefreq> element.
my %chfr = (
  a => 'always',
  h => 'hourly',
  d => 'daily',
  w => 'weekly',
  m => 'monthly',
  y => 'yearly',
  n => 'never',
);


# the code
use strict;
use warnings;
no warnings 'once';
use DBI;
use POSIX; # for ceil();
use XML::Writer;
use PerlIO::gzip;
use DateTime;

require '/www/vndb/lib/global.pl';

# Database handle used by sitemap() for all queries. The connection
# parameters come from @VNDB::DBLOGIN (presumably defined by the global.pl
# required above - TODO confirm). RaiseError is on, so DBI failures die
# instead of returning an error code.
my $sql = DBI->connect(@VNDB::DBLOGIN,
    { RaiseError => 1, PrintError => 0, AutoCommit => 1, pg_enable_utf8 => 1 });

# Number of <url> entries written so far; incremented by _sm_add().
my $urls = 0;
# XML::Writer instance; assigned in sitemap() and used by _sm_add().
my $x;

# Generate the sitemap.
sitemap();

# Generate the complete sitemap and write it, gzip-compressed, to
# $sitemapfile.
#
# The sitemap contains a few fixed pages, the alphabetical browse pages and
# one entry for every visual novel, producer and release found in the
# database (queried through the file-level $sql handle). Entries are written
# by _sm_add() through the shared XML::Writer object in $x.
#
# Takes no arguments. Dies if the output file cannot be opened or closed, or
# (because the handle uses RaiseError) if any database call fails.
sub sitemap {
  print "Creating sitemap...\n";
 # open file and start writing
 # ($! holds the OS error; the original died with the unrelated regex
 #  capture variable $1, which produced an empty error message)
  open(my $IO, '>:gzip', $sitemapfile)
    or die "Cannot open $sitemapfile for writing: $!";
  $x = XML::Writer->new(OUTPUT => $IO, ENCODING => 'UTF-8', DATA_MODE => 1, DATA_INDENT => 1);
  $x->xmlDecl();
  $x->comment(q|NOTE: All URL's that require you to login or that may contain usernames are left out.|);
  $x->startTag('urlset', xmlns => 'http://www.sitemaps.org/schemas/sitemap/0.9');

 # some default pages
  _sm_add(@$_) foreach (
    [ '/', 'd' ],
    [ '/faq', 'm' ],
  );

 # some browse pages
  _sm_add('/v/'.$_, 'w') for ('a'..'z', 'all', 'cat');
  _sm_add('/p/'.$_, 'w') for ('a'..'z', 'all');

 # visual novels
  my $q = $sql->prepare(q|
    SELECT v.id, c.added, v.rgraph
    FROM vn v
    JOIN vn_rev vr ON vr.id = v.latest
    JOIN changes c ON vr.id = c.id
  |); $q->execute;
  while(my $row = $q->fetchrow_arrayref) {
    my($id, $added, $rgraph) = @$row;
    _sm_add('/v'.$id, 'w', $added, 0.7);
#    _sm_add('/v'.$id.'/stats', 'w');
   # the relation graph page is only listed when the rgraph column is set
    _sm_add('/v'.$id.'/rg', 'w', $added) if $rgraph;
  }

 # producers
  $q = $sql->prepare(q|
    SELECT p.id, c.added
    FROM producers p
    JOIN producers_rev pr ON pr.id = p.latest
    JOIN changes c ON c.id = pr.id
  |); $q->execute;
  while(my $row = $q->fetchrow_arrayref) {
    _sm_add('/p'.$row->[0], 'w', $row->[1]);
  }

 # releases
  $q = $sql->prepare(q|
    SELECT r.id, c.added
    FROM releases r
    JOIN releases_rev rr ON rr.id = r.latest
    JOIN changes c ON c.id = rr.id
  |); $q->execute;
  while(my $row = $q->fetchrow_arrayref) {
    _sm_add('/r'.$row->[0], 'w', $row->[1], 0.3);
  }


 # and stop writing
  $x->endTag('urlset');
  $x->end();
 # buffered (and compressed) output is only flushed here, so a failed
 # close means the sitemap on disk is incomplete
  close($IO) or die "Cannot close $sitemapfile: $!";
  printf "Sitemap created, %d urls added\n", $urls;
}



# Write a single <url> entry to the sitemap through the shared XML writer
# in $x and count it in $urls.
#
#   $loc      site-relative path; $baseurl is prepended to form <loc>
#   $cf       optional change frequency: either a one-letter key of %chfr
#             or a literal value, which is written unchanged
#   $lastmod  optional epoch timestamp, written as the date part only
#   $pri      optional value for the <priority> element
sub _sm_add {
  my($loc, $cf, $lastmod, $pri) = @_;

  $x->startTag('url');
  $x->dataElement('loc', $baseurl . $loc);

  if(defined $cf) {
   # translate the shorthand, fall back to the value as given
    my $freq = $chfr{$cf} || $cf;
    $x->dataElement('changefreq', $freq);
  }

  if(defined $lastmod) {
    my $date = DateTime->from_epoch(epoch => $lastmod);
    $x->dataElement('lastmod', $date->ymd);
  }

  if(defined $pri) {
    $x->dataElement('priority', $pri);
  }

  $x->endTag('url');
  $urls++;
}