diff options
-rw-r--r-- | data/global.pl | 1 | ||||
-rw-r--r-- | lib/Multi/APIDump.pm | 128 | ||||
-rwxr-xr-x | util/dbdump.pl | 82 |
3 files changed, 82 insertions, 129 deletions
diff --git a/data/global.pl b/data/global.pl index 878bdba4..50234141 100644 --- a/data/global.pl +++ b/data/global.pl @@ -281,7 +281,6 @@ our %M = ( log_level => 'trace', modules => { #API => {}, # disabled by default, not really needed - #APIDump => {}, Feed => {}, RG => {}, #Anime => {}, # disabled by default, requires AniDB username/pass diff --git a/lib/Multi/APIDump.pm b/lib/Multi/APIDump.pm deleted file mode 100644 index 79458f8d..00000000 --- a/lib/Multi/APIDump.pm +++ /dev/null @@ -1,128 +0,0 @@ - -# -# Multi::APIDump - Regular dumps of the database for public API stuff -# - -package Multi::APIDump; - -use strict; -use warnings; -use Multi::Core; -use JSON::XS; -use PerlIO::gzip; - - -sub run { - push_watcher schedule 0, 24*3600, \&generate; -} - - -sub tags_gen { - # The subqueries are kinda ugly, but it's convenient to have everything in a single query. - pg_cmd q| - SELECT id, name, description, searchable, applicable, c_items AS vns, cat, - (SELECT string_agg(alias,'$$$-$$$') FROM tags_aliases where tag = id) AS aliases, - (SELECT string_agg(parent::text, ',') FROM tags_parents WHERE tag = id) AS parents - FROM tags WHERE state = 2 - |, undef, sub { - my($res, $time) = @_; - return if pg_expect $res, 1; - my $ws = AE::time; - my @res = $res->rowsAsHashes; - for(@res) { - $_->{id} *= 1; - $_->{meta} = $_->{searchable} ne 't' ? JSON::XS::true : JSON::XS::false; # For backwards compat - $_->{searchable} = $_->{searchable} eq 't' ? JSON::XS::true : JSON::XS::false; - $_->{applicable} = $_->{applicable} eq 't' ? JSON::XS::true : JSON::XS::false; - $_->{vns} *= 1; - $_->{aliases} = [ split /\$\$\$-\$\$\$/, ($_->{aliases}||'') ]; - $_->{parents} = [ map $_*1, split /,/, ($_->{parents}||'') ]; - } - writejson(\@res, "$VNDB::ROOT/www/api/tags.json.gz", $time, $ws); - }; -} - - -sub traits_gen { - pg_cmd q| - SELECT id, name, alias AS aliases, description, searchable, applicable, c_items AS chars, - (SELECT string_agg(parent::text, ',') FROM traits_parents WHERE trait = id) AS parents - FROM traits WHERE state = 2 - |, undef, sub { - my($res, $time) = @_; - return if pg_expect $res, 1; - my $ws = AE::time; - my @res = $res->rowsAsHashes; - for(@res) { - $_->{id} *= 1; - $_->{meta} = $_->{searchable} ne 't' ? JSON::XS::true : JSON::XS::false; # For backwards compat - $_->{searchable} = $_->{searchable} eq 't' ? JSON::XS::true : JSON::XS::false; - $_->{applicable} = $_->{applicable} eq 't' ? JSON::XS::true : JSON::XS::false; - $_->{chars} *= 1; - $_->{aliases} = [ split /\r?\n/, ($_->{aliases}||'') ]; - $_->{parents} = [ map $_*1, split /,/, ($_->{parents}||'') ]; - } - writejson(\@res, "$VNDB::ROOT/www/api/traits.json.gz", $time, $ws); - }; -} - - -sub writejson { - my($data, $file, $sqltime, $procstart) = @_; - - open my $f, '>:gzip:utf8', "$file~" or die "Writing $file: $!"; - print $f JSON::XS->new->encode($data); - close $f; - rename "$file~", $file or die "Renaming $file: $!"; - - my $wt = AE::time-$procstart; - AE::log info => sprintf 'Wrote %s in %.2fs query + %.2fs write, size: %.1fkB, items: %d.', - $file, $sqltime, $wt, (-s $file)/1024, scalar @$data; -} - - -sub votes_gen { - pg_cmd q{ - SELECT vv.vid||' '||vv.uid||' '||vv.vote as l, to_char(vv.date, 'YYYY-MM-DD') as d - FROM votes vv - JOIN users u ON u.id = vv.uid - JOIN vn v ON v.id = vv.vid - WHERE NOT v.hidden - AND NOT u.ign_votes - AND NOT EXISTS(SELECT 1 FROM users_prefs up WHERE up.uid = u.id AND key = 'hide_list') - }, undef, sub { - my($res, $time) = @_; - return if pg_expect $res, 1; - my $ws = AE::time; - - # legacy votes v1 file, without date - my $file = "$VNDB::ROOT/www/api/votes.gz"; - open my $f, '>:gzip:utf8', "$file~" or die "Writing $file: $!"; - printf $f "%s\n", $res->value($_,0) for (0 .. $res->rows-1); - close $f; - rename "$file~", $file or die "Renaming $file: $!"; - - # v2 file with date - $file = "$VNDB::ROOT/www/api/votes2.gz"; - open $f, '>:gzip:utf8', "$file~" or die "Writing $file: $!"; - printf $f "%s %s\n", $res->value($_,0), $res->value($_,1) for (0 .. $res->rows-1); - close $f; - rename "$file~", $file or die "Renaming $file: $!"; - - my $wt = AE::time-$ws; - AE::log info => sprintf 'Wrote %s in %.2fs query + %.2fs write, size: %.1fkB, items: %d.', - $file, $time, $wt, (-s $file)/1024, scalar $res->rows; - }; -} - - -sub generate { - # TODO: Running these functions in the main process adds ~11MB of RAM because - # the full query results are kept in memory. It might be worthwile to - # generate the dumps in a forked process. - tags_gen; - my $a; $a = AE::timer 5, 0, sub { traits_gen; undef $a; }; - my $b; $b = AE::timer 10, 0, sub { votes_gen; undef $b; }; -} - -1; diff --git a/util/dbdump.pl b/util/dbdump.pl index 79075626..2c386f19 100755 --- a/util/dbdump.pl +++ b/util/dbdump.pl @@ -11,6 +11,10 @@ util/dbdump.pl export-db output.tar.zst util/dbdump.pl export-img output.tar.zst Write an export of all referenced images to a .tar.zst + +util/dbdump.pl export-votes output.gz +util/dbdump.pl export-tags output.gz +util/dbdump.pl export-traits output.gz _ # TODO: @@ -242,10 +246,88 @@ sub export_img { } +sub export_votes { + my $dest = shift; + require PerlIO::gzip; + + open my $F, '>:gzip:utf8', $dest; + $db->do(q{COPY ( + SELECT vv.vid||' '||vv.uid||' '||vv.vote||' '||to_char(vv.date, 'YYYY-MM-DD') + FROM votes vv + JOIN users u ON u.id = vv.uid + JOIN vn v ON v.id = vv.vid + WHERE NOT v.hidden + AND NOT u.ign_votes + AND NOT EXISTS(SELECT 1 FROM users_prefs up WHERE up.uid = u.id AND key = 'hide_list') + ORDER BY vv.vid, vv.uid + ) TO STDOUT + }); + my $v; + print $F $v while($db->pg_getcopydata($v) >= 0); +} + + +sub export_tags { + my $dest = shift; + require JSON::XS; + require PerlIO::gzip; + + my $lst = $db->selectall_arrayref(q{ + SELECT id, name, description, searchable, applicable, c_items AS vns, cat, + (SELECT string_agg(alias,'$$$-$$$') FROM tags_aliases where tag = id) AS aliases, + (SELECT string_agg(parent::text, ',') FROM tags_parents WHERE tag = id) AS parents + FROM tags WHERE state = 2 ORDER BY id + }, { Slice => {} }); + for(@$lst) { + $_->{id} *= 1; + $_->{meta} = !$_->{searchable} ? JSON::XS::true() : JSON::XS::false(); # For backwards compat + $_->{searchable} = $_->{searchable} ? JSON::XS::true() : JSON::XS::false(); + $_->{applicable} = $_->{applicable} ? JSON::XS::true() : JSON::XS::false(); + $_->{vns} *= 1; + $_->{aliases} = [ split /\$\$\$-\$\$\$/, ($_->{aliases}||'') ]; + $_->{parents} = [ map $_*1, split /,/, ($_->{parents}||'') ]; + } + + open my $F, '>:gzip:utf8', $dest; + print $F JSON::XS->new->canonical->encode($lst); +} + + +sub export_traits { + my $dest = shift; + require JSON::XS; + require PerlIO::gzip; + + my $lst = $db->selectall_arrayref(q{ + SELECT id, name, alias AS aliases, description, searchable, applicable, c_items AS chars, + (SELECT string_agg(parent::text, ',') FROM traits_parents WHERE trait = id) AS parents + FROM traits WHERE state = 2 ORDER BY id + }, { Slice => {} }); + for(@$lst) { + $_->{id} *= 1; + $_->{meta} = $_->{searchable} ? JSON::XS::true() : JSON::XS::false(); # For backwards compat + $_->{searchable} = $_->{searchable} ? JSON::XS::true() : JSON::XS::false(); + $_->{applicable} = $_->{applicable} ? JSON::XS::true() : JSON::XS::false(); + $_->{chars} *= 1; + $_->{aliases} = [ split /\r?\n/, ($_->{aliases}||'') ]; + $_->{parents} = [ map $_*1, split /,/, ($_->{parents}||'') ]; + } + + open my $F, '>:gzip:utf8', $dest; + print $F JSON::XS->new->canonical->encode($lst); +} + + if($ARGV[0] && $ARGV[0] eq 'export-db' && $ARGV[1]) { export_db $ARGV[1]; } elsif($ARGV[0] && $ARGV[0] eq 'export-img' && $ARGV[1]) { export_img $ARGV[1]; +} elsif($ARGV[0] && $ARGV[0] eq 'export-votes' && $ARGV[1]) { + export_votes $ARGV[1]; +} elsif($ARGV[0] && $ARGV[0] eq 'export-tags' && $ARGV[1]) { + export_tags $ARGV[1]; +} elsif($ARGV[0] && $ARGV[0] eq 'export-traits' && $ARGV[1]) { + export_traits $ARGV[1]; } else { print $HELP; } |