diff options
Diffstat (limited to 'lib/Multi/APIDump.pm')
-rw-r--r-- | lib/Multi/APIDump.pm | 155 |
1 files changed, 61 insertions, 94 deletions
diff --git a/lib/Multi/APIDump.pm b/lib/Multi/APIDump.pm index 3441225a..d3454e72 100644 --- a/lib/Multi/APIDump.pm +++ b/lib/Multi/APIDump.pm @@ -7,119 +7,78 @@ package Multi::APIDump; use strict; use warnings; -use POE; +use Multi::Core; use JSON::XS; use PerlIO::gzip; -use Time::HiRes 'time'; - - -sub spawn { - my $p = shift; - POE::Session->create( - package_states => [ - $p => [qw| _start shutdown tags_gen tags_write traits_gen traits_write writejson votes_gen votes_write|], - ], - heap => { - regenerate_interval => 86400, # daily min. - tagsfile => "$VNDB::ROOT/www/api/tags.json.gz", - traitsfile => "$VNDB::ROOT/www/api/traits.json.gz", - votesfile => "$VNDB::ROOT/www/api/votes.gz", - @_, - }, - ); -} - - -sub _start { - $_[KERNEL]->alias_set('apidump'); - $_[KERNEL]->yield('tags_gen'); - $_[KERNEL]->delay(traits_gen => 10); - $_[KERNEL]->delay(votes_gen => 20); - $_[KERNEL]->sig(shutdown => 'shutdown'); -} -sub shutdown { - $_[KERNEL]->delay('tags_gen'); - $_[KERNEL]->delay('traits_gen'); - $_[KERNEL]->delay('votes_gen'); - $_[KERNEL]->alias_remove('apidump'); +sub run { + push_watcher schedule 0, 24*3600, \&generate; } sub tags_gen { - $_[KERNEL]->alarm(tags_gen => int((time+3)/$_[HEAP]{regenerate_interval}+1)*$_[HEAP]{regenerate_interval}); - # The subqueries are kinda ugly, but it's convenient to have everything in a single query. - $_[KERNEL]->post(pg => query => q{ + pg_cmd q| SELECT id, name, description, meta, c_items AS vns, cat, (SELECT string_agg(alias,'$$$-$$$') FROM tags_aliases where tag = id) AS aliases, (SELECT string_agg(parent::text, ',') FROM tags_parents WHERE tag = id) AS parents FROM tags WHERE state = 2 - }, undef, 'tags_write'); -} - - -sub tags_write { - my($res, $time) = @_[ARG1,ARG3]; - my $ws = time; - - for(@$res) { - $_->{id} *= 1; - $_->{meta} = $_->{meta} ? JSON::XS::true : JSON::XS::false; - $_->{vns} *= 1; - $_->{aliases} = [ split /\$\$\$-\$\$\$/, ($_->{aliases}||'') ]; - $_->{parents} = [ map $_*1, split /,/, ($_->{parents}||'') ]; - } - - $_[KERNEL]->yield(writejson => $res, $_[HEAP]{tagsfile}, $time, $ws); + |, undef, sub { + my($res, $time) = @_; + return if pg_expect $res, 1; + my $ws = AE::time; + my @res = $res->rowsAsHashes; + for(@res) { + $_->{id} *= 1; + $_->{meta} = $_->{meta} ? JSON::XS::true : JSON::XS::false; + $_->{vns} *= 1; + $_->{aliases} = [ split /\$\$\$-\$\$\$/, ($_->{aliases}||'') ]; + $_->{parents} = [ map $_*1, split /,/, ($_->{parents}||'') ]; + } + writejson(\@res, "$VNDB::ROOT/www/api/tags.json.gz", $time, $ws); + }; } sub traits_gen { - $_[KERNEL]->alarm(traits_gen => int((time+3)/$_[HEAP]{regenerate_interval}+1)*$_[HEAP]{regenerate_interval}); - - $_[KERNEL]->post(pg => query => q{ + pg_cmd q| SELECT id, name, alias AS aliases, description, meta, c_items AS chars, (SELECT string_agg(parent::text, ',') FROM traits_parents WHERE trait = id) AS parents FROM traits WHERE state = 2 - }, undef, 'traits_write'); -} - - -sub traits_write { - my($res, $time) = @_[ARG1,ARG3]; - my $ws = time; - - for(@$res) { - $_->{id} *= 1; - $_->{meta} = $_->{meta} ? JSON::XS::true : JSON::XS::false; - $_->{chars} *= 1; - $_->{aliases} = [ split /\r?\n/, ($_->{aliases}||'') ]; - $_->{parents} = [ map $_*1, split /,/, ($_->{parents}||'') ]; - } - - $_[KERNEL]->yield(writejson => $res, $_[HEAP]{traitsfile}, $time, $ws); + |, undef, sub { + my($res, $time) = @_; + return if pg_expect $res, 1; + my $ws = AE::time; + my @res = $res->rowsAsHashes; + for(@res) { + $_->{id} *= 1; + $_->{meta} = $_->{meta} ? JSON::XS::true : JSON::XS::false; + $_->{chars} *= 1; + $_->{aliases} = [ split /\r?\n/, ($_->{aliases}||'') ]; + $_->{parents} = [ map $_*1, split /,/, ($_->{parents}||'') ]; + } + writejson(\@res, "$VNDB::ROOT/www/api/traits.json.gz", $time, $ws); + }; } sub writejson { - my($data, $file, $sqltime, $procstart) = @_[ARG0..$#_]; + my($data, $file, $sqltime, $procstart) = @_; open my $f, '>:gzip:utf8', "$file~" or die "Writing $file: $!"; print $f JSON::XS->new->encode($data); close $f; rename "$file~", $file or die "Renaming $file: $!"; - my $wt = time-$procstart; - $_[KERNEL]->call(core => log => 'Wrote %s in %.2fs query + %.2fs write, size: %.1fkB, items: %d.', - $file, $sqltime, $wt, (-s $file)/1024, scalar @$data); + my $wt = AE::time-$procstart; + AE::log info => sprintf 'Wrote %s in %.2fs query + %.2fs write, size: %.1fkB, items: %d.', + $file, $sqltime, $wt, (-s $file)/1024, scalar @$data; } -sub votes_gen { - $_[KERNEL]->alarm(votes_gen => int((time+3)/$_[HEAP]{regenerate_interval}+1)*$_[HEAP]{regenerate_interval}); - $_[KERNEL]->post(pg => query => q{ +sub votes_gen { + pg_cmd q{ SELECT vv.vid||' '||vv.uid||' '||vv.vote as l FROM votes vv JOIN users u ON u.id = vv.uid @@ -127,23 +86,31 @@ sub votes_gen { WHERE NOT v.hidden AND NOT u.ign_votes AND NOT EXISTS(SELECT 1 FROM users_prefs up WHERE up.uid = u.id AND key = 'hide_list') - }, undef, 'votes_write'); + }, undef, sub { + my($res, $time) = @_; + return if pg_expect $res, 1; + my $ws = AE::time; + + my $file = "$VNDB::ROOT/www/api/votes.gz"; + open my $f, '>:gzip:utf8', "$file~" or die "Writing $file: $!"; + printf $f "%s\n", $res->value($_,0) for (0 .. $res->rows-1); + close $f; + rename "$file~", $file or die "Renaming $file: $!"; + + my $wt = AE::time-$ws; + AE::log info => sprintf 'Wrote %s in %.2fs query + %.2fs write, size: %.1fkB, items: %d.', + $file, $time, $wt, (-s $file)/1024, scalar $res->rows; + }; } -sub votes_write { - my($res, $sqltime) = @_[ARG1,ARG3]; - my $ws = time; - - my $file = $_[HEAP]{votesfile}; - open my $f, '>:gzip:utf8', "$file~" or die "Writing $file: $!"; - printf $f "%s\n", $_->{l} for (@$res); - close $f; - rename "$file~", $file or die "Renaming $file: $!"; - - my $wt = time-$ws; - $_[KERNEL]->call(core => log => 'Wrote %s in %.2fs query + %.2fs write, size: %.1fkB, items: %d.', - $file, $sqltime, $wt, (-s $file)/1024, scalar @$res); +sub generate { + # TODO: Running these functions in the main process adds ~11MB of RAM because + # the full query results are kept in memory. It might be worthwile to + # generate the dumps in a forked process. + tags_gen; + my $a; $a = AE::timer 5, 0, sub { traits_gen; undef $a; }; + my $b; $b = AE::timer 10, 0, sub { votes_gen; undef $b; }; } 1; |