diff options
author | Yorhel <git@yorhel.nl> | 2019-12-24 16:04:21 +0100 |
---|---|---|
committer | Yorhel <git@yorhel.nl> | 2019-12-24 16:04:23 +0100 |
commit | 2d406640e541c3bd5f9f309b5d0cc7776b7e63ce (patch) | |
tree | 6d53aec320aefdab2dc3b25ab5bb0afc078cba6c /util | |
parent | e77d322d59ea7e741b12ae172b0df5847d161274 (diff) |
ulist: Export new lists in dbdump.pl, unexport old lists
Ended up exporting the 'users.ign_votes' column as well, as that's both
easier and (potentially) more useful than not exporting ignored votes in
the first place.
Diffstat (limited to 'util')
-rwxr-xr-x | util/dbdump.pl | 53 | ||||
-rw-r--r-- | util/sql/schema.sql | 32 |
2 files changed, 46 insertions, 39 deletions
diff --git a/util/dbdump.pl b/util/dbdump.pl index dba08504..640ff6fc 100755 --- a/util/dbdump.pl +++ b/util/dbdump.pl @@ -28,6 +28,7 @@ use DBI; use DBD::Pg; use File::Copy 'cp'; use File::Find 'find'; +use Time::HiRes 'time'; use Cwd 'abs_path'; our $ROOT; @@ -42,6 +43,10 @@ use VNDB::Schema; # Tables are exported with an explicit ORDER BY to make them more deterministic # and avoid potentially leaking information about internal state (such as when # a user last updated their account). +# +# Hidden DB entries, private user lists and various other rows with no +# interesting references are excluded from the dumps. Keeping all references +# consistent with those omissions complicates the WHERE clauses somewhat. my %tables = ( anime => { where => 'id IN(SELECT va.aid FROM vn_anime va JOIN vn v ON v.id = va.id WHERE NOT v.hidden)' }, chars => { where => 'NOT hidden' }, @@ -59,7 +64,12 @@ my %tables = ( releases_platforms => { where => 'id IN(SELECT id FROM releases WHERE NOT hidden)' }, releases_producers => { where => 'id IN(SELECT id FROM releases WHERE NOT hidden) AND pid IN(SELECT id FROM producers WHERE NOT hidden)' }, releases_vn => { where => 'id IN(SELECT id FROM releases WHERE NOT hidden) AND vid IN(SELECT id FROM vn WHERE NOT hidden)' }, - rlists => { where => 'uid NOT IN(SELECT id FROM users WHERE hide_list) AND rid IN(SELECT id FROM releases WHERE NOT hidden)' }, + rlists => { where => 'EXISTS(SELECT 1 FROM releases r' + .' JOIN releases_vn rv ON rv.id = r.id' + .' JOIN vn v ON v.id = rv.vid' + .' JOIN ulist_vns_labels uvl ON uvl.vid = rv.vid' + .' JOIN ulist_labels ul ON ul.uid = uvl.uid AND ul.id = uvl.lbl' + .' WHERE r.id = rlists.rid AND uvl.uid = rlists.uid AND NOT r.hidden AND NOT v.hidden AND NOT ul.private)' }, screenshots => { where => 'id IN(SELECT scr FROM vn_screenshots vs JOIN vn v ON v.id = vs.id WHERE NOT v.hidden)' }, staff => { where => 'NOT hidden' }, staff_alias => { where => 'id IN(SELECT id FROM staff WHERE NOT hidden)' }, @@ -69,17 +79,15 @@ my %tables = ( tags_vn => { where => 'tag IN(SELECT id FROM tags WHERE state = 2) AND vid IN(SELECT id FROM vn WHERE NOT hidden)' }, traits => { where => 'state = 2' }, traits_parents => { where => 'trait IN(SELECT id FROM traits WHERE state = 2)' }, - # Only include users that are relevant for this dump. - # (The 'DISTINCT' isn't necessary, but does make the query faster) - # (Users with their votes ignored are still included. W/e) - users => { where => q{ - ( id NOT IN(SELECT DISTINCT id FROM users WHERE hide_list) - AND id IN(SELECT DISTINCT uid FROM rlists - UNION SELECT DISTINCT uid FROM wlists - UNION SELECT DISTINCT uid FROM vnlists - UNION SELECT DISTINCT uid FROM votes) - ) OR id IN(SELECT DISTINCT uid FROM tags_vn) - } }, + ulist_labels => { where => 'NOT private AND EXISTS(SELECT 1 FROM ulist_vns_labels uvl WHERE uvl.lbl = id AND ulist_labels.uid = uvl.uid)' }, + ulist_vns => { where => 'vid IN(SELECT id FROM vn WHERE NOT hidden)' + .' AND EXISTS(SELECT 1 FROM ulist_vns_labels uvl' + .' JOIN ulist_labels ul ON ul.uid = uvl.uid AND ul.id = uvl.lbl' + .' WHERE ulist_vns.uid = uvl.uid AND ulist_vns.vid = uvl.vid AND NOT ul.private)' }, + ulist_vns_labels => { where => 'vid IN(SELECT id FROM vn WHERE NOT hidden)' + .' AND EXISTS(SELECT 1 FROM ulist_labels ul WHERE ul.uid = ulist_vns_labels.uid AND id = lbl AND NOT ul.private)' }, + users => { where => 'id IN(SELECT DISTINCT uvl.uid FROM ulist_vns_labels uvl JOIN ulist_labels ul ON ul.uid = uvl.uid AND ul.id = uvl.lbl WHERE NOT ul.private)' + .' OR id IN(SELECT DISTINCT uid FROM tags_vn)' }, vn => { where => 'NOT hidden' }, vn_anime => { where => 'id IN(SELECT id FROM vn WHERE NOT hidden)' }, vn_relations => { where => 'id IN(SELECT id FROM vn WHERE NOT hidden)' }, @@ -88,13 +96,9 @@ my %tables = ( .' AND aid IN(SELECT sa.aid FROM staff_alias sa JOIN staff s ON s.id = sa.id WHERE NOT s.hidden)' .' AND cid IN(SELECT id FROM chars WHERE NOT hidden)' }, vn_staff => { where => 'id IN(SELECT id FROM vn WHERE NOT hidden) AND aid IN(SELECT sa.aid FROM staff_alias sa JOIN staff s ON s.id = sa.id WHERE NOT s.hidden)' }, - vnlists => { where => 'uid NOT IN(SELECT id FROM users WHERE hide_list) AND vid IN(SELECT id FROM vn WHERE NOT hidden)' }, - votes => { where => 'uid NOT IN(SELECT id FROM users WHERE hide_list OR ign_votes)' - .' AND vid IN(SELECT id FROM vn WHERE NOT hidden)' }, wikidata => { where => q{id IN(SELECT l_wikidata FROM producers WHERE NOT hidden UNION SELECT l_wikidata FROM staff WHERE NOT hidden UNION SELECT l_wikidata FROM vn WHERE NOT hidden)} }, - wlists => { where => 'uid NOT IN(SELECT id FROM users WHERE hide_list) AND vid IN(SELECT id FROM vn WHERE NOT hidden)' }, ); my @tables = map +{ name => $_, %{$tables{$_}} }, sort keys %tables; @@ -121,7 +125,6 @@ sub export_table { my @cols = grep $_->{pub}, @{$schema->{cols}}; die "No columns to export for table '$table->{name}'\n" if !@cols;; - #print "# Dumping $table->{name}\n"; my $fn = "$dest/$table->{name}"; # Truncate all timestamptz columns to a day, to avoid leaking privacy-sensitive info. @@ -130,12 +133,15 @@ sub export_table { my $order = $schema->{primary} ? join ', ', map "\"$_\"", @{$schema->{primary}} : $table->{order}; die "Table '$table->{name}' is missing an ORDER BY clause\n" if !$order; + my $start = time; $db->do(qq{COPY (SELECT $cols FROM "$table->{name}" $where ORDER BY $order) TO STDOUT}); open my $F, '>:utf8', $fn; my $v; print $F $v while($db->pg_getcopydata($v) >= 0); close $F; + #printf "# Dumped %s in %.3fs\n", $table->{name}, time-$start; + open $F, '>', "$fn.header"; print $F join "\t", map $_->{name}, @cols; print $F "\n"; @@ -269,14 +275,15 @@ sub export_votes { open my $F, '>:gzip:utf8', $dest; $db->do(q{COPY ( - SELECT vv.vid||' '||vv.uid||' '||vv.vote||' '||to_char(vv.date, 'YYYY-MM-DD') - FROM votes vv - JOIN users u ON u.id = vv.uid - JOIN vn v ON v.id = vv.vid + SELECT uv.vid||' '||uv.uid||' '||uv.vote||' '||to_char(uv.vote_date, 'YYYY-MM-DD') + FROM ulist_vns uv + JOIN users u ON u.id = uv.uid + JOIN vn v ON v.id = uv.vid WHERE NOT v.hidden AND NOT u.ign_votes - AND NOT u.hide_list - ORDER BY vv.vid, vv.uid + AND uv.vote IS NOT NULL + AND EXISTS(SELECT 1 FROM ulist_vns_labels uvl JOIN ulist_labels ul ON ul.id = uvl.lbl AND ul.uid = uvl.uid WHERE uv.uid = uvl.uid AND uv.vid = uvl.vid AND NOT ul.private) + ORDER BY uv.vid, uv.uid ) TO STDOUT }); my $v; diff --git a/util/sql/schema.sql b/util/sql/schema.sql index 8a1fc8c2..a3a94a49 100644 --- a/util/sql/schema.sql +++ b/util/sql/schema.sql @@ -705,32 +705,32 @@ CREATE TABLE traits_parents ( -- ulist_labels CREATE TABLE ulist_labels ( - uid integer NOT NULL, -- user.id - id integer NOT NULL, -- 0 < builtin < 10 <= custom, ids are reused - label text NOT NULL, + uid integer NOT NULL, -- [pub] user.id + id integer NOT NULL, -- [pub] 0 < builtin < 10 <= custom, ids are reused + label text NOT NULL, -- [pub] private boolean NOT NULL, PRIMARY KEY(uid, id) ); -- ulist_vns CREATE TABLE ulist_vns ( - uid integer NOT NULL, -- users.id - vid integer NOT NULL, -- vn.id - added timestamptz NOT NULL DEFAULT NOW(), - lastmod timestamptz NOT NULL DEFAULT NOW(), -- updated when anything in this row has changed? - vote_date timestamptz, -- Used for "recent votes" - also updated when vote has changed? - vote smallint CHECK(vote IS NULL OR vote BETWEEN 10 AND 100), - started date, - finished date, - notes text NOT NULL DEFAULT '', + uid integer NOT NULL, -- [pub] users.id + vid integer NOT NULL, -- [pub] vn.id + added timestamptz NOT NULL DEFAULT NOW(), -- [pub] + lastmod timestamptz NOT NULL DEFAULT NOW(), -- [pub] updated when anything in this row has changed? + vote_date timestamptz, -- [pub] Used for "recent votes" - also updated when vote has changed? + vote smallint CHECK(vote IS NULL OR vote BETWEEN 10 AND 100), -- [pub] + started date, -- [pub] + finished date, -- [pub] + notes text NOT NULL DEFAULT '', -- [pub] PRIMARY KEY(uid, vid) ); -- ulist_vns_labels CREATE TABLE ulist_vns_labels ( - uid integer NOT NULL, -- user.id - lbl integer NOT NULL, - vid integer NOT NULL, -- vn.id + uid integer NOT NULL, -- [pub] user.id + lbl integer NOT NULL, -- [pub] + vid integer NOT NULL, -- [pub] vn.id PRIMARY KEY(uid, lbl, vid) ); @@ -753,7 +753,7 @@ CREATE TABLE users ( c_changes integer NOT NULL DEFAULT 0, ip inet NOT NULL DEFAULT '0.0.0.0', c_tags integer NOT NULL DEFAULT 0, - ign_votes boolean NOT NULL DEFAULT FALSE, + ign_votes boolean NOT NULL DEFAULT FALSE, -- [pub] email_confirmed boolean NOT NULL DEFAULT FALSE, skin text NOT NULL DEFAULT '', customcss text NOT NULL DEFAULT '', |