summaryrefslogtreecommitdiff
path: root/util
diff options
context:
space:
mode:
authorYorhel <git@yorhel.nl>2019-12-24 16:04:21 +0100
committerYorhel <git@yorhel.nl>2019-12-24 16:04:23 +0100
commit2d406640e541c3bd5f9f309b5d0cc7776b7e63ce (patch)
tree6d53aec320aefdab2dc3b25ab5bb0afc078cba6c /util
parente77d322d59ea7e741b12ae172b0df5847d161274 (diff)
ulist: Export new lists in dbdump.pl, unexport old lists
Ended up exporting the 'users.ign_votes' column as well, as that's both easier and (potentially) more useful than not exporting ignored votes in the first place.
Diffstat (limited to 'util')
-rwxr-xr-xutil/dbdump.pl53
-rw-r--r--util/sql/schema.sql32
2 files changed, 46 insertions, 39 deletions
diff --git a/util/dbdump.pl b/util/dbdump.pl
index dba08504..640ff6fc 100755
--- a/util/dbdump.pl
+++ b/util/dbdump.pl
@@ -28,6 +28,7 @@ use DBI;
use DBD::Pg;
use File::Copy 'cp';
use File::Find 'find';
+use Time::HiRes 'time';
use Cwd 'abs_path';
our $ROOT;
@@ -42,6 +43,10 @@ use VNDB::Schema;
# Tables are exported with an explicit ORDER BY to make them more deterministic
# and avoid potentially leaking information about internal state (such as when
# a user last updated their account).
+#
+# Hidden DB entries, private user lists and various other rows with no
+# interesting references are excluded from the dumps. Keeping all references
+# consistent with those omissions complicates the WHERE clauses somewhat.
my %tables = (
anime => { where => 'id IN(SELECT va.aid FROM vn_anime va JOIN vn v ON v.id = va.id WHERE NOT v.hidden)' },
chars => { where => 'NOT hidden' },
@@ -59,7 +64,12 @@ my %tables = (
releases_platforms => { where => 'id IN(SELECT id FROM releases WHERE NOT hidden)' },
releases_producers => { where => 'id IN(SELECT id FROM releases WHERE NOT hidden) AND pid IN(SELECT id FROM producers WHERE NOT hidden)' },
releases_vn => { where => 'id IN(SELECT id FROM releases WHERE NOT hidden) AND vid IN(SELECT id FROM vn WHERE NOT hidden)' },
- rlists => { where => 'uid NOT IN(SELECT id FROM users WHERE hide_list) AND rid IN(SELECT id FROM releases WHERE NOT hidden)' },
+ rlists => { where => 'EXISTS(SELECT 1 FROM releases r'
+ .' JOIN releases_vn rv ON rv.id = r.id'
+ .' JOIN vn v ON v.id = rv.vid'
+ .' JOIN ulist_vns_labels uvl ON uvl.vid = rv.vid'
+ .' JOIN ulist_labels ul ON ul.uid = uvl.uid AND ul.id = uvl.lbl'
+ .' WHERE r.id = rlists.rid AND uvl.uid = rlists.uid AND NOT r.hidden AND NOT v.hidden AND NOT ul.private)' },
screenshots => { where => 'id IN(SELECT scr FROM vn_screenshots vs JOIN vn v ON v.id = vs.id WHERE NOT v.hidden)' },
staff => { where => 'NOT hidden' },
staff_alias => { where => 'id IN(SELECT id FROM staff WHERE NOT hidden)' },
@@ -69,17 +79,15 @@ my %tables = (
tags_vn => { where => 'tag IN(SELECT id FROM tags WHERE state = 2) AND vid IN(SELECT id FROM vn WHERE NOT hidden)' },
traits => { where => 'state = 2' },
traits_parents => { where => 'trait IN(SELECT id FROM traits WHERE state = 2)' },
- # Only include users that are relevant for this dump.
- # (The 'DISTINCT' isn't necessary, but does make the query faster)
- # (Users with their votes ignored are still included. W/e)
- users => { where => q{
- ( id NOT IN(SELECT DISTINCT id FROM users WHERE hide_list)
- AND id IN(SELECT DISTINCT uid FROM rlists
- UNION SELECT DISTINCT uid FROM wlists
- UNION SELECT DISTINCT uid FROM vnlists
- UNION SELECT DISTINCT uid FROM votes)
- ) OR id IN(SELECT DISTINCT uid FROM tags_vn)
- } },
+ ulist_labels => { where => 'NOT private AND EXISTS(SELECT 1 FROM ulist_vns_labels uvl WHERE uvl.lbl = id AND ulist_labels.uid = uvl.uid)' },
+ ulist_vns => { where => 'vid IN(SELECT id FROM vn WHERE NOT hidden)'
+ .' AND EXISTS(SELECT 1 FROM ulist_vns_labels uvl'
+ .' JOIN ulist_labels ul ON ul.uid = uvl.uid AND ul.id = uvl.lbl'
+ .' WHERE ulist_vns.uid = uvl.uid AND ulist_vns.vid = uvl.vid AND NOT ul.private)' },
+ ulist_vns_labels => { where => 'vid IN(SELECT id FROM vn WHERE NOT hidden)'
+ .' AND EXISTS(SELECT 1 FROM ulist_labels ul WHERE ul.uid = ulist_vns_labels.uid AND id = lbl AND NOT ul.private)' },
+ users => { where => 'id IN(SELECT DISTINCT uvl.uid FROM ulist_vns_labels uvl JOIN ulist_labels ul ON ul.uid = uvl.uid AND ul.id = uvl.lbl WHERE NOT ul.private)'
+ .' OR id IN(SELECT DISTINCT uid FROM tags_vn)' },
vn => { where => 'NOT hidden' },
vn_anime => { where => 'id IN(SELECT id FROM vn WHERE NOT hidden)' },
vn_relations => { where => 'id IN(SELECT id FROM vn WHERE NOT hidden)' },
@@ -88,13 +96,9 @@ my %tables = (
.' AND aid IN(SELECT sa.aid FROM staff_alias sa JOIN staff s ON s.id = sa.id WHERE NOT s.hidden)'
.' AND cid IN(SELECT id FROM chars WHERE NOT hidden)' },
vn_staff => { where => 'id IN(SELECT id FROM vn WHERE NOT hidden) AND aid IN(SELECT sa.aid FROM staff_alias sa JOIN staff s ON s.id = sa.id WHERE NOT s.hidden)' },
- vnlists => { where => 'uid NOT IN(SELECT id FROM users WHERE hide_list) AND vid IN(SELECT id FROM vn WHERE NOT hidden)' },
- votes => { where => 'uid NOT IN(SELECT id FROM users WHERE hide_list OR ign_votes)'
- .' AND vid IN(SELECT id FROM vn WHERE NOT hidden)' },
wikidata => { where => q{id IN(SELECT l_wikidata FROM producers WHERE NOT hidden
UNION SELECT l_wikidata FROM staff WHERE NOT hidden
UNION SELECT l_wikidata FROM vn WHERE NOT hidden)} },
- wlists => { where => 'uid NOT IN(SELECT id FROM users WHERE hide_list) AND vid IN(SELECT id FROM vn WHERE NOT hidden)' },
);
my @tables = map +{ name => $_, %{$tables{$_}} }, sort keys %tables;
@@ -121,7 +125,6 @@ sub export_table {
my @cols = grep $_->{pub}, @{$schema->{cols}};
die "No columns to export for table '$table->{name}'\n" if !@cols;;
- #print "# Dumping $table->{name}\n";
my $fn = "$dest/$table->{name}";
# Truncate all timestamptz columns to a day, to avoid leaking privacy-sensitive info.
@@ -130,12 +133,15 @@ sub export_table {
my $order = $schema->{primary} ? join ', ', map "\"$_\"", @{$schema->{primary}} : $table->{order};
die "Table '$table->{name}' is missing an ORDER BY clause\n" if !$order;
+ my $start = time;
$db->do(qq{COPY (SELECT $cols FROM "$table->{name}" $where ORDER BY $order) TO STDOUT});
open my $F, '>:utf8', $fn;
my $v;
print $F $v while($db->pg_getcopydata($v) >= 0);
close $F;
+ #printf "# Dumped %s in %.3fs\n", $table->{name}, time-$start;
+
open $F, '>', "$fn.header";
print $F join "\t", map $_->{name}, @cols;
print $F "\n";
@@ -269,14 +275,15 @@ sub export_votes {
open my $F, '>:gzip:utf8', $dest;
$db->do(q{COPY (
- SELECT vv.vid||' '||vv.uid||' '||vv.vote||' '||to_char(vv.date, 'YYYY-MM-DD')
- FROM votes vv
- JOIN users u ON u.id = vv.uid
- JOIN vn v ON v.id = vv.vid
+ SELECT uv.vid||' '||uv.uid||' '||uv.vote||' '||to_char(uv.vote_date, 'YYYY-MM-DD')
+ FROM ulist_vns uv
+ JOIN users u ON u.id = uv.uid
+ JOIN vn v ON v.id = uv.vid
WHERE NOT v.hidden
AND NOT u.ign_votes
- AND NOT u.hide_list
- ORDER BY vv.vid, vv.uid
+ AND uv.vote IS NOT NULL
+ AND EXISTS(SELECT 1 FROM ulist_vns_labels uvl JOIN ulist_labels ul ON ul.id = uvl.lbl AND ul.uid = uvl.uid WHERE uv.uid = uvl.uid AND uv.vid = uvl.vid AND NOT ul.private)
+ ORDER BY uv.vid, uv.uid
) TO STDOUT
});
my $v;
diff --git a/util/sql/schema.sql b/util/sql/schema.sql
index 8a1fc8c2..a3a94a49 100644
--- a/util/sql/schema.sql
+++ b/util/sql/schema.sql
@@ -705,32 +705,32 @@ CREATE TABLE traits_parents (
-- ulist_labels
CREATE TABLE ulist_labels (
- uid integer NOT NULL, -- user.id
- id integer NOT NULL, -- 0 < builtin < 10 <= custom, ids are reused
- label text NOT NULL,
+ uid integer NOT NULL, -- [pub] user.id
+ id integer NOT NULL, -- [pub] 0 < builtin < 10 <= custom, ids are reused
+ label text NOT NULL, -- [pub]
private boolean NOT NULL,
PRIMARY KEY(uid, id)
);
-- ulist_vns
CREATE TABLE ulist_vns (
- uid integer NOT NULL, -- users.id
- vid integer NOT NULL, -- vn.id
- added timestamptz NOT NULL DEFAULT NOW(),
- lastmod timestamptz NOT NULL DEFAULT NOW(), -- updated when anything in this row has changed?
- vote_date timestamptz, -- Used for "recent votes" - also updated when vote has changed?
- vote smallint CHECK(vote IS NULL OR vote BETWEEN 10 AND 100),
- started date,
- finished date,
- notes text NOT NULL DEFAULT '',
+ uid integer NOT NULL, -- [pub] users.id
+ vid integer NOT NULL, -- [pub] vn.id
+ added timestamptz NOT NULL DEFAULT NOW(), -- [pub]
+ lastmod timestamptz NOT NULL DEFAULT NOW(), -- [pub] updated when anything in this row has changed?
+ vote_date timestamptz, -- [pub] Used for "recent votes" - also updated when vote has changed?
+ vote smallint CHECK(vote IS NULL OR vote BETWEEN 10 AND 100), -- [pub]
+ started date, -- [pub]
+ finished date, -- [pub]
+ notes text NOT NULL DEFAULT '', -- [pub]
PRIMARY KEY(uid, vid)
);
-- ulist_vns_labels
CREATE TABLE ulist_vns_labels (
- uid integer NOT NULL, -- user.id
- lbl integer NOT NULL,
- vid integer NOT NULL, -- vn.id
+ uid integer NOT NULL, -- [pub] user.id
+ lbl integer NOT NULL, -- [pub]
+ vid integer NOT NULL, -- [pub] vn.id
PRIMARY KEY(uid, lbl, vid)
);
@@ -753,7 +753,7 @@ CREATE TABLE users (
c_changes integer NOT NULL DEFAULT 0,
ip inet NOT NULL DEFAULT '0.0.0.0',
c_tags integer NOT NULL DEFAULT 0,
- ign_votes boolean NOT NULL DEFAULT FALSE,
+ ign_votes boolean NOT NULL DEFAULT FALSE, -- [pub]
email_confirmed boolean NOT NULL DEFAULT FALSE,
skin text NOT NULL DEFAULT '',
customcss text NOT NULL DEFAULT '',