diff options
author | Yorhel <git@yorhel.nl> | 2022-10-31 13:23:52 +0100 |
---|---|---|
committer | Yorhel <git@yorhel.nl> | 2022-10-31 13:49:03 +0100 |
commit | 5b4b4593cfa9e2c46a02a849bf5b673b9ec70586 (patch) | |
tree | cee0a24f510205a624f0c64b86aef4b585168047 | |
parent | aab523f8c663b22434f1e34167b204cd3cd8749b (diff) |
dbdump.pl: Improve performance of dumping ulist_vns
Going from 29s before the merge to 35s after the merge to 7s with this
change.
-rw-r--r-- | sql/schema.sql | 1 | ||||
-rwxr-xr-x | util/dbdump.pl | 49 |
2 files changed, 41 insertions, 9 deletions
diff --git a/sql/schema.sql b/sql/schema.sql index a6f663dc..8d5010ee 100644 --- a/sql/schema.sql +++ b/sql/schema.sql @@ -1022,6 +1022,7 @@ CREATE TABLE ulist_labels ( ); -- ulist_vns +-- XXX: dbdump.pl has a custom query for this table, make sure to sync that when adding/removing [pub] columns. CREATE TABLE ulist_vns ( uid vndbid NOT NULL, -- [pub] users.id vid vndbid NOT NULL, -- [pub] vn.id diff --git a/util/dbdump.pl b/util/dbdump.pl index 1ad708fa..2143ddf3 100755 --- a/util/dbdump.pl +++ b/util/dbdump.pl @@ -46,6 +46,36 @@ use lib "$ROOT/lib"; use VNDB::Schema; +# Ridiculous query to export 'ulist_vns' with private labels removed. +# Since doing a lookup in ulist_labels for each row+label in ulist_vns is +# rather slow, this query takes a shortcut: for users that do not have any +# private labels at all (i.e. the common case), this query just dumps the rows +# without any modification. Only for users that have at least one private label +# are the labels filtered. +my $sql_ulist_vns_cols = q{ + uid, vid, date_trunc('day',added) AS added, date_trunc('day',lastmod) AS lastmod + , date_trunc('day',vote_date), started, finished, vote, notes +}; +my $sql_ulist_vns = qq{ + SELECT * FROM ( + SELECT $sql_ulist_vns_cols, array_agg(lblid ORDER BY lblid) AS labels + FROM ulist_vns, unnest(labels) x(lblid) + WHERE NOT c_private + AND NOT EXISTS(SELECT 1 FROM ulist_labels WHERE uid = ulist_vns.uid AND id = lblid AND private) + AND uid IN(SELECT uid FROM ulist_labels WHERE private) + GROUP BY uid, vid + UNION ALL + SELECT $sql_ulist_vns_cols, labels + FROM ulist_vns + WHERE NOT c_private + AND uid NOT IN(SELECT uid FROM ulist_labels WHERE private) + ) z + WHERE vid IN(SELECT id FROM vn WHERE NOT hidden) + ORDER BY uid, vid +}; + + + # Tables and columns to export. # # Tables are exported with an explicit ORDER BY to make them more deterministic @@ -89,9 +119,7 @@ my %tables = ( traits_parents => { where => 'id IN(SELECT id FROM traits WHERE NOT hidden)' }, ulist_labels => { where => 'NOT private AND EXISTS(SELECT 1 FROM ulist_vns uv JOIN vn v ON v.id = uv.vid WHERE NOT v.hidden AND uv.labels && ARRAY[ulist_labels.id] AND ulist_labels.uid = uv.uid)' }, - ulist_vns => { where => 'NOT c_private AND vid IN(SELECT id FROM vn WHERE NOT hidden)' - # XXX: This is slow - , overrule_labels => '(SELECT array_agg(ul.id) FROM unnest(labels) x(id) JOIN ulist_labels ul ON ul.id = x.id WHERE ul.uid = ulist_vns.uid AND NOT ul.private)' }, + ulist_vns => { sql => $sql_ulist_vns }, users => { where => 'id IN(SELECT DISTINCT uid FROM ulist_vns WHERE NOT c_private)' .' OR id IN(SELECT DISTINCT uid FROM tags_vn)' .' OR id IN(SELECT DISTINCT uid FROM image_votes)' @@ -148,14 +176,17 @@ sub export_table { my $fn = "$dest/$table->{name}"; - # Truncate all timestamptz columns to a day, to avoid leaking privacy-sensitive info. - my $cols = join ', ', map $table->{"overrule_$_->{name}"} // ($_->{type} eq 'timestamptz' ? "date_trunc('day', \"$_->{name}\")" : qq{"$_->{name}"}), @cols; - my $where = $table->{where} ? "WHERE $table->{where}" : ''; - my $order = table_order $table->{name}; - die "Table '$table->{name}' is missing an ORDER BY clause\n" if !$order; + my $sql = $table->{sql} // do { + # Truncate all timestamptz columns to a day, to avoid leaking privacy-sensitive info. + my $cols = join ', ', map $_->{type} eq 'timestamptz' ? "date_trunc('day', \"$_->{name}\")" : qq{"$_->{name}"}, @cols; + my $where = $table->{where} ? "WHERE $table->{where}" : ''; + my $order = table_order $table->{name}; + die "Table '$table->{name}' is missing an ORDER BY clause\n" if !$order; + qq{SELECT $cols FROM "$table->{name}" $where $order} + }; my $start = time; - $db->do(qq{COPY (SELECT $cols FROM "$table->{name}" $where $order) TO STDOUT}); + $db->do(qq{COPY ($sql) TO STDOUT}); open my $F, '>:utf8', $fn; my $v; print $F $v while($db->pg_getcopydata($v) >= 0); |