From be023476e67dc76a5fb1344af5b5cb9244d992e7 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sun, 29 Nov 2009 11:51:21 +0100 Subject: SQL: Rewrote tag_vn_calc() to use a WITH RECURSIVE .. SELECT query This is more efficient, and doesn't require the tag_tree() or tag_vn_childs() stored procedures. Does require PostgreSQL 8.4+ --- README | 2 +- lib/Multi/Maintenance.pm | 6 ++--- util/dump.sql | 59 ++++++++++++++++++++------------------------ util/updates/update_2.10.sql | 45 +++++++++++++++++++++++---------- 4 files changed, 63 insertions(+), 49 deletions(-) diff --git a/README b/README index c56db3f7..21b9389b 100644 --- a/README +++ b/README @@ -12,7 +12,7 @@ Requirements global requirements: Linux, or an OS that resembles Linux. Chances are VNDB won't run on Windows. - PostgreSQL 8.3+ + PostgreSQL 8.4+ perl 5.10 recommended, 5.8 may also work A webserver that works with YAWF (lighttpd and Apache are known to work) diff --git a/lib/Multi/Maintenance.pm b/lib/Multi/Maintenance.pm index 0ea7ef29..b952c490 100644 --- a/lib/Multi/Maintenance.pm +++ b/lib/Multi/Maintenance.pm @@ -99,20 +99,20 @@ sub log_stats { # num, res, action, time sub vncache { - # this takes about 40s to complete. We really need to search for an alternative + # this takes about 50s to complete. We really need to search for an alternative # method of keeping the c_* columns in the vn table up-to-date. $_[KERNEL]->post(pg => do => 'SELECT update_vncache(0)', undef, 'log_stats', 'vncache'); } sub tagcache { - # takes about 18 seconds max. ouch, but still kind-of acceptable + # takes about 2 seconds max, still OK $_[KERNEL]->post(pg => do => 'SELECT tag_vn_calc()', undef, 'log_stats', 'tagcache'); } sub vnpopularity { - # still takes at most 2 seconds. let's hope that doesn't increase... + # still takes at most 3 seconds. let's hope that doesn't increase... $_[KERNEL]->post(pg => do => 'SELECT update_vnpopularity()', undef, 'log_stats', 'vnpopularity'); } diff --git a/util/dump.sql b/util/dump.sql index a1a04aef..cad84ac2 100644 --- a/util/dump.sql +++ b/util/dump.sql @@ -551,46 +551,41 @@ END; $$ LANGUAGE plpgsql; --- returns all votes inherited by childs --- UNION this with tags_vn and you have all votes for all tags -CREATE OR REPLACE FUNCTION tag_vn_childs() RETURNS SETOF tags_vn AS $$ -DECLARE - r tags_vn%rowtype; - i RECORD; - l RECORD; -BEGIN - FOR l IN SElECT id FROM tags WHERE meta = FALSE AND state = 2 AND EXISTS(SELECT 1 FROM tags_parents WHERE parent = id) LOOP - FOR i IN SELECT tag FROM tag_tree(l.id, 0, true) LOOP - FOR r IN SELECT l.id, vid, uid, vote, spoiler FROM tags_vn WHERE tag = i.tag LOOP - RETURN NEXT r; - END LOOP; - END LOOP; - END LOOP; -END; -$$ LANGUAGE plpgsql; - - -- recalculate tags_vn_inherit CREATE OR REPLACE FUNCTION tag_vn_calc() RETURNS void AS $$ BEGIN - -- all votes for all tags - CREATE OR REPLACE TEMPORARY VIEW tags_vn_all AS - SELECT * FROM tags_vn UNION SELECT * FROM tag_vn_childs(); - -- grouped by (tag, vid, uid), so only one user votes on one parent tag per VN entry - CREATE OR REPLACE TEMPORARY VIEW tags_vn_grouped AS - SELECT tag, vid, uid, MAX(vote)::real AS vote, COALESCE(AVG(spoiler), 0)::real AS spoiler - FROM tags_vn_all GROUP BY tag, vid, uid; - -- grouped by (tag, vid) and serialized into a table DROP INDEX IF EXISTS tags_vn_inherit_tag; TRUNCATE tags_vn_inherit; + -- populate tags_vn_inherit INSERT INTO tags_vn_inherit - SELECT tag, vid, COUNT(uid) AS users, AVG(vote)::real AS rating, - (CASE WHEN AVG(spoiler) < 0.7 THEN 0 WHEN AVG(spoiler) > 1.3 THEN 2 ELSE 1 END)::smallint AS spoiler - FROM tags_vn_grouped + -- all votes for all tags, including votes inherited by child tags + -- (also includes meta tags, because they could have a normal tag as parent) + WITH RECURSIVE tags_vn_all(lvl, tag, vid, uid, vote, spoiler, meta) AS ( + SELECT 15, tag, vid, uid, vote, spoiler, false + FROM tags_vn + UNION ALL + SELECT lvl-1, tp.parent, ta.vid, ta.uid, ta.vote, ta.spoiler, t.meta + FROM tags_vn_all ta + JOIN tags_parents tp ON tp.tag = ta.tag + JOIN tags t ON t.id = tp.parent + WHERE t.state = 2 + AND ta.lvl > 0 + ) + -- grouped by (tag, vid) + SELECT tag, vid, COUNT(uid) AS users, AVG(vote)::real AS rating, + (CASE WHEN AVG(spoiler) < 0.7 THEN 0 WHEN AVG(spoiler) > 1.3 THEN 2 ELSE 1 END)::smallint AS spoiler + FROM ( + -- grouped by (tag, vid, uid), so only one user votes on one parent tag per VN entry (also removing meta tags) + SELECT tag, vid, uid, MAX(vote)::real, COALESCE(AVG(spoiler), 0)::real + FROM tags_vn_all + WHERE NOT meta + GROUP BY tag, vid, uid + ) AS t(tag, vid, uid, vote, spoiler) GROUP BY tag, vid - HAVING AVG(vote) > 0; + HAVING AVG(vote) > 0; + -- recreate index CREATE INDEX tags_vn_inherit_tag ON tags_vn_inherit (tag); - -- and update the VN count in the tags table as well + -- and update the VN count in the tags table UPDATE tags SET c_vns = (SELECT COUNT(*) FROM tags_vn_inherit WHERE tag = id); RETURN; END; diff --git a/util/updates/update_2.10.sql b/util/updates/update_2.10.sql index dada4889..74b1a8a5 100644 --- a/util/updates/update_2.10.sql +++ b/util/updates/update_2.10.sql @@ -9,26 +9,42 @@ CREATE TABLE tags_vn_inherit ( spoiler smallint NOT NULL ); + +-- more efficient version of tag_vn_calc() CREATE OR REPLACE FUNCTION tag_vn_calc() RETURNS void AS $$ BEGIN - -- all votes for all tags - CREATE OR REPLACE TEMPORARY VIEW tags_vn_all AS - SELECT * FROM tags_vn UNION SELECT * FROM tag_vn_childs(); - -- grouped by (tag, vid, uid), so only one user votes on one parent tag per VN entry - CREATE OR REPLACE TEMPORARY VIEW tags_vn_grouped AS - SELECT tag, vid, uid, MAX(vote)::real AS vote, COALESCE(AVG(spoiler), 0)::real AS spoiler - FROM tags_vn_all GROUP BY tag, vid, uid; - -- grouped by (tag, vid) and serialized into a table DROP INDEX IF EXISTS tags_vn_inherit_tag; TRUNCATE tags_vn_inherit; + -- populate tags_vn_inherit INSERT INTO tags_vn_inherit - SELECT tag, vid, COUNT(uid) AS users, AVG(vote)::real AS rating, - (CASE WHEN AVG(spoiler) < 0.7 THEN 0 WHEN AVG(spoiler) > 1.3 THEN 2 ELSE 1 END)::smallint AS spoiler - FROM tags_vn_grouped + -- all votes for all tags, including votes inherited by child tags + -- (also includes meta tags, because they could have a normal tag as parent) + WITH RECURSIVE tags_vn_all(lvl, tag, vid, uid, vote, spoiler, meta) AS ( + SELECT 15, tag, vid, uid, vote, spoiler, false + FROM tags_vn + UNION ALL + SELECT lvl-1, tp.parent, ta.vid, ta.uid, ta.vote, ta.spoiler, t.meta + FROM tags_vn_all ta + JOIN tags_parents tp ON tp.tag = ta.tag + JOIN tags t ON t.id = tp.parent + WHERE t.state = 2 + AND ta.lvl > 0 + ) + -- grouped by (tag, vid) + SELECT tag, vid, COUNT(uid) AS users, AVG(vote)::real AS rating, + (CASE WHEN AVG(spoiler) < 0.7 THEN 0 WHEN AVG(spoiler) > 1.3 THEN 2 ELSE 1 END)::smallint AS spoiler + FROM ( + -- grouped by (tag, vid, uid), so only one user votes on one parent tag per VN entry (also removing meta tags) + SELECT tag, vid, uid, MAX(vote)::real, COALESCE(AVG(spoiler), 0)::real + FROM tags_vn_all + WHERE NOT meta + GROUP BY tag, vid, uid + ) AS t(tag, vid, uid, vote, spoiler) GROUP BY tag, vid - HAVING AVG(vote) > 0; + HAVING AVG(vote) > 0; + -- recreate index CREATE INDEX tags_vn_inherit_tag ON tags_vn_inherit (tag); - -- and update the VN count in the tags table as well + -- and update the VN count in the tags table UPDATE tags SET c_vns = (SELECT COUNT(*) FROM tags_vn_inherit WHERE tag = id); RETURN; END; @@ -36,3 +52,6 @@ $$ LANGUAGE plpgsql; SELECT tag_vn_calc(); +-- tag_vn_childs() is not used anymore +DROP FUNCTION tag_vn_childs() CASCADE; + -- cgit v1.2.3