diff options
author | Yorhel <git@yorhel.nl> | 2009-05-09 10:02:46 +0200 |
---|---|---|
committer | Yorhel <git@yorhel.nl> | 2009-05-09 10:02:46 +0200 |
commit | ce6d43aed955effe136f1ce3682828d2d43109cb (patch) | |
tree | e55754ece45875da53568c5e616a36a59540633f /util | |
parent | 9909b50b1973d02717c89d4e42b274909d59a0ac (diff) |
Don't consider VNs with AVG(vote) < 0 on tag pages
The bayesian ranking algorithm isn't exactly meant to be used to
differentiate between absolute values, so do a pre-check on AVG(vote)
before considering a VN in the rating.
I've also played around with using plain old averages as score, but I'd
say the ordering is a lot better with the bayesian ranking, the displayed
score is just slightly more confusing.
Diffstat (limited to 'util')
-rw-r--r-- | util/dump.sql | 4 | ||||
-rw-r--r-- | util/updates/update_2.4.sql | 33 |
2 files changed, 35 insertions, 2 deletions
diff --git a/util/dump.sql b/util/dump.sql index 5003c6e8..81343307 100644 --- a/util/dump.sql +++ b/util/dump.sql @@ -576,8 +576,8 @@ BEGIN SELECT * FROM tags_vn UNION SELECT * FROM tag_vn_childs(); -- grouped by (tag, vid, uid), so only one user votes on one parent tag per VN entry CREATE OR REPLACE TEMPORARY VIEW tags_vn_grouped AS - SELECT tag, vid, uid, AVG(vote)::real AS vote, COALESCE(AVG(spoiler), 0)::real AS spoiler - FROM tags_vn_all GROUP BY tag, vid, uid; + SELECT tag, vid, uid, MAX(vote)::real AS vote, COALESCE(AVG(spoiler), 0)::real AS spoiler + FROM tags_vn_all WHERE vote > 0 GROUP BY tag, vid, uid; -- grouped by (tag, vid) and serialized into a table DROP INDEX IF EXISTS tags_vn_bayesian_tag; TRUNCATE tags_vn_bayesian; diff --git a/util/updates/update_2.4.sql b/util/updates/update_2.4.sql new file mode 100644 index 00000000..03b18dd7 --- /dev/null +++ b/util/updates/update_2.4.sql @@ -0,0 +1,33 @@ + + +-- don't consider vns with vote < 0 on tag pages + +CREATE OR REPLACE FUNCTION tag_vn_calc() RETURNS void AS $$ +BEGIN + -- all votes for all tags + CREATE OR REPLACE TEMPORARY VIEW tags_vn_all AS + SELECT * FROM tags_vn UNION SELECT * FROM tag_vn_childs(); + -- grouped by (tag, vid, uid), so only one user votes on one parent tag per VN entry + CREATE OR REPLACE TEMPORARY VIEW tags_vn_grouped AS + SELECT tag, vid, uid, MAX(vote)::real AS vote, COALESCE(AVG(spoiler), 0)::real AS spoiler + FROM tags_vn_all WHERE vote > 0 GROUP BY tag, vid, uid; + -- grouped by (tag, vid) and serialized into a table + DROP INDEX IF EXISTS tags_vn_bayesian_tag; + TRUNCATE tags_vn_bayesian; + INSERT INTO tags_vn_bayesian + SELECT tag, vid, COUNT(uid) AS users, AVG(vote)::real AS rating, + (CASE WHEN AVG(spoiler) < 0.7 THEN 0 WHEN AVG(spoiler) > 1.3 THEN 2 ELSE 1 END)::smallint AS spoiler + FROM tags_vn_grouped + GROUP BY tag, vid; + CREATE INDEX tags_vn_bayesian_tag ON tags_vn_bayesian (tag); + -- now perform the bayesian ranking calculation + UPDATE tags_vn_bayesian tvs SET rating = + ((SELECT AVG(users)::real * AVG(rating)::real FROM tags_vn_bayesian WHERE tag = tvs.tag) + users*rating) + / ((SELECT AVG(users)::real FROM tags_vn_bayesian WHERE tag = tvs.tag) + users)::real; + -- and update the VN count in the tags table as well + UPDATE tags SET c_vns = (SELECT COUNT(*) FROM tags_vn_bayesian WHERE tag = id); + RETURN; +END; +$$ LANGUAGE plpgsql; +SELECT tag_vn_calc(); + |