summaryrefslogtreecommitdiff
path: root/util/updates
diff options
context:
space:
mode:
authorYorhel <git@yorhel.nl>2009-05-09 10:02:46 +0200
committerYorhel <git@yorhel.nl>2009-05-09 10:02:46 +0200
commitce6d43aed955effe136f1ce3682828d2d43109cb (patch)
treee55754ece45875da53568c5e616a36a59540633f /util/updates
parent9909b50b1973d02717c89d4e42b274909d59a0ac (diff)
Don't consider VNs with AVG(vote) < 0 on tag pages
The bayesian ranking algorithm isn't exactly meant to be used to differentiate between absolute values, so do a pre-check on AVG(vote) before considering a VN in the rating. I've also played around with using plain old averages as score, but I'd say the ordering is a lot better with the bayesian ranking, the displayed score is just slightly more confusing.
Diffstat (limited to 'util/updates')
-rw-r--r--util/updates/update_2.4.sql33
1 files changed, 33 insertions, 0 deletions
diff --git a/util/updates/update_2.4.sql b/util/updates/update_2.4.sql
new file mode 100644
index 00000000..03b18dd7
--- /dev/null
+++ b/util/updates/update_2.4.sql
@@ -0,0 +1,33 @@
+
+
+-- don't consider vns with vote < 0 on tag pages
+
+CREATE OR REPLACE FUNCTION tag_vn_calc() RETURNS void AS $$
+BEGIN
+ -- all votes for all tags
+ CREATE OR REPLACE TEMPORARY VIEW tags_vn_all AS
+ SELECT * FROM tags_vn UNION SELECT * FROM tag_vn_childs();
+ -- grouped by (tag, vid, uid), so only one user votes on one parent tag per VN entry
+ CREATE OR REPLACE TEMPORARY VIEW tags_vn_grouped AS
+ SELECT tag, vid, uid, MAX(vote)::real AS vote, COALESCE(AVG(spoiler), 0)::real AS spoiler
+ FROM tags_vn_all WHERE vote > 0 GROUP BY tag, vid, uid;
+ -- grouped by (tag, vid) and serialized into a table
+ DROP INDEX IF EXISTS tags_vn_bayesian_tag;
+ TRUNCATE tags_vn_bayesian;
+ INSERT INTO tags_vn_bayesian
+ SELECT tag, vid, COUNT(uid) AS users, AVG(vote)::real AS rating,
+ (CASE WHEN AVG(spoiler) < 0.7 THEN 0 WHEN AVG(spoiler) > 1.3 THEN 2 ELSE 1 END)::smallint AS spoiler
+ FROM tags_vn_grouped
+ GROUP BY tag, vid;
+ CREATE INDEX tags_vn_bayesian_tag ON tags_vn_bayesian (tag);
+ -- now perform the bayesian ranking calculation
+ UPDATE tags_vn_bayesian tvs SET rating =
+ ((SELECT AVG(users)::real * AVG(rating)::real FROM tags_vn_bayesian WHERE tag = tvs.tag) + users*rating)
+ / ((SELECT AVG(users)::real FROM tags_vn_bayesian WHERE tag = tvs.tag) + users)::real;
+ -- and update the VN count in the tags table as well
+ UPDATE tags SET c_vns = (SELECT COUNT(*) FROM tags_vn_bayesian WHERE tag = id);
+ RETURN;
+END;
+$$ LANGUAGE plpgsql;
+SELECT tag_vn_calc();
+