summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYorhel <git@yorhel.nl>2019-10-07 09:13:40 +0200
committerYorhel <git@yorhel.nl>2019-10-07 11:00:48 +0200
commitba02e35cea8f2f25ab0ed1a4fccc8983435f9671 (patch)
tree729c14b601ec2a8b779950d531449eff4e6ff434
parent69ffbc08f489770863b80998c704c66f148e469e (diff)
SQL: Adjust tag_vn_inherit and trait_chars calculations
Most importantly, downvoted parent tags are now still included for searching as per https://vndb.org/t12750 - in particular, a downvoted tag is considered to be exactly the same thing as a tag with no votes at all. Also made a few other adjustments: - Removed the "ignore child tags/traits if parent has its own value" rule. This was done because the 'spoiler' rating would get inherited using the average, but that should just have been the minimum (i.e. if a single child tag is not a spoiler, then neither is the parent). https://vndb.org/t10082 - The score of a parent tag is now avg(avg(votes_on_child_tags)) rather than avg(votes_on_child_tags), i.e. tags with more votes don't have more weight than tags with fewer votes. I don't think vote counts really matter in determining the parent score. - Deleted VNs now don't count towards tag statistics (this is consistent with traits). - Performance improvements: Smaller indices and more clever querying.
-rw-r--r--util/sql/func.sql89
-rw-r--r--util/sql/perms.sql2
-rw-r--r--util/sql/schema.sql4
-rw-r--r--util/sql/tableattrs.sql2
-rw-r--r--util/updates/update_20191007.sql7
5 files changed, 51 insertions, 53 deletions
diff --git a/util/sql/func.sql b/util/sql/func.sql
index cdf1b605..57e0b43f 100644
--- a/util/sql/func.sql
+++ b/util/sql/func.sql
@@ -129,39 +129,32 @@ BEGIN
DROP INDEX IF EXISTS tags_vn_inherit_tag_vid;
TRUNCATE tags_vn_inherit;
-- populate tags_vn_inherit
- INSERT INTO tags_vn_inherit
- -- All votes for all tags, including votes inherited by child tags if the
- -- parent tag itself does not have any votes.
- -- (also includes non-searchable tags, because they could have a searchable tag as parent)
- WITH RECURSIVE tags_vn_all(lvl, tag, vid, uid, vote, spoiler, defaultspoil, searchable) AS (
- SELECT 15, tv.tag, tv.vid, tv.uid, tv.vote, tv.spoiler, t.defaultspoil, true
- FROM tags_vn tv
- JOIN tags t ON t.id = tv.tag
- WHERE NOT tv.ignore
- UNION ALL
- SELECT lvl-1, tp.parent, ta.vid, ta.uid, ta.vote, ta.spoiler, t.defaultspoil, t.searchable
- FROM tags_vn_all ta
- JOIN tags_parents tp ON tp.tag = ta.tag
- JOIN tags t ON t.id = tp.parent
- WHERE t.state = 2
- AND ta.lvl > 0
- AND NOT EXISTS(SELECT 1 FROM tags_vn tv WHERE tv.tag = tp.parent AND tv.vid = ta.vid)
+ INSERT INTO tags_vn_inherit (tag, vid, rating, spoiler)
+ -- Group votes to generate a list of directly-upvoted (vid, tag) pairs.
+ -- This is essentually the same as the tag listing on VN pages.
+ WITH RECURSIVE t_avg(tag, vid, vote, spoiler) AS (
+ SELECT tv.tag, tv.vid, AVG(tv.vote)::real, CASE WHEN COUNT(tv.spoiler) = 0 THEN MIN(t.defaultspoil) ELSE AVG(tv.spoiler)::real END
+ FROM tags_vn tv
+ JOIN tags t ON t.id = tv.tag
+ WHERE NOT tv.ignore AND t.state = 2
+ AND vid NOT IN(SELECT id FROM vn WHERE hidden)
+ GROUP BY tv.tag, tv.vid
+ HAVING AVG(tv.vote) > 0
+ -- Add parent tags
+ ), t_all(lvl, tag, vid, vote, spoiler) AS (
+ SELECT 15, * FROM t_avg
+ UNION ALL
+ SELECT ta.lvl-1, tp.parent, ta.vid, ta.vote, ta.spoiler
+ FROM t_all ta
+ JOIN tags_parents tp ON tp.tag = ta.tag
+ WHERE ta.lvl > 0
)
- -- grouped by (tag, vid)
- SELECT tag, vid, COUNT(uid) AS users, AVG(vote)::real AS rating,
- (CASE WHEN COUNT(spoiler) = 0 THEN defaultspoil
- WHEN AVG(spoiler) > 1.3 THEN 2
- WHEN AVG(spoiler) > 0.4 THEN 1 ELSE 0
- END)::smallint AS spoiler
- FROM (
- -- grouped by (tag, vid, uid), so only one user votes on one parent tag per VN entry (also removing unsearchable tags)
- SELECT tag, vid, uid, MAX(vote)::real, AVG(spoiler)::real, defaultspoil
- FROM tags_vn_all
- WHERE searchable
- GROUP BY tag, vid, uid, defaultspoil
- ) AS t(tag, vid, uid, vote, spoiler, defaultspoil)
- GROUP BY tag, vid, defaultspoil
- HAVING AVG(vote) > 0;
+ -- Merge
+ SELECT tag, vid, AVG(vote)
+ , (CASE WHEN MIN(spoiler) > 1.3 THEN 2 WHEN MIN(spoiler) > 0.4 THEN 1 ELSE 0 END)::smallint
+ FROM t_all
+ WHERE tag IN(SELECT id FROM tags WHERE searchable)
+ GROUP BY tag, vid;
-- recreate index
CREATE INDEX tags_vn_inherit_tag_vid ON tags_vn_inherit (tag, vid);
-- and update the VN count in the tags table
@@ -174,36 +167,36 @@ $$ LANGUAGE plpgsql SECURITY DEFINER;
-- recalculate traits_chars
CREATE OR REPLACE FUNCTION traits_chars_calc() RETURNS void AS $$
BEGIN
+ DROP INDEX IF EXISTS traits_chars_tid;
TRUNCATE traits_chars;
INSERT INTO traits_chars (tid, cid, spoil)
- -- all char<->trait links of the latest revisions, including chars
- -- inherited from child traits if the parent trait was not mentioned
- -- directly.
+ -- all char<->trait links of the latest revisions, including chars inherited from child traits.
-- (also includes non-searchable traits, because they could have a searchable trait as parent)
- WITH RECURSIVE traits_chars_all(lvl, tid, cid, spoiler, searchable) AS (
- SELECT 15, tid, ct.id, spoil, true
- FROM chars_traits ct
- JOIN chars c ON c.id = ct.id
- WHERE NOT c.hidden
+ WITH RECURSIVE traits_chars_all(lvl, tid, cid, spoiler) AS (
+ SELECT 15, tid, ct.id, spoil
+ FROM chars_traits ct
+ WHERE id NOT IN(SELECT id from chars WHERE hidden)
UNION ALL
- SELECT lvl-1, tp.parent, tc.cid, tc.spoiler, t.searchable
+ SELECT lvl-1, tp.parent, tc.cid, tc.spoiler
FROM traits_chars_all tc
JOIN traits_parents tp ON tp.trait = tc.tid
JOIN traits t ON t.id = tp.parent
WHERE t.state = 2
AND tc.lvl > 0
- AND NOT EXISTS(SELECT 1 FROM chars_traits cti WHERE cti.tid = tp.parent AND cti.id = tc.cid)
)
- -- now grouped by (tid, cid) and with non-searchable traits filtered out
- SELECT tid, cid, (CASE WHEN AVG(spoiler) > 1.3 THEN 2 WHEN AVG(spoiler) > 0.7 THEN 1 ELSE 0 END)::smallint AS spoiler
- FROM traits_chars_all
- WHERE searchable
- GROUP BY tid, cid;
+ -- now grouped by (tid, cid), with non-searchable traits filtered out
+ SELECT tid, cid
+ , (CASE WHEN MIN(spoiler) > 1.3 THEN 2 WHEN MIN(spoiler) > 0.7 THEN 1 ELSE 0 END)::smallint AS spoiler
+ FROM traits_chars_all
+ WHERE tid IN(SELECT id FROM traits WHERE searchable)
+ GROUP BY tid, cid;
+ -- recreate index
+ CREATE INDEX traits_chars_tid ON traits_chars (tid);
-- and update the VN count in the tags table
UPDATE traits SET c_items = (SELECT COUNT(*) FROM traits_chars WHERE tid = id);
RETURN;
END;
-$$ LANGUAGE plpgsql;
+$$ LANGUAGE plpgsql SECURITY DEFINER;
-- Fully recalculate all rows in stats_cache
diff --git a/util/sql/perms.sql b/util/sql/perms.sql
index e3e0e3b8..6bf43a63 100644
--- a/util/sql/perms.sql
+++ b/util/sql/perms.sql
@@ -143,7 +143,7 @@ GRANT SELECT ON threads TO vndb_multi;
GRANT SELECT ON threads_boards TO vndb_multi;
GRANT SELECT ON threads_posts TO vndb_multi;
GRANT SELECT, UPDATE ON traits TO vndb_multi;
-GRANT SELECT, INSERT, TRUNCATE ON traits_chars TO vndb_multi;
+GRANT SELECT ON traits_chars TO vndb_multi; -- traits_chars_calc() is SECURITY DEFINER
GRANT SELECT ON traits_parents TO vndb_multi;
GRANT SELECT (id, username, registered, c_votes, c_changes, c_tags, ign_votes, email_confirmed, hide_list, notify_dbedit, notify_announce),
UPDATE ( c_votes, c_changes, c_tags) ON users TO vndb_multi;
diff --git a/util/sql/schema.sql b/util/sql/schema.sql
index 46bf3d27..7db233d1 100644
--- a/util/sql/schema.sql
+++ b/util/sql/schema.sql
@@ -610,7 +610,6 @@ CREATE TABLE tags_vn (
CREATE TABLE tags_vn_inherit (
tag integer NOT NULL,
vid integer NOT NULL,
- users integer NOT NULL,
rating real NOT NULL,
spoiler smallint NOT NULL
);
@@ -689,8 +688,7 @@ CREATE TABLE traits (
CREATE TABLE traits_chars (
cid integer NOT NULL, -- chars (id)
tid integer NOT NULL, -- traits (id)
- spoil smallint NOT NULL DEFAULT 0,
- PRIMARY KEY(cid, tid)
+ spoil smallint NOT NULL DEFAULT 0
);
-- traits_parents
diff --git a/util/sql/tableattrs.sql b/util/sql/tableattrs.sql
index 53b503ce..ed25bc98 100644
--- a/util/sql/tableattrs.sql
+++ b/util/sql/tableattrs.sql
@@ -118,7 +118,7 @@ CREATE INDEX tags_vn_vid ON tags_vn (vid);
CREATE INDEX threads_posts_date ON threads_posts (date);
CREATE INDEX threads_posts_ts ON threads_posts USING gin(bb_tsvector(msg));
CREATE INDEX threads_posts_uid ON threads_posts (uid); -- Only really used on /u+ pages to get stats
-CREATE INDEX traits_chars_tid ON traits_chars (tid); -- Significantly speeds up traits_chars_calc() and possibly other stuff
+CREATE INDEX traits_chars_tid ON traits_chars (tid);
CREATE INDEX vn_seiyuu_aid ON vn_seiyuu (aid); -- Only used on /s+?
CREATE INDEX vn_seiyuu_cid ON vn_seiyuu (cid); -- Only used on /c+?
CREATE INDEX vn_staff_aid ON vn_staff (aid);
diff --git a/util/updates/update_20191007.sql b/util/updates/update_20191007.sql
new file mode 100644
index 00000000..9d556478
--- /dev/null
+++ b/util/updates/update_20191007.sql
@@ -0,0 +1,7 @@
+ALTER TABLE tags_vn_inherit DROP COLUMN users;
+
+ALTER TABLE traits_chars DROP CONSTRAINT traits_chars_pkey;
+
+\i util/sql/func.sql
+SELECT tag_vn_calc();
+SELECT traits_chars_calc();