summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorYorhel <git@yorhel.nl>2022-10-08 10:35:36 +0200
committerYorhel <git@yorhel.nl>2022-10-08 10:53:55 +0200
commit275548ac86739c9046aa025701492c254fcb32a0 (patch)
tree951bf8ebde0e720c94ebaf2dd025a750853bbf61 /sql
parentd663e161a56d47d22db0d9eb4740e4af67047cd1 (diff)
SQL: Reduce size of images table + set a max of 15 votes
Reduction is by 12 bytes per row, by simply encoding the same data as smallints. We never needed the full range of the real/integer types. (Could go even further, but postgres doesn't have a uint8 type and encoding avg + stddev in the same column feels a bit too hacky) Having more than 15 votes on an image is pretty useless, so by removing images with sufficient votes from the image voting selection we can avoid further bloating the table. (c_weight is still >0 even for images that shouldn't be selected anymore, because this field is also abused to check if an image is referenced somewhere in the DB. Instead, images with c_weight=1 are now excluded from selection. This has the side effect of also excluding images with 14 votes and stddev=0, but that seems okay)
Diffstat (limited to 'sql')
-rw-r--r--sql/func.sql17
-rw-r--r--sql/schema.sql14
2 files changed, 18 insertions, 13 deletions
diff --git a/sql/func.sql b/sql/func.sql
index 1b694211..26f477bd 100644
--- a/sql/func.sql
+++ b/sql/func.sql
@@ -186,20 +186,23 @@ BEGIN
SET c_votecount = votecount, c_sexual_avg = sexual_avg, c_sexual_stddev = sexual_stddev
, c_violence_avg = violence_avg, c_violence_stddev = violence_stddev, c_weight = weight, c_uids = uids
FROM (
- SELECT s.*,
- CASE WHEN EXISTS(
+ SELECT s.id, s.votecount, s.uids
+ , COALESCE(s.sexual_avg *100, 200) AS sexual_avg, COALESCE(s.sexual_stddev *100, 0) AS sexual_stddev
+ , COALESCE(s.violence_avg*100, 200) AS violence_avg, COALESCE(s.violence_stddev*100, 0) AS violence_stddev
+ , CASE WHEN s.votecount >= 15 THEN 1 -- Lock the weight at 1 at 15 votes, collecting more votes is just inefficient
+ WHEN EXISTS(
SELECT 1 FROM vn v WHERE s.id BETWEEN 'cv1' AND vndbid_max('cv') AND NOT v.hidden AND v.image = s.id
UNION ALL SELECT 1 FROM vn_screenshots vs JOIN vn v ON v.id = vs.id WHERE s.id BETWEEN 'sf1' AND vndbid_max('sf') AND NOT v.hidden AND vs.scr = s.id
UNION ALL SELECT 1 FROM chars c WHERE s.id BETWEEN 'ch1' AND vndbid_max('ch') AND NOT c.hidden AND c.image = s.id
)
- THEN ceil(pow(2, greatest(0, 14 - s.votecount)) + coalesce(pow(s.sexual_stddev, 2), 0)*100 + coalesce(pow(s.violence_stddev, 2), 0)*100)::real
+ THEN ceil(pow(2, greatest(0, 14 - s.votecount)) + coalesce(pow(s.sexual_stddev, 2), 0)*100 + coalesce(pow(s.violence_stddev, 2), 0)*100)
ELSE 0 END AS weight
FROM (
SELECT i.id, count(iv.id) AS votecount
- , round(avg(sexual) FILTER(WHERE NOT iv.ignore), 2)::real AS sexual_avg
- , round(avg(violence) FILTER(WHERE NOT iv.ignore), 2)::real AS violence_avg
- , round(stddev_pop(sexual) FILTER(WHERE NOT iv.ignore), 2)::real AS sexual_stddev
- , round(stddev_pop(violence) FILTER(WHERE NOT iv.ignore), 2)::real AS violence_stddev
+ , round(avg(sexual) FILTER(WHERE NOT iv.ignore), 2) AS sexual_avg
+ , round(avg(violence) FILTER(WHERE NOT iv.ignore), 2) AS violence_avg
+ , round(stddev_pop(sexual) FILTER(WHERE NOT iv.ignore), 2) AS sexual_stddev
+ , round(stddev_pop(violence) FILTER(WHERE NOT iv.ignore), 2) AS violence_stddev
, coalesce(array_agg(u.id) FILTER(WHERE u.id IS NOT NULL), '{}') AS uids
FROM images i
LEFT JOIN image_votes iv ON iv.id = i.id
diff --git a/sql/schema.sql b/sql/schema.sql
index 3c1680ed..124c2a7f 100644
--- a/sql/schema.sql
+++ b/sql/schema.sql
@@ -281,13 +281,15 @@ CREATE TABLE images (
id vndbid NOT NULL PRIMARY KEY CONSTRAINT images_id_check CHECK(vndbid_type(id) IN('ch', 'cv', 'sf')), -- [pub]
width smallint NOT NULL, -- [pub]
height smallint NOT NULL, -- [pub]
- c_votecount integer NOT NULL DEFAULT 0, -- [pub] (cached columns are marked [pub] for easy querying...)
- c_sexual_avg real, -- [pub]
- c_sexual_stddev real, -- [pub]
- c_violence_avg real, -- [pub]
- c_violence_stddev real, -- [pub]
- c_weight real NOT NULL DEFAULT 0, -- [pub]
+ c_votecount smallint NOT NULL DEFAULT 0, -- [pub] (cached columns are marked [pub] for easy querying)
+ c_sexual_avg smallint NOT NULL DEFAULT 200, -- [pub] (0 - 200, so average vote * 100)
+ c_sexual_stddev smallint NOT NULL DEFAULT 0, -- [pub]
+ c_violence_avg smallint NOT NULL DEFAULT 200, -- [pub]
+ c_violence_stddev smallint NOT NULL DEFAULT 0, -- [pub]
+ c_weight smallint NOT NULL DEFAULT 0, -- [pub]
c_uids vndbid[] NOT NULL DEFAULT '{}'
+ -- (technically, c_votecount is redundant as it can be easily derived from
+ -- c_uids, but otherwise we'd lose the space to padding anyway)
);
-- image_votes