diff options
author | Yorhel <git@yorhel.nl> | 2020-04-05 16:27:06 +0200 |
---|---|---|
committer | Yorhel <git@yorhel.nl> | 2020-04-05 16:27:08 +0200 |
commit | 022e2570ffbe134d161a87d354254127f418108a (patch) | |
tree | 622e3145c4a6adc47930e9d479a3f019532cdfc6 | |
parent | 6d767e48b63c30fd572cff1fdf73bb2e60574848 (diff) |
SQL: Fix update_images_cache() performance regression
That somehow got a lot slower with the last update. On the upside, this
fixed query is simpler and doesn't need the ugly
construct-query-as-a-string hack anymore.
-rw-r--r-- | sql/func.sql | 44 |
1 files changed, 14 insertions, 30 deletions
diff --git a/sql/func.sql b/sql/func.sql index 7701c52d..977b775a 100644 --- a/sql/func.sql +++ b/sql/func.sql @@ -138,43 +138,29 @@ $$ LANGUAGE SQL; -- c_weight = if not_referenced then 0 else lower(c_votecount) -> higher(c_weight) && higher(*_stddev) -> higher(c_weight) -- --- One solution: --- --- 1 # Minimum weight for referenced images --- * max(1, 10 - c_votecount) # 0 votes -> 10x more likely to be selected, 9+ votes -> 1x. Something non-linear may be nicer... --- * (1+c_sexual_stddev*6) # stddev is 0..1.5, upscaled to 1x..9x more likely to be selected. Any stddev>0.8 (=4.8x more likely) is probably worth looking at --- * (1+c_violence_stddev*6) --- --- Extremes: 1 .. 810 --- --- Alternative solution (currently implemented): +-- Current algorithm: -- -- votes_weight = max(0, 10 - c_votecount)/10 -> linear weight between 0..1, 0 being OK and 1 being BAD --- *_stddev_weight = *_stddev/1.5 -> ^ --- weight = min(1, votes_weight*100 + sexual_stddev_weight*100 + violence_stddev_weight*100) +-- weight = min(1, votes_weight*100 + sexual_stddev*100 + violence_stddev*100) -- -- Extremes: 1 .. 300, easier to tune and reason about, but still linear -- --- Neither of those solutions are grounded in theory, I've no clue how --- statistics work. I suspect confidence intervals/levels are more appropriate --- for this use case. +-- This isn't very grounded in theory, I've no clue how statistics work. I +-- suspect confidence intervals/levels are more appropriate for this use case. -- -- Non-'ch' image weights are currently reduced to 20% in order to prioritize -- character images. CREATE OR REPLACE FUNCTION update_images_cache(vndbid) RETURNS void AS $$ BEGIN - -- Have to dynamically construct the query here, a - -- WHERE ($1 IS NULL OR s.id = $1) - -- causes the planner to miss a bunch of optimizations. - EXECUTE $sql$UPDATE images + UPDATE images SET c_votecount = votecount, c_sexual_avg = sexual_avg, c_sexual_stddev = sexual_stddev , c_violence_avg = violence_avg, c_violence_stddev = violence_stddev, c_weight = weight FROM ( SELECT s.*, - CASE WHEN x.id IS NULL THEN 0 + CASE WHEN COALESCE(v1.id,v2.id,c.id) IS NULL THEN 0 ELSE greatest(1, - ((greatest(0, 10.0 - s.votecount)/10)*100 + coalesce(s.sexual_stddev/1.5, 0)*100 + coalesce(s.violence_stddev/1.5, 0)*100) - * (CASE WHEN vndbid_type(x.id) = 'ch' THEN 1 ELSE 0.2 END) + ((greatest(0, 10.0 - s.votecount)/10)*100 + coalesce(s.sexual_stddev, 0)*100 + coalesce(s.violence_stddev, 0)*100) + * (CASE WHEN vndbid_type(s.id) = 'ch' THEN 1 ELSE 0.2 END) ) END AS weight FROM ( @@ -183,17 +169,15 @@ BEGIN , avg(violence) AS violence_avg, stddev_pop(violence) AS violence_stddev FROM images i LEFT JOIN image_votes iv ON iv.id = i.id + WHERE ($1 IS NULL OR i.id = $1) GROUP BY i.id ) s - LEFT JOIN ( - SELECT image FROM vn WHERE NOT hidden AND image IS NOT NULL - UNION ALL SELECT scr FROM vn_screenshots vs JOIN vn v ON v.id = vs.id WHERE NOT v.hidden - UNION ALL SELECT image FROM chars WHERE NOT hidden AND image IS NOT NULL - ) x(id) ON s.id = x.id - $sql$ || (CASE WHEN $1 IS NULL THEN '' ELSE 'WHERE s.id = '||quote_literal($1) END) || $sql$ + LEFT JOIN vn v1 ON NOT v1.hidden AND v1.image = s.id + LEFT JOIN vn_screenshots vs ON vs.scr = s.id + LEFT JOIN vn v2 ON NOT v2.hidden AND vs.id = v2.id + LEFT JOIN chars c ON NOT c.hidden AND c.image = s.id ) weights - WHERE weights.id = images.id - $sql$; + WHERE weights.id = images.id; END; $$ LANGUAGE plpgsql; |