summaryrefslogtreecommitdiff
path: root/util
diff options
context:
space:
mode:
authorYorhel <git@yorhel.nl>2020-02-21 08:30:46 +0100
committerYorhel <git@yorhel.nl>2020-03-16 12:45:42 +0100
commitbe6aa6cc7e8034cfc064acb22f44e66aa527e06f (patch)
treef5e27345a3bec3d4d516a7c986d34b23e37c2d28 /util
parent8fe95ae3c2119e5a5219ad072d441bac406ea547 (diff)
imgflag: Initial schema + UI for image flagging
Lots of TODO's left to work on, but you have to start somewhere. I've bumped the Docker image version because this change requires TUWF commit 74aad378d49592df4359ea8a9f6f36d4a0013c04 (Elm decoder for structs with more than 8 fields)
Diffstat (limited to 'util')
-rwxr-xr-xutil/dbdump.pl5
-rwxr-xr-xutil/devdump.pl4
-rw-r--r--util/sql/func.sql67
-rw-r--r--util/sql/perms.sql4
-rw-r--r--util/sql/schema.sql21
-rw-r--r--util/sql/tableattrs.sql6
-rw-r--r--util/sql/triggers.sql14
-rw-r--r--util/updates/2020-03-13-image-flagging.sql30
8 files changed, 143 insertions, 8 deletions
diff --git a/util/dbdump.pl b/util/dbdump.pl
index f2f5d4ca..11cd2046 100755
--- a/util/dbdump.pl
+++ b/util/dbdump.pl
@@ -56,9 +56,8 @@ my %tables = (
.' AND (rid IS NULL OR rid IN(SELECT id FROM releases WHERE NOT hidden))'
, order => 'id, vid, rid' },
docs => { where => 'NOT hidden' },
- images => { where => 'id IN(SELECT scr FROM vn_screenshots vs JOIN vn v ON v.id = vs.id WHERE NOT v.hidden'
- .' UNION SELECT image FROM chars WHERE image IS NOT NULL AND NOT hidden'
- .' UNION SELECT image from vn WHERE image IS NOT NULL AND NOT hidden)' },
+ images => { where => "c_weight > 0" }, # Only images with a positive weight are referenced.
+ image_votes => { where => "id IN(SELECT id FROM images WHERE c_weight > 0)", order => 'uid, id' },
producers => { where => 'NOT hidden' },
producers_relations => { where => 'id IN(SELECT id FROM producers WHERE NOT hidden)' },
releases => { where => 'NOT hidden' },
diff --git a/util/devdump.pl b/util/devdump.pl
index 5fb4a916..40ef6848 100755
--- a/util/devdump.pl
+++ b/util/devdump.pl
@@ -132,7 +132,9 @@ sub copy_entry {
copy 'wikidata';
# Image metadata
- copy images => 'SELECT * FROM images WHERE id IN('.join(',',map "'$_'", @$images).')';
+ my $image_ids = join ',', map "'$_'", @$images;
+ copy images => "SELECT * FROM images WHERE id IN($image_ids)";
+ copy image_votes => "SELECT * FROM image_votes WHERE id IN($image_ids)", { uid => 'user' };
# Threads (announcements)
my $threads = join ',', @{ $db->selectcol_arrayref("SELECT tid FROM threads_boards b WHERE b.type = 'an'") };
diff --git a/util/sql/func.sql b/util/sql/func.sql
index af510e51..77d4451e 100644
--- a/util/sql/func.sql
+++ b/util/sql/func.sql
@@ -136,6 +136,62 @@ $$ LANGUAGE SQL;
+-- c_weight = if not_referenced then 0 else lower(c_votecount) -> higher(c_weight) && higher(*_stddev) -> higher(c_weight)
+--
+-- One solution:
+--
+-- 1 # Minimum weight for referenced images
+-- * max(1, 10 - c_votecount) # 0 votes -> 10x more likely to be selected, 9+ votes -> 1x. Something non-linear may be nicer...
+-- * (1+c_sexual_stddev*6) # stddev is 0..1.5, upscaled to 1x..9x more likely to be selected. Any stddev>0.8 (=4.8x more likely) is probably worth looking at
+-- * (1+c_violence_stddev*6)
+--
+-- Extremes: 1 .. 810
+--
+-- Alternative solution (currently implemented):
+--
+-- votes_weight = max(0, 10 - c_votecount)/10 -> linear weight between 0..1, 0 being OK and 1 being BAD
+-- *_stddev_weight = *_stddev/1.5 -> ^
+-- weight = 1 + votes_weight*100 + sexual_stddev_weight*100 + violence_stddev_weight*100
+--
+-- Extremes: 1 .. 301, easier to tune and reason about, but still linear
+--
+-- Neither of those solutions are grounded in theory, I've no clue how
+-- statistics work. I suspect confidence intervals/levels are more appropriate
+-- for this use case.
+CREATE OR REPLACE FUNCTION update_images_cache(image_id) RETURNS void AS $$
+BEGIN
+ -- Have to dynamically construct the query here, a
+ -- WHERE ($1 IS NULL OR s.id = $1)
+ -- causes the planner to miss a bunch of optimizations.
+ EXECUTE $sql$UPDATE images
+ SET c_votecount = votecount, c_sexual_avg = sexual_avg, c_sexual_stddev = sexual_stddev
+ , c_violence_avg = violence_avg, c_violence_stddev = violence_stddev, c_weight = weight
+ FROM (
+ SELECT s.*,
+ CASE WHEN x.id IS NULL THEN 0
+ ELSE 1 + (greatest(0, 10.0 - s.votecount)/10)*100 + coalesce(s.sexual_stddev/1.5, 0)*100 + coalesce(s.violence_stddev/1.5, 0)*100
+ END AS weight
+ FROM (
+ SELECT i.id, count(iv.id) AS votecount
+ , avg(sexual) AS sexual_avg, stddev_pop(sexual) AS sexual_stddev
+ , avg(violence) AS violence_avg, stddev_pop(violence) AS violence_stddev
+ FROM images i
+ LEFT JOIN image_votes iv ON iv.id = i.id
+ GROUP BY i.id
+ ) s
+ LEFT JOIN (
+ SELECT image FROM vn WHERE NOT hidden AND image IS NOT NULL
+ UNION ALL SELECT scr FROM vn_screenshots vs JOIN vn v ON v.id = vs.id WHERE NOT v.hidden
+ UNION ALL SELECT image FROM chars WHERE NOT hidden AND image IS NOT NULL
+ ) x(id) ON s.id = x.id
+ $sql$ || (CASE WHEN $1 IS NULL THEN '' ELSE 'WHERE s.id = '||quote_literal($1)||'::image_id' END) || $sql$
+ ) weights
+ WHERE weights.id = images.id
+ $sql$;
+END; $$ LANGUAGE plpgsql;
+
+
+
-- Update users.c_vns, c_votes and c_wish for one user (when given an id) or all users (when given NULL)
CREATE OR REPLACE FUNCTION update_users_ulist_stats(integer) RETURNS void AS $$
BEGIN
@@ -468,6 +524,17 @@ BEGIN
SELECT rl.uid, rv.vid FROM rlists rl JOIN releases_vn rv ON rv.id = rl.rid WHERE rl.rid = xedit.itemid
ON CONFLICT (uid, vid) DO NOTHING;
END IF;
+
+ -- Call update_images_cache() where appropriate
+ IF xtype = 'c'
+ THEN
+ PERFORM update_images_cache(image) FROM chars_hist WHERE chid IN(xoldchid,xedit.chid) AND image IS NOT NULL;
+ END IF;
+ IF xtype = 'v'
+ THEN
+ PERFORM update_images_cache(image) FROM vn_hist WHERE chid IN(xoldchid,xedit.chid) AND image IS NOT NULL;
+ PERFORM update_images_cache(scr) FROM vn_screenshots_hist WHERE chid IN(xoldchid,xedit.chid);
+ END IF;
END;
$$ LANGUAGE plpgsql;
diff --git a/util/sql/perms.sql b/util/sql/perms.sql
index 7410a688..b25235f0 100644
--- a/util/sql/perms.sql
+++ b/util/sql/perms.sql
@@ -16,6 +16,7 @@ GRANT SELECT, INSERT ON chars_vns_hist TO vndb_site;
GRANT SELECT, INSERT, UPDATE ON docs TO vndb_site;
GRANT SELECT, INSERT ON docs_hist TO vndb_site;
GRANT SELECT, INSERT, UPDATE ON images TO vndb_site;
+GRANT SELECT, INSERT, UPDATE, DELETE ON image_votes TO vndb_site;
GRANT SELECT, INSERT, UPDATE, DELETE ON login_throttle TO vndb_site;
GRANT SELECT, INSERT, UPDATE, DELETE ON notifications TO vndb_site;
GRANT SELECT, INSERT, UPDATE ON producers TO vndb_site;
@@ -104,7 +105,8 @@ GRANT SELECT ON chars_traits TO vndb_multi;
GRANT SELECT ON chars_vns TO vndb_multi;
GRANT SELECT ON docs TO vndb_multi;
GRANT SELECT ON docs_hist TO vndb_multi;
-GRANT SELECT ON images TO vndb_multi;
+GRANT SELECT, UPDATE ON images TO vndb_multi;
+GRANT SELECT ON image_votes TO vndb_multi;
GRANT SELECT, INSERT, UPDATE, DELETE ON login_throttle TO vndb_multi;
GRANT SELECT, INSERT, UPDATE, DELETE ON notifications TO vndb_multi;
GRANT SELECT, UPDATE ON producers TO vndb_multi;
diff --git a/util/sql/schema.sql b/util/sql/schema.sql
index 89d8431e..ae10a3d9 100644
--- a/util/sql/schema.sql
+++ b/util/sql/schema.sql
@@ -206,9 +206,24 @@ CREATE TABLE docs_hist (
-- images
CREATE TABLE images (
- id image_id NOT NULL PRIMARY KEY, -- [pub]
- width smallint NOT NULL, -- [pub]
- height smallint NOT NULL -- [pub]
+ id image_id NOT NULL PRIMARY KEY, -- [pub]
+ width smallint NOT NULL, -- [pub]
+ height smallint NOT NULL, -- [pub]
+ c_votecount integer NOT NULL DEFAULT 0, -- [pub] (cached columns are marked [pub] for easy querying...)
+ c_sexual_avg float, -- [pub]
+ c_sexual_stddev float, -- [pub]
+ c_violence_avg float, -- [pub]
+ c_violence_stddev float, -- [pub]
+ c_weight float NOT NULL DEFAULT 0 -- [pub]
+);
+
+-- image_votes
+CREATE TABLE image_votes (
+ id image_id NOT NULL, -- [pub]
+ uid integer, -- [pub]
+ sexual smallint NOT NULL CHECK(sexual >= 0 AND sexual <= 2), -- [pub]
+ violence smallint NOT NULL CHECK(violence >= 0 AND violence <= 2), -- [pub]
+ date timestamptz NOT NULL DEFAULT NOW() -- [pub]
);
-- login_throttle
diff --git a/util/sql/tableattrs.sql b/util/sql/tableattrs.sql
index 191c78e6..00efbd48 100644
--- a/util/sql/tableattrs.sql
+++ b/util/sql/tableattrs.sql
@@ -16,6 +16,7 @@ ALTER TABLE chars_vns ADD CONSTRAINT chars_vns_rid_fkey
ALTER TABLE chars_vns_hist ADD CONSTRAINT chars_vns_hist_chid_fkey FOREIGN KEY (chid) REFERENCES changes (id) ON DELETE CASCADE;
ALTER TABLE chars_vns_hist ADD CONSTRAINT chars_vns_hist_vid_fkey FOREIGN KEY (vid) REFERENCES vn (id) DEFERRABLE;
ALTER TABLE chars_vns_hist ADD CONSTRAINT chars_vns_hist_rid_fkey FOREIGN KEY (rid) REFERENCES releases (id) DEFERRABLE;
+ALTER TABLE image_votes ADD CONSTRAINT image_votes_id_fkey FOREIGN KEY (id) REFERENCES images (id);
ALTER TABLE notifications ADD CONSTRAINT notifications_uid_fkey FOREIGN KEY (uid) REFERENCES users (id) ON DELETE CASCADE;
ALTER TABLE notifications ADD CONSTRAINT notifications_c_byuser_fkey FOREIGN KEY (c_byuser) REFERENCES users (id) ON DELETE SET DEFAULT;
ALTER TABLE producers ADD CONSTRAINT producers_rgraph_fkey FOREIGN KEY (rgraph) REFERENCES relgraphs (id);
@@ -111,6 +112,9 @@ ALTER TABLE vn_staff_hist ADD CONSTRAINT vn_staff_hist_chid_fkey
CREATE INDEX chars_main ON chars (main) WHERE main IS NOT NULL AND NOT hidden; -- Only used on /c+
CREATE INDEX chars_vns_vid ON chars_vns (vid);
+CREATE INDEX chars_image ON chars (image);
+CREATE UNIQUE INDEX image_votes_pkey ON image_votes (uid, id);
+CREATE INDEX image_votes_id ON image_votes (id);
CREATE INDEX notifications_uid ON notifications (uid);
CREATE INDEX releases_producers_pid ON releases_producers (pid);
CREATE INDEX releases_vn_vid ON releases_vn (vid);
@@ -125,6 +129,8 @@ CREATE INDEX threads_posts_date ON threads_posts (date);
CREATE INDEX threads_posts_ts ON threads_posts USING gin(bb_tsvector(msg));
CREATE INDEX threads_posts_uid ON threads_posts (uid); -- Only really used on /u+ pages to get stats
CREATE INDEX traits_chars_tid ON traits_chars (tid);
+CREATE INDEX vn_image ON vn (image);
+CREATE INDEX vn_screenshots_scr ON vn_screenshots (scr);
CREATE INDEX vn_seiyuu_aid ON vn_seiyuu (aid); -- Only used on /s+?
CREATE INDEX vn_seiyuu_cid ON vn_seiyuu (cid); -- Only used on /c+?
CREATE INDEX vn_staff_aid ON vn_staff (aid);
diff --git a/util/sql/triggers.sql b/util/sql/triggers.sql
index a8ef3bbc..aa9419c2 100644
--- a/util/sql/triggers.sql
+++ b/util/sql/triggers.sql
@@ -329,3 +329,17 @@ END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER notify_announce AFTER INSERT ON threads_posts FOR EACH ROW WHEN (NEW.num = 1) EXECUTE PROCEDURE notify_announce();
+
+
+
+
+-- Call update_images_cache() for every change on image_votes
+
+CREATE OR REPLACE FUNCTION update_images_cache() RETURNS trigger AS $$
+BEGIN
+ PERFORM update_images_cache(id) FROM (SELECT OLD.id UNION SELECT NEW.id) AS x(id) WHERE id IS NOT NULL;
+ RETURN NULL;
+END
+$$ LANGUAGE plpgsql;
+
+CREATE TRIGGER image_votes_cache AFTER INSERT OR UPDATE OR DELETE ON image_votes FOR EACH ROW EXECUTE PROCEDURE update_images_cache();
diff --git a/util/updates/2020-03-13-image-flagging.sql b/util/updates/2020-03-13-image-flagging.sql
new file mode 100644
index 00000000..d106af1c
--- /dev/null
+++ b/util/updates/2020-03-13-image-flagging.sql
@@ -0,0 +1,30 @@
+ALTER TABLE images ADD COLUMN c_votecount integer NOT NULL DEFAULT 0;
+ALTER TABLE images ADD COLUMN c_sexual_avg float;
+ALTER TABLE images ADD COLUMN c_sexual_stddev float;
+ALTER TABLE images ADD COLUMN c_violence_avg float;
+ALTER TABLE images ADD COLUMN c_violence_stddev float;
+ALTER TABLE images ADD COLUMN c_weight float NOT NULL DEFAULT 0;
+
+CREATE TABLE image_votes (
+ id image_id NOT NULL,
+ uid integer,
+ sexual smallint NOT NULL CHECK(sexual >= 0 AND sexual <= 2),
+ violence smallint NOT NULL CHECK(violence >= 0 AND violence <= 2),
+ date timestamptz NOT NULL DEFAULT NOW()
+);
+
+CREATE UNIQUE INDEX image_votes_pkey ON image_votes (uid, id);
+CREATE INDEX image_votes_id ON image_votes (id);
+ALTER TABLE image_votes ADD CONSTRAINT image_votes_id_fkey FOREIGN KEY (id) REFERENCES images (id);
+
+-- These significantly speed up the update_image_cache() and reverse image search on the flagging UI
+CREATE INDEX vn_image ON vn (image);
+CREATE INDEX vn_screenshots_scr ON vn_screenshots (scr);
+CREATE INDEX chars_image ON chars (image);
+
+\i util/sql/func.sql
+\i util/sql/triggers.sql
+\i util/sql/perms.sql
+
+\timing
+select update_images_cache(NULL);