summaryrefslogtreecommitdiff
path: root/util
diff options
context:
space:
mode:
authorYorhel <git@yorhel.nl>2020-05-15 13:55:20 +0200
committerYorhel <git@yorhel.nl>2020-05-15 13:59:14 +0200
commitc3318cb2cc5104d3c7f403b8ad6c70b6fa00b55e (patch)
treee397a45069a94fb19248655e5e6649721ec03565 /util
parentc18d0d12829c4c42910d66a4d7fbb035ba8dc7ac (diff)
unusedimages.pl: Keep *all* referenced images in the DB + don't delete recent uploads
As a result, every image mentioned in the DB should now have an actual image file and every image file should now have a row in the DB. I'm planning to run this script more often to clean up after the new (coming) image upload forms - which may leave some unreferenced files - and to prevent VNDB from being abused as an image hosting service.
Diffstat (limited to 'util')
-rwxr-xr-xutil/unusedimages.pl153
1 files changed, 78 insertions, 75 deletions
diff --git a/util/unusedimages.pl b/util/unusedimages.pl
index 3b81c7ae..818a82d2 100755
--- a/util/unusedimages.pl
+++ b/util/unusedimages.pl
@@ -16,96 +16,99 @@ BEGIN { ($ROOT = abs_path $0) =~ s{/util/unusedimages\.pl$}{}; }
my $db = DBI->connect('dbi:Pg:dbname=vndb', 'vndb', undef, { RaiseError => 1 });
-my(%scr, %cv, %ch);
my $count = 0;
+my $fnmatch = '/(cv|ch|sf|st)/[0-9][0-9]/([1-9][0-9]{0,6})\.jpg';
-my $fnmatch = qr{/(cv|ch|sf|st)/[0-9][0-9]/([0-9]+)\.jpg};
+my(%scr, %cv, %ch);
my %dir = (cv => \%cv, ch => \%ch, sf => \%scr, st => \%scr);
-sub cleandb {
- my $cnt = $db->do(q{
- DELETE FROM images WHERE id IN(
- SELECT id FROM images EXCEPT
- SELECT * FROM (
- SELECT scr FROM vn_screenshots
- UNION SELECT scr FROM vn_screenshots_hist
- UNION SELECT image FROM vn WHERE image IS NOT NULL
- UNION SELECT image FROM vn_hist WHERE image IS NOT NULL
- UNION SELECT image FROM chars WHERE image IS NOT NULL
- UNION SELECT image FROM chars_hist WHERE image IS NOT NULL
- ) x
- )
- });
- print "# Deleted unreferenced images: $cnt\n";
-}
-sub addtxt {
- my $t = shift;
- while($t =~ m{$fnmatch}g) {
- $dir{$1}{$2} = 1;
- $count++;
- }
+sub cleandb {
+ # Delete all images from the `images` table that are not referenced from
+ # *anywhere* in the database, including old revisions and links found in
+ # comments, descriptions and docs.
+ # The 30 (100, in the case of screenshots) most recently uploaded images of
+ # each type are also kept because there's a good chance they will get
+ # referenced from somewhere, soon.
+ my $cnt = $db->do(q{
+ DELETE FROM images WHERE id IN(
+ SELECT id FROM images EXCEPT
+ WHERE id NOT IN(SELECT id FROM images WHERE id BETWEEN vndbid('ch',1) AND vndbid_max('ch') ORDER BY id DESC LIMIT 30)
+ AND id NOT IN(SELECT id FROM images WHERE id BETWEEN vndbid('cv',1) AND vndbid_max('cv') ORDER BY id DESC LIMIT 30)
+ AND id NOT IN(SELECT id FROM images WHERE id BETWEEN vndbid('sf',1) AND vndbid_max('sf') ORDER BY id DESC LIMIT 100)
+ SELECT * FROM (
+ SELECT scr FROM vn_screenshots
+ UNION SELECT scr FROM vn_screenshots_hist
+ UNION SELECT image FROM vn WHERE image IS NOT NULL
+ UNION SELECT image FROM vn_hist WHERE image IS NOT NULL
+ UNION SELECT image FROM chars WHERE image IS NOT NULL
+ UNION SELECT image FROM chars_hist WHERE image IS NOT NULL
+ UNION (
+ SELECT vndbid(case when img[1] = 'st' then 'sf' else img[1] end, img[2]::int)
+ FROM ( SELECT content FROM docs
+ UNION ALL SELECT content FROM docs_hist
+ UNION ALL SELECT "desc" FROM vn
+ UNION ALL SELECT "desc" FROM vn_hist
+ UNION ALL SELECT "desc" FROM chars
+ UNION ALL SELECT "desc" FROM chars_hist
+ UNION ALL SELECT "desc" FROM producers
+ UNION ALL SELECT "desc" FROM producers_hist
+ UNION ALL SELECT notes FROM releases
+ UNION ALL SELECT notes FROM releases_hist
+ UNION ALL SELECT "desc" FROM staff
+ UNION ALL SELECT "desc" FROM staff_hist
+ UNION ALL SELECT description FROM tags
+ UNION ALL SELECT description FROM traits
+ UNION ALL SELECT comments FROM changes
+ UNION ALL SELECT msg FROM threads_posts
+ ) x(text), regexp_matches(text, '}.$fnmatch.q{', 'g') as y(img)
+ )
+ ) x
+ )
+ });
+ print "# Deleted unreferenced images: $cnt\n";
}
-sub addtxtsql {
- my($name, $query) = @_;
- $count = 0;
- my $st = $db->prepare($query);
- $st->execute();
- while((my $txt = $st->fetch())) {
- addtxt $txt->[0];
- }
- print "# References in $name... $count\n";
-}
sub addimagessql {
- my $st = $db->prepare('SELECT vndbid_type(id), vndbid_num(id) FROM images');
- $st->execute();
- $count = 0;
- while((my $num = $st->fetch())) {
- $dir{$num->[0]}{$num->[1]} = 1;
- $count++;
- }
- print "# Items in `images'... $count\n";
+ my $st = $db->prepare('SELECT vndbid_type(id), vndbid_num(id) FROM images');
+ $st->execute();
+ $count = 0;
+ while((my $num = $st->fetch())) {
+ $dir{$num->[0]}{$num->[1]} = 1;
+ $count++;
+ }
+ print "# Items in `images'... $count\n";
};
+
sub findunused {
- my $size = 0;
- $count = 0;
- my $left = 0;
- find {
- no_chdir => 1,
- follow => 1,
- wanted => sub {
- return if -d "$File::Find::name";
- if($File::Find::name !~ /($fnmatch)$/) {
- print "# Unknown file: $File::Find::name\n";
- return;
- }
- if(!$dir{$2}{$3}) {
- my $s = (-s $File::Find::name) / 1024;
- $size += $s;
- $count++;
- printf "rm '%s' # %d KiB, https://s.vndb.org%s\n", $File::Find::name, $s, $1
- } else {
- $left++;
- }
- }
- }, "$ROOT/static/cv", "$ROOT/static/ch", "$ROOT/static/sf", "$ROOT/static/st";
- printf "# Deleted %d files, left %d files, saved %d KiB\n", $count, $left, $size;
+ my $size = 0;
+ $count = 0;
+ my $left = 0;
+ find {
+ no_chdir => 1,
+ follow => 1,
+ wanted => sub {
+ return if -d "$File::Find::name";
+ if($File::Find::name !~ /($fnmatch)$/) {
+ print "# Unknown file: $File::Find::name\n";
+ return;
+ }
+ if(!$dir{$2}{$3}) {
+ my $s = (-s $File::Find::name) / 1024;
+ $size += $s;
+ $count++;
+ printf "rm '%s' # %d KiB, https://s.vndb.org%s\n", $File::Find::name, $s, $1
+ } else {
+ $left++;
+ }
+ }
+ }, "$ROOT/static/cv", "$ROOT/static/ch", "$ROOT/static/sf", "$ROOT/static/st";
+ printf "# Deleted %d files, left %d files, saved %d KiB\n", $count, $left, $size;
}
cleandb;
-addtxtsql 'Docs', 'SELECT content FROM docs UNION ALL SELECT content FROM docs_hist';
-addtxtsql 'VN descriptions', 'SELECT "desc" FROM vn UNION ALL SELECT "desc" FROM vn_hist';
-addtxtsql 'Character descriptions', 'SELECT "desc" FROM chars UNION ALL SELECT "desc" FROM chars_hist';
-addtxtsql 'Producer descriptions', 'SELECT "desc" FROM producers UNION ALL SELECT "desc" FROM producers_hist';
-addtxtsql 'Release descriptions', 'SELECT notes FROM releases UNION ALL SELECT notes FROM releases_hist';
-addtxtsql 'Staff descriptions', 'SELECT "desc" FROM staff UNION ALL SELECT "desc" FROM staff_hist';
-addtxtsql 'Tag descriptions', 'SELECT description FROM tags';
-addtxtsql 'Trait descriptions', 'SELECT description FROM traits';
-addtxtsql 'Change summaries', 'SELECT comments FROM changes';
-addtxtsql 'Posts', 'SELECT msg FROM threads_posts';
addimagessql;
findunused;