summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYorhel <git@yorhel.nl>2021-11-19 16:25:16 +0100
committerYorhel <git@yorhel.nl>2021-11-19 16:25:16 +0100
commit0ea2fb67930af01fe5f4484615e129a13bd8649b (patch)
tree67a209fb8841e343a4c7dc47424325c0f7e406ba
parent0a98d9df12cfc6ea73729b0b3feb3fee52c9f336 (diff)
Use new search for VNs; remove search cache updating from Multi
-rw-r--r--lib/Multi/API.pm5
-rw-r--r--lib/Multi/IRC.pm10
-rw-r--r--lib/Multi/Maintenance.pm40
-rw-r--r--lib/VNDB/Func.pm52
-rw-r--r--lib/VNWeb/ULists/List.pm2
-rw-r--r--lib/VNWeb/VN/Elm.pm13
-rw-r--r--lib/VNWeb/VN/List.pm2
-rw-r--r--sql/func.sql39
-rw-r--r--sql/triggers.sql9
-rwxr-xr-xutil/devdump.pl3
-rw-r--r--util/updates/2021-11-19-vn-search.sql7
11 files changed, 52 insertions, 130 deletions
diff --git a/lib/Multi/API.pm b/lib/Multi/API.pm
index 24a8a0ee..8ae8ceee 100644
--- a/lib/Multi/API.pm
+++ b/lib/Multi/API.pm
@@ -15,7 +15,7 @@ use POE::Filter::VNDBAPI 'encode_filters';
use Encode 'encode_utf8', 'decode_utf8';
use Crypt::URandom 'urandom';
use Crypt::ScryptKDF 'scrypt_raw';;
-use VNDB::Func 'imgurl', 'normalize_query', 'norm_ip', 'resolution';
+use VNDB::Func 'imgurl', 'norm_ip', 'resolution';
use VNDB::Types;
use VNDB::Config;
use JSON::XS;
@@ -583,8 +583,7 @@ my %GET_VN = (
[ stra => 'v.olang :op:(:value:)', {'=' => 'IN', '!=' => 'NOT IN'}, join => ',', process => \'lang' ],
],
search => [
- [ str => '(:value:)', {'~',1}, split => \&normalize_query,
- join => ' AND ', serialize => 'v.c_search LIKE :value:', process => \'like' ],
+ [ str => 'v.c_search LIKE ALL (search_query(:value:))', {'~',1} ],
],
tags => [
[ int => 'v.id :op:(SELECT vid FROM tags_vn_inherit WHERE tag = :value:)', {'=' => 'IN', '!=' => 'NOT IN'}, process => \'g' ],
diff --git a/lib/Multi/IRC.pm b/lib/Multi/IRC.pm
index 4ef26ec1..9fbc298f 100644
--- a/lib/Multi/IRC.pm
+++ b/lib/Multi/IRC.pm
@@ -10,7 +10,6 @@ use warnings;
use Multi::Core;
use AnyEvent::IRC::Client;
use AnyEvent::IRC::Util 'prefix_nick';
-use VNDB::Func 'normalize_query';
use VNDB::Config;
use TUWF::Misc 'uri_escape';
use POSIX 'strftime';
@@ -366,18 +365,13 @@ vn => [ 0, 0, sub {
my($nick, $chan, $q) = @_;
return $irc->send_msg(PRIVMSG => $chan, 'You forgot the search query, dummy~~!') if !$q;
- my @q = normalize_query($q);
- return $irc->send_msg(PRIVMSG => $chan,
- "Couldn't do anything with that search query, you might want to add quotes or use longer words.") if !@q;
-
- my $w = join ' AND ', map "c_search LIKE \$$_", 1..@q;
pg_cmd qq{
SELECT id, title
FROM vn
- WHERE NOT hidden AND $w
+ WHERE NOT hidden AND c_search LIKE ALL (search_query($1))
ORDER BY title
LIMIT 6
- }, [ map "%$_%", @q ], sub {
+ }, [ $q ], sub {
my $res = shift;
return if pg_expect $res, 1;
return $irc->send_msg(PRIVMSG => $chan, 'No visual novels found.') if !$res->nRows;
diff --git a/lib/Multi/Maintenance.pm b/lib/Multi/Maintenance.pm
index 7c0bb1ac..6f8e1274 100644
--- a/lib/Multi/Maintenance.pm
+++ b/lib/Multi/Maintenance.pm
@@ -9,7 +9,6 @@ use strict;
use warnings;
use Multi::Core;
use PerlIO::gzip;
-use VNDB::Func 'normalize_titles';
use VNDB::Config;
@@ -18,8 +17,6 @@ my $monthly;
sub run {
push_watcher schedule 7*3600+1800, 24*3600, \&daily; # 7:30 UTC, 30 minutes before the daily DB dumps are created
- push_watcher schedule 0, 3600, \&vnsearch_check;
- push_watcher pg->listen(vnsearch => on_notify => \&vnsearch_check);
set_monthly();
}
@@ -176,41 +173,4 @@ sub monthly {
}
-
-#
-# V N S E A R C H C A C H E
-#
-
-
-sub vnsearch_check {
- pg_cmd 'SELECT id FROM vn WHERE c_search IS NULL LIMIT 1', undef, sub {
- my $res = shift;
- return if pg_expect $res, 1 or !$res->rows;
-
- my $id = $res->value(0,0);
- pg_cmd q|SELECT title, original, alias FROM vn WHERE id = $1
- UNION SELECT r.title, r.original, NULL FROM releases r JOIN releases_vn rv ON rv.id = r.id WHERE rv.vid = $1 AND NOT r.hidden|,
- [ $id ], sub { vnsearch_update($id, @_) };
- };
-}
-
-
-sub vnsearch_update { # id, res, time
- my($id, $res, $time) = @_;
- return if pg_expect $res, 1;
-
- my $t = normalize_titles(grep length, map
- +($_->{title}, $_->{original}, split /[\n,]/, $_->{alias}||''),
- $res->rowsAsHashes
- );
-
- pg_cmd 'UPDATE vn SET c_search = $1 WHERE id = $2', [ $t, $id ], sub {
- my($res, $t2) = @_;
- return if pg_expect $res, 0;
- AE::log info => sprintf 'Updated search cache for %s (%3dms SQL)', $id, ($time+$t2)*1000;
- vnsearch_check;
- };
-}
-
-
1;
diff --git a/lib/VNDB/Func.pm b/lib/VNDB/Func.pm
index 800c37a5..d5692a38 100644
--- a/lib/VNDB/Func.pm
+++ b/lib/VNDB/Func.pm
@@ -5,8 +5,6 @@ use warnings;
use TUWF::Misc 'uri_escape';
use Exporter 'import';
use POSIX 'strftime';
-use Encode 'encode_utf8';
-use Unicode::Normalize 'NFKD', 'compose';
use Socket 'inet_pton', 'inet_ntop', 'AF_INET', 'AF_INET6';
use VNDB::Config;
use VNDB::Types;
@@ -17,7 +15,6 @@ our @EXPORT = ('bb_format', qw|
shorten
resolution
gtintype
- normalize_titles normalize_query
imgsize
norm_ip
minage
@@ -93,55 +90,6 @@ sub gtintype {
}
-# a rather aggressive normalization
-sub normalize {
- local $_ = lc shift;
- use utf8;
- # Remove combining markings, except for kana.
- # This effectively removes all accents from the characters (e.g. é -> e)
- $_ = compose(NFKD($_) =~ s/(?<=[^ア-ンあ-ん])\pM//rg);
- # remove some characters that have no significance when searching
- tr/\r\n\t,_\-.~~〜∼ー῀:[]()%+!?#$"'`♥★☆♪†「」『』【】・‟“”‛’‘‚„«‹»›//d;
- tr/@/a/;
- tr/ı/i/; # Turkish lowercase i
- tr/×/x/;
- s/&/and/;
- # Remove spaces. We're doing substring search, so let it cross word boundary to find more stuff
- tr/ //d;
- # remove commonly used release titles ("x Edition" and "x Version")
- # this saves some space and speeds up the search
- s/(?:
- first|firstpress|firstpresslimited|limited|regular|standard
- |package|boxed|download|complete|popular
- |lowprice|best|cheap|budget
- |special|trial|allages|fullvoice
- |cd|cdr|cdrom|dvdrom|dvd|dvdpack|dvdpg|windows
- |初回限定|初回|限定|通常|廉価|パッケージ|ダウンロード
- )(?:edition|version|版|生産)//xg;
- # other common things
- s/fandisk/fandisc/g;
- s/sempai/senpai/g;
- no utf8;
- return $_;
-}
-
-
-# normalizes each title and returns a concatenated string of unique titles
-sub normalize_titles {
- my %t = map +(normalize($_), 1), @_;
- return join ' ', grep length $_, sort keys %t;
-}
-
-
-sub normalize_query {
- my $q = shift;
- # remove spaces within quotes, so that it's considered as one search word
- $q =~ s/"([^"]+)"/(my $s=$1)=~y{ }{}d;$s/ge;
- # split into search words and normalize
- return map quotemeta($_), grep length $_, map normalize($_), split / /, $q;
-}
-
-
# arguments: <image size>, <max dimensions>
# returns the size of the thumbnail with the same aspect ratio as the full-size
# image, but fits within the specified maximum dimensions
diff --git a/lib/VNWeb/ULists/List.pm b/lib/VNWeb/ULists/List.pm
index 3fe2ac6c..833b3556 100644
--- a/lib/VNWeb/ULists/List.pm
+++ b/lib/VNWeb/ULists/List.pm
@@ -269,7 +269,7 @@ sub listing_ {
sql('uv.uid =', \$uid),
!$own ? sql('uv.vid IN(SELECT vid FROM ulist_vns_labels WHERE uid =', \$uid, 'AND lbl IN(SELECT id FROM ulist_labels WHERE uid =', \$uid, 'AND NOT private))') : (),
@where_vns ? sql_or(@where_vns) : (),
- $opt->{q} ? map sql('v.c_search like', \"%$_%"), normalize_query $opt->{q} : (),
+ $opt->{q} ? sql 'v.c_search LIKE ALL (search_query(', \$opt->{q}, '))' : (),
defined($opt->{ch}) && $opt->{ch} ? sql('LOWER(SUBSTR(v.title, 1, 1)) =', \$opt->{ch}) : (),
defined($opt->{ch}) && !$opt->{ch} ? sql('(ASCII(v.title) <', \97, 'OR ASCII(v.title) >', \122, ') AND (ASCII(v.title) <', \65, 'OR ASCII(v.title) >', \90, ')') : ();
diff --git a/lib/VNWeb/VN/Elm.pm b/lib/VNWeb/VN/Elm.pm
index 3bf02d59..0f6ca5d4 100644
--- a/lib/VNWeb/VN/Elm.pm
+++ b/lib/VNWeb/VN/Elm.pm
@@ -13,15 +13,10 @@ elm_api VN => undef, {
elm_VNResult tuwf->dbPagei({ results => $data->{hidden}?50:15, page => 1 },
'SELECT v.id, v.title, v.original, v.hidden
FROM (',
- sql_join('UNION ALL', map {
- my $qs = sql_like $_;
- my @qs = normalize_query $_;
- (
- /^$RE{vid}$/ ? sql('SELECT 1, id FROM vn WHERE id =', \"$+{id}") : (),
- sql('SELECT 1+substr_score(lower(title),', \$qs, '), id FROM vn WHERE title ILIKE', \"$qs%"),
- @qs ? (sql 'SELECT 10, id FROM vn WHERE', sql_and map sql('c_search ILIKE', \"%$_%"), @qs) : ()
- )
- } @q),
+ sql_join('UNION ALL', map +(
+ /^$RE{vid}$/ ? sql('SELECT 1, id FROM vn WHERE id =', \"$+{id}") : (),
+ sql('SELECT 1+substr_score(lower(title),', \sql_like($_), '), id FROM vn WHERE c_search LIKE ALL (search_query(', \"$_", '))'),
+ ), @q),
') x(prio, id)
JOIN vn v ON v.id = x.id
WHERE', sql_and($data->{hidden} ? () : 'NOT v.hidden'), '
diff --git a/lib/VNWeb/VN/List.pm b/lib/VNWeb/VN/List.pm
index 4dcb1785..5c08727c 100644
--- a/lib/VNWeb/VN/List.pm
+++ b/lib/VNWeb/VN/List.pm
@@ -263,7 +263,7 @@ TUWF::get qr{/v(?:/(?<char>all|[a-z0]))?}, sub {
my $where = sql_and
'NOT v.hidden', $opt->{f}->sql_where(),
- $opt->{q} ? map sql('v.c_search LIKE', \"%$_%"), normalize_query $opt->{q} : (),
+ $opt->{q} ? sql 'v.c_search LIKE ALL (search_query(', \$opt->{q}, '))' : (),
defined($opt->{ch}) && $opt->{ch} ? sql('LOWER(SUBSTR(v.title, 1, 1)) =', \$opt->{ch}) : (),
defined($opt->{ch}) && !$opt->{ch} ? sql('(ASCII(v.title) <', \97, 'OR ASCII(v.title) >', \122, ') AND (ASCII(v.title) <', \65, 'OR ASCII(v.title) >', \90, ')') : ();
diff --git a/sql/func.sql b/sql/func.sql
index 3df59b35..179df943 100644
--- a/sql/func.sql
+++ b/sql/func.sql
@@ -69,6 +69,35 @@ CREATE OR REPLACE FUNCTION search_gen(terms text[]) RETURNS text AS $$
$$ LANGUAGE SQL IMMUTABLE;
+CREATE OR REPLACE FUNCTION search_gen_vn(vnid vndbid) RETURNS text AS $$
+ SELECT coalesce(string_agg(t, ' '), '') FROM (
+ SELECT t FROM (
+ SELECT search_norm_term(title) FROM vn WHERE id = vnid
+ UNION ALL SELECT search_norm_term(original) FROM vn WHERE vnid = id
+ UNION ALL SELECT search_norm_term(a) FROM vn, regexp_split_to_table(alias, E'\n') a(a) WHERE vnid = id
+ -- Remove the various editions/version strings from release titles,
+ -- this reduces the index size and makes VN search more relevant.
+ -- People looking for editions should be using the release search.
+ -- (This regex is rather incomplete, sadly)
+ UNION ALL SELECT regexp_replace(search_norm_term(t), '(?:体験|ダウンロド|dvdpg|(?:
+ first|firstpress|firstpresslimited|limited|regular|standard
+ |package|boxed|download|complete|popular|premium|deluxe|collectors?
+ |lowprice|best|thebest|cheap|budget|reprint|bundle|renewal
+ |special|trial|allages|fullvoice|web|demo|fulldemo
+ |cd|cdr|cdrom|dvdrom|dvd|dvdpack|windows|windows7|windows7support|windows10
+ |初回限定|初回|限定|通常|廉価|豪華|パッケージ|ダウンロード|ベスト|復刻|新装|7対応
+ )?(?:edition|version|thebest|pack|package|版|生産))+$', '', 'xg')
+ FROM (
+ SELECT title FROM releases r JOIN releases_vn rv ON rv.id = r.id WHERE NOT r.hidden AND rv.vid = vnid
+ UNION ALL
+ SELECT original FROM releases r JOIN releases_vn rv ON rv.id = r.id WHERE NOT r.hidden AND rv.vid = vnid
+ ) r(t)
+ ) x(t) WHERE t IS NOT NULL AND t <> '' GROUP BY t ORDER BY t
+ ) x(t);
+$$ LANGUAGE SQL;
+
+
+
-- Split a search query into LIKE patterns.
-- Supports double quoting for adjacent terms.
-- e.g. 'SEARCH que.ry "word here"' -> '{%search%,%query%,%wordhere%}'
@@ -507,7 +536,7 @@ DECLARE
BEGIN
SELECT id INTO xoldchid FROM changes WHERE itemid = nitemid AND rev = nrev-1;
- -- Set c_search to NULL and notify when
+ -- Update c_search when
-- 1. A new VN entry is created
-- 2. The vn title/original/alias has changed
IF vndbid_type(nitemid) = 'v' THEN
@@ -516,12 +545,11 @@ BEGIN
-- 2.
EXISTS(SELECT 1 FROM vn_hist v1, vn_hist v2 WHERE (v2.title <> v1.title OR v2.original <> v1.original OR v2.alias <> v1.alias) AND v1.chid = xoldchid AND v2.chid = nchid)
THEN
- UPDATE vn SET c_search = NULL WHERE id = nitemid;
- NOTIFY vnsearch;
+ UPDATE vn SET c_search = search_gen_vn(id) WHERE id = nitemid;
END IF;
END IF;
- -- Set related vn.c_search columns to NULL and notify when
+ -- Update vn.c_search when
-- 1. A new release is created
-- 2. A release has been hidden or unhidden
-- 3. The release title/original has changed
@@ -537,8 +565,7 @@ BEGIN
EXISTS(SELECT vid FROM releases_vn_hist WHERE chid = xoldchid EXCEPT SELECT vid FROM releases_vn_hist WHERE chid = nchid) OR
EXISTS(SELECT vid FROM releases_vn_hist WHERE chid = nchid EXCEPT SELECT vid FROM releases_vn_hist WHERE chid = xoldchid)
THEN
- UPDATE vn SET c_search = NULL WHERE id IN(SELECT vid FROM releases_vn_hist WHERE chid IN(nchid, xoldchid));
- NOTIFY vnsearch;
+ UPDATE vn SET c_search = search_gen_vn(id) WHERE id IN(SELECT vid FROM releases_vn_hist WHERE chid IN(nchid, xoldchid));
END IF;
END IF;
diff --git a/sql/triggers.sql b/sql/triggers.sql
index 8c2a8643..4a201c8a 100644
--- a/sql/triggers.sql
+++ b/sql/triggers.sql
@@ -203,15 +203,6 @@ CREATE TRIGGER insert_notify AFTER INSERT ON reviews FOR EACH STATEMENT EX
--- Send a vnsearch notification when the c_search column is set to NULL.
-
-CREATE OR REPLACE FUNCTION vn_vnsearch_notify() RETURNS trigger AS 'BEGIN NOTIFY vnsearch; RETURN NULL; END;' LANGUAGE plpgsql;
-
-CREATE TRIGGER vn_vnsearch_notify AFTER UPDATE ON vn FOR EACH ROW WHEN (OLD.c_search IS NOT NULL AND NEW.c_search IS NULL) EXECUTE PROCEDURE vn_vnsearch_notify();
-
-
-
-
-- Create notifications for new posts.
CREATE OR REPLACE FUNCTION notify_post() RETURNS trigger AS $$
diff --git a/util/devdump.pl b/util/devdump.pl
index 387e0bb9..2a20544c 100755
--- a/util/devdump.pl
+++ b/util/devdump.pl
@@ -84,7 +84,7 @@ sub copy_entry {
for(@$tables) {
my $add = '';
$add = " AND vid IN($vids)" if /^releases_vn/ || /^vn_relations/ || /^chars_vns/;
- copy $_ => "SELECT * FROM $_ WHERE id IN($ids) $add", $_ eq 'releases' ? { c_search => 'del' } : ();
+ copy $_ => "SELECT * FROM $_ WHERE id IN($ids) $add", { c_search => 'del' };
copy "${_}_hist" => "SELECT x.* FROM ${_}_hist x JOIN changes c ON c.id = x.chid WHERE c.itemid IN($ids) $add";
}
}
@@ -182,6 +182,7 @@ sub copy_entry {
print "SELECT update_vnvotestats();\n";
print "SELECT update_users_ulist_stats(NULL);\n";
print "SELECT update_images_cache(NULL);\n";
+ print "UPDATE vn SET c_search = search_gen_vn(id);\n";
print "UPDATE users u SET c_tags = (SELECT COUNT(*) FROM tags_vn v WHERE v.uid = u.id);\n";
print "UPDATE users u SET c_changes = (SELECT COUNT(*) FROM changes c WHERE c.requester = u.id);\n";
diff --git a/util/updates/2021-11-19-vn-search.sql b/util/updates/2021-11-19-vn-search.sql
new file mode 100644
index 00000000..56ce6661
--- /dev/null
+++ b/util/updates/2021-11-19-vn-search.sql
@@ -0,0 +1,7 @@
+DROP TRIGGER vn_vnsearch_notify ON vn;
+DROP FUNCTION vn_vnsearch_notify();
+\i sql/func.sql
+
+-- Warning: slow
+\timing
+UPDATE vn SET c_search = search_gen_vn(id);