summary | refs | log | tree | commit | diff
path: root/lib/VNDBUtil.pm
diff options
context:
space:
mode:
author: Yorhel <git@yorhel.nl> 2016-01-16 21:51:13 +0100
committer: Yorhel <git@yorhel.nl> 2016-01-16 21:51:13 +0100
commit: d7dfc891b9156f3c85c859fa02b18401abef6919 (patch)
tree: 4a0bb5bd0083e374cf9a13a7d11f6dd0bc1e2458 /lib/VNDBUtil.pm
parent: 6313653b01652affded23d682d136e5e6ee42799 (diff)
VNDBUtil: Consider 'wo' and 'o' the same in search normalization
Diffstat (limited to 'lib/VNDBUtil.pm')
-rw-r--r-- lib/VNDBUtil.pm 10
1 file changed, 9 insertions, 1 deletion
diff --git a/lib/VNDBUtil.pm b/lib/VNDBUtil.pm
index 6ae6fbb8..cd294d1b 100644
--- a/lib/VNDBUtil.pm
+++ b/lib/VNDBUtil.pm
@@ -195,9 +195,13 @@ sub normalize {
s/\pM//g;
# remove some characters that have no significance when searching
use utf8;
- tr/\r\n\t ,_\-.~~〜∼῀:[]()%+!?#$"'`♥★☆♪†「」『』【】・‟”‛’‘‚„«‹»›//d;
+ tr/\r\n\t,_\-.~~〜∼῀:[]()%+!?#$"'`♥★☆♪†「」『』【】・‟”‛’‘‚„«‹»›//d;
tr/@/a/;
s/&/and/;
+ # Consider wo and o the same thing (when used as separate word)
+ s/(?:^| )o(?:$| )/wo/g;
+ # Remove spaces. We're doing substring search, so let it cross word boundary to find more stuff
+ tr/ //d;
# remove commonly used release titles ("x Edition" and "x Version")
# this saves some space and speeds up the search
s/(?:
@@ -225,6 +229,10 @@ sub normalize_titles {
sub normalize_query {
my $q = NFKD shift;
+ # Consider wo and o the same thing (when used as separate word). Has to be
+ # done here (in addition to normalize()) to make it work in combination with
+ # double quote search.
+ $q =~ s/(^| )o($| )/$1wo$2/ig;
# remove spaces within quotes, so that it's considered as one search word
$q =~ s/"([^"]+)"/(my $s=$1)=~y{ }{}d;$s/ge;
# split into search words, normalize, and remove too short words