path: root/lib
diff options
author Yorhel <> 2016-01-16 21:51:13 +0100
committer Yorhel <> 2016-01-16 21:51:13 +0100
commit d7dfc891b9156f3c85c859fa02b18401abef6919 (patch)
tree 4a0bb5bd0083e374cf9a13a7d11f6dd0bc1e2458 /lib
parent 6313653b01652affded23d682d136e5e6ee42799 (diff)
VNDBUtil: Consider 'wo' and 'o' the same in search normalization
Diffstat (limited to 'lib')
1 files changed, 9 insertions, 1 deletions
diff --git a/lib/ b/lib/
index 6ae6fbb8..cd294d1b 100644
--- a/lib/
+++ b/lib/
@@ -195,9 +195,13 @@ sub normalize {
# remove some characters that have no significance when searching
use utf8;
- tr/\r\n\t ,_\-.~~〜∼῀:[]()%+!?#$"'`♥★☆♪†「」『』【】・‟”‛’‘‚„«‹»›//d;
+ tr/\r\n\t,_\-.~~〜∼῀:[]()%+!?#$"'`♥★☆♪†「」『』【】・‟”‛’‘‚„«‹»›//d;
+ # Consider wo and o the same thing (when used as separate word)
+ s/(?:^| )o(?:$| )/wo/g;
+ # Remove spaces. We're doing substring search, so let it cross word boundary to find more stuff
+ tr/ //d;
# remove commonly used release titles ("x Edition" and "x Version")
# this saves some space and speeds up the search
@@ -225,6 +229,10 @@ sub normalize_titles {
sub normalize_query {
my $q = NFKD shift;
+ # Consider wo and o the same thing (when used as separate word). Has to be
+ # done here (in addition to normalize()) to make it work in combination with
+ # double quote search.
+ $q =~ s/(^| )o($| )/$1wo$2/ig;
# remove spaces within quotes, so that it's considered as one search word
$q =~ s/"([^"]+)"/(my $s=$1)=~y{ }{}d;$s/ge;
# split into search words, normalize, and remove too short words