diff options
author | Yorhel <git@yorhel.nl> | 2016-01-16 21:51:13 +0100 |
---|---|---|
committer | Yorhel <git@yorhel.nl> | 2016-01-16 21:51:13 +0100 |
commit | d7dfc891b9156f3c85c859fa02b18401abef6919 (patch) | |
tree | 4a0bb5bd0083e374cf9a13a7d11f6dd0bc1e2458 /lib | |
parent | 6313653b01652affded23d682d136e5e6ee42799 (diff) |
VNDBUtil: Consider 'wo' and 'o' the same in search normalization
Diffstat (limited to 'lib')
-rw-r--r-- | lib/VNDBUtil.pm | 10 |
1 files changed, 9 insertions, 1 deletions
diff --git a/lib/VNDBUtil.pm b/lib/VNDBUtil.pm
index 6ae6fbb8..cd294d1b 100644
--- a/lib/VNDBUtil.pm
+++ b/lib/VNDBUtil.pm
@@ -195,9 +195,13 @@ sub normalize {
 s/\pM//g;
 # remove some characters that have no significance when searching
 use utf8;
- tr/\r\n\t ,_\-.~~〜∼῀:[]()%+!?#$"'`♥★☆♪†「」『』【】・‟”‛’‘‚„«‹»›//d;
+ tr/\r\n\t,_\-.~~〜∼῀:[]()%+!?#$"'`♥★☆♪†「」『』【】・‟”‛’‘‚„«‹»›//d;
 tr/@/a/;
 s/&/and/;
+ # Consider wo and o the same thing (when used as separate word)
+ s/(?:^| )o(?:$| )/wo/g;
+ # Remove spaces. We're doing substring search, so let it cross word boundary to find more stuff
+ tr/ //d;
 # remove commonly used release titles ("x Edition" and "x Version")
 # this saves some space and speeds up the search
 s/(?:
@@ -225,6 +229,10 @@ sub normalize_titles {
 
 sub normalize_query {
 my $q = NFKD shift;
+ # Consider wo and o the same thing (when used as separate word). Has to be
+ # done here (in addition to normalize()) to make it work in combination with
+ # double quote search.
+ $q =~ s/(^| )o($| )/$1wo$2/ig;
 # remove spaces within quotes, so that it's considered as one search word
 $q =~ s/"([^"]+)"/(my $s=$1)=~y{ }{}d;$s/ge;
 # split into search words, normalize, and remove too short words