From b8fbb458cdbf6635c45675a3797292110a739f31 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sat, 13 Mar 2010 15:34:24 +0100 Subject: Improved VN search This adds a new column to the vn table: c_search, which holds the normalized titles for speedy search results using LIKE. Also split some functions from VNDB::Func that didn't require YAWF into a VNDBUtil module, so Multi can also make use of them. The normalization functions are the same for Multi and VNDB, after all. The API and Multi::IRC still use the old search, these should be updated as well. --- lib/VNDB/Func.pm | 151 +------------------------------------------------------ 1 file changed, 2 insertions(+), 149 deletions(-) (limited to 'lib/VNDB/Func.pm') diff --git a/lib/VNDB/Func.pm b/lib/VNDB/Func.pm index 1801e3b8..f3f9d009 100644 --- a/lib/VNDB/Func.pm +++ b/lib/VNDB/Func.pm @@ -6,155 +6,8 @@ use warnings; use YAWF ':html'; use Exporter 'import'; use POSIX 'strftime', 'ceil', 'floor'; -our @EXPORT = qw| shorten bb2html gtintype liststat clearfloat cssicon tagscore mt minage |; - - -# I would've done this as a #define if this was C... -sub shorten { - my($str, $len) = @_; - return length($str) > $len ? substr($str, 0, $len-3).'...' : $str; -} - - -# Arguments: input, and optionally the maximum length -# Parses: -# [url=..] [/url] -# [raw] .. [/raw] -# [spoiler] .. [/spoiler] -# [quote] .. [/quote] -# [code] .. [/code] -# v+, v+.+ -# http://../ -sub bb2html { - my $raw = shift; - my $maxlength = shift; - $raw =~ s/\r//g; - $raw =~ s/\n{5,}/\n\n/g; - return '' if !$raw && $raw ne "0"; - - my($result, $length, $rmnewline, @open) = ('', 0, 0, 'first'); - - my $e = sub { - local $_ = shift; - s/&/&/g; - s/>/>/g; - s//g if !$maxlength; - s/\n/ /g if $maxlength; - return $_; - }; - - for (split /(\s|\n|\[[^\]]+\])/, $raw) { - next if !defined $_; - next if $_ eq ''; - - # (note to self: stop using unreadable hacks like these!) - $rmnewline-- && $_ eq "\n" && next if $rmnewline; - - my $lit = $_; - if($open[$#open] ne 'raw' && $open[$#open] ne 'code') { - if (lc$_ eq '[raw]') { push @open, 'raw'; next } - elsif (lc$_ eq '[spoiler]') { push @open, 'spoiler'; $result .= ''; next } - elsif (lc$_ eq '[quote]') { - push @open, 'quote'; - $result .= '
' if !$maxlength; - $rmnewline = 1; - next - } elsif (lc$_ eq '[code]') { - push @open, 'code'; - $result .= '
' if !$maxlength;
-        $rmnewline = 1;
-        next
-      } elsif (lc$_ eq '[/spoiler]') {
-        if($open[$#open] eq 'spoiler') {
-          $result .= '';
-          pop @open;
-        }
-        next;
-      } elsif (lc$_ eq '[/quote]') {
-        if($open[$#open] eq 'quote') {
-          $result .= '
' if !$maxlength; - $rmnewline = 1; - pop @open; - } - next; - } elsif(lc$_ eq '[/url]') { - if($open[$#open] eq 'url') { - $result .= ''; - pop @open; - } - next; - } elsif(s{\[url=((https?://|/)[^\]>]+)\]}{}i) { - $result .= $_; - push @open, 'url'; - next; - } elsif(!grep(/url/, @open) && - s{(.*)(http|https)://(.+[\d\w=/-])(.*)} - {$e->($1).qq|'.$e->('link').''.$e->($4)}e) { - $length += 4; - last if $maxlength && $length > $maxlength; - $result .= $_; - next; - } elsif(!grep(/url/, @open) && ( - s{^(.*[^\w]|)([tdvpr][1-9][0-9]*)\.([1-9][0-9]*)([^\w].*|)$}{$e->($1).qq|$2.$3|.$e->($4)}e || - s{^(.*[^\w]|)([tdvprug][1-9][0-9]*)([^\w].*|)$}{$e->($1).qq|$2|.$e->($3)}e)) { - $length += length $lit; - last if $maxlength && $length > $maxlength; - $result .= $_; - next; - } - } elsif($open[$#open] eq 'raw' && lc$_ eq '[/raw]') { - pop @open; - next; - } elsif($open[$#open] eq 'code' && lc$_ eq '[/code]') { - $result .= '' if !$maxlength; - pop @open; - next; - } - - # normal text processing - $length += length $_; - last if $maxlength && $length > $maxlength; - $result .= $e->($_); - } - - # close open tags - while((local $_ = pop @open) ne 'first') { - $result .= $_ eq 'url' ? '' : $_ eq 'spoiler' ? '' : ''; - $result .= $_ eq 'quote' ? '' : $_ eq 'code' ? '' : '' if !$maxlength; - } - $result .= '...' if $maxlength && $length > $maxlength; - - return $result; -} - - -# GTIN code as argument, -# Returns 'JAN', 'EAN', 'UPC' or undef, -# Also 'normalizes' the first argument in place -sub gtintype { - $_[0] =~ s/[^\d]+//g; - $_[0] =~ s/^0+//; - my $c = shift; - return undef if $c !~ /^[0-9]{12,13}$/; # only gtin-12 and 13 - $c = ('0'x(13-length $c)) . $c; # pad with zeros - - # calculate check digit according to - # http://www.gs1.org/productssolutions/barcodes/support/check_digit_calculator.html#how - my @n = reverse split //, $c; - my $n = shift @n; - $n += $n[$_] * ($_ % 2 != 0 ? 1 : 3) for (0..$#n); - return undef if $n % 10 != 0; - - # Do some rough guesses based on: - # http://www.gs1.org/productssolutions/barcodes/support/prefix_list.html - # and http://en.wikipedia.org/wiki/List_of_GS1_country_codes - local $_ = $c; - return 'JAN' if /^4[59]/; # prefix code 450-459 & 490-499 - return 'UPC' if /^(?:0[01]|0[6-9]|13|75[45])/; # prefix code 000-019 & 060-139 & 754-755 - return undef if /(?:0[2-5]|2|97[789]|9[6-9])/; # some codes we don't want: 020–059 & 200-299 & 977-999 - return 'EAN'; # let's just call everything else EAN :) -} +use VNDBUtil; +our @EXPORT = (@VNDBUtil::EXPORT, qw| liststat clearfloat cssicon tagscore mt minage |); # Argument: hashref with rstat and vstat -- cgit v1.2.3