diff options
Diffstat (limited to 'lib/VNDB/Func.pm')
-rw-r--r-- | lib/VNDB/Func.pm | 382 |
1 files changed, 182 insertions, 200 deletions
diff --git a/lib/VNDB/Func.pm b/lib/VNDB/Func.pm index 508b2272..8c448ad8 100644 --- a/lib/VNDB/Func.pm +++ b/lib/VNDB/Func.pm @@ -1,167 +1,160 @@ - package VNDB::Func; use strict; use warnings; -use TUWF ':html', 'kv_validate', 'xml_escape', 'uri_escape'; +use TUWF::Misc 'uri_escape'; use Exporter 'import'; -use POSIX 'strftime', 'ceil', 'floor'; -use JSON::XS; -use VNDBUtil; +use POSIX 'strftime', 'floor'; +use Socket 'inet_pton', 'inet_ntop', 'AF_INET', 'AF_INET6'; +use Digest::SHA 'sha1'; +use VNDB::Config; use VNDB::Types; use VNDB::BBCode; -our @EXPORT = (@VNDBUtil::EXPORT, 'bb_format', qw| - clearfloat cssicon minage fil_parse fil_serialize parenttags - childtags charspoil imgpath imgurl - fmtvote fmtmedia fmtvnlen fmtage fmtdatestr fmtdate fmtrating fmtspoil - lang_attr - json_encode json_decode script_json - form_compare +our @EXPORT = ('bb_format', qw| + in + idcmp + shorten + resolution + gtintype + imgsize + norm_ip + minage + fmtvote fmtmedia fmtage fmtdate fmtrating fmtspoil fmtanimation + rdate + imgpath imgurl + tlang tattr query_encode md2html + is_insecurepass |); -# three ways to represent the same information -our $fil_escape = '_ !"#$%&\'()*+,-./:;<=>?@[\]^`{}~'; -our @fil_escape = split //, $fil_escape; -our %fil_escape = map +($fil_escape[$_], sprintf '%02d', $_), 0..$#fil_escape; - - -# Clears a float, to make sure boxes always have the correct height -sub clearfloat { - div class => 'clearfloat', ''; +# Simple "is this element in the array?" function, using 'eq' to test equality. +# Supports both an @array and \@array. +# Usage: +# +# my $contains_hi = in 'hi', qw/ a b hi c /; # true +# +sub in { + my($q, @a) = @_; + $_ eq $q && return 1 for map ref $_ eq 'ARRAY' ? @$_ : ($_), @a; + 0 } -# Draws a CSS icon, arguments: class, title -sub cssicon { - abbr class => "icons $_[0]", title => $_[1]; - lit ' '; - end; +# Compare two vndbids, using proper numeric order +sub idcmp($$) { + my($a1, $a2) = $_[0] =~ /^([a-z]+)([0-9]+)$/; + my($b1, $b2) = $_[1] =~ /^([a-z]+)([0-9]+)$/; + $a1 cmp $b1 || $a2 <=> $b2 } -sub minage { - my($a, $ex) = @_; - $a = $AGE_RATING{$a}; - $ex && $a->{ex} ? "$a->{txt} (e.g. $a->{ex})" : $a->{txt} +sub shorten { + my($str, $len) = @_; + return length($str) > $len ? substr($str, 0, $len-3).'...' : $str; } -# arguments: $filter_string, @allowed_keys -sub fil_parse { - my $str = shift; - my %keys = map +($_,1), @_; - my %r; - for (split /\./, $str) { - next if !/^([a-z0-9_]+)-([a-zA-Z0-9_~\x81-\x{ffffff}]+)$/ || !$keys{$1}; - my($f, $v) = ($1, $2); - my @v = split /~/, $v; - s/_([0-9]{2})/$1 > $#fil_escape ? '' : $fil_escape[$1]/eg for(@v); - $r{$f} = @v > 1 ? \@v : $v[0] - } - return \%r; +sub resolution { + my($x,$y) = @_; + ($x,$y) = ($x->{reso_x}, $x->{reso_y}) if ref $x; + $x ? "${x}x${y}" : $y == 1 ? 'Non-standard' : undef } -sub fil_serialize { - my $fil = shift; - my $e = qr/([\Q$fil_escape\E])/; - return join '.', map { - my @v = ref $fil->{$_} ? @{$fil->{$_}} : ($fil->{$_}); - s/$e/_$fil_escape{$1}/g for(@v); - $_.'-'.join '~', @v - } grep defined($fil->{$_}), sort keys %$fil; +# GTIN code as argument, +# Returns 'JAN', 'EAN', 'UPC', 'ISBN' or undef, +# Also 'normalizes' the first argument in place +sub gtintype { + $_[0] =~ s/[^\d]+//g; + $_[0] =~ s/^0+//; + return undef if $_[0] !~ /^[0-9]{10,13}$/; # I've yet to see a UPC code shorter than 10 digits assigned to a game + $_[0] = ('0'x(12-length $_[0])) . $_[0] if length($_[0]) < 12; # pad with zeros to GTIN-12 + my $c = shift; + return undef if $c !~ /^[0-9]{12,13}$/; + $c = "0$c" if length($c) == 12; # pad with another zero for GTIN-13 + + # calculate check digit according to + # http://www.gs1.org/productssolutions/barcodes/support/check_digit_calculator.html#how + my @n = reverse split //, $c; + my $n = shift @n; + $n += $n[$_] * ($_ % 2 != 0 ? 1 : 3) for (0..$#n); + return undef if $n % 10 != 0; + + # Do some rough guesses based on: + # http://www.gs1.org/productssolutions/barcodes/support/prefix_list.html + # and http://en.wikipedia.org/wiki/List_of_GS1_country_codes + local $_ = $c; + return 'JAN' if /^4[59]/; # prefix code 450-459 & 490-499 + return 'UPC' if /^(?:0[01]|0[6-9]|13|75[45])/; # prefix code 000-019 & 060-139 & 754-755 + return 'ISBN' if /^97[89]/; + return undef if /^(?:0[2-5]|2|9[6-9])/; # some codes we don't want: 020–059 & 200-299 & non-ISBN 977-999 + return 'EAN'; # let's just call everything else EAN :) } -# generates a parent tags/traits listing -sub parenttags { - my($t, $index, $type) = @_; - p; - my @p = _parenttags(@{$t->{parents}}); - for my $p (@p ? @p : []) { - a href => "/$type", $index; - for (reverse @$p) { - txt ' > '; - a href => "/$type$_->{id}", $_->{name}; - } - txt " > $t->{name}"; - br; - } - end 'p'; -} - -# arg: tag/trait hashref -# returns: [ [ tag1, tag2, tag3 ], [ tag1, tag2, tag5 ] ] -sub _parenttags { - my @r; - for my $t (@_) { - for (@{$t->{'sub'}}) { - push @r, [ $t, @$_ ] for _parenttags($_); - } - push @r, [$t] if !@{$t->{'sub'}}; +# arguments: <image size>, <max dimensions> +# returns the size of the thumbnail with the same aspect ratio as the full-size +# image, but fits within the specified maximum dimensions +sub imgsize { + my($ow, $oh, $sw, $sh) = @_; + return ($ow, $oh) if $ow <= $sw && $oh <= $sh; + if($ow/$oh > $sw/$sh) { # width is the limiting factor + $oh *= $sw/$ow; + $ow = $sw; + } else { + $ow *= $sh/$oh; + $oh = $sh; } - return @r; + return (int ($ow+0.5), int ($oh+0.5)); } -# a child tags/traits box -sub childtags { - my($self, $title, $type, $t, $order) = @_; - - div class => 'mainbox'; - h1 $title; - ul class => 'tagtree'; - for my $p (sort { !$order ? @{$b->{'sub'}} <=> @{$a->{'sub'}} : $a->{$order} <=> $b->{$order} } @{$t->{childs}}) { - li; - a href => "/$type$p->{id}", $p->{name}; - b class => 'grayedout', " ($p->{c_items})" if $p->{c_items}; - end, next if !@{$p->{'sub'}}; - ul; - for (0..$#{$p->{'sub'}}) { - last if $_ >= 5 && @{$p->{'sub'}} > 6; - li; - txt '> '; - a href => "/$type$p->{sub}[$_]{id}", $p->{'sub'}[$_]{name}; - b class => 'grayedout', " ($p->{sub}[$_]{c_items})" if $p->{'sub'}[$_]{c_items}; - end; - } - if(@{$p->{'sub'}} > 6) { - my $c = @{$p->{'sub'}}-5; - li; - txt '> '; - a href => "/$type$p->{id}", style => 'font-style: italic', - sprintf '%d more %s%s', $c, $type eq 'g' ? 'tag' : 'trait', $c==1 ? '' : 's'; - end; - } - end; - end 'li'; +# Normalized IP address to use for duplicate detection/throttling. For IPv4 +# this is the /23 subnet (is this enough?), for IPv6 the /48 subnet, with the +# least significant bits of the address zero'd. +sub norm_ip { + my $ip = shift; + + # There's a whole bunch of IP manipulation modules on CPAN, but many seem + # quite bloated and still don't offer the functionality to return an IP + # with its mask applied (admittedly not a common operation). The libc + # socket functions will do fine in parsing and formatting addresses, and + # the actual masking is quite trivial in binary form. + my $v4 = inet_pton AF_INET, $ip; + if($v4) { + $v4 =~ s/(..)(.)./$1 . chr(ord($2) & 254) . "\0"/se; + return inet_ntop AF_INET, $v4; } - end 'ul'; - clearfloat; - br; - end 'div'; + + $ip = inet_pton AF_INET6, $ip; + return '::' if !$ip; + $ip =~ s/^(.{6}).+$/$1 . "\0"x10/se; + return inet_ntop AF_INET6, $ip; } -# generates the class elements for character spoiler hiding -sub charspoil { - return "charspoil charspoil_$_[0]"; +sub minage { + my($a, $ex) = @_; + return 'Unknown' if !defined $a; + $a = $AGE_RATING{$a}; + $ex && $a->{ex} ? "$a->{txt} (e.g. $a->{ex})" : $a->{txt} } -# generates a local path to an image in static/ -sub imgpath { # <type>, <id> - return sprintf '%s/static/%s/%02d/%d.jpg', $TUWF::OBJ->{root}, $_[0], $_[1]%100, $_[1]; +sub _path { + my($t, $id) = $_[1] =~ /([a-z]+)([0-9]+)/; + sprintf '%s/%s%s/%02d/%d.%s', $_[0], $t, $_[2] ? ".$_[2]" : '', $id%100, $id, $_[3]||'jpg'; } +# imgpath($image_id, $dir, $format) +# $dir = empty || 't' || 'orig' +# $format = empty || $file_ext +sub imgpath { _path config->{var_path}.'/static', @_ } -# generates a URL for an image in static/ -sub imgurl { - return sprintf '%s/%s/%02d/%d.jpg', $TUWF::OBJ->{url_static}, $_[0], $_[1]%100, $_[1]; -} +# imgurl($image_id, $dir, $format) +sub imgurl { _path config->{url_static}, @_ } # Formats a vote number. @@ -178,13 +171,6 @@ sub fmtmedia { $med->{ $med->{qty} && $qty > 1 ? 'plural' : 'txt' }; } -# Formats a VN length (xtra = time indication) -sub fmtvnlen { - my($len, $xtra) = @_; - $len = $VN_LENGTH{$len}; - $len->{txt}.($xtra && $len->{time} ? " ($len->{time})" : ''); -} - # Formats a UNIX timestamp as a '<number> <unit> ago' string sub fmtage { my $a = time-shift; @@ -200,32 +186,12 @@ sub fmtage { sprintf '%d %s ago', $t, $t == 1 ? $single : $plural; } -# argument: database release date format (yyyymmdd) -# y = 0000 -> unknown -# y = 9999 -> TBA -# m = 99 -> month+day unknown -# d = 99 -> day unknown -# return value: (unknown|TBA|yyyy|yyyy-mm|yyyy-mm-dd) -# if date > now: <b class="future">str</b> -sub fmtdatestr { - my $date = sprintf '%08d', shift||0; - my $future = $date > strftime '%Y%m%d', gmtime; - my($y, $m, $d) = ($1, $2, $3) if $date =~ /^([0-9]{4})([0-9]{2})([0-9]{2})$/; - - my $str = $y == 0 ? 'unknown' : $y == 9999 ? 'TBA' : - $m == 99 ? sprintf('%04d', $y) : - $d == 99 ? sprintf('%04d-%02d', $y, $m) : - sprintf('%04d-%02d-%02d', $y, $m, $d); - - return $str if !$future; - return qq|<b class="future">$str</b>|; -} # argument: unix timestamp and optional format (compact/full) sub fmtdate { my($t, $f) = @_; - return strftime '%Y-%m-%d', gmtime $t if !$f || $f eq 'compact'; - return strftime '%Y-%m-%d at %R', gmtime $t; + return strftime '%Y-%m-%d', localtime $t if !$f || $f eq 'compact'; + return strftime '%Y-%m-%d at %R', localtime $t; } # Turn a (natural number) vote into a rating indication @@ -251,69 +217,56 @@ sub fmtspoil { } -# Generates a HTML 'lang' attribute given a list of possible languages. -# This is used for the 'original language' field, which we can safely assume is not used for latin-alphabet languages. -sub lang_attr { - my @l = ref $_[0] ? $_[0]->@* : @_; - # Choose Japanese, Chinese or Korean (in order of likelyness) if those are in the list. - return (lang => 'ja') if grep $_ eq 'ja', @l; - return (lang => 'zh') if grep $_ eq 'zh', @l; - return (lang => 'ko') if grep $_ eq 'ko', @l; - return (lang => $l[0]) if @l == 1; - () +sub fmtanimation { + my($a, $cat) = @_; + return if !defined $a; + return $cat ? ucfirst "$cat not animated" : 'Not animated' if !$a; + return $cat ? "No $cat" : 'Not applicable' if $a == 1; + ($a & 256 ? 'Some scenes ' : $a & 512 ? 'All scenes ' : '').join('/', + $a & 4 ? 'Hand drawn' : (), + $a & 8 ? 'Vectorial' : (), + $a & 16 ? '3D' : (), + $a & 32 ? 'Live action' : () + ).($cat ? " $cat" : ''); } - -# JSON::XS::encode_json converts input to utf8, whereas the below functions -# operate on wide character strings. Canonicalization is enabled to allow for -# proper comparison of serialized objects. -my $JSON = JSON::XS->new; -$JSON->canonical(1); - -sub json_encode ($) { - $JSON->encode(@_); +# Format a release date as a string. +sub rdate { + my($y, $m, $d) = ($1, $2, $3) if sprintf('%08d', shift||0) =~ /^([0-9]{4})([0-9]{2})([0-9]{2})$/; + $y == 0 ? 'unknown' : + $y == 9999 ? 'TBA' : + $m == 99 ? sprintf('%04d', $y) : + $d == 99 ? sprintf('%04d-%02d', $y, $m) : + sprintf('%04d-%02d-%02d', $y, $m, $d); } -sub json_decode ($) { - $JSON->decode(@_); -} -# Insert JSON-encoded data as script, arguments: id, object -sub script_json { - script id => $_[0], type => 'application/json'; - my $js = json_encode $_[1]; - $js =~ s/</\\u003C/g; # escape HTML tags like </script> and <!-- - lit $js; - end; +# Given a language code & title, returns a (lang => $x) html property. +sub tlang { + my($lang, $title) = @_; + # TODO: The -Latn suffix is redundant for languages that use the Latin script by default, need to check with a list. + # English is the site's default, so no need to specify that. + $lang && $lang ne 'en' + ? (lang => $lang . ($title =~ /[\x{0400}-\x{04ff}\x{0600}-\x{06ff}\x{0e00}-\x{0e7f}\x{1100}-\x{11ff}\x{1400}-\x{167f}\x{3040}-\x{3099}\x{30a1}-\x{30fa}\x{3100}-\x{9fff}\x{ac00}-\x{d7af}\x{ff66}-\x{ffdc}\x{20000}-\x{323af}]/ ? '' : '-Latn')) + : (); } - -# Compare the keys in %$old with the keys in %$new. Returns 1 if a difference was found, 0 otherwise. -sub form_compare { - my($old, $new) = @_; - for my $k (keys %$old) { - my($o, $n) = ($old->{$k}, $new->{$k}); - return 1 if defined $n ne defined $o || ref $o ne ref $n; - if(!defined $o) { - # must be equivalent - } elsif(!ref $o) { - return 1 if $o ne $n; - } else { # 'json' template - return 1 if @$o != @$n; - return 1 if grep form_compare($o->[$_], $n->[$_]), 0..$#$o; - } - } - return 0; +# Given an SQL titles array, returns element attributes & content. +sub tattr { + my $title = ref $_[0] eq 'HASH' ? $_[0]{title} : $_[0]; + (tlang($title->[0],$title->[1]), title => $title->[3], $title->[1]) } -# Encode query parameters. Takes a hash or hashref with key/values, supports array values. + +# Encode query parameters. Takes a hash or hashref with key/values, supports array values and objects that implement query_encode(). sub query_encode { my $o = @_ == 1 ? $_[0] : {@_}; return join '&', map { my($k, $v) = ($_, $o->{$_}); + $v = $v->query_encode() if ref $v && ref $v ne 'ARRAY'; !defined $v ? () : ref $v ? map "$k=".uri_escape($_), sort @$v : "$k=".uri_escape($v) } sort keys %$o; } @@ -348,5 +301,34 @@ sub md2html { $html } -1; +sub is_insecurepass { + utf8::encode(local $_ = shift); + my $hash = sha1 $_; + my $dir = config->{var_path}.'/hibp'; + return 0 if !-d $dir; + + my $prefix = uc unpack 'H4', $hash; + my $data = substr $hash, 2, 10; + my $F; + if(!open $F, '<', "$dir/$prefix") { + warn "Unable to lookup password prefix $prefix: $!"; + return 0; + } + + # Plain old binary search. + # Would be nicer to search through an mmap'ed view of the file, or at least + # use pread(), but alas, neither are easily available in Perl. + my($left, $right) = (0, -10 + -s $F); + while($left <= $right) { + my $off = floor(($left+$right)/20)*10; + sysseek $F, $off, 0 or die $!; + 10 == sysread $F, my $buf, 10 or die $!; + return 1 if $buf eq $data; + if($buf lt $data) { $left = $off + 10; } + else { $right = $off - 10; } + } + 0; +} + +1; |