1 files changed, 182 insertions, 200 deletions
diff --git a/lib/VNDB/Func.pm b/lib/VNDB/Func.pm
index 508b2272..8c448ad8 100644
--- a/lib/VNDB/Func.pm
+++ b/lib/VNDB/Func.pm
@@ -1,167 +1,160 @@
-
 package VNDB::Func;
 
 use strict;
 use warnings;
-use TUWF ':html', 'kv_validate', 'xml_escape', 'uri_escape';
+use TUWF::Misc 'uri_escape';
 use Exporter 'import';
-use POSIX 'strftime', 'ceil', 'floor';
-use JSON::XS;
-use VNDBUtil;
+use POSIX 'strftime', 'floor';
+use Socket 'inet_pton', 'inet_ntop', 'AF_INET', 'AF_INET6';
+use Digest::SHA 'sha1';
+use VNDB::Config;
 use VNDB::Types;
 use VNDB::BBCode;
-our @EXPORT = (@VNDBUtil::EXPORT, 'bb_format', qw|
-  clearfloat cssicon minage fil_parse fil_serialize parenttags
-  childtags charspoil imgpath imgurl
-  fmtvote fmtmedia fmtvnlen fmtage fmtdatestr fmtdate fmtrating fmtspoil
-  lang_attr
-  json_encode json_decode script_json
-  form_compare
+our @EXPORT = ('bb_format', qw|
+  in
+  idcmp
+  shorten
+  resolution
+  gtintype
+  imgsize
+  norm_ip
+  minage
+  fmtvote fmtmedia fmtage fmtdate fmtrating fmtspoil fmtanimation
+  rdate
+  imgpath imgurl
+  tlang tattr
   query_encode
   md2html
+  is_insecurepass
 |);
 
 
-# three ways to represent the same information
-our $fil_escape = '_ !"#$%&\'()*+,-./:;<=>?@[\]^`{}~';
-our @fil_escape = split //, $fil_escape;
-our %fil_escape = map +($fil_escape[$_], sprintf '%02d', $_), 0..$#fil_escape;
-
-
-# Clears a float, to make sure boxes always have the correct height
-sub clearfloat {
-  div class => 'clearfloat', '';
+# Simple "is this element in the array?" function, using 'eq' to test equality.
+# Supports both an @array and \@array.
+# Usage:
+#
+#   my $contains_hi = in 'hi', qw/ a b hi c /; # true
+#
+sub in {
+    my($q, @a) = @_;
+    $_ eq $q && return 1 for map ref $_ eq 'ARRAY' ? @$_ : ($_), @a;
+    0
 }
 
 
-# Draws a CSS icon, arguments: class, title
-sub cssicon {
-  abbr class => "icons $_[0]", title => $_[1];
-   lit '&#xa0;';
-  end;
+# Compare two vndbids, using proper numeric order
+sub idcmp($$) {
+    my($a1, $a2) = $_[0] =~ /^([a-z]+)([0-9]+)$/;
+    my($b1, $b2) = $_[1] =~ /^([a-z]+)([0-9]+)$/;
+    $a1 cmp $b1 || $a2 <=> $b2
 }
 
 
-sub minage {
-  my($a, $ex) = @_;
-  $a = $AGE_RATING{$a};
-  $ex && $a->{ex} ? "$a->{txt} (e.g. $a->{ex})" : $a->{txt}
+sub shorten {
+  my($str, $len) = @_;
+  return length($str) > $len ? substr($str, 0, $len-3).'...' : $str;
 }
 
 
-# arguments: $filter_string, @allowed_keys
-sub fil_parse {
-  my $str = shift;
-  my %keys = map +($_,1), @_;
-  my %r;
-  for (split /\./, $str) {
-    next if !/^([a-z0-9_]+)-([a-zA-Z0-9_~\x81-\x{ffffff}]+)$/ || !$keys{$1};
-    my($f, $v) = ($1, $2);
-    my @v = split /~/, $v;
-    s/_([0-9]{2})/$1 > $#fil_escape ? '' : $fil_escape[$1]/eg for(@v);
-    $r{$f} = @v > 1 ? \@v : $v[0]
-  }
-  return \%r;
+sub resolution {
+  my($x,$y) = @_;
+  ($x,$y) = ($x->{reso_x}, $x->{reso_y}) if ref $x;
+  $x ? "${x}x${y}" : $y == 1 ? 'Non-standard' : undef
 }
 
 
-sub fil_serialize {
-  my $fil = shift;
-  my $e = qr/([\Q$fil_escape\E])/;
-  return join '.', map {
-    my @v = ref $fil->{$_} ? @{$fil->{$_}} : ($fil->{$_});
-    s/$e/_$fil_escape{$1}/g for(@v);
-    $_.'-'.join '~', @v
-  } grep defined($fil->{$_}), sort keys %$fil;
+# GTIN code as argument,
+# Returns 'JAN', 'EAN', 'UPC', 'ISBN' or undef,
+# Also 'normalizes' the first argument in place
+sub gtintype {
+  $_[0] =~ s/[^\d]+//g;
+  $_[0] =~ s/^0+//;
+  return undef if $_[0] !~ /^[0-9]{10,13}$/; # I've yet to see a UPC code shorter than 10 digits assigned to a game
+  $_[0] = ('0'x(12-length $_[0])) . $_[0] if length($_[0]) < 12; # pad with zeros to GTIN-12
+  my $c = shift;
+  return undef if $c !~ /^[0-9]{12,13}$/;
+  $c = "0$c" if length($c) == 12; # pad with another zero for GTIN-13
+
+  # calculate check digit according to
+  #  http://www.gs1.org/productssolutions/barcodes/support/check_digit_calculator.html#how
+  my @n = reverse split //, $c;
+  my $n = shift @n;
+  $n += $n[$_] * ($_ % 2 != 0 ? 1 : 3) for (0..$#n);
+  return undef if $n % 10 != 0;
+
+  # Do some rough guesses based on:
+  #  http://www.gs1.org/productssolutions/barcodes/support/prefix_list.html
+  #  and http://en.wikipedia.org/wiki/List_of_GS1_country_codes
+  local $_ = $c;
+  return 'JAN' if /^4[59]/; # prefix code 450-459 & 490-499
+  return 'UPC' if /^(?:0[01]|0[6-9]|13|75[45])/; # prefix code 000-019 & 060-139 & 754-755
+  return 'ISBN' if /^97[89]/;
+  return  undef if /^(?:0[2-5]|2|9[6-9])/; # some codes we don't want: 020–059 & 200-299 & non-ISBN 977-999
+  return 'EAN'; # let's just call everything else EAN :)
 }
 
 
-# generates a parent tags/traits listing
-sub parenttags {
-  my($t, $index, $type) = @_;
-  p;
-   my @p = _parenttags(@{$t->{parents}});
-   for my $p (@p ? @p : []) {
-     a href => "/$type", $index;
-     for (reverse @$p) {
-       txt ' > ';
-       a href => "/$type$_->{id}", $_->{name};
-     }
-     txt " > $t->{name}";
-     br;
-   }
-  end 'p';
-}
-
-# arg: tag/trait hashref
-# returns: [ [ tag1, tag2, tag3 ], [ tag1, tag2, tag5 ] ]
-sub _parenttags {
-  my @r;
-  for my $t (@_) {
-    for (@{$t->{'sub'}}) {
-      push @r, [ $t, @$_ ] for _parenttags($_);
-    }
-    push @r, [$t] if !@{$t->{'sub'}};
+# arguments: <image size>, <max dimensions>
+# returns the size of the thumbnail with the same aspect ratio as the full-size
+#   image, but fits within the specified maximum dimensions
+sub imgsize {
+  my($ow, $oh, $sw, $sh) = @_;
+  return ($ow, $oh) if $ow <= $sw && $oh <= $sh;
+  if($ow/$oh > $sw/$sh) { # width is the limiting factor
+    $oh *= $sw/$ow;
+    $ow = $sw;
+  } else {
+    $ow *= $sh/$oh;
+    $oh = $sh;
   }
-  return @r;
+  return (int ($ow+0.5), int ($oh+0.5));
 }
 
 
-# a child tags/traits box
-sub childtags {
-  my($self, $title, $type, $t, $order) = @_;
-
-  div class => 'mainbox';
-   h1 $title;
-   ul class => 'tagtree';
-    for my $p (sort { !$order ? @{$b->{'sub'}} <=> @{$a->{'sub'}} : $a->{$order} <=> $b->{$order} } @{$t->{childs}}) {
-      li;
-       a href => "/$type$p->{id}", $p->{name};
-       b class => 'grayedout', " ($p->{c_items})" if $p->{c_items};
-       end, next if !@{$p->{'sub'}};
-       ul;
-        for (0..$#{$p->{'sub'}}) {
-          last if $_ >= 5 && @{$p->{'sub'}} > 6;
-          li;
-           txt '> ';
-           a href => "/$type$p->{sub}[$_]{id}", $p->{'sub'}[$_]{name};
-           b class => 'grayedout', " ($p->{sub}[$_]{c_items})" if $p->{'sub'}[$_]{c_items};
-          end;
-        }
-        if(@{$p->{'sub'}} > 6) {
-          my $c = @{$p->{'sub'}}-5;
-          li;
-           txt '> ';
-           a href => "/$type$p->{id}", style => 'font-style: italic',
-             sprintf '%d more %s%s', $c, $type eq 'g' ? 'tag' : 'trait', $c==1 ? '' : 's';
-          end;
-        }
-       end;
-      end 'li';
+# Normalized IP address to use for duplicate detection/throttling. For IPv4
+# this is the /23 subnet (is this enough?), for IPv6 the /48 subnet, with the
+# least significant bits of the address zero'd.
+sub norm_ip {
+    my $ip = shift;
+
+    # There's a whole bunch of IP manipulation modules on CPAN, but many seem
+    # quite bloated and still don't offer the functionality to return an IP
+    # with its mask applied (admittedly not a common operation). The libc
+    # socket functions will do fine in parsing and formatting addresses, and
+    # the actual masking is quite trivial in binary form.
+    my $v4 = inet_pton AF_INET, $ip;
+    if($v4) {
+      $v4 =~ s/(..)(.)./$1 . chr(ord($2) & 254) . "\0"/se;
+      return inet_ntop AF_INET, $v4;
     }
-   end 'ul';
-   clearfloat;
-   br;
-  end 'div';
+
+    $ip = inet_pton AF_INET6, $ip;
+    return '::' if !$ip;
+    $ip =~ s/^(.{6}).+$/$1 . "\0"x10/se;
+    return inet_ntop AF_INET6, $ip;
 }
 
 
-# generates the class elements for character spoiler hiding
-sub charspoil {
-  return "charspoil charspoil_$_[0]";
+sub minage {
+  my($a, $ex) = @_;
+  return 'Unknown' if !defined $a;
+  $a = $AGE_RATING{$a};
+  $ex && $a->{ex} ? "$a->{txt} (e.g. $a->{ex})" : $a->{txt}
 }
 
 
-# generates a local path to an image in static/
-sub imgpath { # <type>, <id>
-  return sprintf '%s/static/%s/%02d/%d.jpg', $TUWF::OBJ->{root}, $_[0], $_[1]%100, $_[1];
+sub _path {
+    my($t, $id) = $_[1] =~ /([a-z]+)([0-9]+)/;
+    sprintf '%s/%s%s/%02d/%d.%s', $_[0], $t, $_[2] ? ".$_[2]" : '', $id%100, $id, $_[3]||'jpg';
 }
 
+# imgpath($image_id, $dir, $format)
+#   $dir = empty || 't' || 'orig'
+#   $format = empty || $file_ext
+sub imgpath { _path config->{var_path}.'/static', @_ }
 
-# generates a URL for an image in static/
-sub imgurl {
-  return sprintf '%s/%s/%02d/%d.jpg', $TUWF::OBJ->{url_static}, $_[0], $_[1]%100, $_[1];
-}
+# imgurl($image_id, $dir, $format)
+sub imgurl { _path config->{url_static}, @_ }
 
 
 # Formats a vote number.
@@ -178,13 +171,6 @@ sub fmtmedia {
     $med->{ $med->{qty} && $qty > 1 ? 'plural' : 'txt' };
 }
 
-# Formats a VN length (xtra = time indication)
-sub fmtvnlen {
-  my($len, $xtra) = @_;
-  $len = $VN_LENGTH{$len};
-  $len->{txt}.($xtra && $len->{time} ? " ($len->{time})" : '');
-}
-
 # Formats a UNIX timestamp as a '<number> <unit> ago' string
 sub fmtage {
   my $a = time-shift;
@@ -200,32 +186,12 @@ sub fmtage {
   sprintf '%d %s ago', $t, $t == 1 ? $single : $plural;
 }
 
-# argument: database release date format (yyyymmdd)
-#  y = 0000 -> unknown
-#  y = 9999 -> TBA
-#  m = 99   -> month+day unknown
-#  d = 99   -> day unknown
-# return value: (unknown|TBA|yyyy|yyyy-mm|yyyy-mm-dd)
-#  if date > now: <b class="future">str</b>
-sub fmtdatestr {
-  my $date = sprintf '%08d', shift||0;
-  my $future = $date > strftime '%Y%m%d', gmtime;
-  my($y, $m, $d) = ($1, $2, $3) if $date =~ /^([0-9]{4})([0-9]{2})([0-9]{2})$/;
-
-  my $str = $y == 0 ? 'unknown' : $y == 9999 ? 'TBA' :
-    $m == 99 ? sprintf('%04d', $y) :
-    $d == 99 ? sprintf('%04d-%02d', $y, $m) :
-               sprintf('%04d-%02d-%02d', $y, $m, $d);
-
-  return $str if !$future;
-  return qq|<b class="future">$str</b>|;
-}
 
 # argument: unix timestamp and optional format (compact/full)
 sub fmtdate {
   my($t, $f) = @_;
-  return strftime '%Y-%m-%d', gmtime $t if !$f || $f eq 'compact';
-  return strftime '%Y-%m-%d at %R', gmtime $t;
+  return strftime '%Y-%m-%d', localtime $t if !$f || $f eq 'compact';
+  return strftime '%Y-%m-%d at %R', localtime $t;
 }
 
 # Turn a (natural number) vote into a rating indication
@@ -251,69 +217,56 @@ sub fmtspoil {
 }
 
 
-# Generates a HTML 'lang' attribute given a list of possible languages.
-# This is used for the 'original language' field, which we can safely assume is not used for latin-alphabet languages.
-sub lang_attr {
-    my @l = ref $_[0] ? $_[0]->@* : @_;
-    # Choose Japanese, Chinese or Korean (in order of likelyness) if those are in the list.
-    return (lang => 'ja') if grep $_ eq 'ja', @l;
-    return (lang => 'zh') if grep $_ eq 'zh', @l;
-    return (lang => 'ko') if grep $_ eq 'ko', @l;
-    return (lang => $l[0]) if @l == 1;
-    ()
+sub fmtanimation {
+    my($a, $cat) = @_;
+    return if !defined $a;
+    return $cat ? ucfirst "$cat not animated" : 'Not animated' if !$a;
+    return $cat ? "No $cat" : 'Not applicable' if $a == 1;
+    ($a & 256 ? 'Some scenes ' : $a & 512 ? 'All scenes ' : '').join('/',
+        $a &  4 ? 'Hand drawn' : (),
+        $a &  8 ? 'Vectorial' : (),
+        $a & 16 ? '3D' : (),
+        $a & 32 ? 'Live action' : ()
+    ).($cat ? " $cat" : '');
 }
 
 
-
-# JSON::XS::encode_json converts input to utf8, whereas the below functions
-# operate on wide character strings. Canonicalization is enabled to allow for
-# proper comparison of serialized objects.
-my $JSON = JSON::XS->new;
-$JSON->canonical(1);
-
-sub json_encode ($) {
-  $JSON->encode(@_);
+# Format a release date as a string.
+sub rdate {
+    my($y, $m, $d) = ($1, $2, $3) if sprintf('%08d', shift||0) =~ /^([0-9]{4})([0-9]{2})([0-9]{2})$/;
+    $y ==    0 ? 'unknown' :
+    $y == 9999 ? 'TBA' :
+    $m ==   99 ? sprintf('%04d', $y) :
+    $d ==   99 ? sprintf('%04d-%02d', $y, $m) :
+                 sprintf('%04d-%02d-%02d', $y, $m, $d);
 }
 
-sub json_decode ($) {
-  $JSON->decode(@_);
-}
 
-# Insert JSON-encoded data as script, arguments: id, object
-sub script_json {
-  script id => $_[0], type => 'application/json';
-   my $js = json_encode $_[1];
-   $js =~ s/</\\u003C/g; # escape HTML tags like </script> and <!--
-   lit $js;
-  end;
+# Given a language code & title, returns a (lang => $x) html property.
+sub tlang {
+    my($lang, $title) = @_;
+    # TODO: The -Latn suffix is redundant for languages that use the Latin script by default, need to check with a list.
+    # English is the site's default, so no need to specify that.
+    $lang && $lang ne 'en'
+        ? (lang => $lang . ($title =~ /[\x{0400}-\x{04ff}\x{0600}-\x{06ff}\x{0e00}-\x{0e7f}\x{1100}-\x{11ff}\x{1400}-\x{167f}\x{3040}-\x{3099}\x{30a1}-\x{30fa}\x{3100}-\x{9fff}\x{ac00}-\x{d7af}\x{ff66}-\x{ffdc}\x{20000}-\x{323af}]/ ? '' : '-Latn'))
+        : ();
 }
 
 
-
-# Compare the keys in %$old with the keys in %$new. Returns 1 if a difference was found, 0 otherwise.
-sub form_compare {
-  my($old, $new) = @_;
-  for my $k (keys %$old) {
-    my($o, $n) = ($old->{$k}, $new->{$k});
-    return 1 if defined $n ne defined $o || ref $o ne ref $n;
-    if(!defined $o) {
-      # must be equivalent
-    } elsif(!ref $o) {
-      return 1 if $o ne $n;
-    } else { # 'json' template
-      return 1 if @$o != @$n;
-      return 1 if grep form_compare($o->[$_], $n->[$_]), 0..$#$o;
-    }
-  }
-  return 0;
+# Given an SQL titles array, returns element attributes & content.
+sub tattr {
+    my $title = ref $_[0] eq 'HASH' ? $_[0]{title} : $_[0];
+    (tlang($title->[0],$title->[1]), title => $title->[3], $title->[1])
 }
 
 
-# Encode query parameters. Takes a hash or hashref with key/values, supports array values.
+
+# Encode query parameters. Takes a hash or hashref with key/values, supports array values and objects that implement query_encode().
 sub query_encode {
     my $o = @_ == 1 ? $_[0] : {@_};
     return join '&', map {
         my($k, $v) = ($_, $o->{$_});
+        $v = $v->query_encode() if ref $v && ref $v ne 'ARRAY';
         !defined $v ? () : ref $v ? map "$k=".uri_escape($_), sort @$v : "$k=".uri_escape($v)
     } sort keys %$o;
 }
@@ -348,5 +301,34 @@ sub md2html {
     $html
 }
 
-1;
 
+sub is_insecurepass {
+    utf8::encode(local $_ = shift);
+    my $hash = sha1 $_;
+    my $dir = config->{var_path}.'/hibp';
+    return 0 if !-d $dir;
+
+    my $prefix = uc unpack 'H4', $hash;
+    my $data = substr $hash, 2, 10;
+    my $F;
+    if(!open $F, '<', "$dir/$prefix") {
+        warn "Unable to lookup password prefix $prefix: $!";
+        return 0;
+    }
+
+    # Plain old binary search.
+    # Would be nicer to search through an mmap'ed view of the file, or at least
+    # use pread(), but alas, neither are easily available in Perl.
+    my($left, $right) = (0, -10 + -s $F);
+    while($left <= $right) {
+        my $off = floor(($left+$right)/20)*10;
+        sysseek $F, $off, 0 or die $!;
+        10 == sysread $F, my $buf, 10 or die $!;
+        return 1 if $buf eq $data;
+        if($buf lt $data) { $left = $off + 10; }
+        else {              $right = $off - 10; }
+    }
+    0;
+}
+
+1;