diff options
author | Yorhel <git@yorhel.nl> | 2011-02-07 09:51:46 +0100 |
---|---|---|
committer | Yorhel <git@yorhel.nl> | 2011-02-07 09:51:46 +0100 |
commit | 4d2c987b2329145d929381faba53a9b1e847ef08 (patch) | |
tree | 43f0b994d92296ffc3402661a5503f8ac980c5b0 | |
parent | d8af1c3163cbcf296eb2c37f1e1a8b073eeb7e7d (diff) |
Generate "bad request" on UTF-8 decoding error and add error_400_handler
I have not found any issues with letting decode_utf8() croak on invalid
input - in the few cases where this happened, regular processing
wouldn't have been meaningful anyway.
-rw-r--r-- | lib/TUWF.pm | 4 | ||||
-rw-r--r-- | lib/TUWF.pod | 7 | ||||
-rw-r--r-- | lib/TUWF/Request.pm | 27 |
3 files changed, 30 insertions, 8 deletions
diff --git a/lib/TUWF.pm b/lib/TUWF.pm index 5a05a29..aa0c21c 100644 --- a/lib/TUWF.pm +++ b/lib/TUWF.pm @@ -19,6 +19,7 @@ our $OBJ = bless { mail_from => '<noreply-yawf@blicky.net>', mail_sendmail => '/usr/sbin/sendmail', max_post_body => 10*1024*1024, # 10MB + error_400_handler => \&_error_400, error_404_handler => \&_error_404, error_405_handler => \&_error_405, error_413_handler => \&_error_413, @@ -114,6 +115,7 @@ sub load_recursive { # the default error handlers are quite ugly and generic... +sub _error_400 { _very_simple_page($_[0], 400, '400 - Bad Request', 'Only UTF-8 encoded data is accepted.') } sub _error_404 { _very_simple_page($_[0], 404, '404 - Page Not Found', 'The page you were looking for does not exist...') } sub _error_405 { _very_simple_page($_[0], 405, '405 - Method not allowed', 'The only allowed methods are: HEAD, GET or POST.') } sub _error_413 { _very_simple_page($_[0], 413, '413 - Request Entity Too Large', 'You were probably trying to upload a too large file.') } @@ -200,6 +202,8 @@ sub _handle_request { # initialize request my $err = $self->reqInit(); if($err) { + warn "Client sent non-UTF-8-encoded data. Generating HTTP 400 response.\n" if $err eq 'utf8'; + $self->{_TUWF}{error_400_handler}->($self) if $err eq 'utf8'; $self->{_TUWF}{error_405_handler}->($self) if $err eq 'method'; $self->{_TUWF}{error_413_handler}->($self) if $err eq 'maxpost'; return 1; diff --git a/lib/TUWF.pod b/lib/TUWF.pod index d993dd1..7709c91 100644 --- a/lib/TUWF.pod +++ b/lib/TUWF.pod @@ -326,6 +326,13 @@ I<logfile> is specified, TUWF will log page generation times for each request. This flag can be easily read through the C<debug()> method, so you can also use is in your own code. Default: 0 (disabled). +=item error_400_handler + +Similar to I<error_404_handler>, but is called when something in the request +data did not make sense to TUWF. In the current implementation, this only +happens when the request data contains non-UTF8-encoded text. 
A warning is +written to the log file when this happens. + =item error_404_handler Set this to a subroutine reference if you want to write your own 404 error diff --git a/lib/TUWF/Request.pm b/lib/TUWF/Request.pm index 971e77d..b56ee6f 100644 --- a/lib/TUWF/Request.pm +++ b/lib/TUWF/Request.pm @@ -23,9 +23,15 @@ sub reqInit { if ($ENV{REQUEST_URI}||'') =~ /\?/; } - $self->{_TUWF}{Req}{Cookies} = _parse_cookies($self, $ENV{HTTP_COOKIE} || $ENV{COOKIE}); - $self->{_TUWF}{Req}{GET} = _parse_urlencoded($ENV{QUERY_STRING}); - $self->reqPath(); # let it croak when the path isn't valid UTF-8 + my $err = eval { + $self->{_TUWF}{Req}{Cookies} = _parse_cookies($self, $ENV{HTTP_COOKIE} || $ENV{COOKIE}); + $self->{_TUWF}{Req}{GET} = _parse_urlencoded($ENV{QUERY_STRING}); + $self->reqPath(); # let it croak when the path isn't valid UTF-8 + 1; + }; + return 'utf8' if !$err && $@ && $@ =~ /does not map to Unicode/; # <- UGLY! + # re-throw if it wasn't a UTF-8 problem. I don't expect this to happen + die $@ if !$err; my $meth = $self->reqMethod; return 'method' if $meth !~ /^(GET|POST|HEAD)$/; @@ -36,11 +42,16 @@ sub reqInit { my $data; die "Couldn't read all POST data.\n" if $ENV{CONTENT_LENGTH} > read STDIN, $data, $ENV{CONTENT_LENGTH}, 0; - if(($ENV{'CONTENT_TYPE'}||'') =~ m{^multipart/form-data; boundary=(.+)$}) { - _parse_multipart($self, $data, $1); - } else { - $self->{_TUWF}{Req}{POST} = _parse_urlencoded($data); - } + $err = eval { + if(($ENV{'CONTENT_TYPE'}||'') =~ m{^multipart/form-data; boundary=(.+)$}) { + _parse_multipart($self, $data, $1); + } else { + $self->{_TUWF}{Req}{POST} = _parse_urlencoded($data); + } + 1; + }; + return 'utf8' if !$err && $@ && $@ =~ /does not map to Unicode/; + die $@ if !$err; } return ''; |