summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Yorhel <git@yorhel.nl> 2011-02-07 09:51:46 +0100
committer Yorhel <git@yorhel.nl> 2011-02-07 09:51:46 +0100
commit 4d2c987b2329145d929381faba53a9b1e847ef08 (patch)
tree 43f0b994d92296ffc3402661a5503f8ac980c5b0
parent d8af1c3163cbcf296eb2c37f1e1a8b073eeb7e7d (diff)
Generate "bad request" on UTF-8 decoding error and added error_400_handler
I have not found any issues with letting decode_utf8() croak on invalid input - in the few cases where this happened, regular processing wouldn't have been meaningful anyway.
-rw-r--r-- lib/TUWF.pm 4
-rw-r--r-- lib/TUWF.pod 7
-rw-r--r-- lib/TUWF/Request.pm 27
3 files changed, 30 insertions, 8 deletions
diff --git a/lib/TUWF.pm b/lib/TUWF.pm
index 5a05a29..aa0c21c 100644
--- a/lib/TUWF.pm
+++ b/lib/TUWF.pm
@@ -19,6 +19,7 @@ our $OBJ = bless {
mail_from => '<noreply-yawf@blicky.net>',
mail_sendmail => '/usr/sbin/sendmail',
max_post_body => 10*1024*1024, # 10MB
+ error_400_handler => \&_error_400,
error_404_handler => \&_error_404,
error_405_handler => \&_error_405,
error_413_handler => \&_error_413,
@@ -114,6 +115,7 @@ sub load_recursive {
# the default error handlers are quite ugly and generic...
+sub _error_400 { _very_simple_page($_[0], 400, '400 - Bad Request', 'Only UTF-8 encoded data is accepted.') }
sub _error_404 { _very_simple_page($_[0], 404, '404 - Page Not Found', 'The page you were looking for does not exist...') }
sub _error_405 { _very_simple_page($_[0], 405, '405 - Method not allowed', 'The only allowed methods are: HEAD, GET or POST.') }
sub _error_413 { _very_simple_page($_[0], 413, '413 - Request Entity Too Large', 'You were probably trying to upload a too large file.') }
@@ -200,6 +202,8 @@ sub _handle_request {
# initialize request
my $err = $self->reqInit();
if($err) {
+ warn "Client sent non-UTF-8-encoded data. Generating HTTP 400 response.\n" if $err eq 'utf8';
+ $self->{_TUWF}{error_400_handler}->($self) if $err eq 'utf8';
$self->{_TUWF}{error_405_handler}->($self) if $err eq 'method';
$self->{_TUWF}{error_413_handler}->($self) if $err eq 'maxpost';
return 1;
diff --git a/lib/TUWF.pod b/lib/TUWF.pod
index d993dd1..7709c91 100644
--- a/lib/TUWF.pod
+++ b/lib/TUWF.pod
@@ -326,6 +326,13 @@ I<logfile> is specified, TUWF will log page generation times for each request.
This flag can be easily read through the C<debug()> method, so you can also use
is in your own code. Default: 0 (disabled).
+=item error_400_handler
+
+Similar to I<error_404_handler>, but is called when something in the request
+data did not make sense to TUWF. In the current implementation, this only
+happens when the request data contains non-UTF8-encoded text. A warning is
+written to the log file when this happens.
+
=item error_404_handler
Set this to a subroutine reference if you want to write your own 404 error
diff --git a/lib/TUWF/Request.pm b/lib/TUWF/Request.pm
index 971e77d..b56ee6f 100644
--- a/lib/TUWF/Request.pm
+++ b/lib/TUWF/Request.pm
@@ -23,9 +23,15 @@ sub reqInit {
if ($ENV{REQUEST_URI}||'') =~ /\?/;
}
- $self->{_TUWF}{Req}{Cookies} = _parse_cookies($self, $ENV{HTTP_COOKIE} || $ENV{COOKIE});
- $self->{_TUWF}{Req}{GET} = _parse_urlencoded($ENV{QUERY_STRING});
- $self->reqPath(); # let it croak when the path isn't valid UTF-8
+ my $err = eval {
+ $self->{_TUWF}{Req}{Cookies} = _parse_cookies($self, $ENV{HTTP_COOKIE} || $ENV{COOKIE});
+ $self->{_TUWF}{Req}{GET} = _parse_urlencoded($ENV{QUERY_STRING});
+ $self->reqPath(); # let it croak when the path isn't valid UTF-8
+ 1;
+ };
+ return 'utf8' if !$err && $@ && $@ =~ /does not map to Unicode/; # <- UGLY!
+ # re-throw if it wasn't a UTF-8 problem. I don't expect this to happen
+ die $@ if !$err;
my $meth = $self->reqMethod;
return 'method' if $meth !~ /^(GET|POST|HEAD)$/;
@@ -36,11 +42,16 @@ sub reqInit {
my $data;
die "Couldn't read all POST data.\n" if $ENV{CONTENT_LENGTH} > read STDIN, $data, $ENV{CONTENT_LENGTH}, 0;
- if(($ENV{'CONTENT_TYPE'}||'') =~ m{^multipart/form-data; boundary=(.+)$}) {
- _parse_multipart($self, $data, $1);
- } else {
- $self->{_TUWF}{Req}{POST} = _parse_urlencoded($data);
- }
+ $err = eval {
+ if(($ENV{'CONTENT_TYPE'}||'') =~ m{^multipart/form-data; boundary=(.+)$}) {
+ _parse_multipart($self, $data, $1);
+ } else {
+ $self->{_TUWF}{Req}{POST} = _parse_urlencoded($data);
+ }
+ 1;
+ };
+ return 'utf8' if !$err && $@ && $@ =~ /does not map to Unicode/;
+ die $@ if !$err;
}
return '';