summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xbinspect144
1 files changed, 144 insertions, 0 deletions
diff --git a/binspect b/binspect
new file mode 100755
index 0000000..cffd5f8
--- /dev/null
+++ b/binspect
@@ -0,0 +1,144 @@
+#!/usr/bin/perl
+# Copyright (c) 2020 Yoran Heling
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# TODO:
+# - Documentation!
+# - Transparent decompression of .gz/.bz2/.xz/.zst etc inputs
+# - Query expressions to only return a particular key or array index
+# - 'bencode' output format
+# - Alternative input formats to more easily edit bencoded files
+
+use v5.14; # non-destructive replace
+use B;
+use Getopt::Long;
+use Encode;
+
+
+GetOptions(
+ 'f|format=s', \(my $opt_fmt = 'human'),
+ 's|strings=s', \(my $opt_str = 'hex'),
+ 'p|prefix!', \(my $opt_prefix = 0),
+);
+my $input_file = shift() || '-';
+
+
+# Given (buffer, current byte offset)
+# Returns (decoded value, new byte offset)
+# (This substring uglyness is the only way I've found to parse large bencode
+# values with okay performance; some CPAN modules may be even faster)
+sub bdecode {
+ my($buf, $off) = @_;
+ my $c = substr $buf, $off, 1;
+ # Ints
+ return ($1+0, $off + 2 + length $1) if $c eq 'i' && substr($buf, $off, 23) =~ /^i(0|-?[1-9][0-9]*)e/;
+ # Lists
+ if($c eq 'l') {
+ $off++;
+ my(@l, $v);
+ while('e' ne substr $buf, $off, 1) {
+ ($v, $off) = bdecode($buf, $off);
+ push @l, $v;
+ }
+ return (\@l, $off+1);
+ }
+ # Dicts
+ if($c eq 'd') {
+ $off++;
+ my(%d, $k, $v, $lk);
+ while(1) {
+ $c = substr $buf, $off, 1;
+ last if $c eq 'e';
+ die "Non-string key in bencode dictionary at byte $off" if $c !~ /^[0-9]$/;
+ ($k, $off) = bdecode($buf, $off);
+ die "Duplicate key or wrong key order in bencode dictionary at byte $off" if defined $lk && $lk ge $k;
+ $lk = $k;
+ ($v, $off) = bdecode($buf, $off);
+ $d{$k} = $v;
+ }
+ return (\%d, $off+1);
+ }
+ # Strings
+ my $len = substr $buf, $off, 20;
+ if($len =~ /^(0|[1-9][0-9]*):/) {
+ $off += 1 + length $1;
+ return (substr($buf, $off, $1), $off+$1);
+ }
+ die "Invalid bencode value at byte $off";
+}
+
+
+# Perl antipattern: Inspect scalar to see if it should be represented as an int.
+# Works fine, just need to be careful not to accidentally stringify a scalar.
+sub is_int { !(B::svref_2object(\$_[0])->FLAGS & B::SVp_POK) }
+sub is_utf8 { defined eval { Encode::decode('UTF-8', $_[0], Encode::DIE_ON_ERR|Encode::LEAVE_SRC) } }
+
+sub fmt_str {
+ my($data) = @_;
+ $opt_str eq 'raw' ? $data :
+ is_utf8($data) ? ($opt_prefix?'s:':'').$data :
+ $opt_str eq 'hex' ? ($opt_prefix?'hex:':'').($data =~ s/(.)/sprintf '%02x', ord $1/ergs) :
+ $opt_str eq 'base64' ? ($opt_prefix?'base64:':'').do { require MIME::Base64; MIME::Base64::encode_base64($data) =~ s/\r?\n//rg } :
+ $opt_str eq 'base64url' ? ($opt_prefix?'base64url:':'').do { require MIME::Base64; MIME::Base64::encode_base64url($data) } :
+ die "Unrecognized string format: $opt_str\n";
+}
+
+
+sub convert_str {
+ my($data) = @_;
+ return +{ map +(fmt_str($_), convert_str($data->{$_})), keys %$data } if ref $data eq 'HASH';
+ return [ map convert_str($_), @$data ] if ref $data eq 'ARRAY';
+ return $data if is_int $data;
+ fmt_str $data
+}
+
+
+sub fmt_human {
+ my($data) = @_;
+ my sub indent { $_[0] =~ s/\n/\n /rg }
+ my sub str { fmt_str($_[0]) =~ s/([\\\r\n\t])/\\$1/rg =~ s/([\x00-\x1f])/'\\x'.sprintf '%02x', ord $1/erg; }
+ my sub maybeln { (ref $_[0] eq 'HASH' && keys %{$_[0]}) || (ref $_[0] eq 'ARRAY' && @{$_[0]}) ? "\n" : ' ' }
+ return keys %$data ? join "\n", map sprintf('%s =%s', str($_), indent(maybeln($data->{$_}).fmt_human($data->{$_}))), sort keys %$data : '{}' if ref $data eq 'HASH';
+ return @$data ? join "\n", map sprintf('- %s', indent fmt_human($_)), @$data : '[]' if ref $data eq 'ARRAY';
+ return sprintf '%s%d', $opt_prefix ? 's:' : '', $data if is_int $data;
+ str $data
+}
+
+
+
+my $input = do {
+ local $/=undef;
+ $input_file eq '-' ? <> : do { open my $F, '<', $input_file or die "Can't open $input_file: $!\n"; <$F> };
+};
+my $data = eval { [bdecode $input, 0]->[0] } // die "Error parsing bencode from $input_file: $@\n";
+
+if($opt_fmt eq 'human') {
+ say fmt_human $data;
+} elsif($opt_fmt eq 'perl') {
+ require Data::Dumper;
+ print Data::Dumper->new([convert_str $data])->Terse(1)->Sortkeys(1)->Dump;
+} elsif($opt_fmt eq 'json') {
+ # JSON::PP is a core module and ought to come with a default Perl installation, but not all Linux distributions honor that...
+ die "Unable to load the Perl JSON::PP module, please make sure it's installed.\n" if !eval { require JSON::PP; 1 };
+ print JSON::PP->new->canonical->allow_nonref->pretty->encode(convert_str $data);
+} else {
+ die "Unrecognized output format: $opt_fmt\n";
+}