From a03fbbbeb01ab116b1d72f3dc2d23c6deac31eb3 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Mon, 8 Jun 2020 10:25:34 +0200 Subject: Initial commit Just publishing this script with the hope that it will motivate me to, one day, document this stuff and turn it into an actually usable tool. --- binspect | 144 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100755 binspect diff --git a/binspect b/binspect new file mode 100755 index 0000000..cffd5f8 --- /dev/null +++ b/binspect @@ -0,0 +1,144 @@ +#!/usr/bin/perl +# Copyright (c) 2020 Yoran Heling +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# TODO: +# - Documentation! +# - Transparent decompression of .gz/.bz2/.xz/.zst etc inputs +# - Query expressions to only return a particular key or array index +# - 'bencode' output format +# - Alternative input formats to more easily edit bencoded files + +use v5.14; # non-destructive replace +use B; +use Getopt::Long; +use Encode; + + +GetOptions( + 'f|format=s', \(my $opt_fmt = 'human'), + 's|strings=s', \(my $opt_str = 'hex'), + 'p|prefix!', \(my $opt_prefix = 0), +); +my $input_file = shift() || '-'; + + +# Given (buffer, current byte offset) +# Returns (decoded value, new byte offset) +# (This substring uglyness is the only way I've found to parse large bencode +# values with okay performance; some CPAN modules may be even faster) +sub bdecode { + my($buf, $off) = @_; + my $c = substr $buf, $off, 1; + # Ints + return ($1+0, $off + 2 + length $1) if $c eq 'i' && substr($buf, $off, 23) =~ /^i(0|-?[1-9][0-9]*)e/; + # Lists + if($c eq 'l') { + $off++; + my(@l, $v); + while('e' ne substr $buf, $off, 1) { + ($v, $off) = bdecode($buf, $off); + push @l, $v; + } + return (\@l, $off+1); + } + # Dicts + if($c eq 'd') { + $off++; + my(%d, $k, $v, $lk); + while(1) { + $c = substr $buf, $off, 1; + last if $c eq 'e'; + die "Non-string key in bencode dictionary at byte $off" if $c !~ /^[0-9]$/; + ($k, $off) = bdecode($buf, $off); + die "Duplicate key or wrong key order in bencode dictionary at byte $off" if defined $lk && $lk ge $k; + $lk = $k; + ($v, $off) = bdecode($buf, $off); + $d{$k} = $v; + } + return (\%d, $off+1); + } + # Strings + my $len = substr $buf, $off, 20; + if($len =~ /^(0|[1-9][0-9]*):/) { + $off += 1 + length $1; + return (substr($buf, $off, $1), $off+$1); + } + die "Invalid bencode value at byte $off"; +} + + +# Perl antipattern: Inspect scalar to see if it should be represented as an int. +# Works fine, just need to be careful not to accidentally stringify a scalar. +sub is_int { !(B::svref_2object(\$_[0])->FLAGS & B::SVp_POK) } +sub is_utf8 { defined eval { Encode::decode('UTF-8', $_[0], Encode::DIE_ON_ERR|Encode::LEAVE_SRC) } } + +sub fmt_str { + my($data) = @_; + $opt_str eq 'raw' ? $data : + is_utf8($data) ? ($opt_prefix?'s:':'').$data : + $opt_str eq 'hex' ? ($opt_prefix?'hex:':'').($data =~ s/(.)/sprintf '%02x', ord $1/ergs) : + $opt_str eq 'base64' ? ($opt_prefix?'base64:':'').do { require MIME::Base64; MIME::Base64::encode_base64($data) =~ s/\r?\n//rg } : + $opt_str eq 'base64url' ? ($opt_prefix?'base64url:':'').do { require MIME::Base64; MIME::Base64::encode_base64url($data) } : + die "Unrecognized string format: $opt_str\n"; +} + + +sub convert_str { + my($data) = @_; + return +{ map +(fmt_str($_), convert_str($data->{$_})), keys %$data } if ref $data eq 'HASH'; + return [ map convert_str($_), @$data ] if ref $data eq 'ARRAY'; + return $data if is_int $data; + fmt_str $data +} + + +sub fmt_human { + my($data) = @_; + my sub indent { $_[0] =~ s/\n/\n /rg } + my sub str { fmt_str($_[0]) =~ s/([\\\r\n\t])/\\$1/rg =~ s/([\x00-\x1f])/'\\x'.sprintf '%02x', ord $1/erg; } + my sub maybeln { (ref $_[0] eq 'HASH' && keys %{$_[0]}) || (ref $_[0] eq 'ARRAY' && @{$_[0]}) ? "\n" : ' ' } + return keys %$data ? join "\n", map sprintf('%s =%s', str($_), indent(maybeln($data->{$_}).fmt_human($data->{$_}))), sort keys %$data : '{}' if ref $data eq 'HASH'; + return @$data ? join "\n", map sprintf('- %s', indent fmt_human($_)), @$data : '[]' if ref $data eq 'ARRAY'; + return sprintf '%s%d', $opt_prefix ? 's:' : '', $data if is_int $data; + str $data +} + + + +my $input = do { + local $/=undef; + $input_file eq '-' ? <> : do { open my $F, '<', $input_file or die "Can't open $input_file: $!\n"; <$F> }; +}; +my $data = eval { [bdecode $input, 0]->[0] } // die "Error parsing bencode from $input_file: $@\n"; + +if($opt_fmt eq 'human') { + say fmt_human $data; +} elsif($opt_fmt eq 'perl') { + require Data::Dumper; + print Data::Dumper->new([convert_str $data])->Terse(1)->Sortkeys(1)->Dump; +} elsif($opt_fmt eq 'json') { + # JSON::PP is a core module and ought to come with a default Perl installation, but not all Linux distributions honor that... + die "Unable to load the Perl JSON::PP module, please make sure it's installed.\n" if !eval { require JSON::PP; 1 }; + print JSON::PP->new->canonical->allow_nonref->pretty->encode(convert_str $data); +} else { + die "Unrecognized output format: $opt_fmt\n"; +} -- cgit v1.2.3