From a03fbbbeb01ab116b1d72f3dc2d23c6deac31eb3 Mon Sep 17 00:00:00 2001
From: Yorhel <git@yorhel.nl>
Date: Mon, 8 Jun 2020 10:25:34 +0200
Subject: Initial commit

Just publishing this script with the hope that it will motivate me to,
one day, document this stuff and turn it into an actually usable tool.
---
 binspect | 144 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 144 insertions(+)
 create mode 100755 binspect

diff --git a/binspect b/binspect
new file mode 100755
index 0000000..cffd5f8
--- /dev/null
+++ b/binspect
@@ -0,0 +1,144 @@
+#!/usr/bin/perl
+#  Copyright (c) 2020 Yoran Heling
+#
+#  Permission is hereby granted, free of charge, to any person obtaining
+#  a copy of this software and associated documentation files (the
+#  "Software"), to deal in the Software without restriction, including
+#  without limitation the rights to use, copy, modify, merge, publish,
+#  distribute, sublicense, and/or sell copies of the Software, and to
+#  permit persons to whom the Software is furnished to do so, subject to
+#  the following conditions:
+#
+#  The above copyright notice and this permission notice shall be included
+#  in all copies or substantial portions of the Software.
+#
+#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+#  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+#  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+#  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+#  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# TODO:
+# - Documentation!
+# - Transparent decompression of .gz/.bz2/.xz/.zst etc inputs
+# - Query expressions to only return a particular key or array index
+# - 'bencode' output format
+# - Alternative input formats to more easily edit bencoded files
+
+use v5.14; # non-destructive replace
+use B;
+use Getopt::Long;
+use Encode;
+
+
+GetOptions(
+    'f|format=s',  \(my $opt_fmt    = 'human'),
+    's|strings=s', \(my $opt_str    = 'hex'),
+    'p|prefix!',   \(my $opt_prefix = 0),
+);
+my $input_file = shift() || '-';
+
+
+# Given (buffer, current byte offset)
+# Returns (decoded value, new byte offset)
+# (This substring uglyness is the only way I've found to parse large bencode
+# values with okay performance; some CPAN modules may be even faster)
+sub bdecode {
+    my($buf, $off) = @_;
+    my $c = substr $buf, $off, 1;
+    # Ints
+    return ($1+0, $off + 2 + length $1) if $c eq 'i' && substr($buf, $off, 23) =~ /^i(0|-?[1-9][0-9]*)e/;
+    # Lists
+    if($c eq 'l') {
+        $off++;
+        my(@l, $v);
+        while('e' ne substr $buf, $off, 1) {
+            ($v, $off) = bdecode($buf, $off);
+            push @l, $v;
+        }
+        return (\@l, $off+1);
+    }
+    # Dicts
+    if($c eq 'd') {
+        $off++;
+        my(%d, $k, $v, $lk);
+        while(1) {
+            $c = substr $buf, $off, 1;
+            last if $c eq 'e';
+            die "Non-string key in bencode dictionary at byte $off" if $c !~ /^[0-9]$/;
+            ($k, $off) = bdecode($buf, $off);
+            die "Duplicate key or wrong key order in bencode dictionary at byte $off" if defined $lk && $lk ge $k;
+            $lk = $k;
+            ($v, $off) = bdecode($buf, $off);
+            $d{$k} = $v;
+        }
+        return (\%d, $off+1);
+    }
+    # Strings
+    my $len = substr $buf, $off, 20;
+    if($len =~ /^(0|[1-9][0-9]*):/) {
+        $off += 1 + length $1;
+        return (substr($buf, $off, $1), $off+$1);
+    }
+    die "Invalid bencode value at byte $off";
+}
+
+
+# Perl antipattern: Inspect scalar to see if it should be represented as an int.
+# Works fine, just need to be careful not to accidentally stringify a scalar.
+sub is_int { !(B::svref_2object(\$_[0])->FLAGS & B::SVp_POK) }
+sub is_utf8 { defined eval { Encode::decode('UTF-8', $_[0], Encode::DIE_ON_ERR|Encode::LEAVE_SRC) } }
+
+sub fmt_str {
+    my($data) = @_;
+    $opt_str eq 'raw'       ? $data :
+    is_utf8($data)          ? ($opt_prefix?'s:':'').$data :
+    $opt_str eq 'hex'       ? ($opt_prefix?'hex:':'').($data =~ s/(.)/sprintf '%02x', ord $1/ergs) :
+    $opt_str eq 'base64'    ? ($opt_prefix?'base64:':'').do { require MIME::Base64; MIME::Base64::encode_base64($data) =~ s/\r?\n//rg } :
+    $opt_str eq 'base64url' ? ($opt_prefix?'base64url:':'').do { require MIME::Base64; MIME::Base64::encode_base64url($data) } :
+    die "Unrecognized string format: $opt_str\n";
+}
+
+
+sub convert_str {
+    my($data) = @_;
+    return +{ map +(fmt_str($_), convert_str($data->{$_})), keys %$data } if ref $data eq 'HASH';
+    return [ map convert_str($_), @$data ] if ref $data eq 'ARRAY';
+    return $data if is_int $data;
+    fmt_str $data
+}
+
+
+sub fmt_human {
+    my($data) = @_;
+    my sub indent { $_[0] =~ s/\n/\n  /rg }
+    my sub str { fmt_str($_[0]) =~ s/([\\\r\n\t])/\\$1/rg =~ s/([\x00-\x1f])/'\\x'.sprintf '%02x', ord $1/erg; }
+    my sub maybeln { (ref $_[0] eq 'HASH' && keys %{$_[0]}) || (ref $_[0] eq 'ARRAY' && @{$_[0]}) ? "\n" : ' ' }
+    return keys %$data ? join "\n", map sprintf('%s =%s', str($_), indent(maybeln($data->{$_}).fmt_human($data->{$_}))), sort keys %$data : '{}' if ref $data eq 'HASH';
+    return @$data ? join "\n", map sprintf('- %s', indent fmt_human($_)), @$data : '[]' if ref $data eq 'ARRAY';
+    return sprintf '%s%d', $opt_prefix ? 's:' : '', $data if is_int $data;
+    str $data
+}
+
+
+
+my $input = do {
+    local $/=undef;
+    $input_file eq '-' ? <> : do { open my $F, '<', $input_file or die "Can't open $input_file: $!\n"; <$F> };
+};
+my $data = eval { [bdecode $input, 0]->[0] } // die "Error parsing bencode from $input_file: $@\n";
+
+if($opt_fmt eq 'human') {
+    say fmt_human $data;
+} elsif($opt_fmt eq 'perl') {
+    require Data::Dumper;
+    print Data::Dumper->new([convert_str $data])->Terse(1)->Sortkeys(1)->Dump;
+} elsif($opt_fmt eq 'json') {
+    # JSON::PP is a core module and ought to come with a default Perl installation, but not all Linux distributions honor that...
+    die "Unable to load the Perl JSON::PP module, please make sure it's installed.\n" if !eval { require JSON::PP; 1 };
+    print JSON::PP->new->canonical->allow_nonref->pretty->encode(convert_str $data);
+} else {
+    die "Unrecognized output format: $opt_fmt\n";
+}
-- 
cgit v1.2.3