#!/usr/bin/perl
use v5.26;
use warnings;
use TUWF ':html5_', ':xml';
use POSIX 'ceil';
use SQL::Interp 'sql', 'sql_interp';
use Time::Local 'timegm';
use Cwd 'abs_path';
our $ROOT;
BEGIN { ($ROOT = abs_path $0) =~ s{/www/index\.pl$}{}; }
# Force the pure-perl AnyEvent backend; More lightweight and we don't need the
# performance of EV. Fixes an issue with subprocess spawning under TUWF's
# built-in web server that I haven't been able to track down.
BEGIN { $ENV{PERL_ANYEVENT_MODEL} = 'Perl'; }
use lib "$ROOT/lib/ManUtils/inst/lib/perl5";
use ManUtils;
# Global TUWF configuration for this site.
TUWF::set(
# Log destination is taken from the TUWF_LOG environment variable.
logfile => $ENV{TUWF_LOG},
# All three connection parameters are left undefined here.
# NOTE(review): presumably this makes DBI fall back to its own environment
# variables (DBI_DSN etc.) -- confirm against the TUWF/DBI documentation.
db_login => [undef, undef, undef],
# Debug mode is toggled through the TUWF_DEBUG environment variable.
debug => $ENV{TUWF_DEBUG},
xml_pretty => 0,
# NOTE(review): the threshold is presumably in milliseconds -- confirm
# against the TUWF documentation.
log_slow_pages => 500,
);
# Static file handling: when tuwf->{_TUWF}{http} is set (presumably when
# running under TUWF's built-in web server -- confirm), try to serve the
# requested path straight from "$ROOT/www". Files served this way are marked
# as cacheable for one year and the request ends there.
TUWF::hook before => sub {
    return if !tuwf->{_TUWF}{http};
    return if !tuwf->resFile("$ROOT/www", tuwf->reqPath);
    tuwf->resHeader('Cache-Control' => 'max-age=31536000');
    tuwf->done;
};
# TODO: Add SQL::Interp support to TUWF directly, in some form.
#
# Variants of the TUWF database methods that take SQL::Interp-style
# arguments: the argument list is run through sql_interp() and the result is
# handed to the corresponding plain method.
sub TUWF::Object::dbExeci {
    my($self, @query) = @_;
    return $self->dbExec(sql_interp(@query));
}

sub TUWF::Object::dbVali {
    my($self, @query) = @_;
    return $self->dbVal(sql_interp(@query));
}

sub TUWF::Object::dbRowi {
    my($self, @query) = @_;
    return $self->dbRow(sql_interp(@query));
}

sub TUWF::Object::dbAlli {
    my($self, @query) = @_;
    return $self->dbAll(sql_interp(@query));
}

# Like the others, but the first argument after the object is passed to
# dbPage() untouched, ahead of the interpolated query.
sub TUWF::Object::dbPagei {
    my($self, $opts, @query) = @_;
    return $self->dbPage($opts, sql_interp(@query));
}
# Set the Last-Modified response header from a string in yyyy-mm-dd format.
#
# Arguments:
#   $s - object providing a resHeader() method
#   $d - date string; only the leading yyyy-mm-dd part is used
#
# Nothing is set (and nothing is thrown) when $d is undefined, does not start
# with a yyyy-mm-dd pattern, or names a date that does not exist.
sub TUWF::Object::resLastMod {
    my($s, $d) = @_;
    return if !defined $d || $d !~ /^(\d{4})-(\d{2})-(\d{2})/;
    my($year, $mon, $day) = ($1, $2, $3);

    # timegm() croaks on out-of-range values such as month 13 or February 31.
    # A bad date should only cost us the header, not the whole request.
    my $epoch = eval { timegm(0, 0, 0, $day, $mon - 1, $year) };
    return if !defined $epoch;

    # Round-trip through gmtime() to obtain the day of the week. The names are
    # spelled out by hand so the header never depends on the process locale.
    my @t = gmtime $epoch;
    $s->resHeader('Last-Modified', sprintf '%s, %02d %s %04d %02d:%02d:%02d GMT',
        (qw|Sun Mon Tue Wed Thu Fri Sat|)[$t[6]], $t[3],
        (qw|Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec|)[$t[4]],
        $t[5]+1900, $t[2], $t[1], $t[0]);
}
# Returns an arrayref with all rows of the systems table, ordered by name and
# id. Every row gets an extra 'full' key: the name, followed by a space and
# the release when there is one. The table doesn't change often, so the list
# is fetched on first use and kept in memory afterwards.
sub systems {
    state $cache;
    if(!$cache) {
        my @rows = tuwf->dbAll('SELECT id, name, release, short FROM systems ORDER BY name, id')->@*;
        for my $sys (@rows) {
            my $suffix = $sys->{release} ? ' '.$sys->{release} : '';
            $sys->{full} = $sys->{name}.$suffix;
        }
        $cache = \@rows;
    }
    $cache;
}
# Hashref mapping a system id to its row from systems(); built on first use.
sub sysbyid {
    state $by_id;
    $by_id ||= do {
        my %index;
        $index{ $_->{id} } = $_ for @{ systems() };
        \%index;
    };
}
# Hashref mapping a system short name to its row from systems(); built on
# first use.
sub sysbyshort {
    state $by_short;
    $by_short ||= do {
        my %index;
        $index{ $_->{short} } = $_ for @{ systems() };
        \%index;
    };
}
# URL-unescape some special characters that may occur in man names: %5b and
# %5d (in either case) become [ and ], %20 becomes a space. Everything else is
# left alone. Firefox seems to escape [ and ] in URLs. It doesn't really have
# to...
# Returns the unescaped copy; the argument itself is not modified.
sub normalize_name {
    my($name) = @_;
    my %plain = ('%5b' => '[', '%5d' => ']', '%20' => ' ');
    # None of the replacements contains a '%', so a single pass can never
    # produce a new escape sequence.
    return $name =~ s/(%5[bd]|%20)/$plain{lc $1}/gir;
}
# int -> hex: the bytes of the native signed integer (pack 'i'), written out
# as a hex string.
sub shorthash_to_hex {
    my($int) = @_;
    my $bytes = pack 'i', $int;
    return unpack 'H*', $bytes;
}
# hex -> int: the inverse of shorthash_to_hex; the hex string is turned back
# into bytes and read as a native signed integer (unpack 'i').
sub shorthash_to_int {
    my($hex) = @_;
    my $bytes = pack 'H*', $hex;
    return unpack 'i', $bytes;
}
# Subquery returning all packages that have a man page, i.e. packages with at
# least one row in package_versions that in turn has at least one row in
# files. The string is wrapped in parentheses so it can be used directly after
# FROM; the caller has to supply the table alias.
my $packages_with_man = '(SELECT * FROM packages p WHERE EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = p.id AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = pv.id)))';
# Escape the characters that are special in an SQL LIKE pattern (_, % and the
# backslash itself) by prefixing each with a backslash. Returns the escaped
# copy; the argument is not modified.
sub escape_like {
    my($str) = @_;
    my $special = qr/[_%\\]/;
    return $str =~ s/($special)/\\$1/gr;
}
# Join SQL fragments with a separator: sql_join($sep, @parts) passes
# $parts[0], $sep, $parts[1], $sep, ... to sql(). With no parts at all, sql()
# is called without arguments.
sub sql_join {
    my($sep, @parts) = @_;
    my @interleaved;
    for my $part (@parts) {
        push @interleaved, $sep if @interleaved;
        push @interleaved, $part;
    }
    return sql(@interleaved);
}
# Combine conditions with AND, each wrapped in parentheses. An empty list
# yields the always-true condition '1=1'.
sub sql_and {
    my @conds = @_;
    return sql('1=1') if !@conds;
    return sql_join('AND', map { sql('(', $_, ')') } @conds);
}
# Combine conditions with OR, each wrapped in parentheses. An empty list
# yields the always-false condition '1=0'.
sub sql_or {
    my @conds = @_;
    return sql('1=0') if !@conds;
    return sql_join('OR', map { sql('(', $_, ')') } @conds);
}
# Resolve a package from a URL path.
#
# Arguments:
#   $sys_where - SQL condition selecting the system(s) to look in
#   $path      - "$category/$name" or "$category/$name/$version"
#
# Returns ($pkg, $version): $pkg is the row found in the database (id, system,
# name, category) and $version is '' when the path carried no version. When
# nothing matches, (undef, '') is returned.
sub pkg_frompath {
    my($sys_where, $path) = @_;

    my $find = sub {
        my($cat, $name) = @_;
        tuwf->dbRowi('SELECT id, system, name, category FROM', $packages_with_man, 'p WHERE', $sys_where, 'AND category =', \$cat, 'AND name =', \$name);
    };

    # $category may itself contain a slash, so the two layouts cannot be told
    # apart by looking at the path alone. Try them in order against the
    # database:
    #   $category/$name            e.g. contrib/games/alien
    #   $category/$name/$version   e.g. contrib/games/alien/10.2
    my @layouts = (
        qr{^(.+)/([^/]+)$},
        qr{^(.+)/([^/]+)/([^/]+)$},
    );
    for my $layout (@layouts) {
        my($cat, $name, $version) = $path =~ $layout or next;
        my $pkg = $find->($cat, $name);
        return ($pkg, $version // '') if $pkg->{id};
    }
    return (undef, '');
}
# Get the preferred man page for the given filters.
#
# Arguments:
#   $section - section to look for; used as a prefix match and may be empty,
#              in which case no section filter is added
#   $where   - SQL condition applied to the joined tables; the aliases
#              available to it are f (files), l (locales), m (mans),
#              v (package_versions), p (packages) and s (systems)
#
# Returns the result of tuwf->dbRowi() for a query that selects at most one
# row with the columns: system, category, package, version, released, verid,
# name, section, filename, locale, shorthash and content.
sub man_pref {
my($section, $where) = @_;
# Narrow the caller's condition down to sections starting with $section.
$where = sql_and $where, sql 'm.section LIKE', \(escape_like($section).'%') if length $section;
# Criteria to determine a "preferred" man page:
# 1. english: English versions of a man page have preference over other locales
# 2. pkgver: Newer versions of the same package have preference over older versions
# 3. stdloc: Prefer man pages in standard locations
# 4. secmatch: Prefer an exact section match
# 5. arch: Prefer Arch over other systems (because it tends to be the most up-to-date, and closest to upstreams)
# 6. debian: If there's no Arch, prefer latest Debian over other systems (again, tends to be more up-to-date)
# (also resolves distro-specific tooling disputes such as https://code.blicky.net/yorhel/manned/issues/1 )
# 7. sysrel: Prefer a more recent system release over an older release
# 8. secorder: Lower sections before higher sections (because man does it this way, for some reason)
# 9. pkgdate: Prefer more recent packages (cross-distro)
# 10. Fall back on shorthash comparison, to ensure the result is stable
#
# Both ids below are 'state' variables, so they are looked up once and then
# reused. They are interpolated directly into the SQL text further down; the
# values come from the systems table, not from the request.
# $archid: id of the system with short name 'arch'.
state $archid = sysbyshort->{arch}{id};
# $debid: highest id among the systems whose short name starts with 'debian-'.
state $debid = (sort { $b->{id} <=> $a->{id} } grep $_->{short} =~ /^debian-/, systems->@*)[0]{id};
# Each f_* step in the query keeps only the rows that satisfy its criterion,
# unless no row satisfies it, in which case all rows are passed on unchanged.
# Note on the f_arch step: "length $section ? X : Y" parses as
# "length($section) ? X : Y", because named unary operators bind tighter than
# the conditional operator.
tuwf->dbRowi(q{
WITH unfiltered AS (
SELECT m.name, m.section, l.locale, f.shorthash, f.content, f.filename, s AS sys, p AS pkg, v AS ver
FROM files f
JOIN locales l ON l.id = f.locale
JOIN mans m ON m.id = f.man
JOIN package_versions v ON v.id = f.pkgver
JOIN packages p ON p.id = v.package
JOIN systems s ON s.id = p.system
WHERE}, $where, q{
), f_english AS(
SELECT * FROM unfiltered WHERE NOT EXISTS(SELECT 1 FROM unfiltered WHERE is_english_locale(locale)) OR is_english_locale(locale)
), f_pkgver AS(
SELECT * FROM f_english a WHERE NOT EXISTS(SELECT 1 FROM f_english b WHERE (a.ver).package = (b.ver).package AND (a.ver).released < (b.ver).released)
), f_stdloc AS(
SELECT * FROM f_pkgver WHERE NOT EXISTS(SELECT 1 FROM f_pkgver WHERE is_standard_man_location(filename)) OR is_standard_man_location(filename)
), f_secmatch AS(
SELECT * FROM f_stdloc WHERE NOT EXISTS(SELECT 1 FROM f_stdloc WHERE section =}, \$section, q{) OR section =}, \$section, q{
), f_arch AS(
SELECT * FROM f_secmatch WHERE NOT EXISTS(SELECT 1 FROM}, length $section ? 'f_secmatch' : 'f_stdloc', qq{WHERE (sys).id = $archid) OR (sys).id = $archid
), f_debian AS(
SELECT * FROM f_arch WHERE NOT EXISTS(SELECT 1 FROM f_arch WHERE (sys).id = $debid) OR (sys).id = $debid
), f_sysrel AS(
SELECT * FROM f_debian a WHERE NOT EXISTS(SELECT 1 FROM f_debian b WHERE (a.sys).name = (b.sys).name AND (a.sys).id < (b.sys).id)
), f_secorder AS(
SELECT * FROM f_sysrel a WHERE NOT EXISTS(SELECT 1 FROM f_sysrel b WHERE a.section > b.section)
), f_pkgdate AS(
SELECT * FROM f_secorder a WHERE NOT EXISTS(SELECT 1 FROM f_secorder b WHERE (a.ver).released < (b.ver).released)
)
SELECT (pkg).system, (pkg).category, (pkg).name AS package, (ver).version, (ver).released, (ver).id AS verid,
name, section, filename, locale, shorthash, content
FROM f_pkgdate ORDER BY shorthash LIMIT 1
});
}
# Given the name of a man page with optional section, find out the actual name
# and section suffix of the man page and the preferred version.
sub man_pref_name {
my($name, $where) = @_;
# Check the
Indexing %s versions of %s manual pages found in
%s files of %s packages.
Manned.org aims to index all manual pages from a variety of systems, both
old and new, and provides a convenient interface for looking up and viewing
the various versions of each man page.
More information »
The state of online indices of manual pages used to be a sad one. Existing
sites used to only offer you a single version of a man page: From one
origin, and often only in a single language. Most didn't even tell you
where the manual actually originated from, making it very hard to
determine whether the manual you found applied to your situation and even
harder to find a manual for a specific system. Additionally, some sites
rendered the manuals in an unreadable way, didn't correctly handle special
formatting - like tables - or didn't correctly display non-ASCII
characters.
Nowadays there are many good alternatives, but Manned.org was one of the
sites created in order to improve that situation. This site aims to index
the manual pages from a variety of systems, both old and new, and allows you
to browse through the various versions of a manual page to find out how each
system behaves. The manuals are stored in the database as UTF-8, and are
passed through groff to
render them in (mostly) the same way as they are displayed in your terminal.
This website is open
source (AGPL licensed) and written in a combination of Perl and Rust.
The entire PostgreSQL database is available
for download.
You can link to specific packages and man pages with several URL formats.
These URLs will keep working in the future, so you should not have to worry
about eventual dead links. The following URLs are available to refer to an individual man page: In all URLs where an optional Linking to individual packages is also possible. These pages will show a
listing of all manual pages available in the given package. This site only indexes packages that actually have manual pages,
linking to a package that doesn't have any will result in a 404 page.
All man pages are fetched right from the (binary) packages available on the
public repositories of Linux distributions. In particular:
Only packages for a single architecture (i386 or amd64) are scanned. To my
knowledge, packages that come with different manuals for different
architectures either don't exist or are extremely rare. It does happen that
some packages are not available for all architectures. Usually, though,
every package is at least available for the most popular architecture, so
hopefully we're not missing out on much.
This site is backed by a PostgreSQL database containing all the man pages.
Weekly dumps of the full database are available for download at
http://dl.manned.org/dumps/.
This site isn't nearly as awesome yet as it could be. Here's some ideas that
would be nice to have in the future:
Goal
URL format
Man pages
/<name>[.<section>]
or /man/<name>[.<section>]
/socket
/socket.7
/man/socket.7/man/<system>/<name>[.<section>]
/man/ubuntu/rsync
/man/ubuntu-xenial/rsync/man/<system>/<category>/<package>/<name>[.<section>]
/man/ubuntu-xenial/net/rsync/rsync/man/<system>/<category>/<package>/<version>/<name>[.<section>]
/man/ubuntu-xenial/net/rsync/3.1.1-3ubuntu1/rsync/man.<language>/...
/man/
component will select
the man page in the requested language. The man page has to be available
in that language, otherwise you will get a 404. Redirects to other
languages as fallback may be implemented in the future. English man
pages are typically not tagged with a language at all, so explicitly
requesting /man.en/...
will usually fail. This, too, may be
improved in the future. Examples:
/man.de/faked-tcp
/man.de/fedora/rsync.1/man.<8-hex-digits>/...
/man/
component of the above URLs will get that specific
man page from the requested system and/or package. The contents of the
man page should generally be the same regardless of which system or
package is included in the URL, but the UI may provide a different
navigation context. Examples:
/man.910be0ed/ls
/man.910be0ed/fedora/ls
/man.910be0ed/arch/ls
/man.910be0ed/fedora/everything/coreutils-common/ls/raw...
/man
with
/raw
to get the raw UTF-8 encoded man page source, e.g.:
/raw/socket.7
/raw/ubuntu-xenial/net/rsync/3.1.1-3ubuntu1/rsync
/raw.de/faked-tcp
/raw.910be0ed/fedora/ls/<name>/<8-hex-digits>
.<section>
can be provided,
the search is performed as a prefix match. For example, /cat.3 will provide the cat.3tcl
man page if
no exact cat.3
version is available. Linking to the full
section name is also possible: /cat.3tcl. If no
section is given and multiple sections are available, the lowest section
number is chosen.Packages
/pkg/<system>/<category>/<package>
/pkg/<system>/<category>/<package>/<version>
The indexing process
The repositories are scanned for new packages on a daily basis.
Database download
Be warned that the download server may not be terribly fast or reliable,
so it is advisable to use a client that supports resumption of partial
downloads. See wget's -c or
curl's -C.
The database schema is "documented" at schema.sql
in the git repo. Keep in mind that these dumps don't constitute a stable
API and, while this won't happen frequently, incompatible schema changes
or Postgres major version bumps will occasionally occur.
Future plans
All manual pages are copyrighted by their respective authors. The manuals have been fetched from publicly available repositories of free and (primarily) open source software. The distributors of said software have put in efforts to only include software and documentation that allows free distribution. Nonetheless, if a manual that does not permit redistribution has been inadvertently included in our index, please let me know and I will have it removed as soon as possible.
_ }; };

# Very simple (and fast) prefix match.
#
# Arguments:
#   $q     - free-form query: a man page name, optionally preceded by a
#            single-digit section ("3 printf") or followed by a section in
#            parentheses ("printf(3)")
#   $limit - maximum number of rows to return
#
# Returns an arrayref of { name, section } rows, ordered by name and section.
# The name is matched as a case-insensitive prefix, and so is the section when
# one was given. An empty arrayref is returned when no name could be parsed.
sub search_man {
    my($q, $limit) = @_;
    my $sect = $q =~ s/^([0-9])\s+// || $q =~ s/\(([a-zA-Z0-9]+)\)$// ? $1 : '';
    my $name = $q =~ s/^([a-zA-Z0-9,.:_-]+)// ? $1 : '';

    # Test with length() rather than truthiness: the string "0" is false in
    # Perl, so a plain boolean test would reject a man page named "0" and
    # would silently drop the section filter for a query such as "0 aio.h".
    return [] if !length $name;
    tuwf->dbAlli(
        'SELECT name, section FROM mans WHERE',
        sql_and(
            sql('lower(name) LIKE', \(escape_like(lc $name).'%')),
            length($sect) ? sql('section ILIKE', \(escape_like(lc $sect).'%')) : (),
        ),
        'ORDER BY name, section LIMIT', \$limit,
    );
}

# HTML search results. A search with exactly one hit redirects straight to
# that man page.
TUWF::get '/browse/search' => sub {
    my $q = tuwf->reqGet('q')||'';
    my $man = search_man $q, 150;
    return tuwf->resRedirect("/$man->[0]{name}.$man->[0]{section}", 'temp') if @$man == 1;
    framework_ title => 'Search results for '.$q, mainclass => 'searchres', sub {
        h1_ 'Search results for '.$q;
        # Package search would also be useful.
        p_ 'Note: This is just a simple case-insensitive prefix match on the man names. In the future we\'ll have more powerful search functionality. Hopefully.';
        if(@$man) {
            ul_ sub {
                li_ sub {
                    a_ href => "/$_->{name}.$_->{section}", $_->{name};
                    small_ " $_->{section}";
                } for @$man;
            }
        } else {
            p_ 'No results :-(';
        }
    };
};

# XML search results: at most 20 <item> elements inside a <results> element,
# each carrying the row's columns plus an id of the form "name.section".
TUWF::get '/xml/search.xml' => sub {
    my $q = tuwf->reqGet('q')||'';
    my $man = search_man $q, 20;
    tuwf->resHeader('Content-Type' => 'text/xml; charset=UTF-8');
    xml;
    tag 'results', sub {
        tag 'item', id => "$_->{name}.$_->{section}", %$_, undef for @$man;
    };
};

# Object to represent the various URLs to a man page.
#
# Parameters:
#   fmt       => man|txt|raw
#   shorthash => 8-char hex
#   lang      => language code
#   system    => system shortname
#   category  => package category
#   package   => name of the package
#   version   => package version
#   man       => name of the man page
#   section   => man page section
#
# URL format:
#   /$fmt[.$shorthash][.$lang][/$system[/$category/$package[/$version]]]/$man[.$section]
#
# Note that the URL format has some ambiguity:
# - $category may contain a slash, so a database lookup is required to
#   disambiguate between URLs with [/$version] and those without.
# - $man may contain a dot, so a database lookup is required to disambiguate # between URLs with [.$section] and those without # # $system may also refer to system shortnames without the version suffix (e.g. # 'ubuntu' rather than 'ubuntu-impish'). In that case the man page from the # latest release of that system is chosen. package ManUrl { sub new { my($p,%o)=@_; bless \%o, $p } sub set { my($o,@o)=@_; bless +{%$o,@o}, ref $o } sub mansect { $_[0]{man}.(defined $_[0]{section} ? ".$_[0]{section}" : '') } use overload '""' => sub { my($o)=@_; "/$o->{fmt}".(defined $o->{shorthash} ? ".$o->{shorthash}" : '').(defined $o->{lang} ? ".$o->{lang}" : '') .(defined $o->{system} ? ("/$o->{system}" .(defined $o->{category} ? ("/$o->{category}/$o->{package}" .(defined $o->{version} ? "/$o->{version}" : '')) : '')) : '') .'/'.$o->mansect }; }; sub man_nav_ { my($man, $url, $toc, $htmllang) = @_; my @systems = tuwf->dbAlli(' SELECT DISTINCT p.system FROM packages p JOIN package_versions v ON v.package = p.id JOIN files f ON f.pkgver = v.id JOIN mans m ON m.id = f.man WHERE m.name =', \$man->{name}, 'AND m.section =', \$man->{section} )->@*; my @sect = map $_->{section}, tuwf->dbAlli( 'SELECT DISTINCT section FROM mans WHERE name =', \$man->{name}, 'ORDER BY section' )->@*; my @lang = map $_->{lang}, tuwf->dbAlli( "SELECT DISTINCT substring(l.locale from '^[^.]+') AS lang FROM files f JOIN mans m ON m.id = f.man JOIN locales l ON l.id = f.locale WHERE m.name =", \$man->{name}, 'AND m.section =', \$man->{section}, " ORDER BY substring(l.locale from '^[^.]+') NULLS FIRST" )->@*; nav_ sub { form_ action => '/sysredir/'.$url->mansect(), method => 'get', onsubmit => 'location.href="/man/"+system_select[system_select.selectedIndex].value+"/'.$url->mansect().'";return false', sub { my %names; push $names{$_->{name}}->@*, $_ for map sysbyid->{$_->{system}}, sort { $b->{system} <=> $a->{system} } @systems; select_ id => 'system_select', name => 'system', sub { for (sort { ($names{$b}->@* 
== 1) <=> ($names{$a}->@* == 1) || $a cmp $b } keys %names) { my $s = $names{$_}; if (@$s == 1) { option_ value => $s->[0]{short}, selected => $s->[0]{id} == $man->{system}?'':undef, $s->[0]{full}; next; } optgroup_ label => $_, sub { option_ value => $_->{short}, selected => $_->{id} == $man->{system}?'':undef, $_->{full} for @$s; }; } }; input_ type => 'submit', value => 'Go'; } if @systems > 1; # TODO: This is ugly, especially because clicking on a translation or # section, you can end up with a man page that is nowhere close to the # man page you're currently reading. Sections or languages available # for the currently selected system should be highlighted. if(@sect > 1) { b_ 'Sections'; p_ sub { for (@sect) { if($man->{section} eq $_) { i_ $_; } else { a_ href => "/man/$man->{name}.$_", $_; } txt_ ' '; } } } if(@lang > 1) { b_ 'Languages'; p_ sub { (my $cur = $man->{locale}||'') =~ s/\..*//; for (@lang) { if(($_||'') eq $cur) { i_ $_ || 'default'; } else { a_ href => $_ ? "/man.$_/$man->{name}.$man->{section}" : "/man/$man->{name}.$man->{section}", $_ || 'default'; } txt_ ' '; } } } if(@$toc > 1) { b_ 'Table of Contents'; ul_ sub { for (0..$#$toc) { li_ sub { a_ @$htmllang, href => sprintf('#head%d', $_+1), sub { lit_ lc $toc->[$_] }; } } } } } } # Replace .so's in man source with the contents (if available in the same # package) or with a reference to the other man page. sub soelim { my($verid, $src) = @_; # tix comes with* a custom(?) macro package. But it looks okay even without loading that. # (* It actually doesn't, the tcllib package appears to have that file, but doesn't '.so' it) $src =~ s/^\.so man.macros$//mg; # Other .so's should be handled by html() $src =~ s{^\.so (.+)$}{ my $path = $1; my $name = (reverse split /\//, $path)[0]; my($man) = $verid ? man_pref_name $name, sql 'v.id =', \$verid : (); $man->{name} # Recursive soelim, but the second call gets $verid=0 so we don't keep checking the database ? 
soelim(0, tuwf->dbVali("SELECT content FROM contents WHERE id =", \$man->{content})) : ".in -10\n.sp\n\[\[\[MANNEDINCLUDE$path\]\]\]" }emg; $src; } sub man_page { my($man, $url) = @_; tuwf->resLastMod($man->{released}); my $content = tuwf->dbRowi('SELECT encode(hash, \'hex\') AS hash, content FROM contents WHERE id =', \$man->{content}); if($url->{fmt} eq 'raw') { tuwf->resHeader('Content-Type', 'text/plain; charset=UTF-8'); tuwf->resHeader('Content-Disposition', sprintf 'filename="%s.%s"', $man->{name}, $man->{section}); lit $content->{content}; return; } my $fmt = ManUtils::html ManUtils::fmt_block soelim $man->{verid}, $content->{content}; if($url->{fmt} eq 'txt') { # TODO: The 'txt' format is kind of broken right now as it includes our HTML formatting codes. # This feature is a WIP and not advertised at the moment, anyway. tuwf->resHeader('Content-Type', 'text/plain; charset=UTF-8'); tuwf->resHeader('Content-Disposition', sprintf 'filename="%s.%s.txt"', $man->{name}, $man->{section}); lit $fmt; return; } # Prefix links to other man pages with the current system, to ensure we # grab the most relevant man page. # XXX: This is a hack, prefixing is better done directly in ManUtils. my $sys = sysbyid->{$man->{system}}{short}; $fmt =~ s{([^<\n]+?)<\/b>\n}{ push @toc, $1; my $c = @toc; qq{\n$1\n} }eg; my $hasversions = tuwf->dbVali( 'SELECT 1 FROM files f JOIN mans m ON m.id = f.man JOIN locales l ON l.id = f.locale WHERE m.name =', \$man->{name}, 'AND m.section =', \$man->{section}, ' AND l.locale =', \$man->{locale}, ' AND f.shorthash <> ', \$man->{shorthash}, ' LIMIT 1' ); my @htmllang = $man->{locale} =~ /^([a-z]{2,3})(?:_([A-Z]{2}))?(?:$|@|\.)/ ? (lang => $1.($2?"-$2":'')) : (); framework_ title => $man->{name}, mainclass => 'manpage', sub { man_nav_ $man, $url, \@toc, \@htmllang; # TODO: Replace the 'versions' and 'locations' functionality with non-JS alternatives. 
div_ id => 'manbuttons', sub { h1_ $man->{name}; ul_ 'data-hash' => $content->{hash}, 'data-name' => $man->{name}, 'data-section' => $man->{section}, 'data-locale' => $man->{locale}||'', 'data-hasversions' => $hasversions?1:0, sub { li_ sub { a_ href => $url->set(fmt => 'raw'), 'source' }; li_ sub { a_ href => $url->set(system => sysbyid->{$man->{system}}{short}, category => undef, shorthash => shorthash_to_hex $man->{shorthash}), 'permalink' }; } }; div_ id => 'manres', class => 'hidden', ''; pre_ @htmllang, sub { lit_ $fmt }; }; } # /