#!/usr/bin/perl

use v5.26;
use warnings;
use TUWF ':html5_', ':xml';
use POSIX 'ceil';
use SQL::Interp 'sql', 'sql_interp';
use Time::Local 'timegm';
use Cwd 'abs_path';

# Project root: the absolute path of this script with a trailing
# "/www/index.pl" removed.
our $ROOT;
BEGIN { ($ROOT = abs_path $0) =~ s{/www/index\.pl$}{}; }

# Force the pure-perl AnyEvent backend; More lightweight and we don't need the
# performance of EV. Fixes an issue with subprocess spawning under TUWF's
# built-in web server that I haven't been able to track down.
BEGIN { $ENV{PERL_ANYEVENT_MODEL} = 'Perl'; }

use lib "$ROOT/lib/ManUtils/inst/lib/perl5";
use ManUtils;


TUWF::set(
    logfile        => $ENV{TUWF_LOG},
    db_login       => [undef, undef, undef],
    debug          => $ENV{TUWF_DEBUG},
    xml_pretty     => 0,
    log_slow_pages => 500,
);


# Static file handling: if the request path maps to a file below $ROOT/www,
# serve it with a one-year cache lifetime and end the request.
# NOTE(review): tuwf->{_TUWF}{http} is presumably only set when running under
# TUWF's built-in HTTP server - confirm against TUWF.
TUWF::hook before => sub {
    if(tuwf->{_TUWF}{http}) {
        if(tuwf->resFile("$ROOT/www", tuwf->reqPath)) {
            tuwf->resHeader('Cache-Control' => 'max-age=31536000');
            tuwf->done;
        }
    }
};


# SQL::Interp flavoured variants of the TUWF db* methods: the arguments are
# passed through sql_interp() before being handed to the regular method.
# TODO: Add SQL::Interp support to TUWF directly, in some form.
sub TUWF::Object::dbExeci { shift->dbExec(sql_interp @_) }
sub TUWF::Object::dbVali  { shift->dbVal (sql_interp @_) }
sub TUWF::Object::dbRowi  { shift->dbRow (sql_interp @_) }
sub TUWF::Object::dbAlli  { shift->dbAll (sql_interp @_) }
sub TUWF::Object::dbPagei { shift->dbPage(shift, sql_interp @_) }


# Set the last modification time from a string in yyyy-mm-dd format.
#
# The Last-Modified header is best-effort: nothing is set (and nothing is
# thrown) when the date is undefined, does not start with yyyy-mm-dd, or has
# out-of-range fields such as month 13.
sub TUWF::Object::resLastMod {
    my($s, $d) = @_;
    return if !defined $d || $d !~ /^(\d{4})-(\d{2})-(\d{2})/;
    my($year, $mon, $mday) = ($1, $2, $3);

    # timegm() croaks on out-of-range values; don't let a bad date turn the
    # whole page into an error.
    my $epoch = do { local $@; eval { timegm 0,0,0,$mday,$mon-1,$year } };
    return if !defined $epoch;

    my @t = gmtime $epoch;
    $s->resHeader('Last-Modified', sprintf '%s, %02d %s %04d %02d:%02d:%02d GMT',
        (qw|Sun Mon Tue Wed Thu Fri Sat|)[$t[6]], $t[3],
        (qw|Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec|)[$t[4]], $t[5]+1900,
        $t[2], $t[1], $t[0]);
}


# The systems table doesn't change often, so keep an in-memory cache for quick lookups.
# Returns an array ref of system rows (id, name, release, short), each
# extended with a 'full' key: the name followed by the release, if any.
sub systems {
    state $s ||= [
        map { $_->{full} = $_->{name}.($_->{release}?' '.$_->{release}:''); $_ }
        tuwf->dbAll('SELECT id, name, release, short FROM systems ORDER BY name, id')->@*
    ];
}

# Lookup tables over systems(), keyed by id and by short name respectively.
sub sysbyid    { state $s ||= { map +($_->{id}, $_), systems->@* } }
sub sysbyshort { state $s ||= { map +($_->{short}, $_), systems->@* } }


# URL-unescape some special characters that may occur in man names.
# Firefox seems to escape [ and ] in URLs. It doesn't really have to...
sub normalize_name { $_[0] =~ s/%5b/[/irg =~ s/%5d/]/irg =~ s/%20/ /rg }

# Conversion between the integer shorthash and its hex form as used in URLs.
# Both go through pack/unpack with the 'i' (native signed int) template.
sub shorthash_to_hex { unpack 'H*', pack 'i', $_[0] } # int -> hex
sub shorthash_to_int { unpack 'i', pack 'H*', $_[0] } # hex -> int


# Subquery returning all packages that have a man page.
my $packages_with_man = '(SELECT * FROM packages p WHERE EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = p.id AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = pv.id)))';


# Backslash-escape the LIKE metacharacters (_ % \) in a string.
sub escape_like { $_[0] =~ s/([_%\\])/\\$1/rg }

# sql_join($sep, @parts): SQL::Interp object with $sep between the parts.
sub sql_join {
    my $sep = shift;
    my @args = map +($sep, $_), @_;
    sql @args[1..$#args];
}

# Parenthesize each argument and combine them with AND / OR. Without
# arguments these produce an always-true / always-false condition.
sub sql_and { @_ ? sql_join 'AND', map sql('(', $_, ')'), @_ : sql '1=1' }
sub sql_or  { @_ ? sql_join 'OR',  map sql('(', $_, ')'), @_ : sql '1=0' }


# Resolve the package part of a URL path to a package row.
#
#   $sys_where - SQL condition restricting the system(s) to look in
#   $path      - "$category/$name" or "$category/$name/$version"
#
# Returns ($pkg, $version); $version is '' when the path did not include one.
# Returns (undef, '') when no package with a man page matches.
sub pkg_frompath {
    my($sys_where, $path) = @_;

    # $path should be "$category/$name" or "$category/$name/$version", since
    # $category may contain a slash, let's try both options.
    my sub lookup {
        my($cat, $name) = @_;
        tuwf->dbRowi('SELECT id, system, name, category FROM', $packages_with_man, 'p WHERE', $sys_where, 'AND category =', \$cat, 'AND name =', \$name);
    }

    # $category/$name
    #   e.g. contrib/games/alien
    if($path =~ m{^(.+)/([^/]+)$}) {
        my $pkg = lookup $1, $2;
        return ($pkg, '') if $pkg->{id};
    }

    # $category/$name/$version
    #   e.g. contrib/games/alien/10.2
    if($path =~ m{^(.+)/([^/]+)/([^/]+)$}) {
        my $pkg = lookup $1, $2;
        return ($pkg, $3) if $pkg->{id};
    }

    (undef, '');
}


# Get the preferred man page for the given filters.
# man_pref($section, $where)
#   $section - section (prefix) to restrict the lookup to; may be undef/empty
#   $where   - SQL condition selecting the candidate files
# Returns the single row produced by the query below; callers in this file
# test $row->{name} to find out whether anything matched.
sub man_pref {
    my($section, $where) = @_;

    $where = sql_and $where, sql 'm.section LIKE', \(escape_like($section).'%') if length $section;

    # Criteria to determine a "preferred" man page:
    # 1. english:  English versions of a man page have preference over other locales
    # 2. pkgver:   Newer versions of the same package have preference over older versions
    # 3. stdloc:   Prefer man pages in standard locations
    # 4. secmatch: Prefer an exact section match
    # 5. arch:     Prefer Arch over other systems (because it tends to be the most up-to-date, and closest to upstreams)
    # 6. debian:   If there's no Arch, prefer latest Debian over other systems (again, tends to be more up-to-date)
    #              (also resolves distro-specific tooling disputes such as https://code.blicky.net/yorhel/manned/issues/1 )
    # 7. sysrel:   Prefer a more recent system release over an older release
    # 8. secorder: Lower sections before higher sections (because man does it this way, for some reason)
    # 9. pkgdate:  Prefer more recent packages (cross-distro)
    # 10. Fall back on shorthash comparison, to ensure the result is stable

    # Both ids are interpolated directly into the SQL below.
    # NOTE(review): they are undef when no 'arch' / 'debian-*' system exists,
    # which would produce invalid SQL - confirm that can't happen in practice.
    state $archid = sysbyshort->{arch}{id};
    state $debid = (sort { $b->{id} <=> $a->{id} } grep $_->{short} =~ /^debian-/, systems->@*)[0]{id};

    # The SQL text is split into concatenated pieces purely for readability;
    # every piece starts with the space that separates it from the previous one.
    tuwf->dbRowi(
          q{ WITH unfiltered AS (}
        . q{ SELECT m.name, m.section, l.locale, f.shorthash, f.content, f.filename, s AS sys, p AS pkg, v AS ver}
        . q{ FROM files f}
        . q{ JOIN locales l ON l.id = f.locale}
        . q{ JOIN mans m ON m.id = f.man}
        . q{ JOIN package_versions v ON v.id = f.pkgver}
        . q{ JOIN packages p ON p.id = v.package}
        . q{ JOIN systems s ON s.id = p.system}
        . q{ WHERE}, $where,
          q{ ), f_english AS(}
        . q{ SELECT * FROM unfiltered WHERE NOT EXISTS(SELECT 1 FROM unfiltered WHERE is_english_locale(locale)) OR is_english_locale(locale)}
        . q{ ), f_pkgver AS(}
        . q{ SELECT * FROM f_english a WHERE NOT EXISTS(SELECT 1 FROM f_english b WHERE (a.ver).package = (b.ver).package AND (a.ver).released < (b.ver).released)}
        . q{ ), f_stdloc AS(}
        . q{ SELECT * FROM f_pkgver WHERE NOT EXISTS(SELECT 1 FROM f_pkgver WHERE is_standard_man_location(filename)) OR is_standard_man_location(filename)}
        . q{ ), f_secmatch AS(}
        . q{ SELECT * FROM f_stdloc WHERE NOT EXISTS(SELECT 1 FROM f_stdloc WHERE section =}, \$section, q{) OR section =}, \$section,
          q{ ), f_arch AS(}
        . q{ SELECT * FROM f_secmatch WHERE NOT EXISTS(SELECT 1 FROM}, (length $section ? 'f_secmatch' : 'f_stdloc'),
          qq{WHERE (sys).id = $archid) OR (sys).id = $archid}
        . q{ ), f_debian AS(}
        . qq{ SELECT * FROM f_arch WHERE NOT EXISTS(SELECT 1 FROM f_arch WHERE (sys).id = $debid) OR (sys).id = $debid}
        . q{ ), f_sysrel AS(}
        . q{ SELECT * FROM f_debian a WHERE NOT EXISTS(SELECT 1 FROM f_debian b WHERE (a.sys).name = (b.sys).name AND (a.sys).id < (b.sys).id)}
        . q{ ), f_secorder AS(}
        . q{ SELECT * FROM f_sysrel a WHERE NOT EXISTS(SELECT 1 FROM f_sysrel b WHERE a.section > b.section)}
        . q{ ), f_pkgdate AS(}
        . q{ SELECT * FROM f_secorder a WHERE NOT EXISTS(SELECT 1 FROM f_secorder b WHERE (a.ver).released < (b.ver).released)}
        . q{ )}
        . q{ SELECT (pkg).system, (pkg).category, (pkg).name AS package, (ver).version, (ver).released, (ver).id AS verid, name, section, filename, locale, shorthash, content}
        . q{ FROM f_pkgdate}
        . q{ ORDER BY shorthash}
        . q{ LIMIT 1 }
    );
}


# Given the name of a man page with optional section, find out the actual name
# and section suffix of the man page and the preferred version.
#
# Returns ($man, $section): $section is the suffix that was split off $name,
# or '' when $name matched as a whole. Returns (undef, '') when nothing matched.
sub man_pref_name {
    my($name, $where) = @_;

    # Check the <name>.<section> format first, because ~most~ cases where
    # there's a collision in the format, the <name>-only page is either
    # uninteresting or a file name parsing error.
    if ($name =~ /^(.+)\.([^.]+)$/) {
        my($n, $s) = ($1,$2);
        my $man = man_pref $s, sql_and $where, sql 'm.name =', \$n;
        return ($man, $s) if length $man->{name};
    }

    my $man = man_pref undef, sql_and $where, sql 'm.name =', \$name;
    length $man->{name} ? ($man, '') : (undef, '');
}


# Page skeleton shared by all HTML pages.
#
#   framework_ title => $title, mainclass => $class, sub { ...page body... };
#
# The last argument is the content callback handed to <main>; the preceding
# arguments are options (title, mainclass).
sub framework_ {
    my $content = pop;
    my(%o) = @_;

    html_ lang => 'en', sub {
        head_ sub {
            link_ rel => 'stylesheet', type => 'text/css', href => '/man.css?5';
            title_ $o{title}.' - manned.org';
        };
        body_ sub {
            header_ sub {
                a_ href => '/', 'Manned.org';
                form_ action => '/browse/search', method => 'get', sub {
                    input_ type => 'text', name => 'q', id => 'q', tabindex => 1;
                    input_ type => 'submit', value => 'Search';
                }
            };
            main_ class => $o{mainclass}, $content;
            footer_ sub {
                span_ sub {
                    a_ href => '/info/about', 'about';
                    txt_ ' | ';
                    a_ href => 'mailto:manned@yorhel.nl', 'contact';
                    txt_ ' | ';
                    a_ href => 'https://code.blicky.net/yorhel/manned', 'source';
                };
                span_ 'all manual pages are copyrighted by their respective authors.';
            };
            script_ type => 'text/javascript', src => '/man.js', '';
        }
    };

    # write the SQL queries as a HTML comment when debugging is enabled
    # (stolen from VNDB code)
    # (TODO: Move this into TUWF or something)
    if(tuwf->debug) {
        my(@sql_r, @sql_i) = ();
        for (tuwf->{_TUWF}{DB}{queries}->@*) {
            my($sql, $params, $time) = @$_;
            # Numeric placeholder names sort numerically, anything else as strings.
            my @params = sort { $a =~ /^[0-9]+$/ && $b =~ /^[0-9]+$/ ? $a <=> $b : $a cmp $b } keys %$params;
            my $prefix = sprintf " [%6.2fms] ", $time*1000;
            push @sql_r, sprintf "%s%s | %s", $prefix, $sql, join ', ', map "$_:".DBI::neat($params->{$_}), @params;
            my $i=1;
            push @sql_i, $prefix.($sql =~ s/\?/tuwf->dbh->quote($params->{$i++})/egr);
        }
        my $sql_r = join "\n", @sql_r;
        my $sql_i = join "\n", @sql_i;
        my $modules = join "\n", sort keys %INC;

        # NOTE(review): the reviewed source emitted only "\n" here and left
        # $sql_r/$sql_i/$modules unused. The comment wrapper below does what
        # the comment above describes; its exact wording is a reconstruction.
        my $dump = "SQL (with placeholders):\n$sql_r\n\n"
                  ."SQL (interpolated, possibly buggy):\n$sql_i\n\n"
                  ."Modules:\n$modules\n";
        # Query parameters may contain anything; don't let them end the comment early.
        $dump =~ s/-->/--&gt;/g;
        lit_ "\n<!--\n$dump-->\n";
    }
}


# Page navigation: links to the first and last page and up to four pages on
# either side of the current one.
#
#   $url     - URL prefix, the page number is appended to it
#   $count   - total number of items
#   $perpage - number of items per page
#   $p       - current page number (1-based)
#
# Outputs nothing when everything fits on a single page.
sub paginate_ {
    my($url, $count, $perpage, $p) = @_;
    return if $count <= $perpage;

    # Link to page $c, or a bold number if that's the current page.
    my sub l_ {
        my($c)= @_;
        a_ href => "$url$c", $c if $c != $p;
        b_ $c if $c == $p;
    };

    my $lp = ceil($count/$perpage); # last page
    nav_ class => 'paginate', sub {
        l_ 1 if $p > 1+4;
        b_ '...' if $p > 1+5;
        l_ $_ for (($p > 4 ? $p-4 : 1)..($p+4 > $lp ? $lp : $p+4));
        b_ '...' if $p < $lp-5;
        l_ $lp if $p < $lp-4;
    }
}


TUWF::set error_404_handler => sub {
    tuwf->resStatus(404);
    my $title = 'No manual entry for '.tuwf->reqPath;
    framework_ title => $title, sub {
        h1_ $title;
        p_ 'That is, the page you were looking for doesn\'t exist.';
    };
};


# Home page. The heredoc that follows is an sprintf template filled with the
# (comma-grouped) counters from stats_cache.
TUWF::get '/' => sub {
    my $stats = tuwf->dbRow('SELECT * FROM stats_cache');

    # Insert thousands separators: 1234567 -> 1,234,567
    sub num { local $_=shift; 1 while(s/(\d)(\d{3})($|,)/$1,$2/); $_ };

    framework_ title => 'Man Pages Archive', mainclass => 'thin', sub {
        h1_ 'Welcome to Manned.org';
        h2_ 'The archive for man pages';
        lit sprintf <<' _', map num($stats->{$_}), qw|hashes mans files packages|;

Indexing %s versions of %s manual pages found in %s files of %s packages.

Manned.org aims to index all manual pages from a variety of systems, both old and new, and provides a convenient interface for looking up and viewing the various versions of each man page. More information »

 _
        # One entry per system name. A name with a single release links straight
        # to that release's package list; otherwise every release gets its own
        # link, in reverse order of the systems() list.
        h2_ 'Indexed systems';
        div_ class => 'systems', sub {
            # System name => array of system rows sharing that name.
            my %sys;
            push $sys{$_->{name}}->@*, $_ for systems->@*;
            div_ sub {
                my $sys = $sys{$_};
                # Image name: the short name with its last "-suffix" removed.
                my $img = $sys->[0]{short} =~ s/^(.+)-.+$/$1/r;
                if(@$sys == 1) {
                    a_ href => "/pkg/$sys->[0]{short}", sub {
                        img_ width => 50, height => 50, src => "images/$img.png";
                        b_ $sys->[0]{name};
                    };
                    return;
                }
                img_ width => 50, height => 50, src => "images/$img.png";
                div_ sub {
                    b_ $sys->[0]{name};
                    for(reverse @$sys) {
                        a_ href => "/pkg/$_->{short}", $_->{release};
                        lit_ ' ';
                    }
                };
            } for sort keys %sys;
        };

        h2_ 'Other relevant sites';
        ul_ sub {
            li_ sub { a_ href => 'http://man7.org/linux/man-pages/index.html', 'man7.org'; txt_ ' - Linux man pages from several upstream projects.' };
            li_ sub { a_ href => 'https://manpag.es/', 'ManPag.es'; txt_ ' - Man pages from several Linux distributions.' };
            li_ sub { a_ href => 'https://www.mankier.com/', 'ManKier'; txt_ ' - Fedora Rawhide + some manually imported man pages; Nicely formatted and with some unique features.' };
            li_ sub { a_ href => 'https://man.cx/', 'man.cx'; txt_ ' - Man pages extracted from Debian testing.' };
            li_ sub { a_ href => 'http://man.he.net/', 'man.he.net'; txt_ ' - Also seems to be from a Debian-like system.' };
            li_ sub { a_ href => 'https://linux.die.net/man/', 'die.net'; txt_ ' - Seems to be based on an RPM-based Linux distribution.' };
            li_ sub { a_ href => 'http://manpages.org/', 'manpages.org'; txt_ ' - Lots of mostly-nicely formatted man pages, no clue about source.' };
            li_ sub { a_ href => 'https://www.manpagez.com/', 'manpagez.com'; txt_ ' - Mac OS X, has some GTK-html and texinfo documentation as well.' };
            li_ sub { a_ href => 'https://man.archlinux.org/', 'Arch Linux Man Pages' };
            li_ sub { a_ href => 'https://manpages.debian.org/', 'Debian Man Pages' };
            li_ sub { a_ href => 'https://www.dragonflybsd.org/cgi/web-man', 'DragonFlyBSD Man Pages' };
            li_ sub { a_ href => 'https://www.freebsd.org/cgi/man.cgi', 'FreeBSD.org Man Pages' };
            li_ sub { a_ href => 'https://man.netbsd.org/', 'NetBSD Man Pages' };
            li_ sub { a_ href => 'https://www.openbsd.org/cgi-bin/man.cgi', 'OpenBSD Man Pages' };
            li_ sub { a_ href => 'https://manpages.ubuntu.com/', 'Ubuntu Manuals' };
            li_ sub { a_ href => 'https://man.voidlinux.org/', 'Void Linux manpages' };
        };
    };
};


# Static "About" page; the page body is the literal heredoc that follows.
TUWF::get '/info/about' => sub {
    framework_ title => 'About', mainclass => 'thin', sub {
        h1_ 'About Manned.org';
        lit <<' _';

Goal

The state of online indices of manual pages used to be a sad one. Existing sites used to only offer you a single version of a man page: From one origin, and often only in a single language. Most didn't even tell you where the manual actually originated from, making it very hard to determine whether the manual you found applied to your situation and even harder to find a manual for a specific system. Additionally, some sites rendered the manuals in an unreadable way, didn't correctly handle special formatting - like tables - or didn't correctly display non-ASCII characters.

Nowadays there are many good alternatives, but Manned.org was one of the sites created in order to improve that situation. This site aims to index the manual pages from a variety of systems, both old and new, and allows you to browse through the various versions of a manual page to find out how each system behaves. The manuals are stored in the database as UTF-8, and are passed through groff to render them in (mostly) the same way as they are displayed in your terminal.

This website is open source (AGPL licensed) and written in a combination of Perl and Rust. The entire PostgreSQL database is available for download.

URL format

You can link to specific packages and man pages with several URL formats. These URLs will keep working in the future, so you should not have to worry about eventual dead links.

Man pages

The following URLs are available to refer to an individual man page:

/<name>[.<section>] or /man/<name>[.<section>]
Will try to get the latest and most-close-to-upstream version of a man page. Note that this will fetch the man page from any of the available systems, so may result in confusing scenarios for system-specific documentation. I try to at least keep the selection algorithm stable and deterministic, but can't provide any guarantees. Examples:
/socket
/socket.7
/man/socket.7
/man/<system>/<name>[.<section>]
Will get the latest version of a man page from the given system, e.g.:
/man/ubuntu/rsync
/man/ubuntu-xenial/rsync
/man/<system>/<category>/<package>/<name>[.<section>]
Will get the latest version of a man page from the given package, e.g.:
/man/ubuntu-xenial/net/rsync/rsync
/man/<system>/<category>/<package>/<version>/<name>[.<section>]
Will get the man page from a specific package version, e.g.:
/man/ubuntu-xenial/net/rsync/3.1.1-3ubuntu1/rsync
/man.<language>/...
Adding a language code to the /man/ component will select the man page in the requested language. The man page has to be available in that language, otherwise you will get a 404. Redirects to other languages as fallback may be implemented in the future. English man pages are typically not tagged with a language at all, so explicitly requesting /man.en/... will usually fail. This, too, may be improved in the future. Examples:
/man.de/faked-tcp
/man.de/fedora/rsync.1
/man.<8-hex-digits>/...
Permalink format. Adding the shorthash of the man page to the /man/ component of the above URLs will get that specific man page from the requested system and/or package. The contents of the man page should generally be the same regardless of which system or package is included in the URL, but the UI may provide a different navigation context. Examples:
/man.910be0ed/ls
/man.910be0ed/fedora/ls
/man.910be0ed/arch/ls
/man.910be0ed/fedora/everything/coreutils-common/ls
/raw...
In all of the above URL formats, you can replace /man with /raw to get the raw UTF-8 encoded man page source, e.g.:
/raw/socket.7
/raw/ubuntu-xenial/net/rsync/3.1.1-3ubuntu1/rsync
/raw.de/faked-tcp
/raw.910be0ed/fedora/ls
/<name>/<8-hex-digits>
Old permalink format for a specific man page (e.g. /ls/910be0ed).

In all URLs where an optional .<section> can be provided, the search is performed as a prefix match. For example, /cat.3 will provide the cat.3tcl man page if no exact cat.3 version is available. Linking to the full section name is also possible: /cat.3tcl. If no section is given and multiple sections are available, the lowest section number is chosen.

Packages

Linking to individual packages is also possible. These pages will show a listing of all manual pages available in the given package.

/pkg/<system>/<category>/<package>
For the latest version of a package (e.g. /pkg/arch/core/coreutils).
/pkg/<system>/<category>/<package>/<version>
For a particular version of a package (e.g. /pkg/arch/core/coreutils/8.25-2).

This site only indexes packages that actually have manual pages; linking to a package that doesn't have any will result in a 404 page.

The indexing process

All man pages are fetched right from the (binary) packages available on the public repositories of Linux distributions. In particular:

Alpine Linux
The main (since 3.0) and community (since 3.3) repositories are indexed for the x86_64 architecture. Indexing started in December 2021, packages and releases not available in the repositories at that time have not been indexed. I haven't found an archive for version 2.x releases yet.
Arch Linux
The core, extra and community repositories are fetched from a local Arch mirror. Indexing started around the beginning of June 2012. The i686 architecture was indexed until November 6th, 2016; packages after that were fetched from x86_64.
Debian
Historical releases were fetched from http://archive.debian.org/debian/ and http://snapshot.debian.org/. For buzz, rex and bo, we're missing a few man pages because some packages were missing from the repository archives. Where available, all components (main, contrib and non-free) from the $release and $release-updates repositories are indexed.
CentOS
Historical releases were fetched from vault.centos.org, current releases from a local mirror. Where applicable, the following repositories were indexed: addons, centosplus, contrib, extras, os. The i386 architecture was indexed for versions lower than 7.0, since 7.0 the packages from x86_64 are indexed.
Fedora
Historical releases were fetched from archives.fedoraproject.org, current releases from a local repository. Fedora Core 1 till 6 are (incorrectly) called 'Fedora' here. To compensate for that, Fedora 3 till 6 also include the Extras repository. For Fedora 7 and later, the 'Everything' and 'updates' repositories are indexed. The i386 arch was indexed for Fedora 17 and older, the x86_64 arch starting with Fedora 18.
FreeBSD
Historical releases were fetched from http://ftp-archive.freebsd.org/mirror/FreeBSD-Archive/. The base installation tarballs are included in the database as packages prefixed with core-. The package repositories have also been indexed, except for 2.0.5 - 2.2.7 and 3.0 - 3.3 because those were not available on the ftp archive. Only the -RELEASE repositories have been included, which is generally a snapshot of the ports directory around the time of the release. The release dates indicated for many packages were guessed from the file modification dates in the tarball, and may be inaccurate. The i386 arch was indexed for FreeBSD 11.0 and older, the amd64 arch starting with 11.1.
NetBSD
Only the core installation sets have been indexed, pkgsrc is awesome but out of scope for now. The i386 arch was indexed for 5.x and older, the amd64 arch starting with 6.0. Releases before 1.3 only distributed preformatted man pages and have therefore not been indexed. The original roff sources could perhaps be extracted from the source tarballs, but that's a project for another time.
Ubuntu
Historical releases were fetched from http://old-releases.ubuntu.com/ubuntu/, supported releases from a local mirror. All components (main, universe, restricted and multiverse) from the $release, $release-updates and $release-security repositories are indexed. Indexing started around mid June 2012. All releases before 2017 were indexed from the i386 repositories, starting with 17.04 the amd64 repositories were used.

Only packages for a single architecture (i386 or amd64) are scanned. To my knowledge, packages that come with different manuals for different architectures either don't exist or are extremely rare. It does happen that some packages are not available for all architectures. Usually, though, every package is at least available for the most popular architecture, so hopefully we're not missing out on much.

The repositories are scanned for new packages on a daily basis.

Database download

This site is backed by a PostgreSQL database containing all the man pages. Weekly dumps of the full database are available for download at http://dl.manned.org/dumps/.

Be warned that the download server may not be terribly fast or reliable, so it is advisable to use a client that supports resumption of partial downloads. See wget's -c or curl's -C.

The database schema is "documented" at schema.sql in the git repo. Keep in mind that these dumps don't constitute a stable API and, while this won't happen frequently, incompatible schema changes or Postgres major version bumps will occasionally occur.

Future plans

This site isn't nearly as awesome yet as it could be. Here are some ideas that would be nice to have in the future:

  • Index a few more systems: Gentoo (now that it has official binary packages), OpenBSD and perhaps others.
  • Better browsing and discovery features.
  • Improved, more intelligent, search,
  • apropos(1) emulation(?),
  • Diffs between various versions of a man page,
  • Anchor links within man pages, for easier linking to a section or paragraph,
  • Alternative formats (Text, PDF, more semantic HTML, etc),
  • A command-line client, like man(1) with manned.org as database backend.

All manual pages are copyrighted by their respective authors. The manuals have been fetched from publicly available repositories of free and (primarily) open source software. The distributors of said software have put in efforts to only include software and documentation that allows free distribution. Nonetheless, if a manual that does not allow to be redistributed has been inadvertently included in our index, please let me know and I will have it removed as soon as possible.

_ }; }; # Very simple (and fast) prefix match. sub search_man { my($q, $limit) = @_; my $sect = $q =~ s/^([0-9])\s+// || $q =~ s/\(([a-zA-Z0-9]+)\)$// ? $1 : ''; my $name = $q =~ s/^([a-zA-Z0-9,.:_-]+)// ? $1 : ''; return !$name ? [] : tuwf->dbAlli( 'SELECT name, section FROM mans WHERE', sql_and( sql('lower(name) LIKE', \(escape_like(lc $name).'%')), $sect ? sql('section ILIKE', \(escape_like(lc $sect).'%')) : (), ), 'ORDER BY name, section LIMIT', \$limit, ); } TUWF::get '/browse/search' => sub { my $q = tuwf->reqGet('q')||''; my $man = search_man $q, 150; return tuwf->resRedirect("/$man->[0]{name}.$man->[0]{section}", 'temp') if @$man == 1; framework_ title => 'Search results for '.$q, mainclass => 'searchres', sub { h1_ 'Search results for '.$q; # Package search would also be useful. p_ 'Note: This is just a simple case-insensitive prefix match on the man names. In the future we\'ll have more powerful search functionality. Hopefully.'; if(@$man) { ul_ sub { li_ sub { a_ href => "/$_->{name}.$_->{section}", $_->{name}; small_ " $_->{section}"; } for @$man; } } else { p_ 'No results :-('; } }; }; TUWF::get '/xml/search.xml' => sub { my $q = tuwf->reqGet('q')||''; my $man = search_man $q, 20; tuwf->resHeader('Content-Type' => 'text/xml; charset=UTF-8'); xml; tag 'results', sub { tag 'item', id => "$_->{name}.$_->{section}", %$_, undef for @$man; }; }; # Object to represent the various URLs to a man page. # # Parameters: # fmt => man|txt|raw # shorthash => 8-char hex # lang => language code # system => system shortname # category => package category # package => name of the package # version => package version # man => name of the man page # section => man page section # # URL format: # /$fmt[.$shorthash][.$lang][/$system[/$category/$package[/$version]]]/$man[.$section] # # Note that the URL format has some ambiguity: # - $category may contain a slash, so a database lookup is required to # disambiguate between URLs with [/$version] and those without. 
# - $man may contain a dot, so a database lookup is required to disambiguate # between URLs with [.$section] and those without # # $system may also refer to system shortnames without the version suffix (e.g. # 'ubuntu' rather than 'ubuntu-impish'). In that case the man page from the # latest release of that system is chosen. package ManUrl { sub new { my($p,%o)=@_; bless \%o, $p } sub set { my($o,@o)=@_; bless +{%$o,@o}, ref $o } sub mansect { $_[0]{man}.(defined $_[0]{section} ? ".$_[0]{section}" : '') } use overload '""' => sub { my($o)=@_; "/$o->{fmt}".(defined $o->{shorthash} ? ".$o->{shorthash}" : '').(defined $o->{lang} ? ".$o->{lang}" : '') .(defined $o->{system} ? ("/$o->{system}" .(defined $o->{category} ? ("/$o->{category}/$o->{package}" .(defined $o->{version} ? "/$o->{version}" : '')) : '')) : '') .'/'.$o->mansect }; }; sub man_nav_ { my($man, $url, $toc, $htmllang) = @_; my @systems = tuwf->dbAlli(' SELECT DISTINCT p.system FROM packages p JOIN package_versions v ON v.package = p.id JOIN files f ON f.pkgver = v.id JOIN mans m ON m.id = f.man WHERE m.name =', \$man->{name}, 'AND m.section =', \$man->{section} )->@*; my @sect = map $_->{section}, tuwf->dbAlli( 'SELECT DISTINCT section FROM mans WHERE name =', \$man->{name}, 'ORDER BY section' )->@*; my @lang = map $_->{lang}, tuwf->dbAlli( "SELECT DISTINCT substring(l.locale from '^[^.]+') AS lang FROM files f JOIN mans m ON m.id = f.man JOIN locales l ON l.id = f.locale WHERE m.name =", \$man->{name}, 'AND m.section =', \$man->{section}, " ORDER BY substring(l.locale from '^[^.]+') NULLS FIRST" )->@*; nav_ sub { form_ action => '/sysredir/'.$url->mansect(), method => 'get', onsubmit => 'location.href="/man/"+system_select[system_select.selectedIndex].value+"/'.$url->mansect().'";return false', sub { my %names; push $names{$_->{name}}->@*, $_ for map sysbyid->{$_->{system}}, sort { $b->{system} <=> $a->{system} } @systems; select_ id => 'system_select', name => 'system', sub { for (sort { ($names{$b}->@* 
== 1) <=> ($names{$a}->@* == 1) || $a cmp $b } keys %names) { my $s = $names{$_}; if (@$s == 1) { option_ value => $s->[0]{short}, selected => $s->[0]{id} == $man->{system}?'':undef, $s->[0]{full}; next; } optgroup_ label => $_, sub { option_ value => $_->{short}, selected => $_->{id} == $man->{system}?'':undef, $_->{full} for @$s; }; } }; input_ type => 'submit', value => 'Go'; } if @systems > 1; # TODO: This is ugly, especially because clicking on a translation or # section, you can end up with a man page that is nowhere close to the # man page you're currently reading. Sections or languages available # for the currently selected system should be highlighted. if(@sect > 1) { b_ 'Sections'; p_ sub { for (@sect) { if($man->{section} eq $_) { i_ $_; } else { a_ href => "/man/$man->{name}.$_", $_; } txt_ ' '; } } } if(@lang > 1) { b_ 'Languages'; p_ sub { (my $cur = $man->{locale}||'') =~ s/\..*//; for (@lang) { if(($_||'') eq $cur) { i_ $_ || 'default'; } else { a_ href => $_ ? "/man.$_/$man->{name}.$man->{section}" : "/man/$man->{name}.$man->{section}", $_ || 'default'; } txt_ ' '; } } } if(@$toc > 1) { b_ 'Table of Contents'; ul_ sub { for (0..$#$toc) { li_ sub { a_ @$htmllang, href => sprintf('#head%d', $_+1), sub { lit_ lc $toc->[$_] }; } } } } } } # Replace .so's in man source with the contents (if available in the same # package) or with a reference to the other man page. sub soelim { my($verid, $src) = @_; # tix comes with* a custom(?) macro package. But it looks okay even without loading that. # (* It actually doesn't, the tcllib package appears to have that file, but doesn't '.so' it) $src =~ s/^\.so man.macros$//mg; # Other .so's should be handled by html() $src =~ s{^\.so (.+)$}{ my $path = $1; my $name = (reverse split /\//, $path)[0]; my($man) = $verid ? man_pref_name $name, sql 'v.id =', \$verid : (); $man->{name} # Recursive soelim, but the second call gets $verid=0 so we don't keep checking the database ? 
soelim(0, tuwf->dbVali("SELECT content FROM contents WHERE id =", \$man->{content})) : ".in -10\n.sp\n\[\[\[MANNEDINCLUDE$path\]\]\]" }emg; $src; }


# Send the response for a single man page.
#   $man - row describing the man page (reads name, section, locale, system,
#          shorthash, content, verid and released).
#   $url - ManUrl object; $url->{fmt} selects the output: 'raw' emits the
#          stored source, 'txt' emits the formatted text, anything else renders
#          the full HTML page.
sub man_page {
  my($man, $url) = @_;

  tuwf->resLastMod($man->{released});

  my $content = tuwf->dbRowi('SELECT encode(hash, \'hex\') AS hash, content FROM contents WHERE id =', \$man->{content});

  if($url->{fmt} eq 'raw') {
    tuwf->resHeader('Content-Type', 'text/plain; charset=UTF-8');
    tuwf->resHeader('Content-Disposition', sprintf 'filename="%s.%s"', $man->{name}, $man->{section});
    lit $content->{content};
    return;
  }

  my $fmt = ManUtils::html ManUtils::fmt_block soelim $man->{verid}, $content->{content};

  if($url->{fmt} eq 'txt') {
    # TODO: The 'txt' format is kind of broken right now as it includes our HTML formatting codes.
    # This feature is a WIP and not advertised at the moment, anyway.
    tuwf->resHeader('Content-Type', 'text/plain; charset=UTF-8');
    tuwf->resHeader('Content-Disposition', sprintf 'filename="%s.%s.txt"', $man->{name}, $man->{section});
    lit $fmt;
    return;
  }

  # Prefix links to other man pages with the current system, to ensure we
  # grab the most relevant man page.
  # XXX: This is a hack, prefixing is better done directly in ManUtils.
  my $sys = sysbyid->{$man->{system}}{short};

  # Collect section headings for the navigation; @toc is handed to man_nav_
  # below. It is declared here because nothing else in scope declares it and
  # the file runs under strictures.
  # NOTE(review): the substitution below looks truncated - it matches a closing
  # </b> without the opening tag, and neither $c nor $sys is used afterwards.
  # It is kept exactly as found; TODO compare with the upstream file and
  # restore the missing markup and the link-prefix substitution.
  my @toc;
  $fmt =~ s{([^<\n]+?)<\/b>\n}{
    push @toc, $1;
    my $c = @toc;
    qq{\n$1\n}
  }eg;

  # Tells the client-side 'versions' button whether another file with the same
  # name, section and locale but a different shorthash exists.
  my $hasversions = tuwf->dbVali(
    'SELECT 1 FROM files f'
    . ' JOIN mans m ON m.id = f.man'
    . ' JOIN locales l ON l.id = f.locale'
    . ' WHERE m.name =', \$man->{name},
    'AND m.section =', \$man->{section},
    ' AND l.locale =', \$man->{locale},
    ' AND f.shorthash <> ', \$man->{shorthash},
    ' LIMIT 1'
  );

  # Turn a locale such as "pt_BR.UTF-8" into an HTML lang attribute ("pt-BR").
  my @htmllang = $man->{locale} =~ /^([a-z]{2,3})(?:_([A-Z]{2}))?(?:$|@|\.)/ ? (lang => $1.($2?"-$2":'')) : ();

  framework_ title => $man->{name}, mainclass => 'manpage', sub {
    man_nav_ $man, $url, \@toc, \@htmllang;
    # TODO: Replace the 'versions' and 'locations' functionality with non-JS alternatives.
    div_ id => 'manbuttons', sub {
      h1_ $man->{name};
      ul_
        'data-hash'        => $content->{hash},
        'data-name'        => $man->{name},
        'data-section'     => $man->{section},
        'data-locale'      => $man->{locale}||'',
        'data-hasversions' => $hasversions?1:0,
      sub {
        li_ sub { a_ href => $url->set(fmt => 'raw'), 'source' };
        li_ sub { a_ href => $url->set(system => sysbyid->{$man->{system}}{short}, category => undef, shorthash => shorthash_to_hex $man->{shorthash}), 'permalink' };
      }
    };
    div_ id => 'manres', class => 'hidden', '';
    pre_ @htmllang, sub { lit_ $fmt };
  };
}


# /<name>[.section]  - short and handy catch-all URL for man pages
# /<name>/<hash>     - old permalink format
# This one has to go before the other mappings, to ensure that links work for
# man pages called 'pkg' or 'man'.
# The capture groups are named because the handler fetches them by name.
TUWF::get qr{/(?<name>[^/]+)(?:/(?<hash>[0-9a-f]{8}))?} => sub {
  my $name = normalize_name tuwf->capture('name');
  my $shorthash = tuwf->capture('hash');

  my($man, $sect) = man_pref_name $name, $shorthash ? sql 'f.shorthash =', \shorthash_to_int $shorthash : 'true';
  return tuwf->resNotFound() if !$man->{name};
  man_page $man, ManUrl->new(
    fmt     => 'man',
    man     => length $sect ? $man->{name} : $name,
    section => length $sect ? $sect : undef,
  );
};


# /<name>/<hash>/src - old URL format to get the raw man page
TUWF::get qr{/([^/]+)/([0-9a-f]{8})/src} => sub {
  my $name = normalize_name tuwf->capture(1);
  my $shorthash = tuwf->capture(2);
  my($man) = man_pref_name $name, sql 'f.shorthash =', \shorthash_to_int $shorthash;
  return tuwf->resNotFound if !$man->{name};
  man_page $man, ManUrl->new(fmt => 'raw', man => $name);
};


# /(man|txt|raw)[.<shorthash>][.<lang>]/[<system>/[<category>/<package>[/<version>]/]]<name>
# Full-featured man page URL; every path component narrows the selection.
TUWF::get qr{/(?<fmt>man|txt|raw)(?:\.(?<shorthash>[a-fA-F0-9]{8}))?(?:\.(?<lang>[^/]+))?/(?<path>.+)} => sub {
  my($fmt, $shorthash, $lang, $path) = tuwf->captures(qw|fmt shorthash lang path|);
  my @where;

  # Peel the man page name off the end and the system off the front of $path;
  # whatever remains in between identifies the package.
  my $name = normalize_name($path =~ s{/?([^/]+)$}{} && $1);
  my $system = $path =~ s{^([^/]+)/?}{} && $1;

  # $system can be either a full system 'short' name, or a prefix (e.g. 'debian' meaning 'any debian-* version')
  if($system) {
    my $sysid = sysbyshort->{$system};
    $sysid = $sysid ? [$sysid->{id}] : [ map sysbyshort->{$_}{id}, grep /^\Q$system\E-/, keys sysbyshort->%* ];
    return tuwf->resNotFound if !@$sysid;
    push @where, sql 'system IN', $sysid;
  }

  # $path is now either:
  # 1. $category/$package
  # 2. $category/$package/$version
  my($pkg, $ver) = length $path ? pkg_frompath sql_and(@where), $path : (undef,undef);
  return tuwf->resNotFound if length $path && !$pkg;

  push @where, sql 'p.id =', \$pkg->{id} if $pkg;
  push @where, sql 'v.version =', \$ver if length $ver;
  push @where, sql 'f.shorthash =', \shorthash_to_int $shorthash if $shorthash;
  push @where, sql 'l.locale ilike', \(escape_like($lang).'%') if $lang;

  my($man, $section) = man_pref_name $name, sql_and @where;
  # Accept both "no row" conventions (undef or a row without a name), like the
  # sibling handlers above that test $man->{name}.
  return tuwf->resNotFound if !$man || !$man->{name};

  my $url = ManUrl->new(
    fmt       => $fmt,
    shorthash => $shorthash,
    lang      => $lang,
    system    => length $system ? $system : undef,
    category  => $pkg ? $pkg->{category} : undef,
    package   => $pkg ? $pkg->{name} : undef,
    version   => length $ver ? $ver : undef,
    man       => length $section ? $man->{name} : $name,
    section   => length $section ? $section : undef,
  );
  man_page $man, $url;
};


# Package list: /pkg/$system, filtered by first character (?c=) and paginated (?p=).
TUWF::get qr{/pkg/([^/]+)} => sub {
  my $short = tuwf->capture(1);
  my $sys = sysbyshort->{$short};
  return tuwf->resNotFound if !$sys;

  my $f = tuwf->validate(get =>
    c => { onerror => 'all', enum => [ '0', 'all', 'a'..'z' ] },
    p => { onerror => 1, uint => 1, range => [1,200] },
  )->data;

  my $where = sql 'NOT dead AND system =', \$sys->{id},
    $f->{c} ne 'all' ? ('AND match_firstchar(name,', \$f->{c}, ')') : ();

  my $count = tuwf->dbVali('SELECT count(*) FROM', $packages_with_man, 'p WHERE', $where);

  my $pkg = tuwf->dbPagei({ results => 200, page => $f->{p} },
    'SELECT id, system, name, category, dead FROM', $packages_with_man, 'p WHERE', $where, 'ORDER BY name, category'
  );

  framework_ title => $sys->{full}, mainclass => 'pkglist', sub {
    div_ sub {
      div_ sub {
        h1_ $sys->{full};
      };
      nav_ class => 'charselect', sub {
        # The currently selected filter is shown in bold, the others as links;
        # the '0' filter is displayed as '#'.
        for('all', 0, 'a'..'z') {
          a_ href => "/pkg/$short?c=$_", $_?uc$_:'#' if $_ ne $f->{c};
          b_ $_?uc$_:'#' if $_ eq $f->{c};
        }
      };
    };
    small_ '(Packages without man pages are not listed)';
    paginate_ "/pkg/$short?c=$f->{c};p=", $count, 200, $f->{p};
    ul_ sub {
      li_ sub {
        a_ href => "/pkg/$short/$_->{category}/$_->{name}", $_->{name};
        small_ ' '.$_->{category};
      } for @$pkg;
    };
    paginate_ "/pkg/$short?c=$f->{c};p=", $count, 200, $f->{p};
  };
};


# Package info: /pkg/$system/$category/$name (/$version); $category may contain a slash, too.
TUWF::get qr{/pkg/([^/]+)/(.+)} => sub {
  my ($short, $path) = tuwf->captures(1,2);
  my $sys = sysbyshort->{$short};
  return tuwf->resNotFound if !$sys;

  my($pkg, $ver) = pkg_frompath(sql('system =', \$sys->{id}), $path);
  return tuwf->resNotFound if !$pkg;

  my $vers = tuwf->dbAlli(
    ' SELECT id, version, released'
    . ' FROM package_versions v'
    . ' WHERE package =', \$pkg->{id},
    ' AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = v.id)'
    . ' ORDER BY released DESC'
  );
  # pkg_frompath yields '' when the URL carries no version; test with length
  # so that a version that is literally "0" is still honoured.
  my $sel = length $ver ? (grep $_->{version} eq $ver, @$vers)[0] : $vers->[0];
  return tuwf->resNotFound if !$sel;

  my $p = tuwf->validate(get => p => { onerror => 1, uint => 1, range => [1,100] })->data;

  my $count = tuwf->dbVali('SELECT count(*) FROM files WHERE pkgver =', \$sel->{id});

  # needlang: the same man page exists more than once in this package version.
  # needhash: the same man page exists more than once for the same locale.
  my $mans = tuwf->dbPagei({ results => 200, page => $p },
    ' WITH lst AS ('
    . ' SELECT f.man, m.name, m.section, f.shorthash, f.filename, l.locale'
    . ' FROM files f'
    . ' JOIN locales l ON l.id = f.locale'
    . ' JOIN mans m ON m.id = f.man'
    . ' WHERE f.pkgver =', \$sel->{id},
    ' ), needlang AS ('
    . ' SELECT man FROM lst GROUP BY man HAVING count(*) > 1'
    . ' ), needhash AS ('
    . ' SELECT man, locale FROM lst GROUP BY man, locale HAVING count(*) > 1'
    . ' )'
    . ' SELECT name, section, shorthash, filename, locale'
    . ' , EXISTS(SELECT 1 FROM needlang WHERE man = l.man) AS needlang'
    . ' , EXISTS(SELECT 1 FROM needhash WHERE man = l.man AND locale = l.locale) AS needhash'
    . ' FROM lst l'
    . ' ORDER BY name, section, locale, filename '
  );

  # Latest version of this package determines last modification date of the page.
  tuwf->resLastMod($vers->[0]{released});

  my $subtitle = " / $pkg->{category} / $pkg->{name}";
  my $pkgpath = "$sys->{short}/$pkg->{category}/$pkg->{name}";

  framework_ title => "$sys->{full}$subtitle $sel->{version}", mainclass => 'pkgpage', sub {
    h1_ sub {
      a_ href => "/pkg/$sys->{short}", $sys->{full};
      txt_ $subtitle;
    };
    div_ sub {
      section_ sub {
        h2_ 'Versions';
        ul_ sub {
          li_ sub {
            # $sel is one of the elements of @$vers, so comparing the
            # references tells us which row is the selected one.
            a_ href => "/pkg/$pkgpath/$_->{version}", $_->{version} if $_ != $sel;
            b_ " $_->{version}" if $_ == $sel;
            small_ " $_->{released}";
          } for(@$vers);
        }
      };
      section_ sub {
        h2_ "Manuals for version $sel->{version}";
        paginate_ "/pkg/$pkgpath/$sel->{version}?p=", $count, 200, $p;
        ul_ sub {
          li_ sub {
            # Only add the hash or locale to the URL if it's necessary to select the proper man page.
            my $ext = $_->{needhash} ? '.'.shorthash_to_hex $_->{shorthash}
                    : $_->{needlang} && length $_->{locale} ? ".$_->{locale}"
                    : '';
            a_ href => "/man$ext/$pkgpath/$sel->{version}/$_->{name}.$_->{section}", "$_->{name}($_->{section})";
            b_ " $_->{locale}" if $_->{locale};
            small_ " $_->{filename}";
          } for(@$mans);
        };
        paginate_ "/pkg/$pkgpath/$sel->{version}?p=", $count, 200, $p;
      };
    };
  }
};


# /browse/<pkg> has been moved to /pkg/ with the package category added to the path
TUWF::get qr{/browse/([^/]+)} => sub { tuwf->resRedirect('/pkg/'.tuwf->capture(1), 'perm') };
TUWF::get qr{/browse/([^/]+)/([^/]+)(?:/([^/]+))?} => sub {
  my($sys, $name, $ver) = tuwf->captures(1,2,3);
  $sys = sysbyshort->{$sys};
  return tuwf->resNotFound if !$sys;
  my $pkgs = tuwf->dbRowi('SELECT category FROM packages WHERE system =', \$sys->{id}, 'AND name =', \$name, 'LIMIT 1');
  return tuwf->resNotFound if !defined $pkgs->{category};
  # The version capture is optional (undef when absent); length keeps a
  # version of "0" in the redirect target.
  tuwf->resRedirect("/pkg/$sys->{short}/$pkgs->{category}/$name".(length $ver ? "/$ver" :''), 'perm');
};


# Redirect for the system selection box, for visitors who have disabled JS.
TUWF::get qr{/sysredir/([^/]+)} => sub {
  tuwf->resRedirect('/man/'.(tuwf->reqGet('system')//'arch').'/'.tuwf->capture(1), 'temp')
};


# Redirect for a specific language for a man page. I have no idea if anyone
# still uses this URL format, but it was supported at some point, so let's keep
# it around.
TUWF::get qr{/lang/([^/]+)/([^/]+)} => sub {
  # /lang/$lang/$name -> /man.$lang/$name
  tuwf->resRedirect('/man.'.tuwf->capture(1).'/'.tuwf->capture(2), 'temp')
};


# /json/tree.json - JSON tree (system -> system version -> package -> files)
# of the places where a man page can be found. Query parameters:
#   hash             - 40-digit hex content hash; list files with this content.
#   name + section   - list files for this man page name and section.
#   locale           - optional extra filter on the locale.
#   cur              - 40-digit hex hash of the page being viewed; entries whose
#                      shorthash equals its first 8 digits get no link.
# Either 'hash' or both 'name' and 'section' are required, otherwise 404.
TUWF::get '/json/tree.json' => sub {
  my $f = tuwf->validate(get =>
    name    => { default => '', maxlength => 256 },
    section => { default => '', maxlength => 32 },
    # NOTE(review): the default sub hands back its argument, presumably so a
    # missing locale stays undef for the defined() test below - confirm.
    locale  => { default => sub{$_[0]}, maxlength => 32 },
    cur     => { default => '', regex => qr/^[a-fA-F0-9]{40}$/ },
    hash    => { default => '', regex => qr/^[a-fA-F0-9]{40}$/ },
  )->data;

  # Use length rather than truthiness, like the filter construction below, so
  # that a name or section of "0" is not mistaken for a missing parameter.
  return tuwf->resNotFound() if !$f->{hash} && !(length($f->{section}) && length($f->{name}));

  my $l = tuwf->dbAlli(
    " SELECT p.system, p.category, p.name AS package, v.version, v.released, v.id AS verid, m.name, m.section, f.filename, f.shorthash, l.locale"
    . " FROM files f"
    . " JOIN locales l ON l.id = f.locale"
    . " JOIN mans m ON m.id = f.man"
    . " JOIN package_versions v ON v.id = f.pkgver"
    . " JOIN packages p ON p.id = v.package"
    . " JOIN systems s ON s.id = p.system"
    . " WHERE",
    sql_and(
      length $f->{hash}    ? sql('f.content = (SELECT id FROM contents WHERE hash = decode(', \$f->{hash}, ", 'hex'))") : (),
      length $f->{name}    ? sql('m.name =', \$f->{name}) : (),
      length $f->{section} ? sql('m.section =', \$f->{section}) : (),
      defined $f->{locale} ? sql('l.locale =', \$f->{locale}) : (),
    ),
    ' ORDER BY s.name, s.id DESC, p.name, v.released DESC, m.name, l.locale, f.filename '
  );

  # Convert the list into a tree
  # $cur stays undef when no 'cur' parameter was given, so that a file whose
  # shorthash happens to be 0 is not treated as the current page.
  my $cur = length $f->{cur} ? shorthash_to_int(substr $f->{cur}, 0, 8) : undef;
  my $tree = [];
  my($sys, $sysver, $pkg, $pkgver);
  for my $m (@$l) {
    my $sysinfo = sysbyid->{$m->{system}};

    # The rows are ordered, so a new node is started whenever the system name,
    # system release or package name differs from the previous row.
    my $sysname = $sysinfo->{name};
    if(!$sys || $sysname ne $sys->{name}) {
      $sys = { name => $sysname, childs => [] };
      $sysver = undef;
      push @$tree, $sys;
    }

    my $sysversion = $sysinfo->{release} || '';
    if(!$sysver || $sysversion ne $sysver->{name}) {
      $sysver = { name => $sysversion, childs => [] };
      $pkg = undef;
      push @{$sysver ? $sys->{childs} : []}, $sysver;
    }

    if(!$pkg || $m->{package} ne $pkg->{name}) {
      $pkg = { name => $m->{package}, i => $m->{category}, table => [] };
      $pkgver = undef;
      push @{$sysver->{childs}}, $pkg;
    }

    # No link for the page the visitor is already looking at, and none at all
    # in 'location' (hash) mode.
    my $nolink = $f->{hash} || (defined $cur && $cur == $m->{shorthash});
    my $hex = shorthash_to_hex $m->{shorthash};
    my @href = $nolink ? () : (href => sprintf('/%s/%s', $m->{name}, $hex));

    push @{$pkg->{table}}, [
      # Only name the version on the first row of a run of identical versions.
      # defined() rather than truthiness, so that version "0" is handled too.
      defined $pkgver && $pkgver eq $m->{version}
        ? { name => '' }
        : { name => $m->{version}, href => "/pkg/".$sysinfo->{short}."/$m->{category}/$m->{package}/$m->{version}" },
      { name => "$m->{name}($m->{section})", @href },
      { name => $hex, @href },
      { name => $m->{filename} },
    ];
    $pkgver = $m->{version};
  }

  # Determine which elements to show/hide by default.
  # It might make more sense to do this in JS, but since I am utterly
  # incapable of writing maintainable JS I'm doing it here in order to keep the
  # JS stupid and simple.
  # TODO: Highlight systems/packages where the 'current' man page is?
  for my $sys (@$tree) {
    $sys->{expand} = 1 if $sys->{childs}[0]{name}; # Expand all systems that have named versions
    $sys->{expand} = 1 if $f->{hash};              # Expand everything on 'location'
    my $i = 0;
    for my $sysver (@{$sys->{childs}}) {
      $i++;
      $sysver->{expand} = 1 if !$sysver->{name};                # Expand unnamed versions (since you can't click them)
      $sysver->{expand} = 1 if $f->{hash};                      # Expand everything on 'location'
      $sysver->{hide} = 1 if $i > 3 && @{$sys->{childs}} > 5;   # Show only the first 3 versions
      for my $pkg (@{$sysver->{childs}}) {
        $pkg->{expand} = 1 if @{$sysver->{childs}} <= 3; # Expand everything if there's not too many things to expand
        $pkg->{expand} = 1 if $f->{hash};                # Expand everything on 'location'
        # TODO: Show/Hide duplicate hashes?
      }
    }
  }

  tuwf->resJSON($tree);
};


TUWF::run();