diff options
author | Yorhel <git@yorhel.nl> | 2021-06-09 13:43:40 +0200 |
---|---|---|
committer | Yorhel <git@yorhel.nl> | 2021-06-09 13:43:42 +0200 |
commit | 98e4f014ba242f026102274c4849d9e4de1e2eb2 (patch) | |
tree | f77f3f179c208e5ef89d2c723403ecfebd7baca8 | |
parent | 1572f6cedf77e8e85cc6cefa781e77e6f9182a4e (diff) |
Rewrite Perl web backend to modern TUWF style
Lots of churn for no real benefit. \o/
-rw-r--r-- | README.md | 7 | ||||
-rwxr-xr-x | www/index.pl | 1935 | ||||
-rw-r--r-- | www/man.js | 2 |
3 files changed, 909 insertions, 1035 deletions
@@ -13,11 +13,12 @@ Ironically, documentation about how things work is completely lacking. ### Web front-end -- DBI +- AnyEvent - DBD::Pg -- TUWF +- DBI - JSON::XS -- AnyEvent +- SQL::Interp +- TUWF ### Man page indexer diff --git a/www/index.pl b/www/index.pl index a8f9395..f8e7b46 100755 --- a/www/index.pl +++ b/www/index.pl @@ -1,10 +1,11 @@ #!/usr/bin/perl -use strict; +use v5.26; use warnings; -use TUWF ':html', 'html_escape', ':xml'; -use JSON::XS; +use TUWF ':html_', ':xml'; use POSIX 'ceil'; +use SQL::Interp 'sql', 'sql_interp'; +use Time::Local 'timegm'; use Cwd 'abs_path'; our $ROOT; @@ -20,31 +21,11 @@ use ManUtils; TUWF::set( - logfile => $ENV{TUWF_LOG}, - db_login => [undef, undef, undef], - debug => $ENV{TUWF_DEBUG}, - xml_pretty => 0, - log_slow_pages => 500, - # Cache the system information - pre_request_handler => sub { - my $self = shift; - if(!$self->{systems}) { - $self->{systems} = $self->dbSystemGet; - $_->{full} = $_->{name}.($_->{release}?' '.$_->{release}:'') for(@{$self->{systems}}); - $self->{sysbyid} = { map +($_->{id}, $_), @{$self->{systems}} }; - $self->{sysbyshort} = { map +($_->{short}, $_), @{$self->{systems}} }; - } - 1; - }, - error_404_handler => sub { - my $self = shift; - $self->resStatus(404); - my $title = 'No manual entry for '.$self->reqPath; - $self->htmlHeader(title => $title); - h1 $title; - p 'That is, the page you were looking for doesn\'t exist.'; - $self->htmlFooter; - }, + logfile => $ENV{TUWF_LOG}, + db_login => [undef, undef, undef], + debug => $ENV{TUWF_DEBUG}, + xml_pretty => 0, + log_slow_pages => 500, ); @@ -58,1097 +39,989 @@ TUWF::hook before => sub { }; -TUWF::register( - qr// => \&home, - qr{info/about} => \&about, - qr{browse/search} => \&browsesearch, - - # These have to go before the other mappings, to ensure that links work for - # man pages called 'pkg' or 'man'. This also means that we can't have a - # system named 8 hex digits, but at least that's easy to guarantee. :) - qr{([^/]+)/([0-9a-f]{8})} => \&man, - qr{([^/]+)/([0-9a-f]{8})/src} => \&src, - # We don't have any other single-component paths - qr{([^/]+)} => \&man, - - qr{pkg/([^/]+)} => \&pkg_list, - # pkg/$system/$category/$name (/$version); $category may contain a slash, too. - qr{pkg/([^/]+)/(.+)} => \&pkg_info, - - # Redirects for canonical URLs - qr{man/([^/]+)/(.+)} => \&man_redir, - - # Redirects for old URLs. - # /browse/<pkg> has been moved to /pkg/ with the package category added to the path - qr{browse/([^/]+)} => sub { $_[0]->resRedirect("/pkg/$_[1]", 'perm'); }, - qr{browse/([^/]+)/([^/]+)(?:/([^/]+))?} => sub { - my($self, $sys, $name, $ver) = @_; - $sys = $self->{sysbyshort}{$sys}; - return $self->resNotFound if !$sys; - my $pkgs = $self->dbPackageGet(sysid => $sys->{id}, name => $name, results => 1); - return $self->resNotFound if !@$pkgs; - $self->resRedirect("/pkg/$sys->{short}/$pkgs->[0]{category}/$name".($ver ? "/$ver" :''), 'perm'); - }, - - # Redirect for a specific language for a man page. - # I'm not a fan of this solution; might drop it in the future. - qr{lang/([^/]+)/([^/]+)} => sub { - my($s, $l, $n) = @_; - $n = _normalizename($n); - my($m, undef) = $s->dbManPrefName($n, language => $l); - return $s->resNotFound if !$m; - $s->resRedirect("/$m->{name}/".substr($m->{hash}, 0, 8), 'temp'); - }, - - qr{xml/search\.xml} => \&xmlsearch, - qr{json/tree\.json} => \&jsontree, -); - -TUWF::run(); - - -sub home { - my $self = shift; - my $stats = $self->dbStats; - my $fn = sub { local $_=shift; 1 while(s/(\d)(\d{3})($|,)/$1,$2/); $_ }; - $self->htmlHeader(title => 'Man Pages Archive'); - h1 'Man Pages Archive'; - p class => 'txt'; lit sprintf <<' _', map $fn->($stats->{$_}), qw|hashes mans files packages|; - Indexing <b>%s</b> versions of <b>%s</b> manual pages found in <b>%s</b> - files of <b>%s</b> packages. - <br /><br /> - Manned.org aims to index all manual pages from a variety of systems, both - old and new, and provides a convenient interface for looking up and viewing - the various versions of each man page. - <a href="/info/about">About manned.org »</a> - _ - end; - - h2 'Browse the manuals'; - ul id => 'systems'; - my %sys; - push @{$sys{$_->{name}}}, $_ for(@{$self->{systems}}); - for my $sys (sort keys %sys) { - $sys = $sys{$sys}; - (my $img = $sys->[0]{short}) =~ s/^(.+)-.+$/$1/; - li; - a href => "/pkg/$sys->[0]{short}" if @$sys == 1; - span style => "background-image: url('images/$img.png')", ''; - b $sys->[0]{name}; - if(@$sys > 1) { - my $i = 0; - for(reverse @$sys) { - a href => "/pkg/$_->{short}", ++$i > 3 ? (class => 'hidden') : (), $_->{release}; - lit ' '; - } - a href => "#", class => 'more', 'more...' if $i > 3; - } - end 'a' if @$sys == 1; - end; - } - end; - - h2 'Other sites'; - ul id => 'external'; - li; a href => 'http://man7.org/linux/man-pages/index.html', 'man7.org'; txt ' - Linux man pages from several upstream projects.'; end; - li; a href => 'https://manpag.es/', 'ManPag.es'; txt ' - Man pages from several Linux distributions.'; end; - li; a href => 'https://www.mankier.com/', 'ManKier'; txt ' - Fedora Rawhide + some manually imported man pages; Nicely formatted and with some unique features.'; end; - li; a href => 'https://man.cx/', 'man.cx'; txt ' - Man pages extracted from Debian testing.'; end; - li; a href => 'http://man.he.net/', 'man.he.net'; txt ' - Also seems to be from a Debian-like system.'; end; - li; a href => 'https://linux.die.net/man/', 'die.net'; txt ' - Seems to be based on an RPM-based Linux distribution.'; end; - li; a href => 'http://manpages.org/', 'manpages.org'; txt ' - Lots of mostly-nicely formatted man pages, no clue about source.'; end; - li; a href => 'https://www.manpagez.com/', 'manpagez.com'; txt ' - Mac OS X, has some GTK-html and texinfo documentation as well.'; end; - li; a href => 'https://man.archlinux.org/', 'Arch Linux Man Pages'; end; - li; a href => 'https://manpages.debian.org/', 'Debian Man Pages'; end; - li; a href => 'https://www.dragonflybsd.org/cgi/web-man', 'DragonFlyBSD Man Pages'; end; - li; a href => 'https://www.freebsd.org/cgi/man.cgi', 'FreeBSD.org Man Pages'; end; - li; a href => 'https://netbsd.gw.com/cgi-bin/man-cgi', 'NetBSD Man Pages'; end; - li; a href => 'https://www.openbsd.org/cgi-bin/man.cgi', 'OpenBSD Man Pages'; end; - li; a href => 'https://manpages.ubuntu.com/', 'Ubuntu Manuals'; end; - li; a href => 'https://man.voidlinux.org/', 'Void Linux manpages'; end; - end; - $self->htmlFooter; -} - - -sub about { - my $self = shift; - $self->htmlHeader(title => 'About'); - h1 'About Manned.org'; - div id => 'about'; - - h2 'Goal'; - p; lit <<' _'; - The state of online indices of manual pages used to be a sad one. Existing - sites used to only offer you a single version of a man page: From one - origin, and often only in a single language. Most didn't even tell you where - the manual actually originated from, making it very hard to determine - whether the manual you found actually applied to your situation and even - harder to find a manual for a specific system. Additionally, some sites - rendered the manuals in an unreadable way, didn't correctly handle special - formatting - like tables - or didn't correctly display non-ASCII characters. - <br /><br /> - Nowadays there are many good alternatives, but Manned.org was one of the - sites created in order to improve that situation. This site aims to index - the manual pages from a variaty of systems, both old and new, and allows you - to browse through the various versions of a manual page to find out how each - system behaves. The manuals are stored in the database as UTF-8, and are - passed through <a href="http://www.gnu.org/software/groff/">groff</a> to - render them in (mostly) the same way as they are displayed in your terminal. - <br /><br /> - This website is <a href="https://code.blicky.net/yorhel/manned">open - source</a> (MIT licensed) and written in a combination of Perl and Rust. The - entire PostgreSQL database is available for download (see "Database - download" below). - _ - end; - - h2 'URL format'; - lit <<' _'; - <p>You can link to specific packages and man pages with several URL formats. - These URLs will keep working in the future, so you should not have to worry - about eventual dead links.</p> - <h3>Man pages</h3> - <p>The following URLs are available to refer to an individual man page:</p> - <dl> - <dt><code>/<name>/<8-hex-digits></code></dt><dd> - This is the permalink format for a specific man page (e.g. <a href="/ls/910be0ed">/ls/910be0ed</a>).</dd> - <dt><code>/<name>[.<section>]</code></dt><dd> - Will try to get the latest and most-close-to-upstream version of a man - page (e.g. <a href="/socket">/socket</a> or <a - href="/socket.7">/socket.7</a>). Note that this may fetch the man page - from any available system, so may result in confusing scenarios for - system-specific documentation.</dd> - <dt><code>/man/<system>/<name>[.<section>]</code></dt><dd> - Will get the latest version of a man page from the given system (e.g. <a - href="/man/ubuntu-xenial/rsync">/man/ubuntu-xenial/rsync</a>)</dd> - <dt><code>/man/<system>/<category>/<package>/<name>[.<section>]</code></dt><dd> - Will get the latest version of a man page from the given package (e.g. <a - href="/man/ubuntu-xenial/net/rsync/rsync">/man/ubuntu-xenial/net/rsync/rsync</a>)</dd> - <dt><code>/man/<system>/<category>/<package>/<version>/<name>[.<section>]</code></dt><dd> - Will get the man page from a specific package version (e.g. <a - href="/man/ubuntu-xenial/net/rsync/3.1.1-3ubuntu1/rsync">/man/ubuntu-xenial/net/rsync/3.1.1-3ubuntu1/rsync</a>)</dd> - </dl> - <p>Currently, the last three URLs will perform a redirect to the - appropriate permalink URL, but this may change in the future.<br /> - In all URLs where an optional <code>.<section></code> can be provided, - the search is performed as a prefix match. For example, <a - href="/cat.3">/cat.3</a> will provide the <code>cat.3tcl</code> man page if - no exact <code>cat.3</code> version is available. Linking to the full - section name is also possible: <a href="/cat.3tcl">/cat.3tcl</a>. If no - section is given and multiple sections are available, the lowest section - number is chosen.</p> - <h3>Packages</h3> - <p>Linking to individual packages is also possible. These pages will show a - listing of all manual pages available in the given package.</p> - <dl> - <dt><code>/pkg/<system>/<category>/<package></code></dt><dd> - For the latest version of a package (e.g. <a - href="/pkg/arch/core/coreutils">/pkg/arch/core/coreutils</a>).</dd> - <dt><code>/pkg/<system>/<category>/<package>/<version></code></dt><dd> - For a particular version of a package (e.g. <a - href="/pkg/arch/core/coreutils/8.25-2">/pkg/arch/core/coreutils/8.25-2</a>).</dd> - </dl> - <p>Note that this site only indexes packages that actually have manual - pages; Linking to a package that doesn't have any will result in a 404 - page.</p> - _ - - h2 'The indexing process'; - p; lit <<' _'; - All man pages are fetched right from the (binary) packages available on the - public repositories of Linux distributions. In particular:<br /> - <dl> - <dt>Arch Linux</dt><dd> - The core, extra and community repositories are fetched from a local - Arch mirror. Indexing started around begin June 2012. The i686 - architecture was indexed until November 6th, 2016, packages after that - were fetched from from x86_64.</dd> - <dt>Debian</dt><dd> - Historical releases were fetched from <a - href="http://archive.debian.org/debian/">http://archive.debian.org/debian/</a> - and <a href="http://snapshot.debian.org/">http://snapshot.debian.org/</a>. - For buzz, rex and bo, we're missing a few man pages because some packages - were missing from the repository archives. Where available, all components - (main, contrib and non-free) from the $release and $release-updates - repositories are indexed.</dd> - <dt>CentOS</dt><dd> - Historical releases were fetched from <a - href="http://vault.centos.org/">vault.centos.org</a>, current releases - from a local mirror. Where applicable, the following repositories were - indexed: addons, centosplus, contrib, extras, os. The i386 architecture - was indexed for versions lower than 7.0, since 7.0 the packages from - x86_64 are indexed. - <dt>Fedora</dt><dd> - Historical releases were fetched from <a - href="http://archives.fedoraproject.org/pub/archive/fedora/linux/">archives.fedoraproject.org</a>, - current releases from a local repository. Fedora Core 1 till 6 are - (incorrectly) called 'Fedora' here. To compensate for that, Fedora 3 till - 6 also include the Extras repository. For Fedora 7 and later, the - 'Everything' and 'updates' repositories are indexed. The i386 arch was - indexed for Fedora 17 and older, the x86_64 arch starting with Fedora - 18.</dd> - <dt>FreeBSD</dt><dd> - Historical releases were fetched from <a - href="http://ftp-archive.freebsd.org/mirror/FreeBSD-Archive/">http://ftp-archive.freebsd.org/mirror/FreeBSD-Archive/</a>. - The base installation tarballs are included in the database as packages - prefixed with <i>core-</i>. The package repositories have also been - indexed, except for 2.0.5 - 2.2.7 and 3.0 - 3.3 because those were not - available on the ftp archive. Only the -RELEASE repositories have been - included, which is generally a snapshot of the ports directory around the - time of the release. The release dates indicated for many packages were - guessed from the file modification dates in the tarball, and may be - inaccurate. The i368 arch was indexed for FreeBSD 11.0 and older, the - amd64 arch starting with 11.1.</dd> - <dt>Ubuntu</dt><dd> - Historical releases were fetched from <a - href="http://old-releases.ubuntu.com/ubuntu/">http://old-releases.ubuntu.com/ubuntu/</a>, - supported releases from a local mirror. All components (main, universe, - restricted and multiverse) from the $release, $release-updates and - $release-security repositories are indexed. Indexing started around mid - June 2012. All releases before 2017 were indexed from the i386 - repositories, starting with 17.04 the amd64 repositories were used.</dd> - </dl> - <p> - Only packages for a single architecture (i386 or amd64) are scanned. To my - knowledge, packages that come with different manuals for different - architectures either don't exist or are extremely rare. It does happen that - some packages are not available for all architectures. Usually, though, - every package is at least available for the most popular architecture, so - hopefully we're not missing out on much. <br /><br /> - The repositories are scanned for new packages on a daily basis. - </p> - _ - end; - - h2 'Database download'; - p; lit <<' _'; - This site is backed by a PostgreSQL database containing all the man pages. - Weekly dumps of the full database are available for download at - <a href="http://dl.manned.org/dumps/">http://dl.manned.org/dumps/</a>. - <br /><br /> - Be warned that the download server may not be terribly reliable, so it is - advisable to use a client that supports resumption of partial downloads. See - <a href="/wget">wget's -c</a> or <a href="/curl">curl's -C</a>. - <br /><br /> - The database schema is "documented" at <a - href="https://code.blicky.net/yorhel/manned/src/branch/master/sql/schema.sql">schema.sql</a> - in the git repo. Note that these dumps don't constitute a stable API and, - while this won't happen frequently, incompatible schema changes or Postgres - major version bumps may occur. - _ - end; - - h2 'Other systems'; - p; lit <<' _'; - Suggestions for new (or old) systems to index are welcome. - <br /><br /> - It would be great to index a few more non-Linux systems such as other BSDs, - Solaris/Illumos and Mac OS X. Unfortunately, those don't always follow a - binary package based approach, or are otherwise less easy to properly index. - <br /><br /> - In general, systems that follow an entirely source-based distribution - approach can't be indexed without compiling everything. Since that is both - very resource-heavy and open to security issues, there are no plans to - include manuals from such systems at the moment. So unless someone comes - with a solution I hadn't thought of yet, there won't be any Gentoo manuals - here. :-( - _ - end; - - h2 'Future plans'; - p; lit <<' _'; - This site isn't nearly as awesome yet as it could be. Here's some ideas that - would be nice to have in the future: - <ul> - <li>Improved, more intelligent, search,</li> - <li><a href="/apropos.1">apropos(1)</a> emulation(?),</li> - <li>Diffs between various versions of a man page,</li> - <li>Anchor links within man pages, for easier linking to a section or paragraph,</li> - <li>Alternative formats (Text, PDF, more semantic HTML, etc),</li> - <li>A command-line client, like <a href="/man.1">man(1)</a> with manned.org as database backend.</li> - </ul> - _ - end; - - h2 'Copyright'; - p; lit <<' _'; - All manual pages are copyrighted by their respective authors. The manuals - have been fetched from publically available repositories of free and - (primarily) open source software. The distributors of said software have put - in efforts to only include software and documentation that allows free - distribution. Nonetheless, if a manual that does not allow to be - redistributed has been inadvertently included in our index, please let me - know and I will have it removed as soon as possible. - _ - end; - - end; - $self->htmlFooter; -} - +# TODO: Add SQL::Interp support to TUWF directly, in some form. +sub TUWF::Object::dbExeci { shift->dbExec(sql_interp @_) } +sub TUWF::Object::dbVali { shift->dbVal (sql_interp @_) } +sub TUWF::Object::dbRowi { shift->dbRow (sql_interp @_) } +sub TUWF::Object::dbAlli { shift->dbAll (sql_interp @_) } +sub TUWF::Object::dbPagei { shift->dbPage(shift, sql_interp @_) } -sub paginate { - my($url, $count, $perpage, $p) = @_; - return if $count <= $perpage; - - my $l = sub { - my $c = shift; - a href => sprintf('%s%d', $url, $c), $c if $c != $p; - b $c if $c == $p; - }; - - my $lp = ceil($count/$perpage); - p class => 'paginate'; - $l->(1) if $p > 1+4; - b '...' if $p > 1+5; - $l->($_) for (($p > 4 ? $p-4 : 1)..($p+4 > $lp ? $lp : $p+4)); - b '...' if $p < $lp-5; - $l->($lp) if $p < $lp-4; - end; +# Set the last modification time from a string in yyyy-mm-dd format. +sub TUWF::Object::resLastMod { + my($s, $d) = @_; + return if $d !~ /^(\d{4})-(\d{2})-(\d{2})/; + my @t = gmtime timegm 0,0,0,$3,$2-1,$1; + $s->resHeader('Last-Modified', sprintf '%s, %02d %s %04d %02d:%02d:%02d GMT', + (qw|Sun Mon Tue Wed Thu Fri Sat|)[$t[6]], $t[3], + (qw|Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec|)[$t[4]], + $t[5]+1900, $t[2], $t[1], $t[0]); } -sub browsesearch { - my $self = shift; - my $q = $self->reqGet('q')||''; - my $man = $self->dbSearch($q, 150); - - return $self->resRedirect("/$man->[0]{name}.$man->[0]{section}", 'temp') if @$man == 1; - - $self->htmlHeader(title => 'Search results for '.$q); - h1 'Search results for '.$q; - p 'Note: This is just a simple case-insensitive prefix match on the man names. In the future we\'ll have more powerful search functionality. Hopefully.'; - if(@$man) { - ul id => 'searchres'; - for(@$man) { - li; - a href => "/$_->{name}.$_->{section}", $_->{name}; - i " $_->{section}"; - end; - } - end; - } else { - br; br; - b 'No results :-('; - } - - $self->htmlFooter; +# The systems table doesn't change often, so keep an in-memory cache for quick lookups. +sub systems { + state $s ||= [ map { + $_->{full} = $_->{name}.($_->{release}?' '.$_->{release}:''); + $_ + } tuwf->dbAll('SELECT id, name, release, short, relorder FROM systems ORDER BY name, relorder')->@* ]; } +sub sysbyid { state $s ||= { map +($_->{id}, $_), systems->@* } } +sub sysbyshort { state $s ||= { map +($_->{short}, $_), systems->@* } } -sub pkg_list { - my($self, $short) = @_; - - my $sys = $self->{sysbyshort}{$short}; - return $self->resNotFound if !$sys; +# URL-unescape some special characters that may occur in man names. +# Firefox seems to escape [ and ] in URLs. It doesn't really have to... +sub normalize_name { $_[0] =~ s/%5b/[/irg =~ s/%5d/]/irg =~ s/%20/ /rg } - my $f = $self->formValidate( - { get => 'c', required => 0, enum => [ '0', 'all', 'a'..'z' ], default => 'all' }, - { get => 'p', required => 0, default => 1, template => 'uint', min => 1, max => 200 }, - ); - return $self->resNotFound if $f->{_err}; +# Subquery returning all packages that have a man page. +my $packages_with_man = '(SELECT * FROM packages p WHERE EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = p.id AND EXISTS(SELECT 1 FROM man m WHERE m.package = pv.id)))'; - my %opt = (hasman => 1, sysid => $sys->{id}, char => $f->{c} eq 'all' ? undef : $f->{c}); - my $pkg = $self->dbPackageGet(%opt, results => 200, page => $f->{p}); - my $count = $self->dbPackageGet(%opt, countonly => 1)->[0]{count}; +sub escape_like { $_[0] =~ s/([_%])/\\$1/rg } - my $title = "Packages for $sys->{name}".($sys->{release}?" $sys->{release}":""); - $self->htmlHeader(title => $title); - div id => 'pkglist'; - h1 $title; - - p class => 'charselect'; - for('all', 0, 'a'..'z') { - a href => "/pkg/$short?c=$_", $_?uc$_:'#' if $_ ne $f->{c}; - b $_?uc$_:'#' if $_ eq $f->{c}; - } - end; - - p 'Note: Packages without man pages are not listed.'; - paginate "/pkg/$short?c=$f->{c};p=", $count, 200, $f->{p}; - ul id => 'packages'; - for(@$pkg) { - li; - a href => "/pkg/$short/$_->{category}/$_->{name}", $_->{name}; - i ' '.$_->{category}; - end; - } - end; - paginate "/pkg/$short?c=$f->{c};p=", $count, 200, $f->{p}; - - end; - $self->htmlFooter; +sub sql_join { + my $sep = shift; + my @args = map +($sep, $_), @_; + sql @args[1..$#args]; } - - -sub pkg_frompath { - my($self, $sys, $path) = @_; - - # $path should be "$category/$name" or "$category/$name/$version", since - # $category may contain a slash, let's try both options. - - # $category/$name - # e.g. contrib/games/alien - if($path =~ m{^(.+)/([^/]+)$}) { - my($category, $name) = ($1, $2); - my $pkg = $self->dbPackageGet(sysid => $sys, category => $category, name => $name, hasman => 1)->[0]; - return ($pkg, '') if $pkg; - } - - # $category/$name/$version - # e.g. contrib/games/alien/10.2 - if($path =~ m{^(.+)/([^/]+)/([^/]+)$}) { - my($category, $name, $version) = ($1, $2, $3); - my $pkg = $self->dbPackageGet(sysid => $sys, category => $category, name => $name, hasman => 1)->[0]; - return ($pkg, $version) if $pkg; - } - - (undef, ''); +sub sql_and { @_ ? sql_join 'AND', map sql('(', $_, ')'), @_ : sql '1=1' } +sub sql_or { @_ ? sql_join 'OR', map sql('(', $_, ')'), @_ : sql '1=0' } + +# Subquery to match $sql_expr::bytea against a $prefix (hex string). Hopefully indexable. +sub sql_hash_prefix { + my($sql_expr, $prefix) = @_; + my $esc = unpack 'H*', escape_like pack 'H*', $prefix; + sql '(', $sql_expr, "like ('\\x$esc'::bytea||'%'))" } -sub pkg_info { - my($self, $short, $path) = @_; - - my $sys = $self->{sysbyshort}{$short}; - return $self->resNotFound if !$sys; - - my($pkg, $ver) = pkg_frompath($self, $sys->{id}, $path); - return $self->resNotFound if !$pkg; - - my $vers = $self->dbPackageVersions($pkg->{id}); - - my $sel = $ver ? (grep $_->{version} eq $ver, @$vers)[0] : $vers->[0]; - return $self->resNotFound if !$sel; - - my $f = $self->formValidate({ get => 'p', required => 0, default => 1, template => 'uint', min => 1, max => 100}); - return $self->resNotFound if $f->{_err}; - - my $mans = $self->dbManInfo(package => $sel->{id}, results => 200, page => $f->{p}, sort => 'syspkgname'); - my $count = $self->dbManInfo(package => $sel->{id}, countonly => 1)->[0]{count}; +sub pkg_frompath { + my($sys_where, $path) = @_; - # Latest version of this package determines last modification date of the page. - $self->setLastMod($vers->[0]{released}); + # $path should be "$category/$name" or "$category/$name/$version", since + # $category may contain a slash, let's try both options. - my $title = "$sys->{name}".($sys->{release}?" $sys->{release}":"")." / $pkg->{category} / $pkg->{name}"; - $self->htmlHeader(title => "$title $sel->{version}"); - h1 $title; + my sub lookup { + my($cat, $name) = @_; + tuwf->dbRowi('SELECT id, system, name, category FROM', $packages_with_man, 'p WHERE', $sys_where, 'AND category =', \$cat, 'AND name =', \$name); + } - div id => 'pkgversions'; - h2 'Versions'; - ul; - for(@$vers) { - li; - a href => "/pkg/$sys->{short}/$pkg->{category}/$pkg->{name}/$_->{version}", $_->{version} if $_ != $sel; - b " $_->{version}" if $_ == $sel; - i " $_->{released}"; - end; + # $category/$name + # e.g. contrib/games/alien + if($path =~ m{^(.+)/([^/]+)$}) { + my $pkg = lookup $1, $2; + return ($pkg, '') if $pkg->{id}; } - end; - end; - - div id => 'pkgmans'; - h2 "Manuals for version $sel->{version}"; - paginate "/pkg/$sys->{short}/$pkg->{category}/$pkg->{name}/$sel->{version}?p=", $count, 200, $f->{p}; - ul; - for(@$mans) { - li; - a href => "/$_->{name}/".substr($_->{hash},0,8), "$_->{name}($_->{section})"; - b " $_->{locale}" if $_->{locale}; - i " $_->{filename}"; - end; + + # $category/$name/$version + # e.g. contrib/games/alien/10.2 + if($path =~ m{^(.+)/([^/]+)/([^/]+)$}) { + my $pkg = lookup $1, $2; + return ($pkg, $3) if $pkg->{id}; } - end; - paginate "/pkg/$sys->{short}/$pkg->{category}/$pkg->{name}/$sel->{version}?p=", $count, 200, $f->{p}; - end; - $self->htmlFooter; + (undef, ''); } -sub man_redir { - my($self, $sys, $path) = @_; - - # Path can be: - # 1. <name> - # 2. <category>/<package>/<name> - # 3. <category>/<package>/<version>/<name> - - my $sysid = $self->{sysbyshort}{$sys}; - $sysid = $sysid ? [$sysid->{id}] : [ map $self->{sysbyshort}{$_}{id}, grep /^\Q$sys\E-/, keys $self->{sysbyshort}->%* ]; - return $self->resNotFound if !@$sysid; - - my $man; - if($path !~ m{/}) { # (1) - ($man) = $self->dbManPrefName($path, sysid => $sysid); - - } else { - $path =~ s{/([^/]+)$}{}; - my $name = $1; - - my($pkg, $ver) = pkg_frompath($self, $sysid, $path); # Handles (2) and (3) - return $self->resNotFound if !$pkg; - - my $verid = $ver && $self->dbPackageVersions($pkg->{id}, $ver)->[0]{id}; - return $self->resNotFound if $ver && !$verid; - - ($man) = $self->dbManPrefName($name, sysid => $sysid, pkgid => $pkg->{id}, pkgver => $verid); - } - return $self->resNotFound if !$man; - - $self->resRedirect("/$man->{name}/".substr($man->{hash}, 0, 8), 'temp'); -}; +# Get the preferred man page for the given filters. +sub man_pref { + my($section, $where) = @_; + $where = sql_and $where, sql 'm.section LIKE', \(escape_like($section).'%') if length $section; + + # Criteria to determine a "preferred" man page: + # 1. english: English versions of a man page have preference over other locales + # 2. pkgver: Newer versions of the same package have preference over older versions + # 3. stdloc: Prefer man pages in standard locations + # 4. secmatch: Prefer an exact section match + # 5. arch: Prefer Arch over other systems (because it tends to be the most up-to-date, and closest to upstreams) + # 6. ubuntu: If there's no Arch, prefer Ubuntu over other systems (again, tends to be more up-to-date) + # (also resolves distro-specific tooling disputes such as https://code.blicky.net/yorhel/manned/issues/1 ) + # 7. sysrel: Prefer a later system release over an older release + # 8. secorder: Lower sections before higher sections (because man does it this way, for some reason) + # 9. pkgdate: Prefer more recent packages (cross-distro) + # 10. Fall back on hash comparison, to ensure the result is stable + + tuwf->dbRowi(q{ + WITH unfiltered AS ( + SELECT s AS sys, p AS pkg, v AS ver, m AS man + FROM man m + JOIN package_versions v ON v.id = m.package + JOIN packages p ON p.id = v.package + JOIN systems s ON s.id = p.system + WHERE}, $where, q{ + ), f_english AS( + SELECT * FROM unfiltered WHERE NOT EXISTS(SELECT 1 FROM unfiltered WHERE is_english_locale((man).locale)) OR is_english_locale((man).locale) + ), f_pkgver AS( + SELECT * FROM f_english a WHERE NOT EXISTS(SELECT 1 FROM f_english b WHERE (a.ver).package = (b.ver).package AND (a.ver).released < (b.ver).released) + ), f_stdloc AS( + SELECT * FROM f_pkgver WHERE NOT EXISTS(SELECT 1 FROM f_pkgver WHERE is_standard_man_location((man).filename)) OR is_standard_man_location((man).filename) + ), f_secmatch AS( + SELECT * FROM f_stdloc WHERE NOT EXISTS(SELECT 1 FROM f_stdloc WHERE (man).section =}, \$section, q{) OR (man).section =}, \$section, q{ + ), f_arch AS( + SELECT * FROM f_secmatch WHERE NOT EXISTS(SELECT 1 FROM}, length $section ? 'f_secmatch' : 'f_stdloc', q{WHERE (sys).id = 1) OR (sys).id = 1 + ), f_ubuntu AS( + SELECT * FROM f_arch WHERE NOT EXISTS(SELECT 1 FROM f_arch WHERE (sys).name = 'Ubuntu') OR (sys).name = 'Ubuntu' + ), f_sysrel AS( + SELECT * FROM f_ubuntu a WHERE NOT EXISTS(SELECT 1 FROM f_ubuntu b WHERE (a.sys).name = (b.sys).name AND (a.sys).relorder < (b.sys).relorder) + ), f_secorder AS( + SELECT * FROM f_sysrel a WHERE NOT EXISTS(SELECT 1 FROM f_sysrel b WHERE (a.man).section > (b.man).section) + ), f_pkgdate AS( + SELECT * FROM f_secorder a WHERE NOT EXISTS(SELECT 1 FROM f_secorder b WHERE (a.ver).released < (b.ver).released) + ) + SELECT (pkg).system, (pkg).category, (pkg).name AS package, (ver).version, (ver).released, (ver).id AS verid, + (man).name, (man).section, (man).filename, (man).locale, encode((man).hash, 'hex') AS hash + FROM f_pkgdate ORDER BY (man).hash LIMIT 1 + }); +} -sub _man_nav { - my($self, $man, $toc) = @_; +# Given the name of a man page with optional section, find out the actual name +# and section prefix of the man page and the preferred version. +sub man_pref_name { + my($name, $where) = @_; - my @sect = $self->dbManSections($man->{name}); - my @lang = $self->dbManLanguages($man->{name}, $man->{section}); - return if !@sect && !@lang && !@$toc; + my $man = man_pref undef, sql_and $where, sql 'm.name =', \$name; + return ($man, '') if length $man->{name}; - # TODO: This is ugly, especially because clicking on a translation or - # section, you can end up with a man page that is nowhere close to the man - # page you're currently reading. Opening a version selector box might be a - # better alternative. + return (undef, '') if $name !~ s/\.([^.]+)$// || !length $name; + my $section = $1; + $man = man_pref $section, sql_and $where, sql 'm.name =', \$name; + length $man->{name} ? ($man, $section) : (undef, ''); +} - div id => 'nav'; - if(@sect > 1) { - b 'Sections'; - p; - for (@sect) { - if($man->{section} eq $_) { - i $_; - } else { - a href => "/$man->{name}.$_", $_; +sub framework_ { + my $content = pop; + my(%o) = @_; + + html_ sub { + head_ sub { + link_ rel => 'stylesheet', type => 'text/css', href => '/man.css?4'; + title_ $o{title}.' - manned.org'; + }; + body_ sub { + div_ id => 'header', sub { + a_ href => '/', 'manned.org'; + form_ action => '/browse/search', method => 'get', sub { + input_ type => 'text', name => 'q', id => 'q', tabindex => 1; + input_ type => 'submit', value => ' '; + } + }; + div_ id => 'body', sub { + $content->(); + br_ style => 'clear: both'; + }; + div_ id => 'footer', sub { + lit_ 'All manual pages are copyrighted by their respective authors. + | <a href="/info/about">About manned.org</a> + | <a href="mailto:contact@manned.org">Contact</a> + | <a href="https://code.blicky.net/yorhel/manned">Source</a>'; + }; + script_ type => 'text/javascript', src => '/man.js', ''; } - txt ' '; - } - end; - } - - if(@lang > 1) { - b 'Languages'; - p; - (my $cur = $man->{locale}||'') =~ s/\..*//; - for (@lang) { - if(($_||'') eq $cur) { - i $_ || 'default'; - } else { - a href => $_ ? "/lang/$_/$man->{name}.$man->{section}" : "/$man->{name}.$man->{section}", $_ || 'default'; + }; + + # write the SQL queries as a HTML comment when debugging is enabled + # (stolen from VNDB code) + # (TODO: Move this into TUWF or something) + if(tuwf->debug) { + my(@sql_r, @sql_i) = (); + for (tuwf->{_TUWF}{DB}{queries}->@*) { + my($sql, $params, $time) = @$_; + my @params = sort { $a =~ /^[0-9]+$/ && $b =~ /^[0-9]+$/ ? $a <=> $b : $a cmp $b } keys %$params; + my $prefix = sprintf " [%6.2fms] ", $time*1000; + push @sql_r, sprintf "%s%s | %s", $prefix, $sql, join ', ', map "$_:".DBI::neat($params->{$_}), @params; + my $i=1; + push @sql_i, $prefix.($sql =~ s/\?/tuwf->dbh->quote($params->{$i++})/egr); } - txt ' '; - } - end; - } - - if(@$toc > 1) { - b 'Table of Contents'; - ul; - for (0..$#$toc) { - li; - a href => sprintf('#head%d', $_+1), lc $toc->[$_]; - end; - } - end; - } - end; -} - - -sub _normalizename { - local $_ = shift; - # Firefox seems to escape [ and ] in URLs. It doesn't really have to... - s/%5b/[/ig; - s/%5d/]/ig; - # Man pages with spaces in the path, eww - s/%20/ /g; - $_; -} - - -# Replace .so's in man source with the contents (if available in the same -# package) or with a reference to the other man page. -sub soelim { - my($self, $verid, $src) = @_; - - # tix comes with[1] a custom(?) macro package. But it looks okay even without - # loading that. - # [1] It actually doesn't, the tcllib package appears to have that file, but - # doesn't '.so' it. - $src =~ s/^\.so man.macros$//mg; - - # Other .so's should be handled by html() - $src =~ s{^\.so (.+)$}{ - my $path = $1; - my $name = (reverse split /\//, $path)[0]; - my($man) = $verid ? $self->dbManPrefName($name, pkgver => $verid) : (); - if($man) { - # Recursive soelim, but the second call gets $verid=0 so we don't keep checking the database - soelim($self, 0, $self->dbManContent($man->{hash})) - } else { - ".in -10\n.sp\n\[\[\[MANNEDINCLUDE$path\]\]\]" + my $sql_r = join "\n", @sql_r; + my $sql_i = join "\n", @sql_i; + my $modules = join "\n", sort keys %INC; + lit_ "\n<!--\nSQL (with placeholders):\n$sql_r\n\nSQL (interpolated, possibly buggy):\n$sql_i\n\nMODULES:\n$modules\n-->"; } - }emg; - return $src; -} - - -sub man { - my($self, $name, $hash) = @_; - - $name = _normalizename($name); - - # Unfortunately, even in the permalink format with the hash, we don't know - # from which package we're supposed to get the man page. This info is - # needed in order to do .so substitution, so we can substitute files from - # the same package as the requested man page. Use the dbManPref logic here - # to deterministically select a good package. - my($man, undef) = $hash - ? $self->dbManPref(name => $name, shorthash => $hash) - : $self->dbManPrefName($name); - return $self->resNotFound() if !$man; - - my $fmt = ManUtils::html(ManUtils::fmt_block soelim $self, $man->{verid}, $self->dbManContent($man->{hash})); - my @toc; - $fmt =~ s{\n<b>(.+?)<\/b>\n}{ - push @toc, $1; - my $c = @toc; - qq{\n<a href="#head$c" id="head$c">$1</a>\n} - }eg; - - $self->setLastMod($man->{released}); - $self->htmlHeader(title => $name); - _man_nav($self, $man, \@toc); - div id => 'manbuttons'; - h1 $man->{name}; - ul 'data-hash' => $man->{hash}, 'data-name' => $man->{name}, 'data-section' => $man->{section}, 'data-locale' => $man->{locale}||'', - 'data-hasversions' => $self->dbManHasVersions($man->{name}, $man->{section}, $man->{locale}, $man->{hash}); - li; a href => "/$man->{name}/".substr($man->{hash}, 0, 8).'/src', 'source'; end; - li; a href => "/$man->{name}/".substr($man->{hash}, 0, 8), 'permalink'; end; - end; - end; - div id => 'manres', class => 'hidden'; - end; - - div id => 'contents'; - pre; lit $fmt; end; - end; - $self->htmlFooter(); } -sub src { - my($self, $name, $hash) = @_; +sub paginate_ { + my($url, $count, $perpage, $p) = @_; + return if $count <= $perpage; - $name = _normalizename($name); + my sub l_ { + my($c)= @_; + a_ href => "$url$c", $c if $c != $p; + b_ $c if $c == $p; + }; - my $m = $self->dbManInfo(name => $name, shorthash => $hash); - return $self->resNotFound if !@$m; - - $self->setLastMod($m->[0]{released}); - $self->resHeader('Content-Type', 'text/plain; charset=UTF-8'); - $self->resHeader('Content-Disposition', sprintf 'filename="%s.%s"', $m->[0]{name}, $m->[0]{section}); - my $c = $self->dbManContent($m->[0]{hash}); - lit $c; + my $lp = ceil($count/$perpage); + p_ class => 'paginate', sub { + l_ 1 if $p > 1+4; + b_ '...' if $p > 1+5; + l_ $_ for (($p > 4 ? $p-4 : 1)..($p+4 > $lp ? $lp : $p+4)); + b_ '...' if $p < $lp-5; + l_ $lp if $p < $lp-4; + } } -sub xmlsearch { - my $self = shift; - my $q = $self->reqGet('q')||''; - my $man = $self->dbSearch($q, 20); - - # The JS dropdown search expects this particular format. - $self->resHeader('Content-Type' => 'text/xml; charset=UTF-8'); - xml; - tag 'results'; - tag 'item', id => "$_->{name}.$_->{section}", %$_, undef for(@$man); - end 'results'; -} +TUWF::set error_404_handler => sub { + tuwf->resStatus(404); + my $title = 'No manual entry for '.tuwf->reqPath; + framework_ title => $title, sub { + h1_ $title; + p_ 'That is, the page you were looking for doesn\'t exist.'; + }; +}; -sub jsontree { - my $self = shift; - - my $f = $self->formValidate( - { get => 'name', required => 0, maxlength => 256 }, - { get => 'section', required => 0, maxlength => 32 }, - { get => 'locale', required => 0, default => '', maxlength => 32 }, - { get => 'cur', required => 0, default => '', regex => qr/^[a-fA-F0-9]{40}$/ }, - { get => 'hash', required => 0, default => '', regex => qr/^[a-fA-F0-9]{40}$/ }, - ); - return $self->resNotFound() if $f->{_err} || (!$f->{hash} && !($f->{section} && $f->{name})); - - my $l = $self->dbManInfo(sort => 'syspkgname', $f->{hash} - ? (hash => $f->{hash}) - : (name => $f->{name}, section => $f->{section}, locale => $f->{locale})); - - # Convert the list into a tree - my $tree = []; - my($sys, $sysver, $pkg, $pkgver); - for my $m (@$l) { - my $sysname = $self->{sysbyid}{$m->{system}}{name}; - if(!$sys || $sysname ne $sys->{name}) { - $sys = { name => $sysname, childs => [] }; - $sysver = undef; - push @$tree, $sys; +TUWF::get '/' => sub { + my $stats = tuwf->dbRow('SELECT * FROM stats_cache'); + + sub num { local $_=shift; 1 while(s/(\d)(\d{3})($|,)/$1,$2/); $_ }; + + framework_ title => 'Man Pages Archive', sub { + h1_ 'Man Pages Archive'; + p_ class => 'txt', sub { + lit sprintf <<' _', map num($stats->{$_}), qw|hashes mans files packages|; + Indexing <b>%s</b> versions of <b>%s</b> manual pages found in <b>%s</b> + files of <b>%s</b> packages. + <br /><br /> + Manned.org aims to index all manual pages from a variety of systems, both + old and new, and provides a convenient interface for looking up and viewing + the various versions of each man page. + <a href="/info/about">About manned.org »</a> + _ + }; + + h2_ 'Browse the manuals'; + ul_ id => 'systems', sub { + my %sys; + push $sys{$_->{name}}->@*, $_ for(systems->@*); + li_ sub { + my $sys = $sys{$_}; + my $img = $sys->[0]{short} =~ s/^(.+)-.+$/$1/r; + if(@$sys == 1) { + a_ href => "/pkg/$sys->[0]{short}", sub { + span_ style => "background-image: url('images/$img.png')", ''; + b_ $sys->[0]{name}; + }; + return; + } + span_ style => "background-image: url('images/$img.png')", ''; + b_ $sys->[0]{name}; + my $i = 0; + for(reverse @$sys) { + a_ href => "/pkg/$_->{short}", ++$i > 3 ? (class => 'hidden') : (), $_->{release}; + lit_ ' '; + } + a_ href => '#', class => 'more', 'more...' if $i > 3; + } for sort keys %sys; + }; + + h2_ 'Other sites'; + ul_ id => 'external', sub { + li_ sub { a_ href => 'http://man7.org/linux/man-pages/index.html', 'man7.org'; txt_ ' - Linux man pages from several upstream projects.' }; + li_ sub { a_ href => 'https://manpag.es/', 'ManPag.es'; txt_ ' - Man pages from several Linux distributions.' }; + li_ sub { a_ href => 'https://www.mankier.com/', 'ManKier'; txt_ ' - Fedora Rawhide + some manually imported man pages; Nicely formatted and with some unique features.' }; + li_ sub { a_ href => 'https://man.cx/', 'man.cx'; txt_ ' - Man pages extracted from Debian testing.' }; + li_ sub { a_ href => 'http://man.he.net/', 'man.he.net'; txt_ ' - Also seems to be from a Debian-like system.' }; + li_ sub { a_ href => 'https://linux.die.net/man/', 'die.net'; txt_ ' - Seems to be based on an RPM-based Linux distribution.' }; + li_ sub { a_ href => 'http://manpages.org/', 'manpages.org'; txt_ ' - Lots of mostly-nicely formatted man pages, no clue about source.' }; + li_ sub { a_ href => 'https://www.manpagez.com/', 'manpagez.com'; txt_ ' - Mac OS X, has some GTK-html and texinfo documentation as well.' }; + li_ sub { a_ href => 'https://man.archlinux.org/', 'Arch Linux Man Pages' }; + li_ sub { a_ href => 'https://manpages.debian.org/', 'Debian Man Pages' }; + li_ sub { a_ href => 'https://www.dragonflybsd.org/cgi/web-man', 'DragonFlyBSD Man Pages' }; + li_ sub { a_ href => 'https://www.freebsd.org/cgi/man.cgi', 'FreeBSD.org Man Pages' }; + li_ sub { a_ href => 'https://netbsd.gw.com/cgi-bin/man-cgi', 'NetBSD Man Pages' }; + li_ sub { a_ href => 'https://www.openbsd.org/cgi-bin/man.cgi', 'OpenBSD Man Pages' }; + li_ sub { a_ href => 'https://manpages.ubuntu.com/', 'Ubuntu Manuals' }; + li_ sub { a_ href => 'https://man.voidlinux.org/', 'Void Linux manpages' }; + }; } +}; - my $sysversion = $self->{sysbyid}{$m->{system}}{release} || ''; - if(!$sysver || $sysversion ne $sysver->{name}) { - $sysver = { name => $sysversion, childs => [] }; - $pkg = undef; - push @{$sys->{childs}}, $sysver; - } - if(!$pkg || $m->{package} ne $pkg->{name}) { - $pkg = { name => $m->{package}, i => $m->{category}, table => [] }; - $pkgver = undef; - push @{$sysver->{childs}}, $pkg; - } +TUWF::get '/info/about' => sub { + framework_ title => 'About', sub { + h1_ 'About Manned.org'; + div_ id => 'about', sub { + lit <<' _'; + <h2>Goal</h2> + <p> + The state of online indices of manual pages used to be a sad one. Existing + sites used to only offer you a single version of a man page: From one + origin, and often only in a single language. Most didn't even tell you where + the manual actually originated from, making it very hard to determine + whether the manual you found actually applied to your situation and even + harder to find a manual for a specific system. Additionally, some sites + rendered the manuals in an unreadable way, didn't correctly handle special + formatting - like tables - or didn't correctly display non-ASCII characters. + <br /><br /> + Nowadays there are many good alternatives, but Manned.org was one of the + sites created in order to improve that situation. This site aims to index + the manual pages from a variaty of systems, both old and new, and allows you + to browse through the various versions of a manual page to find out how each + system behaves. The manuals are stored in the database as UTF-8, and are + passed through <a href="http://www.gnu.org/software/groff/">groff</a> to + render them in (mostly) the same way as they are displayed in your terminal. + <br /><br /> + This website is <a href="https://code.blicky.net/yorhel/manned">open + source</a> (MIT licensed) and written in a combination of Perl and Rust. The + entire PostgreSQL database is available for download (see "Database + download" below). + </p> + + <h2>URL format</h2> + <p>You can link to specific packages and man pages with several URL formats. + These URLs will keep working in the future, so you should not have to worry + about eventual dead links.</p> + <h3>Man pages</h3> + <p>The following URLs are available to refer to an individual man page:</p> + <dl> + <dt><code>/<name>/<8-hex-digits></code></dt><dd> + This is the permalink format for a specific man page (e.g. <a href="/ls/910be0ed">/ls/910be0ed</a>).</dd> + <dt><code>/<name>[.<section>]</code></dt><dd> + Will try to get the latest and most-close-to-upstream version of a man + page (e.g. <a href="/socket">/socket</a> or <a + href="/socket.7">/socket.7</a>). Note that this may fetch the man page + from any available system, so may result in confusing scenarios for + system-specific documentation.</dd> + <dt><code>/man/<system>/<name>[.<section>]</code></dt><dd> + Will get the latest version of a man page from the given system (e.g. <a + href="/man/ubuntu-xenial/rsync">/man/ubuntu-xenial/rsync</a>)</dd> + <dt><code>/man/<system>/<category>/<package>/<name>[.<section>]</code></dt><dd> + Will get the latest version of a man page from the given package (e.g. <a + href="/man/ubuntu-xenial/net/rsync/rsync">/man/ubuntu-xenial/net/rsync/rsync</a>)</dd> + <dt><code>/man/<system>/<category>/<package>/<version>/<name>[.<section>]</code></dt><dd> + Will get the man page from a specific package version (e.g. <a + href="/man/ubuntu-xenial/net/rsync/3.1.1-3ubuntu1/rsync">/man/ubuntu-xenial/net/rsync/3.1.1-3ubuntu1/rsync</a>)</dd> + </dl> + <p>Currently, the last three URLs will perform a redirect to the + appropriate permalink URL, but this may change in the future.<br /> + In all URLs where an optional <code>.<section></code> can be provided, + the search is performed as a prefix match. For example, <a + href="/cat.3">/cat.3</a> will provide the <code>cat.3tcl</code> man page if + no exact <code>cat.3</code> version is available. Linking to the full + section name is also possible: <a href="/cat.3tcl">/cat.3tcl</a>. If no + section is given and multiple sections are available, the lowest section + number is chosen.</p> + <h3>Packages</h3> + <p>Linking to individual packages is also possible. These pages will show a + listing of all manual pages available in the given package.</p> + <dl> + <dt><code>/pkg/<system>/<category>/<package></code></dt><dd> + For the latest version of a package (e.g. <a + href="/pkg/arch/core/coreutils">/pkg/arch/core/coreutils</a>).</dd> + <dt><code>/pkg/<system>/<category>/<package>/<version></code></dt><dd> + For a particular version of a package (e.g. <a + href="/pkg/arch/core/coreutils/8.25-2">/pkg/arch/core/coreutils/8.25-2</a>).</dd> + </dl> + <p>Note that this site only indexes packages that actually have manual + pages; Linking to a package that doesn't have any will result in a 404 + page.</p> + + <h2>The indexing process</h2> + <p> + All man pages are fetched right from the (binary) packages available on the + public repositories of Linux distributions. In particular:<br /> + </p> + <dl> + <dt>Arch Linux</dt><dd> + The core, extra and community repositories are fetched from a local + Arch mirror. Indexing started around begin June 2012. The i686 + architecture was indexed until November 6th, 2016, packages after that + were fetched from from x86_64.</dd> + <dt>Debian</dt><dd> + Historical releases were fetched from <a + href="http://archive.debian.org/debian/">http://archive.debian.org/debian/</a> + and <a href="http://snapshot.debian.org/">http://snapshot.debian.org/</a>. + For buzz, rex and bo, we're missing a few man pages because some packages + were missing from the repository archives. Where available, all components + (main, contrib and non-free) from the $release and $release-updates + repositories are indexed.</dd> + <dt>CentOS</dt><dd> + Historical releases were fetched from <a + href="http://vault.centos.org/">vault.centos.org</a>, current releases + from a local mirror. Where applicable, the following repositories were + indexed: addons, centosplus, contrib, extras, os. The i386 architecture + was indexed for versions lower than 7.0, since 7.0 the packages from + x86_64 are indexed. + <dt>Fedora</dt><dd> + Historical releases were fetched from <a + href="http://archives.fedoraproject.org/pub/archive/fedora/linux/">archives.fedoraproject.org</a>, + current releases from a local repository. Fedora Core 1 till 6 are + (incorrectly) called 'Fedora' here. To compensate for that, Fedora 3 till + 6 also include the Extras repository. For Fedora 7 and later, the + 'Everything' and 'updates' repositories are indexed. The i386 arch was + indexed for Fedora 17 and older, the x86_64 arch starting with Fedora + 18.</dd> + <dt>FreeBSD</dt><dd> + Historical releases were fetched from <a + href="http://ftp-archive.freebsd.org/mirror/FreeBSD-Archive/">http://ftp-archive.freebsd.org/mirror/FreeBSD-Archive/</a>. + The base installation tarballs are included in the database as packages + prefixed with <i>core-</i>. The package repositories have also been + indexed, except for 2.0.5 - 2.2.7 and 3.0 - 3.3 because those were not + available on the ftp archive. Only the -RELEASE repositories have been + included, which is generally a snapshot of the ports directory around the + time of the release. The release dates indicated for many packages were + guessed from the file modification dates in the tarball, and may be + inaccurate. The i368 arch was indexed for FreeBSD 11.0 and older, the + amd64 arch starting with 11.1.</dd> + <dt>Ubuntu</dt><dd> + Historical releases were fetched from <a + href="http://old-releases.ubuntu.com/ubuntu/">http://old-releases.ubuntu.com/ubuntu/</a>, + supported releases from a local mirror. All components (main, universe, + restricted and multiverse) from the $release, $release-updates and + $release-security repositories are indexed. Indexing started around mid + June 2012. All releases before 2017 were indexed from the i386 + repositories, starting with 17.04 the amd64 repositories were used.</dd> + </dl> + <p> + Only packages for a single architecture (i386 or amd64) are scanned. To my + knowledge, packages that come with different manuals for different + architectures either don't exist or are extremely rare. It does happen that + some packages are not available for all architectures. Usually, though, + every package is at least available for the most popular architecture, so + hopefully we're not missing out on much. <br /><br /> + The repositories are scanned for new packages on a daily basis. + </p> + + <h2>Database download</h2> + <p> + This site is backed by a PostgreSQL database containing all the man pages. + Weekly dumps of the full database are available for download at + <a href="http://dl.manned.org/dumps/">http://dl.manned.org/dumps/</a>. + <br /><br /> + Be warned that the download server may not be terribly reliable, so it is + advisable to use a client that supports resumption of partial downloads. See + <a href="/wget">wget's -c</a> or <a href="/curl">curl's -C</a>. + <br /><br /> + The database schema is "documented" at <a + href="https://code.blicky.net/yorhel/manned/src/branch/master/sql/schema.sql">schema.sql</a> + in the git repo. Note that these dumps don't constitute a stable API and, + while this won't happen frequently, incompatible schema changes or Postgres + major version bumps may occur. + </p> + + <h2>Other systems</h2> + <p> + Suggestions for new (or old) systems to index are welcome. + <br /><br /> + It would be great to index a few more non-Linux systems such as other BSDs, + Solaris/Illumos and Mac OS X. Unfortunately, those don't always follow a + binary package based approach, or are otherwise less easy to properly index. + <br /><br /> + In general, systems that follow an entirely source-based distribution + approach can't be indexed without compiling everything. Since that is both + very resource-heavy and open to security issues, there are no plans to + include manuals from such systems at the moment. So unless someone comes + with a solution I hadn't thought of yet, there won't be any Gentoo manuals + here. :-( + </p> + + <h2>Future plans</h2> + <p> + This site isn't nearly as awesome yet as it could be. Here's some ideas that + would be nice to have in the future: + <ul> + <li>Improved, more intelligent, search,</li> + <li><a href="/apropos.1">apropos(1)</a> emulation(?),</li> + <li>Diffs between various versions of a man page,</li> + <li>Anchor links within man pages, for easier linking to a section or paragraph,</li> + <li>Alternative formats (Text, PDF, more semantic HTML, etc),</li> + <li>A command-line client, like <a href="/man.1">man(1)</a> with manned.org as database backend.</li> + </ul> + </p> + + <h2>Copyright</h2> + <p> + All manual pages are copyrighted by their respective authors. The manuals + have been fetched from publically available repositories of free and + (primarily) open source software. The distributors of said software have put + in efforts to only include software and documentation that allows free + distribution. Nonetheless, if a manual that does not allow to be + redistributed has been inadvertently included in our index, please let me + know and I will have it removed as soon as possible. + </p> + _ + } } +}; - push @{$pkg->{table}}, [ - $pkgver && $pkgver eq $m->{version} ? {name=>''} : - {name => $m->{version}, href => "/pkg/$self->{sysbyid}{$m->{system}}{short}/$m->{category}/$m->{package}/$m->{version}"}, - { name => "$m->{name}($m->{section})", - $f->{hash} || lc($m->{hash}) eq lc($f->{cur}) ? () - : (href => sprintf('/%s/%s', $m->{name}, substr $m->{hash}, 0, 8)) - }, - { name => substr($m->{hash}, 0, 8), - $f->{hash} || lc($m->{hash}) eq lc($f->{cur}) ? () - : (href => sprintf('/%s/%s', $m->{name}, substr $m->{hash}, 0, 8)) - }, - { name => $m->{filename} } - ]; - $pkgver = $m->{version}; - } - - # Determine which elements to show/hide by default. - # It might make more sense to do this in JS, but since I am utterly - # incapable of writing maintainable JS I'm doing it here in order to keep the - # JS stupid and simple. - # TODO: Highlight systems/packages where the 'current' man page is? - for my $sys (@$tree) { - $sys->{expand} = 1 if $sys->{childs}[0]{name}; # Expand all systems that have named versions - $sys->{expand} = 1 if $f->{hash}; # Expand everything on 'location' - - my $i = 0; - for my $sysver (@{$sys->{childs}}) { - $i++; - $sysver->{expand} = 1 if !$sysver->{name}; # Expand unnamed versions (since you can't click them) - $sysver->{expand} = 1 if $f->{hash}; # Expand everything on 'location' - $sysver->{hide} = 1 if $i > 3 && @{$sys->{childs}} > 5; # Show only the first 3 versions - - for my $pkg (@{$sysver->{childs}}) { - $pkg->{expand} = 1 if @{$sysver->{childs}} <= 3; # Expand everything if there's not too many things to expand - $pkg->{expand} = 1 if $f->{hash}; # Expand everything on 'location' - - # TODO: Show/Hide duplicate hashes? - } - } - } - # Why JSON? Because TUWF::XML is pretty slow with many nodes - $self->resHeader('Content-Type' => 'application/json; charset=UTF-8'); - lit(JSON::XS->new->ascii->encode($tree)); +# Very simple (and fast) prefix match. +sub search_man { + my($q, $limit) = @_; + + my $sect = $q =~ s/^([0-9])\s+// || $q =~ s/\(([a-zA-Z0-9]+)\)$// ? $1 : ''; + my $name = $q =~ s/^([a-zA-Z0-9,.:_-]+)// ? $1 : ''; + + return !$name ? [] : tuwf->dbAll( + 'SELECT name, section FROM man_index !W ORDER BY name, section LIMIT ?', + { + 'lower(name) LIKE ?' => escape_like(lc $name).'%', + $sect ? ('section ILIKE ?' => escape_like(lc $sect).'%') : (), + }, + $limit + ); } +TUWF::get '/browse/search' => sub { + my $q = tuwf->reqGet('q')||''; + my $man = search_man $q, 150; + return tuwf->resRedirect("/$man->[0]{name}.$man->[0]{section}", 'temp') if @$man == 1; + + framework_ title => 'Search results for '.$q, sub { + h1_ 'Search results for '.$q; + # Package search would also be useful. + p_ 'Note: This is just a simple case-insensitive prefix match on the man names. In the future we\'ll have more powerful search functionality. Hopefully.'; + if(@$man) { + ul_ id => 'searchres', sub { + li_ sub { + a_ href => "/$_->{name}.$_->{section}", $_->{name}; + i_ " $_->{section}"; + } for @$man; + } + } else { + br_; br_; + b_ 'No results :-('; + } + }; +}; -package TUWF::Object; - -use TUWF ':html', 'html_escape'; -use Time::Local 'timegm'; -sub escape_like { - (my $v = shift) =~ s/([_%])/\\$1/g; - $v; -} +TUWF::get '/xml/search.xml' => sub { + my $q = tuwf->reqGet('q')||''; + my $man = search_man $q, 20; + tuwf->resHeader('Content-Type' => 'text/xml; charset=UTF-8'); + xml; + tag 'results', sub { + tag 'item', id => "$_->{name}.$_->{section}", %$_, undef for @$man; + }; +}; -sub htmlHeader { - my $self = shift; - my %o = @_; - html; - head; - Link rel => 'stylesheet', type => 'text/css', href => '/man.css?4'; - title $o{title}.' - manned.org'; - end 'head'; - body; +TUWF::get qr{/([^/]+)/([0-9a-f]{8})/src} => sub { + my $name = normalize_name tuwf->capture(1); + my $hash = tuwf->capture(2); + + my $nfo = tuwf->dbRowi(' + SELECT m.name, m.section, v.released, c.content + FROM man m + JOIN package_versions v ON v.id = m.package + JOIN contents c ON c.hash = m.hash + WHERE m.name =', \$name, 'AND', sql_hash_prefix('m.hash', $hash), ' + LIMIT 1' + ); + return tuwf->resNotFound if !$nfo->{name}; + + tuwf->resLastMod($nfo->{released}); + tuwf->resHeader('Content-Type', 'text/plain; charset=UTF-8'); + tuwf->resHeader('Content-Disposition', sprintf 'filename="%s.%s"', $nfo->{name}, $nfo->{section}); + lit $nfo->{content}; +}; - div id => 'header'; - a href => '/', 'manned.org'; - form action => '/browse/search', method => 'get'; - input type => 'text', name => 'q', id => 'q', tabindex => 1; - input type => 'submit', value => ' '; - end; - end; - div id => 'body'; -} +sub _man_nav { + my($man, $toc) = @_; + + my @sect = map $_->{section}, tuwf->dbAlli( + 'SELECT DISTINCT section FROM man WHERE name =', \$man->{name}, 'ORDER BY section' + )->@*; + + my @lang = map $_->{lang}, tuwf->dbAlli( + "SELECT DISTINCT substring(locale from '^[^.]+') AS lang + FROM man WHERE name =", \$man->{name}, 'AND section =', \$man->{section}, " + ORDER BY substring(locale from '^[^.]+') NULLS FIRST" + )->@*; + return if !@sect && !@lang && !@$toc; + + # TODO: This is ugly, especially because clicking on a translation or + # section, you can end up with a man page that is nowhere close to the man + # page you're currently reading. Opening a version selector box might be a + # better alternative. + div_ id => 'nav', sub { + if(@sect > 1) { + b_ 'Sections'; + p_ sub { + for (@sect) { + if($man->{section} eq $_) { + i_ $_; + } else { + a_ href => "/$man->{name}.$_", $_; + } + txt_ ' '; + } + } + } + if(@lang > 1) { + b_ 'Languages'; + p_ sub { + (my $cur = $man->{locale}||'') =~ s/\..*//; + for (@lang) { + if(($_||'') eq $cur) { + i_ $_ || 'default'; + } else { + a_ href => $_ ? "/lang/$_/$man->{name}.$man->{section}" : "/$man->{name}.$man->{section}", $_ || 'default'; + } + txt_ ' '; + } + } + } -sub htmlFooter { - my($self, %o) = @_; - - br style => 'clear: both'; - end; - div id => 'footer'; - lit 'All manual pages are copyrighted by their respective authors. - | <a href="/info/about">About manned.org</a> - | <a href="mailto:contact@manned.org">Contact</a> - | <a href="https://code.blicky.net/yorhel/manned">Source</a>'; - end; - script type => 'text/javascript', src => '/man.js', ''; - end; - end 'html'; - - # write the SQL queries as a HTML comment when debugging is enabled - # (stolen from VNDB code) - if($self->debug) { - lit "\n<!--\n SQL Queries:\n"; - for (@{$self->{_TUWF}{DB}{queries}}) { - my $q = !ref $_->[0] ? $_->[0] : - $_->[0][0].(exists $_->[0][1] ? ' | "'.join('", "', map defined()?$_:'NULL', @{$_->[0]}[1..$#{$_->[0]}]).'"' : ''); - $q =~ s/^\s//g; - lit sprintf " [%6.2fms] %s\n", $_->[1]*1000, $q; + if(@$toc > 1) { + b_ 'Table of Contents'; + ul_ sub { + for (0..$#$toc) { + li_ sub { + a_ href => sprintf('#head%d', $_+1), lc $toc->[$_]; + } + } + } + } } - lit "-->\n"; - } } -# Set the last modification time from a string in yyyy-mm-dd format. -sub setLastMod { - my($s, $d) = @_; - return if $d !~ /^(\d{4})-(\d{2})-(\d{2})/; - my @t = gmtime timegm 0,0,0,$3,$2-1,$1; - $s->resHeader('Last-Modified', sprintf '%s, %02d %s %04d %02d:%02d:%02d GMT', - (qw|Sun Mon Tue Wed Thu Fri Sat|)[$t[6]], $t[3], - (qw|Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec|)[$t[4]], - $t[5]+1900, $t[2], $t[1], $t[0]); -} - - -sub dbManContent { - my($s, $hash) = @_; - return $s->dbRow(q{SELECT content FROM contents WHERE hash = decode(?, 'hex')}, $hash)->{content}; +# Replace .so's in man source with the contents (if available in the same +# package) or with a reference to the other man page. +sub soelim { + my($verid, $src) = @_; + + # tix comes with* a custom(?) macro package. But it looks okay even without loading that. + # (* It actually doesn't, the tcllib package appears to have that file, but doesn't '.so' it) + $src =~ s/^\.so man.macros$//mg; + + # Other .so's should be handled by html() + $src =~ s{^\.so (.+)$}{ + my $path = $1; + my $name = (reverse split /\//, $path)[0]; + my($man) = $verid ? man_pref_name $name, sql 'v.id =', \$verid : (); + $man->{name} + # Recursive soelim, but the second call gets $verid=0 so we don't keep checking the database + ? soelim(0, tuwf->dbVali("SELECT content FROM contents WHERE hash = decode(", \$man->{hash}, ", 'hex')")) + : ".in -10\n.sp\n\[\[\[MANNEDINCLUDE$path\]\]\]" + }emg; + $src; } -# Options: name, section, shorthash, package, results, sort, countonly -sub dbManInfo { - my $s = shift; - my %o = ( - sort => '', - page => 1, - results => 10_000, - @_ - ); - - my %where = ( - $o{name} ? ('m.name = ?' => $o{name}) : (), - $o{package} ? ('m.package = ?' => $o{package}) : (), - defined($o{section}) ? ('m.section = ?' => $o{section}) : (), - $o{locale} ? ('m.locale = ?' => $o{locale}) : (), - defined($o{locale}) && !$o{locale} ? ('m.locale IS NULL' => 1) : (), - $o{shorthash} ? (q{substring(m.hash from 1 for 4) = decode(?, 'hex')} => $o{shorthash}) : (), - $o{hash} ? (q{m.hash = decode(?, 'hex')} => $o{hash}) : (), - ); - - my $order = - $o{sort} eq 'syspkgname' ? 'ORDER BY s.name, s.relorder DESC, p.name, v.released DESC, m.name, m.locale NULLS FIRST, m.filename' : ''; - - my $select = $o{countonly} ? 'COUNT(*) as count' - : "p.system, p.category, p.name AS package, v.version, v.released, v.id AS verid, m.name, m.section, m.filename, m.locale, encode(m.hash, 'hex') AS hash"; - - my($r, $np) = $s->dbPage(\%o, q{ - SELECT !s - FROM man m - JOIN package_versions v ON v.id = m.package - JOIN packages p ON p.id = v.package - JOIN systems s ON s.id = p.system - !W - !s - }, $select, \%where, $order); - wantarray ? ($r, $np) : $r; -} +# This one has to go before the other mappings, to ensure that links work for +# man pages called 'pkg' or 'man'. This also means that we can't have a +# system named 8 hex digits, but at least that's easy to guarantee. :) +TUWF::get qr{/(?<name>[^/]+)(?:/(?<hash>[0-9a-f]{8}))?} => sub { + my $name = normalize_name tuwf->capture('name'); + my $shorthash = tuwf->capture('hash'); + + # Unfortunately, even in the permalink format with the hash, we don't know + # from which package we're supposed to get the man page. This info is + # needed in order to do .so substitution, so we can substitute files from + # the same package as the requested man page. Use the dbManPref logic here + # to deterministically select a good package. + my($man, undef) = $shorthash + ? man_pref undef, sql 'm.name =', \$name, 'AND', sql_hash_prefix 'm.hash', $shorthash + : man_pref_name $name, 'true'; + return tuwf->resNotFound() if !$man->{name}; + + my $fmt = ManUtils::html ManUtils::fmt_block soelim $man->{verid}, + tuwf->dbVali("SELECT content FROM contents WHERE hash = decode(", \$man->{hash}, ", 'hex')"); + my @toc; + $fmt =~ s{\n<b>(.+?)<\/b>\n}{ + push @toc, $1; + my $c = @toc; + qq{\n<a href="#head$c" id="head$c">$1</a>\n} + }eg; + + my $hasversions = tuwf->dbVali( + 'SELECT 1 FROM man WHERE name =', \$man->{name}, 'AND section =', \$man->{section}, + 'AND locale IS NOT DISTINCT FROM', \$man->{locale}, + 'AND hash <> decode(', \$man->{hash}, ", 'hex') LIMIT 1" + ); + + tuwf->resLastMod($man->{released}); + framework_ title => $name, sub { + _man_nav $man, \@toc; + div_ id => 'manbuttons', sub { + h1_ $man->{name}; + ul_ 'data-hash' => $man->{hash}, + 'data-name' => $man->{name}, + 'data-section' => $man->{section}, + 'data-locale' => $man->{locale}||'', + 'data-hasversions' => $hasversions?1:0, + sub { + li_ sub { a_ href => "/$man->{name}/".substr($man->{hash}, 0, 8).'/src', 'source' }; + li_ sub { a_ href => "/$man->{name}/".substr($man->{hash}, 0, 8), 'permalink' }; + } + }; + div_ id => 'manres', class => 'hidden', ''; + + div_ id => 'contents', sub { + pre_ sub { lit_ $fmt } + } + }; +}; -# Very simple (and fast) prefix match. -sub dbSearch { - my($s, $q, $limit) = @_; - - my $sect = $q =~ s/^([0-9])\s+// || $q =~ s/\(([a-zA-Z0-9]+)\)$// ? $1 : ''; - my $name = $q =~ s/^([a-zA-Z0-9,.:_-]+)// ? $1 : ''; - - return !$name ? [] : $s->dbAll( - 'SELECT name, section FROM man_index !W ORDER BY name, section LIMIT ?', - { - 'lower(name) LIKE ?' => escape_like(lc $name).'%', - $sect ? ('section ILIKE ?' => escape_like(lc $sect).'%') : (), - }, - $limit - ); -} +TUWF::get qr{/pkg/([^/]+)} => sub { + my $short = tuwf->capture(1); + + my $sys = sysbyshort->{$short}; + return tuwf->resNotFound if !$sys; + + my $f = tuwf->validate(get => + c => { onerror => 'all', enum => [ '0', 'all', 'a'..'z' ] }, + p => { onerror => 1, uint => 1, range => [1,200] }, + )->data; + + my $where = sql 'system =', \$sys->{id}, + $f->{c} eq '0' ? ('AND (ASCII(name) < 97 OR ASCII(name) > 122) AND (ASCII(name) < 65 OR ASCII(name) > 90)') : + $f->{c} ne 'all' ? ('AND LOWER(SUBSTR(name, 1, 1)) =', \$f->{c}) : (); + my $count = tuwf->dbVali('SELECT count(*) FROM', $packages_with_man, 'p WHERE', $where); + my $pkg = tuwf->dbPagei({ results => 200, page => $f->{p} }, + 'SELECT id, system, name, category FROM', $packages_with_man, 'p WHERE', $where, 'ORDER BY name' + ); + + my $title = "Packages for $sys->{name}".($sys->{release}?" $sys->{release}":""); + framework_ title => $title, sub { + div_ id => 'pkglist', sub { + h1_ $title; + + p_ class => 'charselect', sub { + for('all', 0, 'a'..'z') { + a_ href => "/pkg/$short?c=$_", $_?uc$_:'#' if $_ ne $f->{c}; + b_ $_?uc$_:'#' if $_ eq $f->{c}; + } + }; + + p_ 'Note: Packages without man pages are not listed.'; + paginate_ "/pkg/$short?c=$f->{c};p=", $count, 200, $f->{p}; + ul_ id => 'packages', sub { + li_ sub { + a_ href => "/pkg/$short/$_->{category}/$_->{name}", $_->{name}; + i_ ' '.$_->{category}; + } for @$pkg; + }; + paginate_ "/pkg/$short?c=$f->{c};p=", $count, 200, $f->{p}; + } + }; +}; -# Get the preferred man page for the given filters. Returns a row with the same fields as dbManInfo(). -sub dbManPref { - my($s, %o) = @_; - my %where = ( - length $o{name} ? ('m.name = ?' => $o{name}) : (), - $o{shorthash} ? (q{substring(m.hash from 1 for 4) = decode(?, 'hex')} => $o{shorthash}) : (), - length $o{section} ? ('m.section LIKE ?' => escape_like($o{section}).'%') : (), - $o{sysid} ? ('p.system IN(!l)' => [ ref $o{sysid} ? $o{sysid} : [$o{sysid}] ]) : (), - $o{package} ? ('p.id = ?' => $o{package}) : (), - $o{pkgver} ? ('v.id = ?' => $o{pkgver}) : (), - $o{language} ? (q{substring(locale from '^[^.]+') = ?} => $o{language}) : (), - ); - - # Criteria to determine a "preferred" man page: - # 1. english: English versions of a man page have preference over other locales - # 2. pkgver: Newer versions of the same package have preference over older versions - # 3. stdloc: Prefer man pages in standard locations - # 4. secmatch: Prefer an exact section match - # 5. arch: Prefer Arch over other systems (because it tends to be the most up-to-date, and closest to upstreams) - # 6. ubuntu: If there's no Arch, prefer Ubuntu over other systems (again, tends to be more up-to-date) - # (also resolves distro-specific tooling disputes such as https://code.blicky.net/yorhel/manned/issues/1 ) - # 7. sysrel: Prefer a later system release over an older release - # 8. secorder: Lower sections before higher sections (because man does it this way, for some reason) - # 9. pkgdate: Prefer more recent packages (cross-distro) - # 10.Fall back on hash comparison, to ensure the result is stable - - $s->dbAll(q{ - WITH unfiltered AS ( - SELECT s AS sys, p AS pkg, v AS ver, m AS man - FROM man m - JOIN package_versions v ON v.id = m.package - JOIN packages p ON p.id = v.package - JOIN systems s ON s.id = p.system - !W - ), f_english AS( - SELECT * FROM unfiltered WHERE NOT EXISTS(SELECT 1 FROM unfiltered WHERE is_english_locale((man).locale)) OR is_english_locale((man).locale) - ), f_pkgver AS( - SELECT * FROM f_english a WHERE NOT EXISTS(SELECT 1 FROM f_english b WHERE (a.ver).package = (b.ver).package AND (a.ver).released < (b.ver).released) - ), f_stdloc AS( - SELECT * FROM f_pkgver WHERE NOT EXISTS(SELECT 1 FROM f_pkgver WHERE is_standard_man_location((man).filename)) OR is_standard_man_location((man).filename) - ), f_secmatch AS( - SELECT * FROM f_stdloc WHERE NOT EXISTS(SELECT 1 FROM f_stdloc WHERE (man).section = ?) OR (man).section = ? - ), f_arch AS( - SELECT * FROM f_secmatch WHERE NOT EXISTS(SELECT 1 FROM f_secmatch WHERE (sys).id = 1) OR (sys).id = 1 - ), f_ubuntu AS( - SELECT * FROM f_arch WHERE NOT EXISTS(SELECT 1 FROM f_arch WHERE (sys).name = 'Ubuntu') OR (sys).name = 'Ubuntu' - ), f_sysrel AS( - SELECT * FROM f_ubuntu a WHERE NOT EXISTS(SELECT 1 FROM f_ubuntu b WHERE (a.sys).name = (b.sys).name AND (a.sys).relorder < (b.sys).relorder) - ), f_secorder AS( - SELECT * FROM f_sysrel a WHERE NOT EXISTS(SELECT 1 FROM f_sysrel b WHERE (a.man).section > (b.man).section) - ), f_pkgdate AS( - SELECT * FROM f_secorder a WHERE NOT EXISTS(SELECT 1 FROM f_secorder b WHERE (a.ver).released < (b.ver).released) - ) - SELECT (pkg).system, (pkg).category, (pkg).name AS package, (ver).version, (ver).released, (ver).id AS verid, - (man).name, (man).section, (man).filename, (man).locale, encode((man).hash, 'hex') AS hash - FROM f_pkgdate ORDER BY (man).hash LIMIT 1 - }, \%where, $o{section}||'', $o{section}||'')->[0]; -} +# Package info: /pkg/$system/$category/$name (/$version); $category may contain a slash, too. +TUWF::get qr{/pkg/([^/]+)/(.+)} => sub { + my ($short, $path) = tuwf->captures(1,2); + + my $sys = sysbyshort->{$short}; + return tuwf->resNotFound if !$sys; + + my($pkg, $ver) = pkg_frompath(sql('system =', \$sys->{id}), $path); + return tuwf->resNotFound if !$pkg; + + my $vers = tuwf->dbAlli(' + SELECT id, version, released + FROM package_versions v + WHERE package =', \$pkg->{id}, ' + AND EXISTS(SELECT 1 FROM man m WHERE m.package = v.id) + ORDER BY released DESC' + ); + my $sel = $ver ? (grep $_->{version} eq $ver, @$vers)[0] : $vers->[0]; + return tuwf->resNotFound if !$sel; + + my $p = tuwf->validate(get => p => { onerror => 1, uint => 1, range => [1,100] })->data; + + my $count = tuwf->dbVali('SELECT count(*) FROM man WHERE package =', \$sel->{id}); + my $mans = tuwf->dbPagei({ results => 200, page => $p }, + "SELECT name, encode(hash, 'hex') AS hash, section, locale, filename + FROM man WHERE package =", \$sel->{id}, ' + ORDER BY name, locale NULLS FIRST, filename' + ); + + # Latest version of this package determines last modification date of the page. + tuwf->resLastMod($vers->[0]{released}); + + my $title = "$sys->{name}".($sys->{release}?" $sys->{release}":"")." / $pkg->{category} / $pkg->{name}"; + framework_ title => "$title $sel->{version}", sub { + h1_ $title; + + div_ id => 'pkgversions', sub { + h2_ 'Versions'; + ul_ sub { + li_ sub { + a_ href => "/pkg/$sys->{short}/$pkg->{category}/$pkg->{name}/$_->{version}", $_->{version} if $_ != $sel; + b_ " $_->{version}" if $_ == $sel; + i_ " $_->{released}"; + } for(@$vers); + } + }; + + div_ id => 'pkgmans', sub { + h2_ "Manuals for version $sel->{version}"; + paginate_ "/pkg/$sys->{short}/$pkg->{category}/$pkg->{name}/$sel->{version}?p=", $count, 200, $p; + ul_ sub { + li_ sub { + a_ href => "/$_->{name}/".substr($_->{hash},0,8), "$_->{name}($_->{section})"; + b_ " $_->{locale}" if $_->{locale}; + i_ " $_->{filename}"; + } for(@$mans); + }; + paginate_ "/pkg/$sys->{short}/$pkg->{category}/$pkg->{name}/$sel->{version}?p=", $count, 200, $p; + } + } +}; +# /browse/<pkg> has been moved to /pkg/ with the package category added to the path +TUWF::get qr{/browse/([^/]+)} => sub { tuwf->resRedirect('/pkg/'.tuwf->capture(1), 'perm') }; +TUWF::get qr{/browse/([^/]+)/([^/]+)(?:/([^/]+))?} => sub { + my($sys, $name, $ver) = tuwf->captures(1,2,3); + $sys = sysbyshort->{$sys}; + return tuwf->resNotFound if !$sys; + my $pkgs = tuwf->dbRowi('SELECT category FROM packages WHERE system =', \$sys->{id}, 'AND name =', \$name, 'LIMIT 1'); + return tuwf->resNotFound if !defined $pkgs->{category}; + tuwf->resRedirect("/pkg/$sys->{short}/$pkgs->{category}/$name".($ver ? "/$ver" :''), 'perm'); +}; -# Given the name of a man page with optional section, find out the actual name -# and section prefix of the man page and the preferred version. -sub dbManPrefName { - my($s, $name, %o) = @_; +# Redirects for canonical URLs +TUWF::get qr{/man/([^/]+)/(.+)} => sub { + my($sys, $path) = tuwf->captures(1,2); - my $man = $s->dbManPref(%o, name => $name); - return ($man, '') if $man; + # Path can be: + # 1. <name> + # 2. <category>/<package>/<name> + # 3. <category>/<package>/<version>/<name> - return (undef, '') if $name !~ s/\.([^.]+)$// || !length $name; - my $section = $1; - $man = $s->dbManPref(%o, name => $name, section => $section); - return ($man, $section) if $man; - return (undef, ''); -} + # $sys can be either a full system 'short' name, or a prefix (e.g. 'debian' meaning 'any debian-* version') + my $sysid = sysbyshort->{$sys}; + $sysid = $sysid ? [$sysid->{id}] : [ map sysbyshort->{$_}{id}, grep /^\Q$sys\E-/, keys sysbyshort->%* ]; + return tuwf->resNotFound if !@$sysid; + my $man; + if($path !~ m{/}) { # (1) + ($man) = tuwf->dbManPrefName($path, sysid => $sysid); -# Returns 1 of there are alternative versions of the given man page. -sub dbManHasVersions { - my($s, $name, $section, $locale, $hash) = @_; - return $s->dbRow( - q{SELECT 1 AS ok FROM man WHERE name = ? AND section = ? AND locale IS NOT DISTINCT FROM ? AND hash <> decode(?, 'hex') LIMIT 1}, - $name, $section, $locale, $hash - )->{ok}||0; -} + } else { + $path =~ s{/([^/]+)$}{}; + my $name = $1; + my($pkg, $ver) = pkg_frompath(sql('system IN', $sysid), $path); # Handles (2) and (3) + return tuwf->resNotFound if !$pkg; -# Returns all available languages for a man page -sub dbManLanguages { - my($s, $name, $section) = @_; - return map $_->{lang}, @{$s->dbAll(q{SELECT DISTINCT substring(locale from '^[^.]+') AS lang - FROM man WHERE name = ? AND section = ? - ORDER BY substring(locale from '^[^.]+') NULLS FIRST - }, $name, $section)}; -} + my $verid = tuwf->dbVali('SELECT id FROM package_versions WHERE package =', \$pkg->{id}, 'AND version =', \$ver); + return tuwf->resNotFound if $ver && !$verid; + ($man) = tuwf->dbManPrefName($name, sysid => $sysid, pkgid => $pkg->{id}, pkgver => $verid); + } + return tuwf->resNotFound if !$man; -# Returns all available languages for a man page -sub dbManSections { - my($s, $name) = @_; - return map $_->{section}, @{$s->dbAll(q{SELECT DISTINCT section FROM man WHERE name = ? ORDER BY section}, $name)}; -} + tuwf->resRedirect("/$man->{name}/".substr($man->{hash}, 0, 8), 'temp'); +}; -sub dbSystemGet { - return shift->dbAll('SELECT id, name, release, short, relorder FROM systems ORDER BY name, relorder'); -} +# Redirect for a specific language for a man page. +# I'm not a fan of this solution; might drop it in the future. +TUWF::get qr{/lang/([^/]+)/([^/]+)} => sub { + my $lang = tuwf->capture(1); + my $name = normalize_name tuwf->capture(2); + my($man, undef) = man_pref_name $name, + sql "substring(locale from '^[^.]+') ilike", \escape_like $lang; + return tuwf->resNotFound if !$man->{name}; + tuwf->resRedirect("/$man->{name}/".substr($man->{hash}, 0, 8), 'temp'); +}; -# Options: sysid char hasman page results countonly -sub dbPackageGet { - my $s = shift; - my %o = (results => 10, page => 1, @_); - - my @where = ( - $o{sysid} ? ('system IN(!l)' => [ ref $o{sysid} ? $o{sysid} : [$o{sysid}] ]) : (), - $o{category} ? ('category = ?' => $o{category}) : (), - $o{name} ? ('name = ?' => $o{name} ) : (), - # This seems slow, perhaps cache? - defined($o{hasman}) ? ('!s EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = p.id AND EXISTS(SELECT 1 FROM man m WHERE m.package = pv.id))' => $o{hasman}?'':'NOT') : (), - $o{char} ? ( 'LOWER(SUBSTR(name, 1, 1)) = ?' => $o{char} ) : (), - defined($o{char}) && !$o{char} ? ( '(ASCII(name) < 97 OR ASCII(name) > 122) AND (ASCII(name) < 65 OR ASCII(name) > 90)' => 1 ) : (), - ); - - my $select = $o{countonly} ? 'COUNT(*) as count' : 'id, system, name, category'; - my $order = $o{countonly} ? '' : 'ORDER BY name'; - - my($r, $np) = $s->dbPage(\%o, - 'SELECT !s FROM packages p !W !s', - $select, \@where, $order - ); - wantarray ? ($r, $np) : $r; -} +TUWF::get '/json/tree.json' => sub { + my $f = tuwf->validate(get => + name => { required => 0, maxlength => 256 }, + section => { required => 0, maxlength => 32 }, + locale => { required => 0, maxlength => 32 }, + cur => { required => 0, default => '', regex => qr/^[a-fA-F0-9]{40}$/ }, + hash => { required => 0, default => '', regex => qr/^[a-fA-F0-9]{40}$/ }, + )->data; + return tuwf->resNotFound() if !$f->{hash} && !($f->{section} && $f->{name}); + + my $l = tuwf->dbAlli(" + SELECT p.system, p.category, p.name AS package, v.version, v.released, v.id AS verid, m.name, m.section, m.filename, m.locale, encode(m.hash, 'hex') AS hash + FROM man m + JOIN package_versions v ON v.id = m.package + JOIN packages p ON p.id = v.package + JOIN systems s ON s.id = p.system + WHERE", sql_and( + length $f->{hash} ? sql 'm.hash = decode(', \$f->{hash}, ", 'hex')" : (), + length $f->{name} ? sql 'm.name =', \$f->{name} : (), + length $f->{section} ? sql 'm.section =', \$f->{section} : (), + length $f->{locale} ? sql 'm.locale =', \$f->{locale} : (), + defined $f->{locale} && $f->{locale} eq '' ? 'm.locale IS NULL' : (), + ), ' + ORDER BY s.name, s.relorder DESC, p.name, v.released DESC, m.name, m.locale NULLS FIRST, m.filename + '); + + # Convert the list into a tree + my $tree = []; + my($sys, $sysver, $pkg, $pkgver); + for my $m (@$l) { + my $sysname = sysbyid->{$m->{system}}{name}; + if(!$sys || $sysname ne $sys->{name}) { + $sys = { name => $sysname, childs => [] }; + $sysver = undef; + push @$tree, $sys; + } + my $sysversion = sysbyid->{$m->{system}}{release} || ''; + if(!$sysver || $sysversion ne $sysver->{name}) { + $sysver = { name => $sysversion, childs => [] }; + $pkg = undef; + push @{$sys->{childs}}, $sysver; + } -sub dbPackageVersions { - my($s, $id, $version) = @_; + if(!$pkg || $m->{package} ne $pkg->{name}) { + $pkg = { name => $m->{package}, i => $m->{category}, table => [] }; + $pkgver = undef; + push @{$sysver->{childs}}, $pkg; + } - my %where = ( - 'package = ?' => $id, - $version ? ('version = ?' => $version) : (), - 'EXISTS(SELECT 1 FROM man m WHERE m.package = v.id)' => 1, - ); + push @{$pkg->{table}}, [ + $pkgver && $pkgver eq $m->{version} ? {name=>''} : + {name => $m->{version}, href => "/pkg/".sysbyid->{$m->{system}}{short}."/$m->{category}/$m->{package}/$m->{version}"}, + { name => "$m->{name}($m->{section})", + $f->{hash} || lc($m->{hash}) eq lc($f->{cur}) ? () + : (href => sprintf('/%s/%s', $m->{name}, substr $m->{hash}, 0, 8)) + }, + { name => substr($m->{hash}, 0, 8), + $f->{hash} || lc($m->{hash}) eq lc($f->{cur}) ? () + : (href => sprintf('/%s/%s', $m->{name}, substr $m->{hash}, 0, 8)) + }, + { name => $m->{filename} } + ]; + $pkgver = $m->{version}; + } - return $s->dbAll(q{ - SELECT id, version, released - FROM package_versions v !W - ORDER BY released DESC}, - \%where) -} + # Determine which elements to show/hide by default. + # It might make more sense to do this in JS, but since I am utterly + # incapable of writing maintainable JS I'm doing it here in order to keep the + # JS stupid and simple. + # TODO: Highlight systems/packages where the 'current' man page is? + for my $sys (@$tree) { + $sys->{expand} = 1 if $sys->{childs}[0]{name}; # Expand all systems that have named versions + $sys->{expand} = 1 if $f->{hash}; # Expand everything on 'location' + + my $i = 0; + for my $sysver (@{$sys->{childs}}) { + $i++; + $sysver->{expand} = 1 if !$sysver->{name}; # Expand unnamed versions (since you can't click them) + $sysver->{expand} = 1 if $f->{hash}; # Expand everything on 'location' + $sysver->{hide} = 1 if $i > 3 && @{$sys->{childs}} > 5; # Show only the first 3 versions + + for my $pkg (@{$sysver->{childs}}) { + $pkg->{expand} = 1 if @{$sysver->{childs}} <= 3; # Expand everything if there's not too many things to expand + $pkg->{expand} = 1 if $f->{hash}; # Expand everything on 'location' + + # TODO: Show/Hide duplicate hashes? + } + } + } + tuwf->resJSON($tree); +}; -sub dbStats { - return $_[0]->dbRow('SELECT * FROM stats_cache'); -} +TUWF::run(); @@ -454,7 +454,7 @@ function dsResults(hr, obj) { )); ul.appendChild(tag('li', tag('a', {href:'#', onclick: buttonclick, - 'data-url': '/json/tree.json?hash='+hash+';name='+name+';section='+section, + 'data-url': '/json/tree.json?hash='+hash, 'data-p': 'This manual page was found in the following locations.'}, 'locations'))); })(); |