diff options
author | Yorhel <git@yorhel.nl> | 2021-12-14 15:06:05 +0100 |
---|---|---|
committer | Yorhel <git@yorhel.nl> | 2021-12-14 15:08:54 +0100 |
commit | f376f1f137ce7b3f2a1fe992d66b332443287b86 (patch) | |
tree | 26056570f6266bd6e54cd6eb4d522da940b91f96 | |
parent | 6f7f59c6dfa1c94ecfaf14ec81d895d616fddc19 (diff) |
Large-ish SQL schema revamp/optimizations
Primarily aimed at reducing the size of the old 'man' (now: files)
table, using smaller integers to refer to man contents and text fields,
and storing a shorthash as an integer for quick lookups. This better
normalization also removes the need to keep a separate 'man_index' cache
for the search function.
The old schema wasn't necessarily bad, but I was in the mood for some
optimizations. And a little cleanup.
Prolly introduces a bunch of new bugs, I haven't tested this too well.
-rw-r--r-- | indexer/src/pkg.rs | 39 | ||||
-rw-r--r-- | sql/schema.sql | 89 | ||||
-rw-r--r-- | sql/update-2021-12-14.sql | 106 | ||||
-rwxr-xr-x | util/cron.sh | 8 | ||||
-rw-r--r-- | util/update_indices.sql | 12 | ||||
-rwxr-xr-x | www/index.pl | 140 |
6 files changed, 267 insertions, 127 deletions
diff --git a/indexer/src/pkg.rs b/indexer/src/pkg.rs index 654de1a..11aab66 100644 --- a/indexer/src/pkg.rs +++ b/indexer/src/pkg.rs @@ -87,7 +87,7 @@ fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<i32> { // XXX: Should we update released & arch here? verid = res?.get(0); info!("Overwriting package pkgid {} verid {}, {}", pkgid, verid, pkginfo); - tr.query("DELETE FROM man WHERE package = $1", &[&verid]).unwrap(); + tr.query("DELETE FROM files WHERE pkgver = $1", &[&verid]).unwrap(); Some(verid) } else { @@ -97,13 +97,21 @@ fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<i32> { } -fn insert_man_row<T: postgres::GenericClient>(tr: &mut T, verid: i32, path: &str, enc: &str, hash: &[u8]) { +fn insert_man_row<T: postgres::GenericClient>(tr: &mut T, verid: i32, path: &str, enc: &str, content: i32) { let (name, sect, locale) = man::parse_path(path).unwrap(); - let locale = if locale == "" { None } else { Some(locale) }; - if let Err(e) = tr.execute( - "INSERT INTO man (package, name, filename, locale, hash, section, encoding) VALUES ($1, $2, '/'||$3, $4, $5, $6, $7)", - &[&verid, &name, &path, &locale, &hash, §, &enc] - ) { + let q = "WITH ms(id) AS (SELECT id FROM mans WHERE name = $2 AND section = $3), + mi(id) AS (INSERT INTO mans (name, section) SELECT $2, $3 WHERE NOT EXISTS(SELECT 1 FROM ms) RETURNING id), + m(id) AS (SELECT id FROM ms UNION SELECT id FROM mi), + ls(id) AS (SELECT id FROM locales WHERE locale = $5), + li(id) AS (INSERT INTO locales (locale) SELECT $5 WHERE NOT EXISTS(SELECT 1 FROM ls) RETURNING id), + l(id) AS (SELECT id FROM ls UNION SELECT id FROM li), + es(id) AS (SELECT id FROM encodings WHERE encoding = $6), + ei(id) AS (INSERT INTO encodings (encoding) SELECT $6 WHERE NOT EXISTS(SELECT 1 FROM es) RETURNING id), + e(id) AS (SELECT id FROM es UNION SELECT id FROM ei), + c(shorthash) AS (SELECT hash_to_shorthash(hash) FROM contents WHERE id = $4) + INSERT INTO files (pkgver, man, content, shorthash, locale, encoding, filename) + SELECT $1, m.id, $4, c.shorthash, l.id, e.id, '/'||$7 FROM m, l, e, c"; + if let Err(e) = tr.execute(q, &[&verid, &name, §, &content, &locale, &enc, &path]) { // I think this can only happen if archread gives us the same file twice, which really // shouldn't happen. But I'd rather continue with an error logged than panic. error!("Can't insert verid {} fn {}: {}", verid, path, e); @@ -123,29 +131,30 @@ fn insert_man<T: postgres::GenericClient>(tr: &mut T, verid: i32, paths: &[&str] cont = cont.replace(0 as char, ""); } - tr.execute( - "INSERT INTO contents (hash, content) VALUES($1, $2) ON CONFLICT (hash) DO NOTHING", - &[&dig.as_ref(), &cont] - ).unwrap(); + let q = "WITH s(id) AS (SELECT id FROM contents WHERE hash = $1), + i(id) AS (INSERT INTO contents (hash, content) SELECT $1, $2 WHERE NOT EXISTS(SELECT 1 FROM s) RETURNING id) + SELECT id FROM s UNION SELECT id FROM i"; + let id: i32 = tr.query_one(q, &[&dig.as_ref(), &cont]).unwrap().get(0); for path in paths { - insert_man_row(tr, verid, path, enc, dig.as_ref()); + insert_man_row(tr, verid, path, enc, id); info!("Inserted man page: {} ({})", path, enc); } } fn insert_link<T>(tr: &mut T, verid: i32, src: &str, dest: &str) where T: postgres::GenericClient { - let res = match tr.query_opt("SELECT hash, encoding FROM man WHERE package = $1 AND filename = '/'||$2", &[&verid, &dest]).unwrap() { + let q = "SELECT f.content, e.encoding FROM files f JOIN encodings e ON e.id = f.encoding WHERE pkgver = $1 AND filename = '/'||$2"; + let res = match tr.query_opt(q, &[&verid, &dest]).unwrap() { None => { /* Can happen if man::decode() failed previously. */ error!("Link to unindexed man page: {} -> {}", src, dest); return; }, Some(x) => x }; - let hash: Vec<u8> = res.get(0); + let content: i32 = res.get(0); let enc: String = res.get(1); - insert_man_row(tr, verid, src, &enc, &hash); + insert_man_row(tr, verid, src, &enc, content); info!("Inserted man link: {} -> {}", src, dest); } diff --git a/sql/schema.sql b/sql/schema.sql index a5b43c2..cce549c 100644 --- a/sql/schema.sql +++ b/sql/schema.sql @@ -7,15 +7,44 @@ CREATE TABLE systems ( short varchar NOT NULL ); + CREATE TABLE contents ( + id SERIAL PRIMARY KEY, -- 'hash' is the SHA1 of the man page file after decompression but *before* -- encoding conversion and removing 0-bytes. This means taking sha1(content) -- may not necessary match the hash, and it's possible for the same content -- to be in the database under multiple hashes (but I suspect that's rare). - hash bytea PRIMARY KEY, - content varchar NOT NULL + hash bytea NOT NULL UNIQUE, + content text NOT NULL +); + + +-- Unique man page, as identified by name & section +CREATE TABLE mans ( + id SERIAL PRIMARY KEY, + name text NOT NULL, + section text NOT NULL, + UNIQUE(name, section) +); +CREATE INDEX mans_name ON mans USING btree(lower(name) text_pattern_ops); + + +-- List of man page locales for efficient referencing. Some locales include +-- the encoding in their name, which isn't really correct or even necessary +-- since we convert everything to UTF-8 anyway, but w/e, Can fix later. +CREATE TABLE locales ( + id SMALLSERIAL PRIMARY KEY, + locale text NOT NULL UNIQUE +); + + +-- List of encodings for efficient referencing. +CREATE TABLE encodings ( + id SMALLSERIAL PRIMARY KEY, + encoding text NOT NULL UNIQUE ); + CREATE TABLE packages ( id SERIAL PRIMARY KEY, system integer NOT NULL REFERENCES systems(id) ON DELETE CASCADE ON UPDATE CASCADE, @@ -31,6 +60,7 @@ CREATE TABLE packages ( UNIQUE(system, name, category) -- Note the order, lookups on (system,name) are common ); + CREATE TABLE package_versions ( id SERIAL PRIMARY KEY, package integer NOT NULL REFERENCES packages(id) ON DELETE CASCADE, @@ -40,43 +70,36 @@ CREATE TABLE package_versions ( UNIQUE(package, version) ); -CREATE TABLE man ( - package integer NOT NULL REFERENCES package_versions(id) ON DELETE CASCADE, - name varchar NOT NULL, - filename varchar NOT NULL, - locale varchar, - hash bytea NOT NULL, - section varchar NOT NULL, - encoding varchar, - UNIQUE(package, filename) -); - -CREATE INDEX ON man (hash); -CREATE INDEX ON man (name); - - - -CREATE TABLE man_index AS SELECT DISTINCT name, section FROM man; -CREATE INDEX ON man_index USING btree(lower(name) text_pattern_ops); - -CREATE TABLE stats_cache AS SELECT count(distinct hash) AS hashes, count(distinct name) AS mans, count(*) AS files, count(distinct package) AS packages FROM man; - +CREATE TABLE files ( + pkgver integer NOT NULL REFERENCES package_versions(id) ON DELETE CASCADE, + man integer NOT NULL REFERENCES mans(id), + content integer NOT NULL REFERENCES content(id), + shorthash integer NOT NULL, -- cache: hash_to_shorthash(content.hash) + locale smallint NOT NULL REFERENCES locales(id) + -- The original encoding the man page was found in. This column isn't really + -- used at the moment, but is potentially useful when investigating encoding + -- issues. + encoding smallint NOT NULL REFERENCES encodings(id), + filename text NOT NULL, + PRIMARY KEY(pkgver, filename) +); +CREATE INDEX ON files (man, shorthash); +CREATE INDEX ON files (content); --- Removes any path components and compression extensions from the filename. -CREATE OR REPLACE FUNCTION basename_from_filename(fn text) RETURNS text AS $$ - SELECT regexp_replace(fn, '^.+/([^/][^/]*?)(?:\.gz|\.lzma|\.xz|\.bz2|\.zst)*$', '\1'); -$$ LANGUAGE SQL; +-- For stats_cache +\i util/update_indices.sql -CREATE OR REPLACE FUNCTION section_from_filename(text) RETURNS text AS $$ - SELECT regexp_replace(basename_from_filename($1), '^.+\.([^.]+)$', '\1'); -$$ LANGUAGE SQL; -CREATE OR REPLACE FUNCTION name_from_filename(text) RETURNS text AS $$ - SELECT regexp_replace(basename_from_filename($1), '^(.+)\.[^.]+$', '\1'); -$$ LANGUAGE SQL; +-- Interpret first 4 bytes of hash as a signed 32-bit integer. +CREATE OR REPLACE FUNCTION hash_to_shorthash(hash bytea) RETURNS integer AS $$ + SELECT CASE WHEN get_byte(hash, 3) < 128 + THEN (get_byte(hash, 3)::int<<24) + (get_byte(hash, 2)::int<<16) + (get_byte(hash, 1)::int<<8) + get_byte(hash, 0) + ELSE -2147483648 + ((get_byte(hash, 3)::int - 128)<<24) + (get_byte(hash, 2)::int<<16) + (get_byte(hash, 1)::int<<8) + get_byte(hash, 0) + END; +$$ LANGUAGE SQL IMMUTABLE; CREATE OR REPLACE FUNCTION is_english_locale(locale text) RETURNS bool AS $$ diff --git a/sql/update-2021-12-14.sql b/sql/update-2021-12-14.sql new file mode 100644 index 0000000..2a86a92 --- /dev/null +++ b/sql/update-2021-12-14.sql @@ -0,0 +1,106 @@ +-- WARNING: This script does a complete rewrite of the two largest tables. +-- This requires ~25G of temporary space and takes anywhere between 10 and 20 minutes. +-- The site needs to be taken down during that time. + + +DROP FUNCTION section_from_filename(text); +DROP FUNCTION name_from_filename(text); +DROP FUNCTION basename_from_filename(text); + +-- Interpret first 4 bytes of hash as a signed 32-bit integer. +CREATE OR REPLACE FUNCTION hash_to_shorthash(hash bytea) RETURNS integer AS $$ + SELECT CASE WHEN get_byte(hash, 3) < 128 + THEN (get_byte(hash, 3)::int<<24) + (get_byte(hash, 2)::int<<16) + (get_byte(hash, 1)::int<<8) + get_byte(hash, 0) + ELSE -2147483648 + ((get_byte(hash, 3)::int - 128)<<24) + (get_byte(hash, 2)::int<<16) + (get_byte(hash, 1)::int<<8) + get_byte(hash, 0) + END; +$$ LANGUAGE SQL IMMUTABLE; + + +ALTER TABLE contents DROP CONSTRAINT contents_pkey; +ALTER TABLE contents RENAME TO contents_old; + +CREATE SEQUENCE contents_id_seq AS integer; +CREATE TABLE contents ( + id integer NOT NULL DEFAULT nextval('contents_id_seq'::regclass), + hash bytea NOT NULL, + content text NOT NULL +); +ALTER SEQUENCE contents_id_seq OWNED BY contents.id; +-- 4m15s; start 29.3G end 53.3G; +24G +INSERT INTO contents (hash, content) SELECT hash, content FROM contents_old; +DROP TABLE contents_old; -- back to 29.3G +ALTER TABLE contents ADD PRIMARY KEY (id); +ALTER TABLE contents ADD UNIQUE (hash); +-- 29.7G at this point + + + +CREATE TABLE mans ( + id SERIAL PRIMARY KEY, + name text NOT NULL, + section text NOT NULL, + UNIQUE(name, section) +); +INSERT INTO mans (name, section) SELECT DISTINCT name, section FROM man; +CREATE INDEX mans_name ON mans USING btree(lower(name) text_pattern_ops); + +CREATE TABLE locales ( + id SMALLSERIAL PRIMARY KEY, + locale text NOT NULL UNIQUE +); +INSERT INTO locales (id, locale) VALUES (0,''); -- 0 for default locale is handy, I guess +INSERT INTO locales (locale) SELECT locale FROM man WHERE locale IS NOT NULL GROUP BY locale ORDER BY locale; + +-- Encodings are stored for reference but are never actually used anywhere. +CREATE TABLE encodings ( + id SMALLSERIAL PRIMARY KEY, + encoding text NOT NULL UNIQUE +); +INSERT INTO encodings (id, encoding) VALUES (0,''); +INSERT INTO encodings (encoding) SELECT encoding FROM man WHERE encoding IS NOT NULL GROUP BY encoding ORDER BY encoding; + + +-- Replaces the 'man' table; It's the largest table in terms of number of rows +-- and it's pretty frequently accessed, so keeping the rows small helps. +CREATE TABLE files ( + pkgver integer NOT NULL, -- package_versions.id + man integer NOT NULL, -- mans.id + content integer NOT NULL, -- content.id + shorthash integer NOT NULL, -- cache: hash_to_shorthash(content.hash) + locale smallint NOT NULL, -- locales.id + encoding smallint NOT NULL, -- encodings.id + filename text NOT NULL +); + +-- 1min; 29.7G -> 31.8G +INSERT INTO files + SELECT o.package, m.id, c.id, hash_to_shorthash(o.hash), l.id, e.id, o.filename + FROM man o + JOIN mans m ON m.name = o.name AND m.section = o.section + JOIN contents c ON c.hash = o.hash + JOIN locales l ON l.locale = coalesce(o.locale, '') + JOIN encodings e ON e.encoding = coalesce(o.encoding, ''); + +-- 1min; 31.8G -> 33.7G for both indices +ALTER TABLE files ADD PRIMARY KEY (pkgver, filename); +CREATE INDEX ON files (man, shorthash); +CREATE INDEX ON files (content); + +-- 20sec to verify +ALTER TABLE files + ADD CONSTRAINT files_pkgver_fkey FOREIGN KEY (pkgver) REFERENCES package_versions (id) ON DELETE CASCADE, + ADD CONSTRAINT files_man_fkey FOREIGN KEY (man) REFERENCES mans (id), + ADD CONSTRAINT files_content_fkey FOREIGN KEY (content) REFERENCES contents (id), + ADD CONSTRAINT files_locale_fkey FOREIGN KEY (locale) REFERENCES locales (id), + ADD CONSTRAINT files_encoding_fkey FOREIGN KEY (encoding) REFERENCES encodings (id); + +DROP TABLE man; +DROP TABLE man_index; +-- final: 29.1G; we saved a whole 300M! \o/ + + +-- There's only about a 100 unreferenced rows, leftovers from removals of +-- incorrectly indexed packages. Let's remove them while we're at it. +DELETE FROM contents WHERE NOT EXISTS(SELECT 1 FROM files WHERE content = id); + +VACUUM ANALYZE mans, files, contents, locales, encodings; diff --git a/util/cron.sh b/util/cron.sh index 793d74e..1da2321 100755 --- a/util/cron.sh +++ b/util/cron.sh @@ -10,9 +10,5 @@ PSQL="psql -U manned -Awtq" ./fedora.sh current ./ubuntu.sh current -# Only update indices once a week (on mondays). This process is slow and the data doesn't often change anyway. -if [ `date +%u` == 1 ] -then - echo "============ Updating SQL indices" - $PSQL -f update_indices.sql -fi +echo "============ Updating SQL indices" +$PSQL -f update_indices.sql diff --git a/util/update_indices.sql b/util/update_indices.sql index a785ab7..b977d8a 100644 --- a/util/update_indices.sql +++ b/util/update_indices.sql @@ -2,14 +2,10 @@ -- the table being replaced. The site should remain responsive while these -- queries are run. BEGIN; -CREATE TABLE man_index_new AS SELECT DISTINCT name, section FROM man; -CREATE INDEX ON man_index_new USING btree(lower(name) text_pattern_ops); -DROP TABLE man_index; -ALTER TABLE man_index_new RENAME TO man_index; -COMMIT; - -BEGIN; -CREATE TABLE stats_cache_new AS SELECT count(distinct hash) AS hashes, count(distinct name) AS mans, count(*) AS files, count(distinct package) AS packages FROM man; +CREATE TABLE stats_cache_new AS + SELECT (SELECT count(*) FROM contents) AS hashes, + (SELECT count(distinct name) FROM mans) AS mans, * + FROM (SELECT count(*), count(distinct pkgver) FROM files) x(files, packages); DROP TABLE stats_cache; ALTER TABLE stats_cache_new RENAME TO stats_cache; COMMIT; diff --git a/www/index.pl b/www/index.pl index 4bb6754..4624e59 100755 --- a/www/index.pl +++ b/www/index.pl @@ -73,8 +73,11 @@ sub sysbyshort { state $s ||= { map +($_->{short}, $_), systems->@* } } # Firefox seems to escape [ and ] in URLs. It doesn't really have to... sub normalize_name { $_[0] =~ s/%5b/[/irg =~ s/%5d/]/irg =~ s/%20/ /rg } +sub shorthash_to_hex { unpack 'H*', pack 'i', $_[0] } # int -> hex +sub shorthash_to_int { unpack 'i', pack 'H*', $_[0] } # hex -> int + # Subquery returning all packages that have a man page. -my $packages_with_man = '(SELECT * FROM packages p WHERE EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = p.id AND EXISTS(SELECT 1 FROM man m WHERE m.package = pv.id)))'; +my $packages_with_man = '(SELECT * FROM packages p WHERE EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = p.id AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = pv.id)))'; sub escape_like { $_[0] =~ s/([_%\\])/\\$1/rg } @@ -86,13 +89,6 @@ sub sql_join { sub sql_and { @_ ? sql_join 'AND', map sql('(', $_, ')'), @_ : sql '1=1' } sub sql_or { @_ ? sql_join 'OR', map sql('(', $_, ')'), @_ : sql '1=0' } -# Subquery to match $sql_expr::bytea against a $prefix (hex string). Hopefully indexable. -sub sql_hash_prefix { - my($sql_expr, $prefix) = @_; - my $esc = unpack 'H*', escape_like pack 'H*', $prefix; - sql '(', $sql_expr, "like ('\\x$esc'::bytea||'%'))" -} - sub pkg_frompath { my($sys_where, $path) = @_; @@ -139,27 +135,29 @@ sub man_pref { # 7. sysrel: Prefer a more recent system release over an older release # 8. secorder: Lower sections before higher sections (because man does it this way, for some reason) # 9. pkgdate: Prefer more recent packages (cross-distro) - # 10. Fall back on hash comparison, to ensure the result is stable + # 10. Fall back on shorthash comparison, to ensure the result is stable state $archid = sysbyshort->{arch}{id}; state $debid = (sort { $b->{id} <=> $a->{id} } grep $_->{short} =~ /^debian-/, systems->@*)[0]{id}; tuwf->dbRowi(q{ WITH unfiltered AS ( - SELECT s AS sys, p AS pkg, v AS ver, m AS man - FROM man m - JOIN package_versions v ON v.id = m.package + SELECT m.name, m.section, l.locale, f.shorthash, f.content, f.filename, s AS sys, p AS pkg, v AS ver + FROM files f + JOIN locales l ON l.id = f.locale + JOIN mans m ON m.id = f.man + JOIN package_versions v ON v.id = f.pkgver JOIN packages p ON p.id = v.package JOIN systems s ON s.id = p.system WHERE}, $where, q{ ), f_english AS( - SELECT * FROM unfiltered WHERE NOT EXISTS(SELECT 1 FROM unfiltered WHERE is_english_locale((man).locale)) OR is_english_locale((man).locale) + SELECT * FROM unfiltered WHERE NOT EXISTS(SELECT 1 FROM unfiltered WHERE is_english_locale(locale)) OR is_english_locale(locale) ), f_pkgver AS( SELECT * FROM f_english a WHERE NOT EXISTS(SELECT 1 FROM f_english b WHERE (a.ver).package = (b.ver).package AND (a.ver).released < (b.ver).released) ), f_stdloc AS( - SELECT * FROM f_pkgver WHERE NOT EXISTS(SELECT 1 FROM f_pkgver WHERE is_standard_man_location((man).filename)) OR is_standard_man_location((man).filename) + SELECT * FROM f_pkgver WHERE NOT EXISTS(SELECT 1 FROM f_pkgver WHERE is_standard_man_location(filename)) OR is_standard_man_location(filename) ), f_secmatch AS( - SELECT * FROM f_stdloc WHERE NOT EXISTS(SELECT 1 FROM f_stdloc WHERE (man).section =}, \$section, q{) OR (man).section =}, \$section, q{ + SELECT * FROM f_stdloc WHERE NOT EXISTS(SELECT 1 FROM f_stdloc WHERE section =}, \$section, q{) OR section =}, \$section, q{ ), f_arch AS( SELECT * FROM f_secmatch WHERE NOT EXISTS(SELECT 1 FROM}, length $section ? 'f_secmatch' : 'f_stdloc', qq{WHERE (sys).id = $archid) OR (sys).id = $archid ), f_debian AS( @@ -167,13 +165,13 @@ sub man_pref { ), f_sysrel AS( SELECT * FROM f_debian a WHERE NOT EXISTS(SELECT 1 FROM f_debian b WHERE (a.sys).name = (b.sys).name AND (a.sys).id < (b.sys).id) ), f_secorder AS( - SELECT * FROM f_sysrel a WHERE NOT EXISTS(SELECT 1 FROM f_sysrel b WHERE (a.man).section > (b.man).section) + SELECT * FROM f_sysrel a WHERE NOT EXISTS(SELECT 1 FROM f_sysrel b WHERE section > section) ), f_pkgdate AS( SELECT * FROM f_secorder a WHERE NOT EXISTS(SELECT 1 FROM f_secorder b WHERE (a.ver).released < (b.ver).released) ) SELECT (pkg).system, (pkg).category, (pkg).name AS package, (ver).version, (ver).released, (ver).id AS verid, - (man).name, (man).section, (man).filename, (man).locale, encode((man).hash, 'hex') AS hash - FROM f_pkgdate ORDER BY (man).hash LIMIT 1 + name, section, filename, locale, shorthash, content + FROM f_pkgdate ORDER BY shorthash LIMIT 1 }); } @@ -562,13 +560,11 @@ sub search_man { my $sect = $q =~ s/^([0-9])\s+// || $q =~ s/\(([a-zA-Z0-9]+)\)$// ? $1 : ''; my $name = $q =~ s/^([a-zA-Z0-9,.:_-]+)// ? $1 : ''; - return !$name ? [] : tuwf->dbAll( - 'SELECT name, section FROM man_index !W ORDER BY name, section LIMIT ?', - { - 'lower(name) LIKE ?' => escape_like(lc $name).'%', - $sect ? ('section ILIKE ?' => escape_like(lc $sect).'%') : (), - }, - $limit + return !$name ? [] : tuwf->dbAlli( + 'SELECT name, section FROM mans WHERE', sql_and( + sql('lower(name) LIKE', \(escape_like(lc $name).'%')), + $sect ? sql('section ILIKE', \(escape_like(lc $sect).'%')) : (), + ), 'ORDER BY name, section LIMIT', \$limit, ); } @@ -614,10 +610,11 @@ TUWF::get qr{/([^/]+)/([0-9a-f]{8})/src} => sub { my $nfo = tuwf->dbRowi(' SELECT m.name, m.section, v.released, c.content - FROM man m - JOIN package_versions v ON v.id = m.package - JOIN contents c ON c.hash = m.hash - WHERE m.name =', \$name, 'AND', sql_hash_prefix('m.hash', $hash), ' + FROM files f + JOIN mans m ON m.id = f.man + JOIN package_versions v ON v.id = f.pkgver + JOIN contents c ON c.id = f.content + WHERE m.name =', \$name, 'AND f.shorthash =', \shorthash_to_int($hash), ' LIMIT 1' ); return tuwf->resNotFound if !$nfo->{name}; @@ -633,13 +630,16 @@ sub _man_nav { my($man, $toc) = @_; my @sect = map $_->{section}, tuwf->dbAlli( - 'SELECT DISTINCT section FROM man WHERE name =', \$man->{name}, 'ORDER BY section' + 'SELECT DISTINCT section FROM mans WHERE name =', \$man->{name}, 'ORDER BY section' )->@*; my @lang = map $_->{lang}, tuwf->dbAlli( - "SELECT DISTINCT substring(locale from '^[^.]+') AS lang - FROM man WHERE name =", \$man->{name}, 'AND section =', \$man->{section}, " - ORDER BY substring(locale from '^[^.]+') NULLS FIRST" + "SELECT DISTINCT substring(l.locale from '^[^.]+') AS lang + FROM files f + JOIN mans m ON m.id = f.man + JOIN locales l ON l.id = f.locale + WHERE m.name =", \$man->{name}, 'AND m.section =', \$man->{section}, " + ORDER BY substring(l.locale from '^[^.]+') NULLS FIRST" )->@*; return if !@sect && !@lang && !@$toc; @@ -707,7 +707,7 @@ sub soelim { my($man) = $verid ? man_pref_name $name, sql 'v.id =', \$verid : (); $man->{name} # Recursive soelim, but the second call gets $verid=0 so we don't keep checking the database - ? soelim(0, tuwf->dbVali("SELECT content FROM contents WHERE hash = decode(", \$man->{hash}, ", 'hex')")) + ? soelim(0, tuwf->dbVali("SELECT content FROM contents WHERE id =", \$man->{content})) : ".in -10\n.sp\n\[\[\[MANNEDINCLUDE$path\]\]\]" }emg; $src; @@ -727,12 +727,12 @@ TUWF::get qr{/(?<name>[^/]+)(?:/(?<hash>[0-9a-f]{8}))?} => sub { # the same package as the requested man page. Use the man_pref logic here # to deterministically select a good package. my($man, undef) = $shorthash - ? man_pref undef, sql 'm.name =', \$name, 'AND', sql_hash_prefix 'm.hash', $shorthash + ? man_pref undef, sql 'm.name =', \$name, 'AND f.shorthash =', shorthash_to_int($shorthash) : man_pref_name $name, 'true'; return tuwf->resNotFound() if !$man->{name}; - my $fmt = ManUtils::html ManUtils::fmt_block soelim $man->{verid}, - tuwf->dbVali("SELECT content FROM contents WHERE hash = decode(", \$man->{hash}, ", 'hex')"); + my $content = tuwf->dbRowi('SELECT encode(hash, \'hex\') AS hash, content FROM contents WHERE id =', \$man->{content}); + my $fmt = ManUtils::html ManUtils::fmt_block soelim $man->{verid}, $content->{content}; my @toc; $fmt =~ s{\n<b>(.+?)<\/b>\n}{ push @toc, $1; @@ -741,9 +741,14 @@ TUWF::get qr{/(?<name>[^/]+)(?:/(?<hash>[0-9a-f]{8}))?} => sub { }eg; my $hasversions = tuwf->dbVali( - 'SELECT 1 FROM man WHERE name =', \$man->{name}, 'AND section =', \$man->{section}, - 'AND locale IS NOT DISTINCT FROM', \$man->{locale}, - 'AND hash <> decode(', \$man->{hash}, ", 'hex') LIMIT 1" + 'SELECT 1 + FROM files f + JOIN mans m ON m.id = f.man + JOIN locales l ON l.id = f.locale + WHERE m.name =', \$man->{name}, 'AND m.section =', \$man->{section}, ' + AND l.locale =', \$man->{locale}, ' + AND f.shorthash <> ', \shorthash_to_int($man->{shorthash}), ' + LIMIT 1' ); tuwf->resLastMod($man->{released}); @@ -751,14 +756,14 @@ TUWF::get qr{/(?<name>[^/]+)(?:/(?<hash>[0-9a-f]{8}))?} => sub { _man_nav $man, \@toc; div_ id => 'manbuttons', sub { h1_ $man->{name}; - ul_ 'data-hash' => $man->{hash}, + ul_ 'data-hash' => $content->{hash}, 'data-name' => $man->{name}, 'data-section' => $man->{section}, 'data-locale' => $man->{locale}||'', 'data-hasversions' => $hasversions?1:0, sub { - li_ sub { a_ href => "/$man->{name}/".substr($man->{hash}, 0, 8).'/src', 'source' }; - li_ sub { a_ href => "/$man->{name}/".substr($man->{hash}, 0, 8), 'permalink' }; + li_ sub { a_ href => "/$man->{name}/".shorthash_to_hex($man->{shorthash}).'/src', 'source' }; + li_ sub { a_ href => "/$man->{name}/".shorthash_to_hex($man->{shorthash}), 'permalink' }; } }; div_ id => 'manres', class => 'hidden', ''; @@ -828,7 +833,7 @@ TUWF::get qr{/pkg/([^/]+)/(.+)} => sub { SELECT id, version, released FROM package_versions v WHERE package =', \$pkg->{id}, ' - AND EXISTS(SELECT 1 FROM man m WHERE m.package = v.id) + AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = v.id) ORDER BY released DESC' ); my $sel = $ver ? (grep $_->{version} eq $ver, @$vers)[0] : $vers->[0]; @@ -836,11 +841,14 @@ TUWF::get qr{/pkg/([^/]+)/(.+)} => sub { my $p = tuwf->validate(get => p => { onerror => 1, uint => 1, range => [1,100] })->data; - my $count = tuwf->dbVali('SELECT count(*) FROM man WHERE package =', \$sel->{id}); + my $count = tuwf->dbVali('SELECT count(*) FROM files WHERE pkgver =', \$sel->{id}); my $mans = tuwf->dbPagei({ results => 200, page => $p }, - "SELECT name, encode(hash, 'hex') AS hash, section, locale, filename - FROM man WHERE package =", \$sel->{id}, ' - ORDER BY name, locale NULLS FIRST, filename' + "SELECT m.name, m.section, f.shorthash, f.filename, l.locale + FROM files f + JOIN locales l ON l.id = f.locale + JOIN mans m ON m.id = f.man + WHERE f.pkgver =", \$sel->{id}, ' + ORDER BY m.name, l.locale, f.filename' ); # Latest version of this package determines last modification date of the page. @@ -871,7 +879,7 @@ TUWF::get qr{/pkg/([^/]+)/(.+)} => sub { paginate_ "/pkg/$sys->{short}/$pkg->{category}/$pkg->{name}/$sel->{version}?p=", $count, 200, $p; ul_ sub { li_ sub { - a_ href => "/$_->{name}/".substr($_->{hash},0,8), "$_->{name}($_->{section})"; + a_ href => "/$_->{name}/".shorthash_to_hex($_->{shorthash}), "$_->{name}($_->{section})"; b_ " $_->{locale}" if $_->{locale}; small_ " $_->{filename}"; } for(@$mans); @@ -922,7 +930,7 @@ TUWF::get qr{/man/([^/]+)/(.+)} => sub { } return tuwf->resNotFound if !$man; - tuwf->resRedirect("/$man->{name}/".substr($man->{hash}, 0, 8), 'temp'); + tuwf->resRedirect("/$man->{name}/".shorthash_to_hex($man->{shorthash}), 'temp'); }; @@ -932,9 +940,9 @@ TUWF::get qr{/lang/([^/]+)/([^/]+)} => sub { my $lang = tuwf->capture(1); my $name = normalize_name tuwf->capture(2); my($man, undef) = man_pref_name $name, - sql "substring(locale from '^[^.]+') ilike", \escape_like $lang; - return tuwf->resNotFound if !$man->{name}; - tuwf->resRedirect("/$man->{name}/".substr($man->{hash}, 0, 8), 'temp'); + sql "substring(l.locale from '^[^.]+') ilike", \(escape_like($lang).'%'); + return tuwf->resNotFound if !length $man->{name}; + tuwf->resRedirect("/$man->{name}/".shorthash_to_hex($man->{shorthash}), 'temp'); }; @@ -949,22 +957,24 @@ TUWF::get '/json/tree.json' => sub { return tuwf->resNotFound() if !$f->{hash} && !($f->{section} && $f->{name}); my $l = tuwf->dbAlli(" - SELECT p.system, p.category, p.name AS package, v.version, v.released, v.id AS verid, m.name, m.section, m.filename, m.locale, encode(m.hash, 'hex') AS hash - FROM man m - JOIN package_versions v ON v.id = m.package + SELECT p.system, p.category, p.name AS package, v.version, v.released, v.id AS verid, m.name, m.section, f.filename, f.shorthash, l.locale + FROM files f + JOIN locales l ON l.id = f.locale + JOIN mans m ON m.id = f.man + JOIN package_versions v ON v.id = f.pkgver JOIN packages p ON p.id = v.package JOIN systems s ON s.id = p.system WHERE", sql_and( - length $f->{hash} ? sql 'm.hash = decode(', \$f->{hash}, ", 'hex')" : (), + length $f->{hash} ? sql 'f.content = (SELECT id FROM contents WHERE hash = decode(', \$f->{hash}, ", 'hex'))" : (), length $f->{name} ? sql 'm.name =', \$f->{name} : (), length $f->{section} ? sql 'm.section =', \$f->{section} : (), - length $f->{locale} ? sql 'm.locale =', \$f->{locale} : (), - defined $f->{locale} && $f->{locale} eq '' ? 'm.locale IS NULL' : (), + defined $f->{locale} ? sql 'l.locale =', \$f->{locale} : (), ), ' - ORDER BY s.name, s.id DESC, p.name, v.released DESC, m.name, m.locale NULLS FIRST, m.filename + ORDER BY s.name, s.id DESC, p.name, v.released DESC, m.name, l.locale, f.filename '); # Convert the list into a tree + my $cur = $f->{cur} ? shorthash_to_int substr $f->{cur}, 0, 8 : 0; my $tree = []; my($sys, $sysver, $pkg, $pkgver); for my $m (@$l) { @@ -992,12 +1002,12 @@ TUWF::get '/json/tree.json' => sub { $pkgver && $pkgver eq $m->{version} ? {name=>''} : {name => $m->{version}, href => "/pkg/".sysbyid->{$m->{system}}{short}."/$m->{category}/$m->{package}/$m->{version}"}, { name => "$m->{name}($m->{section})", - $f->{hash} || lc($m->{hash}) eq lc($f->{cur}) ? () - : (href => sprintf('/%s/%s', $m->{name}, substr $m->{hash}, 0, 8)) + $f->{hash} || $cur == $m->{shorthash} ? () + : (href => sprintf('/%s/%s', $m->{name}, shorthash_to_hex $m->{shorthash})) }, - { name => substr($m->{hash}, 0, 8), - $f->{hash} || lc($m->{hash}) eq lc($f->{cur}) ? () - : (href => sprintf('/%s/%s', $m->{name}, substr $m->{hash}, 0, 8)) + { name => shorthash_to_hex($m->{shorthash}), + $f->{hash} || $cur == $m->{shorthash} ? () + : (href => sprintf('/%s/%s', $m->{name}, shorthash_to_hex $m->{shorthash})) }, { name => $m->{filename} } ]; |