summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYorhel <git@yorhel.nl>2021-12-14 15:06:05 +0100
committerYorhel <git@yorhel.nl>2021-12-14 15:08:54 +0100
commitf376f1f137ce7b3f2a1fe992d66b332443287b86 (patch)
tree26056570f6266bd6e54cd6eb4d522da940b91f96
parent6f7f59c6dfa1c94ecfaf14ec81d895d616fddc19 (diff)
Large-ish SQL schema revamp/optimizations
Primarily aimed at reducing the size of the old 'man' (now: files) table, using smaller integers to refer to man contents and text fields, and storing a shorthash as an integer for quick lookups. This better normalization also removes the need to keep a separate 'man_index' cache for the search function. The old schema wasn't necessarily bad, but I was in the mood for some optimizations. And a little cleanup. Prolly introduces a bunch of new bugs, I haven't tested this too well.
-rw-r--r--indexer/src/pkg.rs39
-rw-r--r--sql/schema.sql89
-rw-r--r--sql/update-2021-12-14.sql106
-rwxr-xr-xutil/cron.sh8
-rw-r--r--util/update_indices.sql12
-rwxr-xr-xwww/index.pl140
6 files changed, 267 insertions, 127 deletions
diff --git a/indexer/src/pkg.rs b/indexer/src/pkg.rs
index 654de1a..11aab66 100644
--- a/indexer/src/pkg.rs
+++ b/indexer/src/pkg.rs
@@ -87,7 +87,7 @@ fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<i32> {
// XXX: Should we update released & arch here?
verid = res?.get(0);
info!("Overwriting package pkgid {} verid {}, {}", pkgid, verid, pkginfo);
- tr.query("DELETE FROM man WHERE package = $1", &[&verid]).unwrap();
+ tr.query("DELETE FROM files WHERE pkgver = $1", &[&verid]).unwrap();
Some(verid)
} else {
@@ -97,13 +97,21 @@ fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<i32> {
}
-fn insert_man_row<T: postgres::GenericClient>(tr: &mut T, verid: i32, path: &str, enc: &str, hash: &[u8]) {
+fn insert_man_row<T: postgres::GenericClient>(tr: &mut T, verid: i32, path: &str, enc: &str, content: i32) {
let (name, sect, locale) = man::parse_path(path).unwrap();
- let locale = if locale == "" { None } else { Some(locale) };
- if let Err(e) = tr.execute(
- "INSERT INTO man (package, name, filename, locale, hash, section, encoding) VALUES ($1, $2, '/'||$3, $4, $5, $6, $7)",
- &[&verid, &name, &path, &locale, &hash, &sect, &enc]
- ) {
+ let q = "WITH ms(id) AS (SELECT id FROM mans WHERE name = $2 AND section = $3),
+ mi(id) AS (INSERT INTO mans (name, section) SELECT $2, $3 WHERE NOT EXISTS(SELECT 1 FROM ms) RETURNING id),
+ m(id) AS (SELECT id FROM ms UNION SELECT id FROM mi),
+ ls(id) AS (SELECT id FROM locales WHERE locale = $5),
+ li(id) AS (INSERT INTO locales (locale) SELECT $5 WHERE NOT EXISTS(SELECT 1 FROM ls) RETURNING id),
+ l(id) AS (SELECT id FROM ls UNION SELECT id FROM li),
+ es(id) AS (SELECT id FROM encodings WHERE encoding = $6),
+ ei(id) AS (INSERT INTO encodings (encoding) SELECT $6 WHERE NOT EXISTS(SELECT 1 FROM es) RETURNING id),
+ e(id) AS (SELECT id FROM es UNION SELECT id FROM ei),
+ c(shorthash) AS (SELECT hash_to_shorthash(hash) FROM contents WHERE id = $4)
+ INSERT INTO files (pkgver, man, content, shorthash, locale, encoding, filename)
+ SELECT $1, m.id, $4, c.shorthash, l.id, e.id, '/'||$7 FROM m, l, e, c";
+ if let Err(e) = tr.execute(q, &[&verid, &name, &sect, &content, &locale, &enc, &path]) {
// I think this can only happen if archread gives us the same file twice, which really
// shouldn't happen. But I'd rather continue with an error logged than panic.
error!("Can't insert verid {} fn {}: {}", verid, path, e);
@@ -123,29 +131,30 @@ fn insert_man<T: postgres::GenericClient>(tr: &mut T, verid: i32, paths: &[&str]
cont = cont.replace(0 as char, "");
}
- tr.execute(
- "INSERT INTO contents (hash, content) VALUES($1, $2) ON CONFLICT (hash) DO NOTHING",
- &[&dig.as_ref(), &cont]
- ).unwrap();
+ let q = "WITH s(id) AS (SELECT id FROM contents WHERE hash = $1),
+ i(id) AS (INSERT INTO contents (hash, content) SELECT $1, $2 WHERE NOT EXISTS(SELECT 1 FROM s) RETURNING id)
+ SELECT id FROM s UNION SELECT id FROM i";
+ let id: i32 = tr.query_one(q, &[&dig.as_ref(), &cont]).unwrap().get(0);
for path in paths {
- insert_man_row(tr, verid, path, enc, dig.as_ref());
+ insert_man_row(tr, verid, path, enc, id);
info!("Inserted man page: {} ({})", path, enc);
}
}
fn insert_link<T>(tr: &mut T, verid: i32, src: &str, dest: &str) where T: postgres::GenericClient {
- let res = match tr.query_opt("SELECT hash, encoding FROM man WHERE package = $1 AND filename = '/'||$2", &[&verid, &dest]).unwrap() {
+ let q = "SELECT f.content, e.encoding FROM files f JOIN encodings e ON e.id = f.encoding WHERE pkgver = $1 AND filename = '/'||$2";
+ let res = match tr.query_opt(q, &[&verid, &dest]).unwrap() {
None => { /* Can happen if man::decode() failed previously. */
error!("Link to unindexed man page: {} -> {}", src, dest);
return;
},
Some(x) => x
};
- let hash: Vec<u8> = res.get(0);
+ let content: i32 = res.get(0);
let enc: String = res.get(1);
- insert_man_row(tr, verid, src, &enc, &hash);
+ insert_man_row(tr, verid, src, &enc, content);
info!("Inserted man link: {} -> {}", src, dest);
}
diff --git a/sql/schema.sql b/sql/schema.sql
index a5b43c2..cce549c 100644
--- a/sql/schema.sql
+++ b/sql/schema.sql
@@ -7,15 +7,44 @@ CREATE TABLE systems (
short varchar NOT NULL
);
+
CREATE TABLE contents (
+ id SERIAL PRIMARY KEY,
-- 'hash' is the SHA1 of the man page file after decompression but *before*
-- encoding conversion and removing 0-bytes. This means taking sha1(content)
-- may not necessary match the hash, and it's possible for the same content
-- to be in the database under multiple hashes (but I suspect that's rare).
- hash bytea PRIMARY KEY,
- content varchar NOT NULL
+ hash bytea NOT NULL UNIQUE,
+ content text NOT NULL
+);
+
+
+-- Unique man page, as identified by name & section
+CREATE TABLE mans (
+ id SERIAL PRIMARY KEY,
+ name text NOT NULL,
+ section text NOT NULL,
+ UNIQUE(name, section)
+);
+CREATE INDEX mans_name ON mans USING btree(lower(name) text_pattern_ops);
+
+
+-- List of man page locales for efficient referencing. Some locales include
+-- the encoding in their name, which isn't really correct or even necessary
+-- since we convert everything to UTF-8 anyway, but w/e, Can fix later.
+CREATE TABLE locales (
+ id SMALLSERIAL PRIMARY KEY,
+ locale text NOT NULL UNIQUE
+);
+
+
+-- List of encodings for efficient referencing.
+CREATE TABLE encodings (
+ id SMALLSERIAL PRIMARY KEY,
+ encoding text NOT NULL UNIQUE
);
+
CREATE TABLE packages (
id SERIAL PRIMARY KEY,
system integer NOT NULL REFERENCES systems(id) ON DELETE CASCADE ON UPDATE CASCADE,
@@ -31,6 +60,7 @@ CREATE TABLE packages (
UNIQUE(system, name, category) -- Note the order, lookups on (system,name) are common
);
+
CREATE TABLE package_versions (
id SERIAL PRIMARY KEY,
package integer NOT NULL REFERENCES packages(id) ON DELETE CASCADE,
@@ -40,43 +70,36 @@ CREATE TABLE package_versions (
UNIQUE(package, version)
);
-CREATE TABLE man (
- package integer NOT NULL REFERENCES package_versions(id) ON DELETE CASCADE,
- name varchar NOT NULL,
- filename varchar NOT NULL,
- locale varchar,
- hash bytea NOT NULL,
- section varchar NOT NULL,
- encoding varchar,
- UNIQUE(package, filename)
-);
-
-CREATE INDEX ON man (hash);
-CREATE INDEX ON man (name);
-
-
-
-CREATE TABLE man_index AS SELECT DISTINCT name, section FROM man;
-CREATE INDEX ON man_index USING btree(lower(name) text_pattern_ops);
-
-CREATE TABLE stats_cache AS SELECT count(distinct hash) AS hashes, count(distinct name) AS mans, count(*) AS files, count(distinct package) AS packages FROM man;
-
+CREATE TABLE files (
+ pkgver integer NOT NULL REFERENCES package_versions(id) ON DELETE CASCADE,
+ man integer NOT NULL REFERENCES mans(id),
+ content integer NOT NULL REFERENCES content(id),
+ shorthash integer NOT NULL, -- cache: hash_to_shorthash(content.hash)
+ locale smallint NOT NULL REFERENCES locales(id)
+ -- The original encoding the man page was found in. This column isn't really
+ -- used at the moment, but is potentially useful when investigating encoding
+ -- issues.
+ encoding smallint NOT NULL REFERENCES encodings(id),
+ filename text NOT NULL,
+ PRIMARY KEY(pkgver, filename)
+);
+CREATE INDEX ON files (man, shorthash);
+CREATE INDEX ON files (content);
--- Removes any path components and compression extensions from the filename.
-CREATE OR REPLACE FUNCTION basename_from_filename(fn text) RETURNS text AS $$
- SELECT regexp_replace(fn, '^.+/([^/][^/]*?)(?:\.gz|\.lzma|\.xz|\.bz2|\.zst)*$', '\1');
-$$ LANGUAGE SQL;
+-- For stats_cache
+\i util/update_indices.sql
-CREATE OR REPLACE FUNCTION section_from_filename(text) RETURNS text AS $$
- SELECT regexp_replace(basename_from_filename($1), '^.+\.([^.]+)$', '\1');
-$$ LANGUAGE SQL;
-CREATE OR REPLACE FUNCTION name_from_filename(text) RETURNS text AS $$
- SELECT regexp_replace(basename_from_filename($1), '^(.+)\.[^.]+$', '\1');
-$$ LANGUAGE SQL;
+-- Interpret first 4 bytes of hash as a signed 32-bit integer.
+CREATE OR REPLACE FUNCTION hash_to_shorthash(hash bytea) RETURNS integer AS $$
+ SELECT CASE WHEN get_byte(hash, 3) < 128
+ THEN (get_byte(hash, 3)::int<<24) + (get_byte(hash, 2)::int<<16) + (get_byte(hash, 1)::int<<8) + get_byte(hash, 0)
+ ELSE -2147483648 + ((get_byte(hash, 3)::int - 128)<<24) + (get_byte(hash, 2)::int<<16) + (get_byte(hash, 1)::int<<8) + get_byte(hash, 0)
+ END;
+$$ LANGUAGE SQL IMMUTABLE;
CREATE OR REPLACE FUNCTION is_english_locale(locale text) RETURNS bool AS $$
diff --git a/sql/update-2021-12-14.sql b/sql/update-2021-12-14.sql
new file mode 100644
index 0000000..2a86a92
--- /dev/null
+++ b/sql/update-2021-12-14.sql
@@ -0,0 +1,106 @@
+-- WARNING: This script does a complete rewrite of the two largest tables.
+-- This requires ~25G of temporary space and takes anywhere between 10 and 20 minutes.
+-- The site needs to be taken down during that time.
+
+
+DROP FUNCTION section_from_filename(text);
+DROP FUNCTION name_from_filename(text);
+DROP FUNCTION basename_from_filename(text);
+
+-- Interpret first 4 bytes of hash as a signed 32-bit integer.
+CREATE OR REPLACE FUNCTION hash_to_shorthash(hash bytea) RETURNS integer AS $$
+ SELECT CASE WHEN get_byte(hash, 3) < 128
+ THEN (get_byte(hash, 3)::int<<24) + (get_byte(hash, 2)::int<<16) + (get_byte(hash, 1)::int<<8) + get_byte(hash, 0)
+ ELSE -2147483648 + ((get_byte(hash, 3)::int - 128)<<24) + (get_byte(hash, 2)::int<<16) + (get_byte(hash, 1)::int<<8) + get_byte(hash, 0)
+ END;
+$$ LANGUAGE SQL IMMUTABLE;
+
+
+ALTER TABLE contents DROP CONSTRAINT contents_pkey;
+ALTER TABLE contents RENAME TO contents_old;
+
+CREATE SEQUENCE contents_id_seq AS integer;
+CREATE TABLE contents (
+ id integer NOT NULL DEFAULT nextval('contents_id_seq'::regclass),
+ hash bytea NOT NULL,
+ content text NOT NULL
+);
+ALTER SEQUENCE contents_id_seq OWNED BY contents.id;
+-- 4m15s; start 29.3G end 53.3G; +24G
+INSERT INTO contents (hash, content) SELECT hash, content FROM contents_old;
+DROP TABLE contents_old; -- back to 29.3G
+ALTER TABLE contents ADD PRIMARY KEY (id);
+ALTER TABLE contents ADD UNIQUE (hash);
+-- 29.7G at this point
+
+
+
+CREATE TABLE mans (
+ id SERIAL PRIMARY KEY,
+ name text NOT NULL,
+ section text NOT NULL,
+ UNIQUE(name, section)
+);
+INSERT INTO mans (name, section) SELECT DISTINCT name, section FROM man;
+CREATE INDEX mans_name ON mans USING btree(lower(name) text_pattern_ops);
+
+CREATE TABLE locales (
+ id SMALLSERIAL PRIMARY KEY,
+ locale text NOT NULL UNIQUE
+);
+INSERT INTO locales (id, locale) VALUES (0,''); -- 0 for default locale is handy, I guess
+INSERT INTO locales (locale) SELECT locale FROM man WHERE locale IS NOT NULL GROUP BY locale ORDER BY locale;
+
+-- Encodings are stored for reference but are never actually used anywhere.
+CREATE TABLE encodings (
+ id SMALLSERIAL PRIMARY KEY,
+ encoding text NOT NULL UNIQUE
+);
+INSERT INTO encodings (id, encoding) VALUES (0,'');
+INSERT INTO encodings (encoding) SELECT encoding FROM man WHERE encoding IS NOT NULL GROUP BY encoding ORDER BY encoding;
+
+
+-- Replaces the 'man' table; It's the largest table in terms of number of rows
+-- and it's pretty frequently accessed, so keeping the rows small helps.
+CREATE TABLE files (
+ pkgver integer NOT NULL, -- package_versions.id
+ man integer NOT NULL, -- mans.id
+ content integer NOT NULL, -- content.id
+ shorthash integer NOT NULL, -- cache: hash_to_shorthash(content.hash)
+ locale smallint NOT NULL, -- locales.id
+ encoding smallint NOT NULL, -- encodings.id
+ filename text NOT NULL
+);
+
+-- 1min; 29.7G -> 31.8G
+INSERT INTO files
+ SELECT o.package, m.id, c.id, hash_to_shorthash(o.hash), l.id, e.id, o.filename
+ FROM man o
+ JOIN mans m ON m.name = o.name AND m.section = o.section
+ JOIN contents c ON c.hash = o.hash
+ JOIN locales l ON l.locale = coalesce(o.locale, '')
+ JOIN encodings e ON e.encoding = coalesce(o.encoding, '');
+
+-- 1min; 31.8G -> 33.7G for both indices
+ALTER TABLE files ADD PRIMARY KEY (pkgver, filename);
+CREATE INDEX ON files (man, shorthash);
+CREATE INDEX ON files (content);
+
+-- 20sec to verify
+ALTER TABLE files
+ ADD CONSTRAINT files_pkgver_fkey FOREIGN KEY (pkgver) REFERENCES package_versions (id) ON DELETE CASCADE,
+ ADD CONSTRAINT files_man_fkey FOREIGN KEY (man) REFERENCES mans (id),
+ ADD CONSTRAINT files_content_fkey FOREIGN KEY (content) REFERENCES contents (id),
+ ADD CONSTRAINT files_locale_fkey FOREIGN KEY (locale) REFERENCES locales (id),
+ ADD CONSTRAINT files_encoding_fkey FOREIGN KEY (encoding) REFERENCES encodings (id);
+
+DROP TABLE man;
+DROP TABLE man_index;
+-- final: 29.1G; we saved a whole 300M! \o/
+
+
+-- There's only about a 100 unreferenced rows, leftovers from removals of
+-- incorrectly indexed packages. Let's remove them while we're at it.
+DELETE FROM contents WHERE NOT EXISTS(SELECT 1 FROM files WHERE content = id);
+
+VACUUM ANALYZE mans, files, contents, locales, encodings;
diff --git a/util/cron.sh b/util/cron.sh
index 793d74e..1da2321 100755
--- a/util/cron.sh
+++ b/util/cron.sh
@@ -10,9 +10,5 @@ PSQL="psql -U manned -Awtq"
./fedora.sh current
./ubuntu.sh current
-# Only update indices once a week (on mondays). This process is slow and the data doesn't often change anyway.
-if [ `date +%u` == 1 ]
-then
- echo "============ Updating SQL indices"
- $PSQL -f update_indices.sql
-fi
+echo "============ Updating SQL indices"
+$PSQL -f update_indices.sql
diff --git a/util/update_indices.sql b/util/update_indices.sql
index a785ab7..b977d8a 100644
--- a/util/update_indices.sql
+++ b/util/update_indices.sql
@@ -2,14 +2,10 @@
-- the table being replaced. The site should remain responsive while these
-- queries are run.
BEGIN;
-CREATE TABLE man_index_new AS SELECT DISTINCT name, section FROM man;
-CREATE INDEX ON man_index_new USING btree(lower(name) text_pattern_ops);
-DROP TABLE man_index;
-ALTER TABLE man_index_new RENAME TO man_index;
-COMMIT;
-
-BEGIN;
-CREATE TABLE stats_cache_new AS SELECT count(distinct hash) AS hashes, count(distinct name) AS mans, count(*) AS files, count(distinct package) AS packages FROM man;
+CREATE TABLE stats_cache_new AS
+ SELECT (SELECT count(*) FROM contents) AS hashes,
+ (SELECT count(distinct name) FROM mans) AS mans, *
+ FROM (SELECT count(*), count(distinct pkgver) FROM files) x(files, packages);
DROP TABLE stats_cache;
ALTER TABLE stats_cache_new RENAME TO stats_cache;
COMMIT;
diff --git a/www/index.pl b/www/index.pl
index 4bb6754..4624e59 100755
--- a/www/index.pl
+++ b/www/index.pl
@@ -73,8 +73,11 @@ sub sysbyshort { state $s ||= { map +($_->{short}, $_), systems->@* } }
# Firefox seems to escape [ and ] in URLs. It doesn't really have to...
sub normalize_name { $_[0] =~ s/%5b/[/irg =~ s/%5d/]/irg =~ s/%20/ /rg }
+sub shorthash_to_hex { unpack 'H*', pack 'i', $_[0] } # int -> hex
+sub shorthash_to_int { unpack 'i', pack 'H*', $_[0] } # hex -> int
+
# Subquery returning all packages that have a man page.
-my $packages_with_man = '(SELECT * FROM packages p WHERE EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = p.id AND EXISTS(SELECT 1 FROM man m WHERE m.package = pv.id)))';
+my $packages_with_man = '(SELECT * FROM packages p WHERE EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = p.id AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = pv.id)))';
sub escape_like { $_[0] =~ s/([_%\\])/\\$1/rg }
@@ -86,13 +89,6 @@ sub sql_join {
sub sql_and { @_ ? sql_join 'AND', map sql('(', $_, ')'), @_ : sql '1=1' }
sub sql_or { @_ ? sql_join 'OR', map sql('(', $_, ')'), @_ : sql '1=0' }
-# Subquery to match $sql_expr::bytea against a $prefix (hex string). Hopefully indexable.
-sub sql_hash_prefix {
- my($sql_expr, $prefix) = @_;
- my $esc = unpack 'H*', escape_like pack 'H*', $prefix;
- sql '(', $sql_expr, "like ('\\x$esc'::bytea||'%'))"
-}
-
sub pkg_frompath {
my($sys_where, $path) = @_;
@@ -139,27 +135,29 @@ sub man_pref {
# 7. sysrel: Prefer a more recent system release over an older release
# 8. secorder: Lower sections before higher sections (because man does it this way, for some reason)
# 9. pkgdate: Prefer more recent packages (cross-distro)
- # 10. Fall back on hash comparison, to ensure the result is stable
+ # 10. Fall back on shorthash comparison, to ensure the result is stable
state $archid = sysbyshort->{arch}{id};
state $debid = (sort { $b->{id} <=> $a->{id} } grep $_->{short} =~ /^debian-/, systems->@*)[0]{id};
tuwf->dbRowi(q{
WITH unfiltered AS (
- SELECT s AS sys, p AS pkg, v AS ver, m AS man
- FROM man m
- JOIN package_versions v ON v.id = m.package
+ SELECT m.name, m.section, l.locale, f.shorthash, f.content, f.filename, s AS sys, p AS pkg, v AS ver
+ FROM files f
+ JOIN locales l ON l.id = f.locale
+ JOIN mans m ON m.id = f.man
+ JOIN package_versions v ON v.id = f.pkgver
JOIN packages p ON p.id = v.package
JOIN systems s ON s.id = p.system
WHERE}, $where, q{
), f_english AS(
- SELECT * FROM unfiltered WHERE NOT EXISTS(SELECT 1 FROM unfiltered WHERE is_english_locale((man).locale)) OR is_english_locale((man).locale)
+ SELECT * FROM unfiltered WHERE NOT EXISTS(SELECT 1 FROM unfiltered WHERE is_english_locale(locale)) OR is_english_locale(locale)
), f_pkgver AS(
SELECT * FROM f_english a WHERE NOT EXISTS(SELECT 1 FROM f_english b WHERE (a.ver).package = (b.ver).package AND (a.ver).released < (b.ver).released)
), f_stdloc AS(
- SELECT * FROM f_pkgver WHERE NOT EXISTS(SELECT 1 FROM f_pkgver WHERE is_standard_man_location((man).filename)) OR is_standard_man_location((man).filename)
+ SELECT * FROM f_pkgver WHERE NOT EXISTS(SELECT 1 FROM f_pkgver WHERE is_standard_man_location(filename)) OR is_standard_man_location(filename)
), f_secmatch AS(
- SELECT * FROM f_stdloc WHERE NOT EXISTS(SELECT 1 FROM f_stdloc WHERE (man).section =}, \$section, q{) OR (man).section =}, \$section, q{
+ SELECT * FROM f_stdloc WHERE NOT EXISTS(SELECT 1 FROM f_stdloc WHERE section =}, \$section, q{) OR section =}, \$section, q{
), f_arch AS(
SELECT * FROM f_secmatch WHERE NOT EXISTS(SELECT 1 FROM}, length $section ? 'f_secmatch' : 'f_stdloc', qq{WHERE (sys).id = $archid) OR (sys).id = $archid
), f_debian AS(
@@ -167,13 +165,13 @@ sub man_pref {
), f_sysrel AS(
SELECT * FROM f_debian a WHERE NOT EXISTS(SELECT 1 FROM f_debian b WHERE (a.sys).name = (b.sys).name AND (a.sys).id < (b.sys).id)
), f_secorder AS(
- SELECT * FROM f_sysrel a WHERE NOT EXISTS(SELECT 1 FROM f_sysrel b WHERE (a.man).section > (b.man).section)
+ SELECT * FROM f_sysrel a WHERE NOT EXISTS(SELECT 1 FROM f_sysrel b WHERE section > section)
), f_pkgdate AS(
SELECT * FROM f_secorder a WHERE NOT EXISTS(SELECT 1 FROM f_secorder b WHERE (a.ver).released < (b.ver).released)
)
SELECT (pkg).system, (pkg).category, (pkg).name AS package, (ver).version, (ver).released, (ver).id AS verid,
- (man).name, (man).section, (man).filename, (man).locale, encode((man).hash, 'hex') AS hash
- FROM f_pkgdate ORDER BY (man).hash LIMIT 1
+ name, section, filename, locale, shorthash, content
+ FROM f_pkgdate ORDER BY shorthash LIMIT 1
});
}
@@ -562,13 +560,11 @@ sub search_man {
my $sect = $q =~ s/^([0-9])\s+// || $q =~ s/\(([a-zA-Z0-9]+)\)$// ? $1 : '';
my $name = $q =~ s/^([a-zA-Z0-9,.:_-]+)// ? $1 : '';
- return !$name ? [] : tuwf->dbAll(
- 'SELECT name, section FROM man_index !W ORDER BY name, section LIMIT ?',
- {
- 'lower(name) LIKE ?' => escape_like(lc $name).'%',
- $sect ? ('section ILIKE ?' => escape_like(lc $sect).'%') : (),
- },
- $limit
+ return !$name ? [] : tuwf->dbAlli(
+ 'SELECT name, section FROM mans WHERE', sql_and(
+ sql('lower(name) LIKE', \(escape_like(lc $name).'%')),
+ $sect ? sql('section ILIKE', \(escape_like(lc $sect).'%')) : (),
+ ), 'ORDER BY name, section LIMIT', \$limit,
);
}
@@ -614,10 +610,11 @@ TUWF::get qr{/([^/]+)/([0-9a-f]{8})/src} => sub {
my $nfo = tuwf->dbRowi('
SELECT m.name, m.section, v.released, c.content
- FROM man m
- JOIN package_versions v ON v.id = m.package
- JOIN contents c ON c.hash = m.hash
- WHERE m.name =', \$name, 'AND', sql_hash_prefix('m.hash', $hash), '
+ FROM files f
+ JOIN mans m ON m.id = f.man
+ JOIN package_versions v ON v.id = f.pkgver
+ JOIN contents c ON c.id = f.content
+ WHERE m.name =', \$name, 'AND f.shorthash =', \shorthash_to_int($hash), '
LIMIT 1'
);
return tuwf->resNotFound if !$nfo->{name};
@@ -633,13 +630,16 @@ sub _man_nav {
my($man, $toc) = @_;
my @sect = map $_->{section}, tuwf->dbAlli(
- 'SELECT DISTINCT section FROM man WHERE name =', \$man->{name}, 'ORDER BY section'
+ 'SELECT DISTINCT section FROM mans WHERE name =', \$man->{name}, 'ORDER BY section'
)->@*;
my @lang = map $_->{lang}, tuwf->dbAlli(
- "SELECT DISTINCT substring(locale from '^[^.]+') AS lang
- FROM man WHERE name =", \$man->{name}, 'AND section =', \$man->{section}, "
- ORDER BY substring(locale from '^[^.]+') NULLS FIRST"
+ "SELECT DISTINCT substring(l.locale from '^[^.]+') AS lang
+ FROM files f
+ JOIN mans m ON m.id = f.man
+ JOIN locales l ON l.id = f.locale
+ WHERE m.name =", \$man->{name}, 'AND m.section =', \$man->{section}, "
+ ORDER BY substring(l.locale from '^[^.]+') NULLS FIRST"
)->@*;
return if !@sect && !@lang && !@$toc;
@@ -707,7 +707,7 @@ sub soelim {
my($man) = $verid ? man_pref_name $name, sql 'v.id =', \$verid : ();
$man->{name}
# Recursive soelim, but the second call gets $verid=0 so we don't keep checking the database
- ? soelim(0, tuwf->dbVali("SELECT content FROM contents WHERE hash = decode(", \$man->{hash}, ", 'hex')"))
+ ? soelim(0, tuwf->dbVali("SELECT content FROM contents WHERE id =", \$man->{content}))
: ".in -10\n.sp\n\[\[\[MANNEDINCLUDE$path\]\]\]"
}emg;
$src;
@@ -727,12 +727,12 @@ TUWF::get qr{/(?<name>[^/]+)(?:/(?<hash>[0-9a-f]{8}))?} => sub {
# the same package as the requested man page. Use the man_pref logic here
# to deterministically select a good package.
my($man, undef) = $shorthash
- ? man_pref undef, sql 'm.name =', \$name, 'AND', sql_hash_prefix 'm.hash', $shorthash
+ ? man_pref undef, sql 'm.name =', \$name, 'AND f.shorthash =', shorthash_to_int($shorthash)
: man_pref_name $name, 'true';
return tuwf->resNotFound() if !$man->{name};
- my $fmt = ManUtils::html ManUtils::fmt_block soelim $man->{verid},
- tuwf->dbVali("SELECT content FROM contents WHERE hash = decode(", \$man->{hash}, ", 'hex')");
+ my $content = tuwf->dbRowi('SELECT encode(hash, \'hex\') AS hash, content FROM contents WHERE id =', \$man->{content});
+ my $fmt = ManUtils::html ManUtils::fmt_block soelim $man->{verid}, $content->{content};
my @toc;
$fmt =~ s{\n<b>(.+?)<\/b>\n}{
push @toc, $1;
@@ -741,9 +741,14 @@ TUWF::get qr{/(?<name>[^/]+)(?:/(?<hash>[0-9a-f]{8}))?} => sub {
}eg;
my $hasversions = tuwf->dbVali(
- 'SELECT 1 FROM man WHERE name =', \$man->{name}, 'AND section =', \$man->{section},
- 'AND locale IS NOT DISTINCT FROM', \$man->{locale},
- 'AND hash <> decode(', \$man->{hash}, ", 'hex') LIMIT 1"
+ 'SELECT 1
+ FROM files f
+ JOIN mans m ON m.id = f.man
+ JOIN locales l ON l.id = f.locale
+ WHERE m.name =', \$man->{name}, 'AND m.section =', \$man->{section}, '
+ AND l.locale =', \$man->{locale}, '
+ AND f.shorthash <> ', \shorthash_to_int($man->{shorthash}), '
+ LIMIT 1'
);
tuwf->resLastMod($man->{released});
@@ -751,14 +756,14 @@ TUWF::get qr{/(?<name>[^/]+)(?:/(?<hash>[0-9a-f]{8}))?} => sub {
_man_nav $man, \@toc;
div_ id => 'manbuttons', sub {
h1_ $man->{name};
- ul_ 'data-hash' => $man->{hash},
+ ul_ 'data-hash' => $content->{hash},
'data-name' => $man->{name},
'data-section' => $man->{section},
'data-locale' => $man->{locale}||'',
'data-hasversions' => $hasversions?1:0,
sub {
- li_ sub { a_ href => "/$man->{name}/".substr($man->{hash}, 0, 8).'/src', 'source' };
- li_ sub { a_ href => "/$man->{name}/".substr($man->{hash}, 0, 8), 'permalink' };
+ li_ sub { a_ href => "/$man->{name}/".shorthash_to_hex($man->{shorthash}).'/src', 'source' };
+ li_ sub { a_ href => "/$man->{name}/".shorthash_to_hex($man->{shorthash}), 'permalink' };
}
};
div_ id => 'manres', class => 'hidden', '';
@@ -828,7 +833,7 @@ TUWF::get qr{/pkg/([^/]+)/(.+)} => sub {
SELECT id, version, released
FROM package_versions v
WHERE package =', \$pkg->{id}, '
- AND EXISTS(SELECT 1 FROM man m WHERE m.package = v.id)
+ AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = v.id)
ORDER BY released DESC'
);
my $sel = $ver ? (grep $_->{version} eq $ver, @$vers)[0] : $vers->[0];
@@ -836,11 +841,14 @@ TUWF::get qr{/pkg/([^/]+)/(.+)} => sub {
my $p = tuwf->validate(get => p => { onerror => 1, uint => 1, range => [1,100] })->data;
- my $count = tuwf->dbVali('SELECT count(*) FROM man WHERE package =', \$sel->{id});
+ my $count = tuwf->dbVali('SELECT count(*) FROM files WHERE pkgver =', \$sel->{id});
my $mans = tuwf->dbPagei({ results => 200, page => $p },
- "SELECT name, encode(hash, 'hex') AS hash, section, locale, filename
- FROM man WHERE package =", \$sel->{id}, '
- ORDER BY name, locale NULLS FIRST, filename'
+ "SELECT m.name, m.section, f.shorthash, f.filename, l.locale
+ FROM files f
+ JOIN locales l ON l.id = f.locale
+ JOIN mans m ON m.id = f.man
+ WHERE f.pkgver =", \$sel->{id}, '
+ ORDER BY m.name, l.locale, f.filename'
);
# Latest version of this package determines last modification date of the page.
@@ -871,7 +879,7 @@ TUWF::get qr{/pkg/([^/]+)/(.+)} => sub {
paginate_ "/pkg/$sys->{short}/$pkg->{category}/$pkg->{name}/$sel->{version}?p=", $count, 200, $p;
ul_ sub {
li_ sub {
- a_ href => "/$_->{name}/".substr($_->{hash},0,8), "$_->{name}($_->{section})";
+ a_ href => "/$_->{name}/".shorthash_to_hex($_->{shorthash}), "$_->{name}($_->{section})";
b_ " $_->{locale}" if $_->{locale};
small_ " $_->{filename}";
} for(@$mans);
@@ -922,7 +930,7 @@ TUWF::get qr{/man/([^/]+)/(.+)} => sub {
}
return tuwf->resNotFound if !$man;
- tuwf->resRedirect("/$man->{name}/".substr($man->{hash}, 0, 8), 'temp');
+ tuwf->resRedirect("/$man->{name}/".shorthash_to_hex($man->{shorthash}), 'temp');
};
@@ -932,9 +940,9 @@ TUWF::get qr{/lang/([^/]+)/([^/]+)} => sub {
my $lang = tuwf->capture(1);
my $name = normalize_name tuwf->capture(2);
my($man, undef) = man_pref_name $name,
- sql "substring(locale from '^[^.]+') ilike", \escape_like $lang;
- return tuwf->resNotFound if !$man->{name};
- tuwf->resRedirect("/$man->{name}/".substr($man->{hash}, 0, 8), 'temp');
+ sql "substring(l.locale from '^[^.]+') ilike", \(escape_like($lang).'%');
+ return tuwf->resNotFound if !length $man->{name};
+ tuwf->resRedirect("/$man->{name}/".shorthash_to_hex($man->{shorthash}), 'temp');
};
@@ -949,22 +957,24 @@ TUWF::get '/json/tree.json' => sub {
return tuwf->resNotFound() if !$f->{hash} && !($f->{section} && $f->{name});
my $l = tuwf->dbAlli("
- SELECT p.system, p.category, p.name AS package, v.version, v.released, v.id AS verid, m.name, m.section, m.filename, m.locale, encode(m.hash, 'hex') AS hash
- FROM man m
- JOIN package_versions v ON v.id = m.package
+ SELECT p.system, p.category, p.name AS package, v.version, v.released, v.id AS verid, m.name, m.section, f.filename, f.shorthash, l.locale
+ FROM files f
+ JOIN locales l ON l.id = f.locale
+ JOIN mans m ON m.id = f.man
+ JOIN package_versions v ON v.id = f.pkgver
JOIN packages p ON p.id = v.package
JOIN systems s ON s.id = p.system
WHERE", sql_and(
- length $f->{hash} ? sql 'm.hash = decode(', \$f->{hash}, ", 'hex')" : (),
+ length $f->{hash} ? sql 'f.content = (SELECT id FROM contents WHERE hash = decode(', \$f->{hash}, ", 'hex'))" : (),
length $f->{name} ? sql 'm.name =', \$f->{name} : (),
length $f->{section} ? sql 'm.section =', \$f->{section} : (),
- length $f->{locale} ? sql 'm.locale =', \$f->{locale} : (),
- defined $f->{locale} && $f->{locale} eq '' ? 'm.locale IS NULL' : (),
+ defined $f->{locale} ? sql 'l.locale =', \$f->{locale} : (),
), '
- ORDER BY s.name, s.id DESC, p.name, v.released DESC, m.name, m.locale NULLS FIRST, m.filename
+ ORDER BY s.name, s.id DESC, p.name, v.released DESC, m.name, l.locale, f.filename
');
# Convert the list into a tree
+ my $cur = $f->{cur} ? shorthash_to_int substr $f->{cur}, 0, 8 : 0;
my $tree = [];
my($sys, $sysver, $pkg, $pkgver);
for my $m (@$l) {
@@ -992,12 +1002,12 @@ TUWF::get '/json/tree.json' => sub {
$pkgver && $pkgver eq $m->{version} ? {name=>''} :
{name => $m->{version}, href => "/pkg/".sysbyid->{$m->{system}}{short}."/$m->{category}/$m->{package}/$m->{version}"},
{ name => "$m->{name}($m->{section})",
- $f->{hash} || lc($m->{hash}) eq lc($f->{cur}) ? ()
- : (href => sprintf('/%s/%s', $m->{name}, substr $m->{hash}, 0, 8))
+ $f->{hash} || $cur == $m->{shorthash} ? ()
+ : (href => sprintf('/%s/%s', $m->{name}, shorthash_to_hex $m->{shorthash}))
},
- { name => substr($m->{hash}, 0, 8),
- $f->{hash} || lc($m->{hash}) eq lc($f->{cur}) ? ()
- : (href => sprintf('/%s/%s', $m->{name}, substr $m->{hash}, 0, 8))
+ { name => shorthash_to_hex($m->{shorthash}),
+ $f->{hash} || $cur == $m->{shorthash} ? ()
+ : (href => sprintf('/%s/%s', $m->{name}, shorthash_to_hex $m->{shorthash}))
},
{ name => $m->{filename} }
];