diff options
author | Yorhel <git@yorhel.nl> | 2016-11-06 16:05:13 +0100 |
---|---|---|
committer | Yorhel <git@yorhel.nl> | 2016-11-06 16:05:16 +0100 |
commit | cb81bedac133ebc32b2f028e4c3d3a8b4ef31d44 (patch) | |
tree | 578eae9d2bd2c94be691c35d0d7608662527e488 /indexer | |
parent | b8a1945d3812ddf7788c8fddbafb05433aa05ef4 (diff) |
Add arch/encoding metadata to DB + Fetch Arch Linux x86_64
The encoding metadata will be very useful in finding badly decoded man
pages. The package 'arch' is necessary to properly identify which
package was used, which is not obvious now that I'm going to switch more
systems to the (more common) x86_64 arch.
Diffstat (limited to 'indexer')
-rw-r--r-- | indexer/src/main.rs | 2 | ||||
-rw-r--r-- | indexer/src/pkg.rs | 23 | ||||
-rw-r--r-- | indexer/src/sys_arch.rs | 10 |
3 files changed, 21 insertions, 14 deletions
diff --git a/indexer/src/main.rs b/indexer/src/main.rs index 1083559..b46602d 100644 --- a/indexer/src/main.rs +++ b/indexer/src/main.rs @@ -42,6 +42,7 @@ fn main() { (@arg pkg: --pkg +required +takes_value "Package name") (@arg ver: --ver +required +takes_value "Package version") (@arg date: --date +required +takes_value "Package release date") + (@arg arch: --arch +takes_value "Architecture") (@arg FILE: +required "Package file") ) (@subcommand arch => @@ -81,6 +82,7 @@ fn main() { pkg: matches.value_of("pkg").unwrap(), ver: matches.value_of("ver").unwrap(), date: matches.value_of("date").unwrap(), + arch: matches.value_of("arch"), file: open::Path{ path: matches.value_of("FILE").unwrap(), cache: false, canbelocal: true}, }); } diff --git a/indexer/src/pkg.rs b/indexer/src/pkg.rs index 4d3379d..d8766b3 100644 --- a/indexer/src/pkg.rs +++ b/indexer/src/pkg.rs @@ -14,6 +14,7 @@ pub struct PkgOpt<'a> { pub pkg: &'a str, pub ver: &'a str, pub date: &'a str, // TODO: Option to extract date from package metadata itself + pub arch: Option<&'a str>, pub file: open::Path<'a> } @@ -36,8 +37,8 @@ fn insert_pkg(tr: &postgres::transaction::Transaction, opt: &PkgOpt) -> Option<i let verid : i32; if res.is_empty() { - let q = "INSERT INTO package_versions (package, version, released) VALUES($1, $2, $3::text::date) RETURNING id"; - verid = tr.query(q, &[&pkgid, &opt.ver, &opt.date]).unwrap().get(0).get(0); + let q = "INSERT INTO package_versions (package, version, released, arch) VALUES($1, $2, $3::text::date, $4) RETURNING id"; + verid = tr.query(q, &[&pkgid, &opt.ver, &opt.date, &opt.arch]).unwrap().get(0).get(0); info!("New package pkgid {} verid {}", pkgid, verid); Some(verid) @@ -54,12 +55,11 @@ fn insert_pkg(tr: &postgres::transaction::Transaction, opt: &PkgOpt) -> Option<i } -fn insert_man_row(tr: &postgres::GenericConnection, verid: i32, path: &str, hash: &[u8]) { - // TODO: Store 'encoding' in the database +fn insert_man_row(tr: &postgres::GenericConnection, verid: i32, 
path: &str, enc: &str, hash: &[u8]) { let (name, sect, locale) = man::parse_path(path).unwrap(); if let Err(e) = tr.execute( - "INSERT INTO man (package, name, filename, locale, hash, section) VALUES ($1, $2, '/'||$3, $4, $5, $6)", - &[&verid, &name, &path, &locale, &hash, &sect] + "INSERT INTO man (package, name, filename, locale, hash, section, encoding) VALUES ($1, $2, '/'||$3, $4, $5, $6, $7)", + &[&verid, &name, &path, &locale, &hash, &sect, &enc] ) { // I think this can only happen if archread gives us the same file twice, which really // shouldn't happen. But I'd rather continue with an error logged than panic. @@ -84,20 +84,21 @@ fn insert_man(tr: &postgres::GenericConnection, verid: i32, paths: &[&str], ent: ).unwrap(); for path in paths { - insert_man_row(tr, verid, path, dig.as_ref()); + insert_man_row(tr, verid, path, enc, dig.as_ref()); debug!("Inserted man page: {} ({})", path, enc); } } fn insert_link(tr: &postgres::GenericConnection, verid: i32, src: &str, dest: &str) { - let hash = tr.query("SELECT hash FROM man WHERE package = $1 AND filename = '/'||$2", &[&verid, &dest]).unwrap(); - if hash.is_empty() { /* Can happen if man::decode() failed previously. 
*/ error!("Link to unindexed man page: {} -> {}", src, dest); return; } - let hash: Vec<u8> = hash.get(0).get(0); - insert_man_row(tr, verid, src, &hash); + let hash: Vec<u8> = res.get(0).get(0); + let enc: String = res.get(0).get(1); + insert_man_row(tr, verid, src, &enc, &hash); debug!("Inserted man link: {} -> {}", src, dest); } diff --git a/indexer/src/sys_arch.rs b/indexer/src/sys_arch.rs index 7a0bf1f..1c3623d 100644 --- a/indexer/src/sys_arch.rs +++ b/indexer/src/sys_arch.rs @@ -15,6 +15,7 @@ struct Meta { name: String, version: String, date: String, + arch: Option<String>, } @@ -43,6 +44,7 @@ fn read_desc(rd: &mut archive::ArchiveEntry) -> Result<Option<Meta>> { let mut name = None; let mut version = None; let mut builddate = None; + let mut arch = None; for kv in RE.captures_iter(&data) { let key = kv.at(1).unwrap(); @@ -53,6 +55,7 @@ fn read_desc(rd: &mut archive::ArchiveEntry) -> Result<Option<Meta>> { "NAME" => name = Some(val), "VERSION" => version = Some(val), "BUILDDATE" => builddate = i64::from_str(val).ok(), + "ARCH" => arch = Some(val), _ => {}, } } @@ -63,6 +66,7 @@ fn read_desc(rd: &mut archive::ArchiveEntry) -> Result<Option<Meta>> { name: name.unwrap().to_string(), version: version.unwrap().to_string(), date: NaiveDateTime::from_timestamp(builddate.unwrap(), 0).format("%Y-%m-%d").to_string(), + arch: arch.map(str::to_string), })) } else { warn!("Metadata missing from package description: {}", path); @@ -71,11 +75,10 @@ fn read_desc(rd: &mut archive::ArchiveEntry) -> Result<Option<Meta>> { } -// TODO: Switch to x86_64 instead of i686 pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, repo: &str) { info!("Reading packages from {} {}", mirror, repo); - let path = format!("{}/{}/os/i686/{1:}.files.tar.gz", mirror, repo); + let path = format!("{}/{}/os/x86_64/{1:}.files.tar.gz", mirror, repo); let path = open::Path{ path: &path, cache: true, canbelocal: false }; let mut index = match path.open() { Err(e) => { error!("Can't read 
package index: {}", e); return }, @@ -103,7 +106,7 @@ pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, repo: &str hasman = false; let m = meta.take().unwrap(); - let p = format!("{}/{}/os/i686/{}", mirror, repo, m.filename); + let p = format!("{}/{}/os/x86_64/{}", mirror, repo, m.filename); pkg::pkg(pg, pkg::PkgOpt{ force: false, sys: sys, @@ -111,6 +114,7 @@ pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, repo: &str pkg: &m.name, ver: &m.version, date: &m.date, + arch: m.arch.as_ref().map(|e| &e[..]), file: open::Path{ path: &p, cache: false, |