summary refs log tree commit diff
path: root/indexer
diff options
context:
space:
mode:
author Yorhel <git@yorhel.nl> 2016-11-06 16:05:13 +0100
committer Yorhel <git@yorhel.nl> 2016-11-06 16:05:16 +0100
commit cb81bedac133ebc32b2f028e4c3d3a8b4ef31d44 (patch)
tree 578eae9d2bd2c94be691c35d0d7608662527e488 /indexer
parent b8a1945d3812ddf7788c8fddbafb05433aa05ef4 (diff)
Add arch/encoding metadata to DB + Fetch Arch Linux x86_64
The encoding metadata will be very useful in finding badly decoded man pages. The package 'arch' is necessary to properly identify which package was used, which is not obvious now that I'm going to switch more systems to the (more common) x86_64 arch.
Diffstat (limited to 'indexer')
-rw-r--r-- indexer/src/main.rs 2
-rw-r--r-- indexer/src/pkg.rs 23
-rw-r--r-- indexer/src/sys_arch.rs 10
3 files changed, 21 insertions, 14 deletions
diff --git a/indexer/src/main.rs b/indexer/src/main.rs
index 1083559..b46602d 100644
--- a/indexer/src/main.rs
+++ b/indexer/src/main.rs
@@ -42,6 +42,7 @@ fn main() {
(@arg pkg: --pkg +required +takes_value "Package name")
(@arg ver: --ver +required +takes_value "Package version")
(@arg date: --date +required +takes_value "Package release date")
+ (@arg arch: --arch +takes_value "Architecture")
(@arg FILE: +required "Package file")
)
(@subcommand arch =>
@@ -81,6 +82,7 @@ fn main() {
pkg: matches.value_of("pkg").unwrap(),
ver: matches.value_of("ver").unwrap(),
date: matches.value_of("date").unwrap(),
+ arch: matches.value_of("arch"),
file: open::Path{ path: matches.value_of("FILE").unwrap(), cache: false, canbelocal: true},
});
}
diff --git a/indexer/src/pkg.rs b/indexer/src/pkg.rs
index 4d3379d..d8766b3 100644
--- a/indexer/src/pkg.rs
+++ b/indexer/src/pkg.rs
@@ -14,6 +14,7 @@ pub struct PkgOpt<'a> {
pub pkg: &'a str,
pub ver: &'a str,
pub date: &'a str, // TODO: Option to extract date from package metadata itself
+ pub arch: Option<&'a str>,
pub file: open::Path<'a>
}
@@ -36,8 +37,8 @@ fn insert_pkg(tr: &postgres::transaction::Transaction, opt: &PkgOpt) -> Option<i
let verid : i32;
if res.is_empty() {
- let q = "INSERT INTO package_versions (package, version, released) VALUES($1, $2, $3::text::date) RETURNING id";
- verid = tr.query(q, &[&pkgid, &opt.ver, &opt.date]).unwrap().get(0).get(0);
+ let q = "INSERT INTO package_versions (package, version, released, arch) VALUES($1, $2, $3::text::date, $4) RETURNING id";
+ verid = tr.query(q, &[&pkgid, &opt.ver, &opt.date, &opt.arch]).unwrap().get(0).get(0);
info!("New package pkgid {} verid {}", pkgid, verid);
Some(verid)
@@ -54,12 +55,11 @@ fn insert_pkg(tr: &postgres::transaction::Transaction, opt: &PkgOpt) -> Option<i
}
-fn insert_man_row(tr: &postgres::GenericConnection, verid: i32, path: &str, hash: &[u8]) {
- // TODO: Store 'encoding' in the database
+fn insert_man_row(tr: &postgres::GenericConnection, verid: i32, path: &str, enc: &str, hash: &[u8]) {
let (name, sect, locale) = man::parse_path(path).unwrap();
if let Err(e) = tr.execute(
- "INSERT INTO man (package, name, filename, locale, hash, section) VALUES ($1, $2, '/'||$3, $4, $5, $6)",
- &[&verid, &name, &path, &locale, &hash, &sect]
+ "INSERT INTO man (package, name, filename, locale, hash, section, encoding) VALUES ($1, $2, '/'||$3, $4, $5, $6, $7)",
+ &[&verid, &name, &path, &locale, &hash, &sect, &enc]
) {
// I think this can only happen if archread gives us the same file twice, which really
// shouldn't happen. But I'd rather continue with an error logged than panic.
@@ -84,20 +84,21 @@ fn insert_man(tr: &postgres::GenericConnection, verid: i32, paths: &[&str], ent:
).unwrap();
for path in paths {
- insert_man_row(tr, verid, path, dig.as_ref());
+ insert_man_row(tr, verid, path, enc, dig.as_ref());
debug!("Inserted man page: {} ({})", path, enc);
}
}
fn insert_link(tr: &postgres::GenericConnection, verid: i32, src: &str, dest: &str) {
- let hash = tr.query("SELECT hash FROM man WHERE package = $1 AND filename = '/'||$2", &[&verid, &dest]).unwrap();
- if hash.is_empty() { /* Can happen if man::decode() failed previously. */
+ let res = tr.query("SELECT hash, encoding FROM man WHERE package = $1 AND filename = '/'||$2", &[&verid, &dest]).unwrap();
+ if res.is_empty() { /* Can happen if man::decode() failed previously. */
error!("Link to unindexed man page: {} -> {}", src, dest);
return;
}
- let hash: Vec<u8> = hash.get(0).get(0);
- insert_man_row(tr, verid, src, &hash);
+ let hash: Vec<u8> = res.get(0).get(0);
+ let enc: String = res.get(0).get(1);
+ insert_man_row(tr, verid, src, &enc, &hash);
debug!("Inserted man link: {} -> {}", src, dest);
}
diff --git a/indexer/src/sys_arch.rs b/indexer/src/sys_arch.rs
index 7a0bf1f..1c3623d 100644
--- a/indexer/src/sys_arch.rs
+++ b/indexer/src/sys_arch.rs
@@ -15,6 +15,7 @@ struct Meta {
name: String,
version: String,
date: String,
+ arch: Option<String>,
}
@@ -43,6 +44,7 @@ fn read_desc(rd: &mut archive::ArchiveEntry) -> Result<Option<Meta>> {
let mut name = None;
let mut version = None;
let mut builddate = None;
+ let mut arch = None;
for kv in RE.captures_iter(&data) {
let key = kv.at(1).unwrap();
@@ -53,6 +55,7 @@ fn read_desc(rd: &mut archive::ArchiveEntry) -> Result<Option<Meta>> {
"NAME" => name = Some(val),
"VERSION" => version = Some(val),
"BUILDDATE" => builddate = i64::from_str(val).ok(),
+ "ARCH" => arch = Some(val),
_ => {},
}
}
@@ -63,6 +66,7 @@ fn read_desc(rd: &mut archive::ArchiveEntry) -> Result<Option<Meta>> {
name: name.unwrap().to_string(),
version: version.unwrap().to_string(),
date: NaiveDateTime::from_timestamp(builddate.unwrap(), 0).format("%Y-%m-%d").to_string(),
+ arch: arch.map(str::to_string),
}))
} else {
warn!("Metadata missing from package description: {}", path);
@@ -71,11 +75,10 @@ fn read_desc(rd: &mut archive::ArchiveEntry) -> Result<Option<Meta>> {
}
-// TODO: Switch to x86_64 instead of i686
pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, repo: &str) {
info!("Reading packages from {} {}", mirror, repo);
- let path = format!("{}/{}/os/i686/{1:}.files.tar.gz", mirror, repo);
+ let path = format!("{}/{}/os/x86_64/{1:}.files.tar.gz", mirror, repo);
let path = open::Path{ path: &path, cache: true, canbelocal: false };
let mut index = match path.open() {
Err(e) => { error!("Can't read package index: {}", e); return },
@@ -103,7 +106,7 @@ pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, repo: &str
hasman = false;
let m = meta.take().unwrap();
- let p = format!("{}/{}/os/i686/{}", mirror, repo, m.filename);
+ let p = format!("{}/{}/os/x86_64/{}", mirror, repo, m.filename);
pkg::pkg(pg, pkg::PkgOpt{
force: false,
sys: sys,
@@ -111,6 +114,7 @@ pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, repo: &str
pkg: &m.name,
ver: &m.version,
date: &m.date,
+ arch: m.arch.as_ref().map(|e| &e[..]),
file: open::Path{
path: &p,
cache: false,