diff options
author | Yorhel <git@yorhel.nl> | 2017-01-17 17:03:31 +0100 |
---|---|---|
committer | Yorhel <git@yorhel.nl> | 2017-01-17 17:05:03 +0100 |
commit | 608f79eb93749aa0b335f64163cf62f1af20e654 (patch) | |
tree | a4fedc5dc6beed888b428fb57bc12fff1da3c1ae | |
parent | f77db5f541c24678cfce05a92284cb48d4e3e018 (diff) |
indexer: Add support for indexing RPM repositories
This code hasn't been thoroughly tested; I'll see how things go when
indexing a live repo.
And XML parsing sucks in every language.
-rw-r--r-- | indexer/Cargo.lock | 10 | ||||
-rw-r--r-- | indexer/Cargo.toml | 1 | ||||
-rw-r--r-- | indexer/src/main.rs | 16 | ||||
-rw-r--r-- | indexer/src/sys_rpm.rs | 173 |
4 files changed, 200 insertions, 0 deletions
diff --git a/indexer/Cargo.lock b/indexer/Cargo.lock index 04d00cd..eb506b6 100644 --- a/indexer/Cargo.lock +++ b/indexer/Cargo.lock @@ -12,6 +12,7 @@ dependencies = [ "libc 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", "postgres 0.13.5 (registry+https://github.com/rust-lang/crates.io-index)", + "quick-xml 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "regex 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "ring 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", "url 1.2.4 (registry+https://github.com/rust-lang/crates.io-index)", @@ -351,6 +352,14 @@ dependencies = [ ] [[package]] +name = "quick-xml" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] name = "redox_syscall" version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -575,6 +584,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum postgres 0.13.5 (registry+https://github.com/rust-lang/crates.io-index)" = "585ca978431cddac0aa926246f18fe30a47401eabbe9bbda573dc60389c10ea1" "checksum postgres-protocol 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "283e27d237a5772ef00c9e3f97e632f9a565ff514761af3e88e129576af7077c" "checksum postgres-shared 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6f09b8819c2586032ed23bfbe95f6edfbebdc18bf9d0fe02c1f785f659958fbb" +"checksum quick-xml 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e685d9ea689e56229debf59cb6d24e28021a9c950bbd988af24e43da3ea2bd79" "checksum redox_syscall 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)" = "8dd35cc9a8bdec562c757e3d43c1526b5c6d2653e23e2315065bc25556550753" "checksum regex 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = 
"4278c17d0f6d62dfef0ab00028feb45bd7d2102843f80763474eeb1be8a10c01" "checksum regex-syntax 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9191b1f57603095f105d317e375d19b1c9c5c3185ea9633a99a6dcbed04457" diff --git a/indexer/Cargo.toml b/indexer/Cargo.toml index 4113ded..bf62eb8 100644 --- a/indexer/Cargo.toml +++ b/indexer/Cargo.toml @@ -17,3 +17,4 @@ clap = "2.20.0" hyper = { version = "0.10.0", default-features = false } url = "1.2.3" chrono = "0.2.25" +quick-xml = "0.5.0" diff --git a/indexer/src/main.rs b/indexer/src/main.rs index 5399e09..2941951 100644 --- a/indexer/src/main.rs +++ b/indexer/src/main.rs @@ -11,6 +11,7 @@ extern crate postgres; extern crate hyper; extern crate url; extern crate chrono; +extern crate quick_xml; mod archive; mod archread; @@ -22,6 +23,7 @@ mod sys_deb; mod sys_freebsd1; mod sys_freebsd2; mod sys_rpmdir; +mod sys_rpm; // Convenience function to get a system id by short-name. Panics if the system doesn't exist. @@ -80,6 +82,12 @@ fn main() { (@arg cat: --cat +required +takes_value "Category to set for all packages") (@arg mirror: --mirror +required +takes_value "Mirror URL") ) + (@subcommand rpm => + (about: "Index an RPM repository") + (@arg sys: --sys +required +takes_value "System short-name") + (@arg cat: --cat +required +takes_value "Category to set for all packages") + (@arg mirror: --mirror +required +takes_value "Mirror URL") + ) ).get_matches(); unsafe { pkg::DRY_RUN = arg.is_present("dry") }; @@ -169,5 +177,13 @@ fn main() { ).unwrap_or_else(|e| error!("{}", e)); } + if let Some(matches) = arg.subcommand_matches("rpm") { + sys_rpm::sync(&db, + sysbyshort(&db, matches.value_of("sys").unwrap()), + matches.value_of("cat").unwrap(), + matches.value_of("mirror").unwrap() + ).unwrap_or_else(|e| error!("{}", e)); + } + trace!("Exiting"); } diff --git a/indexer/src/sys_rpm.rs b/indexer/src/sys_rpm.rs new file mode 100644 index 0000000..a30d93d --- /dev/null +++ b/indexer/src/sys_rpm.rs @@ -0,0 +1,173 @@ +use 
std::collections::HashSet; +use std::io::BufReader; +use std::str::FromStr; +use std::error::Error; +use chrono::NaiveDateTime; +use postgres; +use quick_xml as xml; + +use archive; +use open; +use pkg; +use man; + + +fn xml_getattr(e: &xml::Element, attr: &str) -> Result<String,Box<Error>> { + for kv in e.unescaped_attributes() { + let (key, val) = kv.map_err(|(e,_)| e)?; + if key == attr.as_bytes() { + return Ok(String::from_utf8(val.into_owned())?); + } + } + Err(Box::new(xml::error::Error::EOL)) +} + + +#[derive(Default)] +struct PkgInfo { + name: Option<String>, + arch: Option<String>, + ver: Option<String>, + date: Option<i64>, + path: Option<String>, + hasman: bool, +} + + +// Shared function to read primary.xml.gz and filelists.xml.gz. Runs the callback for each package +// with the info that was found. +fn readpkgs<F>(url: String, mut cb: F) -> Result<(),Box<Error>> + where F: FnMut(PkgInfo) +{ + debug!("Reading {}", url); + let mut fd = open::Path{path: &url, cache: true, canbelocal: false}.open()?; + let xml = xml::XmlReader::from_reader( + BufReader::new( + archive::Archive::open_raw(&mut fd)? 
+ ) + ).trim_text(true); + + let mut savestr = false; + let mut saved = None; + let mut pkg = PkgInfo::default(); + + let arch_src = Some("src".to_string()); + + for event in xml { + let event = event.map_err(|(e,_)| e)?; + match event { + + xml::Event::Start(ref e) => + match e.name() { + b"name" | + b"file" | + b"arch" => savestr = true, + b"version" => pkg.ver = Some(format!("{}-{}", xml_getattr(e, "ver")?, xml_getattr(e, "rel")?)), + b"location" => pkg.path = Some(xml_getattr(e, "href")?), + b"time" => pkg.date = Some(i64::from_str(&xml_getattr(e, "build")?)?), + b"package" => { + pkg.name = xml_getattr(e, "name").ok(); + pkg.arch = xml_getattr(e, "arch").ok(); + }, + _ => (), + }, + + xml::Event::Text(e) => + if savestr { + saved = Some(e.into_unescaped_string()?); + savestr = false + }, + + xml::Event::End(ref e) => { + savestr = false; + match e.name() { + b"name" => pkg.name = Some(saved.take().unwrap()), + b"arch" => pkg.arch = Some(saved.take().unwrap()), + b"file" => pkg.hasman = pkg.hasman || man::ismanpath(&saved.take().unwrap()), + b"package" => { + if pkg.arch != arch_src { + cb(pkg); + } + pkg = PkgInfo::default(); + }, + _ => (), + }; + }, + + _ => (), + } + } + Ok(()) +} + + +// Reads repomd.xml and returns the path to the primary.xml.gz and filelists.xml.gz +fn repomd(url: String) -> Result<(String,String),Box<Error>> { + debug!("Reading {}", url); + let mut fd = open::Path{path: &url, cache: true, canbelocal: false}.open()?; + let xml = xml::XmlReader::from_reader( + BufReader::new( + archive::Archive::open_raw(&mut fd)? + ) + ).trim_text(true); + + let mut primary = String::new(); + let mut filelists = String::new(); + let mut datatype = 0; + + for event in xml { + if let xml::Event::Start(ref e) = event.map_err(|(e,_)| e)? { + match e.name() { + b"data" => + datatype = match &xml_getattr(e, "type")? 
as &str { + "primary" => 1, + "filelists" => 2, + _ => 0, + }, + + b"location" => + match datatype { + 1 => primary = xml_getattr(e, "href")?, + 2 => filelists = xml_getattr(e, "href")?, + _ => (), + }, + + _ => (), + } + } + } + Ok((primary, filelists)) +} + + +pub fn sync(pg: &postgres::GenericConnection, sys: i32, cat: &str, mirror: &str) -> Result<(),Box<Error>> { + let(primary, filelists) = repomd(format!("{}repodata/repomd.xml", mirror))?; + + let mut pkgswithman = HashSet::new(); + readpkgs(format!("{}{}", mirror, filelists), |pkg| { + if pkg.hasman { pkgswithman.insert(pkg.name.unwrap()); () } + })?; + + readpkgs(format!("{}{}", mirror, primary), |pkg| { + let name = pkg.name.unwrap(); + if pkgswithman.contains(&name) { + let uri = format!("{}{}", mirror, pkg.path.unwrap()); + let date = NaiveDateTime::from_timestamp(pkg.date.unwrap(), 0).format("%Y-%m-%d").to_string(); + pkg::pkg(pg, pkg::PkgOpt{ + force: false, + sys: sys, + cat: cat, + pkg: &name, + ver: &pkg.ver.unwrap(), + date: pkg::Date::Known(&date), + arch: Some(&pkg.arch.unwrap()), + file: open::Path{ + path: &uri, + cache: false, + canbelocal: false, + }, + }); + } + })?; + Ok(()) +} |