summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Yorhel <git@yorhel.nl> 2017-01-17 17:03:31 +0100
committer: Yorhel <git@yorhel.nl> 2017-01-17 17:05:03 +0100
commit: 608f79eb93749aa0b335f64163cf62f1af20e654 (patch)
tree: a4fedc5dc6beed888b428fb57bc12fff1da3c1ae
parent: f77db5f541c24678cfce05a92284cb48d4e3e018 (diff)
indexer: Add support for indexing RPM repositories
This code hasn't been thoroughly tested; I'll see how things go when indexing a live repo. And XML parsing sucks in every language.
-rw-r--r-- indexer/Cargo.lock 10
-rw-r--r-- indexer/Cargo.toml 1
-rw-r--r-- indexer/src/main.rs 16
-rw-r--r-- indexer/src/sys_rpm.rs 173
4 files changed, 200 insertions, 0 deletions
diff --git a/indexer/Cargo.lock b/indexer/Cargo.lock
index 04d00cd..eb506b6 100644
--- a/indexer/Cargo.lock
+++ b/indexer/Cargo.lock
@@ -12,6 +12,7 @@ dependencies = [
"libc 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"postgres 0.13.5 (registry+https://github.com/rust-lang/crates.io-index)",
+ "quick-xml 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"ring 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
"url 1.2.4 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -351,6 +352,14 @@ dependencies = [
]
[[package]]
+name = "quick-xml"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
name = "redox_syscall"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -575,6 +584,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum postgres 0.13.5 (registry+https://github.com/rust-lang/crates.io-index)" = "585ca978431cddac0aa926246f18fe30a47401eabbe9bbda573dc60389c10ea1"
"checksum postgres-protocol 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "283e27d237a5772ef00c9e3f97e632f9a565ff514761af3e88e129576af7077c"
"checksum postgres-shared 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6f09b8819c2586032ed23bfbe95f6edfbebdc18bf9d0fe02c1f785f659958fbb"
+"checksum quick-xml 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e685d9ea689e56229debf59cb6d24e28021a9c950bbd988af24e43da3ea2bd79"
"checksum redox_syscall 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)" = "8dd35cc9a8bdec562c757e3d43c1526b5c6d2653e23e2315065bc25556550753"
"checksum regex 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4278c17d0f6d62dfef0ab00028feb45bd7d2102843f80763474eeb1be8a10c01"
"checksum regex-syntax 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9191b1f57603095f105d317e375d19b1c9c5c3185ea9633a99a6dcbed04457"
diff --git a/indexer/Cargo.toml b/indexer/Cargo.toml
index 4113ded..bf62eb8 100644
--- a/indexer/Cargo.toml
+++ b/indexer/Cargo.toml
@@ -17,3 +17,4 @@ clap = "2.20.0"
hyper = { version = "0.10.0", default-features = false }
url = "1.2.3"
chrono = "0.2.25"
+quick-xml = "0.5.0"
diff --git a/indexer/src/main.rs b/indexer/src/main.rs
index 5399e09..2941951 100644
--- a/indexer/src/main.rs
+++ b/indexer/src/main.rs
@@ -11,6 +11,7 @@ extern crate postgres;
extern crate hyper;
extern crate url;
extern crate chrono;
+extern crate quick_xml;
mod archive;
mod archread;
@@ -22,6 +23,7 @@ mod sys_deb;
mod sys_freebsd1;
mod sys_freebsd2;
mod sys_rpmdir;
+mod sys_rpm;
// Convenience function to get a system id by short-name. Panics if the system doesn't exist.
@@ -80,6 +82,12 @@ fn main() {
(@arg cat: --cat +required +takes_value "Category to set for all packages")
(@arg mirror: --mirror +required +takes_value "Mirror URL")
)
+ (@subcommand rpm =>
+ (about: "Index an RPM repository")
+ (@arg sys: --sys +required +takes_value "System short-name")
+ (@arg cat: --cat +required +takes_value "Category to set for all packages")
+ (@arg mirror: --mirror +required +takes_value "Mirror URL")
+ )
).get_matches();
unsafe { pkg::DRY_RUN = arg.is_present("dry") };
@@ -169,5 +177,13 @@ fn main() {
).unwrap_or_else(|e| error!("{}", e));
}
+ if let Some(matches) = arg.subcommand_matches("rpm") {
+ sys_rpm::sync(&db,
+ sysbyshort(&db, matches.value_of("sys").unwrap()),
+ matches.value_of("cat").unwrap(),
+ matches.value_of("mirror").unwrap()
+ ).unwrap_or_else(|e| error!("{}", e));
+ }
+
trace!("Exiting");
}
diff --git a/indexer/src/sys_rpm.rs b/indexer/src/sys_rpm.rs
new file mode 100644
index 0000000..a30d93d
--- /dev/null
+++ b/indexer/src/sys_rpm.rs
@@ -0,0 +1,173 @@
+use std::collections::HashSet;
+use std::io::BufReader;
+use std::str::FromStr;
+use std::error::Error;
+use chrono::NaiveDateTime;
+use postgres;
+use quick_xml as xml;
+
+use archive;
+use open;
+use pkg;
+use man;
+
+
+// Returns the unescaped value of attribute `attr` on element `e` as an owned String.
+//
+// Fails when the attribute list cannot be parsed, when the value is not valid UTF-8, or when the
+// element has no attribute with the given name.
+fn xml_getattr(e: &xml::Element, attr: &str) -> Result<String,Box<Error>> {
+    for kv in e.unescaped_attributes() {
+        // The iterator yields a tuple on failure; only its first field (the error) is propagated.
+        let (key, val) = kv.map_err(|(e,_)| e)?;
+        if key == attr.as_bytes() {
+            return Ok(String::from_utf8(val.into_owned())?);
+        }
+    }
+    // Say which attribute was missing and where, rather than reporting an unrelated parser error.
+    Err(format!("Missing attribute '{}' on XML element <{}>",
+        attr, String::from_utf8_lossy(e.name())).into())
+}
+
+
+// Package metadata collected by readpkgs() while it walks a single <package> element. All fields
+// start out empty (via Default) and are only filled in when the matching XML data is seen, so a
+// consumer cannot assume any of the Option fields is set.
+#[derive(Default)]
+struct PkgInfo {
+ // Text of a <name> element, or the "name" attribute of the <package> element.
+ name: Option<String>,
+ // Text of an <arch> element, or the "arch" attribute of the <package> element. Packages whose
+ // arch is "src" are never handed to the readpkgs() callback.
+ arch: Option<String>,
+ // "<ver>-<rel>", built from the "ver" and "rel" attributes of the <version> element.
+ ver: Option<String>,
+ // The "build" attribute of the <time> element parsed as an integer; sync() interprets it as a
+ // Unix timestamp in seconds.
+ date: Option<i64>,
+ // The "href" attribute of the <location> element; sync() appends it to the mirror URL.
+ path: Option<String>,
+ // True once any <file> entry of the package was accepted by man::ismanpath().
+ hasman: bool,
+}
+
+
+// Shared function to read primary.xml.gz and filelists.xml.gz. Runs the callback for each package
+// with the info that was found.
+//
+// `url` is opened through open::Path (cached, never treated as a local file) and handed to
+// archive::Archive::open_raw() before XML parsing (presumably this takes care of decompression;
+// confirm in archive.rs). Packages whose arch is "src" are dropped without calling `cb`.
+//
+// Fields for which the document has no data are left as None in the PkgInfo given to `cb`; this
+// includes empty <name>/<arch> elements. Returns an error on I/O or XML parse failures, on a
+// <version>, <location> or <time> element lacking its expected attribute, or on a non-numeric
+// build time.
+fn readpkgs<F>(url: String, mut cb: F) -> Result<(),Box<Error>>
+    where F: FnMut(PkgInfo)
+{
+    debug!("Reading {}", url);
+    let mut fd = open::Path{path: &url, cache: true, canbelocal: false}.open()?;
+    let xml = xml::XmlReader::from_reader(
+        BufReader::new(
+            archive::Archive::open_raw(&mut fd)?
+        )
+    ).trim_text(true);
+
+    // `savestr` is set after the start tag of an element whose text we want; `saved` holds that
+    // text until the matching end tag is processed.
+    let mut savestr = false;
+    let mut saved = None;
+    let mut pkg = PkgInfo::default();
+
+    let arch_src = Some("src".to_string());
+
+    for event in xml {
+        let event = event.map_err(|(e,_)| e)?;
+        match event {
+
+            xml::Event::Start(ref e) =>
+                match e.name() {
+                    b"name" |
+                    b"file" |
+                    b"arch" => savestr = true,
+                    b"version" => pkg.ver = Some(format!("{}-{}", xml_getattr(e, "ver")?, xml_getattr(e, "rel")?)),
+                    b"location" => pkg.path = Some(xml_getattr(e, "href")?),
+                    b"time" => pkg.date = Some(i64::from_str(&xml_getattr(e, "build")?)?),
+                    // filelists.xml carries name and arch as attributes of <package>; where
+                    // they are absent the fields simply stay None for now.
+                    b"package" => {
+                        pkg.name = xml_getattr(e, "name").ok();
+                        pkg.arch = xml_getattr(e, "arch").ok();
+                    },
+                    _ => (),
+                },
+
+            xml::Event::Text(e) =>
+                if savestr {
+                    saved = Some(e.into_unescaped_string()?);
+                    savestr = false
+                },
+
+            xml::Event::End(ref e) => {
+                savestr = false;
+                // An empty element produces no Text event, so `saved` may be None here. Leave the
+                // field untouched in that case instead of panicking on malformed metadata.
+                match e.name() {
+                    b"name" => if let Some(text) = saved.take() { pkg.name = Some(text) },
+                    b"arch" => if let Some(text) = saved.take() { pkg.arch = Some(text) },
+                    b"file" => if let Some(path) = saved.take() {
+                        pkg.hasman = pkg.hasman || man::ismanpath(&path)
+                    },
+                    b"package" => {
+                        if pkg.arch != arch_src {
+                            cb(pkg);
+                        }
+                        // Start from a clean slate for the next <package>.
+                        pkg = PkgInfo::default();
+                    },
+                    _ => (),
+                };
+            },
+
+            _ => (),
+        }
+    }
+    Ok(())
+}
+
+
+// Reads repomd.xml and returns the path to the primary.xml.gz and filelists.xml.gz
+//
+// The returned paths are the "href" attributes of the <location> elements that follow the
+// <data type="primary"> and <data type="filelists"> start tags, in that order. If the same type
+// is listed more than once, the last entry wins.
+//
+// Returns an error on I/O or XML parse failures, when a <data> element has no "type" attribute,
+// when a relevant <location> has no "href" attribute, or when either of the two entries is
+// missing from the document.
+fn repomd(url: String) -> Result<(String,String),Box<Error>> {
+    // Which kind of <data> element the most recent start tag announced.
+    enum Section { Other, Primary, Filelists }
+
+    debug!("Reading {}", url);
+    let mut fd = open::Path{path: &url, cache: true, canbelocal: false}.open()?;
+    let xml = xml::XmlReader::from_reader(
+        BufReader::new(
+            archive::Archive::open_raw(&mut fd)?
+        )
+    ).trim_text(true);
+
+    let mut primary = None;
+    let mut filelists = None;
+    let mut section = Section::Other;
+
+    for event in xml {
+        if let xml::Event::Start(ref e) = event.map_err(|(e,_)| e)? {
+            match e.name() {
+                b"data" =>
+                    section = match &xml_getattr(e, "type")? as &str {
+                        "primary" => Section::Primary,
+                        "filelists" => Section::Filelists,
+                        _ => Section::Other,
+                    },
+
+                b"location" =>
+                    match section {
+                        Section::Primary => primary = Some(xml_getattr(e, "href")?),
+                        Section::Filelists => filelists = Some(xml_getattr(e, "href")?),
+                        Section::Other => (),
+                    },
+
+                _ => (),
+            }
+        }
+    }
+
+    // Without both files there is nothing to index. Fail here rather than returning empty paths
+    // that the caller would append to the mirror URL and try to fetch.
+    match (primary, filelists) {
+        (Some(p), Some(f)) => Ok((p, f)),
+        (None, _) => Err(format!("No 'primary' metadata listed in {}", url).into()),
+        (_, None) => Err(format!("No 'filelists' metadata listed in {}", url).into()),
+    }
+}
+
+
+// Indexes the RPM repository at `mirror` for system `sys`, filing every package under `cat`.
+//
+// All URLs are built by plain concatenation, so `mirror` has to end with a '/'. The work is done
+// in two passes: filelists is read first to collect the names of packages that ship at least one
+// man page, then primary is read and pkg::pkg() is called for each package whose name is in that
+// set. Note that the set is keyed on the package name only.
+//
+// Packages with incomplete metadata or an unrepresentable build time are skipped with a warning.
+// Returns an error when repomd.xml or either metadata file cannot be read or parsed.
+pub fn sync(pg: &postgres::GenericConnection, sys: i32, cat: &str, mirror: &str) -> Result<(),Box<Error>> {
+    let (primary, filelists) = repomd(format!("{}repodata/repomd.xml", mirror))?;
+
+    let mut pkgswithman = HashSet::new();
+    readpkgs(format!("{}{}", mirror, filelists), |pkg| {
+        if pkg.hasman {
+            // A package without a name cannot be matched against primary.xml, so ignore it.
+            if let Some(name) = pkg.name {
+                pkgswithman.insert(name);
+            }
+        }
+    })?;
+
+    readpkgs(format!("{}{}", mirror, primary), |pkg| {
+        let name = match pkg.name {
+            Some(name) => name,
+            None => return,
+        };
+        if !pkgswithman.contains(&name) {
+            return;
+        }
+
+        // Repository metadata is external input: skip a broken entry instead of panicking and
+        // aborting the whole run.
+        let (ver, arch, path, built) = match (pkg.ver, pkg.arch, pkg.path, pkg.date) {
+            (Some(ver), Some(arch), Some(path), Some(built)) => (ver, arch, path, built),
+            _ => {
+                warn!("Skipping package {}: incomplete metadata", name);
+                return;
+            },
+        };
+        let date = match NaiveDateTime::from_timestamp_opt(built, 0) {
+            Some(dt) => dt.format("%Y-%m-%d").to_string(),
+            None => {
+                warn!("Skipping package {}: invalid build time {}", name, built);
+                return;
+            },
+        };
+
+        let uri = format!("{}{}", mirror, path);
+        pkg::pkg(pg, pkg::PkgOpt{
+            force: false,
+            sys: sys,
+            cat: cat,
+            pkg: &name,
+            ver: &ver,
+            date: pkg::Date::Known(&date),
+            arch: Some(&arch),
+            file: open::Path{
+                path: &uri,
+                cache: false,
+                canbelocal: false,
+            },
+        });
+    })?;
+    Ok(())
+}