diff options
author | Yorhel <git@yorhel.nl> | 2016-11-19 15:27:24 +0100 |
---|---|---|
committer | Yorhel <git@yorhel.nl> | 2016-11-19 15:27:24 +0100 |
commit | 4bdd91f65e6565b568a19806c33ea870810c3be5 (patch) | |
tree | 427f4525b79c89d3f636811c43ebfe4487b966ce | |
parent | 50fe17a6049c40d47835da3ec2db047d8132b2ef (diff) |
Indexer: Initial support for debian repos
-rw-r--r-- | indexer/src/main.rs | 17 | ||||
-rw-r--r-- | indexer/src/sys_deb.rs | 124 |
2 files changed, 141 insertions, 0 deletions
diff --git a/indexer/src/main.rs b/indexer/src/main.rs
index b46602d..84dd4bc 100644
--- a/indexer/src/main.rs
+++ b/indexer/src/main.rs
@@ -18,6 +18,7 @@ mod man;
 mod open;
 mod pkg;
 mod sys_arch;
+mod sys_deb;
 
 
 // Convenience function to get a system id by short-name. Panics if the system doesn't exist.
@@ -51,6 +52,13 @@ fn main() {
             (@arg mirror: --mirror +required +takes_value "Mirror URL")
             (@arg repo: --repo +required +takes_value "Repository name")
         )
+        (@subcommand deb =>
+            (about: "Index a Debian repository")
+            (@arg sys: --sys +required +takes_value "System short-name")
+            (@arg mirror: --mirror +required +takes_value "Mirror URL")
+            (@arg contents: --contents +required +takes_value "Contents file")
+            (@arg packages: --packages +required +takes_value "Packages file")
+        )
     ).get_matches();
 
     let verbose = arg.occurrences_of("v");
@@ -94,4 +102,13 @@ fn main() {
             matches.value_of("repo").unwrap()
         );
     }
+
+    if let Some(matches) = arg.subcommand_matches("deb") {
+        sys_deb::sync(&db,
+            sysbyshort(&db, matches.value_of("sys").unwrap()),
+            matches.value_of("mirror").unwrap(),
+            open::Path{ path: matches.value_of("contents").unwrap(), cache: true, canbelocal: true},
+            open::Path{ path: matches.value_of("packages").unwrap(), cache: true, canbelocal: true},
+        );
+    }
 }
diff --git a/indexer/src/sys_deb.rs b/indexer/src/sys_deb.rs
new file mode 100644
index 0000000..d22de5d
--- /dev/null
+++ b/indexer/src/sys_deb.rs
@@ -0,0 +1,124 @@
+use std::io::{Result,BufReader,BufRead};
+use std::collections::HashSet;
+use std::str;
+use postgres;
+use regex::bytes::Regex;
+
+use man;
+use pkg;
+use open;
+use archive;
+
+// Reference: https://wiki.debian.org/RepositoryFormat
+
+fn get_contents(f: open::Path) -> Result<HashSet<String>> {
+    let mut fd = f.open()?;
+    let rd = archive::Archive::open_raw(&mut fd)?;
+    let brd = BufReader::new(rd);
+    let mut pkgs = HashSet::new();
+    let mut filecnt = 0;
+    let mut mancnt = 0;
+
+    // Run the regex on bytes instead of strings, as paths aren't always UTF-8. This regex will
+    // not match non-UTF-8 paths.
+    let re = Regex::new(r"^(?u:([^\s].*?))\s+(?u:([^\s]+))\s*$").unwrap();
+
+    for line in brd.split(b'\n') {
+        re.captures(&line?).map(|cap| {
+            filecnt += 1;
+            let path = str::from_utf8(cap.at(1).unwrap()).unwrap();
+            if man::ismanpath(path) {
+                mancnt += 1;
+                pkgs.extend( str::from_utf8(cap.at(2).unwrap()).unwrap().split(',').map(|e| {
+                    e.split('/').last().unwrap().to_string()
+                }) );
+            }
+        });
+    }
+
+    debug!("Found {}/{} man files in {} relevant packages from {}", mancnt, filecnt, pkgs.len(), f.path);
+    Ok(pkgs)
+}
+
+
+#[derive(Default)]
+struct Pkg {
+    name: Option<String>,
+    section: Option<String>,
+    arch: Option<String>,
+    version: Option<String>,
+    filename: Option<String>,
+}
+
+
+fn handlepkg(pg: &postgres::GenericConnection, sys: i32, mirror: &str, manpkgs: &HashSet<String>, pkg: &Pkg) {
+    let name = match pkg.name { Some(ref x) => x, None => return };
+    if !manpkgs.contains(name) {
+        return
+    }
+    let section = match pkg.section { Some(ref x) => x, None => { error!("Package {} has no section", name); return } };
+    let arch = match pkg.arch { Some(ref x) => x, None => { error!("Package {} has no arch", name); return } };
+    let version = match pkg.version { Some(ref x) => x, None => { error!("Package {} has no version", name); return } };
+    let filename = match pkg.filename { Some(ref x) => x, None => { error!("Package {} has no filename", name); return } };
+    let uri = format!("{}{}", mirror, filename);
+
+    pkg::pkg(pg, pkg::PkgOpt{
+        force: false,
+        sys: sys,
+        cat: &section,
+        pkg: &name,
+        ver: &version,
+        date: "1980-01-01", // TODO: Fetch date from somewhere (package contents itself, likely)
+        arch: Some(arch),
+        file: open::Path{
+            path: &uri,
+            cache: false,
+            canbelocal: false,
+        },
+    });
+}
+
+
+pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, contents: open::Path, packages: open::Path) {
+    let manpkgs = match get_contents(contents) {
+        Err(e) => { error!("Can't read {}: {}", contents.path, e); return },
+        Ok(x) => x,
+    };
+
+    let mut fd = match packages.open() {
+        Err(e) => { error!("Can't read {}: {}", packages.path, e); return },
+        Ok(x) => x,
+    };
+    let rd = match archive::Archive::open_raw(&mut fd) {
+        Err(e) => { error!("Can't read {}: {}", packages.path, e); return },
+        Ok(x) => x,
+    };
+
+    let brd = BufReader::new(rd);
+    let mut pkg = Pkg::default();
+    let emptyline = Regex::new(r"^\s*$").unwrap();
+    let kv = Regex::new(r"^(?u:([^#-][^ :]*)\s*:\s*(.+))$").unwrap();
+
+    for line in brd.split(b'\n') {
+        let line = match line {
+            Err(e) => { error!("Can't read {}: {}", packages.path, e); return },
+            Ok(x) => x,
+        };
+        if emptyline.is_match(&line) {
+            handlepkg(pg, sys, &mirror, &manpkgs, &pkg);
+            pkg = Pkg::default();
+        }
+        if let Some(cap) = kv.captures(&line) {
+            let val = str::from_utf8(cap.at(2).unwrap()).unwrap();
+            match str::from_utf8(cap.at(1).unwrap()).unwrap() {
+                "Package" => pkg.name = Some(val.to_string()),
+                "Section" => pkg.section = Some(val.to_string()),
+                "Version" => pkg.version = Some(val.to_string()),
+                "Architecture" => pkg.arch = Some(val.to_string()),
+                "Filename" => pkg.filename = Some(val.to_string()),
+                _ => {}
+            }
+        }
+    }
+    handlepkg(pg, sys, &mirror, &manpkgs, &pkg);
+}