summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Yorhel <git@yorhel.nl> 2016-11-19 15:27:24 +0100
committer: Yorhel <git@yorhel.nl> 2016-11-19 15:27:24 +0100
commit: 4bdd91f65e6565b568a19806c33ea870810c3be5 (patch)
tree: 427f4525b79c89d3f636811c43ebfe4487b966ce
parent: 50fe17a6049c40d47835da3ec2db047d8132b2ef (diff)
Indexer: Initial support for debian repos
-rw-r--r-- indexer/src/main.rs 17
-rw-r--r-- indexer/src/sys_deb.rs 124
2 files changed, 141 insertions, 0 deletions
diff --git a/indexer/src/main.rs b/indexer/src/main.rs
index b46602d..84dd4bc 100644
--- a/indexer/src/main.rs
+++ b/indexer/src/main.rs
@@ -18,6 +18,7 @@ mod man;
mod open;
mod pkg;
mod sys_arch;
+mod sys_deb;
// Convenience function to get a system id by short-name. Panics if the system doesn't exist.
@@ -51,6 +52,13 @@ fn main() {
(@arg mirror: --mirror +required +takes_value "Mirror URL")
(@arg repo: --repo +required +takes_value "Repository name")
)
+ (@subcommand deb =>
+ (about: "Index a Debian repository")
+ (@arg sys: --sys +required +takes_value "System short-name")
+ (@arg mirror: --mirror +required +takes_value "Mirror URL")
+ (@arg contents: --contents +required +takes_value "Contents file")
+ (@arg packages: --packages +required +takes_value "Packages file")
+ )
).get_matches();
let verbose = arg.occurrences_of("v");
@@ -94,4 +102,13 @@ fn main() {
matches.value_of("repo").unwrap()
);
}
+
+ if let Some(matches) = arg.subcommand_matches("deb") {
+ sys_deb::sync(&db,
+ sysbyshort(&db, matches.value_of("sys").unwrap()),
+ matches.value_of("mirror").unwrap(),
+ open::Path{ path: matches.value_of("contents").unwrap(), cache: true, canbelocal: true},
+ open::Path{ path: matches.value_of("packages").unwrap(), cache: true, canbelocal: true},
+ );
+ }
}
diff --git a/indexer/src/sys_deb.rs b/indexer/src/sys_deb.rs
new file mode 100644
index 0000000..d22de5d
--- /dev/null
+++ b/indexer/src/sys_deb.rs
@@ -0,0 +1,124 @@
+use std::io::{Result,BufReader,BufRead};
+use std::collections::HashSet;
+use std::str;
+use postgres;
+use regex::bytes::Regex;
+
+use man;
+use pkg;
+use open;
+use archive;
+
+// Reference: https://wiki.debian.org/RepositoryFormat
+
+/// Scans a Debian `Contents` index and collects the names of all packages that ship at least
+/// one man page.
+///
+/// Every line accepted by the regex below counts as one file entry: a path, whitespace, and a
+/// comma-separated list of package locations. For paths accepted by `man::ismanpath`, the last
+/// `/`-separated component of each location is added to the returned set.
+///
+/// Errors from opening `f`, from `archive::Archive::open_raw`, or from reading a line are
+/// propagated to the caller.
+fn get_contents(f: open::Path) -> Result<HashSet<String>> {
+    let mut fd = f.open()?;
+    let reader = BufReader::new(archive::Archive::open_raw(&mut fd)?);
+
+    // Match on raw bytes rather than strings, because paths aren't always UTF-8. The `(?u:...)`
+    // groups only accept valid UTF-8, so a line with a non-UTF-8 path does not match at all and
+    // is skipped; that is also why the `from_utf8(..).unwrap()` calls below cannot fail.
+    let entry = Regex::new(r"^(?u:([^\s].*?))\s+(?u:([^\s]+))\s*$").unwrap();
+
+    let mut pkgs = HashSet::new();
+    let mut filecnt = 0;
+    let mut mancnt = 0;
+
+    for line in reader.split(b'\n') {
+        let line = line?;
+        let cap = match entry.captures(&line) {
+            Some(cap) => cap,
+            None => continue,
+        };
+        filecnt += 1;
+
+        let path = str::from_utf8(cap.at(1).unwrap()).unwrap();
+        if !man::ismanpath(path) {
+            continue;
+        }
+        mancnt += 1;
+
+        // Group 2 is a comma-separated list of locations; only the part after the last '/'
+        // of each one is kept as the package name.
+        let owners = str::from_utf8(cap.at(2).unwrap()).unwrap();
+        for owner in owners.split(',') {
+            let name = owner.rsplit('/').next().unwrap();
+            pkgs.insert(name.to_string());
+        }
+    }
+
+    debug!("Found {}/{} man files in {} relevant packages from {}", mancnt, filecnt, pkgs.len(), f.path);
+    Ok(pkgs)
+}
+
+
+#[derive(Default)] // the default record has every field unset (None)
+struct Pkg { // fields collected for one package stanza while reading the Packages file
+ name: Option<String>, // value of the "Package" field
+ section: Option<String>, // value of the "Section" field
+ arch: Option<String>, // value of the "Architecture" field
+ version: Option<String>, // value of the "Version" field
+ filename: Option<String>, // value of the "Filename" field; combined with the mirror URL to locate the package file
+}
+
+
+/// Hands one parsed package record to `pkg::pkg` for indexing, if it is relevant.
+///
+/// Nothing happens when the record has no name or when its name is not in `manpkgs`. A named,
+/// relevant record that lacks a section, architecture, version or filename is reported through
+/// `error!` and skipped. Otherwise the package is passed on with a download location built
+/// from `mirror` and the record's filename.
+///
+/// `mirror` may be given with or without a trailing slash.
+fn handlepkg(pg: &postgres::GenericConnection, sys: i32, mirror: &str, manpkgs: &HashSet<String>, pkg: &Pkg) {
+    let name = match pkg.name {
+        Some(ref x) => x,
+        None => return,
+    };
+    if !manpkgs.contains(name) {
+        return;
+    }
+    let section = match pkg.section {
+        Some(ref x) => x,
+        None => { error!("Package {} has no section", name); return }
+    };
+    let arch = match pkg.arch {
+        Some(ref x) => x,
+        None => { error!("Package {} has no arch", name); return }
+    };
+    let version = match pkg.version {
+        Some(ref x) => x,
+        None => { error!("Package {} has no version", name); return }
+    };
+    let filename = match pkg.filename {
+        Some(ref x) => x,
+        None => { error!("Package {} has no filename", name); return }
+    };
+
+    // Bare concatenation would glue the last component of the mirror URL onto the first
+    // component of the filename when the mirror has no trailing slash, so add the separator
+    // only when it is missing.
+    let uri = if mirror.ends_with('/') {
+        format!("{}{}", mirror, filename)
+    } else {
+        format!("{}/{}", mirror, filename)
+    };
+
+    pkg::pkg(pg, pkg::PkgOpt{
+        force: false,
+        sys: sys,
+        cat: &section,
+        pkg: &name,
+        ver: &version,
+        date: "1980-01-01", // TODO: Fetch date from somewhere (package contents itself, likely)
+        arch: Some(arch),
+        file: open::Path{
+            path: &uri,
+            cache: false,
+            canbelocal: false,
+        },
+    });
+}
+
+
+/// Indexes the man-page-carrying packages of a Debian repository.
+///
+/// First reads `contents` through `get_contents` to learn which packages contain man pages,
+/// then walks the `packages` file stanza by stanza and passes each stanza to `handlepkg`.
+/// A whitespace-only line ends a stanza; whatever has been collected when the input ends is
+/// handed over as well.
+///
+/// Any failure to open or read either file is reported through `error!` and ends the sync
+/// early; nothing is returned to the caller.
+pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, contents: open::Path, packages: open::Path) {
+    let manpkgs = match get_contents(contents) {
+        Ok(set) => set,
+        Err(e) => {
+            error!("Can't read {}: {}", contents.path, e);
+            return;
+        }
+    };
+
+    let mut fd = match packages.open() {
+        Ok(handle) => handle,
+        Err(e) => {
+            error!("Can't read {}: {}", packages.path, e);
+            return;
+        }
+    };
+    let rd = match archive::Archive::open_raw(&mut fd) {
+        Ok(stream) => stream,
+        Err(e) => {
+            error!("Can't read {}: {}", packages.path, e);
+            return;
+        }
+    };
+
+    let blank = Regex::new(r"^\s*$").unwrap();
+    // "Key: value" lines. The key may not start with '#' or '-' and contains no space or
+    // colon. The whole pattern is restricted to valid UTF-8, so the conversions below
+    // cannot fail on a line that matched.
+    let field = Regex::new(r"^(?u:([^#-][^ :]*)\s*:\s*(.+))$").unwrap();
+    let mut cur = Pkg::default();
+
+    for raw in BufReader::new(rd).split(b'\n') {
+        let raw = match raw {
+            Ok(bytes) => bytes,
+            Err(e) => {
+                error!("Can't read {}: {}", packages.path, e);
+                return;
+            }
+        };
+
+        // End of a stanza: flush what was collected and start over. A whitespace-only line
+        // can never be a "Key: value" line, so there is nothing more to do with it.
+        if blank.is_match(&raw) {
+            handlepkg(pg, sys, mirror, &manpkgs, &cur);
+            cur = Pkg::default();
+            continue;
+        }
+
+        let cap = match field.captures(&raw) {
+            Some(cap) => cap,
+            None => continue,
+        };
+        let key = str::from_utf8(cap.at(1).unwrap()).unwrap();
+        let val = str::from_utf8(cap.at(2).unwrap()).unwrap();
+
+        // Only the handful of fields needed for indexing are kept; everything else is ignored.
+        let slot = match key {
+            "Package" => &mut cur.name,
+            "Section" => &mut cur.section,
+            "Version" => &mut cur.version,
+            "Architecture" => &mut cur.arch,
+            "Filename" => &mut cur.filename,
+            _ => continue,
+        };
+        *slot = Some(val.to_string());
+    }
+
+    // The input may end without a trailing blank line; flush the last stanza.
+    handlepkg(pg, sys, mirror, &manpkgs, &cur);
+}