summary refs log tree commit diff
path: root/indexer
diff options
context:
space:
mode:
author Yorhel <git@yorhel.nl> 2016-11-20 09:01:33 +0100
committer Yorhel <git@yorhel.nl> 2016-11-20 09:01:33 +0100
commit ecb1a9e25b3b5659052bb42ece912e566205ce00 (patch)
tree 1ed5803af43b2a6792e99f024ee5f4d6a2a50746 /indexer
parent a1e5a2d80d4339b27c3c41ff97b3d31d9204d6b1 (diff)
Indexer: Support reading date from .deb archives
Diffstat (limited to 'indexer')
-rw-r--r-- indexer/src/archive.rs 7
-rw-r--r-- indexer/src/archread.rs 10
-rw-r--r-- indexer/src/main.rs 6
-rw-r--r-- indexer/src/pkg.rs 73
-rw-r--r-- indexer/src/sys_arch.rs 2
-rw-r--r-- indexer/src/sys_deb.rs 2
-rwxr-xr-x indexer/tests/mkarchives.sh 1
-rw-r--r-- indexer/tests/simpletest.tar.gz bin 248 -> 255 bytes
8 files changed, 78 insertions, 23 deletions
diff --git a/indexer/src/archive.rs b/indexer/src/archive.rs
index a81755a..d651566 100644
--- a/indexer/src/archive.rs
+++ b/indexer/src/archive.rs
@@ -210,6 +210,12 @@ impl<'a> ArchiveEntry<'a> {
}
}
+ // A proper implementation would call mtime_is_set() and _mtime_nsec() to return an
+ // Option<SomeHighResolutionTimestamp>. But this'll do for what I need.
+ pub fn mtime(&self) -> i64 {
+ unsafe { ffi::archive_entry_mtime(self.e) }
+ }
+
fn symlink(&self) -> Option<String> {
let c_str: &CStr = unsafe {
let ptr = ffi::archive_entry_symlink(self.e);
@@ -329,6 +335,7 @@ mod tests {
ent = ent.next().unwrap().unwrap();
t(&mut ent, Some("simple/file"), 3, FileType::File, "Hi\n");
+ assert_eq!(ent.mtime(), 1479627842);
ent = ent.next().unwrap().unwrap();
t(&mut ent, Some("simple/link"), 0, FileType::Link("file".to_string()), "");
diff --git a/indexer/src/archread.rs b/indexer/src/archread.rs
index 22086f8..b9543e4 100644
--- a/indexer/src/archread.rs
+++ b/indexer/src/archread.rs
@@ -76,8 +76,8 @@ impl FileList {
*
* Returns a FileList struct that can be used to retreive all interesting non-regular files.
*/
- pub fn read<F,G>(ent: Option<ArchiveEntry>, interest_cb: F, mut file_cb: G) -> Result<FileList>
- where F: Fn(&str) -> bool, G: FnMut(&[&str], &mut ArchiveEntry) -> Result<()>
+ pub fn read<F,G>(ent: Option<ArchiveEntry>, mut interest_cb: F, mut file_cb: G) -> Result<FileList>
+ where F: FnMut(&ArchiveEntry) -> bool, G: FnMut(&[&str], &mut ArchiveEntry) -> Result<()>
{
let mut fl = FileList {
seen: HashMap::new(),
@@ -101,7 +101,7 @@ impl FileList {
let et = match ft {
FileType::File => {
- if interest_cb(&path) {
+ if interest_cb(&e) {
let pathv = [&path as &str];
try!(file_cb(&pathv[..], &mut e));
EntryType::Handled
@@ -110,7 +110,7 @@ impl FileList {
}
},
FileType::Link(l) => {
- if interest_cb(&path) {
+ if interest_cb(&e) {
fl.links.push(path.clone());
}
EntryType::Link(l)
@@ -260,7 +260,7 @@ mod tests {
let arch = Archive::open_archive(&mut f).unwrap();
let mut cnt = 0;
FileList::read(arch,
- |p| p.starts_with("man/man"),
+ |p| p.path().unwrap().starts_with("man/man"),
|p,e| {
assert_eq!(cnt, 0);
cnt += 1;
diff --git a/indexer/src/main.rs b/indexer/src/main.rs
index ef75984..ed6f200 100644
--- a/indexer/src/main.rs
+++ b/indexer/src/main.rs
@@ -88,13 +88,17 @@ fn main() {
debug!("Connected to database");
if let Some(matches) = arg.subcommand_matches("pkg") {
+ let date = match matches.value_of("date").unwrap() {
+ "deb" => pkg::Date::Deb,
+ s => pkg::Date::Known(s),
+ };
pkg::pkg(&db, pkg::PkgOpt {
force: matches.is_present("force"),
sys: sysbyshort(&db, matches.value_of("sys").unwrap()),
cat: matches.value_of("cat").unwrap(),
pkg: matches.value_of("pkg").unwrap(),
ver: matches.value_of("ver").unwrap(),
- date: matches.value_of("date").unwrap(),
+ date: date,
arch: matches.value_of("arch"),
file: open::Path{ path: matches.value_of("FILE").unwrap(), cache: false, canbelocal: true},
});
diff --git a/indexer/src/pkg.rs b/indexer/src/pkg.rs
index d480f2b..a6fa0e1 100644
--- a/indexer/src/pkg.rs
+++ b/indexer/src/pkg.rs
@@ -1,26 +1,47 @@
use std;
use std::io::{Error,ErrorKind,Read};
use postgres;
+use chrono::NaiveDateTime;
use open;
use archread;
use man;
use archive::{Format,Archive,ArchiveEntry};
+
+#[derive(Debug,Clone,Copy)]
+pub enum Date<'a> {
+ Known(&'a str), // Given in PkgOpt
+ Found(i64), // Found in package
+ Deb, // Should be read from the timestamp of the 'debian-binary' file
+}
+
+
+impl<'a> Date<'a> {
+ fn update(&mut self, ent: &ArchiveEntry) {
+ // TODO: Validate that the mtime() date is sensible (e.g. 1990 < date < now)
+ *self = match *self {
+ Date::Deb if ent.format() == Format::Ar && ent.path() == Some("debian-binary") => Date::Found(ent.mtime()),
+ x => x,
+ }
+ }
+}
+
+
pub struct PkgOpt<'a> {
pub force: bool,
pub sys: i32,
pub cat: &'a str,
pub pkg: &'a str,
pub ver: &'a str,
- pub date: &'a str, // TODO: Option to extract date from package metadata itself
+ pub date: Date<'a>,
pub arch: Option<&'a str>,
pub file: open::Path<'a>
}
fn insert_pkg(tr: &postgres::transaction::Transaction, opt: &PkgOpt) -> Option<i32> {
- let pkginfo = format!("sys {} / {} / {} - {} @ {} @ {}", opt.sys, opt.cat, opt.pkg, opt.ver, opt.date, opt.file.path);
+ let pkginfo = format!("sys {} / {} / {} - {} @ {:?} @ {}", opt.sys, opt.cat, opt.pkg, opt.ver, opt.date, opt.file.path);
// The ON CONFLICT .. DO UPDATE is used instead of DO NOTHING because in that case the
// RETURNING clause wouldn't give us a package id.
@@ -38,9 +59,15 @@ fn insert_pkg(tr: &postgres::transaction::Transaction, opt: &PkgOpt) -> Option<i
let res = tr.query(q, &[&pkgid, &opt.ver]).unwrap();
let verid : i32;
+
+ let date = match opt.date {
+ Date::Known(d) => d,
+ _ => "1980-01-01", // Placeholder
+ };
+
if res.is_empty() {
let q = "INSERT INTO package_versions (package, version, released, arch) VALUES($1, $2, $3::text::date, $4) RETURNING id";
- verid = tr.query(q, &[&pkgid, &opt.ver, &opt.date, &opt.arch]).unwrap().get(0).get(0);
+ verid = tr.query(q, &[&pkgid, &opt.ver, &date, &opt.arch]).unwrap().get(0).get(0);
info!("New package pkgid {} verid {}, {}", pkgid, verid, pkginfo);
Some(verid)
@@ -106,21 +133,23 @@ fn insert_link(tr: &postgres::GenericConnection, verid: i32, src: &str, dest: &s
}
-fn with_pkg<F,T>(f: open::Path, cb: F) -> std::io::Result<T>
- where F: FnOnce(Option<ArchiveEntry>) -> std::io::Result<T>
+fn with_pkg<F,T>(opt: &mut PkgOpt, cb: F) -> std::io::Result<T>
+ where F: FnOnce(Option<ArchiveEntry>, &mut PkgOpt) -> std::io::Result<T>
{
- let mut rd = f.open()?;
+ let mut rd = opt.file.open()?;
let ent = match Archive::open_archive(&mut rd)? {
- None => return cb(None),
+ None => return cb(None, opt),
Some(x) => x,
};
// .deb ("2.0")
if ent.format() == Format::Ar && ent.path() == Some("debian-binary") {
+ opt.date.update(&ent);
let mut ent = ent.next()?;
while let Some(mut e) = ent {
+ opt.date.update(&e);
if e.path().map(|p| p.starts_with("data.tar")) == Some(true) {
- return cb(Archive::open_archive(&mut e)?);
+ return cb(Archive::open_archive(&mut e)?, opt);
}
ent = e.next()?
}
@@ -128,25 +157,39 @@ fn with_pkg<F,T>(f: open::Path, cb: F) -> std::io::Result<T>
// any other archive (Arch/FreeBSD .tar)
} else {
- cb(Some(ent))
+ cb(Some(ent), opt)
}
}
-fn index_pkg(tr: &postgres::GenericConnection, opt: &PkgOpt, verid: i32) -> std::io::Result<()> {
+fn index_pkg(tr: &postgres::GenericConnection, mut opt: PkgOpt, verid: i32) -> std::io::Result<()> {
let indexfunc = |paths: &[&str], ent: &mut ArchiveEntry| {
insert_man(tr, verid, paths, ent);
Ok(()) /* Don't propagate errors, continue handling other man pages */
};
- let missed = with_pkg(opt.file, |e| { archread::FileList::read(e, man::ismanpath, &indexfunc) })?
- .links(|src, dest| { insert_link(tr, verid, src, dest) });
+ let missed = with_pkg(&mut opt, |e, opt| {
+ archread::FileList::read(e, |ent: &ArchiveEntry| {
+ opt.date.update(ent);
+ man::ismanpath(ent.path().unwrap())
+ }, &indexfunc)
+ })?.links(|src, dest| { insert_link(tr, verid, src, dest) });
if let Some(missed) = missed {
warn!("Some links were missed, reading package again");
- with_pkg(opt.file, |e| { missed.read(e, indexfunc) })?
+ with_pkg(&mut opt, |e, _| { missed.read(e, indexfunc) })?
+ }
+
+ match opt.date {
+ Date::Known(_) => Ok(()),
+ Date::Found(t) => {
+ let date = NaiveDateTime::from_timestamp(t, 0).format("%Y-%m-%d").to_string();
+ debug!("Date from package: {}", date);
+ tr.execute("UPDATE package_versions SET released = $1::text::date WHERE id = $2", &[&date, &verid]).unwrap();
+ Ok(())
+ },
+ _ => Err(Error::new(ErrorKind::Other, "No valid date found in this package")),
}
- Ok(())
}
@@ -156,7 +199,7 @@ pub fn pkg(conn: &postgres::GenericConnection, opt: PkgOpt) {
let verid = match insert_pkg(&tr, &opt) { Some(x) => x, None => return };
- match index_pkg(&tr, &opt, verid) {
+ match index_pkg(&tr, opt, verid) {
Err(e) => error!("Error reading package: {}", e),
Ok(_) => tr.set_commit()
}
diff --git a/indexer/src/sys_arch.rs b/indexer/src/sys_arch.rs
index 1c3623d..468c28f 100644
--- a/indexer/src/sys_arch.rs
+++ b/indexer/src/sys_arch.rs
@@ -113,7 +113,7 @@ pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, repo: &str
cat: repo,
pkg: &m.name,
ver: &m.version,
- date: &m.date,
+ date: pkg::Date::Known(&m.date),
arch: m.arch.as_ref().map(|e| &e[..]),
file: open::Path{
path: &p,
diff --git a/indexer/src/sys_deb.rs b/indexer/src/sys_deb.rs
index d22de5d..971aab1 100644
--- a/indexer/src/sys_deb.rs
+++ b/indexer/src/sys_deb.rs
@@ -68,7 +68,7 @@ fn handlepkg(pg: &postgres::GenericConnection, sys: i32, mirror: &str, manpkgs:
cat: &section,
pkg: &name,
ver: &version,
- date: "1980-01-01", // TODO: Fetch date from somewhere (package contents itself, likely)
+ date: pkg::Date::Deb,
arch: Some(arch),
file: open::Path{
path: &uri,
diff --git a/indexer/tests/mkarchives.sh b/indexer/tests/mkarchives.sh
index 169f2bd..3d6299a 100755
--- a/indexer/tests/mkarchives.sh
+++ b/indexer/tests/mkarchives.sh
@@ -9,6 +9,7 @@
mkdir simple
echo Hi >simple/file
+touch -d '2016-11-20 08:44:02+01:00' simple/file
ln -s file simple/link
ln simple/file simple/hardlink
mkfifo simple/fifo
diff --git a/indexer/tests/simpletest.tar.gz b/indexer/tests/simpletest.tar.gz
index 409f5ca..1c3ecae 100644
--- a/indexer/tests/simpletest.tar.gz
+++ b/indexer/tests/simpletest.tar.gz
Binary files differ