summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYorhel <git@yorhel.nl>2021-06-01 13:00:54 +0200
committerYorhel <git@yorhel.nl>2021-06-01 13:00:58 +0200
commite6b2cff356c146662f3195406a2ba2c076c42c33 (patch)
tree258632c535d20dea72b3ad8f60f57f83a43e8edf
parent5264be76c7e314fdfcff9d0caf2537278325c628 (diff)
Support hard link counts when importing old ncdu dumps
Under the assumption that there are no external references to files mentioned in the dump, i.e. a file's nlink count matches the number of times the file occurs in the dump. This machinery could also be used for regular scans, when you want to scan an individual directory without caring about external hard links. Maybe that should be the default, even? Not sure...
-rw-r--r--src/model.zig87
-rw-r--r--src/scan.zig12
2 files changed, 86 insertions, 13 deletions
diff --git a/src/model.zig b/src/model.zig
index deef87d..e99c8c6 100644
--- a/src/model.zig
+++ b/src/model.zig
@@ -101,12 +101,7 @@ pub const Entry = packed struct {
}
}
- // Insert this entry into the tree at the given directory, updating parent sizes and item counts.
- pub fn insert(self: *Entry, parents: *const Parents) void {
- self.next = parents.top().sub;
- parents.top().sub = self;
- if (self.dir()) |d| std.debug.assert(d.sub == null);
-
+ fn addStats(self: *Entry, parents: *const Parents) void {
const dev = parents.top().dev;
// Set if this is the first time we've found this hardlink in the bottom-most directory of the given dev.
// Means we should count it for other-dev parent dirs, too.
@@ -149,6 +144,19 @@ pub const Entry = packed struct {
}
}
}
+
+ // Insert this entry into the tree at the given directory, updating parent sizes and item counts.
+ pub fn insert(self: *Entry, parents: *const Parents) void {
+ self.next = parents.top().sub;
+ parents.top().sub = self;
+ if (self.dir()) |d| std.debug.assert(d.sub == null);
+
+ // Links with nlink == 0 are counted after we're done scanning.
+ if (if (self.link()) |l| l.nlink == 0 else false)
+ link_count.add(parents.top().dev, self.link().?.ino)
+ else
+ self.addStats(parents);
+ }
};
const DevId = u30; // Can be reduced to make room for more flags in Dir.
@@ -181,8 +189,12 @@ pub const Dir = packed struct {
// File that's been hardlinked (i.e. nlink > 1)
pub const Link = packed struct {
entry: Entry,
- ino: u64,
// dev is inherited from the parent Dir
+ ino: u64,
+ // Special value '0' means: "This link hasn't been counted in the parent
+ // sizes yet because we only know that it's a hard link but not how many
+ // links it has". These are added to the tree structure first and are
+ // counted after the scan is complete (see link_count below).
nlink: u32,
name: u8,
};
@@ -311,6 +323,67 @@ pub const devices = struct {
}
};
+// Special hash table for counting hard links with nlink=0.
+pub const link_count = struct {
+ var nodes = std.HashMap(Node, void, Node.hash, Node.eql, 80).init(main.allocator);
+
+ const Node = struct {
+ ino: u64,
+ dev: u32, // DevId, but 32-bits for easier hashing
+ count: u32,
+
+ const Self = @This();
+
+ fn hash(self: Self) u64 {
+ var h = std.hash.Wyhash.init(0);
+ h.update(std.mem.asBytes(&self.dev));
+ h.update(std.mem.asBytes(&self.ino));
+ return h.final();
+ }
+
+ fn eql(a: Self, b: Self) bool {
+ return a.ino == b.ino and a.dev == b.dev;
+ }
+ };
+
+ pub fn add(dev: DevId, ino: u64) void {
+ const n = Node{ .dev = dev, .ino = ino, .count = 1 };
+ var d = nodes.getOrPut(n) catch unreachable;
+ if (d.found_existing) d.entry.key.count += 1;
+ }
+
+ var final_dir: Parents = undefined;
+
+ fn final_rec() void {
+ var it = final_dir.top().sub;
+ while (it) |e| : (it = e.next) {
+ if (e.dir()) |d| {
+ final_dir.push(d);
+ final_rec();
+ final_dir.pop();
+ continue;
+ }
+ const l = e.link() orelse continue;
+ if (l.nlink > 0) continue;
+ const s = Node{ .dev = final_dir.top().dev, .ino = l.ino, .count = 0 };
+ if (nodes.getEntry(s)) |n| {
+ l.nlink = n.key.count;
+ e.addStats(&final_dir);
+ }
+ }
+ }
+
+ // Called when all files have been added, will traverse the directory to
+ // find all links, update their nlink count and parent sizes.
+ pub fn final() void {
+ if (nodes.count() == 0) return;
+ final_dir = Parents{};
+ final_rec();
+ nodes.clearAndFree();
+ final_dir.deinit();
+ }
+};
+
pub var root: *Dir = undefined;
// Stack of parent directories, convenient helper when constructing and traversing the tree.
diff --git a/src/scan.zig b/src/scan.zig
index a4341b3..c159330 100644
--- a/src/scan.zig
+++ b/src/scan.zig
@@ -151,6 +151,7 @@ const Context = struct {
}
fn final(self: *Self) void {
+ if (self.parents) |_| model.link_count.final();
if (self.wr) |wr| {
wr.writer().writeByte(']') catch |e| writeErr(e);
wr.flush() catch |e| writeErr(e);
@@ -261,8 +262,6 @@ const Context = struct {
e.size = self.stat.size;
if (e.dir()) |d| d.dev = model.devices.getId(self.stat.dev);
if (e.file()) |f| f.notreg = !self.stat.dir and !self.stat.reg;
- // TODO: Handle the scenario where we don't know the hard link count
- // (i.e. on imports from old ncdu versions that don't have the "nlink" field)
if (e.link()) |l| {
l.ino = self.stat.ino;
l.nlink = self.stat.nlink;
@@ -342,8 +341,8 @@ fn scanDir(ctx: *Context, dir: std.fs.Dir, dir_dev: u64) void {
ctx.stat = nstat;
// Symlink targets may reside on different filesystems,
// this will break hardlink detection and counting so let's disable it.
- if (ctx.stat.nlink > 1 and ctx.stat.dev != dir_dev)
- ctx.stat.nlink = 1;
+ if (ctx.stat.hlinkc and ctx.stat.dev != dir_dev)
+ ctx.stat.hlinkc = false;
}
} else |_| {}
}
@@ -431,6 +430,7 @@ const Import = struct {
self.ch = self.rd.reader().readByte() catch |e| switch (e) {
error.EndOfStream => 0,
error.InputOutput => self.die("I/O error"),
+ error.IsDir => self.die("not a file"), // should be detected at open() time, but no flag for that...
// TODO: This one can be retried
error.SystemResources => self.die("out of memory"),
else => unreachable,
@@ -628,8 +628,8 @@ const Import = struct {
}
},
'h' => {
- if (eq(u8, key, "hlinkc")) {
- self.ctx.stat.hlinkc = true;
+ if (eq(u8, key, "hlnkc")) {
+ self.ctx.stat.hlinkc = self.boolean();
return;
}
},