summary refs log tree commit diff
diff options
context:
space:
mode:
author Yorhel <git@yorhel.nl> 2012-09-06 12:41:50 +0200
committer Yorhel <git@yorhel.nl> 2012-09-06 12:49:20 +0200
commit 267de619ba6d2f07a0d28790059f746e0bf83f1e (patch)
tree 7835d3904b479cffd63b6835ff155cc9a1a87915
parent fdc614126a7284cb5408d273a9735e9eabc3a2e6 (diff)
dir_import.c: Only call input_handle() once every 32 read items
Some measurements importing a gzip-compressed file (zcat .. | ncdu -f -)
containing a bit under 6 million items and a few choices of how often to
call input_handle():

Called on every item:
  real 0m13.745s
  user 0m12.576s
  sys  0m4.566s

Called on every 8 items:
  real 0m7.932s
  user 0m9.636s
  sys  0m1.623s

Called on every 16 items:
  real 0m7.559s
  user 0m9.553s
  sys  0m1.323s

Called on every 32 items:
  real 0m7.279s
  user 0m9.353s
  sys  0m1.277s

Called on every 64 items:
  real 0m7.166s
  user 0m9.389s
  sys  0m1.117s

Called on every 256 items:
  real 0m7.073s
  user 0m9.439s
  sys  0m1.027s

32 seemed like a good compromise.
-rw-r--r-- src/dir_import.c 6
1 files changed, 5 insertions, 1 deletions
diff --git a/src/dir_import.c b/src/dir_import.c
index 872ff6d..5d365f5 100644
--- a/src/dir_import.c
+++ b/src/dir_import.c
@@ -502,7 +502,11 @@ static int iteminfo(struct dir **item, uint64_t dev, int isdir) {
E(!*d->name, "No name field present in item information object");
*item = d;
ctx->items++;
- return input_handle(1);
+ /* Only call input_handle() once for every 32 items. Importing items is so
+ * fast that the time spent in input_handle() dominates when called every
+ * time. Don't set this value too high, either, as feedback should still be
+ * somewhat responsive when our import data comes from a slow-ish source. */
+ return !(ctx->items & 31) ? input_handle(1) : 0;
}