Added -f option to import an exported directory structure

This is the first working version. There's a few TODO's left.
author: Yorhel <git@yorhel.nl> 2012-09-05 13:52:12 +0200
committer: Yorhel <git@yorhel.nl> 2012-09-05 13:52:12 +0200
commit: 10dca5503a85eb7c668dd8fbb1318a682b3b6f39 (patch)
tree: aa0de604c8c57b03ec3f7b0b94be48fbb930098f /src
parent: ef4abec6cfcba19b2b2283d421cb50d32417271a (diff)
4 files changed, 602 insertions, 15 deletions
diff --git a/src/dir.h b/src/dir.h
index a5070a3..0ef7875 100644
--- a/src/dir.h
+++ b/src/dir.h
@@ -101,10 +101,15 @@ void dir_mem_init(struct dir *);
 int dir_export_init(const char *fn);
 
 
+/* Function set by input code. Returns dir_output.final(). */
+int (*dir_process)();
+
 /* Scanning a live directory */
 extern int dir_scan_smfs;
 void dir_scan_init(const char *path);
-int dir_scan_process();
+
+/* Importing a file */
+int dir_import_init(const char *fn);
 
 
 /* The currently configured output functions. */
diff --git a/src/dir_import.c b/src/dir_import.c
new file mode 100644
index 0000000..17c8fcc
--- /dev/null
+++ b/src/dir_import.c
@@ -0,0 +1,576 @@
+/* ncdu - NCurses Disk Usage
+
+  Copyright (c) 2007-2012 Yoran Heling
+
+  Permission is hereby granted, free of charge, to any person obtaining
+  a copy of this software and associated documentation files (the
+  "Software"), to deal in the Software without restriction, including
+  without limitation the rights to use, copy, modify, merge, publish,
+  distribute, sublicense, and/or sell copies of the Software, and to
+  permit persons to whom the Software is furnished to do so, subject to
+  the following conditions:
+
+  The above copyright notice and this permission notice shall be included
+  in all copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+*/
+
+/* This JSON parser has the following limitations:
+ * - No support for character encodings incompatible with ASCII (e.g.
+ *   UTF-16/32)
+ * - Doesn't validate UTF-8 correctness (in fact, besides the ASCII part this
+ *   parser doesn't know anything about encoding).
+ * - Doesn't validate that there are no duplicate keys in JSON objects.
+ * - Isn't very strict with validating non-integer numbers.
+ * - Does not check nesting level, easily allows stack overflow. (TODO: FIX!)
+ */
+
+#include "global.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+
+
+/* Max. length of any JSON string we're interested in. A string may of course
+ * be larger, but we're not going to read more than MAX_VAL bytes into memory.
+ * If a string we're interested in (e.g. a file name) is longer than this,
+ * reading the import will result in an error. */
+#define MAX_VAL (32*1024)
+
+/* Minimum number of bytes we request from fread() */
+#define MIN_READ_SIZE 1024
+
+/* Read buffer size. Must be at least 2*MIN_READ_SIZE, everything larger
+ * improves performance. */
+#define READ_BUF_SIZE (32*1024)
+
+
+/* Use a struct for easy batch-allocation and deallocation of state data. */
+struct ctx {
+  FILE *stream; /* input being parsed */
+
+  int line; /* current line number, starts at 1 */
+  int byte; /* number of bytes consumed on the current line */
+  int eof; /* non-zero once the stream reported EOF */
+  int items; /* number of items parsed so far */
+  char *buf; /* points into readbuf, always zero-terminated. */
+  char *lastfill; /* points into readbuf, location of the zero terminator. */
+
+  char val[MAX_VAL]; /* scratch space for object keys and string values */
+  char readbuf[READ_BUF_SIZE]; /* raw data read from the stream */
+} *ctx;
+
+
+/* Fills readbuf with data from the stream. *buf will have at least n (<
+ * READ_BUF_SIZE) bytes available, unless the stream reached EOF or an error
+ * occurred. If the file data contains a null byte, this is considered an error.
+ * Returns 0 on success, non-zero on error. */
+static int fill(int n) {
+  int r;
+
+  if(ctx->eof)
+    return 0;
+
+  r = READ_BUF_SIZE-(ctx->lastfill - ctx->readbuf); /* number of bytes left in the buffer */
+  if(n < r)
+    n = r-1; /* read as much as fits, one byte stays reserved for the terminator */
+  if(n < MIN_READ_SIZE) { /* too little room at the end: move the unread data to the front */
+    r = ctx->lastfill - ctx->buf; /* number of unread bytes left in the buffer */
+    memmove(ctx->readbuf, ctx->buf, r); /* same buffer, so the regions may overlap: not memcpy() */
+    ctx->lastfill = ctx->readbuf + r;
+    ctx->buf = ctx->readbuf;
+    n = READ_BUF_SIZE-r-1;
+  }
+
+  do {
+    r = fread(ctx->lastfill, 1, n, ctx->stream);
+    if(r != n) {
+      if(feof(ctx->stream))
+        ctx->eof = 1;
+      else if(ferror(ctx->stream)) {
+        dir_seterr("Read error: %s", strerror(errno));
+        return 1;
+      }
+    }
+
+    ctx->lastfill[r] = 0;
+    if(strlen(ctx->lastfill) != (size_t)r) { /* a shorter string means the data had a zero byte */
+      dir_seterr("Zero-byte found in JSON stream");
+      return 1;
+    }
+    ctx->lastfill += r;
+    n -= r;
+  } while(!ctx->eof && n > MIN_READ_SIZE);
+
+  return 0;
+}
+
+/* Two macros that break function calling behaviour, but are damn convenient:
+ * both make the *calling* function return 1 when _x is true. E() first sets an error with the current position, and does nothing at all once dir_fatalerr is set. */
+#define E(_x, _m) do {\
+    if((_x) && !dir_fatalerr) {\
+      dir_seterr("Line %d byte %d: %s", ctx->line, ctx->byte, _m);\
+      return 1;\
+    }\
+  } while(0)
+
+#define C(_x) do {\
+    if(_x)\
+      return 1;\
+  } while(0)
+
+
+/* Require at least n bytes in the buffer, throw an error on early EOF.
+ * (Macro to quickly handle the common case.) Both evaluate to non-zero on error. */
+#define rfill1 (!*ctx->buf && _rfill(1))
+#define rfill(_n) ((ctx->lastfill - ctx->buf < (_n)) && _rfill(_n))
+
+static int _rfill(int n) {
+  C(fill(n)); /* read error or zero byte in the input */
+  E(ctx->lastfill - ctx->buf < n, "Unexpected EOF"); /* fill() could not provide n bytes */
+  return 0;
+}
+
+
+/* Advances the read position by n bytes and keeps the byte counter in sync. */
+static inline void con(int n) {
+  ctx->byte += n;
+  ctx->buf += n;
+}
+
+
+/* Skips over whitespace. When *ctx->buf == 0 after the call, EOF has been
+ * reached. Returns non-zero if reading more data failed. */
+static int cons() {
+  char c;
+
+  for(;;) {
+    C(!*ctx->buf && fill(1));
+    c = *ctx->buf;
+
+    if(c == 0x0A) {
+      /* The newline gets special treatment: no other function is able to
+       * consume one, so updating the position counters here is cheaper
+       * than checking for it in the generic con(). */
+      ctx->buf++;
+      ctx->line++;
+      ctx->byte = 0;
+      continue;
+    }
+
+    /* Anything but space, tab or carriage return ends the whitespace run. */
+    if(c != 0x20 && c != 0x09 && c != 0x0D)
+      return 0;
+    con(1);
+  }
+}
+
+/* Parses the escape sequence after a '\' (already consumed by the caller) and appends the decoded bytes to *dest, as far as *destlen allows. Returns non-zero on error. */
+static int rstring_esc(char **dest, int *destlen) {
+  unsigned int n;
+  C(rfill1); /* the '\' may have been the last byte in the buffer: make sure the escape character is there */
+#define ap(c) if(*destlen > 1) { *((*dest)++) = c; (*destlen)--; }
+  switch(*ctx->buf) {
+  case '"':  ap('"');  con(1); break;
+  case '\\': ap('\\'); con(1); break;
+  case '/':  ap('/');  con(1); break;
+  case 'b':  ap(0x08); con(1); break;
+  case 'f':  ap(0x0C); con(1); break;
+  case 'n':  ap(0x0A); con(1); break;
+  case 'r':  ap(0x0D); con(1); break;
+  case 't':  ap(0x09); con(1); break;
+  case 'u':
+    C(rfill(5)); /* 'u' plus four hex digits */
+#define hn(n) (n >= '0' && n <= '9' ? n-'0' : n >= 'A' && n <= 'F' ? n-'A'+10 : n >= 'a' && n <= 'f' ? n-'a'+10 : 1<<16)
+    n = (hn(ctx->buf[1])<<12) + (hn(ctx->buf[2])<<8) + (hn(ctx->buf[3])<<4) + hn(ctx->buf[4]);
+#undef hn
+    if(n <= 0x007F) { /* the code point is written out as UTF-8 */
+      ap(n);
+    } else if(n <= 0x07FF) {
+      ap(0xC0 | (n>>6));
+      ap(0x80 | (n & 0x3F));
+    } else if(n <= 0xFFFF) {
+      ap(0xE0 | (n>>12));
+      ap(0x80 | ((n>>6) & 0x3F));
+      ap(0x80 | (n & 0x3F));
+    } else // this happens if there was an invalid character (n >= (1<<16))
+      E(1, "Invalid character in \\u escape");
+    con(5);
+    break;
+  default:
+    E(1, "Invalid escape sequence");
+  }
+#undef ap
+  return 0;
+}
+
+
+/* Parse a JSON string and write it to *dest (max. destlen). Consumes but
+ * otherwise ignores any characters if the string is longer than destlen. *dest
+ * will be null-terminated, dest[destlen-1] = 0 if the string was just long
+ * enough or was cut off. That byte will be left untouched if the string is
+ * small enough. Returns non-zero on error. */
+static int rstring(char *dest, int destlen) {
+  C(rfill1);
+  E(*ctx->buf != '"', "Expected string");
+  con(1); /* opening quote */
+
+  while(1) {
+    C(rfill1);
+    if(*ctx->buf == '"')
+      break;
+    if(*ctx->buf == '\\') {
+      con(1);
+      C(rstring_esc(&dest, &destlen));
+      continue;
+    }
+    E((unsigned char)*ctx->buf <= 0x1F || (unsigned char)*ctx->buf == 0x7F, "Invalid character");
+    if(destlen > 1) { /* always keep one byte free for the terminator */
+      *(dest++) = *ctx->buf;
+      destlen--;
+    }
+    con(1);
+  }
+  con(1); /* closing quote */
+  if(destlen > 0) /* destlen == 0 discards the string, dest may be NULL then (see rval()) */
+    *dest = 0;
+  return 0;
+}
+
+
+/* Parse and consume a JSON integer. Throws an error if the value does not fit
+ * in an uint64_t, is not an integer or is larger than 'max'. */
+static int rint64(uint64_t *val, uint64_t max) {
+  uint64_t v;
+  int haschar = 0;
+  *val = 0;
+  while(1) {
+    C(!*ctx->buf && fill(1));
+    if(*ctx->buf == '0' && !haschar) { /* a leading zero is a complete number by itself */
+      con(1);
+      break;
+    }
+    if(*ctx->buf >= '0' && *ctx->buf <= '9') {
+      haschar = 1;
+      v = *ctx->buf-'0'; /* value of this digit */
+      E(*val > (UINT64_MAX-v)/10, "Invalid (positive) integer"); /* (*val)*10+v would wrap around */
+      *val = (*val)*10 + v;
+      con(1);
+      continue;
+    }
+    E(!haschar, "Invalid (positive) integer"); /* not a single digit found */
+    break;
+  }
+  E(*val > max, "Integer out of range");
+  return 0;
+}
+
+
+/* Parse and consume a JSON number. The result is discarded. Only the set of
+ * characters is checked, not their order. TODO: Improve validation. */
+static int rnum() {
+  int haschar = 0;
+  C(rfill1);
+  while(1) {
+    C(!*ctx->buf && fill(1));
+    if(*ctx->buf == 'e' || *ctx->buf == 'E' || *ctx->buf == '-' || *ctx->buf == '+' || *ctx->buf == '.' || (*ctx->buf >= '0' && *ctx->buf <= '9')) {
+      haschar = 1;
+      con(1);
+    } else {
+      E(!haschar, "Invalid JSON value"); /* not a single number character was found */
+      break;
+    }
+  }
+  return 0;
+}
+
+/* Consumes the literal v (len bytes, e.g. "true"); any other input is an error. */
+static int rlit(const char *v, int len) {
+  C(rfill(len)); /* need len bytes in the buffer for the comparison */
+  E(strncmp(ctx->buf, v, len) != 0, "Invalid JSON value");
+  con(len);
+  return 0;
+}
+
+
+/* Parse the "<space> <string> <space> : <space>" part of an object key. The key is written to dest as rstring() does; returns non-zero on error. */
+static int rkey(char *dest, int destlen) {
+  C(cons() || rstring(dest, destlen) || cons());
+  E(*ctx->buf != ':', "Expected ':'");
+  con(1); /* the ':' */
+  return cons(); /* leaves the buffer at the first byte of the value */
+}
+
+/* (Recursively) parse and consume any JSON value. The result is discarded.
+ * NOTE(review): an empty object or array ("{}", "[]") is rejected as invalid. */
+static int rval() {
+  C(rfill1);
+  switch(*ctx->buf) { /* the first byte decides the type of the value */
+  case 't': /* true */
+    C(rlit("true", 4));
+    break;
+  case 'f': /* false */
+    C(rlit("false", 5));
+    break;
+  case 'n': /* null */
+    C(rlit("null", 4));
+    break;
+  case '"': /* string */
+    C(rstring(NULL, 0)); /* zero-length destination: the contents are thrown away */
+    break;
+  case '{': /* object */
+    con(1);
+    while(1) {
+      C(rkey(NULL, 0) || rval() || cons()); /* one "key: value" pair */
+      if(*ctx->buf == '}')
+        break;
+      E(*ctx->buf != ',', "Expected ',' or '}'");
+      con(1);
+    }
+    con(1);
+    break;
+  case '[': /* array */
+    con(1);
+    while(1) {
+      C(cons() || rval() || cons()); /* one element */
+      if(*ctx->buf == ']')
+        break;
+      E(*ctx->buf != ',', "Expected ',' or ']'");
+      con(1);
+    }
+    con(1);
+    break;
+  default: /* assume number */
+    C(rnum());
+    break;
+  }
+  return 0;
+}
+
+/* Consumes everything up to the root item, and checks that this item is a dir.
+ * The input must start with: '[' major-version ',' minor-version ',' metadata ',' followed by the root item. */
+static int header() {
+  uint64_t v;
+
+  C(cons());
+  E(*ctx->buf != '[', "Expected JSON array");
+  con(1);
+  C(cons() || rint64(&v, 10000) || cons());
+  E(v != 1, "Incompatible major format version"); /* only major version 1 is accepted */
+  E(*ctx->buf != ',', "Expected ','");
+  con(1);
+  C(cons() || rint64(&v, 10000) || cons()); /* Ignore the minor version for now */
+  E(*ctx->buf != ',', "Expected ','");
+  con(1);
+  /* Metadata block is currently ignored */
+  C(cons() || rval() || cons());
+  E(*ctx->buf != ',', "Expected ','");
+  con(1);
+
+  C(cons());
+  E(*ctx->buf != '[', "Top-level item must be a directory"); /* the '[' is not consumed here, item() does that */
+
+  return 0;
+}
+
+
+static int item(uint64_t);
+
+/* Read and add dir contents: every ",item" up to and including the closing ']' of the directory. Returns non-zero on error. */
+static int itemdir(uint64_t dev) {
+  while(1) {
+    C(cons());
+    if(*ctx->buf == ']')
+      break;
+    E(*ctx->buf != ',', "Expected ',' or ']'");
+    con(1);
+    C(cons());
+    C(item(dev)); /* a failing child must stop the walk; E() is a no-op once an error is set, so the loop would otherwise keep consuming */
+  }
+  con(1);
+  C(cons());
+  return 0;
+}
+
+/* Parses the info object of one item into a struct dir and stores it in *item. dev is the device id used unless the object has a "dev" key; isdir selects FF_DIR or FF_FILE. */
+static int iteminfo(struct dir **item, uint64_t dev, int isdir) {
+  static struct dir dir; /* holds the fields that are read before the name is known */
+  struct dir *tmp, *d = &dir;
+  uint64_t iv;
+
+  memset(d, 0, sizeof(struct dir));
+  d->flags |= isdir ? FF_DIR : FF_FILE;
+  d->dev = dev;
+
+  E(*ctx->buf != '{', "Expected JSON object");
+  con(1);
+
+  while(1) {
+    C(rkey(ctx->val, MAX_VAL));
+    /* TODO: strcmp() in this fashion isn't very fast. */
+    if(strcmp(ctx->val, "name") == 0) {              /* name */
+      ctx->val[MAX_VAL-1] = 1; /* sentinel: rstring() overwrites it when the name doesn't fit */
+      C(rstring(ctx->val, MAX_VAL));
+      E(ctx->val[MAX_VAL-1] != 1, "Too large string value");
+      tmp = dir_createstruct(ctx->val);
+      memcpy(tmp, d, SDIRSIZE-1); /* carry over what has been parsed so far */
+      d = tmp;
+    } else if(strcmp(ctx->val, "asize") == 0) {      /* asize */
+      C(rint64(&iv, INT64_MAX));
+      d->asize = iv;
+    } else if(strcmp(ctx->val, "dsize") == 0) {      /* dsize */
+      C(rint64(&iv, INT64_MAX));
+      d->size = iv;
+    } else if(strcmp(ctx->val, "dev") == 0) {        /* dev */
+      C(rint64(&iv, UINT64_MAX));
+      d->dev = iv;
+    } else if(strcmp(ctx->val, "ino") == 0) {        /* ino */
+      C(rint64(&iv, UINT64_MAX));
+      d->ino = iv;
+    } else if(strcmp(ctx->val, "hlnkc") == 0) {      /* hlnkc */
+      if(*ctx->buf == 't') {
+        C(rlit("true", 4));
+        d->flags |= FF_HLNKC;
+      } else
+        C(rlit("false", 5));
+    } else if(strcmp(ctx->val, "read_error") == 0) { /* read_error */
+      if(*ctx->buf == 't') {
+        C(rlit("true", 4));
+        d->flags |= FF_ERR;
+      } else
+        C(rlit("false", 5));
+    } else if(strcmp(ctx->val, "excluded") == 0) {   /* excluded */
+      C(rstring(ctx->val, 8)); /* 8 bytes: just enough for "otherfs" and its terminator */
+      if(strcmp(ctx->val, "otherfs") == 0)
+        d->flags |= FF_OTHFS;
+      else
+        d->flags |= FF_EXL;
+    } else if(strcmp(ctx->val, "notreg") == 0) {     /* notreg */
+      if(*ctx->buf == 't') {
+        C(rlit("true", 4));
+        d->flags &= ~FF_FILE;
+      } else
+        C(rlit("false", 5));
+    } else
+      C(rval()); /* unknown key: skip its value */
+
+    C(cons());
+    if(*ctx->buf == '}')
+      break;
+    E(*ctx->buf != ',', "Expected ',' or '}'");
+    con(1);
+  }
+  con(1);
+
+  E(!*d->name, "No name field present in item information object");
+  *item = d;
+  ctx->items++;
+  return input_handle(1);
+}
+
+
+/* Recursively reads a file or directory item and passes it to dir_output. dev is the device id of the parent; returns non-zero on error. */
+static int item(uint64_t dev) {
+  int isdir = 0;
+  int isroot = ctx->items == 0; /* the first item that is read is the root */
+  struct dir *d = NULL;
+
+  if(*ctx->buf == '[') { /* a directory is an array that starts with its own info object */
+    isdir = 1;
+    con(1);
+    C(cons());
+  }
+
+  C(iteminfo(&d, dev, isdir));
+  dev = d->dev; /* becomes the default device id for the items inside this one */
+
+  if(isroot)
+    dir_curpath_set(d->name);
+  else
+    dir_curpath_enter(d->name);
+
+  if(isdir) {
+    if(dir_output.item(d)) {
+      dir_seterr("Output error: %s", strerror(errno));
+      return 1;
+    }
+    C(itemdir(dev));
+    if(dir_output.item(NULL)) { /* NOTE(review): NULL presumably marks the end of the directory - confirm in dir.h */
+      dir_seterr("Output error: %s", strerror(errno));
+      return 1;
+    }
+  } else if(dir_output.item(d)) {
+    dir_seterr("Output error: %s", strerror(errno));
+    return 1;
+  }
+
+  if(!isroot)
+    dir_curpath_leave();
+  else /* The root item must not be empty. */
+    E(ctx->items <= 1, "Empty directory");
+
+  return 0;
+}
+
+/* Checks that only the closing ']' of the top-level array (and whitespace) is left in the input. */
+static int footer() {
+  C(cons());
+  E(*ctx->buf != ']', "Expected ']'");
+  con(1);
+  C(cons());
+  E(*ctx->buf, "Trailing garbage"); /* cons() leaves *ctx->buf == 0 at EOF */
+  return 0;
+}
+
+/* The dir_process implementation for imports: parses header, root item and footer, closes the stream, frees ctx and returns dir_output.final(). */
+static int process() {
+  int fail = 0;
+
+  header(); /* return value unused: errors are detected through dir_fatalerr */
+
+  if(!dir_fatalerr)
+    fail = item(0);
+
+  if(!dir_fatalerr)
+    footer();
+
+  if(fclose(ctx->stream) && !dir_fatalerr)
+    dir_seterr("Error closing file: %s", strerror(errno));
+  free(ctx);
+
+  while(dir_fatalerr && !input_handle(0)) /* NOTE(review): presumably keeps the error on screen until the user reacts - confirm in main.c */
+    ;
+  return dir_output.final(dir_fatalerr || fail);
+}
+
+/* Prepares importing from fn ("-" means stdin) and installs process() as dir_process. Returns non-zero with errno set on failure. */
+int dir_import_init(const char *fn) {
+  FILE *stream;
+  if(strcmp(fn, "-") == 0)
+    stream = stdin;
+  else if((stream = fopen(fn, "r")) == NULL)
+    return 1;
+  if((ctx = calloc(1, sizeof(struct ctx))) == NULL) { /* don't dereference a failed allocation */
+    if(stream != stdin) fclose(stream);
+    errno = ENOMEM; /* fclose() may have changed errno, and the caller reports strerror(errno) */
+    return 1;
+  }
+  ctx->stream = stream;
+  ctx->line = 1; /* byte, eof, items and readbuf[0] are already zero thanks to calloc() */
+  ctx->buf = ctx->lastfill = ctx->readbuf;
+  dir_curpath_set(fn);
+  dir_process = process;
+  return 0;
+}
+
diff --git a/src/dir_scan.c b/src/dir_scan.c
index ee9b41d..f3688c6 100644
--- a/src/dir_scan.c
+++ b/src/dir_scan.c
@@ -234,8 +234,7 @@ static int dir_walk(char *dir) {
 }
 
 
-/* Returns 0 to continue running ncdu, 1 to quit. */
-int dir_scan_process() {
+static int process() {
   char *path;
   char *dir;
   int fail = 0;
@@ -296,5 +295,6 @@ void dir_scan_init(const char *path) {
   dir_curpath_set(path);
   dir_setlasterr(NULL);
   dir_seterr(NULL);
+  dir_process = process;
   pstate = ST_CALC;
 }
diff --git a/src/main.c b/src/main.c
index 28ea62e..24885f7 100644
--- a/src/main.c
+++ b/src/main.c
@@ -102,9 +102,10 @@ int input_handle(int wait) {
 
 
 /* parse command line */
-static char *argv_parse(int argc, char **argv) {
+static void argv_parse(int argc, char **argv) {
   int i, j, len;
   char *export = NULL;
+  char *import = NULL;
   char *dir = NULL;
   dir_ui = -1;
 
@@ -112,7 +113,7 @@ static char *argv_parse(int argc, char **argv) {
   for(i=1; i<argc; i++) {
     if(argv[i][0] == '-') {
       /* flags requiring arguments */
-      if(!strcmp(argv[i], "-X") || !strcmp(argv[i], "-u") || !strcmp(argv[i], "-o")
+      if(!strcmp(argv[i], "-X") || !strcmp(argv[i], "-u") || !strcmp(argv[i], "-o") || !strcmp(argv[i], "-f")
           || !strcmp(argv[i], "--exclude-from") || !strcmp(argv[i], "--exclude")) {
         if(i+1 >= argc) {
           printf("Option %s requires an argument\n", argv[i]);
@@ -126,6 +127,8 @@ static char *argv_parse(int argc, char **argv) {
           dir_ui = argv[i][0]-'0';
         } else if(strcmp(argv[i], "-o") == 0)
           export = argv[++i];
+        else if(strcmp(argv[i], "-f") == 0)
+          import = argv[++i];
         else if(strcmp(argv[i], "--exclude") == 0)
           exclude_add(argv[++i]);
         else if(exclude_addfile(argv[++i])) {
@@ -150,6 +153,7 @@ static char *argv_parse(int argc, char **argv) {
             printf("  -x                         Same filesystem\n");
             printf("  -r                         Read only\n");
             printf("  -o FILE                    Export scanned directory to FILE\n");
+            printf("  -f FILE                    Import scanned directory from FILE\n");
             printf("  -u <0-2>                   UI to use when scanning (0=minimal,2=verbose)\n");
             printf("  --exclude PATTERN          Exclude files that match PATTERN\n");
             printf("  -X, --exclude-from FILE    Exclude files that match any pattern in FILE\n");
@@ -175,12 +179,20 @@ static char *argv_parse(int argc, char **argv) {
   } else
     dir_mem_init(NULL);
 
+  if(import) {
+    if(dir_import_init(import)) {
+      printf("Can't open %s: %s\n", import, strerror(errno));
+      exit(1);
+    }
+    if(strcmp(import, "-") == 0)
+      ncurses_tty = 1;
+  } else
+    dir_scan_init(dir ? dir : ".");
+
   /* Use the single-line scan feedback by default when exporting to file, no
    * feedback when exporting to stdout. */
   if(dir_ui == -1)
     dir_ui = export && strcmp(export, "-") == 0 ? 0 : export ? 1 : 2;
-
-  return dir;
 }
 
 
@@ -223,14 +235,8 @@ static void init_nc() {
 
 /* main program */
 int main(int argc, char **argv) {
-  char *dir;
-
   setlocale(LC_ALL, "");
-
-  if((dir = argv_parse(argc, argv)) == NULL)
-    dir = ".";
-
-  dir_scan_init(dir);
+  argv_parse(argc, argv);
 
   if(dir_ui == 2)
     init_nc();
@@ -245,7 +251,7 @@ int main(int argc, char **argv) {
     }
 
     if(pstate == ST_CALC) {
-      if(dir_scan_process()) {
+      if(dir_process()) {
         if(dir_ui == 1)
           fputc('\n', stderr);
         break;
author	Yorhel <git@yorhel.nl>	2012-09-05 13:52:12 +0200
committer	Yorhel <git@yorhel.nl>	2012-09-05 13:52:12 +0200
commit	10dca5503a85eb7c668dd8fbb1318a682b3b6f39 (patch)
tree	aa0de604c8c57b03ec3f7b0b94be48fbb930098f /src
parent	ef4abec6cfcba19b2b2283d421cb50d32417271a (diff)