summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorYorhel <git@yorhel.nl>2012-09-05 13:52:12 +0200
committerYorhel <git@yorhel.nl>2012-09-05 13:52:12 +0200
commit10dca5503a85eb7c668dd8fbb1318a682b3b6f39 (patch)
treeaa0de604c8c57b03ec3f7b0b94be48fbb930098f /src
parentef4abec6cfcba19b2b2283d421cb50d32417271a (diff)
Added -f option to import an exported directory structure
This is the first working version. There's a few TODO's left.
Diffstat (limited to 'src')
-rw-r--r--src/dir.h7
-rw-r--r--src/dir_import.c576
-rw-r--r--src/dir_scan.c4
-rw-r--r--src/main.c30
4 files changed, 602 insertions, 15 deletions
diff --git a/src/dir.h b/src/dir.h
index a5070a3..0ef7875 100644
--- a/src/dir.h
+++ b/src/dir.h
@@ -101,10 +101,15 @@ void dir_mem_init(struct dir *);
int dir_export_init(const char *fn);
+/* Function set by input code. Returns dir_output.final(). */
+int (*dir_process)();
+
/* Scanning a live directory */
extern int dir_scan_smfs;
void dir_scan_init(const char *path);
-int dir_scan_process();
+
+/* Importing a file */
+int dir_import_init(const char *fn);
/* The currently configured output functions. */
diff --git a/src/dir_import.c b/src/dir_import.c
new file mode 100644
index 0000000..17c8fcc
--- /dev/null
+++ b/src/dir_import.c
@@ -0,0 +1,576 @@
+/* ncdu - NCurses Disk Usage
+
+ Copyright (c) 2007-2012 Yoran Heling
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+*/
+
+/* This JSON parser has the following limitations:
+ * - No support for character encodings incompatible with ASCII (e.g.
+ * UTF-16/32)
+ * - Doesn't validate UTF-8 correctness (in fact, besides the ASCII part this
+ * parser doesn't know anything about encoding).
+ * - Doesn't validate that there are no duplicate keys in JSON objects.
+ * - Isn't very strict with validating non-integer numbers.
+ * - Does not check nesting level, easily allows stack overflow. (TODO: FIX!)
+ */
+
+#include "global.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+
+
+/* Max. length of any JSON string we're interested in. A string may of course
+ * be larger, but we're not going to read more than MAX_VAL bytes of it into
+ * memory. If a string we're interested in (e.g. a file name) is longer than
+ * this, reading the import will result in an error. */
+#define MAX_VAL (32*1024)
+
+/* Minimum number of bytes we request from fread() */
+#define MIN_READ_SIZE 1024
+
+/* Read buffer size. Must be at least 2*MIN_READ_SIZE, everything larger
+ * improves performance. */
+#define READ_BUF_SIZE (32*1024)
+
+
+/* Use a struct for easy batch-allocation and deallocation of state data.
+ * A single instance is allocated by dir_import_init() and released by
+ * process(); all parser functions in this file access it through the global
+ * 'ctx' pointer. */
+struct ctx {
+ /* Input stream the JSON data is read from. */
+ FILE *stream;
+
+ /* Position used in error messages: 'line' starts at 1 and is incremented
+ * for every newline consumed, 'byte' counts the bytes consumed on the
+ * current line and is reset to 0 at each newline. */
+ int line;
+ int byte;
+ /* Set to 1 once the stream has reported end-of-file. */
+ int eof;
+ /* Number of item information objects parsed so far. */
+ int items;
+ char *buf; /* points into readbuf, always zero-terminated. */
+ char *lastfill; /* points into readbuf, location of the zero terminator. */
+
+ /* Scratch space for object keys and for the string values we keep. */
+ char val[MAX_VAL];
+ /* Raw bytes read from the stream, followed by a zero terminator. */
+ char readbuf[READ_BUF_SIZE];
+} *ctx;
+
+
+/* Fills readbuf with data from the stream. *buf will have at least n (<
+ * READ_BUF_SIZE) bytes available, unless the stream reached EOF or an error
+ * occurred. If the file data contains a zero byte, this is considered an
+ * error, because the buffer relies on zero-termination.
+ * Returns 0 on success (including a no-op after EOF), non-zero on error. */
+static int fill(int n) {
+  int r;
+
+  if(ctx->eof)
+    return 0;
+
+  /* Number of bytes left in the buffer, counting the terminator slot. */
+  r = READ_BUF_SIZE-(ctx->lastfill - ctx->readbuf);
+  if(n >= r || r-1 < MIN_READ_SIZE) {
+    /* Either the request does not fit behind the current data or the
+     * remaining space is too small to be worth a read: move the unread
+     * bytes to the front of the buffer. Source and destination may overlap,
+     * hence memmove(). Never reading more than the free space also keeps
+     * large requests from writing past the end of readbuf. */
+    r = ctx->lastfill - ctx->buf; /* number of unread bytes left in the buffer */
+    memmove(ctx->readbuf, ctx->buf, r);
+    ctx->lastfill = ctx->readbuf + r;
+    ctx->buf = ctx->readbuf;
+    n = READ_BUF_SIZE-r-1;
+  } else
+    n = r-1; /* plenty of room: simply fill up the rest of the buffer */
+
+  do {
+    r = fread(ctx->lastfill, 1, n, ctx->stream);
+    if(r != n) {
+      if(feof(ctx->stream))
+        ctx->eof = 1;
+      else if(ferror(ctx->stream)) {
+        dir_seterr("Read error: %s", strerror(errno));
+        return 1;
+      }
+    }
+
+    /* Terminate the new data; a shorter strlen() means the data itself
+     * contained a zero byte. */
+    ctx->lastfill[r] = 0;
+    if(strlen(ctx->lastfill) != (size_t)r) {
+      dir_seterr("Zero-byte found in JSON stream");
+      return 1;
+    }
+    ctx->lastfill += r;
+    n -= r;
+  } while(!ctx->eof && n > MIN_READ_SIZE);
+
+  return 0;
+}
+
+
+/* Two macros that break function calling behaviour, but are damn convenient */
+/* E(cond, msg): if cond is true and no fatal error has been recorded yet,
+ * record msg (prefixed with the current line/byte position) through
+ * dir_seterr() and make the *calling* function return 1. When dir_fatalerr
+ * is already set the macro does nothing, so execution continues past it. */
+#define E(_x, _m) do {\
+ if((_x) && !dir_fatalerr) {\
+ dir_seterr("Line %d byte %d: %s", ctx->line, ctx->byte, _m);\
+ return 1;\
+ }\
+ } while(0)
+
+/* C(expr): propagate a failure; makes the *calling* function return 1 when
+ * expr evaluates to non-zero. */
+#define C(_x) do {\
+ if(_x)\
+ return 1;\
+ } while(0)
+
+
+/* Require at least n bytes in the buffer, throw an error on early EOF.
+ * (Macro to quickly handle the common case)
+ * Both macros evaluate to 0 when enough data is (or becomes) available and to
+ * non-zero on failure, so they are meant to be wrapped in C(). rfill1 relies
+ * on the buffer being zero-terminated: a zero at *ctx->buf means that all
+ * buffered data has been consumed. */
+#define rfill1 (!*ctx->buf && _rfill(1))
+#define rfill(_n) ((ctx->lastfill - ctx->buf < (_n)) && _rfill(_n))
+
+/* Slow path of the macros above: refill the buffer and report "Unexpected
+ * EOF" if fewer than n unread bytes are available afterwards. */
+static int _rfill(int n) {
+ C(fill(n));
+ E(ctx->lastfill - ctx->buf < n, "Unexpected EOF");
+ return 0;
+}
+
+
+/* Advances the read position by n bytes and keeps the byte counter that is
+ * shown in error messages in sync. The caller must have made sure that n
+ * bytes are actually buffered. */
+static inline void con(int n) {
+  ctx->byte += n;
+  ctx->buf += n;
+}
+
+
+/* Skips over JSON whitespace (space, tab, CR, LF), refilling the buffer as
+ * needed. If *ctx->buf == 0 after this function, we've reached EOF.
+ * Returns non-zero only when refilling the buffer failed. */
+static int cons() {
+  char c;
+
+  for(;;) {
+    C(!*ctx->buf && fill(1));
+
+    c = *ctx->buf;
+    if(c == 0x0A) {
+      /* The newline is handled here rather than in con(): this is the only
+       * function that can consume one, so the line/byte bookkeeping does not
+       * have to burden the general case. */
+      ctx->line++;
+      ctx->byte = 0;
+      ctx->buf++;
+    } else if(c == 0x20 || c == 0x09 || c == 0x0D)
+      con(1);
+    else
+      return 0;
+  }
+}
+
+
+/* Decodes one JSON escape sequence. On entry ctx->buf points at the character
+ * following the backslash (which the caller has already consumed). The
+ * decoded bytes are appended to *dest as long as *destlen > 1; *dest and
+ * *destlen are updated accordingly. \uXXXX escapes are written as UTF-8;
+ * each escape is encoded on its own, so surrogate pairs are not combined.
+ * Returns 0 on success, non-zero on error. */
+static int rstring_esc(char **dest, int *destlen) {
+  unsigned int n;
+
+  /* The backslash may have been the last byte in the buffer; make sure the
+   * escape character itself is available before looking at it. */
+  C(rfill1);
+
+#define ap(c) do { if(*destlen > 1) { *((*dest)++) = c; (*destlen)--; } } while(0)
+  switch(*ctx->buf) {
+    case '"': ap('"'); con(1); break;
+    case '\\': ap('\\'); con(1); break;
+    case '/': ap('/'); con(1); break;
+    case 'b': ap(0x08); con(1); break;
+    case 'f': ap(0x0C); con(1); break;
+    case 'n': ap(0x0A); con(1); break;
+    case 'r': ap(0x0D); con(1); break;
+    case 't': ap(0x09); con(1); break;
+    case 'u':
+      /* 'u' plus four hex digits */
+      C(rfill(5));
+      /* hn() yields the value of a hex digit, or 1<<16 for anything else so
+       * that an invalid digit pushes the sum out of the 16-bit range. */
+#define hn(_c) ((_c) >= '0' && (_c) <= '9' ? (_c)-'0' : (_c) >= 'A' && (_c) <= 'F' ? (_c)-'A'+10 : (_c) >= 'a' && (_c) <= 'f' ? (_c)-'a'+10 : 1<<16)
+      n = (hn(ctx->buf[1])<<12) + (hn(ctx->buf[2])<<8) + (hn(ctx->buf[3])<<4) + hn(ctx->buf[4]);
+#undef hn
+      if(n <= 0x007F) {
+        ap(n);
+      } else if(n <= 0x07FF) {
+        ap(0xC0 | (n>>6));
+        ap(0x80 | (n & 0x3F));
+      } else if(n <= 0xFFFF) {
+        ap(0xE0 | (n>>12));
+        ap(0x80 | ((n>>6) & 0x3F));
+        ap(0x80 | (n & 0x3F));
+      } else {
+        /* this happens if there was an invalid character (n >= (1<<16)) */
+        E(1, "Invalid character in \\u escape");
+      }
+      con(5);
+      break;
+    default:
+      E(1, "Invalid escape sequence");
+  }
+#undef ap
+  return 0;
+}
+
+
+/* Parses a JSON string and stores it, zero-terminated, in dest (which has
+ * room for destlen bytes). A string that does not fit is still consumed
+ * completely, but the excess characters are dropped. When the string is
+ * exactly long enough to fill the buffer or had to be cut off,
+ * dest[destlen-1] is set to 0; for shorter strings that byte is left
+ * untouched, which lets callers detect truncation with a sentinel. Passing
+ * destlen == 0 discards the string without writing to dest.
+ * Returns 0 on success, non-zero on error. */
+static int rstring(char *dest, int destlen) {
+  unsigned char c;
+
+  C(rfill1);
+  E(*ctx->buf != '"', "Expected string");
+  con(1);
+
+  for(;;) {
+    C(rfill1);
+    c = (unsigned char)*ctx->buf;
+    if(c == '"')
+      break;
+
+    if(c == '\\') {
+      /* Skip the backslash, the helper deals with the rest. */
+      con(1);
+      C(rstring_esc(&dest, &destlen));
+    } else {
+      /* Control characters must be escaped inside a JSON string. */
+      E(c <= 0x1F || c == 0x7F, "Invalid character");
+      if(destlen > 1) {
+        *(dest++) = *ctx->buf;
+        destlen--;
+      }
+      con(1);
+    }
+  }
+
+  con(1); /* closing quote */
+  if(destlen > 0)
+    *dest = 0;
+  return 0;
+}
+
+
+/* Parse and consume a non-negative JSON integer and store it in *val. Throws
+ * an error if the value does not fit in an uint64_t, is not an integer or is
+ * larger than 'max'. A leading '0' ends the number immediately, as JSON does
+ * not allow leading zeros.
+ * Returns 0 on success, non-zero on error. */
+static int rint64(uint64_t *val, uint64_t max) {
+  unsigned int digit;
+  int haschar = 0;
+  *val = 0;
+  while(1) {
+    C(!*ctx->buf && fill(1));
+    if(*ctx->buf == '0' && !haschar) {
+      con(1);
+      break;
+    }
+    if(*ctx->buf >= '0' && *ctx->buf <= '9') {
+      haschar = 1;
+      digit = *ctx->buf-'0';
+      /* Check for overflow *before* multiplying: comparing the wrapped
+       * result with the old value does not catch every overflow of
+       * val*10 + digit. */
+      E(*val > (UINT64_MAX - digit) / 10, "Invalid (positive) integer");
+      *val = (*val)*10 + digit;
+      con(1);
+      continue;
+    }
+    E(!haschar, "Invalid (positive) integer");
+    break;
+  }
+  E(*val > max, "Integer out of range");
+  return 0;
+}
+
+
+/* Parse and consume a JSON number. The result is discarded.
+ * Validation is deliberately loose: any non-empty run of digits, signs,
+ * exponent markers and decimal points is accepted. The decimal point has to
+ * be part of that set, otherwise a perfectly valid number such as 1.5 would
+ * end at the '.' and make the caller fail.
+ * TODO: Improve validation.
+ * Returns 0 on success, non-zero on error. */
+static int rnum() {
+  int haschar = 0;
+  C(rfill1);
+  while(1) {
+    C(!*ctx->buf && fill(1));
+    if(*ctx->buf == 'e' || *ctx->buf == 'E' || *ctx->buf == '-' || *ctx->buf == '+' || *ctx->buf == '.' || (*ctx->buf >= '0' && *ctx->buf <= '9')) {
+      haschar = 1;
+      con(1);
+    } else {
+      E(!haschar, "Invalid JSON value");
+      break;
+    }
+  }
+  return 0;
+}
+
+
+/* Expects the literal v (len bytes, e.g. "true") at the read position and
+ * consumes it; anything else is reported as an invalid JSON value.
+ * Returns 0 on success, non-zero on error. */
+static int rlit(const char *v, int len) {
+  C(rfill(len));
+  E(strncmp(v, ctx->buf, len), "Invalid JSON value");
+  con(len);
+  return 0;
+}
+
+
+/* Reads an object key: optional whitespace, the key string (stored in dest
+ * according to the rules of rstring()), optional whitespace, a ':' and any
+ * whitespace following it. On success the read position is at the start of
+ * the value. Returns 0 on success, non-zero on error. */
+static int rkey(char *dest, int destlen) {
+  C(cons());
+  C(rstring(dest, destlen));
+  C(cons());
+  E(*ctx->buf != ':', "Expected ':'");
+  con(1);
+  return cons();
+}
+
+
+/* Maximum nesting depth of a JSON value that is parsed only to be discarded.
+ * Without a limit, deeply nested input recurses until the stack overflows. */
+#define RVAL_MAX_DEPTH 100
+
+static int rval();
+
+/* Parses and discards the value at the read position. Objects and arrays
+ * recurse through rval(), so the depth limit applies to every level. Empty
+ * objects and arrays are accepted. */
+static int rval_any() {
+  C(rfill1);
+  switch(*ctx->buf) {
+    case 't': /* true */
+      C(rlit("true", 4));
+      break;
+    case 'f': /* false */
+      C(rlit("false", 5));
+      break;
+    case 'n': /* null */
+      C(rlit("null", 4));
+      break;
+    case '"': /* string */
+      C(rstring(NULL, 0));
+      break;
+    case '{': /* object */
+      con(1);
+      C(cons());
+      if(*ctx->buf == '}') { /* empty object */
+        con(1);
+        break;
+      }
+      while(1) {
+        C(rkey(NULL, 0) || rval() || cons());
+        if(*ctx->buf == '}')
+          break;
+        E(*ctx->buf != ',', "Expected ',' or '}'");
+        con(1);
+      }
+      con(1);
+      break;
+    case '[': /* array */
+      con(1);
+      C(cons());
+      if(*ctx->buf == ']') { /* empty array */
+        con(1);
+        break;
+      }
+      while(1) {
+        C(cons() || rval() || cons());
+        if(*ctx->buf == ']')
+          break;
+        E(*ctx->buf != ',', "Expected ',' or ']'");
+        con(1);
+      }
+      con(1);
+      break;
+    default: /* assume number */
+      C(rnum());
+      break;
+  }
+  return 0;
+}
+
+/* (Recursively) parse and consume any JSON value. The result is discarded.
+ * Nesting deeper than RVAL_MAX_DEPTH is rejected with an error.
+ * Returns 0 on success, non-zero on error. */
+static int rval() {
+  static int depth = 0;
+  int r;
+
+  if(depth >= RVAL_MAX_DEPTH) {
+    E(1, "JSON value is nested too deeply");
+    return 1; /* E() is a no-op when an error was already recorded */
+  }
+
+  /* The counter is restored on every path, including errors. */
+  depth++;
+  r = rval_any();
+  depth--;
+  return r;
+}
+
+
+/* Parses the start of the export: the opening '[', the major and minor
+ * format version and the metadata value, each followed by a ','. On success
+ * the read position is at the '[' that opens the root directory item.
+ * Returns 0 on success, non-zero on error. */
+static int header() {
+  uint64_t v;
+
+  C(cons());
+  E(*ctx->buf != '[', "Expected JSON array");
+  con(1);
+
+  /* Major version: only version 1 is understood. */
+  C(cons());
+  C(rint64(&v, 10000));
+  C(cons());
+  E(v != 1, "Incompatible major format version");
+  E(*ctx->buf != ',', "Expected ','");
+  con(1);
+
+  /* Minor version: parsed, but ignored for now. */
+  C(cons());
+  C(rint64(&v, 10000));
+  C(cons());
+  E(*ctx->buf != ',', "Expected ','");
+  con(1);
+
+  /* Metadata block: currently ignored, any JSON value is accepted. */
+  C(cons());
+  C(rval());
+  C(cons());
+  E(*ctx->buf != ',', "Expected ','");
+  con(1);
+
+  /* The root item has to be a directory, i.e. an array. */
+  C(cons());
+  E(*ctx->buf != '[', "Top-level item must be a directory");
+
+  return 0;
+}
+
+
+static int item(uint64_t);
+
+/* Read and add dir contents: parses the ",item" members that follow the
+ * information object of a directory, up to and including the closing ']'
+ * and any whitespace after it. 'dev' is the device number of the directory
+ * and is handed to every child item.
+ * Returns 0 on success, non-zero on error. */
+static int itemdir(uint64_t dev) {
+  while(1) {
+    C(cons());
+    if(*ctx->buf == ']')
+      break;
+    E(*ctx->buf != ',', "Expected ',' or ']'");
+    con(1);
+    C(cons());
+    /* A failing child must abort the whole directory. Carrying on after an
+     * error would keep consuming input in an undefined parser state and
+     * could run past the end of the buffer. */
+    C(item(dev));
+  }
+  con(1);
+  C(cons());
+  return 0;
+}
+
+
+/* Parses the JSON object that describes a single item and stores a pointer
+ * to the resulting struct dir in *item. 'dev' is the default device number
+ * (overridden by a "dev" key), 'isdir' selects the FF_DIR or FF_FILE flag.
+ * Fields are collected in a static struct until the "name" key is seen; from
+ * then on the struct returned by dir_createstruct() is used. Unknown keys are
+ * skipped. Returns non-zero on a parse error, otherwise the result of
+ * input_handle(1). */
+static int iteminfo(struct dir **item, uint64_t dev, int isdir) {
+ static struct dir dir;
+ struct dir *tmp, *d = &dir;
+ uint64_t iv;
+
+ memset(d, 0, sizeof(struct dir));
+ d->flags |= isdir ? FF_DIR : FF_FILE;
+ d->dev = dev;
+
+ E(*ctx->buf != '{', "Expected JSON object");
+ con(1);
+
+ while(1) {
+ C(rkey(ctx->val, MAX_VAL));
+ /* TODO: strcmp() in this fashion isn't very fast. */
+ if(strcmp(ctx->val, "name") == 0) { /* name */
+ /* Sentinel: rstring() only zeroes the last byte of the buffer when the
+ * string fills it completely or had to be cut off. */
+ ctx->val[MAX_VAL-1] = 1;
+ C(rstring(ctx->val, MAX_VAL));
+ E(ctx->val[MAX_VAL-1] != 1, "Too large string value");
+ tmp = dir_createstruct(ctx->val);
+ /* Carry over the fields collected so far.
+ * NOTE(review): presumably SDIRSIZE-1 covers everything in front of the
+ * name field - confirm against the definition of struct dir. */
+ memcpy(tmp, d, SDIRSIZE-1);
+ d = tmp;
+ } else if(strcmp(ctx->val, "asize") == 0) { /* asize */
+ C(rint64(&iv, INT64_MAX));
+ d->asize = iv;
+ } else if(strcmp(ctx->val, "dsize") == 0) { /* dsize */
+ C(rint64(&iv, INT64_MAX));
+ d->size = iv;
+ } else if(strcmp(ctx->val, "dev") == 0) { /* dev */
+ C(rint64(&iv, UINT64_MAX));
+ d->dev = iv;
+ } else if(strcmp(ctx->val, "ino") == 0) { /* ino */
+ C(rint64(&iv, UINT64_MAX));
+ d->ino = iv;
+ } else if(strcmp(ctx->val, "hlnkc") == 0) { /* hlnkc */
+ /* Booleans: anything that does not start with 't' has to be "false". */
+ if(*ctx->buf == 't') {
+ C(rlit("true", 4));
+ d->flags |= FF_HLNKC;
+ } else
+ C(rlit("false", 5));
+ } else if(strcmp(ctx->val, "read_error") == 0) { /* read_error */
+ if(*ctx->buf == 't') {
+ C(rlit("true", 4));
+ d->flags |= FF_ERR;
+ } else
+ C(rlit("false", 5));
+ } else if(strcmp(ctx->val, "excluded") == 0) { /* excluded */
+ /* Only the first 7 bytes of the value are kept, which is just enough to
+ * recognize "otherfs"; every other value counts as a pattern exclude. */
+ C(rstring(ctx->val, 8));
+ if(strcmp(ctx->val, "otherfs") == 0)
+ d->flags |= FF_OTHFS;
+ else
+ d->flags |= FF_EXL;
+ } else if(strcmp(ctx->val, "notreg") == 0) { /* notreg */
+ if(*ctx->buf == 't') {
+ C(rlit("true", 4));
+ d->flags &= ~FF_FILE;
+ } else
+ C(rlit("false", 5));
+ } else
+ /* Unknown key: parse and discard its value. */
+ C(rval());
+
+ C(cons());
+ if(*ctx->buf == '}')
+ break;
+ E(*ctx->buf != ',', "Expected ',' or '}'");
+ con(1);
+ }
+ con(1);
+
+ /* Without a "name" key d still points at the zeroed static struct. */
+ E(!*d->name, "No name field present in item information object");
+ *item = d;
+ ctx->items++;
+ return input_handle(1);
+}
+
+
+/* Reads one item at the read position and hands it to the output functions.
+ * A '[' introduces a directory: its information object is followed by its
+ * children, which are read recursively through itemdir(). Anything else is
+ * read as the information object of a single file. 'dev' is the device
+ * number inherited from the parent directory.
+ * Returns 0 on success, non-zero on error. */
+static int item(uint64_t dev) {
+  struct dir *d = NULL;
+  /* The very first item that is parsed is the root of the tree. */
+  int isroot = ctx->items == 0;
+  int isdir = *ctx->buf == '[';
+
+  if(isdir) {
+    con(1);
+    C(cons());
+  }
+
+  C(iteminfo(&d, dev, isdir));
+  dev = d->dev;
+
+  if(isroot)
+    dir_curpath_set(d->name);
+  else
+    dir_curpath_enter(d->name);
+
+  /* The item itself is always passed on first... */
+  if(dir_output.item(d)) {
+    dir_seterr("Output error: %s", strerror(errno));
+    return 1;
+  }
+
+  /* ...and a directory is followed by its contents and a closing NULL item. */
+  if(isdir) {
+    C(itemdir(dev));
+    if(dir_output.item(NULL)) {
+      dir_seterr("Output error: %s", strerror(errno));
+      return 1;
+    }
+  }
+
+  if(isroot) /* The root item must not be empty. */
+    E(ctx->items <= 1, "Empty directory");
+  else
+    dir_curpath_leave();
+
+  return 0;
+}
+
+
+/* Parses the end of the export: the ']' that closes the top-level array,
+ * optionally surrounded by whitespace, with nothing but EOF after it.
+ * Returns 0 on success, non-zero on error. */
+static int footer() {
+  C(cons());
+  E(*ctx->buf != ']', "Expected ']'");
+  con(1);
+
+  /* After skipping whitespace only the zero terminator (EOF) may be left. */
+  C(cons());
+  E(*ctx->buf != 0, "Trailing garbage");
+  return 0;
+}
+
+
+/* Runs the complete import: header, root item (recursively), footer. Closes
+ * the stream and releases the parser state afterwards. While a fatal error
+ * is recorded, input_handle(0) is called repeatedly until it returns
+ * non-zero. Returns the result of dir_output.final(). */
+static int process() {
+  int fail = 0;
+
+  /* Return values of header() and footer() are not needed: every parse
+   * error they report is visible through dir_fatalerr. */
+  header();
+
+  if(!dir_fatalerr)
+    fail = item(0);
+
+  /* item() can also fail without recording a fatal error, because it passes
+   * on a non-zero result of input_handle(). The read position is then
+   * somewhere in the middle of the data, and checking for the footer would
+   * only turn that into a bogus parse error. */
+  if(!dir_fatalerr && !fail)
+    footer();
+
+  if(fclose(ctx->stream) && !dir_fatalerr)
+    dir_seterr("Error closing file: %s", strerror(errno));
+  free(ctx);
+  ctx = NULL; /* don't leave a dangling pointer behind */
+
+  while(dir_fatalerr && !input_handle(0))
+    ;
+  return dir_output.final(dir_fatalerr || fail);
+}
+
+
+/* Prepares importing the file 'fn' ("-" reads from standard input) and
+ * installs the import code as dir_process. Nothing is parsed yet.
+ * Returns 0 on success. Returns 1 with errno set when the file can't be
+ * opened or the parser state can't be allocated. */
+int dir_import_init(const char *fn) {
+  FILE *stream;
+  if(strcmp(fn, "-") == 0)
+    stream = stdin;
+  else if((stream = fopen(fn, "r")) == NULL)
+    return 1;
+
+  ctx = malloc(sizeof(struct ctx));
+  if(ctx == NULL) {
+    /* Don't leak the file we just opened; stdin is not ours to close. */
+    if(stream != stdin)
+      fclose(stream);
+    /* The caller reports strerror(errno), make sure it names the cause. */
+    errno = ENOMEM;
+    return 1;
+  }
+  ctx->stream = stream;
+  ctx->line = 1;
+  ctx->byte = ctx->eof = ctx->items = 0;
+  /* Start with an empty, zero-terminated buffer. */
+  ctx->buf = ctx->lastfill = ctx->readbuf;
+  ctx->readbuf[0] = 0;
+
+  dir_curpath_set(fn);
+  dir_process = process;
+  return 0;
+}
+
diff --git a/src/dir_scan.c b/src/dir_scan.c
index ee9b41d..f3688c6 100644
--- a/src/dir_scan.c
+++ b/src/dir_scan.c
@@ -234,8 +234,7 @@ static int dir_walk(char *dir) {
}
-/* Returns 0 to continue running ncdu, 1 to quit. */
-int dir_scan_process() {
+static int process() {
char *path;
char *dir;
int fail = 0;
@@ -296,5 +295,6 @@ void dir_scan_init(const char *path) {
dir_curpath_set(path);
dir_setlasterr(NULL);
dir_seterr(NULL);
+ dir_process = process;
pstate = ST_CALC;
}
diff --git a/src/main.c b/src/main.c
index 28ea62e..24885f7 100644
--- a/src/main.c
+++ b/src/main.c
@@ -102,9 +102,10 @@ int input_handle(int wait) {
/* parse command line */
-static char *argv_parse(int argc, char **argv) {
+static void argv_parse(int argc, char **argv) {
int i, j, len;
char *export = NULL;
+ char *import = NULL;
char *dir = NULL;
dir_ui = -1;
@@ -112,7 +113,7 @@ static char *argv_parse(int argc, char **argv) {
for(i=1; i<argc; i++) {
if(argv[i][0] == '-') {
/* flags requiring arguments */
- if(!strcmp(argv[i], "-X") || !strcmp(argv[i], "-u") || !strcmp(argv[i], "-o")
+ if(!strcmp(argv[i], "-X") || !strcmp(argv[i], "-u") || !strcmp(argv[i], "-o") || !strcmp(argv[i], "-f")
|| !strcmp(argv[i], "--exclude-from") || !strcmp(argv[i], "--exclude")) {
if(i+1 >= argc) {
printf("Option %s requires an argument\n", argv[i]);
@@ -126,6 +127,8 @@ static char *argv_parse(int argc, char **argv) {
dir_ui = argv[i][0]-'0';
} else if(strcmp(argv[i], "-o") == 0)
export = argv[++i];
+ else if(strcmp(argv[i], "-f") == 0)
+ import = argv[++i];
else if(strcmp(argv[i], "--exclude") == 0)
exclude_add(argv[++i]);
else if(exclude_addfile(argv[++i])) {
@@ -150,6 +153,7 @@ static char *argv_parse(int argc, char **argv) {
printf(" -x Same filesystem\n");
printf(" -r Read only\n");
printf(" -o FILE Export scanned directory to FILE\n");
+ printf(" -f FILE Import scanned directory from FILE\n");
printf(" -u <0-2> UI to use when scanning (0=minimal,2=verbose)\n");
printf(" --exclude PATTERN Exclude files that match PATTERN\n");
printf(" -X, --exclude-from FILE Exclude files that match any pattern in FILE\n");
@@ -175,12 +179,20 @@ static char *argv_parse(int argc, char **argv) {
} else
dir_mem_init(NULL);
+ if(import) {
+ if(dir_import_init(import)) {
+ printf("Can't open %s: %s\n", import, strerror(errno));
+ exit(1);
+ }
+ if(strcmp(import, "-") == 0)
+ ncurses_tty = 1;
+ } else
+ dir_scan_init(dir ? dir : ".");
+
/* Use the single-line scan feedback by default when exporting to file, no
* feedback when exporting to stdout. */
if(dir_ui == -1)
dir_ui = export && strcmp(export, "-") == 0 ? 0 : export ? 1 : 2;
-
- return dir;
}
@@ -223,14 +235,8 @@ static void init_nc() {
/* main program */
int main(int argc, char **argv) {
- char *dir;
-
setlocale(LC_ALL, "");
-
- if((dir = argv_parse(argc, argv)) == NULL)
- dir = ".";
-
- dir_scan_init(dir);
+ argv_parse(argc, argv);
if(dir_ui == 2)
init_nc();
@@ -245,7 +251,7 @@ int main(int argc, char **argv) {
}
if(pstate == ST_CALC) {
- if(dir_scan_process()) {
+ if(dir_process()) {
if(dir_ui == 1)
fputc('\n', stderr);
break;