diff options
author | Yorhel <git@yorhel.nl> | 2021-03-04 17:13:37 +0100 |
---|---|---|
committer | Yorhel <git@yorhel.nl> | 2021-03-04 17:35:18 +0100 |
commit | 783bcb5b78f4cb58cbac8b12065a16364550e3c6 (patch) | |
tree | 0886fd080bc0a66443c6c01c48f91d6a10eff59d | |
parent | 9337cdc99e8c205e6e75de25de29a5be53ea858c (diff) |
Experiment with scanning using openat() rather than chdir() (branch: openat)
A slightly different approach. In terms of syscalls it probably doesn't
matter at all, but it's a simpler, less hacky and maybe more efficient
way to work around PATH_MAX limitations than constantly chdir()'ing for
each directory. Should also fix the chdir("..") failure case in macOS
firmlink scenarios, though I have no idea whether macOS implements all
these openat()-related calls in the first place.
Downsides:
- Less portable. Everything used here is in POSIX, but these interfaces are
more recent than the ones we relied on before.
- Requires more open file descriptors; might hit the descriptor limit for
deeply nested directories.
Haven't done much testing yet, but there are a bunch of TODOs:
- Make refreshing work again (not sure what's going wrong there)
- See if we can report readdir() etc errors again?
- Make firmlink detection work again?
- See how portable this really is.
- Also update file deletion code
-rw-r--r-- | configure.ac | 2 | ||||
-rw-r--r-- | src/dir.h | 2 | ||||
-rw-r--r-- | src/dir_scan.c | 214 | ||||
-rw-r--r-- | src/exclude.c | 36 | ||||
-rw-r--r-- | src/exclude.h | 2 | ||||
-rw-r--r-- | src/main.c | 4 |
6 files changed, 87 insertions, 173 deletions
diff --git a/configure.ac b/configure.ac index a5d1cd4..dcb4f0d 100644 --- a/configure.ac +++ b/configure.ac @@ -28,7 +28,7 @@ AC_CHECK_FUNCS( [getcwd gettimeofday fnmatch chdir rmdir unlink lstat system getenv],[], AC_MSG_ERROR([required function missing])) -AC_CHECK_FUNCS(statfs) +AC_CHECK_FUNCS(fstatfs) AC_CHECK_HEADERS([sys/attr.h]) @@ -113,7 +113,7 @@ void dir_scan_init(const char *path); extern int dir_import_active; int dir_import_init(const char *fn); -#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_STATFS +#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_FSTATFS extern int exclude_kernfs; #endif diff --git a/src/dir_scan.c b/src/dir_scan.c index 03a582b..98b9972 100644 --- a/src/dir_scan.c +++ b/src/dir_scan.c @@ -30,6 +30,7 @@ #include <errno.h> #include <unistd.h> +#include <fcntl.h> #include <sys/types.h> #include <sys/stat.h> #include <dirent.h> @@ -38,7 +39,7 @@ #include <sys/attr.h> #endif -#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_STATFS +#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_FSTATFS #include <sys/statfs.h> #include <linux/magic.h> #endif @@ -59,7 +60,7 @@ static struct dir *buf_dir; static struct dir_ext buf_ext[1]; -#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_STATFS +#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_FSTATFS int exclude_kernfs; /* Exclude Linux pseudo filesystems */ static int is_kernfs(unsigned long type) { @@ -138,112 +139,14 @@ static void stat_to_dir(struct stat *fs) { } -/* Reads all filenames in the currently chdir'ed directory and stores it as a - * nul-separated list of filenames. The list ends with an empty filename (i.e. - * two nuls). . and .. are not included. Returned memory should be freed. *err - * is set to 1 if some error occurred. Returns NULL if that error was fatal. - * The reason for reading everything in memory first and then walking through - * the list is to avoid eating too many file descriptors in a deeply recursive - * directory. 
*/ -static char *dir_read(int *err) { - DIR *dir; - struct dirent *item; - char *buf = NULL; - size_t buflen = 512; - size_t off = 0; - - if((dir = opendir(".")) == NULL) { - *err = 1; - return NULL; - } - - buf = xmalloc(buflen); - errno = 0; - - while((item = readdir(dir)) != NULL) { - if(item->d_name[0] == '.' && (item->d_name[1] == 0 || (item->d_name[1] == '.' && item->d_name[2] == 0))) - continue; - size_t req = off+3+strlen(item->d_name); - if(req > buflen) { - buflen = req < buflen*2 ? buflen*2 : req; - buf = xrealloc(buf, buflen); - } - strcpy(buf+off, item->d_name); - off += strlen(item->d_name)+1; - } - if(errno) - *err = 1; - if(closedir(dir) < 0) - *err = 1; - - buf[off] = 0; - buf[off+1] = 0; - return buf; -} - - -static int dir_walk(char *); - - -/* Tries to recurse into the current directory item (buf_dir is assumed to be the current dir) */ -static int dir_scan_recurse(const char *name) { - int fail = 0; - char *dir; - - if(chdir(name)) { - dir_setlasterr(dir_curpath); - buf_dir->flags |= FF_ERR; - if(dir_output.item(buf_dir, name, buf_ext) || dir_output.item(NULL, 0, NULL)) { - dir_seterr("Output error: %s", strerror(errno)); - return 1; - } - return 0; - } - - if((dir = dir_read(&fail)) == NULL) { - dir_setlasterr(dir_curpath); - buf_dir->flags |= FF_ERR; - if(dir_output.item(buf_dir, name, buf_ext) || dir_output.item(NULL, 0, NULL)) { - dir_seterr("Output error: %s", strerror(errno)); - return 1; - } - if(chdir("..")) { - dir_seterr("Error going back to parent directory: %s", strerror(errno)); - return 1; - } else - return 0; - } - - /* readdir() failed halfway, not fatal. 
*/ - if(fail) - buf_dir->flags |= FF_ERR; - - if(dir_output.item(buf_dir, name, buf_ext)) { - dir_seterr("Output error: %s", strerror(errno)); - return 1; - } - fail = dir_walk(dir); - if(dir_output.item(NULL, 0, NULL)) { - dir_seterr("Output error: %s", strerror(errno)); - return 1; - } - - /* Not being able to chdir back is fatal */ - if(!fail && chdir("..")) { - dir_seterr("Error going back to parent directory: %s", strerror(errno)); - return 1; - } - - return fail; -} +static int dir_walk(int); /* Scans and adds a single item. Recurses into dir_walk() again if this is a - * directory. Assumes we're chdir'ed in the directory in which this item - * resides. */ -static int dir_scan_item(const char *name) { + * directory. */ +static int dir_scan_item(int parfd, const char *name) { static struct stat st, stl; - int fail = 0; + int fail = 0, dirfd = -1; #ifdef __CYGWIN__ /* /proc/registry names may contain slashes */ @@ -256,15 +159,20 @@ static int dir_scan_item(const char *name) { if(exclude_match(dir_curpath)) buf_dir->flags |= FF_EXL; - if(!(buf_dir->flags & (FF_ERR|FF_EXL)) && lstat(name, &st)) { + if(!(buf_dir->flags & (FF_ERR|FF_EXL)) && fstatat(parfd, name, &st, AT_SYMLINK_NOFOLLOW)) { buf_dir->flags |= FF_ERR; dir_setlasterr(dir_curpath); } -#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_STATFS - if(exclude_kernfs && !(buf_dir->flags & (FF_ERR|FF_EXL)) && S_ISDIR(st.st_mode)) { + if(!(buf_dir->flags & (FF_ERR|FF_EXL)) && S_ISDIR(st.st_mode) && (dirfd = openat(parfd, name, O_RDONLY|O_DIRECTORY)) < 0) { + buf_dir->flags |= FF_ERR; + dir_setlasterr(dir_curpath); + } + +#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_FSTATFS + if(exclude_kernfs && dirfd >= 0) { struct statfs fst; - if(statfs(name, &fst)) { + if(fstatfs(dirfd, &fst)) { buf_dir->flags |= FF_ERR; dir_setlasterr(dir_curpath); } else if(is_kernfs(fst.f_type)) @@ -272,7 +180,8 @@ static int dir_scan_item(const char *name) { } #endif -#if HAVE_SYS_ATTR_H && HAVE_GETATTRLIST && 
HAVE_DECL_ATTR_CMNEXT_NOFIRMLINKPATH + /* TODO: Completely broken; prolly needs absolute path lookup */ +#if 0 && HAVE_SYS_ATTR_H && HAVE_GETATTRLIST && HAVE_DECL_ATTR_CMNEXT_NOFIRMLINKPATH if(!follow_firmlinks) { struct attrlist list = { .bitmapcount = ATTR_BIT_MAP_COUNT, @@ -292,60 +201,79 @@ static int dir_scan_item(const char *name) { #endif if(!(buf_dir->flags & (FF_ERR|FF_EXL))) { - if(follow_symlinks && S_ISLNK(st.st_mode) && !stat(name, &stl) && !S_ISDIR(stl.st_mode)) + if(follow_symlinks && S_ISLNK(st.st_mode) && !fstatat(parfd, name, &stl, 0) && !S_ISDIR(stl.st_mode)) stat_to_dir(&stl); else stat_to_dir(&st); } - if(cachedir_tags && (buf_dir->flags & FF_DIR) && !(buf_dir->flags & (FF_ERR|FF_EXL|FF_OTHFS|FF_KERNFS|FF_FRMLNK))) - if(has_cachedir_tag(name)) { + if(cachedir_tags && dirfd >= 0 && !(buf_dir->flags & (FF_ERR|FF_EXL|FF_OTHFS|FF_KERNFS|FF_FRMLNK))) + if(has_cachedir_tag(dirfd)) { buf_dir->flags |= FF_EXL; buf_dir->size = buf_dir->asize = 0; } - /* Recurse into the dir or output the item */ - if(buf_dir->flags & FF_DIR && !(buf_dir->flags & (FF_ERR|FF_EXL|FF_OTHFS|FF_KERNFS|FF_FRMLNK))) - fail = dir_scan_recurse(name); - else if(buf_dir->flags & FF_DIR) { - if(dir_output.item(buf_dir, name, buf_ext) || dir_output.item(NULL, 0, NULL)) { - dir_seterr("Output error: %s", strerror(errno)); - fail = 1; - } - } else if(dir_output.item(buf_dir, name, buf_ext)) { + if(dir_output.item(buf_dir, name, buf_ext)) { + dir_seterr("Output error: %s", strerror(errno)); + fail = 1; + } + + if(!fail && dirfd >= 0 && !(buf_dir->flags & (FF_ERR|FF_EXL|FF_OTHFS|FF_KERNFS|FF_FRMLNK))) { + /* XXX: Can't do anything with the return value, since we've already outputted our dir entry item. + * So errors reading dir items will be silently ignored. Not great. 
*/ + dir_walk(dirfd); + dirfd = -1; + } + + if(!fail && (buf_dir->flags & FF_DIR) && dir_output.item(NULL, 0, NULL)) { dir_seterr("Output error: %s", strerror(errno)); fail = 1; } + if(dirfd >= 0) + close(dirfd); + return fail || input_handle(1); } -/* Walks through the directory that we're currently chdir'ed to. *dir contains - * the filenames as returned by dir_read(), and will be freed automatically by - * this function. */ -static int dir_walk(char *dir) { +/* Recursively walks through the directory descriptor. Will close() the given dirfd. */ +static int dir_walk(int dirfd) { int fail = 0; - char *cur; + DIR *dir; + struct dirent *item; - fail = 0; - for(cur=dir; !fail&&cur&&*cur; cur+=strlen(cur)+1) { - dir_curpath_enter(cur); + /* Illegal behavior: We're giving dirfd to fdopendir(), which in turn takes + * control of the fd and we shouldn't be using it again. Yet we do use it + * later on for openat() calls. I doubt this will be a problem, but may need + * further testing. The alternative is to dup(), but that makes us run out of + * descriptors twice as fast... */ + if((dir = fdopendir(dirfd)) == NULL) { + close(dirfd); + return -1; + } + + while((item = readdir(dir)) != NULL) { + if(item->d_name[0] == '.' && (item->d_name[1] == 0 || (item->d_name[1] == '.' && item->d_name[2] == 0))) + continue; + dir_curpath_enter(item->d_name); memset(buf_dir, 0, offsetof(struct dir, name)); memset(buf_ext, 0, sizeof(struct dir_ext)); - fail = dir_scan_item(cur); + fail |= dir_scan_item(dirfd, item->d_name); dir_curpath_leave(); } - free(dir); + if(errno) + fail = 1; + if(closedir(dir) < 0) + fail = 1; return fail; } static int process(void) { char *path; - char *dir; - int fail = 0; + int fail = 0, dirfd = -1; struct stat fs; memset(buf_dir, 0, offsetof(struct dir, name)); @@ -361,15 +289,12 @@ static int process(void) { if(!dir_fatalerr && path_chdir(dir_curpath) < 0) dir_seterr("Error changing directory: %s", strerror(errno)); - /* Can these even fail after a chdir? 
*/ - if(!dir_fatalerr && lstat(".", &fs) != 0) - dir_seterr("Error obtaining directory information: %s", strerror(errno)); - if(!dir_fatalerr && !S_ISDIR(fs.st_mode)) - dir_seterr("Not a directory"); - - if(!dir_fatalerr && !(dir = dir_read(&fail))) + if(!dir_fatalerr && (dirfd = open(".", O_RDONLY|O_DIRECTORY)) < 0) dir_seterr("Error reading directory: %s", strerror(errno)); + if(!dir_fatalerr && fstat(dirfd, &fs) != 0) + dir_seterr("Error obtaining directory information: %s", strerror(errno)); + if(!dir_fatalerr) { curdev = (uint64_t)fs.st_dev; if(fail) @@ -380,14 +305,19 @@ static int process(void) { dir_seterr("Output error: %s", strerror(errno)); fail = 1; } - if(!fail) - fail = dir_walk(dir); + if(!fail) { + fail = dir_walk(dirfd); + dirfd = -1; + } if(!fail && dir_output.item(NULL, 0, NULL)) { dir_seterr("Output error: %s", strerror(errno)); fail = 1; } } + if(dirfd >= 0) + close(dirfd); + while(dir_fatalerr && !input_handle(0)) ; return dir_output.final(dir_fatalerr || fail); diff --git a/src/exclude.c b/src/exclude.c index 572335f..b9c54a0 100644 --- a/src/exclude.c +++ b/src/exclude.c @@ -29,6 +29,8 @@ #include <stdlib.h> #include <string.h> #include <fnmatch.h> +#include <unistd.h> +#include <fcntl.h> static struct exclude { @@ -105,35 +107,17 @@ void exclude_clear() { * Exclusion of directories that contain only cached information. * See http://www.brynosaurus.com/cachedir/ */ -#define CACHEDIR_TAG_FILENAME "CACHEDIR.TAG" #define CACHEDIR_TAG_SIGNATURE "Signature: 8a477f597d28d172789f06886806bc55" -int has_cachedir_tag(const char *name) { - static int path_l = 1024; - static char *path = NULL; - int l; +int has_cachedir_tag(int dirfd) { + int fd = -1, match = 0; char buf[sizeof CACHEDIR_TAG_SIGNATURE - 1]; - FILE *f; - int match = 0; - - /* Compute the required length for `path`. 
*/ - l = strlen(name) + sizeof CACHEDIR_TAG_FILENAME + 2; - if(l > path_l || path == NULL) { - path_l = path_l * 2; - if(path_l < l) - path_l = l; - /* We don't need to copy the content of `path`, so it's more efficient to - * use `free` + `malloc`. */ - free(path); - path = xmalloc(path_l); - } - snprintf(path, path_l, "%s/%s", name, CACHEDIR_TAG_FILENAME); - f = fopen(path, "rb"); - if(f != NULL) { - match = ((fread(buf, 1, sizeof buf, f) == sizeof buf) && - !memcmp(buf, CACHEDIR_TAG_SIGNATURE, sizeof buf)); - fclose(f); - } + /* Assumption: We won't get a short read() when fetching the tag. */ + match = (fd = openat(dirfd, "CACHEDIR.TAG", O_RDONLY)) >= 0 + && read(fd, buf, sizeof buf) == sizeof buf + && !memcmp(buf, CACHEDIR_TAG_SIGNATURE, sizeof buf); + if(fd >= 0) + close(fd); return match; } diff --git a/src/exclude.h b/src/exclude.h index 72eff50..2e59e2d 100644 --- a/src/exclude.h +++ b/src/exclude.h @@ -30,6 +30,6 @@ void exclude_add(char *); int exclude_addfile(char *); int exclude_match(char *); void exclude_clear(void); -int has_cachedir_tag(const char *name); +int has_cachedir_tag(int); #endif @@ -170,7 +170,7 @@ static void argv_parse(int argc, char **argv) { printf(" -X, --exclude-from FILE Exclude files that match any pattern in FILE\n"); printf(" -L, --follow-symlinks Follow symbolic links (excluding directories)\n"); printf(" --exclude-caches Exclude directories containing CACHEDIR.TAG\n"); -#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_STATFS +#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_FSTATFS printf(" --exclude-kernfs Exclude Linux pseudo filesystems (procfs,sysfs,cgroup,...)\n"); #endif #if HAVE_SYS_ATTR_H && HAVE_GETATTRLIST && HAVE_DECL_ATTR_CMNEXT_NOFIRMLINKPATH @@ -206,7 +206,7 @@ static void argv_parse(int argc, char **argv) { break; case 2 : /* --exclude-kernfs */ -#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_STATFS +#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_FSTATFS exclude_kernfs = 1; break; #else 
fprintf(stderr, "This feature is not supported on your platform\n"); |