summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Yorhel <git@yorhel.nl>, 2021-03-04 17:13:37 +0100
committer: Yorhel <git@yorhel.nl>, 2021-03-04 17:35:18 +0100
commit: 783bcb5b78f4cb58cbac8b12065a16364550e3c6 (patch)
tree: 0886fd080bc0a66443c6c01c48f91d6a10eff59d
parent: 9337cdc99e8c205e6e75de25de29a5be53ea858c (diff)
Experiment with scanning using openat() rather than chdir() [branch: openat]
A slightly different approach. In terms of syscalls it probably doesn't matter at all, but it's a simpler, less hacky and maybe more efficient way to work around PATH_MAX limitations than constantly chdir()'ing for each directory. Should also fix the chdir("..") fail case in macOS firmlink scenarios, though I haven't a clue if macOS implements all these openat()-related calls in the first place.

Downsides:
- Less portable. Everything is in POSIX, but it's more recent than what we used to use.
- Requires more open file descriptions, might hit the limit for deeply nested directories.

Haven't done much testing yet, but there's a bunch of TODOs:
- Make refreshing work again (not sure what's going wrong there)
- See if we can report readdir() etc. errors again?
- Make firmlink detection work again?
- See how portable this really is.
- Also update file deletion code
-rw-r--r-- configure.ac 2
-rw-r--r-- src/dir.h 2
-rw-r--r-- src/dir_scan.c 214
-rw-r--r-- src/exclude.c 36
-rw-r--r-- src/exclude.h 2
-rw-r--r-- src/main.c 4
6 files changed, 87 insertions, 173 deletions
diff --git a/configure.ac b/configure.ac
index a5d1cd4..dcb4f0d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -28,7 +28,7 @@ AC_CHECK_FUNCS(
[getcwd gettimeofday fnmatch chdir rmdir unlink lstat system getenv],[],
AC_MSG_ERROR([required function missing]))
-AC_CHECK_FUNCS(statfs)
+AC_CHECK_FUNCS(fstatfs)
AC_CHECK_HEADERS([sys/attr.h])
diff --git a/src/dir.h b/src/dir.h
index fc33eb0..32459fb 100644
--- a/src/dir.h
+++ b/src/dir.h
@@ -113,7 +113,7 @@ void dir_scan_init(const char *path);
extern int dir_import_active;
int dir_import_init(const char *fn);
-#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_STATFS
+#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_FSTATFS
extern int exclude_kernfs;
#endif
diff --git a/src/dir_scan.c b/src/dir_scan.c
index 03a582b..98b9972 100644
--- a/src/dir_scan.c
+++ b/src/dir_scan.c
@@ -30,6 +30,7 @@
#include <errno.h>
#include <unistd.h>
+#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>
@@ -38,7 +39,7 @@
#include <sys/attr.h>
#endif
-#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_STATFS
+#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_FSTATFS
#include <sys/statfs.h>
#include <linux/magic.h>
#endif
@@ -59,7 +60,7 @@ static struct dir *buf_dir;
static struct dir_ext buf_ext[1];
-#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_STATFS
+#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_FSTATFS
int exclude_kernfs; /* Exclude Linux pseudo filesystems */
static int is_kernfs(unsigned long type) {
@@ -138,112 +139,14 @@ static void stat_to_dir(struct stat *fs) {
}
-/* Reads all filenames in the currently chdir'ed directory and stores it as a
- * nul-separated list of filenames. The list ends with an empty filename (i.e.
- * two nuls). . and .. are not included. Returned memory should be freed. *err
- * is set to 1 if some error occurred. Returns NULL if that error was fatal.
- * The reason for reading everything in memory first and then walking through
- * the list is to avoid eating too many file descriptors in a deeply recursive
- * directory. */
-static char *dir_read(int *err) {
- DIR *dir;
- struct dirent *item;
- char *buf = NULL;
- size_t buflen = 512;
- size_t off = 0;
-
- if((dir = opendir(".")) == NULL) {
- *err = 1;
- return NULL;
- }
-
- buf = xmalloc(buflen);
- errno = 0;
-
- while((item = readdir(dir)) != NULL) {
- if(item->d_name[0] == '.' && (item->d_name[1] == 0 || (item->d_name[1] == '.' && item->d_name[2] == 0)))
- continue;
- size_t req = off+3+strlen(item->d_name);
- if(req > buflen) {
- buflen = req < buflen*2 ? buflen*2 : req;
- buf = xrealloc(buf, buflen);
- }
- strcpy(buf+off, item->d_name);
- off += strlen(item->d_name)+1;
- }
- if(errno)
- *err = 1;
- if(closedir(dir) < 0)
- *err = 1;
-
- buf[off] = 0;
- buf[off+1] = 0;
- return buf;
-}
-
-
-static int dir_walk(char *);
-
-
-/* Tries to recurse into the current directory item (buf_dir is assumed to be the current dir) */
-static int dir_scan_recurse(const char *name) {
- int fail = 0;
- char *dir;
-
- if(chdir(name)) {
- dir_setlasterr(dir_curpath);
- buf_dir->flags |= FF_ERR;
- if(dir_output.item(buf_dir, name, buf_ext) || dir_output.item(NULL, 0, NULL)) {
- dir_seterr("Output error: %s", strerror(errno));
- return 1;
- }
- return 0;
- }
-
- if((dir = dir_read(&fail)) == NULL) {
- dir_setlasterr(dir_curpath);
- buf_dir->flags |= FF_ERR;
- if(dir_output.item(buf_dir, name, buf_ext) || dir_output.item(NULL, 0, NULL)) {
- dir_seterr("Output error: %s", strerror(errno));
- return 1;
- }
- if(chdir("..")) {
- dir_seterr("Error going back to parent directory: %s", strerror(errno));
- return 1;
- } else
- return 0;
- }
-
- /* readdir() failed halfway, not fatal. */
- if(fail)
- buf_dir->flags |= FF_ERR;
-
- if(dir_output.item(buf_dir, name, buf_ext)) {
- dir_seterr("Output error: %s", strerror(errno));
- return 1;
- }
- fail = dir_walk(dir);
- if(dir_output.item(NULL, 0, NULL)) {
- dir_seterr("Output error: %s", strerror(errno));
- return 1;
- }
-
- /* Not being able to chdir back is fatal */
- if(!fail && chdir("..")) {
- dir_seterr("Error going back to parent directory: %s", strerror(errno));
- return 1;
- }
-
- return fail;
-}
+static int dir_walk(int);
/* Scans and adds a single item. Recurses into dir_walk() again if this is a
- * directory. Assumes we're chdir'ed in the directory in which this item
- * resides. */
-static int dir_scan_item(const char *name) {
+ * directory. */
+static int dir_scan_item(int parfd, const char *name) {
static struct stat st, stl;
- int fail = 0;
+ int fail = 0, dirfd = -1;
#ifdef __CYGWIN__
/* /proc/registry names may contain slashes */
@@ -256,15 +159,20 @@ static int dir_scan_item(const char *name) {
if(exclude_match(dir_curpath))
buf_dir->flags |= FF_EXL;
- if(!(buf_dir->flags & (FF_ERR|FF_EXL)) && lstat(name, &st)) {
+ if(!(buf_dir->flags & (FF_ERR|FF_EXL)) && fstatat(parfd, name, &st, AT_SYMLINK_NOFOLLOW)) {
buf_dir->flags |= FF_ERR;
dir_setlasterr(dir_curpath);
}
-#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_STATFS
- if(exclude_kernfs && !(buf_dir->flags & (FF_ERR|FF_EXL)) && S_ISDIR(st.st_mode)) {
+ if(!(buf_dir->flags & (FF_ERR|FF_EXL)) && S_ISDIR(st.st_mode) && (dirfd = openat(parfd, name, O_RDONLY|O_DIRECTORY)) < 0) {
+ buf_dir->flags |= FF_ERR;
+ dir_setlasterr(dir_curpath);
+ }
+
+#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_FSTATFS
+ if(exclude_kernfs && dirfd >= 0) {
struct statfs fst;
- if(statfs(name, &fst)) {
+ if(fstatfs(dirfd, &fst)) {
buf_dir->flags |= FF_ERR;
dir_setlasterr(dir_curpath);
} else if(is_kernfs(fst.f_type))
@@ -272,7 +180,8 @@ static int dir_scan_item(const char *name) {
}
#endif
-#if HAVE_SYS_ATTR_H && HAVE_GETATTRLIST && HAVE_DECL_ATTR_CMNEXT_NOFIRMLINKPATH
+ /* TODO: Completely broken; prolly needs absolute path lookup */
+#if 0 && HAVE_SYS_ATTR_H && HAVE_GETATTRLIST && HAVE_DECL_ATTR_CMNEXT_NOFIRMLINKPATH
if(!follow_firmlinks) {
struct attrlist list = {
.bitmapcount = ATTR_BIT_MAP_COUNT,
@@ -292,60 +201,79 @@ static int dir_scan_item(const char *name) {
#endif
if(!(buf_dir->flags & (FF_ERR|FF_EXL))) {
- if(follow_symlinks && S_ISLNK(st.st_mode) && !stat(name, &stl) && !S_ISDIR(stl.st_mode))
+ if(follow_symlinks && S_ISLNK(st.st_mode) && !fstatat(parfd, name, &stl, 0) && !S_ISDIR(stl.st_mode))
stat_to_dir(&stl);
else
stat_to_dir(&st);
}
- if(cachedir_tags && (buf_dir->flags & FF_DIR) && !(buf_dir->flags & (FF_ERR|FF_EXL|FF_OTHFS|FF_KERNFS|FF_FRMLNK)))
- if(has_cachedir_tag(name)) {
+ if(cachedir_tags && dirfd >= 0 && !(buf_dir->flags & (FF_ERR|FF_EXL|FF_OTHFS|FF_KERNFS|FF_FRMLNK)))
+ if(has_cachedir_tag(dirfd)) {
buf_dir->flags |= FF_EXL;
buf_dir->size = buf_dir->asize = 0;
}
- /* Recurse into the dir or output the item */
- if(buf_dir->flags & FF_DIR && !(buf_dir->flags & (FF_ERR|FF_EXL|FF_OTHFS|FF_KERNFS|FF_FRMLNK)))
- fail = dir_scan_recurse(name);
- else if(buf_dir->flags & FF_DIR) {
- if(dir_output.item(buf_dir, name, buf_ext) || dir_output.item(NULL, 0, NULL)) {
- dir_seterr("Output error: %s", strerror(errno));
- fail = 1;
- }
- } else if(dir_output.item(buf_dir, name, buf_ext)) {
+ if(dir_output.item(buf_dir, name, buf_ext)) {
+ dir_seterr("Output error: %s", strerror(errno));
+ fail = 1;
+ }
+
+ if(!fail && dirfd >= 0 && !(buf_dir->flags & (FF_ERR|FF_EXL|FF_OTHFS|FF_KERNFS|FF_FRMLNK))) {
+ /* XXX: Can't do anything with the return value, since we've already outputted our dir entry item.
+ * So errors reading dir items will be silently ignored. Not great. */
+ dir_walk(dirfd);
+ dirfd = -1;
+ }
+
+ if(!fail && (buf_dir->flags & FF_DIR) && dir_output.item(NULL, 0, NULL)) {
dir_seterr("Output error: %s", strerror(errno));
fail = 1;
}
+ if(dirfd >= 0)
+ close(dirfd);
+
return fail || input_handle(1);
}
-/* Walks through the directory that we're currently chdir'ed to. *dir contains
- * the filenames as returned by dir_read(), and will be freed automatically by
- * this function. */
-static int dir_walk(char *dir) {
+/* Recursively walks through the directory descriptor. Will close() the given dirfd. */
+static int dir_walk(int dirfd) {
int fail = 0;
- char *cur;
+ DIR *dir;
+ struct dirent *item;
- fail = 0;
- for(cur=dir; !fail&&cur&&*cur; cur+=strlen(cur)+1) {
- dir_curpath_enter(cur);
+ /* Illegal behavior: We're giving dirfd to fdopendir(), which in turn takes
+ * control of the fd and we shouldn't be using it again. Yet we do use it
+ * later on for openat() calls. I doubt this will be a problem, but may need
+ * further testing. The alternative is to dup(), but that makes us run out of
+ * descriptors twice as fast... */
+ if((dir = fdopendir(dirfd)) == NULL) {
+ close(dirfd);
+ return -1;
+ }
+
+ while((item = readdir(dir)) != NULL) {
+ if(item->d_name[0] == '.' && (item->d_name[1] == 0 || (item->d_name[1] == '.' && item->d_name[2] == 0)))
+ continue;
+ dir_curpath_enter(item->d_name);
memset(buf_dir, 0, offsetof(struct dir, name));
memset(buf_ext, 0, sizeof(struct dir_ext));
- fail = dir_scan_item(cur);
+ fail |= dir_scan_item(dirfd, item->d_name);
dir_curpath_leave();
}
- free(dir);
+ if(errno)
+ fail = 1;
+ if(closedir(dir) < 0)
+ fail = 1;
return fail;
}
static int process(void) {
char *path;
- char *dir;
- int fail = 0;
+ int fail = 0, dirfd = -1;
struct stat fs;
memset(buf_dir, 0, offsetof(struct dir, name));
@@ -361,15 +289,12 @@ static int process(void) {
if(!dir_fatalerr && path_chdir(dir_curpath) < 0)
dir_seterr("Error changing directory: %s", strerror(errno));
- /* Can these even fail after a chdir? */
- if(!dir_fatalerr && lstat(".", &fs) != 0)
- dir_seterr("Error obtaining directory information: %s", strerror(errno));
- if(!dir_fatalerr && !S_ISDIR(fs.st_mode))
- dir_seterr("Not a directory");
-
- if(!dir_fatalerr && !(dir = dir_read(&fail)))
+ if(!dir_fatalerr && (dirfd = open(".", O_RDONLY|O_DIRECTORY)) < 0)
dir_seterr("Error reading directory: %s", strerror(errno));
+ if(!dir_fatalerr && fstat(dirfd, &fs) != 0)
+ dir_seterr("Error obtaining directory information: %s", strerror(errno));
+
if(!dir_fatalerr) {
curdev = (uint64_t)fs.st_dev;
if(fail)
@@ -380,14 +305,19 @@ static int process(void) {
dir_seterr("Output error: %s", strerror(errno));
fail = 1;
}
- if(!fail)
- fail = dir_walk(dir);
+ if(!fail) {
+ fail = dir_walk(dirfd);
+ dirfd = -1;
+ }
if(!fail && dir_output.item(NULL, 0, NULL)) {
dir_seterr("Output error: %s", strerror(errno));
fail = 1;
}
}
+ if(dirfd >= 0)
+ close(dirfd);
+
while(dir_fatalerr && !input_handle(0))
;
return dir_output.final(dir_fatalerr || fail);
diff --git a/src/exclude.c b/src/exclude.c
index 572335f..b9c54a0 100644
--- a/src/exclude.c
+++ b/src/exclude.c
@@ -29,6 +29,8 @@
#include <stdlib.h>
#include <string.h>
#include <fnmatch.h>
+#include <unistd.h>
+#include <fcntl.h>
static struct exclude {
@@ -105,35 +107,17 @@ void exclude_clear() {
* Exclusion of directories that contain only cached information.
* See http://www.brynosaurus.com/cachedir/
*/
-#define CACHEDIR_TAG_FILENAME "CACHEDIR.TAG"
#define CACHEDIR_TAG_SIGNATURE "Signature: 8a477f597d28d172789f06886806bc55"
-int has_cachedir_tag(const char *name) {
- static int path_l = 1024;
- static char *path = NULL;
- int l;
+int has_cachedir_tag(int dirfd) {
+ int fd = -1, match = 0;
char buf[sizeof CACHEDIR_TAG_SIGNATURE - 1];
- FILE *f;
- int match = 0;
-
- /* Compute the required length for `path`. */
- l = strlen(name) + sizeof CACHEDIR_TAG_FILENAME + 2;
- if(l > path_l || path == NULL) {
- path_l = path_l * 2;
- if(path_l < l)
- path_l = l;
- /* We don't need to copy the content of `path`, so it's more efficient to
- * use `free` + `malloc`. */
- free(path);
- path = xmalloc(path_l);
- }
- snprintf(path, path_l, "%s/%s", name, CACHEDIR_TAG_FILENAME);
- f = fopen(path, "rb");
- if(f != NULL) {
- match = ((fread(buf, 1, sizeof buf, f) == sizeof buf) &&
- !memcmp(buf, CACHEDIR_TAG_SIGNATURE, sizeof buf));
- fclose(f);
- }
+ /* Assumption: We won't get a short read() when fetching the tag. */
+ match = (fd = openat(dirfd, "CACHEDIR.TAG", O_RDONLY)) >= 0
+ && read(fd, buf, sizeof buf) == sizeof buf
+ && !memcmp(buf, CACHEDIR_TAG_SIGNATURE, sizeof buf);
+ if(fd >= 0)
+ close(fd);
return match;
}
diff --git a/src/exclude.h b/src/exclude.h
index 72eff50..2e59e2d 100644
--- a/src/exclude.h
+++ b/src/exclude.h
@@ -30,6 +30,6 @@ void exclude_add(char *);
int exclude_addfile(char *);
int exclude_match(char *);
void exclude_clear(void);
-int has_cachedir_tag(const char *name);
+int has_cachedir_tag(int);
#endif
diff --git a/src/main.c b/src/main.c
index 6e85c52..55ddf63 100644
--- a/src/main.c
+++ b/src/main.c
@@ -170,7 +170,7 @@ static void argv_parse(int argc, char **argv) {
printf(" -X, --exclude-from FILE Exclude files that match any pattern in FILE\n");
printf(" -L, --follow-symlinks Follow symbolic links (excluding directories)\n");
printf(" --exclude-caches Exclude directories containing CACHEDIR.TAG\n");
-#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_STATFS
+#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_FSTATFS
printf(" --exclude-kernfs Exclude Linux pseudo filesystems (procfs,sysfs,cgroup,...)\n");
#endif
#if HAVE_SYS_ATTR_H && HAVE_GETATTRLIST && HAVE_DECL_ATTR_CMNEXT_NOFIRMLINKPATH
@@ -206,7 +206,7 @@ static void argv_parse(int argc, char **argv) {
break;
case 2 : /* --exclude-kernfs */
-#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_STATFS
+#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_FSTATFS
exclude_kernfs = 1; break;
#else
fprintf(stderr, "This feature is not supported on your platform\n");