Commit 73d4f7ae authored by Jim Meyering's avatar Jim Meyering

Big performance improvement for fts-based tools that use FTS_NOSTAT.

Avoid spurious inode-mismatch problems on non-POSIX file systems.
Details: http://article.gmane.org/gmane.comp.lib.gnulib.bugs/7416
* lib/fts_.h (FTS_DEFER_STAT): Define new flag.
(FTS_OPTIONMASK): Extend the mask to reflect this addition.
* lib/fts.c (DT_IS_KNOWN, DT_MUST_BE): Define.
(FTS_NO_STAT_REQUIRED, FTS_STAT_REQUIRED): Define.
(fts_set_stat_required): New function.
(fts_open): Defer the calls to fts_stat, if possible or requested.
Move the code that maps a command-line fts_info value FTS_DOT to FTS_D
into fts_stat itself.
(fts_read): Perform any required (deferred) fts_stat call.
(fts_build): Likewise, for the directory we're about to open and read.
In the readdir loop, carefully decide whether each entry will require
an eventual call to fts_stat, using dirent.d_type info if available.
(fts_stat): Move the test for whether to honor FTS_COMFOLLOW on
a command line argument into this function.  Update all callers.
Map a return value of FTS_DOT to FTS_D for a command line argument.
* modules/fts (Depends-on): Add d-type.  Alphabetize.
Thanks to Miklos Szeredi for his tenacity and for the initial
bug report about "find" failing on a FUSE-based file system.
parent d0fe5568
2006-10-12 Jim Meyering <jim@meyering.net>
Big performance improvement for fts-based tools that use FTS_NOSTAT.
Avoid spurious inode-mismatch problems on non-POSIX file systems.
Details: http://article.gmane.org/gmane.comp.lib.gnulib.bugs/7416
* lib/fts_.h (FTS_DEFER_STAT): Define new flag.
(FTS_OPTIONMASK): Extend the mask to reflect this addition.
* lib/fts.c (DT_IS_KNOWN, DT_MUST_BE): Define.
(FTS_NO_STAT_REQUIRED, FTS_STAT_REQUIRED): Define.
(fts_set_stat_required): New function.
(fts_open): Defer the calls to fts_stat, if possible or requested.
Move the code that maps a command-line fts_info value FTS_DOT to FTS_D
into fts_stat itself.
(fts_read): Perform any required (deferred) fts_stat call.
(fts_build): Likewise, for the directory we're about to open and read.
In the readdir loop, carefully decide whether each entry will require
an eventual call to fts_stat, using dirent.d_type info if available.
(fts_stat): Move the test for whether to honor FTS_COMFOLLOW on
a command line argument into this function. Update all callers.
Map a return value of FTS_DOT to FTS_D for a command line argument.
* modules/fts (Depends-on): Add d-type. Alphabetize.
Thanks to Miklos Szeredi for his tenacity and for the initial
bug report about "find" failing on a FUSE-based file system.
2006-10-12 Paul Eggert <eggert@cs.ucla.edu>
* m4/extensions.m4 (AC_USE_SYSTEM_EXTENSIONS): Renamed from
......
......@@ -81,6 +81,22 @@ static char sccsid[] = "@(#)fts.c 8.6 (Berkeley) 8/14/94";
# define _D_EXACT_NAMLEN(dirent) strlen ((dirent)->d_name)
#endif
#if HAVE_STRUCT_DIRENT_D_TYPE
/* True if the type of the directory entry D is known. */
# define DT_IS_KNOWN(d) ((d)->d_type != DT_UNKNOWN)
/* True if the type of the directory entry D must be T. */
# define DT_MUST_BE(d, t) ((d)->d_type == (t))
#else
# define DT_IS_KNOWN(d) false
# define DT_MUST_BE(d, t) false
#endif
enum
{
FTS_NO_STAT_REQUIRED = 1,
FTS_STAT_REQUIRED = 2
};
#ifdef _LIBC
# undef close
# define close __close
......@@ -195,6 +211,19 @@ bool fts_debug = false;
} \
while (false)
/* Overload the fts_statp->st_size member (otherwise unused, when
fts_info is FTS_NSOK) to indicate whether fts_read should stat
this entry or not. */
static void
fts_set_stat_required (FTSENT *p, bool required)
{
if (p->fts_info != FTS_NSOK)
abort ();
p->fts_statp->st_size = (required
? FTS_STAT_REQUIRED
: FTS_NO_STAT_REQUIRED);
}
/* file-descriptor-relative opendir. */
/* FIXME: if others need this function, move it into lib/openat.c */
static inline DIR *
......@@ -258,6 +287,7 @@ fts_open (char * const *argv,
FTSENT *parent = NULL;
FTSENT *tmp = NULL; /* pacify gcc */
size_t len;
bool defer_stat;
/* Options check. */
if (options & ~FTS_OPTIONMASK) {
......@@ -340,6 +370,19 @@ fts_open (char * const *argv,
parent->fts_level = FTS_ROOTPARENTLEVEL;
}
/* The classic fts implementation would call fts_stat with
a new entry for each iteration of the loop below.
If the comparison function is not specified or if the
FTS_DEFER_STAT option is in effect, don't stat any entry
in this loop. This is an attempt to minimize the interval
between the initial stat/lstat/fstatat and the point at which
a directory argument is first opened. This matters for any
directory command line argument that resides on a file system
without genuine i-nodes. If you specify FTS_DEFER_STAT along
with a comparison function, that function must not access any
data via the fts_statp pointer. */
defer_stat = (compar == NULL || ISSET(FTS_DEFER_STAT));
/* Allocate/initialize root(s). */
for (root = NULL, nitems = 0; *argv != NULL; ++argv, ++nitems) {
/* Don't allow zero-length file names. */
......@@ -353,11 +396,12 @@ fts_open (char * const *argv,
p->fts_level = FTS_ROOTLEVEL;
p->fts_parent = parent;
p->fts_accpath = p->fts_name;
p->fts_info = fts_stat(sp, p, ISSET(FTS_COMFOLLOW) != 0);
/* Command-line "." and ".." are real directories. */
if (p->fts_info == FTS_DOT)
p->fts_info = FTS_D;
if (defer_stat) {
p->fts_info = FTS_NSOK;
fts_set_stat_required(p, true);
} else {
p->fts_info = fts_stat(sp, p, false);
}
/*
* If comparison routine supplied, traverse in sorted
......@@ -645,6 +689,19 @@ name: t = sp->fts_path + NAPPEND(p->fts_parent);
*t++ = '/';
memmove(t, p->fts_name, p->fts_namelen + 1);
check_for_dir:
if (p->fts_info == FTS_NSOK)
{
switch (p->fts_statp->st_size)
{
case FTS_STAT_REQUIRED:
p->fts_info = fts_stat(sp, p, false);
break;
case FTS_NO_STAT_REQUIRED:
break;
default:
abort ();
}
}
sp->fts_cur = p;
if (p->fts_info == FTS_D)
{
......@@ -672,6 +729,9 @@ check_for_dir:
return (sp->fts_cur = NULL);
}
if (p->fts_info == FTS_NSOK)
abort ();
/* NUL terminate the file name. */
sp->fts_path[p->fts_pathlen] = '\0';
......@@ -862,6 +922,11 @@ fts_build (register FTS *sp, int type)
}
return (NULL);
}
/* Rather than calling fts_stat for each and every entry encountered
in the readdir loop (below), stat each directory only right after
opening it. */
if (cur->fts_info == FTS_NSOK)
cur->fts_info = fts_stat(sp, cur, false);
/*
* Nlinks is the number of possible entries of type directory in the
......@@ -1004,12 +1069,38 @@ mem1: saved_errno = errno;
memmove(cp, p->fts_name, p->fts_namelen + 1);
} else
p->fts_accpath = p->fts_name;
/* Stat it. */
p->fts_info = fts_stat(sp, p, false);
bool is_dir;
if (sp->fts_compar == NULL || ISSET(FTS_DEFER_STAT)) {
/* Record what fts_read will have to do with this
entry. In many cases, it will simply fts_stat it,
but we can take advantage of any d_type information
to optimize away the unnecessary stat calls. I.e.,
if FTS_NOSTAT is in effect and we're not following
symlinks (FTS_PHYSICAL) and d_type indicates this
is *not* a directory, then we won't have to stat it
at all. If it *is* a directory, then (currently)
we stat it regardless, in order to get device and
inode numbers. Some day we might optimize that
away, too, for directories where d_ino is known to
be valid. */
bool skip_stat = (ISSET(FTS_PHYSICAL)
&& ISSET(FTS_NOSTAT)
&& DT_IS_KNOWN(dp)
&& ! DT_MUST_BE(dp, DT_DIR));
p->fts_info = FTS_NSOK;
fts_set_stat_required(p, !skip_stat);
is_dir = (ISSET(FTS_PHYSICAL) && ISSET(FTS_NOSTAT)
&& DT_MUST_BE(dp, DT_DIR));
} else {
p->fts_info = fts_stat(sp, p, false);
is_dir = (p->fts_info == FTS_D
|| p->fts_info == FTS_DC
|| p->fts_info == FTS_DOT);
}
/* Decrement link count if applicable. */
if (nlinks > 0 && (p->fts_info == FTS_D ||
p->fts_info == FTS_DC || p->fts_info == FTS_DOT))
if (nlinks > 0 && is_dir)
nlinks -= nostat;
/* We walk in directory order so "ls -f" doesn't get upset. */
......@@ -1143,6 +1234,9 @@ fts_stat(FTS *sp, register FTSENT *p, bool follow)
struct stat *sbp = p->fts_statp;
int saved_errno;
if (p->fts_level == FTS_ROOTLEVEL && ISSET(FTS_COMFOLLOW))
follow = true;
#if defined FTS_WHITEOUT && 0
/* check for whiteout */
if (p->fts_flags & FTS_ISW) {
......@@ -1176,8 +1270,10 @@ err: memset(sbp, 0, sizeof(struct stat));
}
if (S_ISDIR(sbp->st_mode)) {
if (ISDOT(p->fts_name))
return (FTS_DOT);
if (ISDOT(p->fts_name)) {
/* Command-line "." and ".." are real directories. */
return (p->fts_level == FTS_ROOTLEVEL ? FTS_D : FTS_DOT);
}
#if _LGPL_PACKAGE
{
......
......@@ -123,7 +123,19 @@ typedef struct {
through the file descriptor member, fts_cwd_fd. */
# define FTS_CWDFD 0x0200
# define FTS_OPTIONMASK 0x03ff /* valid user option mask */
/* Historically, for each directory that fts initially encounters, it would
open it, read all entries, and stat each entry, storing the results, and
then it would process the first entry. But that behavior is bad for
locality of reference, and also causes trouble with inode-simulating
file systems like FAT, CIFS, FUSE-based ones, etc., when entries from
their name/inode cache are flushed too early.
Use this flag to make fts_open and fts_read defer the stat/lstat/fststat
of each entry until it actually processed. However, note that if you use
this option and also specify a comparison function, that function may not
examine any data via fts_statp. */
# define FTS_DEFER_STAT 0x0400
# define FTS_OPTIONMASK 0x07ff /* valid user option mask */
# define FTS_NAMEONLY 0x1000 /* (private) child names only */
# define FTS_STOP 0x2000 /* (private) unrecoverable error */
......
......@@ -9,13 +9,14 @@ m4/fts.m4
Depends-on:
cycle-check
d-type
dirfd
fcntl
fcntl-safer
hash
lstat
openat
stdbool
fcntl-safer
unistd-safer
configure.ac:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment