[SV 41273] Allow the directory cache to be invalidated

Each time we invoke a command it's possible that it will change the
filesystem in ways that were not described by the target.  If that
happens but we have cached previous directory contents then we may
make decisions or report results based on obsolete information.

Keep a count of how many commands we've invoked, and remember the
current command count every time we load the contents of a directory.
If we request the directory and the current command count has changed
we know the cache is outdated so reload from scratch.

* NEWS: Announce the change.
* src/makeint.h (command_count): Create a global counter.
* src/main.c (command_count): Ditto.
* src/job.c (reap_children): Increment the counter on job completion.
* src/function.c (func_file): Increment if we write a file.
* src/dir.c (clear_directory_contents): Clear the current contents of
a cached directory.
(struct directory_contents): Remember the counter value.
(struct directory): Remember the counter value for non-existing dirs.
(find_directory): If we have a cached directory and the count hasn't
changed then return it.  Else, clear the previous contents and re-read
from scratch.
* tests/scripts/features/dircache: Add tests of the directory cache.
This commit is contained in:
Paul Smith 2020-11-27 17:07:53 -05:00
parent 2dc0280d82
commit 9e2fa24649
7 changed files with 209 additions and 129 deletions

4
NEWS
View file

@ -42,6 +42,10 @@ https://sv.gnu.org/bugs/index.php?group=make&report_id=111&fix_release_id=109&se
* Special targets like .POSIX are detected upon definition, ensuring that any
change in behavior takes effect immediately, before the next line is parsed.
* A long-standing issue with the directory cache has been resolved: changes
made as a side-effect of some other target's recipe are now noticed as
expected.
* GNU Make can now be built for MS-Windows using the Tiny C tcc compiler.

280
src/dir.c
View file

@ -18,6 +18,7 @@ this program. If not, see <http://www.gnu.org/licenses/>. */
#include "hash.h"
#include "filedef.h"
#include "dep.h"
#include "debug.h"
#ifdef HAVE_DIRENT_H
# include <dirent.h>
@ -232,6 +233,12 @@ vmsstat_dir (const char *name, struct stat *st)
#endif /* _USE_STD_STAT */
#endif /* VMS */
/* Never have more than this many directories open at once. */
#define MAX_OPEN_DIRECTORIES 10
static unsigned int open_directories = 0;
/* Hash table of directories. */
#ifndef DIRECTORY_BUCKETS
@ -262,9 +269,25 @@ struct directory_contents
# endif
#endif /* WINDOWS32 */
struct hash_table dirfiles; /* Files in this directory. */
unsigned long counter; /* command_count value when last read. */
DIR *dirstream; /* Stream reading this directory. */
};
static struct directory_contents *
clear_directory_contents (struct directory_contents *dc)
{
dc->counter = 0;
if (dc->dirstream)
{
--open_directories;
closedir (dc->dirstream);
dc->dirstream = 0;
}
hash_free (&dc->dirfiles, 1);
return NULL;
}
static unsigned long
directory_contents_hash_1 (const void *key_0)
{
@ -363,7 +386,9 @@ static struct hash_table directory_contents;
struct directory
{
const char *name; /* Name of the directory. */
const char *name; /* Name of the directory. */
unsigned long counter; /* command_count value when last read.
Used for non-existent directories. */
/* The directory's contents. This data may be shared by several
entries in the hash table, which refer to the same directory
@ -393,12 +418,6 @@ directory_hash_cmp (const void *x, const void *y)
/* Table of directories hashed by name. */
static struct hash_table directories;
/* Never have more than this many directories open at once. */
#define MAX_OPEN_DIRECTORIES 10
static unsigned int open_directories = 0;
/* Hash table of files in each directory. */
@ -449,150 +468,163 @@ find_directory (const char *name)
struct directory *dir;
struct directory **dir_slot;
struct directory dir_key;
struct directory_contents *dc;
struct directory_contents **dc_slot;
struct directory_contents dc_key;
struct stat st;
int r;
#ifdef WINDOWS32
char *w32_path;
#endif
dir_key.name = name;
dir_slot = (struct directory **) hash_find_slot (&directories, &dir_key);
dir = *dir_slot;
if (HASH_VACANT (dir))
if (!HASH_VACANT (dir))
{
unsigned long ctr = dir->contents ? dir->contents->counter : dir->counter;
/* No commands have run since we parsed this directory so it's good. */
if (ctr == command_count)
return dir;
DB (DB_VERBOSE, ("Directory %s cache invalidated (count %lu != command %lu)\n",
name, ctr, command_count));
if (dir->contents)
clear_directory_contents (dir->contents);
}
else
{
/* The directory was not found. Create a new entry for it. */
const char *p = name + strlen (name);
struct stat st;
int r;
size_t len = strlen (name);
dir = xmalloc (sizeof (struct directory));
#if defined(HAVE_CASE_INSENSITIVE_FS) && defined(VMS)
/* Todo: Why is this only needed on VMS? */
{
char *lname = downcase_inplace (xstrdup (name));
dir->name = strcache_add_len (lname, p - name);
dir->name = strcache_add_len (lname, len);
free (lname);
}
#else
dir->name = strcache_add_len (name, p - name);
dir->name = strcache_add_len (name, len);
#endif
hash_insert_at (&directories, dir, dir_slot);
/* The directory is not in the name hash table.
Find its device and inode numbers, and look it up by them. */
}
dir->contents = NULL;
dir->counter = command_count;
/* See if the directory exists. */
#if defined(WINDOWS32)
{
char tem[MAXPATHLEN], *tstart, *tend;
{
char tem[MAXPATHLEN], *tstart, *tend;
/* Remove any trailing slashes. Windows32 stat fails even on
valid directories if they end in a slash. */
memcpy (tem, name, p - name + 1);
tstart = tem;
if (tstart[1] == ':')
tstart += 2;
for (tend = tem + (p - name - 1);
tend > tstart && (*tend == '/' || *tend == '\\');
tend--)
*tend = '\0';
/* Remove any trailing slashes. Windows32 stat fails even on
valid directories if they end in a slash. */
memcpy (tem, name, p - name + 1);
tstart = tem;
if (tstart[1] == ':')
tstart += 2;
for (tend = tem + (p - name - 1);
tend > tstart && (*tend == '/' || *tend == '\\');
tend--)
*tend = '\0';
r = stat (tem, &st);
}
r = stat (tem, &st);
}
#else
EINTRLOOP (r, stat (name, &st));
EINTRLOOP (r, stat (name, &st));
#endif
if (r < 0)
{
/* Couldn't stat the directory. Mark this by
setting the 'contents' member to a nil pointer. */
dir->contents = 0;
}
if (r < 0)
/* Couldn't stat the directory; nothing else to do. */
return dir;
/* Search the contents hash table; device and inode are the key. */
memset (&dc_key, '\0', sizeof (dc_key));
dc_key.dev = st.st_dev;
#ifdef WINDOWS32
dc_key.path_key = w32_path = w32ify (name, 1);
dc_key.ctime = st.st_ctime;
#else
# ifdef VMS_INO_T
dc_key.ino[0] = st.st_ino[0];
dc_key.ino[1] = st.st_ino[1];
dc_key.ino[2] = st.st_ino[2];
# else
dc_key.ino = st.st_ino;
# endif
#endif
dc_slot = (struct directory_contents **) hash_find_slot (&directory_contents, &dc_key);
dc = *dc_slot;
if (HASH_VACANT (dc))
{
/* Nope; this really is a directory we haven't seen before. */
#ifdef WINDOWS32
char fs_label[BUFSIZ];
char fs_type[BUFSIZ];
unsigned long fs_serno;
unsigned long fs_flags;
unsigned long fs_len;
#endif
/* Enter it in the contents hash table. */
dc = xcalloc (sizeof (struct directory_contents));
*dc = dc_key;
#ifdef WINDOWS32
dc->path_key = xstrdup (w32_path);
dc->mtime = st.st_mtime;
/* NTFS is the only WINDOWS32 filesystem that bumps mtime on a
directory when files are added/deleted from a directory. */
w32_path[3] = '\0';
if (GetVolumeInformation (w32_path, fs_label, sizeof (fs_label),
&fs_serno, &fs_len, &fs_flags, fs_type,
sizeof (fs_type)) == FALSE)
dc->fs_flags = FS_UNKNOWN;
else if (!strcmp (fs_type, "FAT"))
dc->fs_flags = FS_FAT;
else if (!strcmp (fs_type, "NTFS"))
dc->fs_flags = FS_NTFS;
else
dc->fs_flags = FS_UNKNOWN;
#endif /* WINDOWS32 */
hash_insert_at (&directory_contents, dc, dc_slot);
}
/* Point the name-hashed entry for DIR at its contents data. */
dir->contents = dc;
/* If the contents have changed, we need to reseet. */
if (dc->counter != command_count)
{
if (dc->counter)
clear_directory_contents (dc);
dc->counter = command_count;
ENULLLOOP (dc->dirstream, opendir (name));
if (dc->dirstream == 0)
/* Couldn't open the directory. Mark this by setting the
'files' member to a nil pointer. */
dc->dirfiles.ht_vec = 0;
else
{
/* Search the contents hash table; device and inode are the key. */
#ifdef WINDOWS32
char *w32_path;
#endif
struct directory_contents *dc;
struct directory_contents **dc_slot;
struct directory_contents dc_key;
dc_key.dev = st.st_dev;
#ifdef WINDOWS32
dc_key.path_key = w32_path = w32ify (name, 1);
dc_key.ctime = st.st_ctime;
#else
# ifdef VMS_INO_T
dc_key.ino[0] = st.st_ino[0];
dc_key.ino[1] = st.st_ino[1];
dc_key.ino[2] = st.st_ino[2];
# else
dc_key.ino = st.st_ino;
# endif
#endif
dc_slot = (struct directory_contents **) hash_find_slot (&directory_contents, &dc_key);
dc = *dc_slot;
if (HASH_VACANT (dc))
{
/* Nope; this really is a directory we haven't seen before. */
#ifdef WINDOWS32
char fs_label[BUFSIZ];
char fs_type[BUFSIZ];
unsigned long fs_serno;
unsigned long fs_flags;
unsigned long fs_len;
#endif
dc = (struct directory_contents *)
xmalloc (sizeof (struct directory_contents));
/* Enter it in the contents hash table. */
dc->dev = st.st_dev;
#ifdef WINDOWS32
dc->path_key = xstrdup (w32_path);
dc->ctime = st.st_ctime;
dc->mtime = st.st_mtime;
/* NTFS is the only WINDOWS32 filesystem that bumps mtime on a
directory when files are added/deleted from a directory. */
w32_path[3] = '\0';
if (GetVolumeInformation (w32_path, fs_label, sizeof (fs_label),
&fs_serno, &fs_len, &fs_flags, fs_type,
sizeof (fs_type)) == FALSE)
dc->fs_flags = FS_UNKNOWN;
else if (!strcmp (fs_type, "FAT"))
dc->fs_flags = FS_FAT;
else if (!strcmp (fs_type, "NTFS"))
dc->fs_flags = FS_NTFS;
else
dc->fs_flags = FS_UNKNOWN;
#else
# ifdef VMS_INO_T
dc->ino[0] = st.st_ino[0];
dc->ino[1] = st.st_ino[1];
dc->ino[2] = st.st_ino[2];
# else
dc->ino = st.st_ino;
# endif
#endif /* WINDOWS32 */
hash_insert_at (&directory_contents, dc, dc_slot);
ENULLLOOP (dc->dirstream, opendir (name));
if (dc->dirstream == 0)
/* Couldn't open the directory. Mark this by setting the
'files' member to a nil pointer. */
dc->dirfiles.ht_vec = 0;
else
{
hash_init (&dc->dirfiles, DIRFILE_BUCKETS,
dirfile_hash_1, dirfile_hash_2, dirfile_hash_cmp);
/* Keep track of how many directories are open. */
++open_directories;
if (open_directories == MAX_OPEN_DIRECTORIES)
/* We have too many directories open already.
Read the entire directory and then close it. */
dir_contents_file_exists_p (dc, 0);
}
}
/* Point the name-hashed entry for DIR at its contents data. */
dir->contents = dc;
hash_init (&dc->dirfiles, DIRFILE_BUCKETS,
dirfile_hash_1, dirfile_hash_2, dirfile_hash_cmp);
/* Keep track of how many directories are open. */
++open_directories;
if (open_directories == MAX_OPEN_DIRECTORIES)
/* We have too many directories open already.
Read the entire directory and then close it. */
dir_contents_file_exists_p (dc, 0);
}
}

View file

@ -2235,6 +2235,11 @@ func_file (char *o, char **argv, const char *funcname UNUSED)
if (fp == NULL)
OSS (fatal, reading_file, _("open: %s: %s"), fn, strerror (errno));
/* We've changed the contents of a directory, possibly.
Another option would be to look up the directory we changed and reset
its counter to 0. */
++command_count;
if (argv[1])
{
size_t l = strlen (argv[1]);

View file

@ -721,9 +721,6 @@ reap_children (int block, int err)
else if (pid < 0)
{
/* A remote status command failed miserably. Punt. */
#if !defined(__MSDOS__) && !defined(_AMIGA) && !defined(WINDOWS32)
remote_status_lose:
#endif
pfatal_with_name ("remote_status");
}
else
@ -779,8 +776,9 @@ reap_children (int block, int err)
/* Now try a blocking wait for a remote child. */
pid = remote_status (&exit_code, &exit_sig, &coredump, 1);
if (pid < 0)
goto remote_status_lose;
else if (pid == 0)
pfatal_with_name ("remote_status");
if (pid == 0)
/* No remote children either. Finally give up. */
break;
@ -876,6 +874,9 @@ reap_children (int block, int err)
#endif /* WINDOWS32 */
}
/* Some child finished: increment the command count. */
++command_count;
/* Check if this is the child of the 'shell' function. */
if (!remote && pid == shell_function_pid)
{

View file

@ -322,6 +322,12 @@ struct variable shell_var;
char cmd_prefix = '\t';
/* Count the number of commands we've invoked, that might change something in
the filesystem. Start with 1 so calloc'd memory never matches. */
unsigned long command_count = 1;
/* The usage output. We write it this way to make life easier for the
translators, especially those trying to translate to right-to-left

View file

@ -674,6 +674,7 @@ extern int print_version_flag, print_directory, check_symlink_flag;
extern int warn_undefined_variables_flag, trace_flag, posix_pedantic;
extern int not_parallel, second_expansion, clock_skew_detected;
extern int rebuilding_makefiles, one_shell, output_sync, verify_flag;
extern unsigned long command_count;
extern const char *default_shell;

View file

@ -0,0 +1,31 @@
# -*-mode: perl-*-
$description = "Test the directory cache behavior.";
# The first wildcard should bring the entire directory into the cache Then we
# create a new file "behind make's back" then see if the next wildcard detects
# it.
run_make_test(q!
_orig := $(wildcard ./*)
$(shell echo > anewfile)
_new := $(wildcard ./*)
$(info diff=$(filter-out $(_orig),$(_new)))
all:;@:
!,
'', "diff=./anewfile\n");
rmfiles('anewfile');
run_make_test(q!
_orig := $(wildcard ./*)
$(file >anewfile)
_new := $(wildcard ./*)
$(info diff=$(filter-out $(_orig),$(_new)))
all:;@:
!,
'', "diff=./anewfile\n");
rmfiles('anewfile');
1;