Commit 3a2f785b authored by Dave Rolsky's avatar Dave Rolsky

Add optional file attribute & content tracking for modify events

parent d2d78ce2
{{$NEXT}}
- Added an optional feature to track path attribute changes (permissions,
ownership) as well as to provide the old vs new attributes and/or content in
modify events. Both of these features are off by default since they can use
a lot more CPU or memory, especially when using otherwise low-overhead
kernel-based watchers.
0.29 2018-09-25
- The change to Module::Runtime in 0.25 wasn't complete so this distro still
......
......@@ -6,7 +6,7 @@ use namespace::autoclean;
our $VERSION = '0.30';
use Types::Standard qw( Str );
use Types::Standard qw( ArrayRef HashRef Str );
use Type::Utils qw( enum );
use Moo;
......@@ -23,6 +23,18 @@ has type => (
required => 1,
);
has attributes => (
is => 'ro',
isa => ArrayRef [HashRef],
predicate => 'has_attributes',
);
has content => (
is => 'ro',
isa => ArrayRef,
predicate => 'has_content',
);
__PACKAGE__->meta->make_immutable;
1;
......@@ -75,4 +87,48 @@ Returns the path of the changed file or directory.
Returns the type of event.
=head2 $event->has_attributes
This returns true for modify events which include information about a path's
attribute changes.
=head2 $event->attributes
If the event includes information about changes to a path's attributes, then
this returns a two-element arrayref. Each element is in turn a hashref which
will contain at least one of the following keys:
=over 4
=item * permissions
The permissions mask for the path.
=item * uid
The user id that owns the path.
=item * gid
The group id that owns the path.
=back
Note that only keys which changed will be included.
=head2 $event->has_content
This returns true for modify events which include information about a file's
content.
=head2 $event->content
This returns a two-element arrayref where the first element is the old content
and the second is the new content.
B<Note that this content is stored as bytes, not UTF-8. You will need to
explicitly call C<Encode::decode> on the content to make it UTF-8.> This is
done because there's no reason you couldn't use this feature with file's
containing any sort of binary data.
=cut
......@@ -6,11 +6,25 @@ use namespace::autoclean;
our $VERSION = '0.30';
use Fcntl qw( S_IMODE );
use File::ChangeNotify::Event;
use File::Find qw( find );
use File::Spec;
use Module::Runtime qw( use_module );
use Types::Standard qw( ArrayRef Bool ClassName CodeRef Num RegexpRef Str );
use Types::Standard
qw( ArrayRef Bool ClassName CodeRef HashRef Num RegexpRef Str );
use Type::Utils -all;
# Trying to import this just blows up on Win32, and checking
# Time::HiRes::d_hires_stat() _also_ blows up on Win32.
BEGIN {
## no critic (ErrorHandling::RequireCheckingReturnValueOfEval)
eval {
require Time::HiRes;
Time::HiRes->import('stat');
};
}
use Moo::Role;
has filter => (
......@@ -70,35 +84,219 @@ has exclude => (
default => sub { [] },
);
has modify_includes_file_attributes => (
is => 'ro',
isa => Bool | $files_or_regexps_or_code_t,
default => 0,
);
has modify_includes_content => (
is => 'ro',
isa => Bool | $files_or_regexps_or_code_t,
default => 0,
);
has _map => (
is => 'ro',
writer => '_set_map',
isa => HashRef,
predicate => '_has_map',
);
sub BUILD {
my $self = shift;
use_module( $self->event_class );
}
sub new_events {
## no critic ( Subroutines::ProhibitUnusedPrivateSubroutines)
sub _current_map {
my $self = shift;
return $self->_interesting_events;
my %map;
find(
{
wanted => sub {
# File::Find seems to use '/' as the path separator on Windows
# for some odd reason. It really should be using File::Spec
# internally everywhere but it doesn't.
my $path
= $^O eq 'MSWin32'
? File::Spec->canonpath($File::Find::name)
: $File::Find::name;
if ( $self->_path_is_excluded($path) ) {
$File::Find::prune = 1;
return;
}
my $entry = $self->_entry_for_map($path) or return;
$map{$path} = $entry;
},
follow_fast => ( $self->follow_symlinks ? 1 : 0 ),
no_chdir => 1,
follow_skip => 2,
},
@{ $self->directories },
);
return \%map;
}
## use critic
## no critic (Subroutines::ProhibitUnusedPrivateSubroutines)
sub _path_is_excluded {
my $self = shift;
my $path = shift;
foreach my $excluded ( @{ $self->exclude } ) {
if ( my $ref = ref $excluded ) {
return $self->_path_matches( $self->exclude, $path );
}
sub _entry_for_map {
my $self = shift;
my $path = shift;
my $is_dir = -d $path ? 1 : 0;
# This should be free since the stat call was already done when checking
# -d.
my @stat = stat;
return if -l $path && !$is_dir;
unless ($is_dir) {
my $filter = $self->filter;
return unless ( File::Spec->splitpath($path) )[2] =~ /$filter/;
}
return {
is_dir => $is_dir,
size => ( $is_dir ? 0 : $stat[7] ),
$self->_maybe_file_attributes( $path, \@stat ),
( $is_dir ? () : $self->_maybe_content($path) ),
};
}
sub _maybe_file_attributes {
my $self = shift;
my $path = shift;
my $stat = shift;
# The Default watcher always requires the mtime, regardless of whether or
# not we're including stat info in the modify events.
unless ( $self->_always_requires_mtime ) {
return
unless $self->_path_matches(
$self->modify_includes_file_attributes,
$path,
);
}
return ( stat => $self->_stat( $path, $stat ) );
}
sub _stat {
my $self = shift;
my $path = shift;
my $stat = shift;
my @stat = $stat ? @{$stat} : stat $path;
return {
attributes => {
permissions => S_IMODE( $stat[2] ),
uid => $stat[4],
gid => $stat[5],
},
mtime => $stat[9],
};
}
sub _always_requires_mtime {0}
sub _maybe_content {
my $self = shift;
my $path = shift;
return
unless $self->_path_matches( $self->modify_includes_content, $path );
open my $fh, '<', $path or die "Cannot open $path for reading: $!";
binmode $fh, ':bytes' or die qq{Cannot binmode $path as ':bytes': $!};
my $content = do {
local $/ = undef;
<$fh>;
};
close $fh or die "Cannot close $path: $!";
return ( content => $content );
}
sub new_events {
my $self = shift;
return $self->_interesting_events;
}
## no critic ( Subroutines::ProhibitUnusedPrivateSubroutines)
sub _modify_event_maybe_file_attribute_changes {
my $self = shift;
my $path = shift;
my $old_map = shift;
my $new_map = shift;
return
unless $self->_path_matches(
$self->modify_includes_file_attributes,
$path,
);
my $old_attr = $old_map->{$path}{stat}{attributes};
my $new_attr = $new_map->{$path}{stat}{attributes};
for my $k ( keys %{$new_attr} ) {
# Any possible info retrieved from stat will be numeric, so we can
# always use numeric comparison safely.
return ( attributes => [ $old_attr, $new_attr ] )
if $old_attr->{$k} != $new_attr->{$k};
}
return;
}
sub _modify_event_maybe_content_changes {
my $self = shift;
my $path = shift;
my $old_map = shift;
my $new_map = shift;
return
unless $self->_path_matches( $self->modify_includes_content, $path );
return (
content => [ $old_map->{$path}{content}, $new_map->{$path}{content} ]
);
}
sub _path_matches {
my $self = shift;
my $matches = shift;
my $path = shift;
return $matches if !ref $matches;
foreach my $matcher ( @{$matches} ) {
if ( my $ref = ref $matcher ) {
if ( $ref eq 'Regexp' ) {
return 1 if $path =~ /$excluded/;
return 1 if $path =~ /$matcher/;
}
elsif ( $ref eq 'CODE' ) {
local $_ = $path;
return 1 if $excluded->($path);
return 1 if $matcher->($path);
}
}
else {
return 1 if $path eq $excluded;
return 1 if $path eq $matcher;
}
}
......@@ -152,8 +350,10 @@ By default, events are returned in the form of L<File::ChangeNotify::Event>
objects, but this can be overridden by providing an "event_class" attribute to
the constructor.
The watcher can operate in a blocking/callback style, or you can simply ask it
for a list of new events as needed.
You can block while waiting for events or do a non-blocking call asking for
any new events since the last call (or since the watcher was
instantiated). Different watchers will implement blocking in different ways,
and the Default watcher just does a sleep loop.
=head1 METHODS
......@@ -179,9 +379,9 @@ By default, all files are included.
=item * exclude => [...]
An optional list of paths to exclude. This list can contain plain strings,
regular expressions, or subroutine references. If you provide a string it
should contain the complete path to be excluded.
An optional arrayref of paths to exclude. This arrayref can contain plain
strings, regular expressions, or subroutine references. If you provide a
string it should contain the complete path to be excluded.
If you provide a sub, it should return a true value for paths to be excluded
e.g. C<< exclude => [ sub { -e && ! -r } ], >>. The path will be passed as the
......@@ -190,6 +390,37 @@ first argument to the subroutine as well as in a localized C<$_>.
The paths can be either directories or specific files. If the exclusion
matches a directory, all of its files and subdirectories are ignored.
=item * modify_includes_file_attributes
This can either be a boolean or an arrayref.
If it is an arrayref then it should contain paths for which you want
information about changes to the file's attributes. This arrayref can contain
plain strings, regular expressions, or subroutine references. If you provide a
string it should contain the complete path to be excluded.
When this matches a file, then modify events for that file will include
information about the file's before and after permissions and ownership when
these change.
See the L<File::ChangeNotify::Event> documentation for details on what this
looks like.
=item * modify_includes_content
This can either be a boolean or an arrayref.
If it is an arrayref then it should contain paths for which you want to see
past and current content for a file when it is modified. This arrayref can
contain plain strings, regular expressions, or subroutine references. If you
provide a string it should contain the complete path to be excluded.
When this matches a file, then modify events for that file will include
information about the file's before and after content when it changes.
See the L<File::ChangeNotify::Event> documentation for details on what this
looks like.
=item * follow_symlinks => $bool
By default, symlinks are ignored. Set this to true to follow them.
......
......@@ -6,91 +6,24 @@ use namespace::autoclean;
our $VERSION = '0.30';
use File::Find qw( finddepth );
use File::Spec;
use Time::HiRes qw( sleep );
use Types::Standard qw( HashRef );
# Trying to import this just blows up on Win32, and checking
# Time::HiRes::d_hires_stat() _also_ blows up on Win32.
BEGIN {
## no critic (ErrorHandling::RequireCheckingReturnValueOfEval)
eval { Time::HiRes->import('stat') };
}
use Moo;
with 'File::ChangeNotify::Watcher';
has _map => (
is => 'ro',
writer => '_set_map',
isa => HashRef,
default => sub { {} },
);
sub sees_all_events {0}
sub BUILD {
my $self = shift;
$self->_set_map( $self->_build_map );
}
sub _build_map {
my $self = shift;
my %map;
File::Find::find(
{
wanted => sub {
my $path = $File::Find::name;
if ( $self->_path_is_excluded($path) ) {
$File::Find::prune = 1;
return;
}
my $entry = $self->_entry_for_map($path) or return;
$map{$path} = $entry;
},
follow_fast => ( $self->follow_symlinks ? 1 : 0 ),
no_chdir => 1,
follow_skip => 2,
},
@{ $self->directories },
);
return \%map;
}
## no critic (Subroutines::ProhibitUnusedPrivateSubroutines)
sub _always_requires_mtime {1}
## use critic
sub _entry_for_map {
sub BUILD {
my $self = shift;
my $path = shift;
my $is_dir = -d $path ? 1 : 0;
return if -l $path && !$is_dir;
unless ($is_dir) {
my $filter = $self->filter;
return unless ( File::Spec->splitpath($path) )[2] =~ /$filter/;
}
return {
is_dir => $is_dir,
mtime => _mtime(*_),
size => ( $is_dir ? 0 : -s _ ),
};
}
# It seems that Time::HiRes's stat does not act exactly like the
# built-in, so if I do ( stat _ )[9] it will not work (grr).
sub _mtime {
my @stat = stat;
$self->_set_map( $self->_current_map );
return $stat[9];
return;
}
sub wait_for_events {
......@@ -110,7 +43,7 @@ sub _interesting_events {
my @interesting;
my $old_map = $self->_map;
my $new_map = $self->_build_map;
my $new_map = $self->_current_map;
for my $path ( sort keys %{$old_map} ) {
if ( !exists $new_map->{$path} ) {
......@@ -123,32 +56,29 @@ sub _interesting_events {
type => 'delete',
);
}
elsif (
!$old_map->{$path}{is_dir}
&& ( $old_map->{$path}{mtime} != $new_map->{$path}{mtime}
|| $old_map->{$path}{size} != $new_map->{$path}{size} )
) {
push @interesting, $self->event_class->new(
path => $path,
type => 'modify',
);
else {
# If we're tracking stat info changes then we get the old & new
# stat info back in @extra. No need to stat the path _again_.
my ( $modified, @extra )
= $self->_path_was_modified( $path, $old_map, $new_map );
if ($modified) {
push @interesting, $self->event_class->new(
path => $path,
type => 'modify',
@extra,
$self->_modify_event_maybe_content_changes(
$path, $old_map, $new_map
),
);
}
}
}
for my $path ( sort grep { !exists $old_map->{$_} } keys %{$new_map} ) {
if ( -d $path ) {
push @interesting, $self->event_class->new(
path => $path,
type => 'create',
),
;
}
else {
push @interesting, $self->event_class->new(
path => $path,
type => 'create',
);
}
push @interesting, $self->event_class->new(
path => $path,
type => 'create',
);
}
$self->_set_map($new_map);
......@@ -156,6 +86,34 @@ sub _interesting_events {
return @interesting;
}
sub _path_was_modified {
my $self = shift;
my $path = shift;
my $old_map = shift;
my $new_map = shift;
my $old_entry = $old_map->{$path};
my $new_entry = $new_map->{$path};
# If it's a file and the mtime or size changed we know it's been modified
# in some way.
return 1
if !$old_entry->{is_dir}
&& ( $old_entry->{stat}{mtime} != $new_entry->{stat}{mtime}
|| $old_entry->{size} != $new_entry->{size} );
if (
my @attrs = $self->_modify_event_maybe_file_attribute_changes(
$path, $old_map, $new_map
)
) {
return ( 1, @attrs );
}
return 0;
}
__PACKAGE__->meta->make_immutable;
1;
......
......@@ -50,7 +50,11 @@ sub BUILD {
# calls ->_inotify itself.
$self->_watch_directory($_) for @{ $self->directories };
return $self;
$self->_set_map( $self->_current_map )
if $self->modify_includes_file_attributes
|| $self->modify_includes_content;
return;
}
sub wait_for_events {
......@@ -76,14 +80,22 @@ around new_events => sub {
sub _interesting_events {
my $self = shift;
# This may be a blocking read, in which case it will not return until
# something happens. For Catalyst, the restarter will end up calling
# ->wait_for_events again after handling the changes.
my @events = $self->_inotify->read;
my ( $old_map, $new_map );
if ( $self->modify_includes_file_attributes
|| $self->modify_includes_content ) {
$old_map = $self->_map;
$new_map = $self->_current_map;
}
my $filter = $self->filter;
my @interesting;
# This may be a blocking read, in which case it will not return until
# something happens. For Catalyst, the restarter will end up calling
# ->watch again after handling the changes.
for my $event ( $self->_inotify->read ) {
for my $event (@events) {
# An excluded path will show up here if ...
#
......@@ -91,6 +103,7 @@ sub _interesting_events {
# excluded or when the exclusion excludes a file, not a dir.
next if $self->_path_is_excluded( $event->fullname );
## no critic (ControlStructures::ProhibitCascadingIfElse)
if ( $event->IN_CREATE && $event->IN_ISDIR ) {
$self->_watch_directory( $event->fullname );
push @interesting, $event;
......@@ -100,6 +113,14 @@ sub _interesting_events {
elsif ( $event->IN_DELETE_SELF ) {
$self->_remove_directory( $event->fullname );
}
elsif ( $event->IN_ATTRIB ) {
next
unless $self->_path_matches(
$self->modify_includes_file_attributes,
$event->fullname
);
push @interesting, $event;
}
# We just want to check the _file_ name
elsif ( $event->name =~ /$filter/ ) {
......@@ -107,8 +128,14 @@ sub _interesting_events {
}
}
return
map { $_->can('path') ? $_ : $self->_convert_event($_) } @interesting;
$self->_set_map($new_map)
if $self->_has_map;
return map {
$_->can('path')
? $_
: $self->_convert_event( $_, $old_map, $new_map )
} @interesting;
}
sub _build_mask {
......@@ -118,6 +145,7 @@ sub _build_mask {
= IN_MODIFY | IN_CREATE | IN_DELETE | IN_DELETE_SELF | IN_MOVE_SELF
| IN_MOVED_TO;
$mask |= IN_DONT_FOLLOW unless $self->follow_symlinks;
$mask |= IN_ATTRIB if $self->modify_includes_file_attributes;
return $mask;
}
......@@ -139,7 +167,6 @@ sub _watch_directory {
$File::Find::prune = 1;
return;
}
$self->_add_watch_if_dir($path);
},
follow_fast => ( $self->follow_symlinks ? 1 : 0 ),
......@@ -196,17 +223,39 @@ sub _fake_events_for_new_dir {
}
sub _convert_event {
my $self = shift;
my $event = shift;
my $self = shift;
my $event = shift;
my $old_map = shift;
my $new_map = shift;
my $path = $event->fullname;
my $type
= $event->IN_CREATE || $event->IN_MOVED_TO ? 'create'
: $event->IN_MODIFY || $event->IN_ATTRIB ? 'modify'
: $event->IN_DELETE ? 'delete'
: 'unknown';
my @extra;
if (
$type eq 'modify'
&& ( $self->modify_includes_file_attributes
|| $self->modify_includes_content )
) {
@extra = (
$self->_modify_event_maybe_file_attribute_changes(
$path, $old_map, $new_map
),
$self->_modify_event_maybe_content_changes(
$path, $old_map, $new_map
),
);
}
return $self->event_class->new(
path => $event->fullname,
type => (
$event->IN_CREATE || $event->IN_MOVED_TO ? 'create'
: $event->IN_MODIFY ? 'modify'
: $event->IN_DELETE ? 'delete'
: 'unknown'
),
path => $path,
type => $type,
@extra,