# -*- coding: utf-8 -*-
#
# diffoscope: in-depth comparison of files, archives, and directories
#
# Copyright © 2015 Jérémy Bobbio <lunar@debian.org>
#
# diffoscope is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# diffoscope is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
19

20
import os
21
import re
22
import logging
23
import subprocess
24 25
import collections
import itertools
Chris Lamb's avatar
Chris Lamb committed
26

27
from diffoscope.exc import RequiredToolNotFound
28
from diffoscope.tools import tool_required
29
from diffoscope.config import Config
30
from diffoscope.progress import Progress
Jérémy Bobbio's avatar
Jérémy Bobbio committed
31
from diffoscope.difference import Difference
32 33

from .binary import FilesystemFile
34 35
from .utils.command import Command
from .utils.container import Container
36

37 38
logger = logging.getLogger(__name__)

39

40 41 42 43
def list_files(path):
    """
    Return a sorted list of every entry found below ``path``.

    Both directories and non-directory entries reported by ``os.walk`` are
    listed. Each entry is expressed relative to the resolved
    (``os.path.realpath``) form of ``path``; ``path`` itself is not included.
    """
    path = os.path.realpath(path)
    # Number of leading characters to drop from every ``root`` yielded by
    # os.walk() in order to make it relative. A resolved path normally has no
    # trailing separator, so one extra character is skipped for the separator
    # that follows it. A filesystem root (e.g. "/") already ends with its
    # separator, which must not be counted a second time.
    prefix_len = len(path.rstrip(os.sep)) + 1
    all_files = []
    for root, dirs, names in os.walk(path):
        relative_root = root[prefix_len:]
        all_files.extend(
            os.path.join(relative_root, entry) for entry in dirs + names
        )
    all_files.sort()
    return all_files
50 51


52 53 54 55
if os.uname()[0] == 'FreeBSD':
    class Stat(Command):
        """Run ``stat`` with an explicit time format and output format."""

        @tool_required('stat')
        def cmdline(self):
            time_format = '%Y-%m-%d %H:%M:%S'
            output_format = '%Sp %l %Su %Sg %z %Sm %k %b %#Xf'
            return ['stat', '-t', time_format, '-f', output_format, self.path]
else:
    class Stat(Command):
        """
        Run ``stat`` and strip the text matched by the patterns below from
        every output line.
        """

        # Each pattern matches a piece of ``stat`` output that filter()
        # replaces with the empty string (presumably because these fields
        # differ between otherwise identical trees -- TODO confirm).
        FILE_RE = re.compile(r'^\s*File:.*$')
        DEVICE_RE = re.compile(r'Device: [0-9a-f]+h/[0-9]+d\s+')
        INODE_RE = re.compile(r'Inode: [0-9]+\s+')
        ACCESS_TIME_RE = re.compile(r'^Access: [0-9]{4}-[0-9]{2}-[0-9]{2}.*$')
        CHANGE_TIME_RE = re.compile(r'^Change: [0-9]{4}-[0-9]{2}-[0-9]{2}.*$')

        @tool_required('stat')
        def cmdline(self):
            return ['stat', self.path]

        def filter(self, line):
            """Decode ``line``, remove the matched fields and re-encode it."""
            text = line.decode('utf-8')
            patterns = (
                Stat.FILE_RE,
                Stat.DEVICE_RE,
                Stat.INODE_RE,
                Stat.ACCESS_TIME_RE,
                Stat.CHANGE_TIME_RE,
            )
            for pattern in patterns:
                text = pattern.sub('', text)
            return text.encode('utf-8')
82 83


84
@tool_required('lsattr')
def lsattr(path):
    """
    Return the attribute flags that ``lsattr -d`` prints for ``path``.

    The flags are the first whitespace-separated field of the command's
    output. An empty string is returned whenever no flags could be obtained
    (the command failed or printed nothing), so callers always get a string.

    NB. Difficult to replace with in-Python version. See
    <https://stackoverflow.com/questions/35501249/python-get-linux-file-immutable-attribute/38092961#38092961>
    """

    try:
        # The output echoes the file name, which is not guaranteed to be
        # valid UTF-8; only the leading flags field is used, so undecodable
        # bytes are replaced rather than allowed to raise.
        output = subprocess.check_output(
            ['lsattr', '-d', path],
            shell=False,
            stderr=subprocess.STDOUT,
        ).decode('utf-8', 'replace')
    except subprocess.CalledProcessError as e:
        if e.returncode != 1:
            # Exit status 1 is the expected failure (see below); record
            # anything else instead of silently falling through.
            logger.debug(
                "lsattr exited with status %d for %s; ignoring its output",
                e.returncode, path)
        # filesystem doesn't support xattrs
        return ''

    fields = output.split()
    return fields[0] if fields else ''


104 105 106
class Getfacl(Command):
    """Command wrapper that runs ``getfacl`` on ``self.path``."""

    @tool_required('getfacl')
    def cmdline(self):
        # A different set of options is passed when running on FreeBSD.
        if os.uname()[0] == 'FreeBSD':
            options = ['-q', '-h']
        else:
            options = ['-p', '-c']
        return ['getfacl'] + options + [self.path]
111 112


113 114
def xattr(path1, path2):
    """
    Compare the extended attributes of ``path1`` and ``path2``.

    Returns whatever ``Difference.from_text`` produces for the two
    ``name: value`` listings, or ``None`` when no ``xattr`` module can be
    imported.
    """
    try:
        import xattr as xattr_
    except ImportError:
        return None

    # Support the case where the python3-xattr package is installed but
    # python3-pyxattr is not; python3-xattr has an xattr class that can be used
    # like a dict.
    try:
        get_all = xattr_.get_all
    except AttributeError:
        def get_all(x):
            return xattr_.xattr(x).items()

    def to_text(value):
        # Names and values may arrive as bytes or, depending on which binding
        # is in use, already as text; only bytes have (and need) .decode().
        if isinstance(value, bytes):
            return value.decode('utf-8', 'ignore')
        return str(value)

    def fn(x):
        return '\n'.join(
            '{}: {}'.format(to_text(k), to_text(v)) for k, v in get_all(x)
        )

    return Difference.from_text(
        fn(path1), fn(path2), path1, path2, source='extended file attributes',
    )

138

139
def compare_meta(path1, path2):
    """
    Collect metadata differences between ``path1`` and ``path2``.

    Returns a list with the non-``None`` results of comparing ``stat``
    output and, unless either path is a symlink, ``getfacl`` output,
    ``lsattr`` flags and extended attributes. The list is empty when
    metadata comparison is disabled in the configuration or when either path
    does not exist. A missing external tool is logged and skipped.
    """
    if Config().exclude_directory_metadata:
        logger.debug(
            "Excluding directory metadata for paths (%s, %s)", path1, path2)
        return []

    logger.debug('compare_meta(%s, %s)', path1, path2)
    results = []

    # Don't run any commands if any of the paths do not exist
    if not (os.path.exists(path1) and os.path.exists(path2)):
        return results

    try:
        results.append(Difference.from_command(Stat, path1, path2))
    except RequiredToolNotFound:
        logger.error("Unable to find 'stat'! Is PATH wrong?")

    # When either path is a symlink only the stat comparison is performed.
    if not (os.path.islink(path1) or os.path.islink(path2)):
        try:
            results.append(Difference.from_command(Getfacl, path1, path2))
        except RequiredToolNotFound:
            logger.info(
                "Unable to find 'getfacl', some directory metadata differences might not be noticed.")

        try:
            flags1 = lsattr(path1)
            flags2 = lsattr(path2)
            results.append(
                Difference.from_text(
                    flags1, flags2, path1, path2, source='lsattr',
                )
            )
        except RequiredToolNotFound:
            logger.info(
                "Unable to find 'lsattr', some directory metadata differences might not be noticed.")

        results.append(xattr(path1, path2))

    # Comparisons that produced nothing are recorded as None; drop them.
    return [entry for entry in results if entry is not None]
178

179

180
def compare_directories(path1, path2, source=None):
    """
    Compare the directories at ``path1`` and ``path2``.

    Both paths are wrapped in ``FilesystemDirectory`` and compared; the
    result of ``FilesystemDirectory.compare`` is returned unchanged.

    ``source`` is forwarded to that comparison (it used to be accepted but
    ignored); leaving it as ``None`` keeps the previous behaviour.
    """
    first = FilesystemDirectory(path1)
    second = FilesystemDirectory(path2)
    return first.compare(second, source=source)


class Directory(object):
    """Recognizer for objects that report themselves as directories."""

    DESCRIPTION = "directories"

    @classmethod
    def fallback_recognizes(cls, file):
        """Always ``False``: there is no fallback match for directories."""
        return False

    @classmethod
    def recognizes(cls, file):
        """Return the result of ``file.is_directory()``."""
        is_dir = file.is_directory()
        return is_dir

195

196
class FilesystemDirectory(Directory):
    """A directory identified by the path it was constructed with."""

    def __init__(self, path):
        self._path = path

    @property
    def path(self):
        """The path given to the constructor."""
        return self._path

    @property
    def name(self):
        """Same value as ``path``."""
        return self._path

    @property
    def as_container(self):
        """A ``DirectoryContainer`` for this directory, created on first use."""
        try:
            container = self._as_container
        except AttributeError:
            container = self._as_container = DirectoryContainer(self)
        return container

    def is_directory(self):
        return True

    def has_same_content_as(self, other):
        # no shortcut
        return False

    def compare(self, other, source=None):
        """
        Compare this directory with ``other``.

        Gathers the file-listing difference, the metadata differences and
        the member differences. Returns ``None`` when nothing was gathered,
        otherwise a ``Difference`` carrying them as details.
        """
        details = []

        listing = Difference.from_text(
            '\n'.join(list_files(self.path)),
            '\n'.join(list_files(other.path)),
            self.path,
            other.path,
            source='file list',
        )
        if listing:
            details.append(listing)

        details.extend(compare_meta(self.name, other.name))

        mine = DirectoryContainer(self)
        theirs = DirectoryContainer(other)
        details.extend(mine.compare(theirs))

        if details:
            result = Difference(None, self.path, other.path, source)
            result.add_details(details)
            return result

        return None


class DirectoryContainer(Container):
    """Container whose members are the entries of the source directory."""

    def get_member_names(self):
        """Return the sorted entry names of the source directory."""
        # An empty/falsy source path is listed as the current directory.
        entries = os.listdir(self.source.path or '.')
        entries.sort()
        return entries

    def get_member(self, member_name):
        """
        Return the member called ``member_name``.

        Real directories (not symlinks to directories) become a
        ``FilesystemDirectory``; everything else becomes a
        ``FilesystemFile`` attached to this container.
        """
        member_path = os.path.join(self.source.path, member_name)

        is_real_directory = (
            not os.path.islink(member_path) and os.path.isdir(member_path)
        )
        if is_real_directory:
            return FilesystemDirectory(member_path)

        return FilesystemFile(member_path, container=self)

    def comparisons(self, other):
        """
        Yield ``(my_file, other_file, name)`` for each member name present
        in both containers, in sorted name order, reporting progress on the
        way.
        """
        mine = collections.OrderedDict(self.get_adjusted_members_sizes())
        theirs = collections.OrderedDict(other.get_adjusted_members_sizes())

        my_total = sum(entry[1] for entry in mine.values())
        their_total = sum(entry[1] for entry in theirs.values())
        total_size = my_total + their_total

        # Only names that exist on both sides are compared here.
        common_names = sorted(mine.keys() & theirs.keys())

        with Progress(total_size) as progress:
            for name in common_names:
                my_file, my_size = mine[name]
                other_file, other_size = theirs[name]
                progress.begin_step(my_size + other_size, msg=name)
                yield my_file, other_file, name

    def compare(self, other, source=None):
        """
        Lazily compare the members shared with ``other``.

        Returns an iterator over the truthy per-member differences; each one
        also carries the metadata differences of that member pair.
        """
        # Imported here rather than at module level (presumably to avoid a
        # circular import -- TODO confirm).
        from .utils.compare import compare_files

        def compare_member(file1, file2, source):
            content_diff = compare_files(file1, file2, source=source)
            meta_diffs = compare_meta(file1.name, file2.name)
            if meta_diffs and not content_diff:
                # Metadata differs although the content comparison yielded
                # nothing: create a parent to hang the metadata details on.
                content_diff = Difference(None, file1.path, file2.path)
            if content_diff:
                content_diff.add_details(meta_diffs)
            return content_diff

        per_member = itertools.starmap(compare_member, self.comparisons(other))
        return filter(None, per_member)