Commits (14)
......@@ -36,6 +36,7 @@ class ComparatorManager:
("missing_file.MissingFile",),
("symlink.Symlink",),
("device.Device",),
("socket_or_fifo.SocketOrFIFO",),
("debian.DotChangesFile", "debian_fallback.DotChangesFile"),
("debian.DotDscFile", "debian_fallback.DotDscFile"),
("debian.DotBuildinfoFile", "debian_fallback.DotBuildinfoFile"),
......
......@@ -41,3 +41,7 @@ class FilesystemFile(File):
def is_device(self):
    """Return True when the path is a character or block device node.

    The file is inspected with lstat(), so a symbolic link is examined
    itself rather than being followed to its target.
    """
    file_type = stat.S_IFMT(os.lstat(self._name).st_mode)
    return file_type in (stat.S_IFCHR, stat.S_IFBLK)
def is_socket_or_fifo(self):
    """Return True when the path is a UNIX socket or a named pipe (FIFO).

    The file is inspected with lstat(), so a symbolic link is examined
    itself rather than being followed to its target.
    """
    file_type = stat.S_IFMT(os.lstat(self._name).st_mode)
    return file_type in (stat.S_IFSOCK, stat.S_IFIFO)
......@@ -97,7 +97,7 @@ CBFS_HEADER_VERSION2 = 0x31313132
CBFS_HEADER_SIZE = 8 * 4 # 8 * uint32_t
# On 2015-12-15, the largest image produced by coreboot is 16 MiB
CBFS_MAXIMUM_FILE_SIZE = 24 * 2 ** 20 # 24 MiB
CBFS_MAXIMUM_FILE_SIZE = 24 * 2**20 # 24 MiB
def is_header_valid(buf, size, offset=0):
......
......@@ -68,6 +68,9 @@ class DebControlMember(File):
def is_device(self):
    """Always False: objects of this class are never reported as devices."""
    return False
def is_socket_or_fifo(self):
    """Always False: objects of this class are never sockets or FIFOs."""
    return False
class DebControlContainer(Container):
def __init__(self, *args, **kwargs):
......
......@@ -183,6 +183,9 @@ class AsmFunction(File):
def is_device(self):
    """Always False: objects of this class are never reported as devices."""
    return False
def is_socket_or_fifo(self):
    """Always False: objects of this class are never sockets or FIFOs."""
    return False
if tlsh:
@property
......
......@@ -329,6 +329,9 @@ class ElfSection(File):
def is_device(self):
    """Always False: objects of this class are never reported as devices."""
    return False
def is_socket_or_fifo(self):
    """Always False: objects of this class are never sockets or FIFOs."""
    return False
def has_same_content_as(self, other):
    """Never report identical content, whatever *other* is."""
    # Always force diff of the section
    return False
......
......@@ -67,6 +67,9 @@ class MachoContainerFile(File, metaclass=abc.ABCMeta):
def is_device(self):
    """Always False: objects of this class are never reported as devices."""
    return False
def is_socket_or_fifo(self):
    """Always False: objects of this class are never sockets or FIFOs."""
    return False
def has_same_content_as(self, other):
    """Never report identical content, whatever *other* is."""
    # Always force diff of the container
    return False
......
......@@ -73,6 +73,9 @@ class MissingFile(File, AbstractMissingType):
def is_device(self):
    """Always False: objects of this class are never reported as devices."""
    return False
def is_socket_or_fifo(self):
    """Always False: objects of this class are never sockets or FIFOs."""
    return False
def compare(self, other, source=None):
# So now that comparators are all object-oriented, we don't have any
# clue on how to perform a meaningful comparison right here. So we are
......
......@@ -39,10 +39,10 @@ logger = logging.getLogger(__name__)
def convert_header_field(io, header):
if isinstance(header, list):
if len(header) == 0:
io.write(u"[]")
io.write("[]")
else:
for item in header:
io.write(u"\n - ")
io.write("\n - ")
convert_header_field(io, item)
return
......@@ -70,9 +70,9 @@ def get_rpm_header(path, ts):
for rpmtag in sorted(rpm.tagnames):
if rpmtag not in hdr:
continue
s.write(u"{}: ".format(rpm.tagnames[rpmtag]))
s.write("{}: ".format(rpm.tagnames[rpmtag]))
convert_header_field(s, hdr[rpmtag])
s.write(u"\n")
s.write("\n")
return s.getvalue()
......
#
# diffoscope: in-depth comparison of files, archives, and directories
#
# Copyright © 2021 Brent Spillner <s p i l l n e r @ a c m . o r g>
#
# diffoscope is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# diffoscope is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import os
import stat
import logging
from diffoscope.tempfiles import get_named_temporary_file
from diffoscope.difference import Difference
from .binary import FilesystemFile
from .utils.file import File
logger = logging.getLogger(__name__)
class SocketOrFIFO(File):
    """Comparator for UNIX domain sockets and named pipes (FIFOs).

    These endpoints have no byte content to diff, so each one is
    represented by a one-line text placeholder (see ``create_placeholder``)
    and comparisons are performed on those placeholders.
    """

    DESCRIPTION = "local (UNIX domain) sockets and named pipes (FIFOs)"

    @classmethod
    def recognizes(cls, file):
        """Claim any file whose ``is_socket_or_fifo()`` predicate is true."""
        return file.is_socket_or_fifo()

    def get_type(self):
        """Return the file-type bits (``stat.S_IFMT``) of the node at ``name``.

        This implementation reads the type from the filesystem with
        lstat(), hence the FilesystemFile assertion.
        """
        assert isinstance(self, FilesystemFile)
        st = os.lstat(self.name)
        return stat.S_IFMT(st.st_mode)

    def has_same_content_as(self, other):
        """Return True when *other* denotes the same socket or pipe.

        Endpoints of different kinds never match.  When both paths exist
        they are compared with ``os.path.samefile``; otherwise their
        canonical paths are compared.  Returns False if *other* cannot
        answer ``get_type()`` or a filesystem call fails.
        """
        try:
            # (filesystem ID, inode) pair uniquely identifies the socket/pipe
            # Path comparison allows matching against pipes inside an archive
            # (i.e. that would be created by extraction), while using
            # .samefile() lets us match endpoints that might have more than
            # one "canonical" pathname after a mount -o rebind
            if self.get_type() != other.get_type():
                return False
            if os.path.exists(self.name) and os.path.exists(other.name):
                return os.path.samefile(self.name, other.name)
            # os.path has no "realname" attribute; realpath() is the call
            # that canonicalises a path.  The misspelling used to raise
            # AttributeError, which the handler below turned into False.
            return os.path.realpath(self.name) == os.path.realpath(other.name)
        except (AttributeError, OSError):
            # 'other' is likely something odd that doesn't support stat()
            # and/or can't supply an fs_uuid/inode pair for samefile()
            logger.debug(
                "has_same_content: Not a socket, FIFO, or ordinary file: %s",
                other,
            )
            return False

    def create_placeholder(self):
        """Write the one-line description to a temp file; return its path.

        The file is created with ``delete=False``; ``cleanup`` removes it.
        """
        with get_named_temporary_file(mode="w+", delete=False) as f:
            f.write(format_socket(self.get_type(), self.name))
            f.flush()
            return f.name

    @property
    def path(self):
        """Path of the placeholder file, created on first access."""
        if not hasattr(self, "_placeholder"):
            self._placeholder = self.create_placeholder()
        return self._placeholder

    def cleanup(self):
        """Delete the placeholder file, if any, then run the base cleanup."""
        if hasattr(self, "_placeholder"):
            os.remove(self._placeholder)
            del self._placeholder
        super().cleanup()

    def compare(self, other, source=None):
        """Diff this endpoint's placeholder text against ``other.path``.

        Returns whatever ``Difference.from_text_readers`` produces for the
        two opened files.
        """
        with open(self.path) as my_content, open(other.path) as other_content:
            return Difference.from_text_readers(
                my_content,
                other_content,
                self.name,
                other.name,
                source=source,
                comment="socket/FIFO",
            )
def format_socket(mode, filename):
    """Return the one-line placeholder text describing an endpoint.

    *mode* is a stat mode; only its file-type bits are examined.  A mode
    that is neither a socket nor a FIFO yields an explicit error label
    instead of raising.
    """
    labels = {
        stat.S_IFSOCK: "UNIX domain socket",
        stat.S_IFIFO: "named pipe (FIFO)",
    }
    kind = labels.get(
        stat.S_IFMT(mode),
        "ERROR: problem with an is_socket_or_fifo() predicate",
    )
    return f"{kind}: {filename}\n"
......@@ -33,6 +33,7 @@ from diffoscope.tempfiles import get_temporary_directory
from .utils.file import File
from .device import Device
from .symlink import Symlink
from .socket_or_fifo import SocketOrFIFO
from .directory import Directory
from .utils.archive import Archive, ArchiveMember
from .utils.command import Command
......@@ -72,6 +73,9 @@ class SquashfsMember(ArchiveMember):
def is_device(self):
    """Always False: objects of this class are never reported as devices."""
    return False
def is_socket_or_fifo(self):
    """Always False: objects of this class are never sockets or FIFOs."""
    return False
@property
def path(self):
# Use our extracted version and also avoid creating a temporary
......@@ -217,6 +221,42 @@ class SquashfsDevice(Device, SquashfsMember):
return True
class SquashfsFIFO(SocketOrFIFO, SquashfsMember):
    """Socket or named-pipe entry taken from a squashfs listing line."""

    # Lines handled here begin with "s" (socket) or "p" (FIFO), e.g.:
    # prw-r--r-- root/root 0 2021-08-18 13:37 run/initctl
    LINE_RE = re.compile(
        r"^(?P<kind>s|p)\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+(?P<member_name>.*)$"
    )

    # Leading listing character -> stat file-type constant.
    KIND_MAP = {"s": stat.S_IFSOCK, "p": stat.S_IFIFO}

    @staticmethod
    def parse(line):
        """Split a listing line into ``member_name`` and ``mode``.

        Returns a dict with exactly those two keys, ``mode`` being the
        KIND_MAP constant for the line's first character.  Raises
        SquashfsInvalidLineFormat when the line does not match LINE_RE or
        its kind has no KIND_MAP entry.
        """
        matched = SquashfsFIFO.LINE_RE.match(line)
        if matched is None:
            raise SquashfsInvalidLineFormat("invalid line format")

        fields = matched.groupdict()
        try:
            mode = SquashfsFIFO.KIND_MAP[fields["kind"]]
        except KeyError:
            raise SquashfsInvalidLineFormat(
                f"unknown socket/FIFO kind {fields['kind']}"
            )
        return {"member_name": fields["member_name"], "mode": mode}

    def __init__(self, archive, member_name, mode):
        """Initialise the archive member and remember the parsed *mode*."""
        SquashfsMember.__init__(self, archive, member_name)
        self._mode = mode

    def get_type(self):
        """Return the file-type bits of the mode recorded at construction."""
        return stat.S_IFMT(self._mode)

    def is_socket_or_fifo(self):
        """Always True: this class only models sockets and FIFOs."""
        return True
class SquashfsContainer(Archive):
auto_diff_metadata = False
......@@ -225,6 +265,8 @@ class SquashfsContainer(Archive):
"l": SquashfsSymlink,
"c": SquashfsDevice,
"b": SquashfsDevice,
"p": SquashfsFIFO,
"s": SquashfsFIFO,
"-": SquashfsRegularFile,
}
......
......@@ -136,6 +136,9 @@ class ArchiveMember(File):
def is_device(self):
    """Always False: objects of this class are never reported as devices."""
    return False
def is_socket_or_fifo(self):
    """Always False: objects of this class are never sockets or FIFOs."""
    return False
class MissingArchiveLikeObject(AbstractMissingType):
def getnames(self):
......
......@@ -327,6 +327,7 @@ class File(metaclass=abc.ABCMeta):
(self.is_device, "device"),
(self.is_symlink, "symlink"),
(self.is_directory, "directory"),
(self.is_socket_or_fifo, "socket or FIFO"),
):
if x():
return y
......@@ -374,6 +375,10 @@ class File(metaclass=abc.ABCMeta):
def is_device(self):
    """Tell whether this file is a device; subclasses must override.

    Takes ``self`` so that an un-overridden call on an instance raises
    NotImplementedError rather than a TypeError about argument count.
    """
    raise NotImplementedError()
@abc.abstractmethod
def is_socket_or_fifo(self):
    """Tell whether this file is a socket or FIFO; subclasses must override.

    Takes ``self`` so that an un-overridden call on an instance raises
    NotImplementedError rather than a TypeError about argument count.
    """
    raise NotImplementedError()
def compare_bytes(self, other, source=None):
from .compare import compare_binary_files
......
......@@ -24,6 +24,7 @@ import ctypes
import logging
import libarchive
import collections
import stat
from diffoscope.exc import ContainerExtractionError
from diffoscope.config import Config
......@@ -34,6 +35,7 @@ from diffoscope.difference import Difference
from ..device import Device
from ..symlink import Symlink
from ..directory import Directory
from ..socket_or_fifo import SocketOrFIFO
from .archive import Archive, ArchiveMember
......@@ -187,6 +189,9 @@ class LibarchiveMember(ArchiveMember):
def is_device(self):
    """Always False: objects of this class are never reported as devices."""
    return False
def is_socket_or_fifo(self):
    """Always False: objects of this class are never sockets or FIFOs."""
    return False
class LibarchiveDirectory(Directory, LibarchiveMember):
def __init__(self, archive, entry):
......@@ -241,6 +246,18 @@ class LibarchiveDevice(Device, LibarchiveMember):
return True
class LibarchiveFIFO(SocketOrFIFO, LibarchiveMember):
    """Socket/FIFO comparator for an entry read through libarchive."""

    def __init__(self, container, entry):
        """Initialise the archive member and keep the entry's mode."""
        # Call the member base explicitly rather than through super().
        LibarchiveMember.__init__(self, container, entry)
        self._mode = entry.mode

    def get_type(self):
        """Return the file-type bits of the mode captured from the entry."""
        return stat.S_IFMT(self._mode)

    def is_socket_or_fifo(self):
        """Always True: this class only models sockets and FIFOs."""
        return True
class LibarchiveContainer(Archive):
def open_archive(self):
# libarchive is very very stream oriented and not for random access
......@@ -283,6 +300,8 @@ class LibarchiveContainer(Archive):
return LibarchiveSymlink(self, entry)
if entry.isblk or entry.ischr:
return LibarchiveDevice(self, entry)
if entry.isfifo:
return LibarchiveFIFO(self, entry)
return LibarchiveMember(self, entry)
......@@ -323,7 +342,7 @@ class LibarchiveContainer(Archive):
os.makedirs(os.path.dirname(dst), exist_ok=True)
try:
with open(dst, "wb") as f:
for block in entry.get_blocks(block_size=2 ** 17):
for block in entry.get_blocks(block_size=2**17):
f.write(block)
except Exception as e:
raise ContainerExtractionError(entry.pathname, e)
......
......@@ -43,16 +43,16 @@ class Config:
self.diff_context = 7
# GNU diff cannot process arbitrary large files :(
self.max_diff_input_lines = 2 ** 22
self.max_diff_input_lines = 2**22
self.max_diff_block_lines_saved = float("inf")
# hard limits, restricts single-file and multi-file formats
self.max_report_size = defaultint(40 * 2 ** 20) # 40 MB
self.max_diff_block_lines = defaultint(2 ** 10) # 1024 lines
self.max_report_size = defaultint(40 * 2**20) # 40 MB
self.max_diff_block_lines = defaultint(2**10) # 1024 lines
# structural limits, restricts single-file formats
# semi-restricts multi-file formats
self.max_page_size = defaultint(40 * 2 ** 20) # 4 MB
self.max_page_diff_block_lines = defaultint(2 ** 7) # 128 lines
self.max_page_size = defaultint(40 * 2**20) # 4 MB
self.max_page_diff_block_lines = defaultint(2**7) # 128 lines
self.max_text_report_size = 0
......
......@@ -34,7 +34,7 @@ logger = logging.getLogger(__name__)
class TextPresenter(Presenter):
PREFIX = u"│ "
PREFIX = "│ "
RE_PREFIX = re.compile(r"(^|\n)")
def __init__(self, print_func, color):
......@@ -76,13 +76,13 @@ class TextPresenter(Presenter):
self.output("--- {}".format(difference.source1))
self.output("+++ {}".format(difference.source2))
elif difference.source1 == difference.source2:
self.output(u"├── {}".format(difference.source1))
self.output("├── {}".format(difference.source1))
else:
self.output(u"│ --- {}".format(difference.source1))
self.output(u"├── +++ {}".format(difference.source2))
self.output("│ --- {}".format(difference.source1))
self.output("├── +++ {}".format(difference.source2))
for x in difference.comments:
self.output(u"│┄ {}".format(x))
self.output("│┄ {}".format(x))
diff = difference.unified_diff
......
......@@ -45,7 +45,7 @@ def differences(pcap1, pcap2):
@skip_unless_tools_exist("tcpdump")
def test_diff(differences):
expected_diff = get_data("pcap_expected_diff")
assert differences[0].unified_diff[: 2 ** 13] == expected_diff[: 2 ** 13]
assert differences[0].unified_diff[: 2**13] == expected_diff[: 2**13]
@skip_unless_tools_exist("tcpdump")
......
#
# diffoscope: in-depth comparison of files, archives, and directories
#
# Copyright © 2017, 2020 Chris Lamb <lamby@debian.org>
# Copyright © 2021 Brent Spillner <s p i l l n e r @ a c m . o r g>
#
# diffoscope is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# diffoscope is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import os
import socket
import pytest
from diffoscope.comparators.binary import FilesystemFile
from diffoscope.comparators.socket_or_fifo import SocketOrFIFO, format_socket
from diffoscope.comparators.utils.specialize import specialize
from ..utils.data import get_data, load_fixture
sample_textfile = "text_ascii1"
sampletext = load_fixture(sample_textfile)
def make_socket(path):
    """Create a UNIX domain socket node at *path* and return its comparator.

    Anything already present at *path* is removed first.  The socket
    object is closed as soon as it has been bound, so no descriptor is
    leaked; the filesystem entry created by bind() stays in place until it
    is unlinked.
    """
    if os.path.exists(path):
        os.remove(path)
    with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as sock:
        sock.bind(path)
    return specialize(FilesystemFile(path))
def make_pipe(path):
    """Create a named pipe at *path* and return its specialized comparator.

    Anything already present at *path* is removed before the FIFO is made.
    """
    stale = os.path.exists(path)
    if stale:
        os.remove(path)
    os.mkfifo(path)
    fifo_file = FilesystemFile(path)
    return specialize(fifo_file)
@pytest.fixture
def endpoints(tmpdir):
    """Yield ``(path, comparator)`` pairs for two sockets and two pipes.

    The nodes are created under *tmpdir* in the order socket1, socket2,
    pipe1, pipe2 and are removed again once the test has finished.
    """

    def makename(tag):
        return os.path.join(tmpdir, "test_" + tag)

    # Materialise the pairs: a bare zip() is a one-shot iterator that the
    # comprehension below would exhaust, leaving the teardown loop empty.
    test_points = list(
        zip(
            [make_socket, make_socket, make_pipe, make_pipe],
            map(makename, ["socket1", "socket2", "pipe1", "pipe2"]),
        )
    )
    yield [(name, f(name)) for (f, name) in test_points]
    for (_, name) in test_points:
        try:
            os.remove(name)
        except FileNotFoundError:
            # Best-effort teardown: the node may already be gone.
            pass
@pytest.fixture
def expected_results(endpoints):
    """Yield the six unified diffs the socket/FIFO comparisons should give.

    Order: socket-vs-text, pipe-vs-text, socket-vs-socket, pipe-vs-pipe,
    socket-vs-pipe, pipe-vs-socket.
    """
    sock1_desc, sock2_desc, pipe1_desc, pipe2_desc = (
        format_socket(obj.get_type(), path) for (path, obj) in endpoints
    )

    # Prefix every line of the sample text file with '+' to predict the RHS
    # of the diff.
    sampletext_contents = get_data(sample_textfile)
    sample_lines = sampletext_contents.count("\n")
    text_lines = sampletext_contents.split("\n")[:-1]
    added_text = "+" + "\n+".join(text_lines) + "\n"

    def against_text(desc):
        # One placeholder line replaced by the whole sample text.
        return "@@ -1 +1,{} @@\n".format(sample_lines) + "-" + desc + added_text

    def against_endpoint(old_desc, new_desc):
        # One placeholder line replaced by another placeholder line.
        return "@@ -1 +1 @@\n" + "-" + old_desc + "+" + new_desc

    yield (
        against_text(sock1_desc),
        against_text(pipe1_desc),
        against_endpoint(sock1_desc, sock2_desc),
        against_endpoint(pipe1_desc, pipe2_desc),
        against_endpoint(sock1_desc, pipe1_desc),
        against_endpoint(pipe1_desc, sock1_desc),
    )
def test_sockets(endpoints, expected_results, sampletext):
    """Check socket/FIFO recognition and every pairwise comparison.

    Covers endpoint-vs-text, endpoint-vs-itself (no difference expected),
    same-kind and cross-kind comparisons.
    """
    # Only the comparator objects are needed; the paths are ignored.
    (sock1, sock2, pipe1, pipe2) = [obj for (_, obj) in endpoints]
    (
        sock_text_diff,
        pipe_text_diff,
        sock_sock_diff,
        pipe_pipe_diff,
        sock_pipe_diff,
        pipe_sock_diff,
    ) = expected_results
    assert isinstance(sock1, SocketOrFIFO)
    assert isinstance(pipe1, SocketOrFIFO)
    assert sock1.compare(sampletext).unified_diff == sock_text_diff
    assert pipe1.compare(sampletext).unified_diff == pipe_text_diff
    # Identity checks against None, per PEP 8.
    assert sock1.compare(sock1) is None
    assert pipe1.compare(pipe1) is None
    assert sock1.compare(sock2).unified_diff == sock_sock_diff
    assert pipe1.compare(pipe2).unified_diff == pipe_pipe_diff
    assert sock1.compare(pipe1).unified_diff == sock_pipe_diff
    assert pipe1.compare(sock1).unified_diff == pipe_sock_diff