Commit 3cc336c7 authored by Jérémy Bobbio's avatar Jérémy Bobbio

Use libarchive to list files in tar and cpio archives

The output of cpio and tar is too hard to control properly, so let's
create a similar enough output using libarchive functions.

We need to monkeypatch the libarchive binding in order to add the
missing accessors.

Sadly, there's no way in the libarchive API to distinguish an archive
that has both usernames and uids from one that has only usernames.
But good enough, I guess.

Closes: #808809
parent cdcc1dd5
......@@ -21,7 +21,7 @@
import re
from diffoscope import tool_required
from diffoscope.comparators.binary import File
from diffoscope.comparators.libarchive import LibarchiveContainer
from diffoscope.comparators.libarchive import LibarchiveContainer, list_libarchive
from diffoscope.comparators.utils import Command
from diffoscope.difference import Difference
......@@ -41,4 +41,6 @@ class CpioFile(File):
return CpioFile.RE_FILE_TYPE.search(file.magic_file_type)
def compare_details(self, other, source=None):
return [Difference.from_command(CpioContent, self.path, other.path, source="file list")]
return [Difference.from_text_readers(list_libarchive(self.path),
list_libarchive(other.path),
self.path, other.path, source="file list")]
......@@ -22,10 +22,10 @@ import os.path
from diffoscope import logger
from diffoscope.difference import Difference
from diffoscope.comparators.binary import File
from diffoscope.comparators.libarchive import LibarchiveContainer
from diffoscope.comparators.libarchive import LibarchiveContainer, list_libarchive
from diffoscope.comparators.utils import \
Archive, ArchiveMember, get_ar_content
from diffoscope.comparators.tar import TarContainer, TarListing
from diffoscope.comparators.tar import TarContainer
class DebContainer(LibarchiveContainer):
......@@ -113,4 +113,6 @@ class DebDataTarFile(File):
isinstance(file.container.source.container.source, DebFile)
def compare_details(self, other, source=None):
return [Difference.from_command(TarListing, self.path, other.path)]
return [Difference.from_text_readers(list_libarchive(self.path),
list_libarchive(other.path),
self.path, other.path, source="file list")]
......@@ -21,6 +21,7 @@
from contextlib import contextmanager
import ctypes
import os.path
import time
import libarchive
from diffoscope import logger
from diffoscope.comparators.device import Device
......@@ -36,12 +37,54 @@ if not hasattr(libarchive.ffi, 'entry_rdevmajor'):
if not hasattr(libarchive.ffi, 'entry_rdevminor'):
    libarchive.ffi.ffi('entry_rdevminor', [libarchive.ffi.c_archive_entry_p], ctypes.c_uint)
    libarchive.ArchiveEntry.rdevminor = property(lambda self: libarchive.ffi.entry_rdevminor(self._entry_p))

# Monkeypatch libarchive-c (<< 2.3)
#
# Each accessor is declared only when the installed binding does not already
# provide the corresponding `entry_*` function. Every patch follows the same
# two steps: register the C function with its return type, then expose it as
# a read-only property on ArchiveEntry.
if not hasattr(libarchive.ffi, 'entry_nlink'):
    libarchive.ffi.ffi('entry_nlink', [libarchive.ffi.c_archive_entry_p], ctypes.c_uint)
    libarchive.ArchiveEntry.nlink = property(lambda self: libarchive.ffi.entry_nlink(self._entry_p))
if not hasattr(libarchive.ffi, 'entry_uid'):
    libarchive.ffi.ffi('entry_uid', [libarchive.ffi.c_archive_entry_p], ctypes.c_uint32)
    libarchive.ArchiveEntry.uid = property(lambda self: libarchive.ffi.entry_uid(self._entry_p))
if not hasattr(libarchive.ffi, 'entry_gid'):
    libarchive.ffi.ffi('entry_gid', [libarchive.ffi.c_archive_entry_p], ctypes.c_uint32)
    # The gid property must call entry_gid; calling entry_uid here would
    # report the owner's uid as the group id of every entry.
    libarchive.ArchiveEntry.gid = property(lambda self: libarchive.ffi.entry_gid(self._entry_p))
if not hasattr(libarchive.ffi, 'entry_mtime_nsec'):
    libarchive.ffi.ffi('entry_mtime_nsec', [libarchive.ffi.c_archive_entry_p], ctypes.c_long)
    libarchive.ArchiveEntry.mtime_nsec = property(lambda self: libarchive.ffi.entry_mtime_nsec(self._entry_p))
# uname/gname are declared as c_char_p, so ctypes hands them back as bytes
# (or None for a NULL pointer); callers have to decode them themselves.
if not hasattr(libarchive.ffi, 'entry_uname'):
    libarchive.ffi.ffi('entry_uname', [libarchive.ffi.c_archive_entry_p], ctypes.c_char_p)
    libarchive.ArchiveEntry.uname = property(lambda self: libarchive.ffi.entry_uname(self._entry_p))
if not hasattr(libarchive.ffi, 'entry_gname'):
    libarchive.ffi.ffi('entry_gname', [libarchive.ffi.c_archive_entry_p], ctypes.c_char_p)
    libarchive.ArchiveEntry.gname = property(lambda self: libarchive.ffi.entry_gname(self._entry_p))

# Monkeypatch libarchive-c so we always get pathname as (Unicode) str
# Otherwise, we'll get sometimes str and sometimes bytes and always pain.
# Undecodable bytes are preserved through the 'surrogateescape' handler
# instead of raising.
libarchive.ArchiveEntry.pathname = property(lambda self: libarchive.ffi.entry_pathname(self._entry_p).decode('utf-8', errors='surrogateescape'))
def list_libarchive(path):
    """Yield one ``ls -l``-style text line for every entry of an archive.

    The archive at *path* is opened with ``libarchive.file_reader`` and each
    entry is rendered as a newline-terminated string made of, in order: the
    mode string, the link count, the owner, the group, the size (or the
    ``major,minor`` device numbers for block and character devices), the
    modification time and the entry name (followed by ``-> target`` for
    symbolic links).

    The modification time is formatted from ``time.gmtime`` with a
    microsecond suffix derived from the entry's nanosecond field. Owner and
    group are shown as ``name (id)`` when the entry carries a name, and as
    the bare numeric id otherwise.
    """
    with libarchive.file_reader(path) as archive:
        for entry in archive:
            # Device nodes have no meaningful size; show their numbers instead.
            is_device = entry.isblk or entry.ischr
            size_or_dev = (
                '{major:>3},{minor:>3}'.format(major=entry.rdevmajor, minor=entry.rdevminor)
                if is_device
                else entry.size
            )

            seconds_part = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(entry.mtime))
            micros_part = '.{:06d}'.format(entry.mtime_nsec // 1000)
            mtime = seconds_part + micros_part

            name_and_link = (
                '{entry.name} -> {entry.linkname}'.format(entry=entry)
                if entry.issym
                else entry.name
            )

            # Owner and group names come back as bytes from the binding.
            owner_name = entry.uname
            if not owner_name:
                user = entry.uid
            else:
                user = '{user:<8} {uid:>7}'.format(
                    user=owner_name.decode('utf-8', errors='surrogateescape'),
                    uid='({})'.format(entry.uid),
                )

            group_name = entry.gname
            if not group_name:
                group = entry.gid
            else:
                group = '{group:<8} {gid:>7}'.format(
                    group=group_name.decode('utf-8', errors='surrogateescape'),
                    gid='({})'.format(entry.gid),
                )

            fields = {
                'strmode': entry.strmode.decode('us-ascii'),
                'entry': entry,
                'user': user,
                'group': group,
                'size_or_dev': size_or_dev,
                'mtime': mtime,
                'name_and_link': name_and_link,
            }
            yield '{strmode} {entry.nlink:>3} {user:>8} {group:>8} {size_or_dev:>8} {mtime:>8} {name_and_link}\n'.format(**fields)
class LibarchiveMember(ArchiveMember):
def __init__(self, archive, entry):
super().__init__(archive, entry.pathname)
......
......@@ -20,17 +20,12 @@
import re
from diffoscope.difference import Difference
from diffoscope.comparators.binary import File
from diffoscope.comparators.libarchive import LibarchiveContainer
from diffoscope.comparators.libarchive import LibarchiveContainer, list_libarchive
from diffoscope.comparators.utils import Command, tool_required
class TarContainer(LibarchiveContainer):
pass
class TarListing(Command):
@tool_required('tar')
def cmdline(self):
return ['tar', '--full-time', '-tvf', self.path]
class TarFile(File):
CONTAINER_CLASS = TarContainer
......@@ -41,4 +36,6 @@ class TarFile(File):
return TarFile.RE_FILE_TYPE.search(file.magic_file_type)
def compare_details(self, other, source=None):
return [Difference.from_command(TarListing, self.path, other.path)]
return [Difference.from_text_readers(list_libarchive(self.path),
list_libarchive(other.path),
self.path, other.path, source="file list")]
......@@ -74,8 +74,8 @@ def test_differences(differences):
tarinfo = differences[0].details[0]
tardiff = differences[0].details[1]
encodingdiff = tardiff.details[0]
assert tarinfo.source1 == 'tar --full-time -tvf {}'
assert tarinfo.source2 == 'tar --full-time -tvf {}'
assert tarinfo.source1 == 'file list'
assert tarinfo.source2 == 'file list'
assert tardiff.source1 == './date.txt'
assert tardiff.source2 == './date.txt'
assert encodingdiff.source1 == 'encoding'
......
......@@ -72,7 +72,7 @@ def test_no_fuzzy_matching(monkeypatch, fuzzy_tar_in_tar1, fuzzy_tar_in_tar2):
monkeypatch.setattr(Config, 'fuzzy_threshold', 0)
difference = fuzzy_tar_in_tar1.compare(fuzzy_tar_in_tar2)
assert len(difference.details) == 1
assert difference.details[0].source1 == 'tar --full-time -tvf {}'
assert difference.details[0].source1 == 'file list'
@pytest.mark.skipif(miss_tlsh, reason='tlsh is missing')
def test_no_fuzzy_matching_new_file(monkeypatch, fuzzy_tar_in_tar1, fuzzy_tar_in_tar2):
......
@@ -1,4 +1,4 @@
-lrwxrwxrwx 1 1000 1000 6 Jun 24 15:10 dir/link -> broken
+lrwxrwxrwx 1 1000 1000 13 Jun 24 15:11 dir/link -> really-broken
crw-r--r-- 1 0 0 1, 3 Jun 24 14:47 dir/null
--rw-r--r-- 1 1000 1000 446 Jun 24 15:10 dir/text
-drwxr-xr-x 2 1000 1000 0 Jun 24 15:10 dir
+-rw-r--r-- 1 1000 1000 671 Jun 24 15:11 dir/text
+drwxr-xr-x 2 1000 1000 0 Jun 24 15:11 dir
-lrwxrwxrwx 1 1000 1000 6 2015-06-24 15:10:12.000000 dir/link -> broken
+lrwxrwxrwx 1 1000 1000 13 2015-06-24 15:11:35.000000 dir/link -> really-broken
crw-r--r-- 1 0 0 1, 3 2015-06-24 14:47:34.000000 dir/null
--rw-r--r-- 1 1000 1000 446 2015-06-24 15:10:17.000000 dir/text
-drwxr-xr-x 2 1000 1000 0 2015-06-24 15:10:12.000000 dir
+-rw-r--r-- 1 1000 1000 671 2015-06-24 15:11:38.000000 dir/text
+drwxr-xr-x 2 1000 1000 0 2015-06-24 15:11:35.000000 dir
@@ -1,3 +1,3 @@
-drwxr-xr-x 0/0 0 2015-12-02 16:01:40 ./
+drwxr-xr-x 0/0 0 2015-12-02 16:03:11 ./
drwx------ 0/0 0 2015-12-02 16:00:55 ./lost+found/
--rw-rw-rw- 1234/1234 28 2015-12-02 16:01:40 ./date.txt
+-r--r--r-- 4321/4321 44 2015-12-02 16:03:11 ./date.txt
-drwxr-xr-x 0 0 0 0 2015-12-02 16:01:40.000000 ./
+drwxr-xr-x 0 0 0 0 2015-12-02 16:03:11.000000 ./
drwx------ 0 0 0 0 2015-12-02 16:00:55.000000 ./lost+found/
--rw-rw-rw- 0 1234 1234 28 2015-12-02 16:01:40.000000 ./date.txt
+-r--r--r-- 0 4321 4321 44 2015-12-02 16:03:11.000000 ./date.txt
@@ -1 +1 @@
-Wed Dec 2 17:01:40 CET 2015
+jeudi 3 décembre 2015, 06:03:11 (UTC+1400)
......
@@ -1 +1 @@
--rw-r--r-- 1 0 0 446 Jun 24 17:55 ./dir/text
+-rw-r--r-- 1 0 0 671 Jun 24 17:55 ./dir/text
--rw-r--r-- 1 0 0 446 2015-06-24 17:55:18.000000 ./dir/text
+-rw-r--r-- 1 0 0 671 2015-06-24 17:55:59.000000 ./dir/text
@@ -1,4 +1,4 @@
-drwxr-xr-x lunar/lunar 0 2015-06-29 15:49:09 dir/
--rw-r--r-- lunar/lunar 446 2015-06-29 15:49:09 dir/text
-crw-r--r-- root/root 1,3 2015-06-29 15:49:09 dir/null
-lrwxrwxrwx lunar/lunar 0 2015-06-29 15:49:09 dir/link -> broken
+drwxr-xr-x lunar/lunar 0 2015-06-29 15:49:41 dir/
+-rw-r--r-- lunar/lunar 671 2015-06-29 15:49:41 dir/text
+crw-r--r-- root/root 1,3 2015-06-29 15:49:41 dir/null
+lrwxrwxrwx lunar/lunar 0 2015-06-29 15:49:41 dir/link -> really-broken
-drwxr-xr-x 0 lunar (1000) lunar (1000) 0 2015-06-29 15:49:09.000000 dir/
--rw-r--r-- 0 lunar (1000) lunar (1000) 446 2015-06-29 15:49:09.000000 dir/text
-crw-r--r-- 0 root (0) root (0) 1, 3 2015-06-29 15:49:09.000000 dir/null
-lrwxrwxrwx 0 lunar (1000) lunar (1000) 0 2015-06-29 15:49:09.000000 dir/link -> broken
+drwxr-xr-x 0 lunar (1000) lunar (1000) 0 2015-06-29 15:49:41.000000 dir/
+-rw-r--r-- 0 lunar (1000) lunar (1000) 671 2015-06-29 15:49:41.000000 dir/text
+crw-r--r-- 0 root (0) root (0) 1, 3 2015-06-29 15:49:41.000000 dir/null
+lrwxrwxrwx 0 lunar (1000) lunar (1000) 0 2015-06-29 15:49:41.000000 dir/link -> really-broken
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment