Skip to content
Commits on Source (3)
......@@ -266,9 +266,11 @@ class DirectoryContainer(Container):
if not os.path.islink(member_path) and os.path.isdir(member_path):
return FilesystemDirectory(member_path)
return FilesystemFile(
os.path.join(self.source.path, member_name), container=self
)
path = os.path.join(self.source.path, member_name)
if not os.path.exists(path):
raise KeyError("%s not found in directory" % member_name)
return FilesystemFile(path, container=self)
def comparisons(self, other):
my_members = collections.OrderedDict(self.get_adjusted_members_sizes())
......
......@@ -18,6 +18,7 @@
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
from diffoscope.tools import tool_required
from diffoscope.tempfiles import get_temporary_directory
from diffoscope.difference import Difference
from .utils.file import File
......@@ -25,6 +26,7 @@ from .utils.command import Command
import shutil
import os.path
import logging
import binascii
......@@ -35,27 +37,49 @@ DUMP_RDB = """lazyLoad(commandArgs(TRUE)); for (obj in ls()) { print(obj); for (
# Unfortunately, the snippet above cannot detect build-path differences, so
# diffoscope still falls back to a hexdump.
logger = logging.getLogger(__name__)
def check_rds_extension(f):
    """Return True when ``f.name`` ends with ``.rds`` or ``.rdx``.

    ``f`` is any object exposing a string ``name`` attribute.
    """
    # str.endswith accepts a tuple of suffixes, so one call covers both.
    return f.name.endswith((".rds", ".rdx"))
def ensure_archive_rdx(f):
if not f.container or f.path.endswith(".rdb"):
return f.path
def get_module_path_for_rdb(rdb):
"""
R's lazyLoad method does not take a filename directly to an .rdb file (e.g.
`/path/to/foo.rdb`) but rather the path without any extension (e.g.
`/path/to/foo`). It also requires that the .rdx file exists at
`/path/to/foo.rdx`.
We thus locate the corresponding .rdx file in the surrounding container and
copy that to `foo.rdx`. We use a temporary directory to ensure we do not
add files to the user's filesystem in the case of directly comparing two
.rdb files or, worse, overwriting a file in its place.
"""
# If we are not in a container, we will never be able to locate the
# corresponding .rdx
if rdb.container is None:
return
# Calculate location of parallel .rdx file
rdx_name = "{}.rdx".format(os.path.basename(os.path.splitext(rdb.name)[0]))
# if we're in an archive, copy the .rdx file over so R can read it
bname = os.path.basename(f.name)
assert bname.endswith(".rdb")
rdx_name = f.name[:-4] + ".rdx"
try:
rdx_path = f.container.get_member(rdx_name).path
rdx = rdb.container.get_member(rdx_name)
except KeyError:
return f.path
# R will fail, diffoscope will report the error and continue
shutil.copy(f.path, f.path + ".rdb")
shutil.copy(rdx_path, f.path + ".rdx")
return f.path + ".rdb"
# Corresponding .rdx does not exist
return
temp_dir = get_temporary_directory().name
prefix = os.path.join(temp_dir, "temp")
logger.debug("Copying %s and %s to %s", rdx.path, rdb.path, temp_dir)
shutil.copy(rdb.path, '{}.rdb'.format(prefix))
shutil.copy(rdx.path, '{}.rdx'.format(prefix))
# Return the "module" path, ie. without an extension
return os.path.join(temp_dir, "temp")
class RdsReader(Command):
......@@ -89,7 +113,7 @@ class RdsFile(File):
class RdbReader(Command):
@tool_required('Rscript')
def cmdline(self):
return ['Rscript', '-e', DUMP_RDB, self.path[:-4]]
return ['Rscript', '-e', DUMP_RDB, self.path]
class RdbFile(File):
......@@ -97,6 +121,10 @@ class RdbFile(File):
FILE_EXTENSION_SUFFIX = '.rdb'
def compare_details(self, other, source=None):
self_path = ensure_archive_rdx(self)
other_path = ensure_archive_rdx(other)
return [Difference.from_command(RdbReader, self_path, other_path)]
a = get_module_path_for_rdb(self)
b = get_module_path_for_rdb(other)
if a is None or b is None:
return []
return [Difference.from_command(RdbReader, a, b)]
......@@ -259,7 +259,7 @@ class LibarchiveContainer(Archive):
for entry in archive:
if entry.pathname == member_name:
return self.get_subclass(entry)
raise KeyError('%s not found in archive', member_name)
raise KeyError('%s not found in archive' % member_name)
def get_filtered_members(self):
try:
......