Commit ea4c94a7 authored by Chris Lamb's avatar Chris Lamb 💬

Rework and refactor the handling of R .rdb files with respect to locating the...

Rework and refactor the handling of R .rdb files with respect to locating the parallel .rdx prior to inspecting the file, to ensure that we do not add files to the user's filesystem in the case of directly comparing two .rdb files or, worse, overwriting a file in its place.
parent c98e40ff
Pipeline #81111 passed with stage
in 19 minutes and 31 seconds
......@@ -18,6 +18,7 @@
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
from diffoscope.tools import tool_required
from diffoscope.tempfiles import get_temporary_directory
from diffoscope.difference import Difference
from .utils.file import File
......@@ -25,6 +26,7 @@ from .utils.command import Command
import shutil
import os.path
import logging
import binascii
......@@ -35,27 +37,49 @@ DUMP_RDB = """lazyLoad(commandArgs(TRUE)); for (obj in ls()) { print(obj); for (
# unfortunately this above snippet can't detect the build-path differences so
# diffoscope still falls back to a hexdump
logger = logging.getLogger(__name__)
def check_rds_extension(f):
    """
    Return True if the name of ``f`` ends with ``.rds`` or ``.rdx``.

    ``f`` only needs to expose a string ``name`` attribute; nothing else is
    read from it here.
    """
    # str.endswith accepts a tuple of suffixes, so a single call covers both
    # extensions.
    return f.name.endswith((".rds", ".rdx"))
def ensure_archive_rdx(f):
if not f.container or f.path.endswith(".rdb"):
return f.path
def get_module_path_for_rdb(rdb):
"""
R's lazyLoad method does not take a filename directly to an .rdb file (eg.
`/path/to/foo.rdb`) but rather the path without any extension (eg.
`/path/to/foo`). It also requires that the .rdx file exists at
`/path/to/foo.rdx`.
We thus locate the corresponding .rdx file in the surrounding container and
copy that to `foo.rdx`. We use a temporary directory to ensure we do not
add files to the user's filesystem in the case of directly comparing two
.rdb files or, worse, overwriting a file in its place.
"""
# If we are not in a container, we will never be able to locate the
# corresponding .rdx
if rdb.container is None:
return
# Calculate location of parallel .rdx file
rdx_name = "{}.rdx".format(os.path.basename(os.path.splitext(rdb.name)[0]))
# if we're in an archive, copy the .rdx file over so R can read it
bname = os.path.basename(f.name)
assert bname.endswith(".rdb")
rdx_name = f.name[:-4] + ".rdx"
try:
rdx_path = f.container.get_member(rdx_name).path
rdx = rdb.container.get_member(rdx_name)
except KeyError:
return f.path
# R will fail, diffoscope will report the error and continue
shutil.copy(f.path, f.path + ".rdb")
shutil.copy(rdx_path, f.path + ".rdx")
return f.path + ".rdb"
# Corresponding .rdx does not exist
return
temp_dir = get_temporary_directory().name
prefix = os.path.join(temp_dir, "temp")
logger.debug("Copying %s and %s to %s", rdx.path, rdb.path, temp_dir)
shutil.copy(rdb.path, '{}.rdb'.format(prefix))
shutil.copy(rdx.path, '{}.rdx'.format(prefix))
# Return the "module" path, ie. without an extension
return os.path.join(temp_dir, "temp")
class RdsReader(Command):
......@@ -89,7 +113,7 @@ class RdsFile(File):
class RdbReader(Command):
@tool_required('Rscript')
def cmdline(self):
return ['Rscript', '-e', DUMP_RDB, self.path[:-4]]
return ['Rscript', '-e', DUMP_RDB, self.path]
class RdbFile(File):
......@@ -97,6 +121,10 @@ class RdbFile(File):
FILE_EXTENSION_SUFFIX = '.rdb'
def compare_details(self, other, source=None):
self_path = ensure_archive_rdx(self)
other_path = ensure_archive_rdx(other)
return [Difference.from_command(RdbReader, self_path, other_path)]
a = get_module_path_for_rdb(self)
b = get_module_path_for_rdb(other)
if a is None or b is None:
return []
return [Difference.from_command(RdbReader, a, b)]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment