Skip to content
Snippets Groups Projects
Commit 223b1588 authored by Dimitrios Apostolou
Browse files

Avoid invoking diff for short outputs that are identical

Commands with short outputs, like `stat`, `getfacl` and `lsattr`, now store
all their output in memory buffers and we run an in-memory comparison first.
We proceed to call `diff` only if the buffers are not identical.
parent cc38d8e4
No related branches found
No related tags found
1 merge request: !72 Performance and logging fixes
......@@ -161,13 +161,17 @@ def compare_meta(path1, path2):
return differences
try:
differences.append(Difference.from_operation(Stat, path1, path2))
differences.append(
Difference.from_operation(Stat, path1, path2, short=True)
)
except RequiredToolNotFound:
logger.error("Unable to find 'stat'! Is PATH wrong?")
if os.path.islink(path1) or os.path.islink(path2):
return [d for d in differences if d is not None]
try:
differences.append(Difference.from_operation(Getfacl, path1, path2))
differences.append(
Difference.from_operation(Getfacl, path1, path2, short=True)
)
except RequiredToolNotFound:
logger.info(
"Unable to find 'getfacl', some directory metadata differences might not be noticed."
......
......@@ -18,6 +18,7 @@
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import heapq
import io
import logging
import subprocess
......@@ -224,6 +225,12 @@ class Difference:
@staticmethod
def from_text(content1, content2, *args, **kwargs):
"""
Works for both bytes and str objects.
"""
# Avoid spawning diff if buffers have same contents
if content1 == content2:
return None
return Difference.from_feeder(
feeders.from_text(content1),
feeders.from_text(content2),
......@@ -283,9 +290,26 @@ class Difference:
kwargs["source"] = source_op.full_name(truncate=120)
try:
difference = Difference.from_feeder(
feeder1, feeder2, path1, path2, *args, **kwargs
)
short = kwargs.pop("short", False)
# If the outputs are expected to be short, store them in memory
# and do a direct comparison, and only spawn diff if needed.
if short:
memfile1 = io.BytesIO()
feeder1(memfile1)
memfile2 = io.BytesIO()
feeder2(memfile2)
bytes1 = memfile1.getbuffer().tobytes()
bytes2 = memfile2.getbuffer().tobytes()
# Check if the buffers are the same before invoking diff
if bytes1 == bytes2:
return None, True
difference = Difference.from_text(
bytes1, bytes2, path1, path2, *args, **kwargs
)
else:
difference = Difference.from_feeder(
feeder1, feeder2, path1, path2, *args, **kwargs
)
except subprocess.CalledProcessError as exc:
if exc.returncode in ignore_returncodes:
return None, False
......
......@@ -146,10 +146,17 @@ def from_operation(operation):
def from_text(content):
"""
Works for both bytes and str objects.
"""
def feeder(f):
for offset in range(0, len(content), DIFF_CHUNK):
buf = filter_reader(content[offset : offset + DIFF_CHUNK])
f.write(buf.encode("utf-8"))
if isinstance(buf, str):
f.write(buf.encode("utf-8"))
else:
f.write(buf)
return content and content[-1] == "\n"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment