Skip to content
Snippets Groups Projects
Commit 37649ac3 authored by Brett Smith's avatar Brett Smith Committed by Chris Lamb
Browse files

diffoscope.diff: Improve FIFO writing robustness.


We used to give input to diff using FIFO objects.
fec9e97c changed this to avoid relying on
/dev/fd, which is a good change for portability.  Unfortunately, the
implementation has a few issues of its own:

* It feeds diff with normal files, rather than FIFOs.  This could cause
  trouble if diff tries to read farther than the underlying feeder
  processes have written.
* It uses threads to write to the pseudo-FIFO, but the file object is
  manipulated in multiple threads.  I suspect this is the primary cause of
  the segfaults observed in #852013.
* fd_from_feeder is decorated as a context manager, but it never yields
  anything, which is not how it's expected to be used.  I'm not sure this is
  causing any problems, but it makes it harder to reason about what's going
  on.

This commit introduces a new FIFOFeeder class.  It is wholly responsible for
creating and feeding the FIFO, so we don't have to pass file objects across
the thread boundary and risk segfaults.  It uses context management to tell
when the FIFO is no longer needed, so it can clean up nicely.

Signed-off-by: Chris Lamb's avatarChris Lamb <lamby@debian.org>
Signed-off-by: Mattia Rizzolo's avatarMattia Rizzolo <mattia@debian.org>
parent f0343d83
No related branches found
No related tags found
No related merge requests found
......@@ -20,6 +20,8 @@
import re
import io
import os
import errno
import fcntl
import hashlib
import logging
import threading
......@@ -192,54 +194,55 @@ def run_diff(fifo1, fifo2, end_nl_q1, end_nl_q2):
return parser.diff
def feed(feeder, f, end_nl_q):
# work-around unified diff limitation: if there's no newlines in both
# don't make it a difference
try:
end_nl = feeder(f)
end_nl_q.put(end_nl)
finally:
f.close()
class ExThread(threading.Thread):
"""
Inspired by https://stackoverflow.com/a/6874161
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.__status_queue = Queue()
def run(self, *args, **kwargs):
try:
super().run(*args, **kwargs)
except Exception as ex:
#except_type, except_class, tb = sys.exc_info()
self.__status_queue.put(ex)
self.__status_queue.put(None)
class FIFOFeeder(threading.Thread):
def __init__(self, feeder, fifo_path, end_nl_q=None, *, daemon=True):
os.mkfifo(fifo_path)
super().__init__(daemon=daemon)
self.feeder = feeder
self.fifo_path = fifo_path
self.end_nl_q = Queue() if end_nl_q is None else end_nl_q
self._exception = None
self._want_join = threading.Event()
def wait_for_exc_info(self):
return self.__status_queue.get()
def __enter__(self):
self.start()
return self
def join(self):
ex = self.wait_for_exc_info()
if ex is None:
return
raise ex
def __exit__(self, exc_type, exc_value, exc_tb):
self.join()
@contextlib.contextmanager
def fd_from_feeder(feeder, end_nl_q, fifo):
f = open(fifo, 'wb')
t = ExThread(target=feed, args=(feeder, f, end_nl_q))
def run(self):
try:
# Try to open the FIFO nonblocking, so we can periodically check
# if the main thread wants us to wind down. If it does, there's no
# more need for the FIFO, so stop the thread.
while True:
try:
fifo_fd = os.open(self.fifo_path, os.O_WRONLY | os.O_NONBLOCK)
except OSError as error:
if error.errno != errno.ENXIO:
raise
elif self._want_join.is_set():
return
else:
break
# Now clear the fd's nonblocking flag to let writes block normally.
fcntl.fcntl(fifo_fd, fcntl.F_SETFL, 0)
with open(fifo_fd, 'wb') as fifo:
# The queue works around a unified diff limitation: if there's
# no newlines in both don't make it a difference
end_nl = self.feeder(fifo)
self.end_nl_q.put(end_nl)
except Exception as error:
self._exception = error
t.daemon = True
t.start()
def join(self):
self._want_join.set()
super().join()
if self._exception is not None:
raise self._exception
try:
t.join()
finally:
f.close()
def empty_file_feeder():
def feeder(f):
......@@ -275,17 +278,13 @@ def make_feeder_from_raw_reader(in_file, filter=lambda buf: buf):
return feeder
def diff(feeder1, feeder2):
end_nl_q1 = Queue()
end_nl_q2 = Queue()
tmpdir = get_temporary_directory().name
fifo1 = os.path.join(tmpdir, 'f1')
fifo2 = os.path.join(tmpdir, 'f2')
fd_from_feeder(feeder1, end_nl_q1, fifo1)
fd_from_feeder(feeder2, end_nl_q2, fifo2)
return run_diff(fifo1, fifo2, end_nl_q1, end_nl_q2)
fifo1_path = os.path.join(tmpdir, 'fifo1')
fifo2_path = os.path.join(tmpdir, 'fifo2')
with FIFOFeeder(feeder1, fifo1_path) as fifo1, \
FIFOFeeder(feeder2, fifo2_path) as fifo2:
return run_diff(fifo1_path, fifo2_path, fifo1.end_nl_q, fifo2.end_nl_q)
def reverse_unified_diff(diff):
res = []
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment