Commit f0666e4e authored by Jérémy Bobbio

Read lines using an iterator instead of loading a full list in memory

StreamReader.readlines() creates a list instead of an iterator. This
meant that we were previously loading the entire content fed to diff
in memory instead of streaming it as it was produced. This meant we
were creating huge buffers for no reason!

So let's replace all `for line in f.readlines():` with `for line in f:`, which
will properly use an iterator for the same end result.

Thanks Mike Hommey for the report and good test case.

Closes: #808120
parent 4530f664
......@@ -71,7 +71,7 @@ class Md5sumsFile(File):
try:
md5sums = {}
with open(self.path, 'r', encoding='utf-8') as f:
for line in f.readlines():
for line in f:
md5sum, path = re.split(r'\s+', line.strip(), maxsplit=1)
md5sums['./%s' % path] = md5sum
return md5sums
......
......@@ -56,7 +56,7 @@ class DiffParser(object):
return self._success
def parse(self):
for line in self._output.readlines():
for line in self._output:
self._action = self._action(line.decode('utf-8', errors='replace'))
self._action('')
self._success = True
......@@ -226,7 +226,7 @@ def make_feeder_from_raw_reader(in_file, filter=lambda buf: buf):
def feeder(out_file):
line_count = 0
end_nl = False
for buf in in_file.readlines():
for buf in in_file:
line_count += 1
out_file.write(filter(buf))
max_lines = Config.general.max_diff_input_lines
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment