Skip to content
Snippets Groups Projects
Commit e75871b0 authored by Chris Lamb :eyes:
Browse files

Do not call marshal.loads(...) of precompiled Python bytecode as it is...

Do not call marshal.loads(...) of precompiled Python bytecode as it is inherently unsafe. Replace for now with a brief summary of the code section of .pyc files. (Closes: reproducible-builds/diffoscope#371)
parent e6ef1100
No related branches found
No related tags found
No related merge requests found
Pipeline #715977 passed
......@@ -18,14 +18,11 @@
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import binascii
import dis
import io
import marshal
import os
import re
import struct
import time
import types
from diffoscope.difference import Difference
......@@ -85,55 +82,10 @@ def parse_pyc(f):
filesz = struct.unpack("<L", filesz)
yield f"files sz: {filesz[0]}"
code = marshal.load(f)
yield from show_code(code)
def show_code(code, indent=""):
    """Yield a line-by-line textual dump of a code object.

    The output consists of, in order: a "code" heading, selected ``co_*``
    header attributes, a hex dump of ``co_code`` (via ``show_hex``), the
    ``dis.disassemble`` listing with every ``re_memory_address`` match
    removed, the constants (nested code objects are dumped recursively),
    the remaining ``co_*`` attributes, and a hex dump of ``co_lnotab``.

    :param code: the code object to describe.
    :param indent: prefix prepended to the emitted lines; it is extended
        for everything below the heading and for nested code objects.
    """
    yield f"{indent}code"
    indent += " "

    header_fields = ("argcount", "nlocals", "stacksize", "flags")
    for field in header_fields:
        value = getattr(code, f"co_{field}")
        yield f"{indent}{field: <10}: {value!r}"

    yield from show_hex("code", code.co_code, indent=indent)

    # Capture the disassembly in memory so it can be post-processed line by
    # line instead of being written straight to stdout.
    listing = io.StringIO()
    dis.disassemble(code, file=listing)
    for line in listing.getvalue().splitlines():
        cleaned = re_memory_address.sub("", line)
        yield f"{indent}{cleaned}"

    yield f"{indent}consts"
    for const in code.co_consts:
        if not isinstance(const, types.CodeType):
            yield f" {indent}{const!r}"
            continue
        # Nested code objects get their own, further indented, dump.
        yield from show_code(const, f"{indent} ")

    trailer_fields = (
        "names",
        "varnames",
        "freevars",
        "cellvars",
        "filename",
        "name",
        "firstlineno",
    )
    for field in trailer_fields:
        value = getattr(code, f"co_{field}")
        yield f"{indent}{field: <10} {value!r}"

    yield from show_hex("lnotab", code.co_lnotab, indent=indent)
def show_hex(label, val, indent):
    """Yield a labelled hex rendering of ``val``.

    ``val`` is first passed through this module's ``hexlify`` helper. If
    the result is shorter than 60 characters, a single line holding the
    label and the value is yielded. Otherwise the label is yielded on its
    own line, followed by the value split into slices of at most 60
    characters, one per line.

    :param label: heading text for the dump.
    :param val: the raw value to render.
    :param indent: prefix prepended to every emitted line.
    """
    width = 60
    encoded = hexlify(val)
    total = len(encoded)

    if total >= width:
        yield f"{indent}{label}"
        for offset in range(0, total, width):
            chunk = encoded[offset : offset + width]
            yield f"{indent} {chunk}"
    else:
        yield f"{indent}{label} {encoded}"
start = f.tell()
f.seek(0, os.SEEK_END)
size = f.tell() - start
yield f"code: starts at offset {start} (size: {size} bytes)"
def hexlify(val):
......
......@@ -32,13 +32,24 @@ pyc1 = load_fixture("test1.pyc-renamed")
pyc2 = load_fixture("test2.pyc-renamed")
def skip_unless_correct_python_version():
TEST_FIXTURES_GENERATED_BY = (3, 12)
display = ".".join(str(x) for x in TEST_FIXTURES_GENERATED_BY)
return skipif(
  • Developer

    @lamby I'm not sure why you're skipping this test like that? Since it no longer unmarshals the code (as that was both unsafe and not supported between different versions) the Python version is no longer relevant.

  • Author Owner

    The first n bytes could theoretically change, no? All of the moddate stuff. I grant that it isn't much, however. Do you suspect this will "never" change? If so, I will indeed remove all of this skipping apparatus.

  • Developer

    Yes, you will get different results if you generate the .pyc from the same .py with different Python versions. But these files are already generated. Their contents -- which are just being read and unpacked here, not interpreted as bytecode for the running Python version -- will not change depending on the Python version you read them with.

  • Author Owner

    You are, of course, absolutely right. The test fixture and the code are 'fixed' in time, as it is all just calls to struct and the like. Removed all the version checking in 288c65c1. Serves me right for coding straight after being in the sun :sunglasses:

  • Please register or sign in to reply
sys.version_info[:2] != TEST_FIXTURES_GENERATED_BY,
reason=f"Only Python {display} can de-marshal test1.pyc-renamed",
)
@skip_unless_file_version_is_at_least("5.39")
def test_identification(pyc1, pyc2):
    """Both .pyc fixtures must be identified as ``PycFile`` instances."""
    for fixture in (pyc1, pyc2):
        assert isinstance(fixture, PycFile)
@skipif(sys.version_info >= (3, 10), reason="Unstable on 3.10+")
@skip_unless_correct_python_version()
def test_no_differences(pyc1):
# Disassembling bytecode prior to Python 3.10 is stable when applied to
# itself, otherwise various memory offsets (or memory addresses?) are
......@@ -52,15 +63,9 @@ def differences(pyc1, pyc2):
@skip_unless_file_version_is_at_least("5.39")
@skipif(
sys.version_info[:2] not in {(3, 9), (3, 10)},
reason="Only Python 3.9 and 3.10 can de-marshal test1.pyc-renamed",
)
@skip_unless_correct_python_version()
def test_diff(differences):
assert_diff_startswith(
differences[0],
"pyc_expected_diff",
)
assert_diff_startswith(differences[0], "pyc_expected_diff")
def test_compare_non_existing(monkeypatch, pyc1):
......
@@ -1,9 +1,9 @@
magic: 0x610d0d0a
-moddate: 0xbd103561 (Sun Sep 5 18:47:25 2021 UTC)
+moddate: 0xae814d61 (Fri Sep 24 07:43:42 2021 UTC)
files sz: 14217
code
argcount : 0
nlocals : 0
stacksize : 3
flags : 64
code
@@ -1,4 +1,4 @@
magic: 0xcb0d0d0a
-moddate: 0x436ebb66 (Tue Aug 13 14:31:31 2024 UTC)
+moddate: 0x3f6ebb66 (Tue Aug 13 14:31:27 2024 UTC)
files sz: 13
code: starts at offset 16 (size: 121 bytes)
No preview for this file type
No preview for this file type
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment