Skip to content
Snippets Groups Projects
Commit c9c69fcd authored by Chris Lamb's avatar Chris Lamb 👀
Browse files

Do not call marshal.loads(...) of precompiled Python bytecode as it is...

Do not call marshal.loads(...) of precompiled Python bytecode as it is inherently unsafe. Replace for now with a brief summary of the code section of .pyc files. (Closes: #371)
parent 3bda1dbc
No related branches found
No related tags found
No related merge requests found
...@@ -18,13 +18,11 @@ ...@@ -18,13 +18,11 @@
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>. # along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import binascii import binascii
import dis
import io import io
import marshal import os
import re import re
import struct import struct
import time import time
import types
from diffoscope.difference import Difference from diffoscope.difference import Difference
...@@ -78,55 +76,10 @@ def parse_pyc(f): ...@@ -78,55 +76,10 @@ def parse_pyc(f):
filesz = struct.unpack("<L", filesz) filesz = struct.unpack("<L", filesz)
yield f"files sz: {filesz[0]}" yield f"files sz: {filesz[0]}"
code = marshal.load(f) start = f.tell()
yield from show_code(code) f.seek(0, os.SEEK_END)
size = f.tell() - start
yield f"code: starts at offset {start} (size: {size} bytes)"
def show_code(code, indent=""):
yield f"{indent}code"
indent += " "
for x in ("argcount", "nlocals", "stacksize", "flags"):
yield "{}{: <10}: {!r}".format(indent, x, getattr(code, f"co_{x}"))
yield from show_hex("code", code.co_code, indent=indent)
s = io.StringIO()
dis.disassemble(code, file=s)
for x in s.getvalue().splitlines():
yield "{}{}".format(indent, re_memory_address.sub("", x))
yield f"{indent}consts"
for const in code.co_consts:
if isinstance(const, types.CodeType):
yield from show_code(const, f"{indent} ")
else:
yield f" {indent}{const!r}"
for x in (
"names",
"varnames",
"freevars",
"cellvars",
"filename",
"name",
"firstlineno",
):
yield "{}{: <10} {!r}".format(indent, x, getattr(code, f"co_{x}"))
yield from show_hex("lnotab", code.co_lnotab, indent=indent)
def show_hex(label, val, indent):
val = hexlify(val)
if len(val) < 60:
yield f"{indent}{label} {val}"
return
yield f"{indent}{label}"
for i in range(0, len(val), 60):
yield "{} {}".format(indent, val[i : i + 60])
def hexlify(val): def hexlify(val):
......
...@@ -32,13 +32,24 @@ pyc1 = load_fixture("test1.pyc-renamed") ...@@ -32,13 +32,24 @@ pyc1 = load_fixture("test1.pyc-renamed")
pyc2 = load_fixture("test2.pyc-renamed") pyc2 = load_fixture("test2.pyc-renamed")
def skip_unless_correct_python_version():
  • @lamby: with this unneeded skip (which you removed in a later commit), these tests will not be run on bookworm, as it doesn't have Python 3.12.

  • Author Owner

    Thanks. This was done on master (288c65c1) but I didn't backport it to the proposed bookworm branch ... will do so now as it makes the total diff simpler.

  • Please register or sign in to reply
TEST_FIXTURES_GENERATED_BY = (3, 12)
display = ".".join(str(x) for x in TEST_FIXTURES_GENERATED_BY)
return skipif(
sys.version_info[:2] != TEST_FIXTURES_GENERATED_BY,
reason=f"Only Python {display} can de-marshal test1.pyc-renamed",
)
@skip_unless_file_version_is_at_least("5.39") @skip_unless_file_version_is_at_least("5.39")
def test_identification(pyc1, pyc2): def test_identification(pyc1, pyc2):
assert isinstance(pyc1, PycFile) assert isinstance(pyc1, PycFile)
assert isinstance(pyc2, PycFile) assert isinstance(pyc2, PycFile)
@skipif(sys.version_info >= (3, 10), reason="Unstable on 3.10+") @skip_unless_correct_python_version()
def test_no_differences(pyc1): def test_no_differences(pyc1):
# Disassembling bytecode prior to Python 3.10 is stable when applied to # Disassembling bytecode prior to Python 3.10 is stable when applied to
# itself, otherwise various memory offsets (or memory addresses?) are # itself, otherwise various memory offsets (or memory addresses?) are
...@@ -52,15 +63,9 @@ def differences(pyc1, pyc2): ...@@ -52,15 +63,9 @@ def differences(pyc1, pyc2):
@skip_unless_file_version_is_at_least("5.39") @skip_unless_file_version_is_at_least("5.39")
@skipif( @skip_unless_correct_python_version()
sys.version_info[:2] not in {(3, 9), (3, 10)},
reason="Only Python 3.9 and 3.10 can de-marshal test1.pyc-renamed",
)
def test_diff(differences): def test_diff(differences):
assert_diff_startswith( assert_diff_startswith(differences[0], "pyc_expected_diff")
differences[0],
"pyc_expected_diff",
)
def test_compare_non_existing(monkeypatch, pyc1): def test_compare_non_existing(monkeypatch, pyc1):
......
@@ -1,9 +1,9 @@ @@ -1,4 +1,4 @@
magic: 0x610d0d0a magic: 0xcb0d0d0a
-moddate: 0xbd103561 (Sun Sep 5 18:47:25 2021 UTC) -moddate: 0x436ebb66 (Tue Aug 13 14:31:31 2024 UTC)
+moddate: 0xae814d61 (Fri Sep 24 07:43:42 2021 UTC) +moddate: 0x3f6ebb66 (Tue Aug 13 14:31:27 2024 UTC)
files sz: 14217 files sz: 13
code code: starts at offset 16 (size: 121 bytes)
argcount : 0
nlocals : 0
stacksize : 3
flags : 64
code
No preview for this file type
No preview for this file type
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment