Skip to content
GitLab
Projects
Groups
Topics
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Register
Sign in
Toggle navigation
Menu
Reproducible Builds
diffoscope
Compare revisions
9aefdb654df57c4e4c6fdeb37ba135492a64d623...8b673b26d07df9184241a64429695f65d78b205f
Commits (2)
Rewrite the calculation of a file's "fuzzy hash" to make the control flow cleaner.
· 15590583
Chris Lamb
authored
Jul 16, 2021
For next commits.
15590583
Don't traceback on a broken symlink in a directory. (Closes:
#269
)
· 8b673b26
Chris Lamb
authored
Jul 16, 2021
8b673b26
Hide whitespace changes
Inline
Side-by-side
diffoscope/comparators/utils/file.py
View file @
8b673b26
...
...
@@ -337,21 +337,29 @@ class File(metaclass=abc.ABCMeta):
@property
def
fuzzy_hash
(
self
):
if
not
hasattr
(
self
,
"_fuzzy_hash"
):
def
calc
(
):
# tlsh is not meaningful with files smaller than 512 bytes
if
os
.
stat
(
self
.
path
).
st_size
>=
512
:
h
=
tlsh
.
Tlsh
()
with
open
(
self
.
path
,
"rb"
)
as
f
:
for
buf
in
iter
(
lambda
:
f
.
read
(
32768
),
b
""
):
h
.
update
(
buf
)
h
.
final
()
try
:
self
.
_fuzzy_hash
=
h
.
hexdigest
()
except
ValueError
:
# File must contain a certain amount of randomness.
self
.
_fuzzy_hash
=
None
else
:
self
.
_fuzzy_hash
=
None
try
:
if
os
.
stat
(
self
.
path
).
st_size
<
512
:
return
None
except
FileNotFoundError
:
# eg. invalid symlink
return
None
h
=
tlsh
.
Tlsh
()
with
open
(
self
.
path
,
"rb"
)
as
f
:
for
buf
in
iter
(
lambda
:
f
.
read
(
32768
),
b
""
):
h
.
update
(
buf
)
h
.
final
()
try
:
self
.
_fuzzy_hash
=
h
.
hexdigest
()
except
ValueError
:
# File must contain a certain amount of randomness.
return
None
if
not
hasattr
(
self
,
"_fuzzy_hash"
):
self
.
_fuzzy_hash
=
calc
()
return
self
.
_fuzzy_hash
@abc.abstractmethod
...
...