Skip to content
Commits on Source (8)
......@@ -35,8 +35,7 @@ mypy:
- apt-get -qqy update
- apt-get -qqy install --no-install-recommends python3-pip
- pip3 install mypy
- mypy mat2 libmat2/*.py --ignore-missing-imports
- mypy --ignore-missing-imports ./nautilus/mat2.py
- mypy --ignore-missing-imports mat2 libmat2/*.py ./nautilus/mat2.py
tests:debian:
stage: test
......
# 0.6.0 - 2018-11-10
- Add lightweight cleaning for jpeg
- Add support for zip files
- Add support for mp4 files
- Improve metadata extraction for archives
- Improve robustness against corrupted embedded files
- Fix a possible security issue on some terminals (control character
injection via --show)
- Various internal cleanup/improvements
# 0.5.0 - 2018-10-23
- Video (.avi files for now) support, via FFmpeg, optionally
......@@ -5,7 +16,7 @@
- Processing files starting with a dash is now quicker
- Metadata are now displayed sorted
- Recursive metadata support for FLAC files
- Unsupported extensions aren't displayed in `/.mat -l` anymore
- Unsupported extensions aren't displayed in `./mat2 -l` anymore
- Improve the display when no metadata are found
- Update the logo according to the GNOME guidelines
- The testsuite is now runnable on the installed version of mat2
......
mat2 (0.6.0-1) unstable; urgency=medium
* New upstream release.
* d/control:
- Add debhelper-compat as build dependency.
- Declare that the build doesn't need root privileges.
- Mention '.mp4' support in description.
* d/compat:
- Drop obsolete compat control file.
* d/gitlab-ci.yml:
- Drop custom config, rely on the "Salsa CI Team".
* d/u/signing-key.asc:
- Use clean and minimal key to remove cruft. Thanks, lintian.
* d/watch:
- Use '@ARCHIVE_EXT@' substitution.
- Relax archive name regex, due to inconsistent upstream naming scheme.
-- Georg Faerber <georg@riseup.net> Sat, 10 Nov 2018 13:33:52 +0000
mat2 (0.5.0-1) unstable; urgency=medium
* New upstream release.
......
......@@ -66,3 +66,4 @@ Description: Metadata anonymisation toolkit v2
- Free Lossless Audio Codec (.flac)
- Torrent (.torrent)
- Audio Video Interleave (.avi)
- MPEG-4 (.mp4)
image: debian:sid
include: https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/salsa-ci.yml
build:
script:
- export DEBIAN_FRONTEND=noninteractive
- apt-get update
- apt-get -y upgrade
- apt-get -y dist-upgrade
- export BUILD_DEPENDS=`perl -ne 'next if /^#/; $p=(s/^Build-Depends:\s*/ / or (/^ / and $p)); s/,|\n|\([^)]+\)//mg; print if $p' < debian/control`
- apt-get install -y --no-install-recommends build-essential dpkg-dev fakeroot git-buildpackage lintian pristine-tar $BUILD_DEPENDS
- apt-get -y autoremove --purge
- apt-get clean
- rm -rf /var/lib/apt/lists/*
- dpkg-checkbuilddeps
- git checkout pristine-tar
- echo "make branch current for gbp"
- git checkout -B "$CI_COMMIT_REF_NAME" "$CI_COMMIT_SHA"
- gbp buildpackage -us -uc --git-ignore-branch --lintian-opts -iIE --pedantic
- mkdir results
- cp ../*.deb results/
- cp ../*.dsc results/
- cp ../*.changes results/
artifacts:
paths:
- results
expire_in: 3 days
extends: .build-unstable
lintian:
extends: .test-lintian
piuparts:
extends: .test-piuparts
autopkgtest:
extends: .test-autopkgtest
reprotest:
extends: .test-reprotest
......@@ -11,67 +11,67 @@ lQJUCkGFxyIpcDNRnf3ApjT4+QuEaw98tKvgRzCozFx2D94wsSFz858vZrdYj4pt
BYnPNFBEeGMRWkxuVwV1o+WKNJfwg2UcDghSkJGBCPCAiC2fDlfyk3njjLjxZHP/
mYNwUkxTlQolzknJZ5wg7vbE6r4rfQX4gTi3mNzYtqUAb17GIczOARZK7qdSapOb
rXPFGgX3Bd4FZJEaIq3p5xWcWS8fcMveoYO7m9cyaSkSQxAPrPZE3hDF1QARAQAB
tDFKdWxpZW4gKGp2b2lzaW4pIFZvaXNpbiA8anZvaXNpbkBvcGVubWFpbGJveC5u
ZXQ+iQI9BBMBCgAnAhsDBQkJZgGABQsJCAcDBRUKCQgLBRYCAwEAAh4BAheABQJV
vnKvAAoJEATQQegXGQHMRUQP/jr1Wy2x6lBTFZz/bg9b4G+FB98V/OcDmVNEoSnL
HKEg/X6Z/ePrLOiOxX15owqhWtXZbAC83SkkqZL0/xhZ8G8gVIHej4+Wm8gwza9q
x/vG9Z4KE2Ed1zrBtOOuVMjTbANaHCEmfbVdoEVkAU+s9P1V/vx2Zff4kFmOqJv3
btj+pRynjfwie5r53oJGfM5l6/3/VMazMuDSP6a5REBs5b19rgTWI2ocJvZxeemA
iinuklyPYjCNMMj43d5vSKZX8vV25mtTP1szbKHvaTHId5k4D8hgozkyVp5iAQUj
vGRgWJHFf+7bKDf8hd94dW6Aoi5gVIMEfLwo7/1HOIuv4wFWehqmg0N96dPj87JN
qyZbprBLxC1pjqXwhP4e+yp4s7pzPc6HgWQwxZSxodINDlvFlPydDvRwexcsG4Yw
OhLvuu2HohmjfjyjzsLkOkKML20nHvMSt5aBz2H1zdbGNKAoxpgBL03e06gkWH3R
fWOy7RBG0iZJrA9/MQH0i1A+BzGqQ8ZmcC1zPQNJHSN0duec4za+7FA820Y9wgEV
8Sv2dv2iyuouMjlALtyBHY1faJgVHDt6HzTfj6kxqGILGmrKz+VVIH/4QHq2I7VT
fauIujDFEdtVHojFe2edHaOx3pyfNoVRkLre98AucNS/BDgKLO8UdnviSo3QXRn/
tJmKtDJKdWxpZW4gKGp2b2lzaW4pIFZvaXNpbiA8anVsaWVuLnZvaXNpbkBkdXN0
cmkub3JnPokCQAQTAQoAKgIbAwUJCWYBgAULCQgHAwUVCgkICwUWAgMBAAIeAQIX
gAUCVb5yrwIZAQAKCRAE0EHoFxkBzKOaD/kBQIAeM/zCbwPkTqNjO3OLD1Jr49XD
lcsuRiBO286SjntWyipPyDy4hc5TDrt5R8wkC4htYdGk+iqogoDc6q1pT6VDvOKp
RVg4BypxwyRsn81RMNVz5LkqEavLAwpnwow3tHcElmg4SuFyEObZhpmhEsMpS5KS
zQ3lDnAISQi0hw/xb3HbRDA5rKLs8f57OUgh3Dbl2IskRGVyhBrBaZBfTJVKpodH
ZsZyH26z8dO19yLlukNVyVgifIiLc/Pd3Jh2LCXJFl4T++i7dmiZZqxWXVIS+EsS
TZxZHW3lxrRBzU1mVJK/udJbdm3lfz4kwmkUgqjA+3loPIiyd3m1kA3Z4s2DmZmE
GAR1r50+vQF0uwTluz64qPikt4X2eMQ4Fs3DSZVn9hA1oGU2g3+/V4h5OyVy5OSM
nvwkpOrV7M+5ie34Ush1fRgIlKyme5W/DKHPU8HZ+C19mLVhd/aGQREP/SJJagrV
xmAkSbdUGNbP7haDRm6/dxzE3FG5gZ4o5gKSAPfcwNTxPtdbIENfPRHWukkiovPN
RvJtm+rV3TljJAsHh0tL+Hre2MoTBA0Qi6wccYcW7mleyJ7HLPJA9unFpmATVeyX
F7pgbTYJ2RgbR1DebaPJ6RNtM57GHWAQ+wY0KUUJZi3luDb0goIZjXMoOPV18lFu
+IQH/bWic0Y7cbQsSnVsaWVuIChqdm9pc2luKSBWb2lzaW4gPGp2b2lzaW5Acmlz
ZXVwLm5ldD6JAj0EEwEKACcFAlWzyiYCGwMFCQlmAYAFCwkIBwMFFQoJCAsFFgID
AQACHgECF4AACgkQBNBB6BcZAczsXA/7BSyvfqxCJ5FP+9348nZu0+FvvcE/C/hf
LTFeyVJL85+JBi350RRcJmPIcrHZnWHqFG0uPO2UGCDATbTbxbVfL1Wvtqx51FGc
iZ3bnon0MtEyDZjnQPCDDgQq4SPPlh8gvZcdKVC1ySBYEnikd1ATfzrjpx/jP7dd
euV8vn1i8f7BbR9ySQJHdKd6/jMeQkvO3l1ZC7msFW41nTEbyuRbyRepWoZqThx8
PMtDRVH57SPrEew9Komtje+Svq5WD/NRAENjh76FKzId2b3xnQXaP6SwqeOlsFNG
0/SEkt+8MZh12/EJbOkeWUv6mGQP3hOFmcAc0pzzKv7kC8Irg9u9Fkt0zuZC7uvH
7B4kPo/whjTR26JUss95HNOeS2XZwZ8kl8xbsXv4gEs/HDo68fl2KEy7ZD6umX3N
R3Agp8SnPuNpfEwL7ZChtVzKa20StF7AuN4XxxeEpn+j6f8FdGzTSJhwP8IWoK/G
ihpO07uIagNwJxAuWBcGM9r3j6Df2muwdiSLGkjm8dpdlxi2gfbuUhxiXvv3jDMn
XCGveGeQoxwCRQEGwZMPYhOdvz4TTbNXNn/Olwj2Iv67xM6ZcvnzE4pq9RaVz9am
OwIA7fKpGpY8NB5qfMGzLqW9UpUDNMk7IIOKZipUk3LI4CtOr8tt9JUVSg+v3dqc
UtL8F1+KY/G5Ag0EVbPFoAEQAL+nkRJIiULqpUmfF+G0ilqmmOQeYWMwdaYWsHTk
qea0YH8icYfR7nLY5VorPRESst1/YIDNTnNv1zIs+eoekTtJqq08GZqFMFfudXMg
WtuMulT9c8dpasdrozWJ/K7XLaaxd7lTWWeg5WMldNWzKg0C3zDN/vcjhK3b6lju
RAGW/kBROyxrhPbaFmzcEiZx3g6wuly3ZogZ3c0ah82SyIsHdmIT+8omRdYhvH3+
Gbye4qVC4caj6weGjxP5BBxz5gKjroFgHOfMP7IS/9QAuOAdZT89fDdFUzySpDda
WZaxgqpBiCRoDZxU+uRsuXGIeDbxjh167qhlHihKi9eMmTiOPeb9RyRo3R9zjsEb
VXApUfGOD9zyHLV6W6eEm/RV/Z4sDqlzRR8BylKQlk2Q4uMVn1hE4d2IqJQTikR6
Pj+jdYTvpMhfnxpRG6i5UBXB7XiwgQPdAug77jBzNNmw5f+Nw6DYPao+q5vLm1o4
AzrI8/+/t/JMAb1Rh3GrUv3/ArCEfWmZqCROYu7uTlfOBD/qutHg0k9FEwROu2oM
Uk1vZTGFGWeX8Jq+S3rkErsPRAn2f2mEVYLahMhox3my4VkrcjvVR15NoE6Wuqao
vO4FWHNB4Ara9okjVzojOqMZGXE8SboG4d9FZp/4suPhnvCu6lbADEuMeA9QIvYK
GTZnABEBAAGJAiUEGAEKAA8FAlWzxaACGwwFCQlmAYAACgkQBNBB6BcZAczeYxAA
k14yHOWLV1gA1/nSc5wCcMTNzMnM2UD1BtytpJKQ/SXJcAWHgNk6HlH+pwEF7s13
9OsZj3GkjJyOIRGYP8ZEdfetrnVlcz41tS2DcITZEbfiRiFIaMBMiCB1CIKeEjHf
ERiF/Z+1lpCpSEaecwtuJrPySJO4XV2KplgxrVt5no26ahhBnf7kKP/gcaWs2Z1O
NzplnMFWt7Vq5T2Ayr1p4+Zt/UMov6rRvJ4IcvlHIF6qLhwVi5yHFej2bpB55WQ9
SceqV1CT1KWJ45I71DyBbPyW1NQUFtza+wyFI0F1ges4d/XvS4sdA3egvjbIdlwY
Hq42wpGbVB+w2R03fL0n5wgRaSZlQsNAJdf3gFQkNFgBLnXQreTyVJq5TrzTCzNj
TWIvhmJoFteX+Prh1t3Wyxrj5zYbL4GfN1w0gl5QGpWLPORn/UeoboR9tjDKga2d
M4d/q/qeLemIZE+box4WFrmv35CZ47TbN7dz1X6nnSNkqUaMtZVqWJMFPyYezmSG
KwdLl5xP54IQle/EhGoWVFViB//tfAaRTqeXBWK/U/bY+dvEfQZwpeswM2sy8rYg
CvESHird2rOqtA7Sn/FkDRa23nrtZVJsB/dtBGEc5vNSJ1fI24iAY0UZE+ntcoTj
INpYmBCcTk9aF3APuer6h65IbSZxlesI660drDlxHe4=
=Ijk3
tCxKdWxpZW4gKGp2b2lzaW4pIFZvaXNpbiA8anZvaXNpbkByaXNldXAubmV0PokC
PQQTAQoAJwUCVbPKJgIbAwUJCWYBgAULCQgHAwUVCgkICwUWAgMBAAIeAQIXgAAK
CRAE0EHoFxkBzOxcD/sFLK9+rEInkU/73fjydm7T4W+9wT8L+F8tMV7JUkvzn4kG
LfnRFFwmY8hysdmdYeoUbS487ZQYIMBNtNvFtV8vVa+2rHnUUZyJndueifQy0TIN
mOdA8IMOBCrhI8+WHyC9lx0pULXJIFgSeKR3UBN/OuOnH+M/t1165Xy+fWLx/sFt
H3JJAkd0p3r+Mx5CS87eXVkLuawVbjWdMRvK5FvJF6lahmpOHHw8y0NFUfntI+sR
7D0qia2N75K+rlYP81EAQ2OHvoUrMh3ZvfGdBdo/pLCp46WwU0bT9ISS37wxmHXb
8Qls6R5ZS/qYZA/eE4WZwBzSnPMq/uQLwiuD270WS3TO5kLu68fsHiQ+j/CGNNHb
olSyz3kc055LZdnBnySXzFuxe/iASz8cOjrx+XYoTLtkPq6Zfc1HcCCnxKc+42l8
TAvtkKG1XMprbRK0XsC43hfHF4Smf6Pp/wV0bNNImHA/whagr8aKGk7Tu4hqA3An
EC5YFwYz2vePoN/aa7B2JIsaSObx2l2XGLaB9u5SHGJe+/eMMydcIa94Z5CjHAJF
AQbBkw9iE52/PhNNs1c2f86XCPYi/rvEzply+fMTimr1FpXP1qY7AgDt8qkaljw0
Hmp8wbMupb1SlQM0yTsgg4pmKlSTcsjgK06vy230lRVKD6/d2pxS0vwXX4pj8bQx
SnVsaWVuIChqdm9pc2luKSBWb2lzaW4gPGp2b2lzaW5Ab3Blbm1haWxib3gubmV0
PokCHwQwAQoACQUCV7Qc3AIdAAAKCRAE0EHoFxkBzMJPEACbYe3MHK4qzKMZdCz/
OXffUCvrjXYiZ0yJnz0/7sE2iP7J6vsOMlF7HFk/ReNZTmYHacnJl7S83tCipojh
/x2KO+uti7n3fyA6iTvFU2oFh0XMcp6VO4gi0pQdXGZKUosI0z3VMoIkV6trT00P
OlwW/8yk6wL3fkqB2YUVwS9sxVOA0QIRk5o1EmTGhS7gd1e5C5yl/NChK67Nh3Cm
Hvctld6qaRNCaIyGiHK6MihgtjGDtuxqDN/iwColNFpWO6ccjR55KK9qWd2cwauR
O0HTTiUZ+dpYFwWNQ/PHyo4W06ea9TXRB7w28M4391GOEF6ZJprqHumXB3wwwcz1
7zdz+qKcVAQhEYGRjl/4ke56DuhI2/9KxS8PnfZPLydmLuUkRUZ3p3oc0C1yK05O
e898tlcbnIAw6AgheI3//IVHbgitN7pf8TP7gFLI+M6d6gmrSHauIfGFGkLKn8Eu
obF8x8jB4Tvn3rg+Sv4VpGYGn/hiF5l/JIC/IkboU4q85wv0XJwbtqgm16Q2Itwb
MNcmLZMFWHqKktLnw1sLbQJeB/bGO70ML/sFY5745NU6wZ1qpDxP6Lg1gopLTcXY
7PG4fQc1ZEZeqgBNAn3BRD6zsfP4ioBZ+2J83Cmp2SkaG1McW3WZtWjb+/Yu44WP
9qhYBPgM8HPjacjN19zO6sSeGrQySnVsaWVuIChqdm9pc2luKSBWb2lzaW4gPGp1
bGllbi52b2lzaW5AZHVzdHJpLm9yZz6JAkAEEwEKACoCGwMFCQlmAYAFCwkIBwMF
FQoJCAsFFgIDAQACHgECF4AFAlW+cq8CGQEACgkQBNBB6BcZAcyjmg/5AUCAHjP8
wm8D5E6jYztziw9Sa+PVw5XLLkYgTtvOko57VsoqT8g8uIXOUw67eUfMJAuIbWHR
pPoqqIKA3OqtaU+lQ7ziqUVYOAcqccMkbJ/NUTDVc+S5KhGrywMKZ8KMN7R3BJZo
OErhchDm2YaZoRLDKUuSks0N5Q5wCEkItIcP8W9x20QwOayi7PH+ezlIIdw25diL
JERlcoQawWmQX0yVSqaHR2bGch9us/HTtfci5bpDVclYInyIi3Pz3dyYdiwlyRZe
E/vou3ZomWasVl1SEvhLEk2cWR1t5ca0Qc1NZlSSv7nSW3Zt5X8+JMJpFIKowPt5
aDyIsnd5tZAN2eLNg5mZhBgEda+dPr0BdLsE5bs+uKj4pLeF9njEOBbNw0mVZ/YQ
NaBlNoN/v1eIeTslcuTkjJ78JKTq1ezPuYnt+FLIdX0YCJSspnuVvwyhz1PB2fgt
fZi1YXf2hkERD/0iSWoK1cZgJEm3VBjWz+4Wg0Zuv3ccxNxRuYGeKOYCkgD33MDU
8T7XWyBDXz0R1rpJIqLzzUbybZvq1d05YyQLB4dLS/h63tjKEwQNEIusHHGHFu5p
XsiexyzyQPbpxaZgE1Xslxe6YG02CdkYG0dQ3m2jyekTbTOexh1gEPsGNClFCWYt
5bg29IKCGY1zKDj1dfJRbviEB/21onNGO3G5Ag0EVbPFoAEQAL+nkRJIiULqpUmf
F+G0ilqmmOQeYWMwdaYWsHTkqea0YH8icYfR7nLY5VorPRESst1/YIDNTnNv1zIs
+eoekTtJqq08GZqFMFfudXMgWtuMulT9c8dpasdrozWJ/K7XLaaxd7lTWWeg5WMl
dNWzKg0C3zDN/vcjhK3b6ljuRAGW/kBROyxrhPbaFmzcEiZx3g6wuly3ZogZ3c0a
h82SyIsHdmIT+8omRdYhvH3+Gbye4qVC4caj6weGjxP5BBxz5gKjroFgHOfMP7IS
/9QAuOAdZT89fDdFUzySpDdaWZaxgqpBiCRoDZxU+uRsuXGIeDbxjh167qhlHihK
i9eMmTiOPeb9RyRo3R9zjsEbVXApUfGOD9zyHLV6W6eEm/RV/Z4sDqlzRR8BylKQ
lk2Q4uMVn1hE4d2IqJQTikR6Pj+jdYTvpMhfnxpRG6i5UBXB7XiwgQPdAug77jBz
NNmw5f+Nw6DYPao+q5vLm1o4AzrI8/+/t/JMAb1Rh3GrUv3/ArCEfWmZqCROYu7u
TlfOBD/qutHg0k9FEwROu2oMUk1vZTGFGWeX8Jq+S3rkErsPRAn2f2mEVYLahMho
x3my4VkrcjvVR15NoE6WuqaovO4FWHNB4Ara9okjVzojOqMZGXE8SboG4d9FZp/4
suPhnvCu6lbADEuMeA9QIvYKGTZnABEBAAGJAiUEGAEKAA8FAlWzxaACGwwFCQlm
AYAACgkQBNBB6BcZAczeYxAAk14yHOWLV1gA1/nSc5wCcMTNzMnM2UD1BtytpJKQ
/SXJcAWHgNk6HlH+pwEF7s139OsZj3GkjJyOIRGYP8ZEdfetrnVlcz41tS2DcITZ
EbfiRiFIaMBMiCB1CIKeEjHfERiF/Z+1lpCpSEaecwtuJrPySJO4XV2KplgxrVt5
no26ahhBnf7kKP/gcaWs2Z1ONzplnMFWt7Vq5T2Ayr1p4+Zt/UMov6rRvJ4IcvlH
IF6qLhwVi5yHFej2bpB55WQ9SceqV1CT1KWJ45I71DyBbPyW1NQUFtza+wyFI0F1
ges4d/XvS4sdA3egvjbIdlwYHq42wpGbVB+w2R03fL0n5wgRaSZlQsNAJdf3gFQk
NFgBLnXQreTyVJq5TrzTCzNjTWIvhmJoFteX+Prh1t3Wyxrj5zYbL4GfN1w0gl5Q
GpWLPORn/UeoboR9tjDKga2dM4d/q/qeLemIZE+box4WFrmv35CZ47TbN7dz1X6n
nSNkqUaMtZVqWJMFPyYezmSGKwdLl5xP54IQle/EhGoWVFViB//tfAaRTqeXBWK/
U/bY+dvEfQZwpeswM2sy8rYgCvESHird2rOqtA7Sn/FkDRa23nrtZVJsB/dtBGEc
5vNSJ1fI24iAY0UZE+ntcoTjINpYmBCcTk9aF3APuer6h65IbSZxlesI660drDlx
He4=
=bpEm
-----END PGP PUBLIC KEY BLOCK-----
version=4
opts="pgpmode=next" https://0xacab.org/jvoisin/mat2/tags (?:.*/)mat2-@ANY_VERSION@\.tar\.xz
opts="pgpmode=next" https://0xacab.org/jvoisin/mat2/tags (?:.*/)mat.+-@ANY_VERSION@@ARCHIVE_EXT@
opts="pgpmode=previous" https://0xacab.org/jvoisin/mat2/tags (?:.*/)mat2-@ANY_VERSION@@SIGNATURE_EXT@
opts="pgpmode=previous" https://0xacab.org/jvoisin/mat2/tags (?:.*/)mat.+-@ANY_VERSION@@SIGNATURE_EXT@
.TH MAT2 "1" "October 2018" "MAT2 0.5.0" "User Commands"
.TH MAT2 "1" "November 2018" "MAT2 0.6.0" "User Commands"
.SH NAME
mat2 \- the metadata anonymisation toolkit 2
......
......@@ -67,6 +67,31 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
return metadata
def get_meta(self) -> Dict[str, Union[str, dict]]:
    """ Collect the metadata of the files stored inside the archive.

    Every member is extracted into a temporary folder and handed to the
    parser returned by `parser_factory.get_parser` for it. Members for
    which no parser is returned, or whose parser reports no metadata,
    are left out of the result.

    :return: a dict mapping each member's name inside the archive to the
        metadata reported by its parser.
    """
    meta = dict()  # type: Dict[str, Union[str, dict]]

    # The temporary folder is deleted when the `with` block exits, even
    # when the extraction or one of the embedded parsers raises, so no
    # extracted (and possibly sensitive) file is left behind.
    with zipfile.ZipFile(self.filename) as zin, \
            tempfile.TemporaryDirectory() as temp_folder:
        for item in zin.infolist():
            # `endswith` instead of `[-1]`: an empty member name must not
            # raise an IndexError.
            if item.filename.endswith('/'):  # pragma: no cover
                # `is_dir` is added in Python3.6
                continue  # don't keep empty folders

            zin.extract(member=item, path=temp_folder)
            full_path = os.path.join(temp_folder, item.filename)

            tmp_parser, _ = parser_factory.get_parser(full_path)  # type: ignore
            if not tmp_parser:
                continue

            local_meta = tmp_parser.get_meta()
            if local_meta:
                meta[item.filename] = local_meta

    return meta
def remove_all(self) -> bool:
# pylint: disable=too-many-branches
......@@ -113,7 +138,13 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
abort = True
continue
if tmp_parser:
tmp_parser.remove_all()
if tmp_parser.remove_all() is False:
logging.warning("In file %s, something went wrong \
with the cleaning of %s \
(format: %s)",
self.filename, item.filename, mtype)
abort = True
continue
os.rename(tmp_parser.output_filename, full_path)
zinfo = zipfile.ZipInfo(item.filename) # type: ignore
......@@ -126,3 +157,8 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
os.remove(self.output_filename)
return False
return True
class ZipParser(ArchiveBasedAbstractParser):
    """ Parser for plain zip archives.

    Only the handled MIME type is declared here; everything else comes
    from `ArchiveBasedAbstractParser`.
    """
    mimetypes = {'application/zip', }
......@@ -53,15 +53,14 @@ class ExiftoolParser(abstract.AbstractParser):
return True
def _get_exiftool_path() -> str: # pragma: no cover
exiftool_path = '/usr/bin/exiftool'
if os.path.isfile(exiftool_path):
if os.access(exiftool_path, os.X_OK):
return exiftool_path
possible_pathes = {
'/usr/bin/exiftool', # debian/fedora
'/usr/bin/vendor_perl/exiftool', # archlinux
}
# ArchLinux
exiftool_path = '/usr/bin/vendor_perl/exiftool'
if os.path.isfile(exiftool_path):
if os.access(exiftool_path, os.X_OK):
return exiftool_path
for possible_path in possible_pathes:
if os.path.isfile(possible_path):
if os.access(possible_path, os.X_OK):
return possible_path
raise RuntimeError("Unable to find exiftool")
......@@ -6,7 +6,7 @@ import cairo
import gi
gi.require_version('GdkPixbuf', '2.0')
from gi.repository import GdkPixbuf
from gi.repository import GdkPixbuf, GLib
from . import exiftool
......@@ -50,15 +50,21 @@ class GdkPixbufAbstractParser(exiftool.ExiftoolParser):
def __init__(self, filename):
super().__init__(filename)
if imghdr.what(filename) != self._type: # better safe than sorry
# we can't use imghdr here because of https://bugs.python.org/issue28591
try:
GdkPixbuf.Pixbuf.new_from_file(self.filename)
except GLib.GError:
raise ValueError
def remove_all(self) -> bool:
if self.lightweight_cleaning:
return self._lightweight_cleanup()
_, extension = os.path.splitext(self.filename)
pixbuf = GdkPixbuf.Pixbuf.new_from_file(self.filename)
if extension.lower() == '.jpg':
extension = '.jpeg' # gdk is picky
pixbuf.savev(self.output_filename, extension[1:], [], [])
pixbuf.savev(self.output_filename, type=extension[1:], option_keys=[], option_values=[])
return True
......
......@@ -301,7 +301,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
Yes, I know that parsing xml with regexp ain't pretty,
be my guest and fix it if you want.
"""
metadata = {}
metadata = super().get_meta()
zipin = zipfile.ZipFile(self.filename)
for item in zipin.infolist():
if item.filename.startswith('docProps/') and item.filename.endswith('.xml'):
......
......@@ -2,10 +2,37 @@ import os
import subprocess
import logging
from typing import Dict, Union
from . import exiftool
class AVIParser(exiftool.ExiftoolParser):
class AbstractFFmpegParser(exiftool.ExiftoolParser):
    """ Shared base for the parsers that rely on FFmpeg to do the
    cleaning, which mostly means the video ones.
    """

    def remove_all(self) -> bool:
        """ Run FFmpeg to copy `self.filename` into `self.output_filename`
        with the metadata-stripping options listed below.

        :return: True if FFmpeg exited successfully, False (after logging
            an error) if it exited with a non-zero status.
        """
        command = [_get_ffmpeg_path()]
        command += [
            '-i', self.filename,       # the file to read
            '-y',                      # overwrite the output file if it already exists
            '-map', '0',               # keep every stream of the input in the output
            '-codec', 'copy',          # copy the streams as-is, no re-encoding (speed!)
            '-loglevel', 'panic',      # silence the logs
            '-hide_banner',            # silence the banner
            '-map_metadata', '-1',     # drop the superficial metadata
            '-map_chapters', '-1',     # drop the chapters
            '-disposition', '0',       # drop the dispositions (see ffmpeg's manpage)
            '-fflags', '+bitexact',    # prevent ffmpeg from adding metadata
            '-flags:v', '+bitexact',   # prevent ffmpeg from adding metadata
            '-flags:a', '+bitexact',   # prevent ffmpeg from adding metadata
        ]
        command.append(self.output_filename)

        try:
            subprocess.check_call(command)
        except subprocess.CalledProcessError as err:
            logging.error("Something went wrong during the processing of %s: %s", self.filename, err)
            return False
        else:
            return True
class AVIParser(AbstractFFmpegParser):
mimetypes = {'video/x-msvideo', }
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
'FileSize', 'FileModifyDate', 'FileAccessDate',
......@@ -24,25 +51,55 @@ class AVIParser(exiftool.ExiftoolParser):
'SampleRate', 'AvgBytesPerSec', 'BitsPerSample',
'Duration', 'ImageSize', 'Megapixels'}
class MP4Parser(AbstractFFmpegParser):
mimetypes = {'video/mp4', }
meta_whitelist = {'AudioFormat', 'AvgBitrate', 'Balance', 'TrackDuration',
'XResolution', 'YResolution', 'ExifToolVersion',
'FileAccessDate', 'FileInodeChangeDate', 'FileModifyDate',
'FileName', 'FilePermissions', 'MIMEType', 'FileType',
'FileTypeExtension', 'Directory', 'ImageWidth',
'ImageSize', 'ImageHeight', 'FileSize', 'SourceFile',
'BitDepth', 'Duration', 'AudioChannels',
'AudioBitsPerSample', 'AudioSampleRate', 'Megapixels',
'MovieDataSize', 'VideoFrameRate', 'MediaTimeScale',
'SourceImageHeight', 'SourceImageWidth',
'MatrixStructure', 'MediaDuration'}
meta_key_value_whitelist = { # some metadata are mandatory :/
'CreateDate': '0000:00:00 00:00:00',
'CurrentTime': '0 s',
'MediaCreateDate': '0000:00:00 00:00:00',
'MediaLanguageCode': 'und',
'MediaModifyDate': '0000:00:00 00:00:00',
'ModifyDate': '0000:00:00 00:00:00',
'OpColor': '0 0 0',
'PosterTime': '0 s',
'PreferredRate': '1',
'PreferredVolume': '100.00%',
'PreviewDuration': '0 s',
'PreviewTime': '0 s',
'SelectionDuration': '0 s',
'SelectionTime': '0 s',
'TrackCreateDate': '0000:00:00 00:00:00',
'TrackModifyDate': '0000:00:00 00:00:00',
'TrackVolume': '0.00%',
}
def remove_all(self) -> bool:
cmd = [_get_ffmpeg_path(),
'-i', self.filename, # input file
'-y', # overwrite existing output file
'-loglevel', 'panic', # Don't show log
'-hide_banner', # hide the banner
'-codec', 'copy', # don't decode anything, just copy (speed!)
'-map_metadata', '-1', # remove supperficial metadata
'-map_chapters', '-1', # remove chapters
'-fflags', '+bitexact', # don't add any metadata
'-flags:v', '+bitexact', # don't add any metadata
'-flags:a', '+bitexact', # don't add any metadata
self.output_filename]
try:
subprocess.check_call(cmd)
except subprocess.CalledProcessError as e:
logging.error("Something went wrong during the processing of %s: %s", self.filename, e)
return False
return True
logging.warning('The format of "%s" (video/mp4) has some mandatory '
'metadata fields; mat2 filled them with standard data.',
self.filename)
return super().remove_all()
def get_meta(self) -> Dict[str, Union[str, dict]]:
    """ Return the parent's metadata minus the entries listed in
    `meta_key_value_whitelist` that carry exactly the whitelisted value.

    A whitelisted key holding any other value is still reported.
    """
    reported = dict()  # type: Dict[str, Union[str, dict]]
    for name, content in super().get_meta().items():
        is_whitelisted_pair = (
            name in self.meta_key_value_whitelist
            and content == self.meta_key_value_whitelist[name]
        )
        if not is_whitelisted_pair:
            reported[name] = content
    return reported
def _get_ffmpeg_path() -> str: # pragma: no cover
......
......@@ -6,6 +6,7 @@ import sys
import mimetypes
import argparse
import logging
import unicodedata
try:
from libmat2 import parser_factory, UNSUPPORTED_EXTENSIONS
......@@ -14,12 +15,14 @@ except ValueError as e:
print(e)
sys.exit(1)
__version__ = '0.5.0'
__version__ = '0.6.0'
# Make pyflakes happy
assert Tuple
assert Union
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING)
def __check_file(filename: str, mode: int=os.R_OK) -> bool:
if not os.path.exists(filename):
......@@ -81,6 +84,15 @@ def __print_meta(filename: str, metadata: dict, depth: int=1):
if isinstance(v, dict):
__print_meta(k, v, depth+1)
continue
# Remove control characters
# We might use 'Cc' instead of 'C', but better safe than sorry
# https://www.unicode.org/reports/tr44/#GC_Values_Table
try:
v = ''.join(ch for ch in v if not unicodedata.category(ch).startswith('C'))
except TypeError:
pass # for things that aren't iterable
try: # FIXME this is ugly.
print(padding + " %s: %s" % (k, v))
except UnicodeEncodeError:
......
......@@ -14,7 +14,7 @@ thread, so we'll have to resort to using a `queue` to pass "messages" around.
import queue
import threading
from typing import Tuple
from typing import Tuple, Optional, List
from urllib.parse import unquote
import gi
......@@ -25,10 +25,8 @@ from gi.repository import Nautilus, GObject, Gtk, Gio, GLib, GdkPixbuf
from libmat2 import parser_factory
# make pyflakes happy
assert Tuple
def _remove_metadata(fpath):
def _remove_metadata(fpath) -> Tuple[bool, Optional[str]]:
""" This is a simple wrapper around libmat2, because it's
easier and cleaner this way.
"""
......@@ -63,7 +61,7 @@ class ColumnExtension(GObject.GObject, Nautilus.MenuProvider, Nautilus.LocationW
self.infobar.get_content_area().pack_start(self.infobar_hbox, True, True, 0)
self.infobar.show_all()
def get_widget(self, uri, window):
def get_widget(self, uri, window) -> Gtk.Widget:
""" This is the method that we have to implement (because we're
a LocationWidgetProvider) in order to show our infobar.
"""
......@@ -228,7 +226,7 @@ class ColumnExtension(GObject.GObject, Nautilus.MenuProvider, Nautilus.LocationW
""" https://bugzilla.gnome.org/show_bug.cgi?id=784278 """
return None
def get_file_items(self, window, files):
def get_file_items(self, window, files) -> Optional[List[Nautilus.MenuItem]]:
""" This method is the one allowing us to create a menu item.
"""
# Do not show the menu item if not a single file has a chance to be
......
......@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
setuptools.setup(
name="mat2",
version='0.5.0',
version='0.6.0',
author="Julien (jvoisin) Voisin",
author_email="julien.voisin+mat2@dustri.org",
description="A handy tool to trash your metadata",
......
......@@ -121,7 +121,7 @@ class TestGetMeta(unittest.TestCase):
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.pdf'],
stdout=subprocess.PIPE)
stdout, _ = proc.communicate()
self.assertIn(b'producer: pdfTeX-1.40.14', stdout)
self.assertIn(b'Producer: pdfTeX-1.40.14', stdout)
def test_png(self):
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.png'],
......@@ -174,3 +174,10 @@ class TestGetMeta(unittest.TestCase):
self.assertIn(b'genre: Python', stdout)
self.assertIn(b'i am a : various comment', stdout)
self.assertIn(b'artist: jvoisin', stdout)
class TestControlCharInjection(unittest.TestCase):
    """ Pin the exact bytes `--show` prints for the `control_chars.jpg`
    fixture (presumably a file whose comment embeds control characters,
    going by its name).
    """

    def test_jpg(self):
        command = mat2_binary + ['--show', './tests/data/control_chars.jpg']
        process = subprocess.Popen(command, stdout=subprocess.PIPE)
        output = process.communicate()[0]
        # The comment must show up as plain `GQ` directly followed by
        # the end of the line.
        self.assertIn(b'Comment: GQ\n', output)
......@@ -4,6 +4,7 @@ import unittest
import shutil
import os
import logging
import zipfile
from libmat2 import pdf, images, audio, office, parser_factory, torrent
from libmat2 import harmless, video
......@@ -222,3 +223,17 @@ class TestCorruptedFiles(unittest.TestCase):
p = video.AVIParser('./tests/data/--output.avi')
self.assertFalse(p.remove_all())
os.remove('./tests/data/--output.avi')
def test_zip(self):
    """ Build a zip that includes `embedded_corrupted.docx`, then check
    that its metadata can still be read and that `remove_all` reports
    a failure.
    """
    zip_path = './tests/data/dirty.zip'
    with zipfile.ZipFile(zip_path, 'w') as zout:
        # Registered as soon as the archive exists on disk, so that a
        # failing assertion (or a missing fixture) doesn't leave
        # `dirty.zip` behind in tests/data for the following runs.
        self.addCleanup(os.remove, zip_path)
        zout.write('./tests/data/dirty.flac')
        zout.write('./tests/data/dirty.docx')
        zout.write('./tests/data/dirty.jpg')
        zout.write('./tests/data/embedded_corrupted.docx')

    p, mimetype = parser_factory.get_parser(zip_path)
    self.assertEqual(mimetype, 'application/zip')

    # Metadata of the members are reported under their name in the
    # archive, recursively for nested archives such as the docx.
    meta = p.get_meta()
    self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !')
    self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')

    self.assertFalse(p.remove_all())