Unverified Commit 3cc78283 authored by Pierre-Elliott Bécue's avatar Pierre-Elliott Bécue 🚼
Browse files

Archive old mailboxes

parent 2809c54f
Pipeline #328418 passed with stage
in 5 minutes and 48 seconds
......@@ -16,11 +16,14 @@ import os.path
from email.parser import BytesHeaderParser
from email.utils import getaddresses
from nm2.lib.fileops import is_gzipped
# TODO: once nm.debian.org is python3, move most of this code to process/ and
# make it unit-tested
# Version string of this script.
VERSION = "0.2"
class umask_override:
"""
Context manager that temporarily overrides the umask during its lifetime
......@@ -80,6 +83,9 @@ class IncomingMessage:
self.msg.add_header("NM-Archive-Lookup-History", "exception: {}: {}".format(exc.__class__.__name__, str(exc)))
def deliver_to_mailbox(self, pathname):
if is_gzipped(pathname):
print(f"{pathname} is archived, ignoring the incoming message.")
return
with umask_override(0o037) as uo:
with open(pathname, "ab") as out:
out.write(self.msg.as_string(True).encode("utf-8"))
......@@ -114,37 +120,6 @@ class IncomingMessage:
return None
def lookup_mailbox_filename(self, key, sqlite=False):
    """
    Look up the mailbox file name of an active process in the database.

    A key containing '=' is treated as an email address in which '@' was
    replaced by '=' and is matched against person.email; any other key is
    matched against person.uid.

    Returns the archive key with ".mbox" appended, or None when no active
    process matches. When several rows match, the last one returned by the
    cursor wins.
    """
    db, Q = open_db(sqlite)
    cursor = db.cursor()
    base_query = (
        "\n"
        "SELECT pr.archive_key\n"
        "FROM person p\n"
        "JOIN process pr ON pr.person_id = p.id\n"
        "WHERE pr.is_active\n"
    )

    if "=" not in key:
        # Lookup uid
        self.log_lookup("lookup by uid '%s'" % key)
        cursor.execute(Q(base_query + "AND p.uid=%s"), (key,))
    else:
        # Lookup email
        address = key.replace("=", "@")
        self.log_lookup("lookup by email '%s'" % address)
        cursor.execute(Q(base_query + "AND p.email=%s"), (address,))

    # Drain the cursor, keeping the single column of the last row seen.
    archive_key = None
    for (archive_key,) in cursor:
        pass

    if archive_key is None:
        return None
    return archive_key + ".mbox"
def get_dest_pathname(msg, sqlite=False):
"""
Return a couple (destdir, filename) with the default directory and mailbox
......@@ -159,13 +134,7 @@ def get_dest_pathname(msg, sqlite=False):
# New-style processes
return "/srv/nm.debian.org/mbox/processes", "process-{}.mbox".format(key)
else:
# Old-style processes, need a DB lookup
fname = msg.lookup_mailbox_filename(key, sqlite)
if fname is None:
msg.log_lookup("Key {} not found in the database".format(repr(key)))
return "/srv/nm.debian.org/mbox/", "archive-failsafe.mbox"
else:
return "/srv/nm.debian.org/mbox/applicants", fname
return None, None
except Exception as e:
msg.log_exception(e)
return "/srv/nm.debian.org/mbox/", "archive-failsafe.mbox"
......@@ -183,6 +152,9 @@ def main():
msg = IncomingMessage(sys.stdin.buffer)
destdir, filename = get_dest_pathname(msg, args.sqlite)
if filename is None:
return 1
# Override destdir if requested
if args.dest: destdir = args.dest
......
#!/usr/bin/python3
"""
Small library for file manipulation
"""
import os
import gzip
import lzma
import shutil
from contextlib import contextmanager
GZIP_MAGIC_NUMBER = "1f8b"


def is_gzipped(pathname):
    """
    Tell whether the file pointed by `pathname` is very probably gzipped.

    Only the first two bytes of the file are read and compared with the gzip
    magic number, so this is a heuristic and not a validation of the content.

    Returns True when the magic number matches and False otherwise
    (including for files shorter than two bytes).

    Raises OSError (e.g. FileNotFoundError) if the file cannot be opened.
    """
    # This module defines its own `open` further down, which shadows the
    # builtin at module level and returns a context manager rather than a file
    # object. Go through `builtins` to get a real file object.
    import builtins

    with builtins.open(pathname, "rb") as fd:
        file_begin = fd.read(2).hex()
    return file_begin == GZIP_MAGIC_NUMBER
def do_gzip_file(pathname):
    """
    Actually gzip a file from pathname to pathname + ".gz"

    Doesn't do anything if the file seems already gzipped. Otherwise the
    content is compressed into pathname + ".gz" (replacing any file already
    there) and the original file is removed once the archive is fully written.

    Returns True when the file has been compressed, None when it was skipped
    because it looks already gzipped.

    Raises OSError if the source cannot be read or the archive cannot be
    written; in that case the source file is left untouched.
    """
    # This module defines its own `open` further down, which shadows the
    # builtin at module level; use the builtin explicitly for the plain file.
    import builtins

    if is_gzipped(pathname):
        print(f"{pathname} looks already gzipped")
        return None

    gzip_pathname = f"{pathname}.gz"

    # Open the source first, so that a missing or unreadable source fails
    # before any previous archive is overwritten.
    with builtins.open(pathname, "rb") as f_in:
        try:
            # Mode "wb" truncates a stale archive, no prior unlink is needed.
            with gzip.open(gzip_pathname, "wb") as f_out:
                shutil.copyfileobj(f_in, f_out)
        except BaseException:
            # Never leave a truncated archive behind that could later be
            # mistaken for a complete one.
            if os.path.exists(gzip_pathname):
                os.unlink(gzip_pathname)
            raise

    # Reached only when the archive was written and closed without error.
    os.unlink(pathname)
    return True
@contextmanager
def open(pathname, mode="r"):
    """
    Opens a file with the appropriate compression library if needed

    The library is picked from the suffix of `pathname`: gzip for ".gz", lzma
    for ".xz", and the builtin open() for anything else. This is a context
    manager: the file object is yielded and closed on exit.

    NOTE: gzip.open() and lzma.open() treat a mode without "t" as binary,
    while the builtin open() treats it as text; pass an explicit "rb"/"rt"
    to get the same kind of stream for every suffix.
    """
    # This function shadows the builtin `open` inside this module, so the
    # plain-file case must reach the builtin explicitly instead of recursing
    # into this context manager.
    import builtins

    if pathname.endswith(".gz"):
        opener = gzip.open
    elif pathname.endswith(".xz"):
        opener = lzma.open
    else:
        opener = builtins.open

    with opener(pathname, mode) as my_fd:
        yield my_fd
......@@ -5,6 +5,7 @@ from backend.housekeeping import Housekeeper
from .maintenance import (
ping_stuck_processes,
submit_rt_ticket_for_fd_approved_processes,
archive_old_processes_mailbox,
)
# Stage names for this module's housekeeping tasks; the tasks below implement
# run_main, presumably the handler for the "main" stage — TODO confirm.
STAGES = ["main"]
......@@ -24,3 +25,12 @@ class OpenApprovedRTTickets(hk.Task):
def run_main(self, stage):
approval_to_rt_delay = datetime.timedelta(days=7)
submit_rt_ticket_for_fd_approved_processes(approval_to_rt_delay)
class ArchiveOldProcessesMbox(hk.Task):
    """
    Housekeeping task archiving the mailboxes of processes closed between 28
    and 56 days ago.
    """
    DEPENDS = [Housekeeper]

    def run_main(self, stage):
        # First argument: minimum time since closing; second: maximum time.
        archive_old_processes_mailbox(
            datetime.timedelta(days=28),
            datetime.timedelta(days=56),
        )
from __future__ import annotations
from typing import Optional
from django.utils.timezone import now
import datetime
from nm2.lib import fileops
from . import models as pmodels
from . import ops as pops
import datetime
def ping_stuck_processes(stuck_cutoff, audit_author, logdate=None):
......@@ -165,3 +168,36 @@ def send_rt_ticket(process, statement):
)
op.rt_text = statement.statement
op.execute()
def archive_old_processes_mailbox(closed_archival_delay: datetime.timedelta,
                                  closed_archival_threshold: datetime.timedelta):
    """
    Archive the mailboxes of processes that have been closed for at least
    closed_archival_delay and at most closed_archival_threshold.

    Both arguments are durations counted back from the current time; the
    upper bound keeps each run from revisiting every process ever closed.
    """
    # `now` is the django.utils.timezone function: it has to be called, and is
    # called only once so that both bounds share the same reference instant.
    current_time = now()
    most_recent = current_time - closed_archival_delay
    oldest = current_time - closed_archival_threshold

    processes = pmodels.Process.objects.filter(
        closed_time__lte=most_recent, closed_time__gte=oldest
    )
    for process in processes:
        archive_old_process_mailbox(process)
def archive_old_process_mailbox(process):
    """
    Archive the mailbox of a process by making it a GZip file

    Returns True if the mailbox already looked gzipped or has just been
    gzipped, and False if nothing was archived: the process has no mailbox
    file on disk, or it is not closed.
    """
    import os

    mailbox_path = process.mailbox_file
    # fileops.do_gzip_file() writes the archive to mailbox_path + ".gz" and
    # removes the original, so a later run over the same process finds no file
    # here. Without this guard fileops.is_gzipped() would raise and abort the
    # caller's loop over all processes.
    # NOTE(review): also covers an empty/None mailbox_file — confirm whether
    # the model can return that.
    if not mailbox_path or not os.path.exists(mailbox_path):
        return False

    if fileops.is_gzipped(mailbox_path):
        return True

    if not process.closed:
        return False

    fileops.do_gzip_file(mailbox_path)
    return True
......@@ -15,7 +15,7 @@ from backend.shortcuts import build_absolute_uri
from backend.mixins import VisitorMixin, VisitPersonMixin, TokenAuthMixin
from backend import const
import backend.models as bmodels
from nm2.lib import assets
from nm2.lib import assets, fileops
import nm2.lib.forms
from .mixins import VisitProcessMixin, RequirementMixin, StatementMixin
import datetime
......@@ -528,7 +528,7 @@ class MailArchive(VisitProcessMixin, View):
# The last mtime argument seems to only be supported in python 2.7
outfd = GzipFile(user_fname, "wb", 9, res) # , os.path.getmtime(fname))
try:
with open(fname, "rb") as infd:
with fileops.open(fname, "rb") as infd:
shutil.copyfileobj(infd, outfd)
outfd.write(b"\n")
outfd.write(self.process.get_statements_as_mbox(self.request.user))
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment