#! /usr/bin/env python ############################################################################ # Copyright (C) 2002 Paul Rodger , # (C) 2006 Peter Poeml , # (C) 2006 Nikolaus Schulz # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ############################################################################ """ Archive and compress old mail in mbox, MH or maildir-format mailboxes. Website: http://archivemail.sourceforge.net/ """ # global administrivia __version__ = "archivemail v0.6.2" __cvs_id__ = "$Id$" __copyright__ = """\ Copyright (C) 2002 Paul Rodger (C) 2006 Peter Poeml , (C) 2006 Nikolaus Schulz This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.""" import sys def check_python_version(): """Abort if we are running on python < v2.3""" too_old_error = "This program requires python v2.3 or greater. " + \ "Your version of python is:\n%s""" % sys.version try: version = sys.version_info # we might not even have this function! :) if (version[0] < 2) or (version[0] == 2 and version[1] < 3): print too_old_error sys.exit(1) except AttributeError: print too_old_error sys.exit(1) # define & run this early # (IMAP over SSL requires Python >= 2.3) check_python_version() import fcntl import getopt import gzip import mailbox import os import pwd import re import rfc822 import shutil import signal import stat import string import tempfile import time import urlparse # From_ mangling regex. from_re = re.compile(r'^From ', re.MULTILINE) ############## class definitions ############### class Stats: """Class to collect and print statistics about mailbox archival""" __archived = 0 __mailbox_name = None __archive_name = None __start_time = 0 __total = 0 def __init__(self, mailbox_name, final_archive_name): """Constructor for a new set of statistics. Arguments: mailbox_name -- filename/dirname of the original mailbox final_archive_name -- filename for the final 'mbox' archive, without compression extension (eg .gz) """ assert(mailbox_name) assert(final_archive_name) self.__start_time = time.time() self.__mailbox_name = mailbox_name self.__archive_name = final_archive_name + ".gz" def another_message(self): """Add one to the internal count of total messages processed""" self.__total = self.__total + 1 def another_archived(self): """Add one to the internal count of messages archived""" self.__archived = self.__archived + 1 def display(self): """Print statistics about how many messages were archived""" end_time = time.time() time_seconds = end_time - self.__start_time action = "archived" if options.delete_old_mail: action = "deleted" if options.dry_run: action = "I would have " + action print "%s: %s %d of %d message(s) in %.1f seconds" % \ (self.__mailbox_name, action, self.__archived, self.__total, time_seconds) class StaleFiles: """Class to keep track of files to be deleted on abnormal exit""" archive = None # tempfile for messages to be archived procmail_lock = None # original_mailbox.lock retain = None # tempfile for messages to be retained temp_dir = None # our tempfile directory container def clean(self): """Delete any temporary files or lockfiles that exist""" if self.procmail_lock: vprint("removing stale procmail lock '%s'" % self.procmail_lock) try: os.remove(self.procmail_lock) self.procmail_lock = None except (IOError, OSError): pass if self.retain: vprint("removing stale retain file '%s'" % self.retain) try: os.remove(self.retain) self.retain = None except (IOError, OSError): pass if self.archive: vprint("removing stale archive file '%s'" % self.archive) try: os.remove(self.archive) self.archive = None except (IOError, OSError): pass if self.temp_dir: vprint("removing stale tempfile directory '%s'" % self.temp_dir) try: os.rmdir(self.temp_dir) self.temp_dir = None except (IOError, OSError): pass class Options: """Class to store runtime options, including defaults""" archive_suffix = "_archive" days_old_max = 180 date_old_max = None delete_old_mail = 0 dry_run = 0 filter_append = None include_flagged = 0 lockfile_attempts = 5 lockfile_extension = ".lock" lockfile_sleep = 1 no_compress = 0 only_archive_read = 0 output_dir = None pwfile = None preserve_unread = 0 mangle_from = 1 quiet = 0 read_buffer_size = 8192 script_name = os.path.basename(sys.argv[0]) min_size = None verbose = 0 warn_duplicates = 0 def parse_args(self, args, usage): """Set our runtime options from the command-line arguments. Arguments: args -- this is sys.argv[1:] usage -- a usage message to display on '--help' or bad arguments Returns the remaining command-line arguments that have not yet been parsed as a string. """ try: opts, args = getopt.getopt(args, '?D:S:Vd:hno:F:P:qs:uv', ["date=", "days=", "delete", "dry-run", "help", "include-flagged", "no-compress", "output-dir=", "filter-append=", "pwfile=", "dont-mangle-from", "preserve-unread", "quiet", "size=", "suffix=", "verbose", "version", "warn-duplicate"]) except getopt.error, msg: user_error(msg) archive_by = None for o, a in opts: if o == '--delete': self.delete_old_mail = 1 if o == '--include-flagged': self.include_flagged = 1 if o == '--no-compress': self.no_compress = 1 if o == '--warn-duplicate': self.warn_duplicates = 1 if o in ('-D', '--date'): if archive_by: user_error("you cannot specify both -d and -D options") archive_by = "date" self.date_old_max = self.date_argument(a) if o in ('-d', '--days'): if archive_by: user_error("you cannot specify both -d and -D options") archive_by = "days" self.days_old_max = string.atoi(a) if o in ('-o', '--output-dir'): self.output_dir = a if o in ('-P', '--pwfile'): self.pwfile = a if o in ('-F', '--filter-append'): self.filter_append = a if o in ('-h', '-?', '--help'): print usage sys.exit(0) if o in ('-n', '--dry-run'): self.dry_run = 1 if o in ('-q', '--quiet'): self.quiet = 1 if o in ('-s', '--suffix'): self.archive_suffix = a if o in ('-S', '--size'): self.min_size = string.atoi(a) if o in ('-u', '--preserve-unread'): self.preserve_unread = 1 if o == '--dont-mangle-from': self.mangle_from = 0 if o in ('-v', '--verbose'): self.verbose = 1 if o in ('-V', '--version'): print __version__ + "\n\n" + __copyright__ sys.exit(0) return args def sanity_check(self): """Complain bitterly about our options now rather than later""" if self.output_dir: if not os.path.isdir(self.output_dir): user_error("output directory does not exist: '%s'" % \ self.output_dir) if not os.access(self.output_dir, os.W_OK): user_error("no write permission on output directory: '%s'" % \ self.output_dir) if is_world_writable(self.output_dir): unexpected_error(("output directory is world-writable: " + \ "%s -- I feel nervous!") % self.output_dir) if self.days_old_max < 1: user_error("--days argument must be greater than zero") if self.days_old_max >= 10000: user_error("--days argument must be less than 10000") if self.min_size is not None and self.min_size < 1: user_error("--size argument must be greater than zero") if self.quiet and self.verbose: user_error("you cannot use both the --quiet and --verbose options") if self.pwfile: if not os.path.isfile(self.pwfile): user_error("pwfile %s does not exist" % self.pwfile) def date_argument(self, string): """Converts a date argument string into seconds since the epoch""" date_formats = ( "%Y-%m-%d", # ISO format "%d %b %Y" , # Internet format "%d %B %Y" , # Internet format with full month names ) time.accept2dyear = 0 # I'm not going to support 2-digit years for format in date_formats: try: date = time.strptime(string, format) seconds = time.mktime(date) return seconds except (ValueError, OverflowError): pass user_error("cannot parse the date argument '%s'\n" "The date should be in ISO format (eg '2002-04-23'),\n" "Internet format (eg '23 Apr 2002') or\n" "Internet format with full month names (eg '23 April 2002')" % string) class Mbox(mailbox.UnixMailbox): """Class that allows read/write access to a 'mbox' mailbox. Subclasses the mailbox.UnixMailbox class. """ mbox_file = None # file handle for the mbox file mbox_file_name = None # GzipFile class has no .name variable mbox_file_closed = 0 # GzipFile class has no .closed variable original_atime = None # last-accessed timestamp original_mtime = None # last-modified timestamp original_mode = None # file permissions to preserve starting_size = None # file size of mailbox on open def __init__(self, path, mode="r+"): """Constructor for opening an existing 'mbox' mailbox. Extends constructor for mailbox.UnixMailbox() Named Arguments: path -- file name of the 'mbox' file to be opened mode -- mode to open the file in (default is read-write) """ assert(path) try: self.original_atime = os.path.getatime(path) self.original_mtime = os.path.getmtime(path) self.original_mode = os.stat(path)[stat.ST_MODE] self.starting_size = os.path.getsize(path) self.mbox_file = open(path, mode) except IOError, msg: unexpected_error(msg) self.mbox_file_name = path mailbox.UnixMailbox.__init__(self, self.mbox_file) def write(self, msg): """Write a rfc822 message object to the 'mbox' mailbox. If the rfc822 has no Unix 'From_' line, then one is constructed from other headers in the message. Arguments: msg -- rfc822 message object to be written """ assert(msg) assert(self.mbox_file) vprint("saving message to file '%s'" % self.mbox_file_name) unix_from = msg.unixfrom if unix_from: msg_has_mbox_format = True else: msg_has_mbox_format = False unix_from = make_mbox_from(msg) self.mbox_file.write(unix_from) assert(msg.headers) self.mbox_file.writelines(msg.headers) self.mbox_file.write(os.linesep) # The following while loop is about twice as fast in # practice to 'self.mbox_file.writelines(msg.fp.readlines())' assert(options.read_buffer_size > 0) linebuf = "" while 1: body = msg.fp.read(options.read_buffer_size) if (not msg_has_mbox_format) and options.mangle_from: # Be careful not to break pattern matching splitindex = body.rfind(os.linesep) nicebody = linebuf + body[:splitindex] linebuf = body[splitindex:] body = from_re.sub('>From ', nicebody) if not body: break self.mbox_file.write(body) if not msg_has_mbox_format: self.mbox_file.write(os.linesep) def remove(self): """Close and delete the 'mbox' mailbox file""" file_name = self.mbox_file_name self.close() vprint("removing file '%s'" % self.mbox_file_name) os.remove(file_name) def is_empty(self): """Return true if the 'mbox' file is empty, false otherwise""" return (os.path.getsize(self.mbox_file_name) == 0) def close(self): """Close the mbox file""" if not self.mbox_file_closed: vprint("closing file '%s'" % self.mbox_file_name) self.mbox_file.close() self.mbox_file_closed = 1 def reset_stat(self): """Set the file timestamps and mode to the original value""" assert(self.original_atime) assert(self.original_mtime) assert(self.mbox_file_name) assert(self.original_mode) # I doubt this will be 000? os.utime(self.mbox_file_name, (self.original_atime, \ self.original_mtime)) os.chmod(self.mbox_file_name, self.original_mode) def exclusive_lock(self): """Set an advisory lock on the 'mbox' mailbox""" vprint("obtaining exclusive lock on file '%s'" % self.mbox_file_name) fcntl.flock(self.mbox_file.fileno(), fcntl.LOCK_EX) def exclusive_unlock(self): """Unset any advisory lock on the 'mbox' mailbox""" vprint("dropping exclusive lock on file '%s'" % self.mbox_file_name) fcntl.flock(self.mbox_file.fileno(), fcntl.LOCK_UN) def procmail_lock(self): """Create a procmail lockfile on the 'mbox' mailbox""" lock_name = self.mbox_file_name + options.lockfile_extension attempt = 0 while os.path.isfile(lock_name): vprint("lockfile '%s' exists - sleeping..." % lock_name) time.sleep(options.lockfile_sleep) attempt = attempt + 1 if (attempt >= options.lockfile_attempts): unexpected_error("Giving up waiting for procmail lock '%s'" % lock_name) vprint("writing lockfile '%s'" % lock_name) old_umask = os.umask(022) # is this dodgy? lock = open(lock_name, "w") _stale.procmail_lock = lock_name lock.close() old_umask = os.umask(old_umask) def procmail_unlock(self): """Delete the procmail lockfile on the 'mbox' mailbox""" assert(self.mbox_file_name) lock_name = self.mbox_file_name + options.lockfile_extension vprint("removing lockfile '%s'" % lock_name) os.remove(lock_name) _stale.procmail_lock = None def leave_empty(self): """Replace the 'mbox' mailbox with a zero-length file. This should be the same as 'cp /dev/null mailbox'. This will leave a zero-length mailbox file so that mail reading programs don't get upset that the mailbox has been completely deleted.""" assert(os.path.isfile(self.mbox_file_name)) vprint("turning '%s' into a zero-length file" % self.mbox_file_name) blank_file = open(self.mbox_file_name, "w") blank_file.close() def get_size(self): """Return the current size of the mbox file""" return os.path.getsize(self.mbox_file_name) class RetainMbox(Mbox): """Class for holding messages that will be retained from the original mailbox (ie. the messages are not considered 'old'). Extends the 'Mbox' class. This 'mbox' file starts off as a temporary file but will eventually overwrite the original mailbox if everything is OK. """ __final_name = None def __init__(self, final_name): """Constructor - create a temporary file for the mailbox. Arguments: final_name -- the name of the original mailbox that this mailbox will replace when we call finalise() """ assert(final_name) temp_name = tempfile.mkstemp("retain")[1] self.mbox_file = open(temp_name, "w") self.mbox_file_name = temp_name _stale.retain = temp_name vprint("opened temporary retain file '%s'" % self.mbox_file_name) self.__final_name = final_name def finalise(self): """Overwrite the original mailbox with this temporary mailbox.""" assert(self.__final_name) self.close() # make sure that the retained mailbox has the same timestamps and # permission as the original mailbox atime = os.path.getatime(self.__final_name) mtime = os.path.getmtime(self.__final_name) mode = os.stat(self.__final_name)[stat.ST_MODE] os.chmod(self.mbox_file_name, mode) vprint("renaming '%s' to '%s'" % (self.mbox_file_name, self.__final_name)) try: os.rename(self.mbox_file_name, self.__final_name) except OSError: # file might be on a different filesystem -- move it manually shutil.copy2(self.mbox_file_name, self.__final_name) os.remove(self.mbox_file_name) os.utime(self.__final_name, (atime, mtime)) # reset to original timestamps _stale.retain = None def remove(self): """Delete this temporary mailbox. Overrides Mbox.remove()""" Mbox.remove(self) _stale.retain = None class ArchiveMbox(Mbox): """Class for holding messages that will be archived from the original mailbox (ie. the messages that are considered 'old'). Extends the 'Mbox' class. This 'mbox' file starts off as a temporary file, copied from any pre-existing archive. It will eventually overwrite the original archive mailbox if everything is OK. """ __final_name = None def __init__(self, final_name): """Constructor -- copy any pre-existing compressed archive to a temporary file which we use as the new 'mbox' archive for this mailbox. Arguments: final_name -- the final name for this archive mailbox. This function will check to see if the filename already exists, and copy it to a temporary file if it does. It will also rename itself to this name when we call finalise() """ assert(final_name) if options.no_compress: self.__init_uncompressed(final_name) else: self.__init_compressed(final_name) self.__final_name = final_name def __init_uncompressed(self, final_name): """Used internally by __init__ when archives are uncompressed""" assert(final_name) compressed_archive = final_name + ".gz" if os.path.isfile(compressed_archive): unexpected_error("""There is already a file named '%s'! Have you been previously compressing this archive? You probably should uncompress it manually, and try running me again.""" % compressed_archive) temp_name = tempfile.mkstemp("archive")[1] if os.path.isfile(final_name): vprint("file already exists that is named: %s" % final_name) shutil.copy2(final_name, temp_name) _stale.archive = temp_name self.mbox_file = open(temp_name, "a") self.mbox_file_name = temp_name def __init_compressed(self, final_name): """Used internally by __init__ when archives are compressed""" assert(final_name) compressed_filename = final_name + ".gz" if os.path.isfile(final_name): unexpected_error("""There is already a file named '%s'! Have you been reading this archive? You probably should re-compress it manually, and try running me again.""" % final_name) temp_name = tempfile.mkstemp("archive.gz")[1] if os.path.isfile(compressed_filename): vprint("file already exists that is named: %s" % \ compressed_filename) shutil.copy2(compressed_filename, temp_name) _stale.archive = temp_name self.mbox_file = gzip.GzipFile(temp_name, "a") self.mbox_file_name = temp_name def finalise(self): """Close the archive and rename this archive temporary file to the final archive filename, overwriting any pre-existing archive if it exists. """ assert(self.__final_name) self.close() final_name = self.__final_name if not options.no_compress: final_name = final_name + ".gz" vprint("renaming '%s' to '%s'" % (self.mbox_file_name, final_name)) try: os.rename(self.mbox_file_name, final_name) except OSError: # file might be on a different filesystem -- move it manually shutil.copy2(self.mbox_file_name, final_name) os.remove(self.mbox_file_name) _stale.archive = None class IdentityCache: """Class used to remember Message-IDs and warn if they are seen twice""" seen_ids = {} mailbox_name = None def __init__(self, mailbox_name): """Constructor: takes the mailbox name as an argument""" assert(mailbox_name) self.mailbox_name = mailbox_name def warn_if_dupe(self, msg): """Print a warning message if the message has already appeared""" assert(msg) message_id = msg.get('Message-ID') assert(message_id) if self.seen_ids.has_key(message_id): user_warning("duplicate message id: '%s' in mailbox '%s'" % (message_id, self.mailbox_name)) self.seen_ids[message_id] = 1 # global class instances options = Options() # the run-time options object _stale = StaleFiles() # remember what we have to delete on abnormal exit def main(args = sys.argv[1:]): global _stale # this usage message is longer than 24 lines -- bad idea? usage = """Usage: %s [options] mailbox [mailbox...] Moves old mail in IMAP, mbox, MH or maildir-format mailboxes to an mbox-format mailbox compressed with gzip. Options are as follows: -d, --days=NUM archive messages older than NUM days (default: %d) -D, --date=DATE archive messages older than DATE -o, --output-dir=DIR directory to store archives (default: same as original) -P, --pwfile=FILE file to read imap password from (default: None) -F, --filter-append=STRING append arbitrary string to the IMAP filter string -s, --suffix=NAME suffix for archive filename (default: '%s') -S, --size=NUM only archive messages NUM bytes or larger -n, --dry-run don't write to anything - just show what would be done -u, --preserve-unread never archive unread messages --dont-mangle-from do not mangle From_ in message bodies --delete delete rather than archive old mail (use with caution!) --include-flagged messages flagged important can also be archived --no-compress do not compress archives with gzip --warn-duplicate warn about duplicate Message-IDs in the same mailbox -v, --verbose report lots of extra debugging information -q, --quiet quiet mode - print no statistics (suitable for crontab) -V, --version display version information -h, --help display this message Example: %s linux-kernel This will move all messages older than %s days to a 'mbox' mailbox called 'linux-kernel_archive.gz', deleting them from the original 'linux-kernel' mailbox. If the 'linux-kernel_archive.gz' mailbox already exists, the newly archived messages are appended. To archive IMAP mailboxes, format your mailbox argument like this: imap://username:password@server/mailbox (substitute 'imap' with 'imaps' for an SSL connection) Website: http://archivemail.sourceforge.net/ """ % \ (options.script_name, options.days_old_max, options.archive_suffix, options.script_name, options.days_old_max) args = options.parse_args(args, usage) if len(args) == 0: print usage sys.exit(1) options.sanity_check() for mailbox_path in args: archive(mailbox_path) ######## errors and debug ########## def vprint(string): """Print the string argument if we are in verbose mode""" if options.verbose: print string def unexpected_error(string): """Print the string argument, a 'shutting down' message and abort - this function never returns""" sys.stderr.write("%s: %s\n" % (options.script_name, string)) sys.stderr.write("%s: unexpected error encountered - shutting down\n" % options.script_name) sys.exit(1) def user_error(string): """Print the string argument and abort - this function never returns""" sys.stderr.write("%s: %s\n" % (options.script_name, string)) sys.exit(1) def user_warning(string): """Print the string argument""" sys.stderr.write("%s: Warning - %s\n" % (options.script_name, string)) ########### operations on a message ############ def make_mbox_from(message): """Return a string suitable for use as a 'From_' mbox header for the message. Arguments: message -- the rfc822 message object """ assert(message) address = guess_return_path(message) time_message = guess_delivery_time(message) gm_date = time.gmtime(time_message) assert(gm_date) date_string = time.asctime(gm_date) mbox_from = "From %s %s\n" % (address, date_string) return mbox_from def guess_return_path(message): """Return a guess at the Return Path address of an rfc822 message""" assert(message) for header in ('Return-path', 'From'): address_header = message.get(header) if address_header: (name, address) = rfc822.parseaddr(address_header) if address: return address # argh, we can't find any valid 'Return-path' guesses - just # just use the current unix username like mutt does login = pwd.getpwuid(os.getuid())[0] assert(login) return login def guess_delivery_time(message): """Return a guess at the delivery date of an rfc822 message""" assert(message) # try to guess the delivery date from various headers # get more desparate as we go through the array for header in ('Delivery-date', 'Date', 'Resent-Date'): try: date = message.getdate(header) if date: time_message = time.mktime(date) vprint("using valid time found from '%s' header" % header) return time_message except (IndexError, ValueError, OverflowError): pass # as a second-last resort, try the date from the 'From_' line (ugly) # this will only work from a mbox-format mailbox if (message.unixfrom): header = re.sub("From \S+", "", message.unixfrom) header = string.strip(header) date = rfc822.parsedate(header) if date: try: time_message = time.mktime(date) vprint("using valid time found from unix 'From_' header") return time_message except (ValueError, OverflowError): pass # the headers have no valid dates -- last resort, try the file timestamp # this will not work for mbox mailboxes try: file_name = message.fp.name except AttributeError: # we are looking at a 'mbox' mailbox - argh! # Just return the current time - this will never get archived :( vprint("no valid times found at all -- using current time!") return time.time() if not os.path.isfile(file_name): unexpected_error("mailbox file name '%s' has gone missing" % \ file_name) time_message = os.path.getmtime(message.fp.name) vprint("using valid time found from '%s' last-modification time" % \ file_name) return time_message def add_status_headers(message): """ Add Status and X-Status headers to a message from a maildir mailbox. Maildir messages store their information about being read/replied/etc in the suffix of the filename rather than in Status and X-Status headers in the message. In order to archive maildir messages into mbox format, it is nice to preserve this information by putting it into the status headers. """ status = "" x_status = "" match = re.search(":2,(.+)$", message.fp.name) if match: flags = match.group(1) for flag in flags: if flag == "D": # (draft): the user considers this message a draft pass # does this make any sense in mbox? elif flag == "F": # (flagged): user-defined 'important' flag x_status = x_status + "F" elif flag == "R": # (replied): the user has replied to this message x_status = x_status + "A" elif flag == "S": # (seen): the user has viewed this message status = status + "R" elif flag == "T": # (trashed): user has moved this message to trash pass # is this Status: D ? else: pass # no whingeing here, although it could be a good experiment # files in the maildir 'cur' directory are no longer new, # they are the same as messages with 'Status: O' headers in mbox last_dir = os.path.basename(os.path.dirname(message.fp.name)) if last_dir == "cur": status = status + "O" # Maildir messages should not already have 'Status' and 'X-Status' # headers, although I have seen it done. If they do already have them, just # preserve them rather than trying to overwrite/verify them. if not message.get('Status') and status: vprint("converting maildir status into Status header '%s'" % status) message['Status'] = status if not message.get('X-Status') and x_status: vprint("converting maildir status into X-Status header '%s'" % x_status) message['X-Status'] = x_status def add_status_headers_imap(message, flags): """Add Status and X-Status headers to a message from an imap mailbox.""" status = "" x_status = "" flags = list(flags) # convert from tuple for flag in flags: if flag == "\\Draft": # (draft): the user considers this message a draft pass # does this make any sense in mbox? elif flag == "\\Flagged": # (flagged): user-defined 'important' flag x_status = x_status + "F" elif flag == "\\Answered": # (replied): the user has replied to this message x_status = x_status + "A" elif flag == "\\Seen": # (seen): the user has viewed this message status = status + "R" elif flag == "\\Deleted": # (trashed): user has moved this message to trash pass # is this Status: D ? else: pass # no whingeing here, although it could be a good experiment if flags.count("\\Seen") == 0: if flags.count("\\Recent") == 1: status = status + "N" else: status = status + "O" # As with maildir folders, preserve Status and X-Status headers # if they exist (they shouldn't) if not message.get('Status') and status: vprint("converting imap status into Status header '%s'" % status) message['Status'] = status if not message.get('X-Status') and x_status: vprint("converting imap status into X-Status header '%s'" % x_status) message['X-Status'] = x_status def is_flagged(message): """return true if the message is flagged important, false otherwise""" # MH and mbox mailboxes use the 'X-Status' header to indicate importance x_status = message.get('X-Status') if x_status and re.search('F', x_status): vprint("message is important (X-Status header='%s')" % x_status) return 1 file_name = None try: file_name = message.fp.name except AttributeError: pass # maildir mailboxes use the filename suffix to indicate flagged status if file_name and re.search(":2,.*F.*$", file_name): vprint("message is important (filename info has 'F')") return 1 vprint("message is not flagged important") return 0 def is_unread(message): """return true if the message is unread, false otherwise""" # MH and mbox mailboxes use the 'Status' header to indicate read status status = message.get('Status') if status and re.search('R', status): vprint("message has been read (status header='%s')" % status) return 0 file_name = None try: file_name = message.fp.name except AttributeError: pass # maildir mailboxes use the filename suffix to indicate read status if file_name and re.search(":2,.*S.*$", file_name): vprint("message has been read (filename info has 'S')") return 0 vprint("message is unread") return 1 def is_smaller(message, size): """Return true if the message is smaller than size bytes, false otherwise""" assert(message) assert(size > 0) file_name = None message_size = None try: file_name = message.fp.name except AttributeError: pass if file_name: # with maildir and MH mailboxes, we can just use the file size message_size = os.path.getsize(file_name) else: # with mbox mailboxes, not so easy message_size = 0 if message.unixfrom: message_size = message_size + len(message.unixfrom) for header in message.headers: message_size = message_size + len(header) message_size = message_size + 1 # the blank line after the headers start_offset = message.fp.tell() message.fp.seek(0, 2) # seek to the end of the message end_offset = message.fp.tell() message.rewindbody() message_size = message_size + (end_offset - start_offset) if message_size < size: vprint("message is too small (%d bytes), minimum bytes : %d" % \ (message_size, size)) return 1 else: vprint("message is not too small (%d bytes), minimum bytes: %d" % \ (message_size, size)) return 0 def should_archive(message): """Return true if we should archive the message, false otherwise""" old = 0 time_message = guess_delivery_time(message) if options.date_old_max == None: old = is_older_than_days(time_message, options.days_old_max) else: old = is_older_than_time(time_message, options.date_old_max) # I could probably do this in one if statement, but then I wouldn't # understand it. if not old: return 0 if not options.include_flagged and is_flagged(message): return 0 if options.min_size and is_smaller(message, options.min_size): return 0 if options.preserve_unread and is_unread(message): return 0 return 1 def is_older_than_time(time_message, max_time): """Return true if a message is older than the specified time, false otherwise. Arguments: time_message -- the delivery date of the message measured in seconds since the epoch max_time -- maximum time allowed for message """ days_old = (max_time - time_message) / 24 / 60 / 60 if time_message < max_time: vprint("message is %.2f days older than the specified date" % days_old) return 1 vprint("message is %.2f days younger than the specified date" % \ abs(days_old)) return 0 def is_older_than_days(time_message, max_days): """Return true if a message is older than the specified number of days, false otherwise. Arguments: time_message -- the delivery date of the message measured in seconds since the epoch max_days -- maximum number of days before message is considered old """ assert(max_days >= 1) time_now = time.time() if time_message > time_now: vprint("warning: message has date in the future") return 0 secs_old_max = (max_days * 24 * 60 * 60) days_old = (time_now - time_message) / 24 / 60 / 60 vprint("message is %.2f days old" % days_old) if ((time_message + secs_old_max) < time_now): return 1 return 0 def build_imap_filter(): """Return an imap filter string""" filter = [] old = 0 if options.date_old_max == None: time_now = time.time() secs_old_max = (options.days_old_max * 24 * 60 * 60) time_old = time.gmtime(time_now - secs_old_max) time_str = time.strftime('%d-%b-%Y', time_old) filter.append("BEFORE %s" % time_str) else: time_old = time.gmtime(options.date_old_max) time_str = time.strftime('%d-%b-%Y', time_old) filter.append("BEFORE %s" % time_str) if not options.include_flagged: filter.append("UNFLAGGED") if options.min_size: filter.append("LARGER %d" % options.min_size) if options.preserve_unread: filter.append("SEEN") if options.filter_append: filter.append(options.filter_append) return '(' + string.join(filter, ' ') + ')' ############### mailbox operations ############### def archive(mailbox_name): """Archives a mailbox. Arguments: mailbox_name -- the filename/dirname of the mailbox to be archived final_archive_name -- the filename of the 'mbox' mailbox to archive old messages to - appending if the archive already exists """ assert(mailbox_name) # strip any trailing slash (we could be archiving a maildir or MH format # mailbox and somebody was pressing in bash) - we don't want to use # the trailing slash in the archive name mailbox_name = re.sub("/$", "", mailbox_name) assert(mailbox_name) set_signal_handlers() os.umask(077) # saves setting permissions on mailboxes/tempfiles # allow the user to embed time formats such as '%B' in the suffix string if options.date_old_max == None: parsed_suffix_time = time.time() - options.days_old_max*24*60*60 else: parsed_suffix_time = options.date_old_max parsed_suffix = time.strftime(options.archive_suffix, time.localtime(parsed_suffix_time)) imap_scheme = urlparse.urlparse(mailbox_name)[0] if imap_scheme == 'imap' or imap_scheme == 'imaps': final_archive_name = mailbox_name.split('/')[-1] + parsed_suffix else: final_archive_name = mailbox_name + parsed_suffix if options.output_dir: final_archive_name = os.path.join(options.output_dir, os.path.basename(final_archive_name)) vprint("archiving '%s' to '%s' ..." % (mailbox_name, final_archive_name)) # check to see if we are running as root -- if so, change our effective # userid and groupid to that of the original mailbox former_gid = None # groupid doesn't have to be '0' for root on solaris 8? if (os.getuid() == 0) and os.path.exists(mailbox_name): former_gid = os.getgid(); # remember this so we can change back mailbox_user = os.stat(mailbox_name)[stat.ST_UID] mailbox_group = os.stat(mailbox_name)[stat.ST_GID] vprint("changing effective group id to: %d" % mailbox_group) os.setegid(mailbox_group) vprint("changing effective user id to: %d" % mailbox_user) os.seteuid(mailbox_user) old_temp_dir = tempfile.tempdir try: # create a temporary directory for us to work in securely tempfile.tempdir = None new_temp_dir = tempfile.mkdtemp('archivemail') assert(new_temp_dir) _stale.temp_dir = new_temp_dir tempfile.tempdir = new_temp_dir vprint("set tempfile directory to '%s'" % new_temp_dir) if os.path.islink(mailbox_name): unexpected_error("'%s' is a symbolic link -- I feel nervous!" % mailbox_name) if imap_scheme == 'imap' or imap_scheme == 'imaps': vprint("guessing mailbox is of type: imap(s)") _archive_imap(mailbox_name, final_archive_name) elif os.path.isfile(mailbox_name): vprint("guessing mailbox is of type: mbox") _archive_mbox(mailbox_name, final_archive_name) elif os.path.isdir(mailbox_name): cur_path = os.path.join(mailbox_name, "cur") new_path = os.path.join(mailbox_name, "new") if os.path.isdir(cur_path) and os.path.isdir(new_path): vprint("guessing mailbox is of type: maildir") _archive_dir(mailbox_name, final_archive_name, "maildir") else: vprint("guessing mailbox is of type: MH") _archive_dir(mailbox_name, final_archive_name, "mh") else: user_error("'%s': no such file or directory" % mailbox_name) # remove our special temp directory - hopefully empty os.rmdir(new_temp_dir) _stale.temp_dir = None finally: tempfile.tempdir = old_temp_dir clean_up() # if we are running as root, revert the seteuid()/setegid() above if (os.getuid() == 0): vprint("changing effective groupid and userid back to root") os.setegid(former_gid) os.seteuid(0) def _archive_mbox(mailbox_name, final_archive_name): """Archive a 'mbox' style mailbox - used by archive_mailbox() Arguments: mailbox_name -- the filename/dirname of the mailbox to be archived final_archive_name -- the filename of the 'mbox' mailbox to archive old messages to - appending if the archive already exists """ assert(mailbox_name) assert(final_archive_name) archive = None retain = None stats = Stats(mailbox_name, final_archive_name) original = Mbox(path=mailbox_name) cache = IdentityCache(mailbox_name) original.procmail_lock() original.exclusive_lock() msg = original.next() if not msg and (original.starting_size > 0): user_error("'%s' is not a valid mbox-format mailbox" % mailbox_name) while (msg): stats.another_message() vprint("processing message '%s'" % msg.get('Message-ID')) if options.warn_duplicates: cache.warn_if_dupe(msg) if should_archive(msg): stats.another_archived() if options.delete_old_mail: vprint("decision: delete message") else: vprint("decision: archive message") if not options.dry_run: if (not archive): archive = ArchiveMbox(final_archive_name) archive.write(msg) else: vprint("decision: retain message") if not options.dry_run: if (not retain): retain = RetainMbox(mailbox_name) retain.write(msg) msg = original.next() vprint("finished reading messages") original.exclusive_unlock() original.close() if original.starting_size != original.get_size(): unexpected_error("the mailbox '%s' changed size during reading!" % \ mailbox_name) original.reset_stat() if not options.dry_run: if retain: retain.close() if archive: archive.close() if options.delete_old_mail: # we will never have an archive file if retain: retain.finalise() else: # nothing was retained - everything was deleted original.leave_empty() original.reset_stat() elif archive: archive.finalise() if retain: retain.finalise() else: # nothing was retained - everything was deleted original.leave_empty() original.reset_stat() else: # There was nothing to archive if retain: # retain will be the same as original mailbox retain.remove() original.procmail_unlock() if not options.quiet: stats.display() def _archive_dir(mailbox_name, final_archive_name, type): """Archive a 'maildir' or 'MH' style mailbox - used by archive_mailbox()""" assert(mailbox_name) assert(final_archive_name) assert(type) original = None archive = None stats = Stats(mailbox_name, final_archive_name) delete_queue = [] if type == "maildir": original = mailbox.Maildir(mailbox_name) elif type == "mh": original = mailbox.MHMailbox(mailbox_name) else: unexpected_error("unknown type: %s" % type) assert(original) cache = IdentityCache(mailbox_name) msg = original.next() while (msg): stats.another_message() vprint("processing message '%s'" % msg.get('Message-ID')) if options.warn_duplicates: cache.warn_if_dupe(msg) if should_archive(msg): stats.another_archived() if options.delete_old_mail: vprint("decision: delete message") else: vprint("decision: archive message") if not options.dry_run: if not archive: archive = ArchiveMbox(final_archive_name) if type == "maildir": add_status_headers(msg) archive.write(msg) if not options.dry_run: delete_queue.append(msg.fp.name) else: vprint("decision: retain message") msg = original.next() vprint("finished reading messages") if not options.dry_run: if archive: archive.close() archive.finalise() for file_name in delete_queue: if os.path.isfile(file_name): vprint("removing original message: '%s'" % file_name) os.remove(file_name) if not options.quiet: stats.display() def _archive_imap(mailbox_name, final_archive_name): """Archive an imap mailbox - used by archive_mailbox()""" assert(mailbox_name) assert(final_archive_name) import imaplib import cStringIO import getpass archive = None stats = Stats(mailbox_name, final_archive_name) imap_str = mailbox_name[mailbox_name.find('://') + 3:] filter = build_imap_filter() vprint("imap filter: '%s'" % filter) try: imap_username, imap_str = imap_str.split('@', 1) imap_server, imap_folder = imap_str.split('/', 1) except: unexpected_error("you must provide a properly formatted \ IMAP connection string") if options.pwfile: imap_password = open(options.pwfile).read().rstrip() else: try: imap_username, imap_password = imap_username.split(':', 1) except: imap_password = getpass.getpass() if mailbox_name[:5] == 'imaps': vprint("Using SSL") imap_srv = imaplib.IMAP4_SSL(imap_server) else: imap_srv = imaplib.IMAP4(imap_server) vprint("connected to server %s" % imap_server) try: result, response = imap_srv.login_cram_md5(imap_username, imap_password) except: result, response = imap_srv.login(imap_username, imap_password) if result != 'OK': unexpected_error("authentication failure") vprint("logged in to server as %s" % imap_username) result, response = imap_srv.select(imap_folder) if result != 'OK': unexpected_error("cannot select imap folder") vprint("selected imap folder %s" % imap_folder) result, response = imap_srv.search(None, filter) if result != 'OK': unexpected_error("imap search failed") message_list = response[0].split() vprint("%d messages found matching filter" % len(message_list)) if not options.dry_run: if not options.delete_old_mail: for msg_id in message_list: result, response = imap_srv.fetch(msg_id, '(RFC822 FLAGS)') if result != 'OK': unexpected_error("Failed to fetch message") if "\r\n" == os.linesep: msg_str = response[0][1] else: msg_str = response[0][1].replace("\r\n", os.linesep) msg_flags = imaplib.ParseFlags(response[1]) msg = rfc822.Message(cStringIO.StringIO(msg_str)) add_status_headers_imap(msg, msg_flags) vprint("processing message '%s'" % msg.get('Message-ID')) if options.warn_duplicates: cache.warn_if_dupe(msg) if not archive: archive = ArchiveMbox(final_archive_name) archive.write(msg) # FIXME: stats are not complete yet. #stats.another_archived() if archive: archive.close() archive.finalise() # do not delete more than a certain number of messages at a time, # because the command length is limited. This avoids that servers # terminate the connection with EOF or TCP RST. vprint("Deleting %s messages" % len(message_list)) max_delete = 100 for i in range(0, len(message_list), max_delete): imap_srv.store(string.join(message_list[i:i+max_delete], ','), '+FLAGS.SILENT', '\\Deleted') imap_srv.close() imap_srv.logout() ############### misc functions ############### def set_signal_handlers(): """set signal handlers to clean up temporary files on unexpected exit""" # Make sure we clean up nicely - we don't want to leave stale procmail # lockfiles about if something bad happens to us. This is quite # important, even though procmail will delete stale files after a while. signal.signal(signal.SIGHUP, clean_up_signal) # signal 1 # SIGINT (signal 2) is handled as a python exception signal.signal(signal.SIGQUIT, clean_up_signal) # signal 3 signal.signal(signal.SIGTERM, clean_up_signal) # signal 15 def clean_up(): """Delete stale files""" vprint("cleaning up ...") _stale.clean() def clean_up_signal(signal_number, stack_frame): """Delete stale files -- to be registered as a signal handler. Arguments: signal_number -- signal number of the terminating signal stack_frame -- the current stack frame """ # this will run the above clean_up(), since unexpected_error() # will abort with sys.exit() and clean_up will be registered # at this stage unexpected_error("received signal %s" % signal_number) def is_world_writable(path): """Return true if the path is world-writable, false otherwise""" assert(path) return (os.stat(path)[stat.ST_MODE] & stat.S_IWOTH) # this is where it all happens, folks if __name__ == '__main__': main()