Added the ability to archive messages older than a given absolute date with

the new option '--date' and fixed a bug where archivemail would complain about
messages older than 1970.
This commit is contained in:
Paul Rodger 2002-04-23 03:01:26 +00:00
parent 623f3ba4be
commit 2bfde528cd
7 changed files with 248 additions and 67 deletions

View File

@ -1,4 +1,10 @@
Version 0.4.2 - ???
* Added the ability to archive messages older than a given absolute date
with the new option '--date'.
* Fixed a bug where archivemail would complain about messages older than
1970. Yes, someone had a 'Date' header with 1967 :)
Version 0.4.1 - 21 April 2002
* Don't archive messages that are flagged important unless we are given the
--include-flagged option.

View File

@ -1,5 +1,5 @@
VERSION=0.4.1
VERSION=0.4.2
VERSION_TAG=v$(subst .,_,$(VERSION))
TARFILE=archivemail-$(VERSION).tar.gz
@ -16,6 +16,7 @@ test:
clobber: clean
rm -rf build dist
sdist: clobber doc
cp archivemail.py archivemail
fakeroot python setup.py sdist
@ -24,11 +25,11 @@ tag:
cvs tag -F current
cvs tag $(VERSION_TAG)
doc: archivemail.1 archivemail.html
upload:
(cd dist && lftp -c 'open upload.sf.net && cd incoming && put $(TARFILE)')
doc: archivemail.1 archivemail.html
archivemail.1: archivemail.sgml
nsgmls archivemail.sgml | sgmlspl docbook2man-spec.pl
chmod 644 archivemail.1

4
TODO
View File

@ -1,9 +1,11 @@
Goals for next minor release (0.4.2):
Goals for next minor release (0.4.3):
-------------------------------------
* Think about the best way to specify the names of archives created with
possibly an --archive-name option.
* Add a lot more tests (see top of test_archivemail.py)
* We need some better checking to see if we are really looking at a valid
mbox-format mailbox.
Goals for next major release (0.5.0):
-------------------------------------

View File

@ -22,7 +22,7 @@ Website: http://archivemail.sourceforge.net/
"""
# global administrivia
__version__ = "archivemail v0.4.1"
__version__ = "archivemail v0.4.2"
__cvs_id__ = "$Id$"
__copyright__ = """Copyright (C) 2002 Paul Rodger <paul@paulrodger.com>
This is free software; see the source for copying conditions. There is NO
@ -133,6 +133,7 @@ class Options:
"""Class to store runtime options, including defaults"""
archive_suffix = "_archive"
days_old_max = 180
date_old_max = None
delete_old_mail = 0
dry_run = 0
include_flagged = 0
@ -161,15 +162,16 @@ class Options:
"""
try:
opts, args = getopt.getopt(args, '?Vd:hno:qs:uv',
["days=", "delete", "dry-run", "help",
"include-flagged", "no-compress",
"output-dir=", "preserve-unread", "quiet",
"suffix", "verbose", "version",
"warn-duplicate"])
opts, args = getopt.getopt(args, '?D:Vd:hno:qs:uv',
["date=", "days=", "delete", "dry-run", "help",
"include-flagged", "no-compress", "output-dir=",
"preserve-unread", "quiet", "suffix", "verbose",
"version", "warn-duplicate"])
except getopt.error, msg:
user_error(msg)
archive_by = None
for o, a in opts:
if o == '--delete':
self.delete_old_mail = 1
@ -179,7 +181,15 @@ class Options:
self.no_compress = 1
if o == '--warn-duplicate':
self.warn_duplicates = 1
if o in ('-D', '--date'):
if archive_by:
user_error("you cannot specify both -d and -D options")
archive_by = "date"
self.date_old_max = self.date_argument(a)
if o in ('-d', '--days'):
if archive_by:
user_error("you cannot specify both -d and -D options")
archive_by = "days"
self.days_old_max = string.atoi(a)
if o in ('-o', '--output-dir'):
self.output_dir = a
@ -218,6 +228,27 @@ class Options:
if (self.days_old_max >= 10000):
user_error("argument to -d must be less than 10000")
def date_argument(self, string):
    """Turn a date given on the command line into seconds since the epoch.

    Arguments:
    string -- the date text to parse; it is tried against each supported
              layout in turn and the first one that parses wins

    Returns the value produced by time.mktime() for the parsed date.  If
    no layout matches, the problem is reported through user_error().
    """
    # Two-digit years are deliberately unsupported.
    time.accept2dyear = 0
    supported_layouts = (
        "%Y-%m-%d",   # ISO format
        "%d %b %Y",   # Internet format
        "%d %B %Y",   # Internet format with full month names
    )
    for layout in supported_layouts:
        try:
            # Both calls stay inside the try: strptime() rejects text that
            # does not match, mktime() can overflow on extreme dates.
            parsed = time.strptime(string, layout)
            return time.mktime(parsed)
        except (ValueError, OverflowError):
            continue
    message = ("cannot parse the date argument '%s'\n"
               "The date should be in ISO format (eg '2002-04-23'),\n"
               "Internet format (eg '23 Apr 2002') or\n"
               "Internet format with full month names (eg '23 April 2002')"
               % string)
    user_error(message)
class Mbox(mailbox.UnixMailbox):
"""Class that allows read/write access to a 'mbox' mailbox.
@ -526,7 +557,8 @@ Moves old mail in mbox, MH or maildir-format mailboxes to an mbox-format
mailbox compressed with gzip.
Options are as follows:
-d, --days=<days> archive messages older than <days> days (default: %d)
-d, --days=NUM archive messages older than NUM days (default: %d)
-D, --date=DATE archive messages older than DATE
-o, --output-dir=DIR directory to store archives (default: same as original)
-s, --suffix=NAME suffix for archive filename (default: '%s')
-n, --dry-run don't write to anything - just show what would be done
@ -747,34 +779,54 @@ def is_unread(message):
def should_archive(message):
"""Return 1 if we should archive the message, 0 otherwise"""
"""Return true if we should archive the message, false otherwise"""
old = 0
time_message = guess_delivery_time(message)
old = is_too_old(time_message, options.days_old_max)
# I could probably do this in one if statement, but then I wouldn't
# understand it.
if old:
if not options.include_flagged and is_flagged(message):
return 0
if options.preserve_unread:
if is_unread(message):
return 0
else:
return 1
else:
return 1
return 0
if options.date_old_max == None:
old = is_older_than_days(time_message, options.days_old_max)
else:
old = is_older_than_time(time_message, options.date_old_max)
def is_too_old(time_message, max_days):
"""Return true if a message is too old (and should be archived),
# I could probably do this in one if statement, but then I wouldn't
# understand it.
if not old:
return 0
if not options.include_flagged and is_flagged(message):
return 0
if options.preserve_unread and is_unread(message):
return 0
return 1
def is_older_than_time(time_message, max_time):
    """Tell whether a message was delivered before a given cut-off time.

    Arguments:
    time_message -- the delivery date of the message measured in seconds
                    since the epoch
    max_time -- the cut-off, in the same units; only messages strictly
                earlier than this count as older

    Returns 1 when the message is older than max_time, 0 otherwise.  The
    distance from the cut-off, in days, is reported through vprint().
    """
    # Positive when the message predates the cut-off, negative otherwise.
    gap_in_days = (max_time - time_message) / 24 / 60 / 60
    if not (time_message < max_time):
        vprint("message is %.2f days younger than the specified date" %
               abs(gap_in_days))
        return 0
    vprint("message is %.2f days older than the specified date" % gap_in_days)
    return 1
def is_older_than_days(time_message, max_days):
"""Return true if a message is older than the specified number of days,
false otherwise.
Arguments:
time_message -- the delivery date of the message measured in seconds
since the epoch
max_days -- maximum number of days before message is considered old
"""
assert(time_message > 0)
assert(max_days >= 1)
time_now = time.time()
@ -1021,6 +1073,7 @@ def set_signal_handlers():
signal.signal(signal.SIGQUIT, clean_up_signal) # signal 3
signal.signal(signal.SIGTERM, clean_up_signal) # signal 15
def clean_up():
"""Delete stale files -- to be registered with atexit.register()"""
vprint("cleaning up ...")

View File

@ -93,7 +93,20 @@ mailbox it is reading, creating any archive files as that user.
<Option>-d <Replaceable/NUM/, --days=<Replaceable/NUM/</Option>
</Term>
<ListItem><Para>Archive messages older than <Replaceable/NUM/ days.
The default is 180.
The default is 180. This option is incompatible with the
<Option/--date/ option below.
</Para></ListItem>
</VarListEntry>
<VarListEntry>
<Term>
<Option>-D <Replaceable/DATE/, --date=<Replaceable/DATE/</Option>
</Term>
<ListItem><Para>Archive messages older than <Replaceable/DATE/.
<Replaceable/DATE/ can be a date string in ISO format (eg '2002-04-23'),
Internet format (eg '23 Apr 2002') or Internet format with full month names
(eg '23 April 2002'). Two-digit years are not supported.
This option is incompatible with the <Option/--days/ option above.
</Para></ListItem>
</VarListEntry>
@ -283,6 +296,22 @@ are older than 180 days to a compressed mailbox called
</screen>
</Para>
<Para>
To archive all messages in the mailbox <filename>cm-melb</filename> that
are older than the first of January 2002 to a compressed mailbox called
<filename>cm-melb_archive.gz</filename> in the current directory:
<screen>
<prompt>bash$ </prompt><userinput>archivemail --date='1 Jan 2002' cm-melb</userinput>
</screen>
</Para>
<Para>
Exactly the same as the above example, using an ISO date format instead:
<screen>
<prompt>bash$ </prompt><userinput>archivemail --date=2002-01-01 cm-melb</userinput>
</screen>
</Para>
<Para>
To delete all messages in the mailbox <filename>spam</filename> that
are older than 30 days:

View File

@ -18,7 +18,7 @@ check_python_version() # define & run this early - 'distutils.core' is new
from distutils.core import setup
setup(name="archivemail",
version="0.4.1",
version="0.4.2",
description="archive and compress old email",
platforms="POSIX",
license="GNU GPL",

View File

@ -26,9 +26,10 @@ TODO: add tests for:
* archiving maildir-format mailboxes
* archiving MH-format mailboxes
* running archivemail via os.system()
* test the include_flagged option works
* preservation of status information from maildir to mbox
* a 3rd party process changing the mbox file being read
* test to make sure the --date option works
* test to make sure archiving dates < 1970 works
"""
@ -244,44 +245,54 @@ class TestOptionDefaults(unittest.TestCase):
"""no-compression should be off by default"""
self.assertEqual(archivemail.options.no_compress, 0)
########## archivemail.is_too_old() unit testing #################
def testIncludeFlagged(self):
    """flagged messages should be left out of archiving by default"""
    default_setting = archivemail.options.include_flagged
    self.assertEqual(default_setting, 0)
########## archivemail.is_older_than_days() unit testing #################
class TestIsTooOld(unittest.TestCase):
def testVeryOld(self):
"""is_too_old(max_days=360) should be true for these dates > 1 year"""
"""with max_days=360, should be true for these dates > 1 year"""
for years in range(1, 10):
time_msg = time.time() - (years * 365 * 24 * 60 * 60)
assert(archivemail.is_too_old(time_message=time_msg, max_days=360))
assert(archivemail.is_older_than_days(time_message=time_msg,
max_days=360))
def testOld(self):
"""is_too_old(max_days=14) should be true for these dates > 14 days"""
"""with max_days=14, should be true for these dates > 14 days"""
for days in range(14, 360):
time_msg = time.time() - (days * 24 * 60 * 60)
assert(archivemail.is_too_old(time_message=time_msg, max_days=14))
assert(archivemail.is_older_than_days(time_message=time_msg,
max_days=14))
def testJustOld(self):
"""is_too_old(max_days=1) should be true for these dates >= 1 day"""
"""with max_days=1, should be true for these dates >= 1 day"""
for minutes in range(0, 61):
time_msg = time.time() - (25 * 60 * 60) + (minutes * 60)
assert(archivemail.is_too_old(time_message=time_msg, max_days=1))
assert(archivemail.is_older_than_days(time_message=time_msg,
max_days=1))
def testNotOld(self):
"""is_too_old(max_days=9) should be false for these dates < 9 days"""
"""with max_days=9, should be false for these dates < 9 days"""
for days in range(0, 9):
time_msg = time.time() - (days * 24 * 60 * 60)
assert(not archivemail.is_too_old(time_message=time_msg, max_days=9))
assert(not archivemail.is_older_than_days(time_message=time_msg,
max_days=9))
def testJustNotOld(self):
"""is_too_old(max_days=1) should be false for these hours <= 1 day"""
"""with max_days=1, should be false for these hours <= 1 day"""
for minutes in range(0, 60):
time_msg = time.time() - (23 * 60 * 60) - (minutes * 60)
assert(not archivemail.is_too_old(time_message=time_msg, max_days=1))
assert(not archivemail.is_older_than_days(time_message=time_msg,
max_days=1))
def testFuture(self):
"""is_too_old(max_days=1) should be false for times in the future"""
"""with max_days=1, should be false for times in the future"""
for minutes in range(0, 60):
time_msg = time.time() + (minutes * 60)
assert(not archivemail.is_too_old(time_message=time_msg, max_days=1))
assert(not archivemail.is_older_than_days(time_message=time_msg,
max_days=1))
################ archivemail.choose_temp_dir() unit testing #############
@ -516,6 +527,67 @@ class TestArchiveMboxPreserveStatus(unittest.TestCase):
os.remove(name)
class TestArchiveMboxFlagged(unittest.TestCase):
    """make sure the 'include_flagged' option works

    Every test builds a temporary mbox of three messages carrying an
    'X-Status: F' header, keeps a pristine copy of it for comparison and
    then runs archivemail.archive() on the mailbox.
    """
    # Class-level defaults so that tearDown() stays safe when a test fails
    # before it managed to create its files.
    mbox_name = None
    copy_name = None

    def setUp(self):
        """Silence archivemail's output while the test runs."""
        archivemail.options.quiet = 1

    def _make_flagged_mbox(self, days_old):
        """Create the flagged mailbox, 'days_old' days old, and its copy."""
        self.mbox_name = make_mbox(messages=3, hours_old=(24 * days_old),
                                   x_status="F")
        self.copy_name = tempfile.mktemp()
        shutil.copyfile(self.mbox_name, self.copy_name)

    def _assert_mbox_untouched(self):
        """Check the mailbox is unchanged and no archive was produced."""
        assert(os.path.exists(self.mbox_name))
        assert(filecmp.cmp(self.mbox_name, self.copy_name, shallow=0))
        archive_name = self.mbox_name + "_archive.gz"
        assert(not os.path.exists(archive_name))

    def testOld(self):
        """by default, old flagged messages should not be archived"""
        archivemail.options.include_flagged = 0
        self._make_flagged_mbox(days_old=181)
        archivemail.archive(self.mbox_name)
        self._assert_mbox_untouched()

    def testIncludeFlaggedNew(self):
        """new flagged messages should not be archived with include_flagged"""
        archivemail.options.include_flagged = 1
        self._make_flagged_mbox(days_old=179)
        archivemail.archive(self.mbox_name)
        self._assert_mbox_untouched()

    def testIncludeFlaggedOld(self):
        """old flagged messages should be archived with include_flagged"""
        archivemail.options.include_flagged = 1
        self._make_flagged_mbox(days_old=181)
        archivemail.archive(self.mbox_name)
        # Everything was old, so the original mailbox must now be empty ...
        assert(os.path.exists(self.mbox_name))
        self.assertEqual(os.path.getsize(self.mbox_name), 0)
        # ... and the compressed archive must hold exactly what it had.
        archive_name = self.mbox_name + "_archive.gz"
        assert(os.path.exists(archive_name))
        self.assertEqual(os.system("gzip -d %s" % archive_name), 0)
        archive_name = self.mbox_name + "_archive"
        assert(os.path.exists(archive_name))
        assert(filecmp.cmp(archive_name, self.copy_name, shallow=0))

    def tearDown(self):
        """Restore the options touched here and delete any files left over."""
        archivemail.options.include_flagged = 0
        archivemail.options.quiet = 0
        leftovers = [self.copy_name]
        # The archive names can only be derived once the mailbox exists.
        if self.mbox_name:
            archive = self.mbox_name + "_archive"
            leftovers.extend([self.mbox_name, archive, archive + ".gz"])
        for name in leftovers:
            if name and os.path.exists(name):
                os.remove(name)
class TestArchiveMboxUncompressedOld(unittest.TestCase):
"""make sure that the 'no_compress' option works"""
mbox_name = None
@ -646,27 +718,44 @@ class TestArchiveMboxMode(unittest.TestCase):
########## helper routines ############
def make_message(hours_old=0, status=None):
time_message = time.time() - (60 * 60 * hours_old)
time_string = time.asctime(time.localtime(time_message))
def make_message(body=None, date=None, delivery_date=None, from_address=None,
hours_old=0, resent_date=None, status=None, subject=None, to_address=None,
unix_from=None, x_status=None):
msg = """From sender@domain %s
From: sender@domain
To: receipient@domain
Subject: This is a dummy message
Date: %s
""" % (time_string, time_string)
if not date:
time_message = time.time() - (60 * 60 * hours_old)
date = time.asctime(time.localtime(time_message))
if not from_address:
from_address = "sender@dummy.domain"
if not to_address:
to_address = "receipient@dummy.domain"
if not subject:
subject = "This is the subject"
if not unix_from:
unix_from = "From %s %s" % (from_address, date)
if not body:
body = "This is the message body"
msg = ""
if unix_from:
msg = msg + ("%s\n" % unix_from)
if date:
msg = msg + ("Date: %s\n" % date)
if delivery_date:
msg = msg + ("Delivery-Date: %s\n" % delivery_date)
if resent_date:
msg = msg + ("Resent-Date: %s\n" % resent_date)
if status:
msg = msg + ("Status: %s\n" % status)
msg = msg + """
This is the message body.
It's very exciting.
"""
if x_status:
msg = msg + ("X-Status: %s\n" % x_status)
if from_address:
msg = msg + ("From: %s\n" % from_address)
if to_address:
msg = msg + ("To: %s\n" % to_address)
if subject:
msg = msg + ("Subject: %s\n" % subject)
msg = msg + "\n\n" + body + "\n\n"
return msg
@ -680,11 +769,12 @@ def append_file(source, dest):
read.close()
write.close()
def make_mbox(messages=1, hours_old=0, status=None):
def make_mbox(messages=1, hours_old=0, status=None, x_status=None):
name = tempfile.mktemp()
file = open(name, "w")
for count in range(messages):
file.write(make_message(hours_old=hours_old, status=status))
file.write(make_message(hours_old=hours_old, status=status, \
x_status=x_status))
file.close()
return name