Saturday, December 6, 2008

Rewriting Matt Wilson's ugly code

Matt Wilson posted asking for help cleaning up some date manipulation code he had written. Here's my attempt:

import simplejson
from datetime import date, datetime, timedelta
import calendar

def get_dict_or_cookie_value(key, d, s):
    """
    Return the value stored under key, preferring d over the cookie s.

    d is consulted first.  When d has no truthy value for key, the cookie
    s is checked: if it holds a non-empty value for key, that value is
    JSON-decoded and returned.  Otherwise whatever d.get(key) produced is
    returned (None when the key is missing from d).
    """
    value = d.get(key)
    # "key in s" replaces dict.has_key(), which no longer exists in
    # Python 3; the membership test behaves the same on Python 2.
    if not value and key in s and s[key].value:
        value = simplejson.loads(s[key].value)
    return value

def _get_date(key, d, s, dateformats):
    """Return the date found under key in d or cookie s, or None if absent."""
    source = get_dict_or_cookie_value(key, d, s)
    if not source:
        return None
    return stubborn_datetimeparser(source, dateformats).date()


def get_start_and_stop_dates(d, s):
    """
    Returns a tuple of datetime.date objects.

    First checks dictionary d, then looks in the cookie s, then returns
    the first and last day of the month.

    We return values from the dictionary d, even if the values exist in
    simple_cookie s:

    >>> d = {'start_date':'12-07-2008', 'stop_date':'12-20-2008'}
    >>> import Cookie, simplejson
    >>> s = Cookie.SimpleCookie()
    >>> s['start_date'] = simplejson.dumps('12-08-2008')
    >>> s['stop_date'] = simplejson.dumps('12-11-2008')
    >>> a, b = get_start_and_stop_dates(d, s)
    >>> from datetime import date
    >>> isinstance(a, date) and isinstance(b, date)
    True
    >>> a.strftime('%m-%d-%Y'), b.strftime('%m-%d-%Y')
    ('12-07-2008', '12-20-2008')

    If the dictionary d doesn't have values, then we get them from the
    simple_cookie object s:

    >>> a, b = get_start_and_stop_dates({}, s)
    >>> from datetime import date
    >>> isinstance(a, date) and isinstance(b, date)
    True
    >>> a.strftime('%m-%d-%Y'), b.strftime('%m-%d-%Y')
    ('12-08-2008', '12-11-2008')

    We handle mix-and-match scenarios, like where one value is in d and
    another is in s:

    >>> s2 = Cookie.SimpleCookie()
    >>> s2['stop_date'] = simplejson.dumps('2-28-1975')
    >>> get_start_and_stop_dates({'start_date':'2-17-1975'}, s2)
    (datetime.date(1975, 2, 17), datetime.date(1975, 2, 28))

    When just one of the dates is specified, then the other will be
    the first/last day of the month containing the other date:

    >>> get_start_and_stop_dates({'start_date':'2-17-1975'},
    ...     Cookie.SimpleCookie())
    (datetime.date(1975, 2, 17), datetime.date(1975, 2, 28))

    >>> get_start_and_stop_dates({'stop_date':'2-17-1975'},
    ...     Cookie.SimpleCookie())
    (datetime.date(1975, 2, 1), datetime.date(1975, 2, 17))

    Finally, when neither date is found anywhere, we use the first and
    last days of the current month.

    Raises ValueError (from stubborn_datetimeparser) when a value is
    found but matches none of the accepted date formats.
    """
    # These are the dateformats that the dates will be in.
    dateformats = ['%m-%d-%Y', '%Y-%m-%d', '%Y-%m-%d %H:%M:%S']

    start_date = _get_date('start_date', d, s, dateformats)
    stop_date = _get_date('stop_date', d, s, dateformats)

    # If we found one date but not the other, the missing one is taken
    # from the month of the date we did find, not the current month.
    # datetime.now() is only reached when neither date was found.
    month_source = start_date or stop_date or datetime.now()

    if not start_date:
        # first day of the month
        start_date = date(month_source.year, month_source.month, 1)

    if not stop_date:
        # last day of the month; monthrange() returns
        # (weekday of the first day, number of days in the month)
        last_day = calendar.monthrange(month_source.year,
                                       month_source.month)[1]
        stop_date = date(month_source.year, month_source.month, last_day)

    return (start_date, stop_date)


def stubborn_datetimeparser(s, dateformats):
    """
    Parse s into a datetime object, trying each entry of dateformats in
    order and returning the result of the first one that matches.

    When the first format works, we immediately return:

    >>> dateformats = ['%Y-%m-%d', '%m-%d-%Y', '%m-%d-%Y %H:%M']
    >>> stubborn_datetimeparser('2008-12-1', dateformats)
    datetime.datetime(2008, 12, 1, 0, 0)

    Otherwise, we keep trying the later formats until one parses it:

    >>> stubborn_datetimeparser('12-1-2008', dateformats)
    datetime.datetime(2008, 12, 1, 0, 0)

    >>> stubborn_datetimeparser('12-1-2008 15:47', dateformats)
    datetime.datetime(2008, 12, 1, 15, 47)

    If no format matches, a ValueError is raised:

    >>> stubborn_datetimeparser('12/1/2008', dateformats)
    Traceback (most recent call last):
        ...
    ValueError: I couldn't parse '12/1/2008' with any of my formats!
    """
    for candidate in dateformats:
        try:
            parsed = datetime.strptime(s, candidate)
        except ValueError:
            # This format did not match; move on to the next one.
            continue
        return parsed

    # Every format was tried (or there were none) and nothing matched.
    raise ValueError("I couldn't parse '%s' with any of my formats!" % s)


[Updated with all of the code. D'oh!]

5 comments:

Matt Wilson said...

Where is get_dict_or_cookie_value defined?

Doug Hellmann said...

I missed some of the output of pygments in the first post, but it's fixed now.

nes said...

import simplejson
from datetime import date, datetime, timedelta
import calendar

def get_dict_or_cookie_value(key, d, s):
    """
    Return the value stored under key, preferring d over the cookie s.

    A truthy value in d is returned as-is and the cookie is never
    touched.  Otherwise, if the cookie s holds a non-empty value for key,
    that value is JSON-decoded and returned.  Failing both, whatever
    d.get(key) produced is returned (None when the key is missing).
    """
    value = d.get(key)
    if value:
        return value

    # Only look at the cookie when d had nothing usable: decoding it
    # eagerly would let a malformed cookie break calls that never need
    # it.  An empty cookie value is skipped rather than fed to the JSON
    # decoder, which would reject it.
    morsel = s.get(key)
    if morsel is not None and morsel.value:
        return simplejson.loads(morsel.value)
    return value

def get_start_and_stop_dates(d, s):
    """
    Work out a (start, stop) pair of datetime.date objects.

    Each date is looked up in dictionary d first and in the cookie s
    second.  Anything still missing is filled in from the first/last day
    of a month.

    Values in the dictionary d win, even when the cookie s has values
    too:

    >>> d = {'start_date':'12-07-2008', 'stop_date':'12-20-2008'}
    >>> import Cookie, simplejson
    >>> s = Cookie.SimpleCookie()
    >>> s['start_date'] = simplejson.dumps('12-08-2008')
    >>> s['stop_date'] = simplejson.dumps('12-11-2008')
    >>> a, b = get_start_and_stop_dates(d, s)
    >>> from datetime import date
    >>> isinstance(a, date) and isinstance(b, date)
    True
    >>> a.strftime('%m-%d-%Y'), b.strftime('%m-%d-%Y')
    ('12-07-2008', '12-20-2008')

    With nothing in the dictionary d, the cookie s supplies the values:

    >>> a, b = get_start_and_stop_dates({}, s)
    >>> from datetime import date
    >>> isinstance(a, date) and isinstance(b, date)
    True
    >>> a.strftime('%m-%d-%Y'), b.strftime('%m-%d-%Y')
    ('12-08-2008', '12-11-2008')

    The two sources can be mixed, one value from d and the other from s:

    >>> s2 = Cookie.SimpleCookie()
    >>> s2['stop_date'] = simplejson.dumps('2-28-1975')
    >>> get_start_and_stop_dates({'start_date':'2-17-1975'}, s2)
    (datetime.date(1975, 2, 17), datetime.date(1975, 2, 28))

    If only one date is given, the other becomes the first/last day of
    the month that contains the given date:

    >>> get_start_and_stop_dates({'start_date':'2-17-1975'},
    ...     Cookie.SimpleCookie())
    (datetime.date(1975, 2, 17), datetime.date(1975, 2, 28))

    >>> get_start_and_stop_dates({'stop_date':'2-17-1975'},
    ...     Cookie.SimpleCookie())
    (datetime.date(1975, 2, 1), datetime.date(1975, 2, 17))

    If neither date is given, the first and last days of the current
    month are returned.
    """
    # Formats the incoming date strings are expected to use.
    dateformats = ['%m-%d-%Y', '%Y-%m-%d', '%Y-%m-%d %H:%M:%S']

    # Resolve both endpoints the same way: look the raw value up, then
    # parse it down to a date when there is one.
    found = {}
    for label in ('start_date', 'stop_date'):
        raw = get_dict_or_cookie_value(label, d, s)
        if raw:
            found[label] = stubborn_datetimeparser(raw, dateformats).date()
        else:
            found[label] = None

    start_date = found['start_date']
    stop_date = found['stop_date']

    # A missing endpoint is derived from the month of the endpoint we do
    # have; the current month is used only when both are missing.
    anchor = start_date or stop_date or datetime.now()
    year, month = anchor.year, anchor.month

    if start_date is None:
        start_date = date(year, month, 1)

    if stop_date is None:
        # monthrange() gives (weekday of day 1, number of days in month).
        days_in_month = calendar.monthrange(year, month)[1]
        stop_date = date(year, month, days_in_month)

    return (start_date, stop_date)


def stubborn_datetimeparser(s, dateformats):
    """
    Turn the string s into a datetime object by trying the formats in
    dateformats one after another until one of them fits.

    A string matching the first format is parsed straight away:

    >>> dateformats = ['%Y-%m-%d', '%m-%d-%Y', '%m-%d-%Y %H:%M']
    >>> stubborn_datetimeparser('2008-12-1', dateformats)
    datetime.datetime(2008, 12, 1, 0, 0)

    A string matching only a later format is still parsed:

    >>> stubborn_datetimeparser('12-1-2008', dateformats)
    datetime.datetime(2008, 12, 1, 0, 0)

    >>> stubborn_datetimeparser('12-1-2008 15:47', dateformats)
    datetime.datetime(2008, 12, 1, 15, 47)

    A string matching none of the formats raises a ValueError:

    >>> stubborn_datetimeparser('12/1/2008', dateformats)
    Traceback (most recent call last):
        ...
    ValueError: I couldn't parse '12/1/2008' with any of my formats!
    """
    result = None

    for fmt in dateformats:
        try:
            result = datetime.strptime(s, fmt)
        except ValueError:
            pass
        else:
            # Parsed successfully; stop trying further formats.
            break
    else:
        # The loop ran to completion without hitting the break, so no
        # format matched.
        raise ValueError("I couldn't parse '%s' with any of my formats!" % s)

    return result

Doug Hellmann said...

Nice use of a closure for get_date(). Too bad Blogger's comment formatting makes that code so hard to read -- do you have a blog somewhere you can post it?

nes said...

Yes, the formatting comes out pretty bad, and the comment form doesn't seem to allow "pre" tags. You know, I meant to start blogging about Python a long time ago and set up an account, but I have been procrastinating, and now I can't remember what I called it anymore. It doesn't help that at work half of the internet is blocked. Time to go through old emails and try to revive that blog after work, I guess.