Sunday, April 27, 2008

PyMOTW: functools

The functools module includes tools for wrapping functions and other callable objects.

Module: functools
Purpose: Tools for making decorators and other function wrappers.
Python Version: new in 2.5

Description:

The primary tool supplied by the functools module is the class partial, which can be used to "wrap" a callable with default arguments. The resulting object is itself callable and can be treated as though it is the original function. It takes all of the same arguments as the original callable and can be invoked with extra positional or named arguments as well.

partial:

This example shows two simple partial objects for the function myfunc(). Notice that show_details() prints the func, args, and keywords attributes of the partial object.

import functools

def myfunc(a, b=2):
"""Docstring for myfunc()."""
print '\tcalled myfunc with:', (a, b)
return

def show_details(name, f, is_partial=False):
"""Show details of a callable object."""
print '%s:' % name
print '\tobject:', f
if not is_partial:
print '\t__name__:', f.__name__
print '\t__doc__', repr(f.__doc__)
if is_partial:
print '\tfunc:', f.func
print '\targs:', f.args
print '\tkeywords:', f.keywords
return

show_details('myfunc', myfunc)
myfunc('a', 3)
print

p1 = functools.partial(myfunc, b=4)
show_details('partial with named default', p1, True)
p1('default a')
p1('override b', b=5)
print

p2 = functools.partial(myfunc, 'default a', b=99)
show_details('partial with defaults', p2, True)
p2()
p2(b='override b')
print

print 'Insufficient arguments:'
p1()


At the end of the example, the first partial created is invoked without passing a value for a, causing an exception.


$ python functools_partial.py
myfunc:
object: <function myfunc at 0x7cbf0>
__name__: myfunc
__doc__ 'Docstring for myfunc().'
called myfunc with: ('a', 3)

partial with named default:
object: <functools.partial object at 0x74ea0>
__doc__ 'partial(func, *args, **keywords) - new function with partial application\n\tof the given arguments and keywords.\n'
func: <function myfunc at 0x7cbf0>
args: ()
keywords: {'b': 4}
called myfunc with: ('default a', 4)
called myfunc with: ('override b', 5)

partial with defaults:
object: <functools.partial object at 0x74ed0>
__doc__ 'partial(func, *args, **keywords) - new function with partial application\n\tof the given arguments and keywords.\n'
func: <function myfunc at 0x7cbf0>
args: ('default a',)
keywords: {'b': 99}
called myfunc with: ('default a', 99)
called myfunc with: ('default a', 'override b')

Insufficient arguments:
Traceback (most recent call last):
File "/Users/dhellmann/Documents/PyMOTW/in_progress/functools/functools_partial.py", line 48, in <module>
p1()
TypeError: myfunc() takes at least 1 non-keyword argument (0 given)


update_wrapper:

As illustrated in the previous example, the partial object does not have a __name__ attribute by default, and its __doc__ is the generic docstring of the partial class rather than that of the wrapped function. Losing those attributes for decorated functions makes them more difficult to debug. By using update_wrapper(), you can copy or add attributes from the original function to the partial object.

import functools

def myfunc(a, b=2):
"""Docstring for myfunc()."""
print '\tcalled myfunc with:', (a, b)
return

def show_details(name, f):
"""Show details of a callable object."""
print '%s:' % name
print '\tobject:', f
print '\t__name__:',
try:
print f.__name__
except AttributeError:
print '(no __name__)'
print '\t__doc__', repr(f.__doc__)
print
return

show_details('myfunc', myfunc)

p1 = functools.partial(myfunc, b=4)
show_details('raw wrapper', p1)

print 'Updating wrapper:'
print '\tassign:', functools.WRAPPER_ASSIGNMENTS
print '\tupdate:', functools.WRAPPER_UPDATES
print

functools.update_wrapper(p1, myfunc)
show_details('updated wrapper', p1)


The attributes added to the wrapper are defined in functools.WRAPPER_ASSIGNMENTS, while functools.WRAPPER_UPDATES lists values to be modified.


$ python functools_update_wrapper.py
myfunc:
object: <function myfunc at 0x7cb30>
__name__: myfunc
__doc__ 'Docstring for myfunc().'

raw wrapper:
object: <functools.partial object at 0x74f30>
__name__: (no __name__)
__doc__ 'partial(func, *args, **keywords) - new function with partial application\n\tof the given arguments and keywords.\n'

Updating wrapper:
assign: ('__module__', '__name__', '__doc__')
update: ('__dict__',)

updated wrapper:
object: <functools.partial object at 0x74f30>
__name__: myfunc
__doc__ 'Docstring for myfunc().'


Methods and Other Callables:

Partials work with any callable object, including methods and instances.

import functools

class MyClass(object):
"""Demonstration class for functools"""

def meth1(self, a, b=2):
"""Docstring for meth1()."""
print '\tcalled meth1 with:', (self, a, b)
return

def meth2(self, c, d=5):
"""Docstring for meth2"""
print '\tcalled meth2 with:', (self, c, d)
return
wrapped_meth2 = functools.partial(meth2, 'wrapped c')
functools.update_wrapper(wrapped_meth2, meth2)

def __call__(self, e, f=6):
"""Docstring for MyClass.__call__"""
print '\tcalled object with:', (self, e, f)
return

def show_details(name, f):
"""Show details of a callable object."""
print '%s:' % name
print '\tobject:', f
print '\t__name__:',
try:
print f.__name__
except AttributeError:
print '(no __name__)'
print '\t__doc__', repr(f.__doc__)
return

o = MyClass()

show_details('meth1 straight', o.meth1)
o.meth1('no default for a', b=3)
print

p1 = functools.partial(o.meth1, b=4)
functools.update_wrapper(p1, o.meth1)
show_details('meth1 wrapper', p1)
p1('a goes here')
print

show_details('meth2', o.meth2)
o.meth2('no default for c', d=6)
print

show_details('wrapped meth2', o.wrapped_meth2)
o.wrapped_meth2('no default for c', d=6)
print

show_details('instance', o)
o('no default for e')
print

p2 = functools.partial(o, f=7)
show_details('instance wrapper', p2)
p2('e goes here')



$ python functools_method.py
meth1 straight:
object: <bound method MyClass.meth1 of <__main__.MyClass object at 0x7ecd0>>
__name__: meth1
__doc__ 'Docstring for meth1().'
called meth1 with: (<__main__.MyClass object at 0x7ecd0>, 'no default for a', 3)

meth1 wrapper:
object: <functools.partial object at 0x81060>
__name__: meth1
__doc__ 'Docstring for meth1().'
called meth1 with: (<__main__.MyClass object at 0x7ecd0>, 'a goes here', 4)

meth2:
object: <bound method MyClass.meth2 of <__main__.MyClass object at 0x7ecd0>>
__name__: meth2
__doc__ 'Docstring for meth2'
called meth2 with: (<__main__.MyClass object at 0x7ecd0>, 'no default for c', 6)

wrapped meth2:
object: <functools.partial object at 0x74f90>
__name__: meth2
__doc__ 'Docstring for meth2'
called meth2 with: ('wrapped c', 'no default for c', 6)

instance:
object: <__main__.MyClass object at 0x7ecd0>
__name__: (no __name__)
__doc__ 'Demonstration class for functools'
called object with: (<__main__.MyClass object at 0x7ecd0>, 'no default for e', 6)

instance wrapper:
object: <functools.partial object at 0x81090>
__name__: (no __name__)
__doc__ 'partial(func, *args, **keywords) - new function with partial application\n\tof the given arguments and keywords.\n'
called object with: (<__main__.MyClass object at 0x7ecd0>, 'e goes here', 7)


wraps:

As mentioned earlier, these capabilities are especially useful when used in decorators, since the decorated function ends up with properties of the original, "raw", function. functools provides a convenience function, wraps(), to be used as a decorator itself and to apply update_wrapper() automatically.

import functools

def show_details(name, f):
"""Show details of a callable object."""
print '%s:' % name
print '\tobject:', f
print '\t__name__:',
try:
print f.__name__
except AttributeError:
print '(no __name__)'
print '\t__doc__', repr(f.__doc__)
print
return

def simple_decorator(f):
@functools.wraps(f)
def decorated(a='decorated defaults', b=1):
print '\tdecorated:', (a, b)
print '\t',
f(a, b=b)
return
return decorated

def myfunc(a, b=2):
print '\tmyfunc:', (a,b)
return

show_details('myfunc', myfunc)
myfunc('unwrapped, default b')
myfunc('unwrapped, passing b', 3)
print

wrapped_myfunc = simple_decorator(myfunc)
show_details('wrapped_myfunc', wrapped_myfunc)
wrapped_myfunc()
wrapped_myfunc('args to decorated', 4)



$ python functools_wraps.py
myfunc:
object: <function myfunc at 0x7cc70>
__name__: myfunc
__doc__ None

myfunc: ('unwrapped, default b', 2)
myfunc: ('unwrapped, passing b', 3)

wrapped_myfunc:
object: <function myfunc at 0x7ccb0>
__name__: myfunc
__doc__ None

decorated: ('decorated defaults', 1)
myfunc: ('decorated defaults', 1)
decorated: ('args to decorated', 4)
myfunc: ('args to decorated', 4)


References:

Python Module of the Week Home
Download Sample Code


Technorati Tags:
,


Friday, April 25, 2008

Python Magazine for April 2008

The April issue of Python Magazine is ready for download now.

This month's cover story from Zach Voase introduces bioinformatics with Python using BioPython. It's amazing how easy it is to work with gene sequences in Python.

Jeff Scudder provides an excellent article about using the Google Spreadsheet API like a database. We use Google Spreadsheet to manage some parts of the magazine, so I'm definitely looking forward to applying some of these techniques myself.

You will recognize Eugen Wintersberger from previous issues. This month he returns to explain how to use PyTables and HDF5 to work with large datasets. As compute power and storage capacity have increased over the years, the algorithms and tools for processing those datasets have evolved to take advantage of them.

Every good editor and IDE includes customization features, and Mac OS X's XCode is no exception. JC Cruz introduces techniques for customizing XCode through Python scripts.

Mark Mruss' column this month features the AVC library for GUI programming. The "Application View Controller" connects the widgets on the screen to variables, automatically syncing between them, saving you a lot of manual work.

To cap off the issue, we also have plenty of PyCon 2008 coverage from both Steve Holden and me. I had a lot of fun, and I'm already planning my trip for next year.

I hope you'll head over to the site and download your copy now!

Sunday, April 20, 2008

PyMOTW: filecmp

Compare files and directories easily with the filecmp module.

Module: filecmp
Purpose: Compare files and directories on the filesystem.
Python Version: 2.1 and later

Example Data:

The examples in the discussion below use these test files:


$ ls -Rlast example/
total 0
0 drwxr-xr-x 4 dhellmann dhellmann 136 Apr 20 17:04 .
0 drwxr-xr-x 9 dhellmann dhellmann 306 Apr 20 17:04 ..
0 drwxr-xr-x 8 dhellmann dhellmann 272 Apr 20 17:04 dir1
0 drwxr-xr-x 8 dhellmann dhellmann 272 Apr 20 17:04 dir2

example//dir1:
total 32
0 drwxr-xr-x 8 dhellmann dhellmann 272 Apr 20 17:04 .
0 drwxr-xr-x 4 dhellmann dhellmann 136 Apr 20 17:04 ..
0 drwxr-xr-x 2 dhellmann dhellmann 68 Apr 20 17:04 common_dir
8 -rw-r--r-- 1 dhellmann dhellmann 21 Apr 20 17:04 common_file
0 drwxr-xr-x 2 dhellmann dhellmann 68 Apr 20 17:04 dir_only_in_dir1
8 -rw-r--r-- 1 dhellmann dhellmann 22 Apr 20 17:04 file_in_dir1
8 -rw-r--r-- 1 dhellmann dhellmann 22 Apr 20 17:04 file_only_in_dir1
8 -rw-r--r-- 1 dhellmann dhellmann 17 Apr 20 17:04 not_the_same

example//dir2:
total 24
0 drwxr-xr-x 8 dhellmann dhellmann 272 Apr 20 17:04 .
0 drwxr-xr-x 4 dhellmann dhellmann 136 Apr 20 17:04 ..
0 drwxr-xr-x 2 dhellmann dhellmann 68 Apr 20 17:04 common_dir
8 -rw-r--r-- 1 dhellmann dhellmann 21 Apr 20 17:04 common_file
0 drwxr-xr-x 2 dhellmann dhellmann 68 Apr 20 17:04 dir_only_in_dir2
0 drwxr-xr-x 2 dhellmann dhellmann 68 Apr 20 17:04 file_in_dir1
8 -rw-r--r-- 1 dhellmann dhellmann 22 Apr 20 17:04 file_only_in_dir2
8 -rw-r--r-- 1 dhellmann dhellmann 17 Apr 20 17:04 not_the_same


The same directory structure is repeated one time under the "common_dir" directories to give interesting recursive comparison options.

Comparing Files:

The filecmp module includes functions and a class for comparing files and directories on the filesystem. If you need to compare two files, use the cmp() function.

import filecmp

print 'common_file:',
print filecmp.cmp('example/dir1/common_file',
'example/dir2/common_file'),
print filecmp.cmp('example/dir1/common_file',
'example/dir2/common_file',
shallow=False)

print 'not_the_same:',
print filecmp.cmp('example/dir1/not_the_same',
'example/dir2/not_the_same'),
print filecmp.cmp('example/dir1/not_the_same',
'example/dir2/not_the_same',
shallow=False)

print 'identical:',
print filecmp.cmp('example/dir1/file_only_in_dir1',
'example/dir1/file_only_in_dir1'),
print filecmp.cmp('example/dir1/file_only_in_dir1',
'example/dir1/file_only_in_dir1',
shallow=False)


By default, cmp() looks only at the information available from os.stat(). The shallow argument tells cmp() whether to look at the contents of the file, as well. The default is to perform a shallow comparison, without looking inside the files. Notice that files of the same size created at the same time seem to be the same if their contents are not compared.


$ python filecmp_cmp.py
common_file: True True
not_the_same: True False
identical: True True


To compare a set of files in two directories without recursing, use filecmp.cmpfiles(). The arguments are the names of the directories and a list of files to be checked in the two locations. The list of common files should contain only filenames (directories always seem to result in a mismatch) and the files must be present in both locations. The code below shows a simple way to build the common list. If you have a shorter formula, post it in the comments. The comparison also takes the shallow flag, just as with cmp().

import filecmp
import os

# Determine the items that exist in both directories
d1_contents = set(os.listdir('example/dir1'))
d2_contents = set(os.listdir('example/dir2'))
common = list(d1_contents & d2_contents)
common_files = [ f
for f in common
if os.path.isfile(os.path.join('example/dir1', f))
]
print 'Common files:', common_files

# Compare the directories
match, mismatch, errors = filecmp.cmpfiles('example/dir1',
'example/dir2',
common_files)
print 'Match:', match
print 'Mismatch:', mismatch
print 'Errors:', errors


cmpfiles() returns three lists of filenames for files that match, files that do not match, and files that could not be compared (due to permission problems or for any other reason).


$ python filecmp_cmpfiles.py
Common files: ['not_the_same', 'file_in_dir1', 'common_file']
Match: ['not_the_same', 'common_file']
Mismatch: ['file_in_dir1']
Errors: []



Using dircmp:

The functions described above are suitable for relatively simple comparisons, but for recursive comparison of large directory trees or for more complete analysis, the dircmp class is more useful. In its simplest use case, you can print a report comparing two directories with the report() method:

import filecmp

filecmp.dircmp('example/dir1', 'example/dir2').report()


The output is a plain-text report showing the results of just the contents of the directories given, without recursing. In this case, the file "not_the_same" is thought to be the same because the contents are not being compared. There doesn't seem to be a way to have dircmp compare the contents of files like cmp() can.


$ python filecmp_dircmp_report.py
diff example/dir1 example/dir2
Only in example/dir1 : ['dir_only_in_dir1', 'file_only_in_dir1']
Only in example/dir2 : ['dir_only_in_dir2', 'file_only_in_dir2']
Identical files : ['common_file', 'not_the_same']
Common subdirectories : ['common_dir']
Common funny cases : ['file_in_dir1']


For more detail, and a recursive comparison, use report_full_closure():

import filecmp

filecmp.dircmp('example/dir1', 'example/dir2').report_full_closure()


The output includes comparisons of all parallel subdirectories.


$ python filecmp_dircmp_report_full_closure.py
diff example/dir1 example/dir2
Only in example/dir1 : ['dir_only_in_dir1', 'file_only_in_dir1']
Only in example/dir2 : ['dir_only_in_dir2', 'file_only_in_dir2']
Identical files : ['common_file', 'not_the_same']
Common subdirectories : ['common_dir']
Common funny cases : ['file_in_dir1']

diff example/dir1/common_dir example/dir2/common_dir
Common subdirectories : ['dir1', 'dir2']

diff example/dir1/common_dir/dir2 example/dir2/common_dir/dir2
Identical files : ['common_file', 'file_only_in_dir2', 'not_the_same']
Common subdirectories : ['common_dir', 'dir_only_in_dir2', 'file_in_dir1']

diff example/dir1/common_dir/dir2/common_dir example/dir2/common_dir/dir2/common_dir

diff example/dir1/common_dir/dir2/dir_only_in_dir2 example/dir2/common_dir/dir2/dir_only_in_dir2

diff example/dir1/common_dir/dir2/file_in_dir1 example/dir2/common_dir/dir2/file_in_dir1

diff example/dir1/common_dir/dir1 example/dir2/common_dir/dir1
Identical files : ['common_file', 'file_in_dir1', 'file_only_in_dir1', 'not_the_same']
Common subdirectories : ['common_dir', 'dir_only_in_dir1']

diff example/dir1/common_dir/dir1/common_dir example/dir2/common_dir/dir1/common_dir

diff example/dir1/common_dir/dir1/dir_only_in_dir1 example/dir2/common_dir/dir1/dir_only_in_dir1


Using differences in your program:

Besides producing printed reports, dircmp calculates useful lists of files you can use in your programs directly. Each of the following attributes is calculated only when requested, so instantiating a dircmp does not incur a lot of extra overhead.

The files and subdirectories contained in the directories being compared are listed in left_list and right_list:

import filecmp

dc = filecmp.dircmp('example/dir1', 'example/dir2')
print 'Left :', dc.left_list
print 'Right:', dc.right_list



$ python filecmp_dircmp_list.py
Left : ['common_dir', 'common_file', 'dir_only_in_dir1', 'file_in_dir1', 'file_only_in_dir1', 'not_the_same']
Right: ['common_dir', 'common_file', 'dir_only_in_dir2', 'file_in_dir1', 'file_only_in_dir2', 'not_the_same']


The inputs can be filtered by passing a list of names to ignore to the constructor. By default the names RCS, CVS, and tags are ignored.

import filecmp

dc = filecmp.dircmp('example/dir1', 'example/dir2', ignore=['common_file'])
print 'Left :', dc.left_list
print 'Right:', dc.right_list


In this case, the "common_file" is left out of the list of files to be compared.


$ python filecmp_dircmp_list_filter.py
Left : ['common_dir', 'dir_only_in_dir1', 'file_in_dir1', 'file_only_in_dir1', 'not_the_same']
Right: ['common_dir', 'dir_only_in_dir2', 'file_in_dir1', 'file_only_in_dir2', 'not_the_same']


The set of files common to both input directories is maintained in common, and the files unique to each directory are listed in left_only and right_only.

import filecmp

dc = filecmp.dircmp('example/dir1', 'example/dir2')
print 'Common:', dc.common
print 'Left :', dc.left_only
print 'Right :', dc.right_only



$ python filecmp_dircmp_membership.py
Common: ['not_the_same', 'common_file', 'file_in_dir1', 'common_dir']
Left : ['dir_only_in_dir1', 'file_only_in_dir1']
Right : ['dir_only_in_dir2', 'file_only_in_dir2']


The common members can be further broken down into files, directories and "funny" items (anything that has a different type in the two directories or where there is an error from os.stat()).

import filecmp

dc = filecmp.dircmp('example/dir1', 'example/dir2')
print 'Common :', dc.common
print 'Directories:', dc.common_dirs
print 'Files :', dc.common_files
print 'Funny :', dc.common_funny


In the example data, the item named "file_in_dir1" is a file in one directory and a subdirectory in the other, so it shows up in the "funny" list.


$ python filecmp_dircmp_common.py
Common : ['not_the_same', 'common_file', 'file_in_dir1', 'common_dir']
Directories: ['common_dir']
Files : ['not_the_same', 'common_file']
Funny : ['file_in_dir1']


The differences between files are broken down similarly:

import filecmp

dc = filecmp.dircmp('example/dir1', 'example/dir2')
print 'Same :', dc.same_files
print 'Different :', dc.diff_files
print 'Funny :', dc.funny_files


Remember, the file "not_the_same" is only being compared via os.stat, and the contents are not examined.


$ python filecmp_dircmp_diff.py
Same : ['not_the_same', 'common_file']
Different : []
Funny : []


Finally, the subdirectories are also mapped to new dircmp objects in the attribute subdirs to allow easy recursive comparison.

import filecmp

dc = filecmp.dircmp('example/dir1', 'example/dir2')
print 'Subdirectories:'
print dc.subdirs



$ python filecmp_dircmp_subdirs.py
Subdirectories:
{'common_dir': <filecmp.dircmp instance at 0x85da0>}



References:

Python Module of the Week Home
Download Sample Code


Technorati Tags:
,


tools for literate programming with Python?

I've had an idea kicking around in my head for a couple of days. It's one of those things I just can't seem to let go of, but I don't really have time to build it right now. I'm hoping someone out there has already had the same idea and written something that works sort of like what I want. Failing that, maybe someone looking for a project to start will like this idea.

I've seen discussions in the past of something called "literate programming". My understanding of the gist of the idea is that you write prose and code together in a file, then use a set of tools to split them apart as part of a build process. The benefits are supposed to be exceptionally well documented code, since you're essentially writing the documentation first and code later. I never really bought into that idea; it seemed like a lot of extra overhead. Unless you're writing libraries to be shared by other developers there's just not a need for so much documentation.

On the other hand, if what you're doing is writing about code, then it seems like a great idea. I've been doing a lot of writing about code lately, so I want to see if I can improve my tools. I'm specifically thinking of this for working on my Python Module of the Week series, but it could be useful in other areas as well. The way I blog now means that I have to make sure I regenerate all examples before posting, in case I've edited any source files along the way. So my cycle goes: read docs, write code, write description (pasting in code and examples), change code, fix pasted code and examples, repeat until done.

A quick Google search turned up a few tools I need to look into, but I'm not sure what I envision is literate programming as originally defined.

I want a tool that lets me write prose and code in the same file, then extract the code for use separately, but also run the code and re-process the input file. The idea would be to edit a single file, mark the sections that are code, mark an output area for each code block. I would then use a tool to extract and run the code with the output inserted back into the original file (replacing any output from previous runs, of course). Ideally the source file would be HTML or something close, since I have to convert to that for posting anyway. I would rather not have to learn some random new markup language that I could only use with this one tool, but something like Markdown or reST would be ok.

It doesn't seem like it would be that complicated to write a tool to do what I want using a library like BeautifulSoup to find the source blocks and output destinations. And I could set up my own trigger in TextMate to run it, so I wouldn't have to change editors. Before I spend a bunch of time implementing something I could just download, though, I thought it would be prudent to ask a couple of Dear Lazy Web questions:

1. Does something like what I describe exist?

2. If not, what literate programming tools for Python do you recommend? I may use them as inspiration for a design.

Thanks in advance for any suggestions.

Sunday, April 13, 2008

PyMOTW: fnmatch

Handle Unix-style filename comparison with the fnmatch module.

Module: fnmatch
Purpose: Compare filenames against Unix-style glob patterns.
Python Version: 1.4 and later.

Description:

The fnmatch module is used to compare filenames against glob-style patterns such as used by Unix shells.

Simple Matching:

fnmatch() compares a single filename against a pattern and returns a boolean indicating whether or not they match. If the operating system uses a case-insensitive filesystem, the comparison is not case sensitive. Otherwise it is.

import fnmatch
import os

pattern = 'fnmatch_*.py'
print 'Pattern :', pattern
print

files = os.listdir('.')
for name in files:
print 'Filename: %-25s %s' % (name, fnmatch.fnmatch(name, pattern))


In this example, the pattern matches all files starting with 'fnmatch_' and ending in '.py'.


$ python fnmatch_fnmatch.py
Pattern : fnmatch_*.py

Filename: .svn False
Filename: __init__.py False
Filename: fnmatch_filter.py True
Filename: fnmatch_fnmatch.py True
Filename: fnmatch_fnmatchcase.py True
Filename: fnmatch_translate.py True


To force a case-sensitive comparison, regardless of the filesystem and operating system settings, use fnmatchcase().

import fnmatch
import os

pattern = 'FNMATCH_*.PY'
print 'Pattern :', pattern
print

files = os.listdir('.')

for name in files:
print 'Filename: %-25s %s' % (name, fnmatch.fnmatchcase(name, pattern))


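Because fnmatchcase() always performs a case-sensitive comparison (and my laptop uses a case-sensitive filesystem in any case), none of the lower-case filenames match the modified, upper-case pattern.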


$ python fnmatch_fnmatchcase.py
Pattern : FNMATCH_*.PY

Filename: .svn False
Filename: __init__.py False
Filename: fnmatch_filter.py False
Filename: fnmatch_fnmatch.py False
Filename: fnmatch_fnmatchcase.py False
Filename: fnmatch_translate.py False


Filtering:

To test a sequence of filenames, you can use fnmatch.filter(). It returns a list of the names that match the pattern argument.

import fnmatch
import os

pattern = 'fnmatch_*.py'
print 'Pattern :', pattern

files = os.listdir('.')
print 'Files :', files

print 'Matches :', fnmatch.filter(files, pattern)


In this example, filter() returns the list of names of the example source files associated with this post.


$ python fnmatch_filter.py
Pattern : fnmatch_*.py
Files : ['.svn', '__init__.py', 'fnmatch_filter.py', 'fnmatch_fnmatch.py', 'fnmatch_fnmatchcase.py', 'fnmatch_translate.py']
Matches : ['fnmatch_filter.py', 'fnmatch_fnmatch.py', 'fnmatch_fnmatchcase.py', 'fnmatch_translate.py']


Translating Patterns:

Internally, fnmatch converts the glob pattern to a regular expression and uses the re module to compare the name and pattern. The translate() function is the public API for converting glob patterns to regular expressions.

import fnmatch

pattern = 'fnmatch_*.py'
print 'Pattern :', pattern
print 'Regex :', fnmatch.translate(pattern)


Notice that some of the characters are escaped to make a valid expression.


$ python fnmatch_translate.py
Pattern : fnmatch_*.py
Regex : fnmatch\_.*\.py$



References:

PyMOTW: glob
glob module documentation
Python Module of the Week Home
Download Sample Code


Technorati Tags:
,


Shell history, jigs, & subversion

Everyone else is showing theirs, so here's mine:

$ history|awk '{a[$2]++} END{for(i in a){printf "%5d\t%s\n",a[i],i}}'|sort -rn|head
162 svn
99 ls
80 rtop
69 sudo
63 cd
55 dotest
51 workon
23 make
21 close_branch
21 cl2svn


Software Jigs:

Does it say anything in particular about me that half of those commands are aliases or scripts I or my co-workers have created to wrap up other tools?

rtop - is a bash alias to change directory to the top of my sandbox. I have an environment variable pointing there, too, but I guess I don't like typing $.

dotest - is an alias to run tests with our tracing module turned on, preserving the output in the same log file each time. We have a very verbose trace module that prints function inputs and outputs as our program executes. It is superior to logging for low-level debugging, but entirely unsuitable for production use (it's easy to turn on and off).

workon - is a shell function that swaps out different sandboxes so I can work on multiple branches on the same system. Our test framework requires an installed version of the whole system, unfortunately, and I don't like to mix patches from multiple branches by copying files into the install tree. Running workon rearranges symlinks so I can replace the install tree with the build tree from my sandbox of choice. Shell functions are an under-appreciated implementation technique for something that has to operate on the current environment (workon changes directory to the new sandbox) but is more complicated than what would fit in an alias.

close_branch - is a bash script that takes a short branch name and deletes the branch and any "rebase" branches based on it using the long URL. We have a whole set of little scripts like this that we've written in house.

cl2svn - finds changes in ChangeLog files in my svn sandbox, extracts the new messages, and produces a single (sorted) output list formatted nicely to show up in trac. We use ChangeLog files and trac commit messages as part of the documentation for our code review process, so having everything formatted nicely is important. I used to do this by hand, but after one particularly large changeset I came up with this Python app to do the work for me.

Wrapping Subversion:

I mentioned close_branch as a subversion wrapper. There's a make_branch script, too, to save me from making typos in long URLs.

Another shell function, mksbox, finds a free sandbox in my pool and switches it to use a particular branch. Our build tree is pretty large, so it is way more efficient to just keep a bunch of sandboxes around and switch them to point to different branches with "svn switch" instead of checking out a full copy every time.

My favorite, though, is merge_branch, which figures out the start point of a svn branch and merges all of the changes from that branch into the current sandbox. I'm a little surprised that make_branch and merge_branch didn't show up higher in the list, but they're in the top 20.

We wrote these wrapper scripts a couple of years ago, when we switched from CVS to svn. We had similar tools for CVS, but branching worked differently and we didn't use branches as often then. Now every ticket gets its own branch, so managing branches is a daily operation. A typical development cycle for me looks something like this:


$ make_branch 6583 # that's a trac ticket number
$ mksbox 6583 # automatically does a workon for that sandbox
$ dcctl restart # restart our daemon services to pick up the sandbox change
# add feature or remove bug
# update ChangeLog files
$ cl2svn | tee changes.txt
$ svn commit -F changes.txt
# request code review for changeset
$ prepare4commit.sh # switch current sandbox to trunk & merge in the branch
$ docommit # commit, using the first line of changes.txt for log message
$ close_branch 6583 # clean up after myself


When we switched off of CVS, we had some particular needs that weren't met by svn directly (especially the way we do code reviews). There are a whole host of tools for wrapping svn out there now. sv-subversion looks interesting, but I haven't tried it. If our code didn't make assumptions about the install path, we could probably just use DivmodCombinator, which looks like it has a lot of the features we've rolled ourselves, but the inertia for changing now is pretty high, and the benefits aren't great enough.

Sunday, April 6, 2008

New release: virtualenvwrapper

Last week I finally cleaned up the wrapper code I've been using with Ian Bicking's virtualenv. The results are a set of bash functions imaginatively dubbed "virtualenvwrapper".

The basic idea is that you source the script in your ~/.bashrc, and then you can use the functions it defines to manage your virtual environments, either from other scripts or from the command line.

mkvirtualenv is a thin wrapper around virtualenv itself, and creates environments in a special directory, which you can control through the WORKON_HOME variable. Now that I think of it, that variable should probably have a different name. Oh, well.

Most of the time, you'll use the function workon. Without arguments, it will list the environments you have available. With a single argument, it switches to that environment.

Once you're done with an environment, use rmvirtualenv to remove it.

So it's nothing earth-shattering, but I find it very useful. I put it together because I create and delete virtual environments so frequently -- one per article and column for the magazine, plus one for each of my own projects -- that I really needed to enforce some sanity around them.

In addition to the organization, the extra feature workon adds is activation hooks for the environment. Each time you switch environments, workon looks for (and runs) $VIRTUAL_ENV/bin/predeactivate in the current environment (before switching) and $VIRTUAL_ENV/bin/postactivate in the new environment (after switching). The hooks are intended for saving editor state, loading new project files, etc.

For now there's just one file, but I'm still distributing it in a versioned tarball because that's just easier with my existing workflow and release tools. I'm not sure how to use distutils with a bash script, or even if that's a good idea, so it's a manual download, too.

One year of "The Python Module of the Week"

It's a bit passé to recognize blogging anniversaries, but as it's my first I'm going to do a little navel gazing retrospecting anyway. :-)

I just realized this afternoon that I had missed celebrating the first anniversary of PyMOTW by a few weeks. I started the series as an excuse to force myself to write something once a week. At the time, it seemed like a somewhat lame idea and I wasn't sure I would keep it up. There are any number of reference guides for the standard library out there. Sitting down to read through one isn't that exciting, though, so I thought writing example code with all of the modules would be a way to force myself to actually study the modules I didn't use on a regular basis.

The first real post from 25 March 2007 covered the fileinput module. It wasn't until several posts into the series that I started collecting and releasing the code through PyPI, so the version number for the source package is only up to 1.48 even though I've done more than 52 weeks' worth of modules. (The os module took 4 weeks, so I haven't done as many modules as weeks of posts.)

My Writing Process:

My process for creating the posts has changed substantially over the last year. The first few posts were posted through the web form on blogger.com. They consisted of a lot of hand-edited HTML combined with output from the web version of pygments (used to highlight the syntax in the code examples). I also used to write the prose for each post first, and the code samples later.

Now, I have the entire process reversed. I work through all of the code examples before writing any prose. The code comes more quickly, and I can revise and refactor it so the examples work together without having to go back and edit the rest of the text. Once I have the code finished, I use a combination of shortcuts I've built for TextMate and MarsEdit to assemble the post and write the prose portions. It takes me a lot less time to create a single post now that I've refined the workflow. The post on the operator module from today, for example, only took a couple of hours (with interruptions). It is a little skimpy on prose, though.

Future Plans:

Since the beginning, I've had a fair number of comments (online and off) from people who tell me that the posts have been personally useful to them. I appreciate that sort of feedback, and it motivates me to keep going. I'm running out of the "simple" modules, and as I've also started working on Python Magazine over the past year, I don't actually have the same amount of free time any more. Having a bit of extra motivation will spur me to pick up some of the bigger modules like email and elementtree.

At the rate I'm going, I'm not going to finish the whole library before Python 3.0 comes out, and the current plans call for some modules to be removed, deprecated APIs to be dropped, and other sorts of changes. The rules say some modules can even be renamed. When that settles down and there is an actual release, I'll probably stop writing about 2.x and pick up with 3.0. I haven't decided yet, though.

Python Module of the Week Home


Technorati Tags:
,




PyMOTW: operator

The operator module contains functions that perform the same operations as many of the built-in operators.

Module: operator
Purpose: Functional interface to built-in operators.
Python Version: 1.4 and later

Description:

Functional programming using iterators occasionally requires you to create small functions for simple expressions. Sometimes these can be expressed as lambda functions. But for some operations, you don't need to define your own function at all. The operator module defines functions that correspond to built-in operations for arithmetic and comparison, as well as sequence and dictionary operations.

Logical Operations:

There are logical operations for determining the boolean equivalent for a value, negating that to create the opposite boolean value, and comparing objects to see if they are identical.

from operator import *

a = -1
b = 5

print 'a =', a
print 'b =', b

print 'not_(a):', not_(a)
print 'truth(a):', truth(a)
print 'is_(a, b):', is_(a,b)
print 'is_not(a, b):', is_not(a,b)



$ python operator_boolean.py
a = -1
b = 5
not_(a): False
truth(a): True
is_(a, b): False
is_not(a, b): True


Comparison Operators:

All of the rich comparison operators are supported:

from operator import *

a = 1
b = 5.0

print 'a =', a
print 'b =', b
for func in (lt, le, eq, ne, ge, gt):
print '%s(a, b):' % func.__name__, func(a, b)



$ python operator_comparisons.py
a = 1
b = 5.0
lt(a, b): True
le(a, b): True
eq(a, b): False
ne(a, b): True
ge(a, b): False
gt(a, b): False


Arithmetic Operators:

The arithmetic operators for manipulating numerical values are also supported.

from operator import *

a = -1
b = 5.0
c = 2
d = 6

print 'a =', a
print 'b =', b
print 'c =', c
print 'd =', d

print '\nPositive/Negative:'
print 'abs(a):', abs(a)
print 'neg(a):', neg(a)
print 'neg(b):', neg(b)
print 'pos(a):', pos(a)
print 'pos(b):', pos(b)

print '\nArithmetic:'
print 'add(a, b):', add(a, b)
print 'div(a, b):', div(a, b)
print 'div(d, c):', div(d, c)
print 'floordiv(a, b):', floordiv(a, b)
print 'floordiv(d, c):', floordiv(d, c)
print 'mod(a, b):', mod(a, b)
print 'mul(a, b):', mul(a, b)
print 'pow(c, d):', pow(c, d)
print 'sub(b, a):', sub(b, a)
print 'truediv(a, b):', truediv(a, b)
print 'truediv(d, c):', truediv(d, c)

print '\nBitwise:'
print 'and_(c, d):', and_(c, d)
print 'invert(c):', invert(c)
print 'lshift(c, d):', lshift(c, d)
print 'or_(c, d):', or_(c, d)
print 'rshift(d, c):', rshift(d, c)
print 'xor(c, d):', xor(c, d)


Notice the two division operators: floordiv (pre-3.0 integer division) and truediv (floating point division).


$ python operator_math.py
a = -1
b = 5.0
c = 2
d = 6

Positive/Negative:
abs(a): 1
neg(a): 1
neg(b): -5.0
pos(a): -1
pos(b): 5.0

Arithmetic:
add(a, b): 4.0
div(a, b): -0.2
div(d, c): 3
floordiv(a, b): -1.0
floordiv(d, c): 3
mod(a, b): 4.0
mul(a, b): -5.0
pow(c, d): 64
sub(b, a): 6.0
truediv(a, b): -0.2
truediv(d, c): 3.0

Bitwise:
and_(c, d): 2
invert(c): -3
lshift(c, d): 128
or_(c, d): 6
rshift(d, c): 1
xor(c, d): 4


Sequence Operators:

The operators for working with sequences can be divided into roughly 4 groups: building up sequences, searching for items, accessing items, and removing items from sequences.

from operator import *

a = [ 1, 2, 3 ]
b = [ 'a', 'b', 'c' ]

print 'a =', a
print 'b =', b

print '\nConstructive:'
print 'concat(a, b):', concat(a, b)
print 'repeat(a, 3):', repeat(a, 3)

print '\nSearching:'
print 'contains(a, 1):', contains(a, 1)
print 'contains(b, "d"):', contains(b, "d")
print 'countOf(a, 1):', countOf(a, 1)
print 'countOf(b, "d"):', countOf(b, "d")
print 'indexOf(a, 5):', indexOf(a, 1)

print '\nAccess Items:'
print 'getitem(b, 1):', getitem(b, 1)
print 'getslice(a, 1, 3)', getslice(a, 1, 3)
print 'setitem(b, 1, "d"):', setitem(b, 1, "d"), ',after b =', b
print 'setslice(a, 1, 3, [4, 5]):', setslice(a, 1, 3, [4, 5]), ', after a =', a

print '\nDestructive:'
print 'delitem(b, 1):', delitem(b, 1), ',after b =', b
print 'delslice(a, 1, 3):', delslice(a, 1, 3), ', after a =', a



$ python operator_sequences.py
a = [1, 2, 3]
b = ['a', 'b', 'c']

Constructive:
concat(a, b): [1, 2, 3, 'a', 'b', 'c']
repeat(a, 3): [1, 2, 3, 1, 2, 3, 1, 2, 3]

Searching:
contains(a, 1): True
contains(b, "d"): False
countOf(a, 1): 1
countOf(b, "d"): 0
indexOf(a, 5): 0

Access Items:
getitem(b, 1): b
getslice(a, 1, 3) [2, 3]
setitem(b, 1, "d"): None ,after b = ['a', 'd', 'c']
setslice(a, 1, 3, [4, 5]): None , after a = [1, 4, 5]

Destructive:
delitem(b, 1): None ,after b = ['a', 'c']
delslice(a, 1, 3): None , after a = [1]


In-place Operators:

In addition to the standard operators, many types of objects support "in-place" modification through special operators such as +=. There are equivalent functions for in-place modifications, too:

from operator import *

a = -1
b = 5.0
c = [ 1, 2, 3 ]
d = [ 'a', 'b', 'c']
print 'a =', a
print 'b =', b