Sort Versions in Python
MAJOR EDIT: old answer was too unpythonic. Here are two prettier solutions.
So, I currently see about three ways of achieving the wished ordering, releases candidates "rc" before actual releases.
- my old, imperative-style ordering
- use "b" instead of "rc" in order to use
StrictVersion
, from the same package - extend the
Version
class to add support for arbitrary tags and tag ordering
1. Old, imperative-style ordering
from distutils.version import LooseVersion
versions = ["1.7.0", "1.7.0.rc0", "1.8.0"]
lv = [LooseVersion(v) for v in versions]
lv.sort()
sorted_rc = [v.vstring for v in lv]
import re
p = re.compile('rc\\d+$')
i = 0
# skip the first RCs
while i + 1 < len(sorted_rc):
m = p.search(sorted_rc[i])
if m:
i += 1
else:
break
while i + 1 < len(sorted_rc):
tmp = sorted_rc[i]
m = p.search(sorted_rc[i+1])
if m and sorted_rc[i+1].startswith(tmp):
sorted_rc[i] = sorted_rc[i+1]
sorted_rc[i+1] = tmp
i += 1
with this I get:
['1.7.0rc0', '1.7.0', '1.11.0']
2. Use "b" instead of "rc"
The package distutils.version
also has another class, StrictVersion
which does the job, if your 1.7.0.rc0
is allowed to be written as 1.7.0a0
or 1.7.0b0
noting alpha or beta releases.
That is:
from distutils.version import StrictVersion
versions = ["1.7.0", "1.7.0b0", "1.11.0"]
sorted(versions, key=StrictVersion)
This gives:
['1.7.0b0', '1.7.0', '1.11.0']
Translation from one form to another can be done using the re module.
3. Extend the Version class
The obvious problem of the previous solution is the lack of flexibility of StrictVersion
. Altering the version_re
class attribute to use rc
instead of a
or b
, even if it accepts 1.7.1rc0
, still prints it as 1.7.1r0
(as of python 2.7.3).
We can get it right by implementing our own custom version class. This can be done like this, with some unit tests to ensure correctness at least in some cases:
#!/usr/bin/python
# file: version2.py
from distutils import version
import re
import functools
@functools.total_ordering
class NumberedVersion(version.Version):
"""
A more flexible implementation of distutils.version.StrictVersion
This implementation allows to specify:
- an arbitrary number of version numbers:
not only '1.2.3' , but also '1.2.3.4.5'
- the separator between version numbers:
'1-2-3' is allowed when '-' is specified as separator
- an arbitrary ordering of pre-release tags:
1.1alpha3 < 1.1beta2 < 1.1rc1 < 1.1
when ["alpha", "beta", "rc"] is specified as pre-release tag list
"""
def __init__(self, vstring=None, sep='.', prerel_tags=('a', 'b')):
version.Version.__init__(self)
# super() is better here, but Version is an old-style class
self.sep = sep
self.prerel_tags = dict(zip(prerel_tags, xrange(len(prerel_tags))))
self.version_re = self._compile_pattern(sep, self.prerel_tags.keys())
self.sep_re = re.compile(re.escape(sep))
if vstring:
self.parse(vstring)
_re_prerel_tag = 'rel_tag'
_re_prerel_num = 'tag_num'
def _compile_pattern(self, sep, prerel_tags):
sep = re.escape(sep)
tags = '|'.join(re.escape(tag) for tag in prerel_tags)
if tags:
release_re = '(?:(?P<{tn}>{tags})(?P<{nn}>\d+))?'\
.format(tags=tags, tn=self._re_prerel_tag, nn=self._re_prerel_num)
else:
release_re = ''
return re.compile(r'^(\d+)(?:{sep}(\d+))*{rel}$'\
.format(sep=sep, rel=release_re))
def parse(self, vstring):
m = self.version_re.match(vstring)
if not m:
raise ValueError("invalid version number '{}'".format(vstring))
tag = m.group(self._re_prerel_tag)
tag_num = m.group(self._re_prerel_num)
if tag is not None and tag_num is not None:
self.prerelease = (tag, int(tag_num))
vnum_string = vstring[:-(len(tag) + len(tag_num))]
else:
self.prerelease = None
vnum_string = vstring
self.version = tuple(map(int, self.sep_re.split(vnum_string)))
def __repr__(self):
return "{cls} ('{vstring}', '{sep}', {prerel_tags})"\
.format(cls=self.__class__.__name__, vstring=str(self),
sep=self.sep, prerel_tags = list(self.prerel_tags.keys()))
def __str__(self):
s = self.sep.join(map(str,self.version))
if self.prerelease:
return s + "{}{}".format(*self.prerelease)
else:
return s
def __lt__(self, other):
"""
Fails when the separator is not the same or when the pre-release tags
are not the same or do not respect the same order.
"""
# TODO deal with trailing zeroes: e.g. "1.2.0" == "1.2"
if self.prerel_tags != other.prerel_tags or self.sep != other.sep:
raise ValueError("Unable to compare: instances have different"
" structures")
if self.version == other.version and self.prerelease is not None and\
other.prerelease is not None:
tag_index = self.prerel_tags[self.prerelease[0]]
other_index = self.prerel_tags[other.prerelease[0]]
if tag_index == other_index:
return self.prerelease[1] < other.prerelease[1]
return tag_index < other_index
elif self.version == other.version:
return self.prerelease is not None and other.prerelease is None
return self.version < other.version
def __eq__(self, other):
tag_index = self.prerel_tags[self.prerelease[0]]
other_index = other.prerel_tags[other.prerelease[0]]
return self.prerel_tags == other.prerel_tags and self.sep == other.sep\
and self.version == other.version and tag_index == other_index and\
self.prerelease[1] == other.prerelease[1]
import unittest
class TestNumberedVersion(unittest.TestCase):
def setUp(self):
self.v = NumberedVersion()
def test_compile_pattern(self):
p = self.v._compile_pattern('.', ['a', 'b'])
tests = {'1.2.3': True, '1a0': True, '1': True, '1.2.3.4a5': True,
'b': False, '1c0': False, ' 1': False, '': False}
for test, result in tests.iteritems():
self.assertEqual(result, p.match(test) is not None, \
"test: {} result: {}".format(test, result))
def test_parse(self):
tests = {"1.2.3.4a5": ((1, 2, 3, 4), ('a', 5))}
for test, result in tests.iteritems():
self.v.parse(test)
self.assertEqual(result, (self.v.version, self.v.prerelease))
def test_str(self):
tests = (('1.2.3',), ('10-2-42rc12', '-', ['rc']))
for t in tests:
self.assertEqual(t[0], str(NumberedVersion(*t)))
def test_repr(self):
v = NumberedVersion('1,2,3rc4', ',', ['lol', 'rc'])
expected = "NumberedVersion ('1,2,3rc4', ',', ['lol', 'rc'])"
self.assertEqual(expected, repr(v))
def test_order(self):
test = ["1.7.0", "1.7.0rc0", "1.11.0"]
expected = ['1.7.0rc0', '1.7.0', '1.11.0']
versions = [NumberedVersion(v, '.', ['rc']) for v in test]
self.assertEqual(expected, list(map(str,sorted(versions))))
if __name__ == '__main__':
unittest.main()
So, it can be used like this:
import version2
versions = ["1.7.0", "1.7.0rc2", "1.7.0rc1", "1.7.1", "1.11.0"]
sorted(versions, key=lambda v: version2.NumberedVersion(v, '.', ['rc']))
output:
['1.7.0rc1', '1.7.0rc2', '1.7.0', '1.7.1', '1.11.0']
So, in conclusion, use python's included batteries or roll out your own.
About this implementation: it could be improved by dealing with the trailing zeroes in the releases, and memoize the compilation of the regular expressions.
I found this to be helpful and a bit simpler:
from packaging import version
vers = ["1.7.0", "1.7.0rc2", "1.7.0rc1", "1.7.1", "1.11.0"]
sorted(vers, key=lambda x: version.Version(x))
Which results in:
['1.7.0rc1', '1.7.0rc2', '1.7.0', '1.7.1', '1.11.0']
Adding reverse=True
puts them in "descending" order which I find helpful.
['1.11.0', '1.7.1', '1.7.0', '1.7.0rc2', '1.7.0rc1']
It can sort a pretty wide variety of version-style numbers (my testbed was Linux versions v4.11.16, etc)
I use the pkg_resources module like so:
from pkg_resources import parse_version
def test_version_sorting():
expected = ['1.0.0dev0',
'1.0.0dev1',
'1.0.0dev2',
'1.0.0dev10',
'1.0.0rc0',
'1.0.0rc2',
'1.0.0rc5',
'1.0.0rc21',
'1.0.0',
'1.1.0',
'1.1.1',
'1.1.11',
'1.2.0',
'1.3.0',
'1.23.0',
'2.0.0', ]
alphabetical = sorted(expected)
shuffled = sorted(expected, key=lambda x: random())
assert expected == sorted(alphabetical, key=parse_version)
assert expected == sorted(shuffled, key=parse_version)
Note that creating a random ordering from the expected version list makes this a potentially unstable unit test as two runs will not have the same data. Still, in this instance, it should not matter… Hopefully.
>>> from distutils.version import LooseVersion
>>> versions = ["1.7.0", "1.7.0rc0", "1.11.0"]
>>> sorted(versions, key=LooseVersion)
['1.7.0', '1.7.0rc0', '1.11.0']
from the docs
Version numbering for anarchists and software realists. Implements the standard interface for version number classes as described above. A version number consists of a series of numbers, separated by either periods or strings of letters. When comparing version numbers, the numeric components will be compared numerically, and the alphabetic components lexically.
...
In fact, there is no such thing as an invalid version number under this scheme; the rules for comparison are simple and predictable, but may not always give the results you want (for some definition of "want").
so you see there is no smarts about treating "rc" specially
You can see how the version number is broken down like this
>>> LooseVersion('1.7.0rc0').version
[1, 7, 0, 'rc', 0]