How to get Case Insensitive Python SET
If you need to preserve case, you could use a dictionary instead. Case-fold the keys, then extract the values to a set:
set({v.casefold(): v for v in l}.values())
The str.casefold() method uses the Unicode case folding rules (pdf) to normalize strings for case-insensitive comparisons. This is especially important for non-ASCII alphabets and for text with ligatures, e.g. the German ß (sharp S), which is normalised to ss, or, from the same language, the ſ (long s), which is normalised to s:
>>> print(s := 'Waſſerſchloß', s.lower(), s.casefold(), sep=" - ")
Waſſerſchloß - waſſerſchloß - wasserschloss
You can encapsulate this into a class.
If you don't care about preserving case, just use a set comprehension:
{v.casefold() for v in l}
Note that Python 2 doesn't have this method; use str.lower() in that case.
Demo:
>>> l = ['#Trending', '#Trending', '#TrendinG', '#Yax', '#YAX', '#Yax']
>>> set({v.casefold(): v for v in l}.values())
{'#Yax', '#TrendinG'}
>>> {v.lower() for v in l}
{'#trending', '#yax'}
Wrapping the first approach into a class would look like:
try:
# Python 3
from collections.abc import MutableSet
except ImportError:
# Python 2
from collections import MutableSet
class CasePreservingSet(MutableSet):
    """String set that preserves case but tests containment by case-folded value.

    E.g. ``'Foo' in CasePreservingSet(['FOO'])`` is True. When several
    spellings of the same string are added, the *last* inserted variant is
    the one that is stored and yielded by iteration.

    Args:
        *args: at most one iterable of strings used to populate the set.

    Raises:
        TypeError: if more than one positional argument is passed, or if a
            non-string value is added.
    """

    def __init__(self, *args):
        # Maps folded string -> most recently added original spelling.
        self._values = {}
        if len(args) > 1:
            # str.format instead of an f-string: the class keeps Python 2
            # fallbacks elsewhere, and an f-string would be a SyntaxError there.
            raise TypeError(
                "{} expected at most 1 argument, got {}".format(
                    type(self).__name__, len(args)
                )
            )
        values = args[0] if args else ()
        try:
            self._fold = str.casefold  # Python 3
        except AttributeError:
            self._fold = str.lower  # Python 2
        for v in values:
            self.add(v)

    def __repr__(self):
        return '<{}{} at {:x}>'.format(
            type(self).__name__, tuple(self._values.values()), id(self))

    def __contains__(self, value):
        """Return True if a string equal to *value*, ignoring case, is stored."""
        # Only strings can be members. Answering False for anything else,
        # instead of letting the fold function raise TypeError, keeps
        # ``x in s`` and the inherited set comparisons usable on mixed data.
        if not isinstance(value, str):
            return False
        return self._fold(value) in self._values

    def __iter__(self):
        """Iterate over the stored (case-preserved) spellings."""
        try:
            # Python 2
            return self._values.itervalues()
        except AttributeError:
            # Python 3
            return iter(self._values.values())

    def __len__(self):
        return len(self._values)

    def add(self, value):
        """Store *value*, replacing any spelling that folds to the same key."""
        self._values[self._fold(value)] = value

    def discard(self, value):
        """Remove the entry matching *value* ignoring case; no-op if absent."""
        # A non-string can never be a member, so there is nothing to remove.
        if not isinstance(value, str):
            return
        self._values.pop(self._fold(value), None)
Usage demo:
>>> cps = CasePreservingSet(l)
>>> cps
<CasePreservingSet('#TrendinG', '#Yax') at 1047ba290>
>>> '#treNdinG' in cps
True
You can use lower():
>>> set(i.lower() for i in l)
set(['#trending', '#yax'])
You could convert the entire list to lowercase before creating a set.
# Lower-case every entry up front. A list is built (rather than a lazy
# map object) so that `l` can still be reused after the set is created;
# on Python 3 a map iterator would be exhausted by the set() call.
l = [s.lower() for s in l]
set(l)
Create a case-insensitive set class of your own:
class CaseInsensitiveSet(set):
    """Set that stores string members lower-cased so membership ignores case.

    Items that offer a usable ``lower()`` method are stored in lower case, so
    their original spelling is not kept; every other item is stored unchanged.

    Args:
        iterable: optional initial items; each one goes through ``add`` so
            that construction is case-insensitive as well.
    """

    def __init__(self, iterable=()):
        # set.__init__ would insert the items verbatim, bypassing add();
        # start empty and route every item through the normalising path.
        set.__init__(self)
        for item in iterable:
            self.add(item)

    @staticmethod
    def _normalize(item):
        """Return ``item.lower()`` when available, otherwise *item* itself."""
        try:
            return item.lower()
        except (AttributeError, TypeError):  # not a string
            return item

    def add(self, item):
        """Add *item*, lower-cased when it is a string."""
        set.add(self, self._normalize(item))

    def update(self, *iterables):
        """Add every item of each iterable, lower-casing strings."""
        for iterable in iterables:
            for item in iterable:
                self.add(item)

    def discard(self, item):
        """Remove *item* ignoring case; do nothing if it is absent."""
        set.discard(self, self._normalize(item))

    def remove(self, item):
        """Remove *item* ignoring case; raise KeyError if it is absent."""
        set.remove(self, self._normalize(item))

    def __contains__(self, item):
        return set.__contains__(self, self._normalize(item))

    # and so on... the remaining set methods and operators (union,
    # intersection, |, &, ...) will need to be overridden for full
    # functionality