PostgreSQL: Case insensitive string comparison
select *
where email ilike '[email protected]'
ilike
is similar to like
but case insensitive. For escape character use replace()
where email ilike replace(replace(replace($1, '~', '~~'), '%', '~%'), '_', '~_') escape '~'
or you could create a function to escape text; for array of text use
where email ilike any(array['[email protected]', '[email protected]'])
First, what not to do: don't use ILIKE
...
create table y
(
id serial not null,
email text not null unique
);
insert into y(email)
values('[email protected]') ,('[email protected]');
insert into y(email)
select n from generate_series(1,1000) as i(n);
-- no need to create an index on email,
-- UNIQUE constraint on email already makes an index.
-- thanks a_horse_with_no_name
-- create index ix_y on y(email);
explain select * from y
where email ilike
ANY(ARRAY['[email protected]','[email protected]']);
Execution Plan:
memdb=# explain select * from y where email ilike ANY(ARRAY['[email protected]','[email protected]']);
QUERY PLAN
----------------------------------------------------------------------------------------
Seq Scan on y (cost=0.00..17.52 rows=1 width=7)
Filter: (email ~~* ANY ('{[email protected],[email protected]}'::text[]))
(2 rows)
It's either you create an indexed lower expression...
create function lower(t text[]) returns text[]
as
$$
select lower($1::text)::text[]
$$ language sql;
create unique index ix_y_2 on y(lower(email));
explain select * from y
where lower(email) =
ANY(lower(ARRAY['[email protected]','[email protected]']));
...which properly uses index:
memdb=# explain select * from y where lower(email) = ANY(lower(ARRAY['[email protected]','[email protected]']));
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------
Bitmap Heap Scan on y (cost=22.60..27.98 rows=10 width=7)
Recheck Cond: (lower(email) = ANY ((lower(('{[email protected],[email protected]}'::text[])::text))::text[]))
-> Bitmap Index Scan on ix_y_2 (cost=0.00..22.60 rows=10 width=0)
Index Cond: (lower(email) = ANY ((lower(('{[email protected],[email protected]}'::text[])::text))::text[]))
(4 rows)
Or you use citext data type...
create table x
(
id serial not null,
email citext not null unique
);
insert into x(email)
values('[email protected]'),('[email protected]');
insert into x(email)
select n from generate_series(1,1000) as i(n);
-- no need to create an index on email,
-- UNIQUE constraint on email already makes an index.
-- thanks a_horse_with_no_name
-- create index ix_x on x(email);
explain select * from x
where email =
ANY(ARRAY['[email protected]','[email protected]']::citext[]);
...which properly uses index even you don't create an index on expression (e.g. create index zzz on yyy(lower(field))):
memdb=# explain select * from x where email = ANY(ARRAY['[email protected]','[email protected]']::citext[]);
QUERY PLAN
--------------------------------------------------------------------------------------------------
Bitmap Heap Scan on x (cost=8.57..13.91 rows=2 width=36)
Recheck Cond: (email = ANY ('{[email protected],[email protected]}'::citext[]))
-> Bitmap Index Scan on x_email_key (cost=0.00..8.57 rows=2 width=0)
Index Cond: (email = ANY ('{[email protected],[email protected]}'::citext[]))
If citext
field type is not yet installed, run this:
CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;