T-SQL strip all non-alpha and non-numeric characters
One flexible-ish way;
CREATE FUNCTION [dbo].[fnRemovePatternFromString](@BUFFER VARCHAR(MAX), @PATTERN VARCHAR(128)) RETURNS VARCHAR(MAX) AS
BEGIN
DECLARE @POS INT = PATINDEX(@PATTERN, @BUFFER)
WHILE @POS > 0 BEGIN
SET @BUFFER = STUFF(@BUFFER, @POS, 1, '')
SET @POS = PATINDEX(@PATTERN, @BUFFER)
END
RETURN @BUFFER
END
select dbo.fnRemovePatternFromString('cake & beer $3.99!?c', '%[$&.!?]%')
(No column name)
cake beer 399c
Create a function:
CREATE FUNCTION dbo.StripNonAlphaNumerics
(
@s VARCHAR(255)
)
RETURNS VARCHAR(255)
AS
BEGIN
DECLARE @p INT = 1, @n VARCHAR(255) = '';
WHILE @p <= LEN(@s)
BEGIN
IF SUBSTRING(@s, @p, 1) LIKE '[A-Za-z0-9]'
BEGIN
SET @n += SUBSTRING(@s, @p, 1);
END
SET @p += 1;
END
RETURN(@n);
END
GO
Then:
SELECT Result = dbo.StripNonAlphaNumerics
('My Customer''s dog & #1 friend are dope, yo!');
Results:
Result
------
MyCustomersdog1friendaredopeyo
To make it more flexible, you could pass in the pattern you want to allow:
CREATE FUNCTION dbo.StripNonAlphaNumerics
(
@s VARCHAR(255),
@pattern VARCHAR(255)
)
RETURNS VARCHAR(255)
AS
BEGIN
DECLARE @p INT = 1, @n VARCHAR(255) = '';
WHILE @p <= LEN(@s)
BEGIN
IF SUBSTRING(@s, @p, 1) LIKE @pattern
BEGIN
SET @n += SUBSTRING(@s, @p, 1);
END
SET @p += 1;
END
RETURN(@n);
END
GO
Then:
SELECT r = dbo.StripNonAlphaNumerics
('Bob''s dog & #1 friend are dope, yo!', '[A-Za-z0-9]');
Results:
r
------
Bobsdog1friendaredopeyo
I faced this problem several years ago, so I wrote a SQL function to do the trick. Here is the original article (was used to scrape text out of HTML). I have since updated the function, as follows:
IF (object_id('dbo.fn_CleanString') IS NOT NULL)
BEGIN
PRINT 'Dropping: dbo.fn_CleanString'
DROP function dbo.fn_CleanString
END
GO
PRINT 'Creating: dbo.fn_CleanString'
GO
CREATE FUNCTION dbo.fn_CleanString
(
@string varchar(8000)
)
returns varchar(8000)
AS
BEGIN
---------------------------------------------------------------------------------------------------
-- Title: CleanString
-- Date Created: March 26, 2011
-- Author: William McEvoy
--
-- Description: This function removes special ascii characters from a string.
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
declare @char char(1),
@len int,
@count int,
@newstring varchar(8000),
@replacement char(1)
select @count = 1,
@len = 0,
@newstring = '',
@replacement = ' '
---------------------------------------------------------------------------------------------------
-- M A I N P R O C E S S I N G
---------------------------------------------------------------------------------------------------
-- Remove Backspace characters
select @string = replace(@string,char(8),@replacement)
-- Remove Tabs
select @string = replace(@string,char(9),@replacement)
-- Remove line feed
select @string = replace(@string,char(10),@replacement)
-- Remove carriage return
select @string = replace(@string,char(13),@replacement)
-- Condense multiple spaces into a single space
-- This works by changing all double spaces to be OX where O = a space, and X = a special character
-- then all occurrences of XO are changed to O,
-- then all occurrences of X are changed to nothing, leaving just the O which is actually a single space
select @string = replace(replace(replace(ltrim(rtrim(@string)),' ', ' ' + char(7)),char(7)+' ',''),char(7),'')
-- Parse each character, remove non alpha-numeric
select @len = len(@string)
WHILE (@count <= @len)
BEGIN
-- Examine the character
select @char = substring(@string,@count,1)
IF (@char like '[a-z]') or (@char like '[A-Z]') or (@char like '[0-9]')
select @newstring = @newstring + @char
ELSE
select @newstring = @newstring + @replacement
select @count = @count + 1
END
return @newstring
END
GO
IF (object_id('dbo.fn_CleanString') IS NOT NULL)
PRINT 'Function created.'
ELSE
PRINT 'Function NOT created.'
GO