How to check for Non-Ascii Characters
Ranges in the pattern syntax use the sorting rules of your collation.
Use a binary collate clause so the range is ordered by character code.
(I also changed it to LIKE
as I find that more obvious than PATINDEX > 0
)
SELECT *
FROM mbrnotes
WHERE LINE_TEXT LIKE '%[' + CHAR(127)+ '-' +CHAR(255)+']%' COLLATE Latin1_General_100_BIN2
If you're like me and you've gotten tired over the years searching for these characters in your company's terrible data, you can use this function or rewrite it for your own purpose. It's admittedly wordy, but it goes the extra step of identifying special characters if you want - uncomment lines 19 - 179 to do so.
If the string does not contain non-printable or extended ascii values - it returns NULL.
CREATE FUNCTION [dbo].[Find_Invalid_Chars]
(
@SearchString VARCHAR(8000)
)
RETURNS VARCHAR(8000)
AS
BEGIN
DECLARE @CharPosition INT, @CharVal VARCHAR(100), @ReturnVal VARCHAR(8000) = '', @isValid BIT = 1 -- Start as valid
SET @CharPosition = 1
WHILE @CharPosition <= DATALENGTH(@SearchString)
BEGIN
IF (ASCII(SUBSTRING(@SearchString, @CharPosition, 1)) NOT BETWEEN (32) AND (127))
BEGIN
SET @CharVal = '[' + CAST(ASCII(SUBSTRING(@SearchString, @CharPosition, 1)) AS VARCHAR(3)) + ']'
SET @isValid = 0
---- Non-Printables
--SET @CharVal = REPLACE(@CharVal, '[0]', '[NUL]')
--SET @CharVal = REPLACE(@CharVal, '[1]', '[SOH]')
--SET @CharVal = REPLACE(@CharVal, '[2]', '[STX]')
--SET @CharVal = REPLACE(@CharVal, '[3]', '[ETX]')
--SET @CharVal = REPLACE(@CharVal, '[4]', '[EOT]')
--SET @CharVal = REPLACE(@CharVal, '[5]', '[ENQ]')
--SET @CharVal = REPLACE(@CharVal, '[6]', '[ACK]')
--SET @CharVal = REPLACE(@CharVal, '[7]', '[BEL]')
--SET @CharVal = REPLACE(@CharVal, '[8]', '[BS]')
--SET @CharVal = REPLACE(@CharVal, '[9]', '[HT]')
--SET @CharVal = REPLACE(@CharVal, '[10]', '[LF]')
--SET @CharVal = REPLACE(@CharVal, '[11]', '[VT]')
--SET @CharVal = REPLACE(@CharVal, '[12]', '[FF]')
--SET @CharVal = REPLACE(@CharVal, '[13]', '[CR]')
--SET @CharVal = REPLACE(@CharVal, '[14]', '[SO]')
--SET @CharVal = REPLACE(@CharVal, '[15]', '[SI]')
--SET @CharVal = REPLACE(@CharVal, '[16]', '[DLE]')
--SET @CharVal = REPLACE(@CharVal, '[17]', '[DC1]')
--SET @CharVal = REPLACE(@CharVal, '[18]', '[DC2]')
--SET @CharVal = REPLACE(@CharVal, '[19]', '[DC3]')
--SET @CharVal = REPLACE(@CharVal, '[20]', '[DC4]')
--SET @CharVal = REPLACE(@CharVal, '[21]', '[NAK]')
--SET @CharVal = REPLACE(@CharVal, '[22]', '[SYN]')
--SET @CharVal = REPLACE(@CharVal, '[23]', '[ETB]')
--SET @CharVal = REPLACE(@CharVal, '[24]', '[CAN]')
--SET @CharVal = REPLACE(@CharVal, '[25]', '[EM]')
--SET @CharVal = REPLACE(@CharVal, '[26]', '[SUB]')
--SET @CharVal = REPLACE(@CharVal, '[27]', '[ESC]')
--SET @CharVal = REPLACE(@CharVal, '[28]', '[FS]')
--SET @CharVal = REPLACE(@CharVal, '[29]', '[GS]')
--SET @CharVal = REPLACE(@CharVal, '[30]', '[RS]')
--SET @CharVal = REPLACE(@CharVal, '[31]', '[US]')
---- Extended Ascii
--SET @CharVal = REPLACE(@CharVal, '[128]', '[€]')
--SET @CharVal = REPLACE(@CharVal, '[129]', '[]]')
--SET @CharVal = REPLACE(@CharVal, '[130]', '[‚]')
--SET @CharVal = REPLACE(@CharVal, '[131]', '[ƒ]')
--SET @CharVal = REPLACE(@CharVal, '[132]', '[„]')
--SET @CharVal = REPLACE(@CharVal, '[133]', '[…]')
--SET @CharVal = REPLACE(@CharVal, '[134]', '[†]')
--SET @CharVal = REPLACE(@CharVal, '[135]', '[‡]')
--SET @CharVal = REPLACE(@CharVal, '[136]', '[ˆ]')
--SET @CharVal = REPLACE(@CharVal, '[137]', '[‰]')
--SET @CharVal = REPLACE(@CharVal, '[138]', '[Š]')
--SET @CharVal = REPLACE(@CharVal, '[139]', '[‹]')
--SET @CharVal = REPLACE(@CharVal, '[140]', '[Œ]')
--SET @CharVal = REPLACE(@CharVal, '[141]', '[]]')
--SET @CharVal = REPLACE(@CharVal, '[142]', '[Ž]')
--SET @CharVal = REPLACE(@CharVal, '[143]', '[]]')
--SET @CharVal = REPLACE(@CharVal, '[144]', '[]]')
--SET @CharVal = REPLACE(@CharVal, '[145]', '[‘]')
--SET @CharVal = REPLACE(@CharVal, '[146]', '[’]')
--SET @CharVal = REPLACE(@CharVal, '[147]', '[“]')
--SET @CharVal = REPLACE(@CharVal, '[148]', '[”]')
--SET @CharVal = REPLACE(@CharVal, '[149]', '[•]')
--SET @CharVal = REPLACE(@CharVal, '[150]', '[–]')
--SET @CharVal = REPLACE(@CharVal, '[151]', '[—]')
--SET @CharVal = REPLACE(@CharVal, '[152]', '[˜]')
--SET @CharVal = REPLACE(@CharVal, '[153]', '[™]')
--SET @CharVal = REPLACE(@CharVal, '[154]', '[š]')
--SET @CharVal = REPLACE(@CharVal, '[155]', '[›]')
--SET @CharVal = REPLACE(@CharVal, '[156]', '[œ]')
--SET @CharVal = REPLACE(@CharVal, '[157]', '[]]')
--SET @CharVal = REPLACE(@CharVal, '[158]', '[ž]')
--SET @CharVal = REPLACE(@CharVal, '[159]', '[Ÿ]')
--SET @CharVal = REPLACE(@CharVal, '[160]', '[]]')
--SET @CharVal = REPLACE(@CharVal, '[161]', '[¡]')
--SET @CharVal = REPLACE(@CharVal, '[162]', '[¢]')
--SET @CharVal = REPLACE(@CharVal, '[163]', '[£]')
--SET @CharVal = REPLACE(@CharVal, '[164]', '[¤]')
--SET @CharVal = REPLACE(@CharVal, '[165]', '[¥]')
--SET @CharVal = REPLACE(@CharVal, '[166]', '[¦]')
--SET @CharVal = REPLACE(@CharVal, '[167]', '[§]')
--SET @CharVal = REPLACE(@CharVal, '[168]', '[¨]')
--SET @CharVal = REPLACE(@CharVal, '[169]', '[©]')
--SET @CharVal = REPLACE(@CharVal, '[170]', '[ª]')
--SET @CharVal = REPLACE(@CharVal, '[171]', '[«]')
--SET @CharVal = REPLACE(@CharVal, '[172]', '[¬]')
--SET @CharVal = REPLACE(@CharVal, '[173]', '[]]')
--SET @CharVal = REPLACE(@CharVal, '[174]', '[®]')
--SET @CharVal = REPLACE(@CharVal, '[175]', '[¯]')
--SET @CharVal = REPLACE(@CharVal, '[176]', '[°]')
--SET @CharVal = REPLACE(@CharVal, '[177]', '[±]')
--SET @CharVal = REPLACE(@CharVal, '[178]', '[²]')
--SET @CharVal = REPLACE(@CharVal, '[179]', '[³]')
--SET @CharVal = REPLACE(@CharVal, '[180]', '[´]')
--SET @CharVal = REPLACE(@CharVal, '[181]', '[µ]')
--SET @CharVal = REPLACE(@CharVal, '[182]', '[¶]')
--SET @CharVal = REPLACE(@CharVal, '[183]', '[·]')
--SET @CharVal = REPLACE(@CharVal, '[184]', '[¸]')
--SET @CharVal = REPLACE(@CharVal, '[185]', '[¹]')
--SET @CharVal = REPLACE(@CharVal, '[186]', '[º]')
--SET @CharVal = REPLACE(@CharVal, '[187]', '[»]')
--SET @CharVal = REPLACE(@CharVal, '[188]', '[¼]')
--SET @CharVal = REPLACE(@CharVal, '[189]', '[½]')
--SET @CharVal = REPLACE(@CharVal, '[190]', '[¾]')
--SET @CharVal = REPLACE(@CharVal, '[191]', '[¿]')
--SET @CharVal = REPLACE(@CharVal, '[192]', '[À]')
--SET @CharVal = REPLACE(@CharVal, '[193]', '[Á]')
--SET @CharVal = REPLACE(@CharVal, '[194]', '[Â]')
--SET @CharVal = REPLACE(@CharVal, '[195]', '[Ã]')
--SET @CharVal = REPLACE(@CharVal, '[196]', '[Ä]')
--SET @CharVal = REPLACE(@CharVal, '[197]', '[Å]')
--SET @CharVal = REPLACE(@CharVal, '[198]', '[Æ]')
--SET @CharVal = REPLACE(@CharVal, '[199]', '[Ç]')
--SET @CharVal = REPLACE(@CharVal, '[200]', '[È]')
--SET @CharVal = REPLACE(@CharVal, '[201]', '[É]')
--SET @CharVal = REPLACE(@CharVal, '[202]', '[Ê]')
--SET @CharVal = REPLACE(@CharVal, '[203]', '[Ë]')
--SET @CharVal = REPLACE(@CharVal, '[204]', '[Ì]')
--SET @CharVal = REPLACE(@CharVal, '[205]', '[Í]')
--SET @CharVal = REPLACE(@CharVal, '[206]', '[Î]')
--SET @CharVal = REPLACE(@CharVal, '[207]', '[Ï]')
--SET @CharVal = REPLACE(@CharVal, '[208]', '[Ð]')
--SET @CharVal = REPLACE(@CharVal, '[209]', '[Ñ]')
--SET @CharVal = REPLACE(@CharVal, '[210]', '[Ò]')
--SET @CharVal = REPLACE(@CharVal, '[211]', '[Ó]')
--SET @CharVal = REPLACE(@CharVal, '[212]', '[Ô]')
--SET @CharVal = REPLACE(@CharVal, '[213]', '[Õ]')
--SET @CharVal = REPLACE(@CharVal, '[214]', '[Ö]')
--SET @CharVal = REPLACE(@CharVal, '[215]', '[×]')
--SET @CharVal = REPLACE(@CharVal, '[216]', '[Ø]')
--SET @CharVal = REPLACE(@CharVal, '[217]', '[Ù]')
--SET @CharVal = REPLACE(@CharVal, '[218]', '[Ú]')
--SET @CharVal = REPLACE(@CharVal, '[219]', '[Û]')
--SET @CharVal = REPLACE(@CharVal, '[220]', '[Ü]')
--SET @CharVal = REPLACE(@CharVal, '[221]', '[Ý]')
--SET @CharVal = REPLACE(@CharVal, '[222]', '[Þ]')
--SET @CharVal = REPLACE(@CharVal, '[223]', '[ß]')
--SET @CharVal = REPLACE(@CharVal, '[224]', '[à]')
--SET @CharVal = REPLACE(@CharVal, '[225]', '[á]')
--SET @CharVal = REPLACE(@CharVal, '[226]', '[â]')
--SET @CharVal = REPLACE(@CharVal, '[227]', '[ã]')
--SET @CharVal = REPLACE(@CharVal, '[228]', '[ä]')
--SET @CharVal = REPLACE(@CharVal, '[229]', '[å]')
--SET @CharVal = REPLACE(@CharVal, '[230]', '[æ]')
--SET @CharVal = REPLACE(@CharVal, '[231]', '[ç]')
--SET @CharVal = REPLACE(@CharVal, '[232]', '[è]')
--SET @CharVal = REPLACE(@CharVal, '[233]', '[é]')
--SET @CharVal = REPLACE(@CharVal, '[234]', '[ê]')
--SET @CharVal = REPLACE(@CharVal, '[235]', '[ë]')
--SET @CharVal = REPLACE(@CharVal, '[236]', '[ì]')
--SET @CharVal = REPLACE(@CharVal, '[237]', '[í]')
--SET @CharVal = REPLACE(@CharVal, '[238]', '[î]')
--SET @CharVal = REPLACE(@CharVal, '[239]', '[ï]')
--SET @CharVal = REPLACE(@CharVal, '[240]', '[ð]')
--SET @CharVal = REPLACE(@CharVal, '[241]', '[ñ]')
--SET @CharVal = REPLACE(@CharVal, '[242]', '[ò]')
--SET @CharVal = REPLACE(@CharVal, '[243]', '[ó]')
--SET @CharVal = REPLACE(@CharVal, '[244]', '[ô]')
--SET @CharVal = REPLACE(@CharVal, '[245]', '[õ]')
--SET @CharVal = REPLACE(@CharVal, '[246]', '[ö]')
--SET @CharVal = REPLACE(@CharVal, '[247]', '[÷]')
--SET @CharVal = REPLACE(@CharVal, '[248]', '[ø]')
--SET @CharVal = REPLACE(@CharVal, '[249]', '[ù]')
--SET @CharVal = REPLACE(@CharVal, '[250]', '[ú]')
--SET @CharVal = REPLACE(@CharVal, '[251]', '[û]')
--SET @CharVal = REPLACE(@CharVal, '[252]', '[ü]')
--SET @CharVal = REPLACE(@CharVal, '[253]', '[ý]')
--SET @CharVal = REPLACE(@CharVal, '[254]', '[þ]')
--SET @CharVal = REPLACE(@CharVal, '[255]', '[ÿ]')
END
ELSE
BEGIN
SET @CharVal = SUBSTRING(@SearchString, @CharPosition, 1)
END
SET @ReturnVal = @ReturnVal + @CharVal
SET @CharPosition = @CharPosition + 1
END
IF (@isValid = 1) SET @ReturnVal = NULL
RETURN @ReturnVal
END
And then, call it like:
SELECT BadString, dbo.Find_Invalid_Chars(BadString) [Invalid Characters]
FROM #tmp_Table tmp_mc
WHERE dbo.Find_Invalid_Chars(BadString) IS NOT NULL
Sample Output:
Or