Detecting utf8 broken characters in MySQL
Thanks for your answers!!
I fixed my tables with the statements below and wanted to share the full list of changes. Note that, besides the Latin characters, it also fixes HTML-encoded characters (entities); the data really was a mess:
(If you need more conversions, look them up at https://www.utf8-chartable.de/unicode-utf8-table.pl)
# Repair mojibake: UTF-8 text whose bytes were decoded as cp1252/latin1 and stored again.
# Every search string is the cp1252 rendering of the target character's UTF-8 bytes
# (for example 'Ã©' is C3 A9, the UTF-8 encoding of 'é').
# Each statement intentionally has no WHERE clause: it rewrites the whole column.
# Back up the table first; REPLACE() also changes legitimate text that happens to
# contain one of these sequences.
# Bytes that cp1252 renders as invisible characters (0x81, 0x8D, 0x8F, 0x90, 0x9D,
# 0xA0, 0xAD) are written as hex literals so they cannot get lost when the script is copied.

# --- Three-byte punctuation (E2 80 xx). Longer sequences first, bare prefix last. ---
update `table` set `field` = replace(`field`, 'â€“', '–');
update `table` set `field` = replace(`field`, 'â€¢', '-');
update `table` set `field` = replace(`field`, 'â€œ', '"');
# Right double quote: 'â€' followed by the invisible U+009D.
update `table` set `field` = replace(`field`, _utf8mb4 x'C3A2E282ACC29D', '"');
# Bare 'â€' left behind when the 0x9D byte was dropped. This must run after every
# longer 'â€x' sequence above, otherwise it destroys their common prefix.
update `table` set `field` = replace(`field`, 'â€', '"');

# --- U+00A1..U+00BF: UTF-8 lead byte C2, shown as 'Â' ---
update `table` set `field` = replace(`field`, 'Â¡', '¡');
update `table` set `field` = replace(`field`, 'Â¢', '¢');
update `table` set `field` = replace(`field`, 'Â£', '£');
update `table` set `field` = replace(`field`, 'Â¤', '¤');
update `table` set `field` = replace(`field`, 'Â¥', '¥');
update `table` set `field` = replace(`field`, 'Â¦', '¦');
update `table` set `field` = replace(`field`, 'Â§', '§');
update `table` set `field` = replace(`field`, 'Â¨', '¨');
update `table` set `field` = replace(`field`, 'Â©', '©');
update `table` set `field` = replace(`field`, 'Âª', 'ª');
update `table` set `field` = replace(`field`, 'Â«', '«');
update `table` set `field` = replace(`field`, 'Â¬', '¬');
# 'Â' followed by the invisible soft hyphen (0xAD); the soft hyphen is removed entirely.
update `table` set `field` = replace(`field`, _utf8mb4 x'C382C2AD', '');
update `table` set `field` = replace(`field`, 'Â®', '®');
update `table` set `field` = replace(`field`, 'Â¯', '¯');
update `table` set `field` = replace(`field`, 'Â°', '°');
update `table` set `field` = replace(`field`, 'Â±', '±');
update `table` set `field` = replace(`field`, 'Â²', '²');
update `table` set `field` = replace(`field`, 'Â³', '³');
update `table` set `field` = replace(`field`, 'Â´', '´');
update `table` set `field` = replace(`field`, 'Âµ', 'µ');
update `table` set `field` = replace(`field`, 'Â¶', '¶');
update `table` set `field` = replace(`field`, 'Â·', '·');
update `table` set `field` = replace(`field`, 'Â¸', '¸');
update `table` set `field` = replace(`field`, 'Â¹', '¹');
update `table` set `field` = replace(`field`, 'Âº', 'º');
update `table` set `field` = replace(`field`, 'Â»', '»');
update `table` set `field` = replace(`field`, 'Â¼', '¼');
update `table` set `field` = replace(`field`, 'Â½', '½');
update `table` set `field` = replace(`field`, 'Â¾', '¾');
update `table` set `field` = replace(`field`, 'Â¿', '¿');

# --- U+00C0..U+00FF: UTF-8 lead byte C3, shown as 'Ã' ---
update `table` set `field` = replace(`field`, 'Ã€', 'À');
# 'Ã' followed by the invisible 0x81.
update `table` set `field` = replace(`field`, _utf8mb4 x'C383C281', 'Á');
update `table` set `field` = replace(`field`, 'Ã‚', 'Â');
update `table` set `field` = replace(`field`, 'Ãƒ', 'Ã');
update `table` set `field` = replace(`field`, 'Ã„', 'Ä');
update `table` set `field` = replace(`field`, 'Ã…', 'Å');
update `table` set `field` = replace(`field`, 'Ã†', 'Æ');
update `table` set `field` = replace(`field`, 'Ã‡', 'Ç');
update `table` set `field` = replace(`field`, 'Ãˆ', 'È');
update `table` set `field` = replace(`field`, 'Ã‰', 'É');
update `table` set `field` = replace(`field`, 'ÃŠ', 'Ê');
update `table` set `field` = replace(`field`, 'Ã‹', 'Ë');
update `table` set `field` = replace(`field`, 'ÃŒ', 'Ì');
# 'Ã' followed by the invisible 0x8D.
update `table` set `field` = replace(`field`, _utf8mb4 x'C383C28D', 'Í');
update `table` set `field` = replace(`field`, 'ÃŽ', 'Î');
# 'Ã' followed by the invisible 0x8F.
update `table` set `field` = replace(`field`, _utf8mb4 x'C383C28F', 'Ï');
# 'Ã' followed by the invisible 0x90.
update `table` set `field` = replace(`field`, _utf8mb4 x'C383C290', 'Ð');
update `table` set `field` = replace(`field`, 'Ã‘', 'Ñ');
update `table` set `field` = replace(`field`, 'Ã’', 'Ò');
update `table` set `field` = replace(`field`, 'Ã“', 'Ó');
update `table` set `field` = replace(`field`, 'Ã”', 'Ô');
update `table` set `field` = replace(`field`, 'Ã•', 'Õ');
update `table` set `field` = replace(`field`, 'Ã–', 'Ö');
update `table` set `field` = replace(`field`, 'Ã—', '×');
update `table` set `field` = replace(`field`, 'Ã˜', 'Ø');
update `table` set `field` = replace(`field`, 'Ã™', 'Ù');
update `table` set `field` = replace(`field`, 'Ãš', 'Ú');
update `table` set `field` = replace(`field`, 'Ã›', 'Û');
update `table` set `field` = replace(`field`, 'Ãœ', 'Ü');
# 'Ã' followed by the invisible 0x9D.
update `table` set `field` = replace(`field`, _utf8mb4 x'C383C29D', 'Ý');
update `table` set `field` = replace(`field`, 'Ãž', 'Þ');
update `table` set `field` = replace(`field`, 'ÃŸ', 'ß');
# 'Ã' followed by a no-break space (0xA0), which is easily confused with a plain space.
update `table` set `field` = replace(`field`, _utf8mb4 x'C383C2A0', 'à');
update `table` set `field` = replace(`field`, 'Ã¡', 'á');
update `table` set `field` = replace(`field`, 'Ã¢', 'â');
update `table` set `field` = replace(`field`, 'Ã£', 'ã');
update `table` set `field` = replace(`field`, 'Ã¤', 'ä');
update `table` set `field` = replace(`field`, 'Ã¥', 'å');
update `table` set `field` = replace(`field`, 'Ã¦', 'æ');
update `table` set `field` = replace(`field`, 'Ã§', 'ç');
update `table` set `field` = replace(`field`, 'Ã¨', 'è');
update `table` set `field` = replace(`field`, 'Ã©', 'é');
update `table` set `field` = replace(`field`, 'Ãª', 'ê');
update `table` set `field` = replace(`field`, 'Ã«', 'ë');
update `table` set `field` = replace(`field`, 'Ã¬', 'ì');
# 'Ã' followed by the invisible soft hyphen (0xAD).
update `table` set `field` = replace(`field`, _utf8mb4 x'C383C2AD', 'í');
update `table` set `field` = replace(`field`, 'Ã®', 'î');
update `table` set `field` = replace(`field`, 'Ã¯', 'ï');
update `table` set `field` = replace(`field`, 'Ã°', 'ð');
update `table` set `field` = replace(`field`, 'Ã±', 'ñ');
update `table` set `field` = replace(`field`, 'Ã²', 'ò');
update `table` set `field` = replace(`field`, 'Ã³', 'ó');
update `table` set `field` = replace(`field`, 'Ã´', 'ô');
update `table` set `field` = replace(`field`, 'Ãµ', 'õ');
update `table` set `field` = replace(`field`, 'Ã¶', 'ö');
update `table` set `field` = replace(`field`, 'Ã·', '÷');
update `table` set `field` = replace(`field`, 'Ã¸', 'ø');
update `table` set `field` = replace(`field`, 'Ã¹', 'ù');
update `table` set `field` = replace(`field`, 'Ãº', 'ú');
update `table` set `field` = replace(`field`, 'Ã»', 'û');
update `table` set `field` = replace(`field`, 'Ã¼', 'ü');
update `table` set `field` = replace(`field`, 'Ã½', 'ý');
update `table` set `field` = replace(`field`, 'Ã¾', 'þ');
update `table` set `field` = replace(`field`, 'Ã¿', 'ÿ');
# Decode HTML character references that were stored literally in the column.
# Each statement intentionally has no WHERE clause: it rewrites the whole column.
# REPLACE() matches case-sensitively, so '&Aacute;' and '&aacute;' are distinct.
# Statements that repeated an earlier entity were dropped; they could never match
# once the first occurrence had run.

# --- Accented letters ---
update `table` set `field` = replace(`field`, '&ccedil;', 'ç');
update `table` set `field` = replace(`field`, '&atilde;', 'ã');
update `table` set `field` = replace(`field`, '&aacute;', 'á');
update `table` set `field` = replace(`field`, '&acirc;', 'â');
update `table` set `field` = replace(`field`, '&eacute;', 'é');
update `table` set `field` = replace(`field`, '&iacute;', 'í');
update `table` set `field` = replace(`field`, '&otilde;', 'õ');
update `table` set `field` = replace(`field`, '&uacute;', 'ú');
update `table` set `field` = replace(`field`, '&Aacute;', 'Á');
update `table` set `field` = replace(`field`, '&Acirc;', 'Â');
update `table` set `field` = replace(`field`, '&Eacute;', 'É');
update `table` set `field` = replace(`field`, '&Iacute;', 'Í');
update `table` set `field` = replace(`field`, '&Otilde;', 'Õ');
update `table` set `field` = replace(`field`, '&Uacute;', 'Ú');
update `table` set `field` = replace(`field`, '&Ccedil;', 'Ç');
update `table` set `field` = replace(`field`, '&Atilde;', 'Ã');
update `table` set `field` = replace(`field`, '&Agrave;', 'À');
update `table` set `field` = replace(`field`, '&Ecirc;', 'Ê');
update `table` set `field` = replace(`field`, '&Oacute;', 'Ó');
update `table` set `field` = replace(`field`, '&Ocirc;', 'Ô');
update `table` set `field` = replace(`field`, '&Uuml;', 'Ü');
update `table` set `field` = replace(`field`, '&agrave;', 'à');
update `table` set `field` = replace(`field`, '&ecirc;', 'ê');
update `table` set `field` = replace(`field`, '&oacute;', 'ó');
update `table` set `field` = replace(`field`, '&ocirc;', 'ô');
update `table` set `field` = replace(`field`, '&uuml;', 'ü');

# --- Markup characters and diacritic marks ---
update `table` set `field` = replace(`field`, '&gt;', '>');
update `table` set `field` = replace(`field`, '&lt;', '<');
update `table` set `field` = replace(`field`, '&circ;', 'ˆ');
update `table` set `field` = replace(`field`, '&tilde;', '˜');
update `table` set `field` = replace(`field`, '&uml;', '¨');
# The entity for the acute accent is '&acute;' ('&cute;' does not exist).
update `table` set `field` = replace(`field`, '&acute;', '´');
update `table` set `field` = replace(`field`, '&cedil;', '¸');

# --- Quotes, dashes and typographic symbols ---
update `table` set `field` = replace(`field`, '&quot;', '"');
update `table` set `field` = replace(`field`, '&ldquo;', '“');
update `table` set `field` = replace(`field`, '&rdquo;', '”');
update `table` set `field` = replace(`field`, '&lsquo;', '‘');
update `table` set `field` = replace(`field`, '&rsquo;', '’');
update `table` set `field` = replace(`field`, '&lsaquo;', '‹');
update `table` set `field` = replace(`field`, '&rsaquo;', '›');
update `table` set `field` = replace(`field`, '&laquo;', '«');
update `table` set `field` = replace(`field`, '&raquo;', '»');
update `table` set `field` = replace(`field`, '&ordm;', 'º');
update `table` set `field` = replace(`field`, '&ordf;', 'ª');
update `table` set `field` = replace(`field`, '&ndash;', '–');
update `table` set `field` = replace(`field`, '&mdash;', '—');
update `table` set `field` = replace(`field`, '&macr;', '¯');
update `table` set `field` = replace(`field`, '&hellip;', '…');
update `table` set `field` = replace(`field`, '&brvbar;', '¦');
update `table` set `field` = replace(`field`, '&bull;', '•');
update `table` set `field` = replace(`field`, '&para;', '¶');
update `table` set `field` = replace(`field`, '&sect;', '§');

# --- Superscripts and fractions ---
update `table` set `field` = replace(`field`, '&sup1;', '¹');
update `table` set `field` = replace(`field`, '&sup2;', '²');
update `table` set `field` = replace(`field`, '&sup3;', '³');
update `table` set `field` = replace(`field`, '&frac12;', '½');
update `table` set `field` = replace(`field`, '&frac14;', '¼');
update `table` set `field` = replace(`field`, '&frac34;', '¾');
# NOTE(review): the eighths have no HTML 4 name, so both the HTML5 name and the
# decimal reference are handled; confirm which form your data actually contains.
update `table` set `field` = replace(`field`, '&frac18;', '⅛');
update `table` set `field` = replace(`field`, '&#8539;', '⅛');
update `table` set `field` = replace(`field`, '&frac38;', '⅜');
update `table` set `field` = replace(`field`, '&#8540;', '⅜');
update `table` set `field` = replace(`field`, '&frac58;', '⅝');
update `table` set `field` = replace(`field`, '&#8541;', '⅝');
update `table` set `field` = replace(`field`, '&frac78;', '⅞');
update `table` set `field` = replace(`field`, '&#8542;', '⅞');

# --- Mathematical symbols ---
update `table` set `field` = replace(`field`, '&plusmn;', '±');
update `table` set `field` = replace(`field`, '&minus;', '−');
update `table` set `field` = replace(`field`, '&times;', '×');
update `table` set `field` = replace(`field`, '&divide;', '÷');
update `table` set `field` = replace(`field`, '&lowast;', '∗');
update `table` set `field` = replace(`field`, '&frasl;', '⁄');
update `table` set `field` = replace(`field`, '&permil;', '‰');
update `table` set `field` = replace(`field`, '&int;', '∫');
update `table` set `field` = replace(`field`, '&sum;', '∑');
update `table` set `field` = replace(`field`, '&prod;', '∏');
update `table` set `field` = replace(`field`, '&radic;', '√');
update `table` set `field` = replace(`field`, '&infin;', '∞');
update `table` set `field` = replace(`field`, '&asymp;', '≈');
update `table` set `field` = replace(`field`, '&cong;', '≅');
update `table` set `field` = replace(`field`, '&prop;', '∝');
update `table` set `field` = replace(`field`, '&equiv;', '≡');
update `table` set `field` = replace(`field`, '&ne;', '≠');
update `table` set `field` = replace(`field`, '&le;', '≤');
update `table` set `field` = replace(`field`, '&ge;', '≥');
update `table` set `field` = replace(`field`, '&there4;', '∴');
update `table` set `field` = replace(`field`, '&sdot;', '⋅');
update `table` set `field` = replace(`field`, '&middot;', '·');
update `table` set `field` = replace(`field`, '&part;', '∂');
update `table` set `field` = replace(`field`, '&image;', 'ℑ');
update `table` set `field` = replace(`field`, '&real;', 'ℜ');
update `table` set `field` = replace(`field`, '&prime;', '′');
update `table` set `field` = replace(`field`, '&Prime;', '″');
update `table` set `field` = replace(`field`, '&deg;', '°');
update `table` set `field` = replace(`field`, '&ang;', '∠');
update `table` set `field` = replace(`field`, '&perp;', '⊥');
update `table` set `field` = replace(`field`, '&nabla;', '∇');
update `table` set `field` = replace(`field`, '&oplus;', '⊕');
update `table` set `field` = replace(`field`, '&otimes;', '⊗');
update `table` set `field` = replace(`field`, '&alefsym;', 'ℵ');
update `table` set `field` = replace(`field`, '&oslash;', 'ø');
update `table` set `field` = replace(`field`, '&Oslash;', 'Ø');
update `table` set `field` = replace(`field`, '&isin;', '∈');
update `table` set `field` = replace(`field`, '&notin;', '∉');
update `table` set `field` = replace(`field`, '&cap;', '∩');
update `table` set `field` = replace(`field`, '&cup;', '∪');
update `table` set `field` = replace(`field`, '&sub;', '⊂');
update `table` set `field` = replace(`field`, '&sup;', '⊃');
update `table` set `field` = replace(`field`, '&sube;', '⊆');
update `table` set `field` = replace(`field`, '&supe;', '⊇');
update `table` set `field` = replace(`field`, '&exist;', '∃');
update `table` set `field` = replace(`field`, '&forall;', '∀');
update `table` set `field` = replace(`field`, '&empty;', '∅');
update `table` set `field` = replace(`field`, '&not;', '¬');
update `table` set `field` = replace(`field`, '&and;', '∧');
update `table` set `field` = replace(`field`, '&or;', '∨');
update `table` set `field` = replace(`field`, '&crarr;', '↵');

# '&amp;' is decoded last so that text such as '&amp;lt;' becomes the literal '&lt;'
# instead of being decoded twice into '<'.
update `table` set `field` = replace(`field`, '&amp;', '&');
I fixed it with:
# Undo double UTF-8 encoding in post_title: convert the text back to latin1 bytes,
# drop the character set with CAST(... AS BINARY), then read those bytes as UTF-8.
# - CAST(... AS BINARY) replaces the `BINARY expr` operator, which is deprecated in MySQL 8.0.27+.
# - utf8mb4 is used because MySQL's `utf8` (utf8mb3) cannot represent 4-byte characters.
#   NOTE(review): the column itself must be utf8mb4 to store such characters; confirm.
# - The WHERE clause limits the update to rows that contain multi-byte characters and whose
#   round trip yields a valid string. This assumes the server returns NULL (with a warning)
#   for an invalid byte sequence; verify on your version before relying on it to make
#   re-runs safe.
UPDATE wp_zcs9ck_posts_copy
SET post_title = CONVERT(CAST(CONVERT(post_title USING latin1) AS BINARY) USING utf8mb4)
WHERE LENGTH(post_title) <> CHAR_LENGTH(post_title)
  AND CONVERT(CAST(CONVERT(post_title USING latin1) AS BINARY) USING utf8mb4) IS NOT NULL;
Complete solution: http://jonisalonen.com/2012/fixing-doubly-utf-8-encoded-text-in-mysql/
# Repair mojibake (UTF-8 bytes that were decoded as cp1252/latin1) in `column_name`.
# Each statement intentionally has no WHERE clause: it rewrites the whole column.
# Back up the table first; REPLACE() also changes legitimate text containing these sequences.

# --- Accented letters (UTF-8 lead byte C3, shown as 'Ã') ---
UPDATE `table_name` SET `column_name` = REPLACE(`column_name`, 'Ã¡', 'á');
UPDATE `table_name` SET `column_name` = REPLACE(`column_name`, 'Ã¤', 'ä');
UPDATE `table_name` SET `column_name` = REPLACE(`column_name`, 'Ã©', 'é');
# NOTE(review): 'í©', 'íº' and 'í‘' look like leftovers of an earlier run that turned a
# bare 'Ã' into 'í'; they are kept as written. Confirm they occur in your data.
UPDATE `table_name` SET `column_name` = REPLACE(`column_name`, 'í©', 'é');
UPDATE `table_name` SET `column_name` = REPLACE(`column_name`, 'Ã³', 'ó');
UPDATE `table_name` SET `column_name` = REPLACE(`column_name`, 'íº', 'ú');
UPDATE `table_name` SET `column_name` = REPLACE(`column_name`, 'Ãº', 'ú');
UPDATE `table_name` SET `column_name` = REPLACE(`column_name`, 'Ã±', 'ñ');
UPDATE `table_name` SET `column_name` = REPLACE(`column_name`, 'í‘', 'Ñ');
# 'í' is 'Ã' followed by the invisible soft hyphen (0xAD). It is written in hex so that a
# lost invisible character cannot turn this into a replacement of every remaining 'Ã'.
UPDATE `table_name` SET `column_name` = REPLACE(`column_name`, _utf8mb4 x'C383C2AD', 'í');

# --- Punctuation (UTF-8 bytes E2 80 xx, shown as 'â€x'). Longer sequences first. ---
# The en dash is restored as an en dash; a later statement that mapped the same sequence
# to '-' could never match and was dropped.
UPDATE `table_name` SET `column_name` = REPLACE(`column_name`, 'â€“', '–');
UPDATE `table_name` SET `column_name` = REPLACE(`column_name`, 'â€™', '\'');
UPDATE `table_name` SET `column_name` = REPLACE(`column_name`, 'â€¦', '...');
UPDATE `table_name` SET `column_name` = REPLACE(`column_name`, 'â€œ', '"');
UPDATE `table_name` SET `column_name` = REPLACE(`column_name`, 'â€˜', '\'');
UPDATE `table_name` SET `column_name` = REPLACE(`column_name`, 'â€¢', '-');
# NOTE(review): this maps the double dagger to the letter 'c', as in the original list;
# that looks specific to the author's data. Confirm before running.
UPDATE `table_name` SET `column_name` = REPLACE(`column_name`, 'â€¡', 'c');
# Right double quote: 'â€' followed by the invisible U+009D.
UPDATE `table_name` SET `column_name` = REPLACE(`column_name`, _utf8mb4 x'C3A2E282ACC29D', '"');
# Bare 'â€' remnant. This must run after every longer 'â€x' sequence above, otherwise it
# destroys their common prefix.
UPDATE `table_name` SET `column_name` = REPLACE(`column_name`, 'â€', '"');

# Final cleanup: remove stray 'Â'. This also deletes every legitimate 'Â' in the column.
UPDATE `table_name` SET `column_name` = REPLACE(`column_name`, 'Â', '');
The SELECT statement you need is the following:
# List every row whose `name` contains at least one multi-byte character: LENGTH() counts
# bytes and CHAR_LENGTH() counts characters, so the two differ only for such rows.
# `table` is a placeholder; it is back-quoted because TABLE is a reserved word in MySQL.
SELECT * FROM `table` WHERE LENGTH(name) <> CHAR_LENGTH(name);
This returns all rows which contain multi-byte characters.
Here, `name` is assumed to be the column in which the weird characters would be found.