Does anyone have a good Proper Case algorithm?
@zwol: I'll post it as a separate reply.
Here's an example based on ljs's post.
// Demo driver: runs ToProperCase over a list of sample names and dumps the results.
void Main()
{
    // Samples exercise apostrophes, hyphens, initials, titles, particles,
    // Roman numerals, and one already mixed-case name.
    var samples = new List<string>
    {
        "bill o'reilly",
        "johannes diderik van der waals",
        "mr. moseley-williams",
        "Joe VanWyck",
        "mcdonald's",
        "william the third",
        "hrh prince charles",
        "h.r.m. queen elizabeth the third",
        "william gates, iii",
        "pope leo xii",
        "a.k. jennings"
    };

    // Lazily project each sample through the proper-casing extension method.
    var properCased = samples.Select(sample => sample.ToProperCase());

    // Dump() is not defined in this file; presumably LINQPad's output extension.
    properCased.Dump();
}
// http://stackoverflow.com/questions/32149/does-anyone-have-a-good-proper-case-algorithm
/// <summary>
/// Heuristic "proper case" (name case) conversion for personal names that were
/// entered entirely in upper case or entirely in lower case.
/// </summary>
public static class ProperCaseHelper
{
    // Characters that split a word into separately capitalised segments:
    // O'Reilly, A.K., Moseley-Williams.
    private static readonly char[] Separators = { '\'', '.', '-' };

    // Whole words that always take exactly this casing (matched case-insensitively).
    private static readonly string[] SpecialWords =
    {
        "van",    // Dick van Dyke
        "von",    // Baron von Bruin-Valt
        "de",
        "di",
        "da",     // Leonardo da Vinci, Eduardo da Silva
        "of",     // The Grand Old Duke of York
        "the",    // William the Conqueror
        "HRH",    // His/Her Royal Highness
        "HRM",    // His/Her Royal Majesty
        "H.R.H.", // His/Her Royal Highness
        "H.R.M."  // His/Her Royal Majesty
    };

    // Roman Numeral parser thanks to [djk](https://stackoverflow.com/users/785111/djk)
    // Note that it excludes the Chinese last name Xi.
    // Built once and reused instead of being re-created for every word.
    private static readonly Regex RomanNumerals = new Regex(
        @"\b(?!Xi\b)(X|XX|XXX|XL|L|LX|LXX|LXXX|XC|C)?(I|II|III|IV|V|VI|VII|VIII|IX)?\b",
        RegexOptions.IgnoreCase);

    /// <summary>
    /// Converts a name to proper case when it is written entirely in upper case
    /// or entirely in lower case. Mixed-case input (e.g. "Joe VanWyck") is assumed
    /// to be deliberately cased and is returned unchanged.
    /// </summary>
    /// <param name="input">The name to convert; may be null or empty.</param>
    /// <returns>
    /// The proper-cased name, or <paramref name="input"/> itself when it is null,
    /// empty, or already mixed case.
    /// </returns>
    public static string ToProperCase(this string input)
    {
        // Null/empty has nothing to convert; mixed-case names are left alone.
        if (string.IsNullOrEmpty(input) || !IsAllUpperOrAllLower(input))
        {
            return input;
        }

        // Words are delimited by single spaces only; consecutive spaces produce
        // empty words, which pass through untouched so spacing is preserved.
        return string.Join(" ", input.Split(' ').Select(word => wordToProperCase(word)));
    }

    /// <summary>
    /// Reports whether the text is unchanged by lower-casing or unchanged by
    /// upper-casing, i.e. it contains no mix of upper- and lower-case letters.
    /// </summary>
    /// <param name="input">The text to inspect.</param>
    /// <returns>True when the text is all lower case or all upper case.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    public static bool IsAllUpperOrAllLower(this string input)
    {
        if (input == null)
        {
            throw new ArgumentNullException("input");
        }

        return input.ToLower().Equals(input) || input.ToUpper().Equals(input);
    }

    // Proper-cases a single space-delimited word.
    private static string wordToProperCase(string word)
    {
        if (string.IsNullOrEmpty(word))
        {
            return word;
        }

        // Special words win outright. Every other rule only changes letter case,
        // so testing the raw word first gives the same match as testing it last.
        foreach (string special in SpecialWords)
        {
            if (word.Equals(special, StringComparison.InvariantCultureIgnoreCase))
            {
                return special;
            }
        }

        // Walk the word segment by segment so that EVERY separator is honoured
        // (Oscar-Meyer-Weiner), not just the first one.
        var result = new System.Text.StringBuilder(word.Length);
        int start = 0;
        while (start < word.Length)
        {
            int separatorIndex = word.IndexOfAny(Separators, start);
            int end = separatorIndex < 0 ? word.Length : separatorIndex;
            string segment = word.Substring(start, end - start);
            bool followsApostrophe = start > 0 && word[start - 1] == '\'';

            // A possessive "'s" stays lower case: McDonald's, not McDonald'S.
            if (followsApostrophe && isPossessiveSuffix(segment))
            {
                result.Append(segment.ToLower());
            }
            else
            {
                result.Append(segmentToProperCase(segment));
            }

            if (separatorIndex >= 0)
            {
                result.Append(word[separatorIndex]);
            }

            start = end + 1;
        }

        return dealWithRomanNumerals(result.ToString()); // William Gates, III
    }

    // True for a segment that is "s" optionally followed by non-letters ("s", "s,").
    private static bool isPossessiveSuffix(string segment)
    {
        if (segment.Length == 0)
        {
            return false;
        }

        bool startsWithS = segment[0] == 's' || segment[0] == 'S';
        return startsWithS && (segment.Length == 1 || !char.IsLetter(segment[1]));
    }

    // Proper-cases one separator-free segment, including the Scots prefixes.
    private static string segmentToProperCase(string segment)
    {
        if (segment.Length == 0)
        {
            return segment;
        }

        // Standard case
        string cased = capitaliseFirstLetter(segment);

        // Special cases. The length thresholds keep short names intact.
        cased = capitaliseAfterPrefix(cased, "Mc", 4);  // Scots
        cased = capitaliseAfterPrefix(cased, "Mac", 5); // Scots except Macey
        return cased;
    }

    // Applies "prefix + Capitalised remainder" when the segment STARTS with the
    // prefix and is longer than minLength. Anchoring at the start avoids mangling
    // words that merely contain the letters (Tomcat, Stomach).
    private static string capitaliseAfterPrefix(string segment, string prefix, int minLength)
    {
        if (segment.Length <= minLength ||
            !segment.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
        {
            return segment;
        }

        return prefix + capitaliseFirstLetter(segment.Substring(prefix.Length));
    }

    // Upper-cases any whole word that matches the Roman numeral pattern.
    private static string dealWithRomanNumerals(string word)
    {
        return RomanNumerals.Replace(word, match => match.Value.ToUpperInvariant());
    }

    // Upper-cases the first character and lower-cases the rest. Requires a non-empty word.
    private static string capitaliseFirstLetter(string word)
    {
        return char.ToUpper(word[0]) + word.Substring(1).ToLower();
    }
}
Unless I've misunderstood your question, I don't think you need to roll your own; the TextInfo class can do it for you.
using System.Globalization;
CultureInfo.InvariantCulture.TextInfo.ToTitleCase("GeOrGE bUrdEll")
This will return "George Burdell". You can use your own culture if there are special rules involved.
Update: Michael (in a comment to this answer) pointed out that this will not work if the input is all caps since the method will assume that it is an acronym. The naive workaround for this is to .ToLower() the text before submitting it to ToTitleCase.