Generate a C# model class from the structure of a CSV file

Thank you Bedford, I took your code and added three things:

  • It removes symbols invalid for property names. For example "Order No." will become "OrderNo" property.
  • Ability to add property and class attributes. In my case I need [DelimitedRecord(",")] and [FieldOptional()], because I'm using FileHelpers.
  • Some columns don't have names, so it generates names for them. The naming convention is "Column" followed by the 1-based column position: Column10, Column11, and so on.

Final code:

/// <summary>
/// Generates the source code of a C# model class from the structure of a CSV file:
/// one auto-property per header column, typed by inspecting every value in that column.
/// </summary>
public class CsvToClass
{
    /// <summary>
    /// Reads a CSV file and returns C# source code for a class named after the file
    /// (file name without extension).
    /// </summary>
    /// <param name="filePath">Path of the CSV file. The first line is treated as the header row.</param>
    /// <param name="delimiter">Field separator used in the file. Null or empty falls back to ",".</param>
    /// <param name="classAttribute">Optional attribute text emitted on its own line above the class.</param>
    /// <param name="propertyAttribute">Optional attribute text emitted on its own line above every property.</param>
    /// <returns>The generated class source code.</returns>
    /// <exception cref="InvalidOperationException">The file has no lines, so there is no header row.</exception>
    public static string CSharpClassCodeFromCsvFile(string filePath, string delimiter = ",",
        string classAttribute = "", string propertyAttribute = "")
    {
        if (string.IsNullOrEmpty(delimiter))
            delimiter = ",";

        if (string.IsNullOrWhiteSpace(propertyAttribute) == false)
            propertyAttribute += "\n\t";
        // Each attribute is checked on its own: the class attribute gets its line break
        // whenever it is present, regardless of whether a property attribute was supplied.
        if (string.IsNullOrWhiteSpace(classAttribute) == false)
            classAttribute += "\n";

        string[] lines = File.ReadAllLines(filePath);
        if (lines.Length == 0)
            throw new InvalidOperationException("The CSV file '" + filePath + "' is empty; a header row is required.");

        string[] separators = new[] { delimiter };
        string[] columnNames = lines[0].Split(separators, StringSplitOptions.None).Select(str => str.Trim()).ToArray();
        // Completely empty lines are not records; keeping them would force every column to "string".
        string[] data = lines.Skip(1).Where(line => string.IsNullOrEmpty(line) == false).ToArray();

        string className = Path.GetFileNameWithoutExtension(filePath);
        var code = new System.Text.StringBuilder();
        code.AppendFormat("{0}public class {1} {{ \n", classAttribute, className);

        for (int columnIndex = 0; columnIndex < columnNames.Length; columnIndex++)
        {
            string columnName = ToPropertyName(columnNames[columnIndex], columnIndex);
            code.Append("\t")
                .Append(GetVariableDeclaration(data, columnIndex, columnName, propertyAttribute, delimiter))
                .Append("\n\n");
        }

        code.Append("}\n");
        return code.ToString();
    }

    /// <summary>
    /// Builds the auto-property declaration for one column.
    /// </summary>
    /// <param name="data">The data rows (header excluded), one unsplit CSV line per element.</param>
    /// <param name="columnIndex">Zero-based index of the column to inspect.</param>
    /// <param name="columnName">Property name to emit.</param>
    /// <param name="attribute">Text placed verbatim in front of the declaration; may be null.</param>
    /// <param name="delimiter">Field separator used in <paramref name="data"/>. Null or empty falls back to ",".</param>
    /// <returns>A declaration of the form <c>public TYPE NAME { get; set; }</c>.</returns>
    /// <remarks>
    /// Types are tried in the order DateTime, int, double, with string as the fallback.
    /// Parsing uses the TryParse overloads without an explicit culture.
    /// A row that has no field at <paramref name="columnIndex"/> contributes an empty value,
    /// and a column without any values is declared as string.
    /// </remarks>
    public static string GetVariableDeclaration(string[] data, int columnIndex, string columnName,
        string attribute = null, string delimiter = ",")
    {
        if (string.IsNullOrEmpty(delimiter))
            delimiter = ",";

        string[] separators = new[] { delimiter };
        string[] columnValues = data
            .Select(line => line.Split(separators, StringSplitOptions.None))
            // Short rows yield an empty value instead of an IndexOutOfRangeException.
            .Select(fields => columnIndex < fields.Length ? fields[columnIndex].Trim() : string.Empty)
            .ToArray();

        string typeAsString;

        if (columnValues.Length == 0)
        {
            // All() is vacuously true on an empty array; without this guard a
            // header-only file would declare every column as DateTime.
            typeAsString = "string";
        }
        else if (AllDateTimeValues(columnValues))
        {
            typeAsString = "DateTime";
        }
        else if (AllIntValues(columnValues))
        {
            typeAsString = "int";
        }
        else if (AllDoubleValues(columnValues))
        {
            typeAsString = "double";
        }
        else
        {
            typeAsString = "string";
        }

        string declaration = String.Format("{0}public {1} {2} {{ get; set; }}", attribute, typeAsString, columnName);
        return declaration;
    }

    /// <summary>Returns true when every value parses as a <see cref="double"/>.</summary>
    public static bool AllDoubleValues(string[] values)
    {
        double d;
        return values.All(val => double.TryParse(val, out d));
    }

    /// <summary>Returns true when every value parses as an <see cref="int"/>.</summary>
    public static bool AllIntValues(string[] values)
    {
        int d;
        return values.All(val => int.TryParse(val, out d));
    }

    /// <summary>Returns true when every value parses as a <see cref="DateTime"/>.</summary>
    public static bool AllDateTimeValues(string[] values)
    {
        DateTime d;
        return values.All(val => DateTime.TryParse(val, out d));
    }

    // add other types if you need...

    /// <summary>
    /// Turns a raw header cell into a property name: every character that is not a letter,
    /// decimal digit or underscore is removed, a leading digit is prefixed with "_", and an
    /// empty result becomes "Column" followed by the 1-based column position.
    /// </summary>
    private static string ToPropertyName(string rawName, int columnIndex)
    {
        string name = Regex.Replace(rawName, @"[^\p{L}\p{Nd}_]", string.Empty);
        if (string.IsNullOrEmpty(name))
            return "Column" + (columnIndex + 1);
        if (char.IsDigit(name[0]))
            name = "_" + name;
        return name;
    }
}

Usage example:

/// <summary>
/// Usage example: generates a FileHelpers-annotated model class from a CSV file
/// and writes the resulting source code to disk.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        // Named arguments make it obvious which string is the delimiter and which are attributes.
        string generatedSource = CsvToClass.CSharpClassCodeFromCsvFile(
            filePath: @"YourFilePath.csv",
            delimiter: ",",
            classAttribute: "[DelimitedRecord(\",\")]",
            propertyAttribute: "[FieldOptional()]");

        File.WriteAllText(@"OutPutPath.cs", generatedSource);
    }
}

The full code and a working example are available at https://github.com/povilaspanavas/CsvToCSharpClass


You can generate the class code with a little C# app which checks all the values for each column. You can determine which is the narrowest type each one fits:

/// <summary>
/// Reads a comma-separated file and returns C# source code for a class named after the
/// file (file name without extension), with one auto-property per header column.
/// </summary>
/// <param name="filePath">Path of the CSV file. The first line is treated as the header row.</param>
/// <returns>The generated class source code.</returns>
/// <exception cref="InvalidOperationException">The file has no lines, so there is no header row.</exception>
public static string CSharpClassCodeFromCsvFile(string filePath)
{
    string[] lines = File.ReadAllLines(filePath);
    if (lines.Length == 0)
        throw new InvalidOperationException("The CSV file '" + filePath + "' is empty; a header row is required.");

    string[] columnNames = lines[0].Split(',').Select(str => str.Trim()).ToArray();
    // Completely empty lines are not records, so they take no part in type detection.
    string[] data = lines.Skip(1).Where(line => string.IsNullOrEmpty(line) == false).ToArray();

    string className = Path.GetFileNameWithoutExtension(filePath);
    // A StringBuilder avoids re-copying the whole text for every appended column.
    var code = new System.Text.StringBuilder();
    code.AppendFormat("public class {0} {{ \n", className);

    for (int columnIndex = 0; columnIndex < columnNames.Length; columnIndex++)
    {
        code.Append("\t")
            .Append(GetVariableDeclaration(data, columnIndex, columnNames[columnIndex]))
            .Append("\n");
    }

    code.Append("}\n");
    return code.ToString();
}

/// <summary>
/// Builds the auto-property declaration for one column of comma-separated data.
/// </summary>
/// <param name="data">The data rows (header excluded), one unsplit CSV line per element.</param>
/// <param name="columnIndex">Zero-based index of the column to inspect.</param>
/// <param name="columnName">Property name to emit.</param>
/// <returns>A declaration of the form <c>public TYPE NAME { get; set; }</c>.</returns>
/// <remarks>
/// Types are tried in the order DateTime, int, double, with string as the fallback.
/// A row that has no field at <paramref name="columnIndex"/> contributes an empty value,
/// and a column without any values is declared as string.
/// </remarks>
public static string GetVariableDeclaration(string[] data, int columnIndex, string columnName)
{
    string[] columnValues = data
        .Select(line => line.Split(','))
        // Short rows yield an empty value instead of an IndexOutOfRangeException.
        .Select(fields => columnIndex < fields.Length ? fields[columnIndex].Trim() : string.Empty)
        .ToArray();
    string typeAsString;

    if (columnValues.Length == 0)
    {
        // All() is vacuously true on an empty array; without this guard a
        // header-only file would declare every column as DateTime.
        typeAsString = "string";
    }
    else if (AllDateTimeValues(columnValues))
    {
        typeAsString = "DateTime";
    }
    else if (AllIntValues(columnValues))
    {
        typeAsString = "int";
    }
    else if (AllDoubleValues(columnValues))
    {
        typeAsString = "double";
    }
    else
    {
        typeAsString = "string";
    }

    string declaration = String.Format("public {0} {1} {{ get; set; }}", typeAsString, columnName);
    return declaration;
}

/// <summary>
/// Tells whether every entry of <paramref name="values"/> can be parsed as a double.
/// An empty array yields true.
/// </summary>
public static bool AllDoubleValues(string[] values)
{
    double parsed;
    return Array.TrueForAll(values, text => double.TryParse(text, out parsed));
}

/// <summary>
/// Tells whether every entry of <paramref name="values"/> can be parsed as an int.
/// An empty array yields true.
/// </summary>
public static bool AllIntValues(string[] values)
{
    int parsed;
    // "All parse" expressed as "none fails to parse".
    bool anyFailure = values.Any(text => !int.TryParse(text, out parsed));
    return !anyFailure;
}

/// <summary>
/// Tells whether every entry of <paramref name="values"/> can be parsed as a DateTime.
/// An empty array yields true.
/// </summary>
public static bool AllDateTimeValues(string[] values)
{
    DateTime parsed;
    bool hasNonDate = Array.Exists(values, text => !DateTime.TryParse(text, out parsed));
    return hasNonDate == false;
}

// add other types if you need...

You can create a command line application from this which can be used in an automated solution.

Tags:

C#

Csv