XML to CSV Using XSLT

This CsvEscape function is XSLT 1.0 and escapes column values ,, ", and newlines like RFC 4180 or Excel. It makes use of the fact that you can recursively call XSLT templates:

  • The template EscapeQuotes replaces all double quotes with 2 double quotes, recursively from the start of the string.
  • The template CsvEscape checks if the text contains a comma or double quote, and if so surrounds the whole string with a pair of double quotes and calls EscapeQuotes for the string.

Example usage: xsltproc xmltocsv.xslt file.xml > file.csv

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output method="text" encoding="UTF-8"/>

  <xsl:template name="EscapeQuotes">
    <xsl:param name="value"/>
    <xsl:choose>
      <xsl:when test="contains($value,'&quot;')">
    <xsl:value-of select="substring-before($value,'&quot;')"/>
    <xsl:text>&quot;&quot;</xsl:text>
    <xsl:call-template name="EscapeQuotes">
      <xsl:with-param name="value" select="substring-after($value,'&quot;')"/>
    </xsl:call-template>
      </xsl:when>
      <xsl:otherwise>
    <xsl:value-of select="$value"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>

  <xsl:template name="CsvEscape">
    <xsl:param name="value"/>
    <xsl:choose>
    <xsl:when test="contains($value,',')">
      <xsl:text>&quot;</xsl:text>
      <xsl:call-template name="EscapeQuotes">
    <xsl:with-param name="value" select="$value"/>
      </xsl:call-template>
      <xsl:text>&quot;</xsl:text>
    </xsl:when>
    <xsl:when test="contains($value,'&#xA;')">
      <xsl:text>&quot;</xsl:text>
      <xsl:call-template name="EscapeQuotes">
    <xsl:with-param name="value" select="$value"/>
      </xsl:call-template>
      <xsl:text>&quot;</xsl:text>
    </xsl:when>
    <xsl:when test="contains($value,'&quot;')">
      <xsl:text>&quot;</xsl:text>
      <xsl:call-template name="EscapeQuotes">
    <xsl:with-param name="value" select="$value"/>
      </xsl:call-template>
      <xsl:text>&quot;</xsl:text>
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="$value"/>
    </xsl:otherwise>
    </xsl:choose>
  </xsl:template>
  
  <xsl:template match="/">
    <xsl:text>project,name,language,owner,state,startDate</xsl:text>
    <xsl:text>&#xA;</xsl:text>
    <xsl:for-each select="projects/project">
      <xsl:call-template name="CsvEscape"><xsl:with-param name="value" select="normalize-space(name)"/></xsl:call-template>
      <xsl:text>,</xsl:text>
      <xsl:call-template name="CsvEscape"><xsl:with-param name="value" select="normalize-space(language)"/></xsl:call-template>
      <xsl:text>,</xsl:text>
      <xsl:call-template name="CsvEscape"><xsl:with-param name="value" select="normalize-space(owner)"/></xsl:call-template>
      <xsl:text>,</xsl:text>
      <xsl:call-template name="CsvEscape"><xsl:with-param name="value" select="normalize-space(state)"/></xsl:call-template>
      <xsl:text>,</xsl:text>
      <xsl:call-template name="CsvEscape"><xsl:with-param name="value" select="normalize-space(startDate)"/></xsl:call-template>
      <xsl:text>&#xA;</xsl:text>
    </xsl:for-each>
  </xsl:template>
</xsl:stylesheet>

Found an XML transform stylesheet here (wayback machine link, site itself is in german)

The stylesheet added here could be helpful:

<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="text" encoding="iso-8859-1"/>

<xsl:strip-space elements="*" />

<xsl:template match="/*/child::*">
<xsl:for-each select="child::*">
<xsl:if test="position() != last()">"<xsl:value-of select="normalize-space(.)"/>",    </xsl:if>
<xsl:if test="position()  = last()">"<xsl:value-of select="normalize-space(.)"/>"<xsl:text>&#xD;</xsl:text>
</xsl:if>
</xsl:for-each>
</xsl:template>

</xsl:stylesheet>

Perhaps you want to remove the quotes inside the xsl:if tags so it doesn't put your values into quotes, depending on where you want to use the CSV file.


This xsl:stylesheet can use a specified list of column headers and will ensure that the rows will be ordered correctly. It requires XSLT version 2.0.

<?xml version="1.0"?>
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:csv="csv:csv">
    <xsl:output method="text" encoding="utf-8"/>
    <xsl:strip-space elements="*"/>

    <xsl:variable name="delimiter" select="','"/>

    <csv:columns>
        <column>name</column>
        <column>sublease</column>
        <column>addressBookID</column>
        <column>boundAmount</column>
        <column>rentalAmount</column>
        <column>rentalPeriod</column>
        <column>rentalBillingCycle</column>
        <column>tenureIncome</column>
        <column>tenureBalance</column>
        <column>totalIncome</column>
        <column>balance</column>
        <column>available</column>
    </csv:columns>

    <xsl:template match="/property-manager/properties">
        <!-- Output the CSV header -->
        <xsl:for-each select="document('')/*/csv:columns/*">
                <xsl:value-of select="."/>
                <xsl:if test="position() != last()">
                    <xsl:value-of select="$delimiter"/>
                </xsl:if>
        </xsl:for-each>
        <xsl:text>&#xa;</xsl:text>
    
        <!-- Output rows for each matched property -->
        <xsl:apply-templates select="property"/>
    </xsl:template>

    <xsl:template match="property">
        <xsl:variable name="property" select="."/>
    
        <!-- Loop through the columns in order  -->
        <xsl:for-each select="document('')/*/csv:columns/*">
            <!-- Extract the column name and value  -->
            <xsl:variable name="column" select="."/>
            <xsl:variable name="value" select="$property/*[name() = $column]"/>
        
            <!-- Quote the value if required -->
            <xsl:choose>
                <xsl:when test="contains($value, '&quot;')">
                    <xsl:variable name="x" select="replace($value, '&quot;',  '&quot;&quot;')"/>
                    <xsl:value-of select="concat('&quot;', $x, '&quot;')"/>
                </xsl:when>
                <xsl:when test="contains($value, $delimiter)">
                    <xsl:value-of select="concat('&quot;', $value, '&quot;')"/>
                </xsl:when>
                <xsl:otherwise>
                    <xsl:value-of select="$value"/>
                </xsl:otherwise>
            </xsl:choose>
        
            <!-- Add the delimiter unless we are the last expression -->
            <xsl:if test="position() != last()">
                <xsl:value-of select="$delimiter"/>
            </xsl:if>
        </xsl:for-each>
    
        <!-- Add a newline at the end of the record -->
        <xsl:text>&#xa;</xsl:text>
    </xsl:template>

</xsl:stylesheet>

Here is a version with configurable parameters that you can set programmatically:

<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output method="text" encoding="utf-8" />

  <xsl:param name="delim" select="','" />
  <xsl:param name="quote" select="'&quot;'" />
  <xsl:param name="break" select="'&#xA;'" />

  <xsl:template match="/">
    <xsl:apply-templates select="projects/project" />
  </xsl:template>

  <xsl:template match="project">
    <xsl:apply-templates />
    <xsl:if test="following-sibling::*">
      <xsl:value-of select="$break" />
    </xsl:if>
  </xsl:template>

  <xsl:template match="*">
    <!-- remove normalize-space() if you want keep white-space at it is --> 
    <xsl:value-of select="concat($quote, normalize-space(), $quote)" />
    <xsl:if test="following-sibling::*">
      <xsl:value-of select="$delim" />
    </xsl:if>
  </xsl:template>

  <xsl:template match="text()" />
</xsl:stylesheet>

Tags:

Xml

Csv

Xslt