Best way to compare XElement objects

I had an issue comparing XElements for equality where one of the elements had child nodes that were self-closing tags but the other had separate open and close tags, e.g. `<blah/>` vs `<blah></blah>`.

The deep equals function was of course reporting them to be different so I needed a normalise function. I ended up using a variant of what is posted in this blog (by "marianor"):

http://weblogs.asp.net/marianor/archive/2009/01/02/easy-way-to-compare-two-xmls-for-equality.aspx

The minor changes are that I use the deep equals function after normalising (rather than a string compare), and that I added logic to treat elements that contain empty text the same as empty elements (to resolve the aforementioned issue). The result is below.

/// <summary>
/// Reports whether <c>currentElement</c> and <c>newElement</c> are equal once both
/// have been passed through <c>Normalize</c>.
/// </summary>
/// <param name="xml">Not read by this method body.</param>
/// <returns><c>true</c> when the two normalized trees are deep-equal.</returns>
/// <remarks>
/// <c>currentElement</c> and <c>newElement</c> are not declared in this snippet;
/// presumably they are members of the enclosing class - confirm against the caller.
/// </remarks>
private bool CompareXml(string xml)
{
    // Normalize first so that attribute order and empty-element spelling
    // do not influence the structural comparison.
    XElement normalizedCurrent = Normalize(currentElement);
    XElement normalizedNew = Normalize(newElement);
    bool sameTree = XElement.DeepEquals(normalizedCurrent, normalizedNew);
    return sameTree;
}

/// <summary>
/// Builds a canonical copy of <paramref name="element"/> so that trees differing only
/// in attribute order, child-element order, or the spelling of an empty element
/// (self-closing tag versus open/close pair) compare equal with <c>XNode.DeepEquals</c>.
/// </summary>
/// <param name="element">The element to normalize. It is not modified.</param>
/// <returns>A new element with attributes and child elements sorted by name.</returns>
/// <remarks>
/// For an element that has child elements only the child elements are carried over;
/// any text nodes mixed in between them are not copied.
/// </remarks>
private static XElement Normalize(XElement element)
{
    // Every branch keeps ALL attributes, sorted by name. Filtering the attributes of
    // parent elements down to namespace declarations would discard their ordinary
    // attributes and make elements with different attribute values compare equal.
    var sortedAttributes = element.Attributes().OrderBy(a => a.Name.ToString());

    if (element.HasElements)
    {
        // OrderBy is a stable sort, so same-named siblings keep their relative order.
        return new XElement(element.Name,
            sortedAttributes,
            element.Elements()
                .OrderBy(e => e.Name.ToString())
                .Select(e => Normalize(e)));
    }

    // A self-closing element and an element holding empty text both become an
    // empty element.
    if (element.IsEmpty || string.IsNullOrEmpty(element.Value))
    {
        return new XElement(element.Name, sortedAttributes);
    }

    return new XElement(element.Name, sortedAttributes, element.Value);
}

I found this excellent article useful. It contains a code sample that implements an alternative to XNode.DeepEquals that normalises the XML trees before comparison which makes non-semantic content irrelevant.

To illustrate, the implementation of XNode.DeepEquals returns false for these semantically-equivalent documents:

// Same element, same attribute values; only the order of the attributes differs.
var root1 = XElement.Parse("<Root a='1' b='2'><Child>1</Child></Root>");
var root2 = XElement.Parse("<Root b='2' a='1'><Child>1</Child></Root>");

However, using the implementation of DeepEqualsWithNormalization from the article, you'll get the value true because the ordering of attributes is not considered significant. This implementation is included below.

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Linq;
using System.Xml.Schema;

/// <summary>
/// Extension helpers for rendering LINQ to XML documents as text.
/// </summary>
public static class MyExtensions
{
    /// <summary>
    /// Serializes <paramref name="document"/> to an indented string with each
    /// attribute on its own line and without the XML declaration.
    /// </summary>
    /// <param name="document">The document to write out.</param>
    /// <returns>The serialized XML text.</returns>
    public static string ToStringAlignAttributes(this XDocument document)
    {
        var output = new StringBuilder();
        var writerSettings = new XmlWriterSettings
        {
            Indent = true,
            OmitXmlDeclaration = true,
            NewLineOnAttributes = true
        };

        // The writer has to be disposed before the builder is read so that
        // everything it buffered ends up in the builder.
        using (XmlWriter writer = XmlWriter.Create(output, writerSettings))
        {
            document.WriteTo(writer);
        }

        return output.ToString();
    }
}

/// <summary>
/// Compares LINQ to XML documents after normalizing them, so that content which does
/// not change the meaning of the document (comments, processing instructions,
/// attribute order, namespace declarations, schema-location hints and, when a schema
/// is supplied, the lexical form of typed values) does not affect the result.
/// </summary>
class Program
{
    /// <summary>
    /// Names from the XML Schema instance namespace that are dropped during normalization.
    /// </summary>
    private static class Xsi
    {
        public static readonly XNamespace xsi = "http://www.w3.org/2001/XMLSchema-instance";

        public static readonly XName schemaLocation = xsi + "schemaLocation";
        public static readonly XName noNamespaceSchemaLocation = xsi + "noNamespaceSchemaLocation";
    }

    /// <summary>
    /// Creates a normalized copy of <paramref name="source"/>.
    /// </summary>
    /// <param name="source">The document to normalize.</param>
    /// <param name="schema">
    /// Optional schema set. When it is not null the document is validated against it
    /// first (schema information is added to <paramref name="source"/>) and typed
    /// values are rewritten in a canonical form.
    /// </param>
    /// <returns>A new document containing the normalized content.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    /// <exception cref="XmlSchemaValidationException">
    /// A schema was supplied and <paramref name="source"/> is not valid against it
    /// (no validation event handler is passed to <c>Validate</c>).
    /// </exception>
    public static XDocument Normalize(XDocument source, XmlSchemaSet schema)
    {
        if (source == null)
            throw new ArgumentNullException("source");

        bool havePSVI = false;
        // validate, throw errors, add PSVI information
        if (schema != null)
        {
            source.Validate(schema, null, true);
            havePSVI = true;
        }
        return new XDocument(
            source.Declaration,
            source.Nodes().Select(n =>
            {
                // Remove comments, processing instructions, and text nodes that are
                // children of XDocument.  Only white space text nodes are allowed as
                // children of a document, so we can remove all text nodes.
                // A null result contributes nothing to the new document.
                if (n is XComment || n is XProcessingInstruction || n is XText)
                    return null;
                XElement e = n as XElement;
                if (e != null)
                    return NormalizeElement(e, havePSVI);
                return n;
            }
            )
        );
    }

    /// <summary>
    /// Normalizes both documents and compares the results with <c>XNode.DeepEquals</c>.
    /// </summary>
    /// <param name="doc1">First document.</param>
    /// <param name="doc2">Second document.</param>
    /// <param name="schemaSet">Optional schema set; see <see cref="Normalize"/>.</param>
    /// <returns><c>true</c> when the normalized documents are deep-equal.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="doc1"/> or <paramref name="doc2"/> is null.
    /// </exception>
    public static bool DeepEqualsWithNormalization(XDocument doc1, XDocument doc2,
        XmlSchemaSet schemaSet)
    {
        if (doc1 == null)
            throw new ArgumentNullException("doc1");
        if (doc2 == null)
            throw new ArgumentNullException("doc2");

        XDocument d1 = Normalize(doc1, schemaSet);
        XDocument d2 = Normalize(doc2, schemaSet);
        return XNode.DeepEquals(d1, d2);
    }

    /// <summary>
    /// Returns the schema type code recorded in <paramref name="info"/>, or
    /// <c>XmlTypeCode.None</c> when no schema type is available. A node can pass
    /// validation without having a schema type (for example when it is matched by a
    /// wildcard that skips processing), so the lookup must not assume one exists.
    /// </summary>
    private static XmlTypeCode GetTypeCode(IXmlSchemaInfo info)
    {
        if (info == null || info.SchemaType == null)
            return XmlTypeCode.None;
        return info.SchemaType.TypeCode;
    }

    /// <summary>
    /// Rewrites a schema-typed attribute value in a canonical lexical form; attributes
    /// of any other type are returned unchanged.
    /// </summary>
    private static XAttribute NormalizeTypedAttribute(XAttribute a)
    {
        switch (GetTypeCode(a.GetSchemaInfo()))
        {
            case XmlTypeCode.Boolean:
                return new XAttribute(a.Name, (bool)a);
            case XmlTypeCode.DateTime:
                return new XAttribute(a.Name, (DateTime)a);
            case XmlTypeCode.Decimal:
                return new XAttribute(a.Name, (decimal)a);
            case XmlTypeCode.Double:
                return new XAttribute(a.Name, (double)a);
            case XmlTypeCode.Float:
                return new XAttribute(a.Name, (float)a);
            case XmlTypeCode.HexBinary:
            case XmlTypeCode.Language:
                // Invariant casing: the result must not depend on the current culture.
                return new XAttribute(a.Name,
                    ((string)a).ToLowerInvariant());
            default:
                return a;
        }
    }

    /// <summary>
    /// Yields the attributes of <paramref name="element"/> without namespace
    /// declarations and schema-location hints, ordered by namespace name and then by
    /// local name. When <paramref name="havePSVI"/> is true, typed values are
    /// canonicalized as well.
    /// </summary>
    private static IEnumerable<XAttribute> NormalizeAttributes(XElement element,
        bool havePSVI)
    {
        return element.Attributes()
                .Where(a => !a.IsNamespaceDeclaration &&
                    a.Name != Xsi.schemaLocation &&
                    a.Name != Xsi.noNamespaceSchemaLocation)
                .OrderBy(a => a.Name.NamespaceName)
                .ThenBy(a => a.Name.LocalName)
                .Select(a => havePSVI ? NormalizeTypedAttribute(a) : a);
    }

    /// <summary>
    /// Normalizes one child node: comments and processing instructions are dropped
    /// (null is returned), elements are normalized recursively, and every other node
    /// is returned as is.
    /// </summary>
    private static XNode NormalizeNode(XNode node, bool havePSVI)
    {
        // trim comments and processing instructions from normalized tree
        if (node is XComment || node is XProcessingInstruction)
            return null;
        XElement e = node as XElement;
        if (e != null)
            return NormalizeElement(e, havePSVI);
        // Only thing left is XCData and XText; they are handed back unchanged
        return node;
    }

    /// <summary>
    /// Builds a normalized copy of <paramref name="element"/>. When
    /// <paramref name="havePSVI"/> is true and the element has one of the handled
    /// simple types, its content is replaced by the canonical form of the typed
    /// value; otherwise its child nodes are normalized one by one.
    /// </summary>
    private static XElement NormalizeElement(XElement element, bool havePSVI)
    {
        if (havePSVI)
        {
            switch (GetTypeCode(element.GetSchemaInfo()))
            {
                case XmlTypeCode.Boolean:
                    return new XElement(element.Name,
                        NormalizeAttributes(element, havePSVI),
                        (bool)element);
                case XmlTypeCode.DateTime:
                    return new XElement(element.Name,
                        NormalizeAttributes(element, havePSVI),
                        (DateTime)element);
                case XmlTypeCode.Decimal:
                    return new XElement(element.Name,
                        NormalizeAttributes(element, havePSVI),
                        (decimal)element);
                case XmlTypeCode.Double:
                    return new XElement(element.Name,
                        NormalizeAttributes(element, havePSVI),
                        (double)element);
                case XmlTypeCode.Float:
                    return new XElement(element.Name,
                        NormalizeAttributes(element, havePSVI),
                        (float)element);
                case XmlTypeCode.HexBinary:
                case XmlTypeCode.Language:
                    // Invariant casing: the result must not depend on the current culture.
                    return new XElement(element.Name,
                        NormalizeAttributes(element, havePSVI),
                        ((string)element).ToLowerInvariant());
            }
        }

        // No schema information, or a type without a special canonical form:
        // normalize the attributes and recurse into the child nodes.
        return new XElement(element.Name,
            NormalizeAttributes(element, havePSVI),
            element.Nodes().Select(n => NormalizeNode(n, havePSVI))
        );
    }
}

I started down the same path as @llasarov, but I didn't like the use of strings either. I discovered XElement.DeepEquals() here, so finding the question helped me.

I could see that it could be difficult if your test returns a massive XML structure, but in my opinion, this should not be done - the test should check as small a structure as possible.

Say you have a method that you expect to return an element that looks like <Test Sample="Value" />. You can use the XElement and XAttribute constructors to build your expected value pretty easily, like this:

/// <summary>
/// Checks that <c>MyXmlMethod</c> returns an element that is deep-equal to
/// <c>&lt;Test Sample="Value" /&gt;</c>.
/// </summary>
[TestMethod]
public void MyXmlMethodTest()
{
    // Arrange: assemble the expected tree directly from LINQ to XML objects.
    var sampleAttribute = new XAttribute("Sample", "Value");
    var expected = new XElement("Test", sampleAttribute);

    // Act: invoke the method under test.
    XElement actual = MyXmlMethod();

    // Assert: structural comparison of the two trees.
    bool treesMatch = XNode.DeepEquals(expected, actual);
    Assert.IsTrue(treesMatch);
}

Even if there are a handful of elements and attributes, this is manageable and consistent.