Purify That XML

Processing XML using XPath or XSLT is much more difficult when the XML contains namespaces. Sometimes the namespaces are a necessary part of the data structure and should be left intact; it is when namespaces are used arbitrarily (and contribute nothing to the underlying data structure) that they become a problem. For the latter case, I have developed this XML purifier routine, which processes an XML string prior to parsing, translating, or even viewing the content. Even the presence of only the “default” namespace is enough to break MSXML XPath parsing, as well as XSLT transformations performed with MSXML or with the .NET XslCompiledTransform class.

using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
namespace Utilities
{
    /// <summary>
    /// Removes namespace declarations, and the prefixes they declare, from an
    /// XML string so that the result can be queried or transformed without
    /// any namespace handling.
    /// </summary>
    /// <remarks>
    /// The input is treated as text and is never parsed into a DOM, so it may
    /// be a fragment. Only element and attribute names inside start/end tags
    /// are rewritten; text content, attribute values, comments, CDATA
    /// sections and processing instructions are copied through unchanged.
    /// Prefixes that are not declared anywhere in the input (for example the
    /// built-in <c>xml:</c> prefix) are left in place.
    /// </remarks>
    public class XmlPurifier
    {
        // Attribute name of a default namespace declaration.
        private const string DefaultDeclarationName = "xmlns";

        // Leading part of the attribute name of a prefixed namespace declaration.
        private const string PrefixDeclarationStart = "xmlns:";

        // Tokenises the markup of a document. Comments, CDATA sections and
        // processing instructions are listed first so that tag-like text
        // inside them is consumed as part of the enclosing construct and is
        // never mistaken for a real tag. The last alternative matches start,
        // end and empty-element tags; quoted attribute values are consumed
        // whole so a '>' inside a value does not end the tag early.
        private static readonly Regex MarkupPattern = new Regex(
            "<!--.*?-->|<!\\[CDATA\\[.*?\\]\\]>|<\\?.*?\\?>|</?[^\\s<>/!?\"'=](?:[^<>\"']|\"[^\"]*\"|'[^']*')*>",
            RegexOptions.Singleline);

        // One attribute inside a tag: (1) name, (2) the '=' with any
        // surrounding whitespace, (3) the single- or double-quoted value.
        private static readonly Regex AttributePattern = new Regex(
            "([^\\s=<>/\"']+)(\\s*=\\s*)(\"[^\"]*\"|'[^']*')");

        // The opening "<" or "</" of a tag (1) followed by the element name (2).
        private static readonly Regex ElementNamePattern = new Regex(
            "^(</?)([^\\s/>]+)");

        /// <summary>
        /// Returns a copy of <paramref name="input"/> with every
        /// <c>xmlns</c> / <c>xmlns:prefix</c> declaration removed and every
        /// declared prefix stripped from element and attribute names.
        /// </summary>
        /// <param name="input">The XML text to clean up.</param>
        /// <returns>
        /// The cleaned XML text. Whitespace that preceded a removed
        /// declaration is kept, so <c>&lt;a xmlns="u"&gt;</c> becomes
        /// <c>&lt;a &gt;</c>.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="input"/> is <c>null</c>.
        /// </exception>
        public static string Purify(string input)
        {
            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            // Prefixes are collected from the whole input first because an
            // element name appears before the declaration of its own prefix
            // (<a:root xmlns:a="...">).
            HashSet<string> prefixes = CollectDeclaredPrefixes(input);

            return MarkupPattern.Replace(
                input,
                markup => IsTag(markup.Value) ? RewriteTag(markup.Value, prefixes) : markup.Value);
        }

        // Gathers every prefix declared by an xmlns:prefix attribute in any tag.
        private static HashSet<string> CollectDeclaredPrefixes(string input)
        {
            // Ordinal comparison: XML prefixes are case-sensitive.
            HashSet<string> prefixes = new HashSet<string>(StringComparer.Ordinal);

            foreach (Match markup in MarkupPattern.Matches(input))
            {
                if (!IsTag(markup.Value))
                {
                    continue;
                }

                foreach (Match attribute in AttributePattern.Matches(markup.Value))
                {
                    string name = attribute.Groups[1].Value;

                    if (name.Length > PrefixDeclarationStart.Length
                        && name.StartsWith(PrefixDeclarationStart, StringComparison.OrdinalIgnoreCase))
                    {
                        prefixes.Add(name.Substring(PrefixDeclarationStart.Length));
                    }
                }
            }

            return prefixes;
        }

        // True for start/end/empty-element tags; false for comments, CDATA
        // sections and processing instructions, which begin with "<!" or "<?".
        private static bool IsTag(string markup)
        {
            return markup[1] != '!' && markup[1] != '?';
        }

        // Drops namespace declarations from one tag and strips declared
        // prefixes from its element name and attribute names.
        private static string RewriteTag(string tag, HashSet<string> prefixes)
        {
            string withoutDeclarations = AttributePattern.Replace(
                tag,
                attribute =>
                {
                    string name = attribute.Groups[1].Value;

                    if (IsNamespaceDeclaration(name))
                    {
                        return String.Empty;
                    }

                    // The value (group 3) is emitted untouched so that URLs
                    // and other text containing "prefix:" are not corrupted.
                    return StripDeclaredPrefix(name, prefixes)
                        + attribute.Groups[2].Value
                        + attribute.Groups[3].Value;
                });

            return ElementNamePattern.Replace(
                withoutDeclarations,
                element => element.Groups[1].Value + StripDeclaredPrefix(element.Groups[2].Value, prefixes));
        }

        // True when the attribute name is "xmlns" or begins with "xmlns:".
        // The comparison ignores case, as the original patterns did.
        private static bool IsNamespaceDeclaration(string attributeName)
        {
            return attributeName.Equals(DefaultDeclarationName, StringComparison.OrdinalIgnoreCase)
                || attributeName.StartsWith(PrefixDeclarationStart, StringComparison.OrdinalIgnoreCase);
        }

        // Returns the local part of a qualified name when its prefix was
        // declared in the input; otherwise returns the name unchanged.
        private static string StripDeclaredPrefix(string qualifiedName, HashSet<string> prefixes)
        {
            int colon = qualifiedName.IndexOf(':');

            if (colon > 0 && prefixes.Contains(qualifiedName.Substring(0, colon)))
            {
                return qualifiedName.Substring(colon + 1);
            }

            return qualifiedName;
        }
    }
}