Search for values in xml data

I assume you want to look in all attributes and all elements.

Use the function exist against the XML column in table B with a predicate to check for the existence of values from Table A. The function sql:column is used to get values from A into the XQuery expression.

declare @A table(Name varchar(50));
insert into @A(Name) values('John'),('Jim'),('Jason'),('Mike');

declare @B table(XMLCol xml);

insert into @B(XMLCol) values
('<Show Title="" ShowTitle="=False" ShowLine="=False" ShowDescription="=False" ShowExpandCollapse="=False" IsVisible="=True" Description="" PageBreakAfter="=False" PageCaption="" AppLink="" InfoLink="" ImageLink="" Pause="=False" PauseMessage="" PauseMessageStyle="" PauseTitle="" ScreenStyle="">
  <ShowOption Sequence="1" Name="Jim" Caption="Test" SelectOptionsImageLinkFieldExpression="ImageLink" />
</Show>'),
('<vars>
  <var name="MatrixName"><value>="LockExitPairCompatability"</value></var>
  <var name="Jason"><value>=If(Exists(Minute.Value.Min), ToNumber(Minute.Value.Min), 0)</value></var>
  <var name="MinNum"><value>=If(Exists(Agency) AND Agency = "Cert", 0, MinNum)</value></var>
  <var name="Where"><value>="Site=''" + Root.Site + "'' AND HWType IN " + ToSQLArray(Root.Components[ActiveDoor].HardwareType)</value></var>
  <var name="Where2"><value>=" AND HWSubType IN " + ToSQLArray(Root.Components[ActiveDoor].LockSubType)</value></var>
  <var name="CompatibleList"><value>=usr.FetchMatrix</value></var>
</vars>'),
('<rule>
  <property name="CollectionVariable" DisplayName="Collection Variable" ValueType="RValueExpression">John</property>
  <property name="KeyVariable" DisplayName="Key Variable" ValueType="LValueExpression">NextLock</property>
</rule>');

select A.Name
from @A as A
where not exists (
                 select *
                 from @B as B
                 where B.XMLCol.exist('(//@*, //text())[. = sql:column("A.Name")]') = 1
                 );

// searches all descendants.

@ specifies that you are looking for attributes.

* is a wild card for the attribute name.

//text() gives you all descendant text values (not attributes).

(//@*, //text()) combines the attributes with the text values

[. = sql:column("A.Name")] a predicate that checks the current value against A.Name.


If you want to check if a value contains A.Name you should use the contains function.

select A.Name
from @A as A
where not exists (
                 select *
                 from @B as B
                 where B.XMLCol.exist('(//@*, //text())[contains(., sql:column("A.Name"))]') = 1
                 );

If need a case-insensitive comparison, lower-case both parameters to contains.


I love an excuse to mention this old chestnut. It's a TSQL based XML shredder that decomposes your XML doc into a series of (sort of) key-value pairs. When you don't know what you're looking for, you've kind of got to just look for everything.

CREATE OR ALTER FUNCTION dbo.fnXmlNodeValue 
(   
    @x xml   
)
RETURNS TABLE 
AS
RETURN 
-- credit to https://stackoverflow.com/a/10885014/4709762
(
    WITH cte AS 
    (  
        SELECT 
            lvl =               1,  
            Name =              x.value('local-name(.)','NVARCHAR(MAX)'),  
            ParentName =        CAST(NULL AS NVARCHAR(MAX)), 
            ParentPosition =    CAST(1 AS INT), 
            NodeType =          CAST(N'Element' AS NVARCHAR(20)),  
            FullPath =          x.value('local-name(.)','NVARCHAR(MAX)'),  
            XPath =             x.value('local-name(.)','NVARCHAR(MAX)')  
                + N'[' + CAST(ROW_NUMBER() OVER(ORDER BY (SELECT 1)) AS NVARCHAR) + N']',  
            Position =          ROW_NUMBER() OVER(ORDER BY (SELECT 1)), 
            Tree =              x.value('local-name(.)','NVARCHAR(MAX)'),  
            Value =             x.value('text()[1]','NVARCHAR(MAX)'), 
            this =              x.query('.'),         
            t =                 x.query('*'),  
            Sort =              CAST(CAST(1 AS VARBINARY(4)) AS VARBINARY(MAX)),  
            ID =                CAST(1 AS INT)  
        FROM @x.nodes('/*') a(x)  
        UNION ALL 
        SELECT 
            lvl =               p.lvl + 1,  
            Name =              c.value('local-name(.)','NVARCHAR(MAX)'),  
            ParentName =        CAST(p.Name AS NVARCHAR(MAX)), 
            ParentPosition =    CAST(p.Position AS INT), 
            NodeType =          CAST(N'Element' AS NVARCHAR(20)),  
            FullPath =          CAST(p.FullPath + N'/' + c.value('local-name(.)','NVARCHAR(MAX)') AS NVARCHAR(MAX)),  
            XPath =             CAST(p.XPath + N'/'+ c.value('local-name(.)','NVARCHAR(MAX)') 
                + N'['+ CAST(ROW_NUMBER() OVER(PARTITION BY c.value('local-name(.)','NVARCHAR(MAX)') ORDER BY (SELECT 1)) AS NVARCHAR)+ N']' AS NVARCHAR(MAX)), 
            Position =          ROW_NUMBER() OVER(PARTITION BY c.value('local-name(.)','NVARCHAR(MAX)') ORDER BY (SELECT 1)), 
            Tree =              CAST( SPACE(2 * p.lvl - 1) + N'|' + REPLICATE(N'-', 1) + c.value('local-name(.)','NVARCHAR(MAX)') AS NVARCHAR(MAX)), 
            Value =             CAST( c.value('text()[1]','NVARCHAR(MAX)') AS NVARCHAR(MAX) ), 
            this =              c.query('.'),  
            t =                 c.query('*'),  
            Sort =              CAST(p.Sort + CAST( (lvl + 1) * 1024 + (ROW_NUMBER() OVER(ORDER BY (SELECT 1)) * 2) AS VARBINARY(4)) AS VARBINARY(MAX) ),  
            CAST((lvl + 1) * 1024 + (ROW_NUMBER() OVER(ORDER BY (SELECT 1)) * 2) AS INT)  
        FROM cte p  
        CROSS APPLY p.t.nodes('*') b(c)
    )
    ,cte2 AS 
    (
        SELECT 
            lvl AS Depth,  
            Name AS NodeName,  
            ParentName, 
            ParentPosition, 
            NodeType,  
            FullPath,  
            XPath,  
            Position, 
            Tree AS TreeView,  
            Value,  
            this AS XMLData,  
            Sort, ID  
        FROM cte  
        UNION ALL 
        SELECT 
            p.lvl,  
            x.value('local-name(.)','NVARCHAR(MAX)'),  
            p.Name, 
            p.Position, 
            CAST(N'Attribute' AS NVARCHAR(20)),  
            p.FullPath + N'/@' + x.value('local-name(.)','NVARCHAR(MAX)'),  
            p.XPath + N'/@' + x.value('local-name(.)','NVARCHAR(MAX)'),  
            1, 
            SPACE(2 * p.lvl - 1) + N'|' + REPLICATE('-', 1)  
            + N'@' + x.value('local-name(.)','NVARCHAR(MAX)'),  
            x.value('.','NVARCHAR(MAX)'),  
            NULL,  
            p.Sort,  
            p.ID + 1  
        FROM cte p  
        CROSS APPLY this.nodes('/*/@*') a(x)  
    )  
    SELECT 
        ID = ROW_NUMBER() OVER(ORDER BY Sort, ID),  
        ParentName, 
        ParentPosition, 
        Depth, 
        NodeName, 
        Position,   
        NodeType, 
        FullPath, 
        XPath, 
        TreeView, 
        Value, 
        XMLData 
    FROM cte2
);

How can I search to find which names in Table A aren't in any of the xml strings?

After decomposing your XML doc into its key-value pairs, you do an anti-semi join to find names that do not exist in the search target.

with shredder as (
    select b.id, 
           x.NodeName, 
           x.NodeType,
           x.FullPath, 
           x.Value 
    from TableB as b 
    cross apply dbo.fnXmlNodeValue(b.x) as x
) 
select * 
from TableA as a
where not exists (
    select 1 
    from shredder as s
    where s.Value = a.name
);

Full demo on dbfiddle!

... or you could always crtl+f a bunch ¯\_(ツ)_/¯

Tags:

Sql Server

Xml