How to get distinct count of values of all the columns of a table based on where condition in sql server?
First get the columns and use stuff to generate the select in this way:
SELECT COUNT(ColumnA) AS ColumnA, COUNT(ColumnB AS ColumnB), COUNT(ColumnC) AS ColumnC....
That way you only select on your table once to get all counts, After that, use CROSS APPLY
to "unpivot" those columns and return the output on one row per column
CROSS APPLY(
VALUES(1, 'ColumnA', ColumnA), (2, 'ColumnB', ColumnB), (3, 'ColumnC', ColumnC)
)(ID, ColumnName, DistinctCountValue)
For the filter, use sp_executesql and send the file_id as parameter
exec SP_executesql @SQL, N'@FID INT', @FID = @FileID
Since you are using all columns of the table Row_Number() over(partition by Table_Schema, Table_Name order by ORDINAL_POSITION) as RowNum
becomes redundant, ORDINAL_POSITION already has the value that you are looking for
declare @tablename nvarchar(50) = 'MyTestTable'
declare @fileID int = 1
declare @SQL nvarchar(max)
set @SQL = ''
;with cols as (
select TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME, ORDINAL_POSITION
from INFORMATION_SCHEMA.COLUMNS
where TABLE_NAME = @TableName
)
select @SQL = ';WITH CTE AS (SELECT
' +
STUFF((
SELECT ', COUNT(DISTINCT ' + QUOTENAME(COLUMN_NAME) + ') AS ' + QUOTENAME(COLUMN_NAME)
FROM cols
ORDER BY ORDINAL_POSITION
FOR XML PATH('')
), 1, 1, '')
+ '
FROM ' + @TableName + '
WHERE File_ID = @FID
)
SELECT B.*
FROM CTE
CROSS APPLY (
VALUES ' +STUFF((
SELECT ',( ' + CAST(ORDINAL_POSITION AS VARCHAR) + ',' + QUOTENAME(COLUMN_NAME,'''') + ',' + QUOTENAME(COLUMN_NAME) + ')'
FROM cols
ORDER BY ORDINAL_POSITION
FOR XML PATH('')
), 1, 1, '') + '
)B (ID,ColumnName,DistinctCountValue)
'
from cols
exec SP_executesql @SQL, N'@FID INT', @FID = @FileID
The query below creates a table of all the column names and uses a while loop to select the count for whatever WHERE clause you want to use. This should be pretty flexible for any table; just update the top variables. Note that this will not count a column where its value is null. You can add a case to the @Query parameter if that's what you want. Since it processes each row individually, I added in a temp table so you only hit the db once.
IF OBJECT_ID('tempdb..##SourceValues') IS NOT NULL
DROP TABLE ##SourceValues
DECLARE @Schema VARCHAR(50) = 'SomeSchema'
DECLARE @Table VARCHAR(50) = 'SomeTable'
DECLARE @WhereClause VARCHAR(MAX) = ' Some WHERE clause'
DECLARE @ColumnName VARCHAR(50)
DECLARE @ProcessedRows TABLE(ColumnName VARCHAR(50), DistinctCount INT)
DECLARE @Columns TABLE(RowNumber INT, ColumnName VARCHAR(100))
INSERT INTO @Columns SELECT ROW_NUMBER() OVER(ORDER BY COLUMN_NAME DESC), COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = @Table
DECLARE @Count INT = (SELECT MAX(RowNumber) FROM @Columns)
DECLARE @Counter INT = 0
DECLARE @DistinctCount INT
DECLARE @Query NVARCHAR(MAX)
EXEC('SELECT * INTO ##SourceValues FROM ' + @Table +' (NOLOCK)')
WHILE @Counter < @Count
BEGIN
SET @Counter += 1
SET @ColumnName = (SELECT ColumnName FROM @Columns WHERE RowNumber = @Counter)
SET @Query = 'SELECT @OutPut = COUNT(' + @ColumnName + ') FROM ' + @Schema + '.' + ' ##SourceValues ' + @WhereClause
EXECUTE sp_executesql @Query, N'@Output INT OUT', @DistinctCount OUT
INSERT INTO @ProcessedRows(ColumnName, DistinctCount) VALUES (@ColumnName, @DistinctCount)
END
SELECT * FROM @ProcessedRows
Let's try some different approach.
Get all values unpivoted as Param/Value:
1) Collect list of tables and columns to be used in dynamic SQL:
DROP TABLE IF EXISTS #Base;
;WITH SchemaData AS (
SELECT t.name AS [TableName],c.name AS [ColumnName],c.column_id AS [ColumnOrderID]
FROM sys.tables t
INNER JOIN sys.columns c ON c.object_id = t.object_id
)
SELECT t.TableName
,STUFF((SELECT ',CONVERT(NVARCHAR(MAX),' + QUOTENAME([ColumnName]) + ') AS ' + QUOTENAME([ColumnName])
FROM SchemaData a WHERE (a.TableName = t.TableName) FOR XML PATH(''),TYPE).value('(./text())[1]','NVARCHAR(MAX)'),1,1,'') AS [SelectClause]
,STUFF((SELECT ',' + QUOTENAME([ColumnName]) FROM SchemaData a WHERE (a.TableName = t.TableName) FOR XML PATH(''),TYPE).value('(./text())[1]','NVARCHAR(MAX)'),1,1,'') AS [UnpivotClause]
INTO #Base
FROM SchemaData t
GROUP BY t.TableName
;
2) Get all data inside a temp table
DROP TABLE IF EXISTS #Result;
CREATE TABLE #Result(TableName NVARCHAR(255),ColumnName NVARCHAR(255),[Value] NVARCHAR(MAX));
DECLARE @TableName NVARCHAR(255),@SelectClause NVARCHAR(MAX),@UnpivotClause NVARCHAR(MAX);
DECLARE crPopulateResult CURSOR LOCAL FAST_FORWARD READ_ONLY FOR SELECT b.TableName,b.SelectClause,b.UnpivotClause FROM #Base b;
OPEN crPopulateResult;
FETCH NEXT FROM crPopulateResult INTO @TableName,@SelectClause,@UnpivotClause;
DECLARE @dSql NVARCHAR(MAX);
WHILE @@FETCH_STATUS = 0
BEGIN
SELECT @dSql = N' INSERT INTO #Result(TableName,[ColumnName],[Value])
SELECT up.TableName,up.Param AS [ColumnName],up.[Value]
FROM (
SELECT ''' + @TableName + N''' AS [TableName]
,' + @SelectClause + N'
FROM ' + QUOTENAME(@TableName) + N'
) a
UNPIVOT(Value FOR Param IN (' + @UnpivotClause + N')) up
';
EXEC sp_executesql @stmt = @dSql;
FETCH NEXT FROM crPopulateResult INTO @TableName,@SelectClause,@UnpivotClause;
END
CLOSE crPopulateResult;
DEALLOCATE crPopulateResult;
3) Any filters can be applied with #Results, including Table names, column names, data filters, etc:
SELECT r.TableName,r.ColumnName,COUNT(*) AS [CountValue],COUNT(DISTINCT r.[Value]) AS [DistinctCountValue]
FROM #Result r
--
--WHERE r.ColumnName = 'file_id' AND r.[Value] = '1'
--
GROUP BY r.TableName,r.ColumnName
ORDER BY r.TableName,r.ColumnName
;