Sunday 6 July 2014

ASCII Value Character Counts Procedure

I wrote this to determine the ASCII values being stored inside my data. It builds upon Determine ASCII Values of a string and produces a breakdown of all the ASCII values in a column.
To cut a long story short I had an issue where NUL characters (ASCII value 0) were stored and were producing unexpected results.

Code for the results table and procedure is here -

IF NOT EXISTS (SELECT 1 FROM sys.tables WHERE object_id = OBJECT_ID('dbo.charactercount'))
 CREATE TABLE dbo.charactercount
 (
  [id] int identity(1,1)
 ,[table_schema] sysname
 ,[table_name] sysname
 ,[column_name] sysname
 ,[character] int
 ,[column_count] int
 )
GO

IF EXISTS (SELECT 1 FROM sys.procedures WHERE name = 'charactercount_insert')
 DROP PROCEDURE dbo.charactercount_insert
GO
CREATE PROCEDURE dbo.charactercount_insert
(@table_schema sysname
,@table_name sysname
,@column_name sysname
,@sample_rows int = NULL)
AS
BEGIN
SET NOCOUNT ON
DECLARE @ColumnData table
(
UniqueRowID int IDENTITY (1, 1) Primary key NOT NULL ,
testeddata nvarchar(MAX)
)
DECLARE @sql NVARCHAR(MAX)
DECLARE @CurrentRow int  
DECLARE @TotalRows int  
DECLARE @Index INT
DECLARE @IntASCII INT
DECLARE @StatusMessage varchar (100)
DECLARE @currentdata NVARCHAR(MAX)
DECLARE @currentrowlength INT

IF @sample_rows IS NULL 
 SET @sql = 'SELECT [' + @column_name + '] FROM [' + @table_schema + '].[' + @table_name + ']'
ELSE
 SET @sql = 'SELECT TOP (' + CAST(@sample_rows AS VARCHAR(10)) + ') [' + @column_name + '] FROM [' + @table_schema + '].[' + @table_name + ']'
  
SET @StatusMessage = 'Fetching: [' + @column_name + '] FROM [' + @table_schema + '].[' + @table_name + ']'
RAISERROR (@StatusMessage , 10, 1) WITH NOWAIT 


INSERT INTO @ColumnData (testeddata)
EXEC(@sql)

SELECT @TotalRows = MAX(UniqueRowID) FROM @ColumnData

SET @StatusMessage = CAST(@TotalRows AS VARCHAR(20)) + ' rows fetched'
RAISERROR (@StatusMessage , 10, 1) WITH NOWAIT 

SELECT @CurrentRow = 1

SET @StatusMessage = 'Counting Characters'
RAISERROR (@StatusMessage , 10, 1) WITH NOWAIT 

 IF EXISTS (SELECT * FROM tempdb..sysobjects WHERE name LIKE '#charactercount')  
    DROP TABLE #charactercount

 CREATE TABLE #charactercount
 (
  [id] int identity(1,1)
 ,[table_schema] sysname
 ,[table_name] sysname
 ,[column_name] sysname
 ,[character] int
 ,[column_count] int
 )

WHILE @CurrentRow <= @TotalRows  
 BEGIN   

 SET @Index = 1

 SELECT @currentdata = testeddata
 FROM @ColumnData
 WHERE [UniqueRowID] = @CurrentRow

 SELECT @currentrowlength = LEN(@currentdata)

 IF EXISTS (SELECT * FROM tempdb..sysobjects WHERE name LIKE '#charactercount')  
    TRUNCATE TABLE #charactercount

 WHILE @Index < 1 +(@currentrowlength)
  BEGIN

  SELECT @IntASCII = ASCII(SUBSTRING(@currentdata, @Index, 1))
  FROM @ColumnData
  WHERE [UniqueRowID] = @CurrentRow

  INSERT INTO #charactercount
  (table_schema ,table_name ,column_name ,character,column_count )
  SELECT @table_schema AS Table_Schema
    ,@table_name AS Table_Name
    ,@column_name AS Column_Name
    ,@IntASCII AS [Character]
    ,1 AS Number 

  SET @Index = @Index + 1
  END

  -- At end of text, update the character count 
  MERGE [dbo].[charactercount] AS [Target]
  USING (SELECT table_schema AS Table_Schema
      ,table_name AS Table_Name
      ,column_name AS Column_Name
      ,character AS character
      ,SUM(column_count) AS Number
      FROM #charactercount
      GROUP BY table_schema, table_name, column_name,character) AS [Source]
  ON  Source.Table_Schema = Target.Table_Schema
  AND Source.Table_Name   = Target.Table_Name
  AND Source.Column_Name  = Target.Column_Name
  AND Source.character  = Target.Character
  WHEN MATCHED 
   THEN UPDATE SET [Target].[column_count] = [Target].[column_count] + Source.Number
  WHEN NOT MATCHED 
   THEN INSERT ([table_schema] 
      ,[table_name] 
      ,[column_name] 
      ,[character] 
      ,[column_count])
     VALUES
      (Source.table_schema
      ,Source.table_name 
      ,Source.column_name
      ,Source.Character
      ,Source.Number);

 SELECT @CurrentRow = @CurrentRow + 1  
  END


 END 
SET @StatusMessage = 'Done - Check dbo.charactercount table'
RAISERROR (@StatusMessage , 10, 1) WITH NOWAIT 
GO


Code to use this procedure is as follows ...
--Remember to clear down the results table before you run

TRUNCATE TABLE dbo.charactercount
GO


-- Provide the name of a column, like this
-- The final parameter is the number of rows to test.
EXEC charactercount_insert 'schemaname', 'tablename', 'columnname', 200
GO



-- View the data like this 
SELECT * FROM charactercount ORDER BY [character]
GO


-- Dynamically generate statements to interogate all columns, as follows

SELECT command = 'EXEC charactercount_insert ''' + TABLE_SCHEMA + ''',''' + TABLE_NAME+ ''',''' +  COLUMN_NAME + ''',200'
FROM INFORMATION_SCHEMA.COLUMNS 
WHERE DATA_TYPE LIKE '%char%'

No comments: