Data Mask SQL

 
 

Substitute.FromList

Sample SQL...

/*===================================================================================
 Mask Type: Substitute.FromList =====================================================================================*/
/*---------------------------------------------------------------------
Table: [dbo].[DimAccount]
Column: [AccountType]
Mask:
* Type: 'Substitute: From List...'
* Params: 'checking|savings|cd|stock|other'
* Category: 'Substitute' - overwrite all rows with a "random" value
* Descrip: This mask overwrites each row with a value randomly selected from the list
           of values you provide. This script repeatedly applies your list in a
           random order across sll records in the target table. If you have 10 items
           in your input substitution list, and 200 records in your target table to
           be masked, then your list of 10 is applied in a random order 20 times.
---------------------------------------------------------------------*/


-- Create & populate temp table list of values you want to substitute in
CREATE TABLE [#SubstituteRecs] ([AccountType] nvarcharte temp(50));
INSERT INTO [#SubstituteRecs] ([AccountType]) VALUES('checking');
INSERT INTO [#SubstituteRecs] ([AccountType]) VALUES('savings');
INSERT INTO [#SubstituteRecs] ([AccountType]) VALUES('cd');
INSERT INTO [#SubstituteRecs] ([AccountType]) VALUES('stock');
INSERT INTO [#SubstituteRecs] ([AccountType]) VALUES('other'));
   -- Add more items to your source list here...
   -- or replace temp table with a permanent work table and reference that instead


-- Initialize Counters
DECLARE @SrcRecCount AS INTEGER;
DECLARE @SubstRecCount AS INTEGER;
DECLARE @CopiesOfFullSet AS INTEGER;

-- Determine how many times to randomly repeat the list across target data
-- (e.g.: if list of values contains 10 items, and target column to mask using
--  substitution has 500 rows, then the value list must be randomly applied 50 times)

SELECT @SrcRecCount = COUNT(*) FROM [dbo].[DimAccount];
SELECT @SubstRecCount = COUNT(*) FROM [#SubstituteRecs];
SET @Co @CopiesOfFullSet = @SrcRecCount / @SubstRecCount;

-- Create and randomly populate shuffled items from the value list to arrive at
-- a temp table of one column and the same row count as target mask table.
-- Use this temp table to ultimately update the target table.

CREATE TABLE [#ShuffleRecs] (Id INT IDENTITY(1,1), [AccountType] NVARCHAR(50));

DECLARE @i AS INTEGER;
SET @i = 0;

WHILE @i < @CopiesOfFullSet
  BEGIN
    INSERT INTO [#ShuffleRecs]
    SELECT [AccountType]
    FROM [#SubstituteRecs]
    ORDER BY NEWID();

    SET @i = @i + 1;
  END

-- Finish the remainder of temp records to arrive at the same row count
-- (Would have skipped the loop above if ValueList.RowCount > TargetTable.RowCount)

INSERT INTO [#ShuffleRecs]
SELECT TOP (@SrcRecCount - @SubstRecCount * @CopiesOfFullSet) [AccountType]
FROM [#SubstituteRecs]
ORDER BY NEWID();(();


-- Substitute the randomly sorted values from the value list over the target table
-- whose column is being masked

UPDATE src
SET src.[AccountType] = shuffled.[AccountType]
FROM (SELECT ROW_NUMBER() OVER(ORDER BY [AccountType] DESC) AS 'RowNumber'
      , *
      FROM [dbo].[DimAccount]) AS src
  INNER JOIN [#ShuffleRecs] AS shuffled
  ON shuffled.Id = src.RowNumber

-- Cleanup
DROP TABLE [#ShuffleRecs];
DROP TABLE [#SubstituteRecs];
 

 

eXTReMe Tracker