Having trouble with MS Sql Server OPENJSON feature - sql-server

I created a sql test script below for this question.
-- Rebuild the scratch table from a clean slate so the script can be re-run.
DROP TABLE IF EXISTS #temp;
CREATE TABLE #temp
(
    Id         INT           NOT NULL PRIMARY KEY
  , Attributes NVARCHAR(MAX) NULL
);
-- Four cases: rows 1 and 2 carry both attributes, row 3 only State, row 4 only Step.
INSERT INTO #temp (Id, Attributes)
VALUES
    (1, '[{"Name":"Step","Value":"A"},{"Name":"State","Value":"Active"}]')
  , (2, '[{"Name":"Step","Value":"B"},{"Name":"State","Value":"Inactive"}]')
  , (3, '[{"Name":"State","Value":"Active"}]')
  , (4, '[{"Name":"Step","Value":"D"}]');
-- Question's attempt: shred the Attributes JSON array twice and show the
-- Value of each shredded element as [Step] and [State].
-- Each OPENJSON call returns one row per array element, so applying it twice
-- pairs every element with every element of the same row (2 elements -> 4
-- output rows, 1 element -> 1 output row), which is the duplicated output
-- shown below the script.
SELECT
t.Id
, t.Attributes
, stepname.Value AS [Step]
, statename.Value AS [State]
FROM #temp t
-- first pass over the array (intended to supply the Step value)
CROSS APPLY
OPENJSON(t.Attributes)
WITH
(
Name NVARCHAR(MAX) '$.Name'
, Value NVARCHAR(MAX) '$.Value'
) AS stepname
-- second pass over the same array (intended to supply the State value)
CROSS APPLY
OPENJSON(t.Attributes)
WITH
(
Name NVARCHAR(MAX) '$.Name'
, Value NVARCHAR(MAX) '$.Value'
) AS statename
-- WHERE 1 = 1 is a no-op anchor so the filters below can be toggled individually.
-- Variant A and variant B are the filters the question says were tried.
WHERE 1 = 1
--AND (stepname.Name = statename.Name) -- A
--AND -- B
--( -- B
-- stepname.Name IS NULL -- B
-- OR stepname.Name = 'Step' -- B
--) -- B
--AND -- B
--( -- B
-- statename.Name IS NULL -- B
-- OR statename.Name = 'State' -- B
--); -- B
Running this script as is gives the following output:
Id
Attributes
Step
State
1
[{"Name":"Step","Value":"A"},{"Name":"State","Value":"Active"}]
A
A
1
[{"Name":"Step","Value":"A"},{"Name":"State","Value":"Active"}]
A
Active
1
[{"Name":"Step","Value":"A"},{"Name":"State","Value":"Active"}]
Active
A
1
[{"Name":"Step","Value":"A"},{"Name":"State","Value":"Active"}]
Active
Active
2
[{"Name":"Step","Value":"B"},{"Name":"State","Value":"Inactive"}]
B
B
2
[{"Name":"Step","Value":"B"},{"Name":"State","Value":"Inactive"}]
B
Inactive
2
[{"Name":"Step","Value":"B"},{"Name":"State","Value":"Inactive"}]
Inactive
B
2
[{"Name":"Step","Value":"B"},{"Name":"State","Value":"Inactive"}]
Inactive
Inactive
3
[{"Name":"State","Value":"Active"}]
Active
Active
4
[{"Name":"Step","Value":"D"}]
D
D
What I would like to see in the output though is only 4 rows, one per data row.
Id
Attributes
Step
State
1
[{"Name":"Step","Value":"A"},{"Name":"State","Value":"Active"}]
A
Active
2
[{"Name":"Step","Value":"B"},{"Name":"State","Value":"Inactive"}]
B
Inactive
3
[{"Name":"State","Value":"Active"}]
Active
4
[{"Name":"Step","Value":"D"}]
D
I have left some commented code in my example to see the types of things I've tried but all to no avail. Uncommenting the where statement with a '-- A' gets me closer, but not completely. I thought for sure that uncommenting the statements in the where clause with a '-- B' on the end would give me what I want, but it doesn't. Any ideas on how to do this?
I originally started out with only 1 OPENJSON block but had no success so I thought that perhaps having 2 OPENJSON blocks, one for the Step and one for the State would help, but still not able to get data rows 3 and 4 included since each of those rows is missing one of the 2 JSON values.
Thanks very much for any help!

Conditional aggregation could be used:
-- Pivot the Name/Value pairs of each row's JSON array into [Step] and [State]
-- columns with conditional aggregation: exactly one output row per #temp row.
-- The array is shredded only once; applying OPENJSON twice would build an
-- n x n cross product per row that GROUP BY then has to collapse again.
-- OUTER APPLY keeps rows whose Attributes is NULL or an empty array (both
-- columns come back NULL) instead of dropping them from the result.
SELECT
    t.Id
  , t.Attributes
    -- CASE yields NULL for non-matching elements; MAX ignores NULLs, so the
    -- single matching Value (if any) survives.
  , [Step]  = MAX(CASE WHEN attr.Name = 'Step'  THEN attr.Value END)
  , [State] = MAX(CASE WHEN attr.Name = 'State' THEN attr.Value END)
FROM #temp AS t
OUTER APPLY OPENJSON(t.Attributes) WITH (
    Name  NVARCHAR(MAX) '$.Name'
  , Value NVARCHAR(MAX) '$.Value'
) AS attr
GROUP BY t.Id, t.Attributes
ORDER BY t.Id;
db<>fiddle demo

Related

Dynamic where clause sql loop

Following is the stored procedure
ALTER PROCEDURE [dbo].[get_data_Dyna]
{
#param1 varchar(max) = null,
#param2 varchar(max) = null,
#start varchar(max) = null,
#end varchar(max) = null
}
AS
SELECT * from table where
(#param1 IS NULL OR column1 IN (SELECT data FROM dbo.delimited_list_to_table(#param1,',')))
AND (#param2 IS NULL OR column2 IN (SELECT data FROM dbo.delimited_list_to_table(#param2,',')))
AND ....?????
How this is working :
All the parameters can be comma separated
#param1 value can be 'Germany' or 'Germany,USA' or null. This is working as expected.
Same goes for #param2
I'm trying to include rest of the parameters which is expected to work as follows :
#start='0' and #end='100' : In this case, where clause will look like this
...AND val BETWEEN #start AND #end
#start='48,60' and #end='51,99' : In this case, where clause will look like this
...AND ((val Between 48 and 51) or (val Between 60 and 99))
#start='48,60,75' and #end='51,99,203' : In this case, where clause will look like this
...AND ((val Between 48 and 51) or (val Between 60 and 99) or (val Between 75 and 203))
I'm unable to include above 2nd/3rd point correctly. I tried to write it dynamically which is working for single values [Point 1], but how to write point 2/3 ?
Any help is greatly appreciated.
Ok, i think the best approach here would be to use temp tables or table variable.
Lets go with temp tables.
-- One row per (start, end) range. END is a reserved keyword in T-SQL, so the
-- column name has to be delimited with brackets wherever it is referenced.
create table #StartEnd (start int not null, [end] int not null, primary key (start, [end]))
then we insert from #start and #end into it using dbo.delimited_list_to_table. Now i am not sure about your implementation of it, so i will assume the values are numbered
-- Pair the n-th start value with the n-th end value.
-- @start / @end are the procedure's parameters (local variables use the @ sigil).
-- INDEX and END are reserved keywords, hence the brackets.
-- Assumes the split function exposes the item position as [index] — TODO confirm
-- against the actual dbo.delimited_list_to_table implementation.
insert into #StartEnd (start, [end])
select starts.data, ends.data
from dbo.delimited_list_to_table(@start, ',') as starts
inner join dbo.delimited_list_to_table(@end, ',') as ends
    on starts.[index] = ends.[index]
Now we have to filter the values. Two approaches. Join or Exists condition
...
join #StartEnd on val between start and end
...
and exists (select 1 from #StartEnd where val between start and end)
Hope this helps
There you go. The comments / explanations are within the query.
-- create a sample table (table variables and local variables use the @ sigil)
declare @tbl table
(
    val int
)
-- put in some sample data
insert into @tbl (val)
values (48), (60), (51), (99), (75), (203)
-- these are the input parameters
declare @start varchar(100),
        @end   varchar(100)
-- and these are the input values
select @start = '48,60,75',
       @end   = '51,99,203'
-- the actual query
; with
start_end as
(
    -- split both lists with dbo.DelimitedSplit8K and pair the n-th start with
    -- the n-th end via ItemNumber; the items are cast to int explicitly
    -- because they are compared against the int column val below
    select s = cast(s.Item as int), e = cast(e.Item as int)
    from dbo.[DelimitedSplit8K](@start, ',') s
    inner join dbo.[DelimitedSplit8K](@end, ',') e
        on s.ItemNumber = e.ItemNumber
)
-- keep a value when it falls inside at least one (start, end) range
select t.val
from @tbl t
where exists
(
    select *
    from start_end x
    where t.val between x.s and x.e
)
you can get it here DelimitedSplit8K
Sample input (this is our guess at your data, based on our understanding of the question):
select
* into ##delimit
from (
values
(1 ,'Ger','Ind', 100 )
,(2 ,'Ind',Null, 10 )
,(3 ,'Ger',Null, 24 )
,(4 ,'Ind','Ger', 54 )
,(5 ,'USA','Ind', 56 )
,(6 ,Null,'USA', 75 )-- NULL. But USA is three time came.
,(7 ,'USA','USA', 60 )-- same country with diff val.
,(8 ,'USA','USA', 80 )-- same country with diff val.
) demilit(Id,FromPr,ToPr,Val)
select * from ##delimit
Procedure (you just use this instead of your procedure):
-- Returns the rows of ##delimit whose val lies inside at least one of the
-- positionally paired (@start, @end) ranges, optionally restricted by the
-- comma-separated lists @param1 (matched against frompr) and @param2
-- (matched against ToPr). Each list is split with dbo.WithoutDelimit.
-- Parameters must use the @ sigil; names starting with # denote temp objects.
-- NOTE(review): the range filter is an inner join, so leaving @start/@end
-- NULL appears to return no rows at all — confirm that is intended.
CREATE PROCEDURE [dbo].[get_data_Dyna]
(
    @param1 varchar(max) = NULL,
    @param2 varchar(max) = NULL,
    @start  varchar(max) = NULL,
    @end    varchar(max) = NULL
)
AS
BEGIN
    SELECT *
    FROM ##delimit d
    JOIN
    (
        -- one row per range: the n-th start paired with the n-th end
        SELECT DISTINCT
            s.FinalColumn StartVal,
            e.FinalColumn EndVal
        FROM dbo.WithoutDelimit (@start, ',') s      -- s = start values
        JOIN dbo.WithoutDelimit (@end, ',') e        -- e = end values
            ON s.id = e.id
    ) se                                             -- se = start/end pairs
        ON d.val BETWEEN se.StartVal AND se.EndVal
    WHERE
        -- a NULL list parameter switches the corresponding filter off
        (
            @param1 IS NULL
            OR frompr IN (SELECT FinalColumn FROM dbo.WithoutDelimit (@param1, ','))
        )
        AND
        (
            @param2 IS NULL
            OR ToPr IN (SELECT FinalColumn FROM dbo.WithoutDelimit (@param2, ','))
        )
END
Call stored procedure:
[get_data_Dyna] null,'usa','75','100,' -- 6 rows
[get_data_Dyna] 'Ind,Ger',null,'1,15','20,30' --2 and 3 rows are selected.
[get_data_Dyna] 'usa','usa','50,60','55,79'
-- 7 and 8 has same country. But due to Val, 8 has been rejected.
[get_data_Dyna] NULL,'usa','70,60','80,79'
-- 6 and 7 and 8 has been selected. Due to val condition.
Function (called from the stored procedure):
-- Splits @Parameter on the single-character @delimit and returns one row per
-- item. One function serves every list parameter of the calling procedure.
--   @Parameter : delimited list, e.g. '48,60,75'; NULL yields no rows
--   @delimit   : the separator character
-- Returns Id (identity, auto-increment) and FinalColumn (the item text).
-- The working string is converted to varchar(255), so longer input is
-- truncated before it is split.
alter function WithoutDelimit
(
    @Parameter varchar (max),
    @delimit   varchar (1)
)
returns @FinalTable table
(
    Id          int identity (1,1)   -- auto increment
  , FinalColumn varchar (max)        -- the split values, one per row
) as
begin
    ;with cte as   -- recursive cte
    (
        -- anchor: the whole list with a trailing delimiter appended so that
        -- the last item is terminated like every other item
        select convert (varchar (255), @Parameter + @delimit) con
             , convert (varchar (255), @Parameter + @delimit) want
        union all
        -- step: "want" is the first item of the previous remainder,
        -- "con" is that remainder with the item and its delimiter removed
        select convert (varchar (255), stuff (con, 1, charindex (@delimit, con), ''))
             , substring (con, 1, charindex (@delimit, con) - 1)
        from cte
        where con <> ''
    )
    insert into @FinalTable (FinalColumn)
    select want
    from cte
    where con <> want   -- drops the anchor row, where con and want are equal
    return
end
Let us know if the query needs any changes.

Multiple tables in the where clause SQL

I have 2 tables:-
Table_1
GetID UnitID
1 1,2,3
2 4,5
3 5,6
4 6
Table_2
ID UnitID UserID
1 1 1
1 2 1
1 3 1
1 4 1
1 5 2
1 6 3
I want the 'GetID' based on 'UserID'.
Let me explain you with an example.
For e.g.
I want all the GetID where UserID is 1.
The result set should be 1 and 2. 2 is included because one of the Units of 2 has UserID 1.
I want all the GetID where UserID is 2
The result set should be 2 and 3. 2 is included because one of Units of 2 has UserID 2.
I want to achieve this.
Thank you in Advance.
You can try a query like this:
See live demo
-- GetIDs (with the owning user) whose comma-separated unit list contains a
-- unit of user 1. Both the list and the searched unit are wrapped in commas
-- so that only whole items match: without the leading comma, unit 1 would
-- also match inside 11 or 21.
select distinct
    t2.userid
  , t1.getid
from Table_1 t1
join Table_2 t2
    on ',' + t1.unitId + ',' like '%,' + cast(t2.unitid as varchar(11)) + ',%'
   and t2.userid = 1
The query for this will be relatively ugly, because you made the mistake of storing CSV data in the UnitID column (or maybe someone else did and you are stuck with it).
-- GetIDs whose comma-separated UnitID list contains a unit owned by user 1.
-- Wrapping both sides in commas makes the LIKE match whole list items only,
-- whatever their position in the list.
SELECT DISTINCT
    unit_list.GetID
FROM Table_2 AS owned
INNER JOIN Table_1 AS unit_list
    ON ',' + unit_list.UnitID + ',' LIKE '%,' + CONVERT(varchar(10), owned.UnitID) + ',%'
   AND owned.UserID = 1;
Demo
To understand the join trick being used here, for the first row of Table_1 we are comparing ,1,2,3, against other single UnitID values from Table_2, e.g. %,1,%. Hopefully it is clear that my logic would match a single UnitID value in the CSV string in any position, including the first and last.
But a much better long term approach would be to separate those CSV values across separate records. Then, in addition to requiring a much simpler query, you could take advantage of things like indices.
try this:
-- Sample data mirroring the question's two tables
-- (table variables and local variables use the @ sigil).
declare @Table_1 table (GetID INT, UnitId VARCHAR(10))
declare @Table_2 table (ID INT, UnitId INT, UserId INT)
INSERT INTO @Table_1 (GetID, UnitId)
VALUES (1, '1,2,3')
     , (2, '4,5')
     , (3, '5,6')
     , (4, '6')
INSERT INTO @Table_2 (ID, UnitId, UserId)
VALUES (1, 1, 1)
     , (1, 2, 1)
     , (1, 3, 1)
     , (1, 4, 1)
     , (1, 5, 2)
     , (1, 6, 3)
declare @UserId INT = 2
-- Split each CSV list into its unit ids and join them straight to the user's
-- units. dbo.Split takes a single argument (it always splits on commas).
-- Joining directly avoids first concatenating the user's units into a short
-- VARCHAR variable, which would silently truncate once the list outgrew it.
select distinct t.GetId
from @Table_1 t
CROSS APPLY [dbo].[Split](t.UnitId) AS AA
INNER JOIN @Table_2 t2
    ON t2.UnitId = AA.Value
WHERE t2.UserId = @UserId
Split Function:
-- Splits a comma-separated list of integers into one row per value.
--   @input : e.g. '1,2,3'; NULL yields no rows
-- Returns a single BIGINT column named Value.
-- Empty items (as in '1,,2' or a trailing comma) are skipped; converting an
-- empty string to BIGINT would otherwise add a phantom 0 to the result.
CREATE FUNCTION [dbo].Split(@input AS Varchar(4000) )
RETURNS
@Result TABLE(Value BIGINT)
AS
BEGIN
    DECLARE @str VARCHAR(20)   -- current item
    DECLARE @ind Int           -- position of the next comma, 0 when none is left
    IF(@input is not null)
    BEGIN
        SET @ind = CharIndex(',',@input)
        WHILE @ind > 0
        BEGIN
            -- take the text before the comma, then drop it and the comma
            SET @str = SUBSTRING(@input,1,@ind-1)
            SET @input = SUBSTRING(@input,@ind+1,LEN(@input)-@ind)
            IF @str <> ''
                INSERT INTO @Result values (@str)
            SET @ind = CharIndex(',',@input)
        END
        -- whatever remains after the last comma is the final item
        SET @str = @input
        IF @str <> ''
            INSERT INTO @Result values (@str)
    END
    RETURN
END

SQL: If exists, limit user. If not exists show everything

I'm trying to find the best way to meet these requirements:
#fkStaffID INT = Current user.
If #fkStaffID has the resource BLABLA, only show the rows of table X whose StaffID matches it. If the user DOESN'T have the resource BLABLA, show everything.
SORRY I cannot paste full SQL, for employer's security policy. (I wish I show enough for help, not too much for security...)
What I do:
SELECT * FROM X
WHERE ((EXISTS
(SELECT 1 FROM STAFF WHERE pkStaff=#fkStaffID
AND STAFF.PkStaff IN (SELECT fkStaff FROM SECURITYSUBQUERY WHERE ResourceName='BLABLA')) AND X.fkStaff=#fkStaffID)
OR ((NOT EXISTS (SELECT 1 FROM STAFF WHERE pkStaff=#fkStaffID
AND STAFF.PkStaff IN (SELECT fkStaff FROM SECURITYSUBQUERY WHERE ResourceName='BLABLA')) )
PROBLEM: It's really slow. Can I do a more efficient way? Can I do another way? Thank you for your help!
I think you should be able to write the query thus:
-- Show every row when the current user does NOT hold the 'BLABLA' resource;
-- otherwise show only the rows that belong to that user.
-- NOT EXISTS replaces NOT IN: if SecuritySubquery.fkStaff ever contains a
-- NULL, NOT IN evaluates to UNKNOWN and the "show everything" branch can
-- never be taken. @fkStaffID is a variable and therefore uses the @ sigil.
SELECT *
FROM x
WHERE x.fkStaff = @fkStaffID
   OR NOT EXISTS (
          SELECT 1
          FROM SecuritySubquery AS ss
          WHERE ss.ResourceName = 'BLABLA'
            AND ss.fkStaff = @fkStaffID
      );
So either the #fkStaffID isn't 'BLABLA' or it matches the record's staff ID.
This NOT IN / OR still won't be very fast. Anyway, you should have the following indexes:
CREATE INDEX idx1 ON SecuritySubquery (ResourceName, fkStaff);
CREATE INDEX idx2 ON x (fkStaff);
I would try this:
if exists(select 1 from staff where pkstaff=#fkstaffid)
begin
select * from X where ResourceName = 'Blabla' and fkStaff = #fkStaffId
end
else
begin
select * from X where ResourceName = 'Blabla'
end
If we have a matching record, then we filter by that #fkStaffId, otherwise we select everything.
The below query will give you only the data for people in X who are in the STAFF table with a corresponding record in SECURITYSUBQUERY table ('BlaBla' records).
First, build test data.
IF OBJECT_ID(N'tempdb..#x') IS NOT NULL
DROP TABLE #x
CREATE TABLE #X ( fkStaff int, myStuff varchar(20) )
INSERT INTO #X ( fkStaff, myStuff )
VALUES
(1,'not me')
, (2,'not me')
, (3,'show me')
, (4,'not me')
, (5,'show me too')
IF OBJECT_ID(N'tempdb..#STAFF') IS NOT NULL
DROP TABLE #STAFF
CREATE TABLE #STAFF ( pkStaff int, name varchar(20) )
INSERT INTO #STAFF ( pkStaff, name )
VALUES
(1, 'Joe')
, (2, 'Jim')
, (3, 'Bill')
, (4, 'Ted')
, (5, 'Rufus')
IF OBJECT_ID(N'tempdb..#SECURITYSUBQUERY') IS NOT NULL
DROP TABLE #SECURITYSUBQUERY
CREATE TABLE #SECURITYSUBQUERY ( fkStaff int, ResourceName varchar(20) )
INSERT INTO #SECURITYSUBQUERY ( fkStaff, ResourceName )
VALUES
( 1, 'NotAuth' )
, ( 2, 'NotAuth' )
, ( 3, 'BlaBla' )
, ( 3, 'Extra Perm' )
, ( 4, 'NotAuth' )
, ( 5, 'BlaBla' )
Now for the query.
-- Rows of #x that belong to the given staff member, returned only when that
-- staff member exists in #STAFF and holds the 'BlaBla' resource.
-- The original LEFT OUTER JOIN followed by "t.pkStaff IS NOT NULL" was an
-- inner join in disguise; EXISTS states the intent directly and cannot
-- duplicate #x rows when the permission is listed more than once.
-- The variable uses the @ sigil and is given a sample value so the demo
-- returns data.
DECLARE @fkStaffID int = 3; /* Only 3 or 5 will return records. */
SELECT x.*
FROM #x AS x
WHERE x.fkStaff = @fkStaffID
  AND EXISTS (
          SELECT 1
          FROM #STAFF AS s
          INNER JOIN #SECURITYSUBQUERY AS ss
              ON ss.fkStaff = s.pkStaff
             AND ss.ResourceName = 'BlaBla'
          WHERE s.pkStaff = x.fkStaff
      )
This will only give results if users Bill or Rufus are logged in (and passed as #fkStaffID).
I don't know how well this will scale, but the optimizer should work faster than EXISTS or NOT IN subqueries. Try it with your data.

need help in forming sql query

ItemId Name parentId
1 A null
2 b null
3 c 1
4 d 2
5 e 3
6 f 4
7 g 2
hi i need help in create sql query. I have a table that contain 3 column itemid ,name ,parentitemid. i need a sql query that result parent child relation.if parentitemid id null then it means root .please help
i need data like.
<1><3><5></5> </3></1>
For example you can use:
-- Walk the parent/child tree from the roots downwards, tagging every row
-- with its depth: roots (ParentId IS NULL) are level 0, their children 1, ...
WITH HierarchicalTable (Id, ParentId, Name, [Level]) AS
(
    -- anchor: the root rows
    SELECT root.Id, root.ParentId, root.Name, 0
    FROM YourTable AS root
    WHERE root.ParentId IS NULL
    UNION ALL
    -- recursion: children of the rows found so far, one level deeper
    SELECT child.Id, child.ParentId, child.Name, parent.[Level] + 1
    FROM HierarchicalTable AS parent
    INNER JOIN YourTable AS child
        ON child.ParentId = parent.Id
)
SELECT h.[Level], h.Name
FROM HierarchicalTable AS h
This is excessively complicated solution, but it will work for your problem:
-- Builds nested <name>...</name> strings for a parent/child table by working
-- from the deepest level up to the roots.
-- @temp holds every item with its nesting level (l); @xml accumulates the
-- opening (s) and closing (e) tag strings. Table variables and the level
-- counter must use the @ sigil — a name starting with # cannot be DECLAREd.
DECLARE @temp TABLE (ItemId int, Name char(1), parentId int, l int)
DECLARE @xml TABLE (s nvarchar(max), e nvarchar(max), parentId int, itemid int)
DECLARE @l int
;WITH cte AS (
    SELECT *
    FROM (VALUES
        (1, 'a', NULL),(2, 'b', NULL),(3, 'c', 1),(4, 'd', 2),(5, 'e', 3),(6, 'f', 4),(7, 'g', 2)
    ) as t(ItemId, Name, parentId)
--Here we create a recursive cte to obtain the levels of nesting
), res AS (
    SELECT *,
           1 [Level]
    FROM cte c
    where parentId IS null
    UNION ALL
    SELECT c.*,
           [Level]+1
    FROM res r
    INNER JOIN cte c
        ON c.parentId = r.ItemId
)
--put the results into the table variable
INSERT INTO @temp
SELECT *
FROM res
--obtain the max level
SELECT @l = MAX(l)
FROM @temp
--from the max level down to 1
WHILE @l > 0
BEGIN
    --if there are nodes with the same parentid - concatenate them
    UPDATE x
    SET x.e = x.e + v.s + v.e
    FROM @xml x
    INNER JOIN @xml v
        ON v.parentId = x.parentId and v.e !=x.e;
    --here we merge the current level into the accumulated results
    -- first run <e></e>
    -- next run <c><e></e></c>
    -- next run <a><c><e></e></c></a>
    MERGE @xml AS target
    USING (
        SELECT '<'+ Name +'>' as s,'</'+ Name + '>' as e, parentId, ItemId
        FROM @temp
        WHERE l = @l
    ) as source
    ON target.parentid = source.itemid
    WHEN NOT MATCHED THEN INSERT VALUES (source.s, source.e, source.parentId, source.ItemId)
    WHEN MATCHED THEN
    UPDATE
    SET target.s = source.s + target.s,
        target.e = target.e + source.e,
        target.parentid = source.parentid,
        target.itemid = source.itemid;
    --next level (one step closer to the roots)
    SET @l = @l - 1
END
SELECT x --CAST(x as xml)
FROM (
    SELECT s+e as x,
           DENSE_RANK() OVER (PARTITION BY itemid ORDER BY s ASC) as rn
    --need that column to obtain the first one of every string for itemid
    FROM @xml
) as c
WHERE c.rn = 1
--FOR XML PATH ('')
Output will be:
x
<a><c><e></e></c></a>
<b><d><f></f></d><g></g></b>
If you remove -- near FOR XML PATH ('') and change this SELECT x --CAST(x as xml) to this SELECT CAST(x as xml) in last query you will get this:
<a>
<c>
<e />
</c>
</a>
<b>
<d>
<f />
</d>
<g />
</b>

Is multi-row uniqueness possible in SQL Server?

I have a table where I try to define a covariate grouping like so
ID Rank Covariate
1 1 Age
1 2 Gender
1 3 YearOfBirth
2 1 Gender
The ID captures which covariates belong together in the same group. So covariate group 1 (ID = 1) is composed of age, gender and year of birth, whereas group 2 is Gender only.
Now, inserting a new covariate group consisting of, say gender only, should be illegal as this group already exists, however inserting a new group consisting of Age and Gender should be allowed (it is a subset of group 1 but not an exact match).
Also the rank matters so
ID Rank Covariate
2 Age
1 Gender
3 YearOfBirth
Should not be considered equal to group 1.
Is there a way to enforce this in sql-server?
Ideally the ID column would autoincrement on a legal insert (but thats a different issue).
I don’t know of any means to enforce the Covariant group uniqueness criteria via standard uniqueness constraints or check constraints or any other elegant solution for that matter. However, you can enforce your constraints by only allowing access to the table via a stored procedure or alternatively a view with a “INSTEAD OF INSERT” trigger defined.
Method 1 - Stored Procedure
The following example shows the stored procedure method. First we create a table value type so that we can pass our covariant group as a read-only parameter to our stored procedure.
CREATE TYPE CovariateGroupEntry AS TABLE
(
[Rank] INT NOT NULL
,[Covariate] NVARCHAR(50)
PRIMARY KEY([Rank], [Covariate])
)
Next we create our base table that will contain our covariant groups:
CREATE TABLE CovariateGroups
(
[ID] INT NOT NULL
,[Rank] INT NOT NULL
,[Covariate] NVARCHAR(50)
PRIMARY KEY([ID], [Rank], [Covariate])
)
Next step we create a dummy table that will be used to auto generate our ID:
CREATE TABLE CovariateGroupIDs
(
[GroupID] INT PRIMARY KEY IDENTITY
,[CreatedDateTime] DATETIME NOT NULL
)
Final step we create our procedure:
-- Adds a new covariate group to CovariateGroups, provided that
--   1. its ranks start at 1 and increase in steps of exactly 1, and
--   2. no existing group has the same (Rank, Covariate) rows.
-- @covariateGroupEntry : the rows (Rank, Covariate) of the group to add.
-- Raises an error (severity 15) when either check fails; an empty input
-- fails the rank check. The group ID is generated only after both checks
-- pass, so rejected groups leave no orphan row in CovariateGroupIDs.
-- Parameters and local variables use the @ sigil.
CREATE PROCEDURE CovariateGroup_Add
(
    @covariateGroupEntry dbo.CovariateGroupEntry READONLY
)
AS
BEGIN
    SET NOCOUNT ON;
    DECLARE @groupID INT;
    DECLARE @groupSize INT;
    DECLARE @groupMatchCount INT;
    DECLARE @minRank INT;
    DECLARE @maxRankDelta INT;
    DECLARE @minRankDelta INT;

    -- Get the size of the new group which the user is attempting to add.
    SELECT @groupSize = COUNT([Rank])
    FROM @covariateGroupEntry;

    -- Validate that the new group's rank starts at 1 and increments by 1 only.
    -- Delta is the gap to the previous rank; LAG defaults to 0 for the first row.
    SELECT @minRank = ISNULL(MIN([Rank]), 0)
          ,@maxRankDelta = ISNULL(MAX(Delta), 0)
          ,@minRankDelta = ISNULL(MIN(Delta), 0)
    FROM (
        SELECT [Rank]
              ,[Rank] - (LAG([Rank], 1, 0) OVER (ORDER BY [Rank])) AS Delta
        FROM @covariateGroupEntry
    ) RankValidation;

    IF ( (@minRank > 1) OR (@maxRankDelta > 1) OR (@minRankDelta < 1) )
    BEGIN
        -- The input's rank column does not start at 1 or does not increment by 1.
        RAISERROR (N'Attempting to add covariant group with invalid rank order.', -- Message text.
                   15, -- Severity.
                   1   -- State.
                  );
    END
    ELSE
    BEGIN
        ;WITH CTE_GroupsCompareSize
        AS
        (
            -- Compare the size of the new group with all existing groups. A group of a
            -- different size is either a subset or a superset of the new group and can
            -- be excluded from further consideration.
            SELECT [CovariateGroups].[ID]
                  ,[CovariateGroups].[Rank]
                  ,[CovariateGroups].[Covariate]
                  ,COUNT([CovariateGroups].[Rank]) OVER (PARTITION BY [CovariateGroups].[ID]) GroupSize
                  ,@groupSize AS NewGroupSize
            FROM [CovariateGroups]
        )
        ,CTE_GroupsCompareRank
        AS
        (
            -- For groups of the same size, left outer join the new group onto the existing
            -- group on both rank and covariate. If the MIN() OVER window function returns 0,
            -- at least one entry of the existing group has no match, so the groups differ.
            SELECT [OriginalGroup].[ID]
                  ,[OriginalGroup].[Rank]
                  ,[OriginalGroup].[Covariate]
                  ,MIN(
                       CASE
                           WHEN [NewGroup].[Covariate] IS NULL THEN 0
                           ELSE 1
                       END
                      ) OVER (PARTITION BY [OriginalGroup].[ID]) AS EntireGroupRankMatch
            FROM CTE_GroupsCompareSize [OriginalGroup]
            LEFT OUTER JOIN @covariateGroupEntry [NewGroup]
                ON ([OriginalGroup].[Rank] = [NewGroup].[Rank] AND [OriginalGroup].[Covariate] = [NewGroup].[Covariate])
            WHERE GroupSize = NewGroupSize
        )
        SELECT @groupMatchCount = COUNT(EntireGroupRankMatch)
        FROM CTE_GroupsCompareRank
        WHERE EntireGroupRankMatch = 1;

        IF ISNULL(@groupMatchCount, 0) = 0
        BEGIN
            -- Generate a new group ID only now that the group is known to be unique.
            INSERT INTO [dbo].[CovariateGroupIDs]
            (
                [CreatedDateTime]
            )
            SELECT GETDATE() AS [CreatedDateTime];
            SET @groupID = SCOPE_IDENTITY();

            INSERT INTO [CovariateGroups]
            (
                [ID]
               ,[Rank]
               ,[Covariate]
            )
            SELECT @groupID AS [ID]
                  ,[Rank]
                  ,[Covariate]
            FROM @covariateGroupEntry;
        END
        ELSE
        BEGIN
            -- Raise an error if our uniqueness constraints are not met.
            RAISERROR (N'Uniqueness contain violation, the covariant set is not unique with table "CovariateGroups".', -- Message text.
                       15, -- Severity.
                       1   -- State.
                      );
        END
    END
END
Method 2 - View with trigger
The second method involves using a views and creating an instead of insert trigger on the view.
First we create the view as follow:
CREATE VIEW CovariateGroupsView
AS
SELECT [ID]
,[Rank]
,[Covariate]
FROM CovariateGroups
Then we create the trigger:
-- INSTEAD OF INSERT trigger on CovariateGroupsView: treats the rows in
-- "inserted" as one new covariate group and writes it to CovariateGroups
-- only when
--   1. its ranks start at 1 and increase in steps of exactly 1, and
--   2. no existing group has the same (Rank, Covariate) rows.
-- Raises an error (severity 15) otherwise. The group ID is generated only
-- after both checks pass, so rejected inserts leave no orphan row in
-- CovariateGroupIDs.
-- CREATE (not ALTER) is used because the trigger does not exist yet at this
-- point of the walkthrough; local variables use the @ sigil.
CREATE TRIGGER CovariateGroupsViewInsteadOfInsert on CovariateGroupsView
INSTEAD OF INSERT
AS
BEGIN
    SET NOCOUNT ON;
    DECLARE @groupID INT;
    DECLARE @groupSize INT;
    DECLARE @groupMatchCount INT;
    DECLARE @minRank INT;
    DECLARE @maxRankDelta INT;
    DECLARE @minRankDelta INT;

    -- Get the size of the new group which the user is attempting to add.
    SELECT @groupSize = COUNT([Rank])
    FROM inserted;

    -- Validate that the new group's rank starts at 1 and increments by 1 only.
    -- Delta is the gap to the previous rank; LAG defaults to 0 for the first row.
    SELECT @minRank = ISNULL(MIN([Rank]), 0)
          ,@maxRankDelta = ISNULL(MAX(Delta), 0)
          ,@minRankDelta = ISNULL(MIN(Delta), 0)
    FROM (
        SELECT [Rank]
              ,[Rank] - (LAG([Rank], 1, 0) OVER (ORDER BY [Rank])) AS Delta
        FROM inserted
    ) RankValidation;

    IF ( (@minRank > 1) OR (@maxRankDelta > 1) OR (@minRankDelta < 1) )
    BEGIN
        RAISERROR (N'Attempting to add covariant group with invalid rank order.', -- Message text.
                   15, -- Severity.
                   1   -- State.
                  );
    END
    ELSE
    BEGIN
        ;WITH CTE_GroupsCompareSize
        AS
        (
            -- Compare the size of the new group with all existing groups. A group of a
            -- different size is either a subset or a superset of the new group and can
            -- be excluded from further consideration.
            SELECT [CovariateGroups].[ID]
                  ,[CovariateGroups].[Rank]
                  ,[CovariateGroups].[Covariate]
                  ,COUNT([CovariateGroups].[Rank]) OVER (PARTITION BY [CovariateGroups].[ID]) GroupSize
                  ,@groupSize AS NewGroupSize
            FROM [CovariateGroups]
        )
        ,CTE_GroupsCompareRank
        AS
        (
            -- For groups of the same size, left outer join the new group onto the existing
            -- group on both rank and covariate. If the MIN() OVER window function returns 0,
            -- at least one entry of the existing group has no match, so the groups differ.
            SELECT [OriginalGroup].[ID]
                  ,[OriginalGroup].[Rank]
                  ,[OriginalGroup].[Covariate]
                  ,MIN(
                       CASE
                           WHEN [NewGroup].[Covariate] IS NULL THEN 0
                           ELSE 1
                       END
                      ) OVER (PARTITION BY [OriginalGroup].[ID]) AS EntireGroupRankMatch
            FROM CTE_GroupsCompareSize [OriginalGroup]
            LEFT OUTER JOIN inserted [NewGroup]
                ON ([OriginalGroup].[Rank] = [NewGroup].[Rank] AND [OriginalGroup].[Covariate] = [NewGroup].[Covariate])
            WHERE GroupSize = NewGroupSize
        )
        SELECT @groupMatchCount = COUNT(EntireGroupRankMatch)
        FROM CTE_GroupsCompareRank
        WHERE EntireGroupRankMatch = 1;

        IF ISNULL(@groupMatchCount, 0) = 0
        BEGIN
            -- Generate a new group ID only now that the group is known to be unique.
            INSERT INTO [dbo].[CovariateGroupIDs]
            (
                [CreatedDateTime]
            )
            SELECT GETDATE() AS [CreatedDateTime];
            SET @groupID = SCOPE_IDENTITY();

            INSERT INTO [CovariateGroups]
            (
                [ID]
               ,[Rank]
               ,[Covariate]
            )
            SELECT @groupID AS [ID]
                  ,[Rank]
                  ,[Covariate]
            FROM inserted;
        END
        ELSE
        BEGIN
            RAISERROR (N'Uniqueness contain violation, the covariant set is not unique with table "CovariateGroups".', -- Message text.
                       15, -- Severity.
                       1   -- State.
                      );
        END
    END
END;
The following example show how the stored procedure should be executed:
-- Build the group in a variable of the table type (variables use the @ sigil)
-- and hand it to the procedure as its read-only parameter.
DECLARE @covariateGroupEntry AS dbo.CovariateGroupEntry
-- INSERT GROUP 1 -------------------
INSERT INTO @covariateGroupEntry
(
    [Rank]
   ,[Covariate]
)
VALUES (1, 'Age')
      ,(2, 'Gender')
      ,(3, 'YearOfBirth')
EXEC CovariateGroup_Add @covariateGroupEntry
Following example shows how to insert a group using the view:
-- Build the group in a variable of the table type (variables use the @ sigil)
-- and insert all of its rows into the view with one statement.
DECLARE @covariateGroupEntry AS dbo.CovariateGroupEntry
-- INSERT GROUP 1 -------------------
INSERT INTO @covariateGroupEntry
(
    [Rank]
   ,[Covariate]
)
VALUES (1, 'Age')
      ,(2, 'Gender')
      ,(3, 'YearOfBirth')
INSERT INTO [dbo].[CovariateGroupsView]
(
    [Rank]
   ,[Covariate]
)
SELECT [Rank]
      ,[Covariate]
FROM @covariateGroupEntry
DELETE FROM @covariateGroupEntry -- Empty our memory table if we intend to use it again.
In general I would avoid using the view method since it will be susceptible to more edge cases than the stored procedure and can have some unexpected behaviors. Example the following call:
INSERT INTO [dbo].[CovariateGroupsView]
(
[Rank]
,[Covariate]
)
SELECT 1 ,'Age' UNION ALL
SELECT 2 ,'Gender' UNION ALL
SELECT 3 ,'YearOfBirth'
Will not work as expected since the trigger on the view will treat every row as a separate data set / group. As a result the validation checks will fail.
It's obvious that there is no way to produce an enforceable unique constraint that repeats across multiple rows, because if it repeats then it is not unique.
There are, however, many clever ways to create a simple check that ensures that a grouping of your Covariate values will not be inserted in multiple times.
In terms of simplicity, the SQL below will produce two columns: an ID, and the covariate values in their ranked order of occurrence:
CREATE TABLE #tmp_Covariate (ID INT, RANK INT, Covariate VARCHAR(24))
INSERT INTO #tmp_Covariate (ID, RANK, Covariate)
VALUES (1,1,'Age')
,(1,2,'Gender')
,(1,3,'YearOfBirth')
,(2,1,'Gender')
SELECT DISTINCT ID
,STUFF((SELECT N', ' + CAST(C2.[Covariate] AS VARCHAR(256))
FROM #tmp_Covariate C2
WHERE C1.ID = C2.ID
ORDER
BY C2.ID,C2.RANK
FOR XML PATH ('')),1,2,'') AS GroupCovariate
FROM #tmp_Covariate C1
The results of the SELECT are as follows:
ID GroupCovariate
1 Age, Gender, YearOfBirth
2 Gender
If a third group is added to the table, where the covariate values are:
ID Rank Covariate
2 Age
1 Gender
3 YearOfBirth
Then the ordered occurance of the Covariates do not match the GroupCovariate column returned above.
If I were solving this, I'd create a function which accepts a table valued parameter. Feed your inputs that need to be checked against the table into the table exactly as they would appear if committed successfully.
-- Candidate group to test, shaped exactly as it would be stored.
-- A table variable must use the @ sigil.
DECLARE @TVP TABLE (Rank INT, Covariate VARCHAR(24))
INSERT INTO @TVP (Rank, Covariate) VALUES (1,'Age'),(2,'Gender'),(3,'YearOfBirth')
-- Exist = number of stored groups whose rank-ordered, comma-separated
-- covariate list equals the candidate's list.
SELECT COUNT(CheckTable.GroupCovariate) AS Exist
FROM (SELECT STUFF((SELECT N', ' + CAST(C2.[Covariate] AS VARCHAR(256))
                    FROM @TVP C2
                    ORDER
                       BY C2.RANK
                    FOR XML PATH ('')),1,2,'') AS GroupCovariate
     ) AS InputTable
JOIN (SELECT DISTINCT ID
            ,STUFF((SELECT N', ' + CAST(C2.[Covariate] AS VARCHAR(256))
                    FROM #tmp_Covariate C2
                    WHERE C1.ID = C2.ID
                    ORDER
                       BY C2.ID,C2.RANK
                    FOR XML PATH ('')),1,2,'') AS GroupCovariate
      FROM #tmp_Covariate C1) AS CheckTable
  ON CheckTable.GroupCovariate = InputTable.GroupCovariate
Because the supplied group of covariates already exists in the table, the output will be 1 (this can be returned as a bool: 1 for true when the group exists, or 0 for false when no such group exists).
Exist
1
If I supply "FavoriteColor" as part of my covariants:
-- Candidate group containing a covariate ("FavoriteColor") that no stored
-- group has. A table variable must use the @ sigil.
DECLARE @TVP TABLE (Rank INT, Covariate VARCHAR(24))
INSERT INTO @TVP (Rank, Covariate) VALUES (1,'FavoriteColor'),(2,'Gender'),(3,'YearOfBirth')
-- Exist = number of stored groups whose rank-ordered, comma-separated
-- covariate list equals the candidate's list.
SELECT COUNT(CheckTable.GroupCovariate) AS Exist
FROM (SELECT STUFF((SELECT N', ' + CAST(C2.[Covariate] AS VARCHAR(256))
                    FROM @TVP C2
                    ORDER
                       BY C2.RANK
                    FOR XML PATH ('')),1,2,'') AS GroupCovariate
     ) AS InputTable
JOIN (SELECT DISTINCT ID
            ,STUFF((SELECT N', ' + CAST(C2.[Covariate] AS VARCHAR(256))
                    FROM #tmp_Covariate C2
                    WHERE C1.ID = C2.ID
                    ORDER
                       BY C2.ID,C2.RANK
                    FOR XML PATH ('')),1,2,'') AS GroupCovariate
      FROM #tmp_Covariate C1) AS CheckTable
  ON CheckTable.GroupCovariate = InputTable.GroupCovariate
my result is 0:
Exist
0

Resources