UPDATE with a one-to-many JOIN (multiple columns)

UPDATE with a one-to-many JOIN (multiple columns) - sql-server

I've run into an issue and I'm not sure if it is the intended behaviour. I have searched online for an answer, but all I could find was about what row would update last and so what would be the value after the query. I know there is no such thing as order in that case, and you can't be sure what the value would be ahead of time.
In my case though, I'm updating different columns, so overriding a previous update is not a concern of mine.
-- Target table: one row per id; value1/value2 start out NULL.
CREATE TABLE #original (
    id     int,
    value1 int,
    value2 int
);

INSERT INTO #original (id)
VALUES
    (1),
    (2);

-- Source table: one row per (id, name) pair, so two rows for every id above.
CREATE TABLE #temp (
    id    int,
    name  varchar(10),
    value int
);

INSERT INTO #temp (id, name, value)
VALUES
    (1, 'value1', 10),
    (1, 'value2', 11),
    (2, 'value1', 20),
    (2, 'value2', 21);
SELECT * FROM #original
id value1 value2
----------- ----------- -----------
1 NULL NULL
2 NULL NULL
SELECT * FROM #temp
id name value
----------- ---------- -----------
1 value1 10
1 value2 11
2 value1 20
2 value2 21
-- Attempts to fill value1 and value2 from the name/value rows in #temp.
-- Each #original row joins to two #temp rows (one per name), so the join is one-to-many.
UPDATE O SET
-- Take T.value when the joined row is the 'value1' row; otherwise keep the current value1.
value1 = CASE WHEN T.name = 'value1' THEN T.value ELSE value1 END,
-- Same pattern for the 'value2' row.
value2 = CASE WHEN T.name = 'value2' THEN T.value ELSE value2 END
FROM
#original O
INNER JOIN #temp T ON T.id = O.id
SELECT * FROM #original
id value1 value2
----------- ----------- -----------
1 10 NULL
2 20 NULL
I don't get why both value2 are NULL.
-- Evaluates the UPDATE's CASE expressions once per joined row (no rows are modified).
SELECT
    O.id,
    CASE T.name WHEN 'value1' THEN T.value ELSE value1 END AS value1,
    CASE T.name WHEN 'value2' THEN T.value ELSE value2 END AS value2
FROM #original AS O
INNER JOIN #temp AS T
    ON O.id = T.id
id value1 value2
----------- ----------- -----------
1 10 NULL
1 NULL 11
2 20 NULL
2 NULL 21
Running the above instead of the update, it looks exactly like what I thought it would and I would assume it means "four" updates, populating both value1 and value2 in both rows.
I would really appreciate if someone could explain this one to me.

The problem here is your assumption: "In my case though, I'm updating different columns, so overriding a previous update is not a concern of mine." It most certainly is a problem, because ultimately, your code
value1 = CASE WHEN T.name = 'value1' THEN T.value ELSE value1 END,
value2 = CASE WHEN T.name = 'value2' THEN T.value ELSE value2 END
only runs (or takes effect) once per row. SQL Server is not required to update the same row twice, and will normally just arbitrarily take a single row's values, one of which is NULL.
The documentation states (my bold):
Use caution when specifying the FROM clause to provide the criteria for the update operation. The results of an UPDATE statement are undefined if the statement includes a FROM clause that is not specified in such a way that only one value is available for each column occurrence that is updated, that is if the UPDATE statement is not deterministic. For example, in the UPDATE statement in the following script, both rows in Table1 meet the qualifications of the FROM clause in the UPDATE statement; but it is undefined which row from Table1 is used to update the row in Table2.
Only one matching row may be used and only one update is performed per target row; you cannot assume that the updates will happen sequentially. So you need to make sure there is a single match for each row you want to update.
Therefore you should pre-aggregate your values
-- Collapse #temp to one row per id first, so that every #original row has
-- exactly one match and the UPDATE is deterministic.
UPDATE O SET
    value1 = T.value1,
    value2 = T.value2
FROM
    #original O
    INNER JOIN (
        SELECT
            id,
            -- MAX skips the NULLs that the CASE yields for the other name.
            MAX(CASE WHEN T.name = 'value1' THEN T.value END) AS value1,
            MAX(CASE WHEN T.name = 'value2' THEN T.value END) AS value2
        FROM #temp T
        GROUP BY
            id
    ) T ON T.id = O.id;
You could also use CROSS APPLY or a CTE for this.

Modified the update clause with a little tweak in getting the value1 and value2
-- Step 1: reduce the one-to-many join to a single row per id.
;WITH pivoted AS (
    SELECT
        O.id,
        MAX(CASE T.name WHEN 'value1' THEN T.value ELSE value1 END) AS value1,
        MAX(CASE T.name WHEN 'value2' THEN T.value ELSE value2 END) AS value2
    FROM #original AS O
    INNER JOIN #temp AS T
        ON O.id = T.id
    GROUP BY O.id
)
-- Step 2: copy the per-id values back; each target row now has one match.
UPDATE tgt
SET
    tgt.value1 = p.value1,
    tgt.value2 = p.value2
FROM #original AS tgt
INNER JOIN pivoted AS p
    ON tgt.id = p.id

Related

SQL query to display the value which is not null from two rows with the same ID

Here one ID can have rows with different Codes. If the ID has a row with a Code value, I need to display that row; otherwise I need to display the row with the null Code.
ID
Code
Name
12
null
Three
12
2345
Three
13
null
four
14
1543
rewq

Essentially we want to look up and pick out the entire "first" row, giving precedence to a non-null Code. (The logic would be easy to reverse in all of the options below.) All of these assume that at most two rows can be present per ID and that, in that case, one of them must have a null Code.
-- Keep every row that has a Code, plus any row that is the only one for its ID.
with counted as (
    select
        *,
        count(*) over (partition by ID) as rows_per_id
    from T
)
select
    ID,
    Code,
    Value
from counted
where Code is not null
   or rows_per_id = 1;
or
-- Join every row to the non-null-Code rows of the same ID and prefer the joined values.
select distinct
    base.ID,
    coalesce(coded.Code, base.Code) as Code,
    coalesce(coded.Value, base.Value) as Value
from T as base
left join T as coded
    on coded.ID = base.ID
   and coded.Code is not null;
or
--SQL Server
-- A row of T survives only when the correlated subquery returns a row,
-- i.e. when the HAVING comparison below holds for that row.
select ID, Code, Value
from T t1 cross apply (
select 1 as Keep from T t2
where t2.ID = t1.ID
-- Left side: 1 when this row has a Code. Right side: 1 when any row of the same ID has a Code.
having case when t1.Code is null then 0 else 1 end =
case when max(t2.Code) is null then 0 else 1 end) v;
or
-- Keep the rows whose Code equals the largest Code of their ID;
-- '!##' stands in for NULL on both sides of the comparison.
with with_top as (
    select
        *,
        max(Code) over (partition by ID) as top_code
    from T
)
select
    ID,
    Code,
    Value
from with_top
where coalesce(top_code, '!##') = coalesce(Code, '!##');
or
-- Number the rows of each ID with non-null Codes first, then keep row 1.
with numbered as (
    select
        *,
        row_number() over (
            partition by ID
            order by case when Code is null then 1 else 0 end
        ) as pick
    from T
)
select
    ID,
    Code,
    Value
from numbered
where pick = 1;
or
-- Per ID: the largest Code, and the Value of the row ordered first when
-- non-null Codes sort ahead of null ones. DISTINCT folds the repeats.
with per_id as (
    select distinct
        ID,
        max(Code) over (partition by ID) as Code,
        first_value(Value) over (
            partition by ID
            order by case when Code is null then 1 else 0 end
        ) as Value
    from T
)
select
    ID,
    Code,
    Value
from per_id;
or
-- Keep a null-Code row only when its ID has no row with a Code; rows with a Code always stay.
select *
from T as cur
where not exists (
          select 1
          from T as other
          where other.ID = cur.ID
            and other.Code is not null
      )
   or cur.Code is not null;
or
select
    ID,
    max(Code) as Code,
    /* works better with character values */
    -- Prefix each Value with its Code padded to exactly 10 characters, take the
    -- largest combined string per ID, then strip the 10-character prefix again.
    -- The pad must be at least 10 characters wide for LEFT(..., 10) to produce
    -- a fixed-width prefix, and the Value then starts at position 11.
    substring(
        max(left(coalesce(Code, '') + replicate(' ', 10), 10) + Value),
        11,
        50
    ) as Value
from T
group by ID;
https://dbfiddle.uk/?rdbms=sqlserver_2019&fiddle=2c08e5bd0b67118d39cf9f6404218b09

Well this worked for me
SELECT * FROM tablename

Faster execution of non nulls for a column

I need to get the percentage of nulls for a given column in a table. The table currently contains close to 368081344 records, and the number of records will increase by 20 million each day. Below is the query I am using.
-- Percentage of non-NULL values in the column.
-- COUNT_BIG returns bigint, so dividing the two counts directly is integer
-- division and truncates the ratio; multiplying by 100.0 first keeps the fraction.
SELECT (100.0 * COUNT_BIG(column)) / COUNT_BIG(*)
from <table>
Then, I perform 100 - above output to fetch the required output
Please let me know best possible solution which can yield faster result

Have you tried the below method :
-- Demo data lives in a table variable (table variables are declared with @, not #).
DECLARE @T TABLE
(
    Id INT
);

-- Generates SeqNo 1..100, each paired with a NULL Val.
;WITH CTE AS
(
    SELECT
        SeqNo = 1,
        CAST(NULL AS INT) AS Val
    UNION ALL
    SELECT
        SeqNo = SeqNo + 1,
        Val
    FROM CTE
    WHERE SeqNo < 100
)
-- Load 100 NULLs followed by the values 1..100.
INSERT INTO @T (Id)
SELECT Val FROM CTE
UNION ALL
SELECT SeqNo FROM CTE;

-- One pass over the table yields the total, non-NULL and NULL counts and the NULL percentage.
SELECT
    TotCount    = COUNT(1),
    ValCount    = SUM(CASE WHEN Id IS NULL THEN 0 ELSE 1 END),
    NullCount   = SUM(CASE WHEN Id IS NOT NULL THEN 0 ELSE 1 END),
    NullPercent = (CAST(SUM(CASE WHEN Id IS NOT NULL THEN 0 ELSE 1 END) AS FLOAT) / CAST(COUNT(1) AS FLOAT)) * 100
FROM @T;

Partial answer only. Not sure how to get the count for a specific column
You can speed up the total row count using this query.
-- Read the row count stored in the catalog for the table named 'PARENT'.
SELECT part.ROWS
FROM SYS.OBJECTS AS obj
INNER JOIN SYS.PARTITIONS AS part
    ON part.OBJECT_ID = obj.OBJECT_ID
WHERE part.INDEX_ID < 2
  AND obj.NAME = 'PARENT'
ORDER BY obj.NAME

Convert 2 rows to fields in SQL Server

In SQL server (2016), I want to convert 2 rows into 1 row with fields of both rows.
I have this example:
-- Drop a leftover copy so the script can be re-run in the same session.
IF OBJECT_ID('tempdb.dbo.#MyTable') IS NOT NULL
    DROP TABLE #MyTable;

CREATE TABLE #MyTable (
    Direction  varchar(1),
    DateKey    int,
    ID         varchar(8),
    [Sessions] int
);

-- One 'S' row and one 'R' row for the same DateKey and ID.
INSERT INTO #MyTable (Direction, DateKey, ID, [Sessions])
VALUES
    ('S', 20180301, 'ID123456', 46),
    ('R', 20180301, 'ID123456', 99);

SELECT * FROM #MyTable;
Output:
Direction DateKey ID Sessions
S 20180301 ID123456 46
R 20180301 ID123456 99
The output I want is:
DateKey ID S_Sessions R_Sessions
20180301 ID123456 46 99
So I tried this query but it won't work:
-- Non-working attempt: tries to name a different output column from each CASE branch.
select DateKey,ID,
-- A CASE expression yields a single value, so an alias cannot be placed inside its branches.
case Direction
when 'S' then [Sessions] as S_Sessions -- Incorrect syntax near the keyword 'as'.
else [Sessions] as R_Sessions
end
from #MyTable
Maybe I have to create an extra table, insert rows where direction='S' and then update the records with data where direction='R' but I wonder if there is a better way to do this.

use PIVOT
-- PIVOT turns each Direction value into a column and groups by the remaining
-- columns (DateKey, ID). The pivoted columns are named S and R, so alias them
-- to match the requested S_Sessions / R_Sessions output.
select
    p.DateKey,
    p.ID,
    p.[S] as S_Sessions,
    p.[R] as R_Sessions
from #MyTable
pivot
(
    max([Sessions])
    for Direction in ([S], [R])
) p

assuming that your table contains the "pairs" S and R you can also use a self join
-- Pair each 'S' row with the 'R' row that shares its ID and DateKey.
SELECT
    s_row.DateKey,
    s_row.ID,
    s_row.Sessions AS S_Sessions,
    r_row.Sessions AS R_Sessions
FROM #MyTable AS s_row
INNER JOIN #MyTable AS r_row
    ON r_row.ID = s_row.ID
   AND r_row.DateKey = s_row.DateKey
WHERE r_row.Direction = 'R'
  AND s_row.Direction = 'S'

CASE in SQL is an expression that returns a single value. It cannot be used to control execution flow like in procedural languages.
You can use conditional aggregation for this:
-- One output row per (DateKey, ID); each MAX picks the Sessions value of its own Direction.
select
    DateKey,
    ID,
    max(case when Direction = 'S' then [Sessions] end) as S_Sessions,
    max(case when Direction = 'R' then [Sessions] end) as R_Sessions
from #MyTable
group by
    DateKey,
    ID
Demo here

Try this; it works for me. For more values, add more CASE expressions and more joined tables.
-- T-SQL form of the self join: CROSS JOIN takes no ON clause and LIMIT is not
-- available in SQL Server, so the 'S' row is joined to its matching 'R' row
-- explicitly and the Direction filters select exactly one pair per key.
select
    a.DateKey,
    a.ID,
    a.[Sessions] as S_Sessions,
    b.[Sessions] as R_Sessions
from #MyTable as a
inner join #MyTable as b
    on b.ID = a.ID
   and b.DateKey = a.DateKey
where a.Direction = 'S'
  and b.Direction = 'R';

Rewriting function to conditional CTE

Considering following table:
-- Lists the four columns of the example table.
SELECT
    [ItemID],
    [ParentID],
    [PolicyID],
    [PolicyRoot]
FROM [AdventureWorks2008R2].[dbo].[Example]
ItemID ParentID PolicyID PolicyRoot
----------- ----------- ---------- ----------
1 NULL default 1
2 1 b 1
3 1 c 0
4 NULL d 1
5 3 e 0
6 3 f 1
7 NULL g 0
I'm trying to select the PolicyID from each item where PolicyRoot = 1, in case PolicyRoot = 0 I need to use PolicyID from its ParentID. This is recursive...
Working with a function:
-- Returns the PolicyID that applies to @ItemID: the item's own PolicyID when
-- its PolicyRoot is 1, otherwise whatever the function returns for its parent.
-- Returns NULL when the chain ends without reaching a row whose PolicyRoot is 1.
-- (Parameters and local variables must be prefixed with @.)
CREATE FUNCTION dbo.Policies(@ItemID INT) RETURNS VARCHAR(10)
AS
BEGIN
    DECLARE @ParentID INT, @PolicyRoot BIT, @PolicyID VARCHAR(10);

    -- Load the item's row; all three variables stay NULL when no row matches.
    SELECT @ParentID   = ParentID
         , @PolicyRoot = PolicyRoot
         , @PolicyID   = PolicyID
    FROM [dbo].[Example]
    WHERE ItemID = @ItemID;

    -- Not a policy root: defer to the parent. When @PolicyRoot is NULL the
    -- comparison is not true, so the recursion stops here.
    IF @PolicyRoot != 1
        SELECT @PolicyID = dbo.Policies(@ParentID);

    RETURN @PolicyID;
END;
GO
-- Resolve the policy of every item.
SELECT ItemID
     , dbo.Policies(ItemID) AS Policy
FROM [dbo].[Example];
ItemID Policy
----------- ----------
1 default
2 b
3 default
4 d
5 default
6 f
7 NULL
I'm trying to rewrite this function to a CTE, but I don't have any CTE knowledge yet. I've read into multiple CTE's but I don't have a single clue how to manage a conditional CTE. This is as far as I've gotten, I'm not familiar (enough) with the UNION ALL.
WITH Policies (ItemID, PolicyID) AS (
SELECT ItemID
, PolicyID
FROM dbo.Example
UNION ALL
...
)
SELECT ItemID
, PolicyID
FROM Policies;
Can someone explain to me in plain steps how such a CTE works and push me in the right direction?

A recursive CTE works by joining to itself, using a UNION ALL to collate the results.
You start with yourtable to populate the initial dataset of the recursive query
select * from yourtable
and you add to that with the UNION ALL, further results
select c.ItemID, t2.ParentID, t2.PolicyID, t2.PolicyRoot
from yourtable t2
inner join c on c.ParentID = t2.ItemID
where c.PolicyRoot=0
and the recursion occurs in this - where the results of this query are fed through this query again and again, up to the MAXRECURSION limit, or when no more results are added.
-- Anchor: every row of yourtable as-is.
-- Recursive step: a row whose PolicyRoot is 0 is replaced by its parent's
-- ParentID/PolicyID/PolicyRoot while keeping its own ItemID.
;with resolved as
(
    select *
    from yourtable

    union all

    select
        resolved.ItemID,
        up.ParentID,
        up.PolicyID,
        up.PolicyRoot
    from yourtable as up
    inner join resolved
        on up.ItemID = resolved.ParentID
    where resolved.PolicyRoot = 0
)
-- Report, per item, the PolicyID of its resolved row with PolicyRoot = 1 (NULL when there is none).
select
    item.ItemID,
    resolved.PolicyID
from yourtable as item
left join resolved
    on resolved.ItemID = item.ItemID
   and resolved.PolicyRoot = 1

Order of Recursion (SQL Server CTE)

I can achieve recursion by using SQL Server's With command (CTE).
-- Top-down walk: start at the users without a manager (Level 0), then add
-- each manager's direct reports one level deeper.
WITH MyCTE (ParentID, ID, Name, Level) AS
(
    SELECT
        top_user.ManagerID,
        top_user.UserID,
        top_user.UserName,
        0
    FROM USERS AS top_user
    WHERE top_user.ManagerID IS NULL

    UNION ALL

    SELECT
        report.ManagerID,
        report.UserID,
        report.UserName,
        boss.Level + 1
    FROM USERS AS report
    INNER JOIN MyCTE AS boss
        ON report.ManagerID = boss.ID
)
SELECT
    ParentID,
    ID
FROM MyCTE
returns
ParentID ID
NULL 1
1 2
1 3
2 4
What I want to achieve is to reverse this result set, namely swapping the root node and the deepest child node, as in:
ParentID ID
NULL 4
4 2
2 1
3 1
Couldn't figure out how to programmatically implement this (preferably by using CTE), like by using a parameter to determine the recursion order etc. Any help is greatly appreciated, thanks.
Edit :
Modified this a bit inserting my first CTE's results into a temp table, then using another recursion I reverse the order as (I know "WHERE T.ID = (SELECT MAX(ID) FROM #tmp)" wont work in a real situation, I also gotta determine the deepest node with the "Level" column, just tried to simplify this for this example),
INSERT INTO #tmp
SELECT ParentID,ID,Level FROM MyCTE
WITH MyCTE2(ParentID,ID,Level)
AS
(
SELECT NULL AS ParentID, ID AS ID, 0 AS Level FROM #tmp T
WHERE T.ID = (SELECT MAX(ID) FROM #tmp)
UNION ALL
SELECT R2.ID AS ParentID, T.ParentID AS ID, R2.Level+1 FROM #tmp T
INNER JOIN MyCTE2 R2 ON R2.ID = T.ID
WHERE T.ParentID IS NOT NULL
)
Original Results (removed the 1,3 pair)
ParentID ID Level
NULL 1 0
1 2 1
2 4 2
Reversed results,
ParentID ID Level
NULL 4 0
4 2 1
2 1 2
Edit 2:
I did something like this,
-- Combines the rows of MyCTE2 with the #tmp rows whose ID does not appear in MyCTE2.
SELECT TTT.ParentID,TTT.ID,TTT.Level FROM
(
SELECT ParentID,ID,Level FROM MyCTE2
UNION ALL
-- For the remaining IDs, swap ParentID and ID and take Level + 1 from the matching #tmp row.
SELECT TT.ID AS ParentID,TT.ParentID AS ID,(SELECT Level+1 FROM #tmp WHERE ID=TT.ID)
AS Level FROM
(
-- IDs present in #tmp but absent from MyCTE2.
SELECT ID FROM #tmp
EXCEPT
SELECT ID FROM MyCTE2
)T INNER JOIN #tmp TT ON TT.ID = T.ID
)TTT
ORDER BY TTT.Level
gives,
ParentID ID Level
NULL 4 0
4 2 1
2 1 2
3 1 2
This may contain errors, I'm not sure yet; I just wanted to show it to check whether the pair (3,1) is correct with level 2. I've been thinking about this for quite a while now, so I might have made some silly mistakes.

Sample data
-- Sample hierarchy held in a temp table. "declare #T table" is not valid
-- T-SQL; a #-prefixed name must be created with CREATE TABLE, which also keeps
-- the name #T used by the queries that follow.
if object_id('tempdb..#T') is not null
    drop table #T;

create table #T
(
    ParentID int,
    ID int
);

insert into #T (ParentID, ID) values
    (NULL, 1),
    (1   , 2),
    (1   , 3),
    (2   , 4);
Recursion from root:
-- Walk the tree top-down: start at the rows without a parent, then keep adding their children.
;with C as
(
    select
        root.ParentID,
        root.ID
    from #T as root
    where root.ParentID is null

    union all

    select
        child.ParentID,
        child.ID
    from #T as child
    inner join C
        on C.ID = child.ParentID
)
select
    ParentID,
    ID
from C
Result
ParentID ID
----------- -----------
NULL 1
1 2
1 3
2 4
Recursion from leafs:
-- Walk the tree bottom-up: start at the IDs that are nobody's parent, then
-- step to each row's parent, recording the row we came from as PParentID.
;with C as
(
    select
        null as PParentID,
        leaf.ID,
        leaf.ParentID
    from #T as leaf
    where leaf.ID not in (
              select p.ParentID
              from #T as p
              where p.ParentID is not null
          )

    union all

    select
        C.ID,
        up.ID,
        up.ParentID
    from #T as up
    inner join C
        on C.ParentID = up.ID
)
select distinct
    PParentID as ParentID,
    ID
from C
Result:
ParentID ID
----------- -----------
NULL 3
NULL 4
4 2
2 1
3 1
If you have many branches you will get duplicate rows as they merge together. Using distinct takes care of that.
To get the levels correct you need to first calculate the level from top down. Store that in a table variable (or temp table) and then use that as the source for leaf->root recursion.
-- Primary key and unique is in there to get the indexes used in the recursion
-- "declare #T2 table" is not valid T-SQL; a #-prefixed name must be created
-- with CREATE TABLE, which also keeps the name #T2 used by the next query.
if object_id('tempdb..#T2') is not null
    drop table #T2;

create table #T2
(
    ParentID int,
    ID int,
    Level int,
    primary key (ID),
    unique (ParentID, ID)
);

-- Top-down pass over #T: roots get Level 0, every child its parent's Level + 1.
;with C as
(
    select ParentID, ID, 0 as Level
    from #T
    where ParentID is null

    union all

    select T.ParentID, T.ID, Level + 1
    from #T as T
    inner join C
        on T.ParentID = C.ID
)
insert into #T2 (ParentID, ID, Level)
select ParentID, ID, Level
from C;
-- Bottom-up walk over #T2, carrying each row's stored Level along.
;with C as
(
    select
        null as PParentID,
        leaf.ID,
        leaf.ParentID,
        leaf.Level
    from #T2 as leaf
    where leaf.ID not in (
              select p.ParentID
              from #T2 as p
              where p.ParentID is not null
          )

    union all

    select
        C.ID,
        up.ID,
        up.ParentID,
        up.Level
    from #T2 as up
    inner join C
        on C.ParentID = up.ID
)
-- Flip the levels: the largest stored Level becomes 0.
select distinct
    PParentID as ParentID,
    ID,
    max(Level) over () - Level as level
from C
Result:
ParentID ID level
----------- ----------- -----------
NULL 3 1
NULL 4 0
2 1 2
3 1 2
4 2 1
It is possible, but a really bad idea, to replace #T2 with a multi-CTE query. It will kill performance because the first CTE will be rebuilt for each recursion. At least that is my guess of what is happening, but believe me, it is not fast.