I have the following query:
insert into [MyDB].[dbo].[Reports_ActivityStat] (ActivityID,TaskID,LS_HowOthersAnswered,LS_ImproveMyChances)
(
SELECT
ActivityID=tasks.ActivityID,
TaskID=tasks.ID,
CAST(
CASE
WHEN CHARINDEX('stats',item.value('(UsedLifeSavers)[1]', 'NVARCHAR(MAX)'))>0
THEN 1
ELSE 0
END AS bit) as LS_HowOthersAnswered,
CAST(
CASE
WHEN CHARINDEX('FiftyFifty',item.value('(UsedLifeSavers)[1]', 'NVARCHAR(MAX)'))>0
THEN 1
ELSE 0
END AS bit) as LS_ImproveMyChances
FROM [MyDB].[dbo].[Tasks] as tasks CROSS APPLY [Progress].nodes ('//Progress/Steps/ProgressStep') Progress(item)
)
which acts on the following Tasks table:
ID | ActivityID | Progress
1 | 1 | [example below..]
2 | 1 | [example below..]
Where Progress is an xml of the sort:
<Progress xmlns:i="http://www.w3.org/2001/XMLSchema-instance">
<Steps>
<ProgressStep>
<FinishedOn>2012-10-30T13:07:52.6374861+02:00</FinishedOn>
<Score>0</Score>
<StartedOn>2012-10-30T13:07:45.8234861+02:00</StartedOn>
<Status>Finished</Status>
<StepIndex>0</StepIndex>
<StepType>Summary</StepType>
<UsedLifeSavers xmlns:a="http://schemas.datacontract.org/2004/07/MindLab.Logic.Study" />
</ProgressStep>
<ProgressStep>
<FinishedOn i:nil="true" />
<PartNumber>1</PartNumber>
<Score>0</Score>
<StartedOn>2012-10-30T13:07:52.6374861+02:00</StartedOn>
<Status>NotFinished</Status>
<StepIndex>1</StepIndex>
<StepType>Information</StepType>
<SubmittedAnswersCount>0</SubmittedAnswersCount>
<UsedLifeSavers xmlns:a="http://schemas.datacontract.org/2004/07/MindLab.Logic.Study">
<a:LifeSavers>Stats</a:LifeSavers>
<a:LifeSavers>FiftyFifty</a:LifeSavers>
</UsedLifeSavers>
</ProgressStep>
</Steps>
</Progress>
(usually there are many more than 2 steps..)
My query yields (not actual data, just sample):
ID | ActivityID | TaskID | LS_HowOthersAnswered | LS_ImproveMyChances
1 | 1 | 1 | 0 | 0
2 | 1 | 1 | 1 | 0
3 | 1 | 1 | 0 | 0
This is almost what I need but not quite.
I need for every unique TaskID the SUM of all LS_HowOthersAnswered and LS_ImproveMyChances.
I tried adding a GROUP BY, but couldn't make it work — aggregating over a CROSS APPLY turned out different enough from a regular inner join that I got stuck.
I eventually found the answer myself:
insert into [MyDB].[dbo].[Reports_ActivityStat] (ActivityID,TaskID,LS_HowOthersAnswered,LS_ImproveMyChances)
(
SELECT
ActivityID=tasks.ActivityID,
TaskID=tasks.ID,
SUM (CAST(
CASE
WHEN CHARINDEX('stats',item.value('(UsedLifeSavers)[1]', 'NVARCHAR(MAX)'))>0
THEN 1
ELSE 0
END AS int)
),
SUM (CAST(
CASE
WHEN CHARINDEX('FiftyFifty',item.value('(UsedLifeSavers)[1]', 'NVARCHAR(MAX)'))>0
THEN 1
ELSE 0
END AS int)
)
FROM [MyDB].[dbo].[Tasks] as tasks CROSS APPLY [Progress].nodes ('//Progress/Steps/ProgressStep') Progress(item)
group by tasks.ID, tasks.ActivityID
)
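As a sketch of an alternative (not from the original post): instead of CHARINDEX against the concatenated element text, which could false-match substrings, the XML exist() method can test the individual <a:LifeSavers> values. This assumes the lifesaver names appear exactly as "Stats" and "FiftyFifty", as in the sample XML (XQuery comparisons are case-sensitive, unlike CHARINDEX under a case-insensitive collation):

```sql
;WITH XMLNAMESPACES ('http://schemas.datacontract.org/2004/07/MindLab.Logic.Study' AS a)
SELECT
    ActivityID = tasks.ActivityID,
    TaskID     = tasks.ID,
    -- exist() returns bit (1/0), so cast before summing
    SUM(CAST(item.exist('UsedLifeSavers/a:LifeSavers[. = "Stats"]')      AS int)) AS LS_HowOthersAnswered,
    SUM(CAST(item.exist('UsedLifeSavers/a:LifeSavers[. = "FiftyFifty"]') AS int)) AS LS_ImproveMyChances
FROM [MyDB].[dbo].[Tasks] AS tasks
CROSS APPLY [Progress].nodes('/Progress/Steps/ProgressStep') Progress(item)
GROUP BY tasks.ID, tasks.ActivityID;
```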
I have the following query which returns two columns; I want to create a third column by summing the two.
Is there any way I can recreate the below query by removing the subquery? Any way I can achieve the same through joins?
SELECT
IIF(c2.isdeleted = 1 OR c2.approved = 0, 0, 1) AS Contentcount,
(SELECT COUNT(c1.content)
FROM comments c1
WHERE c1.parentcommentid = c2.id
AND c1.isdeleted = 0
AND c1.approved = 1) ChildContentcount -- any way to remove the subquery?
FROM
comments c2
WHERE
c2.discussionid = '257943'
AND c2.parentcommentid IS NULL
ORDER BY
c2.pinned DESC,
c2.createddate
Sample data:
+----------+--------------+
| content | childcontent |
+----------+--------------+
| 1 | 8 |
| 0 | 0 |
| 1 | 3 |
+----------+--------------+
Expected output:
+----------+----------------+---------+
| content | childcontent | sumdata |
+----------+----------------+---------+
| 1 | 8 | 9 |
| 0 | 0 | 0 |
| 1 | 3 | 4 |
| 1 | 8 | 9 |
+----------+----------------+---------+
You can use CROSS APPLY or OUTER APPLY instead of a correlated subquery.
Then you can re-use the values.
select c.pinned, c.createddate
, c.discussionid
, ca1.content
, ca2.childcontent
, (ca1.content + ca2.childcontent) AS sumdata
FROM comments c
CROSS APPLY
(
SELECT CASE
WHEN c.isdeleted = 1 OR c.approved = 0 THEN 0
ELSE 1
END AS content
) ca1
CROSS APPLY
(
SELECT COUNT(c2.content) AS childcontent
FROM comments c2
WHERE c2.parentcommentid = c.id
AND c2.isdeleted = 0
AND c2.approved = 1
) ca2
WHERE c.discussionid = '257943'
AND c.parentcommentid IS NULL
ORDER BY
c.pinned DESC,
c.createddate;
Alternatively, wrap it in a subquery and sum the columns:
select tbl.* , Contentcount+ChildContentcount third_sum from
(
select IIF(c2.isdeleted = 1 OR c2.approved = 0, 0, 1) AS Contentcount,
(SELECT COUNT(c1.content)
FROM comments c1
WHERE c1.parentcommentid = c2.id
AND c1.isdeleted = 0
AND c1.approved = 1) ChildContentcount
FROM
comments c2
WHERE
c2.discussionid = '257943'
AND c2.parentcommentid IS NULL ) tbl
If you supply a SQL Fiddle, we can try alternative approaches.
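One more variant of the APPLY idea, a sketch against the same assumed comments schema: a single CROSS APPLY can compute both columns at once, and the outer query reuses them by name for sumdata:

```sql
SELECT ca.Contentcount,
       ca.ChildContentcount,
       ca.Contentcount + ca.ChildContentcount AS sumdata
FROM comments c2
CROSS APPLY (
    -- both derived columns in one APPLY, so they can be referenced together
    SELECT IIF(c2.isdeleted = 1 OR c2.approved = 0, 0, 1) AS Contentcount,
           (SELECT COUNT(c1.content)
            FROM comments c1
            WHERE c1.parentcommentid = c2.id
              AND c1.isdeleted = 0
              AND c1.approved = 1) AS ChildContentcount
) ca
WHERE c2.discussionid = '257943'
  AND c2.parentcommentid IS NULL
ORDER BY c2.pinned DESC, c2.createddate;
```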
I need to calculate a running column that subtracts the Need value from a Total, but skips any row whose Need exceeds the remaining total and continues with the rows after it. The sequence relates to dates, so the order must be preserved. A Need value is never larger than the Total itself, as such rows are deleted beforehand.
This is for SQL Server 2016. My initial thought process was to use window functions and a running total, but I cannot figure out how to skip the 400 and continue to the 2 rows below. I included my attempts in the CASE statement as TransferQty and the running total as ReferenceCol.
Code to reproduce:
DECLARE @i TABLE
(
sequence INT IDENTITY(1,1)
,Total INT
,Need INT
)
INSERT INTO @i
VALUES (500,100)
,(500,200)
,(500,50)
,(500,400)
,(500,50)
,(500,50)
SELECT
sequence
,Total
,Need
,CASE
WHEN Total - SUM(Need) OVER (ORDER BY sequence) > 0
THEN Need
ELSE 0
END AS TransferQty
,Total - SUM(Need) OVER (ORDER BY sequence) as ReferenceCol
FROM @i
Current Results
+----------+-------+------+-------------+--------------+
| Sequence | Total | Need | TransferQty | ReferenceCol |
+----------+-------+------+-------------+--------------+
| 1 | 500 | 100 | 100 | 400 |
| 2 | 500 | 200 | 200 | 200 |
| 3 | 500 | 50 | 50 | 150 |
| 4 | 500 | 400 | 0 | -250 |
| 5 | 500 | 50 | 0 | -300 |
| 6 | 500 | 50 | 0 | -350 |
+----------+-------+------+-------------+--------------+
Desired Results
+----------+-------+------+-------------+--------------+
| Sequence | Total | Need | TransferQty | ReferenceCol |
+----------+-------+------+-------------+--------------+
| 1 | 500 | 100 | 100 | 400 |
| 2 | 500 | 200 | 200 | 200 |
| 3 | 500 | 50 | 50 | 150 |
| 4 | 500 | 400 | 0 | 150 | --skip calc
| 5 | 500 | 50 | 50 | 100 |
| 6 | 500 | 50 | 50 | 50 |
+----------+-------+------+-------------+--------------+
You should be able to use this code if there is only a single skip; with multiple skips you would have to loop, deleting on each pass the record whose rolling value exceeds the total.
DECLARE @i TABLE
(
sequence INT IDENTITY(1,1)
,Total INT
,Need INT
)
INSERT INTO @i
VALUES
(500,100)
,(500,200)
,(500,50)
,(500,400)
,(500,50)
,(500,50)
select sequence,Total,Need
into #temp_original
from @i
select
b.sequence, b.Total, SUM(a.need) rollingvalue,
case when SUM(a.need) > b.Total
then 0
when SUM(a.need) = b.Total then SUM(a.need)
else b.Total - SUM(a.need) end how_much_needed
into #temp
from @i a
join @i b
on a.sequence < b.sequence + 1
group by b.sequence, b.Total
delete from a
from @i a
join (
select min(sequence) min_sequence
from #temp
where how_much_needed = 0
) minseq
on minseq.min_sequence = a.sequence
select
b.sequence, b.Total, SUM(a.need) rollingvalue,
case when SUM(a.need) > b.Total
then 0
when SUM(a.need) = b.Total then SUM(a.need)
else b.Total - SUM(a.need) end how_much_needed
into #temp2
from @i a
join @i b
on a.sequence < b.sequence + 1
group by b.sequence, b.Total
select a.sequence, a.Total, a.Need,
case when isnull(b.rollingvalue, 0) = 0 then 0
     else case when b.rollingvalue > a.Total then 0 else a.Need end
end as TransferQty,
ISNULL(
    case when b.how_much_needed = b.Total then a.Need else b.how_much_needed end,
    case when (select how_much_needed from #temp2 where sequence = a.sequence - 1) = a.Total then 0
         else (select how_much_needed from #temp where sequence = a.sequence - 1) end
) as ReferenceCol
from #temp_original a
LEFT join #temp2 b
on a.sequence = b.sequence
join #temp c
on c.sequence = a.sequence
drop table #temp
drop table #temp2
drop table #temp_original
Here is the solution I went with, based on the "Quirky Update" from the original comments.
DROP TABLE IF EXISTS #i
GO
CREATE TABLE #i
(
sequence INT IDENTITY(1,1) PRIMARY KEY CLUSTERED
,Total INT
,Need INT
,RunningTransfer INT NULL
)
INSERT INTO #i
VALUES
(500,100,NULL)
,(500,200,NULL)
,(500,50,NULL)
,(500,400,NULL)
,(500,50,NULL)
,(500,50,NULL)
,(500,100,NULL)
,(500,49,NULL)
,(500,50,NULL)
DECLARE @TransferRunningTotal INT
UPDATE #i
SET @TransferRunningTotal = RunningTransfer = CASE
--this skips values larger than running total
WHEN @TransferRunningTotal < Need THEN @TransferRunningTotal
--this creates the running total
WHEN @TransferRunningTotal > Need THEN @TransferRunningTotal - Need
--creates the initial value
ELSE Total - Need
END
FROM #i WITH (TABLOCKX)
OPTION (MAXDOP 1)
SELECT sequence
,Total
,Need
,CASE
WHEN need <= RunningTransfer THEN Need
ELSE 0
END AS TsfQty
,RunningTransfer
FROM #i
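If a set-based alternative to the quirky update is preferred, a recursive CTE can carry the remaining total forward row by row. This is my sketch, not from the original thread, and it relies on the sequence column being contiguous (as the IDENTITY above guarantees):

```sql
WITH r AS
(
    -- anchor: the first row initializes the remaining total
    SELECT sequence, Total, Need,
           CASE WHEN Need <= Total THEN Need ELSE 0 END AS TransferQty,
           Total - CASE WHEN Need <= Total THEN Need ELSE 0 END AS ReferenceCol
    FROM #i
    WHERE sequence = 1

    UNION ALL

    -- each next row transfers its Need only if it fits in what remains,
    -- otherwise it is skipped and the remaining total carries over unchanged
    SELECT i.sequence, i.Total, i.Need,
           CASE WHEN i.Need <= r.ReferenceCol THEN i.Need ELSE 0 END,
           r.ReferenceCol - CASE WHEN i.Need <= r.ReferenceCol THEN i.Need ELSE 0 END
    FROM #i AS i
    JOIN r ON i.sequence = r.sequence + 1
)
SELECT sequence, Total, Need, TransferQty, ReferenceCol
FROM r
ORDER BY sequence
OPTION (MAXRECURSION 0);
```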
I am trying to write a query that partitions based on the value 90. Below is my table:
create table #temp(StudentID char(2), Status int)
insert #temp values('S1',75 )
insert #temp values('S1',85 )
insert #temp values('S1',90)
insert #temp values('S1',85)
insert #temp values('S1',83)
insert #temp values('S1',90 )
insert #temp values('S1',85)
insert #temp values('S1',90)
insert #temp values('S1',93 )
insert #temp values('S1',93 )
insert #temp values('S1',93 )
Required output:
ID Status Result
S1 75 0
S1 85 0
S1 90 0
S1 85 1
S1 83 1
S1 90 1
S1 85 2
S1 90 2
S1 93 3
S1 93 3
S1 93 3
Does anyone have a solution to partition based on the Status value 90? Result should be 1, 2, 3... incrementing on each occurrence of the value 90.
Assuming that the actual question is "How can I find ranges/islands of incrementing values", the answer could use LAG to compare the current Status value with the previous one based on some order. If the previous value is 90, you have a new island:
declare #temp table (ID int identity PRIMARY KEY, StudentID char(2), Status int)
insert into #temp (StudentID,Status)
values
('S1',75),
('S1',85),
('S1',90),
('S1',85),
('S1',83),
('S1',90),
('S1',85),
('S1',90),
('S1',93),
('S1',93),
('S1',93);
select
* ,
case LAG(Status,1,0) OVER (PARTITION BY StudentID ORDER BY ID)
when 90 then 1 else 0 end as NewIsland
from #temp
This returns:
+----+-----------+--------+-----------+
| ID | StudentID | Status | NewIsland |
+----+-----------+--------+-----------+
| 1 | S1 | 75 | 0 |
| 2 | S1 | 85 | 0 |
| 3 | S1 | 90 | 0 |
| 4 | S1 | 85 | 1 |
| 5 | S1 | 83 | 0 |
| 6 | S1 | 90 | 0 |
| 7 | S1 | 85 | 1 |
| 8 | S1 | 90 | 0 |
| 9 | S1 | 93 | 1 |
| 10 | S1 | 93 | 0 |
| 11 | S1 | 93 | 0 |
+----+-----------+--------+-----------+
You can create an Island ID from this by summing all NewIsland values before the current one, using SUM with the ROWS clause of OVER:
with islands as
(
select
* ,
case LAG(Status,1,0) OVER (PARTITION BY StudentID ORDER BY ID)
when 90 then 1 else 0 end as NewIsland
from #temp
)
select * ,
SUM(NewIsland) OVER (PARTITION BY StudentID ORDER BY ID ROWS UNBOUNDED PRECEDING)
from islands
This produces:
+----+-----------+--------+-----------+--------+
| ID | StudentID | Status | NewIsland | Result |
+----+-----------+--------+-----------+--------+
| 1 | S1 | 75 | 0 | 0 |
| 2 | S1 | 85 | 0 | 0 |
| 3 | S1 | 90 | 0 | 0 |
| 4 | S1 | 85 | 1 | 1 |
| 5 | S1 | 83 | 0 | 1 |
| 6 | S1 | 90 | 0 | 1 |
| 7 | S1 | 85 | 1 | 2 |
| 8 | S1 | 90 | 0 | 2 |
| 9 | S1 | 93 | 1 | 3 |
| 10 | S1 | 93 | 0 | 3 |
| 11 | S1 | 93 | 0 | 3 |
+----+-----------+--------+-----------+--------+
BTW this is a case of the wider Gaps & Islands problem in SQL.
UPDATE
LAG and OVER are available in all supported SQL Server versions, i.e. SQL Server 2012 and later. OVER is also available in SQL Server 2008, but LAG is not. In those versions, different, slower techniques were used to calculate islands: The SQL of Gaps and Islands in Sequences
In most cases ROW_NUMBER() is used to calculate the row ordering, which results in one extra CTE. This can be avoided if the desired ordering is the same as the ID, or any other unique incrementing column. The following query flags the islands without LAG (note that it marks the 90 rows themselves rather than their successors, which is why the summing step that follows uses a strict inequality):
select
* ,
case when exists (select ID
from #temp t1
where t1.StudentID=t2.StudentID
and t1.ID=t2.ID-1
and t2.status=90) then 1
else 0 end
as NewIsland
from #temp t2
This query returns 1 when the current row has Status 90 and a predecessor exists with the same StudentID and an ID (or ROW_NUMBER) one less — the counterpart of LAG(,1).
After that we just need to SUM the previous values. While SUM OVER was available in 2008, it only supported PARTITION BY, so we need another subquery:
;with islands as
(
select
* ,
case when exists (select ID from #temp t1 where t1.StudentID=t2.StudentID and t1.ID=t2.ID-1 and t2.status=90) then 1
else 0 end
as NewIsland
from #temp t2
)
select * ,
(select ISNULL(SUM(NewIsland),0)
from islands i1
where i1.ID<i2.ID) AS Result
from islands i2
This sums all NewIsland values for rows with an ID less than the current one.
Performance
All those subqueries result in a lot of repeated scans. Surprisingly though, the older query is faster than the query with LAG, because the LAG query has to sort intermediate results multiple times and filter by Status: a 45% vs 55% execution plan cost.
Things change dramatically when an index is added:
declare #temp table ( ID int identity PRIMARY KEY, StudentID char(2), Status int,
INDEX IX_TMP(StudentID,ID,Status))
The multiple sorts disappear and the costs become 80% vs 20% in favor of the LAG query, which now just scans the index values once without sorting the intermediate results. The subquery version wasn't able to take advantage of the index.
UPDATE 2
uzi suggested that removing LAG and summing only up to the previous row would be better:
select * ,
SUM(case when status =90 then 1 else 0 end)
OVER (PARTITION BY StudentID
ORDER BY ID ROWS
BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING)
from #temp;
Semantically, this is the same thing - for each row find all previous ones, calculate 1 for the 90s and 0 for the other rows, and sum them.
The server generates similar execution plans in both cases. The LAG version used two stream aggregate operators, while the version without it used one. For this limited data set the end result was essentially the same.
For a larger data set the results may differ, e.g. if the server has to spool data to tempdb because it doesn't fit in memory.
Perhaps this is not a very good solution, but it works.
SELECT StudentID ID
, Status
, CASE
WHEN Status = 90
THEN SUM(q) OVER(order by row) - 1
ELSE SUM(q) OVER(order by row)
END Result
FROM (
SELECT row_number() OVER(order by StudentID desc) row
, *
, CASE
WHEN Status = 90
THEN 1
ELSE 0
END q
FROM #temp
) a
You could simply use a subquery:
select *,
coalesce((select sum(case when Status = 90 then 1 else 0 end)
from #temp
where StudentID = t.StudentID and
? < t.?) , 0) as Result
from #temp t;
However, replace the ? placeholders with your actual ordering column (e.g. an id).
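This question's #temp has no ordering column at all, so purely as a hypothetical illustration, assuming an identity ID column like the one declared in the LAG answer, the filled-in query would be:

```sql
-- assumes a hypothetical identity ID column reflecting insertion order
select t.*,
       coalesce((select sum(case when i.Status = 90 then 1 else 0 end)
                 from #temp i
                 where i.StudentID = t.StudentID
                   and i.ID < t.ID), 0) as Result
from #temp t;
```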
Preface: I am not sure the question even makes sense.
Problem Statement
I have a table that looks like this:
+----------------+-----------+
| ProductGroupID | ProductID |
+----------------+-----------+
| 1 | 1 |
| 1 | 2 |
| 2 | 3 |
| 2 | 4 |
| 3 | 1 |
| 3 | 2 |
| 4 | 1 |
| 4 | 2 |
| 4 | 3 |
+----------------+-----------+
What I am trying to do is find all ProductGroupIDs that contain both ProductID 1 and 2.
Desired Output
+----------------+
| ProductGroupID |
+----------------+
| 1 |
| 3 |
| 4 |
+----------------+
My Attempt
Below is a naïve script that I created, but I think that it could be vastly improved.
SELECT DISTINCT ProductGroupID
FROM
(SELECT ProductGroupID FROM tbl WHERE ProductID = 1 ) t1
INNER JOIN
(SELECT ProductGroupID FROM tbl WHERE ProductID = 2 ) t2
ON t1.ProductGroupID = t2.ProductGroupID
For some reason, my gut tells me that a CROSS APPLY could be useful in this situation, but I cannot seem to reason about the problem.
Any assistance will be appreciated.
Addendum
Bonus points if you can produce a script that shows ProductGroupIDs where only 1 and 3 are displayed, and 4 is ignored because it has an extra item in the set.
How about using HAVING:
WITH VTE AS(
SELECT *
FROM (VALUES (1,1),
(1,2),
(2,3),
(2,4),
(3,1),
(3,2),
(4,1),
(4,2),
(4,3)) V(ProductGroupID, ProductID))
SELECT ProductGroupID
FROM VTE
GROUP BY ProductGroupID
HAVING COUNT(CASE ProductID WHEN 1 THEN 1 END) >0
AND COUNT(CASE ProductID WHEN 2 THEN 1 END) >0;
If you want to ignore ProductIDGroup 4 then:
WITH VTE AS(
SELECT *
FROM (VALUES (1,1),
(1,2),
(2,3),
(2,4),
(3,1),
(3,2),
(4,1),
(4,2),
(4,3)) V(ProductGroupID, ProductID))
SELECT ProductGroupID
FROM VTE
GROUP BY ProductGroupID
HAVING COUNT(CASE ProductID WHEN 1 THEN 1 END) >0
AND COUNT(CASE ProductID WHEN 2 THEN 1 END) >0
AND COUNT(CASE WHEN ProductID NOT IN (1,2) THEN 1 END) = 0;
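A set-based sketch of my own (against the question's tbl, not from the original answer): INTERSECT keeps the group IDs present in both per-product sets and deduplicates for free, and EXCEPT covers the bonus requirement. INTERSECT binds tighter than EXCEPT, so no parentheses are needed:

```sql
-- groups containing both ProductID 1 and 2
SELECT ProductGroupID FROM tbl WHERE ProductID = 1
INTERSECT
SELECT ProductGroupID FROM tbl WHERE ProductID = 2;

-- bonus: groups whose product set is exactly {1, 2},
-- i.e. (groups with 1 ∩ groups with 2) minus groups with anything else
SELECT ProductGroupID FROM tbl WHERE ProductID = 1
INTERSECT
SELECT ProductGroupID FROM tbl WHERE ProductID = 2
EXCEPT
SELECT ProductGroupID FROM tbl WHERE ProductID NOT IN (1, 2);
```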
Hello, I have two tables as below.
tblContactType
typeId typeName active
1 Email 1
2 Phone 1
3 Address 1
4 Fax 1
tblContact
id IdName typeId groupId
100 test 1 1
101 test2 1 1
102 test3 1 2
103 test4 2 2
104 test5 2 3
105 test6 3 3
I want the results to have one count column per typeName, grouped by groupId: for each group, the total number of contacts of each type.
GroupId EmailCount PhoneCount AddressCount FaxCount
1 2 0 0 0
2 1 1 0 0
3 0 1 1 0
You can group by and pivot as below:
Select * from (
Select t.groupid, tct.typename, t.id from tblContact t
inner join tblContactType tct
on t.typeid = tct.typeid
) a
pivot (count(a.id) for typename in ([Email],[Phone],[Address],[Fax]) ) p
For dynamic list of columns you can use dynamic query as below:
declare @cols1 varchar(max)
declare @query nvarchar(max)
Select @cols1 = stuff((Select distinct ','+QuoteName(typename) from tblContactType for xml path('')),1,1,'')
Set @query = ' Select * from (
Select t.groupid, tct.typename, t.id from tblContact t
inner join tblContactType tct
on t.typeid = tct.typeid
) a
pivot (count(a.id) for typename in (' + @cols1 + ') ) p '
Select @query --Check the generated query is good and then execute below
--exec sp_executesql @query
Output as below:
+---------+---------+-------+-----+-------+
| groupid | Address | Email | Fax | Phone |
+---------+---------+-------+-----+-------+
| 1 | 0 | 2 | 0 | 0 |
| 2 | 0 | 1 | 0 | 1 |
| 3 | 1 | 0 | 0 | 1 |
+---------+---------+-------+-----+-------+
Here is another solution.
SELECT groupId,
SUM(CASE WHEN c.typeId = 1 THEN 1 ELSE 0 END) 'EmailCount',
SUM(CASE WHEN c.typeId = 2 THEN 1 ELSE 0 END) 'PhoneCount',
SUM(CASE WHEN c.typeId = 3 THEN 1 ELSE 0 END) 'AddressCount',
SUM(CASE WHEN c.typeId = 4 THEN 1 ELSE 0 END) 'FaxCount'
FROM tblContact c
JOIN tblContactType ct ON c.typeId = ct.typeId
GROUP BY groupId
Results
-------------------------------------------------------------
groupId | EmailCount | PhoneCount | AddressCount | FaxCount
-------------------------------------------------------------
1 | 2 | 0 | 0 | 0
2 | 1 | 1 | 0 | 0
3 | 0 | 1 | 1 | 0
-------------------------------------------------------------