Select First, Max, and Last non-null value per group

Select First, Max, and Last non-null value per group - sql-server

Trying to select, per group, the first and last values (chronologically) as well as the max value. I had written a query that works fine except it does not handle the NULL values. I need it to ignore NULL values.
Here's an example:
DECLARE #T table (
LabName VARCHAR(20)
, CreatedOn date
, LabValue int
)
INSERT INTO #T
( LabName,CreatedOn,LabValue )
VALUES
('Creatinine', '2016-01-01', NULL)
, ('Creatinine', '2016-02-01', 15)
, ('Creatinine', '2016-03-01', 20)
, ('Creatinine', '2016-04-01', 19)
, ('SGOT (ST)', '2016-01-01', 25)
, ('SGOT (ST)', '2016-02-01', 31)
, ('SGOT (ST)', '2016-03-01', 25)
, ('SGOT (ST)', '2016-04-01', NULL)
SELECT DISTINCT
*
FROM (
SELECT
LabName
, FIRST_VALUE(LabValue) OVER(PARTITION BY LabName ORDER BY CreatedOn ASC) AS FirstValue
, MAX(LabValue) OVER(PARTITION BY LabName) AS MaxValue
, LAST_VALUE(LabValue) OVER(PARTITION BY LabName ORDER BY CreatedOn ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) LastValue
FROM #T
) AS T
It was working fine until I realized some labs aren't run on some dates. Once I put some NULLs into the test data, the results for First and Last will include them.
Here is the result I get:
+------------+------------+----------+-----------+
| LabName | FirstValue | MaxValue | LastValue |
+------------+------------+----------+-----------+
| Creatinine | NULL | 20 | 19 |
| SGOT (ST) | 25 | 31 | NULL |
+------------+------------+----------+-----------+
Here is the result I want:
+------------+------------+----------+-----------+
| LabName | FirstValue | MaxValue | LastValue |
+------------+------------+----------+-----------+
| Creatinine | 15 | 20 | 19 |
| SGOT (ST) | 25 | 31 | 25 |
+------------+------------+----------+-----------+

Use conditional aggregation with ROW_NUMBER():
SELECT LabName,
MAX(CASE WHEN seqnum_asc = 1 THEN LabValue END) as FirstValue,
MAX(LabValue) as MaxValue,
MAX(CASE WHEN seqnum_desc = 1 THEN LabValue END) as LastValue
FROM (SELECT t.*,
ROW_NUMBER() OVER (PARTITION BY LabName
ORDER BY (CASE WHEN LabValue IS NOT NULL THEN 1 ELSE 2 END),
CreatedOn
) as seqnum_asc,
ROW_NUMBER() OVER (PARTITION BY LabName
ORDER BY (CASE WHEN LabValue IS NOT NULL THEN 1 ELSE 2 END),
CreatedOn DESC
) as seqnum_desc
FROM #T t
) T
GROUP BY LabName;

As you said there are 13 such columns where you need to check not null values.
I think you should first filter all not null values using CTE,then using CTE you can write your actual query.CTE will reduce your result set and applying window function on reduce resultset will give better performance.
BTW,13 such columns appear t be bad DB design.you may have to 100 query in future.
IMHO, DISTINCT often indicate bad DB design than query.
;With CTE as
(-- try to reduce resultset if possible
SELECT * FROM #T
WHERE LabValue IS NOT NULL
)
SELECT DISTINCT
*
FROM (
SELECT
LabName
, FIRST_VALUE(LabValue) OVER(PARTITION BY LabName ORDER BY CreatedOn ASC) AS FirstValue
, MAX(LabValue) OVER(PARTITION BY LabName) AS MaxValue
, LAST_VALUE(LabValue) OVER(PARTITION BY LabName ORDER BY CreatedOn ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) LastValue
FROM CTE
) AS T

Your database is handling NULL values properly.
First value for Creatinine is actually null and last value for SGOT (ST) is null as well.
If you wish to discard rows with null values just add it in the WHERE clause:
SELECT DISTINCT
*
FROM (
SELECT
LabName
, FIRST_VALUE(LabValue) OVER(PARTITION BY LabName ORDER BY CreatedOn ASC) AS FirstValue
, MAX(LabValue) OVER(PARTITION BY LabName) AS MaxValue
, LAST_VALUE(LabValue) OVER(PARTITION BY LabName ORDER BY CreatedOn ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) LastValue
FROM #T
WHERE LabValue IS NOT NULL
) AS T;

Related

Get sum up of every 2nd day data between a selected date range

I having table like below in Sql Server. I need to get data within in a date range, for example -: StartDate = '2020-09-01' and EndDate = '2020-09-11'. Its quite simple to get data between a date range but complicated part is that,i need to Sum up data in every 2nd day in the selected date range.
For Example -:
As in the above image, i need to Sum up of SKU in every 2nd day in single column. Could anyone help me out with the query for this result output.
CREATE TABLE #Temp
(
Sku Nvarchar(50),
OrderDate DateTime,
Quantity Int,
)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#xyz','2020-09-01 00:00:00.000',2)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#xyz','2020-09-02 00:00:00.000',1)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#xyz','2020-09-03 00:00:00.000',3)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#xyz','2020-09-04 00:00:00.000',4)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#xyz','2020-09-05 00:00:00.000',5)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#xyz','2020-09-06 00:00:00.000',6)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#xyz','2020-09-07 00:00:00.000',2)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#xyz','2020-09-08 00:00:00.000',1)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#xyz','2020-09-09 00:00:00.000',3)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#xyz','2020-09-10 00:00:00.000',1)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#xyz','2020-09-11 00:00:00.000',10)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#abc','2020-09-01 00:00:00.000',1)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#abc','2020-09-02 00:00:00.000',10)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#abc','2020-09-03 00:00:00.000',10)
select * from #Temp

Use row_number() window function to generate a sequence number per Sku. Do a GROUP BY (rn - 1) / 2. HAVING COUNT(*) = 2 is to only consider those with 2 rows
; with
cte as
(
select *, rn = row_number() over (partition by Sku order by OrderDate)
from #Temp
)
select Sku, sum(Quantity)
from cte
group by Sku, (rn - 1) / 2
having count(*) = 2
order by Sku , (rn - 1) / 2
Use STRING_AGG if you want the result in CSV.

With ROW_NUMBER() and LAG() window functions:
select Sku, Quantity
from (
select Sku,
row_number() over (partition by Sku order by OrderDate) rn,
Quantity + lag(Quantity) over (partition by Sku order by OrderDate) Quantity
from #Temp
where OrderDate between '20200901' and '20200911'
) t
where rn % 2 = 0
order by Sku, rn;
See the demo.
Results:
> Sku | Quantity
> :--- | -------:
> #abc | 11
> #xyz | 3
> #xyz | 7
> #xyz | 11
> #xyz | 3
> #xyz | 4

Something like this
;with
string_cte(Sku, OrderDate, Quantity, rn_grp) as(
select *, (row_number() over (partition by Sku order by OrderDate)+1)/2
from #Temp),
sum_cte(Sku, rn_grp, sum_quantity) as (
select Sku, rn_grp, sum(quantity)
from string_cte
group by Sku, rn_grp
having count(*)>1)
select
Sku, string_agg(sum_quantity, ',') within group (order by rn_grp) SecondDaySumUp
from sum_cte
group by Sku
order by 1 desc;
Output
Sku SecondDaySumUp
#xyz 3,7,11,3,4
#abc 11

Return the current month salary and previous month salary in a same table

I have a task to prepare a report generated from a run control page and retrieve the current month salary and previous month salary. In that page, the user will choose the cal_id they want for example in this case the user choose cal id = FEB. Assume the table as below named table_salary:
emplid | cal_id | salary | pymt_date
101 | JAN | 10000 | 2018-01-01
101 | FEB | 15000 | 2018-02-01
And my expected output is
emplid | cur_sal| prev_sal
101 | 15000 | 10000
What I have done so far is like below
SELECT
A.EMPLID, A.SALARY AS CUR_SAL, B.SALARY AS PREV_SAL
FROM
TABLE_SALARY A
LEFT OUTER JOIN
TABLE_SALARY B ON A.EMPLID AND B.EMPLID
AND A.CAL_ID = B.CAL_ID
AND B.PYMT_DT = (SELECT MAX(B1.PYMT_DT)
FROM TABLE_SALARY B1
WHERE B1.EMPLID = B.EMPLID
AND B1.PYMT_DT >= DATEADD(mm, DATEDIFF(mm, 0, B.PYMT_DT) - 1, 0)
AND B1.PYMT_DT < DATEADD(mm, DATEDIFF(mm, 0, PYMT_DT), 0))
But above SQL didn't return the expected output.
Does anyone have an idea how to achieve my expected output?

It should be like this
Use Lead instead of Lag
Create table #t ( id int identity (1,1), Empid int , Month varchar
(10), Salary int, Paymentdate date )
insert into #t (Empid ,Month,Salary,Paymentdate) Select
'1','Jan',1000, '2018-01-01'
insert into #t (Empid ,Month,Salary,Paymentdate) Select
'1','Feb',1500, '2018-02-01'
Select * from #t
SELECT TOP 1
Empid, SALARY AS CUR_SAL, Lead(SALARY, 1, 0) OVER (ORDER BY PaymentDate DESC) AS PREV_SAL FROM
#t ORDER BY
Paymentdate DESC
SELECT TOP 1
Empid, SALARY AS CUR_SAL, LAG(SALARY, 1, 0) OVER (ORDER BY PaymentDate DESC) AS PREV_SAL FROM
#t ORDER BY
Paymentdate DESC

Use a window function to retrieve the previous row in a sorted set. I think this should work.
SELECT TOP 1
EMPLID, SALARY AS CUR_SAL, LEAD(SALARY, 1, 0) OVER (ORDER BY PYMT_DT DESC) AS PREV_SAL
FROM
TABLE_SALARY
ORDER BY
PYMT_DT DESC

SQL Server how to sum max for specific category?

Got a problem when constructing a analysis SQL using SQL Server
The raw data as below
GameID | UsrRegID | Score_User
281 | 1 | 1
281 | 1 | 2
281 | 1 | 3
282 | 1 | 0
282 | 1 | 0
282 | 1 | 1
283 | 1 | 2
283 | 1 | 3
Below is the expect output result:
Distinct_Count_GameID | UsrRegID | Score_User
3 | 1 | 7
The logic for calculating the Score_user as below:
Sum(Max(Score_user) for each GemeID)
So the result need to be 3+1+3=7.
Can using the pure SQL to get the above expecting output?

I think we need to aggregate twice here. One option uses ROW_NUMBER:
WITH cte AS (
SELECT GameID, UsrRegID, Score_User,
ROW_NUMBER() OVER (PARTITION BY GameID, UsrRegID ORDER BY Score_User DESC) rn
FROM yourTable
)
SELECT
UsrRegID,
COUNT(DISTINCT GameID) AS Distinct_Count_GameID,
SUM(Score_User) AS Score_User
FROM cte
WHERE rn = 1
GROUP BY
UsrRegID;

You can't do an aggregate of an aggregate on the same SELECT, you can chain them together with CTE or subqueries.
;WITH Maxs AS
(
SELECT
T.GameID,
T.UsrRegID,
MaxScore = MAX(T.Score_User)
FROM
YourTable AS T
GROUP BY
T.GameID,
T.UsrRegID
)
SELECT
M.UsrRegID,
Distinct_Count_GameID = COUNT(DISTINCT(M.GameID)),
Score_User = SUM(M.MaxScore)
FROM
Maxs AS M
GROUP BY
M.UsrRegID

You can also try like following.
SELECT Count(DISTINCT [rgameid]) Distinct_Count_GameID,
Count(DISTINCT [usrregid]) UsrRegID,
(SELECT Sum(M)
FROM (SELECT Max([score_user]) M
FROM [TableName]
GROUP BY [rgameid])t) AS Score_User
FROM [TableName]
DEMO

First find maximum value of score for each GameId and UsrRegID and then find SUM() for the column, Score_User and group it by the columns, GameID and UsrRegID using GROUP BY clause.
Query
select count(distinct [t].[GameID]) as [GameID], [t].[UsrRegID],
sum([t].[Score_User]) as [Score_User] from(
select [GameID], [UsrRegID], max([Score_User]) as [Score_User]
from [your_table_name]
group by [GameID], [UsrRegID]
) as [t]
group by [t].[UsrRegID];
Or, give a row number based on the descending order of score value and group by GameID and UsrRegID. Then find the count of distinct GameId and sum of maximum score.
Query
;with cte as(
select [rn] = row_number() over(
partition by [GameID], [UsrRegID]
order by [Score_User] desc
), *
from [your_table_name]
)
select count(distinct [GameID]) as [GameID], [UsrRegID],
sum([Score_User]) as [Score_User] from cte
where [rn] = 1
group by [UsrRegID];

Aggregates and a COUNT(Distinct GameID):
declare #raw as table (GameID int, UsrRegID int, Score_user int)
insert into #raw values (281, 1, 1)
,(281, 1, 2)
,(281, 1, 3)
,(282, 1, 0)
,(282, 1, 0)
,(282, 1, 1)
,(283, 1, 2)
,(283, 1, 3)
select count(distinct GameID) as Distinct_Count_GameID, UsrRegID, sum(max_score_user)
from
(
select GameID
, UsrRegID
, max(score_user) as max_score_user
from #raw
group by GameID, UsrRegID
) a
group by a.UsrRegID

Merge rows based on the same date?

I have a table that looks like the below
Date | ID | Period | ArchivedBy | ArchivedFlag | Value
2018-01-20 12:23 |23344 | Q1 | NULL | NULL | 200
2018-01-20 12:20 |23344 | NULL | P.Tills | 1 | NULL
2018-01-20 12:19 |23344 | NULL | NULL | 1 | NULL
This table represents all edits made to an agreement (each new edit gets it's own row). If a value hasn't been changed at all, it will say NULL.
so ideally the above would look like the following
Date | ID | Period | ArchivedBy | ArchivedFlag | Value
2018-01-20 |23344 | Q1 | P.Tills | 1 | 200
This returned row should show the latest state of the agreement based on the date. So for the date in my example (2018-01-20) this one row would be returned, combining all changes that were made throughout the day into 1 row which shows how it looks following all the changes throughout the day.
I hope this makes sense?
Thank you!

Here is one way using Row_Number and Group by
SELECT [Date] = Cast([Date] AS DATE),
ID,
Max(period),
Max(ArchivedBy),
Max(ArchivedFlag),
Max(CASE WHEN rn = 1 THEN [Value] END)
FROM (SELECT *,
Rn = Row_number()OVER(partition BY Cast([Date] AS DATE), ID ORDER BY [Date] DESC)
FROM Yourtable)a
GROUP BY Cast([Date] AS DATE),
ID

I would propose 2 solutions.
Simple
For each day select top 1 NOT NULL value:
SELECT G.ID, G.GD Date, Period.*, ArchivedBy.*, Value.* FROM
(SELECT DISTINCT ID, CAST(Date AS Date) GD FROM T) G
CROSS APPLY (SELECT TOP 1 Period FROM T WHERE Period IS NOT NULL AND CAST(Date AS Date)=GD ORDER BY Date DESC) Period
CROSS APPLY (SELECT TOP 1 ArchivedBy FROM T WHERE ArchivedBy IS NOT NULL AND CAST(Date AS Date)=GD ORDER BY Date DESC) ArchivedBy
CROSS APPLY (SELECT TOP 1 Value FROM T WHERE Value IS NOT NULL AND CAST(Date AS Date)=GD ORDER BY Date DESC) Value
Optimized (intuitively, not tested*)
Use varbinary sorting rules and aggregation, manually order NULLs:
SELECT CAST(Date AS Date), ID,
CAST(SUBSTRING(MAX(Arch),9, LEN(MAX(Arch))) AS varchar(10)) ArchivedBy --unbox
--other columns
FROM
(
SELECT Date, ID,
CAST(CASE WHEN ArchivedBy IS NOT NULL THEN ROW_NUMBER() OVER (PARTITION BY CAST(Date AS Date) ORDER BY Date) ELSE 0 END AS varbinary(MAX))+CAST(ArchivedBy AS varbinary(MAX)) Arch --box
--other columns
FROM T
) Tab
GROUP BY ID, CAST(Date AS Date)

different result Consecutive records in a table using SQL

I have the following Table definition with sample data. In the following table.
"TP" consecutive 3 records 2 times,then "SL" consecutive 1 records 2 times……
id | Result
1 | TP
2 | TP
3 | TP
4 | SL
5 | TP
6 | NONE
7 | NONE
8 | SL
9 | TP
10 | TP
11 | TP
12 | SL
13 | SL
14 | SL
And I am looking for a result like this:
comboNum | num
TP_3 | 2
SL_1 | 2
TP_1 | 1
SL_3 | 1
Any suggestions?

You can as the below
DECLARE #Tbl TABLE (Id INT, Result VARCHAR(10))
INSERT INTO #Tbl
VALUES
(1,'TP')
,(2,'TP')
,(3,'TP')
,(4,'SL')
,(5,'TP')
,(6,'NONE')
,(7,'NONE')
,(8,'SL')
,(9,'TP')
,(10,'TP')
,(11,'TP')
,(12,'SL')
,(13,'SL')
,(14,'SL')
;WITH CTE1
AS
(
SELECT *, ROW_NUMBER() OVER (ORDER BY Result, Id) RowId FROM #Tbl
),CTE2
AS
(
SELECT
Result,
MAX(C.Id) - MIN(C.Id) Cons,
MIN(C.Id) StartP,
MAX(C.Id) EndP
FROM
CTE1 C
WHERE
c.Result <> 'NONE'
GROUP BY
C.Result,
C.RowId - C.Id
)
SELECT
C.Result + '_' + CAST(C.Cons + 1 AS VARCHAR(50)) AS comboNum,
COUNT(*) AS Num
FROM
CTE2 C
GROUP BY
C.Result,
C.Cons
ORDER BY Num DESC
Result:
comboNum Num
------------------ -----------
TP_3 2
SL_1 2
TP_1 1
SL_3 1

Two CTEs with tricky ROW_NUMBER() sequence:
;WITH cte as (
SELECT id,
Result,
ROW_NUMBER() OVER (PARTITION BY Result ORDER BY id) - ROW_NUMBER() OVER (ORDER BY id) as seq
FROM YourTable
WHERE Result != 'NONE'
), final AS (
SELECT MIN(id) as mid,
Result +'_'+ CAST(MAX(id)-MIN(id)+1 as nvarchar(max)) as comboNum
FROM cte
GROUP BY Result, seq
)
SELECT comboNum,
COUNT(mid) as num
FROM final
GROUP BY comboNum
ORDER BY MIN(mid)
Output:
comboNum num
TP_3 2
SL_1 2
TP_1 1
SL_3 1

Declare #tblTest AS TABLE(
ID INT,
Result VARCHAR(50)
)
INSERT INTO #tblTest VALUES(1,'TP')
,(2,'TP')
,(3,'TP')
,(4,'SL')
,(5,'TP')
,(6,'NONE')
,(7,'NONE')
,(8,'SL')
,(9,'TP')
,(10,'TP')
,(11,'TP')
,(12,'SL')
,(13,'SL')
,(14,'SL')
;WITH X AS
(
SELECT
T.*,
ROW_NUMBER() OVER (ORDER BY ID) AS SrNo,
ROW_NUMBER() OVER (PARTITION BY Result ORDER BY id) AS PartNo
FROM #tblTest T
WHERE Result<>'NONE'
)
SELECT
ComboNum,
COUNT(Occurance) AS Num
FROM
(
SELECT
Result +'_'+ CAST((max(ID)-min(ID))+1 AS VARCHAR(5)) AS ComboNum,
(MAX(ID)-MIN(ID))+1 AS Occurance,
MIN(SrNo) AS SrNo
FROM X
GROUP BY Result, (SrNo - PartNo)
) Z
GROUP BY ComboNum,Occurance
ORDER BY MIN(SrNo)
Output:

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight

Select First, Max, and Last non-null value per group - sql-server

Related

Get sum up of every 2nd day data between a selected date range

Return the current month salary and previous month salary in a same table

SQL Server how to sum max for specific category?

Merge rows based on the same date?

different result Consecutive records in a table using SQL

Categories

Resources