Select the IDs using group by condition - sql-server

I have a dataset in which I need to find the patients whose disease status stays the same across all of their (consecutive) rows.
Below is a sample dataset, followed by an explanation.
ID Normal Des1 Des2 Des3 Des4
12 0 1 0 0 0
12 1 0 1 0 0
12 1 0 1 0 0
12 1 0 1 0 0
14 0 1 0 1 0
18 1 0 0 0 0
18 1 0 0 0 0
18 1 0 0 0 0
11 0 1 0 0 0
11 0 1 0 0 0
11 0 1 0 0 0
22 1 0 0 0 0
The columns above are the disease flags in the dataset. I need the IDs of the patients who have the same disease status over the whole period, i.e. in every one of their rows.
Suppose I first need the patients who never fall under any disease criterion (IDs 18 and 22); I store those as a new set (Undiseased). Later I need the same kind of result for the Des1 patients (ID 11). I tried the code below to fetch the data, but it returns only partial output.
-- Attempt from the question: list the IDs that have a row with normal = 1 and every Des flag = 0.
-- NOTE: the WHERE clause filters individual rows before GROUP BY runs, so an ID is returned
-- as soon as at least one of its rows matches, not only when all of its rows match.
select ID from tablename where
(normal = '1' and Des1 = '0' and Des2 = '0' and Des3 = '0' and Des4 = '0')
group by ID

You can try the below query using COUNT (Transact-SQL)
function.
-- Sample data: one row per observation of a patient (Id) with three disease flags.
CREATE TABLE MySampleTable (
    Id   int,
    Des1 int,
    Des2 int,
    Des3 int
);

INSERT INTO MySampleTable (Id, Des1, Des2, Des3)
VALUES
    (12, 0, 1, 0),
    (12, 1, 0, 1),
    (12, 1, 0, 1),
    (18, 1, 0, 0),
    (18, 1, 0, 0),
    (11, 0, 1, 0),
    (11, 0, 1, 0);

-- For every Id, count how many different values each flag column takes.
-- An Id whose three counts are all 1 has identical flags in every one of its rows.
WITH flag_variety AS (
    SELECT
        Id,
        COUNT(DISTINCT Des1) AS TotDes1,
        COUNT(DISTINCT Des2) AS TotDes2,
        COUNT(DISTINCT Des3) AS TotDes3
    FROM MySampleTable
    GROUP BY Id
)
SELECT Id
FROM flag_variety
WHERE TotDes1 = 1
  AND TotDes2 = 1
  AND TotDes3 = 1;
It looks like as shown below with the output.
Here is the live db<>fiddle demo.
You can also use the having clause as shown in the query below.
-- Same check without a CTE: keep only the Ids for which each flag column
-- holds exactly one distinct value across all of that Id's rows.
SELECT s.Id
FROM MySampleTable AS s
GROUP BY s.Id
HAVING COUNT(DISTINCT s.Des1) = 1
   AND COUNT(DISTINCT s.Des2) = 1
   AND COUNT(DISTINCT s.Des3) = 1

Demo on db<>fiddle
You can achieve it in this simple way
-- Collapse duplicate (Id, Des1, Des2, Des3) combinations first; an Id that is left
-- with exactly one combination had the same flags in every one of its rows.
;WITH distinct_flag_rows AS (
    SELECT DISTINCT
        src.Id,
        src.Des1,
        src.Des2,
        src.Des3
    FROM MySampleTable AS src
)
SELECT d.Id
FROM distinct_flag_rows AS d
GROUP BY d.Id
HAVING COUNT(d.Id) = 1
Output

You can use CROSS APPLY:
-- Unpivot the four disease flags of every row into (flag, name) pairs, keep only the
-- flags that are set, and return the ids that were only ever flagged with one disease.
-- Ids with no disease flag set in any row produce no pairs and are therefore not returned.
select t.id
from tablename t  -- was `table`, a reserved keyword that cannot be used unquoted as a table name
cross apply
( values (t.Des1, 'Des1'), (t.Des2, 'Des2'), (t.Des3, 'Des3'), (t.Des4, 'Des4')
) tt(DiseaseFlag, DiseaseName)
where tt.DiseaseFlag = 1
group by t.id
having count(distinct tt.DiseaseName) = 1;

Related

How to find the latest record of each group partition by column name in SQL Server

I want to get the top 1 record per deviceimei, ordered by devicetimestamp descending. Among those latest records, I want to count how many have VL1 < 350.
Query
-- Rank each device's readings newest-first and keep the rows ranked 1.
-- rank() gives 1 to every row tied for the latest devicetimestamp, so a device
-- with tied timestamps returns more than one row.
select ranked.*
from
(select
cv.*,
rank() over (partition by cv.deviceimei order by cv.devicetimestamp desc) as data_rank
from
[TransTrak_V_1.0].[dbo].[Current_Voltage] as cv) as ranked  -- SQL Server requires an alias on a derived table
where
ranked.data_rank = 1  -- was `datarank`, which does not match the alias defined above
Sample dataset
A_id DeviceImei DeviceTimeStamp VL1 VL2 VL3 IL1 IL2 IL3 VL12 VL23 VL31 AVL INUT data_rank
999165 8628180465463 2020-10-05 11:45:05.000 0 0 0 0 0 0 0 0 0 0 0 1
999163 8628180465463 2020-10-05 11:41:32.000 0 0 0 0 0 0 0 0 0 0 0 2
999161 8628180465463 2020-10-05 11:38:23.000 0 0 0 0 0 0 0 0 0 0 0 3
A ROW_NUMBER inside a CTE could work.
Essentially what we are doing is creating a sequential row count based on your deviceimei ordered by the datetime field.
-- Number every device's readings newest-first (Seq = 1 is the latest reading),
-- then count how many of those latest readings have VL1 below 350.
;WITH top_n AS (
SELECT v.DeviceImei,
v.VL1,
ROW_NUMBER() OVER(PARTITION BY v.DeviceImei ORDER BY v.DeviceTimeStamp DESC) as Seq
FROM #Voltage v
)
-- No GROUP BY: grouping by DeviceImei returned one row (each with count 1) per device
-- instead of the single overall total; without it an empty match also yields 0 rather than no row.
SELECT COUNT(*) AS [RowCount]
FROM top_n
WHERE Seq = 1
AND VL1 < 350
RowCount
1
Setup
-- Reset: remove any #Voltage temp table that already exists.
IF OBJECT_ID('tempDB..#Voltage', 'U') IS NOT NULL
    DROP TABLE #Voltage
GO
-- Temp table mirroring the columns of the sample dataset shown in the question.
CREATE TABLE #Voltage
(
    A_id            BIGINT,
    DeviceImei      BIGINT,
    DeviceTimeStamp DATETIME2(2),
    VL1             INT,
    VL2             INT,
    VL3             INT,
    IL1             INT,
    IL2             INT,
    IL3             INT,
    VL12            INT,
    VL23            INT,
    VL31            INT,
    AVL             INT,
    INUT            INT,
    data_rank       INT
)
-- The three sample readings for the single device in the question.
INSERT INTO #Voltage
    (A_id, DeviceImei, DeviceTimeStamp, VL1, VL2, VL3, IL1, IL2, IL3, VL12, VL23, VL31, AVL, INUT, data_rank)
VALUES
    (999165, 8628180465463, '20201005 11:45:05.000', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),
    (999163, 8628180465463, '20201005 11:41:32.000', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2),
    (999161, 8628180465463, '20201005 11:38:23.000', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3)

Adding two dynamic columns in the table through stored procedure and these columns must have data based on condition

I am trying to add two dynamic columns HeaderText and IsShowHeader to my table through a stored procedure.
In the first column, the first row must have text as Header1 and after 8 rows text must be Header2, then again after 8 rows text must be Header3 and so on.
In the second column value must be 1, and next 8 rows must have 0, the 9th row value must be 1 again, then the next 8 rows must have 0 like this...
-- Returns every row of Mytable1 ordered by RID, flagging every 9th row
-- (row 1, 10, 19, ...) as a header row.
--   IsShowHeader : 1 on header rows, 0 otherwise
--   TotalRows    : total number of rows returned
--   HeaderText   : 'Header1', 'Header2', ... on header rows, NULL otherwise
--                  (appended last so the existing column positions do not move)
ALTER PROCEDURE [dbo].[SkipRow]
AS
BEGIN
SELECT X.RID
,X.FirstName
,X.LastName
-- The original test (Row# % 9 = 0 AND Row# = 0) could never be true because
-- ROW_NUMBER() starts at 1; shifting by one makes rows 1, 10, 19, ... match.
,(CASE WHEN (X.[Row#] - 1) % 9 = 0 THEN 1 ELSE 0 END) AS IsShowHeader
,(COUNT(*) OVER ()) AS TotalRows
-- Integer division turns rows 1, 10, 19, ... into header numbers 1, 2, 3, ...
,(CASE WHEN (X.[Row#] - 1) % 9 = 0
THEN 'Header' + CONVERT(varchar(10), (X.[Row#] - 1) / 9 + 1)
END) AS HeaderText
FROM
(
SELECT
M.RID, M.FirstName, M.LastName, ROW_NUMBER() OVER(ORDER BY M.RID) AS [Row#]
FROM Mytable1 M WITH(NOLOCK)  -- NOLOCK kept from the original; it allows dirty reads
)X
End
Output:
HeaderText IsShowHeader
1 Header1 1
2 Null 0
3 Null 0
4 Null 0
5 Null 0
6 Null 0
7 Null 0
8 Null 0
9 Null 0
10 Header2 1
11 Null 0
12 Null 0
13 Null 0
14 Null 0
15 Null 0
16 Null 0
17 Null 0
18 Null 0
19 Header3 1
you already have the [Row#], use the modulus operator % to get every 9 rows
-- Select-list fragment (not a complete statement): marks rows 1, 10, 19, ... as header rows.
-- ([Row#] - 1) / 9 + 1 numbers the headers 1, 2, 3, ... — assumes [Row#] is the integer
-- ROW_NUMBER() value from the question's query, so the division is integer division.
-- There is no ELSE branch, so HeaderText is NULL on non-header rows.
HeaderText = case when ([Row#] - 1) % 9 = 0
then 'Header' + convert(varchar(10), ([Row#] - 1) / 9 + 1)
end,
-- 1 on the same rows that receive a HeaderText, 0 everywhere else
IsShowHeader = case when ([Row#] - 1) % 9 = 0
then 1 else 0 end
You can try this:
-- Step 1: tag every 9th row (id = 1, 10, 19, ...) with the literal 'Header'.
;WITH tagged AS (
    SELECT
        src.id,
        CASE WHEN (src.id - 1) % 9 = 0 THEN 'Header' ELSE NULL END AS HeaderText
    FROM dbo.test AS src
),
-- Step 2: a running count of tagged rows up to the current row gives the header number.
numbered AS (
    SELECT
        tg.id,
        tg.HeaderText,
        COUNT(tg.HeaderText) OVER (ORDER BY tg.id ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS IndexNumber
    FROM tagged AS tg
),
-- Step 3: append the header number to the tag; untagged rows stay NULL.
labelled AS (
    SELECT
        nb.id,
        CASE
            WHEN nb.HeaderText IS NOT NULL THEN nb.HeaderText + CAST(nb.IndexNumber AS VARCHAR(10))
            ELSE NULL
        END AS HeaderText
    FROM numbered AS nb
)
-- Step 4: derive the 1/0 flag from the presence of a header text.
SELECT
    lb.id,
    lb.HeaderText,
    CASE WHEN lb.HeaderText IS NOT NULL THEN 1 ELSE 0 END AS IsShowHeader
FROM labelled AS lb;
I have illustrated the scenario step by step; however, you can rewrite it more simply, as in the answer that @Squirrel posted.

SQL Server: Sum of Calculated Row within N Months

I am new to SQL Server and have a question regarding summing over a calculated row with a conditional statement.
My data is organized as follows:
ID S_DATE END_DATE MNum CHG DateCHG
---------------------------------------------
1 1/26/2001 2/26/2001 7 NULL 1
1 2/27/2001 3/27/2001 8 1 1
1 3/28/2001 1/9/2003 9 1 21
1 1/10/2003 3/2/2004 11 2 14
1 3/3/2004 10/14/2004 10 -1 7
1 10/15/2004 6/22/2005 9 -1 8
1 6/23/2005 3/9/2008 8 -1 33
1 3/10/2008 1899-12-30 0 NULL -1299
2 9/23/1993 9/11/2000 3 NULL 84
2 1/1/1999 12/31/1998 3 0 -1
2 9/12/2000 11/13/2001 2 -1 14
2 11/14/2001 1899-12-30 0 NULL -1223
DateCHG is equal to the number of months between S_DATE & End_Date. I would like to find the SUM of CHG for each ID where the CHG occurs within 3 months of previous date.
Here is my current code (NOTE: Column headers are different from data above for formatting purposes. Also I cannot write to this database so only in Query format)
-- For every long-term ('LT') rating row, compute:
--   CHG     : the rating number minus the previous row's rating number, or NULL when the
--             current or previous rating number is 0 or the previous row belongs to a
--             different MAST_ISSU_NUM
--   DateCHG : DATEDIFF in months between RATG_DATETIME and RATG_END_DATETIME
SELECT
*,
CASE
WHEN MratingNum = 0 OR
LAG(MratingNum) OVER (ORDER BY MAST_ISSU_NUM, RATG_DATETIME) = 0 OR  -- was `OVER (OVER BY ...)`, a syntax error
MAST_ISSU_NUM <> LAG(MAST_ISSU_NUM) OVER (ORDER BY MAST_ISSU_NUM, RATG_DATETIME) --OR
-- Optional extra conditions, disabled by the author:
--LAG(MratingNum) OVER (ORDER BY MAST_ISSU_NUM, RATG_DATETIME) < 12 OR --By Credit Rating
--LAG(MratingNum) OVER (ORDER BY MAST_ISSU_NUM, RATG_DATETIME) < 18
THEN NULL
ELSE CAST(MratingNum AS INT) - LAG(MratingNum) OVER (ORDER BY MAST_ISSU_NUM, RATG_DATETIME)
END AS CHG,
DATEDIFF(month, RATG_DATETIME, RATG_END_DATETIME) AS DateCHG
FROM
MOODYS_DRD.dbo.DEBT_RATG AS t1
LEFT JOIN
sandbox.dbo.RatingMap AS t2 ON t1.RATG_TXT = t2.MratingValue
WHERE
RATG_TYP_CD = 'LT'
ORDER BY
MAST_ISSU_NUM, RATG_DATETIME
So for example the output would look something like this:
ID S_DATE .... SumCHG
1 1/26/2001.... NULL
1 2/27/2001.... NULL
1 3/28/2001.... 2
1 1/10/2003.... NULL
1 3/3/2004 .... NULL
I'm assuming the best approach is to calculate a rolling sum of DateCHG where it is less than 3 and then SUM the CHG column? Thanks all!
EDIT: This is fairly complex so let me try another way of asking the question. For each record I want to look back and find the SUM of CHG within 3 months of the S_DATE. For 3/28/2001, this would include 2/01 and 1/01. The MNum went from 7 to 9 so the SUM of CHG would be 2. However from 3/04, there were no changes in the past 3 months so return NULL. I obviously want to do this per ID so don't want to overlap 3 months from ID 2 to 1. Hope this makes more sense now?
t0 and t are used for setting up the data.
-- t0: the question's sample rows as literals (dates still strings).
with t0 as (
    select v.ID, v.S_DATE, v.END_DATE, v.MNum, v.CHG, v.DateCHG
    from ( values
        ( 1, '1/26/2001',  '2/26/2001',  7,  null, 1),
        ( 1, '2/27/2001',  '3/27/2001',  8,  1,    1),
        ( 1, '3/28/2001',  '1/9/2003',   9,  1,    21),
        ( 1, '1/10/2003',  '3/2/2004',   11, 2,    14),
        ( 1, '3/3/2004',   '10/14/2004', 10, -1,   7),
        ( 1, '10/15/2004', '6/22/2005',  9,  -1,   8),
        ( 1, '6/23/2005',  '3/9/2008',   8,  -1,   33),
        ( 1, '3/10/2008',  '1899-12-30', 0,  null, -1299),
        ( 2, '9/23/1993',  '9/11/2000',  3,  null, 84),
        ( 2, '1/1/1999',   '12/31/1998', 3,  0,    -1),
        ( 2, '9/12/2000',  '11/13/2001', 2,  -1,   14),
        ( 2, '11/14/2001', '1899-12-30', 0,  null, -1223)
    ) v ( ID, S_DATE, END_DATE, MNum, CHG, DateCHG )
),
-- t: the same rows with both date columns cast to date.
t as (
    select
        raw.ID,
        cast(raw.S_DATE as date)   as S_DATE,
        cast(raw.END_DATE as date) as END_DATE,
        raw.MNum,
        raw.CHG,
        raw.DateCHG
    from t0 as raw
)
-- For each row, look at the rows of the same ID whose S_DATE lies in the three months
-- up to and including this row's S_DATE; report the summed CHG only when that window
-- holds at least three rows.
select
    case when win.Cnt >= 3 then win.CHG end as SumCHG,
    cur.*,
    win.*
from t as cur
outer apply (
    select
        sum(prev.CHG) as CHG,
        count(*)      as Cnt
    from t as prev
    where prev.ID = cur.ID
      and prev.S_DATE >= dateadd(month, -3, cur.S_DATE)
      and prev.S_DATE <= cur.S_DATE
) as win
order by
    cur.ID,
    cur.S_DATE;
Use CTE for your tables,
-- t: the asker's rating query (CHG = change versus the previous rating row,
-- DateCHG = DATEDIFF in months between start and end of the rating).
;with t as (
SELECT
*,
CASE
WHEN MratingNum = 0 OR
LAG(MratingNum) OVER (ORDER BY MAST_ISSU_NUM, RATG_DATETIME) = 0 OR  -- was `OVER (OVER BY ...)`, a syntax error
MAST_ISSU_NUM <> LAG(MAST_ISSU_NUM) OVER (ORDER BY MAST_ISSU_NUM, RATG_DATETIME) --OR
--LAG(MratingNum) OVER (ORDER BY MAST_ISSU_NUM, RATG_DATETIME) < 12 OR --By Credit Rating
--LAG(MratingNum) OVER (ORDER BY MAST_ISSU_NUM, RATG_DATETIME) < 18
THEN NULL
ELSE CAST(MratingNum AS INT) - LAG(MratingNum) OVER (ORDER BY MAST_ISSU_NUM, RATG_DATETIME)
END AS CHG,
DATEDIFF(month, RATG_DATETIME, RATG_END_DATETIME) AS DateCHG
FROM
MOODYS_DRD.dbo.DEBT_RATG AS t1
LEFT JOIN
sandbox.dbo.RatingMap AS t2 ON t1.RATG_TXT = t2.MratingValue
WHERE
RATG_TYP_CD = 'LT'
)
-- For each row, sum CHG over the rows of the same issuer that start within the three
-- months up to and including this row's start; report it only when that window holds
-- at least three rows.
-- The CTE exposes the real column names, so the window uses MAST_ISSU_NUM / RATG_DATETIME
-- (the sample data calls them ID / S_DATE — NOTE(review): mapping inferred from the
-- question's ORDER BY and DateCHG definition; confirm).
select case when p.Cnt >= 3 then p.CHG
end SumCHG,
*
from t
outer apply ( select sum(u.CHG) CHG ,
count(*) Cnt
from t u
where u.MAST_ISSU_NUM = t.MAST_ISSU_NUM
and u.RATG_DATETIME between dateadd(month, -3,
t.RATG_DATETIME)
and t.RATG_DATETIME
) p
order by t.MAST_ISSU_NUM ,
t.RATG_DATETIME;

Query to identify contiguous ranges

I'm trying to write a query on the below data set to add a new column which has some sort of "period_id_group".
contiguous new_period row_nr new_period_starting_id
0 0 1 0
1 1 2 2
1 0 3 0
1 0 4 0
1 1 5 5
1 0 6 0
What I'm trying to get is:
contiguous new_period row_nr new_period_starting_id period_id_group
0 0 1 0 0
1 1 2 2 2
1 0 3 0 2
1 0 4 0 2
1 1 5 5 5
1 0 6 0 5
The logic is that for each 0 value in the new_period_starting_id, it has to get the >0 value from the row above.
So, for row_nr = 1 since there is no row before it, period_id_group is 0.
For row_nr = 2, since this is a new period (marked by new_period = 1), the period_id_group is 2 (the id of this row).
For row_nr = 3 since it's part of a contiguous range (because contiguous = 1), but is not the start of the range, because it's not a new_period (new_period = 0), its period_id_group should inherit the value from the previous row (which is the start of the contiguous range) - in this case period_id_group = 2 also.
I've tried multiple versions but couldn't get a good solution for SQL Server 2008R2, since I can't use LAG().
What I have, so far, is a shameful:
-- Attempt from the question: pairs every row with each distinct new_period_starting_id
-- that is less than or equal to the row's own value.
-- NOTE(review): all three CASE branches return 1, so the WHERE clause never filters out a row.
-- NOTE(review): this uses #temp2 and rn, while the sample data script below uses tmp2 and
-- row_nr — presumably the same table and column; confirm.
select *
from #temp2 t1
left join (select distinct new_period_starting_id from #temp2) t2
on t1.new_period_starting_id >= t2.new_period_starting_id
where 1 = case
when contiguous = 0
then 1
when contiguous = 1 and t2.new_period_starting_id > 0
then 1
else 1
end
order by t1.rn
Sample data script:
-- Sample data for the question.
-- `declare #tmp2 table (...)` is not valid T-SQL (a table variable name must start with @).
-- A local temporary table is created instead, keeping the #tmp2 name the queries use.
create table #tmp2 (contiguous int
, new_period int
, row_nr int
, new_period_starting_id int);
insert into #tmp2 (contiguous, new_period, row_nr, new_period_starting_id)
values (0, 0, 1, 0)
, (1, 1, 2, 2)
, (1, 0, 3, 0)
, (1, 0, 4, 0)
, (1, 1, 5, 5)
, (1, 0, 6, 0);
Any help is appreciated.
So, if I'm understanding you correctly, you just need one additional column.
-- period_id_group = the new_period_starting_id of the closest row at or before the
-- current row (by row_nr) that starts a period; 0 when no such row exists yet.
SELECT t1.contiguous, t1.new_period, t1.row_nr, t1.new_period_starting_id,
-- ISNULL rather than COALESCE: SQL Server expands COALESCE into a CASE that can
-- evaluate the subquery twice.
ISNULL((SELECT TOP 1 t2.new_period_starting_id
FROM YourTable t2
WHERE t2.row_nr <= t1.row_nr
AND t2.new_period_starting_id > 0 /* was period_id_group, which is the output alias, not a table column */
ORDER BY t2.row_nr DESC /* required: picks the nearest preceding period start */), 0) AS period_id_group
FROM YourTable t1
Here is yet another option for this.
-- For each row, find the closest row at or before it (by row_nr) that either starts a
-- period (new_period_starting_id > 0) or is the very first row, and use that row's
-- new_period_starting_id as the group id.
select t1.contiguous
, t1.new_period
, t1.row_nr
, t1.new_period_starting_id
, x.new_period_starting_id as period_id_group  -- aliased: the result previously had two columns with the same name
from #tmp2 t1
outer apply
(
select top 1 t2.new_period_starting_id  -- only this column is used by the outer query
from #tmp2 t2
where (t2.row_nr = 1
or t2.new_period_starting_id > 0)
and t1.row_nr >= t2.row_nr
order by t2.row_nr desc
) x
Found the solution:
-- Asker's final solution.
-- v is the largest f1 among earlier rows (rn < current rn) that start a period
-- (new_period_starting_id > 0). Rows that continue a period (contiguous = 1 and
-- new_periods = 0) take v; the other two listed cases take the row's own f1.
-- NOTE(review): f1, rn and new_periods do not appear in the sample data script —
-- presumably f1 is the row id and rn / new_periods correspond to row_nr / new_period; confirm.
select *
, case
when contiguous = 0
then f1
when contiguous = 1 and new_periods = 1
then f1
when contiguous = 1 and new_periods = 0
then v
else NULL
end [period_group]
from (
select *
, (select max(f1) from #temp2 where new_period_starting_id > 0 and rn < t1.rn) [v]
from #temp2 t1
) rs
order by rn

Calculation percentage of response from a survey table

I have a table in which I store survey data. Each survey consists of 5 questions. A user's answer to each question can be either 1 or 0, and each survey has a date associated with it.
I need to come up with a query that gives me the percentage of people who answered '1' for each question.
My example data:
RecId | AnswerdId | QuestionId | Answer | Date
----------------------------------------------------
1 1 1 1 6/1/2016
2 1 2 0 6/1
3 1 3 1 6/1
4 1 4 0 6/1
5 1 5 1 6/1
6 2 1 0 6/2
7 2 2 0 6/2
8 2 3 1 6/2
9 2 4 1 6/2
10 2 5 1 6/2
I need an output like
Question1 Question2 Question3 Question4 Question5
50% 0% 100% 50% 100%
Can anyone help?
Thanks
The "trick" is to use a CASE expression inside a SUM function to decide which answer values should be counted. In the example below, that SELECT is the innermost query, and it already computes the percentage.
Because you want to show each value in a different column, you can either use the approach suggested by @JohnCappelletti and compute each column separately, or PIVOT that result set. Be aware that the question ids used as column names (written as [1], [2], etc.) can't be read dynamically without using dynamic SQL.
I prefer to return all results as decimals because the presentation layer (not the data layer) should be responsible for formatting the values.
-- SurveyData: the question's sample rows as literals.
;WITH SurveyData AS
(
SELECT RecId, AnswerdId, QuestionId, Answer
FROM ( VALUES
(01, 1, 1, 1),
(02, 1, 2, 0),
(03, 1, 3, 1),
(04, 1, 4, 0),
(05, 1, 5, 1),
(06, 2, 1, 0),
(07, 2, 2, 0),
(08, 2, 3, 1),
(09, 2, 4, 1),
(10, 2, 5, 1)
) AS v(RecId, AnswerdId, QuestionId, Answer)
)
-- One column per question holding the fraction (0.0 - 1.0) of answers equal to 1.
-- A question with no rows shows 0.
SELECT ISNULL(SUM([1]), 0) [Question1],
ISNULL(SUM([2]), 0) [Question2],
ISNULL(SUM([3]), 0) [Question3],
ISNULL(SUM([4]), 0) [Question4],
ISNULL(SUM([5]), 0) [Question5]
FROM (
-- Only QuestionId and Perc are selected: any extra column (the former Yes / No counts)
-- becomes an implicit PIVOT grouping column and splits the result across several rows.
SELECT QuestionId,
-- NULLIF avoids a divide-by-zero when a question has only NULL answers
SUM(CASE WHEN Answer = 1 THEN 1.0 ELSE 0.0 END) / NULLIF(COUNT(Answer), 0) Perc
FROM SurveyData
GROUP BY QuestionId
) AS SourceTable
PIVOT
( SUM(Perc)
FOR QuestionId IN ([1], [2], [3], [4], [5])
) AS PivotTable
-- Sample data in a table variable. The name must start with @: `Declare #Table table`
-- is not valid T-SQL.
Declare @Table table (RecId int,AnswerID int,QuestionID int,Answer int,Date Date)
Insert into @Table (RecId,AnswerID,QuestionId,Answer,Date) values
(1,1,1, 1,'6/1/2016'),
(2,1,2, 0,'6/1/2016'),
(3,1,3, 1,'6/1/2016'),
(4,1,4, 0,'6/1/2016'),
(5,1,5, 1,'6/1/2016'),
(6,2,1, 0,'6/2/2016'),
(7,2,2, 0,'6/2/2016'),
(8,2,3, 1,'6/2/2016'),
(9,2,4, 1,'6/2/2016'),
(10,2,5, 1,'6/2/2016')
-- cteSum: fraction of '1' answers per question; adding 0.0 forces decimal division.
;with cteSum as (Select QuestionID,Pct = sum(Answer)/(count(*)+0.0) From @Table Group By QuestionID)
-- Spread the per-question fractions into one column each, formatted as whole percentages.
Select Question1 = format(sum(case when QuestionID=1 then Pct else 0 end),'0%')
,Question2 = format(sum(case when QuestionID=2 then Pct else 0 end),'0%')
,Question3 = format(sum(case when QuestionID=3 then Pct else 0 end),'0%')
,Question4 = format(sum(case when QuestionID=4 then Pct else 0 end),'0%')
,Question5 = format(sum(case when QuestionID=5 then Pct else 0 end),'0%')
From cteSum
Returns
Question1 Question2 Question3 Question4 Question5
50% 0% 100% 50% 100%
I used a temp table, #temprecordset, in place of your table, so replace it with your own table name when you use this code. I used a WHILE loop to go from the smallest to the largest QuestionId and stored the calculated percentages in a temp table.
Make sure you drop the temp tables once you have finished printing the results. Hope this helps.
-- Percentage of '1' answers per question, computed one QuestionId at a time.
-- Reads QuestionId and answer from #temprecordset; writes one row per question
-- (question id, percentage) to #QuestionPercent and returns that table.
-- Local variables must start with @: `declare #questionId` is not valid T-SQL.
declare @questionId Integer
declare @maxQuestionId Integer
declare @countofAnswer Integer
declare @countofQuestion Integer
-- explicit scale: a bare `decimal` has scale 0 and would drop the fractional part
declare @percentofQuestionsAnswered decimal(5, 2)

-- Create the result table only when the caller has not already created it.
if object_id('tempdb..#QuestionPercent') is null
create table #QuestionPercent (QuestionId int, PercentAnswered decimal(5, 2))

set @questionId=1
set @maxQuestionId = (select max(questionId) from #temprecordset)
WHILE ( @questionId <= @maxQuestionId )
BEGIN
set @countofAnswer = (select sum(answer) from #temprecordset where QuestionId=@questionId)
set @countofQuestion = (select count(*) from #temprecordset where QuestionId=@questionId)
-- Skip ids with no rows (gaps in QuestionId) instead of dividing by zero.
if @countofQuestion > 0
begin
-- 100.0 forces decimal arithmetic; integer division would truncate e.g. 1 of 3 to 33
set @percentofQuestionsAnswered = @countofAnswer*100.0/@countofQuestion
-- positional insert kept so a pre-existing two-column #QuestionPercent still works
insert into #QuestionPercent values (@questionId,@percentofQuestionsAnswered)
end
SET @questionId =@questionId + 1
END
select * from #QuestionPercent

Resources