Missing something when converting rows to columns in SQL - sql-server

I am trying to convert rows to columns from a table, I was able to get the result set but for few records I am missing the logic. Below is the table and SQL I am running, if you the result of the query for P123 PROCEDURE_END should be NULL, Instead of NULL I am getting the PROCEDURE_BEGIN value. I am not sure what I am missing here, please help me out with this.
INSERT INTO #PAT_TEST
VALUES ( 'P123', 'D457', '2015-11-29 01:48:00.000', 1 )
, ( 'P123', 'D457', NULL, 0 )
, ( 'P872', 'D457', '2015-11-30 02:48:00.000', 1 )
, ( 'P872', 'D457', '2015-11-30 03:48:00.000', 0 );
SELECT PAT_ID
, DOC_ID
, PROCEDURE_BEGIN = MIN(PROC_TIME)
, PROCEDURE_END = MAX(PROC_TIME)
FROM ( SELECT PAT_ID
, DOC_ID
, PROC_TIME
, rn = ( ROW_NUMBER() OVER ( PARTITION BY PAT_ID, DOC_ID ORDER BY PROC_TIME ) - 1 ) / 2
FROM #PAT_TEST
) a
GROUP BY PAT_ID
, DOC_ID
, a.rn
ORDER BY PAT_ID
, DOC_ID
, PROCEDURE_BEGIN;

Would this work for you:
If null should be treated as the largest value:
SELECT PAT_ID ,DOC_ID, PROCEDURE_BEGIN=MIN(ISNULL(PROC_TIME,'99991231')),
PROCEDURE_END=CASE WHEN MAX(ISNULL(PROC_TIME,'99991231')) ='99991231' THEN NULL ELSE MAX(ISNULL(PROC_TIME,'99991231')) END
FROM( SELECT PAT_ID ,DOC_ID,PROC_TIME
,rn=(ROW_NUMBER() OVER (PARTITION BY PAT_ID,DOC_ID ORDER BY PROC_TIME)-1)/2
FROM #PAT_TEST
) a
GROUP BY PAT_ID ,DOC_ID, rn
ORDER BY PAT_ID,DOC_ID, PROCEDURE_BEGIN
If null should be treated as the Smallest value:
SELECT PAT_ID ,DOC_ID, PROCEDURE_BEGIN=CASE WHEN MIN(ISNULL(PROC_TIME,0)) = '19000101' THEN NULL ELSE MIN(ISNULL(PROC_TIME,0)) END,
PROCEDURE_END=MAX(PROC_TIME )
FROM( SELECT PAT_ID ,DOC_ID,PROC_TIME
,rn=(ROW_NUMBER() OVER (PARTITION BY PAT_ID,DOC_ID ORDER BY PROC_TIME)-1)/2
FROM #PAT_TEST
) a
GROUP BY PAT_ID ,DOC_ID, rn
ORDER BY PAT_ID,DOC_ID, PROCEDURE_BEGIN

Related

Update null values by value in same column

I have a table in MS SQL Server, where are some null values in column "value"
Group ID Value
A 1 10
A 2
A 3
A 4 40
B 1
B 2 20
B 3 30
B 4
I want to update null values by not null in the same group with with the first higher ID, or if there is not any higher in same group, first lower. So the result should look like this.
Group ID Value
A 1 10
A 2 40
A 3 40
A 4 40
B 1 20
B 2 20
B 3 30
B 4 30
Thanks!
You can use windowed version of SUM function in order to determine islands of NULL valued records along with the record having the higher ID in the same group:
SELECT [Group], ID, Value,
SUM(CASE WHEN Value IS NULL THEN 0 ELSE 1 END) OVER
(PARTITION BY [Group] ORDER BY ID DESC) AS grp
FROM mytable
Output:
Group ID Value grp
-----------------------
A 4 40 1
A 3 30 2
A 2 NULL 2
A 1 NULL 2
B 4 40 1
B 3 NULL 1
B 2 20 2
B 1 10 3
You can now wrap the above query in a CTE and use another CTE to do the update:
;WITH CTE AS (
SELECT [Group], ID, Value,
SUM(CASE WHEN Value IS NULL THEN 0 ELSE 1 END) OVER
(PARTITION BY [Group] ORDER BY ID DESC) AS grp
FROM mytable
), ToUpdate AS (
SELECT [Group], ID, Value,
MAX(Value) OVER (PARTITION BY [Group], grp) AS group_value
FROM CTE
)
UPDATE ToUpdate
SET Value = group_value
WHERE Value IS NULL
Demo here
Edit:
The above query doesn't handle the edge case where the very last record within a Group slice is NULL. To handle this case as well you can use the following query:
;WITH CTE AS (
SELECT [Group], ID, Value,
SUM(CASE WHEN Value IS NULL THEN 0 ELSE 1 END) OVER
(PARTITION BY [Group] ORDER BY ID DESC) AS grp,
SUM(CASE WHEN Value IS NULL THEN 0 ELSE 1 END) OVER
(PARTITION BY [Group] ORDER BY ID) AS grp2
FROM mytable
), ToUpdate AS (
SELECT [Group], ID, Value,
MAX(Value) OVER (PARTITION BY [Group], grp) AS group_value,
MAX(Value) OVER (PARTITION BY [Group], grp2) AS group_value2
FROM CTE
)
UPDATE ToUpdate
SET Value = COALESCE(group_value, group_value2)
WHERE Value IS NULL
Demo here
Please try this-
DATA GENERATION
DECLARE #T TABLE
(
GroupCd CHAR(1),
Id INT,
Value INT
)
INSERT INTO #T
VALUES('A',1,10),
('A',2,NULL),
('A',3,NULL),
('A',4,40),
('B',1,NULL),
('B',2,20),
('B',3,30),
('B',4,NULL)
SOLUTION
UPDATE a
SET a.Value = b.Value
FROM #T a
INNER JOIN
(
SELECT a.GroupCd,a.Id,Coalesce(a.Value,z.Value,z1.Value) Value
FROM #T a
OUTER APPLY
(
SELECT TOP 1 Value
FROM #T b
WHERE a.GroupCd = b.GroupCd
AND b.Value IS NOT NULL AND a.Id < b.Id
ORDER BY Id
)z
OUTER APPLY
(
SELECT TOP 1 Value
FROM #T b
WHERE a.GroupCd = b.GroupCd
AND b.Value IS NOT NULL AND a.Id > b.Id
ORDER BY Id DESC
)z1
)b ON a.GroupCd = b.GroupCd AND a.Id = b.Id
SELECT * FROM #T
OUTPUT
GroupCd Id Value
------- ----------- -----------
A 1 10
A 2 40
A 3 40
A 4 40
B 1 20
B 2 20
B 3 30
B 4 30
(8 rows affected)
You Can try This simple Method
DECLARE #T TABLE
(
GroupCd CHAR(1),
Id INT,
Value INT
)
INSERT INTO #T
VALUES('A',1,NULL),
('A',2,NULL),
('A',3,30),
('A',4,40),
('B',1,10),
('B',2,20),
('B',3,NULL),
('B',4,40)
SELECT
*,
NewVal = COALESCE(Value,(SELECT TOP 1 Value FROM #T WHERE GroupCd = T.GroupCd AND Id > T.Id AND Value IS NOT NULL ORDER BY Id ASC))
FROM #T T
My Result
update MY_TABLE set [value] = [newValue] from (
select [Group] [newGroup],
[Value] [newValue]
from (
select [Group], [Value],
row_number() over (partition by [group] order by [Id] desc) [rn]
from MY_TABLE
where [Value] is not null
) [a] where [rn] = 1
) where [Group] = [newGroup] and [Value] is null

Get list of dates without entries in SQL Server

I am looking to find a solution to this problem. I have a table called LogEntry that stores information used by multiple offices, where they have to log any visitors that come in to their office on any given day. If no visitors come in, they are still required to log "No Visitors" for the day. How do I run a query that pulls all dates where an office failed to create even a "No Visitors" log?
I've looked at this question (and the article linked within), but even adapting that query, I'm only able to create a blank row for a date where an office is missing an entry for a date, not specify the actual office that did not create an entry. Is there a way to do what I'm trying to do?
declare #temp table (
CDate datetime,
loc_id varchar(50)
)
insert into #temp SELECT DISTINCT entryDate, locationID FROM LogEntry WHERE entryDate >= '05/01/2017' AND entryDate <= '07-31-2017'
;with d(date) as (
select cast('05/01/2017' as datetime)
union all
select date+1
from d
where date < '07/31/2017'
)
select DISTINCT t.loc_id, CONVERT(date, d.date)
FROM d LEFT OUTER JOIN #temp t ON d.date = t.CDate
GROUP BY t.loc_id, d.date
ORDER BY t.loc_id
As I said, this query returns me a list of dates in the date range, and all locations that submitted entries on that date, but I'd like to find a way to extract essentially the opposite information: if an office (specified by locationID) did not submit an entry on a given day, return only those locationIDs and the dates that they missed.
Sample data
EntryID | locationID | entryDate
=================================
1 1 07-01-2017
2 1 07-02-2017
3 2 07-02-2017
4 1 07-04-2017
Expected Result (for date range of 07-01 to 07-04)
locationID | missedEntryDate
============================
1 07-03-2017
2 07-01-2017
2 07-03-2017
2 07-04-2017
Your first step was good, you create a list of all dates, but you also need a list of all locations. Then you create a cross join to have all combinations and then you perform the left join to find out what is missing.
;with allDates(date) as (
select cast('05/01/2017' as datetime)
union all
select date+1
from d
where date < '07/31/2017'
), allLocations as (
SELECT DISTINCT loc_id
FROM #temp
), allCombinations as (
SELECT date, loc_id
FROM allDates
CROSS JOIN allLocations
)
SELECT AC.loc_id, AC.date
FROM allCombinations AC
LEFT JOIN #temp t
ON AC.date = t.CDate
AND AC.loc_id = t.loc_id
WHERE t.loc_id IS NULL -- didnt find a match on #temp
If your dataset is not too large you can try this:
select t.loc_id, CONVERT(date, d.date)
FROM d
-- Cross join dates to all available locs
CROSS JOIN (SELECT DISTINCT loc_id FROM #temp ) AS Locs
LEFT JOIN
( SELECT loc_id, t.CDate
FROM #temp
GROUP BY loc_id, d.date ) AS t ON d.date = t.CDate AND Locs.loc_id = t.loc_id
ORDER BY Locs.loc_id
This should be a bit faster:
;WITH cte AS (
SELECT a.LocID, RangeStart.CDate, ( CASE WHEN Input.LocID IS NULL THEN 1 ELSE 0 END ) AS IsMissing
FROM ( SELECT DISTINCT LocID FROM #temp ) AS a
CROSS JOIN ( SELECT CONVERT( DATETIME, '2017-05-01' ) AS CDate ) AS RangeStart
LEFT JOIN
( SELECT LocID, MIN( CDate ) AS CDate
FROM #temp
WHERE CDate = '2017-05-01'
GROUP BY LocID ) AS Input ON a.LocID = Input.LocID AND RangeStart.CDate = Input.CDate
UNION ALL
SELECT a.LocID, a.CDate + 1 AS CDate,
ISNULL( ItExists, 0 ) AS IsMissing
FROM cte AS a
OUTER APPLY( SELECT LocID, 1 AS ItExists FROM #temp AS b WHERE a.LocID = b.LocID AND a.CDate + 1 = b.CDate ) AS c
WHERE a.CDate < '2017-07-01'
)
SELECT * FROM cte OPTION( MAXRECURSION 0 )
You can also add an index:
CREATE INDEX IX_tmp_LocID_CDate ON #temp( LocID, CDate )
Sample data set for the second query:
CREATE TABLE #temp( LocID VARCHAR( 50 ), CDate DATETIME )
INSERT INTO #temp
VALUES
( '1', '2017-05-01' ), ( '1', '2017-05-02' ), ( '1', '2017-05-03' ), ( '1', '2017-05-04' ), ( '1', '2017-05-05' ),
( '2', '2017-05-01' ), ( '2', '2017-05-02' ), ( '2', '2017-05-03' ), ( '2', '2017-05-04' ), ( '2', '2017-05-05' )
;WITH d AS (
SELECT CAST( '05/01/2017' AS DATETIME ) AS date
UNION ALL
SELECT date + 2
FROM d
WHERE date < '2018-07-31'
)
INSERT INTO #temp
SELECT LocID, d.date
FROM ( SELECT DISTINCT LocID FROM #temp ) AS a
CROSS JOIN d
OPTION( MAXRECURSION 0 )

SQL Server Ranking issue

I am trying to apply ranking to my data set the logic is as follows:
For each ID , Order by ID2 ASC and Order by IsMaster Desc rank the row 1 and only change it when the ID4 value changes
My dataset and desired output looks like:
Test data
CREATE TABLE Test_Table
(ID INT ,ID2 INT, IsMaster INT, ID4 VARCHAR(10))
GO
INSERT INTO Test_Table (ID ,ID2 , IsMaster , ID4 )
VALUES
(1, 101, 1 ,'AAA') -- 1 <-- Desired output for rank
,(1, 102, 0 ,'AAA') -- 1
,(1, 103, 0 ,'AAB') -- 2
,(1, 104, 0 ,'AAB') -- 2
,(1, 105, 0 ,'CCC') -- 3
,(2, 101, 1 ,'AAA') -- 1
,(2, 102, 0 ,'AAA') -- 1
,(2, 103, 0 ,'AAA') -- 1
,(2, 104, 0 ,'AAB') -- 2
,(2, 105, 0 ,'CCC') -- 3
this is what I have tried so far:
SELECT *
,DENSE_RANK() OVER (PARTITION BY ID ORDER BY ID2 ASC, IsMaster DESC ) rn
FROM Test_Table
please please please help me thank you.
This is a island/gap problem.
First you use LAG() to see if you have a different ID4 on the same partition.
Is important you also need partition by IsMaster
Then you create the islands when ID4 changes.
Finally use comulative SUM() to get the proper rank.
Sql Demo
WITH id4_change as (
SELECT *,
LAG(ID4) OVER (PARTITION BY ID, IsMaster ORDER BY ID2) as prev
FROM Test_Table
), islands as (
SELECT *,
CASE WHEN ID4 = PREV
THEN 0
ELSE 1
END as island
FROM id4_change
)
SELECT *,
SUM(island) OVER (PARTITION BY ID, IsMaster ORDER BY ID2) rank
FROM islands
ORDER BY ID, ID2, IsMaster DESC
;
OUTPUT: You can see when ID4 = PREV doesnt create a new "Island" so have same rank.
EDIT: You can simplify first two querys
WITH id4_change as (
SELECT *,
CASE WHEN ID4 = LAG(ID4) OVER (PARTITION BY ID, IsMaster ORDER BY ID2)
THEN 0
ELSE 1
END as island
FROM Test_Table
)
SELECT *,
SUM(island) OVER (PARTITION BY ID, IsMaster ORDER BY ID2) rank
FROM id4_change
ORDER BY ID, ID2, IsMaster DESC
;
Another way probably less efficient but it will work.
WITH X AS
(
SELECT *
,ROW_NUMBER() OVER (PARTITION BY ID ORDER BY ID2) RowNum
FROM dbo.Test_Table
)
, CTE_VehicleNumber
as
(
SELECT T.ID , T.ID2, t.IsMaster ,T.ID4 , t.RowNum , 1 as [Rank]
FROM X as T
WHERE T.IsMaster = 1
UNION ALL
SELECT T.ID, T.ID2, t.IsMaster ,T.ID4 , t.RowNum , CASE WHEN t.ID4 <> c.ID4 THEN 1+ C.[Rank]
ELSE 0+ C.[Rank]
END as [Rank]
FROM CTE_VehicleNumber as C
inner join X as T ON T.RowNum = C.RowNum + 1
AND t.ID = c.ID
)
SELECT ID , ID2, IsMaster ,ID4 , [Rank]
FROM CTE_VehicleNumber
ORDER BY ID , ID2, IsMaster ,ID4 , [Rank]
OPTION (MAXRECURSION 0);
Are you sure that your orders of ID2 and IsMaster affect the desired result, considering the rest of the data in ID and ID4?
I just tried to use the following code:
; WITH CTE AS (
SELECT DISTINCT ID, ID4, DENSE_RANK() OVER (ORDER BY ID4) Rnk
FROM #Test_Table
)
SELECT t.*, c.Rnk
FROM #Test_Table t
INNER JOIN CTE c ON t.ID = c.ID AND t.ID4 = c.ID4;
... and even with changing the order of ID2 and IsMaster I can't get it to "misbehave" - IF there's only one IsMaster = 1 per a group of ID4's and no duplicates in ID2.

SQL Server - Select top 2 rows

I'm attempting to write a query that will return
The most recent AccountDate with a record of 0 per locationID
Then the second most recent AccountDate per locationID. The record can be either 1 or 0.
If there are two AccountDates with the same date then return the most recent AccountDate based on DateAccountLoaded
How ever my solution doesn't look very elegant. Has anyone got a better way of achieving this.
Please see below my solution
CREATE TABLE [dbo].[TopTwoKeyed](
ID INT IDENTITY(1,1) PRIMARY KEY(ID),
[LocationID] [int] NULL,
[AccountDate] [date] NULL,
[Record] [tinyint] NULL,
[DateAccountLoaded] [date] NULL
)
INSERT INTO [dbo].[TopTwoKeyed] (
[LocationID],
AccountDate,
Record,
DateAccountLoaded
)
VALUES(1,'2009-10-31',0,'2011-03-23'),
(1,'2008-10-31',1,'2011-03-23'),
(1,'2008-10-31',0,'2010-03-22'),
(1,'2008-10-31',1,'2009-03-23'),
(1,'2011-10-31',1,'2010-03-22'),
(1,'2009-10-31',0,'2010-03-23'),
(2,'2011-10-31',0,'2010-03-23'),
(2,'2010-10-31',0,'2010-03-23'),
(2,'2010-10-31',1,'2010-03-23'),
(2,'2010-10-31',1,'2009-03-23'),
(3,'2010-10-31',0,'2010-03-23'),
(3,'2009-10-31',0,'2010-03-23'),
(3,'2008-10-31',1,'2010-03-23')
-- Get the most recent Account Date per locationID which has a record type of 0
SELECT f.LocationID
,f.AccountDate
,f.DateAccountLoaded
FROM (
SELECT ROW_NUMBER() OVER (PARTITION BY LocationID ORDER BY AccountDate DESC,DateAccountLoaded DESC) AS RowNumber
,LocationID AS LocationID
,AccountDate AS AccountDate
,DateAccountLoaded AS DateAccountLoaded
FROM [dbo].[TopTwoKeyed]
WHERE Record = 0
) f
WHERE f.RowNumber = 1
UNION ALL
SELECT ff.LocationID
,ff.AccountDate
,ff.DateAccountLoaded
FROM (
-- Get the SECOND most recent AccountDate. Can be either Record 0 or 1.
SELECT ROW_NUMBER() OVER (PARTITION BY LocationID ORDER BY AccountDate DESC,DateAccountLoaded DESC) AS RowNumber
,LocationID AS LocationID
,AccountDate AS AccountDate
,DateAccountLoaded 'DateAccountLoaded'
FROM [dbo].[TopTwoKeyed] tt
WHERE EXISTS
(
-- Same query as top of UNION. Get the most recent Account Date per locationID which has a record type of 0
SELECT 1
FROM (
SELECT ROW_NUMBER() OVER (PARTITION BY LocationID ORDER BY AccountDate DESC,DateAccountLoaded DESC) AS RowNumber
,LocationID AS LocationID
,AccountDate AS AccountDate
FROM [dbo].[TopTwoKeyed]
WHERE Record = 0
) f
WHERE f.RowNumber = 1
AND tt.LocationID = f.LocationID
AND tt.AccountDate < f.AccountDate
)
) ff
WHERE ff.RowNumber = 1
-- DROP TABLE [dbo].[TopTwoKeyed]
You could use a row_number subquery to find the most recent account date. Then you can outer apply to search for the next most recent account date:
select MostRecent.LocationID
, MostRecent.AccountDate
, SecondRecent.AccountDate
from (
select row_number() over (partition by LocationID order by
AccountDate desc, DateAccountLoaded desc) as rn
, *
from TopTwoKeyed
where Record = 0
) MostRecent
outer apply
(
select top 1 *
from TopTwoKeyed
where Record in (0,1)
and LocationID = MostRecent.LocationID
and AccountDate < MostRecent.AccountDate
order by
AccountDate desc
, DateAccountLoaded desc
) SecondRecent
where MostRecent.rn = 1
EDIT: To place the rows below eachother, you probably have to use a union. A single row_number can't work because the second row has different criterium for the Record column.
; with Rec0 as
(
select ROW_NUMBER() over (partition by LocationID
order by AccountDate desc, DateAccountLoaded desc) as rn
, *
from TopTwoKeyed
where Record = 0
)
, Rec01 as
(
select ROW_NUMBER() over (partition by LocationID
order by AccountDate desc, DateAccountLoaded desc) as rn
, *
from TopTwoKeyed t1
where Record in (0,1)
and not exists
(
select *
from Rec0 t2
where t2.rn = 1
and t1.LocationID = t2.LocationID
and t2.AccountDate < t1.AccountDate
)
)
select *
from Rec0
where rn = 1
union all
select *
from Rec01
where rn = 1

PIVOT on Common Table Expression

I have a CTE as follows
WITH details
AS ( SELECT FldId
,Rev
,Words
,row_number() OVER ( PARTITION BY FldId ORDER BY Rev DESC ) AS rn
FROM WorkItemLongTexts
WHERE ID = 2855
)
SELECT f.ReferenceName
,d.FldId
,Rev
,Words
FROM details AS d
INNER JOIN Fields AS f ON f.FldId = d.FldId
WHERE d.rn = 1 ;
The above returns the following output
ReferenceName | FldId | Rev | Words
Description 52 2 Description here
Objectives 10257 2 Objectives here
Specification 10258 6 Specification here
Requirements 10259 6 Requirements here
I want to apply PIVOT (or whatever is the best option) so that i can get output as follows
Description | Objectives | Specification | Requirements
Description here Objectives here Specification here Requirements here
Pls. suggest.
Thanks
You do this:
SELECT
FldId,
[Description],
[Objectives],
[Specification],
[Requirements]
FROM (
SELECT
ReferenceName,
FldId,
REV,
Words
FROM CTE
WHERE RowNumber = 1
) t
PIVOT (
MIN(Words)
FOR ReferenceName IN ([Description], [Objectives], [Specification], [Requirements])
) PIV
Or you can add it to your CTE, like this:
;WITH CTE2 AS (
SELECT
FldId,
REV,
[Description],
[Objectives],
[Specification],
[Requirements],
ROW_NUMBER() OVER (PARTITION BY FldId ORDER BY REV DESC) AS RowNumber
FROM TBL
PIVOT (
MIN(Words)
FOR ReferenceName IN ([Description], [Objectives], [Specification], [Requirements])
) PIV
)
SELECT
FldId,
REV,
[Description],
[Objectives],
[Specification],
[Requirements]
FROM CTE2
WHERE RowNumber = 1
WITH details
AS ( SELECT FldId
,Rev
,Words
,row_number() OVER ( PARTITION BY FldId ORDER BY Rev DESC ) AS rn
FROM WorkItemLongTexts
WHERE ID = 2855
),
cte_1
AS ( SELECT f.ReferenceName
,d.FldId
,Rev
,Words
FROM details AS d
INNER JOIN Fields AS f ON f.FldId = d.FldId
WHERE d.rn = 1
)
SELECT max(case [ReferenceName] WHEN 'Descripton' THEN [Words] ELSE NULL END) AS [Descripton]
,max(case [ReferenceName] WHEN 'Objectives' THEN [Words] ELSE NULL END) AS [Objectives]
,max(case [ReferenceName] WHEN 'Specification' THEN [Words] ELSE NULL END) AS [Specification]
,max(case [ReferenceName] WHEN 'Requirements' THEN [Words] ELSE NULL END) AS [Requirements]
FROM cte_1 ;
OR:
-- cte here as above
SELECT Description
,Objectives
,Specification
,Requirements
FROM cte_1 PIVOT ( max(Words) FOR ReferenceName IN ( Description,
Objectives,
Specification,
Requirements ) ) AS PivotTable
Do something like:
with details as (...)
, unpivotted as (select f.ReferenceName, Words
from details as d
inner join Fields as f
on f.FldId=d.FldId
where d.rn =1)
Select *
from unpivotted
pivot
(max(Words) for Description in ([Objectives],[Specification],[Requirements]) p
;

Resources