SQL Server query to see what changed from month to month - sql-server

I am struggling with developing a query to compare changes in a single table from month to month, example data -
+-----------------------------------------------------------+
| TaxGroupDetails |
+-----------+--+----------+--+-----------+--+---------------+
| Tax Group | | Tax Type | | Geocode | | EffectiveDate |
+-----------+--+----------+--+-----------+--+---------------+
| 2001 | | 1D | | 440011111 | | 1120531 |
| 2001 | | X1 | | 440011111 | | 1120531 |
| 2001 | | D3 | | 440011111 | | 1120531 |
| 2001 | | DGH | | 440011111 | | 1120531 |
| 2001 | | 1D | | 440011111 | | 1130101 |
| 2001 | | X1 | | 440011111 | | 1130101 |
| 2001 | | D3 | | 440011111 | | 1130101 |
| 2001 | | 1D | | 440011111 | | 1140201 |
| 2001 | | X1 | | 440011111 | | 1140201 |
| 2001 | | D3 | | 440011111 | | 1140201 |
| 2001 | | Z9 | | 440011111 | | 1140201 |
+-----------+--+----------+--+-----------+--+---------------+
I want to see the changes in the table, what was added or removed from a taxgroup, between the top two effective dates.
The results I am trying to obtain based on the sample data would be Z9 (added) if I was running the query in February (1140201) of this year.
If I was running the query in January (1130101) of last year I would expect to see DGH (removed)
I would expect two separate queries, one to show what was added and another to show what was removed.
I have tried multiple avenues to come up with these two queries but can't seem to obtain the correct results. Can anyone point me in the right direction?

-- Lists the tax types that were added to, or removed from, a tax group between
-- two effective dates.
--   @PreviousPeriod : EffectiveDate of the earlier snapshot
--   @CurrentPeriod  : EffectiveDate of the later snapshot
-- Both variables must be declared and assigned by the caller.
-- The aliases are cur/prev because CURRENT is a reserved keyword in T-SQL.
-- NOTE(review): the sample table shows headers with spaces ("Tax Group",
-- "Tax Type"); adjust the column names if those are the real identifiers.

-- Added: present in the current period, absent from the previous one.
SELECT
    cur.TaxGroup,
    cur.TaxType,
    cur.GeoCode,
    'Added' AS ChangeType
FROM TaxGroupDetails AS cur
WHERE cur.EffectiveDate = @CurrentPeriod
  AND NOT EXISTS
      (
          SELECT *
          FROM TaxGroupDetails AS prev
          WHERE prev.EffectiveDate = @PreviousPeriod
            AND prev.TaxGroup = cur.TaxGroup
            AND prev.TaxType = cur.TaxType
            AND prev.GeoCode = cur.GeoCode
      )

UNION ALL

-- Removed: present in the previous period, absent from the current one.
SELECT
    prev.TaxGroup,
    prev.TaxType,
    prev.GeoCode,
    'Removed' AS ChangeType
FROM TaxGroupDetails AS prev
WHERE prev.EffectiveDate = @PreviousPeriod
  AND NOT EXISTS
      (
          SELECT *
          FROM TaxGroupDetails AS cur
          WHERE cur.EffectiveDate = @CurrentPeriod
            AND cur.TaxGroup = prev.TaxGroup
            AND cur.TaxType = prev.TaxType
            AND cur.GeoCode = prev.GeoCode
      )

As you say you need two queries, one to select each of the two groups of data you want to compare.
-- Snapshot of the rows effective on the first date being compared.
SELECT
    [Tax Group],
    [Tax Type],
    [Geocode],
    [EffectiveDate]
FROM TaxGroupDetails
WHERE EffectiveDate = 1120531;

-- Snapshot of the rows effective on the second date being compared.
SELECT
    [Tax Group],
    [Tax Type],
    [Geocode],
    [EffectiveDate]
FROM TaxGroupDetails
WHERE EffectiveDate = 1140201;
You then need to join these two together using some form of key, the combination of tax group and tax type seems sensible here.
-- Line up the two snapshots on (Tax Group, Tax Type). A FULL OUTER JOIN keeps
-- rows that exist in only one snapshot, with NULLs on the side that has no match.
WITH FirstGroup AS
(
    SELECT [Tax Group], [Tax Type], [Geocode], [EffectiveDate]
    FROM TaxGroupDetails
    WHERE EffectiveDate = 1120531
),
SecondGroup AS
(
    SELECT [Tax Group], [Tax Type], [Geocode], [EffectiveDate]
    FROM TaxGroupDetails
    WHERE EffectiveDate = 1140201
)
SELECT *
FROM FirstGroup
FULL OUTER JOIN SecondGroup
    ON  FirstGroup.[Tax Group] = SecondGroup.[Tax Group]
    AND FirstGroup.[Tax Type] = SecondGroup.[Tax Type]
The FULL OUTER JOIN here tells SQL to include the remaining row when the other doesn't exist.
Finally let's tidy up and order the columns and not use a *:
-- Select list for the FULL OUTER JOIN of FirstGroup and SecondGroup.
-- The join keys are merged with COALESCE: when a row exists on only one side,
-- the other side's key is NULL, so COALESCE returns whichever one is present.
SELECT COALESCE(FirstGroup.[Tax Group], SecondGroup.[Tax Group]),
COALESCE(FirstGroup.[Tax Type], SecondGroup.[Tax Type]),
-- Non-key columns are listed once per side; the unmatched side shows NULL.
FirstGroup.Geocode, SecondGroup.Geocode,
FirstGroup.EffectiveDate, SecondGroup.EffectiveDate
FROM
-- The FROM clause is elided here; presumably it is the FirstGroup/SecondGroup
-- FULL OUTER JOIN from the preceding query -- confirm before running.
.
.
.
COALESCE removes the NULLs from the first matched columns, and as we are saying these must be equal there is no point showing both copies.

The set-based solution: take the difference between the whole table and the whole table with all dates projected forward by one time interval. That will eliminate all rows except the ones with "new" codes.
-- Left side: every (TaxGroup, Tax Type) row with its own EffectiveDate.
SELECT
    t.[TaxGroup],
    t.[Tax Type],
    t.[EffectiveDate]
FROM TaxGroupDetails AS t

EXCEPT

-- Right side: the same rows with the date replaced by the next later
-- EffectiveDate found in the same TaxGroup (NULL when there is none).
-- A code survives the EXCEPT only on dates where it was not already present
-- on the immediately preceding date.
SELECT
    t.[TaxGroup],
    t.[Tax Type],
    (
        SELECT MIN(nxt.[EffectiveDate])
        FROM TaxGroupDetails AS nxt
        WHERE nxt.[EffectiveDate] > t.[EffectiveDate]
          AND nxt.[TaxGroup] = t.[TaxGroup]
    )
FROM TaxGroupDetails AS t
To see what got deleted, project backwards instead. Change the subquery to:
-- Fragment: a correlated subquery, not a standalone statement.
-- Returns the latest EffectiveDate that is strictly earlier than the outer
-- row's date within the same TaxGroup; `t` is the outer query's alias.
SELECT MAX([EffectiveDate])
FROM TaxGroupDetails
WHERE [EffectiveDate] < t.[EffectiveDate]
AND [TaxGroup] = t.[TaxGroup]

If you have SQL2012:
-- Codes whose first appearance falls on @Date.
-- rownum numbers each (TaxGroup, Tax Type) pair's rows by EffectiveDate, so
-- rownum = 1 is the earliest row for that pair.
-- @Date must be declared and assigned by the caller.
-- The CTE reads TaxGroupDetails; [TaxGroup] is a column of that table, not a table.
-- NOTE(review): a code that disappears and later returns is reported only for
-- its first appearance.
WITH t AS (
    SELECT *,
           ROW_NUMBER() OVER (PARTITION BY [TaxGroup], [Tax Type]
                              ORDER BY [EffectiveDate] ASC) AS rownum
    FROM TaxGroupDetails
)
SELECT *
FROM t
WHERE rownum = 1
  AND [EffectiveDate] = @Date
To get the other query, change ASC to DESC

Try this / you could start from this [partial] solution:
-- Self-contained demo: classify each (Tax Group, Tax Type, Geocode) combination
-- as unchanged, deleted or inserted compared with the previous year.
-- Table variables must be prefixed with @ (a # prefix denotes a temp table and
-- is not valid in DECLARE ... TABLE).

-- Sample data copied from the question.
DECLARE @MyTable TABLE (
    ID              INT IDENTITY PRIMARY KEY,
    [Tax Group]     SMALLINT NOT NULL,
    [Tax Type]      VARCHAR(3) NOT NULL,
    [Geocode]       INT NOT NULL,
    [EffectiveDate] INT NOT NULL
);

INSERT @MyTable ([Tax Group], [Tax Type], [Geocode], [EffectiveDate])
VALUES
    (2001, '1D ', 440011111, 1120531),
    (2001, 'X1 ', 440011111, 1120531),
    (2001, 'D3 ', 440011111, 1120531),
    (2001, 'DGH', 440011111, 1120531),
    (2001, '1D ', 440011111, 1130101),
    (2001, 'X1 ', 440011111, 1130101),
    (2001, 'D3 ', 440011111, 1130101),
    (2001, '1D ', 440011111, 1140201),
    (2001, 'X1 ', 440011111, 1140201),
    (2001, 'D3 ', 440011111, 1140201),
    (2001, 'Z9 ', 440011111, 1140201);

-- One row per source row: Rnk identifies the (group, type, geocode) combination,
-- EffectiveYear is the leading part of the integer date (1120531 / 10000 = 112).
-- The primary key enforces at most one row per combination per year.
DECLARE @Results TABLE (
    ID            INT NOT NULL,
    Rnk           INT NOT NULL,
    EffectiveYear SMALLINT NOT NULL,
    PRIMARY KEY (Rnk, EffectiveYear)
);

INSERT @Results (ID, Rnk, EffectiveYear)
SELECT
    x.ID,
    DENSE_RANK() OVER (ORDER BY x.[Tax Group], x.[Tax Type], x.[Geocode]) AS Rnk,
    x.EffectiveDate / 10000 AS EffectiveYear
FROM @MyTable AS x;

-- Match each year's rows to the same combination one year earlier.
-- A row present on only one side of the FULL OUTER JOIN is an insert or a delete.
SELECT
    crt.*,
    prev.*,
    CASE
        WHEN crt.ID IS NOT NULL AND prev.ID IS NOT NULL THEN '-' -- No change
        WHEN crt.ID IS NULL AND prev.ID IS NOT NULL THEN 'D' -- Deleted
        WHEN crt.ID IS NOT NULL AND prev.ID IS NULL THEN 'I' -- Inserted
    END AS RowStatus
FROM @Results AS crt
FULL OUTER JOIN @Results AS prev
    ON  crt.Rnk = prev.Rnk
    AND crt.EffectiveYear - 1 = prev.EffectiveYear
ORDER BY ISNULL(crt.EffectiveYear - 1, prev.EffectiveYear), crt.Rnk;
Sample output:
---- ---- ------------- ---- ---- -------------
| Current data | | Previous data |
---- ---- ------------- ---- ---- ------------- ---------
ID Rnk EffectiveYear ID Rnk EffectiveYear RowStatus
---- ---- ------------- ---- ---- ------------- ---------
1 1 112 NULL NULL NULL I -- Current vs. previous: current row hasn't a previous row
3 2 112 NULL NULL NULL I -- the same thing
4 3 112 NULL NULL NULL I -- the same thing
2 4 112 NULL NULL NULL I -- the same thing
NULL NULL NULL 4 3 112 D <-- Deleted: ID 4 = 'DGH'
5 1 113 1 1 112 - -- there is no change
7 2 113 3 2 112 -
6 4 113 2 4 112 -
8 1 114 5 1 113 -
10 2 114 7 2 113 -
9 4 114 6 4 113 -
11 5 114 NULL NULL NULL I <-- Inserted: ID 11 = 'Z9'
NULL NULL NULL 8 1 114 D
NULL NULL NULL 10 2 114 D
NULL NULL NULL 9 4 114 D
NULL NULL NULL 11 5 114 D
Note: I assume that there are no duplicated rows (x.[Tax Group], x.[Tax Type], x.[Geocode]) within a year.

Related

Join created table under condition

I am creating a code to join two different tables under a certain condition. The tables look like this
(TABLE 2)
date | deal_code | originator | servicer | random |
-----------------------------------------------------
2011 | 001 | commerzbank | SPV1 | 1 |
2012 | 001 | commerzbank | SPV1 | 12 |
2013 | 001 | commerzbank | SPV1 | 7 |
2013 | 005 | unicredit | SPV2 | 7 |
and another table
(TABLE 1)
date | deal_code | amount |
---------------------------
2011 | 001 | 100 |
2012 | 001 | 100 |
2013 | 001 | 100 |
2013 | 005 | 200 |
I would like to have this as the final result
date | deal_code | amount | originator | servicer | random |
--------------------------------------------------------------
2013 | 001 | 100 | commerzbank | SPV1 | 7 |
2013 | 005 | 200 | unicredit | SPV2 | 7 |
I created the following code
-- Keep only the rows of table1 that carry the latest date for their deal_code.
SELECT
    q1.deal_code,
    q1.date
FROM table1 AS q1
WHERE q1.date =
      (
          -- latest date recorded for this row's deal
          SELECT MAX(t4.date)
          FROM table1 AS t4
          WHERE t4.deal_code = q1.deal_code
      )
that gives me:
(TABLE 3)
date | deal_code | amount |
---------------------------
2013 | 001 | 100 |
2013 | 005 | 200 |
That is the latest observation for table 1, now I would like to have the originator and servicer information given the deal_code and date. Any suggestion? I hope to have been clear enough. Thanks.
This should do what you are looking for. Please be careful when naming columns. Date is a reserved word and is too ambiguous to be a good name for a column.
-- Self-contained demo: for each deal_code return the most recent row, combining
-- the descriptive columns (table 2 in the question) with the amount (table 1).
-- Table variables must be prefixed with @.

-- Descriptive data per deal and year.
DECLARE @Something TABLE
(
    SomeDate int
    , deal_code char(3)
    , originator varchar(20)
    , servicer char(4)
    , random int
);

INSERT @Something (SomeDate, deal_code, originator, servicer, random)
VALUES
    (2011, '001', 'commerzbank', 'SPV1', 1)
    , (2012, '001', 'commerzbank', 'SPV1', 12)
    , (2013, '001', 'commerzbank', 'SPV1', 7)
    , (2013, '005', 'unicredit ', 'SPV2', 7);

-- Amount per deal and year.
DECLARE @SomethingElse TABLE
(
    SomeDate int
    , deal_code char(3)
    , amount int
);

-- amount is an int column, so the values are written as integer literals
-- rather than strings that rely on implicit conversion.
INSERT @SomethingElse (SomeDate, deal_code, amount)
VALUES
    (2011, '001', 100)
    , (2012, '001', 100)
    , (2013, '001', 100)
    , (2013, '005', 200);

-- RowNum = 1 marks the latest SomeDate within each deal_code.
SELECT x.SomeDate
    , x.deal_code
    , x.originator
    , x.servicer
    , x.random
    , x.amount
FROM
(
    SELECT s.SomeDate
        , s.deal_code
        , s.originator
        , s.servicer
        , s.random
        , se.amount
        , RowNum = ROW_NUMBER() OVER (PARTITION BY s.deal_code ORDER BY s.SomeDate DESC)
    FROM @Something AS s
    INNER JOIN @SomethingElse AS se
        ON  se.SomeDate = s.SomeDate
        AND se.deal_code = s.deal_code
) AS x
WHERE x.RowNum = 1;
Looks like this would work:
-- Return the rows of table1 joined to table2 for the single latest date that
-- exists in both tables.
-- NOTE(review): this uses one global maximum date. A deal whose latest row is
-- older than that date is not returned; use a per-deal maximum if that matters.
DECLARE @MaxYear INT;

-- [date] exists in both tables, so it must be qualified to avoid an
-- ambiguous-column error.
SELECT @MaxYear = MAX(t1.[date])
FROM table1 AS t1
INNER JOIN table2 AS t2
    ON  t1.deal_code = t2.deal_code
    AND t1.[date] = t2.[date];

-- Join on both deal_code and date; matching on deal_code alone would pair the
-- latest table1 row with every table2 row of that deal.
SELECT
    t1.[date],
    t1.deal_code,
    t1.amount,
    t2.originator,
    t2.servicer,
    t2.random
FROM table1 AS t1
INNER JOIN table2 AS t2
    ON  t1.deal_code = t2.deal_code
    AND t1.[date] = t2.[date]
WHERE t1.[date] = @MaxYear;
I agree with Sean Lange about the date column name. His method gets around the dependency on the correlated sub-query, but at the heart of things, you really just need to add an INNER JOIN to your existing query in order to get the amount column into your result set.
-- Latest row per deal_code from table1, enriched with the matching
-- (same date, same deal_code) descriptive columns from table2.
SELECT
    info.date,
    info.deal_code,
    amt.amount,
    info.originator,
    info.servicer,
    info.random
FROM table1 AS amt
JOIN table2 AS info
    ON  amt.date = info.date
    AND amt.deal_code = info.deal_code
WHERE amt.date =
      (
          -- latest date recorded in table1 for this deal
          SELECT MAX(latest.date)
          FROM table1 AS latest
          WHERE latest.deal_code = amt.deal_code
      )

SQL - How to combine rows

I have the following table which looks at calls and attendances. I got this by using union all on a 'calls' and 'attendances' tables and then used row number on the ID and ordered by dates.
Table1:
Type | ID | Call/AttendanceDate | RowNum
------------|----|---------------------|--------
Attendance | 12 | 2018-09-16 10:11:00 | 82
Call | 12 | 2018-09-18 14:11:47 | 83
Call | 12 | 2018-10-02 17:26:13 | 84
Call | 12 | 2018-10-05 14:58:31 | 85
Attendance | 12 | 2018-10-13 01:41:00 | 86
Call | 12 | 2018-10-13 02:39:12 | 87
Call | 12 | 2018-10-13 04:31:22 | 88
Attendance | 12 | 2018-10-13 14:29:00 | 89
Call | 12 | 2018-10-13 14:59:19 | 90
Attendance | 12 | 2018-10-15 15:50:00 | 91
The code I used for this is:
-- CTE1: calls and attendances stacked into one event list per ID.
-- The column names of a UNION ALL come from its first branch, so both branches
-- use the alias Date1.
WITH CTE1 AS
(
    SELECT 'Call' AS [Type], ID, CallDate AS Date1
    FROM CallsTable
    UNION ALL
    SELECT 'Attendance' AS [Type], ID, AttendanceDate AS Date1
    FROM AttendanceTable
)
-- CTE2: events numbered chronologically within each ID.
, CTE2 AS
(
    SELECT [Type], Date1, ID,
           ROW_NUMBER() OVER (PARTITION BY ID ORDER BY Date1 ASC) AS RowNum
    FROM CTE1
)
--------------------------------OUTPUT--------------------------------
-- Each event (a) that has an immediately preceding event (b) for the same ID.
-- NOTE(review): no column of b is selected, so the join only removes each ID's
-- first event -- confirm that is intended.
SELECT a.[Type], a.ID, a.Date1, a.RowNum
FROM CTE2 AS a
INNER JOIN CTE2 AS b
    ON  a.ID = b.ID
    AND a.RowNum = b.RowNum + 1
WHERE a.ID = '12'
ORDER BY a.ID, a.RowNum
I want to modify this to look like the below output, so that whenever an attendance follows a call, it should be in the same row.
Table2:
Type | ID | CallDate | RowNum | Type | AttendanceDate | RowNum
------|----|------------------|--------|------------|------------------|--------
NULL | 12 | NULL | NULL | Attendance | 16/09/2018 10:11 | 82
Call | 12 | 18/09/2018 14:11 | 83 | NULL | NULL | NULL
Call | 12 | 02/10/2018 17:26 | 84 | NULL | NULL | NULL
Call | 12 | 05/10/2018 14:58 | 85 | Attendance | 13/10/2018 01:41 | 86
Call | 12 | 13/10/2018 02:39 | 87 | NULL | NULL | NULL
Call | 12 | 13/10/2018 04:31 | 88 | Attendance | 13/10/2018 14:29 | 89
Call | 12 | 13/10/2018 14:59 | 90 | Attendance | 15/10/2018 15:50 | 91
Is this possible? What code could I use?
Use FULL JOIN
-- Put a call and the attendance numbered directly after it on the same row.
-- FULL JOIN keeps calls with no following attendance and attendances with no
-- preceding call; the missing side is NULL. Relies on CTE2 from the question.
SELECT
    *
FROM
    (
        SELECT *
        FROM CTE2
        WHERE Type = 'CALL'
    ) AS calls
FULL JOIN
    (
        SELECT *
        FROM CTE2
        WHERE Type = 'ATTENDANCE'
    ) AS attendances
    ON  calls.ID = attendances.ID
    AND attendances.RowNum - 1 = calls.RowNum
You can use APPLY :
-- One row per call for ID 12. The attendance columns are filled in only when
-- the next attendance is the row numbered directly after the call
-- (RowNum difference = 1); otherwise they are NULL.
-- CTE2 exposes the event timestamp as Date1, so it is read as Date1 and
-- aliased to the requested output names.
-- NOTE(review): attendances with no call directly before them do not appear in
-- this result, because the driving rows are calls only.
SELECT
    C.[Type],
    C.ID,
    C.Date1 AS CallDate,
    C.RowNum,
    (CASE WHEN C2.RowNum - C.RowNum = 1 THEN C2.[Type] END) AS [TYPE],
    (CASE WHEN C2.RowNum - C.RowNum = 1 THEN C2.Date1 END) AS AttendanceDate,
    (CASE WHEN C2.RowNum - C.RowNum = 1 THEN C2.RowNum END) AS RowNum
FROM CTE2 AS C
OUTER APPLY
    (
        -- nearest later attendance for the same ID
        SELECT TOP (1) nxt.*
        FROM CTE2 AS nxt
        WHERE nxt.ID = C.ID
          AND nxt.[Type] = 'Attendance'
          AND nxt.RowNum > C.RowNum
        ORDER BY nxt.RowNum
    ) AS C2
WHERE C.ID = 12
  AND C.[Type] = 'Call';
Not as elegant, but works for me, a table valued function
-- GetCallActivity: multi-statement table-valued function that returns one row
-- per call, with the attendance that directly follows it (rownum + 1, same id)
-- on the same row, plus one row for every attendance that has no call directly
-- before it.
-- Reads stack_calls (type, id, activity_date, rownum). The return table
-- variable must be prefixed with @.
ALTER FUNCTION GetCallActivity()
RETURNS @activityTable TABLE
(
    call_type   varchar(16),
    call_id     int,
    call_date   datetime,
    call_rownum int,
    atnd_type   varchar(16),
    atnd_id     int,
    atnd_date   datetime,
    atnd_rownum int
)
AS
BEGIN
    -- Seed the result with every call; the attendance columns start as NULL.
    INSERT INTO @activityTable
        (call_type, call_id, call_date, call_rownum)
    SELECT c.type, c.id, c.activity_date, c.rownum
    FROM stack_calls AS c
    WHERE c.type = 'Call';

    -- Attach the attendance that immediately follows each call.
    -- The id must match as well, so a call is never paired with another id's
    -- attendance that happens to have the next rownum.
    UPDATE a
    SET atnd_type = b.type,
        atnd_id = b.id,
        atnd_date = b.activity_date,
        atnd_rownum = b.rownum
    FROM @activityTable AS a
    INNER JOIN stack_calls AS b
        ON  b.id = a.call_id
        AND b.rownum = a.call_rownum + 1
    WHERE b.type = 'Attendance';

    -- Edge cases: attendances that were not attached above, i.e. those with no
    -- call at rownum - 1 for the same id. The previous row is checked across
    -- all event types; looking only at attendance rows would detect nothing but
    -- the very first attendance.
    INSERT INTO @activityTable
        (atnd_type, atnd_id, atnd_date, atnd_rownum)
    SELECT s.type, s.id, s.activity_date, s.rownum
    FROM stack_calls AS s
    WHERE s.type = 'Attendance'
      AND NOT EXISTS
          (
              SELECT *
              FROM stack_calls AS p
              WHERE p.id = s.id
                AND p.rownum = s.rownum - 1
                AND p.type = 'Call'
          );

    RETURN;
END
GO

Update several columns with latest values from another table

Here's the data:
[ TABLE_1 ]
id | prod1 | date1 | prod2 | date2 | prod3 | date3 |
---|--------|--------|--------|--------|--------|-------|
1 | null | null | null | null | null | null |
2 | null | null | null | null | null | null |
3 | null | null | null | null | null | null |
[ TABLE_2 ]
id | date | product |
-----|-------------|-----------|
1 | 20140101 | X |
1 | 20140102 | Y |
1 | 20140103 | Z |
2 | 20141201 | data |
2 | 20141201 | Y |
2 | 20141201 | Z |
3 | 20150101 | data2 |
3 | 20150101 | data3 |
3 | 20160101 | X |
Both tables have other columns not listed here.
date is formatted: yyyymmdd and datatype is int.
[ TABLE_2 ] doesn't have empty rows, just tried to make sample above more readable.
Here's the Goal:
I need to update [ TABLE_1 ] prod1,date1,prod2,date2,prod3,date3
with product collected from [ TABLE_2 ] with corresponding date values.
Data must be sorted so that "latest" product becomes prod1,
2nd latest product will be prod2 and 3rd is prod3.
Latest product = biggest date (int).
If dates are equal, order doesn't matter. (see id=2 and id=3).
Updated [ TABLE_1 ] should be:
id | prod1 | date1 | prod2 | date2 | prod3 | date3 |
---|--------|----------|--------|----------|--------|----------|
1 | Z | 20140103 | Y | 20140102 | X | 20140101 |
2 | data | 20141201 | Y | 20141201 | Z | 20141201 |
3 | X | 20160101 | data2 | 20150101 | data3 | 20150101 |
Ultimate goal is to get the following :
[ TABLE_3 ]
id | order1 | order2 | order3 | + Columns from [ TABLE_1 ]
---|--------------------|----------------------|------------|--------------------------
1 | 20140103:Z | 20140102:Y | 20140103:Z |
2 | 20141201:data:Y:Z | NULL | NULL |
3 | 20160101:X | 20150101:data2:data3 | NULL |
I have to admit this exceeds my knowledge and I haven't tried anything.
Should I do it with JOIN or SELECT subquery?
Should I try to make it in one SQL -clause or perhaps in 3 steps,
each prod&date -pair at the time ?
What about creating [ TABLE_3 ] ?
It has to have columns from [ TABLE_1 ].
Is it easiest to create it from [ TABLE_2 ] -data or Updated [ TABLE_1 ] ?
Any help would be highly appreciated.
Thanks in advance.
I'll post some of my own shots on comments.
After looking into it (after my comment), a stored procedure would be best, that you can call to view the data as a pivot, and do away with TABLE_1. Obviously if you need to make this dynamic, you'll need to look into dynamic pivots, it's a bit of a hack with CTEs:
-- Returns TABLE_2 pivoted to one row per ID, with the dates and products
-- ranked newest first spread over DATE1..DATE3 and PROD1..PROD3.
CREATE PROCEDURE DBO.VIEW_AS_PIVOTED_DATA
AS
WITH RANKED_DATES AS (
    -- label each date DATE1, DATE2, ... by descending date within the ID
    SELECT
        ID,
        [DATE],
        'DATE' + CAST(ROW_NUMBER() OVER(PARTITION BY ID ORDER BY [DATE] DESC) AS VARCHAR) AS [RN]
    FROM TABLE_2
),
RANKED_PRODUCTS AS (
    -- label each product PROD1, PROD2, ... using the same ordering
    SELECT
        ID,
        PRODUCT,
        'PROD' + CAST(ROW_NUMBER() OVER(PARTITION BY ID ORDER BY [DATE] DESC) AS VARCHAR) AS [RN]
    FROM TABLE_2
),
PIVOTED_DATES AS (
    SELECT ID, [DATE1], [DATE2], [DATE3]
    FROM RANKED_DATES
    PIVOT (MAX([DATE]) FOR RN IN ([DATE1], [DATE2], [DATE3])) AS PIV
),
PIVOTED_PRODUCTS AS (
    SELECT ID, [PROD1], [PROD2], [PROD3]
    FROM RANKED_PRODUCTS
    PIVOT (MAX(PRODUCT) FOR RN IN ([PROD1], [PROD2], [PROD3])) AS PIV
)
SELECT
    D.ID,
    [PROD1], [DATE1],
    [PROD2], [DATE2],
    [PROD3], [DATE3]
FROM PIVOTED_DATES AS D
JOIN PIVOTED_PRODUCTS AS P
    ON D.ID = P.ID
Construction:
-- Step 1: number each id's rows from newest to oldest date.
WITH ranked AS (
    SELECT
        [id],
        [date],
        [product],
        row_number() over (partition by id order by date desc) rn
    FROM [sistemy].[dbo].[TABLE_2]
)
-- Step 3: pivot the (col, value) pairs back into one wide row per id.
SELECT
    id,
    [prod1], [date1],
    [prod2], [date2],
    [prod3], [date3]
FROM
(
    -- Step 2: emit a 'date' row and a 'prod' row for every ranked row and name
    -- the target column by appending the rank (date1, prod1, ...).
    SELECT
        id,
        type + cast(rn as varchar(1)) col,
        value
    FROM ranked
    CROSS APPLY
    (
        SELECT 'date', CAST([date] AS varchar(8))
        UNION ALL
        SELECT 'prod', product
    ) pair(type, value)
) src
PIVOT
(
    max(value)
    for col IN ([prod1], [date1], [prod2], [date2], [prod3], [date3])
) wide
You need to take a few steps to achieve the aim.
Rank your products by date:
-- Number each id's rows from newest to oldest date (rn = 1 is the latest).
-- Rows with equal dates receive distinct numbers in an unspecified order.
SELECT [id]
,[date]
,[product]
,row_number() over (partition by id order by date desc) rn
FROM [sistemy].[dbo].[TABLE_2]
Unpivot your date and product columns into one column. You can use UNPIVOT OR CROSS APPLY statements. I prefer CROSS APPLY
-- Fragment: `ranked` is not defined here; presumably it is the ranking query
-- shown above, wrapped as a CTE -- confirm.
-- For each ranked row this emits two (type, value) rows, one for the date and
-- one for the product, and builds the target column name as type + rn
-- (e.g. date1, prod1).
SELECT id, type+cast(rn as varchar(1)) col, value
FROM ranked
CROSS APPLY
(
-- the date is cast to varchar so that both branches produce character data
SELECT 'date', CAST([date] AS varchar(8))
UNION ALL
SELECT 'prod', product
) ca(type, value)
or the same result using UNPIVOT
-- Same result as the CROSS APPLY form, written with UNPIVOT.
-- UNPIVOT requires its source columns to share one data type and length, so
-- both are cast to varchar(500).
-- The IN list must name the columns as aliased in the derived table
-- ([date] and [prod]); `type` then holds 'date' or 'prod', which gives the
-- column names date1, prod1, ...
-- `ranked` is the ranking CTE; it is not defined in this fragment.
SELECT id, type + cast(rn as varchar(1)) col, value
FROM (
    SELECT [id],
           rn,
           CAST([date] AS varchar(500)) AS [date],
           CAST([product] AS varchar(500)) AS [prod]
    FROM ranked
) t
UNPIVOT
(
    value FOR type IN ([date], [prod])
) unpvt
and finally you use PIVOT to get the result.

Unpivotting multiple columns - substring of column name as a new column with CROSS APPLY

I have a table with the following format
YEAR, MONTH, ITEM, REQ_QTY1, REQ_QTY2 , ....REQ_QTY31 ,CONVERTED1, CONVERTED2 ....CONVERTED31
Where the suffix of each column is the day of the month.
I need to convert it to the following format, where Day_of_month is the numeric suffix of each column
YEAR, MONTH, DAY_OF_MONTH, ITEM, REQ_QTY, CONVERTED
I thought of using CROSS APPLY to retrieve the data, but I can't use CROSS APPLY to get the "Day of Month"
-- Unpivots the paired REQ_QTYn / CONVERTEDn columns into one row per pair.
-- Each VALUES row carries only the two measures, so the day number encoded in
-- the column suffix is not available in the output.
-- The "......" line stands for the elided rows 4 to 30; the snippet is not
-- runnable as written.
SELECT A.YEAR, A.MONTH, A.ITEM, B.REQ_QTY, B.CONVERTED
FROM TEST A
CROSS APPLY
(VALUES
(REQ_QTY1, CONVERTED1),
(REQ_QTY2, CONVERTED2),
(REQ_QTY3, CONVERTED3),
......
(REQ_QTY31, CONVERTED31)
)B (REQ_QTY, CONVERTED)
The only way I found is to use a nested select with inner join
-- Unpivots REQ_QTYn and CONVERTEDn separately, then joins the two results on
-- year, month, item and the day number taken from the column name.
-- The "......" / "...." runs stand for elided columns; not runnable as written.
SELECT A.YEAR, A.MONTH, A.DAY_OF_MONTH, A.ITEM,A.REQ_QTY, D.CONVERTED FROM
-- 'REQ_QTY' is 7 characters long, so the day suffix starts at position 8
(SELECT YEAR, MONTH, ITEM, SUBSTRING(DAY_OF_MONTH,8,2) AS DAY_OF_MONTH, REQ_QTY FROM TEST
UNPIVOT
(REQ_QTY FOR DAY_OF_MONTH IN ([REQ_QTY1],[REQ_QTY2],[REQ_QTY3],......[REQ_QTY30],[REQ_QTY31])
) B
) A
-- 'CONVERTED' is 9 characters long, so the day suffix starts at position 10
INNER JOIN (SELECT YEAR, MONTH, ITEM, SUBSTRING(DAY_OF_MONTH,10,2) AS DAY_OF_MONTH, CONVERTED FROM TEST
UNPIVOT
(CONVERTED FOR DAY_OF_MONTH IN ([CONVERTED1],[CONVERTED2],[CONVERTED3],....[CONVERTED30],[CONVERTED31])
) C
) D
ON D.YEAR = A.YEAR AND D.MONTH = A.MONTH AND D.ITEM = A.ITEM AND D.DAY_OF_MONTH = A.DAY_OF_MONTH
Is there a way to use CROSS APPLY and yet get the DAY_OF_MONTH out?
This is not a solution with CROSS APPLY but it will definitely make it a bit faster as it uses a bit simpler approach and simpler execution plan.
SQL Fiddle
MS SQL Server 2008 Schema Setup:
-- Reduced fixture: three days per month instead of thirty-one.
CREATE TABLE Test_Table
(
    [YEAR]     INT,
    [MONTH]    INT,
    [ITEM]     INT,
    REQ_QTY1   INT,
    REQ_QTY2   INT,
    REQ_QTY3   INT,
    CONVERTED1 INT,
    CONVERTED2 INT,
    CONVERTED3 INT
)

-- One row per month of 2015 for item 1.
INSERT INTO Test_Table
    ([YEAR], [MONTH], [ITEM], REQ_QTY1, REQ_QTY2, REQ_QTY3, CONVERTED1, CONVERTED2, CONVERTED3)
VALUES
    (2015, 1, 1, 10, 20, 30, 100, 200, 300),
    (2015, 2, 1, 10, 20, 30, 100, 200, 300),
    (2015, 3, 1, 10, 20, 30, 100, 200, 300)
Query 1:
-- Unpivots every REQ_QTYn / CONVERTEDn column into (N, Vals) rows, splits the
-- column name N into a measure name and a day number, then pivots the measure
-- name back into two columns so each output row is one day.
SELECT *
FROM
(
SELECT [YEAR]
,[MONTH]
,ITEM
,Vals
-- day number = the characters after the measure prefix:
-- 'REQ_QTY' is 7 characters (suffix at 8), 'CONVERTED' is 9 (suffix at 10)
,CASE WHEN LEFT(N,3) = 'REQ' THEN SUBSTRING(N,8 ,2)
WHEN LEFT(N,3) = 'CON' THEN SUBSTRING(N,10,2)
END AS Day_Of_Month
-- measure name = the prefix without the day number
,CASE WHEN LEFT(N,3) = 'REQ' THEN LEFT(N,7)
WHEN LEFT(N,3) = 'CON' THEN LEFT(N,9)
END AS Tran_Type
FROM Test_Table t
UNPIVOT (Vals FOR N IN ([REQ_QTY1],[REQ_QTY2],[REQ_QTY3],
[CONVERTED1],[CONVERTED2],[CONVERTED3]))up
)t2
-- one output column per measure; rows are grouped by the remaining columns
-- (YEAR, MONTH, ITEM, Day_Of_Month)
PIVOT (SUM(Vals)
FOR Tran_Type
IN (REQ_QTY, CONVERTED))p
Results:
| YEAR | MONTH | ITEM | Day_Of_Month | REQ_QTY | CONVERTED |
|------|-------|------|--------------|---------|-----------|
| 2015 | 1 | 1 | 1 | 10 | 100 |
| 2015 | 1 | 1 | 2 | 20 | 200 |
| 2015 | 1 | 1 | 3 | 30 | 300 |
| 2015 | 2 | 1 | 1 | 10 | 100 |
| 2015 | 2 | 1 | 2 | 20 | 200 |
| 2015 | 2 | 1 | 3 | 30 | 300 |
| 2015 | 3 | 1 | 1 | 10 | 100 |
| 2015 | 3 | 1 | 2 | 20 | 200 |
| 2015 | 3 | 1 | 3 | 30 | 300 |
Well, I found a way using CROSS APPLY, but instead of taking a substring, I'm basically hardcoding the days. Works well enough so...
-- Unpivots the paired REQ_QTYn / CONVERTEDn columns with CROSS APPLY (VALUES ...).
-- The day number is supplied as a hard-coded literal in each VALUES row instead
-- of being parsed from the column name.
-- The "......" line stands for the elided rows 05 to 30; not runnable as written.
SELECT A.YEAR, A.MONTH, A.ITEM, B.DAY_OF_MONTH, B.REQ_QTY, B.CONVERTED
FROM TEST A
CROSS APPLY
(
VALUES
('01', REQ_QTY1, CONVERTED1),
('02', REQ_QTY2, CONVERTED2),
('03', REQ_QTY3, CONVERTED3),
('04', REQ_QTY4, CONVERTED4),
......
('31', REQ_QTY31, CONVERTED31)
) B (DAY_OF_MONTH, REQ_QTY, CONVERTED)

Date-based multiple group by T-SQL query

First of all, excuse the long question, but I will try to put it as simply as possible...
I'm trying to write a kind of a reporting query, but I'm having a problem getting the desired results. The problem:
Employee table
Id | Name
---------------
1 | John Smith
2 | Alan Jones
3 | James Jones
Task table
Id | Title | StartDate | EmployeeId | Estimate (integer - ticks)
----------------------------------------------------------------------------
1 | task1 | 21.08.2011 | 1 | 90000000000
2 | task2 | 21.08.2011 | 1 | 150000000
3 | task3 | 22.08.2011 | 2 | 1230000000
Question:
How to get the estimate summary per day, grouped, but to include all the employees?
Like this:
Date | EmployeeId | EmployeeName | SummaryEstimate
-------------------------------------------------------------
19.08.2011 | 1 | John Smith | NULL
19.08.2011 | 2 | Alan Jones | NULL
19.08.2011 | 3 | James Jones | NULL
20.08.2011 | 1 | John Smith | NULL
20.08.2011 | 2 | Alan Jones | NULL
20.08.2011 | 3 | James Jones | NULL
21.08.2011 | 1 | John Smith | 90150000000
21.08.2011 | 2 | Alan Jones | NULL
21.08.2011 | 3 | James Jones | NULL
22.08.2011 | 1 | John Smith | NULL
22.08.2011 | 2 | Alan Jones | 1230000000
22.08.2011 | 3 | James Jones | NULL
What I currently do is I have a "dates" table with 30years of days. I left join and group by that table to get other dates included too. Well, here is the query:
-- Asker's attempt: sums task estimates per date and employee.
-- Employees are reached only through task, so a date with no task gets NULL
-- employee columns, and employees with no task on a date are missing entirely
-- (see the result table below).
-- NOTE(review): the Task table above lists the column as StartDate, but the
-- query reads task.StartTime -- confirm the real column name.
SELECT dates.value, employee.Id, employee.Name, sum(task.Estimate)
FROM TableOfDates as dates
-- style 101 formats the datetime as mm/dd/yyyy text before it is compared
left join Tasks as task on (dates.value = convert(varchar(10), task.StartTime, 101))
left join Employees as employee on (employee.Id = task.EmployeeId)
WHERE dates.value >= '2011-08-19' and dates.value < '2011-08-22'
GROUP BY dates.value, employee.Id, employee.Name
ORDER BY dates.value, employee.Id
The convert call is to get the date part of the DateTime column.
The result that I get is:
Date | EmployeeId | EmployeeName | SummaryEstimate
-------------------------------------------------------------
19.08.2011 | NULL | NULL | NULL
20.08.2011 | NULL | NULL | NULL
21.08.2011 | 1 | John Smith | 90150000000
22.08.2011 | 2 | Alan Jones | 1230000000
I am there half of the way, I get dates that are not in the two base joined tables (Employees and Tasks) but I cannot also have all the employees included as in the table shown before this one.
I've tried cross-joining, then subqueries, but little luck there. Any help would be very much appreciated ! Thank you for having the time to go through all of this, I hope I was clear enough...
-- Every (date, employee) combination, so employees without tasks still appear.
WITH DE AS
(
    SELECT
        D.value AS DateValue,
        E.Id    AS EmployeeId,
        E.Name  AS EmployeeName
    FROM TableOfDates AS D
    CROSS JOIN Employees AS E
)
-- Tasks are attached per date and employee; combinations with no task keep a
-- NULL sum.
SELECT
    DE.DateValue,
    DE.EmployeeId,
    DE.EmployeeName,
    sum(task.Estimate)
FROM DE
LEFT JOIN Tasks AS task
    ON  DE.DateValue = convert(varchar(10), task.StartTime, 101)
    AND DE.EmployeeId = task.EmployeeId
WHERE DE.DateValue >= '2011-08-19'
  AND DE.DateValue < '2011-08-22'
GROUP BY
    DE.DateValue,
    DE.EmployeeId,
    DE.EmployeeName
ORDER BY
    DE.DateValue,
    DE.EmployeeId
Note that this solution offers the possibility to drop the day-table as you may use a dynamic recursive CTE instead.
The other CTE:s (Employees and Tasks) can be substituted with the real tables.
-- Estimate summary per day and employee, including employees without tasks.
-- The date list is generated by a recursive CTE instead of a calendar table.
-- Local variables must be prefixed with @.
-- NOTE: recursive CTEs stop with an error after 100 recursion levels by
-- default; for ranges longer than about 100 days add an OPTION (MAXRECURSION n)
-- hint to the final SELECT.
DECLARE @startDate DATETIME = '2011-08-01';
DECLARE @endDate DATETIME = '2011-09-01';

WITH Employees (Id, Name)
AS
(
    -- inline sample data; replace with the real Employees table
    SELECT 1, 'John Smith'
    UNION ALL
    SELECT 2, 'Alan Jones'
    UNION ALL
    SELECT 3, 'James Jones'
)
, Tasks (Id, Title, StartDate, EmployeeId, Estimate)
AS
(
    -- inline sample data; replace with the real Tasks table
    SELECT 1, 'task1', '2011-08-21', 1, 90000000000
    UNION ALL
    SELECT 2, 'task2', '2011-08-21', 1, 150000000
    UNION ALL
    SELECT 3, 'task3', '2011-08-22', 2, 1230000000
)
, TableOfDates (value)
AS
(
    -- anchor: @startDate with its time part removed
    SELECT DATEADD(DAY, DATEDIFF(DAY, 0, @startDate), 0)
    UNION ALL
    -- step: add one day while the previous value is before @endDate,
    -- so the last generated value is @endDate itself
    SELECT DATEADD(DAY, 1, value)
    FROM TableOfDates
    WHERE value < @endDate
)
SELECT
    dates.value
    , employee.Id
    , employee.Name
    , SUM(task.Estimate) AS SummaryEstimate
FROM TableOfDates AS dates
CROSS JOIN Employees AS employee
LEFT JOIN Tasks AS task
    ON  dates.value = task.StartDate
    AND employee.Id = task.EmployeeId
WHERE dates.value >= '2011-08-19'
  AND dates.value < '2011-08-26'
GROUP BY
    dates.value
    , employee.Id
    , employee.Name
ORDER BY
    dates.value
    , employee.Id
use this query:
-- Self-contained demo using temp tables: estimate summary per day and employee,
-- including employees that have no task on a given day.
CREATE TABLE #T_dates (id_date int identity(1,1), inp_date datetime);
CREATE TABLE #T_tasks (id_task int identity(1,1), key_date int, key_emp int, est int);
CREATE TABLE #T_emp (id_emp int identity(1,1), name varchar(50));

-- Dates are written as 'yyyymmdd', which SQL Server reads the same way under
-- every language and DATEFORMAT setting.
-- id_date is assigned in insert order: 1 = 19 Aug, 2 = 20 Aug, 3 = 21 Aug, ...
INSERT #T_dates (inp_date) VALUES ('20110819');
INSERT #T_dates (inp_date) VALUES ('20110820');
INSERT #T_dates (inp_date) VALUES ('20110821');
INSERT #T_dates (inp_date) VALUES ('20110822');
INSERT #T_dates (inp_date) VALUES ('20110823');
INSERT #T_dates (inp_date) VALUES ('20110824');
--select * from #T_dates

-- id_emp is assigned in insert order: 1 = John Smith, 2 = Alan Jones, 3 = James Jones
INSERT #T_emp (name) VALUES ('John Smith');
INSERT #T_emp (name) VALUES ('Alan Jones');
INSERT #T_emp (name) VALUES ('James Jones');
--select * from #T_emp

-- Tasks as in the question: two for John Smith on 21 Aug (id_date 3) and one
-- for Alan Jones on 22 Aug (id_date 4).
INSERT #T_tasks (key_date, key_emp, est) VALUES (3, 1, 900000);
INSERT #T_tasks (key_date, key_emp, est) VALUES (3, 1, 15000);
INSERT #T_tasks (key_date, key_emp, est) VALUES (4, 2, 123000);
--select * from #T_tasks

-- Every employee on every date, with the pre-aggregated estimates attached
-- where they exist (NULL otherwise).
SELECT
    d.inp_date,
    e.id_emp,
    e.name,
    Gr.EST
FROM #T_emp AS e
CROSS JOIN #T_dates AS d
LEFT JOIN
    (
        SELECT t.key_date, t.key_emp, SUM(t.est) AS EST
        FROM #T_tasks AS t
        GROUP BY t.key_date, t.key_emp
    ) AS Gr
    ON  Gr.key_emp = e.id_emp
    AND Gr.key_date = d.id_date
WHERE d.inp_date >= '20110819'
  AND d.inp_date <= '20110822'
ORDER BY d.inp_date, e.id_emp;

Resources