SQL, using Group by until specific trend (increment, decrement, same) - sql-server

I would like to know how can i modify my code for considering all the same values of suppose 10 as UP till the time it is incrementing and then down for decrement and SAME if there is no change till the time there is no variation in the value (increment, decrement, same).
Here is my code :
;with etape1 as
(
select ROW_NUMBER() OVER(ORDER BY mnth) AS id,* from [InsideTSQL2008].[alioune].[Sales]
)
,
etape2 as
(
select
a.id, b.mnth AS START , a.mnth AS FINISH ,
a.qty - b.qty AS TREND
FROM
etape1 a
LEFT JOIN etape1 b
on a.id = b.id+1
)
select * from etape2;
My Result is :
id START FINISH TREND
1 NULL 2007-12-01 NULL
2 2007-12-01 2008-01-01 10
3 2008-01-01 2008-02-01 10
4 2008-02-01 2008-03-01 10
5 2008-03-01 2008-04-01 10
6 2008-04-01 2008-05-01 0
7 2008-05-01 2008-06-01 -10
8 2008-06-01 2008-07-01 -10
9 2008-07-01 2008-08-01 -10
10 2008-08-01 2008-09-01 -10
11 2008-09-01 2008-10-01 10
12 2008-10-01 2008-11-01 -10
13 2008-11-01 2008-12-01 20
14 2008-12-01 2009-01-01 10
15 2009-01-01 2009-02-01 10
16 2009-02-01 2009-03-01 -40
My final result as required should be like :
Start End Trend
200712 200712 unknown
200801 200804 UP
200805 200805 SAME
200806 200809 DOWN
200810 200810 UP
200811 200811 DOWN
200812 200812 UP
200903 200903 DOWN
200904 200905 SAME
200906 200907 UP
Any help would be really helpful; Thanks

Took me a few goes (and a few hours), but I think I have what you want:
DECLARE #Sales AS TABLE (mnth datetime, qty int)
INSERT INTO #Sales
SELECT '2016-01-01', 10 UNION ALL
SELECT '2016-02-01', 20 UNION ALL
SELECT '2016-03-01', 30 UNION ALL
SELECT '2016-04-01', 40 UNION ALL
SELECT '2016-05-01', 40 UNION ALL
SELECT '2016-06-01', 30 UNION ALL
SELECT '2016-07-01', 20 UNION ALL
SELECT '2016-08-01', 30 UNION ALL
SELECT '2016-09-01', 40 UNION ALL
SELECT '2016-10-01', 45 UNION ALL
SELECT '2016-11-01', 50
;WITH etape1 AS (
SELECT ROW_NUMBER() OVER(ORDER BY mnth) AS id, * FROM #Sales
)
, etape2 AS (
SELECT id, lag(mnth) OVER (ORDER BY id) AS START, mnth AS FINISH, CASE WHEN qty - LAG(qty) OVER (ORDER BY id) < 0 THEN -1 WHEN qty - LAG(qty) OVER (ORDER BY id) > 0 THEN 1 ELSE 0 END AS TREND
FROM etape1
)
, etape3 AS (
SELECT id, START, FINISH, TREND, lag(TREND) OVER (ORDER BY id) AS PrevTrend
FROM etape2
)
, etape4 AS (
SELECT id, START, FINISH, TREND, SUM(CASE WHEN TREND = PREVTREND THEN 0 ELSE 1 END) OVER (ORDER BY id ROWS UNBOUNDED PRECEDING) AS Change
FROM etape3
)
SELECT MIN(START) AS START, MAX(FINISH) AS FINISH, CASE WHEN MIN(TREND) IS NULL THEN 'Unknown' WHEN MIN(TREND) < 0 THEN 'Down' WHEN MIN(TREND) > 0 THEN 'Up' WHEN MIN(Start) is NULL THEN 'Unknown' ELSE 'Same' END AS TREND
FROM etape4
GROUP BY Change
ORDER BY START
Results are:
START FINISH TREND
NULL 2016-01-01 Unknown
2016-01-01 2016-04-01 Up
2016-04-01 2016-05-01 Same
2016-05-01 2016-07-01 Down
2016-07-01 2016-11-01 Up

Related

Find each employee Project Start Date and End Date (i.e., Start Date and End Date should be continuous without any break in Days/Months/Year)

ID
EmployeeId
ProjectId
StartDate
EndDate
1
1
100
01-04-2019
30-04-2019
2
1
100
01-05-2019
31-05-2019
3
1
100
01-12-2019
31-12-2019
4
1
100
01-01-2020
31-01-2020
5
2
200
01-01-2019
31-01-2019
6
2
200
01-02-2019
28-02-2019
7
2
200
01-04-2019
28-04-2019
8
2
200
01-05-2019
31-05-2019
9
2
200
01-06-2019
30-06-2019
10
3
100
01-08-2019
31-08-2019
11
3
100
01-09-2019
30-09-2019
12
3
200
01-10-2019
31-10-2019
13
3
200
01-11-2019
30-11-2019
14
3
300
01-12-2019
31-12-2019
15
3
300
01-01-2020
31-01-2020
16
3
300
01-02-2020
29-02-2020
expected Output
EmployeeId
ProjectId
StartDate
EndDate
1
100
01-04-2019
31-05-2019
1
100
01-12-2019
31-01-2020
2
200
01-01-2019
28-02-2019
2
200
01-04-2019
28-04-2019
2
200
01-05-2019
30-06-2019
3
100
01-08-2019
30-09-2019
3
200
01-10-2019
30-11-2019
3
300
01-12-2019
29-02-2020
I have tried to find the enddate of the currentrow is enddate+1 is startdate of the next row,if it is continious without any gaps then need to select startdate of the previous row and enddate of current row.
;with MyCTE as
(
select mt.EmployeeId, mt.StartDate, mt.EndDate, ROW_NUMBER() over (order by ID) as RowNum
from #Employees mt
)
select c1.employeeId, case when c2.employeeId is null then c1.StartDate else dateadd(dd,1, c2.EndDate) end as StartDate,
c1.EndDate
from MyCTE c1
left join MyCTE c2
on C1.employeeId=c2.employeeId and
--and dateadd(dd,1,c1.startdate)
c1.RowNum = c2.RowNum +1
This is a classic gaps-and-islands problem.
There are many solutions. A typical simple (if not very efficient) solution, is as follows:
Use LAG to identify rows which start a group/island (partitioning as necessary)
Use a windowed COUNT to assign a group ID to each of those
Group by that ID, and take the MIN/MAX of the values
WITH PrevValues AS (
SELECT *,
IsStart = CASE WHEN DATEADD(day, -1, StartDate) <=
LAG(EndDate) OVER (PARTITION BY EmployeeId, ProjectId ORDER BY StartDate)
THEN NULL ELSE 1 END
FROM Employees e
),
Groups AS (
SELECT *,
GroupId = COUNT(IsStart) OVER (PARTITION BY EmployeeId, ProjectId ORDER BY StartDate ROWS UNBOUNDED PRECEDING)
FROM PrevValues pv
)
SELECT
g.EmployeeId,
g.ProjectId,
StartDate = MIN(StartDate),
EndDate = MAX(EndDate)
FROM Groups g
GROUP BY
g.EmployeeId,
g.ProjectId,
g.GroupId;
db<>fiddle

How to Sum (MAX values) from different value groups in same column SQL Server

I have a table like this:
Date
Consec_Days
2015-01-01
1
2015-01-03
1
2015-01-06
1
2015-01-07
2
2015-01-09
1
2015-01-12
1
2015-01-13
2
2015-01-14
3
2015-01-17
1
I need to Sum the max value (days) for each of the consecutive groupings where Consec_Days are > 1. So the correct result would be 5 days.
This is a type of gaps-and-islands problem.
There are many solutions, here is one simple one
Get the start points of each group using LAG
Calculate a grouping ID using a windowed conditional count
Group by that ID and take the highest sum
WITH StartPoints AS (
SELECT *,
IsStart = CASE WHEN LAG(Consec_Days) OVER (ORDER BY Date) = 1 THEN 1 END
FROM YourTable t
),
Groupings AS (
SELECT *,
GroupId = COUNT(IsStart) OVER (ORDER BY Date)
FROM StartPoints
WHERE Consec_Days > 1
)
SELECT TOP (1)
SUM(Consec_Days)
FROM Groupings
GROUP BY
GroupId
ORDER BY
SUM(Consec_Days) DESC;
db<>fiddle
with cte as (
select Consec_Days,
coalesce(lead(Consec_Days) over (order by Date), 1) as next
from YourTable
)
select sum(Consec_Days)
from cte
where Consec_Days <> 1 and next = 1
db<>fiddle

select data, grouped as Histogram

I have data in this format:
CREATE TABLE data(y int)
INSERT INTO data VALUES ((1))
INSERT INTO data VALUES ((55555))
INSERT INTO data VALUES ((55555))
INSERT INTO data VALUES ((99999))
I want to create a histogram, for to get a rough overview, of how my data is distributed. I am thinking of this format as output:
lowerBoundary upperBoundary y
------------- ------------- -----------
0 9999 1
10000 19999 0
20000 29999 0
30000 39999 0
40000 49999 0
50000 59999 2
60000 69999 0
70000 79999 0
80000 89999 0
90000 99999 1
You will have to create a table of numbers, so that the 0-rows will be displayed correctly. Then you can calculate lower and upper boundary of each "group".
Example SQL:
SELECT lowerBoundary, upperBoundary, COUNT(d.y) AS y
FROM (
SELECT n*10000 AS lowerBoundary, (n+1)*10000-1 AS upperBoundary
FROM (
-- Selects possible groups. Make this big enough for your data.
SELECT ones.n + 10*tens.n + 100*hundreds.n AS n
FROM (VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) ones(n),
(VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) tens(n),
(VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) hundreds(n)
) numbersTable
) boundaries
-- join with data
LEFT JOIN data d
ON d.y BETWEEN lowerBoundary AND upperBoundary
-- avoid trailing '0' rows
WHERE lowerBoundary <= (SELECT MAX(d.y) FROM data d)
GROUP BY lowerBoundary, upperBoundary
ORDER BY 1
Click here to run this skript at SQL-Fiddle
Another option...
I use a TVF to generate dynamic ranges. Being a single-statement function, it is extremely fast. Furthermore, if you can't use a UDF, the logic is easily ported into a cte or sub-query.
Select RetVal1
,RetVaL2
,y = sum(case when y is null then 0 else 1 end)
From [dbo].[udf-Range-Number-Span](0,100000,10000) A
Left Join Data B on y>=RetVal1 and y<RetVal2
Group By RetVal1,RetVal2
Returns
RetVal1 RetVaL2 y
0.00 10000.00 1
10000.00 20000.00 0
20000.00 30000.00 0
30000.00 40000.00 0
40000.00 50000.00 0
50000.00 60000.00 2
60000.00 70000.00 0
70000.00 80000.00 0
80000.00 90000.00 0
90000.00 100000.00 1
The UDF if needed
CREATE FUNCTION [dbo].[udf-Range-Number-Span] (#R1 money,#R2 money,#Incr money)
Returns Table
Return (
with cte0(M) As (Select cast((#R2-#R1)/#Incr as int)),
cte1(N) As (Select 1 From (Values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N(N)),
cte2(N) As (Select Top (Select M from cte0) Row_Number() over (Order By (Select NULL)) From cte1 a,cte1 b,cte1 c,cte1 d,cte1 e,cte1 f,cte1 g,cte1 h )
Select RetSeq=1,RetVal1=#R1,RetVal2=#R1+#Incr
Union All
Select N+1,(N*#Incr)+#R1,((N*#Incr)+#R1)+#Incr
From cte2,cte0
Where N<cte0.M
)
--Max 100 million observations
--Select * from [dbo].[udf-Range-Number-Span](1,4,.5)

SQL Query time spent between certain value

I have a database for all temperatures the last 10 years.
Now I want to find all periods where the temperature was above ex. 15 degree.
Simplified example:
...
2015-05-10 12
2015-05-11 15 |
2015-05-12 16 |
2015-05-13 17 |
2015-05-14 16 |
2015-05-15 15 |
2015-05-16 12
2015-05-17 11
2015-05-18 15 |
2015-05-19 12
2015-05-20 18 |
...
Så now I want get all time periods like this:
Min Max
2015-05-11 2015-05-15
2015-05-18 2015-05-18
2015-05-20 2015-05-20
Any suggestion of how this query will look like ?
You could use CTE
CREATE TABLE #Date (DateT datetime, Value int )
INSERT INTO #Date
VALUES ('2015-05-10',12),
('2015-05-11',15),
('2015-05-12',16),
('2015-05-13',17),
('2015-05-14',16),
('2015-05-15',15),
('2015-05-16',12),
('2015-05-17',11),
('2015-05-18',15),
('2015-05-19',12),
('2015-05-20',18)
WITH t AS (
SELECT DateT d,ROW_NUMBER() OVER(ORDER BY DateT) i
FROM #Date
WHERE Value >= 15
GROUP BY DateT
)
SELECT MIN(d) as DataStart,MAX(d) as DataFinal, ROW_NUMBER() OVER(ORDER BY DATEDIFF(day,i,d)) as RN
FROM t
GROUP BY DATEDIFF(day,i,d)
RN column is optional you could use
SELECT MIN(d) as DataStart,MAX(d) as DataFinal
FROM t
GROUP BY DATEDIFF(day,i,d)
Here is a solution using a gaps and islands algorithm. It looks kind of bulky but it runs fast and scales great. It is also modular if you want to add a gap-allowed parameter and you can rewrite it to partition by some other columns and it still performs nicely.
Inspired by Peter Larssons post here: http://www.sqltopia.com/?page_id=83
WITH [theSource](Col1,Col2)
AS
(
SELECT Col1,Col2 FROM (VALUES
('2015-05-10',12),
('2015-05-11',15),
('2015-05-12',16),
('2015-05-13',17),
('2015-05-14',16),
('2015-05-15',15),
('2015-05-16',12),
('2015-05-17',11),
('2015-05-18',15),
('2015-05-19',12),
('2015-05-20',18)
) as x(Col1,Col2)
)
,filteredSource([Value])
AS
(
SELECT Col1 as [Value]
FROM theSource WHERE Col2 >= 15
)
,cteSource(RangeStart, RangeEnd)
AS (
SELECT RangeStart,
CASE WHEN [RangeStart] = [RangeEnd] THEN [RangeEnd] ELSE LEAD([RangeEnd]) OVER (ORDER BY Value) END AS [RangeEnd]
FROM (
SELECT [Value],
CASE
WHEN DATEADD(DAY,1,LAG([Value]) OVER (ORDER BY [Value])) >= [Value] THEN NULL
ELSE [Value]
END AS RangeStart,
CASE
WHEN DATEADD(DAY,-1,LEAD([Value]) OVER (ORDER BY [Value])) <= [Value] THEN NULL
ELSE [Value]
END AS RangeEnd
FROM filteredSource
) AS d
WHERE RangeStart IS NOT NULL
OR RangeEnd IS NOT NULL
)
SELECT RangeStart AS [Min],
RangeEnd AS [Max]
FROM cteSource
WHERE RangeStart IS NOT NULL;

Count field Pivot table In SQL Server

I have this table - Name : Mytable:
Amount Desc Month Sym code ID
$32,323.00 Bla1 1 121 3 2424221
$4,242.00 Bla1 1 121 3 2424221
$3,535.00 Bla1 1 121 1 3230824
$4,984.00 Bla2 1 433 1 3230824
$47,984.00 Bla2 2 433 1 3230824
$41.00 Bla2 2 433 1 3230824
$3,472.00 Bla6 1 D2 27 2297429
$3,472.00 Bla6 1 D2 27 2297429
$3,239.00 Bla6 2 D2 27 2297429
$4,249.00 Bla8 2 114 24 3434334
ID and Month Stands for for a paycheck. There are 6 paychecks : 1 + 3230824, 2+3230824 etc.
And I want to generate a pivot like this:
Jan Feb
count amount count amount
121 2 40100$ 0 0
433 1 52968$ 1 48025$
D2 1 6944$ 1 3239$
114 0 0 1 4249$
Explanation: 121 is two in Jan because ID = 2424221 got it twice and 3230824 got it one time. The number of of "occurrences" in the paychecks is two.
But, In the amount I sum every thing To get the total sum of money in the paycheck for that Sym.
Same, 433 got the value of 1 in Feb for example because only 3230824 got it (twice).
I started writing this:
SELECT *
FROM (
SELECT
[Sym] as Sym, [Month] as [month], [Amount] as Amount
FROM Mytable
) as T
PIVOT
(
Sum(Amount)
FOR [Month] IN ([1],[2])
)AS piv
Well, The amounts are correct But I don't know how can I pull this count as I explained near the amount in the pivot table.
SELECT
[Sym],
COALESCE(SUM(CASE WHEN [1] IS NULL THEN NULL ELSE [Cnt] END), 0) [Jan Count],
COALESCE(SUM(CASE WHEN [1] IS NULL THEN NULL ELSE [1] END), 0) [Jan Amount],
COALESCE(SUM(CASE WHEN [2] IS NULL THEN NULL ELSE [Cnt] END), 0) [Feb Count],
COALESCE(SUM(CASE WHEN [2] IS NULL THEN NULL ELSE [2] END), 0) [Feb Amount]
FROM (
SELECT
mt1.[Sym] as Sym, mt1.[Month] as [month], mt1.[Amount] as Amount, mt2.[Cnt]
FROM Mytable mt1
JOIN (SELECT COUNT(DISTINCT [ID]) [Cnt], [Sym], [Month]
FROM MyTable
GROUP BY [Sym], [Month]) mt2
ON mt1.[Sym] = mt2.[Sym] AND mt1.[Month] = mt2.[Month]
) as T
PIVOT
(
Sum(Amount)
FOR [Month] IN ([1],[2])
)AS piv
GROUP BY [Sym]
SQL Fiddle

Resources