Edge detection in SQL Server query - sql-server

I've got a table where the value in each row can be one of two values, eg 0 or 1. Each row also has an ID identity column.
What's the most efficient way to query the table to return the min and max ID values for each contiguous group of values?
Example:
declare #tbl table (ID INT IDENTITY(1,1), Value INT);
insert into #tbl (Value)
values (1), (1), (1), (0), (0), (1), (0), (0), (1), (1), (1), (1);
Table contents:
ID Value
1 1
2 1
3 1
4 0
5 0
6 1
7 0
8 0
9 1
10 1
11 1
12 1
What I'd like to output from the query:
GroupID Value MinID MaxID
1 1 1 3
2 0 4 5
3 1 6 6
4 0 7 8
5 1 9 12
I've worked out a query to return these results. However, it's inefficient as it has to run over the table 4x, twice to get the MinID values and twice again for the MaxID values. As the real table I'll be running this query over has over 10 million rows, I'm looking for a more efficient query.
Here's the query I've worked out:
declare #maxID INT;
select #maxID = MAX(ID)
from #tbl;
select min.RowID AS GroupID, min.Value, min.MinID, max.MaxID
from
(
select
ROW_NUMBER() OVER (ORDER BY x.ID) AS RowID,
x.Value, x.ID as MinID
from #tbl x
left join #tbl ld on x.ID = ld.ID + 1
and x.Value <> ld.Value
where x.ID = 1
or ld.ID is not null
) min
join
(
select
ROW_NUMBER() OVER (ORDER BY x.ID) AS RowID,
x.Value, x.ID as MaxID
from #tbl x
left join #tbl trl on x.ID = trl.ID - 1
and x.Value <> trl.Value
where trl.ID is not null
or x.ID = #maxID
) max
on min.RowID = max.RowID
order by GroupID;
Is there a better way to achieve the same results?

Try the following which uses lag to first identify where each group of values changes, then a running sum to create a sequential value for each group; with that in place you can then simply aggregate:
declare #tbl table (ID int identity(1,1), Value int);
insert into #tbl (Value)
values (1), (1), (1), (0), (0), (1), (0), (0), (1), (1), (1), (1);
with grp as (
select *,
case when Lag(Value) over(order by ID) = Value then 0 else 1 end gp
from #tbl
), seq as (
select *, Sum(gp) over(order by ID) s
from grp
)
select
s + 1 as GroupId,
Value,
Min(ID) MinId,
Max(ID) MaxId
from seq
group by s + 1, Value
order by GroupId;
fiddle Demo

Related

Update null values by value in same column

I have a table in MS SQL Server, where are some null values in column "value"
Group ID Value
A 1 10
A 2
A 3
A 4 40
B 1
B 2 20
B 3 30
B 4
I want to update null values by not null in the same group with with the first higher ID, or if there is not any higher in same group, first lower. So the result should look like this.
Group ID Value
A 1 10
A 2 40
A 3 40
A 4 40
B 1 20
B 2 20
B 3 30
B 4 30
Thanks!
You can use windowed version of SUM function in order to determine islands of NULL valued records along with the record having the higher ID in the same group:
SELECT [Group], ID, Value,
SUM(CASE WHEN Value IS NULL THEN 0 ELSE 1 END) OVER
(PARTITION BY [Group] ORDER BY ID DESC) AS grp
FROM mytable
Output:
Group ID Value grp
-----------------------
A 4 40 1
A 3 30 2
A 2 NULL 2
A 1 NULL 2
B 4 40 1
B 3 NULL 1
B 2 20 2
B 1 10 3
You can now wrap the above query in a CTE and use another CTE to do the update:
;WITH CTE AS (
SELECT [Group], ID, Value,
SUM(CASE WHEN Value IS NULL THEN 0 ELSE 1 END) OVER
(PARTITION BY [Group] ORDER BY ID DESC) AS grp
FROM mytable
), ToUpdate AS (
SELECT [Group], ID, Value,
MAX(Value) OVER (PARTITION BY [Group], grp) AS group_value
FROM CTE
)
UPDATE ToUpdate
SET Value = group_value
WHERE Value IS NULL
Demo here
Edit:
The above query doesn't handle the edge case where the very last record within a Group slice is NULL. To handle this case as well you can use the following query:
;WITH CTE AS (
SELECT [Group], ID, Value,
SUM(CASE WHEN Value IS NULL THEN 0 ELSE 1 END) OVER
(PARTITION BY [Group] ORDER BY ID DESC) AS grp,
SUM(CASE WHEN Value IS NULL THEN 0 ELSE 1 END) OVER
(PARTITION BY [Group] ORDER BY ID) AS grp2
FROM mytable
), ToUpdate AS (
SELECT [Group], ID, Value,
MAX(Value) OVER (PARTITION BY [Group], grp) AS group_value,
MAX(Value) OVER (PARTITION BY [Group], grp2) AS group_value2
FROM CTE
)
UPDATE ToUpdate
SET Value = COALESCE(group_value, group_value2)
WHERE Value IS NULL
Demo here
Please try this-
DATA GENERATION
DECLARE #T TABLE
(
GroupCd CHAR(1),
Id INT,
Value INT
)
INSERT INTO #T
VALUES('A',1,10),
('A',2,NULL),
('A',3,NULL),
('A',4,40),
('B',1,NULL),
('B',2,20),
('B',3,30),
('B',4,NULL)
SOLUTION
UPDATE a
SET a.Value = b.Value
FROM #T a
INNER JOIN
(
SELECT a.GroupCd,a.Id,Coalesce(a.Value,z.Value,z1.Value) Value
FROM #T a
OUTER APPLY
(
SELECT TOP 1 Value
FROM #T b
WHERE a.GroupCd = b.GroupCd
AND b.Value IS NOT NULL AND a.Id < b.Id
ORDER BY Id
)z
OUTER APPLY
(
SELECT TOP 1 Value
FROM #T b
WHERE a.GroupCd = b.GroupCd
AND b.Value IS NOT NULL AND a.Id > b.Id
ORDER BY Id DESC
)z1
)b ON a.GroupCd = b.GroupCd AND a.Id = b.Id
SELECT * FROM #T
OUTPUT
GroupCd Id Value
------- ----------- -----------
A 1 10
A 2 40
A 3 40
A 4 40
B 1 20
B 2 20
B 3 30
B 4 30
(8 rows affected)
You Can try This simple Method
DECLARE #T TABLE
(
GroupCd CHAR(1),
Id INT,
Value INT
)
INSERT INTO #T
VALUES('A',1,NULL),
('A',2,NULL),
('A',3,30),
('A',4,40),
('B',1,10),
('B',2,20),
('B',3,NULL),
('B',4,40)
SELECT
*,
NewVal = COALESCE(Value,(SELECT TOP 1 Value FROM #T WHERE GroupCd = T.GroupCd AND Id > T.Id AND Value IS NOT NULL ORDER BY Id ASC))
FROM #T T
My Result
update MY_TABLE set [value] = [newValue] from (
select [Group] [newGroup],
[Value] [newValue]
from (
select [Group], [Value],
row_number() over (partition by [group] order by [Id] desc) [rn]
from MY_TABLE
where [Value] is not null
) [a] where [rn] = 1
) where [Group] = [newGroup] and [Value] is null

How to select the values of multiple columns in sorted order in mssql?

In a table there are three columns, each containing numeric values (in my case representing length, width and height, but could be anything).
How can I select them, ordered by their value?
For example, given the values:
id | length | width | height
1 | 100 | 30 | 50
2 | 6 | 12 | 9
Expected output would be:
id | min | mid | max
1 | 30 | 50 | 100
2 | 6 | 9 | 12
Use the Table Value Constructor with ORDER BY to get the relevent values in sorted order.
Using OFFSET X ROWS and FETCH NEXT 1 ROWS ONLY, you can access a specific position inside the sorted column values.
Use it repeatedly (increasing OFFSET by 1 in each step) to access the sorted column values at each position.
SELECT
length,
width,
height,
(
SELECT dimensions
FROM ( VALUES (length),(width),(height) ) AS compare(dimensions)
ORDER BY dimensions ASC
OFFSET 0 ROWS
FETCH NEXT 1 ROWS ONLY
) AS minDimension,
(
SELECT dimensions
FROM ( VALUES (length),(width),(height) ) AS compare(dimensions)
ORDER BY dimensions ASC
OFFSET 1 ROWS
FETCH NEXT 1 ROWS ONLY
) AS midDimension,
(
SELECT dimensions
FROM ( VALUES (length),(width),(height) ) AS compare(dimensions)
ORDER BY dimensions ASC
OFFSET 2 ROWS
FETCH NEXT 1 ROWS ONLY
) AS maxDimension
FROM sometable
Try this
DECLARE #T TABLE
(
Id INT,
[Length] INT,
Width INT,
Height INT
)
INSERT INTO #T
VALUES(1,100,30,50),(2,6,9,12)
;WITH CTE
AS
(
SELECT
Id,
Val = [Length]
FROM #T
UNION ALL
SELECT
Id,
Val = Width
FROM #T
UNION ALL
SELECT
Id,
Val = Height
FROM #T
),C2
AS
(
SELECT
SeqNo = ROW_NUMBER() OVER(PARTITION BY ID ORDER BY Val),
Id,
Val
FROM CTE
)
SELECT
Id,
[1] "min",
[2] "mid",
[3] "max"
FROM C2
PIVOT
(
MAX(VAL)
FOR
SeqNo IN
(
[1],[2],[3]
)
)q
One alternate solution, using PERCENTILE_CONT to obtain the Median:
WITH Measurement AS(
SELECT *
FROM (VALUES (1,100,30,50),
(2,6,12,9)) V(ID, [Length], Width, Height)),
Pvt AS(
SELECT ID,
D.Measurement AS MeasurementType,
CASE WHEN D.Measurement = 'Length' THEN M.[Length]
WHEN D.Measurement = 'Width' THEN M.Width
WHEN D.Measurement = 'Height' THEN M.Height
END AS Measurement
FROM Measurement M
CROSS APPLY (VALUES ('Length'),('Width'),('Height')) D(Measurement)),
Median AS(
SELECT ID,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY Measurement) OVER (PARTITION BY ID) AS Median,
MeasurementType, Measurement
FROM Pvt)
SELECT ID,
MIN(Measurement) AS [Min],
Median,
MAX(Measurement) AS [Max]
FROM Median
GROUP BY ID, Median
ORDER BY ID;
First, make the values of columns length, width and height to one column using union. And then use this result-set as a sub-query and give a row number based on group by id and descending order of the newly added column. Then by using a CASE expression take the row having row number 1 as max, 2 as mid and 3 as min.
Query
;with cte as(
select [rn] = row_number() over(
partition by t.[id]
order by t.[new_col] desc
), *
from (
select [id], [length] as [new_col] from [your_table_name]
union all
select [id], [width] from [your_table_name]
union all
select [id], [height] from [your_table_name]
) t
)
select [id],
max(case [rn] when 1 then [new_col] end) as [max],
max(case [rn] when 2 then [new_col] end) as [mid],
max(case [rn] when 3 then [new_col] end) as [min]
from cte
group by [id];
Fiddle demo
This seems to work:
Query
declare #table table (id integer, length integer, width integer, height integer)
insert into #table select 1,100,30,50
insert into #table select 2,6,12,9
select id, MIN(a) min
,(select top 1 max(c.a) from
(
select id,length a from #table
union all
select id,width a from #table
union all
select id,height a from #table
)c
where c.id = b.id and c.a <> max(b.a) and c.a <> min(b.a) group by c.id) mid, max(a) max
from
(
select id,length a from #table
union all
select id,width a from #table
union all
select id,height a from #table
)b
group by id

SQL QUERY to Start Numbering Row_ID Everytime a Unique number Is Found

is there any possible way to get a unique id for similar kind of number for example
H_ID ROW_ID
1 1
1 2
1 3
2 1
2 2
3 1
3 2
AS SHOWN 1 IS REPEATED 3 TIMES SO NUMBERS ARE 1,2,3 AND THEN AGAIN STARTS WITH 1
Try it's
declare #table table (h_id int)
insert into #table
select 1
insert into #table
select 1
insert into #table
select 1
insert into #table
select 2
insert into #table
select 2
insert into #table
select 3
insert into #table
select 3
select h_id,d
from (select row_number() over(partition by h_id order by h_id ) as d,*
from #table) as source
Reference https://learn.microsoft.com/en-us/sql/t-sql/functions/row-number-transact-sql
You can use ROW_NUMBER() window function
CREATE TABLE #ROW_TEST
(
H_ID INT
)
INSERT INTO #ROW_TEST
SELECT 1
UNION ALL SELECT
1
UNION ALL SELECT
1
UNION ALL SELECT
2
UNION ALL SELECT
2
UNION ALL SELECT
3
UNION ALL SELECT
3
SELECT H_ID,ROW_NUMBER() OVER (PARTITION BY H_ID ORDER BY H_ID) FROM #ROW_TEST

Adding columns in a query executed from a stored procedure

I have a query in a stored procedure like below
select x,y from table
and the results will look like below
x y
1 a
1 b
2 a
2 b
3 a
3 b
i need to add a blank column or zeros when the value of x changes like below
x y
1 a
1 b
0 0
2 a
2 b
0 0
3 a
3 b
Can this be done by sql or since i'm using the data for birt reports can this be done with birt?
You need UNION ALL to add the extra rows, you also need to ORDER them, the DENSE_RANK is to get rid of the extra row.
here is how it could be done:
DECLARE #t table(x int, y char(1))
INSERT #t values
(1,'a'),(1,'b'),(2,'a'),
(2,'b'),(3,'a'),(3,'b')
;WITH CTE AS
(
SELECT
2 rn, x,y, x sort from #t
UNION ALL
SELECT
distinct dense_rank() over (order by x desc) rn, 0, '0', x+.1 sort
FROM #t
)
SELECT x,y
FROM CTE
WHERE rn > 1
ORDER BY sort, x
Result:
x y
1 a
1 b
0 0
2 a
2 b
0 0
3 a
3 b
This is working example:
DECLARE #DataSource TABLE
(
[x] TINYINT
,[y] CHAR(1)
);
INSERT INTO #DataSource ([x], [y])
VALUES (1, 'a')
,(1, 'b')
,(2, 'a')
,(2, 'b')
,(3, 'a')
,(3, 'b');
WITH DataSource AS
(
SELECT *
FROM #DataSource
UNION ALL
-- the NULL will be always displayed on the first position
SELECT DISTINCT [x]
,NULL
FROM #DataSource
)
SELECT IIF([Rank] = 1, 0, [x])
,IIF([Rank] = 1, 0, [x])
FROM
(
SELECT ROW_NUMBER() OVER(PARTITION BY [x] ORDER BY [y]) AS [Rank]
,[x]
,[y]
FROM DataSource
) DS
ORDER BY [x]
,[Rank]
Few important notes:
the NULL values for each x will be with rank 1 always
the final result set is sorted by x and rank both
declare #t table (X varchar(1),Y varchar(1))
insert into #t(X,y) values (1,'A'),
(1,'B'),
(2,'A'),
(2,'B'),
(3,'A'),
(3,'B')
;with CTE As(
select X,Y,ROW_NUMBER()OVER(PARTITION BY X,Y ORDER BY X)RN
from #t
CROSS APPLY
(
values
('',NULL),
('',NULL)
) C(R, N)),
CTE2 AS(
Select CASE WHEN RN > 1 THEN 0 ELSE X END X ,
CASE WHEN RN > 1 THEN CAST(0 AS VARCHAR) ELSE Y END ID
,ROW_NUMBER()OVER(PARTITION BY X ORDER BY (SELECT NULL)) R
FROM CTE
)
select X,ID from cte2 where R <> 2

SQL Server 2008 : TSQL, select same data for different times based on column value

I am using mssql 2008 R2,
i have below structure
create table #temp (
product int,
[order] int,
ord_qnty int
)
insert #temp
select 10 ,3,4
now, if ord_qnty is 4 , i want to select same product,order four times but in all four rows thevalue of ord_qnty should be 1 , i.e.
out put should be
Product order ord_qnty
10 3 1
10 3 1
10 3 1
10 3 1
If you have a numbers table, you can use that. If not, you can generate one:
;with Numbers(n) as (
select ROW_NUMBER() OVER (ORDER BY object_id) from sys.objects
)
select product,[order],1 as ord_qnty
from #temp t inner join Numbers num
on t.ord_qnty >= num.n
(In my nearly empty scratch database, the ROW_NUMBER() generates 77 rows. If that's not going to be enough, you can introduce cross-joins or use other tricks to generate more numbers, or you can create and populate a permanent numbers table)
Try this one -
Query:
DECLARE #temp TABLE
(
product INT
, [order] INT
, ord_qnty INT
)
INSERT #temp(product, [order], ord_qnty)
SELECT 10, 3, 4
SELECT
t.product
, t.[order]
, ord_qnty = 1
FROM #temp t
JOIN [master].dbo.spt_values sv ON t.ord_qnty > sv.number
WHERE sv.[type] = 'p'
SELECT
t.product
, t.[order]
, ord_qnty = 1
FROM #temp t
JOIN (
SELECT number = ROW_NUMBER() OVER (ORDER BY (SELECT 1))
FROM sys.system_parameters p
) sv ON t.ord_qnty >= sv.number
Output:
product order ord_qnty
----------- ----------- -----------
10 3 1
10 3 1
10 3 1
10 3 1
Query Cost:
For any "millions value":
SET NOCOUNT ON;
DECLARE #numbers TABLE (number INT)
DECLARE #temp TABLE
(
product INT
, [order] INT
, ord_qnty INT
)
INSERT #temp(product, [order], ord_qnty)
SELECT 10, 3, 4
DECLARE
#i BIGINT = 1
, #max BIGINT = (
SELECT MAX(ord_qnty)
FROM #temp
)
WHILE (#i <= #max) BEGIN
INSERT INTO #numbers (number)
VALUES (#i), (#i+1), (#i+2), (#i+3), (#i+4), (#i+5), (#i+6), (#i+7), (#i+8), (#i+9)
SELECT #i += 10
END
SELECT
t.product
, t.[order]
, ord_qnty = 1
FROM #temp t
CROSS JOIN (
SELECT *
FROM #numbers
WHERE number < #max + 1
) t2

Resources