SQL: return IDs whose val=min(exp)

SQL: return IDs whose val=min(exp) - sql-server

Given a table like
pkg#, time
0, 20
1, 23
2, 34
3, 35
4, 59
I want to know the pkg# who has max/min time difference to its successor pkg (gap between 2 consecutive pkgs)
In this case, pkg-2 has min time difference (1), and pkg-3 has max time difference (14)
What's the sql that can return pkg# for min/max time difference to its next pkg?

If you are on SQL SERVER 2012 or above, you can try LEAD function here to get the next row value to align in your current row:
SELECT *, LEAD([time]) OVER(ORDER BY [pkg#]) as nexttime
FROM [your_table]
will yield something like this:
pkg time nexttime
0 20 23
1 23 34
2 34 35
3 35 59
4 59 NULL
Now compare these two columns values should give you what you want. (Note last row will have nexttime = NULL since there's no more row to get value from, so just filter it out when querying).
Assume new table name is new_table, to get max diff:
select top 1 *, nexttime-time as diff
from new_table
where nexttime is not null
order by (nexttime-time) desc
and to get min diff just order by nexttime-time

A slight twist on #xbb 's answer:
CREATE TABLE #t ( Pkg INT, Time INT );
INSERT #t ( Pkg, Time )
VALUES ( 0, 20 ),
( 1, 23 ),
( 2, 34 ),
( 3, 35 ),
( 4, 59 );
SELECT Pkg
, Time
, Time - LAG(Time) OVER ( ORDER BY Pkg ) AS TimeSincePrevious
, ABS(time - LEAD(Time) OVER ( ORDER BY Pkg )) AS TimeUntilNext
FROM #t;
DROP TABLE #t;
Will yield the result:
Pkg Time TimeSincePrevious TimeUntilNext
0 20 NULL 3
1 23 3 11
2 34 11 1
3 35 1 24
4 59 24 NULL

Take a look at solution below - I decomposed query into three steps:
WITH Ordered AS
(
SELECT ROW_NUMBER() OVER (ORDER BY pkg) rowNum, pkg, [time] FROM Test
),
Diffs AS
(
SELECT T1.pkg,
T2.[time]-T1.[time] diff,
MIN(T2.[time]-T1.[time]) OVER () minimum,
MAX(T2.[time]-T1.[time]) OVER () maximum
FROM Ordered T1
JOIN Ordered T2 ON T1.rowNum = T2.rowNum-1
)
SELECT pkg, diff FROM Diffs
WHERE diff=minimum OR diff=maximum
ORDER by diff
Number of rows
Join with offset 1, calculate diff, MIN and MAX
Filter rows not equal to min or max
Query may return more rows if tie occurs. Ties can be simply removed by replacing final SELECT with:
...
SELECT MIN(pkg) pkg, diff FROM Diffs
WHERE diff=minimum OR diff=maximum
GROUP BY diff
ORDER by diff

Related

SQL Server script not working as expected

I have this little script that shall return the first number in a column of type int which is not used yet.
SELECT t1.plu + 1 AS plu
FROM tovary t1
WHERE NOT EXISTS (SELECT 1 FROM tovary t2 WHERE t2.plu = t1.plu + 1)
AND t1.plu > 0;
this returns the unused numbers like
3
11
22
27
...
The problem is, that when I make a simple select like
SELECT plu
FROM tovary
WHERE plu > 0
ORDER BY plu ASC;
the results are
1
2
10
20
...
Why the first script isn't returning some of free numbers like 4, 5, 6 and so on?

Compiling a formal answer from the comments.
Credit to Larnu:
It seems what the OP really needs here is an (inline) Numbers/Tally (table) which they can then use a NOT EXISTS against their table.
Sample data
create table tovary
(
plu int
);
insert into tovary (plu) values
(1),
(2),
(10),
(20);
Solution
Isolating the tally table in a common table expression First1000 to produce the numbers 1 to 1000. The amount of generated numbers can be scaled up as needed.
with First1000(n) as
(
select row_number() over(order by (select null))
from ( values (0),(0),(0),(0),(0),(0),(0),(0),(0),(0) ) a(n) -- 10^1
cross join ( values (0),(0),(0),(0),(0),(0),(0),(0),(0),(0) ) b(n) -- 10^2
cross join ( values (0),(0),(0),(0),(0),(0),(0),(0),(0),(0) ) c(n) -- 10^3
)
select top 20 f.n as Missing
from First1000 f
where not exists ( select 'x'
from tovary
where plu = f.n);
Using top 20 in the query above to limit the output. This gives:
Missing
-------
3
4
5
6
7
8
9
11
12
13
14
15
16
17
18
19
21
22
23
24

Calculating the stddev and avg between the most recent number and all the other numbers in a running list snowflake

I have a dataset that looks something like this:
id committed delivered timestamp stddev
1 10 8 01-02-2022 ?
2 20 15 01-14-2022 ?
3 12 12 01-30-2022 ?
4 2 0 02-14-2022 ?
.
.
99 null
I am trying to calculate the standard deviation between sprint x and all the sprints after sprint x; for example, the standard deviation and avg between sprint 1, 2, 3 & 4, 2, 3 & 4, 3 & 4, and so on. If there are no records after 4, that stddev would be null
With the current snowflake functions, I am generally unable to calculate the stddev in general, let alone do something with a lag/lead function.
Does anyone have any advice? Thanks in advance!
Update:
I've figured out how to calculate a moving avg over sprint x and the next sprint, but not for all previous sprints:
(delivered + lead(delivered) over (partition by id order by timestamp asc)) / 2
stddev can also be calculated using abs / sqrt (2)

You're looking for a frame clause -- this is part of the window function that can specify which rows in the current partition to use in the calculation.
select
id,
stddev(delivered) over (
order by id asc
rows between current row and unbounded following
) as stddev,
avg(delivered) over (
order by id asc
rows between current row and unbounded following
) as avg
from my_data

tconbeer is 100% correct, but here is the code and count to "show it working"
and you example data (moshed) into a VALUES section to avoid making a table.
I also stripped out timestamp, as it's not used in this demo, but normally I would order by that, but I could not see the pattern, so just dropped it, as it's non material to the example.
SELECT t.*
,count(delivered) over ( order by id asc rows between current row and unbounded following ) as _count
,stddev(delivered) over ( order by id asc rows between current row and unbounded following ) as stddev
,avg(delivered) over ( order by id asc rows between current row and unbounded following ) as avg
FROM VALUES
(1, 10, 8),
(2, 20, 15),
(3, 12, 12),
(4, 2, 0),
(5, 2, 0),
(6, 0, 0)
t(id, committed, delivered)
ORDER BY 1;
gives:
ID
COMMITTED
DELIVERED
_COUNT
STDDEV
AVG
1
10
8
6
6.765106577
5.833
2
20
15
5
7.469939759
5.4
3
12
12
4
6
3
4
2
0
3
0
0
5
2
0
2
0
0
6
0
0
1
null
0

you can create a dummy table which will have a id generated sequentially using the generator function for a particular range and do a LEFT join with the table. This way you will get rows with NULL values where the id is not present, and then you can use lag / leap to get the average.
--- untested
select seq4() as id1 , TMP.* from table(generator(rowcount => 10)) v
LEFT JOIN (SELECT * FROM (
SELECT 1 as id , 8 As committed , '01-02-2022' as delivered UNION ALL
SELECT 2 as id , 20 As committed , '01-14-2022' as delivered UNION ALL
SELECT 3 as id , 12 As committed , '01-30-2022' as delivered UNION ALL
SELECT 5 as id , 2 As committed , '02-14-2022' as delivered
)) TMP
ON trim(id1) = trim(tmp.id)

Accumulative Update for previous records

I have table that shows these information
Month NewClients OnHoldClients
5-2017 10 2
6-2017 16 4
7-2017 11 1
8-2017 15 6
9-2017 18 7
I am trying to find the accumulative total for each month
which is
(NewClients - OnHoldClients) + Previous Month Total
Something like this
Month NewClients OnHoldClients Total
5-2017 10 2 8
6-2017 16 4 20
7-2017 11 1 30
8-2017 15 6 39
9-2017 18 7 50
the query i tried to build was something like this but I think should be an easier way to do that
UPDATE MyTable
SET Total = (SELECT TOP 1 Total FROM MyTable B WHERE B.Month < A.Month) + NewClients - OnHoldClients
FROM MyTable A

Before we begin, note the mere fact that you're facing such calculative problem is a symptom that maybe you don't have the best possible design. Normally for this purpose calculated values are being stored along the way as the records are inserted. So i'd say you'd better have a total field to begin with and calculate it as records amass.
Now let's get down to the problem at hand. i composed a query which does that nicely but it's a bit verbose due to recursive nature of the problem. However, it yields the exact expected result:
DECLARE #dmin AS date = (SELECT min(mt.[Month]) from dbo.MyTable mt);
;WITH cte(_Month, _Total) AS (
SELECT mt.[Month] AS _Month, (mt.NewClients - mt.OnHoldClients) AS _Total
FROM dbo.MyTable mt
WHERE mt.[Month] = #dmin
UNION ALL
SELECT mt.[Month] AS _Month, ((mt.NewClients - mt.OnHoldClients) + ccc._Total) AS _Total
FROM dbo.MyTable mt
CROSS APPLY (SELECT cc._Total FROM (SELECT c._Total,
CAST((row_number() OVER (ORDER BY c._Month DESC)) AS int) as _Rank
FROM cte c WHERE c._Month < mt.[Month]) as cc
WHERE cc._Rank = 1) AS ccc
WHERE mt.[Month] > #dmin
)
SELECT c._Month, max(c._Total) AS Total
FROM cte c
GROUP BY c._Month
It is a recursive CTE structure that goes about each record all along the way to the initial month and adds up to the final Total value. This query only includes Month and Total fields but you can easily add the other 2 to the list of projection.

Try this
;WITH CTE([Month],NewClients,OnHoldClients)
AS
(
SELECT '5-2017',10,2 UNION ALL
SELECT '6-2017',16,4 UNION ALL
SELECT '7-2017',11,1 UNION ALL
SELECT '8-2017',15,6 UNION ALL
SELECT '9-2017',18,7
)
SELECT [Month],
NewClients,
OnHoldClients,
SUM(MonthTotal)OVER( ORDER BY [Month]) AS Total
FROM
(
SELECT [Month],
NewClients,
OnHoldClients,
SUM(NewClients-OnHoldClients)OVER(PArtition by [Month] Order by [Month]) AS MonthTotal
FROM CTE
)dt
Result,Demo:http://rextester.com/DKLG54359
Month NewClients OnHoldClients Total
--------------------------------------------
5-2017 10 2 8
6-2017 16 4 20
7-2017 11 1 30
8-2017 15 6 39
9-2017 18 7 50

Sql comparaison nested on one column

Imagine a table :
ID Month Year Value 1
1 May 17 58
2 June 09 42
3 December 18 58
4 December 18 58
5 September 10 84
6 May 17 42
7 January 16 3
I want to return all the data that shares the same month and year where Value 1 is different. So in our example, I want to return 1 and 6 only but not 3 and 4 or any of the other entries.
Is there a way to do this? I am thinking about a combination of distinct and group by but can't seem to come up with the right answer being new to SQL.
Thanks.

It could be done without grouping, but with simple self-join:
select distinct t1.*
from [Table] t1
inner join [Table] t2 on
t1.Month = t2.Month
and t1.Year = t2.Year
and t1.Value_1 <> t2.Value_1
You can find some information and self-join examples here and here.

For each row you can examine aggregates in its group with the OVER clause. eg:
create table #t(id int, month varchar(20), year int, value int)
insert into #t(id,month,year,value)
values
(1,'May' ,17, 58 ),
(2,'June' ,09, 42 ),
(3,'December' ,18, 58 ),
(4,'December' ,18, 58 ),
(5,'September',10, 84 ),
(6,'May' ,17, 42 ),
(7,'January' ,16, 3 );
with q as
(
select *,
min(value) over (partition by month,year) value_min,
max(value) over (partition by month,year) value_max
from #t
)
select id,month,year,value
from q
where value_min <> value_max;

If I understood your question correctly, you are looking for the HAVING keyword.
If you GROUP BY Month, Year, Value_1 HAVING COUNT(*) = 1, you get all combinations of Month, Year and Value_1 that have no other occurrence.

regarding updating rows in table without using loop

I have table in which I have column called quantity also I have 10 rows which having same column value 200(it can be any value)
Requirement is: if a input value is x=500(or anynumber) then this value should be compared with quantity column value in a fasion below:
if 1 row's quantity is 200 then it should subtract it form 500 and x should be updated to 300 and quantity of that row should be made 0 then It should move to next row till x is 0
could you please help me write sql query for this...
it is ask that loops should not be used.
thanks,

What is the version of SQL Server? If it's 2012 or 2014, you can use the following:
DECLARE #x int = 500
;WITH cte_sum AS
(
SELECT quantity,
ISNULL(SUM(quantity) OVER (ORDER BY (SELECT NULL) ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0) sum_running_before,
SUM(quantity) OVER (ORDER BY (SELECT NULL) ROWS UNBOUNDED PRECEDING) sum_running_total
FROM YourTable
)
UPDATE cte_sum
SET quantity = CASE
WHEN quantity >= #x - sum_running_before THEN
quantity - (#x - sum_running_before)
ELSE 0
END
WHERE (#x >= sum_running_total OR (#x < sum_running_total AND sum_running_before < #x))
It's a bit more tricky to get running totals in earlier versions but I think you got the main idea.

DECLARE #YourTable TABLE
(
CustId INT,
Quantity INT
)
INSERT INTO #YourTable
( CustId, Quantity )
VALUES
( 1, 10 ),
( 1, 10 ),
( 1, 10 ),
( 1, 10 ),
( 2, 20 ),
( 2, 20 ),
( 2, 20 ),
( 2, 20 );
;WITH cte_sum AS
(
SELECT
y.CustId,
y.Quantity,
ROW_NUMBER() OVER (PARTITION BY CustId ORDER BY Quantity) RN
FROM #YourTable y
)
SELECT s1.CustId, s1.Quantity, s2.Qty, s1.Quantity + ISNULL(s2.Qty, 0) RunningTotal, s1.RN
FROM cte_sum s1
OUTER APPLY
(
SELECT SUM(Quantity) Qty FROM cte_sum s2
WHERE s2.CustId = s1.CustId
AND s2.RN < s1.RN
) s2
ORDER BY s1.CustId, s1.RN
Here's an example of a running total that will work for Sql Server 2005+
This is the output:
CustId Quantity Qty RunningTotal RN
1 10 NULL 10 1
1 10 10 20 2
1 10 20 30 3
1 10 30 40 4
2 20 NULL 20 1
2 20 20 40 2
2 20 40 60 3
2 20 60 80 4

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight

SQL: return IDs whose val=min(exp) - sql-server

Related

SQL Server script not working as expected

Calculating the stddev and avg between the most recent number and all the other numbers in a running list snowflake

Accumulative Update for previous records

Sql comparaison nested on one column

regarding updating rows in table without using loop

Categories

Resources