JOIN on multilple rows to SELECT only TOP row in SQL Sever - sql-server

I have a table which has only 1 row per ID/Date while the other one can have overlapping ID/Dates. I need to JOIN them in such a way that only top row is selected (actually any row from duplicates table is fine!). Its fairly easy to do this in 2 steps (Insert & Update) but I'm looking if it can be done in a single step.
CREATE TABLE #Row1Each (ID VARCHAR(10), Date_ DATETIME, Value FLOAT) INSERT INTO #Row1Each SELECT 'AAPL', '1/10/2015', 100 INSERT INTO #Row1Each SELECT 'MSFT', '1/10/2015', 20
CREATE TABLE #Table1 (ID VARCHAR(10), Date_ DATETIME, Qty FLOAT) INSERT INTO #Table1 SELECT 'AAPL', '1/10/2015', 55000
CREATE TABLE #Duplicates (ID VARCHAR(10), StartDate DATETIME, EndDate DATETIME, Quote FLOAT) INSERT INTO #Duplicates SELECT 'AAPL', '1/2/2015', '12/31/2016', 0.1 INSERT INTO #Duplicates SELECT 'AAPL', '1/4/2015', '10/05/2016', 0.11
/*
AAPL 2015-01-10 00:00:00.000 100
MSFT 2015-01-10 00:00:00.000 20
AAPL 2015-01-10 00:00:00.000 55000
AAPL 2015-01-02 00:00:00.000 2016-12-31 00:00:00.000 0.1
AAPL 2015-01-04 00:00:00.000 2016-10-05 00:00:00.000 0.1
*/
SELECT A.*
, B.Qty
, C.Quote
FROM #Row1Each A
LEFT JOIN #Table1 B ON B.ID = A.ID
AND B.Date_ = A.Date_
LEFT JOIN #Duplicates C ON C.ID = A.ID
AND A.Date_ BETWEEN C.StartDate AND C.EndDate
DROP TABLE #Row1Each DROP TABLE #Table1 DROP TABLE #Duplicates
/* Desired output
AAPL 2015-01-10 00:00:00.000 100 55000 0.1
MSFT 2015-01-10 00:00:00.000 20 NULL NULL
*/

You can do this using APPLY.
SELECT
r.Id,
r.Date_,
r.Value,
t.Qty,
d.Quote
FROM #Row1Each r
LEFT JOIN #Table1 t
ON t.ID = r.ID
AND t.Date_ = r.Date_
OUTER APPLY(
SELECT TOP 1 *
FROM #Duplicates d
WHERE
d.ID = r.ID
AND r.Date_ BETWEEN d.StartDate AND d.EndDate
ORDER BY EndDate DESC -- Returns Top 1 Based on EndDate
)d

Related

Repeated data on inserted rows

--demo setup
drop table if exists dbo.product
go
create table dbo.Product
(
ProductId int,
ProductTitle varchar(55),
ProductCategory varchar(255),
Loaddate datetime
)
insert into dbo.Product
values (1, 'Table', 'ABCD', '3/4/2018'),
(1, 'Table', 'ABCD', '3/5/2018'),
(1, 'Table', 'ABCD', '3/6/2018'),
(1, 'Table', 'XYZ', '3/7/2018'),
(1, 'Table', 'XYZ', '3/8/2018'),
(1, 'Table', 'XYZ', '3/9/2018'),
(1, 'Table', 'GHI', '3/10/2018'),
(1, 'Table', 'GHI', '3/11/2018'),
(1, 'Table', 'XYZ', '3/12/2018'),
(1, 'Table', 'XYZ', '3/13/2018')
SELECT
product.productid,
product.producttitle,
product.productcategory,
MIN(product.loaddate) AS BeginDate,
-- ,max(product.LoadDate) as BeginDate1
CASE
WHEN MAX(product.loaddate) = MAX(oa.enddate1)
THEN '12/31/9999'
ELSE MAX(product.loaddate)
END AS EndDate
FROM
dbo.product product
CROSS APPLY
(SELECT MAX(subproduct.loaddate) EndDate1
FROM dbo.product subproduct
WHERE subproduct.productid = product.productid) oa
GROUP BY
productid, producttitle, productcategory
Output:
productid
producttitle
productcategory
BeginDate
EndDate
1
Table
ABCD
2018-03-04 00:00:00.000
2018-03-06 00:00:00.000
1
Table
XYZ
2018-03-07 00:00:00.000
9999-12-31 00:00:00.000
1
Table
GHI
2018-03-10 00:00:00.000
2018-03-11 00:00:00.000
Desired output:
productid
producttitle
productcategory
BeginDate
EndDate
1
Table
ABCD
2018-03-04 00:00:00.000
2018-03-06 00:00:00.000
1
Table
XYZ
2018-03-07 00:00:00.000
2018-03-09 00:00:00.000
1
Table
GHI
2018-03-10 00:00:00.000
2018-03-11 00:00:00.000
1
Table
XYZ
2018-03-12 00:00:00.000
9999-12-31 00:00:00.000
The last two inserted rows repeat the data from Loaddate '3/7/2018'-'3/9/2018', this doesn't happen if any of the new inserted rows doesn't repeat data. The only thing that changes is the LoadDate, giving me incorrect output. how can i get something like that desired output?
Well, first of all, you need to find a sequence number over all your records. If you already have a primary key, that's good. In example you gave us, there's no such column, so let's generate it.
Then, we make pairs with start and end dates for each product's category change. Another thing is to group all these product's category changes.
Finally, we make just a simple group by:
;
with cte as ( select *,
row_number() over(partition by ProductId order by Loaddate) as rn
from product
), cte2 as ( select t1.ProductId,
t1.ProductTitle,
t1.ProductCategory,
t1.Loaddate as BeginDate,
case
when t1.ProductCategory <> t2.ProductCategory
then t1.Loaddate
else coalesce(t2.Loaddate, null)
end as EndDate,
row_number() over(order by t1.ProductId, t1.Loaddate) as rn_overall,
row_number() over(partition by t1.ProductId, t1.ProductCategory order by t1.Loaddate) as rn_category
from cte as t1
left join cte as t2
on t2.ProductId = t1.ProductId
and t2.rn = t1.rn + 1
), cte3 as ( select *,
min(rn_overall) over (partition by ProductId, ProductCategory, rn_overall - rn_category) as product_group
from cte2
)
select ProductId, ProductTitle, ProductCategory,
min(BeginDate) as BeginDate,
case
when max(case when EndDate is null then 1 else 0 end) = 0
then max(EndDate)
else null
end as EndDate
from cte3
group by ProductId, ProductTitle, ProductCategory, product_group
order by ProductId, BeginDate

Want Result from one to many relation table by Group By

I would like to get result by "group by" but not luck. I can see other option but looking best perfomance query.
Thanks in Advance !
CREATE TABLE #Invoice (InvoiceId int, InvoiceDate datetime, NetAmount
decimal(18,2))
CREATE TABLE #Payment (PaymentId int, InvoiceId int, PaidAmount
decimal(18,2))
INSERT INTO #Invoice VALUES (101, '20180212', 5000)
INSERT INTO #Invoice VALUES (102, '20180112', 600)
INSERT INTO #Invoice VALUES (103, '20181211', 1800)
INSERT INTO #Invoice VALUES (104, '20180101', 1000)
INSERT INTO #Invoice VALUES (105, '20180212', 7000)
INSERT INTO #Payment VALUES (101,103,1800)
INSERT INTO #Payment VALUES (102,102,500)
INSERT INTO #Payment VALUES (103,101,2000)
INSERT INTO #Payment VALUES (103,101,3000)
Create this query :
SELECT
INV.InvoiceDate,
SUM(Inv.NetAmount) as NetAmount,
SUM(ISNULL(PY.PaidAmount,0)) As PaidAmount
From #Invoice INV
LEFT JOIN #Payment PY
ON PY.InvoiceId = INV.InvoiceId
GROUP BY
INV.InvoiceDate
Get Result :
InvoiceDate NetAmount PaidAmount
2018-01-01 1000.00 0.00
2018-01-12 600.00 500.00
2018-02-12 17000.00 5000.00 ****Issue: Net values should be 12000 not 17000
2018-12-11 1800.00 1800.00
Expected Result :
InvoiceDate NetAmount PaidAmount
2018-01-01 1000.00 0.00
2018-01-12 600.00 500.00
2018-02-12 5000.00 5000.00
2018-12-11 1800.00 1800.00
This assumes that the expected results the OP has provided is wrong. They have the value 5000 for the NetAmount on 20180212, however there are 2 invoices on that date with the values 5000 and 7000, making 12000. If 5000 is correct, we need details on why the value of invoice 105 (7000) is not to be included.
Anyway, on the assumption the expected results is wrong, this gets the result I believe you are looking for:
WITH CTE AS(
SELECT I.InvoiceId,
I.InvoiceDate,
I.NetAmount,
P.PaymentId,
P.PaidAmount,
ROW_NUMBER() OVER (PARTITION BY I.InvoiceId, I.InvoiceDate ORDER BY (SELECT NULL)) AS RN
FROM #Invoice I
LEFT JOIN #Payment P ON I.InvoiceId = P.InvoiceId)
SELECT C.InvoiceDate,
SUM(CASE RN WHEN 1 THEN C.NetAmount END) AS NetAmount,
ISNULL(SUM(C.PaidAmount),0) AS PaidAmount
FROM CTE C
GROUP BY C.InvoiceDate;

Please advise my the stored procedure which is group by and total which different criteria

I have answered one of the interview questions as below.
There are two tables (employee and Department).
Show report No. of people(count) and total salary where IT Dept. salary from 250 to 500 and Sales Dept. salary from 250 to 1000 and Marketing Dept. salary from 250 to 1500.
Sample expected result below
Marketing 0 0.00
Information Technology 1 250.00
Sales 2 1200.00
Employee table
EmpID EmpName DeptID Salary
1 Mike 1 1000.00
2 Paul 1 1500.00
3 John 1 2000.00
4 Joe 2 500.00
5 Kim 3 2000.00
6 Lim 3 2500.00
7 Sam 2 700.00
8 Mario 1 250.00
Department table
DeptID DeptCode DeptName
1 IT Information Technology
2 ST Sales
3 MT Marketing
My Answer:
ALTER PROCEDURE [dbo].[TheseAndThat]
AS
BEGIN
-- SET NOCOUNT ON added to prevent extra result sets from
-- interfering with SELECT statements.
SET NOCOUNT ON;
-- Insert statements for procedure here
SELECT dd.DeptName, ISNULL(TT.c,0) AS StaffCount , ISNULL(TT.s,0) AS TotalSalary FROM [dbo].[Department] dd
LEFT JOIN
(
SELECT d.DeptCode AS dcode, COUNT(*) as c, SUM(e.Salary) as s FROM [dbo].[Employee] e
JOIN [dbo].[Department] d
ON e.DeptID = d.DeptID
WHERE e.Salary between 250 and 500 AND d.DeptID = 1
GROUP BY e.DeptID, d.DeptCode
UNION
SELECT d.DeptCode AS dcode, COUNT(*) as c, SUM(e.Salary) as s FROM [dbo].[Employee] e
JOIN [dbo].[Department] d
ON e.DeptID = d.DeptID
WHERE e.Salary between 250 and 1000 AND d.DeptID = 2
GROUP BY e.DeptID, d.DeptCode
UNION
SELECT d.DeptCode AS dcode, COUNT(*) as c, SUM(e.Salary) as s FROM [dbo].[Employee] e
JOIN [dbo].[Department] d
ON e.DeptID = d.DeptID
WHERE e.Salary between 250 and 1500 AND d.DeptID = 3
GROUP BY e.DeptID, d.DeptCode
) TT
ON dd.DeptCode = TT.dcode
ORDER BY TT.c
END
I'm not sure my answer is correct. However, the result seems to be ok.
Please advise.
If I was you I will go this this query (only 1 time scan to employee table)
SELECT d.DeptName, ISNULL(e.NoEmp,0) AS NoEmp, ISNULL(SumSalary,0) AS SumSalary
FROM [dbo].[Department] AS d
LEFT JOIN (
SELECT DeptID, COUNT(EmpID) As NoEmp, SUM (Salary) AS SumSalary
FROM [dbo].[Employee]
WHERE Salary BETWEEN 250 AND CASE WHEN DeptID = 1 THEN 500
WHEN DeptID = 2 THEN 1000
WHEN DeptID = 3 THEN 1500
END
GROUP BY DeptID) AS e ON d.DeptID = e.DeptID
WHERE d.DeptID IN(1,2,3)
First, the code to setup temp tables:
declare #employees table
(
EmpID int,
EmpName varchar(100),
DeptID int,
Salary decimal
)
declare #departament table
(
DeptID int,
DeptCode char(2),
DeptName varchar(100)
)
insert into #employees values (1,'Mike',1,1000.00)
insert into #employees values (2,'Paul',1,1500.00)
insert into #employees values (3,'John',1,2000.00)
insert into #employees values (4,'Joe',2,500.00)
insert into #employees values (5,'Kim',3,2000.00)
insert into #employees values (6,'Lim',3,2500.00)
insert into #employees values (7,'Sam',2,700.00)
insert into #employees values (8,'Mario',1,250.00)
insert into #departament values (1, 'IT', 'Information Technology')
insert into #departament values (2, 'ST', 'Sales')
insert into #departament values (3, 'MT', 'Marketing')
Now, the report:
select DeptName, COALESCE(d2.DeptID, 0), COALESCE(Salaries,0) from #departament d2
left join
(
select COUNT(*) as DeptID, SUM(Salary) as Salaries from #departament d
inner join #employees e on d.DeptID = e.DeptID
where
(d.DeptID = 1 and e.Salary between 250 and 500)
or
(d.DeptID = 2 and e.Salary between 250 and 1000)
or
(d.DeptID = 3 and e.Salary between 250 and 1500)
group by d.DeptID) as sums on sums.DeptID = d2.DeptID
An alternative (uses the same temp tables as #GustavoF answer):
DECLARE #Input TABLE( DeptID INT, SalaryRangeMin decimal, SalaryRangeMax decimal )
INSERT INTO #Input
VALUES
( 1, 250, 500),
( 2 ,250 ,1000 ),
( 3 ,250 , 1500 )
SELECT D.DeptName, COUNT(EmpID) as EmployeeCount, ISNULL( SUM( e.Salary ), 0.0 ) as TotalSalary
FROM #Input AS I
INNER JOIN #departament AS D ON I.DeptID = D.DeptID
LEFT JOIN #Employees AS E ON I.DeptID = E.DeptID AND E.Salary BETWEEN I.SalaryRangeMin AND I.SalaryRangeMax
GROUP BY E.DeptID, D.DeptCode, D.DeptName
ORDER BY TotalSalary ASC
Output:
DeptName EmployeeCount TotalSalary
--------------------------- ------------- -------------
Marketing 0 0
Information Technology 1 250
Sales 2 1200

SQL Server: fill a range with dates from overlapping intervals with priority

I need to fill the range from 2017-04-01 to 2017-04-30 with the data from this table, knowing that the highest priority records should prevail over those with lower priorities
id startValidity endValidity priority
-------------------------------------------
1004 2017-04-03 2017-04-30 1
1005 2017-04-10 2017-04-22 2
1010 2017-04-19 2017-04-23 3
1006 2017-04-24 2017-04-28 2
1008 2017-04-26 2017-04-28 3
In practice I would need to get a result like this:
id startValidity endValidity priority
--------------------------------------------
1004 2017-04-03 2017-04-09 1
1005 2017-04-10 2017-04-18 2
1010 2017-04-19 2017-04-23 3
1006 2017-04-24 2017-04-25 2
1008 2017-04-26 2017-04-28 3
1004 2017-04-29 2017-04-30 1
can't think of anything elegant or more efficient solution right now . . .
-- Sample Table
declare #tbl table
(
id int,
startValidity date,
endValidty date,
priority int
)
-- Sample Data
insert into #tbl select 1004, '2017-04-03', '2017-04-30', 1
insert into #tbl select 1005, '2017-04-10', '2017-04-22', 2
insert into #tbl select 1010, '2017-04-19', '2017-04-23', 3
insert into #tbl select 1006, '2017-04-24', '2017-04-28', 2
insert into #tbl select 1008, '2017-04-26', '2017-04-28', 3
-- Query
; with
date_range as -- find the min and max date for generating list of dates
(
select start_date = min(startValidity), end_date = max(endValidty)
from #tbl
),
dates as -- gen the list of dates using recursive CTE
(
select rn = 1, date = start_date
from date_range
union all
select rn = rn + 1, date = dateadd(day, 1, d.date)
from dates d
where d.date < (select end_date from date_range)
),
cte as -- for each date, get the ID based on priority
(
select *, grp = row_number() over(order by id) - rn
from dates d
outer apply
(
select top 1 x.id, x.priority
from #tbl x
where x.startValidity <= d.date
and x.endValidty >= d.date
order by x.priority desc
) t
)
-- final result
select id, startValidity = min(date), endValidty = max(date), priority
from cte
group by grp, id, priority
order by startValidity
I do not understand the purpose of Calendar CTE or table.
So I am not using any REcursive CTE or calendar.
May be I hvn't understood the requirement completly.
Try this with diff sample data,
declare #tbl table
(
id int,
startValidity date,
endValidty date,
priority int
)
-- Sample Data
insert into #tbl select 1004, '2017-04-03', '2017-04-30', 1
insert into #tbl select 1005, '2017-04-10', '2017-04-22', 2
insert into #tbl select 1010, '2017-04-19', '2017-04-23', 3
insert into #tbl select 1006, '2017-04-24', '2017-04-28', 2
insert into #tbl select 1008, '2017-04-26', '2017-04-28', 3
;With CTE as
(
select * ,ROW_NUMBER()over(order by startValidity)rn
from #tbl
)
,CTE1 as
(
select c.id,c.startvalidity,isnull(dateadd(day,-1, c1.startvalidity)
,c.endValidty) Endvalidity
,c.[priority],c.rn
from cte c
left join cte c1
on c.rn+1=c1.rn
)
select id,startvalidity,Endvalidity,priority from cte1
union ALL
select id,startvalidity,Endvalidity,priority from
(
select top 1 id,ca.startvalidity,ca.Endvalidity,priority from cte1
cross apply(
select top 1
dateadd(day,1,endvalidity) startvalidity
,dateadd(day,-1,dateadd(month, datediff(month,0,endvalidity)+1,0)) Endvalidity
from cte1
order by rn desc)CA
order by priority
)t4
--order by startvalidity --if req

How to find missing dates by ID

I need help modifying a script to find the missing RevenuePeriod by ID within a given time frame. I have found a similar script that finds the missing date for a given ID but I don't know how to modify the script where it will give the missing dates per ID.
create table #WorksheetHistory (WorksheetID [int] IDENTITY(1,1) ,ID varchar(6), RevenuePeriod datetime)
insert into #WorksheetHistory (ID,RevenuePeriod)
SELECT '000001','2015-06-01 00:00:00.00' Union All
SELECT '000001','2015-07-01 00:00:00.00' Union All
SELECT '000001','2015-11-01 00:00:00.00' Union All
SELECT '000001','2015-12-01 00:00:00.00' Union All
SELECT '000002','2015-06-01 00:00:00.00' Union All
SELECT '000002','2015-12-01 00:00:00.00'
DECLARE #EndDate datetime
DECLARE #StartDate datetime
SET #StartDate = '2015-06-01 00:00:00.00'
SET #EndDate = '2015-12-01 00:00:00.00'
;WITH Dates as
(
SELECT #StartDate AS dt
UNION ALL
SELECT DATEADD(month, 1, dt) as dt
FROM Dates
WHERE dt < (select dateadd(month,-1,#EndDate) enddate)
)
select Month(dt) as dtMonth, Year(dt) dtYear
from Dates d left outer join #WorksheetHistory w
on dateadd(month, datediff(month,0,d.dt),0) = dateadd(month, datediff(month,0,w.RevenuePeriod),0) Where RevenuePeriod is null
Drop Table #WorksheetHistory
The current output returns the following output. I realize in the script it doesnt have an ID returned, but even if I do, it would return null value as the script indicates return null dates. I don't know how to put the associated ID in it too.
dt
2015-08-01 00:00:00.000
2015-09-01 00:00:00.000
2015-10-01 00:00:00.000
My desired result would be return the Missing ID's with the respective missing dates.
ID dt
000001 2015-08-01 00:00:00.00
000001 2015-09-01 00:00:00.00
000001 2015-10-01 00:00:00.00
000002 2015-07-01 00:00:00.00
000002 2015-08-01 00:00:00.00
000002 2015-09-01 00:00:00.00
000002 2015-10-01 00:00:00.00
000002 2015-11-01 00:00:00.00
Use EXCEPT:
WITH Dates as
(
SELECT #StartDate AS dt
UNION ALL
SELECT DATEADD(month, 1, dt) as dt
FROM Dates
WHERE dt < DATEADD(m, -1, #EndDate)
)
-- all the possible combinations
SELECT w.ID, d.dt
FROM Dates d
CROSS JOIN (
SELECT ID
FROM #WorksheetHistory
GROUP BY ID
) w
EXCEPT
-- the combinations you actually have
SELECT w.ID, d.dt
FROM Dates d
JOIN #WorksheetHistory w
ON d.dt = w.RevenuePeriod;

Resources