SQL Server Recursive CTE Many-to-Many "Best Match" - sql-server

I've got a many-to-many relationship. I want to identify the best matches.
A match = #firstParty.StartDate between #thirdParty.MatchRangeStart and #thirdParty.MatchRangeEnd.
best match = the earliest record in #thirdParty that's not a "best match" for an earlier record in #firstParty.
I can't use a loop, for performance reasons. So, I think a recursive CTE will be needed.
Here's my desired output:
FirstPartyId StartDate ThirdPartyId ThirdPartyStartDate
------------ ---------- ------------ -------------------
1 2016-01-01 1 2016-01-10
2 2016-01-02 2 2016-01-11
3 2016-01-03 3 2016-01-12
Script:
declare #firstParty table
(
FirstPartyId integer identity,
StartDate date,
ThirdPartyId integer
);
insert into #firstParty (StartDate)
values
('01/01/2016'), ('01/02/2016'), ('01/03/2016'), ('01/04/2017'), ('01/05/2017');
/*
dates in #firstParty and #thirdParty are not guaranteed to be unique
in all scenarios
*/
declare #thirdParty table
(
ThirdPartyId integer identity,
StartDate date,
MatchRangeStart date,
MatchRangeEnd date
);
insert into #thirdParty (StartDate)
values
('01/10/2016'), ('01/11/2016'), ('01/12/2016');
update #thirdParty set MatchRangeStart = dateadd(d, -31, StartDate), MatchRangeEnd = dateadd(d, 31, StartDate);
declare #matches table
(
Id integer identity,
FirstPartyId integer,
FirstPartyStartDate date,
ThirdPartyId integer,
ThirdPartyStartDate date
);
insert into #matches (FirstPartyId, FirstPartyStartDate, ThirdPartyId, ThirdPartyStartDate)
select
fp.FirstPartyId,
fp.StartDate,
tp.ThirdPartyId,
tp.StartDate
from
#thirdParty tp
join #firstParty fp on
fp.StartDate between tp.MatchRangeStart and tp.MatchRangeEnd
;
select
fp.FirstPartyId,
fp.StartDate,
tpm.ThirdPartyId,
tpm.ThirdPartyStartDate
from
#firstParty fp
cross apply
(
select top 1
m.*
from
#matches m
where
fp.FirstPartyId = m.FirstPartyId
order by
m.ThirdPartyStartDate
) tpm;

Related

SQL query with function

I have a table of data which i am using a count statement to get the amount of records for the submission date
example
AuditId Date Crew Shift Cast ObservedBy 2ndObserver AuditType Product
16 2017-06-27 3 Day B1974, B1975 Glen Mason NULL Identification Billet
20 2017-06-29 1 Day 9879 Corey Lundy NULL Identification Billet
21 2017-06-29 4 Day T9627, T9625 Joshua Dwyer NULL ShippingPad Tee
22 2017-06-29 4 Day NULL Joshua Dwyer NULL Identification Billet
23 2017-06-29 4 Day S9874 Joshua Dwyer NULL ShippingPad Slab
24 2017-06-29 4 Day Bay 40 Joshua Dwyer NULL Identification Billet
Basically I am using the following code to get my results
SELECT YEAR([Date]) as YEAR, CAST([Date] as nvarchar(25)) AS [Date], COUNT(*) as "Audit Count"
FROM AuditResults
where AuditType = 'Identification' AND Product = 'Billet'
group by Date
this returns example
YEAR Date Audit Count
2017 2017-06-27 1
2017 2017-06-29 3
Now I want to be able to retrieve all dates even if blank
so I would like the return to be
YEAR Date Audit Count
2017 2017-06-27 1
2017 2017-06-28 0
2017 2017-06-29 3
I have the following function I am trying to use:
ALTER FUNCTION [dbo].[fnGetDatesInRange]
(
#FromDate datetime,
#ToDate datetime
)
RETURNS #DateList TABLE (Dt date)
AS
BEGIN
DECLARE #TotalDays int, #DaysCount int
SET #TotalDays = DATEDIFF(dd,#FromDate,#ToDate)
SET #DaysCount = 0
WHILE #TotalDays >= #DaysCount
BEGIN
INSERT INTO #DateList
SELECT (#ToDate - #DaysCount) AS DAT
SET #DaysCount = #DaysCount + 1
END
RETURN
END
How do I use my select statement with this function? or is there a better way?
cheers
Try this;
ALTER FUNCTION [dbo].[fnGetDatesInRange]
(
#FromDate datetime,
#ToDate datetime
)
RETURNS #YourData TABLE ([Year] int, DateText nvarchar(25),[Audit Count] int)
AS
begin
insert into #YourData
SELECT
YEAR(allDates.[Date]) as YEAR,
CAST(allDates.[Date] as nvarchar(25)) AS [Date],
COUNT(r.Product) as "Audit Count"
from
(
SELECT
[date]=convert(datetime, CONVERT(float,d.Seq))
FROM
(
select top 100000 row_number() over(partition by 1 order by A.name) as Seq
from syscolumns A, syscolumns B
)d
)allDates
left join
AuditResults r on r.[Date]=allDates.[date] and r.AuditType = 'Identification' AND r.Product = 'Billet'
where
allDates.[Date]>=#FromDate and allDates.[Date]<=#ToDate
group by
allDates.[Date]
return
end
The key is the 'allDates' section ;
SELECT
[date]=convert(datetime, CONVERT(float,d.Seq))
FROM
(
select top 100000 row_number() over(partition by 1 order by A.name) as Seq
from syscolumns A, syscolumns B
)d
This will return all dates between 1900 and 2173 (in this example). Limit that as you need but a nice option. A ton of different ways to approach this clearly
you have to create another table calendar as (Mysql)- idea is the same on all RDBMS-
CREATE TABLE `calendar` (
`dt` DATE NOT NULL,
UNIQUE INDEX `calendar_dt_unique` (`dt`)
)
COLLATE='utf8_general_ci'
ENGINE=InnoDB
;
and fill with date data.
more details

TSQL get COUNT of rows that are missing from right table

There was one other SIMILAR answer but it is 2 pages long and my requirement doesn't need that. I have 2 tables, tableA and a tableB, and I need to find the COUNTS of rows that are present in tableA but are not present in tableB OR if update_on in tableB is not today's date.
My tables:
tableA:
release_id book_name release_begin_date
----------------------------------------------------
1122 midsummer 2016-01-01
1123 fool's errand 2016-06-01
1124 midsummer 2016-04-01
1125 fool's errand 2016-08-01
tableB:
release_id book_name updated_on
-----------------------------------------
1122 midsummer 2016-08-17
1123 fool's errand 2016-08-16**
Expected result: Since each book is missing one release id, 1 is count. But in addition fool's errand's existing row in tableB has updated_on date of yesterday and not today, it needs to be counted in count_of_not_updated.
book_name count_of_missing count_of_not_updated
-------------------------------------------------------
midsummer 1 0
fool's errand 1 1
Note: Even though fool's errand is present in tableB, I need to show it in count_of_missing because it's updated_on date is yesterday and not today. I know it has to be a combination of a left join and something else, but the kicker here is not only getting the missing rows from left table but at the same time checking if the updated_on table was today's date and if not, count that row in count_of_not_updated.
select sum(case when b.release_id is null then 1 else 0 end) as noReleaseID
, sum(case when datediff(d, b.release_date, getdate()) > 0 then 1 else 0 end) as releaseDateNotToday
, a.release_id
from tableA a
left outer join tableB b on a.release_id = b.release_id
Group by a.release_id
This example uses a sum function on a case statement to add up the instances where the case statement returns true. Note that the current code assumes, as in your example, that you are looking to count all old release dates from table b - more steps would be required if each book has multiple old release dates in table b, and you only want to compare to the most recent release date.
Try this
DECLARE #tableA TABLE (release_id INT, book_name NVARCHAR(50), release_begin_date DATETIME)
DECLARE #tableB TABLE (release_id INT, book_name NVARCHAR(50), updated_on DATETIME)
INSERT INTO #tableA
VALUES
(1122, 'midsummer', '2016-01-01'),
(1123, 'fool''s errand', '2016-06-01'),
(1124, 'midsummer', '2016-04-01'),
(1125, 'fool''s errand', '2016-08-01')
INSERT INTO #tableB
VALUES
(1122, 'midsummer', '2016-08-17'),
(1123, 'fool''s errand', '2016-08-16')
;WITH TmpTableA
AS
(
SELECT
book_name,
COUNT(1) CountOfTableA
FROM
#tableA
GROUP BY
book_name
), TmpTableB
AS
(
SELECT
book_name,
COUNT(1) CountOfTableB,
SUM(CASE WHEN CONVERT(VARCHAR(11), updated_on, 112) = CONVERT(VARCHAR(11), GETDATE(), 112) THEN 0 ELSE 1 END) count_of_not_updated
FROM
#tableB
GROUP BY
book_name
)
SELECT
A.book_name ,
A.CountOfTableA - ISNULL(B.CountOfTableB, 0) AS count_of_missing,
ISNULL(B.count_of_not_updated, 0) AS count_of_not_updated
FROM
TmpTableA A LEFT JOIN
TmpTableB B ON A.book_name = B.book_name
Result:
book_name count_of_missing count_of_not_updated
-------------------- ---------------- --------------------
fool's errand 1 1
midsummer 1 1

How to get a derive 'N' Date Rows from a single record with From / To date columns?

Title sounds confusing but let me please explain:
I have a table that has two columns that provide a date range, and one column that provides a value. I need to query that table and "detail" the data such as this
Is it possible to do only using TSQL?
Additional Info
The table in question is about 2-3million records long (and growing)
Assuming the range of dates is fairly narrow, an alternative is to use a recursive CTE to create a list of all dates in the range and then join interpolate to it:
WITH LastDay AS
(
SELECT MAX(Date_To) AS MaxDate
FROM MyTable
),
Days AS
(
SELECT MIN(Date_From) AS TheDate
FROM MyTable
UNION ALL
SELECT DATEADD(d, 1, TheDate) AS TheDate
FROM Days CROSS JOIN LastDay
WHERE TheDate <= LastDay.MaxDate
)
SELECT mt.Item_ID, mt.Cost_Of_Item, d.TheDate
FROM MyTable mt
INNER JOIN Days d
ON d.TheDate BETWEEN mt.Date_From AND mt.Date_To;
I've also assumed an that date from and date to represent an inclusive range (i.e. includes both edges) - it is unusual to use inclusive BETWEEN on dates.
SqlFiddle here
Edit
The default MAXRECURSION on a recursive CTE in Sql Server is 100, which will limit the date range in the query to a span of 100 days. You can adjust this to a maximum of 32767.
Also, if you are filtering just a smaller range of dates in your large table, you can adjust the CTE to limit the number of days in the range:
WITH DateRange AS
(
SELECT CAST('2014-01-01' AS DATE) AS MinDate,
CAST('2014-02-16' AS DATE) AS MaxDate
),
Days AS
(
SELECT MinDate AS TheDate
FROM DateRange
UNION ALL
SELECT DATEADD(d, 1, TheDate) AS TheDate
FROM Days CROSS APPLY DateRange
WHERE TheDate <= DateRange.MaxDate
)
SELECT mt.Item_ID, mt.Cost_Of_Item, d.TheDate
FROM MyTable mt
INNER JOIN Days d
ON d.TheDate BETWEEN mt.Date_From AND mt.Date_To
OPTION (MAXRECURSION 0);
Update Fiddle
This can be achieved using Cursors.
I've simulated the test data provided and created another table with the name "DesiredTable" to store the data inside, and created the following cusror which achieved exactly what you are looking for:
SET NOCOUNT ON;
DECLARE #ITEM_ID int, #COST_OF_ITEM Money,
#DATE_FROM date, #DATE_TO date;
DECLARE #DateDiff INT; -- holds number of days between from & to columns
DECLARE #counter INT = 0; -- for loop counter
PRINT '-------- Begin the Date Expanding Cursor --------';
-- defining the cursor target statement
DECLARE Date_Expanding_Cursor CURSOR FOR
SELECT [ITEM_ID]
,[COST_OF_ITEM]
,[DATE_FROM]
,[DATE_TO]
FROM [dbo].[OriginalTable]
-- openning the cursor
OPEN Date_Expanding_Cursor
-- fetching next row data into the declared variables
FETCH NEXT FROM Date_Expanding_Cursor
INTO #ITEM_ID, #COST_OF_ITEM, #DATE_FROM, #DATE_TO
-- if next row is found
WHILE ##FETCH_STATUS = 0
BEGIN
-- calculate the number of days in between the date columns
SELECT #DateDiff = DATEDIFF(day,#DATE_FROM,#DATE_TO)
-- reset the counter to 0 for the next loop
set #counter = 0;
WHILE #counter <= #DateDiff
BEGIN
-- inserting rows inside the new table
insert into DesiredTable
Values (#COST_OF_ITEM, DATEADD(day,#counter,#DATE_FROM))
set #counter = #counter +1
END
-- fetching next row
FETCH NEXT FROM Date_Expanding_Cursor
INTO #ITEM_ID, #COST_OF_ITEM, #DATE_FROM, #DATE_TO
END
-- cleanup code
CLOSE Date_Expanding_Cursor;
DEALLOCATE Date_Expanding_Cursor;
The code fetches every row from your original table, then it calculates the number of days between DATE_FROM and DATE_TO columns, then using this number the script will create identical rows to be inserted inside the new table DesiredTable.
give it a try and let me know of the results.
You can generate an increment table and join it to your date From:
Query:
With inc(n) as (
Select ROW_NUMBER() over (order by (select 1)) -1 From (
Select 1 From (values(1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) as x1(n)
Cross Join (values(1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) as x2(n)
) as x(n)
)
Select item_id, cost, DATEADD(day, n, dateFrom), n From #dates d
Inner Join inc i on n <= DATEDIFF(day, dateFrom, dateTo)
Order by item_id
Output:
item_id cost Date n
1 100 2014-01-01 00:00:00.000 0
1 100 2014-01-02 00:00:00.000 1
1 100 2014-01-03 00:00:00.000 2
2 105 2014-01-08 00:00:00.000 2
2 105 2014-01-07 00:00:00.000 1
2 105 2014-01-06 00:00:00.000 0
2 105 2014-01-09 00:00:00.000 3
3 102 2014-02-14 00:00:00.000 3
3 102 2014-02-15 00:00:00.000 4
3 102 2014-02-16 00:00:00.000 5
3 102 2014-02-11 00:00:00.000 0
3 102 2014-02-12 00:00:00.000 1
3 102 2014-02-13 00:00:00.000 2
Sample Data:
declare #dates table(item_id int, cost int, dateFrom datetime, dateTo datetime);
insert into #dates(item_id, cost, dateFrom, dateTo) values
(1, 100, '20140101', '20140103')
, (2, 105, '20140106', '20140109')
, (3, 102, '20140211', '20140216');
Yet another way is to create and maintain calendar table, containing all dates for many years (in our app we have table for 30 years or so, extending every year). Then you can just link to calendar:
select <whatever you need>, calendar.day
from <your tables> inner join calendar on calendar.day between <min date> and <max date>
This approach allows to include additional information (holidays etc) in calendar table - sometimes very helpful.

MS SQL Change expiry date when inserting value with same name

I have a question relating to trigger. I have a table (FoodPrice) with different columns like a start date and an expiry date.
Let's consider that I would like to add a price that could expire like a sale, I would like (ON INSERT) to set the ExpiryDate of the previous value to the current date:
Initial table
Food Value StartDate ExpiryDate
------ ---------- ---------- ----------
Carrot 25.5 24/12/2013 NULL
Apple 44.9 5/1/2014 NULL
Squash 25.6 12/3/2013 NULL
New table with inserted rows:
Food Value StartDate ExpiryDate
------ ---------- ---------- ----------
Carrot 25.5 24/12/2013 28/4/2014
Apple 44.9 5/1/2014 NULL
Squash 25.6 12/3/2013 28/4/2014
Carrot 24 28/4/2014 NULL
Squash 22 28/4/2014 NULL
Dupplicate values for Food column is not a big deal but is it possible to create a trigger to solve this problem ? Thank you !
Here's the code:
-- the table and sample data
create table FoodPrice (
Food varchar(10),
Value decimal(5,2),
StartDate date,
ExpiryDate date
);
go
insert FoodPrice values
('Carrot', 20, '20131124' , '20131224'),
('Apple' , 40, '20140101' , '20140105'),
('Squash', 25, '20130301' , '20130312'),
('Carrot', 25.5, '20131224' , NULL),
('Apple' , 44.9, '20140105' , NULL),
('Squash', 25.6, '20130312' , NULL)
go
-- the trigger
create trigger trFoodPrice_insert
on FoodPrice
after insert
as
;with x as (
select fp.food, fp.startdate as fp_startdate, fp.expirydate as fp_expirydate,
ins.startdate as ins_startdate, ins.expirydate as ins_expirydate,
row_number() over(partition by fp.food order by fp.startdate) as rn
from ins
inner join foodprice fp on ins.food=fp.food
and fp.startdate < ins.startdate
and fp.expirydate is null
),
y as (
select *
from x
where rn = 1
)
--select * from y
update y
set fp_expirydate = ins_startdate
go
-- let's test it
insert foodprice values
('Carrot', 24, '20140428', null),
('Squash', 22, '20140428', null)
go
select * from
foodprice
order by food, startdate
As always, I'm a big fan of first testing the select before the actual update, hence the CTE.

MSSQL - Create records for entries that don't exist in the last 7 days

I have an ASP.NET website with a C# back-end using MSSQL SQL Server 2008 for its content.
I have written the following stored procedure which checks for any records within the last 7 days and then returns what it finds.
ALTER PROCEDURE [dbuser].[GetResponses]
(
#QUEST_ID int
)
AS
SELECT DateAdded, SUM(Responses) AS responseCount
FROM ActiveResponses
WHERE #QUEST_ID = QuestionnaireID AND DateAdded >= dateadd(day,datediff(day,0,GetDate())- 6,0)
GROUP BY DateAdded
RETURN
My problem here is that if no record exists for any of those last 7 days then my method over on the website back-end side will fail as it required 7 records. For example:
Lets say I have the following records in my table
-DateAdded--------Responses
2012-02-12 4
2012-02-11 5
2012-02-10 8
2012-02-08 7
2012-02-07 3
Notice that there are no records for both 2012-02-13(today) and 2012-02-09
How can I create an SQL statement that checks the last 7 days for the number of responses and if no records are found for any one of those days it creates a record with a response of 0 in the correct position?
This is a good application of a numbers table (ex: http://www.projectdmx.com/tsql/tblnumbers.aspx)
Assuming you have a numbers table dbo.Nums that has at least 6 numbers in it, you can try the following:
CREATE TABLE #Dates
(
[Date] DATETIME
)
INSERT INTO #Dates
(
[Date]
)
SELECT
DATEADD(DD, DATEDIFF(DD, 0, GETDATE()) - ([n] - 1), 0)
FROM
[dbo].[Nums] WITH (NOLOCK)
WHERE
[n] < 7
SELECT
[Date],
ISNULL(SUM([Responses]), 0) AS [responseCount]
FROM
#Dates AS d
LEFT OUTER JOIN
ActiveResponses AS a
ON
a.[DateAdded] = d.[Date]
WHERE
#QUEST_ID = QuestionnaireID
ORDER BY
[Date] ASC
This demonstrates getting summary data for each day in a week, even if some of the days have no data:
declare #Data as table ( DateAdded date, Responses int )
insert into #Data ( DateAdded, Responses ) values ( '2/10/2012', 5 ), ( '2/13/2012', 9 )
; with James as (
select cast( SysDateTime() as date ) as StartOfDay, 7 as DaysLeft
union all
select DateAdd( d, -1, StartOfDay ), DaysLeft - 1
from James
where DaysLeft > 1
)
select J.StartOfDay, DateAdd( ms, -3, cast( DateAdd( day, 1, J.StartOfDay ) as DateTime ) ) as EndOfDay, Coalesce( D.Responses, 0 ) as Responses
from James as J left outer join
#Data as D on D.DateAdded = J.StartOfDay
order by J.StartOfDay desc
Left as an exercise is mating this with your questionnaire data.
Note that the time closest to midnight represented by DateTime values is 3ms before midnight. You can use the StartOfDay and EndOfDay values to drop any DateAdded into the correct date.
Declare a table variable with the last seven dates and include it with your query:
ALTER PROCEDURE [dbuser].[GetResponses]
(
#QUEST_ID int
)
AS
DECLARE #i INT=0;
DECLARE #today DATE=getdate();
DECLARE #last7 TABLE(DateAdded DATE);
WHILE #i>-7 BEGIN
INSERT INTO #last7 VALUES (DATEADD(DAY,#i,#today));
SET #i -= 1;
END
;WITH a AS (
SELECT ar.DateAdded, count(ar.Responses) as responseCount
FROM ActiveResponses ar
INNER JOIN #last7 z ON z.DateAdded=ar.DateAdded
WHERE #QUEST_ID = ar.QuestionnaireID
GROUP BY ar.DateAdded
)
SELECT DateAdded=ISNULL(a.DateAdded,z.DateAdded)
, responseCount=ISNULL(a.responseCount,0)
FROM #last7 z
LEFT JOIN a ON a.DateAdded=z.DateAdded;
RETURN;
GO
Results:
DateAdded responseCount
---------- -------------
2012-02-13 0
2012-02-12 4
2012-02-11 5
2012-02-10 8
2012-02-09 0
2012-02-08 7
2012-02-07 3

Resources