Query To Get "Most Recent" Joining On Other Table With Date - sql-server

I have a couple of tables and need to join them, but with a twist.
Table #GradeChange has Student IDs, the Effective Date of the grade change, and the Grade they were changed to ON that date. Table #EventOccurrence has events that occurred for that student on a certain date. I need to find what grade the student was in when the event occurred. This will be the most recent Grade from #GradeChange that occurred prior to the #EventOccurrence Effective Date. Students may have multiple EventOccurrences, and we can assume all students will have at least one #GradeChange entry with a date prior to their oldest event.
This is the DDL:
/* Reset: remove leftovers from a previous run so the script can be re-run */
IF OBJECT_ID('TempDB..#EventOccurrence','U') IS NOT NULL
BEGIN
    DROP TABLE #EventOccurrence;
END;
IF OBJECT_ID('TempDB..#GradeChange','U') IS NOT NULL
BEGIN
    DROP TABLE #GradeChange;
END;

/* Grade history: one row per student per grade change */
CREATE TABLE #GradeChange
(
    ID            varchar(6),
    EffectiveDate datetime,
    Grade         varchar(50)
);

INSERT INTO #GradeChange (ID, EffectiveDate, Grade)
VALUES
    ('678443', 'Jul 2 2009 11:30PM',  'Grade 3'),
    ('678443', 'Jan 24 2007 2:40PM',  'Kindergarten - Half Day'),
    ('678443', 'Jul 4 2007 11:09PM',  'Grade 1'),
    ('678443', 'Jul 2 2008 11:35PM',  'Grade 2'),
    ('718466', 'May 18 2009 11:50PM', 'Pre-Kindergarten'),
    ('718466', 'Jul 2 2009 11:27PM',  'Kindergarten - Half Day'),
    ('718466', 'Aug 27 2009 11:18PM', 'Pre-Kindergarten'),
    ('718466', 'Jul 9 2010 11:18PM',  'Kindergarten - Half Day'),
    ('718466', 'Aug 2 2010 11:14PM',  'Kindergarten');

/* Events: one row per student per event date */
CREATE TABLE #EventOccurrence
(
    ID        varchar(6),
    EventDate datetime
);

INSERT INTO #EventOccurrence (ID, EventDate)
VALUES
    ('718466', 'Nov 16 2010 12:00AM'),
    ('718466', 'May 20 2009 12:00AM'),
    ('678443', 'Dec 7 2007 12:00AM');
So the two tables would look like this:
And the expected results would look like this:
I've played with "MAX" and "MIN" and "OVER()" but can't quite get it right. I very much appreciate any help!

-- For every event, fetch the single most recent grade change dated on or
-- before the event. OUTER APPLY keeps events that have no qualifying grade
-- change; their Grade comes back as NULL.
SELECT
    eo.ID,
    eo.EventDate,
    latest.Grade
FROM #EventOccurrence AS eo
OUTER APPLY
(
    SELECT TOP (1)
        gc.Grade
    FROM #GradeChange AS gc
    WHERE gc.EffectiveDate <= eo.EventDate
      AND gc.id = eo.id
    ORDER BY gc.EffectiveDate DESC
) AS latest

-- Join each event to the grade-change row whose EffectiveDate equals the
-- latest EffectiveDate on or before the event for the same student.
-- Events without any earlier grade change are dropped by the inner join.
SELECT
    ev.ID,
    ev.EventDate,
    g.Grade
FROM #EventOccurrence AS ev
INNER JOIN #GradeChange AS g
    ON g.ID = ev.ID
   AND g.EffectiveDate = (
           SELECT MAX(prior.EffectiveDate)
           FROM #GradeChange AS prior
           WHERE prior.ID = ev.ID
             AND prior.EffectiveDate <= ev.EventDate
       )

-- Grade in force at each event, using ROW_NUMBER().
--
-- Fixes over the previous version of this query:
--   * reads the temp tables #GradeChange / #EventOccurrence (the '#' prefix
--     was missing, so the query pointed at non-existent permanent tables);
--   * ranks candidate grade changes per event instead of looking at "the row
--     just before the event" in one merged list, which returned NULL whenever
--     a student had two events with no grade change in between.
--
-- Output columns are unchanged: ID, DDate (the event date), Grade.
-- Identical (ID, EventDate) events collapse into one row, as the UNION did.
-- An event with no earlier grade change is returned with Grade = NULL.
;WITH merged (ID, DDate, Grade, pos) AS
(
    SELECT
        eo.ID,
        eo.EventDate AS DDate,
        gc.Grade,
        -- pos = 1 marks the most recent grade change on or before the event
        ROW_NUMBER() OVER (
            PARTITION BY eo.ID, eo.EventDate
            ORDER BY gc.EffectiveDate DESC
        ) AS pos
    FROM #EventOccurrence AS eo
    LEFT JOIN #GradeChange AS gc
        ON gc.ID = eo.ID
       AND gc.EffectiveDate <= eo.EventDate
)
SELECT
    m.ID,
    m.DDate,
    m.Grade
FROM merged AS m
WHERE m.pos = 1

Related

How do I use GROUP BY in SQL Server 2012

I've got two tables, one contains the list of bins and the second contains the weekdays where that bin is collected.
-- Sample data: bins and the weekdays on which each collection code is picked up.
-- `declare #name table (...)` is not valid T-SQL; these are created as temp
-- tables so the queries that read #bins / #collectionDays run as written.
create table #bins (
    id int IDENTITY(1,1) PRIMARY KEY,
    name nvarchar(255),
    collectionCode nvarchar(255)
);

-- weekday holds 0-6. The queries over this table pass it to DATEADD/DATEDIFF
-- as a date, i.e. as a day offset from 1900-01-01 (presumably 0 = Monday;
-- confirm against the intended calendar).
create table #collectionDays (
    id int IDENTITY(1,1) PRIMARY KEY,
    weekday int,
    collectionCode nvarchar(255)
);

insert into #bins (name, collectionCode) values
    ('Bin 1','MWF'),
    ('Bin 2','MWF'),
    ('Bin 3','ED'),
    ('Bin 4','ED'),
    ('Bin 5','ED'),
    ('Bin 6','ED'),
    ('Bin 7','ED'),
    ('Bin 8','ED'),
    ('Bin 9','ED'),
    ('Bin 10','MWF');

insert into #collectionDays (weekday, collectionCode) values
    (0,'MWF'),
    (2,'MWF'),
    (4,'MWF'),
    (0,'ED'),
    (1,'ED'),
    (2,'ED'),
    (3,'ED'),
    (4,'ED'),
    (5,'ED'),
    (6,'ED');
What I want to do is return list of all the bins with their next collection day.
I've already created this query, which returns the next collection date, but it only returns it for one bin at a time. I don't want to run this query for each bin in the database.
Here's my query
-- Next collection date for one bin (b.id = 1).
-- The date is computed once in CROSS APPLY so that TOP 1 / ORDER BY work on
-- the real date value. Ordering by the 'dd/MM/yyyy' text would compare
-- strings, e.g. '01/02/2024' would sort before '31/01/2024'.
select top 1
    b.name,
    format(nd.next_date, 'dd/MM/yyyy') AS date
from #bins b
join #collectionDays c on b.collectionCode = c.collectionCode
cross apply (
    -- first date after today that falls on c.weekday
    select dateadd(day, (datediff(day, c.weekday, getdate()) / 7) * 7 + 7, c.weekday) as next_date
) nd
where b.id = 1
order by nd.next_date asc
If I remove top 1 and b.id = 1 condition, it'll return all the bins and next date for each weekday. If I try using group by, I get an error Column '#collectionDays.weekday' is invalid in the select list because it is not contained in either an aggregate function or the GROUP BY clause.
-- Failing attempt, kept as posted: the query groups by b.id only, but the
-- select list uses name and weekday without aggregating them, so SQL Server
-- rejects it with the "not contained in either an aggregate function or the
-- GROUP BY clause" error.
select
name,
format(dateadd(day, (datediff(day, weekday, getdate()) / 7) * 7 + 7, weekday), 'dd/MM/yyyy') AS date
from #bins b
join #collectionDays c on b.collectionCode = c.collectionCode
group by b.id
order by date asc
Any ideas on how I can return the next collection date for each bin in a single query?
EDIT: Updated the queries with join and other stuff
This should do it
-- Next collection date per bin name.
-- MIN is applied to the date itself and the result is formatted afterwards.
-- Taking MIN (or ordering) over the 'dd/MM/yyyy' text compares strings, which
-- picks the wrong day whenever the candidates straddle a month boundary.
select
    b.name,
    format(min(nd.next_date), 'dd/MM/yyyy') AS [DATE]
from #bins b
join #collectionDays c on b.collectionCode = c.collectionCode
cross apply (
    -- first date after today that falls on c.weekday
    select dateadd(day, (datediff(day, c.weekday, getdate()) / 7) * 7 + 7, c.weekday) as next_date
) nd
group by b.name
order by min(nd.next_date) asc
Basically you want to group by the name as that's one of the results you're returning, then fetch the minimum date returned for each name.
For this example name appears to be unique. In a lot of cases this is not guaranteed, so you'd want to do something like this
-- Next collection date per bin, grouped on the key (id) rather than the name.
-- The CTE keeps the value as a real date; formatting happens only in the
-- final select so that MIN and ORDER BY compare dates, not 'dd/MM/yyyy' text.
;with cte_nextdates as
(
    select
        b.id,
        min(x.next_date) as next_date
    from #bins b
    join #collectionDays c on b.collectionCode = c.collectionCode
    cross apply (
        -- first date after today that falls on c.weekday
        select dateadd(day, (datediff(day, c.weekday, getdate()) / 7) * 7 + 7, c.weekday) as next_date
    ) x
    group by b.id
)
SELECT
    B.name,
    format(ND.next_date, 'dd/MM/yyyy') AS [date]
FROM cte_nextdates ND
INNER JOIN #bins B ON B.id = ND.id
order by ND.next_date asc
What this does is to group on ID rather than name.
The problem is that a grouped SQL query can only include fields that are either listed in the GROUP BY clause or passed into an aggregate function like MIN or MAX. As Name might not be unique, we shouldn't group on it.
To get around that we take the result set of ID and Date returned and we join it to the Bin table to get the bin name .

How to recreate old snapshot using field history table in Bigquery

I'm currently working on an interesting problem. I am trying to recreate state of table as it was on a given previous date. I have 2 tables
Table A: consists of live data, gets refreshed on an hourly basis.
Table A_field_history: consists of changes made to the fields in Table A.
Following image consists of current state, where Table A has live updated data and Table A_field_history only captures changes made to the fields on table A.
I am trying to recreate Table A as of particular given date. Following image consists of table state as it was on 06/30/2020.
The requirement is to have capability to recreate state of Table A based on any given date.
I actually identified a way to rollback (virtually, not on actual table) all the updates made after given specific date. Following are the steps followed:
Create dummy tables:
WITH
-- Current ("live") rows. Column names come from the first SELECT of the union.
Table_A AS (
    SELECT 1 AS ID, '2020-6-28' AS created_date, 10 AS qty, 100 AS value
    UNION ALL
    SELECT 2, '2020-5-29', 20, 200
),
-- One row per field change. Every column here is a string, including A_id
-- and the old/new values.
Table_A_field_history AS (
    SELECT
        'xyz'        AS id,
        '2020-07-29' AS created_date,
        '12345'      AS created_by,
        'qty'        AS field,
        '10'         AS new_value,
        '200'        AS old_value,
        '1'          AS A_id
    UNION ALL SELECT 'abc', '2020-07-24', '12345', 'qty',   '20',   '10',   '2'
    UNION ALL SELECT 'xyz', '2020-07-29', '12345', 'value', '100',  '2000', '1'
    UNION ALL SELECT 'abc', '2020-07-24', '12345', 'value', '200',  '5000', '2'
    UNION ALL SELECT 'xyz', '2020-06-29', '12345', 'qty',   '200',  '',     '1'
    UNION ALL SELECT 'abc', '2020-05-30', '12345', 'qty',   '10',   '',     '2'
    UNION ALL SELECT 'xyz', '2020-06-29', '12345', 'value', '2000', '',     '1'
    UNION ALL SELECT 'abc', '2020-05-30', '12345', 'value', '5000', '',     '2'
),
Step 1. Create date cte to filter data based on given date:
-- One row per calendar day from 2020-01-01 through today.
-- (The stray backticks that wrapped this CTE were markup residue and made the
-- statement invalid; the start date is now an explicit DATE literal.)
date_spine AS (
    SELECT as_of_date
    FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', CURRENT_DATE(), INTERVAL 1 DAY)) AS as_of_date
),
Step 2. Above created date cte can be used as a Spine for our query, cross join to map as_of_date with all the changes made in the history table.
-- Every (as_of_date, A_id) combination: each day of the spine paired with
-- each record id that appears in the history table.
date_changes AS (
    SELECT DISTINCT
        spine.as_of_date,
        hist.A_id
    FROM date_spine AS spine
    CROSS JOIN Table_A_field_history AS hist
),
Step 3. Now we have as_of_date mapped to all historical transactions, now we can get max of change date.
-- For each (as_of_date, A_id): the latest history created_date that is not
-- after as_of_date. Pairs with no change on or before the date drop out.
most_recent_changes AS (
    SELECT
        dc.as_of_date,
        dc.A_id,
        MAX(fh.created_date) AS created_date
    FROM date_changes AS dc
    INNER JOIN Table_A_field_history AS fh
        ON fh.A_id = dc.A_id
       AND CAST(fh.created_date AS DATE) <= dc.as_of_date
    GROUP BY
        dc.as_of_date,
        dc.A_id
),
Step 4. Now mapping max change date with actual created_date and history table
-- Attach the history rows written on that latest change date. Each history
-- row yields one output row: its new_value lands in the qty or value column
-- according to `field`, and the other column is an empty string.
past_changes AS (
    SELECT
        mr.as_of_date,
        mr.A_id,
        mr.created_date,
        h.id AS entry_id,
        h.created_by AS created_by_id,
        IF(h.field = 'qty', h.new_value, '') AS qty,
        IF(h.field = 'value', h.new_value, '') AS value
    FROM most_recent_changes AS mr
    INNER JOIN Table_A_field_history AS h
        ON h.A_id = mr.A_id
       AND h.created_date = mr.created_date
       AND h.id IS NOT NULL
)
Step 5. Now we can use as_of_date to get historical state of Table A.
-- Pick the reconstructed rows for one as-of date.
SELECT
    pc.as_of_date,
    pc.A_id,
    pc.created_date,
    pc.entry_id,
    pc.created_by_id,
    pc.qty,
    pc.value
FROM past_changes AS pc
WHERE pc.as_of_date = DATE '2020-07-29'

Return data even if no record present sql server

I have a table called students, and I would like to get a row even if no data is present for a particular course in a particular year.
Here is my data :
-- Enrolment counts per year and course.
CREATE TABLE students ([Year] int,
                       Course varchar(50),
                       [Number of students] int);

-- The column list is explicit so the insert does not depend on column order.
-- The first row used to read 'Maths ' with a stray trailing space, which is
-- stored as a different value from 'Maths'.
INSERT INTO students ([Year], Course, [Number of students])
VALUES (2018, 'Maths', 10),
       (2019, 'Maths', 15),
       (2018, 'Economics', 8),
       (2018, 'Social Studies', 3),
       (2019, 'Social Studies', 4),
       (2018, 'Science', 7);
How to get output like this :
Year Course Students
2018 Maths 10
2019 Maths 15
2018 Economics 8
2019 Economics 0
2018 Social Studies 3
2019 Social Studies 4
2018 Science 7
2019 Science 0
I don't want to hard-code the course values in a WHERE clause, for example WHERE course IN ('Economics', 'Maths'). I would like to use variables instead, in SQL Server.
Thank you.
You may join with calendar tables:
-- Build the full year x course grid from two small "calendar" CTEs, then
-- left-join the real data so that missing combinations show up with 0.
-- Fixes: the FROM clause now reads the CTE `years` (it referenced an
-- undefined `year`), and the count is taken from the table's actual column,
-- [Number of students] (there is no column named Students).
WITH years AS (
    SELECT 2018 AS [Year] UNION ALL
    SELECT 2019
),
courses AS (
    SELECT 'Maths' AS Course UNION ALL
    SELECT 'Economics' UNION ALL
    SELECT 'Social Studies' UNION ALL
    SELECT 'Science'
)
SELECT
    y.[Year],
    c.Course,
    COALESCE(s.[Number of students], 0) AS Students
FROM years y
CROSS JOIN courses c
LEFT JOIN students s
    ON s.[Year] = y.[Year] AND s.Course = c.Course;
This gets you the results you want, however, with a large table the DISTINCT is likely to get expensive:
-- Derive the year list and the course list from the data itself, cross join
-- them into a full grid, and fill the gaps with 0.
;WITH all_years AS (
    SELECT DISTINCT [Year]
    FROM dbo.students
),
all_courses AS (
    SELECT DISTINCT Course
    FROM dbo.students
)
SELECT
    yr.[Year],
    crs.Course,
    ISNULL(st.[Number of students], 0) AS [Number of students]
FROM all_years AS yr
CROSS JOIN all_courses AS crs
LEFT JOIN dbo.students AS st
    ON st.[Year] = yr.[Year]
   AND st.Course = crs.Course;
Ideally you should have a separate table with your Year and Course data, and then the query can be "shortened" to:
-- Same grid, but the year and course lists come from dedicated lookup tables
-- (dbo.Years, dbo.Courses) instead of DISTINCT scans over dbo.students.
SELECT
    yr.[Year],
    crs.Course,
    ISNULL(st.[Number of students], 0) AS [Number of students]
FROM dbo.Years AS yr
CROSS JOIN dbo.Courses AS crs
LEFT JOIN dbo.students AS st
    ON st.[Year] = yr.[Year]
   AND st.Course = crs.Course;

Update a Table Based on Count from Another Table

I am trying to do a COUNT against a table using a date range, from another table. The count has to also match with a reference number, here's the data format:
[Rev Old New] (Table 1)
Id Start Finish Amount Calls
41 2018-01-01 2018-06-01 111.01
[Calls] (Table 2)
Id Date Amount
3 2018-05-05 12.1
41 2018-01-03 11.7
41 2018-06-11 12.9
I am quite new to MS SQL so apologies for my rather basic knowledge!
So, I want the count of rows in [Calls], where the Date is between the Start and Finish dates in [Rev Old New] and the ID is the same in both tables (it's a client ref)
I want to UPDATE [Rev Old New] with this value in [Calls]
Here's what I have so far, not working and probably nowhere near the right syntax!
-- Asker's non-working attempt, kept as posted because the error messages
-- quoted after it refer to this text.
--   * ". t2" after the table name is not a valid way to alias the UPDATE target
--     (first error: incorrect syntax near 't2').
--   * CASE needs WHEN ... THEN ... END; "CASE WHERE" is not valid
--     (second error: incorrect syntax near the keyword 'WHERE').
UPDATE [Insight].[dbo].[Rev Old New]. t2
SET [Calls] =
(SELECT COUNT(CASE WHERE t1.Date BETWEEN t2.[Start] AND t2.[Finish])
FROM [Insight].[dbo].[Calls] t1
WHERE t1.[Id] = t2.[Id])
The error I get is this:
Msg 102, Level 15, State 1, Line 1
Incorrect syntax near 't2'.
Msg 156, Level 15, State 1, Line 3
Incorrect syntax near the keyword 'WHERE'.
You don't need the CASE statement, a simple WHERE will suffice:
-- Set Calls to the number of [Calls] rows for the same client Id whose Date
-- lies between Start and Finish (both ends inclusive).
-- The Id match was missing before, so every row received the count of calls
-- for all clients in its date range.
UPDATE r
SET r.[Calls] = (
        SELECT COUNT(*)
        FROM [Insight].[dbo].[Calls] AS t1
        WHERE t1.[Id] = r.[Id]
          AND t1.[Date] BETWEEN r.[Start] AND r.[Finish]
    )
FROM [Insight].[dbo].[Rev Old New] AS r;
this may help
-- Self-contained demo: build sample periods and calls, roll the calls up per
-- period, write the totals back, show the result, then clean up.
CREATE TABLE #RevOldNew
(
    Id     BIGINT,
    Start  DATETIME,
    Finish DATETIME,
    Amount BIGINT,
    Calls  INT
)
CREATE TABLE #Calls
(
    Id     BIGINT,
    [Date] DATETIME,
    AMOUNT BIGINT
)

-- Amount and Calls are left out of the insert, so they start as NULL.
INSERT INTO #RevOldNew (Id, Start, Finish)
VALUES
    (1, '2018-06-01', '2018-06-15'),
    (1, '2018-07-16', '2018-07-31'),
    (1, '2018-08-01', '2018-08-15'),
    (1, '2018-08-16', '2018-08-31'),
    (2, '2018-07-01', '2018-07-15'),
    (2, '2018-08-01', '2018-08-15'),
    (2, '2018-08-16', '2018-08-31'),
    (3, '2018-07-16', '2018-07-31'),
    (3, '2018-08-01', '2018-08-15'),
    (3, '2018-08-16', '2018-08-31')

INSERT INTO #Calls (Id, [Date], AMOUNT)
VALUES
    (1, '2018-07-16', 23),
    (1, '2018-07-21', 534),
    (1, '2018-07-28', 456),
    (1, '2018-08-02', 43),
    (1, '2018-08-11', 565),
    (1, '2018-08-20', 56),
    (2, '2018-07-05', 576),
    (2, '2018-08-22', 54),
    (2, '2018-08-29', 676),
    (3, '2018-07-17', 32),
    (3, '2018-08-15', 43)

-- Per period: total call amount and number of calls inside [Start, Finish].
-- The LEFT JOIN keeps periods without calls (count 0, amount NULL).
;WITH period_totals AS
(
    SELECT
        p.id,
        p.Start,
        p.Finish,
        SUM(cl.AMOUNT) AS Amount,
        COUNT(cl.id)   AS calls
    FROM #RevOldNew AS p
    LEFT JOIN #Calls AS cl
        ON cl.id = p.id
       AND cl.Date BETWEEN p.Start AND p.Finish
    GROUP BY p.id, p.Start, p.Finish
)
UPDATE p
SET p.Amount = t.Amount,
    p.Calls  = t.calls
FROM #RevOldNew AS p
INNER JOIN period_totals AS t
    ON t.id = p.id
   AND t.Start = p.Start
   AND t.Finish = p.Finish

SELECT Id, Start, Finish, Amount, Calls
FROM #RevOldNew

DROP TABLE #RevOldNew
DROP TABLE #Calls
First you should avoid spaces in table names. It's not a good practice.
Then a query which solves your problem is :
-- Store, for each [Rev Old New] row, how many Calls rows share its id and
-- have a date within Start..Finish (inclusive on both ends).
update [Rev Old New]
set Calls = (
        select count(*)
        from Calls as c
        where c.id = [Rev Old New].id
          and c.date between [Rev Old New].Start and [Rev Old New].Finish
    )
where
select count(*) from Calls where [Rev Old New].id = Calls.id and Calls.date >= [Rev Old New].Start and Calls.date <= [Rev Old New].Finish
counts the rows in Calls whose id matches the row in [Rev Old New] and whose date falls between that row's Start and Finish (both included).
I hope this helps.

SQL stored procedure for picking a random sample based on multiple criteria

I am new to SQL. I looked all over the internet for a solution that matches my problem but couldn't find one. I have a table named 'tblItemReviewItems' in a SQL Server 2012 database.
tblItemReviewItems
Information:
1. ItemReviewId column is the PK.
2. Deleted column will have only "Yes" and "No" value.
3. Audited column will have only "Yes" and "No" value.
I want to create a stored procedure to do the following:
Pick a random sample of 10% of all ItemReviewId for distinct 'UserId' and distinct 'ReviewDate' in a given date range. 10% sample should include- 5% of the total population from Deleted (No) and 5% of the total population from Deleted (Yes). Audited ="Yes" will be excluded from the sample.
For example – A user has 118 records. Out of the 118 records, 17 records have Deleted column value "No" and 101 records have Deleted column value "Yes". We need to pick a random sample of 12 records. Out of those 12 records, 6 should have Deleted column value "No" and 6 should have Deleted column value "Yes".
Update Audited column value to "Check" for the picked sample.
How can I achieve this?
This is the stored procedure I used to pick a sample of 5% of Deleted column value "No" and 5% of Deleted column value "Yes". Now the situation is different.
/*
    spItemReviewQcPickSample

    Picks a random sample of unaudited review items within a date range,
    returns the sampled rows, and marks them as Audited = 'Check'.

    Parameters:
        @StartDate  start of the ReviewDate range (inclusive)
        @EndDate    end of the ReviewDate range (inclusive)

    Result set:
        all columns of tblItemReviewItems for the sampled rows, as they were
        before the update (Audited still 'No').

    Fixes:
        * parameters are declared with '@'; '#StartDate' is not valid T-SQL;
        * the sample is drawn ONCE into #PickedSample and reused. Previously
          the SELECT and the UPDATE each re-ran ORDER BY NEWID(), so the rows
          returned to the caller were not the rows that got marked.
*/
ALTER PROC [dbo].[spItemReviewQcPickSample]
(
    @StartDate Datetime
    ,@EndDate Datetime
)
AS
BEGIN
    -- Rank rows randomly inside each (UserId, ReviewDate, Deleted) group and
    -- turn the rank into a percentage of the group size.
    -- NOTE(review): the cut-off of 6 is unchanged from the original code; the
    -- surrounding description speaks of 5% - confirm the intended threshold.
    SELECT ranked.ItemReviewId
    INTO #PickedSample
    FROM (
        SELECT ItemReviewId
              ,100.0
               * ROW_NUMBER() OVER (PARTITION BY UserId, ReviewDate, Deleted
                                    ORDER BY NEWID())
               / COUNT(*) OVER (PARTITION BY UserId, ReviewDate, Deleted)
               AS pct
        FROM tblItemReviewItems
        WHERE ReviewDate BETWEEN @StartDate AND @EndDate
          AND Deleted IN ('Yes','No')
          AND Audited = 'No'
    ) AS ranked
    WHERE ranked.pct <= 6;

    -- Return the sampled rows to the caller.
    SELECT a.*
    FROM tblItemReviewItems AS a
    INNER JOIN #PickedSample AS s
        ON s.ItemReviewId = a.ItemReviewId;

    -- Mark exactly those rows as picked for checking.
    UPDATE a
    SET Audited = 'Check'
    FROM tblItemReviewItems AS a
    INNER JOIN #PickedSample AS s
        ON s.ItemReviewId = a.ItemReviewId;
END
Any help would be highly appreciated. Thanks in advance.
This may assist you in getting started. My idea is that you create the temp tables you need and load the specific data into them (deleted, not deleted, etc.). You then run something along the lines of:
-- Demo: give every row a random sort key, then take a random TOP 5.
-- Fix: local variables are declared with '@'. '#intMinID' / '#intMaxID' are
-- not valid variable names and the script did not parse.
IF OBJECT_ID('tempdb..#tmpTest') IS NOT NULL DROP TABLE #tmpTest
GO
CREATE TABLE #tmpTest
(
    ID INT ,
    Random_Order INT
)
INSERT INTO #tmpTest
(
    ID
)
SELECT 1 UNION ALL
SELECT 2 UNION ALL
SELECT 3 UNION ALL
SELECT 4 UNION ALL
SELECT 5 UNION ALL
SELECT 6 UNION ALL
SELECT 7 UNION ALL
SELECT 8 UNION ALL
SELECT 9 UNION ALL
SELECT 10 UNION ALL
SELECT 11 UNION ALL
SELECT 12 UNION ALL
SELECT 13 UNION ALL
SELECT 14 UNION ALL
SELECT 15 UNION ALL
SELECT 16;

DECLARE @intMinID INT ,
        @intMaxID INT;

SELECT @intMinID = MIN(ID)
FROM #tmpTest;

SELECT @intMaxID = MAX(ID)
FROM #tmpTest;

-- Rows are updated one ID at a time so that RAND() is called once per row.
-- Random_Order ends up in the range 10..30.
WHILE @intMinID <= @intMaxID
BEGIN
    UPDATE #tmpTest
    SET Random_Order = 10 + CONVERT(INT, (30-10+1)*RAND())
    WHERE ID = @intMinID;

    SELECT @intMinID = @intMinID + 1;
END

SELECT TOP 5 *
FROM #tmpTest
ORDER BY Random_Order;
This assigns a random number to a column, that you then use in conjunction with a TOP 5 clause, to get a random top 5 selection.
Appreciate a loop may not be efficient, but you may be able to update to a random number without it, and the same principle could be implemented. Hope that gives you some ideas.

Resources