SQL Server query to take last grouped row by condition

SQL Server query to take last grouped row by condition - sql-server

I have simple SQL Server query:
declare #table table (id int, name nvarchar(5), deleted bit)
insert into #table(id, name, deleted) values(1, 'A1', 0)
insert into #table(id, name, deleted) values(2, 'A1', 0)
insert into #table(id, name, deleted) values(3, 'A1', 0)
insert into #table(id, name, deleted) values(4, 'A1', 1)
insert into #table(id, name, deleted) values(5, 'A2', 0)
insert into #table(id, name, deleted) values(6, 'A2', 0)
select
max(id) as id,
name
from #table
where deleted = 0
group by name
it returns to rows
id |name
--------------
3 |A1
6 |A2
but should return only one
id |name
--------------
6 |A2
since last (or max) id of A1 is deleted.
How to fix my query.
Thanks a lot.

Since you could have a third set like this:
insert into #table(id, name, deleted) values(7, 'A3', 1)
insert into #table(id, name, deleted) values(8, 'A3', 0)
I assume you want the following returned as well, since the highest id for that name was not marked as deleted:
id name
---- ----
8 A3
Then this query should do it:
;WITH x AS
(
SELECT id, name, deleted,
rn = ROW_NUMBER() OVER (PARTITION BY name ORDER BY id DESC)
FROM #table
)
SELECT id = MAX(id), name
FROM x
WHERE NOT EXISTS
(
SELECT 1 FROM x AS x2
WHERE name = x.name
AND deleted = 1 AND rn = 1
)
GROUP BY name;
If you only want rows returned where no row for a particular name was ever deleted, then it's slightly simpler:
SELECT id = MAX(id), name
FROM #table AS t
WHERE NOT EXISTS
(
SELECT 1 FROM #table
WHERE name = t.name AND deleted = 1
)
GROUP BY name;

Another approach: http://www.sqlfiddle.com/#!3/697d7/5
create table t(id int, name nvarchar(5), deleted bit)
insert into t(id, name, deleted) values(1, 'A1', 0)
insert into t(id, name, deleted) values(2, 'A1', 0)
insert into t(id, name, deleted) values(3, 'A1', 0)
insert into t(id, name, deleted) values(4, 'A1', 1)
insert into t(id, name, deleted) values(5, 'A2', 0)
insert into t(id, name, deleted) values(6, 'A2', 0)
with no_deletions as
(
select name
from t
group by name
having max(nullif(cast(deleted as int),0)) is null
)
select
max(id) as id,
name
from t
where name in (select name from no_deletions)
group by name
Output:
| ID | NAME |
-------------
| 6 | A2 |

Related

Stored procedure in SnowSQL to combine 2 tables and avoid duplicates

DECLARE #tmp1 TABLE (ItemCode INT, Item VARCHAR(30), Qty INT)
INSERT INTO #tmp1 (ItemCode, Item, Qty)
VALUES(1, 'Item1', 300),
(2, 'Item2', 500)
DECLARE #tmp2 TABLE (JobNo INT, ItemCode INT, Item VARCHAR(30), Qty INT)
INSERT INTO #tmp2 (JobNo, ItemCode, Item, Qty)
VALUES(1, 1, 'Item1', 150),
(2, 1, 'Item1', 150),
(3, 2, 'Item1', 50),
(4, 2, 'Item1', 75),
(5, 2, 'Item1', 125),
(6, 2, 'Item1', 100)
;WITH MyCTE AS
(
SELECT t1.ItemCode, t1.Item, t1.Qty, t2.JobNo, t2.ItemCode AS ItemCode2, t2.Item AS Item2, t2.Qty AS Qty2, t2.MySeed
FROM #tmp1 AS t1 INNER JOIN (
SELECT *, ROW_NUMBER() OVER(PARTITION BY ItemCode ORDER BY JobNo) AS [MySeed]
FROM #tmp2
) AS t2 ON t1.ItemCode = t2.ItemCode
WHERE t2.MySeed = 1
UNION ALL
SELECT NULL AS ItemCode, NULL AS Item, NULL AS Qty, t2.JobNo, t2.ItemCode AS ItemCode2, t2.Item AS Item2, t2.Qty AS Qty2, t2.MySeed
FROM #tmp1 AS t1 INNER JOIN (
SELECT *, ROW_NUMBER() OVER(PARTITION BY ItemCode ORDER BY JobNo) AS [MySeed]
FROM #tmp2
) AS t2 ON t1.ItemCode = t2.ItemCode
WHERE t2.MySeed > 1
)
SELECT *
FROM MyCTE
ORDER BY ItemCode2, MySeed
I found a sample query online as above which is straight forward as it combines table 1 and table 2 and it assumes in table 2 if myseed count is 1 then input null values.
But, I need a query in snowflake for the scenario where table 1 can have any number of entries for each id and table 2 can also have multiple entries for each id. I need to join both the tables without duplicates and input null in the rows when the one table has multiple entries and other table don't have multiple entries.
How to write in snowflake(SNOWSQL)?
I need a result like below to combine two tables and avoid duplicates in either one of the table

Snowflake : IN operator

so I want something as below in my query
select * from table a
where a.id in(select id, max(date) from table a group by id)
I am getting error here , as IN is equivalent to = .
how to do it?
example :
id
date
1
2022-31-01
1
2022-21-03
2
2022-01-01
2
2022-02-01
I need to get only one record based on date(max). The table has more columns than just id and date
so I need to something like this in snowflake
select * from table a
where id in(select id,max(date) from table a group by id)
```-----------------------
All solutions are working , if i select from table .
but i have case statement in view where duplicate records are coming
example :
create or replace view v_test
as
select * from
(
select id,lastdatetime,*,
case when start_date < timestamp and timestamp < end
and move_date = '9999-12-31' then 'Y'
else 'N' end as IND
from table a
) a
so if any one select view where IND= 'Y', more than 1 records are coming
what i want is to select latest records for ID where IND='Y' and max(lastdatetime)
how to incorporate this logic in view?

I think you are trying to get the latest record for each id?
select *
from table a
qualify row_number() over (partition by id order by date desc) = 1

So if we look at your sub-select:
using this "data" for the examples:
with data (id, _date) as (
select column1, to_date(column2, 'yyyy-dd-mm') from values
(1, '2022-31-01'),
(1, '2022-21-03'),
(2, '2022-01-01'),
(2, '2022-02-01')
)
select id, max(_date)
from data
group by 1;
it gives:
ID
MAX(_DATE)
1
2022-03-21
2
2022-01-02
which makes it seem you want the "the last date, per id"
which can classically (ansi sql) be written:
select d.*
from data as d
join (
select
id,
max(_date) as max_date
from data
group by 1
) as c
on d.id = c.id and d._date = c.max_date
;
ID
_DATE
1
2022-03-21
2
2022-01-02
which gives you "all the rows values". BUT if you have many rows with the same last date, you will get those, in the output.
Another methods is to use a ROW_NUMBER to pick one and only one row, which is the style of answer Mike has given:
with data (id, _date, extra) as (
select column1, to_date(column2, 'yyyy-dd-mm'), column3 from values
(1, '2022-31-01', 'extra_a'),
(1, '2022-21-03', 'extra_b_double_a'),
(1, '2022-21-03', 'extra_b_double_b'),
(2, '2022-01-01', 'extra_c'),
(2, '2022-02-01', 'extra_d')
)
select *
from data
qualify row_number() over (partition by id order by _date desc) =1 ;
gives:
ID
_DATE
EXTRA
1
2022-03-21
extra_b_double_a
2
2022-01-02
extra_d
now if you want the "all rows of the last day" you method works, albeit the QUALIFY/ROW_NUMBER is faster. You can use RANK
with data (id, _date, extra) as (
select column1, to_date(column2, 'yyyy-dd-mm'), column3 from values
(1, '2022-31-01', 'extra_a'),
(1, '2022-21-03', 'extra_b_double_a'),
(1, '2022-21-03', 'extra_b_double_b'),
(2, '2022-01-01', 'extra_c'),
(2, '2022-02-01', 'extra_d')
)
select *
from data
qualify dense_rank() over (partition by id order by _date desc) =1 ;
ID
_DATE
EXTRA
1
2022-03-21
extra_b_double_a
1
2022-03-21
extra_b_double_b
2
2022-01-02
extra_d
Now the last thing that it almost seems you are asking for, is "how do find the ID with the most recent data (here 1) and get all rows for that"
with data (id, _date, extra) as (
select column1, to_date(column2, 'yyyy-dd-mm'), column3 from values
(1, '2022-31-01', 'extra_a'),
(1, '2022-21-03', 'extra_b_double_a'),
(1, '2022-21-03', 'extra_b_double_b'),
(2, '2022-01-01', 'extra_c'),
(2, '2022-02-01', 'extra_d')
)
select *
from data
qualify id = last_value(id) over (order by _date);

Here is an example of how to use the in operator with a subquery:
select * from table1 t1 where t1.id in (select t2.id from table2 t2);

Usage of IN is possible to match on both columns:
select *
from tab AS a
where (a.id, a.date) in (select id, max(date) from tab group by id);
For sample data:
CREATE TABLE tab (id, date)
AS
SELECT column1, to_date(column2, 'yyyy-dd-mm')
FROM VALUES
(1, '2022-31-01'),
(1, '2022-21-03'),
(2, '2022-01-01'),
(2, '2022-02-01');
Output:

Grouping similar items recursively

I have been reading the following Microsoft article on recursive queries using CTE and just can't seem to wrap my head around how to use it for group common items.
I have a table the contains the following columns:
ID
FirstName
LastName
DateOfBirth
BirthCountry
GroupID
What I need to do is start with the first person in the table and iterate through the table and find all the people that have the same (LastName and BirthCountry) or have the same (DateOfBirth and BirthCountry).
Now the tricky part is that I have to assign them the same GroupID and then for each person in that GroupID, I need to see if anyone else has the same information and then put the in the same GroupID.
I think I could do this with multiple cursors but it is getting tricky.
Here is sample data and output.
ID FirstName LastName DateOfBirth BirthCountry GroupID
----------- ---------- ---------- ----------- ------------ -----------
1 Jonh Doe 1983-01-01 Grand 100
2 Jack Stone 1976-06-08 Grand 100
3 Jane Doe 1982-02-08 Grand 100
4 Adam Wayne 1983-01-01 Grand 100
5 Kay Wayne 1976-06-08 Grand 100
6 Matt Knox 1983-01-01 Hay 101
John Doe and Jane Doe are in the same Group (100) because they have the same (LastName and BirthCountry).
Adam Wayne is in Group (100) because he has the same (BirthDate and BirthCountry) as John Doe.
Kay Wayne is in Group (100) because she has the same (LastName and BirthCountry) as Adam Wayne who is already in Group (100).
Matt Knox is in a new group (101) because he does not match anyone in previous groups.
Jack Stone is in a group (100) because he has the same (BirthDate and BirthCountry) as Kay Wayne who is already in Group (100).
Data scripts:
CREATE TABLE #Tbl(
ID INT,
FirstName VARCHAR(50),
LastName VARCHAR(50),
DateOfBirth DATE,
BirthCountry VARCHAR(50),
GroupID INT NULL
);
INSERT INTO #Tbl VALUES
(1, 'Jonh', 'Doe', '1983-01-01', 'Grand', NULL),
(2, 'Jack', 'Stone', '1976-06-08', 'Grand', NULL),
(3, 'Jane', 'Doe', '1982-02-08', 'Grand', NULL),
(4, 'Adam', 'Wayne', '1983-01-01', 'Grand', NULL),
(5, 'Kay', 'Wayne', '1976-06-08', 'Grand', NULL),
(6, 'Matt', 'Knox', '1983-01-01', 'Hay', NULL);

Here's what I came up with. I have rarely written recursive queries so it was some good practice for me. By the way Kay and Adam do not share a birth country in your sample data.
with data as (
select
LastName, DateOfBirth, BirthCountry,
row_number() over (order by LastName, DateOfBirth, BirthCountry) as grpNum
from T group by LastName, DateOfBirth, BirthCountry
), r as (
select
d.LastName, d.DateOfBirth, d.BirthCountry, d.grpNum,
cast('|' + cast(d.grpNum as varchar(8)) + '|' as varchar(1024)) as equ
from data as d
union all
select
d.LastName, d.DateOfBirth, d.BirthCountry, r.grpNum,
cast(r.equ + cast(d.grpNum as varchar(8)) + '|' as varchar(1024))
from r inner join data as d
on d.grpNum > r.grpNum
and charindex('|' + cast(d.grpNum as varchar(8)) + '|', r.equ) = 0
and (d.LastName = r.LastName or d.DateOfBirth = r.DateOfBirth)
and d.BirthCountry = r.BirthCountry
), g as (
select LastName, DateOfBirth, BirthCountry, min(grpNum) as grpNum
from r group by LastName, DateOfBirth, BirthCountry
)
select t.*, dense_rank() over (order by g.grpNum) + 100 as GroupID
from T as t
inner join g
on g.LastName = t.LastName
and g.DateOfBirth = t.DateOfBirth
and g.BirthCountry = t.BirthCountry
For the recursion to terminate it's necessary to keep track of the equivalences (via string concatenation) so that at each level it only needs to consider newly discovered equivalences (or connections, transitivities, etc.) Notice that I've avoided using the word group to avoid bleeding into the GROUP BY concept.
http://rextester.com/edit/TVRVZ10193
EDIT: I used an almost arbitrary numbering for the equivalences but if you wanted them to appear in a sequence based on the lowest ID with each block that's easy to do. Instead of using row_number() say min(ID) as grpNum presuming, of course, that IDs are unique.

I assume groupid is the output you want which start from 100.
Even if groupid come from another table,then it is no problem.
Firstly,sorry for my "No cursor comments".Cursor or RBAR operation is require for this task.In fact after a very long time i met such requirement which took so long and I use RBAR operation.
if tommorrow i am able to do using SET BASE METHOD,then I will come and edit it.
Most importantly using RBAR operation make the script more understanding and I think it wil work for other sample data too.
Also give feedback about the performance and how it work with other sample data.
Alsi in my script you note that id are not in serial,and it do not matter,i did this in order to test.
I use print for debuging purpose,you can remove it.
SET NOCOUNT ON
DECLARE #Tbl TABLE(
ID INT,
FirstName VARCHAR(50),
LastName VARCHAR(50),
DateOfBirth DATE,
BirthCountry VARCHAR(50),
GroupID INT NULL
);
INSERT INTO #Tbl VALUES
(1, 'Jonh', 'Doe', '1983-01-01', 'Grand', NULL) ,
(2, 'Jack', 'Stone', '1976-06-08', 'Grand', NULL),
(3, 'Jane', 'Doe', '1982-02-08', 'Grand', NULL),
(4, 'Adam', 'Wayne', '1983-01-01', 'Grand', NULL),
(5, 'Kay', 'Wayne', '1976-06-08', 'Grand', NULL),
(6, 'Matt', 'Knox', '1983-01-01', 'Hay', NULL),
(7, 'Jerry', 'Stone', '1976-06-08', 'Hay', NULL)
DECLARE #StartGroupid INT = 100
DECLARE #id INT
DECLARE #Groupid INT
DECLARE #Maxid INT
DECLARE #i INT = 1
DECLARE #MinGroupID int=#StartGroupid
DECLARE #MaxGroupID int=#StartGroupid
DECLARE #LastGroupID int
SELECT #maxid = max(id)
FROM #tbl
WHILE (#i <= #maxid)
BEGIN
SELECT #id = id
,#Groupid = Groupid
FROM #Tbl a
WHERE id = #i
if(#Groupid is not null and #Groupid<#MinGroupID)
set #MinGroupID=#Groupid
if(#Groupid is not null and #Groupid>#MaxGroupID)
set #MaxGroupID=#Groupid
if(#Groupid is not null)
set #LastGroupID=#Groupid
UPDATE A
SET groupid =case
when #id=1 and b.groupid is null then #StartGroupid
when #id>1 and b.groupid is null then #MaxGroupID+1--(Select max(groupid)+1 from #tbl where id<#id)
when #id>1 and b.groupid is not null then #MinGroupID --(Select min(groupid) from #tbl where id<#id)
end
FROM #Tbl A
INNER JOIN #tbl B ON b.id = #ID
WHERE (
(
a.BirthCountry = b.BirthCountry
and a.DateOfBirth = b.dateofbirth
)
or (a.LastName = b.LastName and a.BirthCountry = b.BirthCountry)
or (a.LastName = b.LastName and a.dateofbirth = b.dateofbirth)
)
--if(#id=7) --#id=2,#id=3 and so on (for debug
--break
SET #i = #i + 1
SET #ID = #I
END
SELECT *
FROM #Tbl
Alternate Method but still it return 56,000 rows without rownum=1.See if it work with other sample data or see if you can further optimize it.
;with CTE as
(
select a.ID,a.FirstName,a.LastName,a.DateOfBirth,a.BirthCountry
,#StartGroupid GroupID
,1 rn
FROM #Tbl A where a.id=1
UNION ALL
Select a.ID,a.FirstName,a.LastName,a.DateOfBirth,a.BirthCountry
,case when ((a.BirthCountry = b.BirthCountry and a.DateOfBirth = b.dateofbirth)
or (a.LastName = b.LastName and a.BirthCountry = b.BirthCountry)
or (a.LastName = b.LastName and a.dateofbirth = b.dateofbirth)
) then b.groupid else b.groupid+1 end
, b.rn+1
FROM #tbl A
inner join CTE B on a.id>1
where b.rn<#Maxid
)
,CTE1 as
(select * ,row_number()over(partition by id order by groupid )rownum
from CTE )
select * from cte1
where rownum=1

Maybe you can run it in this way
SELECT *
FROM table_name
GROUP BY
FirstName,
LastName,
GroupID
HAVING COUNT(GroupID) >= 2
ORDER BY GroupID

How can I copy the Column Data Of one Table To Another Table where the row data of one table matches column name of another

I have two tables :
Table 1:
Id | PersonId |Variable | Value|
1 12 FirstName NULL
2 12 Address NULL
------------------------
Table2:
Id | PersonId | FirstName| LastName| Address | Phone
1 12 Tommy Stark NY 12365
I need to copy data from table 2 into table 1 and
I need output like:
Table 1:
Id | PersonId |Variable | Value|
1 12 FirstName Tommy
2 12 Address NY

You could use a series of case expressions to match table 1 values to table 2 column names. It's clunky as heck, but it should work:
UPDATE t1
SET t1.value = CASE t1.variable
WHEN 'FirstName' THEN t2.firstname
ELSE t1.value
END,
t1.value = CASE t1.variable
WHEN 'LastName' THEN t2.lastname
ELSE t1.value
END,
t1.value = CASE t1.variable
WHEN 'Address' THEN t2.address
ELSE t1.value
END,
t1.value = CASE t1.phone
WHEN 'Phone' THEN t2.phone
ELSE t1.value
END
FROM t1
JOIN t2 ON t1.personid = t2.personid

Declare #Table1 TABLE
(Id int, PersonId int, Variable varchar(9), Value varchar(4))
;
INSERT INTO #Table1
(Id, PersonId, Variable, Value)
VALUES
(1, 12, 'FirstName', NULL),
(2, 12, 'Address', NULL)
;
DECLARE #Table2 TABLE
(Id int, PersonId int, FirstName varchar(5), LastName varchar(5), Address varchar(2), Phone int)
;
INSERT INTO #Table2
(Id, PersonId, FirstName, LastName, Address, Phone)
VALUES
(1, 12, 'Tommy', 'Stark', 'NY', 12365)
select TT.Id,
TT.PersonId,
TT.Variable,
CASE
WHEN T.col = TT.Variable
THEN T.val
END value
from #Table1 TT
INNER JOIN (
select col,val from #Table2 t CROSS APPLY (values ('Id',CAST(Id AS VARCHAR)), ('PersonId',CAST(PersonId AS VARCHAR)),
('FirstName',CAST(FirstName AS VARCHAR)),
('LastName',CAST(LastName AS VARCHAR)),
('Address',CAST(Address AS VARCHAR)),
('Phone',CAST(Phone AS VARCHAR)))cs(col,val))T
ON T.col = TT.Variable

Find bus route number from sql queries

I am designing bus route database. i have three tables like below,
LocationDetails
Location ID Location Name
---------------------------
1 A
2 B
3 C
4 D
5 E
6 F
7 G
8 H
9 I
RouteDetails
RouteId RouteNumber
--------------------
1 101
2 102
3 103
RouteLocationDetails
RouteId LocationId
------------------
1 3
1 4
1 5
1 6
2 2
2 3
2 4
2 5
2 6
I want to find the route numbers for the user entered location name.
For example if user want to travel the location from 'C' to 'F' they need to know what and all route number available for that locations, in my case 2 bus route number is available for that route (Routenumber 101, 102)
Can anyone tell me how to write the sql query for this?

If you only need to handle simple query like A to B, you can self join the RouteLocationDetails table
select r1.RouteId from RouteLocationDetails r1 join RouteLocationDetails r2 on r1.RouteId = r2.RouteId where r1.LocationId <> r2.LocationId and r1.LocationId = 'LocationNameOfC' and r2.LocationId = 'LocationNameOfF'
just a simple illustration, you need to join back the other two table for the necessary information.

You can use below and make sure to pass two location names...
select rd.RouteNumber,r.RouteId from #LocationDetails l
Join
#RouteLocationDetails r
on
l.[Location ID]=r.LocationId
join
#RouteDetails rd
on
r.RouteId=rd.RouteId
where [Location Name]='C'
or [Location Name]='F'
group by rd.RouteNumber,r.RouteId
having count(r.LocationId)=2
Output
RouteNumber RouteId
101 1
102 2

I've created the following SQL Fiddle which should work in your case: http://sqlfiddle.com/#!6/4824f/2
Creating the tables and inserting the data:
create table LocationDetails ([Location ID] int, [Location Name] varchar(1))
INSERT INTO LocationDetails ([Location ID],[Location Name]) VALUES(1, 'A')
INSERT INTO LocationDetails ([Location ID],[Location Name]) VALUES(2, 'B')
INSERT INTO LocationDetails ([Location ID],[Location Name]) VALUES(3, 'C')
INSERT INTO LocationDetails ([Location ID],[Location Name]) VALUES(4, 'D')
INSERT INTO LocationDetails ([Location ID],[Location Name]) VALUES(5, 'E')
INSERT INTO LocationDetails ([Location ID],[Location Name]) VALUES(6, 'F')
INSERT INTO LocationDetails ([Location ID],[Location Name]) VALUES(7, 'G')
INSERT INTO LocationDetails ([Location ID],[Location Name]) VALUES(8, 'H')
INSERT INTO LocationDetails ([Location ID],[Location Name]) VALUES(9, 'I')
create table RouteDetails (RouteId int, RouteNumber int)
INSERT INTO RouteDetails (RouteId, RouteNumber) VALUES(1, 101)
INSERT INTO RouteDetails (RouteId, RouteNumber) VALUES(2, 102)
INSERT INTO RouteDetails (RouteId, RouteNumber) VALUES(3, 103)
create table RouteLocationDetails (RouteId int, LocationId int)
INSERT INTO RouteLocationDetails (RouteId, LocationId) VALUES(1, 3)
INSERT INTO RouteLocationDetails (RouteId, LocationId) VALUES(1, 4)
INSERT INTO RouteLocationDetails (RouteId, LocationId) VALUES(1, 5)
INSERT INTO RouteLocationDetails (RouteId, LocationId) VALUES(1, 6)
INSERT INTO RouteLocationDetails (RouteId, LocationId) VALUES(2, 2)
INSERT INTO RouteLocationDetails (RouteId, LocationId) VALUES(2, 3)
INSERT INTO RouteLocationDetails (RouteId, LocationId) VALUES(2, 4)
INSERT INTO RouteLocationDetails (RouteId, LocationId) VALUES(2, 5)
INSERT INTO RouteLocationDetails (RouteId, LocationId) VALUES(2, 6)
Query the from and to location:
select RouteNumber from LocationDetails ldfrom
inner join RouteLocationDetails rldfrom on rldfrom.LocationId = ldfrom.[Location Id]
inner join RouteLocationDetails rldto on rldto.RouteId = rldfrom.RouteId
inner join LocationDetails ldto on ldto.[Location Id] = rldto.LocationId
inner join RouteDetails rd on rd.RouteId = rldto.RouteId
where ldfrom.[Location Name] = 'C' and ldto.[Location Name] = 'F'