TSQL Union, Max(date) - have solution. Want improvement - sql-server

The following query works, but there has to be a better way to set the value of a table to the max date of the union of two sets of data. Here's what I have:
-- Sets stagingTable.OverrideFlag to the overrideflag of the row carrying the
-- latest date for the same ssn, looking at tableA and tableB together and only
-- at rows dated after 9/1/2012.
-- The UPDATE has no WHERE clause, so the scalar subquery is evaluated for every
-- stagingTable row; a row with no matching ssn is assigned NULL.
Update stagingTable
Set OverrideFlag =
(
select total.overrideflag from
(
-- "total": every candidate (ssn, date, overrideflag) row from both tables
select Customer_SSN as ssn, RequestDateTime as maxdate, overrideflag
from tableA
where RequestDateTime > '9/1/2012'
union
select ssn, EntryDate as maxdate, overrideflag
from tableB
where EntryDate > '9/1/2012'
) total
join
(
-- "maxdates": the same union again, reduced to the latest date per ssn
select ssn, max(maxdate) as maxdate from
(
select Customer_SSN as ssn, RequestDateTime as maxdate
from tableA
where RequestDateTime > '9/1/2012'
union
select ssn, EntryDate as maxdate
from tableB
where EntryDate > '9/1/2012'
) maxs
group by ssn
-- keep only the candidate row whose date equals the per-ssn maximum,
-- correlated to the stagingTable row being updated
) maxdates on total.ssn = maxdates.ssn and total.maxdate = maxdates.maxdate where total.ssn = stagingTable.ssn
)

It appears you are doing the exact same thing twice, so I am not getting the need to define something twice and join it back to itself unless there was something different in one of the nested selects. You are essentially writing the same statement twice and the redundancy may be an issue as one of the selects appears completely redundant.
-- A CTE lets the combined tableA/tableB row set be written once and referenced
-- by name, instead of repeating the same union inside two nested selects.
;with combined as
(
    -- Normalize both sources to the same three columns.
    select
        Customer_SSN as ssn,
        RequestDateTime as EntryDate,
        OverrideFlag
    from tableA
    where RequestDateTime > '9/1/2012'
    -- union all: duplicates are harmless because only one row per ssn is kept below
    union all
    select
        ssn,
        EntryDate,
        OverrideFlag
    from tableB
    where EntryDate > '9/1/2012'
),
ranked as
(
    -- Rank across BOTH sources so that rn = 1 is the latest row per ssn overall
    -- (a window function applied inside each union branch would only see that
    -- branch's rows). If two rows tie on the latest date, which one gets rn = 1
    -- is not defined.
    select
        ssn,
        OverrideFlag,
        row_number() over (partition by ssn order by EntryDate desc) as rn
    from combined
)
-- stagingTable rows without any matching ssn are left unchanged by this join form.
update s
set OverrideFlag = r.OverrideFlag
from stagingTable s
join ranked r on r.ssn = s.ssn
where r.rn = 1

I also found a different way to do it with a temp table. I'm becoming very comfortable with these, but I always want to see a different way how to do this. Not disappointed!
-- Stage the candidate override rows (both sources) for the contacts of one location.
-- @Location_ID is expected to be declared/passed in by the surrounding batch.
create table #tOvr(customerID varchar(15), ssn varchar(11), EntryDate datetime, overrideflag varchar(2))

insert into #tOvr (customerID, ssn, EntryDate, overrideflag)
select Customer_ID, Customer_SSN, RequestDateTime, overrideflag
from tableA
where RequestDateTime > '9/1/2012'
and Customer_ID in
    (select contact_ID from stagingTable
     where Location_ID = @Location_ID)

insert into #tOvr (customerID, ssn, EntryDate, overrideflag)
select Customer_ID, ssn, EntryDate, overrideflag
from tableB
where EntryDate > '9/1/2012'
and Customer_ID in
    (select contact_ID from stagingTable
     where Location_ID = @Location_ID)

-- Take the flag from the latest staged row OF THE SAME CONTACT. Matching on the
-- date alone could pick up another contact's row that happens to share that date.
-- NOTE(review): there is no WHERE on the UPDATE, so stagingTable rows with nothing
-- staged for them (e.g. other locations) are set to NULL - confirm that is intended.
Update stagingTable
Set OverrideFlag =
(select top 1 o.overrideflag
 from #tOvr o
 where o.customerID = stagingTable.contact_ID
 order by o.EntryDate desc
)

Related

Select all records for customers where mindate in 2015

I want to select all records for customers whose first order is from 2015. I want any orders they placed after 2015 too, but I DON'T want the records for customers whose first order was in 2016. I am ultimately trying to find the percentage of people who order more than twice, but I want to exclude the customers who were new in 2016.
This doesn't work because 'mindate' is an invalid column name but I'm not sure why or how else to try it.
-- Failing query from the question, kept as posted: the WHERE clause of the
-- derived table refers to mindate, which is only an alias defined in that
-- derived table's SELECT list, so the engine reports an invalid column name.
Select
od.CustomerID, OrderID, OrderDSC, OrderDTS
From
OrderDetail OD
Join
(Select
OrderID, Min(orderdts) as mindate
From
OrderDetail
Where
mindate Between '2015-1-1' and '2015-12-31'
Group By Orderid) b on od.OrderID = b.OrderID
This is because of the logical order in which the engine evaluates a query: the WHERE clause is processed before the SELECT list, so the alias mindate does not exist yet when WHERE is evaluated.
You can change mindate by orderdts:
-- Filter on the base column, because the alias mindate cannot be used in WHERE.
-- A half-open range with unambiguous yyyymmdd literals keeps orders placed at any
-- time of day on 2015-12-31.
-- Note: the filter runs before GROUP BY, so mindate is the earliest 2015 value
-- per group, not necessarily the group's earliest value overall.
select OrderID, min(orderdts) as mindate
from OrderDetail
where orderdts >= '20150101' and orderdts < '20160101'
group by OrderID
Second option is to use having statement - it's evaluated after group by.
What I did was select the distinct CustomerIDs that fall within your date range and left join them to the table, so it filters out anyone who doesn't fall within your date range.
-- Customers whose FIRST order falls in 2015, joined back to all of their orders
-- (including later years). Filtering on MIN(OrderDTS) in HAVING excludes customers
-- who merely have some order in 2015 but started earlier; the half-open range
-- keeps orders placed at any time of day on 2015-12-31.
SELECT OD.CustomerID, OD.OrderID, OD.OrderDSC, OD.OrderDTS
FROM
(SELECT CustomerID
 FROM OrderDetail
 GROUP BY CustomerID
 HAVING MIN(OrderDTS) >= '20150101' AND MIN(OrderDTS) < '20160101') oIDs
LEFT JOIN
OrderDetail OD
ON oIDs.CustomerID = OD.CustomerID
Try using the EXISTS clause. It is basically a sub-query. Below is an example you should be able to adapt.
-- Sample table: Id plays the role of the customer, aDate the order date.
CREATE TABLE Test (Id int, aDate datetime)

INSERT INTO Test (Id, aDate)
VALUES (1,'04/04/2014'),
       (1,'05/05/2015'),
       (1,'06/06/2016'),
       (2,'04/30/2016'),
       (3,'02/27/2014')

-- Rows dated 2015 or later, but only for Ids that have at least one row in 2015.
SELECT t.*
FROM Test AS t
WHERE t.aDate >= '01/01/2015'
  AND EXISTS (SELECT *
              FROM Test AS x
              WHERE x.Id = t.Id
                AND x.aDate >= '01/01/2015'
                AND x.aDate < '01/01/2016')
I don't know the orderdts data type, but if it is datetime, orders on 2015-12-31 will not be included (unless the order date is exactly 2015-12-31 00:00:00.000). Note how this will skip the first record:
-- Table variable: it must use the @ sigil (a # name is a temp table and cannot be DECLAREd).
DECLARE @orders TABLE (CustomerID INT, orderDate DATETIME);
INSERT @orders (CustomerID, orderDate)
VALUES (1, '2015-12-31 00:00:01.000'), (1, '2015-12-30'), (2, '2015-01-04');
-- Demonstrates the pitfall: '2015-12-31' means midnight, so the row stamped one
-- second after midnight on 2015-12-31 is not returned.
SELECT * FROM @orders WHERE orderDate BETWEEN '2015-01-01' AND '2015-12-31';
In this case you would want the WHERE clause filter to look like:
-- Half-open range: covers every instant of 2015. An upper bound of
-- '2015-12-31 23:59:59.999' is not safe for datetime, because that value is
-- rounded up to 2016-01-01 00:00:00.000 and would let that instant through.
WHERE orderDate >= '2015-01-01' AND orderDate < '2016-01-01';
Or
WHERE CAST(orderDate AS date) BETWEEN '2015-01-01' AND '2015-12-31';
(the first example will almost certainly perform better).
Now, using this sample data:
-- Sample data: 100 rows, each holding the first 8 characters of a fresh NEWID()
CREATE TABLE #LIST (LISTName varchar(10) NOT NULL);

INSERT INTO #LIST (LISTName)
SELECT TOP (100) LEFT(NEWID(), 8)
FROM sys.all_columns AS a
CROSS JOIN sys.all_columns AS b;   -- cross join is used only as a row generator

-- LISTName is the filter/grouping column in the queries, so index it
CREATE NONCLUSTERED INDEX nc_LISTName ON #LIST(LISTName);
You can implement Paul's solution like this:
-- @LIST_Param is a scalar variable, so it needs the @ sigil (#name denotes a temp table).
-- 'No List' acts as the "return everything" sentinel value.
DECLARE @LIST_Param varchar(8) = 'No List';
SELECT LISTName
FROM
(
    SELECT DISTINCT LISTName
    FROM #LIST
    UNION ALL
    -- the sentinel row is offered only while the list has fewer than 1,000,000 distinct names
    SELECT 'No List'
    WHERE (SELECT COUNT(DISTINCT LISTName) FROM #LIST) < 1000000
) Distinct_LISTName
WHERE (@LIST_Param = 'No List' OR @LIST_Param = LISTName);
Alternatively you can do this:
-- @LIST_Param is a scalar variable, so it needs the @ sigil (#name denotes a temp table).
DECLARE @LIST_Param varchar(8) = 'No List';
WITH x AS
(
    -- matching names with their row counts ('No List' matches every name)
    SELECT LISTName, c = COUNT(*)
    FROM #LIST
    WHERE (@LIST_Param = 'No List' OR @LIST_Param = LISTName)
    GROUP BY LISTName
),
-- total number of matching rows, used as an all-or-nothing size guard
c AS (SELECT s = SUM(c) FROM x)
SELECT LISTName
FROM x CROSS JOIN c
WHERE s < 1000000;

Finding a recent most duplicate records from SQL Server 2012

I want to find the recent duplicate records from SQL Server 2012. Here is the table structure I have.
I have table name called UserRegistration which contains the duplicate of UserID(GUID) and in same table, I have CreatedDate Column as well (Date). Now I want to find the recent duplicate records from this table.
Here is the same data.
id FirstName LastName CreatedDate UserID
109 FirstNameA LastNameA 28-04-2015 GUID1
110 FirstNameC LastNameD 19-05-2015 GUID2
111 FirstNameE LastNameF 22-05-2015 GUID1
If you notice on above tables, GUID 1 are having the duplicate, Now I want to find the recent one means it should return me only those rows with duplication but recent data. So in above table structure, it should return me 111 because record has been created recently compared to the 109. I believe you understand.
Do let me know if you have any question. I am happy to answer. Thanks. Awaiting for the reply.
Harshal
Try the below query this should do the work based on your i/p data -
-- Sample data from the question.
create table #UserRegistration (id int,FirstName varchar(20),LastName varchar(20),CreatedDate date,UserID varchar(20))
insert into #UserRegistration (id, FirstName, LastName, CreatedDate, UserID)
select 109, 'FirstNameA', 'LastNameA', '2015-04-28', 'GUID1' union all
select 110, 'FirstNameC', 'LastNameD', '2015-05-19', 'GUID2' union all
select 111, 'FirstNameE', 'LastNameF', '2015-05-22', 'GUID1'

-- Newest row per UserID, but only for UserIDs that occur more than once.
-- Ordering descending and keeping rn = 1 returns exactly one row per duplicated
-- UserID even when there are three or more copies (rn > 1 on an ascending order
-- would return every copy except the oldest).
select id, FirstName, LastName, CreatedDate, UserID from
(
    select ur.id, ur.FirstName, ur.LastName, ur.CreatedDate, ur.UserID,
           row_number() over (partition by ur.UserID order by ur.CreatedDate desc) as rn,
           count(*) over (partition by ur.UserID) as cnt
    from #UserRegistration ur
) A
where rn = 1
and cnt > 1
You could use CTE. Group your records by UserID and give your particular row a rank ordered by CreatedDate.
-- Sample rows from the question.
insert into tab (id, FirstName, LastName, CreatedDate, UserID)
values
    (109, 'FirstNameA', 'LastNameA', '2015-04-28', 'guid1'),
    (110, 'FirstNameC', 'LastNameD', '2015-05-19', 'guid2'),
    (111, 'FirstNameE', 'LastNameF', '2015-05-22', 'guid1');

-- Number each UserID's rows from oldest to newest; every row after the first
-- one (Rank > 1) is a later duplicate of that UserID.
with cte as
(
    select
        id,
        FirstName,
        LastName,
        CreatedDate,
        UserID,
        row_number() over (partition by UserID order by CreatedDate) as [Rank]
    from tab
)
select id, FirstName, LastName, CreatedDate, UserID
from cte
where Rank > 1
Rank > 1 condition is responsible for retrieving duplicated items.
sqlfiddle link:
http://sqlfiddle.com/#!6/4d1f2/6
Solved this by using tmp-tables:
-- Step 1: for every UserID that occurs more than once, store its latest
-- CreatedDate in #latest. <your table> is a placeholder for the real table name.
SELECT a.UserID,
MAX(a.CreatedDate) As CreatedDate
INTO #latest
FROM <your table> a
GROUP BY a.UserID
HAVING COUNT(a.UserID) > 1
-- Step 2: join back on (UserID, CreatedDate) to get the id of each latest row.
SELECT b.id
FROM #latest a
INNER JOIN <your table> b ON a.UserID = b.UserID AND a.CreatedDate = b.CreatedDate
try this,
-- Rows that are the latest for their UserID, restricted to UserIDs that occur
-- more than once (without the COUNT(*) check, users with a single row would be
-- returned as well).
SELECT tt.*
FROM TableName tt
WHERE EXISTS (SELECT 1
              FROM TableName u
              WHERE u.UserID = tt.UserID
              GROUP BY u.UserID
              HAVING MAX(u.createdDate) = tt.createdDate
                 AND COUNT(*) > 1)
I think your createddate field is not a date field, then try Format
-- Number each UserID's rows by id; rows numbered above 1 are duplicates.
-- The leading semicolon guards against an unterminated preceding statement.
;WITH TempAns (id,UserID,duplicateRecordCount)
AS
(
SELECT id,
UserID,
ROW_NUMBER()OVER(partition by UserID ORDER BY id)
AS duplicateRecordCount
FROM #t
)
-- For every duplicated UserID keep the row with the highest id.
-- The grouping column must be UserID: the CTE exposes no column called name.
select * from #t where id in (
select max(id)
from TempAns
where duplicateRecordCount > 1
group by UserID )
You'd rank your records with ROW_NUMBER() to give all last records per userid #1. With COUNT() you make sure only to get the userids having more than one record.
-- Latest row per userid, restricted to userids that have more than one row.
select
    id, firstname, lastname, createddate, userid
from
(
    select
        id, firstname, lastname, createddate, userid,
        -- rn = 1 marks the most recent row of each userid
        row_number() over (partition by userid order by createddate desc) as rn,
        -- cnt is the total number of rows for that userid
        count(*) over (partition by userid) as cnt
    from userregistration
) ranked
where rn = 1 -- only last one
and cnt > 1; -- but only if there is more than one record for the userid
This gets the latest record for every userid that has duplicates.

Converting Rows to Columns

I have a table with columns UserID and CountryName
Now I want get record in this way
[UserId] [ContryName1] [ContryName2] [ContryName3].........
Fiddle here : http://sqlfiddle.com/#!6/cd6f1/1
-- Dynamic PIVOT: one output column per country position (1..n) within a user.
-- Variables need the @ sigil, and the statement before WITH must end in a semicolon.
DECLARE @SQL AS NVARCHAR(MAX);
DECLARE @cols AS NVARCHAR(MAX);

-- Build the column list [1],[2],...,[n] where n is the largest number of
-- countries any single user has. ORDER BY keeps the columns in numeric order;
-- QUOTENAME adds the brackets; STUFF strips the leading comma.
WITH CTE AS
(
    SELECT USERID, COUNTRYNAME,
           ROW_NUMBER() OVER(PARTITION BY USERID ORDER BY COUNTRYNAME) AS RN
    FROM CNTRIES
)
SELECT @cols = STUFF((SELECT ',' + QUOTENAME(CAST(RN AS VARCHAR(100)))
                      FROM CTE
                      GROUP BY RN
                      ORDER BY RN
                      FOR XML PATH('')), 1, 1, '');

SET @SQL = N'WITH CTE1 AS
(
SELECT USERID,COUNTRYNAME,ROW_NUMBER() OVER(PARTITION BY USERID ORDER BY COUNTRYNAME) AS RN
FROM CNTRIES
)
SELECT *
FROM
(SELECT USERID,COUNTRYNAME,RN FROM CTE1)C
PIVOT (MAX(COUNTRYNAME) FOR RN IN (' + @cols + N')) AS PIVOTT';

-- Run the generated statement (with an empty CNTRIES table @SQL is NULL and nothing runs).
EXEC sp_executesql @SQL;
PIVOT is your best option if your version is SQL Server 2005 or above, but you don't state the version and trying to use PIVOT without a natural aggregate can be difficult to grasp for some. If your version is below 2005, you have bigger problems. Otherwise, you'll need to left join the table on itself to give you the same result. You can use a ranking function to make it a little easier. Something like this, while inefficient, will produce similar results.
/*
IF OBJECT_ID('Countries','U') IS NOT NULL
DROP TABLE Countries
CREATE TABLE Countries
(
UserID INT
, CountryName VARCHAR(255)
)
INSERT Countries
VALUES (1, 'India')
, (1, 'UK')
, (2, 'USA')
, (2, 'India')
, (2, 'Canada')
*/
-- Rank each user's countries alphabetically once, then left join that ranked
-- set back three times to spread ranks 1..3 across columns.
;WITH RankedCountries AS
(
    SELECT UserID,
           CountryName,
           RANK() OVER(PARTITION BY UserID ORDER BY UserID, CountryName) AS UserRank
    FROM Countries
)
SELECT DISTINCT
       r1.UserID,
       r1.CountryName Country1,
       r2.CountryName Country2,
       r3.CountryName Country3
FROM Countries c
LEFT JOIN RankedCountries r1 ON r1.UserID = c.UserID AND r1.UserRank = 1
LEFT JOIN RankedCountries r2 ON r2.UserID = c.UserID AND r2.UserRank = 2
LEFT JOIN RankedCountries r3 ON r3.UserID = c.UserID AND r3.UserRank = 3

SQL Server rows with latest date

I have a table :
EventLog (
EventID (INT),
UserID (VARCHAR(50)),
Event (NTEXT),
EventDate(DateTime),
DocumentID(INT)
)
I need to write a query to get the latest event, datetime for a bunch of userId which will be
WHERE UserID IN ( 'john','tom'...etc)
How can I do that?
-- Derived table t holds the latest EventDate per requested user; joining it back
-- on (UserId, EventDate) returns the full row(s) carrying that date.
-- YourTable and the trailing ... in the IN list are placeholders to fill in.
SELECT y.UserID, y.Event, y.EventDate
FROM (SELECT UserId, MAX(EventDate) AS MaxDate
FROM YourTable
WHERE UserId IN ('john','tom',...)
GROUP BY UserId) t
INNER JOIN YourTable y
ON t.UserId = y.UserId
AND t.MaxDate = y.EventDate
With a simple CTE:
-- Number each user's events from newest to oldest; RowNum = 1 is the latest one.
;WITH LatestDates AS
(SELECT
    EventID, UserID,
    Event, EventDate,
    DocumentID,
    ROW_NUMBER() OVER(PARTITION BY UserID ORDER BY EventDate DESC) AS RowNum
 FROM EventLog   -- the CTE needs a source table; EventLog is the table from the question
)
SELECT *
FROM LatestDates
-- (........) is a placeholder for the list of user ids to report on
WHERE RowNum = 1 AND UserID IN (........)
This partitions your data by some criteria (I picked UserID as an option - might be something else for you), then numbers each group sequentially starting at 1, ordered by another criteria (here: EventDate DESC) - so the most recent event for each "partition" has RowNum = 1 which is what I select from that CTE
-- Latest event timestamp for each of the listed users (one row per user).
SELECT
    el.UserID,
    MAX(el.EventDate) AS LatestEventDate
FROM EventLog AS el
WHERE el.UserID IN ('john','tom')
GROUP BY el.UserID

select top 1 with a group by

I have two columns:
namecode name
050125 chris
050125 tof
050125 tof
050130 chris
050131 tof
I want to group by namecode, and return only the name with the most number of occurrences. In this instance, the result would be
050125 tof
050130 chris
050131 tof
This is with SQL Server 2000
I usually use ROW_NUMBER() to achieve this. Not sure how it performs against various data sets, but we haven't had any performance issues as a result of using ROW_NUMBER.
The PARTITION BY clause specifies which value to "group" the row numbers by, and the ORDER BY clause specifies how the records within each "group" should be sorted. So partition the data set by NameCode, and get all records with a Row Number of 1 (that is, the first record in each partition, ordered by the ORDER BY clause).
-- Most frequent Name per NameCode.
-- Rows are first collapsed to one row per (NameCode, Name); the window then
-- orders each NameCode's names by how often they occur, so RowNumber = 1 is the
-- most frequent one (ordering by Name alone would just pick the alphabetically
-- first name). Name is the tie-breaker so the result is deterministic.
-- Note: ROW_NUMBER() requires SQL Server 2005 or later.
SELECT
    i.NameCode,
    i.Name
FROM
(
    SELECT
        RowNumber = ROW_NUMBER() OVER (PARTITION BY t.NameCode ORDER BY COUNT(*) DESC, t.Name),
        t.NameCode,
        t.Name
    FROM
        MyTable t
    GROUP BY
        t.NameCode,
        t.Name
) i
WHERE
    i.RowNumber = 1;
-- Most frequent name per namecode using a correlated TOP 1 subquery.
-- TOP 1 and ORDER BY sit in the same query level: ORDER BY inside a derived
-- table without TOP is rejected, and a derived table also cannot expose an
-- unnamed count(*) column.
select distinct o.namecode
, (
    select top 1 i.name
    from myTable i
    where i.namecode = o.namecode
    group by i.name
    order by count(*) desc
  ) as name
from myTable o
-- Most frequent name(s) per namecode: compute the per-(namecode, name) counts,
-- find the highest count per namecode, and join the two on namecode and count.
-- Names tied for the highest count are all returned.
SELECT
    top_count.namecode,
    name_counts.name
FROM
(
    SELECT per_name.namecode, MAX(per_name.count_name) AS max_count
    FROM
    (
        SELECT namecode, name, COUNT(name) AS count_name
        FROM mytable
        GROUP BY namecode, name
    ) AS per_name
    GROUP BY per_name.namecode
) AS top_count
INNER JOIN
(
    SELECT namecode, name, COUNT(name) AS count_name
    FROM mytable
    GROUP BY namecode, name
) AS name_counts
    ON  name_counts.namecode = top_count.namecode
    AND name_counts.count_name = top_count.max_count
I did not try but this should work,
-- t1 counts rows per namecode; the join attaches that count to every row of temp,
-- and TOP 1 ... ORDER BY count DESC keeps a single row overall.
-- NOTE(review): this yields one row in total, from the namecode with the most
-- rows, rather than one name per namecode - confirm against the expected output.
select top 1 t2.* from (
select namecode, count(*) count from temp
group by namecode) t1 join temp t2 on t1.namecode = t2.namecode
order by t1.count desc
Here are two examples that you could use. The temp table was more efficient than the view, but this was measured on a small data sample, so you would want to check your own statistics.
-- Variant 1: a view that totals sales per store and ranks the stores inside
-- each state by that total (highest first).
GO
CREATE VIEW StateStoreSales AS
SELECT
    t.state,
    t.stor_id,
    t.stor_name,
    SUM(s.qty) AS TotalSales,
    ROW_NUMBER() OVER (PARTITION BY t.state ORDER BY SUM(s.qty) DESC) AS [Rank]
FROM [dbo].[sales] s
INNER JOIN [dbo].[stores] t
    ON s.stor_id = t.stor_id
GROUP BY t.state, t.stor_id, t.stor_name
GO
-- Top-ranked store of every state, biggest total first.
SELECT *
FROM StateStoreSales
WHERE Rank <= 1
ORDER BY TotalSales DESC
-- Clean up the demo object.
DROP VIEW StateStoreSales
-- Variant 2: the same ranked totals materialized into a temp table.
SELECT
    t.state,
    t.stor_id,
    t.stor_name,
    SUM(s.qty) AS TotalSales,
    ROW_NUMBER() OVER (PARTITION BY t.state ORDER BY SUM(s.qty) DESC) AS [Rank]
INTO #TEMP
FROM [dbo].[sales] s
INNER JOIN [dbo].[stores] t
    ON s.stor_id = t.stor_id
GROUP BY t.state, t.stor_id, t.stor_name
-- Top-ranked store of every state, biggest total first.
SELECT *
FROM #TEMP
WHERE Rank <= 1
ORDER BY TotalSales DESC
-- Clean up the demo object.
DROP TABLE #TEMP

Resources