T-SQL - Customer Linking - sql-server

Please run the code below. These rows all represent the same customer: two of them share the same TaxNumber, and the third matches one of those on CompanyName. I need to link them all and set ParentCompanyID to the customer that was created first. I am struggling to get them linked.
-- Sample data: three rows that describe the same customer.
-- Rows 8 and 2 share a tax number; rows 8 and 3 share a customer name.
CREATE TABLE #Temp
(
    CustomerID        INT,
    CustomerName      VARCHAR(20),
    CustomerTaxNumber INT,
    CreatedDate       DATE
);

INSERT INTO #Temp (CustomerID, CustomerName, CustomerTaxNumber, CreatedDate)
VALUES
    (8, 'Company PTY', 1234, '2019-09-20'),
    (2, 'Company PT',  1234, '2019-09-24'),
    (3, 'Company PTY', NULL, '2019-09-29');

-- Show the raw rows.
SELECT CustomerID, CustomerName, CustomerTaxNumber, CreatedDate
FROM #Temp;
Below is the result that I require....
Any help will be appreciated.

Using case expression with first_value can give you the desired results:
-- ParentCompanyID is the CustomerID of the earliest-created row in the group:
--   * rows that have a tax number are grouped by tax number;
--   * rows without one are grouped by customer name instead.
SELECT
    t.CustomerID,
    t.CustomerName,
    t.CustomerTaxNumber,
    t.CreatedDate,
    CASE
        WHEN t.CustomerTaxNumber IS NOT NULL
            THEN FIRST_VALUE(t.CustomerID) OVER (PARTITION BY t.CustomerTaxNumber ORDER BY t.CreatedDate)
        ELSE
            FIRST_VALUE(t.CustomerID) OVER (PARTITION BY t.CustomerName ORDER BY t.CreatedDate)
    END AS ParentCompanyID
FROM #Temp AS t

Try this:
-- Sample data.
CREATE TABLE #Temp
(
    CustomerID        INT,
    CustomerName      VARCHAR(20),
    CustomerTaxNumber INT,
    CreatedDate       DATE
);

INSERT INTO #Temp (CustomerID, CustomerName, CustomerTaxNumber, CreatedDate)
VALUES
    (8, 'Company PTY', 1234, '2019-09-20'),
    (2, 'Company PT',  1234, '2019-09-24'),
    (3, 'Company PTY', NULL, '2019-09-29');

-- Pair every row with the single earliest-created row of the whole table.
-- The applied query is not correlated, so all rows receive the same parent.
SELECT
    first_row.CreatedDate AS [FirstEntry],
    first_row.CustomerID  AS [ParentCompanyID],
    t.CustomerID,
    t.CustomerName,
    t.CustomerTaxNumber,
    t.CreatedDate
FROM #Temp AS t
CROSS APPLY
(
    SELECT TOP (1) src.CustomerID, src.CreatedDate
    FROM #Temp AS src
    ORDER BY src.CreatedDate
) AS first_row;

DROP TABLE #Temp;
Your condition is pretty simple: get the first record. If you need to group the records in some way, you can add additional filtering inside the CROSS APPLY subquery.

Related

Select Duplicate Records

I want to retrieve only Duplicated records not unique records.
Suppose I have data which consists of as below
Ids Names
1 A
2 B
1 A
I want like output like the following:
Sno Id Name
1 1 A
2 1 A
Try this:
-- Sample data. A table variable must be named with a leading @
-- (# is only valid for temp tables created with CREATE TABLE).
DECLARE @DataSource TABLE
(
    [ID]    INT
   ,[name]  CHAR(1)
   ,[value] CHAR(2)
);

INSERT INTO @DataSource ([ID], [name], [value])
VALUES (1, 'A', 'x1')
      ,(2, 'B', 'x2')
      ,(1, 'A', 'x3');

-- Tag each row with the number of rows sharing its ([ID], [name]) pair,
-- then keep only the rows whose pair occurs more than once.
WITH DataSource AS
(
    SELECT [ID]
          ,[name]
          ,[value]
          ,COUNT(*) OVER (PARTITION BY [ID], [name]) AS [Count]
    FROM @DataSource
)
SELECT [ID]
      ,[name]
      ,[value]
      ,[Count]
FROM DataSource
WHERE [Count] > 1;
The grouping is done in the PARTITION BY part of the window function. So, basically, we are counting records for each unique ID-name pair. Of course, you can add more columns here.
-- One row per (Id, Names) pair that occurs more than once.
-- The GROUP BY columns must match the selected columns exactly.
SELECT Id, Names
FROM T
GROUP BY Id, Names
HAVING COUNT(*) > 1
To match your requested output, create a new column [SNo] that numbers the rows within each partition of the original columns (Names, Id). Rows with [SNo] > 1 are the repeated copies. To keep only the duplicated records, filter on RCount > 1.
See a mockup below:
-- Sample data. A table variable must be named with a leading @.
DECLARE @Records TABLE (Id INT, Names VARCHAR(10));

INSERT INTO @Records (Id, Names)
SELECT 1, 'A' UNION ALL
SELECT 2, 'B' UNION ALL
SELECT 1, 'A';

---- To get duplicates ----
-- SNo numbers the rows inside each (Names, Id) group;
-- RCount is the size of that group, so RCount > 1 keeps only duplicated rows.
SELECT M.SNo, M.RCount, M.Id, M.Names
FROM
(
    SELECT
        SNo    = ROW_NUMBER() OVER (PARTITION BY Names, Id ORDER BY Id),
        RCount = COUNT(*)     OVER (PARTITION BY Id, Names),
        Id,
        Names
    FROM @Records
) AS M
WHERE M.RCount > 1;

How to write a SQL script that deletes duplicate posts

I have a table with these columns:
id (pk, int identity), imei (varchar), name (varchar), lastconnected (datetime)
Some of the entries in this table have the same name and imei, but different id and different lastconnected date.
How can I effectively filter out all entries that have duplicates (with a SQL script), and then delete the one with the latest lastconnected date?
A simple ROW_NUMBER and DELETE should do the trick:
-- Delete the most recently connected row of every duplicated (imei, name) group.
-- RN = 1 is the latest row of its group; GroupSize > 1 restricts the delete to
-- groups that actually contain duplicates, so unique entries are left alone.
WITH CTE AS
(
    SELECT id,
           imei,
           [name],
           lastconnected,
           RN        = ROW_NUMBER() OVER (PARTITION BY imei, [name] ORDER BY lastconnected DESC),
           GroupSize = COUNT(*)     OVER (PARTITION BY imei, [name])
    FROM dbo.YourTable
)
DELETE FROM CTE
WHERE RN = 1
  AND GroupSize > 1;
This is easy and will solve your problem
-- Sample data. A table variable must be named with a leading @.
DECLARE @table TABLE
(
    id   INT,
    name VARCHAR(10),
    imei VARCHAR(10)
);

INSERT INTO @table (id, name, imei)
VALUES (1, 'a', 'a'),
       (2, 'b', 'a'),
       (3, 'c', 'a'),
       (4, 'a', 'a'),
       (5, 'c', 'a'),
       (6, 'a', 'a'),
       (7, 'c', 'a'),
       (8, 'a', 'a');

-- Keep the lowest id of every (name, imei) group and delete the rest.
-- This single statement gives the same end state as repeatedly deleting
-- the highest id of each duplicated group until no duplicates remain.
WITH ranked AS
(
    SELECT rn = ROW_NUMBER() OVER (PARTITION BY name, imei ORDER BY id)
    FROM @table
)
DELETE FROM ranked
WHERE rn > 1;

SELECT id, name, imei
FROM @table;
My first instinct is to use RANK(). This will delete all duplicates, not just the most recent, in cases where things are duplicated multiple times.
-- Rank the rows of each (imei, name) group from oldest to newest lastconnected
-- and delete everything ranked after the first; rows tied for first are kept.
with ranked as
(
    select id,
           imei,
           name,
           lastconnected,
           rank() over (partition by imei, name order by lastconnected) as [rank]
    from #temp
)
delete from ranked
where [rank] > 1
It selects the maximum date for each combination of name and imei and then deletes that particular row.
-- Delete the latest-connected row of every duplicated (name, imei) group.
-- SQL Server has no row-value "(a, b, c) IN (subquery)" predicate, so the
-- per-group maximum is joined back to the table instead.
DELETE t
FROM yourtable AS t
INNER JOIN
(
    SELECT name,
           imei,
           MAX(lastconnected) AS latest_connected
    FROM yourtable
    GROUP BY name, imei
    HAVING COUNT(*) > 1            -- only groups that really have duplicates
) AS latest
    ON  latest.name             = t.name
    AND latest.imei             = t.imei
    AND latest.latest_connected = t.lastconnected;

select true/false based on col value in a group by

Sorry if the title is not clear. I have a simple table T1
counter int, not null
type nvarchar(250), not null
name nvarchar(50), not null
I'm summing up the counters grouped by type, Like this:
-- Total of counter for each distinct type.
select
    sum(T1.counter),
    T1.type
from T1
group by T1.type;
I want to select one more field, a boolean (true/false) that indicates whether any of the names in the group contains a specific text, i.e. if any name is like '%Bassem%' then select true. But I cannot figure out how to do it, since I'm using GROUP BY.
Here's a way to do this.
First create a test table and insert some values:
-- Test table with the same three columns as the question's T1.
CREATE TABLE dbo.T
(
    [counter] INT           NOT NULL,
    [type]    NVARCHAR(250) NOT NULL,
    [name]    NVARCHAR(50)  NOT NULL
);

-- Alpha and Bravo each have at least one name containing 'Bassem'; Charlie has none.
INSERT INTO dbo.T
    ([counter], [type], [name])
VALUES
    (1, N'Alpha',   N'Bassem Akl'),
    (2, N'Alpha',   N'aaaaa'),
    (3, N'Alpha',   N'Akl Bassem'),
    (4, N'Bravo',   N'bbbbb'),
    (5, N'Bravo',   N'A Bassem'),
    (6, N'Charlie', N'ccccc'),
    (7, N'Charlie', N'ddddd');
Then use a CTE (common table expression) to determine if the name contains the text you are searching for. You don't have to use a CTE here, but it makes the overall SELECT statement easier to understand.
-- Step 1: flag each row with 1 when its name contains 'Bassem', otherwise 0.
WITH cte AS
(
    SELECT
        [counter],
        [type],
        CASE WHEN [name] LIKE N'%Bassem%' THEN 1 ELSE 0 END AS [contains]
    FROM dbo.T
)
-- Step 2: per type, sum the counters; MAX of the flag is 1 when any row matched.
SELECT
    SUM([counter])              AS [SumCounter],
    [type],
    CAST(MAX([contains]) AS bit) AS [contains]
FROM cte
GROUP BY [type];
Note that Transact-SQL doesn't have a Boolean data type; instead it has a bit type. See Books Online > bit (Transact-SQL) -- https://msdn.microsoft.com/en-gb/library/ms177603.aspx
-- Per type: the counter total plus a bit flag that is 1 when any name matches.
-- T-SQL has no true/false literals, so the flag is computed as 1/0; MAX is taken
-- on the integer and the result is cast to bit afterwards.
select
    sum(counter),
    type,
    cast(max(case when name like '%value%' then 1 else 0 end) as bit) as [Booleanvalue]
from T1
group by type;
You can do this as well, but it returns one row per source row, so you may want to apply DISTINCT afterwards using a CTE or a derived table.
-- Row-level variant: every source row is returned with its type's counter total
-- and a per-row match flag (1/0, because T-SQL has no true/false literals).
-- Ordering the window by its own partition key adds nothing, so it is omitted.
-- "table" is a reserved word; the question's table T1 is used instead.
select
    sum(counter) over (partition by type),
    type,
    cast(case when name like '%value%' then 1 else 0 end as bit) as [Booleanvalue]
from T1
-- Flag each row (1 = name contains 'Bassem'), then aggregate per type:
-- MAX of the flag is 1 as soon as one row in the group matched.
WITH cte AS
(
    SELECT
        counter,
        type,
        CASE WHEN name LIKE '%Bassem%' THEN 1 ELSE 0 END AS b
    FROM #t
)
SELECT
    SUM(counter),
    type,
    CAST(MAX(b) AS bit)
FROM cte
GROUP BY type;
A simple CASE statement gets it done.
-- Inline two-row sample; true_false is 1 only for the group whose type is 'type a'.
SELECT
    SUM(foo.counter),
    foo.type,
    CASE foo.type WHEN 'type a' THEN 1 ELSE 0 END AS true_false
FROM (VALUES (1, 'type a'),
             (2, 'type b')) AS foo (counter, type)
GROUP BY foo.type;

How do I find records out of order - SQL?

Let's say I have a table with an ID Identity column, some data, and a datestamp. Like this:
1 data 5/1/2013 12:30
2 data 5/2/2013 15:32
3 data 5/2/2013 16:45
4 data 5/3/2013 9:32
5 data 5/5/2013 8:21
6 data 5/4/2013 9:36
7 data 5/6/2013 11:42
How do I write a query that will show me the one record that is timestamped 5/4? The table has millions of records. I've done some searching, but I don't know what to call what I'm searching for. :/
-- Sample data. A table variable must be named with a leading @.
-- NOTE(review): the m/d/yyyy literals rely on the session's date format.
declare @t table (id int, bla char(4), [timestamp] datetime);

insert @t (id, bla, [timestamp])
values
    (1, 'data', '5/1/2013 12:30'),
    (2, 'data', '5/2/2013 15:32'),
    (3, 'data', '5/2/2013 16:45'),
    (4, 'data', '5/3/2013 9:32'),
    (5, 'data', '5/5/2013 8:21'),
    (6, 'data', '5/4/2013 9:36'),
    (7, 'data', '5/6/2013 11:42');

-- rn1 is the row's position by id, rn2 its position by timestamp.
-- A row is reported when its id position is neither equal to its timestamp
-- position nor exactly one before it.
select a.[timestamp]
from
(
    select
        rn1 = row_number() over (order by id),
        rn2 = row_number() over (order by [timestamp]),
        [timestamp]
    from @t
) as a
where a.rn1 not in (a.rn2, a.rn2 - 1);
in 2008 r2, this would be a way
-- Sample data. A table variable must be named with a leading @.
DECLARE @Table AS TABLE
(
    id     INT,
    ladate DATETIME
);

INSERT INTO @Table (id, ladate)
VALUES (1, '2013-05-01'),
       (2, '2013-05-02'),
       (3, '2013-05-03'),
       (4, '2013-05-05'),
       (5, '2013-05-04'),
       (6, '2013-05-06'),
       (7, '2013-05-07'),
       (8, '2013-05-08');

-- Compare each row with its neighbours by id and return the date of any row
-- that is later than both the previous row and the next row.
-- Neighbours are found with id - 1 / id + 1, so this assumes the ids are
-- consecutive (1, 2, 3, ...) with none missing; if there are gaps, number the
-- rows with ROW_NUMBER() first and join on that instead.
SELECT cur.ladate
FROM @Table AS cur
INNER JOIN @Table AS nxt
    ON nxt.id = cur.id + 1
INNER JOIN @Table AS prev
    ON prev.id = cur.id - 1
WHERE prev.ladate < cur.ladate
  AND cur.ladate  > nxt.ladate;

Use a table-value function to return data in columns instead of rows

I'm trying to write a query which will take a limited number of historical records and display the results in one row.
For example, I have a table of people:
|PersonID|Forename|Surname
|--------|--------|----------
|00000001|Andy |Cairns
|00000002|John |Smith
And a table of all their historical addresses:
|PersonID|Date |Street |Town
-------------------------------------------
|00000001|2011-01-01|Main Street |MyTown
|00000001|2010-01-01|Old Street |OldTown
|00000002|2010-01-01|Diagon Alley |London
|00000001|2009-01-01|First Street |OtherTown
etc..
I'd like to return the following:
|PersonID|Name |MoveDate1 |Town1 |MoveDate2 |Town2 |MoveDate3 |Town3
------------------------------------------------------------------------
|00000001|Andy |2011-01-01|MyTown|2010-01-01|OldTown|2009-01-01|OtherTown
|00000002|John |2010-01-01|London| | | |
At the moment, I'm using the following query:
-- One row per (person, address) pair returned by the function.
-- A table-valued function must be called with an argument list; passing the
-- outer row's PersonID is what correlates the APPLY to each person.
-- The people table shown has Forename/Surname, so Forename is exposed as Name.
select
    p.PersonID,
    p.Forename as Name,
    s.mdate,
    s.town
from dbo.people as p
cross apply dbo.getAddressList(p.PersonID) as s
And the following table-value function:
-- Multi-statement TVF: returns up to three rows (mdate, town) for one person,
-- most recent mdate first.
-- Parameters and table variables must start with @, the parameter needs a data
-- type, and column definitions / insert column lists are comma separated.
-- NOTE(review): INT for @personID and VARCHAR(50) for town are assumptions —
-- confirm against the column types of dbo.addresses.
alter function [dbo].[getAddressList]
(
    @personID int
)
returns @addresslist table
(
    mdate smalldatetime,
    town  varchar(50)
)
as
begin
    insert into @addresslist (mdate, town)
    select top (3) mdate, town
    from dbo.addresses
    where PersonID = @personID
    order by mdate desc;

    return;
end
Unfortunately, this is returning a new row for each address, like this:
|PersonID|Name|MDate |Town
|00000001|Andy|2011-01-01|MyTown
|00000001|Andy|2010-01-01|OldTown
|00000001|Andy|2009-01-01|OtherTown
How can I return each returned row in a field instead?
Thanks in advance.
Where possible you should always use inline TVFs in preference to multistatement ones.
-- Inline TVF: returns ONE row per person with the three most recent addresses
-- spread across columns (MoveDate1/Town1 .. MoveDate3/Town3).
-- Parameters must be named with a leading @.
ALTER FUNCTION [dbo].[getAddressList]
(
    @personID INT
)
RETURNS TABLE
AS
RETURN
(
    -- Number the person's three most recent addresses 1..3, newest first.
    WITH cte AS
    (
        SELECT TOP (3)
               mdate,
               town,
               ROW_NUMBER() OVER (ORDER BY mdate DESC) AS rn
        FROM dbo.addresses
        WHERE PersonID = @personID
        ORDER BY mdate DESC
    )
    -- Each CASE picks the value of exactly one numbered row; MAX collapses the
    -- rows into one. Slots with no address come back as NULL.
    SELECT
        MAX(CASE WHEN rn = 1 THEN mdate END) AS MoveDate1,
        MAX(CASE WHEN rn = 1 THEN town  END) AS Town1,
        MAX(CASE WHEN rn = 2 THEN mdate END) AS MoveDate2,
        MAX(CASE WHEN rn = 2 THEN town  END) AS Town2,
        MAX(CASE WHEN rn = 3 THEN mdate END) AS MoveDate3,
        MAX(CASE WHEN rn = 3 THEN town  END) AS Town3
    FROM cte
)
I'd also investigate the relative performance of not using the TVF at all, and instead doing a JOIN with ROW_NUMBER() OVER (PARTITION BY PersonID ...) combined with the pivoting technique shown above.
Here, check it out:
-- Create People (not like that... jeez...)
CREATE TABLE #People (PersonID INT, Forename VARCHAR(25), Surname VARCHAR(25));
INSERT INTO #People (PersonID, Forename, Surname) VALUES (1, 'Andy', 'Cairns');
INSERT INTO #People (PersonID, Forename, Surname) VALUES (2, 'John', 'Smith');

-- Create historical addresses
CREATE TABLE #Addy (PersonID INT, AddyDate DATETIME, Street VARCHAR(50), Town VARCHAR(50));
INSERT INTO #Addy (PersonID, AddyDate, Street, Town) VALUES (1, '2011-01-01', 'Main Street', 'MyTown');
INSERT INTO #Addy (PersonID, AddyDate, Street, Town) VALUES (1, '2010-01-01', 'Old Street', 'OldTown');
INSERT INTO #Addy (PersonID, AddyDate, Street, Town) VALUES (2, '2010-01-01', 'Diagon Alley', 'London');
INSERT INTO #Addy (PersonID, AddyDate, Street, Town) VALUES (1, '2009-01-01', 'First Street', 'OtherTown');

-- Create ranked addresses mapped to people.
-- Ordinal 1 is each person's most recent address. Ordering by AddyDate makes
-- the numbering deterministic; ordering by the partition key would not.
SELECT p.Forename, p.Surname, a.PersonID, a.AddyDate, a.Street, a.Town,
       ROW_NUMBER() OVER (PARTITION BY p.PersonID ORDER BY a.AddyDate DESC) AS Ordinal
INTO #Ranked
FROM #People AS p
INNER JOIN #Addy AS a ON p.PersonID = a.PersonID;

-- Make sure everything is kosher
SELECT PersonID, Forename, Surname FROM #People;
SELECT PersonID, AddyDate, Street, Town FROM #Addy;
SELECT Forename, Surname, PersonID, AddyDate, Street, Town, Ordinal FROM #Ranked;

-- Create a container for "final" results (table variables are named with @)
DECLARE @Results TABLE (PersonID INT, Forename VARCHAR(25)
    , MoveDate1 DATETIME, Street1 VARCHAR(50), Town1 VARCHAR(50)
    , MoveDate2 DATETIME, Street2 VARCHAR(50), Town2 VARCHAR(50)
    , MoveDate3 DATETIME, Street3 VARCHAR(50), Town3 VARCHAR(50));

-- Get our people primed in the results table
INSERT INTO @Results (PersonID, Forename)
SELECT PersonID, Forename FROM #People;

-- Fill it up: one pass per address slot, updating through the alias so the
-- target row and the joined row are unambiguously the same.
UPDATE r
SET MoveDate1 = k.AddyDate, Street1 = k.Street, Town1 = k.Town
FROM @Results AS r
INNER JOIN #Ranked AS k ON k.PersonID = r.PersonID
WHERE k.Ordinal = 1;

UPDATE r
SET MoveDate2 = k.AddyDate, Street2 = k.Street, Town2 = k.Town
FROM @Results AS r
INNER JOIN #Ranked AS k ON k.PersonID = r.PersonID
WHERE k.Ordinal = 2;

UPDATE r
SET MoveDate3 = k.AddyDate, Street3 = k.Street, Town3 = k.Town
FROM @Results AS r
INNER JOIN #Ranked AS k ON k.PersonID = r.PersonID
WHERE k.Ordinal = 3;

-- Winsauce?
SELECT PersonID, Forename
     , MoveDate1, Street1, Town1
     , MoveDate2, Street2, Town2
     , MoveDate3, Street3, Town3
FROM @Results;

-- Cleanup
DROP TABLE #People;
DROP TABLE #Addy;
DROP TABLE #Ranked;

Resources