SQL name string matching -sql server - sql-server

I have a table with data in the form
Date Amount Payer
04/01/2021 50 LARRY BURNS
16/01/2021 46 JOHN SMITH
15/01/2021 35 SUSAN ARTHUR
14/01/2021 28 S. ARTHUR
13/01/2021 21 JO SMITH
12/01/2021 13 LARRY BURNS
11/01/2021 6 SUSAN ARTHUR
I also have another table with data in the form
ID Customer Name Customer Type
10001 LARRY BURNS CU
10002 JOHN SMITH CU
10003 SUSAN ARTHUR CU
The first table which is a transactions table does not have a foreign key to reference the ID from the customer. The only information provided is the Payer column which includes inconsistently spelled names of customers. Is it possible to do some form of 'name matching' as a pseudo-join to allow retrieval of the customer name and ID?
Ideally in the form:
Date Amount Payer Customer Name ID
04/01/2021 50 LARRY BURNS LARRY BURNS 10001
16/01/2021 46 JOHN SMITH JOHN SMITH 10002
15/01/2021 35 SUSAN ARTHUR SUSAN ARTHUR 10003
14/01/2021 28 S. ARTHUR SUSAN ARTHUR 10003
13/01/2021 21 JO SMITH JOHN SMITH 10002
12/01/2021 13 LARRY BURNS LARRY BURNS 10001
11/01/2021 6 SUSAN ARTHUR SUSAN ARTHUR 10003

Given that you're willing to have an allowance for differences, you could try something like the following:
-- Sample transactions. Payer is free text and carries no key back to the
-- customer table. Table variables take the @ prefix (# names temp tables and
-- is not valid in DECLARE).
DECLARE @Payer TABLE (
    [Date] date,
    Amount decimal(18,2),
    Payer  varchar(50)
);

-- Unseparated YYYYMMDD literals are read the same way regardless of the
-- session's DATEFORMAT / language settings.
INSERT INTO @Payer ( [Date], Amount, Payer ) VALUES
( '20210104', 50, 'LARRY BURNS' ),
( '20210116', 46, 'JOHN SMITH' ),
( '20210115', 35, 'SUSAN ARTHUR' ),
( '20210114', 28, 'S. ARTHUR' ),
( '20210113', 21, 'JO SMITH' ),
( '20210112', 13, 'LARRY BURNS' ),
( '20210111', 6 , 'SUSAN ARTHUR' );

-- Sample customer master data.
DECLARE @Customer TABLE (
    ID           int,
    CustomerName varchar(50),
    CustomerType varchar(2)
);

INSERT INTO @Customer ( ID, CustomerName, CustomerType ) VALUES
( 10001, 'LARRY BURNS', 'CU' ),
( 10002, 'JOHN SMITH', 'CU' ),
( 10003, 'SUSAN ARTHUR', 'CU' );

-- For every transaction, look up at most one customer whose name is either
-- identical to the payer or phonetically close (DIFFERENCE compares the
-- SOUNDEX codes of its two arguments and returns 0..4, 4 being the closest).
-- OUTER APPLY keeps transactions that have no acceptable candidate; their
-- ID / CustomerName come back as NULL.
SELECT
    payer.[Date],
    payer.Amount,
    payer.Payer,
    customer.ID,
    customer.CustomerName
FROM @Payer AS payer
OUTER APPLY (
    SELECT TOP (1)
        c.ID,
        c.CustomerName
    FROM @Customer AS c
    WHERE
        c.CustomerName = payer.Payer
        OR DIFFERENCE ( payer.Payer, c.CustomerName ) >= 3
        OR DIFFERENCE ( c.CustomerName, payer.Payer ) >= 3
    -- Without an ORDER BY, TOP (1) may return any qualifying customer.
    -- Prefer an exact match, then the closest phonetic score, then the
    -- lowest ID so the choice is repeatable.
    ORDER BY
        CASE WHEN c.CustomerName = payer.Payer THEN 0 ELSE 1 END,
        DIFFERENCE ( payer.Payer, c.CustomerName ) DESC,
        c.ID
) AS customer;
Returns
+------------+--------+--------------+-------+--------------+
| Date | Amount | Payer | ID | CustomerName |
+------------+--------+--------------+-------+--------------+
| 2021-01-04 | 50.00 | LARRY BURNS | 10001 | LARRY BURNS |
| 2021-01-16 | 46.00 | JOHN SMITH | 10002 | JOHN SMITH |
| 2021-01-15 | 35.00 | SUSAN ARTHUR | 10003 | SUSAN ARTHUR |
| 2021-01-14 | 28.00 | S. ARTHUR | 10003 | SUSAN ARTHUR |
| 2021-01-13 | 21.00 | JO SMITH | 10002 | JOHN SMITH |
| 2021-01-12 | 13.00 | LARRY BURNS | 10001 | LARRY BURNS |
| 2021-01-11 | 6.00 | SUSAN ARTHUR | 10003 | SUSAN ARTHUR |
+------------+--------+--------------+-------+--------------+

Related

Calculating gain or loss in a partition SQL

With SQL partitions min and max values are easy to find, but how is Gain or Loss determined over a partition?
This brings in the time element to compare min and max. If max occurs later in time than min, that would be a "Gain". If min occurs later, that would be a "Loss".
How might the GainorLoss column be calculated?
-- Weigh-in history: one row per person per measurement date.
CREATE TABLE Weights (
    id     int,
    date   date,
    person varchar(40),
    Weight int
);

-- Four monthly readings each for Alice and Peter.
INSERT INTO Weights (id, date, person, Weight)
VALUES
    (1, '2022-09-01', 'Alice', 100),
    (2, '2022-10-01', 'Alice', 105),
    (3, '2022-11-01', 'Alice', 110),
    (4, '2022-12-01', 'Alice', 115),
    (5, '2022-09-01', 'Peter', 150),
    (6, '2022-10-01', 'Peter', 145),
    (7, '2022-11-01', 'Peter', 140),
    (8, '2022-12-01', 'Peter', 135);

-- Every reading next to that person's overall lightest and heaviest reading.
SELECT
    w.person,
    w.date,
    w.weight,
    MIN(w.Weight) OVER (PARTITION BY w.person) AS minWeight,
    MAX(w.Weight) OVER (PARTITION BY w.person) AS maxWeight
    --if max weight occurs after min weight, then "Gain" ELSE "Loss" AS GainorLoss
FROM weights AS w
Desired output:
person | date       | weight | minWeight | maxWeight | GainorLoss
:----- | :--------- | -----: | --------: | --------: | :---------
Alice  | 2022-09-01 |    100 |       100 |       115 | Gain
Alice  | 2022-10-01 |    105 |       100 |       115 | Gain
Alice  | 2022-11-01 |    110 |       100 |       115 | Gain
Alice  | 2022-12-01 |    115 |       100 |       115 | Gain
Peter  | 2022-09-01 |    150 |       135 |       150 | Loss
Peter  | 2022-10-01 |    145 |       135 |       150 | Loss
Peter  | 2022-11-01 |    140 |       135 |       150 | Loss
Peter  | 2022-12-01 |    135 |       135 |       150 | Loss
You can use FIRST_VALUE to get the first or the last value based on an order.
The example below calculates two GainOrLoss results.
The second is what you described. The first is what I think you want.
With the current sample data, however, both give the same result.
-- weight_marks: for every reading, the person's first and last weight (by
-- date, then id) and the dates of their lightest and heaviest readings.
WITH weight_marks AS (
    SELECT
        w.person,
        w.[date],
        w.weight,
        FIRST_VALUE(w.Weight) OVER (PARTITION BY w.person ORDER BY w.[date], w.id)           AS FirstWeight,
        FIRST_VALUE(w.Weight) OVER (PARTITION BY w.person ORDER BY w.[date] DESC, w.id DESC) AS LastWeight,
        FIRST_VALUE(w.[date]) OVER (PARTITION BY w.person ORDER BY w.Weight, w.id)           AS MinWeightDate,
        FIRST_VALUE(w.[date]) OVER (PARTITION BY w.person ORDER BY w.Weight DESC, w.id DESC) AS MaxWeightDate
    FROM weights AS w
)
SELECT
    m.person,
    m.[date],
    m.weight,
    m.FirstWeight,
    m.LastWeight,
    m.MinWeightDate,
    m.MaxWeightDate,
    -- GainOrLoss1: compares the chronologically first and last weights.
    CASE
        WHEN m.FirstWeight < m.LastWeight THEN 'Gain'
        WHEN m.FirstWeight > m.LastWeight THEN 'Loss'
        ELSE 'Same'
    END AS [GainOrLoss1],
    -- GainOrLoss2: compares when the maximum occurred relative to the minimum.
    CASE
        WHEN m.MinWeightDate < m.MaxWeightDate THEN 'Gain'
        WHEN m.MinWeightDate > m.MaxWeightDate THEN 'Loss'
        ELSE 'Same'
    END AS [GainOrLoss2]
FROM weight_marks AS m
ORDER BY
    m.person,
    m.[date]
person | date | weight | FirstWeight | LastWeight | MinWeightDate | MaxWeightDate | GainOrLoss1 | GainOrLoss2
:----- | :--------- | -----: | ----------: | ---------: | :------------ | :------------ | :---------- | :----------
Alice | 2022-09-01 | 100 | 100 | 115 | 2022-09-01 | 2022-12-01 | Gain | Gain
Alice | 2022-10-01 | 105 | 100 | 115 | 2022-09-01 | 2022-12-01 | Gain | Gain
Alice | 2022-11-01 | 110 | 100 | 115 | 2022-09-01 | 2022-12-01 | Gain | Gain
Alice | 2022-12-01 | 115 | 100 | 115 | 2022-09-01 | 2022-12-01 | Gain | Gain
Peter | 2022-09-01 | 150 | 150 | 135 | 2022-12-01 | 2022-09-01 | Loss | Loss
Peter | 2022-10-01 | 145 | 150 | 135 | 2022-12-01 | 2022-09-01 | Loss | Loss
Peter | 2022-11-01 | 140 | 150 | 135 | 2022-12-01 | 2022-09-01 | Loss | Loss
Peter | 2022-12-01 | 135 | 150 | 135 | 2022-12-01 | 2022-09-01 | Loss | Loss
db<>fiddle here
I think the result would be more accurate if you compare the original weight with the current weight.
Alternatively, you can derive the status from the prior weight and the current weight.
It's up to you to choose, or you could use both methods; they can be combined, of course.
I also added a result type for when the weight did not change, i.e. when there is neither a gain nor a loss.
This method uses the original weight and the current weight to determine the gain-or-loss status:
-- Sample weigh-ins held in a table variable so the script is self-contained.
-- Table variables are declared with the @ prefix.
DECLARE @Weights TABLE (id int, [date] date, person varchar(40), Weight int);

INSERT INTO @Weights (id, [date], person, Weight) VALUES
(1, '2022-09-01', 'Alice', 100), (2, '2022-10-01', 'Alice', 105),
(3, '2022-11-01', 'Alice', 110), (4, '2022-12-01', 'Alice', 115),
(5, '2022-09-01', 'Peter', 150), (6, '2022-10-01', 'Peter', 145),
(7, '2022-11-01', 'Peter', 140), (8, '2022-12-01', 'Peter', 135);

-- Classifies every reading against the person's original (earliest) weight:
-- 'Loss' when lighter than the original, 'Gain' when heavier, otherwise
-- 'Unchanged'.
SELECT t.person,
       t.[date],
       t.weight,
       t.minWeight,
       t.maxWeight,
       t.Original,
       CASE WHEN t.Original > t.weight THEN 'Loss'
            WHEN t.Original < t.weight THEN 'Gain'
            ELSE 'Unchanged'
       END AS GainOrLoss
FROM ( SELECT w.person,
              w.[date],
              w.weight,
              MIN(w.Weight) OVER (PARTITION BY w.person) AS minWeight,
              MAX(w.Weight) OVER (PARTITION BY w.person) AS maxWeight,
              -- earliest reading per person; id breaks ties on the same date
              FIRST_VALUE(w.Weight) OVER (PARTITION BY w.person ORDER BY w.[date], w.id) AS Original
       FROM @Weights AS w
     ) AS t
-- Row order is otherwise unspecified; list each person's readings in date order.
ORDER BY t.person, t.[date];
with this result
-------|----------|--------|-----------|-----------|--------|---------
person |date      |weight  |minWeight  |maxWeight  |Original|GainOrLoss
-------|----------|--------|-----------|-----------|--------|---------
Alice |2022-09-01| 100 |100 |115 |100 |Unchanged
Alice |2022-10-01| 105 |100 |115 |100 |Gain
Alice |2022-11-01| 110 |100 |115 |100 |Gain
Alice |2022-12-01| 115 |100 |115 |100 |Gain
Peter |2022-09-01| 150 |135 |150 |150 |Unchanged
Peter |2022-10-01| 145 |135 |150 |150 |Loss
Peter |2022-11-01| 140 |135 |150 |150 |Loss
Peter |2022-12-01| 135 |135 |150 |150 |Loss
You can test and change it in this DBFiddle
EDIT
If you want to see the weight changes with regard to the prior weight, you can use the LAG function.
In my opinion this would be the best method to follow up on someone's weight change.
This method uses the prior weight and the current weight to determine the gain-or-loss status
-- Classifies every reading against the same person's previous reading.
-- Reads the @Weights table variable (id, [date], person, Weight), which must
-- be declared and populated earlier in the same batch; the data lives in a
-- table variable, not in a #temp table.
SELECT t.person,
       t.[date],
       t.priorWeight,
       t.weight,
       -- priorWeight is NULL on a person's first reading, so both comparisons
       -- are unknown and that row falls through to 'nochange'.
       CASE WHEN t.priorWeight > t.weight THEN 'Loss'
            WHEN t.priorWeight < t.weight THEN 'Gain'
            ELSE 'nochange'
       END AS GainOrLoss,
       t.minWeight,
       t.maxWeight
FROM ( SELECT w.person,
              w.[date],
              w.weight,
              MIN(w.Weight) OVER (PARTITION BY w.person) AS minWeight,
              MAX(w.Weight) OVER (PARTITION BY w.person) AS maxWeight,
              -- id breaks ties when a person has two readings on one date
              LAG(w.Weight) OVER (PARTITION BY w.person ORDER BY w.[date], w.id) AS priorWeight
       FROM @Weights AS w
     ) AS t
-- Row order is otherwise unspecified; list each person's readings in date order.
ORDER BY t.person, t.[date];
-------|----------|------------|-------|----------|---------|---------
person |date |priorWeight |weight |GainOrLoss|minWeight|maxWeight
-------|----------|------------|-------|----------|---------|---------
Alice |2022-09-01| |100 |nochange |100 |115
Alice |2022-10-01|100 |105 |Gain |100 |115
Alice |2022-11-01|105 |110 |Gain |100 |115
Alice |2022-12-01|110 |115 |Gain |100 |115
Peter |2022-09-01| |150 |nochange |135 |150
Peter |2022-10-01|150 |145 |Loss |135 |150
Peter |2022-11-01|145 |140 |Loss |135 |150
Peter |2022-12-01|140 |135 |Loss |135 |150
And again a DBFiddle you can use to play with this query
You can combine the two methods, of course.

SQL Query that splits a row into multiple rows based on the dates of Monday to Sunday of a particular week

So basically as the title says, may sound confusing, but this is how my table is set up:
+-----------+--------------------------+---------+-----------+------------+-----------------+-------------+---------+
| RecordID | WeekCommencing | Name | Monday | Tuesday | Wednesday | Thursday | Friday | Saturday | Sunday |
+-----------+--------------------------+---------+-----------+------------+-----------------+-------------+---------+
| 1 | 2020-08-10 | John Doe | WH | WH | RW | WH | WH | DO | DO |
+-----------+----------------+----------+---------+----------+------------+-----------+---------+-----------+--------+
What I want to do is query the records in the table and separate each day into its respective date. "WeekCommencing" is always a Monday, so using the above table, I want to manipulate it to look like this:
+-----------+--------------------------+---------+-----------+-----+
| RecordID | WeekCommencing | Name | Date | Category |
+-----------+--------------------------+---------+-----------+------+
| 1 | 2020-08-10 | John Doe | 2020-08-10 | WH |
+-----------+----------------+----------+---------+----------+------+
| 1 | 2020-08-10 | John Doe | 2020-08-11 | WH |
+-----------+----------------+----------+---------+----------+------+
| 1 | 2020-08-10 | John Doe | 2020-08-12 | RW |
+-----------+----------------+----------+---------+----------+------+
| 1 | 2020-08-10 | John Doe | 2020-08-13 | WH |
+-----------+----------------+----------+---------+----------+------+
| 1 | 2020-08-10 | John Doe | 2020-08-14 | WH |
+-----------+----------------+----------+---------+----------+------+
| 1 | 2020-08-10 | John Doe | 2020-08-15 | DO |
+-----------+----------------+----------+---------+----------+------+
| 1 | 2020-08-10 | John Doe | 2020-08-16 | DO |
+-----------+----------------+----------+---------+----------+------+
So as you can see, the week commencing marks the beginning of the week which is a Monday, therefore the date will be Monday and the category associated with Monday is assigned to that date, then Tuesday the 11th and the category assigned to Tuesday on that date, and so on until the following Monday then it starts again. How would I be able to accomplish this?
You can unpivot your data using CROSS APPLY and a table value constructor, e.g.
-- Sample rota: one row per person per week, one category column per weekday.
-- Table variables are declared with the @ prefix.
DECLARE @DummyData TABLE
(
    RecordID INT,
    WeekCommencing DATE,      -- the Monday the week starts on
    Name VARCHAR(8),
    Monday VARCHAR(2),
    Tuesday VARCHAR(2),
    Wednesday VARCHAR(2),
    Thursday VARCHAR(2),
    Friday VARCHAR(2),
    Saturday VARCHAR(2),
    Sunday VARCHAR(2)
);

INSERT @DummyData(RecordID, WeekCommencing, Name, Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday)
VALUES
(1, '20200810', 'John Doe', 'WH', 'WH', 'RW', 'WH', 'WH', 'DO', 'DO');

-- Unpivot: the VALUES constructor emits seven (day offset, category) rows for
-- each source row, and the offset is added to WeekCommencing to get the
-- calendar date of that weekday.
SELECT  t.RecordID,
        t.WeekCommencing,
        t.Name,
        DATEADD(DAY, upvt.AddDays, t.WeekCommencing) AS [Date],
        upvt.Category
FROM    @DummyData AS t
        CROSS APPLY
        (VALUES
            (0, t.Monday),
            (1, t.Tuesday),
            (2, t.Wednesday),
            (3, t.Thursday),
            (4, t.Friday),
            (5, t.Saturday),
            (6, t.Sunday)
        ) AS upvt (AddDays, Category)
-- Row order is otherwise unspecified; list the days of each record in order.
ORDER BY t.RecordID, upvt.AddDays;
OUTPUT
RecordID WeekCommencing Name Date Category
---------------------------------------------------------------
1 2020-08-10 John Doe 2020-08-10 WH
1 2020-08-10 John Doe 2020-08-11 WH
1 2020-08-10 John Doe 2020-08-12 RW
1 2020-08-10 John Doe 2020-08-13 WH
1 2020-08-10 John Doe 2020-08-14 WH
1 2020-08-10 John Doe 2020-08-15 DO
1 2020-08-10 John Doe 2020-08-16 DO

How to show count of sub elements in Group By query - Pivoted

We have cars table.
It has marques:
Nissan
Toyota
BMW
We have cities:
New York
Los Angeles
Dallas
Table:
Id
Marque
CityId
It has all 250 000 cars for the 3 cities.
How do we show the counts grouped by city, with one row per city and the marques as columns?
This is my query:
-- Vehicle count per postal district and marque, largest groups first.
SELECT
    COUNT(veh.id) AS [Count],
    pd.District,
    vet.Name
FROM Vehicles AS veh
INNER JOIN PostalDistricts AS pd
    ON pd.Id = veh.PostalDistrictId
INNER JOIN VehicleMarqueId AS vet
    ON vet.id = veh.VehicleMarqueId
GROUP BY
    pd.District,
    vet.Name
ORDER BY
    COUNT(veh.id) DESC,
    pd.District ASC
But the result is:
+-------+-------------+--------+
| Count | City | Marque |
+-------+-------------+--------+
| 9547 | New York | Toyota |
| 3509 | Dallas | Toyota |
| 2608 | Los Angeles | Toyota |
| 2545 | New York | Nissan |
| 2107 | Dallas | Nissan |
| 1780 | Los Angeles | Nissan |
+-------+-------------+--------+
Expected is:
+-------------+--------+--------+
| City | Toyota | Nissan |
+-------------+--------+--------+
| Dallas | 3509 | 2107 |
| Los Angeles | 2608 | 1780 |
| New York | 9547 | 2545 |
+-------------+--------+--------+
You can use the pivot statement:
-- Pre-aggregated counts per city and marque (the output of the grouped query).
-- Table variables are declared with the @ prefix.
DECLARE @tmp TABLE ([count] int, City varchar(50), Marque nvarchar(50));

INSERT INTO @tmp ([count], City, Marque) VALUES
 (9547, 'New York', 'Toyota')
,(3509, 'Dallas', 'Toyota')
,(2608, 'Los Angeles', 'Toyota')
,(2545, 'New York', 'Nissan')
,(2107, 'Dallas', 'Nissan')
,(1780, 'Los Angeles', 'Nissan');

-- One row per city, one column per marque listed in the IN clause.
-- PIVOT groups by every source column that is neither aggregated nor pivoted,
-- so the source is narrowed to exactly the three columns involved.
SELECT piv.City,
       piv.[Toyota],
       piv.[Nissan]
FROM ( SELECT [count], City, Marque FROM @tmp ) AS src
PIVOT
(
    MAX([count])
    FOR Marque IN ([Toyota], [Nissan])
) AS piv
ORDER BY piv.City;
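Results:
City        | Toyota | Nissan
:---------- | -----: | -----:
Dallas      |   3509 |   2107
Los Angeles |   2608 |   1780
New York    |   9547 |   2545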
But if you have more values in the Marque column you have to use dynamic TSQL to generate all the columns needed

SQL Server parent child (parent see all everything)?

This is my table:
EmployeeID Employee ManagerID
---------------------------------
1 Anna 5
2 John 4
3 Steve 4
4 Lisa 1
5 Adam NULL
6 Per 1
There is no problem for me to get parent and child relationship with a self-join like this:
-- Each employee with the name of their direct manager; the LEFT JOIN keeps
-- employees whose ManagerID has no matching row (Manager is then NULL).
SELECT emp.EmployeeID,
       emp.Employee  AS Employee,
       emp.ManagerID,
       mgr.Employee  AS Manager
FROM Employee AS emp
LEFT JOIN Employee AS mgr
       ON mgr.EmployeeID = emp.ManagerID
EmployeeID Employee ManagerID Manager
1 Anna 5 Adam
2 John 4 Lisa
3 Steve 4 Lisa
4 Lisa 1 Anna
5 Adam NULL NULL
6 Per 1 Anna
However, how would I go about making sure that each parent sees the whole hierarchy below them?
I would like the table to look like this:
EmployeeID Manager Employee EmployeeID
5 Adam Anna 1
5 Adam Per 6
5 Adam Lisa 4
5 Adam John 2
5 Adam Steve 3
1 Anna Per 6
1 Anna Lisa 4
1 Anna John 2
1 Anna Steve 3
4 Lisa John 2
4 Lisa Steve 3
Note: in this example I only have 3 levels of managers, but there can be many more.
You can try this:
-- Sample org chart. Table variables are declared with the @ prefix.
DECLARE @DataSource TABLE
(
    [EmployeeID] TINYINT
   ,[Employee] VARCHAR(12)
   ,[ManagerID] TINYINT      -- NULL for the top of the hierarchy
);

INSERT INTO @DataSource ([EmployeeID], [Employee], [ManagerID])
VALUES (1, 'Anna', 5)
      ,(2, 'John', 4)
      ,(3, 'Steve', 4)
      ,(4, 'Lisa', 1)
      ,(5, 'Adam', NULL)
      ,(6, 'Per', 1);

-- Hierarchy lists, for every manager (Parent), the manager's own row at
-- level 0 followed by every direct and indirect report at levels 1, 2, ...
WITH Hierarchy AS
(
    -- Anchor: each employee who manages at least one person, once (DISTINCT
    -- collapses the one-row-per-report fan-out of the join), as their own root.
    SELECT DISTINCT
           mgr.[EmployeeID]
          ,mgr.[Employee]
          ,mgr.[ManagerID]
          ,0 AS [Level]
          ,mgr.[EmployeeID] AS [Parent]
    FROM @DataSource AS mgr
    INNER JOIN @DataSource AS report
        ON mgr.[EmployeeID] = report.[ManagerID]
    UNION ALL
    -- Recursive step: the direct reports of the rows found so far, one level
    -- deeper, still attributed to the root they were reached from.
    SELECT child.[EmployeeID]
          ,child.[Employee]
          ,child.[ManagerID]
          ,h.[Level] + 1
          ,h.[Parent]
    FROM Hierarchy AS h
    INNER JOIN @DataSource AS child
        ON h.[EmployeeID] = child.[ManagerID]
)
-- Output columns: manager id, manager name, subordinate id, subordinate name.
SELECT root.[EmployeeID]
      ,root.[Employee] AS [Manager]
      ,h.[EmployeeID]
      ,h.[Employee]
FROM Hierarchy AS h
INNER JOIN @DataSource AS root
    ON h.[Parent] = root.[EmployeeID]
WHERE h.[Level] <> 0          -- drop each manager's own level-0 row
-- Level and EmployeeID make the order within one manager repeatable.
ORDER BY h.[Parent] DESC, h.[Level], h.[EmployeeID];
We are using a recursive CTE; the syntax may look messy and complicated if you are seeing it for the first time, but it's nothing special.
When using a recursive CTE, run some performance tests to be sure it is the right technique for solving your issue.
You should use recursive CTE syntax. In the first iteration (before UNION ALL) you get all Parent-Child pairs. In the recursive part (after UNION ALL) you get the next level child for each pair and substitute it into the pair Parent-Child instead of the Child leaving Parent the same.
-- chain: every (manager, subordinate) pair at any depth.
WITH chain AS
(
    -- Anchor: direct manager -> report pairs.
    SELECT boss.EmployeeID   AS ManagerId,
           boss.Employee     AS Manager,
           report.EmployeeID AS EmployeeID,
           report.Employee   AS Employee
    FROM TEmployee AS boss
    INNER JOIN TEmployee AS report
            ON report.ManagerID = boss.EmployeeID
    UNION ALL
    -- Recursive step: keep the manager of the pair and replace the
    -- subordinate with each of the subordinate's own direct reports.
    SELECT pair.ManagerId   AS ManagerId,
           pair.Manager     AS Manager,
           next.EmployeeID  AS EmployeeID,
           next.Employee    AS Employee
    FROM chain AS pair
    INNER JOIN TEmployee AS next
            ON next.ManagerID = pair.EmployeeID
)
SELECT chain.ManagerId,
       chain.Manager,
       chain.EmployeeID,
       chain.Employee
FROM chain
ORDER BY chain.ManagerId
result:
+-----------+---------+------------+----------+
| ManagerId | Manager | EmployeeID | Employee |
+-----------+---------+------------+----------+
| 1 | Anna | 4 | Lisa |
| 1 | Anna | 6 | Per |
| 1 | Anna | 2 | John |
| 1 | Anna | 3 | Steve |
| 4 | Lisa | 2 | John |
| 4 | Lisa | 3 | Steve |
| 5 | Adam | 1 | Anna |
| 5 | Adam | 4 | Lisa |
| 5 | Adam | 6 | Per |
| 5 | Adam | 2 | John |
| 5 | Adam | 3 | Steve |
+-----------+---------+------------+----------+

Select Same Customer Name but that has different customer Address

Trying to select records that are all for the same customer, but where the address is different.
So I can later let the user choose Bob Yonkers, then choose to update all of Bob's records to a specific address. So I want to show all the available records.
Data Example:
CUSTOMER_NAME, CUSTOMER_ADDRESS
Bob Yonkers , 42 Satellite Cir
Bob Yonkers , 667 Orbit St
Bob Yonkers , 42 Satellite Cir
Bob Yonkers , 667 Orbit St
David Boom , 5959 Bush Ave
David Boom , 5959 Bush Ave
David Boom , 5959 Bush Ave
David Boom , 5959 Bush Ave
David Boom , 5959 Bush Ave
Ruby Tuesday , 123 Highway Ln Apt#1
Ruby Tuesday , 123 Highway Ln
David Boom ,5959 Bush Ave
David Boom ,5959 Bush Ave
David Boom ,5959 Bush Ave
So the query would bring back these results...
Result Example:
CUSTOMER_NAME, CUSTOMER_ADDRESS
Bob Yonkers , 42 Satellite Cir
Bob Yonkers , 667 Orbit St
Ruby Tuesday , 123 Highway Ln Apt#1
Ruby Tuesday , 123 Highway Ln
Any help would be appreciated.
-- Pairs every row with each row of the same customer name that carries a
-- different address (columns of the first row, then of the paired row).
SELECT *
FROM [table] AS cur
INNER JOIN [table] AS other
        ON other.Name = cur.Name
       AND other.Address <> cur.Address
This is a refinement of Joel's:
-- Distinct rows whose customer name also appears with a different address.
SELECT DISTINCT cur.*
FROM [table] AS cur
INNER JOIN [table] AS other
        ON other.Name = cur.Name
       AND other.Address <> cur.Address
give this a try...
-- One row per distinct (customer, address) pair with its row count (ct), but
-- only for customers that have more than one distinct address. Filtering on
-- ct > 1 would instead keep any pair that merely occurs more than once, which
-- returns customers with a single repeated address and misses customers whose
-- differing addresses each occur once.
SELECT t1.ct,
       t1.customername,
       t1.address
FROM ( SELECT COUNT(customername) AS ct,
              customername,
              address,
              -- evaluated after GROUP BY: the number of distinct addresses
              -- (groups) this customer has
              COUNT(*) OVER (PARTITION BY customername) AS address_ct
       FROM [table]              -- bracketed: TABLE is a reserved word
       GROUP BY customername, address
     ) AS t1
WHERE t1.address_ct > 1
This intrigued me since a friend had asked me something similar. The query below will solve the problem, albeit inefficiently:
mysql> SELECT DISTINCT
    ca.CUSTOMER_NAME,
    ca.CUSTOMER_ADDRESS
FROM CUST_ADDR AS ca
WHERE ca.CUSTOMER_NAME IN (
    -- customers that appear with more than one distinct address
    SELECT multi.CUSTOMER_NAME
    FROM CUST_ADDR AS multi
    GROUP BY multi.CUSTOMER_NAME
    HAVING COUNT(DISTINCT multi.CUSTOMER_ADDRESS) > 1
);
+---------------+----------------------+
| CUSTOMER_NAME | CUSTOMER_ADDRESS |
+---------------+----------------------+
| Bob Yonkers | 42 Satellite Cir |
| Bob Yonkers | 667 Orbit St |
| Ruby Tuesday | 123 Highway Ln Apt#1 |
| Ruby Tuesday | 123 Highway Ln |
+---------------+----------------------+
4 rows in set (0.01 sec)

Resources