MS SQL split values in same table - sql-server

Good day
I have :
TableX
Column1
John Smith 007
Tera Name 111
Bob Eva 554
I need
TableX
Column1 Column2
John Smith 007 007
Tera Name 111 111
Bob Eva 554 554
I wrote the code below, but it does not work. I think a join may be needed to match up the columns.
ALTER TABLE [dbo].[TableX]
ADD Column2 varchar (50);
UPDATE [dbo].[TableX] SET
Column1=Column2
WHERE select SUBSTRING([Column1], PATINDEX('%[0-9]%', [Column1]
), LEN([column1]))
Thanks for help

If the number you want to extract is always at the end, then you can use:
PATINDEX('%[^0-9]%', REVERSE(Column1))
to get the index of the first character that is not a number, starting from the end.
So, to extract the number you can use:
RIGHT(Column1, PATINDEX('%[^0-9]%', REVERSE(Column1)) - 1)
Hence, the UPDATE will look like this:
-- Copy the trailing run of digits of Column1 into Column2.
-- REVERSE puts the trailing digits first, so PATINDEX('%[^0-9]%', ...) yields the
-- 1-based position of the first non-digit counted from the end; minus 1 is the
-- number of trailing digits to take with RIGHT.
-- The appended 'x' is a sentinel non-digit: without it a value made only of digits
-- (or an empty string) makes PATINDEX return 0 and RIGHT(..., -1) raises
-- "Invalid length parameter passed to the RIGHT function".
-- No WHERE clause on purpose: every row of the table is populated.
UPDATE [dbo].[TableX]
SET Column2 = RIGHT(Column1, PATINDEX('%[^0-9]%', REVERSE(Column1) + 'x') - 1);
Demo here

Assuming required part length = 3
-- Copy the last 3 characters of column1 into column2 (assumes the wanted suffix is
-- always exactly 3 characters long).
-- The length is the literal 3; it must not be derived from CHARINDEX('/', column1),
-- which is 0 only as long as the value happens to contain no '/'.
-- RTRIM is applied first so trailing blanks do not take up part of the 3 characters.
-- No WHERE clause on purpose: every row of the table is populated.
UPDATE [dbo].[TableX]
SET column2 = RIGHT(RTRIM(column1), 3);

Related

Optimal search string in the where clause

Want to search the string using PATINDEX and SOUNDEX within the WHERE clause or any optimal way.
I have the following table with some sample data to search the given string using PATINDEX and SOUNDEX.
create table tbl_pat_soundex
(
col_str varchar(max)
);
insert into tbl_pat_soundex values('Smith A Steve');
insert into tbl_pat_soundex values('Steve A Smyth');
insert into tbl_pat_soundex values('A Smeeth Stive');
insert into tbl_pat_soundex values('Steve Smith A');
insert into tbl_pat_soundex values('Smit Steve A');
Note: I have 100 Millions of records in the table to search for.
String to search:- 'Smith A Steve'
SELECT col_str
FROM tbl_pat_soundex
WHERE PATINDEX('%Smith%',col_str) >= 1 AND PATINDEX('%A%',col_str) >= 1 AND PATINDEX('%Steve%',col_str) >= 1
Getting Output:
col_str
--------------
Smith A Steve
Steve Smith A
Expected Output:
col_str
----------------
Smith A Steve
Steve A Smyth
A Smeeth Stive
Steve Smith A
Smit Steve A
Tried:
1:
SELECT col_str
FROM tbl_pat_soundex
WHERE PATINDEX('%Smith%',col_str) >= 1 AND
PATINDEX('%A%',col_str) >= 1 AND
PATINDEX('%Steve%',col_str) >= 1
2:
SELECT col_str
FROM tbl_pat_soundex
WHERE PATINDEX('%'+SOUNDEX('Smith')+'%',SOUNDEX(col_str)) >= 1 AND
PATINDEX('%'+SOUNDEX('A')+'%',SOUNDEX(col_str)) >= 1 AND
PATINDEX('%'+SOUNDEX('Steve')+'%',SOUNDEX(col_str)) >= 1
3:
SELECT col_str
FROM tbl_pat_soundex
WHERE DIFFERENCE('Smith',col_str) = 4 AND
DIFFERENCE('A',col_str) =4 AND
DIFFERENCE('Steve',col_str) = 4
4:
--Following was taking huge time(was kept running more than 20 minutes) to execute.
SELECT DISTINCT col_str
FROM tbl_pat_soundex [a]
CROSS APPLY SplitString([a].[col_str], ' ') [b]
WHERE DIFFERENCE([b].Item,'Smith') >= 1 AND
DIFFERENCE([b].Item,'A') >= 1 AND
DIFFERENCE([b].Item,'Steve') >= 1
With such a lot of rows the only hint I can give you is: Change the design. Each name part should live in a separate column...
The following will work, but I promise it will be slow...
--set up a test db
USE master;
GO
CREATE DATABASE shnugo;
GO
USE shnugo;
GO
--your table, I added an ID-column
create table tbl_pat_soundex
(
ID INT IDENTITY --needed to distinguish rows
,col_str varchar(max)
);
GO
--A function which takes a blank-separated string and returns an alphabetically sorted list of its distinct soundex values separated by /: "Smith A Steve" comes back as /A000/S310/S530/
CREATE FUNCTION dbo.ComputeSoundex(@str VARCHAR(MAX))
RETURNS VARCHAR(MAX)
AS
BEGIN
    --Split on blanks by turning every fragment into an <x>...</x> element.
    --The inner SELECT ... FOR XML PATH('') escapes characters such as < and & so the CAST to XML does not fail on them.
    DECLARE @tmpXML XML=CAST('<x>' + REPLACE((SELECT @str AS [*] FOR XML PATH('')),' ','</x><x>') + '</x>' AS XML);
    --The [text()] predicate skips empty fragments (leading, trailing or doubled blanks).
    --DISTINCT + ORDER BY make the result independent of word order and repetition in the input.
    RETURN (SELECT DISTINCT '/' + SOUNDEX(x.value('text()[1]','varchar(max)')) AS [se]
            FROM @tmpXML.nodes('/x[text()]') A(x)
            ORDER BY se
            FOR XML PATH(''),TYPE).value('.','nvarchar(max)') + '/';
END
GO
--Add a column to store a computed soundex-chain permanently
ALTER TABLE tbl_pat_soundex ADD SortedSoundExPattern VARCHAR(MAX);
GO
--Keeps the stored soundex-chain in sync: it is recomputed for every row touched by an INSERT or UPDATE
CREATE TRIGGER RefreshComputeSoundex ON tbl_pat_soundex
AFTER INSERT, UPDATE
AS
BEGIN
    --"inserted" holds the new version of each affected row; match it back to the table by ID
    UPDATE target
    SET    SortedSoundExPattern = dbo.ComputeSoundex(changed.col_str)
    FROM   inserted AS changed
    INNER JOIN tbl_pat_soundex AS target
            ON target.ID = changed.ID;
END
GO
--test data
insert into tbl_pat_soundex(col_str) values
('Smith A Steve')
,('Steve A Smyth')
,('A Smeeth Stive')
,('Steve Smith A')
,('Smit Steve A')
,('Smit Steve') --no A
,('Smit A') --no Steve
,('Smit Smith Robert Peter A') --add noise
,('Shnugo'); --something else entirely
--check the intermediate result
SELECT *
FROM tbl_pat_soundex
/*
+----+---------------------------+-----------------------+
| ID | col_str | SortedSoundExPattern |
+----+---------------------------+-----------------------+
| 1 | Smith A Steve | /A000/S310/S530/ |
+----+---------------------------+-----------------------+
| 2 | Steve A Smyth | /A000/S310/S530/ |
+----+---------------------------+-----------------------+
| 3 | A Smeeth Stive | /A000/S310/S530/ |
+----+---------------------------+-----------------------+
| 4 | Steve Smith A | /A000/S310/S530/ |
+----+---------------------------+-----------------------+
| 5 | Smit Steve A | /A000/S310/S530/ |
+----+---------------------------+-----------------------+
| 6 | Smit Steve | /S310/S530/ |
+----+---------------------------+-----------------------+
| 7 | Smit A | /A000/S530/ |
+----+---------------------------+-----------------------+
| 8 | Smit Smith Robert Peter A | /A000/P360/R163/S530/ |
+----+---------------------------+-----------------------+
| 9 | Shnugo | /S520/ |
+----+---------------------------+-----------------------+
*/
--Now we can start to search:
--The search string is run through the same function as the stored rows, so both sides use identical soundex codes
DECLARE @StringToSearch VARCHAR(MAX)=' A Steve';
WITH SplittedSearchString AS
(
    --One row per soundex code of the search string (the chain /A000/S310/ is split at the slashes; empty parts are skipped)
    SELECT soundexCode.value('text()[1]','nvarchar(max)') AS SoundExCode
    FROM (SELECT CAST('<x>' + REPLACE(dbo.ComputeSoundex(@StringToSearch),'/','</x><x>') + '</x>' AS XML)) A(x)
    CROSS APPLY x.nodes('/x[text()]') B(soundexCode)
)
SELECT a.ID,a.col_str
FROM tbl_pat_soundex a
INNER JOIN SplittedSearchString s ON a.SortedSoundExPattern LIKE '%/' + s.SoundExCode + '/%'
GROUP BY a.ID,a.col_str
--A row qualifies only if every code of the search string was found in its chain
HAVING COUNT(a.ID)=(SELECT COUNT(*) FROM SplittedSearchString)
ORDER BY a.ID
GO
--clean-up
USE master;
GO
DROP DATABASE shnugo;
Short explanation
This is how it works:
The CTE will use the same function to return a soundex-chain of all the input's fragments
The query will then INNER JOIN this with a LIKE test --this will be sloooooow...
The final check is, if the number of hits is the same as number of fragments.
And a final hint: If you want to search for an exact match, but you want to include different writings you can just directly compare the two strings. You might even place an index on the new column SortedSoundExPattern. Due to the way of creation all kinds of "Steven A Smith", "Steeven a Smit" and even in differing order like "Smith Steven A" will produce exactly the same pattern.
In my view, you should try to use dynamic SQL.
For example, you have a table:
create table tbl_pat_soundex
(
id int,
col_str varchar(max)
)
And you have the following nonclustered index, or any other index (a table with over 100 million rows should have some index):
-- Nonclustered index keyed on id; col_str is carried as an included column so that
-- queries filtering on id and reading col_str can be answered from the index alone.
CREATE NONCLUSTERED INDEX myIndex ON dbo.tbl_pat_soundex(id) INCLUDE (col_str);
So try to create the following dynamic SQL query based on your logic and execute it. The desired result should look like this:
-- Builds the text of the combined search query in @statement.
-- Single quotes that belong to the inner query are doubled ('') so that they are
-- part of the string instead of terminating the N'...' literal.
DECLARE @statement NVARCHAR(4000)
SET @statement = N'
SELECT col_str
FROM tbl_pat_soundex
WHERE col_str like ''%Smith%'' AND id > 0
UNION ALL
SELECT col_str
FROM tbl_pat_soundex
WHERE col_str like ''%Steve%'' AND id > 0
UNION ALL
SELECT col_str
FROM tbl_pat_soundex
WHERE
PATINDEX(''%Smith%'',col_str) >= 1 AND PATINDEX(''%A%'',col_str) >= 1 AND
PATINDEX(''%Steve%'',col_str) >= 1
AND id > 0'
Basically, what we do is creating single search queries which will have index seek and then combine all results.
This query will have index seek as we use predicate id > 0(assuming that all ids are greater than 0 or you can write your own negative number):
SELECT col_str
FROM tbl_pat_soundex
WHERE col_str like '%Smith%' AND id > 0

delete multiple rows with same value in sql 14

I have a table with 3 columns and the first column is 'name'. Some names are entered twice, some 3 times and some more than that. I would like to keep only one value for each name and delete the extra rows.
There are no primary keys or id column.
There are about 1 million rows in the table.
Would like to delete using one query(preferably) in SQL 14. Can someone help please?
Name column2 column3
Suzy
Suzy
Suzy
John
John
George
George
George
George
Would like to have it as:
Name column2 column3
Suzy
John
George
Many thanks in advance
You can use row_number function, try like this,
-- Number the rows inside each NAME group, then delete everything except row 1.
-- Deleting through the CTE removes the underlying rows of the base table.
WITH numbered AS (
    SELECT
        NAME,
        column2,
        column3,
        ROW_NUMBER() OVER (PARTITION BY NAME ORDER BY NAME) AS RN
    FROM < YourTableName >
)
DELETE FROM numbered
WHERE RN > 1

T-SQL Dynamic Query and pivot

I have data where people have changed role mid month and want to count the activity after their new start date. Can I use the results of a table as a dynamic Query, I have a query which returns the following resultset:-
Firstname Surname StartDate
----------------------------------
Jon Smith 2015-01-01
Paul Jones 2014-07-23
...
So the query would look something like:
SELECT Firstname +' '+ surname, month, count(1) FROM dataTable
WHERE (Firstname='John' AND Surname='Smith' AND date >=2015-01-01)
OR (Firstname='Paul' AND Surname='Jones' AND date >=2014-07-23)
OR ...
but the number of 'ORs' would depend on the number of rows in the first table
Name Month Count
----------------------------------
Jon Smith 1 15
Paul Jones 1 16
Jon Smith 2 30
Paul Jones 2 25
Charlie Gu 1 52
Which I can then pivot to get
Name 1 2
--------------------------
Jon Smith 15 30
Paul Jones 16 25
Charlie Gu 52 NULL
Thanks in advance
It seems to me that Ako is right and a simple join should do the trick rather than a dynamic query.
-- Table variable holding one row per person with the date their new role started.
declare @NewStartDates table
(
    Firstname nvarchar(100),
    Surname nvarchar(100),
    StartDate date
);
insert into @NewStartDates
    (Firstname, Surname, StartDate)
values (N'Jon', N'Smith', '20150101'),
       (N'Paul', N'Jones', '20140723');
-- Count activity per person and period (yyyymm), considering only rows dated on or
-- after that person's start date; the join replaces the chain of ORs.
select d.Firstname,
       d.Surname,
       year(d.Date) * 100 + month(d.Date) as Period,
       count(*) as ActivityCount
from dataTable as d
inner join @NewStartDates as n
    on d.Firstname = n.Firstname
    and d.Surname = n.Surname
    and d.Date >= n.StartDate
group by d.Firstname,
         d.Surname,
         year(d.Date) * 100 + month(d.Date);
Please refer to this; it will give you a complete idea of how to build a dynamic-column query: Dynamic Column Generation - Pivot [SQL]

SQL Server - Update Column with Handing Duplicate and Unique Rows Based Upon Timestamp

I'm working with SQL Server 2005 and looking to export some data off of a table I have. However, prior to do that I need to update a status column based upon a field called "VisitNumber", which can contain multiple entries same value entries. I have a table set up in the following manner. There are more columns to it, but I am just putting in what's relevant to my issue
ID Name MyReport VisitNumber DateTimeStamp Status
-- --------- -------- ----------- ----------------------- ------
1 Test John Test123 123 2014-01-01 05.00.00.000
2 Test John Test456 123 2014-01-01 07.00.00.000
3 Test Sue Test123 555 2014-01-02 08.00.00.000
4 Test Ann Test123 888 2014-01-02 09.00.00.000
5 Test Ann Test456 888 2014-01-02 10.00.00.000
6 Test Ann Test789 888 2014-01-02 11.00.00.000
Field Notes
ID column is a unique ID in incremental numbers
MyReport is a text value and can actually be thousands of characters. Shortened for simplicity. In my scenario the text would be completely different
Rest of fields are varchar
My Goal
I need to address putting in a status of "F" for two conditions:
* If there is only one VisitNumber, update the status column of "F"
* If there is more than one visit number, only put "F" for the one based upon the earliest timestamp. For the other ones, put in a status of "A"
So going back to my table, here is the expectation
ID Name MyReport VisitNumber DateTimeStamp Status
-- --------- -------- ----------- ----------------------- ------
1 Test John Test123 123 2014-01-01 05.00.00.000 F
2 Test John Test456 123 2014-01-01 07.00.00.000 A
3 Test Sue Test123 555 2014-01-02 08.00.00.000 F
4 Test Ann Test123 888 2014-01-02 09.00.00.000 F
5 Test Ann Test456 888 2014-01-02 10.00.00.000 A
6 Test Ann Test789 888 2014-01-02 11.00.00.000 A
I was thinking I could handle this by splitting each types of duplicates/triplicates+ (2,3,4,5). Then updating every other (or every 3,4,5 rows). Then delete those from the original table and combine them together to export the data in SSIS. But I am thinking there is a much more efficient way of handling it.
Any thoughts? I can accomplish this by updating the table directly in SQL for this status column and then export normally through SSIS. Or if there is some way I can manipulate the column for the exact conditions I need, I can do it all in SSIS. I am just not sure how to proceed with this.
-- Rank the rows of each VisitNumber by timestamp; the earliest row gets 'F', all later ones 'A'.
-- A VisitNumber that occurs only once has a single row with rn = 1 and therefore also gets 'F'.
-- ID is the tie-breaker so that two rows with the same timestamp are ranked deterministically.
WITH cte AS
(
    SELECT [Status],
           ROW_NUMBER() OVER (PARTITION BY VisitNumber ORDER BY DateTimeStamp, ID) AS rn
    FROM MyTable
)
-- No WHERE clause on purpose: every row receives a status.
UPDATE cte
SET [Status] = CASE WHEN rn = 1 THEN 'F' ELSE 'A' END;
I put together a test script to check the results. For your purposes, use the update statements and replace the temp table with your table name.
create table #temp1 (id int, [name] varchar(50), myreport varchar(50), visitnumber varchar(50), dts datetime, [status] varchar(1))
insert into #temp1 (id,[name],myreport,visitnumber, dts) values (1,'Test John','Test123','123','2014-01-01 05:00')
insert into #temp1 (id,[name],myreport,visitnumber, dts) values (2,'Test John','Test456','123','2014-01-01 07:00')
insert into #temp1 (id,[name],myreport,visitnumber, dts) values (3,'Test Sue','Test123','555','2014-01-01 08:00')
insert into #temp1 (id,[name],myreport,visitnumber, dts) values (4,'Test Ann','Test123','888','2014-01-01 09:00')
insert into #temp1 (id,[name],myreport,visitnumber, dts) values (5,'Test Ann','Test456','888','2014-01-01 10:00')
insert into #temp1 (id,[name],myreport,visitnumber, dts) values (6,'Test Ann','Test789','888','2014-01-01 11:00')
select * from #temp1;
update #temp1 set status = 'F'
where id in (
select id from #temp1 t1
join (select min(dts) as mindts, visitnumber
from #temp1
group by visitNumber) t2
on t1.visitnumber = t2.visitnumber
and t1.dts = t2.mindts)
update #temp1 set status = 'A'
where id not in (
select id from #temp1 t1
join (select min(dts) as mindts, visitnumber
from #temp1
group by visitNumber) t2
on t1.visitnumber = t2.visitnumber
and t1.dts = t2.mindts)
select * from #temp1;
drop table #temp1
Hope this helps

Update first rows that match condition

Update table1
set column1 = 'abc', column2 = 25
where column3 IN ('John','Kate','Tim')
Column3 contains John twice (two associated rows/records); similarly, it has Kate three times and Tim twice.
How can I adjust the query so that the update affects only the first row with John, the first row with Kate and the first with Tim?
For the reference, here is table1:
column1 column2 column3
aa 2 John (!)
affd 24 John
dfd 5 Tim (!)
ss 77 Kate (!)
s 4 Tim
s 1 Kate
sds 34 Kate
I want to update only the rows marked with (!)
I am especially interested in Ms Access! - but also curious how this is done in Sql Server in case it differs. Thank you!
Sql Server solution - Note, you must have a unique identity column for this to work (or some set of unique columns).
-- Update only the first row of each requested name.
-- PARTITION BY column3 restarts the numbering for every name; without it the rows
-- are numbered across all names and only one single row would have rownum = 1.
-- "rowyouwanttoorderby" decides which row of a name counts as the first one.
UPDATE table1
SET column1 = 'abc',
    column2 = 25
WHERE id IN (SELECT id
             FROM (SELECT id,
                          ROW_NUMBER() OVER (PARTITION BY column3
                                             ORDER BY rowyouwanttoorderby) AS rownum
                   FROM table1
                   WHERE column3 IN ('John', 'Kate', 'Tim')) AS temp
             WHERE rownum = 1)

Resources