How to select delimited texts separately and completely SQL Server table column - sql-server

I have a SQL Server table with a column p_author containing semi-colon (;) delimited text value. I used this query to split and select the respective id's from another table but it only splits once whereas I need all the value id's after splitting either p_author contains one value, two values, three values or whatever number of values. Under is the used query for splitting and selecting respective id's from another table.
select aid as [CountedID]
from sub_aminer_author
where name like (select RIGHT(p_author, LEN(p_author) - CHARINDEX(';', p_author))
from sub_aminer_paper
where pid = 4)
Sample data is shown here in this image.
#DarkKnight--This is my output in SqlServer2014

Try this..
DECLARE #X XML
DECLARE #STR VARCHAR(MAX)=''
Select #STR = #STR+';'+P_AUTHOR
From sub_aminer_paper
WHERE PID = 4
ORDER BY PID
select #STR = substring(#STR,2,len(#STR))
SELECT #X = CONVERT(xml,' <root> <s>' + REPLACE(#STR,';','</s> <s>') + '</s> </root> ')
select aid as [CountedID], name
from sub_aminer_author s
inner join (
SELECT row_number() over(order by (select null)) as rn, T.c.value('.','varchar(max)') as value
FROM #X.nodes('/root/s') T(c)) t on t.value = s.name
order by rn
Example fiddle : http://sqlfiddle.com/#!3/34b6c/10

As it has been said in the comments, you really need to remove this bad design.
It will only lead to bigger problems and performance issues. (I know that sometime you have to deal will terrible design for a little while)
In the meantime, if you really need to split your rows, this type of query with a recursive CTE (SQL Fiddle)can be used:
create table sub_aminer_author(pid int, p_author varchar(max), name varchar(max));
go
Insert into sub_aminer_author(pid, p_author, name) values
(1, 'AAAA;BBBBB;CCCCC', 'AAAA'), (2, 'DDDDD;EEEEE;FFFF;GGGGGGGG', 'GGGGGGGG'), (3, 'HHH', 'GGGGGGGG');
go
with split(pid, first, pos) as(
Select pid, cast(1 as bigint)
, Case When CHARINDEX(';', p_author) > 0
Then CHARINDEX(';', p_author)-1 Else len(p_author) End
From sub_aminer_author
Union All
Select d.pid, s.pos+2
, Case When CHARINDEX(';', d.p_author, s.pos+2) > 0 Then CHARINDEX(';', d.p_author, s.pos+2)-1 Else len(p_author) End
From split s
Inner Join sub_aminer_author d on d.pid = s.pid
Where s.pos < len(d.p_author)-1
)
Select d.pid, s.first, s.pos , SUBSTRING(d.p_author, s.first, s.pos - s.first +1)
From split s
Inner Join sub_aminer_author d on d.pid = s.pid
order by d.pid, s.first
You have to understand that it is neither good nor efficient.
It should be ok as long as you only temporary use it to fix your current design issue and move splited data to a better design.

Related

Substring is slow with while loop in SQL Server

One of my table column stores ~650,000 characters (each value of the column contains entire table). I know its bad design however, Client will not be able to change it.
I am tasked to convert the column into multiple columns.
I chose to use dbo.DelimitedSplit8K function
Unfortunately, it can only handle 8k characters at max.
So I decided to split the column into 81 8k batches using while loop and store the same in a variable table (temp or normal table made no improvement)
DECLARE #tab1 table ( serialnumber int, etext nvarchar(1000))
declare #scriptquan int = (select MAX(len (errortext)/8000) from mytable)
DECLARE #Counter INT
DECLARE #A bigint = 1
DECLARE #B bigint = 8000
SET #Counter=1
WHILE ( #Counter <= #scriptquan + 1)
BEGIN
insert into #tab1 select ItemNumber, Item from dbo.mytable cross apply dbo.DelimitedSplit8K(substring(errortext, #A, #B), CHAR(13)+CHAR(10))
SET #A = #A + 8000
SET #B = #B + 8000
SET #Counter = #Counter + 1
END
This followed by using below code
declare #tab2 table (Item nvarchar(max),itemnumber int, Colseq varchar(10)) -- declare table variable
;with cte as (
select [etext] ,ItemNumber, Item from #tab1 -- insert table name
cross apply dbo.DelimitedSplit8K(etext,' ')) -- insert table columns name that contains text
insert into #tab2 Select Item,itemnumber, 'a'+ cast (ItemNumber as varchar) colseq
from cte -- insert values to table variable
;WITH Tbl(item, colseq) AS(
select item, colseq from #tab2
),
CteRn AS(
SELECT item, colseq,
Rn = ROW_NUMBER() OVER(PARTITION BY colseq ORDER BY colseq)
FROM Tbl
)
SELECT
a1 Time,a2 Number,a3 Type,a4 Remarks
FROM CteRn r
PIVOT(
MAX(item)
FOR colseq IN(a1,a2,a3,a4)
)p
where a3 = 'error'
gives the desired output. However, just the loop takes 15 minutes to complete and overall query completes by 27 minutes. Is there any way I can make it faster? Total row count in my table is 2. So I don't think Index can help.
Client uses Azure SQL Database so I can't choose PowerShell or Python to accomplish this either.
Please let me know if more information is needed. I tried my best to mention everything I could.

Select rows with any member of list of substrings in string

In a Micrososft SQL Server table I have a column with a string.
Example:
'Servernamexyz.server.operationunit.otherstuff.icouldnt.predict.domain.domain2.domain3'
I also have a dynamic list of substrings
Example:
('icouldnt', 'stuff', 'banana')
I don't care for string manipulation. The substrings could also be called:
('%icouldnt%', '%stuff%', '%banana%')
What's the best way to find all rows where the string contains one of the substrings?
Solutions that are not possible:
multiple OR Statements in the WHERE clause, the list is dynamic
external Code to do a "for each", its a multi value parameter from the reportbuilder, so nothing useful here
changing the database, its the database of a tool a costumer is using and we can't change it, even if we would like... so much
I really cant believe how hard such a simple problem can turn out. It would need a "LIKE IN" command to do it in a way that looks ok. Right now I cant think of anything but a messy temp table.
One option is to use CHARINDEX
DECLARE #tab TABLE (Col1 NVARCHAR(200))
INSERT INTO #tab (Col1)
VALUES (N'Servernamexyz.server.operationunit.otherstuff.icouldnt.predict.domain.domain2.domain3' )
;WITH cteX
AS(
SELECT 'icouldnt' Strings
UNION ALL
SELECT 'stuff'
UNION ALL
SELECT 'banana'
)
SELECT
T.*, X.Strings
FROM #tab T
CROSS APPLY (SELECT X.Strings FROM cteX X) X
WHERE CHARINDEX(X.Strings, T.Col1) > 1
Output
EDIT - using an unknown dynamic string variable - #substrings
DECLARE #tab TABLE (Col1 NVARCHAR(200))
INSERT INTO #tab (Col1)
VALUES (N'Servernamexyz.server.operationunit.otherstuff.icouldnt.predict.domain.domain2.domain3' )
DECLARE #substrings NVARCHAR(200) = 'icouldnt,stuff,banana'
SELECT
T.*, X.Strings
FROM #tab T
CROSS APPLY
( --dynamically split the string
SELECT Strings = y.i.value('(./text())[1]', 'nvarchar(4000)')
FROM
(
SELECT x = CONVERT(XML, '<i>'
+ REPLACE(#substrings, ',', '</i><i>')
+ '</i>').query('.')
) AS a CROSS APPLY x.nodes('i') AS y(i)
) X
WHERE CHARINDEX(X.Strings, T.Col1) > 1

Replace placeholder content in result set

I have this table as a simple example, with food items:
Table: FoodItem
1 Burgers
2 French Fries
3 Pizzas
and I have another table with phrases like these:
Table: Phrase
1 I want {1} and {2}!
2 I just want {3}.
I want to create an sp that grabs all the phrases from the phrase table and replaces the placeholder parts with content from the food table, like this:
I want Burgers and French Fries!
I just want Pizzas.
How can I accomplish this? I already tried "like" and "patindex", but I'm unsure of whether these are even suited for this task.
For a little number of replacements and small amount of data you can use recursive CTE (I have seen bad performance when a lot of replacements are performed). Something like this:
Declare #Phrase table (ID int,Phrase varchar(100))
Insert into #Phrase values
(1,'I want {1} and {2}!')
,(2,'I just want {3}.')
,(3,'i just don not like {1} and {3}');
Declare #FoodItem table (ID int, MapTo varchar(100))
Insert Into #FoodItem values
(1 ,'Burgers')
,(2 ,'French Fries')
,(3 ,'Pizza');
With DataSource AS
(
SELECT ID
,Phrase
,1 as level
FROM #Phrase
UNION ALL
SELECT DS.[ID]
,cast(REPLACE(ds.Phrase, '{'+ CAST(DS.[Level] AS VARCHAR(8)) +'}', FI.[MapTo]) as varchar(100))
,level + 1 as level
FROM DataSource DS
INNER JOIN #FoodItem FI
ON DS.[level] = FI.[ID]
)
SELECT *
FROM DataSource
WHERE level = (SELECT max(id) from #FoodItem) + 1;
I am sure this can be improved further.
If you are going to work with huge amount of data it will be good to implement SQL CLR function for replacing multiple strings and concatenating strings.
So, for each row you will have something like this:
(1,'I want {1} and {2}!', '{1}|{2}','Burgers|French Fries')
,(2,'I just want {3}.', '{3}', 'Pizza')
,(3,'i just don not like {1} and {3}', '{1}|{3}','Burgers|Pizza');
Then your function with accept the three columns and perform the replace internally.
Example
Declare #Phrase table (ID int,Phrase varchar(100))
Insert into #Phrase values
(1,'I want {1} and {2}!')
,(2,'I just want {3}.')
Declare #FoodItem table (ID int, MapTo varchar(100))
Insert Into #FoodItem values
(1 ,'Burgers')
,(2 ,'French Fries')
,(3 ,'Pizza')
Select A.ID
,NewStr = replace(replace(B.S,' ||',''),'|| ','')
From #Phrase A
Cross Apply (
Select S = Stuff((Select ' ' +coalesce(MapTo,RetVal)
From (
Select RetSeq = Row_Number() over (Order By (Select null))
,RetVal = LTrim(RTrim(B.i.value('(./text())[1]', 'varchar(max)')))
From (Select x = Cast('<x>' + replace((Select replace(replace(replace(A.Phrase,'{','|| {'),'}','} ||'),' ','§§Split§§') as [*] For XML Path('')),'§§Split§§','</x><x>')+'</x>' as xml).query('.')) as A
Cross Apply x.nodes('x') AS B(i)
) B1
Left Join #FoodItem B2 on B1.RetVal = concat('{',B2.ID,'}')
Order by RetSeq
For XML Path ('')),1,1,'')
) B
Returns
ID NewStr
1 I want Burgers and French Fries!
2 I just want Pizza.
Edit - It may be more performat to create a UDF which does something like the
following
Declare #S varchar(max) = 'I want {1} and {2}!'
Select #S = replace(#S,concat('{',ID,'}'),MapTo)
From FoodItem
Select #S
Returns
I want Burgers and French Fries!

SQL Server Conversion failed varchar to int

I have a table (no.1) which has 10 columns. One of them clm01 is integer and not allowed with null values.
There is a second table (no.2) which has many columns. One of them is string type clm02. An example of this column data is 1,2,3.
I'd like to make a query like:
select *
from table1 t1, table2 t2
where t1.clm01 not in (t2.clm2)
For example in table1 I have 5 records with values in clm01 1,2,3,4,5 and in table2 I've got 1 record with value in clm02 = 1,2,3
So I would like with the query to return only the record with the value 4 and 5 in the clm01.
Instead I get:
Conversion failed when converting the varchar value '1,2,3' to data type int
Any ideas?
Use STRING_SPLIT() function to split the comma separated values, if you are using SQL Server 2016.
SELECT *
FROM table1 t1
WHERE t1.clm1 NOT IN (SELECT Value FROM table2 t2
CROSS APPLY STRING_SPLIT(t2.clm2,','))
If you are using any lower versions of SQL server write a UDF to split string and use the function in CROSS APPLY clause.
CREATE FUNCTION [dbo].[SplitString]
(
#string NVARCHAR(MAX),
#delimiter CHAR(1)
)
RETURNS #output TABLE(Value NVARCHAR(MAX)
)
BEGIN
DECLARE #start INT, #end INT
SELECT #start = 1, #end = CHARINDEX(#delimiter, #string)
WHILE #start < LEN(#string) + 1 BEGIN
IF #end = 0
SET #end = LEN(#string) + 1
INSERT INTO #output (Value)
VALUES(SUBSTRING(#string, #start, #end - #start))
SET #start = #end + 1
SET #end = CHARINDEX(#delimiter, #string, #start)
END
RETURN
END
I decided to give you a couple of options but this really is a duplicate question I see pretty often.
There are two main ways of going about the problem.
1) Use LIKE to and compare the strings but you actually have to build strings a little oddly to do it:
SELECT *
FROM
#Table1 t1
WHERE
NOT EXISTS (SELECT *
FROM #Table2 t2
WHERE ',' + t2.clm02 + ',' LIKE '%,' + CAST(t1.clm01 AS VARCHAR(15)) + ',%')
What you see is ,1,2,3, is like %,clm01value,% you must add the delimiter to the strings for this to work properly and you have to cast/convert clm01 to a char datatype. There are drawbacks to this solution but if your data sets are straight forward it could work for you.
2) Split the comma delimited string to rows and then use a left join, not exists, or not in. here is a method to convert your csv to xml and then split
;WITH cteClm02Split AS (
SELECT
clm02
FROM
(SELECT
CAST('<X>' + REPLACE(clm02,',','</X><X>') + '</X>' AS XML) as xclm02
FROM
#Table2) t
CROSS APPLY (SELECT t.n.value('.','INT') clm02
FROM
t.xclm02.nodes('X') as t(n)) ca
)
SELECT t1.*
FROM
#Table1 t1
LEFT JOIN cteClm02Split t2
ON t1.clm01 = t2.clm02
WHERE
t2.clm02 IS NULL
OR use NOT EXISTS with same cte
SELECT t1.*
FROM
#Table1 t1
WHERE
NOT EXISTS (SELECT * FROM cteClm02Split t2 WHERE t1.clm01 = t2.clm02)
There are dozens of other ways to split delimited strings and you can choose whatever way works for you.
Note: I am not showing IN/NOT IN as an answer because I don't recommend the use of it. If you do use it make sure that you are never comparing a NULL in the select etc. Here is another good post concerning performance etc. NOT IN vs NOT EXISTS
here are the table variables that were used:
DECLARE #Table1 AS TABLE (clm01 INT)
DECLARE #Table2 AS TABLE (clm02 VARCHAR(15))
INSERT INTO #Table1 VALUES (1),(2),(3),(4),(5)
INSERT INTO #Table2 VALUES ('1,2,3')

SQL Server: UPDATE a table by using ORDER BY

I would like to know if there is a way to use an order by clause when updating a table. I am updating a table and setting a consecutive number, that's why the order of the update is important. Using the following sql statement, I was able to solve it without using a cursor:
DECLARE #Number INT = 0
UPDATE Test
SET #Number = Number = #Number +1
now what I'd like to to do is an order by clause like so:
DECLARE #Number INT = 0
UPDATE Test
SET #Number = Number = #Number +1
ORDER BY Test.Id DESC
I've read: How to update and order by using ms sql The solutions to this question do not solve the ordering problem - they just filter the items on which the update is applied.
Take care,
Martin
No.
Not a documented 100% supported way. There is an approach sometimes used for calculating running totals called "quirky update" that suggests that it might update in order of clustered index if certain conditions are met but as far as I know this relies completely on empirical observation rather than any guarantee.
But what version of SQL Server are you on? If SQL2005+ you might be able to do something with row_number and a CTE (You can update the CTE)
With cte As
(
SELECT id,Number,
ROW_NUMBER() OVER (ORDER BY id DESC) AS RN
FROM Test
)
UPDATE cte SET Number=RN
You can not use ORDER BY as part of the UPDATE statement (you can use in sub-selects that are part of the update).
UPDATE Test
SET Number = rowNumber
FROM Test
INNER JOIN
(SELECT ID, row_number() OVER (ORDER BY ID DESC) as rowNumber
FROM Test) drRowNumbers ON drRowNumbers.ID = Test.ID
Edit
Following solution could have problems with clustered indexes involved as mentioned here. Thanks to Martin for pointing this out.
The answer is kept to educate those (like me) who don't know all side-effects or ins and outs of SQL Server.
Expanding on the answer gaven by Quassnoi in your link, following works
DECLARE #Test TABLE (Number INTEGER, AText VARCHAR(2), ID INTEGER)
DECLARE #Number INT
INSERT INTO #Test VALUES (1, 'A', 1)
INSERT INTO #Test VALUES (2, 'B', 2)
INSERT INTO #Test VALUES (1, 'E', 5)
INSERT INTO #Test VALUES (3, 'C', 3)
INSERT INTO #Test VALUES (2, 'D', 4)
SET #Number = 0
;WITH q AS (
SELECT TOP 1000000 *
FROM #Test
ORDER BY
ID
)
UPDATE q
SET #Number = Number = #Number + 1
The row_number() function would be the best approach to this problem.
UPDATE T
SET T.Number = R.rowNum
FROM Test T
JOIN (
SELECT T2.id,row_number() over (order by T2.Id desc) rowNum from Test T2
) R on T.id=R.id
update based on Ordering by the order of values in a SQL IN() clause
Solution:
DECLARE #counter int
SET #counter = 0
;WITH q AS
(
select * from Products WHERE ID in (SELECT TOP (10) ID FROM Products WHERE ID IN( 3,2,1)
ORDER BY ID DESC)
)
update q set Display= #counter, #counter = #counter + 1
This updates based on descending 3,2,1
Hope helps someone.
I had a similar problem and solved it using ROW_NUMBER() in combination with the OVER keyword. The task was to retrospectively populate a new TicketNo (integer) field in a simple table based on the original CreatedDate, and grouped by ModuleId - so that ticket numbers started at 1 within each Module group and incremented by date. The table already had a TicketID primary key (a GUID).
Here's the SQL:
UPDATE Tickets SET TicketNo=T2.RowNo
FROM Tickets
INNER JOIN
(select TicketID, TicketNo,
ROW_NUMBER() OVER (PARTITION BY ModuleId ORDER BY DateCreated) AS RowNo from Tickets)
AS T2 ON T2.TicketID = Tickets.TicketID
Worked a treat!
I ran into the same problem and was able to resolve it in very powerful way that allows unlimited sorting possibilities.
I created a View using (saving) 2 sort orders (*explanation on how to do so below).
After that I simply applied the update queries to the View created and it worked great.
Here are the 2 queries I used on the view:
1st Query:
Update MyView
Set SortID=0
2nd Query:
DECLARE #sortID int
SET #sortID = 0
UPDATE MyView
SET #sortID = sortID = #sortID + 1
*To be able to save the sorting on the View I put TOP into the SELECT statement. This very useful workaround allows the View results to be returned sorted as set when the View was created when the View is opened. In my case it looked like:
(NOTE: Using this workaround will place an big load on the server if using a large table and it is therefore recommended to include as few fields as possible in the view if working with large tables)
SELECT TOP (600000)
dbo.Items.ID, dbo.Items.Code, dbo.Items.SortID, dbo.Supplier.Date,
dbo.Supplier.Code AS Expr1
FROM dbo.Items INNER JOIN
dbo.Supplier ON dbo.Items.SupplierCode = dbo.Supplier.Code
ORDER BY dbo.Supplier.Date, dbo.Items.ID DESC
Running: SQL Server 2005 on a Windows Server 2003
Additional Keywords: How to Update a SQL column with Ascending or Descending Numbers - Numeric Values / how to set order in SQL update statement / how to save order by in sql view / increment sql update / auto autoincrement sql update / create sql field with ascending numbers
SET #pos := 0;
UPDATE TABLE_NAME SET Roll_No = ( SELECT #pos := #pos + 1 ) ORDER BY First_Name ASC;
In the above example query simply update the student Roll_No column depending on the student Frist_Name column. From 1 to No_of_records in the table. I hope it's clear now.
IF OBJECT_ID('tempdb..#TAB') IS NOT NULL
BEGIN
DROP TABLE #TAB
END
CREATE TABLE #TAB(CH1 INT,CH2 INT,CH3 INT)
DECLARE #CH2 INT = NULL , #CH3 INT=NULL,#SPID INT=NULL,#SQL NVARCHAR(4000)='', #ParmDefinition NVARCHAR(50)= '',
#RET_MESSAGE AS VARCHAR(8000)='',#RET_ERROR INT=0
SET #ParmDefinition='#SPID INT,#CH2 INT OUTPUT,#CH3 INT OUTPUT'
SET #SQL='UPDATE T
SET CH1=#SPID,#CH2= T.CH2,#CH3= T.CH3
FROM #TAB T WITH(ROWLOCK)
INNER JOIN (
SELECT TOP(1) CH1,CH2,CH3
FROM
#TAB WITH(NOLOCK)
WHERE CH1 IS NULL
ORDER BY CH2 DESC) V ON T.CH2= V.CH2 AND T.CH3= V.CH3'
INSERT INTO #TAB
(CH2 ,CH3 )
SELECT 1,2 UNION ALL
SELECT 2,3 UNION ALL
SELECT 3,4
BEGIN TRY
WHILE EXISTS(SELECT TOP 1 1 FROM #TAB WHERE CH1 IS NULL)
BEGIN
EXECUTE #RET_ERROR = sp_executesql #SQL, #ParmDefinition,#SPID =##SPID, #CH2=#CH2 OUTPUT,#CH3=#CH3 OUTPUT;
SELECT * FROM #TAB
SELECT #CH2,#CH3
END
END TRY
BEGIN CATCH
SET #RET_ERROR=ERROR_NUMBER()
SET #RET_MESSAGE = '#ERROR_NUMBER : ' + CAST(ERROR_NUMBER() AS VARCHAR(255)) + '#ERROR_SEVERITY :' + CAST( ERROR_SEVERITY() AS VARCHAR(255))
+ '#ERROR_STATE :' + CAST(ERROR_STATE() AS VARCHAR(255)) + '#ERROR_LINE :' + CAST( ERROR_LINE() AS VARCHAR(255))
+ '#ERROR_MESSAGE :' + ERROR_MESSAGE() ;
SELECT #RET_ERROR,#RET_MESSAGE;
END CATCH

Resources