MSSQL Bulk String to XML Insert Into - sql-server

Currently we import a table, Import_File, which has an Options column containing delimited values.
We need to load those delimited values into a different table.
Currently we do that one row at a time, which tends to be slow as the number of rows can be 100k+.
Is there a way to speed up the code below?
Declare @InvId uniqueidentifier
Declare @xml xml
Declare CurrFeatureList Cursor For
Select
import.InventoryId,
N'<root><r><![CDATA[' + replace( import.OPTIONS ,',',']]></r><r><![CDATA[') + ']]></r></root>'
From Import_File import with (nolock)
Where
import.options IS NOT NULL
And ISNULL(import.IsFeatureProcessed,0) = 0
And LEN(ISNULL(import.OPTIONS,''))>10
And import.InventoryId Is Not Null
OPEN CurrFeatureList
FETCH NEXT FROM CurrFeatureList
INTO @InvId, @xml
Print 'Inventory Import #10000'
Print GetDate()
WHILE @@FETCH_STATUS = 0
BEGIN
BEGIN TRY
Insert Into Import_File_Feature
(
FeatureId,
InventoryId,
FeatureText,
FeatureGroup,
FeatureCategory,
FeatureIsAvailable,
FeatureIsStandard
)
Select
NEWID(),
@InvId,
t.value('.','varchar(250)'),
'',
'',
1,
1
From @xml.nodes('//root/r') as a(t)
FETCH NEXT FROM CurrFeatureList
INTO @InvId, @xml
END TRY
BEGIN CATCH
Print 'Error '
Print @InvId
Print ERROR_NUMBER()
Print ERROR_SEVERITY()
Print ERROR_STATE()
Print ERROR_PROCEDURE()
Print ERROR_LINE()
Print ERROR_MESSAGE()
FETCH NEXT FROM CurrFeatureList
INTO @InvId, @xml
END CATCH
END
Close CurrFeatureList
Deallocate CurrFeatureList
GO

The only reason I have ever seen a cursor & TRY/CATCH block used for this kind of thing is for identifying and analyzing bad records while developing a new ETL process. If that's not what you're doing then the cursor is not required and will slow you down.
Let's review what you're doing:
1. You're pulling data into your cursor (CurrFeatureList) and turning Import_File.Options into an XML field so you can later use the XML nodes() method to "split" your string.
2. Kick off the cursor.
3. For each InventoryId you're:
a. splitting Import_File.Options into multiple rows
b. inserting that InventoryId and the associated rows into Import_File_Feature
c. if there's an error, printing it and moving on to the next record
What you should be doing
Note how I split this string using XML nodes:
DECLARE @x varchar(100) = 'abc,cde,fff';
SELECT item = xxx.value('(text())[1]', 'varchar(100)')
FROM (VALUES (CAST(('<r>'+REPLACE(@x,',','</r><r>') +'</r>') AS xml))) x(xx)
CROSS APPLY xx.nodes('r') xxx(xxx);
Results
item
-----
abc
cde
fff
OPTION 1
Combine your initial query and the subsequent XML/XML nodes splitter logic into one statement and do the insert:
WITH
yourData AS
(
Select
import.InventoryId,
x = CAST(N'<r><![CDATA[' + replace( import.OPTIONS ,',',']]></r><r><![CDATA[') + N']]></r>' AS xml)
From Import_File import with (nolock)
Where
import.options IS NOT NULL
And ISNULL(import.IsFeatureProcessed,0) = 0
And LEN(ISNULL(import.OPTIONS,''))>10
And import.InventoryId Is Not Null
),
split AS
(
SELECT InventoryId, item = i.value('.', 'varchar(8000)')
FROM yourData
CROSS APPLY x.nodes('r') s(i)
)
Insert Into Import_File_Feature
(
FeatureId,
InventoryId,
FeatureText,
FeatureGroup,
FeatureCategory,
FeatureIsAvailable,
FeatureIsStandard
)
Select
newid(),
InventoryId,
item, -- this is the split out item from import.Options
'',
'',
1,
1
FROM split;
OPTION 2
Get a copy of DelimitedSplit8K and use it to do your splitting.
WITH split AS
(
Select
import.InventoryId,
item = s.Item -- DelimitedSplit8K returns ItemNumber and Item columns
From Import_File import with (nolock)
CROSS APPLY dbo.DelimitedSplit8K(import.OPTIONS, ',') s
Where
import.options IS NOT NULL
And ISNULL(import.IsFeatureProcessed,0) = 0
And LEN(ISNULL(import.OPTIONS,''))>10
And import.InventoryId Is Not Null
)
Insert Into Import_File_Feature
(
FeatureId,
InventoryId,
FeatureText,
FeatureGroup,
FeatureCategory,
FeatureIsAvailable,
FeatureIsStandard
)
Select
newid(),
InventoryId,
item,
'',
'',
1,
1
FROM split
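Whichever option you go with, the IsFeatureProcessed filter in your WHERE clause suggests the loaded rows get flagged afterwards; if so, that step can stay set-based too. A minimal sketch (untested, assuming the same filter still identifies exactly the rows just inserted):
-- Sketch only: flag the rows that were just loaded so the next run skips them.
-- Assumes the same filter used for the insert still matches those rows.
UPDATE import
SET IsFeatureProcessed = 1
FROM Import_File import
WHERE import.options IS NOT NULL
  AND ISNULL(import.IsFeatureProcessed, 0) = 0
  AND LEN(ISNULL(import.OPTIONS, '')) > 10
  AND import.InventoryId IS NOT NULL;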
Note that, because I don't have any table definitions or sample data, there was no way for me to test the code above.

Related

How can I remove the cursor from the query below?

DECLARE @U_CHK TABLE (
ACCOUNT_ID NUMERIC
,START_NO NUMERIC
,END_NO NUMERIC
,SEQ_NO NUMERIC
,USED INT
)
DECLARE @ST_NO NUMERIC
,@END_NO NUMERIC
,@ACID NUMERIC
,@SEQ_NO NUMERIC
DECLARE cCurChk Scroll CURSOR
FOR
SELECT DISTINCT AC_ID
,ST_DOC_NO
,ST_DOC_NO
,END_DOC_NO
FROM AC_LVL_INVEN
GROUP BY
ST_DOC_NO
,END_DOC_NO
,AC_ID
OPEN cCurChk
FETCH NEXT FROM cCurChk INTO @ACID,@ST_NO,@SEQ_NO,@END_NO
WHILE @@Fetch_Status=0
BEGIN
WHILE (@SEQ_NO<=@END_NO)
BEGIN
INSERT INTO @U_CHK
VALUES
(
@ACID
,@ST_NO
,@END_NO
,@SEQ_NO
,0
)
SET @SEQ_NO = @SEQ_NO+1
END
FETCH NEXT FROM cCurChk
INTO @ACID,@ST_NO,@SEQ_NO,@END_NO
END
CLOSE cCurChk
DEALLOCATE cCurChk
UPDATE Ch
SET USED = 1
FROM @U_CHK Ch
INNER JOIN FA_Trans FA (NOLOCK) ON Fa.Account_ID = Ch.Account_ID
AND CONVERT(VARCHAR(10), Ch.Seq_No) = Fa.Instrument_No
WHERE FA.Status>4 AND STATUS<>12 AND Ac_Head_Type<>1 AND Trans_type = 'Debit'
You can use a merge statement to do the insert / update in place of the cursor. Here is an example
MERGE dbo.FactBuyingHabits AS Target
USING (SELECT CustomerID, ProductID, PurchaseDate FROM dbo.Purchases) AS Source
ON (Target.ProductID = Source.ProductID AND Target.CustomerID = Source.CustomerID)
WHEN MATCHED THEN
UPDATE SET Target.LastPurchaseDate = Source.PurchaseDate
WHEN NOT MATCHED BY TARGET THEN
INSERT (CustomerID, ProductID, LastPurchaseDate)
VALUES (Source.CustomerID, Source.ProductID, Source.PurchaseDate);
Take a look at Technet - Inserting, Updating, and Deleting Data by Using MERGE

T-SQL - Update first letter in each word of a string that is not 'or', 'of' or 'and' to uppercase. Lowercase 'or', 'of' or 'and' if found

Given the below table and data:
IF OBJECT_ID('tempdb..#Temp') IS NOT NULL
DROP TABLE #Temp
CREATE TABLE #Temp
(
ID INT,
Code INT,
PDescription VARCHAR(2000)
)
INSERT INTO #Temp
(ID,
Code,
PDescription)
VALUES (1,0001,'c and d, together'),
(2,0002,'equals or Exceeds $27.00'),
(3,0003,'Fruit Evaporating Or preserving'),
(4,0004,'Domestics And domestic Maintenance'),
(5,0005,'Bakeries and cracker')
SELECT *
FROM #Temp
DROP TABLE #Temp
Output:
ID Code PDescription
1 1 c and d, together
2 2 equals or Exceeds $27.00
3 3 Fruit Evaporating Or preserving
4 4 Domestics And domestic Maintenance
5 5 Bakeries and cracker
I need a way to achieve the below update to the description field:
ID Code PDescription
1 1 C and D, Together
2 2 Equals or Exceeds $27.00
3 3 Fruit Evaporating or Preserving
4 4 Domestics and Domestic Maintenance
5 5 Bakeries and Cracker
If you fancied going the SQL CLR route, the function could look something like this:
using System.Data.SqlTypes;
using System.Text.RegularExpressions;
public partial class UserDefinedFunctions
{
//One or more "word characters" or apostrophes
private static readonly Regex _regex = new Regex("[\\w']+");
[Microsoft.SqlServer.Server.SqlFunction]
public static SqlString ProperCase(SqlString subjectString)
{
string resultString = null;
if (!subjectString.IsNull)
{
resultString = _regex.Replace(subjectString.ToString().ToLowerInvariant(),
(Match match) =>
{
var word = match.Value;
switch (word)
{
case "or":
case "of":
case "and":
return word;
default:
return char.ToUpper(word[0]) + word.Substring(1);
}
});
}
return new SqlString(resultString);
}
}
Doubtless there may be Globalization issues in the above but it should do the job for English text.
You could also investigate TextInfo.ToTitleCase but that still leaves you needing to handle your exceptions.
The following function is not the most elegant of solutions but should do what you want.
ALTER FUNCTION [dbo].[ToProperCase](@textValue AS NVARCHAR(2000))
RETURNS NVARCHAR(2000)
AS
BEGIN
DECLARE @reset BIT;
DECLARE @properCase NVARCHAR(2000);
DECLARE @index INT;
DECLARE @character NCHAR(1);
SELECT @reset = 1, @index=1, @properCase = '';
WHILE (@index <= len(@textValue))
BEGIN
SELECT @character= substring(@textValue,@index,1),
@properCase = @properCase + CASE WHEN @reset=1 THEN UPPER(@character) ELSE LOWER(@character) END,
@reset = CASE WHEN @character LIKE N'[a-zA-Z'']' THEN 0 ELSE 1 END,
@index = @index +1
END
SET @properCase = N' ' + @properCase + N' ';
SET @properCase = REPLACE(@properCase, N' And ', N' and ');
SET @properCase = REPLACE(@properCase, N' Or ', N' or ');
SET @properCase = REPLACE(@properCase, N' Of ', N' of ');
RETURN RTRIM(LTRIM(@properCase))
END
Example use:
IF OBJECT_ID('tempdb..#Temp') IS NOT NULL
DROP TABLE #Temp
CREATE TABLE #Temp
(
ID INT,
Code INT,
PDescription VARCHAR(2000)
)
INSERT INTO #Temp
(ID,
Code,
PDescription)
VALUES (1,0001, N'c and d, together and'),
(2,0002, N'equals or Exceeds $27.00'),
(3,0003, N'Fruit Evaporating Or preserving'),
(4,0004, N'Domestics And domestic Maintenance'),
(5,0005, N'Bakeries and cracker')
SELECT ID, Code, dbo.ToProperCase(PDescription) AS [Desc]
FROM #Temp
DROP TABLE #Temp
If you want to convert your text to proper case before inserting it into the table, then simply call the function as follows:
INSERT INTO #Temp
(ID,
Code,
PDescription)
VALUES (1,0001, dbo.ToProperCase( N'c and d, together and')),
(2,0002, dbo.ToProperCase( N'equals or Exceeds $27.00')),
(3,0003, dbo.ToProperCase( N'Fruit Evaporating Or preserving')),
(4,0004, dbo.ToProperCase( N'Domestics And domestic Maintenance')),
(5,0005, dbo.ToProperCase( N'Bakeries and cracker'))
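And since the original requirement was an update to the description column, the same function can be applied in place; a minimal sketch, assuming the #Temp table from the examples above:
-- Sketch: proper-case the existing rows in place using the function above.
UPDATE #Temp
SET PDescription = dbo.ToProperCase(PDescription);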
This is a dramatically modified version of my Proper UDF. The good news is you may be able to process the entire data set in ONE SHOT rather than row by row.
Take note of @OverR (override).
Declare @Table table (ID int,Code int,PDescription varchar(150))
Insert into @Table values
(1,1,'c and d, together'),
(2,2,'equals or Exceeds $27.00'),
(3,3,'Fruit Evaporating Or preserving'),
(4,4,'Domestics And domestic Maintenance'),
(5,5,'Bakeries and cracker')
-- Generate Base Mapping Table - Can be an Actual Table
Declare @Pattn table (Key_Value varchar(25));Insert into @Pattn values (' '),('-'),('_'),(','),('.'),('&'),('#'),(' Mc'),(' O''') -- ,(' Mac')
Declare @Alpha table (Key_Value varchar(25));Insert Into @Alpha values ('A'),('B'),('C'),('D'),('E'),('F'),('G'),('H'),('I'),('J'),('K'),('L'),('M'),('N'),('O'),('P'),('Q'),('R'),('S'),('T'),('U'),('V'),('W'),('X'),('Y'),('Z')
Declare @OverR table (Key_Value varchar(25));Insert Into @OverR values (' and '),(' or '),(' of ')
Declare @Map Table (MapSeq int,MapFrom varchar(25),MapTo varchar(25))
Insert Into @Map
Select MapSeq=1,MapFrom=A.Key_Value+B.Key_Value,MapTo=A.Key_Value+B.Key_Value From @Pattn A Join @Alpha B on 1=1
Union All
Select MapSeq=99,MapFrom=A.Key_Value,MapTo=A.Key_Value From @OverR A
-- Convert Base Data Into XML
Declare @XML xml
Set @XML = (Select KeyID=ID,String=' '+lower(PDescription)+' ' from @Table For XML RAW)
-- Convert XML to varchar(max) for Global Search & Replace
Declare @String varchar(max)
Select @String = cast(@XML as varchar(max))
Select @String = Replace(@String,MapFrom,MapTo) From @Map Order by MapSeq
-- Convert Back to XML
Select @XML = cast(@String as XML)
-- Generate Final Results
Select KeyID = t.col.value('@KeyID', 'int')
,NewString = ltrim(rtrim(t.col.value('@String', 'varchar(150)')))
From @XML.nodes('/row') AS t (col)
Order By 1
Returns
KeyID NewString
1 C and D, Together
2 Equals or Exceeds $27.00
3 Fruit Evaporating or Preserving
4 Domestics and Domestic Maintenance
5 Bakeries and Cracker
You don't even need functions and temporary objects. Take a look at this query:
WITH Processor AS
(
SELECT ID, Code, 1 step,
CONVERT(nvarchar(MAX),'') done,
LEFT(PDescription, CHARINDEX(' ', PDescription, 0)-1) process,
SUBSTRING(PDescription, CHARINDEX(' ', PDescription, 0)+1, LEN(PDescription)) waiting
FROM #temp
UNION ALL
SELECT ID, Code, step+1,
done+' '+CASE WHEN process IN ('and', 'or', 'of') THEN LOWER(process) ELSE UPPER(LEFT(process, 1))+LOWER(SUBSTRING(process, 2, LEN(process))) END,
CASE WHEN CHARINDEX(' ', waiting, 0)>0 THEN LEFT(waiting, CHARINDEX(' ', waiting, 0)-1) ELSE waiting END,
CASE WHEN CHARINDEX(' ', waiting, 0)>0 THEN SUBSTRING(waiting, CHARINDEX(' ', waiting, 0)+1, LEN(waiting)) ELSE NULL END FROM Processor
WHERE process IS NOT NULL
)
SELECT ID, Code, done PSDescription FROM
(
SELECT *, ROW_NUMBER() OVER (PARTITION BY ID ORDER BY step DESC) RowNum FROM Processor
) Ordered
WHERE RowNum=1
ORDER BY ID
It produces desired result as well. You can SELECT * FROM Processor to see all steps executed.
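If you need to write the result back to #Temp rather than just select it, the same recursive CTE can drive an UPDATE; a sketch (untested), reusing the Processor CTE above unchanged and only swapping the final SELECT for an UPDATE join:
-- Sketch: same Processor CTE as above, but the last-step row per ID now feeds an UPDATE.
WITH Processor AS
(
SELECT ID, Code, 1 step,
CONVERT(nvarchar(MAX),'') done,
LEFT(PDescription, CHARINDEX(' ', PDescription, 0)-1) process,
SUBSTRING(PDescription, CHARINDEX(' ', PDescription, 0)+1, LEN(PDescription)) waiting
FROM #Temp
UNION ALL
SELECT ID, Code, step+1,
done+' '+CASE WHEN process IN ('and', 'or', 'of') THEN LOWER(process) ELSE UPPER(LEFT(process, 1))+LOWER(SUBSTRING(process, 2, LEN(process))) END,
CASE WHEN CHARINDEX(' ', waiting, 0)>0 THEN LEFT(waiting, CHARINDEX(' ', waiting, 0)-1) ELSE waiting END,
CASE WHEN CHARINDEX(' ', waiting, 0)>0 THEN SUBSTRING(waiting, CHARINDEX(' ', waiting, 0)+1, LEN(waiting)) ELSE NULL END
FROM Processor
WHERE process IS NOT NULL
)
UPDATE t
SET PDescription = LTRIM(Ordered.done) -- LTRIM removes the leading space left by the concatenation
FROM #Temp t
JOIN (
SELECT ID, done, ROW_NUMBER() OVER (PARTITION BY ID ORDER BY step DESC) RowNum
FROM Processor
) Ordered ON Ordered.ID = t.ID AND Ordered.RowNum = 1;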

T-SQL - Merge all columns from source to target table w/o listing all the columns

I'm trying to merge a very wide table from a source (linked Oracle server) to a target table (SQL Server 2012) w/o listing all the columns. Both tables are identical except for the records in them.
This is what I have been using:
TRUNCATE TABLE TargetTable
INSERT INTO TargetTable
SELECT *
FROM SourceTable
When/if I get this working I would like to make it a procedure so that I can pass into it the source, target and match key(s) needed to make the update. For now I would just love to get it to work at all.
USE ThisDatabase
GO
DECLARE
@Columns VARCHAR(4000) = (
SELECT COLUMN_NAME + ','
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = 'TargetTable'
FOR XML PATH('')
)
MERGE TargetTable AS T
USING (SELECT * FROM SourceTable) AS S
ON (T.ID = S.ID AND T.ROWVERSION = S.ROWVERSION)
WHEN MATCHED THEN
UPDATE SET @Columns = S.@Columns
WHEN NOT MATCHED THEN
INSERT (@Columns)
VALUES (S.@Columns)
Please excuse my noob-ness. I feel like I'm only half way there, but I don't understand some parts of SQL well enough to put it all together. Many thanks.
As previously mentioned in the answers, if you don't want to specify the columns, then you have to write a dynamic query.
Something like this in your case should help:
DECLARE
@Columns VARCHAR(4000) = (
SELECT COLUMN_NAME + ','
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = 'TargetTable'
FOR XML PATH('')
)
DECLARE @MergeQuery NVARCHAR(MAX)
DECLARE @UpdateQuery VARCHAR(MAX)
DECLARE @InsertQuery VARCHAR(MAX)
DECLARE @InsertQueryValues VARCHAR(MAX)
DECLARE @Col VARCHAR(200)
SET @UpdateQuery='Update Set '
SET @InsertQuery='Insert ('
SET @InsertQueryValues=' Values('
WHILE LEN(@Columns) > 0
BEGIN
SET @Col=left(@Columns, charindex(',', @Columns+',')-1);
IF @Col<> 'ID' AND @Col <> 'ROWVERSION'
BEGIN
SET @UpdateQuery= @UpdateQuery+ 'TargetTable.'+ @Col + ' = SourceTable.'+ @Col+ ','
SET @InsertQuery= @InsertQuery+@Col + ','
SET @InsertQueryValues=@InsertQueryValues+'SourceTable.'+ @Col+ ','
END
SET @Columns = stuff(@Columns, 1, charindex(',', @Columns+','), '')
END
SET @UpdateQuery=LEFT(@UpdateQuery, LEN(@UpdateQuery) - 1)
SET @InsertQuery=LEFT(@InsertQuery, LEN(@InsertQuery) - 1)
SET @InsertQueryValues=LEFT(@InsertQueryValues, LEN(@InsertQueryValues) - 1)
SET @InsertQuery=@InsertQuery+ ')'+ @InsertQueryValues +')'
SET @MergeQuery=
N'MERGE TargetTable
USING SourceTable
ON TargetTable.ID = SourceTable.ID AND TargetTable.ROWVERSION = SourceTable.ROWVERSION ' +
'WHEN MATCHED THEN ' + @UpdateQuery +
' WHEN NOT MATCHED THEN '+@InsertQuery +';'
Execute sp_executesql @MergeQuery
If you want more information about MERGE, you could read this excellent article.
Don't feel bad. It takes time. Merge has interesting syntax. I've actually never used it. I read Microsoft's documentation on it, which is very helpful and even has examples. I think I covered everything. I think there may be a slight amount of tweaking you might have to do, but I think it should work.
Here's the documentation for MERGE:
https://msdn.microsoft.com/en-us/library/bb510625.aspx
As for your code, I commented pretty much everything to explain it and show you how to do it.
This part is to help write your merge statement
USE ThisDatabase --This says what database context to use.
--Pretty much what database you're querying.
--Like this: database.schema.objectName
GO
DECLARE
@SetColumns VARCHAR(4000) = (
SELECT CONCAT(QUOTENAME(COLUMN_NAME),' = S.',QUOTENAME(COLUMN_NAME),',',CHAR(10)) --CONCAT just says concatenate these values. It adds the strings together.
--QUOTENAME adds brackets around the column names
--CHAR(10) is a line break for formatting purposes(totally optional)
FROM INFORMATION_SCHEMA.COLUMNS
--WHERE TABLE_NAME = 'TargetTable'
FOR XML PATH('')
) --This uses some fancy XML trick to get your Columns concatenated into one row.
--What really is in your table is a column of your column names in different rows.
--BTW If the columns names in both tables are identical, then this will work.
DECLARE @Columns VARCHAR(4000) = (
SELECT QUOTENAME(COLUMN_NAME) + ','
FROM INFORMATION_SCHEMA.COLUMNS
--WHERE TABLE_NAME = 'TargetTable'
FOR XML PATH('')
)
SET @Columns = SUBSTRING(@Columns,0,LEN(@Columns)) -- this gets rid of the comma at the end of your list
SET @SetColumns = SUBSTRING(@SetColumns,0,LEN(@SetColumns)) --same thing here
SELECT @SetColumns --You're going to want to copy and paste this into your WHEN MATCHED statement
SELECT @Columns --You're going to want to copy this into your WHEN NOT MATCHED statement
GO
Merge Statement
Especially look at my notes on ROWVERSION.
MERGE INTO TargetTable AS T
USING SourceTable AS S --Don't really need to write SELECT * FROM since you need the whole table anyway
ON (T.ID = S.ID AND T.[ROWVERSION] = S.[ROWVERSION]) --These are your matching parameters
--One note on this, if ROWVERSION is different versions of the same data you don't want to have RowVersion here
--Like lets say you have ID 1 ROWVERSION 2 in your source but only version 1 in your targetTable
--If you leave T.ID =S.ID AND T.ROWVERSION = S.ROWVERSION, then it will insert the new ROWVERSION
--So you'll have two versions of ID 1
WHEN MATCHED THEN --When TargetTable ID and ROWVERSION match in the matching parameters
--Update the values in the TargetTable
UPDATE SET /*Copy and Paste @SetColumns here*/
--Should look like this(minus the "--"):
--Col1 = S.Col1,
--Col2 = S.Col2,
--Col3 = S.Col3,
--Etc...
WHEN NOT MATCHED THEN --This says okay there are no rows with the existing ID, now insert a new row
INSERT (col1,col2,col3) --Copy and paste @Columns in between the parentheses. Should look like I show it. Note: This is an insert into the target table so you're listing the target table columns
VALUES (col1,col2,col3) --Same thing here. This is the list of source table columns

SQL Server 2012 Concatenating Strings from Multiple Rows

[Edit] Due to time constraints I gave up on using a CTE and created a function that returns the concatenated string:
CREATE FUNCTION fn_GetCategoryNamesAsString
(
@lawID INT
)
RETURNS NVARCHAR(MAX)
AS
BEGIN
DECLARE @categoryNames NVARCHAR(MAX)
SET @categoryNames = ''
DECLARE @categoryID INT
DECLARE CUR CURSOR LOCAL FORWARD_ONLY READ_ONLY FOR
SELECT t1.LawCategoryID FROM [GWS].[dbo].[GWSMasterLawsLawCategories] t1 WHERE t1.LawID = @lawID
OPEN CUR
FETCH FROM CUR INTO @categoryID
WHILE @@FETCH_STATUS = 0
BEGIN
SET @categoryNames = @categoryNames + (SELECT t2.Name
FROM [GWS].[dbo].GWSMasterLawCategories t2
WHERE t2.LawCategoryID = @categoryID) + ', '
FETCH NEXT FROM CUR INTO @categoryID
END
CLOSE CUR
DEALLOCATE CUR
RETURN @categoryNames
END
GO
This does the job but I don't really like it. If anyone has a better solution I'd love to know.
[End edit]
I have seen several questions that deal roughly with the same topic, but none cover the inclusion of null values.
I am writing a query that should return the full contents of one table with a couple of columns added with relevant data from other tables. These columns can include 0 - n values.
Null values need to be stored as an empty string, and sets that do have the extra data should display it separated by commas.
Some approaches delivered all the names strung together, some only returned the values separately, some no values at all and, most often, the recursion went too deep (which means I fouled up, as the dataset is small).
This is my current approach:
DECLARE @categoryNames NVARCHAR(MAX);
SET @categoryNames = '';
WITH sources (sourcesLawSourceID, sourcesName) AS (
SELECT DISTINCT [LawSourceID], [name]
FROM [GWS].[dbo].[GWSMasterLawSources]
),
categories AS(
SELECT GWSCategories.LawCategoryID AS categoryID, GWSLawCategories.LawID AS lawID,
categoryNames = @categoryNames
--CAST(LEFT( GWSCategories.name, CHARINDEX(',', GWSCategories.name + ',') -1) AS NVARCHAR(MAX)) categoryName,
--STUFF(GWSCategories.name, 1, CHARINDEX(',', GWSCategories.name + ','), '') categoryNames
FROM [GWS].[dbo].[GWSMasterLawCategories] GWSCategories
JOIN [GWS].[dbo].[GWSMasterLawsLawCategories] GWSLawCategories
ON GWSCategories.LawCategoryID = GWSLawCategories.LawCategoryID
UNION ALL
SELECT categories.categoryID, categories.lawID,
CAST(LEFT( @categoryNames, CHARINDEX(',', @categoryNames + ',') -1) AS NVARCHAR(MAX)) + GWSCategories.Name
FROM categories
JOIN [GWS].[dbo].[GWSMasterLawCategories] GWSCategories
ON categories.categoryID = GWSCategories.LawCategoryID
WHERE @categoryNames > ''
)
SELECT DISTINCT GWSMaster.[LawID]
,[Name]
,sources.sourcesName LawSourceName
,(SELECT STUFF((SELECT DISTINCT ', ' + RTRIM(LTRIM(categories.CategoryNames))
FROM categories
FOR XML PATH ('')), 1, 1, '')) Categories
FROM [GWS].[dbo].[GWSMasterLaws] GWSMaster
JOIN sources
ON sources.sourcesLawSourceID = GWSMaster.LawSourceID
JOIN categories
ON categories.lawID = GWSMaster.LawID
This leaves the category name field completely empty.
If I can give any more information or I have missed a question that answers my problem please let me know.

User defined function replacing WHERE col IN(...)

I have created a user defined function to gain performance with queries containing 'WHERE col IN (...)', like in this case:
SELECT myCol1, myCol2
FROM myTable
WHERE myCol3 IN (100, 200, 300, ..., 4900, 5000);
The queries are generated from a web application and are in some cases much more complex.
The function definition looks like this:
CREATE FUNCTION [dbo].[udf_CSVtoIntTable]
(
@CSV VARCHAR(MAX),
@Delimiter CHAR(1) = ','
)
RETURNS
@Result TABLE
(
[Value] INT
)
AS
BEGIN
DECLARE @CurrStartPos SMALLINT;
SET @CurrStartPos = 1;
DECLARE @CurrEndPos SMALLINT;
SET @CurrEndPos = 1;
DECLARE @TotalLength SMALLINT;
-- Remove space, tab, linefeed, carriage return
SET @CSV = REPLACE(@CSV, ' ', '');
SET @CSV = REPLACE(@CSV, CHAR(9), '');
SET @CSV = REPLACE(@CSV, CHAR(10), '');
SET @CSV = REPLACE(@CSV, CHAR(13), '');
-- Add extra delimiter if needed
IF NOT RIGHT(@CSV, 1) = @Delimiter
SET @CSV = @CSV + @Delimiter;
-- Get total string length
SET @TotalLength = LEN(@CSV);
WHILE @CurrStartPos < @TotalLength
BEGIN
SET @CurrEndPos = CHARINDEX(@Delimiter, @CSV, @CurrStartPos);
INSERT INTO @Result
VALUES (CAST(SUBSTRING(@CSV, @CurrStartPos, @CurrEndPos - @CurrStartPos) AS INT));
SET @CurrStartPos = @CurrEndPos + 1;
END
RETURN
END
The function is intended to be used like this (or as an INNER JOIN):
SELECT myCol1, myCol2
FROM myTable
WHERE myCol3 IN (
SELECT [Value]
FROM dbo.udf_CSVtoIntTable('100, 200, 300, ..., 4900, 5000', ','));
Does anyone have any optimization ideas for my function, or other ways to improve performance in my case?
Are there any drawbacks that I have missed?
I am using MS SQL Server 2005 Std and the .NET 2.0 framework.
I'm not sure of the performance increase, but I would use it as an inner join and get away from the inner select statement.
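A sketch of that join form, using the hypothetical myTable/myCol names from the question (the literal list is shortened here for illustration):
-- Sketch: join to the function's result set instead of using IN (SELECT ...).
SELECT t.myCol1, t.myCol2
FROM myTable t
INNER JOIN dbo.udf_CSVtoIntTable('100, 200, 300', ',') ids
    ON ids.[Value] = t.myCol3;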
Using a UDF in a WHERE clause or (worse) a subquery is asking for trouble. The optimizer sometimes gets it right, but often gets it wrong and evaluates the function once for every row in your query, which you don't want.
If your parameters are static (they appear to be) and you can issue a multistatement batch, I'd load the results of your UDF into a table variable, then use a join against the table variable to do your filtering. This should work more reliably.
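A sketch of that pattern, reusing the question's function and table names; the @Ids table variable and the shortened list are just placeholders:
-- Sketch: materialize the split values once, then join against them.
DECLARE @Ids TABLE ([Value] INT PRIMARY KEY);  -- @Ids is an illustrative name

INSERT INTO @Ids ([Value])
SELECT [Value]
FROM dbo.udf_CSVtoIntTable('100, 200, 300', ',');  -- shortened list for illustration

SELECT t.myCol1, t.myCol2
FROM myTable t
INNER JOIN @Ids i
    ON i.[Value] = t.myCol3;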
That loop will kill performance!
Create a table like this:
CREATE TABLE Numbers
(
Number int not null primary key
)
that has rows containing the values 1 to 8000 or so (a population sketch follows the function definition), and use this function:
CREATE FUNCTION [dbo].[FN_ListAllToNumberTable]
(
@SplitOn char(1) --REQUIRED, the character to split the @List string on
,@List varchar(8000) --REQUIRED, the list to split apart
)
RETURNS
@ParsedList table
(
RowNumber int
,ListValue varchar(500)
)
AS
BEGIN
/*
DESCRIPTION: Takes the given @List string and splits it apart based on the given @SplitOn character.
A table is returned, one row per split item, with columns named "RowNumber" and "ListValue".
This function works for fixed or variable length items.
Empty and null items will be included in the results set.
PARAMETERS:
@List varchar(8000) --REQUIRED, the list to split apart
@SplitOn char(1) --OPTIONAL, the character to split the @List string on, defaults to a comma ","
RETURN VALUES:
a table, one row per item in the list, with a column name "ListValue"
TEST WITH:
----------
SELECT * FROM dbo.FN_ListAllToNumberTable(',','1,12,123,1234,54321,6,A,*,|||,,,,B')
DECLARE @InputList varchar(200)
SET @InputList='17;184;75;495'
SELECT
'well formed list',LEFT(@InputList,40) AS InputList,h.Name
FROM Employee h
INNER JOIN dbo.FN_ListAllToNumberTable(';',@InputList) dt ON h.EmployeeID=dt.ListValue
WHERE dt.ListValue IS NOT NULL
SET @InputList='17;;;184;75;495;;;'
SELECT
'poorly formed list join',LEFT(@InputList,40) AS InputList,h.Name
FROM Employee h
INNER JOIN dbo.FN_ListAllToNumberTable(';',@InputList) dt ON h.EmployeeID=dt.ListValue
SELECT
'poorly formed list',LEFT(@InputList,40) AS InputList, ListValue
FROM dbo.FN_ListAllToNumberTable(';',@InputList)
**/
/*this will return empty rows, and row numbers*/
INSERT INTO @ParsedList
(RowNumber,ListValue)
SELECT
ROW_NUMBER() OVER(ORDER BY number) AS RowNumber
,LTRIM(RTRIM(SUBSTRING(ListValue, number+1, CHARINDEX(@SplitOn, ListValue, number+1)-number - 1))) AS ListValue
FROM (
SELECT @SplitOn + @List + @SplitOn AS ListValue
) AS InnerQuery
INNER JOIN Numbers n ON n.Number < LEN(InnerQuery.ListValue)
WHERE SUBSTRING(ListValue, number, 1) = @SplitOn
RETURN
END /*Function FN_ListAllToNumberTable*/
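The Numbers table only needs to be filled once; a minimal sketch that loads 1 to 8000 (any tally-table population method will do):
-- Sketch: populate Numbers with 1..8000 by numbering rows from a system catalog cross join.
INSERT INTO Numbers (Number)
SELECT TOP (8000) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM sys.all_objects a
CROSS JOIN sys.all_objects b;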
I have other versions that do not return empty or null rows, ones that return just the item and not the row number, etc. Look in the header comment to see how to use this as part of a JOIN, which is much faster than in a where clause.
The CLR solution did not give me good performance, so I will use a recursive query instead. Here is the definition of the function I will use (mostly based on Erland Sommarskog's examples):
CREATE FUNCTION [dbo].[priudf_CSVtoIntTable]
(
@CSV VARCHAR(MAX),
@Delimiter CHAR(1) = ','
)
RETURNS
@Result TABLE
(
[Value] INT
)
AS
BEGIN
-- Remove space, tab, linefeed, carriage return
SET @CSV = REPLACE(@CSV, ' ', '');
SET @CSV = REPLACE(@CSV, CHAR(9), '');
SET @CSV = REPLACE(@CSV, CHAR(10), '');
SET @CSV = REPLACE(@CSV, CHAR(13), '');
WITH csvtbl(start, stop) AS
(
SELECT start = CONVERT(BIGINT, 1),
stop = CHARINDEX(@Delimiter, @CSV + @Delimiter)
UNION ALL
SELECT start = stop + 1,
stop = CHARINDEX(@Delimiter, @CSV + @Delimiter, stop + 1)
FROM csvtbl
WHERE stop > 0
)
INSERT INTO @Result
SELECT CAST(SUBSTRING(@CSV, start, CASE WHEN stop > 0 THEN stop - start ELSE 0 END) AS INT) AS [Value]
FROM csvtbl
WHERE stop > 0
OPTION (MAXRECURSION 1000)
RETURN
END
Thanks for the input. I have to admit that I did some poor research before I started my work. I found that Erland Sommarskog has written a lot about this problem on his web page; after your responses and after reading his page I decided that I would try a CLR function to solve this.
I tried a recursive query; this resulted in good performance, but I will try a CLR function anyway.
