Debugging rtf to text function - sql-server

I have a function we use to convert RTF formatted text to plain text. It has worked pretty well in the past, and seems to work pretty well so far on the text in question.
However, somewhere in my dataset of 230,000 records, it makes a bad SUBSTRING call and aborts the entire thing (without telling me the offending record).
Is there any way I can get some feedback into what is going on?
I know that SQLServer functions do not allow PRINT statements, or INSERT statements.
And the dataset of 230,000 records is not mine, but a clients. I really don't want to have to try to go record by record and see which one is causing the error.
SQL Function below:
CREATE FUNCTION [dbo].[RTF2Text]
(
#rtf nvarchar(max)
)
RETURNS nvarchar(max)
AS
BEGIN
DECLARE #Pos1 int;
DECLARE #Pos2 int;
DECLARE #hex varchar(316);
DECLARE #Stage table
(
[Char] char(1),
[Pos] int
);
INSERT #Stage
(
[Char]
, [Pos]
)
SELECT SUBSTRING(#rtf, [Number], 1)
, [Number]
FROM [master]..[spt_values]
WHERE ([Type] = 'p')
AND (SUBSTRING(#rtf, Number, 1) IN ('{', '}'));
SELECT #Pos1 = MIN([Pos])
, #Pos2 = MAX([Pos])
FROM #Stage;
DELETE
FROM #Stage
WHERE ([Pos] IN (#Pos1, #Pos2));
WHILE (1 = 1)
BEGIN
SELECT TOP 1 #Pos1 = s1.[Pos]
, #Pos2 = s2.[Pos]
FROM #Stage s1
INNER JOIN #Stage s2 ON s2.[Pos] > s1.[Pos]
WHERE (s1.[Char] = '{')
AND (s2.[Char] = '}')
ORDER BY s2.[Pos] - s1.[Pos];
IF ##ROWCOUNT = 0
BREAK
DELETE
FROM #Stage
WHERE ([Pos] IN (#Pos1, #Pos2));
UPDATE #Stage
SET [Pos] = [Pos] - #Pos2 + #Pos1 - 1
WHERE ([Pos] > #Pos2);
SET #rtf = STUFF(#rtf, #Pos1, #Pos2 - #Pos1 + 1, '');
END
SET #rtf = REPLACE(#rtf, '\pard', '^*^');
SET #rtf = REPLACE(#rtf, '\par', '^*^');
SET #rtf = REPLACE(#rtf, '\t', '^~^');
SET #rtf = STUFF(#rtf, 1, CHARINDEX(' ', #rtf), '');
IF len(#rtf) > 0
WHILE (Right(#rtf, 1) IN (' ', CHAR(13), CHAR(10), '}'))
BEGIN
SELECT #rtf = SUBSTRING(#rtf, 1, (LEN(#rtf + 'x') - 2));
IF LEN(#rtf) = 0 BREAK
END
SET #Pos1 = CHARINDEX('\''', #rtf);
WHILE #Pos1 IS NOT NULL AND #Pos1 > 0
BEGIN
IF #Pos1 IS NOT NULL AND #Pos1 > 0
BEGIN
SET #hex = '0x' + SUBSTRING(#rtf, #Pos1 + 2, 2);
SET #rtf = REPLACE(#rtf, SUBSTRING(#rtf, #Pos1, 4), CHAR(CONVERT(int, CONVERT (binary(1), #hex,1))));
SET #Pos1 = CHARINDEX('\''', #rtf);
END
END
SET #rtf = COALESCE(#rtf, '') + ' ';
SET #Pos1 = PATINDEX('%\%[0123456789][\ ]%', #rtf);
WHILE #Pos1 IS NOT NULL AND #Pos1 > 0 AND #rtf != ''
BEGIN
SET #Pos2 = CHARINDEX(' ', #rtf, #Pos1 + 1);
IF #Pos2 < #Pos1
SET #Pos2 = CHARINDEX('\', #rtf, #Pos1 + 1);
IF #Pos2 < #Pos1
BEGIN
SET #rtf = SUBSTRING(#rtf, 1, #Pos1 - 1);
SET #Pos1 = 0;
END
ELSE
BEGIN
SET #rtf = STUFF(#rtf, #Pos1, #Pos2 - #Pos1 + 1, '');
SET #Pos1 = PATINDEX('%\%[0123456789][\ ]%', #rtf);
END
END
IF RIGHT(#rtf, 1) = ' '
SET #rtf = SUBSTRING(#rtf, 1, LEN(#rtf) -1);
RETURN #rtf;
END

Not to be too rude, but have you actually tested on your function?
Have you run any unit tests to try and break your function, e.g. invalid values, boundary conditions etc.?
Have you checked documentation to see under what conditions SUBSTRING can throw an exception?
I have run these cases and am getting exceptions:
SELECT dbo.[RTF2Text]( NULL )
SELECT dbo.[RTF2Text]( '' )
SELECT dbo.[RTF2Text]( '1' )
SELECT dbo.[RTF2Text]( 'blah' )
If you know under what conditions/input values your function will fail, then it is a simple matter of checking for these in your table.

I had a similar situation and have very little knowledge of SQL functions however needed to strip RTF and tried this code. Debugging suggested that this function was failing was here as I was getting Invalid length parameter.
IF RIGHT(#rtf, 1) = ' '
SET #rtf = SUBSTRING(#rtf, 1, LEN(#rtf) -1);
As I have minimal knowledge and lack of time I just added a second if to make sure it wasn't doing a subtraction i.e. -1 from 0 which worked for my dataset.
IF RIGHT(#rtf, 1) = ' '
IF LEN(#rtf) > 0
SET #rtf = SUBSTRING(#rtf, 1, LEN(#rtf) -1);

Related

SQL error when trying to loop in databases and create function

I have created a SQL script to loop throw databases and create a function (the aim of the function is to convert RTF to Plain Text).
I put the script of the function creation on a variable and try to execute it using the exec command.
I have used While loop and I have put the request on a varchar variable, then, exec #command
but I get this error:
enter image description here
declare #Total as int
select #Total = count(*) from Temp1
declare #counter as int
set #counter = 1
declare #CurrentVal as varchar(max)
declare #command varchar(max)
while (#counter <= #Total)
begin
select #CurrentVal = name from Temp1 where RowId = #counter
set #command=' use '+#CurrentVal+'
GO
CREATE FUNCTION dbo.fnParseTEXTRTF
(
#rtf VARCHAR(max)
)
RETURNS VARCHAR(max)
AS
BEGIN
DECLARE #Stage TABLE
(
Chr CHAR(1),
Pos INT
)
INSERT #Stage
(
Chr,
Pos
)
SELECT SUBSTRING(#rtf, Number, 1),
Number
FROM master..spt_values
WHERE Type = ''p''
AND SUBSTRING(#rtf, Number, 1) IN (''{'', ''}'')
DECLARE #Pos1 INT,
#Pos2 INT
SELECT #Pos1 = MIN(Pos),
#Pos2 = MAX(Pos)
FROM #Stage
DELETE
FROM #Stage
WHERE Pos IN (#Pos1, #Pos2)
WHILE 1 = 1
BEGIN
SELECT TOP 1 #Pos1 = s1.Pos, #Pos2 = s2.Pos
FROM #Stage AS s1
INNER JOIN #Stage AS s2 ON s2.Pos > s1.Pos
WHERE s1.Chr = ''{''
AND s2.Chr = ''}''
ORDER BY s2.Pos - s1.Pos
IF ##ROWCOUNT = 0
BREAK
DELETE
FROM #Stage
WHERE Pos IN (#Pos1, #Pos2)
UPDATE #Stage
SET Pos = Pos - #Pos2 + #Pos1 - 1
WHERE Pos > #Pos2
SET #rtf = STUFF(#rtf, #Pos1, #Pos2 - #Pos1 + 1, '''')
END
SET #Pos1 = PATINDEX(''%\cf[0123456789][0123456789 ]%'', #rtf)
WHILE #Pos1 > 0
SELECT #Pos2 = CHARINDEX('' '', #rtf, #Pos1 + 1), #rtf = STUFF(#rtf, #Pos1, #Pos2 - #Pos1 + 1, ''''), #Pos1 = PATINDEX(''%\cf[0123456789][0123456789 ]%'', #rtf)
SELECT #rtf = REPLACE(#rtf, ''\pard'', ''''), #rtf = REPLACE(#rtf, ''\par'', ''''), #rtf = case when LEN(#rtf)>0 then LEFT(#rtf, LEN(#rtf) - 1) else #rtf end
SELECT #rtf = REPLACE(#rtf, ''\b0 '', ''''), #rtf = REPLACE(#rtf, ''\b '', '''')
SELECT #rtf = STUFF(#rtf, 1, CHARINDEX('' '', #rtf), '''')
RETURN #rtf
end'
set #counter = #counter + 1
exec #command
end
As i understand, the function is correct, the main problem to execute statement for function creation across multiple databases.
GO can't be used here, it's not sql, it's instruction to client to execute all statements in scope
Unfortunately, you can't create function not in current database, using [DatabaseName].[schema].[FuncName].
I can propose such technique to solve the problem, code just for example
declare #funccode nvarchar(4000);
set #funccode='CREATE FUNCTION [dbo].[fntest]
(
)
RETURNS int
AS
BEGIN
RETURN 1
END;'
declare #dbname nvarchar(100)='TestDatabase'
declare #statement nvarchar(max) ='use '+#dbname+';
exec (#func);'
exec sp_executesql #stmt = #statement,#params=N'#func nvarchar(4000)',#func=#funccode;
Th idea - to switch current database inside exec statement and call new exec statement, when current database is already set

Replace Value in Custom format for MS SQL

I have a set of data for example:
Part no Custom Format
1128005 \Machines\3D\PartNo(2)\PartNo(4)xx\PartNo(7)
11.88.006 \Machines\3D\PartNo(2)\PartNo+3(2)xx\PartNo+6(3)
I want to replace the variable set in the custom format define in it. The result i am looking for is
For Part no
1128005
the result is
\Machines\3D\11\1128xx\1128005
11.88.006
\Machines\3D\11\88xx\006
Any ideas?
Thanks
Regards
If my understanding is correct, below script should be what you want? But because it processes row by row, it can be very slow when dealing with millions of rows.
--declare #input varchar(50) = '11.88.006', #pattern varchar(255) = '\Machines\3D\PartNo(2)\PartNo+3(2)xx\PartNo+6(3)'
declare #input varchar(50) = '1128005', #pattern varchar(255) = '\Machines\3D\PartNo(2)\PartNo(4)xx\PartNo(7)'
declare #tblPattern table (tmpKey int identity(1,1), result varchar(50), Position varchar(50), SkipCnt int, CharLength int, Sufix varchar(50))
declare #i int = 1, #output varchar(max) = '\Machines\3D\'
Declare #PatternXml XML
Set #PatternXml = N'<root><r>' + REPLACE(replace(#pattern, '\Machines\3D\', ''), '\', '</r><r>') + '</r></root>'
insert into #tblPattern(result)
select r.value('.', 'VARCHAR(MAX)') as t
from #PatternXml.nodes('//root//r') as records(r)
update #tblPattern set Position = REPLACE(result, 'PartNo', '')
, SkipCnt = CASE WHEN CHARINDEX('+', result, 1) > 0 THEN SUBSTRING(result, CHARINDEX('+', result, 1) + 1, CHARINDEX('(', result, 1) - CHARINDEX('+', result, 1) - 1) ELSE 0 END
, CharLength = SUBSTRING(result, CHARINDEX('(', result, 1) + 1, CHARINDEX(')', result, 1) - CHARINDEX('(', result, 1) - 1 )
, Sufix = SUBSTRING(result, CHARINDEX(')', result, 1) + 1, LEN(result))
while #i <= 3
begin
select #output += SUBSTRING(#input, 1 + SkipCnt, CharLength) + Sufix + '\'
from #tblPattern
where tmpKey = #i
set #i += 1
end
select #output = STUFF(#output, len(#output), 1, '')
select #output

order by column based on starting Number(Number may be decimal) in a string in sql server

i have a column named name in my table and example data i have included below
name
-----
1.arun888
2.nikl55555
11.abcd5566
1.123.bhdf
2.767ss777
1.21cdm
and i want to sort the deatils like below
name
----
1.arun888
1.123.bhdf
1.21cdm
2.nikl55555
2.767ss777
11.abcd5566
I have tried many ways but nothing works for me
first method i had used
DECLARE #string varchar(100),
#start int,
#end int,
#len int
SET #string = '66555.12tttthe hollies 12345 Test Ad77dress Dr.'
set #string = replace(#string, ' ' , '')
set #len = len(#string)
set #start = PATINDEX('%[0-9]%',#string)
set #end = PATINDEX('%[^0-9]%',substring(#string, #start, #len))-1
print substring(#string, #start, #end)
but it gives only 66555
but i need
66555.12
second method i had used
CREATE FUNCTION dbo.fn_GetNumeric
(#strAlphaNumeric VARCHAR(256))
RETURNS VARCHAR(256)
AS
BEGIN
DECLARE #intAlpha INT
SET #intAlpha = PATINDEX('%[^0-9]%', #strAlphaNumeric)
BEGIN
WHILE #intAlpha > 0
BEGIN
SET #strAlphaNumeric = STUFF(#strAlphaNumeric, #intAlpha, 1, '' )
SET #intAlpha = PATINDEX('%[^0-9]%', #strAlphaNumeric )
END
END
RETURN ISNULL(#strAlphaNumeric,0)
END
GO
i have used the above function but it wil return all the numbers from string
example
if string is 12.dddh5555
then it return 125555
so i am stuck here. i hope somebody can help me to find this
Try this code:
DECLARE #t TABLE ( name VARCHAR(20) )
INSERT INTO #t
VALUES ( '1.arun888' ),
( '2.nikl55555' ),
( '11.abcd5566' ),
( '1.123.bhdf' ),
( '2.767ss777' ),
( '1.21cdm' );
WITH cte
AS ( SELECT name ,
SUBSTRING(name, 1, PATINDEX('%[^0-9.]%', name) - 1) d
FROM #t
)
SELECT *
FROM cte
ORDER BY CAST(CASE WHEN RIGHT(d, 1) = '.' THEN SUBSTRING(d, 1, LEN(d) - 1)
WHEN d = '' THEN '0'
ELSE d
END AS DECIMAL(30, 10))
First I select substrings till the first symbol that is not dot or digit. Then just remove last dot and order by the result.
With function:
CREATE FUNCTION dbo.fn_GetNumeric
(
#strAlphaNumeric VARCHAR(256)
)
RETURNS DECIMAL(30, 10)
AS
BEGIN
DECLARE #s1 VARCHAR(256) = SUBSTRING(#strAlphaNumeric, 1,
PATINDEX('%[^0-9.]%',
#strAlphaNumeric) - 1)
RETURN CAST(CASE WHEN RIGHT(#s1, 1) = '.' THEN SUBSTRING(#s1, 1, LEN(#s1) - 1)
WHEN #s1 = '' THEN '0'
ELSE #s1
END AS DECIMAL(30, 10))
END
GO
SELECT * FROM TableName
ORDER BY dbo.fn_GetNumeric(name)

How do you prepend space in a string where Upper Case letter comes or where a space really needed [duplicate]

This question already has answers here:
Finding Uppercase Character then Adding Space
(3 answers)
Closed 8 years ago.
How do you prepends space in a string where Upper Case letter comes or where a space really needed.
The Sample code is:
DECLARE #teams TABLE (Team NVARCHAR(100))
INSERT INTO #teams
SELECT 'TataConsultencyServices'
UNION ALL
SELECT 'TataConsultencyCompany'
UNION ALL
SELECT 'CompanyHumanResource'
Expected Result
Tata Consultency Services
Tata Consultency Company
Company Human Resource
A set based solution:
DECLARE #s NVARCHAR(100);
SET #s = 'CompanyHumanResources';
DECLARE #Idx INT = 1;
WITH CteRecursive
AS
(
SELECT 1 AS Idx,
CONVERT(NVARCHAR(200), #s) AS String
UNION ALL
SELECT src.Idx + src.IsUpper + 1,
CONVERT(NVARCHAR(200),
CASE WHEN src.IsUpper = 1 THEN STUFF(src.String, src.Idx+1, 0, ' ') ELSE src.String END
)
FROM
(
SELECT rec.*,
CASE WHEN SUBSTRING(rec.String, rec.Idx, 1) <> ' ' AND SUBSTRING(rec.String, rec.Idx+1, 1) LIKE '[A-Z]' AND SUBSTRING(rec.String, rec.Idx+1, 1) COLLATE Romanian_CS_AS = UPPER(SUBSTRING(rec.String, rec.Idx+1, 1)) COLLATE Romanian_CS_AS THEN 1 ELSE 0 END AS IsUpper
FROM CteRecursive rec
WHERE rec.Idx + 1 <= LEN(rec.String)
) src
)
SELECT TOP(1) x.String
FROM CteRecursive x
ORDER BY x.Idx DESC;
Results:
String
-----------------------
Company Human Resources
You may surely get some help from this:-
CREATE FUNCTION CaseSensitiveSQLSplitFunction
(
#str nvarchar(max)
)
returns #t table (val nvarchar(max))
as
begin
declare #i int, #j int
select #i = 1, #j = len(#str)
declare #w nvarchar(max)
while #i <= #j
begin
if substring(#str,#i,1) = UPPER(substring(#str,#i,1)) collate Latin1_General_CS_AS
begin
if #w is not null
insert into #t (val) select #w
set #w = substring(#str,#i,1)
end
else
set #w = #w + substring(#str,#i,1)
set #i = #i + 1
end
if #w is not null
insert into #t (val) select #w
return
end
Taking the sample as:-
declare #str nvarchar(max) = N'ThisIsATest'
select * from dbo.CaseSensitiveSQLSplitFunction(#str)
set #str = N'ThisIsASqlServerCaseSensitiveSplitStringFunction'
select * from dbo.CaseSensitiveSQLSplitFunction(#str)
It is now possible to sql concatenate string values in a way from rows to single column value.
We can just use any of the sql concatenation function.
declare #str nvarchar(max) = N'ThisIsATest'
SELECT LTRIM(STUFF((
SELECT ' ' + val FROM dbo.CaseSensitiveSQLSplitFunction(#str) FOR XML PATH('')
), 1, 1, '')) string
set #str = N'ThisIsASqlServerCaseSensitiveSplitStringFunction'
SELECT LTRIM(STUFF((
SELECT ' ' + val FROM dbo.CaseSensitiveSQLSplitFunction(#str) FOR XML PATH('')
), 1, 1, '')) string
WHILE 1 = 1
BEGIN
UPDATE #teams
SET TeamName = STUFF(TeamName, patindex('%[a-z,.][A-Z]%', TeamName COLLATE Latin1_General_BIN) + 1,0,' ')
WHERE patindex('%[a-z,.][A-Z]%', TeamName COLLATE Latin1_General_BIN) > 0
IF ##ROWCOUNT = 0 BREAK
END
UPDATE #teams
SET TeamName = STUFF(TeamName, patindex('%[A-Z][a-z]%', RIGHT(TeamName,LEN(TeamName) -1) COLLATE Latin1_General_BIN) +1 ,0,' ')
WHERE patindex('%[A-Z][a-z]%', TeamName COLLATE Latin1_General_BIN) > 0

SQL WHILE loop inside WHERE clause

Can I put a WHILE loop inside WHERE clause? I have a stored procedure where I'm trying to put in text searching capability. I have it working for an exact match like this:
AND (#exactString = ''
OR (CHARINDEX(#exactString, [Short Description]) > 0
OR CHARINDEX(#exactString, [Description]) > 0
OR CHARINDEX(#exactString, [Manufacturer]) > 0))
Next I'm trying to do a "any word" match and an "all words" match. I can get the search string I want to search for with the following WHILE loop:
DECLARE #searchString varchar(max)
DECLARE #endIndex int
SET #allString = LTRIM(RTRIM(#allString))
WHILE LEN(#allString) > 0
BEGIN
SET #endIndex = CHARINDEX(' ', #allString) > 0
IF #endIndex > 0
BEGIN
SET #searchString = LEFT(#allString, #endIndex)
SET #allString = LTRIM(RTRIM(RIGHT(#allString, #endIndex)))
END
ELSE
BEGIN
SET #searchString = #allString
SET #allString = ''
END
END
Now I want to use the #searchString variable like I used #exactString above. Is there a way to do this inside my loop or is there some other technique I'm missing that would work here?
Thanks for your help,
Dan
I have used a table value function to perform this task using a query such as the following:
SELECT I.*
FROM #info AS I
INNER JOIN dbo.funcSplitToTable( ' ', #allString ) AS S
ON I.[Manufacturer] LIKE '%' + S.result + '%'
OR I.[Description] LIKE '%' + S.result + '%'
OR I.[Short Description] LIKE '%' + S.result + '%'
This table value function is defined as follows:
CREATE FUNCTION dbo.funcSplitToTable
/*
Split a string into parts base on a separation character to produce
a table that has one column containing the results of the split.
EXAMPLE:
SELECT * FROM dbo.funcSplitToTable( '~', 'MAINT~12221~10001~10/25/2004~CANCELLED~1' )
SELECT * FROM dbo.funcSplitToTable( '~', '' )
SELECT * FROM dbo.funcSplitToTable( '~', NULL )
SELECT * FROM dbo.funcSplitToTable( NULL, 'MAINT~12221~10001~10/25/2004~CANCELLED~1' )
SELECT * FROM dbo.funcSplitToTable( '', 'MAINT~12221~10001~10/25/2004~CANCELLED~1' )
RETURN:
Table with one column containing resulting strings.
*/
(
#strSearch AS varchar(255) -- String to search for.
,#strText AS varchar(MAX ) -- Text to search for string.
)
RETURNS #tblResult TABLE (
result varchar(MAX) NOT NULL
)
WITH SCHEMABINDING
AS
BEGIN
DECLARE #iLastPos int
DECLARE #iPos int
DECLARE #strResult varchar(MAX)
IF #strText IS NULL RETURN ;
IF #strSearch IS NULL SET #strSearch = '' ;
SET #strResult = NULL ;
SET #iLastPos = 1 ;
SET #iPos = CHARINDEX( #strSearch, #strText ) ;
WHILE #iPos > 0
BEGIN
IF (#iPos - #iLastPos) > 0
INSERT INTO #tblResult
SELECT SUBSTRING( #strText, #iLastPos, (#iPos - #iLastPos) ) AS result
;
SET #iLastPos = #iPos + 1 ;
SET #iPos = CHARINDEX( #strSearch, #strText, #iLastPos ) ;
END
IF (1 + LEN(#strText) - #iLastPos) > 0
INSERT INTO #tblResult
SELECT SUBSTRING( #strText, #iLastPos, (1 + LEN(#strText) - #iLastPos) ) AS result
;
RETURN ;
END
I got a great answer from Michael Erickson that totally works for the "any" search. For the "all" search. I built up an sql string with the entire query. The "all" search section is here:
IF LEN(#allString) > 0
BEGIN
DECLARE #searchString varchar(max)
DECLARE #endIndex int
DECLARE #isFirstString bit
SET #isFirstString = 0
SET #allString = LTRIM(RTRIM(#allString))
WHILE LEN(#allString) > 0
BEGIN
SET #endIndex = CHARINDEX(' ', #allString)
IF #endIndex > 0
BEGIN
SET #searchString = LTRIM(RTRIM(LEFT(#allString, #endIndex)))
SET #allString = LTRIM(RTRIM(RIGHT(#allString, LEN(#allString) - #endIndex)))
END
ELSE
BEGIN
SET #searchString = #allString
SET #allString = ''
END
SET #sql = #sql + ' AND ((CHARINDEX(''' + cast(#searchString as varchar(max)) + ''', [Short Description]) > 0
OR CHARINDEX(''' + cast(#searchString as varchar(max)) + ''', [Description]) > 0
OR CHARINDEX(''' + cast(#searchString as varchar(max)) + ''', [Manufacturer]) > 0))'
END
END
EXEC (#sql)
Thanks again,
Dan

Resources