split alpha and numeric using sql - sql-server

I have a table with three columns. The first column holds a value combined with its unit (a number followed by a percentage sign, "ug/ML", etc.), the second is a numeric column, and the third is a unit column. What I want to do is split the number and the unit out of the first column and put each part into its designated column.
Here is my table:
I tried this function (SO link here...). It really does split alpha and numeric characters, but I'm new to SQL functions: my problem is that the parameter must be a string, so I changed the argument to a sub-query, and that gives me an error.
Sample COde:
SQL FUNCTION:
-- Inline table-valued function: returns one row (column Number, int) for each run of
-- consecutive digits found in @String, in order of appearance.
-- Parameter:
--   @String - text to scan (varchar(2000)).
-- Implemented as a recursive CTE, so the caller's statement is subject to the
-- recursion limit (the sample call uses OPTION (MAXRECURSION 0)).
create function [dbo].[GetNumbersFromText](@String varchar(2000))
returns table as return
(
    with C as
    (
        -- Anchor: extract the first digit run. A space is appended to the input so
        -- that the last digit run is always followed by a non-digit.
        --   S1.Pos = position of the first digit
        --   S2.L   = position of the first non-digit after that digit, which equals
        --            the length of the digit run
        select cast(substring(S.Value, S1.Pos, S2.L) as int) as Number,
               stuff(S.Value, 1, S1.Pos + S2.L, '') as Value   -- drop everything consumed so far
        from (select @String+' ') as S(Value)
        cross apply (select patindex('%[0-9]%', S.Value)) as S1(Pos)
        cross apply (select patindex('%[^0-9]%', stuff(S.Value, 1, S1.Pos, ''))) as S2(L)
        union all
        -- Recursive step: repeat on the remaining text while it still contains a digit.
        select cast(substring(S.Value, S1.Pos, S2.L) as int),
               stuff(S.Value, 1, S1.Pos + S2.L, '')
        from C as S
        cross apply (select patindex('%[0-9]%', S.Value)) as S1(Pos)
        cross apply (select patindex('%[^0-9]%', stuff(S.Value, 1, S1.Pos, ''))) as S2(L)
        where patindex('%[0-9]%', S.Value) > 0
    )
    select Number
    from C
)
SELECT STATEMENT with SUB Query:
-- Attempted call from the question: the column values are handed to the function
-- as a sub-query instead of a single string. The question reports that this form
-- raises an error.
-- #S is declared here but is not referenced by the select below.
declare #S varchar(max)
select number from GetNumbersFromText(Select SomeColm From Table_Name) option (maxrecursion 0)
BTW, im using sql server 2005.
Thanks!

If the numeric part is always at the beginning, then you can use this:
PATINDEX('%[0-9][^0-9]%', ConcUnit)
to get the index of the last digit.
Thus, this:
-- Splits a string that starts with a number into the number and the unit that follows it.
DECLARE @str VARCHAR(MAX)
-- Assigned with a separate SET: inline DECLARE initialisation is not available on
-- SQL Server 2005, which the question targets.
SET @str = '4000 ug/ML'
-- PATINDEX locates the first digit that is immediately followed by a non-digit,
-- i.e. the last digit of the leading number.
SELECT LEFT(@str, PATINDEX('%[0-9][^0-9]%', @str)) AS Number,
       LTRIM(RIGHT(@str, LEN(@str) - PATINDEX('%[0-9][^0-9]%', @str))) AS Unit
gives you:
Number Unit
-------------
4000 ug/ML
EDIT:
If numeric data include double values as well, then you can use this:
SELECT LEN(#str) - PATINDEX ('%[^0-9][0-9]%', REVERSE(#str))
to get the index of the last digit.
Thus, this:
SELECT LEFT(#str, LEN(#str) - PATINDEX ('%[^0-9][0-9]%', REVERSE(#str)))
gives you the numeric part.
And this:
-- Splits @str (declared by the caller) into a numeric part and a unit.
-- The reversed string is searched for the first "non-digit followed by a digit" pair;
-- for inputs that end in non-digit text, that position is the number of characters
-- that trail the last digit.
SELECT LEFT(@str, LEN(@str) - PATINDEX('%[^0-9][0-9]%', REVERSE(@str))) AS Numeric,
       CASE
           -- When a percent sign is present, the unit is whatever follows the first '%'.
           WHEN CHARINDEX('%', @str) <> 0 THEN LTRIM(RIGHT(@str, LEN(@str) - CHARINDEX('%', @str)))
           -- Otherwise the unit is the text after the last digit.
           ELSE LTRIM(RIGHT(@str, PATINDEX('%[^0-9][0-9]%', REVERSE(@str))))
       END AS Unit
gives you both numberic and unit part.
Here are some tests that I made with the data you have posted:
Input:
DECLARE #str VARCHAR(MAX) = '50 000ug/ML'
Output:
Numeric Unit
------------
50 000 ug/ML
Input:
DECLARE #str VARCHAR(MAX) = '99.5%'
Output:
Numeric Unit
------------
99.5
Input:
DECLARE #str VARCHAR(MAX) = '4000 . 35 % ug/ML'
Output:
Numeric Unit
------------------
4000 . 35 ug/ML

Here is my answer. Check output in SQLFiddle for the same.
-- Sample table holding the combined "number + unit" strings.
create TABLE temp
(
    string NVARCHAR(50)
)
INSERT INTO temp (string)
VALUES
    ('4000 ug\ml'),
    ('2000 ug\ml'),
    ('%'),
    ('ug\ml');

-- Step 1: find where the first non-digit and the first digit sit in each string.
-- Step 2: cut the string from each of those positions.
-- Step 3: keep only the leading digits of the numeric cut; the appended 't'
--         guarantees PATINDEX always finds a non-digit to stop at.
WITH positions AS
(
    SELECT string,
           PATINDEX('%[^0-9]%', string) AS posofchar,
           PATINDEX('%[0-9]%', string)  AS posofnumber
    FROM temp
),
parts AS
(
    SELECT SUBSTRING(string, posofchar, LEN(string))   AS subsrtunit,
           SUBSTRING(string, posofnumber, LEN(string)) AS subsrtnumeric
    FROM positions
)
SELECT subsrtunit,
       LEFT(subsrtnumeric, PATINDEX('%[^0-9]%', subsrtnumeric + 't') - 1)
FROM parts
Updated Version to handle 99.5 ug\ml
-- Second version: same approach as before, but '.' is added to the digit
-- character class so decimal values such as 99.5 are kept whole.
create TABLE temp
(
string NVARCHAR(50)
)
INSERT INTO temp (string)
VALUES
('4000 ug\ml'),
('2000 ug\ml'),
('%'),
('ug\ml'),
('99.5 ug\ml')
-- Outer query: keep only the leading digit/dot characters of the numeric cut;
-- the appended 't' guarantees PATINDEX finds a terminating character.
SELECT subsrtunit,LEFT(subsrtnumeric, PATINDEX('%[^0-9.]%', subsrtnumeric+'t') - 1)
FROM (
-- Middle query: cut the string from the first non-numeric and first numeric positions.
SELECT subsrtunit = SUBSTRING(string, posofchar, LEN(string)),
subsrtnumeric = SUBSTRING(string, posofnumber, LEN(string))
FROM (
-- Inner query: positions of the first character outside / inside the class [0-9.].
SELECT string, posofchar = PATINDEX('%[^0-9.]%', string),
posofnumber = PATINDEX('%[0-9.]%', string)
FROM temp
) d
) t
Updated Version: To handle 1 000 ug\ml,20 000ug\ml
-- Third version: additionally copes with spaces inside the number ('1 000', '20 000').
create TABLE temp
(
string NVARCHAR(50)
)
INSERT INTO temp (string)
VALUES
('4000 ug\ml'),
('2000 ug\ml'),
('%'),
('ug\ml'),
('99.5 ug\ml'),
('1 000 ug\ml'),
('20 000ug\ml')
-- Unit column: spaces are removed from the unit cut, then the text starting one
-- character after the first digit/dot (PATINDEX + 1) is returned.
-- Numeric column: spaces are removed from the numeric cut and only its leading
-- digit/dot characters are kept.
SELECT substring(replace(subsrtunit,' ',''),PATINDEX('%[0-9.]%', replace(subsrtunit,' ',''))+1,len(subsrtunit)),
LEFT(replace(subsrtnumeric,' ',''), PATINDEX('%[^0-9.]%', replace(subsrtnumeric,' ','')+'t') - 1)
FROM (
-- NOTE(review): the positions below are computed on the space-stripped string
-- but are applied here to the original string, which still contains its spaces.
SELECT subsrtunit = SUBSTRING(string, posofchar, LEN(string)),
subsrtnumeric = SUBSTRING(string, posofnumber, LEN(string))
FROM (
-- Positions of the first non-numeric / numeric character after removing spaces.
SELECT string, posofchar = PATINDEX('%[^0-9.]%', replace(string,' ','')),
posofnumber = PATINDEX('%[0-9.]%', replace(string,' ',''))
FROM temp
) d
) t
Check out SQLFiddle for the same.

Would something like this work? Based on the shown data it looks like it would.
Apply it to your data set as a select and if you like the results then you can make an update from it.
-- Demo rows standing in for the real table: one row with only a unit and an
-- existing numeric value, one row with "number unit" text and no numeric value.
WITH cte AS
(
    SELECT 'ug/mL' AS ConcUnit, 500 AS [Numeric], '' AS Unit
    UNION ALL
    SELECT '2000 ug/mL', NULL, ''
)
SELECT
    cte.[ConcUnit] AS [ConcUnit],
    cte.[Numeric]  AS [Original Numeric],
    cte.[Unit]     AS [Original Unit],
    -- Text before the first space becomes the number; rows without a space keep
    -- their existing [Numeric] value.
    CASE WHEN sp.pos > 0
         THEN LEFT(cte.ConcUnit, sp.pos - 1)
         ELSE cte.[Numeric]
    END AS [New Numeric],
    -- Text after the first space becomes the unit; rows without a space are all unit.
    CASE WHEN sp.pos > 0
         THEN SUBSTRING(cte.ConcUnit, sp.pos + 1, LEN(cte.ConcUnit))
         ELSE cte.ConcUnit
    END AS [New Unit]
FROM cte
CROSS APPLY (SELECT CHARINDEX(' ', cte.ConcUnit)) AS sp(pos)

Change @concunit and @unitx to your own value and unit, respectively:
-- Splits @concunit into the part before the known unit (@unitx) and the unit itself.
DECLARE @concunit varchar(10) = '45.5%'
DECLARE @unitx varchar(10) = '%'
-- Position of the unit inside the value; NULL when the unit is not present, so both
-- output columns come back NULL instead of SUBSTRING receiving a negative length.
DECLARE @unitpos int = NULLIF(CHARINDEX(@unitx, @concunit), 0)
SELECT RTRIM(SUBSTRING(@concunit, 1, @unitpos - 1)) AS Number,
       RTRIM(SUBSTRING(@concunit, @unitpos, LEN(@concunit) - (@unitpos - 1))) AS Unit

I had the same dilemma, but in my case the alpha's were in front of the numerics.
So, using the logic that @Giorgos Betsos added to his answer, I just reversed it.
I.e., when your input is :
abc123
You can split it like this:
-- Splits a string of the form <text><digits> into its text prefix and its trailing digits.
declare @input varchar(30) = 'abc123'
select
    -- Everything except the trailing digit run. The run is cut off by position
    -- rather than with REPLACE, so the same digits occurring earlier in the string are kept.
    reverse(stuff(reverse(@input), 1, n.digits, '')) Alpha
  , reverse(left(reverse(@input), n.digits)) Numeric
from (
    -- Length of the trailing digit run: position of the first non-digit in the
    -- reversed string, minus one. The appended 'x' makes an all-digit input work.
    select patindex('%[^0-9]%', reverse(@input) + 'x') - 1
) as n(digits)
Results :

Related

Multiple values stored in one column

I have a table that contains a column with settings, they're formatted like:
setting_name=setting_value|setting_name=setting_value|setting_name=setting_value
The thing is that it varies a lot on which settings have been filled. I would like to split all values and store them in a better way.
Currently it looks like this:
And I would like it to be:
To get there I used a function to split the values. Then I union them together and use a substring to get the setting_value that belongs to the setting_name. This is what I got so far:
/*
-- Scalar function: returns the @column-th piece (1-based) of @text when split on @separator.
-- Returns an empty string when @text has fewer than @column pieces.
create function [dbo].[split_to_columns](@text varchar(8000)
, @column tinyint
, @separator char(1))
returns varchar(8000)
as
begin
    declare @pos_start int = 1
    declare @pos_end int = charindex(@separator, @text, @pos_start)
    -- Walk forward one separator at a time until the requested piece is reached
    -- or no further separator exists.
    while (@column > 1 and @pos_end > 0)
    begin
        set @pos_start = @pos_end + 1
        set @pos_end = charindex(@separator, @text, @pos_start)
        set @column = @column - 1
    end
    -- Ran out of separators before reaching the requested piece: point past the end.
    if @column > 1 set @pos_start = len(@text) + 1
    -- The last piece has no closing separator: it runs to the end of the text.
    if @pos_end = 0 set @pos_end = len(@text) + 1
    return substring(@text, @pos_start, @pos_end - @pos_start)
end
*/
-- Sample data: each row packs several name=value pairs into one pipe-separated string.
create table #settings(id int, setting varchar(255))
insert into #settings(id, setting) values(1,'setting1=a|setting2=b|setting3=c')
insert into #settings(id, setting) values(2,'setting1=d|setting2=e')
insert into #settings(id, setting) values(3,'setting1=f|setting3=g')
insert into #settings(id, setting) values(4,'setting2=h')
;
-- cte: one row per (id, name=value piece) for the first three pieces of each string.
-- UNION (not UNION ALL) also collapses duplicate rows.
with cte as (
select id, dbo.split_to_columns(setting, 1, '|') as setting from #settings
union select id, dbo.split_to_columns(setting, 2, '|') from #settings
union select id, dbo.split_to_columns(setting, 3, '|') from #settings
)
-- One output row per id; each column is looked up with a correlated sub-query that
-- finds the piece starting with the setting name and returns the text after '='.
select distinct
x.id
, (select substring(setting, charindex('=', setting) + 1, 255) from cte where setting like 'setting1=%' and id = x.id) as setting1
, (select substring(setting, charindex('=', setting) + 1, 255) from cte where setting like 'setting2=%' and id = x.id) as setting2
, (select substring(setting, charindex('=', setting) + 1, 255) from cte where setting like 'setting3=%' and id = x.id) as setting3
from cte x
drop table #settings
Am I doing this in the right way? I can't help myself thinking that I am making it too complex. Though I am not a big fan of the way my settings are formatted right now, I do see it more often. Which means that more people have to do this trick...
Edit:
I am importing picture-properties into a database. The settings mentioned above are the picture-properties and the id is the name of the picture the settings belong to.
Example of settings in one column:
FullName=D:\8.jpg|FolderName=D:\|FileName=8.jpg|Size=7284351|Extension=.jpg|datePictureTaken=10-3-2017
11:53:38|ApertureValue=2|DateTime=10-3-2017
11:53:38|DateTimeDigitized=10-3-2017
11:53:38|DateTimeOriginal=10-3-2017
11:53:38|ExposureTime=0,0025706940874036|FocalLength=3,65|GPSAltitude=43|GPSDateStamp=10-3-2017
0:00:00|Model=QCAM-AA|ShutterSpeedValue=8,604
This is the reason I would like to have it formatted in the way described above.
I would convert the text into a basic chunk of XML so that we can then take a set-based approach to transforming the data into the results you want:
-- Sample data held in a table variable.
declare @settings table(id int, setting varchar(255))
insert into @settings (id,setting) values
(1,'setting1=a|setting2=b|setting3=c'),
(2,'setting1=d|setting2=e'),
(3,'setting1=f|setting3=g'),
(4,'setting2=h')

-- Xmlised:  rewrite 'name=value|name=value' as
--           <prob><setting name="name">value</setting>...</prob>
--           '&' and '<' are escaped first so such characters in a value do not
--           break the conversion to xml.
-- shredded: one row per <setting> element with its name attribute and text.
;with Xmlised (id,detail) as (
    select id,CONVERT(xml,'<prob><setting name="' +
        REPLACE(
            REPLACE(
                REPLACE(REPLACE(setting,'&','&amp;'),'<','&lt;'),
                '=','">'),
            '|','</setting><setting name="') + '</setting></prob>')
    from @settings
), shredded as (
    select
        x.id,
        S.value('./@name','varchar(50)') as name,
        S.value('./text()[1]','varchar(100)') as value
    from
        Xmlised x
        cross apply
        detail.nodes('prob/setting') as T(S)
)
-- Turn the known setting names into columns; names absent for an id yield NULL.
select
    id,setting1,setting2,setting3
from
    shredded
    pivot (MAX(value) for name in (setting1,setting2,setting3)) u
Hopefully I've broken it into enough steps that you can see what it's doing and how.
Results:
id setting1 setting2 setting3
----------- --------- --------- ---------
1 a b c
2 d e NULL
3 f NULL g
4 NULL h NULL
As Sean suggested in the comments though, I'd normally not consider storing the pivotted result and would generally skip that step
WITH is pretty slow. I would suggest table that would store setting name, value, and some kind of group id. For example:
CREATE TABLE [dbo].[settings_table](
[id] [int] NULL,
[group] [int] NULL,
[name] [nchar](10) NULL,
[value] [nchar](10) NOT NULL
) ON [PRIMARY]
I don't know exactly what your program is doing with those settings, but this structure would be much more efficient in long run.
I would do the following 3 steps:
1) Create a generic Split function. This is the one I use:
-- Multi-statement table-valued function that splits @StringToSplit on @Delimiter.
-- Returns (id, item):
--   id   - 1-based position of the piece in the input
--   item - the piece with leading and trailing spaces trimmed
-- Empty pieces are not returned, but they still consume an id, so ids can have gaps.
CREATE FUNCTION Split(
     @StringToSplit VARCHAR(MAX)
    ,@Delimiter VARCHAR(10)
)
RETURNS @SplitResult TABLE (id int, item VARCHAR(MAX))
BEGIN
    -- VARCHAR(MAX) to match the input and output types, so long pieces are not truncated.
    DECLARE @item VARCHAR(MAX)
    DECLARE @counter int = 1
    WHILE CHARINDEX(@Delimiter, @StringToSplit,0) <> 0
    BEGIN
        -- Take the text before the first delimiter as the current piece and keep
        -- the text after it as the remaining input.
        SELECT
            @item = RTRIM(LTRIM(SUBSTRING(@StringToSplit,1, CHARINDEX(@Delimiter,@StringToSplit,0)-1))),
            @StringToSplit = RTRIM(LTRIM(SUBSTRING(@StringToSplit, CHARINDEX(@Delimiter,@StringToSplit,0) + LEN(@Delimiter), LEN(@StringToSplit))))
        IF LEN(@item) > 0
            INSERT INTO @SplitResult SELECT @counter, @item
        SET @counter = @counter + 1
    END
    -- Whatever remains after the last delimiter is the final piece.
    IF LEN(@StringToSplit) > 0
        INSERT INTO @SplitResult SELECT @counter,@StringToSplit
    RETURN
END
GO
-- You use it like this
SELECT S.id, T.item FROM #settings AS S CROSS APPLY Split(S.setting, '|') AS T
2) Split the settings and separate the setting name from it's value.
-- One row per name=value piece of every settings string, with the piece broken
-- into the text before '=' (SettingName) and the text after it (SettingValue).
-- SettingValue is limited to 100 characters by the SUBSTRING length argument.
SELECT
S.id,
T.item,
SettingName = SUBSTRING(T.item, 1, CHARINDEX('=', T.item, 1) - 1), -- -1 to not include the "="
SettingValue = SUBSTRING(T.item, CHARINDEX('=', T.item, 1) + 1, 100) -- +1 to not include the "="
FROM
#settings AS S
CROSS APPLY Split(S.setting, '|') AS T
3) Pivot the known settings by name:
-- SplitValues: one row per (id, setting name, setting value).
;WITH SplitValues AS
(
    SELECT
        S.id,
        SettingName = SUBSTRING(T.item, 1, CHARINDEX('=', T.item, 1) - 1), -- -1 to not include the "="
        -- LEN(T.item) as the length takes the whole remainder, so long values are not cut off
        SettingValue = SUBSTRING(T.item, CHARINDEX('=', T.item, 1) + 1, LEN(T.item)) -- +1 to not include the "="
    FROM
        #settings AS S
        CROSS APPLY Split(S.setting, '|') AS T
)
-- Pivot the known setting names into columns; a name missing for an id yields NULL.
SELECT
    P.id,
    P.setting1,
    P.setting2,
    P.setting3
FROM
    SplitValues AS S
    PIVOT (
        -- The value and pivot columns are written without a table qualifier inside PIVOT.
        MAX(SettingValue) FOR SettingName IN ([setting1], [setting2], [setting3])
    ) AS P
For set columns (photo properties) I agree with columns in a row
Use the proper type e.g. DateTime, Int, Numeric as you can search on range, sort, and it is just more efficient.
I know you asked for SQL but I would do this in .NET as you are going to need to do some clean up like remove comma from an integer. In real life read lines from a file so you can leave the command (insert) open.
/// <summary>
/// Parses a pipe-separated list of "name = value" photo properties and writes each
/// pair to the debug output. A built-in sample is used when the input is null or empty.
/// </summary>
/// <param name="photo">Property string such as "FileName = 8.jpg | Size = 7284351".</param>
/// <exception cref="IndexOutOfRangeException">
/// A segment does not split into exactly one name and one value on '='.
/// </exception>
public static void ParsePhoto(string photo)
{
    if(string.IsNullOrEmpty(photo))
    {
        // Verbatim string (@"...") so the backslashes in the paths are taken literally.
        photo = @"FullName = D:\8.jpg | FolderName = D:\| FileName = 8.jpg | Size = 7284351 | Extension =.jpg | datePictureTaken = 10 - 3 - 2017 11:53:38 | ApertureValue = 2 | DateTime = 10 - 3 - 2017 11:53:38 | DateTimeDigitized = 10 - 3 - 2017 11:53:38 | DateTimeOriginal = 10 - 3 - 2017 11:53:38 | ExposureTime = 0,0025706940874036 | FocalLength = 3,65 | GPSAltitude = 43 | GPSDateStamp = 10 - 3 - 2017 0:00:00 | Model = QCAM - AA | ShutterSpeedValue = 8,604";
    }
    List<KeyValuePair<string, string>> kvp = new List<KeyValuePair<string, string>>();
    foreach(string s in photo.Trim().Split(new char[] {'|'}, StringSplitOptions.RemoveEmptyEntries))
    {
        string[] sp = s.Split(new char[] { '=' }, StringSplitOptions.RemoveEmptyEntries);
        // Array.Length instead of the LINQ Count() extension: no System.Linq needed.
        if (sp.Length == 2)
        {
            kvp.Add(new KeyValuePair<string, string>(sp[0].Trim(), sp[1].Trim()));
        }
        else
        {
            throw new IndexOutOfRangeException("bad photo");
        }
    }
    foreach(KeyValuePair<string, string> pair in kvp)
    {
        Debug.WriteLine($"{pair.Key} = {pair.Value}");
        //build up and execute insert statement here
    }
    Debug.WriteLine("Done");
}
FullName = D:\8.jpg
FolderName = D:\
FileName = 8.jpg
Size = 7284351
Extension = .jpg
datePictureTaken = 10 - 3 - 2017 11:53:38
ApertureValue = 2
DateTime = 10 - 3 - 2017 11:53:38
DateTimeDigitized = 10 - 3 - 2017 11:53:38
DateTimeOriginal = 10 - 3 - 2017 11:53:38
ExposureTime = 0,0025706940874036
FocalLength = 3,65
GPSAltitude = 43
GPSDateStamp = 10 - 3 - 2017 0:00:00
Model = QCAM - AA
ShutterSpeedValue = 8,604
If performance is important you can get this done easily without a splitter function, casting the data as XML or doing any pivoting. This technique is commonly referred to as the Cascading CROSS APPLY. The code is a little more verbose but the performance pay-off is amazing. First the solution:
-- Extracts setting1..setting3 from the pipe-separated "name=value" string without
-- a splitter function.
--   sN.p : position of the '=' that follows 'settingN' (NULL when that setting is absent)
--   xN.x : length of the value = distance from that '=' to the next '|', or to the
--          end of the string when the setting is the last one
SELECT
    id,
    setting1 = substring(setting, s1.p+1, x1.x),
    setting2 = substring(setting, s2.p+1, x2.x),
    setting3 = substring(setting, s3.p+1, x3.x)
FROM #settings t
CROSS APPLY (VALUES (nullif(charindex('setting1=', t.setting),0)+8)) s1(p)
CROSS APPLY (VALUES (nullif(charindex('setting2=', t.setting),0)+8)) s2(p)
CROSS APPLY (VALUES (nullif(charindex('setting3=', t.setting),0)+8)) s3(p)
-- When no '|' follows, fall back to one past the end of the string so the whole
-- remaining value is returned rather than a single character.
CROSS APPLY (VALUES (isnull(nullif(charindex('|',t.setting,s1.p),0), len(t.setting)+1)-s1.p-1)) x1(x)
CROSS APPLY (VALUES (isnull(nullif(charindex('|',t.setting,s2.p),0), len(t.setting)+1)-s2.p-1)) x2(x)
CROSS APPLY (VALUES (isnull(nullif(charindex('|',t.setting,s3.p),0), len(t.setting)+1)-s3.p-1)) x3(x);
Note the execution plans:
I don't have time to put together a performance test but, based on the execution plans - the cascading cross apply technique is roughly 44,000 times faster.
Try this:
-- Sample data held in a table variable.
declare @table table (id int, setting varchar(100))
insert into @table (id, setting) values
(1,'setting1=a|setting2=b|setting3=c'),
(2,'setting1=d|setting2=e'),
(3,'setting1=f|setting3=g'),
(4,'setting2=h')

-- For each known setting name: NULL when the name is absent, otherwise the text
-- between 'settingN=' (9 characters) and the next '|'. A '|' is appended to the
-- searched string so the last setting is also terminated, and values of any
-- length are returned.
select id,
       case when p.s1 = 0 then null
            else substring(setting, p.s1 + 9, charindex('|', setting + '|', p.s1 + 9) - (p.s1 + 9)) end [setting1],
       case when p.s2 = 0 then null
            else substring(setting, p.s2 + 9, charindex('|', setting + '|', p.s2 + 9) - (p.s2 + 9)) end [setting2],
       case when p.s3 = 0 then null
            else substring(setting, p.s3 + 9, charindex('|', setting + '|', p.s3 + 9) - (p.s3 + 9)) end [setting3]
from @table
cross apply (select charindex('setting1=', setting),
                    charindex('setting2=', setting),
                    charindex('setting3=', setting)) as p(s1, s2, s3)

TSQL: How insert separator between each character in a string

I have a string like this:
Apple
I want to include a separator after each character so the end result will turn out like this:
A,p,p,l,e
In C#, we have one liner method to achieve the above with Regex.Replace('Apple', ".{1}", "$0,");
I can only think of looping each character with charindex to append the separator but seems a little complicated. Is there any elegant way and simpler way to achieve this?
Thanks HABO for the suggestions. I'm able to generate the result that I want using the code but takes a little bit of time to really understand how the code work.
After some searching, I manage to found one useful article to insert empty spaces between each character and it's easier for me to understand.
I modify the code a little to define and include desire separator instead of fixing it to space as the separator:
-- Inserts @separator between every pair of adjacent characters of @result.
DECLARE @pos INT = 2 -- location where we want the first separator
DECLARE @result VARCHAR(100) = 'Apple'
DECLARE @separator nvarchar(5) = ','
-- Distance between insertion points: one original character plus the separator.
-- DATALENGTH / 2 is the nvarchar character count (LEN would ignore trailing spaces),
-- so separators longer than one character are handled too.
DECLARE @step INT = 1 + DATALENGTH(@separator) / 2
WHILE @pos < LEN(@result)+1
BEGIN
    SET @result = STUFF(@result, @pos, 0, @separator);
    SET @pos = @pos + @step;
END
select @result; -- Output: A,p,p,l,e
Reference
In following SQL scripts, I get each character using SUBSTRING() function using with a number table (basically I used spt_values view here for simplicity) and then I concatenate them via two different methods, you can choose one
If you are using SQL Server 2017, we have a new SQL string aggregation function
First script uses string_agg function
-- Joins the characters of @str with commas. One row per character position is
-- produced from the numbers in master..spt_values (Type = 'P'), then aggregated
-- back in position order.
declare @str nvarchar(max) = 'Apple'
SELECT
    string_agg( substring(@str,number,1) , ',') Within Group (Order By number)
FROM master..spt_values n
WHERE
    Type = 'P' and
    Number between 1 and len(@str)
If you are working with a previous version, you can use string concatenation using FOR XML Path and SQL Stuff function as follows
-- Joins the characters of @str with commas using FOR XML PATH concatenation.
declare @str nvarchar(max) = 'Apple'
-- cte: one row per character position of @str.
; with cte as (
    SELECT
        number,
        substring(@str,number,1) as L
    FROM master..spt_values n
    WHERE
        Type = 'P' and
        Number between 1 and len(@str)
)
SELECT
    -- STUFF removes the leading comma produced by the concatenation.
    STUFF(
        (
            SELECT
                ',' + L
            FROM cte
            order by number
            -- TYPE + value() returns the text itself, so characters such as
            -- '&' or '<' are not left as XML entities in the result.
            FOR XML PATH(''), TYPE
        ).value('.', 'nvarchar(max)'), 1, 1, ''
    )
Both solution yields the same result, I hope it helps
If you have SQL Server 2017 and a copy of ngrams8k it's ultra simple:
-- Splits @word into single-character tokens with dbo.ngrams8k and joins them
-- with commas in position order.
declare @word varchar(100) = 'apple';
select newString = string_agg(token, ',') within group (order by position)
from dbo.ngrams8k(@word,1);
For pre-2017 systems it's almost as simple:
-- Same result without string_agg: each token is followed by a comma, except the
-- last one (where len(@word)+1-position equals 1), and FOR XML PATH('')
-- concatenates the pieces in position order.
declare @word varchar(100) = 'apple';
select newstring =
( select token + case len(@word)+1-position when 1 then '' else ',' end
  from dbo.ngrams8k(@word,1)
  order by position
  for xml path(''))
One ugly way to do it is to split the string into characters, ideally using a numbers table, and reassemble it with the desired separator.
A less efficient implementation uses recursion in a CTE to split the characters and insert the separator between pairs of characters as it goes:
-- Builds the separated string with a recursive CTE: odd positions hold the
-- characters of @Sample, even positions hold @Separator.
declare @Sample as VarChar(20) = 'Apple';
declare @Separator as Char = ',';
with Characters as (
    -- Anchor: position 1 is the first character.
    select 1 as Position, Substring( @Sample, 1, 1 ) as Character
    union all
    -- Position here is the previous row's position: after an odd position comes a
    -- separator, after an even position comes the next character of @Sample.
    select Position + 1,
        case when Position & 1 = 1 then @Separator else Substring( @Sample, Position / 2 + 1, 1 ) end
    from Characters
    -- n characters and n - 1 separators give 2n - 1 rows in total.
    where Position < 2 * Len( @Sample ) - 1 )
select Stuff( ( select Character + '' from Characters order by Position for XML Path( '' ) ), 1, 0, '' ) as Result;
You can replace the select Stuff... line with select * from Characters; to see what's going on.
Try this
-- Builds the comma-separated string one character per recursion step.
declare @var varchar(50) ='Apple'
;WITH CTE
AS
(
    -- Anchor: nothing consumed yet.
    -- OpStr is VARCHAR(100): a 50-character input needs up to 100 characters
    -- once a comma follows every character.
    SELECT
        SeqNo = 1,
        MyStr = @var,
        OpStr = CAST('' AS VARCHAR(100))
    UNION ALL
    -- Each step appends the character at SeqNo followed by a comma.
    SELECT
        SeqNo = SeqNo+1,
        MyStr = MyStR,
        OpStr = CAST(ISNULL(OpStr,'')+SUBSTRING(MyStR,SeqNo,1)+',' AS VARCHAR(100))
    FROM CTE
    WHERE SeqNo <= LEN(@var)
)
-- The final row (SeqNo = length + 1) holds the complete string; drop its trailing comma.
SELECT
    OpStr = LEFT(OpStr,LEN(OpStr)-1)
FROM CTE
WHERE SeqNo = LEN(@Var)+1

How to find UPPER case characters in a string and replace them with a space with SQL or SSRS

I have a column which has string values with mixed upper and lower case characters like (AliBabaSaidHello). I want to use this column values for my SSRS table cell headers like (Ali Baba Said Hello). First, I like to find each UPPER case letter and add space to it.
Ascii 65-90 tip was helpful for creating below code for a function:
-- Walks the source string one character at a time and puts a space in front of
-- every upper-case ASCII letter (codes 65-90) except the first character, so the
-- result does not start with a space.
declare @Source varchar(8000);
declare @Ret varchar(8000);
declare @i int;
declare @c char(1);
select @Source = 'AliBabaSaidHello', @i = 1, @Ret = '';
while (@i <= len(@Source))
    -- The assignments rely on @c being set before it is read and on @i being
    -- incremented last.
    select @c = substring(@Source, @i, 1),
           @Ret = @Ret + case when @i > 1 and ascii(@c) between 65 and 90 then ' ' + @c else @c end,
           @i = @i + 1
select @Ret
Thanks all, after Reading all the answers, I created this flexible and very efficient function:
-- Scalar function: inserts @Delimiter in front of every upper-case letter A-Z of
-- @string, except a capital in the first position.
-- Parameters:
--   @string    - text to process
--   @Delimiter - single character to insert
-- Returns the delimited text.
CREATE FUNCTION dbo.UDF_DelimitersForCases (@string NVARCHAR(MAX), @Delimiter char(1))
RETURNS NVARCHAR(MAX)
AS
BEGIN
    DECLARE @iterator INT = 2 --Don't put space to left of first even if it's a capital
    ;
    WHILE @iterator <= LEN(@string)
    BEGIN
        -- The capitals are listed explicitly and compared under a case-sensitive
        -- collation so that only upper-case letters match.
        IF PATINDEX('[ABCDEFGHIJKLMNOPQRSTUVWXYZ]',SUBSTRING(@string,@iterator,1) COLLATE Latin1_General_CS_AI) <> 0
        BEGIN
            SET @string = STUFF(@string,@iterator,0,@Delimiter);
            -- Skip over the delimiter that was just inserted.
            SET @iterator += 1;
        END
        ;
        SET @iterator += 1;
    END
    RETURN @string;
END
;
GO
Example:
SELECT dbo.udf_DelimitersForCases('AliBabaSaidHello','_');
Returns "Ali_Baba_Said_Hello" (no quotes).
get chars one by one like "A" , "l" , "i", and look whether returning value of method ascii('&i_char') is between 65 and 90, those are "capital letters".
( ascii('A')=65(capital), ascii('l')=108(non-capital), ascii('i')=105(non-capital) )
Use case sensitive collation for your qry and combine with like for each of character. When you itterate characters you can easily replace upper characters for upper char + space.
WHERE SourceText COLLATE Latin1_General_CS_AI like '[A-Z]'
-- or for variable #char COLLATE Latin1_General_CS_AI = upper(#char)
The important in Latin1_General_CS_AI where "CS" is Case sensitive.
If you want to make this reusable for some reason, here's the code to make a user function to call.
-- Drop, create and call use the same schema-qualified, identically-cased name.
DROP FUNCTION IF EXISTS dbo.udf_SpacesForCases;
GO
-- Scalar function: inserts a space in front of every upper-case letter A-Z of
-- @string, except a capital in the first position.
CREATE FUNCTION dbo.udf_SpacesForCases (@string NVARCHAR(MAX))
RETURNS NVARCHAR(MAX)
AS
BEGIN
    DECLARE @iterator INT = 2 --Don't put space to left of first even if it's a capital
    ;
    WHILE @iterator <= LEN(@string)
    BEGIN
        -- The capitals are listed explicitly and compared under a case-sensitive
        -- collation so that only upper-case letters match.
        IF PATINDEX('[ABCDEFGHIJKLMNOPQRSTUVWXYZ]',SUBSTRING(@string,@iterator,1) COLLATE Latin1_General_CS_AI) <> 0
        BEGIN
            SET @string = STUFF(@string,@iterator,0,' ');
            -- Skip over the space that was just inserted.
            SET @iterator += 1;
        END
        ;
        SET @iterator += 1;
    END
    RETURN @string;
END
;
GO
SELECT dbo.udf_SpacesForCases('AliBabaSaidHello');
Any solution that involves a scalar user defined function and/or a loop will not perform as well as a set-based solution. This is a cake walk using using NGrams8K:
-- Splits @string into single characters with dbo.NGrams8k, prefixes every
-- upper-case ASCII letter except the first character with a space, and
-- concatenates the pieces again.
DECLARE @string varchar(1000) = 'AliBabaSaidHello';
SELECT newString =
( SELECT
    CASE
      WHEN ASCII(token) BETWEEN 65 AND 90 AND position > 1 THEN ' '+token ELSE token
    END+''
  FROM dbo.NGrams8k(@string, 1)
  -- Explicit ordering so the characters are reassembled in their original order.
  ORDER BY position
  FOR XML PATH(''));
Returns: "Ali Baba Said Hello" (no quotes).
Note that the there is not a space before the first character. Alternatively, a set-based solution that doesn't use the function would look like this:
-- Set-based version without a helper function; expects @string to be declared
-- earlier in the same batch.
--   E1     : 10 rows
--   iTally : the numbers 1..LEN(@string), taken from the 10,000-row cross join of E1
--   nGrams : the characters of @string, each upper-case ASCII letter after the
--            first position prefixed with a space, concatenated in order
WITH
E1(N) AS (SELECT 1 FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1))t(c)),
iTally(N) AS
(
    SELECT TOP (LEN(@string)) ROW_NUMBER() OVER (ORDER BY (SELECT 1))
    FROM E1 a CROSS JOIN E1 b CROSS JOIN E1 c CROSS JOIN E1 d
),
nGrams(NewString) AS
(
    SELECT
        CASE WHEN ASCII(SUBSTRING(@string, N, 1)) BETWEEN 65 AND 90 AND N > 1
             THEN ' '+SUBSTRING(@string, N, 1) ELSE SUBSTRING(@string, N, 1)
        END+''
    FROM iTally
    -- Explicit ordering so the characters are reassembled in their original order.
    ORDER BY N
    FOR XML PATH('')
)
SELECT NewString
FROM nGrams;
The APL approach is to split the input into characters, pad the characters as needed, then reassemble the string. In T-SQL it would look rather like this:
-- Sample data.
-- Sample data held in a table variable.
declare @Samples as Table ( Sample VarChar(32) );
insert into @Samples ( Sample ) values ( 'AliBabaSaidHello' ), ( 'MeshMuscleShirt' );
select * from @Samples;
-- Stuff it.
--   Ten / TenUp2 / TenUp4 : row generators with 10, 100 and 10,000 rows
--   Numbers               : the numbers 1..10,000
--   Characters            : one row per character of every sample; upper-case
--                           A-Z after the first position is prefixed with a space
with
    Ten ( Number ) as ( select Number from ( values (0), (1), (2), (3), (4), (5), (6), (7), (8), (9) ) as Digits( Number ) ),
    TenUp2 ( Number ) as ( select 42 from Ten as L cross join Ten as R ),
    TenUp4 ( Number ) as ( select 42 from TenUp2 as L cross join TenUp2 as R ),
    Numbers ( Number ) as ( select Row_Number() over ( order by ( select NULL ) ) from TenUp4 ),
    Characters ( Sample, Number, PaddedCh ) as (
        select S.Sample, N.Number, PC.PaddedCh
        from @Samples as S inner join
            Numbers as N on N.Number <= Len( S.Sample ) cross apply
            ( select SubString( S.Sample, N.Number, 1 ) as Ch ) as SS cross apply
            ( select case when N.Number > 1 and ASCII( 'A' ) <= ASCII( SS.Ch ) and ASCII( SS.Ch ) <= ASCII( 'Z' ) then ' ' + Ch else Ch end as PaddedCh ) as PC )
-- Reassemble each sample from its padded characters in position order.
select S.Sample,
    ( select PaddedCh from Characters where Sample = S.Sample order by Number for XML path(''), type).value('.[1]', 'VarChar(max)' ) as PaddedSample
from @Samples as S
order by Sample;
Another option (quite verbose) might be:
-- Prefixes every upper-case letter with a space by chaining one REPLACE per letter.
-- The case-sensitive collation on the input makes each REPLACE match capitals only.
-- LTRIM removes the space that would otherwise precede a capital in first position.
SELECT LTRIM(
REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
'AliBabaSaidHello' COLLATE Latin1_General_CS_AS
,'A',' A'),'B',' B'),'C',' C'),'D',' D'),'E',' E'),'F',' F'),'G',' G'),'H',' H'),'I',' I'),'J',' J'),'K',' K'),'L',' L'),'M',' M')
,'N',' N'),'O',' O'),'P',' P'),'Q',' Q'),'R',' R'),'S',' S'),'T',' T'),'U',' U'),'V',' V'),'W',' W'),'X',' X'),'Y',' Y'),'Z',' Z')
)

T-SQL - Update first letter in each word of a string that are not 'or', 'of' or 'and' to uppercase. Lowercase 'or', 'of' or 'and' if found

Given the below table and data:
-- Recreate the temp table used for the example (dropped first if it already exists).
IF OBJECT_ID('tempdb..#Temp') IS NOT NULL
DROP TABLE #Temp
CREATE TABLE #Temp
(
ID INT,
Code INT,
PDescription VARCHAR(2000)
)
-- Sample descriptions with mixed casing of the words 'and' / 'or'.
INSERT INTO #Temp
(ID,
Code,
PDescription)
VALUES (1,0001,'c and d, together'),
(2,0002,'equals or Exceeds $27.00'),
(3,0003,'Fruit Evaporating Or preserving'),
(4,0004,'Domestics And domestic Maintenance'),
(5,0005,'Bakeries and cracker')
SELECT *
FROM #Temp
DROP TABLE #Temp
Output:
ID Code PDescription
1 1 c and d, together
2 2 equals or Exceeds $27.00
3 3 Fruit Evaporating Or preserving
4 4 Domestics And domestic Maintenance
5 5 Bakeries and cracker
I need a way to achieve the below update to the description field:
ID Code PDescription
1 1 C and D, Together
2 2 Equals or Exceeds $27.00
3 3 Fruit Evaporating or Preserving
4 4 Domestics and Domestic Maintenance
5 5 Bakeries and Cracker
If you fancied going the SQL CLR route the function could look something like
using System.Data.SqlTypes;
using System.Text.RegularExpressions;

public partial class UserDefinedFunctions
{
    // A "word" is a run of one or more word characters or apostrophes.
    private static readonly Regex _regex = new Regex("[\\w']+");

    /// <summary>
    /// SQL CLR scalar function: lower-cases the input, then upper-cases the first
    /// character of every word except "or", "of" and "and".
    /// A SQL NULL input produces a SQL NULL result.
    /// </summary>
    [Microsoft.SqlServer.Server.SqlFunction]
    public static SqlString ProperCase(SqlString subjectString)
    {
        if (subjectString.IsNull)
        {
            return new SqlString((string)null);
        }

        string lowered = subjectString.ToString().ToLowerInvariant();
        return new SqlString(_regex.Replace(lowered, CapitaliseUnlessConnective));
    }

    // Replacement callback: leaves the three connective words as they are and
    // upper-cases the first character of any other word.
    private static string CapitaliseUnlessConnective(Match match)
    {
        string word = match.Value;
        if (word == "or" || word == "of" || word == "and")
        {
            return word;
        }
        return char.ToUpper(word[0]) + word.Substring(1);
    }
}
Doubtless there may be Globalization issues in the above but it should do the job for English text.
You could also investigate TextInfo.ToTitleCase but that still leaves you needing to handle your exceptions.
The following function is not the most elegant of solutions but should do what you want.
-- Scalar function: upper-cases the first character of every word in @textValue
-- and lower-cases the rest, then forces the standalone words "And", "Or" and
-- "Of" to lower case.
-- Parameter:
--   @textValue - text to convert (up to 2000 characters)
-- Returns the converted text with leading and trailing spaces removed.
ALTER FUNCTION [dbo].[ToProperCase](@textValue AS NVARCHAR(2000))
RETURNS NVARCHAR(2000)
AS
BEGIN
    DECLARE @reset BIT;             -- 1 when the next character starts a new word
    DECLARE @properCase NVARCHAR(2000);
    DECLARE @index INT;
    DECLARE @character NCHAR(1);
    SELECT @reset = 1, @index=1, @properCase = '';
    WHILE (@index <= len(@textValue))
    BEGIN
        -- Append the current character in the right case, then decide whether the
        -- next character starts a word: any character outside the listed set
        -- (letters, backslash, apostrophe) ends the current word.
        SELECT @character= substring(@textValue,@index,1),
               @properCase = @properCase + CASE WHEN @reset=1 THEN UPPER(@character) ELSE LOWER(@character) END,
               @reset = CASE WHEN @character LIKE N'[a-zA-Z\'']' THEN 0 ELSE 1 END,
               @index = @index +1
    END
    -- Pad with spaces so that words at the very start or end are also matched
    -- by the space-delimited replacements below.
    SET @properCase = N' ' + @properCase + N' ';
    SET @properCase = REPLACE(@properCase, N' And ', N' and ');
    SET @properCase = REPLACE(@properCase, N' Or ', N' or ');
    SET @properCase = REPLACE(@properCase, N' Of ', N' of ');
    RETURN RTRIM(LTRIM(@properCase))
END
Example use:
-- Recreate the temp table used for the example (dropped first if it already exists).
IF OBJECT_ID('tempdb..#Temp') IS NOT NULL
DROP TABLE #Temp
CREATE TABLE #Temp
(
ID INT,
Code INT,
PDescription VARCHAR(2000)
)
-- Sample rows; the first one ends in 'and' to exercise a connective at the end of the text.
INSERT INTO #Temp
(ID,
Code,
PDescription)
VALUES (1,0001, N'c and d, together and'),
(2,0002, N'equals or Exceeds $27.00'),
(3,0003, N'Fruit Evaporating Or preserving'),
(4,0004, N'Domestics And domestic Maintenance'),
(5,0005, N'Bakeries and cracker')
-- Apply the function while reading; the stored values are left unchanged.
SELECT ID, Code, dbo.ToProperCase(PDescription) AS [Desc]
FROM #Temp
DROP TABLE #Temp
If you want to convert your text to proper case before inserting into table, then simply call function as follow:
-- Variant that converts each description with dbo.ToProperCase before it is stored.
INSERT INTO #Temp
(ID,
Code,
PDescription)
VALUES (1,0001, dbo.ToProperCase( N'c and d, together and')),
(2,0002, dbo.ToProperCase( N'equals or Exceeds $27.00')),
(3,0003, dbo.ToProperCase( N'Fruit Evaporating Or preserving')),
(4,0004, dbo.ToProperCase( N'Domestics And domestic Maintenance')),
(5,0005, dbo.ToProperCase( N'Bakeries and cracker'))
This is a dramatically modified version of my Proper UDF. The good news is you may be able to process the entire data-set in ONE SHOT rather than linear.
Take note of @OverR (the override list).
-- Sample data held in a table variable.
Declare @Table table (ID int,Code int,PDescription varchar(150))
Insert into @Table values
(1,1,'c and d, together'),
(2,2,'equals or Exceeds $27.00'),
(3,3,'Fruit Evaporating Or preserving'),
(4,4,'Domestics And domestic Maintenance'),
(5,5,'Bakeries and cracker')
-- Generate Base Mapping Table - Can be an Actual Table
--   @Pattn : strings after which a letter is capitalised
--   @Alpha : the capital letters A-Z
--   @OverR : words forced back to lower case (applied last, MapSeq 99)
Declare @Pattn table (Key_Value varchar(25));Insert into @Pattn values (' '),('-'),('_'),(','),('.'),('&'),('#'),(' Mc'),(' O''') -- ,(' Mac')
Declare @Alpha table (Key_Value varchar(25));Insert Into @Alpha values ('A'),('B'),('C'),('D'),('E'),('F'),('G'),('H'),('I'),('J'),('K'),('L'),('M'),('N'),('O'),('P'),('Q'),('R'),('S'),('T'),('U'),('V'),('W'),('X'),('Y'),('Z')
Declare @OverR table (Key_Value varchar(25));Insert Into @OverR values (' and '),(' or '),(' of ')
Declare @Map Table (MapSeq int,MapFrom varchar(25),MapTo varchar(25))
-- Every pattern is combined with every capital letter. MapFrom and MapTo are the
-- same string; NOTE(review): the replacement presumably relies on a
-- case-insensitive collation to match the lower-cased text - confirm.
Insert Into @Map
Select MapSeq=1,MapFrom=A.Key_Value+B.Key_Value,MapTo=A.Key_Value+B.Key_Value From @Pattn A Join @Alpha B on 1=1
Union All
Select MapSeq=99,MapFrom=A.Key_Value,MapTo=A.Key_Value From @OverR A
-- Convert Base Data Into XML (each description lower-cased and padded with spaces)
Declare @XML xml
Set @XML = (Select KeyID=ID,String=' '+lower(PDescription)+' ' from @Table For XML RAW)
-- Convert XML to varchar(max) for Global Search & Replace
Declare @String varchar(max)
Select @String = cast(@XML as varchar(max))
-- One REPLACE per mapping row, accumulated into @String.
Select @String = Replace(@String,MapFrom,MapTo) From @Map Order by MapSeq
-- Convert Back to XML
Select @XML = cast(@String as XML)
-- Generate Final Results
Select KeyID = t.col.value('@KeyID', 'int')
      ,NewString = ltrim(rtrim(t.col.value('@String', 'varchar(150)')))
From @XML.nodes('/row') AS t (col)
Order By 1
Returns
KeyID NewString
1 C and D, Together
2 Equals or Exceeds $27.00
3 Fruit Evaporating or Preserving
4 Domestics and Domestic Maintenance
5 Bakeries and Cracker
You don't even need functions and temporary objects. Take a look at this query:
-- Recursive CTE that processes each description one word per step.
--   done    : text already converted
--   process : the word being converted in the next step
--   waiting : the text still to be processed
WITH Processor AS
(
-- Anchor: nothing done yet; the first word (text before the first space) is
-- queued in "process" and the rest of the description in "waiting".
-- NOTE(review): a description without any space makes CHARINDEX return 0 and the
-- LEFT length -1 - confirm single-word descriptions cannot occur.
SELECT ID, Code, 1 step,
CONVERT(nvarchar(MAX),'') done,
LEFT(PDescription, CHARINDEX(' ', PDescription, 0)-1) process,
SUBSTRING(PDescription, CHARINDEX(' ', PDescription, 0)+1, LEN(PDescription)) waiting
FROM #temp
UNION ALL
-- Recursive step: append a space and the converted word to "done" (lower-cased
-- when it is 'and', 'or' or 'of', otherwise first letter upper and the rest lower),
-- then move the next word from "waiting" into "process". When "waiting" has no
-- space left, all of it becomes the next word and "waiting" becomes NULL.
SELECT ID, Code, step+1,
done+' '+CASE WHEN process IN ('and', 'or', 'of') THEN LOWER(process) ELSE UPPER(LEFT(process, 1))+LOWER(SUBSTRING(process, 2, LEN(process))) END,
CASE WHEN CHARINDEX(' ', waiting, 0)>0 THEN LEFT(waiting, CHARINDEX(' ', waiting, 0)-1) ELSE waiting END,
CASE WHEN CHARINDEX(' ', waiting, 0)>0 THEN SUBSTRING(waiting, CHARINDEX(' ', waiting, 0)+1, LEN(waiting)) ELSE NULL END FROM Processor
-- Recursion stops once there is no word left to process.
WHERE process IS NOT NULL
)
-- Keep, for every ID, only the row of the last step, which holds the complete text.
SELECT ID, Code, done PSDescription FROM
(
SELECT *, ROW_NUMBER() OVER (PARTITION BY ID ORDER BY step DESC) RowNum FROM Processor
) Ordered
WHERE RowNum=1
ORDER BY ID
It produces desired result as well. You can SELECT * FROM Processor to see all steps executed.

SQL Server : select all after specific character

How can I select:
"ALT1" if the value is "W61N03D20V0-WHIH-ALT1"
"ALT2" for "W61N03D20V0-WHIH-ALT2"
"SW" for "W61N03D20V0-WHIH-SW"
"Default" for "W61N26D1YA1-VICU" (no second suffix)
"Default" for "W61N27D21V2-AZTD"
In other words, I'm looking for a way to extract the last part after the second hyphen, and to return a default when there is no second suffix.
Thanks for advice
Try it like this:
First you "split" the string on its minus signs with the XML trick.
Then you read the third node from you XML - voila!
-- Sample data in a temp table.
CREATE TABLE #tbl(content VARCHAR(100));
INSERT INTO #tbl VALUES('W61N03D20V0-WHIH-ALT1')
,('W61N03D20V0-WHIH-SW')
,('W61N26D1YA1-VICU');
-- Turn 'a-b-c' into <x>a</x><x>b</x><x>c</x> by replacing every hyphen with a
-- closing and an opening tag, then cast the result to XML.
WITH SplittedAsXml AS
(
SELECT CAST('<x>' + REPLACE(content,'-','</x><x>') + '</x>' AS XML) AS Content
FROM #tbl
)
-- Read the third <x> element; when it does not exist the value is NULL and
-- 'default' is returned instead.
SELECT ISNULL(Content.value('/x[3]','varchar(max)'),'default') AS TheThirdPart
FROM SplittedAsXml;
DROP TABLE #tbl;
The result
ALT1
SW
default
Going this ways would also give you the chance to get the other parts in one go just querying /x[1] and /x[2] too
I did it using the built-in substring() function:
-- Returns the text after the second separator, or 'Default' when there is no
-- second separator.
declare @str VARCHAR(40) = 'W61N03D20V0-WHIH-ALT1' -- also works for the other examples
declare @sep VARCHAR(1) = '-'
-- Everything after the first separator.
declare @middleToEnd VARCHAR(40) = substring(@str, charindex(@sep, @str) + 1, len(@str))
-- Position of the next separator inside that remainder (0 when there is none).
declare @pos INT = charindex(@sep, @middleToEnd)
declare @lastPart VARCHAR(40) =
    CASE WHEN @pos = 0
         THEN 'Default'
         ELSE substring(@middleToEnd, @pos + 1, len(@middleToEnd))
    END
select @lastPart
For best performance, you can solve it with this one-liner(calculation is one line)
-- The inner CHARINDEX finds the first hyphen; the outer one searches from the
-- character after it and so finds the second hyphen (0 when there is none).
-- NULLIF turns that 0 into NULL, which makes STUFF return NULL and COALESCE fall
-- back to 'Default'. Otherwise STUFF deletes everything up to and including the
-- second hyphen, leaving the last part.
SELECT
COALESCE(STUFF(col,1,NULLIF(CHARINDEX('-',col, CHARINDEX('-',col)+1), 0),''),'Default')
FROM (values
('W61N03D20V0-WHIH-ALT1'),('W61N03D20V0-WHIH-ALT2'),
('W61N03D20V0-WHIH-SW'),('W61N26D1YA1-VICU'),
('W61N27D21V2-AZTD')) x(col)
Result:
ALT1
ALT2
SW
Default
Default
If I understand what you are asking for, the following does what you need:
-- fake table
-- Stand-in rows for the real table.
WITH SomeTable AS
(
    SELECT 'W61N03D20V0-WHIH-ALT1' AS Field1
    UNION ALL
    SELECT 'W61N03D20V0-WHIH-SW'
    UNION ALL
    SELECT 'W61N26D1YA1-VICU'
)
-- Return the text that follows the 6-character marker '-WHIH-', or 'Default'
-- when the marker does not occur in the value.
SELECT
    CASE
        WHEN marker.pos = 0 THEN 'Default'
        ELSE SUBSTRING(t.Field1, marker.pos + 6, LEN(t.Field1) - (marker.pos + 5))
    END
FROM SomeTable AS t
CROSS APPLY (SELECT CHARINDEX('-WHIH-', t.Field1)) AS marker(pos)
Use can use a CASE expression to check whether the string starts with W61N03D20V0-WHIH.
If it starts with it use a combination of RIGHT, REVERSE and CHARINDEX functions to get last part from the string, else Default.
Query
-- For values starting with 'W61N03D20V0-WHIH', return the text after the last
-- hyphen: CHARINDEX on the reversed string gives the distance of the last hyphen
-- from the end, and RIGHT takes the characters that follow it.
-- All other values yield 'Default'.
select case when [your_column_name] like 'W61N03D20V0-WHIH%'
then right([your_column_name], charindex('-', reverse([your_column_name]), 1) - 1)
else 'Default' end as new_column_name
from your_table_name;
SQl Fiddle demo

Resources