SQL string before and after certain characters - sql-server

SELECT NAME
FROM SERVERS
returns:
SDACR.hello.com
SDACR
SDACR\AIR
SDACR.hello.com\WATER
I need the SELECT query for below result:
SDACR
SDACR
SDACR\AIR
SDACR\WATER
Kindly help ! I tried using LEFT and RIGHT functions as below, but not able to get combined output correctly:
SELECT
LEFT(Name, CHARINDEX('.', Name) - 1)
FROM
SERVERS
SELECT
RIGHT(Name, LEN(Name) - CHARINDEX('\', Name))
FROM
SERVERS

It looks like you're just trying to REPLACE a substring of characters in your column. You should try this:
SELECT REPLACE(Name,'.hello.com','') AS ReplacementName
FROM SERVERS

In tsql, you can concatenate values with CONCAT(), or you can simply add strings together with +.
-- Keep the host part before the first '.', then re-append the instance suffix
-- (from the first '\' onward) only when that '\' comes after the '.'.
-- NULLIF/COALESCE cover the "not found" case, where CHARINDEX returns 0 and
-- LEFT(Name, 0 - 1) would otherwise raise an invalid-length error.
SELECT
    LEFT(Name, COALESCE(NULLIF(CHARINDEX('.', Name), 0) - 1, LEN(Name)))
    + CASE
          WHEN CHARINDEX('.', Name) > 0
           AND CHARINDEX('\', Name) > CHARINDEX('.', Name)
              THEN SUBSTRING(Name, CHARINDEX('\', Name), LEN(Name))
          ELSE ''
      END AS ShortName
FROM SERVERS
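Also, be careful when doing arithmetic with CHARINDEX(). It returns 0 (not NULL) when the search string is not found, so for a value without a '.' an expression such as LEFT(Name, CHARINDEX('.', Name) - 1) is called with a length of -1 and raises an "Invalid length parameter" error.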

You can use LEFT for this to select everything up to the first period (dot) and add on everything after the last \
-- Sample data in a table variable (T-SQL variables, including table
-- variables, are prefixed with @).
declare @servers table ([NAME] varchar(64))
insert into @servers ([NAME])
values
('SDACR.hello.com '),
('SDACR'),
('SDACR\AIR'),
('SDACR.hello.com\WATER')

-- host_len = number of leading characters to keep: everything before the
-- first '.', or the whole value when there is no '.'.
select
    left(s.[NAME], p.host_len)
    -- Re-append the trailing '\...' part only when the kept prefix does not
    -- already contain a '\'; with no '\' at all this is right(..., 0) = ''.
    + case
          when charindex('\', left(s.[NAME], p.host_len)) = 0
              then right(s.[NAME], charindex('\', reverse(s.[NAME])))
          else ''
      end as short_name
from @servers as s
cross apply (
    values (case
                when charindex('.', s.[NAME]) = 0 then len(s.[NAME])
                else charindex('.', s.[NAME]) - 1
            end)
) as p(host_len)

Throwing my hat in.... Showing how to use Values and APPLY for cleaner code.
-- sample data in an easily consumable format
declare @yourdata table (txt varchar(100));
insert @yourdata (txt) values
('SDACR.hello.com'),
('SDACR'),
('SDACR\AIR'),
('SDACR.hello.com\WATER');
-- solution: compute both delimiter positions once, then branch on them
select
    d.txt,
    newTxt =
        case
            -- no '.': nothing to strip
            when loc.dot = 0 then d.txt
            -- no '\' after the '.' (slash = 0 or slash before the dot):
            -- keep only the part before the '.'
            when loc.slash < loc.dot then substring(d.txt, 1, loc.dot - 1)
            -- part before the '.' plus everything from the '\' onward
            -- (100 = declared width of txt)
            else substring(d.txt, 1, loc.dot - 1) + substring(d.txt, loc.slash, 100)
        end
from @yourdata as d
cross apply (values (charindex('.', d.txt), charindex('\', d.txt))) as loc(dot, slash);
Results
txt newTxt
------------------------------ --------------------
SDACR.hello.com SDACR
SDACR SDACR
SDACR\AIR SDACR\AIR
SDACR.hello.com\WATER SDACR\WATER

Related

Replace specials chars with HTML entities

I have the following in table TABLE
id content
-------------------------------------
1 Hellö world, I äm text
2 ènd there äré many more chars
3 that are speçial in my dat£base
I now need to export these records into HTML files, using bcp:
-- Build the bcp command line: the query and the output file name are each
-- wrapped in double quotes, with the bcp keyword "queryout" between them.
-- NOTE(review): assumes @id is a character type; CAST it first if it is numeric.
-- NOTE(review): the password is placed on the command line in clear text.
set @command = 'bcp "select [content] from [TABLE] where [id] = ' + @id +
    '" queryout "' + @filename + '.html" -S ' + @instance +
    ' -c -U ' + @username + ' -P ' + @password
exec xp_cmdshell @command, no_output
To make the output look correct, I need to first replace all special characters with their respective HTML entities (pseudo)
insert into [#temp_html] ..
-- each special character is replaced by its named HTML entity
replace(replace([content], 'ö', '&ouml;'), 'ä', '&auml;')
But by now, I have 30 nested replaces and it's starting to look insane.
After much searching, I found this post which uses a HTML conversion table but it is too advanced for me to understand:
The table does not list the special chars itself as they are in my text (ö, à etc) but UnicodeHex. Do I need to add them to the table to make the conversions that I need?
I am having trouble understanding how to update my script to replace all special chars. Can someone please show me a snippet of (pseudo) code?
One way to do that with a translation table is using a recursive cte to do the replaces, and one more cte to get only the last row of each translated value.
First, create and populate sample table (Please save us this step in your future questions):
-- Sample data. A local temp table is created (DECLARE cannot be used with a
-- #-prefixed name); N'...' literals keep the special characters as Unicode
-- for the nvarchar column.
CREATE TABLE #T
(
    id int,
    content nvarchar(100)
)
INSERT INTO #T (id, content) VALUES
(1, N'Hellö world, I äm text'),
(2, N'ènd there äré many more chars'),
(3, N'that are speçial in my dat£base')
Then, create and populate the translation table (I don't know the HTML entities for these chars, so I've just used numbers [plus it's easier to see in the results]). Also, please note that this can be done using yet another cte in the chain.
-- Translation table: one row per special character and its replacement.
-- Each character must appear only once; two rows for the same character make
-- the final result depend on which replacement happens to be applied.
CREATE TABLE #Translations
(
    str nchar(1),
    replacement nvarchar(10)
)
INSERT INTO #Translations (str, replacement) VALUES
(N'ö', N'-1-'),
(N'à', N'-2-'),
(N'è', N'-3-'),
(N'ä', N'-4-'),
(N'é', N'-5-'),
(N'ç', N'-6-'),
(N'£', N'-7-')
Now, the first cte will do the replaces, and the second cte just adds a row_number so that for each id, the last value of lvl will get 1:
-- Recursive CTE: the anchor member returns every source row unchanged at
-- lvl 1; each recursive step applies one translation to content that still
-- contains a translatable character and increments lvl.
;WITH CTETranslations AS
(
SELECT id, content, 1 As lvl
FROM #T
UNION ALL
-- the CAST gives the replaced text the same nvarchar(100) type as the source column
SELECT id, CAST(REPLACE(content, str, replacement) as nvarchar(100)), lvl+1
FROM CTETranslations
JOIN #Translations
ON content LIKE '%' + str + '%'
), cteNumberedTranslation AS
(
-- rn = 1 marks, for each id, the row with the highest lvl
SELECT id, content, ROW_NUMBER() OVER(PARTITION BY Id ORDER BY lvl DESC) rn
FROM CTETranslations
)
Select from the second cte where rn = 1, I've joined the original table to show the source and translation side by side:
SELECT r.id, s.content, r.content
FROM #T s
JOIN cteNumberedTranslation r
ON s.Id = r.Id
WHERE rn = 1
ORDER BY Id
Results:
id content content
1 Hellö world, I äm text Hell-1- world, I -4-m text
2 ènd there äré many more chars -3-nd there -4-r-5- many more chars
3 that are speçial in my dat£base that are spe-6-ial in my dat-7-base
Please note that if your content has more than 100 special characters, you will need to add the MAXRECURSION 0 hint to the final select (the default recursion limit is 100 levels):
SELECT r.id, s.content, r.content
FROM #T s
JOIN cteNumberedTranslation r
ON s.Id = r.Id
WHERE rn = 1
ORDER BY Id
OPTION ( MAXRECURSION 0 );
See a live demo on rextester.

TSQL Using SUBSTRING PATINDEX and STUFF to Amend Data

TSQL MSSQL 2008r2
I need help to amend data.
I've got so far and now I need help.
Sample Data
[EDIT] Additional examples added
DECLARE #Table TABLE (NodePropertyValue NVARCHAR(50))
INSERT INTO #Table (NodePropertyValue)
VALUES
(N'AA11✏AAA ZZZZ'),
(N'CRAP BB22✏BBB'),
(N'CC55✏CC1'),
(N'DD66✏666'),
(N'EE55✏EEE ES177'),
(N'RUBBISH FF22✏FFF XXXXXX'),
(N'NONSENSE')
I want to show the data like so.
If NCHAR(9999) or pencil exists and the next 3 characters are letters then add a slash (/) after the third character. If any other characters exist after the added slash then delete them. So for [AA11✏AAA ZZZZ] should be updated to [AA11✏AAA/].
If NCHAR(9999) exists and there are characters before the preceding 4 characters then delete them. So for [CRAP BB22✏BBB] should be updated to [BB22✏BBB/]
For [NONSENSE] should be shown as NULL.
This is as far as I have got. As you can see I'm stuck with adding a slash and removing characters not needed.
SELECT
V.NodePropertyValue 'Orignal'
,CASE --Pencil NCHAR(9999) exists
WHEN PATINDEX('%'+NCHAR(9999)+'%', UPPER(V.NodePropertyValue)) > 0
THEN
CASE
WHEN --FIRST 4 chars match XX11 and 5th char equals NCHAR(9999)
PATINDEX('[A-Z][A-Z][0-9][0-9]%', UPPER(V.NodePropertyValue)) > 0
AND SUBSTRING(V.NodePropertyValue, PATINDEX('%[A-Z][A-Z][0-9][0-9]%', UPPER(V.NodePropertyValue))+ 4, 1) = NCHAR(9999)
THEN
STUFF(V.NodePropertyValue, PATINDEX('[A-Z][A-Z][0-9][0-9]%', UPPER(V.NodePropertyValue))+ 4
, 50
, SUBSTRING(V.NodePropertyValue, PATINDEX('[A-Z][A-Z][0-9][0-9]%', UPPER(V.NodePropertyValue))+ 4, 50) )
WHEN --Any 4 chars match XX11 and preceding char is space and 5th char equals NCHAR(9999)
PATINDEX('% [A-Z][A-Z][0-9][0-9]%', UPPER(V.NodePropertyValue)) > 0
AND SUBSTRING(V.NodePropertyValue, PATINDEX('%[A-Z][A-Z][0-9][0-9]%', UPPER(V.NodePropertyValue))+ 4, 1) = NCHAR(9999)
THEN
STUFF(V.NodePropertyValue, PATINDEX('% [A-Z][A-Z][0-9][0-9]%', UPPER(V.NodePropertyValue))+ 4
, 50
, SUBSTRING(V.NodePropertyValue, PATINDEX('% [A-Z][A-Z][0-9][0-9]%', UPPER(V.NodePropertyValue))+ 4, 50) )
ELSE
NULL
END
ELSE
NULL
END 'Updated'
FROM
#Table V
Here is a way to get your desired results:
Create and populate sample table (I've added some more sample data based on our conversation in the comments)
-- Sample data in a local temp table (DECLARE cannot be used with a
-- #-prefixed name, so the table is created instead).
CREATE TABLE #Table (NodePropertyValue NVARCHAR(50))
INSERT INTO #Table (NodePropertyValue)
VALUES
(N'AA11✏AAA ZZZZ'),
(N'CRAP BB22✏BBB'),
(N'EE55✏EEE ES177'),
(N'RUBBISH FF22✏FFF XXXXXX'),
(N'AA✏AAA ZZZZ'),
(N'AA✏A2A ZZZZ'),
(N'AA✏A'),
(N'NONSENSE')
A cte to calculate the start and end of the desired pattern
-- CTE: for each value, compute where the wanted fragment starts and ends.
;WITH CTE AS
(
SELECT NodePropertyValue,
-- note: there are 4 underscores before the pencil; in a PATINDEX pattern
-- each underscore matches any single character, so startPattern is the
-- position of the first of the 4 characters preceding the pencil (0 if no match)
PATINDEX('%____'+ NCHAR(9999) +'[a-z][a-z][a-z]%', NodePropertyValue) As startPattern,
-- position of the third character after the first pencil in the value
CHARINDEX(NCHAR(9999), NodePropertyValue) + 3 As EndPattern
FROM #Table
)
query the cte:
SELECT NodePropertyValue,
CASE WHEN startPattern > 0 THEN
SUBSTRING(NodePropertyValue, startPattern, EndPattern-startPattern+1) + '/'
ELSE
NULL
END As Updated
FROM CTE
Result:
NodePropertyValue Updated
AA11✏AAA ZZZZ AA11✏AAA/
CRAP BB22✏BBB BB22✏BBB/
EE55✏EEE ES177 EE55✏EEE/
RUBBISH FF22✏FFF XXXXXX FF22✏FFF/
AA✏AAA ZZZZ NULL
AA✏A2A ZZZZ NULL
AA✏A NULL
NONSENSE NULL
See a live demo on rextester.
If there are always letters after the pencil and no numbers, does this suffice?
-- Return the 4 characters before the pencil, the pencil and the 3 characters
-- after it, followed by '/'. NULL when there is no pencil or when fewer than
-- 4 characters precede it (a non-positive SUBSTRING start would otherwise
-- silently return a shorter fragment).
select case when p.pos <= 4 then null
            else substring(t.NodePropertyValue, p.pos - 4, 8) + '/'
       end as StringStart
from #Table as t
cross apply (values (patindex('%' + nchar(9999) + '%', t.NodePropertyValue))) as p(pos)

SQL Server : extracting number from a string

I have the following SQL command to extract only numbers from a string:
UPDATE Oesskattings
SET alfasorteer1 = CASE
WHEN CHARINDEX('-', blokno) > 0
THEN SUBSTRING(blokno + '-', 0, CHARINDEX('-', blokno))
ELSE SUBSTRING(blokno, PATINDEX('%[0-9]%', blokno), LEN(blokno))
END
My problem is when I have a record where blokno is eg 1B (conversion failed where number is followed by character).
How can I improve my code?
Regards
This will pull out all numbers from a string regardless of other characters and sequence.
It uses a Recursive CTE to identify numbers in order, then puts them back together with STUFF XML PATH
-- Extract every digit (0-9) from txt, in order of appearance, for each row
-- that contains at least one digit.

-- Drop the temp table only if it exists, so the script also runs the first time.
IF OBJECT_ID('tempdb..#TMP') IS NOT NULL
    DROP TABLE #TMP;

CREATE TABLE #TMP (ID INT IDENTITY(1,1), txt VARCHAR(20));

INSERT INTO #TMP (txt) VALUES
('q12w--e32w')
,('vfr45tgbnhy67')
,('12wq3&&r5f5')
,('1qw%%23er45t')
,('de32()ws2')
,('desfghj');

;WITH A
AS (
    -- anchor: the first digit of each string (CHR) and the text after it (REM)
    SELECT ID, 1 POS
        ,txt
        ,SUBSTRING(txt, PATINDEX('%[0-9]%', txt), 1) CHR
        ,RIGHT(txt, LEN(txt) - PATINDEX('%[0-9]%', txt)) REM
    FROM #TMP
    WHERE PATINDEX('%[0-9]%', txt) > 0
    UNION ALL
    -- recursive step: the next digit found in the remainder
    SELECT ID, POS + 1
        ,txt
        ,SUBSTRING(REM, PATINDEX('%[0-9]%', REM), 1) CHR
        ,RIGHT(REM, LEN(REM) - PATINDEX('%[0-9]%', REM)) REM
    FROM A
    WHERE PATINDEX('%[0-9]%', REM) > 0
)
,c AS
(
    -- concatenate the digits of each ID back together in position order
    SELECT
        a.ID
        ,a.txt
        ,(SELECT '' + b.CHR
          FROM A b
          WHERE b.ID = a.ID
          ORDER BY b.POS
          FOR XML PATH('')) AS chrs
    FROM A a
)
-- A holds one row per digit, so c repeats each ID; DISTINCT collapses them
SELECT DISTINCT
    ID
    ,txt
    ,chrs
FROM c;
What worked for me was to delete all non-numeric characters in the string with :
set alfasorteer1 = substring(blokno, patindex('%[0-9]%', blokno), 1+patindex('%[0-9][^0-9]%', blokno+'x')-patindex('%[0-9]%', blokno))

Concatenate the result of an ordered String_Split in a variable

In a SqlServer database I use, the database name is something like StackExchange.Audio.Meta, or StackExchange.Audio or StackOverflow . By sheer luck this is also the url for a website. I only need split it on the dots and reverse it: meta.audio.stackexchange. Adding http:// and .com and I'm done. Obviously Stackoverflow doesn't need any reversing.
Using the SqlServer 2016 string_split function I can easy split and reorder its result:
select value
from string_split(db_name(),'.')
order by row_number() over( order by (select 1)) desc
This gives me
| Value |
-----------------
| Meta |
| Audio |
| StackExchange |
As I need to have the url in a variable I hoped to concatenate it using this answer so my attempt looks like this:
declare #revname nvarchar(150)
select #revname = coalesce(#revname +'.','') + value
from string_split(db_name(),'.')
order by row_number() over( order by (select 1)) desc
However this only returns me the last value, StackExchange. I already noticed the warnings on that answer that this trick only works for certain execution plans as explained here.
The problem seems to be caused by the order by clause. Without that I get all values, but then in the wrong order. I tried to add the ltrim and rtrim functions as suggested in the Microsoft article, as well as a subquery, but so far without luck.
Is there a way I can nudge the Sql Server 2016 Query Engine to concatenate the ordered result from that string_split in a variable?
I do know I can use for XML or even a plain cursor to get the result I need but I don't want to give up this elegant solution yet.
As I'm running this on the Stack Exchange Data Explorer I can't use functions, as we lack the permission to create those. I can do Stored procedures but I hoped I could evade those.
I prepared a SEDE Query to experiment with. The database names to expect are either without dots (e.g. StackOverflow), with 1 dot (StackOverflow.Meta) or with 2 dots (StackExchange.Audio.Meta); the full list of databases is here
I think you are over-complicating things. You could use PARSENAME:
SELECT 'http://' + PARSENAME(db_name(),1) +
ISNULL('.' + PARSENAME(db_name(),2),'') + ISNULL('.'+PARSENAME(db_name(),3),'')
+ '.com'
This is exactly why I have the Presentation Sequence (PS) in my split function. People often scoff at using a UDF for such items, but it is generally a one-time hit to parse something for later consumption.
Select * from [dbo].[udf-Str-Parse]('meta.audio.stackexchange','.')
Returns
Key_PS Key_Value
1 meta
2 audio
3 stackexchange
The UDF
CREATE FUNCTION [dbo].[udf-Str-Parse] (@String varchar(max),@delimeter varchar(10))
--Purpose: split @String on @delimeter and return one row per token.
--         Key_PS is the 1-based position of the token, Key_Value its text.
--Usage: Select * from [dbo].[udf-Str-Parse]('meta.audio.stackexchange','.')
-- Select * from [dbo].[udf-Str-Parse]('John Cappelletti was here',' ')
-- Select * from [dbo].[udf-Str-Parse]('id26,id46|id658,id967','|')
Returns @ReturnTable Table (Key_PS int IDENTITY(1,1) NOT NULL , Key_Value varchar(max))
As
Begin
    Declare @IntPos int,@SubStr varchar(max)
    -- DataLength rather than Len: Len ignores trailing spaces, so Len(' ') = 0
    Declare @DelimLen int = DataLength(@delimeter)

    -- Collapse doubled delimiters (single pass)
    Set @String = Replace(@String,@delimeter+@delimeter,@delimeter)
    Set @IntPos = CharIndex(@delimeter, @String)
    While @IntPos > 0
    Begin
        -- token = everything before the first delimiter
        Set @SubStr = Substring(@String, 1, @IntPos - 1)
        Insert into @ReturnTable (Key_Value) values (@SubStr)
        -- Remove only the leading token and its delimiter. A Replace() here
        -- would also delete later occurrences of the same token+delimiter
        -- text (or every delimiter when the token is empty).
        Set @String = Substring(@String, @IntPos + @DelimLen, DataLength(@String))
        Set @IntPos = CharIndex(@delimeter, @String)
    End
    -- whatever is left after the last delimiter is the final token
    Insert into @ReturnTable (Key_Value) values (@String)
    Return
End
Probably less elegant solution but it takes only a few lines and works with any number of dots.
;with cte as (--build xml
select 1 num, cast('<str><s>'+replace(db_name(),'.','</s><s>')+'</s></str>' as xml) str
)
,x as (--make table from xml
select row_number() over(order by num) rn, --add numbers to sort later
t.v.value('.[1]','varchar(50)') s
from cte cross apply cte.str.nodes('str/s') t(v)
)
--combine into string
select STUFF((SELECT '.' + s AS [text()]
FROM x
order by rn desc --in reverse order
FOR XML PATH('')
), 1, 1, '' ) name
Is there a way I can nudge the Sql Server 2016 Query Engine to concatenate the ordered result from that string_split in a variable?
You can just use CONCAT:
DECLARE @URL NVARCHAR(MAX)
-- Each split value is prepended to what has been built so far, so the parts
-- end up reversed, each followed by a '.'.
-- NOTE(review): relies on row-by-row variable assignment and on the order in
-- which STRING_SPLIT returns rows; neither is a documented guarantee.
SELECT @URL = CONCAT(value, '.', @URL) FROM STRING_SPLIT(DB_NAME(), '.')
-- The trailing '.' left by the previous step joins directly onto 'com'.
SET @URL = CONCAT('http://', LOWER(@URL), 'com');
The reversal is accomplished by the order of parameters to CONCAT. Here's an example.
It changes StackExchange.Garage.Meta to http://meta.garage.stackexchange.com.
This can be used to split and reverse strings in general, but note that it does leave a trailing delimiter. I'm sure you could add some logic or a COALESCE in there to make that not happen.
Also note that vNext will be adding STRING_AGG.
To answer the 'X' of this XY problem, and to address the HTTPS switch (especially for Meta sites) and some other site name changes, I've written the following SEDE query which outputs all site names in the format used on the network site list.
-- Map each database name to a lower-cased https:// site URL with a trailing '/'.
-- The nested IIFs test the name in this order: Mathoverflow, Ubuntu,
-- StackExchange.*, StackOverflow.*, then everything else.
SELECT name,
LOWER('https://' +
-- names containing '.Mathoverflow' map to mathoverflow.net (meta. prefix for '.Meta' names)
IIF(PATINDEX('%.Mathoverflow%', name) > 0,
IIF(PATINDEX('%.Meta', name) > 0, 'meta.mathoverflow.net', 'mathoverflow.net'),
-- names containing '.Ubuntu' map to askubuntu.com (meta. prefix for '.Meta' names)
IIF(PATINDEX('%.Ubuntu%', name) > 0,
IIF(PATINDEX('%.Meta', name) > 0, 'meta.askubuntu.com', 'askubuntu.com'),
IIF(PATINDEX('StackExchange.%', name) > 0,
-- SUBSTRING(name, 15, 200) is the text after the 14-character 'StackExchange.' prefix;
-- the WHEN branches substitute a different host label for specific names
CASE SUBSTRING(name, 15, 200)
WHEN 'Audio' THEN 'video'
WHEN 'Audio.Meta' THEN 'video.meta'
WHEN 'Beer' THEN 'alcohol'
WHEN 'Beer.Meta' THEN 'alcohol.meta'
WHEN 'CogSci' THEN 'psychology'
WHEN 'CogSci.Meta' THEN 'psychology.meta'
WHEN 'Garage' THEN 'mechanics'
WHEN 'Garage.Meta' THEN 'mechanics.meta'
WHEN 'Health' THEN 'medicalsciences'
WHEN 'Health.Meta' THEN 'medicalsciences.meta'
WHEN 'Moderators' THEN 'communitybuilding'
WHEN 'Moderators.Meta' THEN 'communitybuilding.meta'
WHEN 'Photography' THEN 'photo'
WHEN 'Photography.Meta' THEN 'photo.meta'
WHEN 'Programmers' THEN 'softwareengineering'
WHEN 'Programmers.Meta' THEN 'softwareengineering.meta'
WHEN 'Vegetarian' THEN 'vegetarianism'
WHEN 'Vegetarian.Meta' THEN 'vegetarianism.meta'
WHEN 'Writers' THEN 'writing'
WHEN 'Writers.Meta' THEN 'writing.meta'
ELSE SUBSTRING(name, 15, 200)
END + '.stackexchange.com',
IIF(PATINDEX('StackOverflow.%', name) > 0,
-- 'StackOverflow.' is also 14 characters long, so the same offset of 15 applies
CASE SUBSTRING(name, 15, 200)
WHEN 'Br' THEN 'pt'
WHEN 'Br.Meta' THEN 'pt.meta'
ELSE SUBSTRING(name, 15, 200)
END + '.stackoverflow.com',
-- any other name: '<name>.com', or 'meta.<name without .Meta>.com' for names ending in '.Meta'
IIF(PATINDEX('%.Meta', name) > 0,
'meta.' + SUBSTRING(name, 0, PATINDEX('%.Meta', name)) + '.com',
name + '.com'
)
)
)
)
) + '/'
)
FROM sys.databases WHERE database_id > 5

SQL Server : select all after specific character

How can I select
"ALT1" if value is "W61N03D20V0-WHIH-ALT1"
"ALT2" if for "W61N03D20V0-WHIH-ALT2"
"SW" for "W61N03D20V0-WHIH-SW"
"Default" for "W61N26D1YA1-VICU" (without suffix)
"Default" for "W61N27D21V2-AZTD"
In other words, I'm looking for a way to extract the last part after the second separator, but if there is no second separator, then return a default value.
Thanks for advice
Try it like this:
First you "split" the string on its minus signs with the XML trick.
Then you read the third node from you XML - voila!
CREATE TABLE #tbl(content VARCHAR(100));
INSERT INTO #tbl VALUES('W61N03D20V0-WHIH-ALT1')
,('W61N03D20V0-WHIH-SW')
,('W61N26D1YA1-VICU');
WITH SplittedAsXml AS
(
SELECT CAST('<x>' + REPLACE(content,'-','</x><x>') + '</x>' AS XML) AS Content
FROM #tbl
)
SELECT ISNULL(Content.value('/x[3]','varchar(max)'),'default') AS TheThirdPart
FROM SplittedAsXml;
DROP TABLE #tbl;
The result
ALT1
SW
default
Going this ways would also give you the chance to get the other parts in one go just querying /x[1] and /x[2] too
I did it using the built-in substring() function:
declare @str VARCHAR(40) = 'W61N03D20V0-WHIH-ALT1' -- also works for the other examples
declare @sep VARCHAR(1) = '-'
-- everything after the first separator (the whole string when there is no
-- separator, because charindex then returns 0)
declare @middleToEnd VARCHAR(40) = substring(@str, charindex(@sep, @str) + 1, len(@str))
-- position of the second separator, relative to @middleToEnd (0 = none)
declare @pos INT = charindex(@sep, @middleToEnd)
declare @lastPart VARCHAR(40) =
    CASE WHEN @pos = 0
        THEN 'Default'
        ELSE substring(@middleToEnd, @pos + 1, len(@middleToEnd))
    END
select @lastPart
For best performance, you can solve it with this one-liner(calculation is one line)
SELECT
COALESCE(STUFF(col,1,NULLIF(CHARINDEX('-',col, CHARINDEX('-',col)+1), 0),''),'Default')
FROM (values
('W61N03D20V0-WHIH-ALT1'),('W61N03D20V0-WHIH-ALT2'),
('W61N03D20V0-WHIH-SW'),('W61N26D1YA1-VICU'),
('W61N27D21V2-AZTD')) x(col)
Result:
ALT1
ALT2
SW
Default
Default
If I understand what you are asking for, the following does what you need:
-- fake table
WITH SomeTable AS (
SELECT 'W61N03D20V0-WHIH-ALT1' AS Field1
UNION ALL
SELECT 'W61N03D20V0-WHIH-SW'
UNION ALL
SELECT 'W61N26D1YA1-VICU'
)
-- select
SELECT
CASE CHARINDEX('-WHIH-', Field1)
WHEN 0 THEN 'Default'
ELSE SUBSTRING(Field1, CHARINDEX('-WHIH-', Field1) + 6, LEN(Field1) - (CHARINDEX('-WHIH-', Field1) + 5))
END
FROM SomeTable
You can use a CASE expression to check whether the string starts with W61N03D20V0-WHIH.
If it starts with it use a combination of RIGHT, REVERSE and CHARINDEX functions to get last part from the string, else Default.
Query
select case when [your_column_name] like 'W61N03D20V0-WHIH%'
then right([your_column_name], charindex('-', reverse([your_column_name]), 1) - 1)
else 'Default' end as new_column_name
from your_table_name;
SQl Fiddle demo

Resources