How to pad a number with zeros? - sql-server

A table was built with a column for the District School number. This field should be in a 0000 format, but when the table was loaded the leading zeros were dropped: a District School number of 0001 came in as 1, and 0052 came in as 52. I need to alter that column so that every value is padded back to four digits. How do I go about doing that?

SELECT RIGHT('0000'+ISNULL(field,''),4)
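Applied to the original question, a minimal sketch (hypothetical table/column names; assumes the district school number never exceeds four digits):
-- Hypothetical names: table Schools, column DistrictSchool currently an INT.
-- Leading zeros only survive in a character column, so widen the column
-- first, then pad the existing values in place.
ALTER TABLE Schools ALTER COLUMN DistrictSchool VARCHAR(4);
UPDATE Schools
SET DistrictSchool = RIGHT('0000' + LTRIM(RTRIM(DistrictSchool)), 4);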

CREATE FUNCTION [dbo].[strCeros]
(
@numero INT,
@cifras TINYINT
)
RETURNS NVARCHAR(25)
AS
BEGIN
DECLARE @str NVARCHAR(25)
IF @cifras > 25
SET @cifras = 25
SET @str = RIGHT(REPLICATE('0', @cifras) + CAST(@numero AS NVARCHAR(25)), @cifras)
RETURN @str
END
GO
SELECT dbo.strCeros(99, 4) padZeros

select id , REPLICATE('0', 4 - len( cast(id as nvarchar) ) ) + cast(id as nvarchar)
from (
select 1 id
union
select 5 id
union
select 8 id
union
select 19 id
union
select 33 id
union
select 52 id
) dts

Related

Translating SQL Server Cursor to Azure Synapse

I have the following code that loops through a table with unique model numbers and creates a new table that contains, for each model number, a row for each year and week number. How can I translate this so it doesn't use a cursor?
DECLARE @current_model varchar(50);
--declare a cursor that iterates through model numbers in the ItemInformation table
DECLARE model_cursor CURSOR FOR
SELECT model FROM ItemInformation
--start the cursor
OPEN model_cursor
--get the next (first) value
FETCH NEXT FROM model_cursor INTO @current_model;
DECLARE @year_counter SMALLINT;
DECLARE @week_counter TINYINT;
WHILE (@@FETCH_STATUS = 0) --0 means the last FETCH succeeded, i.e. there is a next value
BEGIN
    SET @year_counter = 2019;
    WHILE (@year_counter <= Datepart(year, Getdate() - 1) + 2)
    BEGIN
        SET @week_counter = 1;
        WHILE (@week_counter <= 52)
        BEGIN
            INSERT INTO dbo.ModelCalendar(
                model,
                sales_year,
                sales_week
            )
            VALUES(
                @current_model,
                @year_counter,
                @week_counter
            )
            SET @week_counter = @week_counter + 1
        END
        SET @year_counter = @year_counter + 1
    END
    FETCH NEXT FROM model_cursor INTO @current_model
END;
CLOSE model_cursor;
DEALLOCATE model_cursor;
If ItemInformation contains the following table:
model,invoice
a,4.99
b,9.99
c,1.99
d,8.99
then the expected output is:
model,sales_year,sales_week
A,2019,1
A,2019,2
A,2019,3
...
A,2019,52
A,2020,1
A,2020,2
A,2020,3
...
A,2020,51
A,2020,52
A,2020,53 (this is 53 because 2020 is a leap year and has 53 weeks)
A,2021,1
A,2021,2
...
A,2022,1
A,2022,2
...
A,2022,52
B,2019,1
B,2019,2
...
D,2022,52
Using CTE's you can get all combinations of weeks and years within the range required. Then join your data table on.
declare @Test table (model varchar(1), invoice varchar(4));
insert into @Test (model, invoice)
values
('a', '4.99'),
('b', '9.99'),
('c', '1.99'),
('d', '8.99');
with Week_CTE as (
select 1 as WeekNo
union all
select 1 + WeekNo
from Week_CTE
where WeekNo < 53
), Year_CTE as (
select 2019 YearNo
union all
select 1 + YearNo
from Year_CTE
where YearNo <= datepart(year, current_timestamp)
)
select T.model, yr.YearNo, wk.WeekNo
from Week_CTE wk
cross join (
select YearNo
-- Find the last week of the year (52 or 53) -- might need to change the start day of the week for this to be correct
, datepart(week, dateadd(day, -1, dateadd(year, 1, '01 Jan ' + convert(varchar(4),YearNo)))) LastWeek
from Year_CTE yr
) yr
cross join (
-- Assuming only a single row per model is required, and the invoice column can be ignored
select model
from @Test
group by model
) T
where wk.WeekNo <= yr.LastWeek
order by yr.YearNo, wk.WeekNo;
As you have advised that using a recursive CTE is not an option, you can try using a CTE without recursion:
with T(N) as (
select X.N
from (values (0),(0),(0),(0),(0),(0),(0),(0)) X(N)
), W(N) as (
select top (53) row_number() over (order by @@version) as N
from T T1
cross join T T2
), Y(N) as (
-- Upper limit on number of years
select top (12) 2018 + row_number() over (order by @@version) AS N
from T T1
cross join T T2
)
select W.N as WeekNo, Y.N YearNo, T.model
from W
cross join (
select N
-- Find the last week of the year (52 or 53) -- might need to change the start day of the week for this to be correct
, datepart(week, dateadd(day, -1, dateadd(year, 1, '01 Jan ' + convert(varchar(4),N)))) LastWeek
from Y
) Y
cross join (
-- Assuming only a single row per model is required, and the invoice column can be ignored
select model
from @Test
group by model
) T
-- Filter to required number of years.
where Y.N <= datepart(year, current_timestamp) + 1
and W.N <= Y.LastWeek
order by Y.N, W.N, T.model;
Note: if you set up your sample data in future questions with DDL/DML as shown here, you will greatly assist people attempting to answer.
I don't like to see a loop solution where a set solution can be found. So here is take two, with no CTE, no VALUES and no row_number() (the table variable is just to simulate your data, so it's not part of the actual solution):
declare @Test table (model varchar(1), invoice varchar(4));
insert into @Test (model, invoice)
values
('a', '4.99'),
('b', '9.99'),
('c', '1.99'),
('d', '8.99');
select Y.N + 2019 YearNumber, W.WeekNumber, T.Model
from (
-- Cross join 5 * 10, then filter to 52/53 as required
select W1.N * 10 + W2.N + 1 WeekNumber
from (
select 0 N
union all select 1
union all select 2
union all select 3
union all select 4
union all select 5
) W1
cross join (
select 0 N
union all select 1
union all select 2
union all select 3
union all select 4
union all select 5
union all select 6
union all select 7
union all select 8
union all select 9
) W2
) W
-- Cross join the number of years required; just ensure it's more than will ever be needed, then filter back
cross join (
select 0 N
union all select 1
union all select 2
union all select 3
union all select 4
union all select 5
union all select 6
union all select 7
union all select 8
union all select 9
) Y
cross join (
-- Assuming only a single row per model is required, and the invoice column can be ignored
select model
from @Test
group by model
) T
-- Some filter to restrict the years
where Y.N <= 3
-- Some filter to restrict the weeks
and W.WeekNumber <= 53
order by YearNumber, WeekNumber;
I created a temporary calendar table containing all the weeks and years. To account for leap years, I took the last 7 days of each year and got the ISO week for each day. To know how many weeks are in a year, I put these values into another temp table and took the max value. Azure Synapse doesn't support multiple values in one INSERT, so it looks a lot longer than it should be. I also have to declare each value as a variable, since Synapse can only insert literals or variables. I then cross-joined with my ItemInformation table.
CREATE TABLE #temp_dates
(
year SMALLINT,
week TINYINT
);
CREATE TABLE #temp_weeks
(
week_num TINYINT
);
DECLARE @year_counter SMALLINT
SET @year_counter = 2019
DECLARE @week_counter TINYINT
WHILE ( @year_counter <= Datepart(year, Getdate() - 1) + 2 )
BEGIN
SET @week_counter = 1;
DECLARE @day_1 TINYINT
SET @day_1 = Datepart(isowk, Concat('12-25-', @year_counter))
DECLARE @day_2 TINYINT
SET @day_2 = Datepart(isowk, Concat('12-26-', @year_counter))
DECLARE @day_3 TINYINT
SET @day_3 = Datepart(isowk, Concat('12-27-', @year_counter))
DECLARE @day_4 TINYINT
SET @day_4 = Datepart(isowk, Concat('12-28-', @year_counter))
DECLARE @day_5 TINYINT
SET @day_5 = Datepart(isowk, Concat('12-29-', @year_counter))
DECLARE @day_6 TINYINT
SET @day_6 = Datepart(isowk, Concat('12-30-', @year_counter))
DECLARE @day_7 TINYINT
SET @day_7 = Datepart(isowk, Concat('12-31-', @year_counter))
TRUNCATE TABLE #temp_weeks
INSERT INTO #temp_weeks (week_num) VALUES (@day_1)
INSERT INTO #temp_weeks (week_num) VALUES (@day_2)
INSERT INTO #temp_weeks (week_num) VALUES (@day_3)
INSERT INTO #temp_weeks (week_num) VALUES (@day_4)
INSERT INTO #temp_weeks (week_num) VALUES (@day_5)
INSERT INTO #temp_weeks (week_num) VALUES (@day_6)
INSERT INTO #temp_weeks (week_num) VALUES (@day_7)
DECLARE @max_week TINYINT
SET @max_week = (SELECT Max(week_num) FROM #temp_weeks)
WHILE ( @week_counter <= @max_week )
BEGIN
INSERT INTO #temp_dates (year, week)
VALUES ( @year_counter, @week_counter )
SET @week_counter = @week_counter + 1
END
SET @year_counter = @year_counter + 1
END
DROP TABLE #temp_weeks;
SELECT i.model,
d.year,
d.week
FROM dbo.iteminformation i
CROSS JOIN #temp_dates d
ORDER BY model,
year,
week
DROP TABLE #temp_dates

Multiple tables in the where clause SQL

I have 2 tables:
Table_1
GetID UnitID
1 1,2,3
2 4,5
3 5,6
4 6
Table_2
ID UnitID UserID
1 1 1
1 2 1
1 3 1
1 4 1
1 5 2
1 6 3
I want the 'GetID' based on 'UserID'.
Let me explain with an example.
I want all the GetID where UserID is 1.
The result set should be 1 and 2. 2 is included because one of the Units of 2 has UserID 1.
I want all the GetID where UserID is 2
The result set should be 2 and 3. 2 is included because one of Units of 2 has UserID 2.
I want to achieve this.
Thank you in Advance.
You can try a query like this:
See live demo
select
distinct userid,getid
from Table_1 t1
join Table_2 t2
on t1.unitId+',' like '%' +cast(t2.unitid as varchar(max))+',%'
and t2.userid=1
The query for this will be relatively ugly, because you made the mistake of storing CSV data in the UnitID column (or maybe someone else did and you are stuck with it).
SELECT DISTINCT
t1.GetID
FROM Table_1 t1
INNER JOIN Table_2 t2
ON ',' + t1.UnitID + ',' LIKE '%,' + CONVERT(varchar(10), t2.UnitID) + ',%'
WHERE
t2.UserID = 1;
Demo
To understand the join trick being used here, for the first row of Table_1 we are comparing ,1,2,3, against other single UnitID values from Table_2, e.g. %,1,%. Hopefully it is clear that my logic would match a single UnitID value in the CSV string in any position, including the first and last.
But a much better long term approach would be to separate those CSV values across separate records. Then, in addition to requiring a much simpler query, you could take advantage of things like indices.
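As a hedged illustration of that normalized approach (table and constraint names are hypothetical), the CSV column would be replaced with one row per GetID/UnitID pair, and the original question becomes a plain join:
CREATE TABLE Table_1_Units
(
    GetID  INT NOT NULL,
    UnitID INT NOT NULL,
    CONSTRAINT PK_Table_1_Units PRIMARY KEY (GetID, UnitID)
);
-- The original question is then just:
SELECT DISTINCT u.GetID
FROM Table_1_Units u
INNER JOIN Table_2 t2 ON t2.UnitID = u.UnitID
WHERE t2.UserID = 1;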
try this:
declare @Table_1 table(GetID INT, UnitId VARCHAR(10))
declare @Table_2 table(ID INT, UnitId INT, UserId INT)
INSERT INTO @Table_1
SELECT 1,'1,2,3'
union
SELECT 2,'4,5'
union
SELECT 3,'5,6'
union
SELECT 4,'6'
INSERT INTO @Table_2
SELECT 1,1,1
union
SELECT 1,2,1
union
SELECT 1,3,1
union
SELECT 1,4,1
union
SELECT 1,5,2
union
SELECT 1,6,3
declare @UserId INT = 2
DECLARE @UnitId VARCHAR(10)
SELECT @UnitId=COALESCE(@UnitId + ',', '') + CAST(UnitId AS VARCHAR(5)) from @Table_2 WHERE UserId=@UserId
select distinct t.GetId
from @Table_1 t
CROSS APPLY [dbo].[Split](UnitId,',') AS AA
CROSS APPLY [dbo].[Split](@UnitId,',') AS BB
WHERE AA.Value=BB.Value
Split Function:
CREATE FUNCTION [dbo].Split(@input AS Varchar(4000))
RETURNS
@Result TABLE(Value BIGINT)
AS
BEGIN
DECLARE @str VARCHAR(20)
DECLARE @ind Int
IF(@input is not null)
BEGIN
SET @ind = CharIndex(',',@input)
WHILE @ind > 0
BEGIN
SET @str = SUBSTRING(@input,1,@ind-1)
SET @input = SUBSTRING(@input,@ind+1,LEN(@input)-@ind)
INSERT INTO @Result values (@str)
SET @ind = CharIndex(',',@input)
END
SET @str = @input
INSERT INTO @Result values (@str)
END
RETURN
END

Remove some characters from string sql [duplicate]

I've got dirty data in a column with variable alpha length. I just want to strip out anything that is not 0-9.
I do not want to run a function or proc. I have a similar script that just grabs the numeric value after text; it looks like this:
Update TableName
set ColumntoUpdate=cast(replace(Columnofdirtydata,'Alpha #','') as int)
where Columnofdirtydata like 'Alpha #%'
And ColumntoUpdate is Null
I thought it would work pretty well until I found that some of the data fields I expected to be in the format Alpha # 12345789 are not.
Examples of data that needs to be stripped
AB ABCDE # 123
ABCDE# 123
AB: ABC# 123
I just want the 123. It is true that all data fields do have the # prior to the number.
I tried substring and PatIndex, but I'm not quite getting the syntax correct or something. Anyone have any advice on the best way to address this?
See this blog post on extracting numbers from strings in SQL Server. Below is a sample using a string in your example:
DECLARE @textval NVARCHAR(30)
SET @textval = 'AB ABCDE # 123'
SELECT LEFT(SUBSTRING(@textval, PATINDEX('%[0-9.-]%', @textval), 8000),
PATINDEX('%[^0-9.-]%', SUBSTRING(@textval, PATINDEX('%[0-9.-]%', @textval), 8000) + 'X') -1)
Here is an elegant solution if your server supports the TRANSLATE function (in SQL Server it is available from SQL Server 2017 onwards, and also in Azure SQL).
First, it replaces any non numeric characters with a # character.
Then, it removes all # characters.
You may need to add additional characters that you know may be present in the second parameter of the TRANSLATE call.
select REPLACE(TRANSLATE([Col], 'abcdefghijklmnopqrstuvwxyz+()- ,#+', '##################################'), '#', '')
You can use stuff and patindex.
stuff(Col, 1, patindex('%[0-9]%', Col)-1, '')
SQL Fiddle
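As a usage sketch against the OP's sample values (illustration only; the expression simply strips everything before the first digit, so it assumes nothing follows the number):
SELECT STUFF(Col, 1, PATINDEX('%[0-9]%', Col) - 1, '') AS NumberPart
FROM (VALUES ('AB ABCDE # 123'), ('ABCDE# 123'), ('AB: ABC# 123')) v(Col);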
This works well for me:
CREATE FUNCTION [dbo].[StripNonNumerics]
(
@Temp varchar(255)
)
RETURNS varchar(255)
AS
Begin
Declare @KeepValues as varchar(50)
Set @KeepValues = '%[^0-9]%'
While PatIndex(@KeepValues, @Temp) > 0
Set @Temp = Stuff(@Temp, PatIndex(@KeepValues, @Temp), 1, '')
Return @Temp
End
Then call the function like so to see the original something next to the sanitized something:
SELECT Something, dbo.StripNonNumerics(Something) FROM TableA
In case there may be characters between the digits (e.g. thousands separators), you may try the following:
declare @table table (DirtyCol varchar(100))
insert into @table values
('AB ABCDE # 123')
,('ABCDE# 123')
,('AB: ABC# 123')
,('AB#')
,('AB # 1 000 000')
,('AB # 1`234`567')
,('AB # (9)(876)(543)')
;with tally as (select top (100) N=row_number() over (order by @@spid) from sys.all_columns),
data as (
select DirtyCol, Col
from @table
cross apply (
select (select C + ''
from (select N, substring(DirtyCol, N, 1) C from tally where N<=datalength(DirtyCol)) [1]
where C between '0' and '9'
order by N
for xml path(''))
) p (Col)
where p.Col is not NULL
)
select DirtyCol, cast(Col as int) IntCol
from data
Output is:
DirtyCol IntCol
--------------------- -------
AB ABCDE # 123 123
ABCDE# 123 123
AB: ABC# 123 123
AB # 1 000 000 1000000
AB # 1`234`567 1234567
AB # (9)(876)(543) 9876543
For update, add ColToUpdate to select list of the data cte:
;with num as (...),
data as (
select ColToUpdate, /*DirtyCol, */Col
from ...
)
update data
set ColToUpdate = cast(Col as int)
CREATE FUNCTION FN_RemoveNonNumeric (@Input NVARCHAR(512))
RETURNS NVARCHAR(512)
AS
BEGIN
DECLARE @Trimmed NVARCHAR(512)
SELECT @Trimmed = @Input
WHILE PATINDEX('%[^0-9]%', @Trimmed) > 0
SELECT @Trimmed = REPLACE(@Trimmed, SUBSTRING(@Trimmed, PATINDEX('%[^0-9]%', @Trimmed), 1), '')
RETURN @Trimmed
END
GO
SELECT dbo.FN_RemoveNonNumeric('ABCDE# 123')
Pretty late to the party, but I found the following, which I thought worked brilliantly, in case anyone is still looking:
SELECT
(SELECT CAST(CAST((
SELECT SUBSTRING(FieldToStrip, Number, 1)
FROM master..spt_values
WHERE Type='p' AND Number <= LEN(FieldToStrip) AND
SUBSTRING(FieldToStrip, Number, 1) LIKE '[0-9]' FOR XML Path(''))
AS xml) AS varchar(MAX)))
FROM
SourceTable
Here's a version which pulls all digits from a string; i.e. given "I'm 35 years old; I was born in 1982. The average family has 2.4 children." this would return 35198224. It's good where you've got numeric data which may have been formatted as a code (e.g. #123,456,789 / 123-00005), but it isn't appropriate if you're looking to pull out specific numbers (as opposed to just the digit characters) from the text. Also, it only handles digits, so it won't return negative signs (-) or periods (.).
declare @table table (id bigint not null identity (1,1), data nvarchar(max))
insert @table (data)
values ('hello 123 its 45613 then') --outputs: 12345613
,('1 some other string 98 example 4') --outputs: 1984
,('AB ABCDE # 123') --outputs: 123
,('ABCDE# 123') --outputs: 123
,('AB: ABC# 123') --outputs: 123
; with NonNumerics as (
select id
, data original
--the below line replaces all digits with blanks
, replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(data,'0',''),'1',''),'2',''),'3',''),'4',''),'5',''),'6',''),'7',''),'8',''),'9','') nonNumeric
from @table
)
--each iteration of the below CTE removes another non-numeric character from the original string, putting the result into the numerics column
, Numerics as (
select id
, replace(original, substring(nonNumeric,1,1), '') numerics
, replace(nonNumeric, substring(nonNumeric,1,1), '') charsToreplace
, len(replace(nonNumeric, substring(nonNumeric,1,1), '')) charsRemaining
from NonNumerics
union all
select id
, replace(numerics, substring(charsToreplace,1,1), '') numerics
, replace(charsToreplace, substring(charsToreplace,1,1), '') charsToreplace
, len(replace(charsToreplace, substring(charsToreplace,1,1), '')) charsRemaining
from Numerics
where charsRemaining > 0
)
--we select only those strings with `charsRemaining=0`; i.e. the rows for which all non-numeric characters have been removed; there should be 1 row returned for every 1 row in the original data set.
select * from Numerics where charsRemaining = 0
This code works by removing all the digits (i.e. the characters we want) from the given strings by replacing them with blanks. Then it goes through the original string (which includes the digits), removing all of the characters that were left (i.e. the non-numeric characters), thus leaving only the digits.
The reason we do this in 2 steps, rather than just removing all non-numeric characters in the first place is there are only 10 digits, whilst there are a huge number of possible characters; so replacing that small list is relatively fast; then gives us a list of those non-numeric characters which actually exist in the string, so we can then replace that small set.
The method makes use of recursive SQL, using common table expressions (CTEs).
To add on to Ken's answer, this handles commas, spaces, and parentheses:
--Handles parentheses, commas, spaces, hyphens..
declare @table table (c varchar(256))
insert into @table
values
('This is a test 111-222-3344'),
('Some Sample Text (111)-222-3344'),
('Hello there 111222 3344 / How are you?'),
('Hello there 111 222 3344 ? How are you?'),
('Hello there 111 222 3344. How are you?')
select
replace(LEFT(SUBSTRING(replace(replace(replace(replace(replace(c,'(',''),')',''),'-',''),' ',''),',',''), PATINDEX('%[0-9.-]%', replace(replace(replace(replace(replace(c,'(',''),')',''),'-',''),' ',''),',','')), 8000),
PATINDEX('%[^0-9.-]%', SUBSTRING(replace(replace(replace(replace(replace(c,'(',''),')',''),'-',''),' ',''),',',''), PATINDEX('%[0-9.-]%', replace(replace(replace(replace(replace(c,'(',''),')',''),'-',''),' ',''),',','')), 8000) + 'X') -1),'.','')
from @table
Create function fn_GetNumbersOnly(@pn varchar(100))
Returns varchar(max)
AS
BEGIN
Declare @r varchar(max) ='', @len int, @c char(1), @x int = 0
Select @len = len(@pn)
while @x <= @len
begin
Select @c = SUBSTRING(@pn,@x,1)
if ISNUMERIC(@c) = 1 and @c <> '-'
Select @r = @r + @c
Select @x = @x + 1
end
return @r
End
In your case it seems like the number will always be after the # symbol, so using CHARINDEX() with LTRIM() and RTRIM() would probably perform the best (a sketch of that appears after the code below). But here is an interesting method of getting rid of ANY non-digit. It utilizes a tally table and a table of digits to limit which characters are accepted, then the XML technique to concatenate back to a single string without the non-numeric characters. The neat thing about this technique is that it could be expanded to include ANY allowed characters and strip out anything that is not allowed.
DECLARE @ExampleData AS TABLE (Col VARCHAR(100))
INSERT INTO @ExampleData (Col) VALUES ('AB ABCDE # 123'),('ABCDE# 123'),('AB: ABC# 123')
DECLARE @Digits AS TABLE (D CHAR(1))
INSERT INTO @Digits (D) VALUES ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7'),('8'),('9')
;WITH cteTally AS (
SELECT
I = ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM
@Digits d10
CROSS APPLY @Digits d100
--add more cross applies to cover longer fields; this handles 100
)
SELECT *
FROM
@ExampleData e
OUTER APPLY (
SELECT CleansedPhone = CAST((
SELECT TOP 100
SUBSTRING(e.Col,t.I,1)
FROM
cteTally t
INNER JOIN @Digits d
ON SUBSTRING(e.Col,t.I,1) = d.D
WHERE
I <= LEN(e.Col)
ORDER BY
t.I
FOR XML PATH('')) AS VARCHAR(100))) o
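For completeness, a minimal sketch of the CHARINDEX()/LTRIM() idea mentioned above, using the OP's Columnofdirtydata/TableName names (an assumption-laden sketch: it relies on every row containing '#' followed by the number and nothing after it):
SELECT CAST(LTRIM(SUBSTRING(Columnofdirtydata,
                            CHARINDEX('#', Columnofdirtydata) + 1,
                            8000)) AS INT) AS ExtractedNumber
FROM TableName
WHERE Columnofdirtydata LIKE '%#%[0-9]%';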
Declare @MainTable table(id int identity(1,1), TextField varchar(100))
INSERT INTO @MainTable (TextField)
VALUES
('6B32E')
declare @i int = 1
Declare @originalWord varchar(100) = ''
WHILE @i <= (Select count(*) from @MainTable)
BEGIN
Select @originalWord = TextField from @MainTable where id = @i
Declare @r varchar(max) ='', @len int, @c char(1), @x int = 0
Select @len = len(@originalWord)
declare @pn varchar(100) = @originalWord
while @x <= @len
begin
Select @c = SUBSTRING(@pn,@x,1)
if(@c != '')
BEGIN
if ISNUMERIC(@c) = 0 and @c <> '-'
BEGIN
Select @r = cast(@r as varchar) + cast(replace((SELECT ASCII(@c)-64),'-','') as varchar)
end
ELSE
BEGIN
Select @r = @r + @c
END
END
Select @x = @x + 1
END
Select @r
Set @i = @i + 1
END
I have created a function for this
Create FUNCTION RemoveCharacters (@text varchar(30))
RETURNS VARCHAR(30)
AS
BEGIN
declare @index as int
declare @newtexval as varchar(30)
set @index = (select PATINDEX('%[A-Z.-/?]%', @text))
if (@index = 0)
begin
return @text
end
else
begin
set @newtexval = (select STUFF ( @text , @index , 1 , '' ))
return dbo.RemoveCharacters(@newtexval)
end
return 0
END
GO
Here is the answer:
DECLARE @t TABLE (tVal VARCHAR(100))
INSERT INTO @t VALUES('123')
INSERT INTO @t VALUES('123S')
INSERT INTO @t VALUES('A123,123')
INSERT INTO @t VALUES('a123..A123')
;WITH cte (original, tVal, n)
AS
(
SELECT t.tVal AS original,
LOWER(t.tVal) AS tVal,
65 AS n
FROM @t AS t
UNION ALL
SELECT tVal AS original,
CAST(REPLACE(LOWER(tVal), LOWER(CHAR(n)), '') AS VARCHAR(100)),
n + 1
FROM cte
WHERE n <= 90
)
SELECT t1.tVal AS OldVal,
t.tval AS NewVal
FROM (
SELECT original,
tVal,
ROW_NUMBER() OVER(PARTITION BY tVal + original ORDER BY original) AS Sl
FROM cte
WHERE PATINDEX('%[a-z]%', tVal) = 0
) t
INNER JOIN @t t1
ON t.original = t1.tVal
WHERE t.sl = 1
You can create a SQL CLR scalar function in order to use regular-expression replace patterns. Examples of how to create such a function are available online.
Having such a function will solve the issue with just the following lines:
SELECT [dbo].[fn_Utils_RegexReplace] ('AB ABCDE # 123', '[^0-9]', '');
SELECT [dbo].[fn_Utils_RegexReplace] ('ABCDE# 123', '[^0-9]', '');
SELECT [dbo].[fn_Utils_RegexReplace] ('AB: ABC# 123', '[^0-9]', '');
More importantly, you will be able to solve more complex issues, as regular expressions bring a whole new world of options directly into your T-SQL statements.
Use this:
REPLACE(TRANSLATE(SomeString, REPLACE(TRANSLATE(SomeString, '0123456789', '##########'), '#', ''), REPLICATE('#', LEN(REPLACE(TRANSLATE(SomeString, '0123456789', '##########'), '#', '') + 'x') - 1)), '#', '')
Demo:
DROP TABLE IF EXISTS #MyTempTable;
CREATE TABLE #MyTempTable (SomeString VARCHAR(255));
INSERT INTO #MyTempTable
VALUES ('ssss123ssg99d362sdg')
, ('hey 62q&*^(n43')
, (NULL)
, ('')
, ('hi')
, ('123');
SELECT SomeString
, REPLACE(TRANSLATE(SomeString, REPLACE(TRANSLATE(SomeString, '0123456789', '##########'), '#', ''), REPLICATE('#', LEN(REPLACE(TRANSLATE(SomeString, '0123456789', '##########'), '#', '') + 'x') - 1)), '#', '')
FROM #MyTempTable;
DROP TABLE IF EXISTS #MyTempTable;
Results:
SomeString            (No column name)
--------------------  ----------------
ssss123ssg99d362sdg   12399362
hey 62q&*^(n43        6243
NULL                  NULL

hi
123                   123
While the OP wanted to "strip out anything that is not 0-9", the post is also tagged with "substring" and "patindex", and the OP mentioned the concern of "not quite getting the syntax correct or something". Given that the requirements note "all data fields do have the # prior to the number", here is an answer that addresses the challenges with SUBSTRING/PATINDEX:
/* A sample select */
;WITH SampleValues AS
( SELECT 'AB ABCDE # 123' [Columnofdirtydata]
UNION ALL SELECT 'AB2: ABC# 123')
SELECT
s.Columnofdirtydata,
f1.pos1,
'['+ f2.substr +']' [InspectOutput]
FROM
SampleValues s
CROSS APPLY (SELECT PATINDEX('%# %',s.Columnofdirtydata) [pos1]) f1
CROSS APPLY (SELECT SUBSTRING(s.Columnofdirtydata, f1.pos1 + LEN('#-'),LEN(s.Columnofdirtydata)) [substr]) f2
/* Using update scenario from OP */
UPDATE t1
SET t1.Columntoupdate = CAST(f2.substr AS INT)
FROM
TableName t1
CROSS APPLY (SELECT PATINDEX('%# %',t1.Columnofdirtydata) [pos1]) f1
CROSS APPLY (SELECT SUBSTRING(t1.Columnofdirtydata, f1.pos1 + LEN('#-'),LEN(t1.Columnofdirtydata)) [substr]) f2
Note that my syntax advice for PATINDEX/SUBSTRING is to:
consider using APPLY as a way to temporarily alias results from one function for use as parameters in the next. It's not uncommon (in ETL, for example) to need to parse out parameter/position-based substrings in an updatable column of a staging table. If you need to "debug" and potentially fix some parsing logic, this style will help.
consider using LEN('PatternSample') in your substring logic, to account for reusing this pattern or adjusting it when your source data changes (instead of "+ 1").
remember that SUBSTRING() requires a length parameter, but it can be greater than the length of the string. Therefore, if you are getting "the rest of the string" after the pattern, you can just use the source length.
DECLARE @STR VARCHAR(400)
DECLARE @specialchars VARCHAR(50) = '%[~,@,#,$,%,&,*,(,),!^?:]%'
SET @STR = '1, 45 4,3 68.00-'
WHILE PATINDEX( @specialchars, @STR ) > 0
---Remove special characters using the Replace function
SET @STR = Replace(Replace(REPLACE( @STR, SUBSTRING( @STR, PATINDEX( @specialchars, @STR ), 1 ),''),'-',''), ' ','')
SELECT @STR
SELECT REGEXP_REPLACE( col, '[^[:digit:]]', '' ) AS new_col FROM my_table

Find non-ASCII characters in varchar columns using SQL Server

How can rows with non-ASCII characters be returned using SQL Server?
If you can show how to do it for one column would be great.
I am doing something like this now, but it is not working
select *
from Staging.APARMRE1 as ar
where ar.Line like '%[^!-~ ]%'
For extra credit, if it can span all varchar columns in a table, that would be outstanding! In this solution, it would be nice to return three columns:
The identity field for that record. (This will allow the whole record to be reviewed with another query.)
The column name
The text with the invalid character
Id | FieldName | InvalidText |
----+-----------+-------------------+
25 | LastName | Solís |
56 | FirstName | François |
100 | Address1 | 123 Ümlaut street |
Invalid characters would be any outside the range of SPACE (32 decimal) through ~ (127 decimal)
Here is a solution for the single column search using PATINDEX.
It also displays the StartPosition, InvalidCharacter and ASCII code.
select line,
patindex('%[^ !-~]%' COLLATE Latin1_General_BIN,Line) as [Position],
substring(line,patindex('%[^ !-~]%' COLLATE Latin1_General_BIN,Line),1) as [InvalidCharacter],
ascii(substring(line,patindex('%[^ !-~]%' COLLATE Latin1_General_BIN,Line),1)) as [ASCIICode]
from staging.APARMRE1
where patindex('%[^ !-~]%' COLLATE Latin1_General_BIN,Line) >0
I've been running this bit of code with success
declare @UnicodeData table (
data nvarchar(500)
)
insert into
@UnicodeData
values
(N'Horse�')
,(N'Dog')
,(N'Cat')
select
data
from
@UnicodeData
where
data collate LATIN1_GENERAL_BIN != cast(data as varchar(max))
Which works well for known columns.
For extra credit, I wrote this quick script to search all nvarchar columns in a given table for Unicode characters.
declare
@sql varchar(max) = ''
,@table sysname = 'mytable' -- enter your table here
;with ColumnData as (
select
RowId = row_number() over (order by c.COLUMN_NAME)
,c.COLUMN_NAME
,ColumnName = '[' + c.COLUMN_NAME + ']'
,TableName = '[' + c.TABLE_SCHEMA + '].[' + c.TABLE_NAME + ']'
from
INFORMATION_SCHEMA.COLUMNS c
where
c.DATA_TYPE = 'nvarchar'
and c.TABLE_NAME = @table
)
select
@sql = @sql + 'select FieldName = ''' + c.ColumnName + ''', InvalidCharacter = [' + c.COLUMN_NAME + '] from ' + c.TableName + ' where ' + c.ColumnName + ' collate LATIN1_GENERAL_BIN != cast(' + c.ColumnName + ' as varchar(max)) ' + case when c.RowId <> (select max(RowId) from ColumnData) then ' union all ' else '' end + char(13)
from
ColumnData c
-- check
-- print @sql
exec (@sql)
I'm not a fan of dynamic SQL but it does have its uses for exploratory queries like this.
try something like this:
DECLARE @YourTable table (PK int, col1 varchar(20), col2 varchar(20), col3 varchar(20));
INSERT @YourTable VALUES (1, 'ok','ok','ok');
INSERT @YourTable VALUES (2, 'BA'+char(182)+'D','ok','ok');
INSERT @YourTable VALUES (3, 'ok',char(182)+'BAD','ok');
INSERT @YourTable VALUES (4, 'ok','ok','B'+char(182)+'AD');
INSERT @YourTable VALUES (5, char(182)+'BAD','ok',char(182)+'BAD');
INSERT @YourTable VALUES (6, 'BAD'+char(182),'B'+char(182)+'AD','BAD'+char(182)+char(182)+char(182));
--if you have a Numbers table use that, other wise make one using a CTE
WITH AllNumbers AS
( SELECT 1 AS Number
UNION ALL
SELECT Number+1
FROM AllNumbers
WHERE Number<1000
)
SELECT
pk, 'Col1' BadValueColumn, CONVERT(varchar(20),col1) AS BadValue --make the XYZ in convert(varchar(XYZ), ...) the largest value of col1, col2, col3
FROM @YourTable y
INNER JOIN AllNumbers n ON n.Number <= LEN(y.col1)
WHERE ASCII(SUBSTRING(y.col1, n.Number, 1))<32 OR ASCII(SUBSTRING(y.col1, n.Number, 1))>127
UNION
SELECT
pk, 'Col2' BadValueColumn, CONVERT(varchar(20),col2) AS BadValue --make the XYZ in convert(varchar(XYZ), ...) the largest value of col1, col2, col3
FROM @YourTable y
INNER JOIN AllNumbers n ON n.Number <= LEN(y.col2)
WHERE ASCII(SUBSTRING(y.col2, n.Number, 1))<32 OR ASCII(SUBSTRING(y.col2, n.Number, 1))>127
UNION
SELECT
pk, 'Col3' BadValueColumn, CONVERT(varchar(20),col3) AS BadValue --make the XYZ in convert(varchar(XYZ), ...) the largest value of col1, col2, col3
FROM @YourTable y
INNER JOIN AllNumbers n ON n.Number <= LEN(y.col3)
WHERE ASCII(SUBSTRING(y.col3, n.Number, 1))<32 OR ASCII(SUBSTRING(y.col3, n.Number, 1))>127
order by 1
OPTION (MAXRECURSION 1000);
OUTPUT:
pk BadValueColumn BadValue
----------- -------------- --------------------
2 Col1 BA¶D
3 Col2 ¶BAD
4 Col3 B¶AD
5 Col1 ¶BAD
5 Col3 ¶BAD
6 Col1 BAD¶
6 Col2 B¶AD
6 Col3 BAD¶¶¶
(8 row(s) affected)
This script searches for non-ASCII characters in one column. It generates a string of all valid characters, here code points 32 to 127. Then it searches for rows that don't match the list:
declare @str varchar(128);
declare @i int;
set @str = '';
set @i = 32;
while @i <= 127
begin
set @str = @str + '|' + char(@i);
set @i = @i + 1;
end;
select col1
from YourTable
where col1 like '%[^' + @str + ']%' escape '|';
Running the various solutions on some real-world data (12M rows, varchar length ~30, around 9k dodgy rows, no full-text index in play), the PATINDEX solution is the fastest, and it also selects the most rows.
(I pre-ran km.'s solution to set the cache to a known state, ran the 3 processes, and finally ran km.'s again; the last 2 runs of it gave times within 2 seconds.)
patindex solution by Gerhard Weiss -- Runtime 0:38, returns 9144 rows
select dodgyColumn from myTable fcc
WHERE patindex('%[^ !-~]%' COLLATE Latin1_General_BIN,dodgyColumn ) >0
the substring-numbers solution by MT. -- Runtime 1:16, returned 8996 rows
select dodgyColumn from myTable fcc
INNER JOIN dbo.Numbers32k dn ON dn.number<(len(fcc.dodgyColumn ))
WHERE ASCII(SUBSTRING(fcc.dodgyColumn , dn.Number, 1))<32
OR ASCII(SUBSTRING(fcc.dodgyColumn , dn.Number, 1))>127
udf solution by Deon Robertson -- Runtime 3:47, returns 7316 rows
select dodgyColumn
from myTable
where dbo.udf_test_ContainsNonASCIIChars(dodgyColumn , 1) = 1
There is a user-defined function available on the web called 'Parse Alphanumeric'. Google "UDF parse alphanumeric" and you should find the code for it. This user-defined function removes all characters that don't fall within 0-9, a-z, and A-Z.
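For reference, a minimal sketch of what such a UDF might look like (an assumption on my part, not the exact published version):
CREATE FUNCTION dbo.udf_parsealpha (@input VARCHAR(8000))
RETURNS VARCHAR(8000)
AS
BEGIN
    -- Hypothetical sketch: strip anything outside 0-9, a-z, A-Z one character at a time.
    WHILE PATINDEX('%[^0-9a-zA-Z]%', @input) > 0
        SET @input = STUFF(@input, PATINDEX('%[^0-9a-zA-Z]%', @input), 1, '');
    RETURN @input;
END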
Select * from Staging.APARMRE1 ar
where udf_parsealpha(ar.last_name) <> ar.last_name
That should bring back any records that have a last_name with invalid chars for you...though your bonus points question is a bit more of a challenge. I think a case statement could handle it. This is a bit of pseudo code; I'm not entirely sure if it'd work.
Select id, case when udf_parsealpha(ar.last_name) <> ar.last_name then 'last name'
when udf_parsealpha(ar.first_name) <> ar.first_name then 'first name'
when udf_parsealpha(ar.Address1) <> ar.Address1 then 'Address1'
end,
case when udf_parsealpha(ar.last_name) <> ar.last_name then ar.last_name
when udf_parsealpha(ar.first_name) <> ar.first_name then ar.first_name
when udf_parsealpha(ar.Address1) <> ar.Address1 then ar.Address1
end
from Staging.APARMRE1 ar
where udf_parsealpha(ar.last_name) <> ar.last_name or
udf_parsealpha(ar.first_name) <> ar.first_name or
udf_parsealpha(ar.Address1) <> ar.Address1
I wrote this in the forum post box...so I'm not quite sure if that'll function as is, but it should be close. I'm not quite sure how it will behave if a single record has two fields with invalid chars either.
As an alternative, you should be able to change the from clause away from a single table and into a subquery that looks something like:
select id,fieldname,value from (
Select id,'last_name' as 'fieldname', last_name as 'value'
from Staging.APARMRE1 ar
Union
Select id,'first_name' as 'fieldname', first_name as 'value'
from Staging.APARMRE1 ar
---(and repeat unions for each field)
) sub
where udf_parsealpha(value) <> value
The benefit here is that for every column you only need to extend the union statement, whereas in the case-statement version of this script you need to repeat that comparison three times for every column.
To find which field has invalid characters:
SELECT * FROM Staging.APARMRE1 FOR XML AUTO, TYPE
You can test it with this query:
SELECT top 1 'char 31: '+char(31)+' (hex 0x1F)' field
from sysobjects
FOR XML AUTO, TYPE
The result will be:
Msg 6841, Level 16, State 1, Line 3 FOR XML could not serialize the
data for node 'field' because it contains a character (0x001F) which
is not allowed in XML. To retrieve this data using FOR XML, convert it
to binary, varbinary or image data type and use the BINARY BASE64
directive.
It is very useful when you write XML files and get invalid-character errors when validating them.
Here is a UDF I built to detect columns with extended ASCII characters. It is quick and you can extend the character set you want to check. The second parameter allows you to switch between checking anything outside the standard character set or allowing an extended set:
create function [dbo].[udf_ContainsNonASCIIChars]
(
@string nvarchar(4000),
@checkExtendedCharset bit
)
returns bit
as
begin
declare @pos int = 0;
declare @char varchar(1);
declare @return bit = 0;
while @pos < len(@string)
begin
select @char = substring(@string, @pos, 1)
if ascii(@char) < 32 or ascii(@char) > 126
begin
if @checkExtendedCharset = 1
begin
if ascii(@char) not in (9,124,130,138,142,146,150,154,158,160,170,176,180,181,183,184,185,186,192,193,194,195,196,197,199,200,201,202,203,204,205,206,207,209,210,211,212,213,214,216,217,218,219,220,221,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,248,249,250,251,252,253,254,255)
begin
select @return = 1;
select @pos = (len(@string) + 1)
end
else
begin
select @pos = @pos + 1
end
end
else
begin
select @return = 1;
select @pos = (len(@string) + 1)
end
end
else
begin
select @pos = @pos + 1
end
end
return @return;
end
USAGE:
select Address1
from PropertyFile_English
where udf_ContainsNonASCIIChars(Address1, 1) = 1

SQL Server PIVOT Column Data

I have a table with data as given below:
DATE Price
---------- ------
31/12/2009 10
31/12/2009 11
31/12/2009 12
30/12/2009 20
30/12/2009 21
30/12/2009 22
29/12/2009 30
29/12/2009 32
29/12/2009 31
I want to convert this data as given below:
31/12/2009 30/12/2009 29/12/2009
---------- ---------- ----------
10 10 10
11 11 11
12 12 12
But the values in the date column are dynamic, so I don't know how to do this using SQL Server PIVOT.
Could you please let me know how to get this data.
Given below is the script to replicate this scenario:
CREATE TABLE TEMP(EffectiveDate DATETIME,Price INT)
INSERT INTO TEMP(EffectiveDate,Price)
SELECT GETDATE(),10
UNION ALL
SELECT GETDATE(),11
UNION ALL
SELECT GETDATE(),12
UNION ALL
SELECT GETDATE()-1,20
UNION ALL
SELECT GETDATE()-1,21
UNION ALL
SELECT GETDATE()-1,22
UNION ALL
SELECT GETDATE()-2,30
UNION ALL
SELECT GETDATE()-2,32
UNION ALL
SELECT GETDATE()-2,31
SELECT CONVERT(VARCHAR,EffectiveDATE,103) AS 'DATE',Price FROM Temp
Thanks in advance,
Mahesh
OK, as I mentioned, your data does not make sense, but maybe this can help.
The only way to create a dynamic pivot is by creating dynamic SQL.
Also, PIVOT requires that you use an aggregate function (SUM, AVG, COUNT).
OK, let's see if this can help you.
CREATE TABLE #TEMP (EffectiveDate DATETIME,Price INT)
INSERT INTO #TEMP(EffectiveDate,Price)
SELECT GETDATE(),10
UNION ALL
SELECT GETDATE(),11
UNION ALL
SELECT GETDATE(),12
UNION ALL
SELECT GETDATE()-1,20
UNION ALL
SELECT GETDATE()-1,21
UNION ALL
SELECT GETDATE()-1,22
UNION ALL
SELECT GETDATE()-2,30
UNION ALL
SELECT GETDATE()-2,32
UNION ALL
SELECT GETDATE()-2,31
DECLARE @Cols VARCHAR(MAX)
SELECT @cols = COALESCE(@cols + ',[' + colName + ']',
'[' + colName + ']')
FROM (
SELECT DISTINCT
CONVERT(VARCHAR,EffectiveDATE,103) colName
FROM #TEMP
) s
ORDER BY colName DESC
DECLARE @query VARCHAR(MAX)
SET @query = N'SELECT *
FROM
(SELECT CONVERT(VARCHAR,EffectiveDATE,103) AS ''DATE'',Price
FROM #TEMP) p
PIVOT
(
SUM(Price) FOR DATE IN
( '+
@cols +' )
) AS pvt'
EXECUTE(@query)
DROP TABLE #TEMP
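For reference, with the three dates from the question (31/12/2009, 30/12/2009, 29/12/2009), the generated @query would look roughly like this static statement:
SELECT *
FROM
(SELECT CONVERT(VARCHAR,EffectiveDATE,103) AS 'DATE',Price
FROM #TEMP) p
PIVOT
(
SUM(Price) FOR DATE IN ( [31/12/2009],[30/12/2009],[29/12/2009] )
) AS pvt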
