Character-by-character comparison strings in sql - sql-server

How to compare the strings on characters, check that the strings consist of the same symbols using T-SQL?
For example:
'aaabbcd' vs 'ddbca' (TRUE): both strings consist of the same symbols
'abcddd' vs 'cda' (FALSE): both strings do not consist of the same symbols

If performance is important then I would suggest a purely set-based solution using Ngrams8k.
This will give you the correct answer:
-- Returns AllSame = 1 when @string1 and @string2 are built from the same set
-- of characters, otherwise 0.
-- dbo.ngrams8k(<string>, 1) is assumed to return one row per character in a
-- column named token (the function is defined elsewhere).
-- The FULL JOIN pairs up the characters of both strings; a NULL on either side
-- marks a character that occurs in only one of them. MAX(0) is 0 when at least
-- one such row survives the WHERE and NULL when none do, which COALESCE maps to 1.
SELECT AllSame = COALESCE(MAX(0), 1)
FROM dbo.ngrams8k(@string1, 1) AS ng1
FULL JOIN dbo.ngrams8k(@string2, 1) AS ng2
    ON ng1.token = ng2.token
WHERE ng1.token IS NULL
   OR ng2.token IS NULL;
To use this logic against a table you could use CROSS APPLY like so:
-- Sample data: one pair of strings per row.
DECLARE @table TABLE (string1 varchar(100), string2 varchar(100));

INSERT @table (string1, string2)
VALUES ('aaabbcd', 'ddbca'),
       ('abcddd', 'cda');

-- Solution using CROSS APPLY: the comparison runs once per row of @table.
-- AllSame is 1 when both strings use the same set of characters, else 0.
SELECT
    t.string1,
    t.string2,
    x.AllSame
FROM @table AS t
CROSS APPLY
(
    -- A NULL on either side of the FULL JOIN is a character present in only
    -- one string; MAX(0) is NULL (-> 1) only when no such row exists.
    SELECT AllSame = COALESCE(MAX(0), 1)
    FROM dbo.ngrams8k(t.string1, 1) AS ng1
    FULL JOIN dbo.ngrams8k(t.string2, 1) AS ng2
        ON ng1.token = ng2.token
    WHERE ng1.token IS NULL
       OR ng2.token IS NULL
) AS x;
Results:
string1 string2 AllSame
--------- --------- --------
aaabbcd ddbca 1
abcddd cda 0
Not only will this be the fastest solution presented thus far, but notice that we're getting the job done with as little code as possible.
UPDATE TO INCLUDE COMPARE PERFORMANCE TO MARTIN SMITH'S SOLUTION
-- Sample data: 10,000 rows of two random strings. Each string is a run of
-- 0-4 'a', 0-3 'b', 0-4 'c', 0-3 'd', 0-4 'e' and 0-3 'f' characters.
IF OBJECT_ID('tempdb..#sample') IS NOT NULL
    DROP TABLE #sample;

SELECT TOP (10000)
    replicate('a',abs(checksum(newid())%5))+replicate('b',abs(checksum(newid())%4))+
    replicate('c',abs(checksum(newid())%5))+replicate('d',abs(checksum(newid())%4))+
    replicate('e',abs(checksum(newid())%5))+replicate('f',abs(checksum(newid())%4)) AS string1,
    replicate('a',abs(checksum(newid())%5))+replicate('b',abs(checksum(newid())%4))+
    replicate('c',abs(checksum(newid())%5))+replicate('d',abs(checksum(newid())%4))+
    replicate('e',abs(checksum(newid())%5))+replicate('f',abs(checksum(newid())%4)) AS string2
INTO #sample
FROM sys.all_columns AS a
CROSS JOIN sys.all_columns AS b; -- cross join is only a row source
-- Benchmark: runs three variants of the "same characters?" check over #sample
-- and prints CPU/elapsed time for each via SET STATISTICS TIME.
SET NOCOUNT ON;
SET STATISTICS TIME ON;
-- Variant 1: ngrams8k FULL JOIN, restricted to a serial plan (MAXDOP 1).
PRINT 'ajb serial'+char(10)+replicate('-',50);
SELECT flag
FROM #sample t
CROSS APPLY
(
SELECT Flag = COALESCE(MAX(0),1)
FROM dbo.ngrams8k(t.string1, 1) ng1
FULL JOIN dbo.ngrams8k(t.string2, 1) ng2 ON ng1.token = ng2.token
WHERE ng1.token IS NULL OR ng2.token IS NULL
) x
OPTION (MAXDOP 1);
-- Variant 2: same query with trace flag 8649.
-- NOTE(review): 8649 is an undocumented trace flag; going by the label it is
-- used here to get a parallel plan -- confirm before relying on it.
PRINT 'ajb parallel'+char(10)+replicate('-',50);
SELECT flag
FROM #sample t
CROSS APPLY
(
SELECT Flag = COALESCE(MAX(0),1)
FROM dbo.ngrams8k(t.string1, 1) ng1
FULL JOIN dbo.ngrams8k(t.string2, 1) ng2 ON ng1.token = ng2.token
WHERE ng1.token IS NULL OR ng2.token IS NULL
) x
OPTION (querytraceon 8649);
-- Variant 3: numbers-based UNION / GROUP BY approach, serial plan.
PRINT 'M Smith - serial'+char(10)+replicate('-',50);
-- Nums supplies positions 1..100, enough for the short sample strings.
WITH Nums AS
(
SELECT TOP (100) ROW_NUMBER() OVER ( ORDER BY (SELECT NULL)) number
FROM sys.all_columns
)
SELECT flag
FROM #sample T
-- UNION removes duplicate (source, character) pairs, so each character has a
-- count of 2 only if it occurs in both strings; Min(Cnt) = 2 means all do.
CROSS APPLY (SELECT CASE WHEN Min(Cnt) = 2 THEN 1 ELSE 0 END AS Flag
FROM (SELECT Count(*) AS Cnt
FROM (SELECT 1 AS s,
Substring(t.string1, N1.number, 1) AS c
FROM Nums N1
WHERE N1.number <= Len(t.string1)
UNION
SELECT 2 AS s,
Substring(t.string2, N2.number, 1) AS c
FROM Nums N2
WHERE N2.number <= Len(t.string2)) D1
GROUP BY c) D2
) Ca
OPTION (MAXDOP 1);
SET STATISTICS TIME OFF;
Results:
ajb serial
--------------------------------------------------
SQL Server Execution Times:
CPU time = 656 ms, **elapsed time = 660 ms**.
ajb parallel
--------------------------------------------------
SQL Server Execution Times:
CPU time = 1281 ms, **elapsed time = 204 ms**.
M Smith serial
--------------------------------------------------
SQL Server Execution Times:
CPU time = 1390 ms, **elapsed time = 1393 ms**.
Note that I did not test Martin's solution with a parallel plan because, as is, that query cannot run in parallel.

An inline method.
This uses a numbers table
-- Numbers (tally) table holding the integers 1..8000.
CREATE TABLE dbo.Numbers (number INT PRIMARY KEY);

-- The cross join of sys.all_columns is only a row source; ordering by the
-- constant @@SPID makes ROW_NUMBER() number the rows without a real sort key.
INSERT INTO dbo.Numbers (number)
SELECT TOP (8000)
    ROW_NUMBER() OVER (ORDER BY @@SPID)
FROM sys.all_columns AS c1
CROSS JOIN sys.all_columns AS c2;
A version without but with lesser performance is in the edit history if you'd prefer trading off performance against not having to use one.
-- Demo: for each (S1, S2) pair, Flag = 1 when both strings consist of the
-- same set of characters, otherwise 0.
WITH T (S1, S2) AS
(
    SELECT 'aaabbcd', 'ddbca'
    UNION ALL
    SELECT 'abcddd', 'cda'
)
SELECT *
FROM T
CROSS APPLY
(
    -- Every character must be seen from both sources (count = 2).
    SELECT CASE WHEN MIN(per_char.Cnt) = 2 THEN 1 ELSE 0 END AS Flag
    FROM
    (
        SELECT COUNT(*) AS Cnt
        FROM
        (
            -- UNION (not UNION ALL) on purpose: it collapses repeated
            -- characters so each (source, character) pair appears once.
            SELECT 1 AS s,
                   SUBSTRING(S1, pos1.number, 1) AS c
            FROM dbo.Numbers AS pos1
            WHERE pos1.number <= LEN(S1)
            UNION
            SELECT 2 AS s,
                   SUBSTRING(S2, pos2.number, 1) AS c
            FROM dbo.Numbers AS pos2
            WHERE pos2.number <= LEN(S2)
        ) AS tagged_chars
        GROUP BY c
    ) AS per_char
) AS Ca

You can use LIKE with a pattern such as '%your-search-string%' to check whether a string contains a given substring.
-- Returns the rows of TableName whose Name contains the text searchText.
-- TableName, Name and searchText are placeholders.
SELECT * FROM TableName
WHERE Name LIKE '%searchText%'
You can use the following stored procedure to check the characters of the string.
-- Sets @IsMatching to 1 when every character of @originalString also occurs
-- somewhere in @stringToBeChecked, otherwise to 0.
--
-- Parameters:
--   @originalString    string whose characters are looked up one by one
--   @stringToBeChecked string that is searched for each character
--   @IsMatching        OUTPUT flag, 1 = all characters found, 0 = at least one missing
--
-- The check is one-directional: characters that occur only in
-- @stringToBeChecked are not detected. Call the procedure a second time with
-- the arguments swapped to test that both strings use the same symbols.
-- NOTE(review): CHARINDEX compares using the collation in effect, so
-- case sensitivity depends on the database/column collation.
CREATE PROCEDURE IsStringMatching
(
    @originalString    NVARCHAR(32),
    @stringToBeChecked NVARCHAR(32),
    @IsMatching        BIT OUTPUT
)
AS
BEGIN
    DECLARE @inputStringCount INT = LEN(@originalString);
    DECLARE @loopCount INT = 0;
    -- NVARCHAR so that Unicode characters from the NVARCHAR inputs are not
    -- lost when a single character is copied out.
    DECLARE @char NVARCHAR(1);

    SET @IsMatching = 1;

    WHILE @loopCount < @inputStringCount
    BEGIN
        SET @char = SUBSTRING(@originalString, @loopCount + 1, 1);

        -- CHARINDEX returns 0 when the character is not found.
        IF CHARINDEX(@char, @stringToBeChecked, 1) = 0
        BEGIN
            SET @IsMatching = 0;
            BREAK;
        END

        SET @loopCount = @loopCount + 1;
    END;
END
You can validate like this:
-- Call the procedure and read the result back through its OUTPUT parameter.
DECLARE @IsMatching BIT;
EXECUTE IsStringMatching N'aaabbcd', N'ABC', @IsMatching OUTPUT;
SELECT @IsMatching

Related

Convert strings to integers using PatIndex

I want to return integers from rather complex strings which combined unicode characters such as - and . with characters and integers.
I've come a long way in achieving this, but I still have troubles with some strings of a more complex structure. For instance:
-- Sample input: codes mixing digits, letters, '-' and '.'.
DECLARE @Tabl as table
(
    dats nvarchar(15)
)
INSERT INTO @Tabl VALUES
('103-P705hh'),
('115-xxx-44'),
('103-705.13'),
('525-hheef4')

-- Strips '.' and '-' first, then returns only the leading run of digits:
-- LEFT(..., position of the first non-digit - 1). The appended 'X' guarantees
-- that PATINDEX finds a non-digit even when the value is all digits.
-- Digits that follow a letter are therefore not returned (see the table below).
select LEFT(SUBSTRING(REPLACE(REPLACE(dats, '.',''),'-',''), PATINDEX('%[0-9.-]%', REPLACE(REPLACE(dats, '.',''),'-','')), 8000),
PATINDEX('%[^0-9.-]%', SUBSTRING(REPLACE(REPLACE(dats, '.',''),'-',''), PATINDEX('%[0-9.-]%', REPLACE(REPLACE(dats, '.',''),'-','')), 8000) + 'X')-1)
from @Tabl
Gives
Raw Input Actual return: Desired return:
103-P705hh 103 103705
115-xxx-44 115 11544
103-705.13 10370513 10370513
525-hheef4 525 5254
I had a topic regarding this yesterday to cover the case when multiple - or . are present, but as seen in the return this is actually taken care of now. However, expanding the databases I work with I encountered much more complex string such as those I presented here.
Does anyone have any idea what to do when characters and integers are "mixed up" in the string?
Regards,
Cenderze
I have seen loads of solutions that use a scalar udf with a loop, but I don't like either of these things, so throwing my hat into the ring with a different approach.
With the help of a numbers table you can deconstruct each value into its individual characters, remove non-numeric characters, then reconstruct it using FOR XML to concatenate rows, e.g.
-- Numbers yields 1..1000 by cross joining 10-row constructors; the running
-- row total is noted after each join.
WITH Numbers (Number) AS
(   SELECT ROW_NUMBER() OVER(ORDER BY N1.N)
    FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS N1 (N)                -- 10
    CROSS JOIN (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS N2 (N)          -- 100
    CROSS JOIN (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS N3 (N)          -- 1,000
    --CROSS JOIN (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS N4 (N)        -- 10,000
    --CROSS JOIN (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS N5 (N)        -- 100,000
    --COMMENT OR UNCOMMENT ROWS AS NECESSARY DEPENDING ON YOUR MAX STRING LENGTH
)
SELECT  t.dats,
        Stripped = x.data.value('.', 'INT')
FROM    @Tabl AS t
        CROSS APPLY
        (   -- Split the value into single characters, keep only the digits and
            -- concatenate them back together in their original order.
            SELECT  SUBSTRING(t.dats, n.Number, 1)
            FROM    Numbers n
            WHERE   n.Number <= LEN(t.dats)
            AND     SUBSTRING(t.dats, n.Number, 1) LIKE '[0-9]'
            ORDER BY n.Number
            FOR XML PATH(''), TYPE
        ) x (data);
Gives:
dats Stripped
----------------------
103-P705hh 103705
115-xxx-44 11544
103-705.13 10370513
525-hheef4 5254
I haven't done any testing, so it could be that expanding each string into individual characters and reconstructing it actually adds a lot more overhead than a UDF with a loop.
I decided to bench mark this
1. Set up functions
-- Inline table-valued function: returns one row whose Stripped column holds
-- the digits (0-9) of @Input, in their original order, as VARCHAR(MAX).
--   @Input  string to filter (up to 8000 characters)
CREATE FUNCTION dbo.ExtractNumeric_TVF (@Input VARCHAR(8000))
RETURNS TABLE
AS
RETURN
(   -- Numbers yields 1..LEN(@Input); the cross joins can supply up to 10,000
    -- rows (running total noted after each join).
    WITH Numbers (Number) AS
    (   SELECT TOP (LEN(@Input)) ROW_NUMBER() OVER(ORDER BY N1.N)
        FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS N1 (N)          -- 10
        CROSS JOIN (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS N2 (N)    -- 100
        CROSS JOIN (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS N3 (N)    -- 1,000
        CROSS JOIN (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS N4 (N)    -- 10,000
    )
    SELECT  Stripped = x.data.value('.', 'VARCHAR(MAX)')
    FROM    (   -- One row per digit, concatenated back into a single value.
                SELECT  SUBSTRING(@Input, n.Number, 1)
                FROM    Numbers n
                WHERE   n.Number <= LEN(@Input)
                AND     SUBSTRING(@Input, n.Number, 1) LIKE '[0-9]'
                ORDER BY n.Number
                FOR XML PATH(''), TYPE
            ) x (data)
);
GO
-- Scalar function: returns the digits (0-9) of @s in their original order.
-- Works by repeatedly inspecting and then removing the first character.
--   @s  string to filter (up to 8000 characters)
create function dbo.ExtractNumeric_UDF(@s varchar(8000))
returns varchar(8000)
as
begin
    declare @out varchar(max) = ''
    declare @c char(1)
    while len(@s) > 0 begin
        set @c = left(@s,1)
        if @c like '[0123456789]' set @out += @c
        -- drop the character that was just examined
        set @s = substring(@s, 2, len(@s) -1)
    end
    return @out
end
GO
2. Create first set of sample data and log table
-- Input table: 1,000 random strings, each the first 1-36 characters of a GUID.
CREATE TABLE dbo.T (Value VARCHAR(8000) NOT NULL);

INSERT dbo.T (Value)
SELECT TOP 1000
    LEFT(NEWID(), CEILING(RAND(CHECKSUM(NEWID())) * 36))
FROM sys.all_objects AS a
CROSS JOIN sys.all_objects AS b;

-- Log table: one row per benchmark run with its start and end time.
CREATE TABLE dbo.TestLog
(
    Fx           VARCHAR(255),
    NumberOfRows INT,
    TimeStart    DATETIME2(7),
    TimeEnd      DATETIME2(7)
)
3. Run Tests
GO
-- Run 1: scalar UDF. The batch runs 100 times (GO 100); each run logs its
-- start time, writes the results to a table variable, then stamps the end time.
DECLARE @T TABLE (Val VARCHAR(8000));

INSERT dbo.TestLog (fx, NumberOfRows, TimeStart)
VALUES ('dbo.ExtractNumeric_UDF', 1000, SYSDATETIME());

INSERT @T (Val)
SELECT dbo.ExtractNumeric_UDF(Value)
FROM dbo.T;

-- Only the row just inserted still has a NULL TimeEnd.
UPDATE dbo.TestLog
SET TimeEnd = SYSDATETIME()
WHERE TimeEnd IS NULL;
GO 100
-- Run 2: inline TVF, same procedure.
DECLARE @T TABLE (Val VARCHAR(8000));

INSERT dbo.TestLog (fx, NumberOfRows, TimeStart)
VALUES ('dbo.ExtractNumeric_TVF', 1000, SYSDATETIME());

INSERT @T (Val)
SELECT f.Stripped
FROM dbo.T
CROSS APPLY dbo.ExtractNumeric_TVF(Value) f;

UPDATE dbo.TestLog
SET TimeEnd = SYSDATETIME()
WHERE TimeEnd IS NULL;
GO 100
4. Get Results
-- Average run time in milliseconds per function and row count.
SELECT
    Fx,
    NumberOfRows,
    AVG(DATEDIFF(MILLISECOND, TimeStart, TimeEnd)) AS RunTime
FROM dbo.TestLog
GROUP BY
    fx,
    NumberOfRows;
I did the following (using just NEWID() so only a maximum of 36 characters) over 1,000 and 10,000 rows, the results were:
Fx NumberOfRows RunTime
--------------------------------------------------------
dbo.ExtractNumeric_TVF 1000 31
dbo.ExtractNumeric_UDF 1000 56
dbo.ExtractNumeric_TVF 10000 280
dbo.ExtractNumeric_UDF 10000 510
So the TVF coming in at just under half the time of the UDF.
I wanted to test edge cases so put 1,000 rows of longer strings (5,400 characters)
-- Reload dbo.T with 1,000 long strings: five GUIDs concatenated, repeated 30 times.
TRUNCATE TABLE dbo.T;

INSERT dbo.T (Value)
SELECT TOP 1000
    REPLICATE(CONCAT(NEWID(), NEWID(), NEWID(), NEWID(), NEWID()), 30)
FROM sys.all_objects AS a
CROSS JOIN sys.all_objects AS b;
And this is where the TVF came into its own, running over 5x faster:
Fx NumberOfRows RunTime
------------------------------------------------
dbo.ExtractNumeric_TVF 1000 2485
dbo.ExtractNumeric_UDF 1000 12955
I also really don't like the looping solutions so I decided to try my hand at one. This is using a predefined tally table but is quite similar to others posted here already.
This is my tally table. I keep this as a view on my system.
-- Tally view: returns the integers 1..10,000 in column N.
create View [dbo].[cteTally] as
WITH
    -- 10 rows
    E1(N) AS (select 1 from (values (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) dt(n)),
    -- 10 x 10 = 100 rows
    E2(N) AS (SELECT 1 FROM E1 a CROSS JOIN E1 b),
    -- 100 x 100 = 10,000 rows max
    E4(N) AS (SELECT 1 FROM E2 a CROSS JOIN E2 b),
    -- number the rows; ORDER BY (SELECT NULL) avoids sorting on a real key
    cteTally(N) AS
    (
        SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
    )
select N from cteTally
GO
Because I don't like looping I decided to use the table valued function approach which let me reuse this functionality in other queries with little to no effort. Here is one way to write such a function.
-- Inline table-valued function: returns the digits (0-9) of @SearchVal, in
-- their original order, in column NumValue.
--   @SearchVal  string to filter (up to 8000 characters)
-- The outer query selects FROM MyValues, so no row is returned when
-- @SearchVal contains no digits; DISTINCT collapses the repeated rows to one.
create function GetOnlyNumbers
(
    @SearchVal varchar(8000)
) returns table as return
    -- MyValues: one row per digit of @SearchVal together with its position N.
    with MyValues as
    (
        select substring(@SearchVal, N, 1) as number
            , t.N
        from cteTally t
        where N <= len(@SearchVal)
            and substring(@SearchVal, N, 1) like '[0-9]'
    )
    select distinct NumValue = STUFF((select number + ''
                                      from MyValues mv2
                                      order by mv2.N
                                      for xml path('')), 1, 0, '')
    from MyValues mv
That looks good but the proof is in the pudding. Let's take this out with our sample data and kick the tires a few times.
-- Sample data from the question.
DECLARE @Tabl as table
(
    dats nvarchar(15)
)
INSERT INTO @Tabl VALUES
('103-P705hh'),
('115-xxx-44'),
('103-705.13'),
('525-hheef4')

-- Apply the function to every row; x.NumValue holds the extracted digits.
select *
from @Tabl t
cross apply dbo.GetOnlyNumbers(t.dats) x
Sure looks nice and tidy. I tested against several of the other solutions posted here and without going into deep testing this appears to be significantly faster than the other approaches posted at this time.
-- Sample data with a key so the correlated subquery can address one row.
DECLARE @Tabl as table
(
    ID INT,
    dats nvarchar(15)
)
INSERT INTO @Tabl VALUES
(1, '103-P705hh'),
(2, '115-xxx-44'),
(3, '103-705.13'),
(4, '525-hheef4')

-- For each row, S is the concatenation (FOR XML PATH('')) of the digits of
-- dats in position order. master.dbo.spt_values rows of type 'P' are used as
-- the source of position numbers.
SELECT T.ID, t.dats
    ,(
        SELECT SUBSTRING(tt.dats,V.number,1)
        FROM @Tabl tt
        JOIN master.dbo.spt_values V ON V.type='P' AND V.number BETWEEN 1 AND LEN(tt.dats)
        WHERE tt.ID=T.ID AND SUBSTRING(TT.dats,V.number,1) LIKE '[0-9]'
        ORDER BY V.number
        FOR XML PATH('')
    ) S
FROM @Tabl t
ORDER BY T.ID;
Can you use a udf ? If so, try this
-- Scalar function: returns the digits (0-9) of @s in their original order.
-- Use "alter function" instead of "create function" when it already exists.
--   @s  string to filter
create function numerals(@s varchar(max))
returns varchar(max)
as
begin
    declare @out varchar(max) = ''
    declare @c char(1)
    while len(@s) > 0 begin
        set @c = left(@s,1)
        if @c like '[0123456789]' set @out += @c
        -- drop the character that was just examined
        set @s = substring(@s, 2, len(@s) -1)
    end
    return @out
end
to use it on your temp table...
-- @Tabl is the table variable declared in the question.
select dbo.numerals(dats) from @Tabl
another solution, that does not use a UDF, but will work only if your table has a primary key, uses a recursive CTE. It is:
DECLARE @Tabl as table
(pk int identity not null, -- <=== added a primary key
dats nvarchar(max) )
INSERT INTO @Tabl VALUES
('103-P705hh'),
('115-xxx-44'),
('103-705.13'),
('525-hheef4');

-- newVals walks each string one character per recursion level:
--   pos  = number of characters consumed so far
--   newD = digits found among those characters
-- NOTE(review): SQL Server stops recursion after 100 levels by default, so
-- longer strings need an OPTION (MAXRECURSION n) hint.
with newVals(pk, pos, newD) as
    (select pk, 1,
        case when left(Dats,1) like '[0123456789]'
             then left(Dats,1) else '' end
     from @Tabl
     Union All
     Select t.pk, pos + 1, n.newD +
        case when substring(dats, pos+1, 1) like '[0123456789]'
             then substring(dats, pos+1, 1) else '' end
     from @Tabl t join newVals n on n.pk = t.pk
     where pos+1 <= len(dats) )
-- Keep only the last step per row, i.e. the fully processed string.
Select newD from newVals x
where pos = (Select Max(pos)
             from newVals
             where pk = x.pk)

How to optimize a stored procedure that takes too long to execute?

I've written a stored procedure to generate random SMS records/events.
When inserting 1.2 million rows, the query takes hundreds of minutes
exec insert_random_sms 1200000
I've coded the stored procedure in a 'procedural' way. But, from what I see, SQL is not very efficient in this respect.
-- Inserts @number_of_records random rows into tbl_sms, one row per loop
-- iteration. Sender and destination are picked independently at random from
-- tbl_phone_agenda.
--   @number_of_records  number of rows to generate
create proc insert_random_sms
    @number_of_records int
as
begin
    declare @cnt int = 0; -- loop counter
    declare @phone_id int;
    declare @dest_id int;
    while (@cnt < @number_of_records)
    begin
        -- rand() * 100 is truncated when assigned to int, giving 0..99
        declare @charge int = rand() * 100;
        -- rounded, giving 0..5
        declare @tarrif_plan int = round(rand() * 5, 0);

        -- ORDER BY newid() orders the whole table randomly to pick one row
        select top 1 @phone_id = phone_no
        from tbl_phone_agenda
        order by newid();

        select top 1 @dest_id = phone_no
        from tbl_phone_agenda
        order by newid();

        insert into tbl_sms (phone_id, dest_id, charge, tarrif_plan)
        values (@phone_id, @dest_id, @charge,
                convert(nvarchar(50), @tarrif_plan));

        set @cnt += 1;
    end
end
go
What is the way to optimize this stored procedure?
The method I like to use for generating x number of records is the stacked CTE method (having read this article by Aaron Bertrand who credits Itzik Ben-Gan for the stacked CTE approach):
-- Stacked CTEs: each level cross joins the previous level with itself.
WITH N1 (N) AS
(   -- 10 rows
    SELECT 1
    FROM (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS n (Number)
),
-- 10 x 10 = 100 rows
N2 (N) AS (SELECT 1 FROM N1 AS a CROSS JOIN N1 AS b),
-- 100 x 100 = 10,000 rows
N3 (N) AS (SELECT 1 FROM N2 AS a CROSS JOIN N2 AS b),
-- 10,000 x 10,000 = 100,000,000 rows
N4 (N) AS (SELECT 1 FROM N3 AS a CROSS JOIN N3 AS b)
SELECT COUNT(*)
FROM N4
This simply starts off with 10 rows and keeps cross joining until, in the case above, there are 100,000,000 rows. This is how I would generate your rows.
When you are using a set based approach you can no longer use RAND() on its own because it is a run time constant, in order to get a new evaluation for each row you need to combine RAND() with NEWID() which is unique per row, so the following will generate a random number between 0 and 100 that is different for each row:
SELECT CAST(ROUND(RAND(CHECKSUM(NEWID())) * 100, 0) AS INT)
The next thing I would do is put all your phonenumbers into a temp table so that they have a sequential ID (this will be used to allocate randomly):
-- Copy the phone numbers into a temp table so that each one gets a
-- sequential ID that can be used for random selection.
CREATE TABLE #Phone
(
    ID INT IDENTITY NOT NULL PRIMARY KEY,
    PhoneNo VARCHAR(50) NOT NULL
);

-- The source column is phone_no, as in the question's procedure.
INSERT #Phone (PhoneNo)
SELECT phone_no
FROM tbl_phone_agenda;
So your final query will be
-- Set-based generator: inserts up to @number_of_records random rows into
-- tbl_sms in a single statement.
--   @number_of_records  number of rows to generate
CREATE PROC insert_random_sms @number_of_records INT
AS
BEGIN
    -- Phone numbers with a sequential ID, used for random lookups.
    CREATE TABLE #Phone
    (
        ID INT IDENTITY NOT NULL PRIMARY KEY,
        PhoneNo VARCHAR(50) NOT NULL
    );

    -- The source column is phone_no, as in the question's procedure.
    INSERT #Phone (PhoneNo)
    SELECT phone_no
    FROM tbl_phone_agenda;

    -- NEEDED SO WE KNOW WHAT NUMBER TO GENERATE A RANDOM
    -- NUMBER IN THE RIGHT RANGE LATER
    DECLARE @PhoneCount INT = (SELECT COUNT(*) FROM #Phone);

    -- Row source: N4 can supply up to 100,000,000 rows.
    WITH N1 (N) AS
    (   SELECT 1
        FROM (VALUES
                (1), (1), (1), (1), (1),
                (1), (1), (1), (1), (1)
            ) n (Number)
    ),
    N2 (N) AS (SELECT 1 FROM N1 AS N1 CROSS JOIN N1 AS N2),
    N3 (N) AS (SELECT 1 FROM N2 AS N1 CROSS JOIN N2 AS N2),
    N4 (N) AS (SELECT 1 FROM N3 AS N1 CROSS JOIN N3 AS N2)
    INSERT tbl_sms (phone_id, dest_id, charge, tarrif_plan)
    SELECT TOP (@number_of_records)
            p.PhoneNo,
            d.PhoneNo,
            -- RAND(CHECKSUM(NEWID())) is used to get a fresh random value for
            -- every row; a bare RAND() is evaluated once per statement.
            Charge = CAST(ROUND(RAND(CHECKSUM(NEWID())) * 100, 0) AS INT),
            tarrif_plan = CAST(ROUND(RAND(CHECKSUM(NEWID())) * 5, 0) AS INT)
    FROM N4
        -- sender and destination are each joined on a random ID in 1..@PhoneCount
        INNER JOIN #Phone p
            ON p.ID = CAST(CEILING(RAND(CHECKSUM(NEWID())) * @PhoneCount) AS INT)
        INNER JOIN #Phone d
            ON d.ID = CAST(CEILING(RAND(CHECKSUM(NEWID())) * @PhoneCount) AS INT)
END
In my tests this ran in about 20-30 seconds to generate 1.2m records, looking up against 100,000 phone numbers.
With a minor change in the way random phone_no values are fetched from the existing table tbl_phone_agenda, I've achieved the insertion of 1.2 million records in ~50 sec. No doubt that GarethD's solution is the fastest, though.
-- create stored procedure to insert random records into the sms table, automatically | tried and tested
-- Same loop as the original procedure, but the random phone numbers are
-- picked with a cheap random filter instead of ORDER BY newid().
--   @number_of_records  number of rows to generate
create proc insert_random_sms @number_of_records int
as
begin
    declare @cnt int = 0; -- loop counter
    declare @phone_id int;
    declare @dest_id int;
    while (@cnt < @number_of_records)
    begin
        -- rand() * 100 is truncated when assigned to int, giving 0..99
        declare @charge int = rand() * 100;
        declare @tarrif_plan int = round(rand() * 5, 0);

        -- here come the changes
        -- the filter keeps a pseudo-random subset of rows; top 1 takes the first of them
        select top 1 @phone_id = phone_no from tbl_phone_agenda where (abs(cast((binary_checksum(*) * rand()) as int)) % 100) < 10
        select top 1 @dest_id = phone_no from tbl_phone_agenda where (abs(cast((binary_checksum(*) * rand()) as int)) % 100) < 10

        -- NOTE(review): the column is spelled tariff_plan here but tarrif_plan
        -- in the other versions of this procedure; verify against tbl_sms.
        insert into tbl_sms (phone_id, dest_id, charge, tariff_plan) values (@phone_id, @dest_id, @charge , convert(nvarchar(50), @tarrif_plan));

        set @cnt += 1;
    end
end
go
The inspiration for my solution can be found here: MSDN article - Selecting Rows Randomly from a Large Table

Generate a repetitive sequential number using SQL Server 2008

Can anyone help me to generate a repetitive sequential number using SQL Server 2008. Say I have a table of 1000 rows and a new field (int) added to the table. All I need is to auto fill that particular field with sequential numbers 1-100 all the way to the last row.
I have this, but it doesn't seem to be working. Your help is much appreciated.
-- Attempt from the question. Every pass updates ALL rows of Names (there is
-- no WHERE clause), so after the loop every row holds the last value, 100.
DECLARE @id INT
SET @id = 0
while @id < 101
BEGIN
    UPDATE Names SET id=@id
    set @id=@id+1
END
USE tempdb
GO
-- Drop the demo table only when it exists so the script can be re-run.
IF OBJECT_ID('tableof1000rows') IS NOT NULL DROP TABLE tableof1000rows
GO
CREATE TABLE tableof1000rows (id int identity(1,1), nb int, value varchar(128))
GO
-- 1,000 filler rows; the cross join is only a row source.
INSERT INTO tableof1000rows (value)
SELECT TOP 1000 o1.name
FROM sys.objects o1
CROSS JOIN sys.objects o2
GO
-- Fill nb with 1..100 repeating, ordered by id.
-- (rn - 1) % 100 + 1 maps rn 1 -> 1, 100 -> 100, 101 -> 1; without the "- 1"
-- the sequence would start at 2 and row 100 would get 1.
UPDATE t1
SET nb = t2.nb
FROM tableof1000rows t1
JOIN (SELECT id, ((ROW_NUMBER() OVER (ORDER BY id) - 1) % 100) + 1 as nb FROM tableof1000rows) t2 ON t1.id = t2.id
GO
SELECT *
FROM tableof1000rows
Use ROW_NUMBER to generate a number. Use modulo maths to get values from 1 to 100.
go
-- Demo table; RepetitiveSequentialNumber is filled in below.
create table dbo.Demo1
(
    DID int not null identity primary key,
    RepetitiveSequentialNumber int not null
) ;
go
insert into dbo.Demo1 values ( 0 )
go 1000 -- This is just to get 1,000 rows into the table.

-- Number the rows 1..n in DID order.
select DID, row_number() over ( order by DID ) as RSN
into #RowNumbers
from dbo.Demo1 ;

-- Reduce to the last two digits (0 to 99) and turn 0 into 100, so the
-- values run 1..100 and then repeat.
update #RowNumbers
set RSN = case when RSN % 100 = 0 then 100 else RSN % 100 end ;

-- Copy the computed values back to the main table.
update d
set RepetitiveSequentialNumber = r.RSN
from dbo.Demo1 as d
inner join #RowNumbers as r
    on r.DID = d.DID ;

select *
from dbo.Demo1 ;
Not pretty or elegant, but..
-- Fills new_col one row at a time with 1..100 repeating.
-- count(new_col) counts only the rows already filled, so the next value is
-- (filled rows % 100) + 1: 0 -> 1, 99 -> 100, 100 -> 1, ...
-- The WHERE on the UPDATE makes sure an unfilled row is the one updated.
-- Which row receives which number is arbitrary (TOP without ORDER BY).
while exists (select * from tbl where new_col is null)
    update top (1) tbl
    set new_col = (select count(new_col) % 100 + 1 from tbl)
    where new_col is null
Your way isn't working because you are trying to set a value rather than insert one. I'm sure you have found a solution by now but if not then try this instead.
-- Inserts one row per loop iteration into Names, with the values 0 through 100.
DECLARE @id INT
SET @id = 0
while @id < 101
BEGIN
    INSERT Names select @id
    set @id=@id+1
END

How to make this sql query

I have 2 SQL Server tables with the following structure
Turns-time
cod_turn (PrimaryKey)
time (datetime)
Taken turns
cod_taken_turn (Primary Key)
cod_turn
...
and several other fields which are irrelevant to the problem. I cant alter the table structures because the app was made by someone else.
Given a numeric parameter, which we will assume to be "3" for this example, and a given time, I need to create a query which, looking from that time on, finds the first 3 consecutive records by time which are not marked as "taken".
For example, for these turns, starting by the time of "8:00" chosen by the user
8:00 (not taken)
9:00 (not taken)
10:00 (taken)
11:00 (not taken)
12:00 (not taken)
13:00 (not taken)
14:00 (taken)
The query it would have to list
11:00
12:00
13:00
I cant figure out how to make the query in pure sql, if possible.
with a cursor
-- Finds the first run of @GivenSequence consecutive not-taken turns at or
-- after @GivenTime by walking the turns in time order with a cursor.
declare @GivenTime datetime,
        @GivenSequence int;
select @GivenTime = cast('08:00' as datetime),
       @GivenSequence = 3;

declare @sequence int,              -- length of the current run of free turns
        @code_turn int,
        @time datetime,
        @taked int,                 -- cod_taken_turn, NULL when the turn is free
        @firstTimeInSequence datetime; -- time of the first turn in the current run
set @sequence = 0;

declare turnCursor cursor FAST_FORWARD for
    select turn.cod_turn, turn.[time], taken.cod_taken_turn
    from [Turns-time] as turn
    left join [Taken turns] as taken on turn.cod_turn = taken.cod_turn
    where turn.[time] >= @GivenTime
    order by turn.[time] asc;

open turnCursor;
fetch next from turnCursor into @code_turn, @time, @taked;
-- Stop as soon as a long enough run is found or the rows are exhausted.
while @@fetch_status = 0 AND @sequence < @GivenSequence
begin
    if @taked IS NULL
        -- free turn: extend the run, remembering where it started
        select @firstTimeInSequence = coalesce(@firstTimeInSequence, @time)
              ,@sequence = @sequence + 1;
    else
        -- taken turn: the run is broken, start over
        select @sequence = 0,
               @firstTimeInSequence = null;
    fetch next from turnCursor into @code_turn, @time, @taked;
end
close turnCursor;
deallocate turnCursor;

-- Return the turns of the run only when a complete one was found.
if @sequence = @GivenSequence
    select top (@GivenSequence) * from [Turns-time] where [time] >= @firstTimeInSequence
    order by [time] asc
-- Recursive-CTE approach. @start_time and @run_length are expected to be
-- declared by the caller.
-- Base: every turn plus a taken flag (1 when a Taken_turns row exists).
WITH Base AS (
    SELECT *,
        CASE WHEN EXISTS(
            SELECT *
            FROM Taken_turns taken
            WHERE taken.cod_turn = turns.cod_turn) THEN 1 ELSE 0 END AS taken
    FROM [Turns-time] turns)
-- RecursiveCTE: starts at the first turn at or after @start_time and then
-- advances one turn per recursion step (rn = 1 is the next turn in time order).
-- When T.taken = 0, run is reset to 0 and grp is incremented; otherwise run is
-- incremented and grp is kept. Recursion stops once run reaches @run_length.
, RecursiveCTE As (
    SELECT TOP 1 cod_turn, [time], taken AS run, 0 AS grp
    FROM Base
    WHERE [time] >= @start_time
    ORDER BY [time]
    UNION ALL
    SELECT R.cod_turn, R.[time], R.run, R.grp
    FROM (
        SELECT T.*,
            CASE WHEN T.taken = 0 THEN 0 ELSE run+1 END AS run,
            CASE WHEN T.taken = 0 THEN grp + 1 ELSE grp END AS grp,
            rn = ROW_NUMBER() OVER (ORDER BY T.[time])
        FROM Base T
        JOIN RecursiveCTE R
            ON R.[time] < T.[time]
    ) R
    WHERE R.rn = 1 AND run < @run_length
-- T: adds the highest group number and the size of each group.
), T AS(
    SELECT *,
        MAX(grp) OVER () AS FinalGroup,
        COUNT(*) OVER (PARTITION BY grp) AS group_size
    FROM RecursiveCTE
)
-- Only the last group is returned, and only if it has exactly @run_length rows.
SELECT cod_turn,time
FROM T
WHERE grp=FinalGroup AND group_size=@run_length
I think there is not a simple way to achieve this.
But probably there are many complex ways :). This is an approach that should work in Transact-SQL:
-- Turns in time order; id is a gap-free sequence (1, 2, 3, ...) that follows
-- the ORDER BY of the INSERT, so "next turn" is simply id + 1.
CREATE TABLE #CONSECUTIVE_TURNS
(
    id          int identity,
    cod_turn    int,
    time        datetime,
    consecutive int
)

INSERT INTO #CONSECUTIVE_TURNS (cod_turn, time, consecutive)
SELECT cod_turn
     , time
     , 0
FROM [Turns-time]
ORDER BY time

DECLARE @i int,
        @n int
SET @i = 0
SET @n = 3 -- Number of consecutive not taken records

-- Pass @i adds 1 to a row when the turn @i positions after it exists and is
-- not taken. After @n passes, consecutive = @n exactly for the rows that
-- start a run of @n free turns.
-- NOTE(review): the taken-turns table is written [Taken turns] as in the
-- question; adjust if the real name differs.
while (@i < @n) begin
    UPDATE cur
    SET consecutive = cur.consecutive + 1
    FROM #CONSECUTIVE_TURNS AS cur
    INNER JOIN #CONSECUTIVE_TURNS AS ahead
        ON ahead.id = cur.id + @i
    WHERE not exists (SELECT 1
                      FROM [Taken turns] AS taken
                      WHERE taken.cod_turn = ahead.cod_turn
                     )
    SET @i = @i + 1
end

-- First row that starts a complete run.
DECLARE @firstElement int
SELECT @firstElement = min(id)
FROM #CONSECUTIVE_TURNS
WHERE consecutive >= @n

-- The @n turns of that run (no rows when no run exists, since
-- @firstElement is then NULL).
SELECT *
FROM #CONSECUTIVE_TURNS
WHERE id between @firstElement
             and @firstElement + @n - 1
This is untested but I think it will work.
Pure SQL
-- Returns the first 3 consecutive not-taken turns starting at or after 08:00.
SELECT TOP 3 time
FROM [turns-time]
WHERE time >= (
    -- get first result of the 3 consecutive results
    -- (ORDER BY is required: TOP 1 without it may return any qualifying row)
    SELECT TOP 1 tt.time AS first_result
    FROM [turns-time] tt
    -- start from given time, which is 8:00 in this case
    WHERE tt.time >= '08:00'
    -- turn is not taken (NOT EXISTS instead of NOT IN, which returns no rows
    -- at all if the subquery yields a NULL)
    AND NOT EXISTS (SELECT 1 FROM taken_turns tk WHERE tk.cod_turn = tt.cod_turn)
    -- 3 consecutive turns from current turn are not taken
    AND (
        SELECT COUNT(*) FROM
        (
            -- no GROUP BY here: SQL Server rejects ORDER BY tt2.time when the
            -- query groups by cod_turn only
            SELECT TOP 3 tt2.cod_turn AS selected_turn
            FROM [turns-time] tt2
            WHERE tt2.time >= tt.time
            ORDER BY tt2.time
        ) AS temp
        WHERE NOT EXISTS (SELECT 1 FROM taken_turns tk2 WHERE tk2.cod_turn = temp.selected_turn)) = 3
    ORDER BY tt.time
) ORDER BY time
Note: I tested it on Postgresql (with some code modification), but not MS SQL Server. I'm not sure about performance compared to T-SQL.
Another set-based solution (tested):
-- Set-based solution. @Start (start time) and @Len (required run length) are
-- expected to be declared by the caller.
-- @Results: the turns at or after @Start, numbered in time order.
DECLARE @Results TABLE
(
    cod_turn INT NOT NULL
    ,[status] TINYINT NOT NULL
    ,RowNumber INT PRIMARY KEY
);

INSERT @Results (cod_turn, [status], RowNumber)
SELECT a.cod_turn
    ,CASE WHEN b.cod_turn IS NULL THEN 1 ELSE 0 END [status] --1=(not taken), 0=(taken)
    ,ROW_NUMBER() OVER(ORDER BY a.[time]) AS RowNumber
FROM [Turns-time] a
LEFT JOIN [Taken_turns] b ON a.cod_turn = b.cod_turn
WHERE a.[time] >= @Start;
--SELECT * FROM @Results r ORDER BY r.RowNumber;

-- x: for each row, sum the status of that row and the rows before it (at most
-- @Len rows); the first row where the sum equals @Len ends the wanted run.
-- The join then returns the @Len rows of that run.
SELECT *
FROM
(
    SELECT TOP(1) ca.LastRowNumber
    FROM @Results a
    CROSS APPLY
    (
        SELECT SUM(c.status) CountNotTaken, MAX(c.RowNumber) LastRowNumber
        FROM
        (
            SELECT TOP(@Len)
                b.RowNumber, b.[status]
            FROM @Results b
            WHERE b.RowNumber <= a.RowNumber
            ORDER BY b.RowNumber DESC
        ) c
    ) ca
    WHERE ca.CountNotTaken = @Len
    ORDER BY a.RowNumber ASC
) x INNER JOIN @Results y ON x.LastRowNumber - @Len + 1 <= y.RowNumber AND y.RowNumber <= x.LastRowNumber;

SqlServer Random Data Generation Observation

I have a question on why the output of these two queries differ. I would have expected them to work the same.
Query 1:
-- Query 1: the two random digits are computed directly in the INSERT ... SELECT.
declare @cache table(originalValue nvarchar(255), obfuscateValue nvarchar(255));
declare @table1 table(c char(1));
declare @i1 int;

-- Fill @table1 with the characters '0'..'9'.
set @i1 = ASCII('0');
while @i1 <= ASCII('9')
begin
    insert into @table1 (c)
    select (CHAR(@i1))
    set @i1 = @i1 +1;
end

-- One random character from @table1, twice, concatenated per customer row.
insert into @cache (originalValue, obfuscateValue)
select [firstname],
    (select top 1 c from @table1 order by NEWID()) +
    (select top 1 c from @table1 order by NEWID())
from Customer
where [firstname] is not null

select * from @cache;
Query 2:
-- Query 2: the names are inserted first; the random digits are added by a
-- separate UPDATE that joins @cache to a derived table.
declare @cache table(originalValue nvarchar(255), obfuscateValue nvarchar(255));
declare @table1 table(c char(1));
declare @i1 int;

-- Fill @table1 with the characters '0'..'9'.
set @i1 = ASCII('0');
while @i1 <= ASCII('9')
begin
    insert into @table1 (c)
    select (CHAR(@i1))
    set @i1 = @i1 +1;
end

insert into @cache (originalValue)
select [firstname]
from Customer
where [firstname] is not null

-- Same pair of random-character subqueries as Query 1, applied per cached row.
update c
set c.obfuscateValue = t.Value
from @cache c
join
(
    select originalValue,
    (
        (select top 1 c from @table1 order by NEWID()) +
        (select top 1 c from @table1 order by NEWID())
    ) as Value
    from @cache
) t on t.originalValue = c.originalValue

select * from @cache;
They should do the same, but first query returns following results:
Jonathon 73
Everett 73
Janet 73
Andy 73
Shauna 73
And second:
Jonathon 82
Everett 40
Janet 68
Andy 79
Shauna 29
As you noticed, the second column in second result has different values, while first - same values.
It looks like in first query the
(select top 1 c from #table1 order by NEWID()) +
(select top 1 c from #table1 order by NEWID())
is called only once.
Can someone explain this mystery?
I think random values can be generated in another way.
This is how to generate values matching [a-zA-Z]{3,6}:
-- Builds a random string of @min..@max letters for each customer first name.
declare @min int, @max int;
declare @alpha varchar(max)
set @min = 3;
set @max = 6;
set @alpha = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

declare @cache table(originalValue nvarchar(255), obfuscateValue nvarchar(255));

-- t.Value is six random letters from @alpha; t.maxLen is a random length in
-- 1..@max, raised to @min when smaller. LEFT trims Value to that length.
insert into @cache (originalValue, obfuscateValue)
select [firstname], LEFT(t.Value, case when t.maxLen < @min then @min else t.maxLen end)
from Customer
join
(
    select ABS(CHECKSUM(NEWID()))%@max + 1 as maxLen,
        SUBSTRING(@alpha, ABS(CHECKSUM(NEWID()))%LEN(@alpha) + 1, 1) +
        SUBSTRING(@alpha, ABS(CHECKSUM(NEWID()))%LEN(@alpha) + 1, 1) +
        SUBSTRING(@alpha, ABS(CHECKSUM(NEWID()))%LEN(@alpha) + 1, 1) +
        SUBSTRING(@alpha, ABS(CHECKSUM(NEWID()))%LEN(@alpha) + 1, 1) +
        SUBSTRING(@alpha, ABS(CHECKSUM(NEWID()))%LEN(@alpha) + 1, 1) +
        SUBSTRING(@alpha, ABS(CHECKSUM(NEWID()))%LEN(@alpha) + 1, 1) as Value
)t on t.Value is not null
where [firstname] is not null

select * from @cache;
One line?
-- Returns a random 3-character string of digits.
SELECT
    RIGHT( --number of zeros to match expected max length. Or use REPLICATE.
        '000000' + CAST(
            --The 2 newid() expression means we'll get a larger number
            --less chance of using leading static zeroes
            CAST(CHECKSUM(NEWID()) as bigint) * CAST(CHECKSUM(NEWID()) as bigint)
        as varchar(30))
    --The 3 gives us the desired mask. Currently 3 digits.
    , 3)
You are correct in your assumption that the first query is only running the ‘select top’ once. The behavior is happening because of how the optimizer chose to optimize the query. It decided because the subqueries (the select top queries) are self-contained and are not correlated with the outside select query it uses a Tablespool (Lazy Spool) operator in the execution plan. This causes the select top value to be placed in the tempdb for reuse.
Because the optimizer chooses to use a Nested Loops operator to bring all the data together no rebinding is needed, the spooled value is used instead of reapplying the queries for each input outer row.
During the second query the optimizer chose not to use a Tablespool operator (I believe due to the fact that the input table is from tempdb). So you have the select top subqueries being reapplied for each input row from the temporary table.
If needed, you may be able to use a table/query hints if you want to force the execution plan to perform as desired.

Resources