SQL Server: How to query within a string?

SQL Server: How to query within a string? - sql-server

I have a requirement where my client wants me to retrieve specific information from a text column
Following is the sample of the same
the student scored following result: class: 6 subject: result: english 80 math 23
science 45
The expected outcome needs to be like -
English Maths Science
80 23 45
I tried using string_split
select value from STRING_SPLIT( (select value from mytable where [student roll number] = 'SCH-01097') , ' ' )
but that only split the value into multiple rows that can't be queried.
I also tried using LTRIM with CHARINDEX approach, but the column have different text and not always organized. the initial text is different most of the time.
can this be done?
edit - I am close but just not there yet
So far I have reached here
SELECT VALUE FROM STRING_SPLIT ((select
substring(value, charindex('Block',value),1000)
from mytable where [rollnumber ] = 'SCH-01097'),' ') WHERE VALUE <> ' '
this gives me everything I need but in a single column
class6:
Subject
result
english
80
math
23
science
45
now how to make it in desired table form?

To maintain the order of the split values this answer uses DelimitedSplit8K. Something like this works.
[Edit] Instead of having specific strings in a CTE, the query now uses 'stems' to map multiple strings to the same class. For example, if English is entered as En it will still be mapped to English.
Table and data
drop table if exists #tTest;
go
create table #tTest(
string Varchar(256));
insert #tTest(string) values
('the student scored following result: class: 6 subject: result: english 80 math 23');
DelimitedSplit8k
CREATE FUNCTION dbo.DelimitedSplit8K
--===== Define I/O parameters
(#pString VARCHAR(8000), #pDelimiter CHAR(1))
--WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
-- enough to cover VARCHAR(8000)
WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
-- for both a performance gain and prevention of accidental "overruns"
SELECT TOP (ISNULL(DATALENGTH(#pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
SELECT 1 UNION ALL
SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(#pString,t.N,1) = #pDelimiter
),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
SELECT s.N1,
ISNULL(NULLIF(CHARINDEX(#pDelimiter,#pString,s.N1),0)-s.N1,8000)
FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
Item = SUBSTRING(#pString, l.N1, l.L1)
FROM cteLen l
;
Query
;with
stems_cte(stem, word) as (
select 'English', 'English' union all
select 'En', 'English' union all
select 'Math', 'Math' union all
select 'Maths', 'Math' union all
select 'Science', 'Science'),
splt_cte(string, str_val, ndx, lead_ndx, lead_len, rn) as (
select t.string, ds.Item, charndx.ndx,
lead(charndx.ndx) over (order by ds.ItemNumber),
lead(len(ds.[Item])) over (order by ds.ItemNumber),
ItemNumber
from #tTest t
cross apply dbo.DelimitedSplit8K(t.string, ' ') ds
cross apply (select charindex(ds.Item, t.string, 1) ndx) charndx
where Item <> ' '),
spec_rows_cte(word, ndx, lead_ndx, lead_len, rn) as (
select sp.word, sc.ndx, sc.lead_ndx, sc.lead_len, sc.rn
from splt_cte sc
join stems_cte sp on sc.str_val=sp.stem)
select max(case when src.word='English' then substring(sc.string, src.lead_ndx, src.lead_len) else null end) English,
max(case when src.word='Math' then substring(sc.string, src.lead_ndx, src.lead_len) else null end) Math,
max(case when src.word='Science' then substring(sc.string, src.lead_ndx, src.lead_len) else null end) Science
from splt_cte sc
join spec_rows_cte src on sc.rn=src.rn;
Output
English Math Science
80 23 NULL

You can insert the results from your query into a table variable with an Identity column, then get the next row for each required subset:
declare #tmp table (Id int identity, Value varchar(20))
Insert into #tmp (VALUE)
SELECT VALUE FROM STRING_SPLIT ((select
substring(value, charindex('Block',value),1000)
from mytable where [rollnumber ] = 'SCH-01097'),' ') WHERE VALUE <> ' '
select
English = (Select top 1 Value From #tmp where Id = (Select Id + 1 From #tmp where Value = 'english')),
Math = (Select top 1 Value From #tmp where Id = (Select Id + 1 From #tmp where Value = 'math')),
Science = (Select top 1 Value From #tmp where Id = (Select Id + 1 From #tmp where Value = 'science'))
Output:

Related

return longest sequence of digits from a string in SQL Server 2012

I need a function to return the longest sequence of digits in a string, for example:
P0123/99282 returns 99282,
P9-123BB-12339 returns 12339,
12345/54321 returns 12345 (should return first instance when length is the same).
I have developed this but this is very slow, I wonder if there is something faster than this:
DECLARE #str NVARCHAR(40) = N'P0120993/123-AB1239'
DECLARE #x XML
;WITH e1(n) AS(SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1),
e2(n) AS (SELECT 1 FROM e1 CROSS JOIN (SELECT 1 as t UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1) AS b),
n(Number) AS(SELECT n = ROW_NUMBER() OVER (ORDER BY n) FROM e2)
SELECT #x = CAST(N'<A>'+ REPLACE((SELECT CAST(CAST((
SELECT
CASE WHEN SUBSTRING(#str, Number, 1) like N'[^0-9]' THEN N' ' ELSE SUBSTRING(#str, Number, 1) end
FROM n
WHERE Number <= LEN(#str) FOR XML Path(''))
AS xml) AS nvarchar(max))),N' ',N'</A><A>')+ N'</A>' AS XML)
SELECT TOP 1
case when t.value('.', 'nvarchar(max)') = N'' then null else t.value('.', 'nvarchar(max)') end AS inVal
FROM
#x.nodes('/A') AS x(t)
ORDER BY
LEN(t.value('.', 'nvarchar(max)')) DESC;
EXPLANATION:
The max length of the string I will pass is 40, and what I do is to generate a sequence of numbers from one to forty, extract the Nth character from the string where N is the sequence value but if the character is not a digit then I replace with a white space, then I return the XML as string enlcosing with <A>XXX</A>
to then convert to xml and then query that and return the first item order by it's length desc.
thanks,

While I'm not 100% sure how much better this would be with performance, here is an approach that breaks down the strings into any potential numeric combination and returns the first with the longest length:
DECLARE #foo TABLE(ID varchar(40));
INSERT #foo VALUES('P0123/99282'),('P9-123BB-12339'),('12345/54321');
;WITH NumbersTable AS
(
SELECT TOP (40) n = ROW_NUMBER() OVER (ORDER BY Number)
FROM master.dbo.spt_values
ORDER BY Number
), Results AS
(
SELECT f.Id, SUBSTRING(f.ID, t1.n, t2.n) numericvalues,
row_number() over (partition by f.Id
order by LEN(SUBSTRING(f.ID, t1.n, t2.n)) desc) rn
FROM NumbersTable t1
INNER JOIN #foo AS f
ON t1.n <= LEN(f.ID)
INNER JOIN NumbersTable t2
ON t2.n <= LEN(f.ID)
WHERE SUBSTRING(f.ID, t1.n, t2.n) NOT LIKE '%[^0-9]%'
)
SELECT *
FROM Results
WHERE rn = 1
This creates a numbers table from 1 to 40 (since that was your max length), and then using joins creates every substring variation of the data that have a numeric value using NOT LIKE '%[^0-9]%', and then establishes a row_number based on the len of that substring.
Fiddle Demo

Finding sequence of the last numeric value in a varchar variable

I have a column in a table which has incremented values like:
AAA0000001
AAA0000002
... and so on
I want to find if the values stored in this column are in proper sequential order or if any value is missing in between or is deleted.
How can i achieve this?

Assuming the pattern is always: AAA[0-9][0-9][0-9][0-9][0-9][0-9][0-9], you can do this with a Tally Table.
Sample Data:
CREATE TABLE Tbl(val VARCHAR(10))
INSERT INTO Tbl VALUES
('AAA0000001'), ('AAA0000002'), ('AAA0000004'), ('AAA0000011');
val
----------
AAA0000001
AAA0000002
AAA0000004
AAA0000011
SQL Fiddle
;WITH Cte AS(
SELECT *,
num = CAST(SUBSTRING(val, 4, LEN(val) - 3) AS INT)
FROM Tbl
),
E1(N) AS(
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
),
E2(N) AS(SELECT 1 FROM E1 a CROSS JOIN E1 b),
E4(N) AS(SELECT 1 FROM E2 a CROSS JOIN E2 b),
Tally(N) AS(
SELECT TOP(SELECT MAX(num) FROM Cte)
ROW_NUMBER() OVER(ORDER BY (SELECT NULL))
FROM E4
)
SELECT
N,
val = 'AAA' + RIGHT('0000000' + CAST(N AS VARCHAR(7)), 7)
FROM Tally
WHERE NOT EXISTS(
SELECT 1 FROM Cte WHERE num = N
)
RESULT
N val
-------------------- ----------
3 AAA0000003
5 AAA0000005
6 AAA0000006
7 AAA0000007
8 AAA0000008
9 AAA0000009
10 AAA0000010
Explanation:
The first CTE, named as Cte, extracts the numeric part of the strings and CASTs them to INT.
The succeeding CTEs, from E1 to Tally(N) generates a table with sequential values from 1 up to the MAX(num) - the INT return from the first CTE.
The final SELECT just checks for the non-existing num from the first CTE.
'AAA' + RIGHT('0000000' + CAST(N AS VARCHAR(7)), 7) transforms N so that it follows the pattern.

This is a Gaps problem. You can look into this article by Dwain Camps for more solutions on Gaps and Islands.
You can use ROW_NUMBER like this.
Sample Data
DECLARE #tab1 TABLE(id VARCHAR(20));
insert into #tab1 VALUES('AAA0000001'),('AAA0000002'),('AAA0000003'),('AAA0000004'),('AAA0000006'),('AAA0000007'),('AAA0000010');
Query
;WITH CTE as
(
SELECT convert(int,STUFF(id,1,3,'')) id,convert(int,STUFF(id,1,3,'')) - ROW_NUMBER()OVER(ORDER BY convert(int,STUFF(id,1,3,''))) rn
FROM #tab1
),CTE2 as
(
SELECT ROW_NUMBER()OVER(ORDER BY rn) as rn, MIN(id) series_start,MAX(id) series_end
FROM CTE
GROUP BY rn
)
SELECT C2.series_end,C1.series_start
FROM CTE2 C1
INNER JOIN CTE2 C2 ON C1.rn = C2.rn + 1;
SQL Fiddle
Explanation
Output of CTE is the difference of gaps between id values.
Output of CTE2 is the start and end of continuous series of numbers
Final Output gives the start and end of gaps within the series
Output
series_end series_start
4 6
7 10

If the schema is fixed then no need for complex queries. This works:
DECLARE #t TABLE ( v VARCHAR(100) );
INSERT INTO #t
VALUES ( 'AAA0000001' ),
( 'AAA0000002' ),
( 'AAA0000007' ),
( 'AAA0000008' ),
( 'AAA0000010' ),
( 'AAA0000011' ),
( 'AAA0000012' );
SELECT * FROM #t t1
CROSS APPLY(SELECT TOP 1 v FROM #t t2 WHERE t2.v > t1.v ORDER BY v) ca
WHERE RIGHT(t1.v, 7) <> RIGHT(ca.v, 7) - 1
Output:
v v
AAA0000002 AAA0000007
AAA0000008 AAA0000010

In sqlserver 2012, you can use LAG and LEAD
DECLARE #t table(col1 varchar(15))
INSERT #t values('AAA0000001'),('AAA0000002'),('AAA0000004')
SELECT
case when
stuff(lag(col1) over (order by col1), 1,3,'') + 1
= stuff(col1, 1,3,'') then 'Yes' else 'No' end previous_exists,
case when
stuff(lead(col1) over (order by col1), 1,3,'') - 1
= stuff(col1, 1,3,'') then 'Yes' else 'No' end next_exists,
col1
FROM #t
Result:
previous_exists next_exists col1
No Yes AAA0000001
Yes No AAA0000002
No No AAA0000004

put same ID different rows in one line

How could I put those multiple rows into one line, and the contents are in different columns:
From:
ID | Subject1/Catalog/Session
10868952 | NUR/3110/D507
10868952 | NUR/3110/D512
10868952 | NUR/4010/D523
10868952 | NUR/4010/HD20
To
ID |Subject1/Catalog/Session |Subject2/Catalog/Session | Subject3/Catalog/Session |Subject4/Catalog/Session | Subject5/Catalog/Session
10868952 |NUR/3110/D507 | NUR/3110/D512 | NUR/4010/D523 | NUR/4010/HD20 |

Would be best if in the future you can provide ddl and sample data. I did this for you this time.
Here is how you could do this if you know the number of elements per row. I put links in the comments of the original post to both the static and dynamic versions of this type of approach.
if OBJECT_ID('tempdb..#Something') is not null
drop table #Something
create table #Something
(
ID int,
Subject1 varchar(50)
)
insert #Something
select 10868952, 'NUR/3110/D507' union all
select 10868952, 'NUR/3110/D512' union all
select 10868952, 'NUR/4010/D523' union all
select 10868952, 'NUR/4010/HD20';
with OrderedResults as
(
select *, ROW_NUMBER() over(partition by ID order by Subject1) as RowNum
from #Something
)
select ID
, MAX(Case when RowNum = 1 then Subject1 end) as Subject1
, MAX(Case when RowNum = 2 then Subject1 end) as Subject2
, MAX(Case when RowNum = 3 then Subject1 end) as Subject3
, MAX(Case when RowNum = 4 then Subject1 end) as Subject4
from OrderedResults
group by ID

Here is how you can do this as a dynamic pivot. There are a number of concepts going on here. One is a tally table. In this code it is implemented as a cte. In my actual system I have this as a view. It generates 10,000 rows with zero reads. The tally table and most of the other concepts here were learned by the immortal Jeff Moden. If you do not know what a tally table is or how they work, check out Jeff's article here. http://www.sqlservercentral.com/articles/T-SQL/62867/
I will post some code for how to do this for this example but anybody who is unfamiliar with this technique should read his article. http://www.sqlservercentral.com/articles/Crosstab/65048/
Here is a full working example of doing this as a dynamic cross tab. When you are satisfied that the sql this generates is safe feel free to uncomment the last two lines.
LAST but certainly not least. Make sure that you fully understand what this code and how it works. It is not going to be my phone that rings at 3am when something goes wrong. You are the one who will have to be there to support this code.
if OBJECT_ID('Something') is not null
drop table Something
create table Something
(
ID int,
Subject1 varchar(50)
)
insert Something
select 10868952, 'NUR/3110/D507' union all
select 10868952, 'NUR/3110/D512' union all
select 10868952, 'NUR/4010/D523' union all
select 10868952, 'NUR/4010/HD20' union all
select 12345, 'asdfasdf'
declare #MaxCols int
declare #StaticPortion nvarchar(2000) =
'with OrderedResults as
(
select *, ROW_NUMBER() over(partition by ID order by Subject1) as RowNum
from Something
)
select ID';
declare #DynamicPortion nvarchar(max) = '';
declare #FinalStaticPortion nvarchar(2000) = ' from OrderedResults Group by ID order by ID';
with E1(N) AS (select 1 from (values (1),(1),(1),(1),(1),(1),(1),(1),(1),(1))dt(n)),
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS
(
SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
)
select #DynamicPortion = #DynamicPortion +
', MAX(Case when RowNum = ' + CAST(N as varchar(6)) + ' then Subject1 end) as Subject' + CAST(N as varchar(6)) + CHAR(10)
from cteTally t
where t.N <=
(
select top 1 Count(*)
from Something
group by ID
order by COUNT(*) desc
)
select #StaticPortion + #DynamicPortion + #FinalStaticPortion
--declare #SqlToExecute nvarchar(max) = #StaticPortion + #DynamicPortion + #FinalStaticPortion;
--exec sp_executesql #SqlToExecute

Get string between two specific characters

I have a table fileslist with column Filetext which contains text data
create table fileslist
(
FileID int identity (1,1),
Filetext nvarchar(max)
)
insert into fileslist select 'this file(''SQL_SCALAR1'') has been created to test your tricks of (''SQL_SCALAR2'')'
insert into fileslist select 'this file(''SCALAR3'') created to test your tricks of (''SQL_SCALAR4'')'
insert into fileslist select 'this file(''SQL_SCALAR5'') has been created to test your tricks of (''SQL_SCALAR6'')'
insert into fileslist select 'this file(''SQL_7'') has been created'
insert into fileslist select 'this file(''SQL_SCALAR8'') has been created to test your tricks of (''SQL_SCALAR9''), ohh i have more text than other (''SQL_SCALAR10'')files'
I need help to read all records one by one to extract string between two specific characters '(' and ')' and Output should be
SQL_SCALAR1
SQL_SCALAR2
SCALAR3
SQL_SCALAR4
SQL_SCALAR5
SQL_SCALAR6
SQL_7
SQL_SCALAR8
SQL_SCALAR9
SQL_SCALAR10

The reason I would choose this approach is that it handles a dynamic number of the patterns you are searching for, per row, very well.
First I would use Jeff Moden's string splitter;
/*
* Jeff Moden's famous string spliiter
* http://www.sqlservercentral.com/articles/Tally+Table/72993/
*/
CREATE FUNCTION [dbo].[DelimitedSplit8K]
--===== Define I/O parameters
(#pString VARCHAR(8000), #pDelimiter CHAR(1))
--WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
-- enough to cover VARCHAR(8000)
WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
-- for both a performance gain and prevention of accidental "overruns"
SELECT TOP (ISNULL(DATALENGTH(#pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
SELECT 1 UNION ALL
SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(#pString,t.N,1) = #pDelimiter
),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
SELECT s.N1,
ISNULL(NULLIF(CHARINDEX(#pDelimiter,#pString,s.N1),0)-s.N1,8000)
FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
Item = SUBSTRING(#pString, l.N1, l.L1)
FROM cteLen l
;
Then, I would write the following;
SELECT SUBSTRING (
Item,
2,
CHARINDEX(')', Item) - 3
)
FROM FilesList
CROSS
APPLY [dbo].[DelimitedSplit8K](FileText, '(')
WHERE Item LIKE '''SQL%'
As requested in comment; (I'm not thrilled about butchering Jeff's code this way...)
CREATE FUNCTION [dbo].[DelimitedSplit8K_Butchered]
--===== Define I/O parameters
(#pString VARCHAR(8000), #DelimiterA CHAR(1), #DelimiterB CHAR(1))
--WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
-- enough to cover VARCHAR(8000)
WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
-- for both a performance gain and prevention of accidental "overruns"
SELECT TOP (ISNULL(DATALENGTH(#pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
SELECT 1 UNION ALL
SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(#pString,t.N,1) = #DelimiterA
),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
SELECT s.N1,
ISNULL(NULLIF(CHARINDEX(#DelimiterA,#pString,s.N1),0)-s.N1,8000)
FROM cteStart s
),
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
OriginalOutput AS
(
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
Item = SUBSTRING(#pString, l.N1, l.L1)
FROM cteLen l
)
SELECT
Item = SUBSTRING (
Item,
1,
CHARINDEX(#DelimiterB, Item) - 1
)
FROM OriginalOutput
WHERE Item LIKE '%' + #DelimiterB + '%'
--AND SUBSTRING(#pString, l.N1, l.L1) LIKE '%' + #DelimiterB + '%'
;
GO
Used as follows;
SELECT *
FROM FilesList
CROSS
APPLY [dbo].[DelimitedSplit8K_Butchered](FileText, '(', ')')

declare #ss varchar(100)= 'this file(''SQL_SCALAR1'') has been created to test your tricks of (''SQL_SCALAR2'')'
Not sure how you want to display
SELECT SUBSTRING ( (LEFT(#ss,CHARINDEX(')',#ss) - 1)),CHARINDEX('(',#ss)+1,LEN(LEFT(#ss,CHARINDEX(')',#ss) - 1))),
SUBSTRING ((RIGHT(#ss,CHARINDEX('(',REVERSE(#ss)) - 1)),1,LEN(RIGHT(#ss,CHARINDEX('(',REVERSE(#ss)) - 1))-1)
OR
SELECT SUBSTRING ( (LEFT(#ss,CHARINDEX(')',#ss) - 1)),CHARINDEX('(',#ss)+1,LEN(LEFT(#ss,CHARINDEX(')',#ss) - 1)))
Union
SELECT SUBSTRING ((RIGHT(#ss,CHARINDEX('(',REVERSE(#ss)) - 1)),1,LEN(RIGHT(#ss,CHARINDEX('(',REVERSE(#ss)) - 1))-1)

this is the solution what you are looking for, cheer....
;WITH CTE AS
(
SELECT SUBSTRING(FILETEXT,CHARINDEX('(',FileText,1)+2,11) 'Result'
FROM FILESLIST
UNION ALL
SELECT * FROM
(
SELECT
SUBSTRING(
(SUBSTRING(FILETEXT,CHARINDEX(')',FILETEXT,1)+1,LEN(FILETEXT)-
CHARINDEX(')',FILETEXT,1)) )
,CHARINDEX('(',(SUBSTRING(FILETEXT,CHARINDEX(')',FILETEXT,1)+1,LEN(FILETEXT)-
CHARINDEX(')',FILETEXT,1)) ) ,1)+2,11
) 'Result'
FROM FILESLIST
) T1
WHERE Result like '%SQL%'
)
SELECT * FROM CTE
ORDER BY RESULT ASC

A solution with recursive CTE is
WITH REP AS (
SELECT FileID
, Filetext = REPLACE(Filetext, ')', '(')
FROM fileslist
), Splitter AS (
SELECT FileID
, FileText
, NextStart = CHARINDEX('(', FileText)
, Pos = 0
, Token = CAST(SubString(FileText, 0, CHARINDEX('(', FileText))
AS NVarchar(50))
FROM REP
UNION ALL
SELECT FileId
, FileText
, NextStart = CHARINDEX('(', FileText, NextStart + 1)
, Pos = Pos + 1
, Token = CAST(SubString(FileText, NextStart + 1
, CHARINDEX('(', FileText, NextStart + 1)
- NextStart - 1) AS NVarchar(50))
FROM Splitter
WHERE CHARINDEX('(', FileText, NextStart + 1) - NextStart - 1 > 0
)
SELECT FileID, Token
FROM REC
WHERE Pos % 2 = 1
order by FileID, Pos
The Splitter CTE split the rows using a single delimiter, so before use it we need to make the two delimiter the same. Only the odd token are returned by the main query because the even token are other part of the string, for example the string 'this file(''SQL_7'') has been created' will be returned by the Splitter like (with only the interesting columns):
Pos | Token
----+------------------
0 | this file
1 | SQL_7
2 | has been created

How do I get distinct COUNT in pivot?

I have a following table:
State LAB GROUP DATE CODE ID
UP A I 1-Jan 1 345
UP R S 1-Feb 1 456
UP A S 1-Jan 2 567
DL R S 1-Feb 3 678
DL T S 1-Jan 1 789
DL A S 1-Jan 2 900
MN T S 1-Jan 3 1011
MN R I 1-Feb 1 1122
MN S I 1-Feb 2 1233
I need a pivot table of following type:
STATE A R T TOTAL
UP 2 1 0 3
DL 1 1 1 3
MN 0 1 1 2
DISTINCT COUNT OF ID FOR EACH LAB FOR EACH STATE.
I then need the pivot tables filtered for following columns:
GROUP
DATE
CODE
So 1st table will have the pivot table above counting only those records which have GROUP=S
2nd table will have the pivot table above counting only those records which have CODE=1
and so on, I wish to put multiple conditions. and generate several tables one by one and export them.
If this is possible in SQL please let me know! I ruled out excel vba due to the size of table (source table will have 800,000 records approx).

Try this :-
Select [State],[A],[R],[T],Total = [A] + [R]+ [T]
from
(
Select [State],
[A] = Sum(Case when LAB='A' then 1 else 0 END) ,
[R] = Sum(Case when LAB='R' then 1 else 0 END) ,
[T] = Sum(Case when LAB='T' then 1 else 0 END)
from YourTable
group by [State]
)a
SQL FIDDLE

CREATE TABLE #t(States VARCHAR(10),LAB VARCHAR(5),GROUPs VARCHAR(5),DATEs VARCHAR(10),CODE INT,ID INT)
INSERT INTO #t values('UP','A','I','1-Jan',1,345)
INSERT INTO #t values('UP','R','S','1-Feb',1,456)
INSERT INTO #t values('UP','A','S','1-Jan',2,567)
INSERT INTO #t values('DL','R','S','1-Feb',3,678)
INSERT INTO #t values('DL','T','S','1-Jan',1,789)
INSERT INTO #t values('DL','A','S','1-Jan',2,900)
INSERT INTO #t values('MN','T','S','1-Jan',3,1011)
INSERT INTO #t values('MN','R','I','1-Feb',1,1122)
INSERT INTO #t values('MN','S','I','1-Feb',2,1233)
SELECT States,ISNULL(A,0) A,ISNULL(R,0) R,ISNULL(T,0) T,ISNULL(A,0)+ISNULL(R,0)+ISNULL(T,0) total
FROM
(
SELECT States,LAB,Count(ID) AS cnt FROM #t GROUP BY States,LAB /*apply GROUP DATE CODE condition here*/
) AS PVT
PIVOT(MAX(cnt) FOR LAB IN (A,R,T)) pvt

Another solution using PIVOT :
WITH PivotInUse AS (
SELECT state,lab,COUNT(*) AS cnt
FROM YourTable
GROUP BY state,lab
)
SELECT STATE
,COALESCE([A], 0) AS A
,COALESCE([R], 0) AS R
,COALESCE([T], 0) AS T
,COALESCE([A], 0) + COALESCE([R], 0) + COALESCE([T], 0) AS TOTAL
FROM PivotInUse
PIVOT(SUM(cnt) FOR lab IN ([A],[R],[T])) AS p;

Your sample table
SELECT * INTO #TEMP FROM
(
SELECT 'UP' [State],'A' LAB,'I' [GROUP],'1-Jan' [DATE],1 CODE,345 ID
UNION ALL
SELECT 'UP','R','S','1-Feb',1,456
UNION ALL
SELECT 'UP','A','S','1-Jan',2,567
UNION ALL
SELECT 'DL','R','S','1-Feb',3,678
UNION ALL
SELECT 'DL','T','S','1-Jan',1,789
UNION ALL
SELECT 'DL','A','S','1-Jan',2,900
UNION ALL
SELECT 'MN','T','S','1-Jan',3,1011
UNION ALL
SELECT 'MN','R','I','1-Feb',1,1122
UNION ALL
SELECT 'MN','S','I','1-Feb',2,1233
)TAB
Now you need to get the distinct count of each state and get the sum as the result to show Total
in pivoted result.
SELECT DISTINCT [State],LAB,SUM(CNT) CNT
INTO #NEWTABLE
FROM
(
SELECT DISTINCT
[State],LAB,
CASE WHEN [State] IS NULL THEN NULL ELSE COUNT([State]) OVER(PARTITION BY [State],LAB) END CNT
FROM #TEMP
)TAB
GROUP BY [State],LAB
WITH ROLLUP
Now we need to get the distinct columns for pivot(#cols) and columns to identify and replace null with zero in pivot(#NullToZeroCols)
DECLARE #cols NVARCHAR (MAX)
DECLARE #NullToZeroCols NVARCHAR (MAX)
SET #cols = SUBSTRING((SELECT DISTINCT ',['+LAB+']' FROM #NEWTABLE GROUP BY LAB FOR XML PATH('')),2,8000)
SET #NullToZeroCols = SUBSTRING((SELECT DISTINCT ',ISNULL(['+LAB+'],0) AS ['+LAB+']'
FROM #NEWTABLE GROUP BY LAB FOR XML PATH('')),2,8000)
Join the pivotted query with the #NEWTABLE to get the Total for each State
DECLARE #query NVARCHAR(MAX)
SET #query = 'SELECT P.State,' + #NullToZeroCols + ',T2.CNT TOTAL FROM
(
SELECT DISTINCT [State],LAB,CNT FROM #NEWTABLE
) x
PIVOT
(
SUM(CNT)
FOR [LAB] IN (' + #cols + ')
) p
JOIN #NEWTABLE T2 ON P.[STATE]=T2.[STATE]
WHERE P.State IS NOT NULL AND T2.LAB IS NULL AND T2.[STATE] IS NOT NULL;'
EXEC SP_EXECUTESQL #query
Here is your result
Here is the SQLFiddle http://sqlfiddle.com/#!3/c2588/1 (If it shows any error while loading the page, just click RUNSQL, it will work)
Now if you want to get the result as you said DISTINCT COUNT OF ID FOR EACH LAB FOR EACH STATE, just change
OVER(PARTITION BY [State],LAB)
to
OVER(PARTITION BY [State],LAB,Id)
which will show the following result after executing the pivot query

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight

SQL Server: How to query within a string? - sql-server

Related

return longest sequence of digits from a string in SQL Server 2012

Finding sequence of the last numeric value in a varchar variable

put same ID different rows in one line

Get string between two specific characters

How do I get distinct COUNT in pivot?

Categories

Resources