How to do this in SQL Server query instead of function? - sql-server

I have a table that has a string in one of its columns.
My table look like this:
RowCnt
Lvl
TargetID
Codes
1000
1
0
1,1,0,1,0,1,...,1,0,0,0,0
1000
1
1
0,0,1,0,1,0,...,0,1,1,1,1
1000
1
2
1,0,0,0,1,1,...,0,0,0,0,0
1000
1
3
0,1,1,1,0,1,...,1,1,1,1,1
1000
1
4
1,1,0,0,1,0,...,0,0,1,0,0
1000
2
0
0,0,1,1,0,1,...,0,1,0,1,1
1000
2
1
0,1,0,1,1,1,...,1,1,1,1,0
1000
2
2
0,0,0,0,0,1,...,0,0,0,0,1
1500
1
0
1,1,1,1,1,0,...,1,1,1,1,0
1500
1
1
1,0,0,0,0,1,...,0,0,0,0,1
I have to compare each row with every other row and count how many digits differ in the Codes column.
So the first record 1,1,0,1,0,1,...,1,0,0,0,0 is compared with the 2nd, 0,0,1,0,1,0,...,0,1,1,1,1, finding that 14 out of 328 digits differ; then with the 3rd record, 1,0,0,0,1,1,...,0,0,0,0,0, finding 29 / 328; and so on until every record has been covered.
Then the 2nd record is compared with the 3rd, then the 4th, and so on until all pairs are done.
My table has around 2000 records, so I assume that would take around 4 million comparisons.
I have built a function to do the comparison
ALTER FUNCTION [dbo].[Sim]
(
    @x varchar(max),
    @y varchar(max)
)
RETURNS decimal(18,10)
AS
BEGIN
    -- Returns the fraction (0..1) of positions at which the two comma-separated
    -- lists @x and @y hold the same value. Returns NULL when no positions can
    -- be paired (NULL or empty input) instead of raising a divide-by-zero error.
    --
    -- Elements are paired on the OPENJSON array index ([key]) because
    -- STRING_SPLIT does not guarantee that rows come back in string order.
    -- The lists must therefore be valid JSON array content (e.g. digits).
    DECLARE @Match int;
    DECLARE @All int;

    SELECT
        @All   = COUNT(*),
        @Match = COUNT(CASE WHEN xs.value = ys.value THEN 1 END)
    FROM OPENJSON('[' + @x + ']') AS xs
    INNER JOIN OPENJSON('[' + @y + ']') AS ys
        ON ys.[key] = xs.[key];

    RETURN 1.0 * @Match / NULLIF(@All, 0);
END
and my query is like this
-- Pair each TargetsComp row with every row of the same RowCnt that has a
-- higher TargetID, score the pair with dbo.sim, and store it in TargetFilled.
WITH Y AS (
    SELECT
        a.RowCnt,
        a.Lvl,
        a.TargetID AS a_TargetID,
        b.TargetID AS b_TargetID,
        a.codes AS a_codes,
        b.codes AS b_codes,
        dbo.sim(a.codes, b.codes) AS sim
    FROM TargetsComp AS a
    INNER JOIN TargetsComp AS b
        ON b.RowCnt = a.RowCnt
        AND b.TargetID > a.TargetID
)
INSERT INTO TargetFilled
    (RowCnt, Lvl, a_TargetID, b_TargetID, a_codes, b_codes, sim)
SELECT
    RowCnt, Lvl, a_TargetID, b_TargetID, a_codes, b_codes, sim
FROM Y
ORDER BY RowCnt, Lvl, sim DESC
but my method fills C drive and does not finish!!
Any better way?

A faster version of your function would be an inline Table Valued Function.
-- Inline table-valued function returning one row with Pct = the fraction
-- (0..1) of positions at which the comma-separated lists @x and @y DIFFER.
-- Pct is NULL when no positions can be paired (e.g. NULL input).
-- Elements are paired by a ROW_NUMBER over STRING_SPLIT output; STRING_SPLIT
-- does not guarantee row order, so this pairing is not guaranteed to follow
-- the order of the elements in the strings.
CREATE OR ALTER FUNCTION dbo.Similar (@x varchar(max), @y varchar(max))
RETURNS TABLE AS RETURN
SELECT
    -- NULLIF guards against divide-by-zero when nothing pairs up.
    COUNT(CASE WHEN xJ.value <> yJ.value THEN 1 END) * 1.0 / NULLIF(COUNT(*), 0) AS Pct
FROM (
    SELECT
        value,
        ROW_NUMBER() OVER (ORDER BY (SELECT 1)) AS rn
    FROM STRING_SPLIT(@x, ',')
) AS xJ
INNER JOIN (
    SELECT
        value,
        ROW_NUMBER() OVER (ORDER BY (SELECT 1)) AS rn
    FROM STRING_SPLIT(@y, ',')
) AS yJ
    ON yJ.rn = xJ.rn;
However, STRING_SPLIT with a row-number is not guaranteed to always return results in the actual order of the string. It may do it once, it may do it a million times, but there is always a chance the compiler could rearrange things. So instead you could use OPENJSON
-- Inline table-valued function returning one row with Pct = the fraction
-- (0..1) of positions at which the comma-separated lists @x and @y DIFFER.
-- Each list is wrapped in [ ] and parsed as a JSON array, so elements are
-- paired on the array index ([key]). The lists must be valid JSON array
-- content (e.g. digits). Pct is NULL when no positions can be paired.
CREATE OR ALTER FUNCTION dbo.Similar (@x varchar(max), @y varchar(max))
RETURNS TABLE AS RETURN
SELECT
    -- NULLIF guards against divide-by-zero when nothing pairs up.
    COUNT(CASE WHEN xJ.value <> yJ.value THEN 1 END) * 1.0 / NULLIF(COUNT(*), 0) AS Pct
FROM OPENJSON('[' + @x + ']') AS xJ
INNER JOIN OPENJSON('[' + @y + ']') AS yJ
    ON yJ.[key] = xJ.[key];
You would use it like this
-- Pair each TargetsComp row with every row of the same RowCnt that has a
-- higher TargetID and score the pair with the inline TVF dbo.Similar.
-- (CROSS APPLY needs the table-valued function, not the scalar dbo.Sim.)
WITH Y AS (
    SELECT
        a.RowCnt,
        a.Lvl,
        a.TargetID AS a_TargetID,
        b.TargetID AS b_TargetID,
        a.codes AS a_codes,
        b.codes AS b_codes,
        sim.Pct AS sim
    FROM TargetsComp AS a
    INNER JOIN TargetsComp AS b
        ON a.RowCnt = b.RowCnt
        AND a.TargetID < b.TargetID
    CROSS APPLY dbo.Similar(a.codes, b.codes) AS sim
)
INSERT INTO TargetFilled
    (RowCnt, Lvl, a_TargetID, b_TargetID, a_codes, b_codes, sim)
SELECT RowCnt, Lvl, a_TargetID, b_TargetID, a_codes, b_codes, sim
FROM Y;
-- you may want to add (before the semicolon)
-- WHERE sim < 1   -- Pct is a fraction between 0 and 1, not a percentage
I have removed the ORDER BY from the insert as I don't think it's necessary.
You should index your table as follows
-- Supports the self-join on RowCnt with the TargetID range predicate.
CREATE CLUSTERED INDEX CX_TargetsComp_RowCnt_TargetID ON TargetsComp (RowCnt, TargetID);

Related

Grouping based on the match percentage

I need to self-join and display the records based on the match percentage
Name | Village
Jones Ashley, MPK
Meyer Peter, JSK
A Jones, MPK
David, ARK
Peter M, JSK
Peter M, JSK
David, ARK
-- Pair customers of the same village whose names score at least 80
-- with dbo.matchname.
SELECT
    x.Name,
    y.Name,
    dbo.matchname(x.Name, y.Name) AS [match]
FROM cust AS x
INNER JOIN cust AS y
    ON y.village = x.village
WHERE dbo.matchname(x.Name, y.Name) >= 80
I have written a function which takes two names and calculates the percentage. For ex: Peter M and Meyer Peter is 80%.
I would now like to display the related records order by the match percentage. For ex:
Jones Ashley, MPK
A Jones, MPK
David, ARK
David, ARK
Peter M, JSK
Peter M, JSK
Meyer Peter, JSK
Order by doesn't work here as the initial could be at the last for some names. I need some kind of grouping but not sure how to do.
I don't know what your matchname scalar function does so I just created a generic scalar function that compares two strings and returns a small number.
-- (0) Prep: a matchname function
-- Check for the function in the current database, which is where
-- DROP/CREATE operate (the check must not look in a different database).
IF OBJECT_ID(N'dbo.matchname', N'FN') IS NOT NULL DROP FUNCTION dbo.matchname;
GO
-- Stand-in scalar UDF: derives an int from the ASCII codes of the first
-- character of each argument. It is only a placeholder for a real
-- name-matching function.
-- NOTE(review): ABS wraps only ASCII(@string1)+3, not the whole difference;
-- kept as-is so the results shown below stay reproducible.
CREATE FUNCTION dbo.matchname(@string1 varchar(40), @string2 varchar(40))
RETURNS int AS
BEGIN
    RETURN (ABS(ASCII(@string1) + 3) - ASCII(@string2)) * 7;
END;
Below is some sample data and a solution. The most important thing to note is how I filtered my CROSS JOIN:
WHERE x.someid < y.someid
Doing this prevents you from evaluating the same pair of records twice; e.g. dbo.matchname('John Smith', 'George Washington') and dbo.matchname('George Washington', 'John Smith').
Sample data and solution
-- Sample data
DECLARE @yourtable TABLE
(
    someid int identity primary key clustered,
    [Name] varchar(40),
    Village varchar(10),
    index nc_yt nonclustered([Name] ASC)
);
INSERT @yourtable ([Name], Village)
VALUES
    ('Jones Ashley', 'MPK'),
    ('Meyer Peter', 'JSK'),
    ('A Jones', 'MPK'),
    ('David', 'ARK'),
    ('Peter M', 'JSK'),
    ('Peter M', 'JSK'),
    ('David', 'ARK');

-- Solution
-- Score every unordered pair once (x.someid < y.someid), keep pairs scoring
-- at least 80, and keep one row per distinct (Name1, Name2, match).
WITH uniqueList AS
(
    SELECT
        rn = ROW_NUMBER() OVER
        (
            PARTITION BY x.Name, y.Name, dbo.matchname(x.Name, y.Name)
            ORDER BY (SELECT NULL)
        ),
        Name1 = x.Name,
        Name2 = y.Name,
        id1 = x.someid,
        id2 = y.someid,
        dbo.matchname(x.Name, y.Name) AS match
    FROM @yourtable AS x
    CROSS JOIN @yourtable AS y
    WHERE x.someid < y.someid
      AND dbo.matchname(x.Name, y.Name) >= 80
)
SELECT Name1, Name2, match
FROM uniqueList
WHERE rn = 1
ORDER BY match;
Now about that scalar-valued function... Scalar-valued user-defined functions (scalar UDFs for short) KILL performance, especially the way you're using yours! You can replace scalar UDFs with inline table-valued functions (iTVFs) for optimal performance.
First the new function:
-- Check for the function in the current database, which is where
-- DROP/CREATE operate.
IF OBJECT_ID(N'dbo.itvf_matchname', N'IF') IS NOT NULL DROP FUNCTION dbo.itvf_matchname;
GO
-- Inline table-valued counterpart of dbo.matchname: returns a single row
-- with column [match], computed by the same expression from the ASCII codes
-- of the first character of each argument.
CREATE FUNCTION dbo.itvf_matchname(@string1 varchar(40), @string2 varchar(40))
RETURNS TABLE WITH SCHEMABINDING AS
RETURN (SELECT match = (ABS(ASCII(@string1) + 3) - ASCII(@string2)) * 7);
Now a solution (note that I commented out the original scalar UDF code):
-- sample data
DECLARE @yourtable TABLE
(
    someid int identity primary key clustered,
    [Name] varchar(40),
    Village varchar(10),
    index nc_yt nonclustered([Name] ASC)
);
INSERT @yourtable ([Name], Village)
VALUES
    ('Jones Ashley', 'MPK'),
    ('Meyer Peter', 'JSK'),
    ('A Jones', 'MPK'),
    ('David', 'ARK'),
    ('Peter M', 'JSK'),
    ('Peter M', 'JSK'),
    ('David', 'ARK');

-- solution
-- Same logic as the scalar version, but the score comes from one CROSS APPLY
-- of the inline TVF and is referenced three times.
WITH uniqueList AS
(
    SELECT
        rn = ROW_NUMBER() OVER
        (
            PARTITION BY x.Name, y.Name, /*dbo.matchname(x.name, y.name)*/ itvf.match
            ORDER BY (SELECT NULL)
        ),
        Name1 = x.Name,
        Name2 = y.Name,
        --dbo.matchname(x.name, y.name) AS match
        itvf.match
    FROM @yourtable AS x
    CROSS JOIN @yourtable AS y
    -- Below: only 1 function call with results referenced multiple times
    CROSS APPLY dbo.itvf_matchname(x.Name, y.Name) AS itvf
    WHERE x.someid < y.someid
      --AND dbo.matchname(x.Name, y.Name) >= 80
      AND itvf.match >= 80
)
SELECT Name1, Name2, match
FROM uniqueList
WHERE rn = 1;
The results are identical but the performance is profoundly better. To better understand why you should replace scalar UDFs with iTVFs let's do a 1500 row test (which means we'll be evaluating 1 million+ rows):
-- (3.1) Sample Data with an ID
-- Rebuilds temp table #yourtable with 1500 rows of random 10-character names.
SET NOCOUNT ON;
IF OBJECT_ID('tempdb..#yourtable') IS NOT NULL
    DROP TABLE #yourtable;

CREATE TABLE #yourtable
(
    someid  int identity primary key clustered,
    [Name]  varchar(40) NOT NULL,
    Village varchar(10) NOT NULL
);

INSERT INTO #yourtable ([Name], Village)
SELECT TOP (1500)
    LEFT(REPLACE(newid(), '-', ''), 10),
    'xxx'
FROM sys.all_columns AS a
CROSS JOIN sys.all_columns AS b;
GO
CREATE NONCLUSTERED INDEX nc_yt ON #yourTable([Name] ASC);
GO
PRINT 'Scalar function'+char(13)+char(10)+REPLICATE('-',50);
GO
-- Times the scalar-UDF query against #yourtable. The result is assigned to
-- @x so that no rows are returned to the client; elapsed milliseconds are
-- printed. GO 5 runs the batch five times.
DECLARE @x bit, @st datetime2 = getdate();
WITH uniqueList AS
(
    SELECT
        rn = ROW_NUMBER() OVER
        (
            PARTITION BY x.Name, y.Name, dbo.matchname(x.Name, y.Name)
            ORDER BY (SELECT NULL)
        ),
        Name1 = x.Name,
        Name2 = y.Name,
        dbo.matchname(x.Name, y.Name) AS match
    FROM #yourtable AS x
    CROSS JOIN #yourtable AS y
    WHERE x.someid < y.someid
      AND dbo.matchname(x.Name, y.Name) >= 80
)
SELECT @x = 1
FROM uniqueList
WHERE rn = 1;
PRINT DATEDIFF(MS, @st, getdate());
GO 5
PRINT char(13)+char(10)+'ITVF (serial)'+char(13)+char(10)+REPLICATE('-',50);
GO
-- Times the inline-TVF query against #yourtable with a serial plan
-- (MAXDOP 1). The result is assigned to @x so that no rows are returned;
-- elapsed milliseconds are printed. GO 5 runs the batch five times.
DECLARE @x bit, @st datetime2 = getdate();
WITH uniqueList AS
(
    SELECT
        rn = ROW_NUMBER() OVER
        (
            PARTITION BY x.Name, y.Name, itvf.match
            ORDER BY (SELECT NULL)
        ),
        Name1 = x.Name,
        Name2 = y.Name,
        itvf.match
    FROM #yourtable AS x
    CROSS JOIN #yourtable AS y
    -- Below: only 1 function call with results referenced multiple times
    CROSS APPLY dbo.itvf_matchname(x.Name, y.Name) AS itvf
    WHERE x.someid < y.someid
      AND itvf.match >= 80
)
SELECT @x = 1
FROM uniqueList
WHERE rn = 1
OPTION (MAXDOP 1);
PRINT DATEDIFF(MS, @st, getdate());
GO 5
PRINT char(13)+char(10)+'ITVF Parallel'+char(13)+char(10)+REPLICATE('-',50);
GO
-- Times the inline-TVF query against #yourtable with dbo.make_parallel()
-- applied. dbo.make_parallel is not defined in this script.
-- NOTE(review): presumably it exists to encourage a parallel plan - confirm.
DECLARE @x bit, @st datetime2 = getdate();
WITH uniqueList AS
(
    SELECT
        rn = ROW_NUMBER() OVER
        (
            PARTITION BY x.Name, y.Name, itvf.match
            ORDER BY (SELECT NULL)
        ),
        Name1 = x.Name,
        Name2 = y.Name,
        itvf.match
    FROM #yourtable AS x
    CROSS JOIN #yourtable AS y
    -- Below: only 1 function call with results referenced multiple times
    CROSS APPLY dbo.itvf_matchname(x.Name, y.Name) AS itvf
    CROSS APPLY dbo.make_parallel()
    WHERE x.someid < y.someid
      AND itvf.match >= 80
)
SELECT @x = 1
FROM uniqueList
WHERE rn = 1;
PRINT DATEDIFF(MS, @st, getdate());
GO 5
And the results:
Scalar function
--------------------------------------------------
Beginning execution loop
4627
4504
4440
4457
4550
Batch execution completed 5 times.
ITVF (serial)
--------------------------------------------------
Beginning execution loop
1623
1610
1643
1640
1713
Batch execution completed 5 times.
ITVF Parallel
--------------------------------------------------
Beginning execution loop
1306
1067
1077
1127
1047
Batch execution completed 5 times.
The iTVF based solution was ~3X faster when run with a serial plan, 4X faster with a parallel plan.

Finding sequence of the last numeric value in a varchar variable

I have a column in a table which has incremented values like:
AAA0000001
AAA0000002
... and so on
I want to find if the values stored in this column are in proper sequential order or if any value is missing in between or is deleted.
How can i achieve this?
Assuming the pattern is always: AAA[0-9][0-9][0-9][0-9][0-9][0-9][0-9], you can do this with a Tally Table.
Sample Data:
-- Sample table holding prefixed sequence values with gaps.
CREATE TABLE Tbl (val VARCHAR(10));
INSERT INTO Tbl (val)
VALUES
    ('AAA0000001'),
    ('AAA0000002'),
    ('AAA0000004'),
    ('AAA0000011');
val
----------
AAA0000001
AAA0000002
AAA0000004
AAA0000011
SQL Fiddle
-- Lists every sequence number between 1 and MAX(num) that is missing from Tbl.
;WITH Cte AS (
    -- Numeric part of each value: everything after the 3-character prefix.
    SELECT
        val,
        num = CAST(SUBSTRING(val, 4, LEN(val) - 3) AS INT)
    FROM Tbl
),
E1(N) AS (
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
),
E2(N) AS (SELECT 1 FROM E1 a CROSS JOIN E1 b),   -- 100 rows
E4(N) AS (SELECT 1 FROM E2 a CROSS JOIN E2 b),   -- 10,000 rows
-- 100,000,000 rows available, so the full 7-digit range is covered; TOP
-- stops the generator at MAX(num), so the extra capacity costs nothing.
E8(N) AS (SELECT 1 FROM E4 a CROSS JOIN E4 b),
Tally(N) AS (
    -- ISNULL keeps TOP valid when Tbl is empty (MAX returns NULL).
    SELECT TOP (SELECT ISNULL(MAX(num), 0) FROM Cte)
        ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
    FROM E8
)
SELECT
    N,
    val = 'AAA' + RIGHT('0000000' + CAST(N AS VARCHAR(7)), 7)
FROM Tally
WHERE NOT EXISTS (
    SELECT 1 FROM Cte WHERE num = N
)
RESULT
N val
-------------------- ----------
3 AAA0000003
5 AAA0000005
6 AAA0000006
7 AAA0000007
8 AAA0000008
9 AAA0000009
10 AAA0000010
Explanation:
The first CTE, named as Cte, extracts the numeric part of the strings and CASTs them to INT.
The succeeding CTEs, from E1 to Tally(N) generates a table with sequential values from 1 up to the MAX(num) - the INT return from the first CTE.
The final SELECT just checks for the non-existing num from the first CTE.
'AAA' + RIGHT('0000000' + CAST(N AS VARCHAR(7)), 7) transforms N so that it follows the pattern.
This is a Gaps problem. You can look into this article by Dwain Camps for more solutions on Gaps and Islands.
You can use ROW_NUMBER like this.
Sample Data
DECLARE @tab1 TABLE (id VARCHAR(20));
INSERT INTO @tab1 (id)
VALUES ('AAA0000001'),('AAA0000002'),('AAA0000003'),('AAA0000004'),('AAA0000006'),('AAA0000007'),('AAA0000010');
-- Query
;WITH CTE AS
(
    -- grp = numeric id minus its row number; consecutive ids share one grp.
    SELECT
        num.id,
        num.id - ROW_NUMBER() OVER (ORDER BY num.id) AS grp
    FROM @tab1 AS t
    CROSS APPLY (SELECT CONVERT(int, STUFF(t.id, 1, 3, '')) AS id) AS num
),
CTE2 AS
(
    -- One row per run of consecutive ids, numbered in ascending order.
    SELECT
        ROW_NUMBER() OVER (ORDER BY grp) AS rn,
        MIN(id) AS series_start,
        MAX(id) AS series_end
    FROM CTE
    GROUP BY grp
)
-- Each gap lies between the end of one run and the start of the next.
SELECT C2.series_end, C1.series_start
FROM CTE2 AS C1
INNER JOIN CTE2 AS C2
    ON C1.rn = C2.rn + 1;
SQL Fiddle
Explanation
Output of CTE is each numeric id together with the difference between that id and its row number; consecutive ids share the same difference.
Output of CTE2 is the start and end of continuous series of numbers
Final Output gives the start and end of gaps within the series
Output
series_end series_start
4 6
7 10
If the schema is fixed then no need for complex queries. This works:
DECLARE @t TABLE ( v VARCHAR(100) );
INSERT INTO @t (v)
VALUES ( 'AAA0000001' ),
       ( 'AAA0000002' ),
       ( 'AAA0000007' ),
       ( 'AAA0000008' ),
       ( 'AAA0000010' ),
       ( 'AAA0000011' ),
       ( 'AAA0000012' );

-- For each value, fetch the next higher value and keep the pair when its
-- 7-digit number is not exactly one greater, i.e. a gap follows t1.v.
SELECT t1.v, ca.v
FROM @t AS t1
CROSS APPLY (
    SELECT TOP 1 t2.v
    FROM @t AS t2
    WHERE t2.v > t1.v
    ORDER BY t2.v
) AS ca
-- Explicit casts instead of relying on implicit varchar-to-int conversion.
WHERE CAST(RIGHT(t1.v, 7) AS int) <> CAST(RIGHT(ca.v, 7) AS int) - 1
Output:
v v
AAA0000002 AAA0000007
AAA0000008 AAA0000010
In sqlserver 2012, you can use LAG and LEAD
DECLARE @t TABLE (col1 varchar(15));
INSERT @t (col1) VALUES ('AAA0000001'), ('AAA0000002'), ('AAA0000004');

-- For each value, report whether the immediately preceding and following
-- sequence numbers exist. The 3-character prefix is stripped and the rest is
-- cast to int explicitly before comparing.
SELECT
    CASE
        WHEN CAST(STUFF(LAG(col1) OVER (ORDER BY col1), 1, 3, '') AS int) + 1
             = CAST(STUFF(col1, 1, 3, '') AS int) THEN 'Yes'
        ELSE 'No'
    END AS previous_exists,
    CASE
        WHEN CAST(STUFF(LEAD(col1) OVER (ORDER BY col1), 1, 3, '') AS int) - 1
             = CAST(STUFF(col1, 1, 3, '') AS int) THEN 'Yes'
        ELSE 'No'
    END AS next_exists,
    col1
FROM @t
Result:
previous_exists next_exists col1
No Yes AAA0000001
Yes No AAA0000002
No No AAA0000004

how to get nulls when number incremented

i have a piece of code looks like this
-- Sample rows: each string holds one to three '/'-separated parts.
declare @t table (record int, string varchar(MAX))
insert into @t (record, string) values (1, 'ABC')
insert into @t (record, string) values (2, 'DEF/123')
insert into @t (record, string) values (3, 'GHI/456/XYZ')
i got a query where i can result like this
-- Splits each string on '/': a number from spt_values marks a token end when
-- it points at a '/' or one position past the end of the string; the token
-- is the text between the previous '/' (or the start) and that position.
SELECT
    T.record,
    RIGHT(LEFT(T.string, v.Number - 1), CHARINDEX('/', REVERSE(LEFT('/' + T.string, v.Number - 1)))) AS [values]
FROM master..spt_values AS v
CROSS JOIN @t AS T
WHERE v.Type = 'P'
  AND v.Number BETWEEN 1 AND LEN(T.string) + 1
  AND (SUBSTRING(T.string, v.Number, 1) = '/' OR SUBSTRING(T.string, v.Number, 1) = '')
getting output
record values
1 ABC
2 DEF
2 123
3 GHI
3 456
3 XYZ
how can i get output like this
record values
1 ABC
1 NULL
1 NULL
2 DEF
2 123
2 NULL
3 GHI
3 456
3 XYZ
This was asked by another user. I got as far as the query above; how can I achieve the desired output from there?
The idea is to cross join the distinct records with the numbers 1, 2, 3, producing every combination of record and position, and then LEFT JOIN that set to your split values. You must first add a ROW_NUMBER to the split values so that they can be joined to the generated combinations.
;WITH CteThree(record, N) AS (
    -- Every record paired with positions 1, 2 and 3.
    SELECT
        t.record,
        x.N
    FROM (
        SELECT DISTINCT record FROM @t
    ) AS t
    CROSS JOIN (
        SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3
    ) AS x(N)
),
CteSplitted AS (
    -- The '/'-separated tokens of each string, numbered left to right per record.
    SELECT
        T.record,
        ROW_NUMBER() OVER (PARTITION BY T.record ORDER BY v.Number) AS N,
        RIGHT(LEFT(T.string, v.Number - 1), CHARINDEX('/', REVERSE(LEFT('/' + T.string, v.Number - 1)))) AS [str]
    FROM master..spt_values AS v
    CROSS JOIN @t AS T
    WHERE v.Type = 'P'
      AND v.Number BETWEEN 1 AND LEN(T.string) + 1
      AND (SUBSTRING(T.string, v.Number, 1) = '/' OR SUBSTRING(T.string, v.Number, 1) = '')
)
-- Positions with no matching token come back as NULL via the LEFT JOIN.
SELECT
    t.record,
    s.[str]
FROM CteThree AS t
LEFT JOIN CteSplitted AS s
    ON s.record = t.record
    AND s.N = t.N
how about this:
declare @t table (record int, string varchar(MAX));
insert into @t (record, string)
values (1, 'ABC'), (2, 'DEF/123'), (3, 'GHI/456/XYZ');
declare @s char(1) = '/';

WITH counter as (
    -- Largest number of delimiters found in any string.
    SELECT MAX(LEN(string) - LEN(REPLACE(string, @s, ''))) as lines
    FROM @t
),
splitter as (
    -- Anchor: first token of every string (the whole string when it has no delimiter).
    SELECT record, string
        , line = 1
        , pos = h.pos
        -- h.pos - 1 so that the delimiter itself is not part of the token
        , value = CASE WHEN h.pos > 0 THEN SUBSTRING(string, 1, h.pos - 1) ELSE string END
    FROM @t
    CROSS APPLY (SELECT CHARINDEX(@s, string) pos) h
    UNION ALL
    -- Recursive step: next token, or NULL once the string is exhausted (pos = 0).
    SELECT s.record, s.string
        , line = s.line + 1
        , pos = CASE WHEN s.pos = 0 THEN 0 ELSE h.pos END
        , value = CASE WHEN s.pos = 0 THEN null
                       WHEN h.pos > 0 THEN SUBSTRING(s.string, s.pos + 1, h.pos - s.pos - 1)
                       -- last token: take everything that is left
                       ELSE SUBSTRING(s.string, s.pos + 1, LEN(s.string))
                  END
    FROM splitter s
    CROSS APPLY (SELECT CHARINDEX(@s, s.string, s.pos + 1) pos) h
    WHERE s.line <= (SELECT lines FROM counter)
)
SELECT record, string, line, pos, value
FROM splitter
ORDER BY record, line
try this
DECLARE @t TABLE
    (
      record INT ,
      string VARCHAR(MAX)
    )
INSERT  INTO @t
        ( record, string )
VALUES  ( 1, 'ABC' ),
        ( 2, 'DEF/123' ),
        ( 3, 'GHI/456/XYZ' );

WITH    cte
          AS ( -- Numbers 1..101, generated recursively.
               SELECT   Number = 1
               UNION ALL
               SELECT   Number + 1
               FROM     cte
               WHERE    Number <= 100
             ),
        NotNull
          AS ( -- One row per '/'-separated token; RN numbers the tokens of a
               -- record left to right (ordered by position in the string).
               SELECT   T.record ,
                        RIGHT(LEFT(T.string, cte.Number - 1),
                              CHARINDEX('/',
                                        REVERSE(LEFT('/' + T.string,
                                                     cte.Number - 1)))) AS string ,
                        ROW_NUMBER() OVER ( PARTITION BY T.record ORDER BY cte.Number ) AS RN
               FROM     cte
                        INNER JOIN @t AS T ON cte.Number <= ( LEN(T.string) + 1 )
                                          AND SUBSTRING(T.string + '/', cte.Number, 1) = '/'
             )
    -- Template = every record crossed with every token position; DISTINCT on
    -- both sides so that each (record, position) appears exactly once.
    SELECT  template.record ,
            NotNull.string
    FROM    ( SELECT    A.RN ,
                        B.record
              FROM      ( SELECT DISTINCT RN FROM NotNull ) AS A
                        CROSS JOIN ( SELECT DISTINCT record FROM NotNull ) AS B
            ) AS template
            LEFT JOIN NotNull ON template.RN = NotNull.RN
                             AND template.record = NotNull.record
    ORDER BY template.record ,
            template.RN
try this
declare @t table (record int, string varchar(MAX))
insert into @t (record, string) values (1, 'ABC')
insert into @t (record, string) values (2, 'DEF/123')
insert into @t (record, string) values (3, 'GHI/456/XYZ')

-- Largest number of '/' delimiters in any string (MAX over all rows, not the
-- value of whichever row happens to be read last).
declare @mx int
select @mx = max(len(string) - len(replace(string, '/', ''))) from @t

-- Pad every string with '/' up to @mx delimiters, turn each '/'-separated
-- part into a <t> element and shred the XML. Empty parts are returned as NULL.
-- Strings containing XML special characters (&, <) will fail the cast to xml.
select
    a.record,
    nullif(t.c.value('.', 'varchar(max)'), '') as col2
from (
    select
        record,
        x = cast('<t>'
                 + replace(string + replicate('/', @mx - (len(string) - len(replace(string, '/', '')))), '/', '</t><t>')
                 + '</t>' as xml)
    from @t
) as a
cross apply a.x.nodes('/t') as t(c)

How do I get distinct COUNT in pivot?

I have a following table:
State LAB GROUP DATE CODE ID
UP A I 1-Jan 1 345
UP R S 1-Feb 1 456
UP A S 1-Jan 2 567
DL R S 1-Feb 3 678
DL T S 1-Jan 1 789
DL A S 1-Jan 2 900
MN T S 1-Jan 3 1011
MN R I 1-Feb 1 1122
MN S I 1-Feb 2 1233
I need a pivot table of following type:
STATE A R T TOTAL
UP 2 1 0 3
DL 1 1 1 3
MN 0 1 1 2
DISTINCT COUNT OF ID FOR EACH LAB FOR EACH STATE.
I then need the pivot tables filtered for following columns:
GROUP
DATE
CODE
So 1st table will have the pivot table above counting only those records which have GROUP=S
2nd table will have the pivot table above counting only those records which have CODE=1
and so on, I wish to put multiple conditions. and generate several tables one by one and export them.
If this is possible in SQL please let me know! I ruled out excel vba due to the size of table (source table will have 800,000 records approx).
Try this :-
-- Distinct count of ID per State for labs A, R and T, plus their total.
-- COUNT(DISTINCT CASE ...) ignores the NULLs produced for other labs, so each
-- ID is counted once per lab even if it occurs on several rows.
SELECT
    s.[State],
    s.[A],
    s.[R],
    s.[T],
    s.[A] + s.[R] + s.[T] AS Total
FROM (
    SELECT
        [State],
        COUNT(DISTINCT CASE WHEN LAB = 'A' THEN ID END) AS [A],
        COUNT(DISTINCT CASE WHEN LAB = 'R' THEN ID END) AS [R],
        COUNT(DISTINCT CASE WHEN LAB = 'T' THEN ID END) AS [T]
    FROM YourTable
    -- add a WHERE clause here to filter on GROUP / DATE / CODE
    GROUP BY [State]
) AS s
SQL FIDDLE
CREATE TABLE #t (States VARCHAR(10), LAB VARCHAR(5), GROUPs VARCHAR(5), DATEs VARCHAR(10), CODE INT, ID INT)
INSERT INTO #t (States, LAB, GROUPs, DATEs, CODE, ID)
VALUES
    ('UP', 'A', 'I', '1-Jan', 1, 345),
    ('UP', 'R', 'S', '1-Feb', 1, 456),
    ('UP', 'A', 'S', '1-Jan', 2, 567),
    ('DL', 'R', 'S', '1-Feb', 3, 678),
    ('DL', 'T', 'S', '1-Jan', 1, 789),
    ('DL', 'A', 'S', '1-Jan', 2, 900),
    ('MN', 'T', 'S', '1-Jan', 3, 1011),
    ('MN', 'R', 'I', '1-Feb', 1, 1122),
    ('MN', 'S', 'I', '1-Feb', 2, 1233)

-- Distinct count of ID per state and lab, pivoted to one column per lab.
SELECT
    pvt.States,
    ISNULL(pvt.A, 0) AS A,
    ISNULL(pvt.R, 0) AS R,
    ISNULL(pvt.T, 0) AS T,
    ISNULL(pvt.A, 0) + ISNULL(pvt.R, 0) + ISNULL(pvt.T, 0) AS total
FROM
(
    SELECT States, LAB, COUNT(DISTINCT ID) AS cnt
    FROM #t
    /* apply GROUP / DATE / CODE condition here, e.g. WHERE GROUPs = 'S' */
    GROUP BY States, LAB
) AS src
PIVOT (MAX(cnt) FOR LAB IN ([A], [R], [T])) AS pvt
Another solution using PIVOT :
-- Distinct count of ID per state and lab, pivoted to one column per lab.
WITH PivotInUse AS (
    SELECT
        state,
        lab,
        COUNT(DISTINCT ID) AS cnt
    FROM YourTable
    -- add a WHERE clause here to filter on GROUP / DATE / CODE
    GROUP BY state, lab
)
SELECT
    STATE,
    COALESCE([A], 0) AS A,
    COALESCE([R], 0) AS R,
    COALESCE([T], 0) AS T,
    COALESCE([A], 0) + COALESCE([R], 0) + COALESCE([T], 0) AS TOTAL
FROM PivotInUse
PIVOT (SUM(cnt) FOR lab IN ([A], [R], [T])) AS p;
Your sample table
-- Materialise the sample rows from the question into temp table #TEMP.
;WITH sample_rows ([State], LAB, [GROUP], [DATE], CODE, ID) AS (
    SELECT 'UP', 'A', 'I', '1-Jan', 1, 345   UNION ALL
    SELECT 'UP', 'R', 'S', '1-Feb', 1, 456   UNION ALL
    SELECT 'UP', 'A', 'S', '1-Jan', 2, 567   UNION ALL
    SELECT 'DL', 'R', 'S', '1-Feb', 3, 678   UNION ALL
    SELECT 'DL', 'T', 'S', '1-Jan', 1, 789   UNION ALL
    SELECT 'DL', 'A', 'S', '1-Jan', 2, 900   UNION ALL
    SELECT 'MN', 'T', 'S', '1-Jan', 3, 1011  UNION ALL
    SELECT 'MN', 'R', 'I', '1-Feb', 1, 1122  UNION ALL
    SELECT 'MN', 'S', 'I', '1-Feb', 2, 1233
)
SELECT [State], LAB, [GROUP], [DATE], CODE, ID
INTO #TEMP
FROM sample_rows
Now you need to get the count for each State/LAB combination and, via ROLLUP, the per-State sum that is
shown as Total in the pivoted result.
-- Builds #NEWTABLE with one row per (State, LAB) holding its row count CNT,
-- plus the subtotal rows that WITH ROLLUP adds (LAB = NULL for a per-State
-- total; State = NULL and LAB = NULL for the grand total).
SELECT DISTINCT [State],LAB,SUM(CNT) CNT
INTO #NEWTABLE
FROM
(
-- Inner query: the windowed COUNT repeats the partition's row count on every
-- row; DISTINCT collapses those repeats to one row per (State, LAB).
SELECT DISTINCT
[State],LAB,
CASE WHEN [State] IS NULL THEN NULL ELSE COUNT([State]) OVER(PARTITION BY [State],LAB) END CNT
FROM #TEMP
)TAB
GROUP BY [State],LAB
WITH ROLLUP
Now we need to get the distinct columns for pivot(#cols) and columns to identify and replace null with zero in pivot(#NullToZeroCols)
-- @cols:           comma-separated, bracket-quoted list of the distinct LAB values
-- @NullToZeroCols: the same list with each column wrapped in ISNULL(col, 0)
-- QUOTENAME escapes the names safely; the ROLLUP rows (LAB IS NULL) are
-- skipped; TYPE/.value() avoids XML entity escaping and the 8000-character
-- cut-off of SUBSTRING(..., 2, 8000).
DECLARE @cols NVARCHAR (MAX)
DECLARE @NullToZeroCols NVARCHAR (MAX)

SET @cols = STUFF((
        SELECT ',' + QUOTENAME(LAB)
        FROM #NEWTABLE
        WHERE LAB IS NOT NULL
        GROUP BY LAB
        ORDER BY LAB
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 1, N'')

SET @NullToZeroCols = STUFF((
        SELECT ',ISNULL(' + QUOTENAME(LAB) + ',0) AS ' + QUOTENAME(LAB)
        FROM #NEWTABLE
        WHERE LAB IS NOT NULL
        GROUP BY LAB
        ORDER BY LAB
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 1, N'')
Join the pivotted query with the #NEWTABLE to get the Total for each State
-- Builds and runs the dynamic PIVOT. @cols and @NullToZeroCols must already
-- hold the column lists derived from #NEWTABLE. The per-State total is read
-- from the ROLLUP row of #NEWTABLE (LAB IS NULL, State IS NOT NULL).
DECLARE @query NVARCHAR(MAX)
SET @query = 'SELECT P.State,' + @NullToZeroCols + ',T2.CNT TOTAL FROM
(
SELECT DISTINCT [State],LAB,CNT FROM #NEWTABLE
) x
PIVOT
(
SUM(CNT)
FOR [LAB] IN (' + @cols + ')
) p
JOIN #NEWTABLE T2 ON P.[STATE]=T2.[STATE]
WHERE P.State IS NOT NULL AND T2.LAB IS NULL AND T2.[STATE] IS NOT NULL;'
-- lower-case name so the call also resolves on case-sensitive servers
EXEC sp_executesql @query
Here is your result
Here is the SQLFiddle http://sqlfiddle.com/#!3/c2588/1 (If it shows any error while loading the page, just click RUNSQL, it will work)
Now if you want to get the result as you said DISTINCT COUNT OF ID FOR EACH LAB FOR EACH STATE, just change
OVER(PARTITION BY [State],LAB)
to
OVER(PARTITION BY [State],LAB,Id)
which will show the following result after executing the pivot query

SQL Server 2008 : TSQL, select same data for different times based on column value

I am using mssql 2008 R2,
i have below structure
-- One order line whose quantity (4) is to be expanded into four rows.
CREATE TABLE #temp
(
    product  int,
    [order]  int,
    ord_qnty int
)
INSERT INTO #temp (product, [order], ord_qnty)
VALUES (10, 3, 4)
Now, if ord_qnty is 4, I want to select the same product and order four times, but in all four rows the value of ord_qnty should be 1, i.e.
out put should be
Product order ord_qnty
10 3 1
10 3 1
10 3 1
10 3 1
If you have a numbers table, you can use that. If not, you can generate one:
-- Repeat each #temp row ord_qnty times by joining it to the numbers 1..n,
-- generated here by numbering the rows of sys.objects.
SELECT
    t.product,
    t.[order],
    1 AS ord_qnty
FROM #temp AS t
INNER JOIN (
    SELECT ROW_NUMBER() OVER (ORDER BY object_id) AS n
    FROM sys.objects
) AS num
    ON num.n <= t.ord_qnty
(In my nearly empty scratch database, the ROW_NUMBER() generates 77 rows. If that's not going to be enough, you can introduce cross-joins or use other tricks to generate more numbers, or you can create and populate a permanent numbers table)
Try this one -
Query:
DECLARE @temp TABLE
(
      product INT
    , [order] INT
    , ord_qnty INT
)
INSERT @temp (product, [order], ord_qnty)
SELECT 10, 3, 4

-- Variant 1: numbers from master.dbo.spt_values (type 'P' starts at 0,
-- hence the strict ">").
SELECT
      t.product
    , t.[order]
    , ord_qnty = 1
FROM @temp t
JOIN [master].dbo.spt_values sv ON t.ord_qnty > sv.number
WHERE sv.[type] = 'P'   -- upper case so it also matches under a case-sensitive collation

-- Variant 2: numbers 1..n generated by numbering sys.system_parameters rows.
SELECT
      t.product
    , t.[order]
    , ord_qnty = 1
FROM @temp t
JOIN (
    SELECT number = ROW_NUMBER() OVER (ORDER BY (SELECT 1))
    FROM sys.system_parameters p
) sv ON t.ord_qnty >= sv.number
Output:
product order ord_qnty
----------- ----------- -----------
10 3 1
10 3 1
10 3 1
10 3 1
Query Cost:
For any "millions value":
SET NOCOUNT ON;
DECLARE @numbers TABLE (number INT)
DECLARE @temp TABLE
(
      product INT
    , [order] INT
    , ord_qnty INT
)
INSERT @temp (product, [order], ord_qnty)
SELECT 10, 3, 4

DECLARE
      @i BIGINT = 1
    , @max BIGINT = (
        SELECT MAX(ord_qnty)
        FROM @temp
    )

-- Fill @numbers ten values at a time until it covers 1..@max
-- (it may overshoot by up to nine values; the join below ignores those).
WHILE (@i <= @max) BEGIN
    INSERT INTO @numbers (number)
    VALUES (@i), (@i+1), (@i+2), (@i+3), (@i+4), (@i+5), (@i+6), (@i+7), (@i+8), (@i+9)
    SET @i += 10
END

-- Repeat each row by ITS OWN ord_qnty, so tables with several rows and
-- different quantities expand correctly.
SELECT
      t.product
    , t.[order]
    , ord_qnty = 1
FROM @temp t
JOIN @numbers t2 ON t2.number <= t.ord_qnty

Resources