grouping, splitting, and counting rows - sql-server

given this table and data:
-- Sample data for the chunking question: 20 rows over four RowCode values,
-- deliberately inserted out of RowID order.
SET NOCOUNT ON;
DECLARE #Table TABLE (RowID int, RowCode char(1), RowValue int);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES ( 6, 'A',  3757);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES ( 5, 'A', 37827);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES (14, 'A', 48411);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES ( 1, 'A', 48386);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES (20, 'A', 48450);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES ( 7, 'A', 46155);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES (13, 'A',   721);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES ( 2, 'A', 49335);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES (15, 'A',  4700);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES (19, 'A', 64416);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES ( 8, 'A', 27246);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES (12, 'B', 54929);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES (16, 'B',  3872);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES ( 3, 'C',   728);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES (11, 'C',  1050);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES ( 9, 'C',  3191);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES (17, 'C',   866);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES ( 4, 'C',   838);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES (10, 'D',   550);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES (18, 'D',  1434);
SET NOCOUNT OFF;
I need this:
VVVVVVVV
RowID RowCode RowValue RowChunk
----- ------- -------- --------
1 A 48386 1
2 A 49335 1
5 A 37827 1
6 A 3757 1
7 A 46155 1
8 A 27246 2
13 A 721 2
14 A 48411 2
15 A 4700 2
19 A 64416 2
20 A 48450 3
12 B 54929 4
16 B 3872 4
3 C 728 5
4 C 838 5
9 C 3191 5
11 C 1050 5
17 C 866 5
10 D 550 6
18 D 1434 6
RowChunk starts at 1 and is incremented by 1 for each RowCode change and/or when there have been 5 of the same RowCode values.

Basically my solution uses the same approach as yours, only with slightly different devices employed.
-- Step 1: number the rows inside each RowCode by RowID and bucket them five at a time.
-- Integer division of the zero-based row number gives 0 for rows 1-5, 1 for rows 6-10, and so on.
;WITH NumberedRows AS (
    SELECT
        RowID,
        RowCode,
        RowValue,
        CodeChunk = (ROW_NUMBER() OVER (PARTITION BY RowCode ORDER BY RowID) - 1) / 5
    FROM #Table
)
-- Step 2: DENSE_RANK over (RowCode, CodeChunk) turns every distinct bucket into a
-- consecutive RowChunk number starting at 1.
SELECT
    RowID,
    RowCode,
    RowValue,
    RowChunk = DENSE_RANK() OVER (ORDER BY RowCode, CodeChunk)
FROM NumberedRows
-- Without an ORDER BY the row order of the result is not guaranteed; the requested
-- output is listed by RowCode, then RowID.
ORDER BY RowCode, RowID;

I don't think there's an analysis function, or any reasonable combination of such, which will address this. You'll have to do it RBAR with a cursor or, slightly faster in my experience, a loop.
This example of looping assumes that RowID is unique. If RowID is not the clustered PK, this will be very slow, so if that's the case you'll want to create a temp table.
-- Row-by-row alternative: visits the rows of #Table in (RowCode, RowID) order and starts a
-- new chunk whenever the RowCode changes or the current chunk already holds five rows.
-- Walking in plain RowID order would start a new chunk every time the codes interleave
-- (RowID 2 is 'A', 3 is 'C', 5 is 'A' again), which is not the rule asked for.
-- Assumes RowID is unique and RowCode is never NULL (#Results enforces both).
DECLARE #RowID INT = (SELECT TOP 1 RowID FROM #Table ORDER BY RowCode, RowID)
DECLARE #NextRowID INT
DECLARE #RowCode CHAR(1)
DECLARE #LastRowCode CHAR(1)
DECLARE #RowValue INT
DECLARE #Chunk INT = 0
DECLARE #RecsThisChunk INT = 0
DECLARE #Results TABLE (RowID INT NOT NULL PRIMARY KEY, RowCode CHAR(1) NOT NULL, RowValue INT NOT NULL, Chunk INT NOT NULL)
WHILE #RowID IS NOT NULL
BEGIN
    -- Load values for this record
    SELECT #RowCode = RowCode, #RowValue = RowValue FROM #Table WHERE RowID = #RowID
    IF #LastRowCode IS NULL OR #RowCode <> #LastRowCode OR #RecsThisChunk = 5
    BEGIN
        -- Start a new chunk
        SET #Chunk = #Chunk + 1
        SET #RecsThisChunk = 1
    END
    ELSE
    BEGIN
        -- Same chunk
        SET #RecsThisChunk = #RecsThisChunk + 1
    END
    SET #LastRowCode = #RowCode
    INSERT INTO #Results (RowID, RowCode, RowValue, Chunk) VALUES (#RowID, #RowCode, #RowValue, #Chunk)
    -- Seek the next row in (RowCode, RowID) order. An assignment SELECT that finds no row
    -- leaves the variable untouched, so clear it first to let the loop terminate.
    -- Seeking the next existing row also means gaps in RowID cost nothing.
    SET #NextRowID = NULL
    SELECT TOP 1 #NextRowID = RowID
    FROM #Table
    WHERE RowCode > #RowCode
       OR (RowCode = #RowCode AND RowID > #RowID)
    ORDER BY RowCode, RowID
    SET #RowID = #NextRowID
END
SELECT RowID, RowCode, RowValue, Chunk FROM #Results ORDER BY RowCode, RowID
You may have tweak this a bit for 2005, I use 2008 routinely and don't recall all the little differences.
FYI, the results you show don't quite match the sample data.
Hope this helps! The only alternative I see is a cursor, or handling this in the application layer.

this does the trick without a loop:
-- NumberedRows: per-RowCode bucket number (1 for the first five rows by RowID, 2 for the next five, ...).
;WITH NumberedRows AS (
    SELECT
        r.RowID,
        r.RowCode,
        r.RowValue,
        -- RowCode is constant inside its own partition, so RowID alone decides the order
        CEILING(ROW_NUMBER() OVER (PARTITION BY r.RowCode ORDER BY r.RowID) / 5.0) AS CodeRowChunk
    FROM #Table r
)
-- AllChunks: one row per distinct (RowCode, CodeRowChunk), numbered consecutively from 1.
, AllChunks AS (
    SELECT
        d.RowCode,
        d.CodeRowChunk,
        ROW_NUMBER() OVER (ORDER BY d.RowCode, d.CodeRowChunk) AS ChunkRowNumber
    FROM (SELECT DISTINCT RowCode, CodeRowChunk FROM NumberedRows) d
)
-- Every NumberedRows row has exactly one AllChunks row, so an inner join returns the same
-- rows as a per-row scalar subquery without re-running the lookup for each row.
SELECT
    a.RowID,
    a.RowCode,
    a.RowValue,
    c.ChunkRowNumber AS RowChunk
FROM NumberedRows a
INNER JOIN AllChunks c
    ON c.RowCode = a.RowCode
   AND c.CodeRowChunk = a.CodeRowChunk
ORDER BY a.RowCode, a.RowID;

This is the answer you are looking for :
-- Permanent copy of the sample data used by the cursor solution below.
CREATE TABLE [table] (RowID int, RowCode char(1), RowValue int)
INSERT INTO [Table] (RowID, RowCode, RowValue) VALUES ( 6, 'A',  3757)
INSERT INTO [Table] (RowID, RowCode, RowValue) VALUES ( 5, 'A', 37827)
INSERT INTO [Table] (RowID, RowCode, RowValue) VALUES (14, 'A', 48411)
INSERT INTO [Table] (RowID, RowCode, RowValue) VALUES ( 1, 'A', 48386)
INSERT INTO [Table] (RowID, RowCode, RowValue) VALUES (20, 'A', 48450)
INSERT INTO [Table] (RowID, RowCode, RowValue) VALUES ( 7, 'A', 46155)
INSERT INTO [Table] (RowID, RowCode, RowValue) VALUES (13, 'A',   721)
INSERT INTO [Table] (RowID, RowCode, RowValue) VALUES ( 2, 'A', 49335)
INSERT INTO [Table] (RowID, RowCode, RowValue) VALUES (15, 'A',  4700)
INSERT INTO [Table] (RowID, RowCode, RowValue) VALUES (19, 'A', 64416)
INSERT INTO [Table] (RowID, RowCode, RowValue) VALUES ( 8, 'A', 27246)
INSERT INTO [Table] (RowID, RowCode, RowValue) VALUES (12, 'B', 54929)
INSERT INTO [Table] (RowID, RowCode, RowValue) VALUES (16, 'B',  3872)
INSERT INTO [Table] (RowID, RowCode, RowValue) VALUES ( 3, 'C',   728)
INSERT INTO [Table] (RowID, RowCode, RowValue) VALUES (11, 'C',  1050)
INSERT INTO [Table] (RowID, RowCode, RowValue) VALUES ( 9, 'C',  3191)
INSERT INTO [Table] (RowID, RowCode, RowValue) VALUES (17, 'C',   866)
INSERT INTO [Table] (RowID, RowCode, RowValue) VALUES ( 4, 'C',   838)
INSERT INTO [Table] (RowID, RowCode, RowValue) VALUES (10, 'D',   550)
INSERT INTO [Table] (RowID, RowCode, RowValue) VALUES (18, 'D',  1434)
-- Cursor solution: copies [table] into #tempTable, then walks the rows in
-- (RowCode, RowID) order and stamps each one with its chunk number.
IF object_id('tempdb..#tempTable') IS NOT NULL
BEGIN
    DROP TABLE #tempTable
END
CREATE TABLE #tempTable
(RowID int, RowCode char(1), RowValue int, RowChunk int)
INSERT INTO #tempTable (RowID, RowCode, RowValue, RowChunk)
SELECT RowID, RowCode, RowValue, NULL FROM [table]
DECLARE #RowId int
DECLARE #RowCode char(1)
DECLARE #Count int          -- chunk number assigned to the current row
DECLARE #CurrentCode char(1) -- RowCode of the chunk being filled
DECLARE #CountCurrent int    -- rows already placed in the current chunk
SET #Count = 1
SET #CountCurrent = 0
-- The ORDER BY is essential: without it the fetch order is whatever the engine happens
-- to return, and the chunk boundaries would depend on physical row order.
DECLARE contact_cursor CURSOR LOCAL FAST_FORWARD FOR
SELECT RowID, RowCode FROM [table] ORDER BY RowCode, RowID
OPEN contact_cursor
FETCH NEXT FROM contact_cursor INTO #RowId, #RowCode
SET #CurrentCode = #RowCode
WHILE ##FETCH_STATUS = 0
BEGIN
    IF #CurrentCode = #RowCode AND #CountCurrent < 5
    BEGIN
        -- Same code and the chunk still has room
        SET #CountCurrent = #CountCurrent + 1
    END
    ELSE
    BEGIN
        -- Code changed, or the chunk already holds five rows: open the next chunk
        SET #CurrentCode = #RowCode
        SET #CountCurrent = 1
        SET #Count = #Count + 1
    END
    UPDATE #tempTable SET RowChunk = #Count WHERE RowID = #RowId
    FETCH NEXT FROM contact_cursor INTO #RowId, #RowCode
END
CLOSE contact_cursor
DEALLOCATE contact_cursor
SELECT RowID, RowCode, RowValue, RowChunk FROM #tempTable ORDER BY RowCode, RowID
GO

Related

Insert Into Table with String Insert Or Table Type

I have a table called #Tbl1, Each GROUP is 1 row and I have to extract the number of rows for each to #Tbl_Insert type.
-- #Tbl1 holds one row per (group, column); #Tbl_Insert is the shape each group should become.
Declare #Tbl1 Table (TableName NVARCHAR(250), ColumnName NVARCHAR(250), DataType NVARCHAR(250), DataValue NVARCHAR(250), InGroup NVARCHAR(250))
Declare #Tbl_Insert Table (ID INT, Name NVARCHAR(250), Age INT)
-- Sample Data
Insert Into #Tbl1 (TableName, ColumnName, DataType, DataValue, InGroup) values
    ('#Tbl_Insert','ID','INT','1','Group1'),
    ('#Tbl_Insert','Name','NVARCHAR(250)','John.Adam','Group1'),
    ('#Tbl_Insert','Age','INT','10','Group1')
Insert Into #Tbl1 (TableName, ColumnName, DataType, DataValue, InGroup) values
    ('#Tbl_Insert','ID','INT','2','Group2'),
    ('#Tbl_Insert','Name','NVARCHAR(250)','Andy.Law','Group2'),
    ('#Tbl_Insert','Age','INT','18','Group2')
I can convert #tbl1 to row by row into #Table_TEMP
-- Collapse each InGroup of #Tbl1 into one string of single-quoted values separated by commas.
-- The TABLE keyword is required when declaring a table variable.
Declare #Table_TEMP Table (Data nvarchar(max))
Insert Into #Table_TEMP (Data)
SELECT LEFT([DataValues], LEN([DataValues]) - 1)   -- drop the trailing comma
FROM #Tbl1 AS extern
CROSS APPLY
(
    -- Values are concatenated in ColumnName order; embedded single quotes are
    -- removed (not escaped) before each value is wrapped in quotes.
    SELECT Concat('''', Replace(ISNULL([DataValue], ''), '''', '') + ''',')
    FROM #Tbl1 AS intern
    WHERE extern.InGroup = intern.InGroup
    Order By InGroup, ColumnName
    FOR XML PATH('')
) pre_trimmed ([DataValues])
-- Every row of a group produces the same string; grouping keeps one copy per group.
GROUP BY InGroup, [DataValues]
I have to extract the number of rows in #Tbl1 ( Or #Table_TEMP) to #Tbl_Insert.
I don't want to use cursor to loop Insert row by row in #Table_TEMP, because, when you met with big data (example > 10000 rows). It's run to slow.
Please help.
I found sample in stackorverflow
-- Splits a '*'-delimited three-part string into columns by turning the delimiter
-- into '.' and reading the parts with PARSENAME (part 3 is the leftmost).
Declare #tbl_Temp Table (Data NVARCHAR(MAX))
Declare #tbl2 Table (A NVARCHAR(MAX), B NVARCHAR(MAX), C NVARCHAR(MAX))
Insert Into #tbl_Temp values ('a1*b1*c1')
;WITH Dotted AS (
    SELECT REPLACE(Data, '*', '.') AS DottedData
    FROM #tbl_Temp
)
INSERT INTO #tbl2 (A, B, C)
SELECT
    PARSENAME(DottedData, 3),
    PARSENAME(DottedData, 2),
    PARSENAME(DottedData, 1)
FROM Dotted
select * from #tbl2
It's nearly the same, but,
My data have "DOT", can not use PARSENAME
I must know numbers of DOT to Build Dynamics SQL??
PARSENAME only support 3 "DOT", It's null when More Dot.
EXAMPLE:
-- PARSENAME demonstration on a four-part name; parts are numbered from the right (1 = Object, 4 = Server).
Declare #ObjectName nVarChar(1000)
Set #ObjectName = 'HeadOfficeSQL1.Northwind.dbo.Authors'
SELECT
-- NOTE(review): the name has only four parts, so part 5 has nothing to return; the surrounding text reports NULL here
PARSENAME(#ObjectName, 5) as Server4,
PARSENAME(#ObjectName, 4) as Server,
PARSENAME(#ObjectName, 3) as DB,
PARSENAME(#ObjectName, 2) as Owner,
PARSENAME(#ObjectName, 1) as Object
If, i understand correctly you will need to use apply in order to fetch the records & insert the data into other table
-- Turn each InGroup of #Tbl1 into one #Tbl_Insert row: every CASE picks the DataValue of
-- one ColumnName (NULL on the other rows) and MAX collapses the group to that single value.
insert into #Tbl_Insert (ID, Name, Age)
select
    max(case when src.ColumnName = 'ID'   then src.DataValue end) as [id],
    max(case when src.ColumnName = 'Name' then src.DataValue end) as [Name],
    max(case when src.ColumnName = 'Age'  then src.DataValue end) as [Age]
from #Tbl1 as src
group by src.InGroup
select * from #Tbl_Insert
I do both #Tbl_Insert & create 1 store to do like PARSENAME. It's improved performance.
-- dbo.fnGetCsvPart: returns one comma-separated part of #csv.
--   #csv   : the comma-delimited string to read
--   #index : zero-based position of the wanted part
--   #last  : when 1, return everything from the start of that part to the end of the
--            string (later commas included) instead of stopping at the next comma
-- Returns NULL when #csv has fewer parts than #index requires.
create function dbo.fnGetCsvPart(#csv varchar(8000),#index tinyint, #last bit = 0)
returns varchar(4000)
as
/* function to retrieve 0 based "column" from csv string */
begin
-- #i is the position of the comma that precedes the current part (0 = start of string)
declare #i int; set #i = 0
while 1 = 1
begin
-- #index is counted down once per comma skipped; 0 means the current part is the wanted one
if #index = 0
begin
-- no further comma (or caller asked for the remainder): return through the end of the string
if #last = 1 or charindex(',',#csv,#i+1) = 0
return substring(#csv,#i+1,len(#csv)-#i+1)
else
-- otherwise return only the text up to, not including, the next comma
return substring(#csv,#i+1,charindex(',',#csv,#i+1)-#i-1)
end
-- move past one part: step to the next comma and count the part as consumed
select #index = #index-1, #i = charindex(',',#csv,#i+1)
-- ran out of commas before reaching the wanted part
if #i = 0 break
end
return null
end
GO

SQL Server select (top) two rows into two temp variables

I have a query which results in two or more rows (just one column) and I want to catch the first row value into first temp variable and second row value into second temp variable without using multiple times the select top 1 and select top 1 order by desc
Something like this;
Select row1 value into #tempvariable1, row2 value into #tempvariable2 from blah blah
You need somehow to identify the row (I am using a row ID in the example below, ordering by value - you can order by id or something else):
-- Capture the first and second row (ordered by value) into two variables with a single SELECT.
DECLARE #DataSource TABLE ([value] VARCHAR(12));

INSERT INTO #DataSource ([value]) VALUES ('value 1');
INSERT INTO #DataSource ([value]) VALUES ('value 2');
INSERT INTO #DataSource ([value]) VALUES ('value 3');

DECLARE #tempVariable1 VARCHAR(12);
DECLARE #tempVariable2 VARCHAR(12);

WITH DataSource AS
(
    SELECT
        [value],
        ROW_NUMBER() OVER (ORDER BY [value]) AS [rowID]
    FROM #DataSource
)
-- Each variable takes the row's value only on its own row number and otherwise keeps what it already holds.
SELECT
    #tempVariable1 = CASE WHEN [rowID] = 1 THEN [value] ELSE #tempVariable1 END,
    #tempVariable2 = CASE WHEN [rowID] = 2 THEN [value] ELSE #tempVariable2 END
FROM DataSource;

SELECT #tempVariable1, #tempVariable2;
You can use a CTE where you will get the X values you need and then select from it:
-- Read the two smallest ids into #A and #B through a numbered CTE.
declare #data table(id int);
insert into #data(id) values(8), (6), (4), (3);
-- The target variables must be declared before they can be assigned.
declare #A int, #B int;
with vals(id, n) as (
    -- TOP without ORDER BY may return any two rows; ordering by id makes sure the rows
    -- kept are the ones numbered 1 and 2.
    Select top(2) id, ROW_NUMBER() over(order by id)
    From #data
    Order by id
)
Select #A = (Select id From vals Where n = 1)
     , #B = (Select id From vals Where n = 2);
You could also use PIVOT:
-- Same result through PIVOT: number the ids, then turn row numbers 1 and 2 into columns.
select #A = piv.[1], #B = piv.[2]
from (
    select d.id, ROW_NUMBER() over (order by d.id) as n
    from #data as d
) as numbered
pivot (
    max(id) for n in ([1], [2])
) as piv
You have two options
Let's say we test case is build as below
-- Test fixture: two wanted values followed by three rows that the query filters out.
CREATE TABLE dbo.Test (value varchar(100) NOT NULL)
GO
INSERT INTO dbo.Test (value) VALUES ('A')
INSERT INTO dbo.Test (value) VALUES ('B')
INSERT INTO dbo.Test (value) VALUES ('NO THIS ONE')
INSERT INTO dbo.Test (value) VALUES ('NO THIS ONE')
INSERT INTO dbo.Test (value) VALUES ('NO THIS ONE')
GO
Now let's say you fetch your data as below
-- The query whose first two result rows are wanted.
SELECT t.value
FROM dbo.Test AS t
WHERE t.value <> 'NO THIS ONE'
GO
The first and easier option is to save the data in a temp table
-- Materialise the filtered rows once so later lookups only touch this small result.
DECLARE #results TABLE (value varchar(100))
INSERT INTO #results (value)
SELECT t.value
FROM dbo.Test AS t
WHERE t.value <> 'NO THIS ONE'
you still use TOP 1 BUT not in the entire data, only in the results.
Use TOP 1 to find the first result and a second TOP 1 where value is different from the first.
-- #A = smallest value in #results, #B = smallest value different from #A.
-- TOP 1 without ORDER BY may return any row, so both lookups are ordered explicitly.
declare #A varchar(100), #B varchar(100)
set #A = (select top 1 r.value from #results r order by r.value)
set #B = (select top 1 r.value from #results r where r.value != #A order by r.value)
select #A, #B
GO
This approach have the advantage of performance.
Of course that don't work great if both values are equal. You can fix it by using a top 1 and ordering in the inverse order.
There's a better alternative using rownumber.
It works because if you set a variable when returning multiple rows the varible sticks with the last one (in fact it's reseted for each row iteration).
The case statement makes sure the variable #A is seted only on the first row iteration.
-- Assigns both variables in one pass over the filtered rows of dbo.Test.
-- NOTE(review): this depends on the assignments being applied row by row in t.Value order;
-- confirm that the engine guarantees that order before relying on it.
declare #A varchar(100), #B varchar(100)
/* This way #B receives the last value and #A the first */
select #B = t.value,
-- #A takes the value only on the row numbered 1 and otherwise keeps what it already holds
#A = (case when ROW_NUMBER() OVER(order by t.Value) = 1
then t.Value else #A
end)
from dbo.Test t
where t.value != 'NO THIS ONE'
select #A, #B

Inserting records in loop without any unique columns

I have the following table for example which does not have any identity or unique column:
-- Source table for the batching question; it has no identity or unique column.
CREATE TABLE tbl_test (
    first_name nvarchar(255),
    last_name nvarchar(255),
    [address] nvarchar(255)
)
INSERT INTO tbl_test (first_name, last_name, [address]) VALUES ('Andrei','Corovei','str Meteor')
INSERT INTO tbl_test (first_name, last_name, [address]) VALUES ('Pop','Ionut','str Meteor')
INSERT INTO tbl_test (first_name, last_name, [address]) VALUES ('Whitehead','John','str Lunii')
INSERT INTO tbl_test (first_name, last_name, [address]) VALUES ('Grisham','Robert','str Corcoduselor')
INSERT INTO tbl_test (first_name, last_name, [address]) VALUES ('Eugen','Johnesco','str Prunelor')
I can insert the rows from the above table to a another table using below syntax:
-- Creates tbl_test_loop and copies every row of tbl_test into it in one statement.
SELECT t.*
INTO tbl_test_loop
FROM tbl_test AS t
This will insert all records in one shot.
Can I insert in a loop with a batch count of 2 the above results rather than at one shot without creating any Identity or rownumber function?
-- Copies tbl_test into tbl_test_loop in batches of #loopcount rows. Each pass inserts up to
-- #loopcount rows that have no identical row in tbl_test_loop yet.
-- Assumes tbl_test_loop already exists with the same three columns.
-- NOTE(review): rows with a NULL in any column never compare equal, so they would be
-- treated as "not yet copied" on every pass.
Declare #loopcount int   -- batch size
Declare #rcount int      -- rows in the source table
Declare #idn int         -- current pass, starting at 1
Declare #iteration int   -- number of passes needed
select #loopcount=2,#idn=1
select #rcount=count(*) from tbl_test
-- Ceiling of #rcount / #loopcount. Adding the remainder itself to the quotient
-- over-counts whenever the remainder is greater than 1 (e.g. 5 rows in batches of 3).
SET #iteration=(#rcount+#loopcount-1)/#loopcount
while(#idn<=#iteration)
BEGIN
    -- TOP accepts a variable when it is parenthesised, so no dynamic SQL is needed.
    INSERT INTO tbl_test_loop (first_name, last_name, [address])
    select top (#loopcount) t.first_name, t.last_name, t.[address]
    from tbl_test t
    where not exists(select * from tbl_test_loop l where l.first_name = t.first_name and l.last_name = t.last_name and l.[address] = t.[address])
    -- varchar(12) holds any int; varchar(2) fails for batch sizes of 100 or more
    select 'inserted '+cast(#loopcount as varchar(12))
    SET #idn=#idn+1
END
-- Creates tbl_test_loop from tbl_test paired with a two-row derived table (1 and 2);
-- there is no join condition, so every source row is written twice.
select t.*
into tbl_test_loop
from tbl_test as t
cross join (select 1 as loop union select 2) as v
Try NTILE, no loop required:
-- Copies tbl_test into a new table, tagging each row with a bucket number 1-3 in column x.
-- ORDER BY (SELECT 1) supplies a constant, so no particular row order drives the bucketing.
select
    NTILE(3) over (order by (select 1)) as x,
    t.*
into tbl_test_loop
from tbl_test as t

SQL Server: How do I delimit this data?

-- Input: comma-separated rows, each holding two '|'-separated column values.
DECLARE #mydata nvarchar(4000);
SET #mydata = '36|0, 77|5, 132|61';
I have this data that I need to get into a table. So for Row1 columnA would be 36 and columnB would be 0. For Row2 columnA would be 77 and columnB would be 5 etc.
What is the best way to do this?
Thanks
You need a split table-valued-function. There's plenty of examples on the web, e.g. http://www.sqlteam.com/forums/topic.asp?TOPIC_ID=50648
-- dbo.Split: splits #RowData on the delimiter #SplitOn and returns one row per piece.
--   #RowData : the delimited string
--   #SplitOn : the delimiter (up to 5 characters)
-- Returns a table of (Id, Data): Id numbers the pieces from 1 in order of appearance,
-- Data is the piece with leading and trailing spaces trimmed.
-- A string that does not contain the delimiter comes back as a single row.
CREATE FUNCTION dbo.Split
(
    #RowData nvarchar(2000),
    #SplitOn nvarchar(5)
)
RETURNS #RtnValue table
(
    Id int identity(1,1),
    Data nvarchar(100)
)
AS
BEGIN
    Declare #Pos int        -- position of the next delimiter in what is left of #RowData
    Declare #SplitLen int   -- delimiter length in characters
    -- DATALENGTH / 2 counts the characters of an nvarchar including trailing spaces,
    -- which LEN would ignore (a single-space delimiter would otherwise measure 0).
    Set #SplitLen = Datalength(#SplitOn) / 2
    Set #Pos = Charindex(#SplitOn, #RowData)
    While (#Pos > 0)
    Begin
        Insert Into #RtnValue (data)
        Select
            Data = ltrim(rtrim(Substring(#RowData, 1, #Pos - 1)))
        -- Skip the whole delimiter. Stepping over one character only would leave the
        -- tail of a multi-character delimiter at the front of the next piece.
        Set #RowData = Substring(#RowData, #Pos + #SplitLen, len(#RowData))
        Set #Pos = Charindex(#SplitOn, #RowData)
    End
    -- Whatever remains after the last delimiter is the final piece
    Insert Into #RtnValue (data)
    Select Data = ltrim(rtrim(#RowData))
    Return
END
go
-- Split the input into rows on ',' and each row into columns on '|', then pivot the
-- column positions 1 and 2 into col1 and col2.
declare #mydata nvarchar(4000);
set #mydata = '36|0, 77|5, 132|61';
with Cells as (
    select
        pair.Id as rowid,
        part.Id as colid,
        part.Data
    from dbo.Split(#mydata, ',') as pair
    cross apply dbo.Split(pair.Data, '|') as part
)
select
    rowid,
    [1] as col1,
    [2] as col2
from Cells
pivot
(
    min(Data)
    for colid in ([1], [2])
) pd
I just picked the first split function I found. I don't thnk it's the best one but it works for this eample.
Ths outputs:
rowi col1 col2
1 36 0
2 77 5
3 132 61
If the data is in a file, you should be able tp bcp or BULK INSERT specifying row and column terminators
Otherwise, you'll need a nested split function
Of course, you could also send the data to SQL Server as xml

SQL: How to limit the number of records the MERGE statement will insert

Some sample data:
-- Sample data: two source ids (1, 2) already exist in the target, six (13-18) do not.
DECLARE #TARGET TABLE (ID INT, value INT);
DECLARE #SOURCE TABLE (ID INT, value INT);

INSERT INTO #TARGET (ID, value) VALUES (1, 213);
INSERT INTO #TARGET (ID, value) VALUES (2, 3);
INSERT INTO #TARGET (ID, value) VALUES (3, 310);
INSERT INTO #TARGET (ID, value) VALUES (4, 43);

INSERT INTO #SOURCE (ID, value) VALUES (1, 134);
INSERT INTO #SOURCE (ID, value) VALUES (2, 34);
INSERT INTO #SOURCE (ID, value) VALUES (13, 310);
INSERT INTO #SOURCE (ID, value) VALUES (14, 43);
INSERT INTO #SOURCE (ID, value) VALUES (15, 32);
INSERT INTO #SOURCE (ID, value) VALUES (16, 30);
INSERT INTO #SOURCE (ID, value) VALUES (17, 60);
INSERT INTO #SOURCE (ID, value) VALUES (18, 5);

-- Baseline merge: inserts every source row that has no matching ID in the target.
MERGE #TARGET AS t
USING #SOURCE AS s
    ON (t.id = s.id)
WHEN NOT MATCHED THEN
    INSERT (ID, value) VALUES (s.id, s.value);

SELECT * FROM #TARGET
So I'm having a target table , and a source table. What I want to accomplish is that when there is a large number of not matched items, to only insert the x top items with the highest value.
Using top on the merge itself won't work, because that would limit the whole source table, I want to do something like
WHEN NOT MATCHED
LIMIT(5) AND ORDER BY Value DESC --only insert the 5 non-matches with the highest value
INSERT VALUES (s.id,s.value)
---- UPDATE ----
My MERGE statement also contains an WHEN MATCHED THEN statement:
WHEN MATCHED THEN
UPDATE SET t.value = s.value
this sadly negates the answers given by Ian and Dog...
Isn't SET ROWCOUNT Deprecated, you could use the top clause if you do it like this:
-- Insert only the five unmatched source rows with the highest value.
-- MERGE TOP (5) limits how many rows are affected but not which ones, and an ORDER BY
-- inside a TOP (100) PERCENT derived table does not control the order MERGE uses.
-- Selecting the five rows in the source query itself makes the choice explicit.
;MERGE #TARGET t USING
(
    SELECT TOP (5) src.ID, src.value
    FROM #SOURCE src
    WHERE NOT EXISTS (SELECT * FROM #TARGET tgt WHERE tgt.ID = src.ID)
    ORDER BY src.value DESC
) AS s ON (t.id = s.id)
WHEN NOT MATCHED
THEN
INSERT (ID, value) VALUES (s.id, s.value);
SELECT * FROM #TARGET
The ORDER BY int the merge wont work unless you have a TOP Clause so using TOP (100) PERCENT tricks SQL into allowing the ordering.
Edit:
What about doing it in two steps?
-- Step 1: insert only the five unmatched source rows with the highest value.
-- The rows are chosen in the source query, because MERGE TOP (5) does not control which
-- rows are kept and an ORDER BY inside TOP (100) PERCENT is not honoured by MERGE.
;MERGE #TARGET t USING
(
    SELECT TOP (5) src.ID, src.value
    FROM #SOURCE src
    WHERE NOT EXISTS (SELECT * FROM #TARGET tgt WHERE tgt.ID = src.ID)
    ORDER BY src.value DESC
) AS s ON (t.id = s.id)
WHEN NOT MATCHED
THEN
INSERT (ID, value) VALUES (s.id, s.value);
-- Step 2: the "matched" half of the merge runs as a separate UPDATE so the five-row
-- limit above cannot restrict it. Rows inserted in step 1 already carry the source
-- value, so the <> filter leaves them alone.
update t
set t.Value = s.Value
from #Target t
join #Source s on t.ID = s.ID
where t.Value <> s.Value
SELECT * FROM #TARGET
You can use SET ROWCOUNT n;
For example;
-- Limit the UPDATE below to four affected rows.
SET ROWCOUNT 4;
UPDATE Production.ProductInventory
SET Quantity = 400
WHERE Quantity < 300;
-- The limit stays in force for later statements until it is switched off, so reset it.
SET ROWCOUNT 0;
See; http://msdn.microsoft.com/en-us/library/ms188774.aspx
Or you can do
-- Plain INSERT alternative: add the five source rows with the highest value
-- that have no matching ID in the target.
INSERT INTO #Target (ID, value)
SELECT TOP (5) s.id, s.value
FROM #Source s
WHERE NOT EXISTS (SELECT * FROM #Target t WHERE t.ID = s.id)
ORDER BY s.value DESC

Resources