grouping, splitting, and counting rows

grouping, splitting, and counting rows - sql-server

given this table and data:
-- Sample data for the chunking question.
-- A name starting with # is a local temporary table, so it is created with
-- CREATE TABLE (DECLARE ... TABLE is only valid for @table variables).
SET NOCOUNT ON;

IF OBJECT_ID('tempdb..#Table') IS NOT NULL
    DROP TABLE #Table;

CREATE TABLE #Table (RowID int, RowCode char(1), RowValue int);

-- Rows are inserted out of RowID order.
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES ( 6, 'A',  3757);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES ( 5, 'A', 37827);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES (14, 'A', 48411);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES ( 1, 'A', 48386);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES (20, 'A', 48450);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES ( 7, 'A', 46155);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES (13, 'A',   721);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES ( 2, 'A', 49335);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES (15, 'A',  4700);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES (19, 'A', 64416);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES ( 8, 'A', 27246);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES (12, 'B', 54929);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES (16, 'B',  3872);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES ( 3, 'C',   728);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES (11, 'C',  1050);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES ( 9, 'C',  3191);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES (17, 'C',   866);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES ( 4, 'C',   838);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES (10, 'D',   550);
INSERT INTO #Table (RowID, RowCode, RowValue) VALUES (18, 'D',  1434);

SET NOCOUNT OFF;
I need this:
VVVVVVVV
RowID RowCode RowValue RowChunk
----- ------- -------- --------
1 A 48386 1
2 A 49335 1
5 A 37827 1
6 A 3757 1
7 A 46155 1
8 A 27246 2
13 A 721 2
14 A 48411 2
15 A 4700 2
19 A 64416 2
20 A 48450 3
12 B 54929 4
16 B 3872 4
3 C 728 5
4 C 838 5
9 C 3191 5
11 C 1050 5
17 C 866 5
10 D 550 6
18 D 1434 6
RowChunk starts at 1 and is incremented by 1 for each RowCode change and/or when there have been 5 of the same RowCode values.

Basically my solution uses the same approach as yours, only with slightly different devices employed.
-- Step 1: number the rows inside each RowCode by RowID and cut that sequence
-- into groups of five (integer division: positions 1-5 -> 0, 6-10 -> 1, ...).
-- Step 2: DENSE_RANK over (RowCode, CodeChunk) gives consecutive chunk numbers
-- starting at 1.
-- The leading semicolon terminates any preceding statement, which WITH requires.
;WITH NumberedRows AS (
    SELECT
        RowID,
        RowCode,
        RowValue,
        (ROW_NUMBER() OVER (PARTITION BY RowCode ORDER BY RowID) - 1) / 5 AS CodeChunk
    FROM #Table
)
SELECT
    RowID,
    RowCode,
    RowValue,
    DENSE_RANK() OVER (ORDER BY RowCode, CodeChunk) AS RowChunk
FROM NumberedRows
-- Without an explicit ORDER BY the display order of the result is not guaranteed.
ORDER BY RowCode, RowID;

I don't think there's an analytic (window) function, or any reasonable combination of them, that will address this. You'll have to do it RBAR (row by agonizing row) with a cursor or, slightly faster in my experience, a loop.
This example of looping assumes that RowID is unique. If RowID is not the clustered PK, this will be very slow, so if that's the case you'll want to create a temp table.
-- Row-by-row alternative: walk #Table in (RowCode, RowID) order and start a new
-- chunk whenever RowCode changes or the current chunk already holds @ChunkSize rows.
-- Assumes RowID is unique (it is the primary key of @Results).
-- Rows whose RowID or RowCode is NULL are skipped; @Results could not store them.
DECLARE @RowID INT;
DECLARE @RowCode CHAR(1);
DECLARE @RowValue INT;
DECLARE @LastRowID INT;              -- RowID of the row processed last
DECLARE @LastRowCode CHAR(1);        -- RowCode of the row processed last (NULL before the first row)
DECLARE @Chunk INT = 0;              -- current chunk number
DECLARE @RecsThisChunk INT = 0;      -- rows already placed in the current chunk
DECLARE @ChunkSize INT = 5;
DECLARE @Results TABLE (RowID INT NOT NULL PRIMARY KEY, RowCode CHAR(1) NOT NULL, RowValue INT NOT NULL, Chunk INT NOT NULL);

WHILE 1 = 1
BEGIN
    -- Load the row that follows the last processed one in (RowCode, RowID) order.
    -- Seeking the next row directly also copes with gaps in RowID.
    SELECT TOP (1)
        @RowID = RowID,
        @RowCode = RowCode,
        @RowValue = RowValue
    FROM #Table
    WHERE RowID IS NOT NULL
      AND RowCode IS NOT NULL
      AND (   @LastRowCode IS NULL
           OR RowCode > @LastRowCode
           OR (RowCode = @LastRowCode AND RowID > @LastRowID))
    ORDER BY RowCode, RowID;

    -- No row was assigned: every row has been processed.
    IF @@ROWCOUNT = 0 BREAK;

    IF @LastRowCode IS NULL OR @RowCode <> @LastRowCode OR @RecsThisChunk = @ChunkSize
    BEGIN
        -- Start a new chunk
        SET @Chunk = @Chunk + 1;
        SET @RecsThisChunk = 1;
    END
    ELSE
    BEGIN
        -- Same chunk
        SET @RecsThisChunk = @RecsThisChunk + 1;
    END

    -- Record the position before the INSERT so the loop always advances,
    -- even if the INSERT is rejected by a constraint.
    SET @LastRowCode = @RowCode;
    SET @LastRowID = @RowID;

    INSERT INTO @Results (RowID, RowCode, RowValue, Chunk)
    VALUES (@RowID, @RowCode, @RowValue, @Chunk);
END

SELECT RowID, RowCode, RowValue, Chunk
FROM @Results
ORDER BY RowCode, RowID;
You may have to tweak this a bit for 2005; I use 2008 routinely and don't recall all the little differences.
FYI, the results you show don't quite match the sample data.
Hope this helps! The only alternative I see is a cursor, or handling this in the application layer.

this does the trick without a loop:
-- Loop-free variant.
-- NumberedRows: position of every row inside its RowCode, cut into blocks of five.
-- AllChunks:    one sequential number per distinct (RowCode, CodeRowChunk) pair.
;WITH NumberedRows AS (
    SELECT
        src.RowID,
        src.RowCode,
        src.RowValue,
        CEILING(ROW_NUMBER() OVER (PARTITION BY src.RowCode ORDER BY src.RowCode, src.RowID) / 5.0) AS CodeRowChunk
    FROM #Table AS src
),
AllChunks AS (
    SELECT
        pairs.RowCode,
        pairs.CodeRowChunk,
        ROW_NUMBER() OVER (ORDER BY pairs.RowCode, pairs.CodeRowChunk) AS ChunkRowNumber
    FROM (
        SELECT DISTINCT RowCode, CodeRowChunk
        FROM NumberedRows
    ) AS pairs
)
SELECT
    n.RowID,
    n.RowCode,
    n.RowValue,
    -- look up the sequential number of this row's (RowCode, CodeRowChunk) pair
    (
        SELECT ch.ChunkRowNumber
        FROM AllChunks AS ch
        WHERE ch.RowCode = n.RowCode
          AND ch.CodeRowChunk = n.CodeRowChunk
    ) AS RowChunk
FROM NumberedRows AS n

This is the answer you are looking for :
-- Demo table for the cursor solution. The name is bracketed because TABLE is a
-- reserved word.
CREATE TABLE [table] (RowID int, RowCode char(1), RowValue int);

-- One INSERT with an explicit column list loads all sample rows together;
-- SELECT ... UNION ALL is used so it does not depend on multi-row VALUES support.
INSERT INTO [table] (RowID, RowCode, RowValue)
SELECT  6, 'A',  3757 UNION ALL
SELECT  5, 'A', 37827 UNION ALL
SELECT 14, 'A', 48411 UNION ALL
SELECT  1, 'A', 48386 UNION ALL
SELECT 20, 'A', 48450 UNION ALL
SELECT  7, 'A', 46155 UNION ALL
SELECT 13, 'A',   721 UNION ALL
SELECT  2, 'A', 49335 UNION ALL
SELECT 15, 'A',  4700 UNION ALL
SELECT 19, 'A', 64416 UNION ALL
SELECT  8, 'A', 27246 UNION ALL
SELECT 12, 'B', 54929 UNION ALL
SELECT 16, 'B',  3872 UNION ALL
SELECT  3, 'C',   728 UNION ALL
SELECT 11, 'C',  1050 UNION ALL
SELECT  9, 'C',  3191 UNION ALL
SELECT 17, 'C',   866 UNION ALL
SELECT  4, 'C',   838 UNION ALL
SELECT 10, 'D',   550 UNION ALL
SELECT 18, 'D',  1434;
-- Cursor solution: copy [table] into #tempTable, then walk the rows in
-- (RowCode, RowID) order and stamp each one with its chunk number.
IF OBJECT_ID('tempdb..#tempTable') IS NOT NULL
BEGIN
    DROP TABLE #tempTable;
END

CREATE TABLE #tempTable
(RowID int, RowCode char(1), RowValue int, RowChunk int);

INSERT INTO #tempTable (RowID, RowCode, RowValue, RowChunk)
SELECT RowID, RowCode, RowValue, NULL
FROM [table];

DECLARE @RowId int;
DECLARE @RowCode char(1);
DECLARE @Count int;            -- chunk number currently being filled
DECLARE @CurrentCode char(1);  -- RowCode of the chunk currently being filled
DECLARE @CountCurrent int;     -- rows already placed in the current chunk

SET @Count = 1;
SET @CountCurrent = 0;

-- The ORDER BY is essential: without it the cursor may return rows in any
-- order and the chunk numbers would not follow RowCode / RowID.
DECLARE contact_cursor CURSOR LOCAL FAST_FORWARD FOR
    SELECT RowID, RowCode
    FROM [table]
    ORDER BY RowCode, RowID;

OPEN contact_cursor;

FETCH NEXT FROM contact_cursor INTO @RowId, @RowCode;
-- Seed the current code from the first row so that row stays in chunk 1.
SET @CurrentCode = @RowCode;

WHILE @@FETCH_STATUS = 0
BEGIN
    IF (@CurrentCode = @RowCode)
    BEGIN
        IF (@CountCurrent = 5)
        BEGIN
            -- current chunk is full: open the next one for the same code
            SET @CountCurrent = 1;
            SET @Count = @Count + 1;
        END
        ELSE
        BEGIN
            SET @CountCurrent = @CountCurrent + 1;
        END
    END
    ELSE
    BEGIN
        -- RowCode changed: open the next chunk
        SET @CurrentCode = @RowCode;
        SET @CountCurrent = 1;
        SET @Count = @Count + 1;
    END

    UPDATE #tempTable SET RowChunk = @Count WHERE RowID = @RowId;

    FETCH NEXT FROM contact_cursor INTO @RowId, @RowCode;
END

CLOSE contact_cursor;
DEALLOCATE contact_cursor;

SELECT RowID, RowCode, RowValue, RowChunk
FROM #tempTable
ORDER BY RowCode, RowID;
GO

Related

Insert Into Table with String Insert Or Table Type

I have a table called #Tbl1. Each InGroup value represents one row, and I have to extract one row per group into a table of the #Tbl_Insert type.
-- #-prefixed names are local temporary tables, so they are created with
-- CREATE TABLE (DECLARE ... TABLE is only valid for @table variables).
CREATE TABLE #Tbl1 (TableName NVARCHAR(250), ColumnName NVARCHAR(250), DataType NVARCHAR(250), DataValue NVARCHAR(250), InGroup NVARCHAR(250));
CREATE TABLE #Tbl_Insert (ID INT, Name NVARCHAR(250), Age INT);

-- Sample Data: one #Tbl1 row per target column, grouped by InGroup
INSERT INTO #Tbl1 (TableName, ColumnName, DataType, DataValue, InGroup)
VALUES
    ('#Tbl_Insert', 'ID',   'INT',           '1',         'Group1'),
    ('#Tbl_Insert', 'Name', 'NVARCHAR(250)', 'John.Adam', 'Group1'),
    ('#Tbl_Insert', 'Age',  'INT',           '10',        'Group1'),
    ('#Tbl_Insert', 'ID',   'INT',           '2',         'Group2'),
    ('#Tbl_Insert', 'Name', 'NVARCHAR(250)', 'Andy.Law',  'Group2'),
    ('#Tbl_Insert', 'Age',  'INT',           '18',        'Group2');
I can convert #tbl1 to row by row into #Table_TEMP
-- Builds one row per InGroup containing that group's DataValues, each wrapped
-- in single quotes and separated by commas, in ColumnName order.
CREATE TABLE #Table_TEMP (Data nvarchar(max));

INSERT INTO #Table_TEMP (Data)
SELECT LEFT(pre_trimmed.DataValues, LEN(pre_trimmed.DataValues) - 1)  -- drop the trailing comma
FROM #Tbl1 AS extern
CROSS APPLY
(
    SELECT
    (
        -- embedded single quotes are removed from the value, not escaped
        SELECT CONCAT('''', REPLACE(ISNULL(intern.DataValue, ''), '''', ''), ''',')
        FROM #Tbl1 AS intern
        WHERE intern.InGroup = extern.InGroup
        ORDER BY intern.ColumnName
        -- TYPE + .value() returns the text unescaped; plain FOR XML PATH('')
        -- would turn &, < and > into XML entities.
        FOR XML PATH(''), TYPE
    ).value('.', 'nvarchar(max)')
) AS pre_trimmed (DataValues)
-- every row of a group yields the same string; GROUP BY collapses them to one
GROUP BY extern.InGroup, pre_trimmed.DataValues;
I have to extract the rows in #Tbl1 (or #Table_TEMP) into #Tbl_Insert.
I don't want to use a cursor to insert row by row from #Table_TEMP because, with big data (for example, more than 10,000 rows), it runs too slowly.
Please help.

I found a sample on Stack Overflow:
-- Splits a '*'-delimited string into three columns by turning the delimiter
-- into '.' and reading the parts with PARSENAME (parts are numbered from the right).
DECLARE @tbl_Temp TABLE (Data NVARCHAR(MAX));
DECLARE @tbl2 TABLE (A NVARCHAR(MAX), B NVARCHAR(MAX), C NVARCHAR(MAX));

INSERT INTO @tbl_Temp (Data) VALUES ('a1*b1*c1');

INSERT INTO @tbl2 (A, B, C)
SELECT
    PARSENAME(REPLACE(Data, '*', '.'), 3),
    PARSENAME(REPLACE(Data, '*', '.'), 2),
    PARSENAME(REPLACE(Data, '*', '.'), 1)
FROM @tbl_Temp;

SELECT A, B, C FROM @tbl2;
It's nearly the same, but:
My data contains dots, so I cannot use PARSENAME.
Must I know the number of dots in advance to build dynamic SQL?
PARSENAME only supports up to three dots (four parts); it returns NULL when there are more.
EXAMPLE:
-- Demonstrates the PARSENAME limit: parts 1-4 are returned, part 5 comes back NULL.
DECLARE @ObjectName nvarchar(1000);
SET @ObjectName = 'HeadOfficeSQL1.Northwind.dbo.Authors';

SELECT
    PARSENAME(@ObjectName, 5) AS Server4,  -- beyond the supported range
    PARSENAME(@ObjectName, 4) AS Server,
    PARSENAME(@ObjectName, 3) AS DB,
    PARSENAME(@ObjectName, 2) AS Owner,
    PARSENAME(@ObjectName, 1) AS Object;

If I understand correctly, you will need to use APPLY to fetch the records and insert the data into the other table:
-- Pivots the name/value rows of #Tbl1 into one #Tbl_Insert row per InGroup.
-- Each CASE yields a value only for its own ColumnName, so MAX picks the single
-- non-NULL value of the group.
INSERT INTO #Tbl_Insert (ID, Name, Age)
SELECT
    MAX(CASE WHEN src.ColumnName = 'ID'   THEN src.DataValue END) AS [id],
    MAX(CASE WHEN src.ColumnName = 'Name' THEN src.DataValue END) AS [Name],
    MAX(CASE WHEN src.ColumnName = 'Age'  THEN src.DataValue END) AS [Age]
FROM #Tbl1 AS src
GROUP BY src.InGroup;

SELECT ID, Name, Age
FROM #Tbl_Insert;

I did both: I filled #Tbl_Insert and created a function that works like PARSENAME. This improved performance.
create function dbo.fnGetCsvPart(@csv varchar(8000), @index tinyint, @last bit = 0)
returns varchar(4000)
as
/*
    Returns the 0-based "column" @index of the comma-separated string @csv.

    @csv   : comma-separated input
    @index : 0-based position of the wanted part
    @last  : when 1, returns everything from the wanted part to the end of
             @csv (further commas included) instead of just that one part

    Returns NULL when @csv has fewer than @index + 1 parts.
*/
begin
    declare @start int;  -- position of the comma before the wanted part (0 = start of string)
    declare @next int;   -- position of the comma after the wanted part (0 = none)
    set @start = 0;

    while 1 = 1
    begin
        if @index = 0
        begin
            set @next = charindex(',', @csv, @start + 1);
            if @last = 1 or @next = 0
                -- take the rest of the string; a fixed maximum length is used
                -- because LEN() ignores trailing spaces and could cut them off
                return substring(@csv, @start + 1, 8000);
            return substring(@csv, @start + 1, @next - @start - 1);
        end

        -- step over one part: move to the next comma and count down
        select @index = @index - 1, @start = charindex(',', @csv, @start + 1);
        if @start = 0 break;  -- ran out of commas before reaching the wanted part
    end

    return null;
end
GO

SQL Server select (top) two rows into two temp variables

I have a query which results in two or more rows (just one column) and I want to catch the first row value into first temp variable and second row value into second temp variable without using multiple times the select top 1 and select top 1 order by desc
Something like this;
Select row1 value into #tempvariable1, row2 value into #tempvariable2 from blah blah

You need somehow to identify the row (I am using a row ID in the example below, ordering by value - you can order by id or something else):
-- Reads the first and second row (ordered by [value]) into two variables.
DECLARE @DataSource TABLE
(
    [value] VARCHAR(12)
);

INSERT INTO @DataSource ([value])
VALUES ('value 1')
      ,('value 2')
      ,('value 3');

DECLARE @tempVariable1 VARCHAR(12)
       ,@tempVariable2 VARCHAR(12);

WITH DataSource ([value], [rowID]) AS
(
    SELECT [value]
          ,ROW_NUMBER() OVER (ORDER BY [value])
    FROM @DataSource
)
-- Conditional aggregation produces a single result row, so each variable is
-- assigned exactly once instead of being reassigned for every source row.
-- A variable stays NULL when its row does not exist.
SELECT @tempVariable1 = MAX(CASE WHEN [rowID] = 1 THEN [value] END)
      ,@tempVariable2 = MAX(CASE WHEN [rowID] = 2 THEN [value] END)
FROM DataSource;

SELECT @tempVariable1
      ,@tempVariable2;

You can use a CTE where you will get the X values you need and then select from it:
-- Reads the two smallest ids into @A and @B.
declare @data table (id int);
declare @A int, @B int;

insert into @data (id) values (8), (6), (4), (3);

with vals (id, n) as (
    select top (2) id, row_number() over (order by id)
    from @data
    -- TOP needs its own ORDER BY; without it, which two rows are kept is undefined
    order by id
)
select @A = (select id from vals where n = 1)
     , @B = (select id from vals where n = 2);

-- You could also use PIVOT:
select @A = [1], @B = [2]
from (
    select id, row_number() over (order by id)
    from @data
) v (id, n)
pivot (
    max(id) for n in ([1], [2])
) as piv;

You have two options.
Let's say the test case is built as below:
-- Test table: two rows of interest plus three rows the later query filters out.
CREATE TABLE dbo.Test
(
    value VARCHAR(100) NOT NULL
);
GO
INSERT INTO dbo.Test (value)
VALUES ('A'),
       ('B'),
       ('NO THIS ONE'),
       ('NO THIS ONE'),
       ('NO THIS ONE');
GO
Now let's say you fetch your data as below
-- The query whose first two result rows are wanted.
SELECT src.value
FROM dbo.Test AS src
WHERE src.value <> 'NO THIS ONE';
GO
The first and easier option is to save the data in a temp table
-- Store the query result once, so the later TOP 1 lookups read only the result
-- rows. #results is a local temporary table and is therefore created with
-- CREATE TABLE (DECLARE ... TABLE is only valid for @table variables).
IF OBJECT_ID('tempdb..#results') IS NOT NULL
    DROP TABLE #results;

CREATE TABLE #results (value varchar(100));

INSERT INTO #results (value)
SELECT t.value
FROM dbo.Test t
WHERE t.value <> 'NO THIS ONE';
you still use TOP 1 BUT not in the entire data, only in the results.
Use TOP 1 to find the first result and a second TOP 1 where value is different from the first.
-- @A = first value, @B = first value that differs from @A.
-- ORDER BY makes "first" well defined; TOP without it may return any row.
DECLARE @A varchar(100), @B varchar(100);

SET @A = (SELECT TOP (1) r.value FROM #results r ORDER BY r.value);
SET @B = (SELECT TOP (1) r.value FROM #results r WHERE r.value <> @A ORDER BY r.value);

SELECT @A, @B;
GO
This approach has the advantage of performance.
Of course, it doesn't work well if both values are equal. You can fix that by using a TOP 1 with the ordering reversed.
There's a better alternative using ROW_NUMBER.
It works because if you set a variable in a SELECT that returns multiple rows, the variable keeps the value from the last row (in fact, it is reassigned on each row iteration).
The CASE expression makes sure the variable #A is set only on the first row iteration.
DECLARE @A varchar(100), @B varchar(100);

/* This way @B receives the last value and @A the first */
-- @B is overwritten by every row; @A is only replaced on the row numbered 1
-- and otherwise keeps its current value.
-- NOTE(review): this depends on the order in which rows are assigned to the
-- variables, which the query itself does not pin down - verify on your server.
SELECT @B = t.value,
       @A = (CASE WHEN ROW_NUMBER() OVER (ORDER BY t.value) = 1
                  THEN t.value
                  ELSE @A
             END)
FROM dbo.Test t
WHERE t.value <> 'NO THIS ONE';

SELECT @A, @B;

Inserting records in loop without any unique columns

I have the following table for example which does not have any identity or unique column:
-- Source table without any identity or unique column, plus five sample rows.
CREATE TABLE tbl_test
(
    first_name nvarchar(255),
    last_name  nvarchar(255),
    [address]  nvarchar(255)
);

INSERT INTO tbl_test (first_name, last_name, [address])
VALUES ('Andrei',    'Corovei',  'str Meteor'),
       ('Pop',       'Ionut',    'str Meteor'),
       ('Whitehead', 'John',     'str Lunii'),
       ('Grisham',   'Robert',   'str Corcoduselor'),
       ('Eugen',     'Johnesco', 'str Prunelor');
I can insert the rows from the above table to a another table using below syntax:
SELECT src.first_name, src.last_name, src.[address] INTO tbl_test_loop FROM tbl_test AS src
This will insert all records in one shot.
Can I insert in a loop with a batch count of 2 the above results rather than at one shot without creating any Identity or rownumber function?

-- Copies tbl_test into the existing table tbl_test_loop in batches of
-- @loopcount rows, without needing an identity column.
DECLARE @loopcount int;   -- batch size
DECLARE @inserted int;    -- rows copied by the latest batch

SET @loopcount = 2;

WHILE 1 = 1
BEGIN
    -- EXCEPT yields the rows not copied yet and treats NULLs as equal, so rows
    -- containing NULL are not copied again on every pass.
    -- NOTE(review): EXCEPT also collapses fully identical source rows into one -
    -- confirm that this is acceptable for your data.
    INSERT INTO tbl_test_loop (first_name, last_name, [address])
    SELECT TOP (@loopcount) pending.first_name, pending.last_name, pending.[address]
    FROM (
        SELECT first_name, last_name, [address] FROM tbl_test
        EXCEPT
        SELECT first_name, last_name, [address] FROM tbl_test_loop
    ) AS pending;

    SET @inserted = @@ROWCOUNT;

    -- Stop when nothing is left, instead of pre-computing an iteration count.
    IF @inserted = 0 BREAK;

    -- report the number of rows actually copied by this batch
    SELECT 'inserted ' + CAST(@inserted AS varchar(10));
END

-- Every tbl_test row is paired with each row of the two-row derived table v.
SELECT src.*
INTO tbl_test_loop
FROM tbl_test AS src
CROSS JOIN (SELECT 1 AS loop UNION SELECT 2) AS v

Try NTILE, no loop required:
-- x is the bucket number (1-3) that NTILE assigns to each row.
SELECT
    NTILE(3) OVER (ORDER BY (SELECT 1)) AS x,
    src.*
INTO tbl_test_loop
FROM tbl_test AS src

SQL Server: How do I delimit this data?

-- rows are separated by ',' and the two columns of a row by '|'
declare @mydata nvarchar(4000);
set @mydata = '36|0, 77|5, 132|61';
I have this data that I need to get into a table. So for Row1 columnA would be 36 and columnB would be 0. For Row2 columnA would be 77 and columnB would be 5 etc.
What is the best way to do this?
Thanks

You need a split table-valued-function. There's plenty of examples on the web, e.g. http://www.sqlteam.com/forums/topic.asp?TOPIC_ID=50648
-- Splits @RowData on the delimiter @SplitOn and returns one row per part.
--   Id   : 1-based position of the part within @RowData
--   Data : the part with leading and trailing spaces removed
-- Input without the delimiter is returned as a single row.
CREATE FUNCTION dbo.Split
(
    @RowData nvarchar(2000),
    @SplitOn nvarchar(5)
)
RETURNS @RtnValue table
(
    Id int identity(1,1),
    Data nvarchar(100)
)
AS
BEGIN
    DECLARE @DelimLen int;  -- delimiter length in characters
    DECLARE @Pos int;       -- position of the next delimiter in the remaining text

    -- DATALENGTH / 2 instead of LEN: LEN ignores trailing spaces, which would
    -- give a wrong length for a delimiter such as ' '.
    SET @DelimLen = DATALENGTH(@SplitOn) / 2;
    SET @Pos = CHARINDEX(@SplitOn, @RowData);

    WHILE (@Pos > 0)
    BEGIN
        INSERT INTO @RtnValue (Data)
        SELECT Data = LTRIM(RTRIM(SUBSTRING(@RowData, 1, @Pos - 1)));

        -- Skip the whole delimiter, not just one character, so multi-character
        -- delimiters do not leak into the next part.
        SET @RowData = SUBSTRING(@RowData, @Pos + @DelimLen, 2000);
        SET @Pos = CHARINDEX(@SplitOn, @RowData);
    END

    -- whatever remains after the last delimiter is the final part
    INSERT INTO @RtnValue (Data)
    SELECT Data = LTRIM(RTRIM(@RowData));

    RETURN;
END
go
-- Split into rows on ',', split each row into columns on '|', then pivot the
-- column parts side by side.
declare @mydata nvarchar(4000);
set @mydata = '36|0, 77|5, 132|61';

select
    pd.rowid,
    pd.[1] as col1,
    pd.[2] as col2
from
(
    select
        pair.Id as rowid,   -- position of the 'a|b' pair in the input
        part.Id as colid,   -- position of the value inside its pair
        part.Data
    from dbo.Split(@mydata, ',') as pair
    cross apply dbo.Split(pair.Data, '|') as part
) d
pivot
(
    min(Data)
    for colid in ([1], [2])
) pd
order by pd.rowid;
I just picked the first split function I found. I don't think it's the best one, but it works for this example.
This outputs:
rowid col1 col2
1 36 0
2 77 5
3 132 61

If the data is in a file, you should be able to use bcp or BULK INSERT, specifying row and column terminators.
Otherwise, you'll need a nested split function
Of course, you could also send the data to SQL Server as xml

SQL: How to limit the number of records the MERGE statement will insert

Some sample data:
-- Sample data. #TARGET and #SOURCE are local temporary tables and are therefore
-- created with CREATE TABLE (DECLARE ... TABLE is only valid for @table variables).
IF OBJECT_ID('tempdb..#TARGET') IS NOT NULL DROP TABLE #TARGET;
IF OBJECT_ID('tempdb..#SOURCE') IS NOT NULL DROP TABLE #SOURCE;

CREATE TABLE #TARGET (ID INT, value INT);
CREATE TABLE #SOURCE (ID INT, value INT);

INSERT INTO #TARGET (ID, value)
VALUES (1, 213), (2, 3), (3, 310), (4, 43);

INSERT INTO #SOURCE (ID, value)
VALUES (1, 134), (2, 34), (13, 310), (14, 43),
       (15, 32), (16, 30), (17, 60), (18, 5);

-- Baseline MERGE: inserts every source row whose ID is not yet in the target.
MERGE #TARGET AS t
USING #SOURCE AS s
    ON t.ID = s.ID
WHEN NOT MATCHED THEN
    INSERT (ID, value) VALUES (s.ID, s.value);

SELECT ID, value FROM #TARGET;
So I'm having a target table , and a source table. What I want to accomplish is that when there is a large number of not matched items, to only insert the x top items with the highest value.
Using top on the merge itself won't work, because that would limit the whole source table, I want to do something like
WHEN NOT MATCHED
LIMIT(5) AND ORDER BY Value DESC --only insert the 5 non-matches with the highest value
INSERT VALUES (s.id,s.value)
---- UPDATE ----
My MERGE statement also contains an WHEN MATCHED THEN statement:
WHEN MATCHED THEN
UPDATE SET t.value = s.value
this sadly negates the answers given by Ian and Dog...

Isn't SET ROWCOUNT deprecated? You could use the TOP clause if you do it like this:
-- Insert only the 5 unmatched source rows with the highest value.
-- The limit is applied inside the source query, where TOP (5) ... ORDER BY is
-- honoured. A TOP on the MERGE itself limits the affected rows in no particular
-- order, and TOP (100) PERCENT ... ORDER BY in a derived table does not
-- guarantee any ordering.
MERGE #TARGET AS t
USING
(
    SELECT TOP (5) src.ID, src.value
    FROM #SOURCE AS src
    WHERE NOT EXISTS (SELECT 1 FROM #TARGET AS existing WHERE existing.ID = src.ID)
    ORDER BY src.value DESC
) AS s
    ON t.ID = s.ID
WHEN NOT MATCHED THEN
    INSERT (ID, value) VALUES (s.ID, s.value);

SELECT ID, value FROM #TARGET;
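An ORDER BY in the MERGE source subquery is only accepted together with a TOP clause. Note that TOP (100) PERCENT merely satisfies that syntax rule and does not guarantee the row order; a real row limit such as TOP (5) ... ORDER BY in the source is needed for the ordering to matter.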
Edit:
What about doing it in two steps?
-- Step 1: insert only the 5 unmatched source rows with the highest value.
-- The limit sits inside the source query, where TOP (5) ... ORDER BY is honoured.
MERGE #TARGET AS t
USING
(
    SELECT TOP (5) src.ID, src.value
    FROM #SOURCE AS src
    WHERE NOT EXISTS (SELECT 1 FROM #TARGET AS existing WHERE existing.ID = src.ID)
    ORDER BY src.value DESC
) AS s
    ON t.ID = s.ID
WHEN NOT MATCHED THEN
    INSERT (ID, value) VALUES (s.ID, s.value);

-- Step 2: bring the matched rows up to date. Rows inserted by step 1 already
-- carry the source value, so the <> filter leaves them untouched.
UPDATE t
SET t.value = s.value
FROM #TARGET AS t
INNER JOIN #SOURCE AS s
    ON t.ID = s.ID
WHERE t.value <> s.value;

SELECT ID, value FROM #TARGET;

You can use SET ROWCOUNT n;
For example;
-- Cap the number of rows the following statement may affect at 4.
-- NOTE(review): the setting presumably stays active for later statements until
-- it is changed again - confirm, and reset it after the statement if needed.
SET ROWCOUNT 4;
-- Rows with Quantity below 300 are set to 400.
UPDATE Production.ProductInventory
SET Quantity = 400
WHERE Quantity < 300;
See; http://msdn.microsoft.com/en-us/library/ms188774.aspx
Or you can do
-- Plain INSERT alternative: add the 5 source rows with the highest value that
-- are not in the target yet.
INSERT INTO #Target (ID, value)
SELECT TOP (5) s.ID, s.value
FROM #Source AS s
WHERE NOT EXISTS (SELECT 1 FROM #Target AS t WHERE t.ID = s.ID)
ORDER BY s.value DESC;

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight

grouping, splitting, and counting rows - sql-server

Related

Insert Into Table with String Insert Or Table Type

SQL Server select (top) two rows into two temp variables

Inserting records in loop without any unique columns

SQL Server: How do I delimit this data?

SQL: How to limit the number of records the MERGE statement will insert

Categories

Resources