I have a table with values
ID Son Father
----------- ---------- ----------
1 Mark Gerard
2 Gerard Ivan
3 Leo Samuel
4 Samuel Johan
5 Ivan Carles
I need to change table like this:
ID Son Father
----------- ---------- ----------
1 Mark Carles
2 Gerard Carles
3 Leo Johan
4 Samuel Johan
5 Ivan Carles
The goal is to find a major 'Father' and update all 'Son' records with this value. Major 'Father' can be different.
My code is as follows:
-- Baseline row-by-row approach from the question: for every row, re-point all
-- rows whose Father equals that row's Son to that row's Father, and repeat the
-- whole pass once per row so the top-most ancestor propagates down each chain.
-- T-SQL local variables use the @ prefix; the # prefix is for temp tables only.
DECLARE @I INT                -- outer pass counter
DECLARE @J INT                -- ID of the row handled by the current inner step
DECLARE @CNT INT              -- number of rows in #PPL
DECLARE @CH_1 NVARCHAR(10)    -- Son of row @J
DECLARE @CH_2 NVARCHAR(10)    -- Father of row @J

CREATE TABLE #PPL (ID INT, Son NVARCHAR(10), Father NVARCHAR(10))
INSERT INTO #PPL (ID, Son, Father) VALUES (1, 'Mark', 'Gerard')
INSERT INTO #PPL (ID, Son, Father) VALUES (2, 'Gerard', 'Ivan')
INSERT INTO #PPL (ID, Son, Father) VALUES (3, 'Leo', 'Samuel')
INSERT INTO #PPL (ID, Son, Father) VALUES (4, 'Samuel', 'Johan')
INSERT INTO #PPL (ID, Son, Father) VALUES (5, 'Ivan', 'Carles')

SET @I = 1
SET @CNT = (SELECT COUNT(ID) FROM #PPL)

-- @CNT passes of @CNT steps each: this is what makes the script slow on
-- anything but a small table.
WHILE @I <= @CNT
BEGIN
    SET @J = 1
    -- Rows are looked up by ID = 1..@CNT, so IDs must be consecutive from 1.
    WHILE @J <= @CNT
    BEGIN
        SET @CH_1 = (SELECT Son FROM #PPL WHERE ID = @J)
        SET @CH_2 = (SELECT Father FROM #PPL WHERE ID = @J)
        -- Everyone whose father is @CH_1 now points at @CH_1's own father.
        UPDATE #PPL SET Father = @CH_2 WHERE Father = @CH_1
        SET @J = @J + 1
    END;
    SET @I = @I + 1
END;

SELECT * FROM #PPL
DROP TABLE #PPL
This code works correctly, but only for a small number of records. How can it be optimized?
Thanks!
Here is how you can do it with a recursive CTE.
-- Sample data: one row per person together with the name of that person's father.
CREATE TABLE #PPL (ID INT, Son NVARCHAR(10), Father NVARCHAR(10));

INSERT INTO #PPL (ID, Son, Father)
VALUES (1, 'Mark',   'Gerard'),
       (2, 'Gerard', 'Ivan'),
       (3, 'Leo',    'Samuel'),
       (4, 'Samuel', 'Johan'),
       (5, 'Ivan',   'Carles');

WITH Lineage AS
(
    -- Anchor: every person paired with the direct father (level 1).
    SELECT person.ID,
           person.Son,
           person.Father,
           1 AS [Level]
    FROM #PPL AS person

    UNION ALL

    -- Recursive step: climb one generation by swapping the current father
    -- for that father's own father, keeping the original ID and Son.
    SELECT climbed.ID,
           climbed.Son,
           elder.Father,
           climbed.[Level] + 1
    FROM Lineage AS climbed
    INNER JOIN #PPL AS elder
        ON elder.Son = climbed.Father
),
RankedLineage AS
(
    -- Rank each person's ancestors from the most distant (rank 1) downwards.
    SELECT ID,
           Son,
           Father,
           ROW_NUMBER() OVER (PARTITION BY Son ORDER BY [Level] DESC) AS RowRank
    FROM Lineage
)
-- The rank-1 ancestor is the "major" father of each person.
SELECT ID,
       Son,
       Father
FROM RankedLineage
WHERE RowRank = 1
ORDER BY ID
The recursive CTE CTE_FamilyGenealogy finds all the father-son combinations and determines each one's level within the family tree. The CTE_MajorFathers
CTE uses ROW_NUMBER to rank the possible combinations by their Level within CTE_FamilyGenealogy in order to determine the major father.
Try the following approach, based on recursion (see recursive Common Table Expressions) and the HIERARCHYID (SQL2008+) data type. The basic idea is to build one hierarchy value for every row, starting from the "first" father :-) and ending with the "last" son :-). For example, for the first row (1, 'Mark', 'Gerard') this node/family tree is /5/2/1/, where /5/ is the "first" father ;-) and /1/ is the "last" son. Next, it converts these values to hierarchyid values and uses the GetLevel and GetAncestor methods to compute the "first" father (Father1ID): Johan or Carles.
-- Computes, for every row of #PPL, the ID of the row at the top of its
-- father chain and stores it in #Results; #PPL itself is only read here and
-- must already exist.
-- Drop the result table left over from a previous run, if any.
IF OBJECT_ID('tempdb.dbo.#Results') IS NOT NULL
BEGIN
DROP TABLE #Results;
END
CREATE TABLE #Results (ID INT NOT NULL PRIMARY KEY, Father1ID INT);
-- CteRec builds, per row, a slash-delimited path of row IDs ("Node") running
-- from the top-most row of the chain down to the row itself, e.g. /5/2/1/.
WITH CteRec
AS (
-- Anchor: rows whose Father does not appear as a Son in any row, i.e. the
-- top of each chain; their path is just their own ID.
SELECT l1.ID, l1.Son, l1.Father, CONVERT(VARCHAR(900), '/'+LTRIM(l1.ID)+'/') AS Node -- FamilyTree
FROM #PPL AS l1 -- First level
WHERE NOT EXISTS(SELECT * FROM #PPL p WHERE p.Son = l1.Father)
UNION ALL
-- Recursive step: rows whose Father is the Son of an already-visited row;
-- their own ID is appended to the parent's path.
SELECT ln.ID, ln.Son, ln.Father, CONVERT(VARCHAR(900), prt.Node+LTRIM(ln.ID)+'/') AS Node -- FamilyTree
FROM #PPL AS ln -- Next level
JOIN CteRec AS prt ON prt.Son = ln.Father
)
INSERT #Results (ID, Father1ID)
SELECT ID,
-- GetAncestor(GetLevel()-1) walks up to the first path segment; stripping
-- the slashes from its string form leaves the ID of the chain's top row.
Father1ID = CONVERT(INT,REPLACE(CONVERT(HIERARCHYID, Node).GetAncestor(CONVERT(HIERARCHYID, Node).GetLevel()-1).ToString(),'/',''))
FROM CteRec;
-- Show each row with the ID of its chain's top row and that row's Father,
-- which is the name of the "first" father.
SELECT p.*, r.Father1ID, rp.Father AS Father1Name
FROM #PPL p
INNER JOIN #Results r ON p.ID = r.ID
INNER JOIN #PPL rp ON r.Father1ID = rp.ID
-- You can also use #Results in an UPDATE statement, but I would store these values in a new column Father1
Recursive CTE's are overrated =)
This simple approach will run just as fast (on normal data), will never complain about max recursion, and is easy to read. The only downside I can see is that it might go into an infinite loop when the data is corrupt (for example, when the father/son links form a cycle).
-- Set-based loop: each pass moves every row's Father one step further up the
-- chain (all rows at once) and stops as soon as a pass changes nothing.
CREATE TABLE #PPL (ID INT, Son NVARCHAR(10), Father NVARCHAR(10))
INSERT INTO #PPL (ID, Son, Father) VALUES (1, 'Mark', 'Gerard')
INSERT INTO #PPL (ID, Son, Father) VALUES (2, 'Gerard', 'Ivan')
INSERT INTO #PPL (ID, Son, Father) VALUES (3, 'Leo', 'Samuel')
INSERT INTO #PPL (ID, Son, Father) VALUES (4, 'Samuel', 'Johan')
INSERT INTO #PPL (ID, Son, Father) VALUES (5, 'Ivan', 'Carles')

-- Rows changed by the most recent UPDATE; -1 only forces the first pass.
-- (T-SQL variables use @, and the row-count function is @@ROWCOUNT.)
DECLARE @rowcount int = -1

WHILE @rowcount <> 0
BEGIN
    -- Replace a row's Father by that father's own Father whenever the
    -- father is himself listed as somebody's Son.
    UPDATE upd
       SET Father = new.Father
      FROM #PPL upd
      JOIN #PPL new
        ON new.Son = upd.Father
     WHERE upd.Father <> new.Father

    -- Must be read immediately after the UPDATE, before any other statement.
    SELECT @rowcount = @@ROWCOUNT
END

SELECT ID, Son, Father FROM #PPL
PS: it probably helps to have an index on the Son column when running on large datasets.
Related
I have Parent and Child table.
The goal is to duplicate the records, except with new primary keys.
Original Tables
Parent(id)
1
Child(id,parentId, data)
1,1
2,1
After insert:
Parent
1
2
Child
1,1
2,1
3,2
4,2
How do I do that? The part I am having trouble with is getting the new parent key for use with the child records.
This is what I have come up with so far.
--DECLARE VARS
-- Source and target document sets. T-SQL variables use the @ prefix.
declare @currentMetadataDocumentSetId int = 1, --Ohio
        @newMetadataDocumentSetid int = 3; --PA

--CLEANUP
-- Both temp tables are created with SELECT ... INTO below, which fails if the
-- table already exists in the session, so drop any leftovers first.
IF OBJECT_ID('tempdb..#tempFileRowMap') IS NOT NULL
    DROP TABLE #tempFileRowMap;
IF OBJECT_ID('tempdb..#tempFileRowMapDetail') IS NOT NULL
    DROP TABLE #tempFileRowMapDetail;

--Remove existing file row maps.
delete from file_row_map where metadata_document_set_id = @newMetadataDocumentSetid;

--Create a temptable to hold data to be copied.
-- The rows are re-targeted at the new document set (taken from the variable
-- rather than a hard-coded literal), and the original key is kept so that
-- detail rows can later be matched back to their parent.
Select [edi_document_code],
       [functional_group],
       [description],
       @newMetadataDocumentSetid as [metadata_document_set_id],
       [document_name],
       [incoming_file_row_subtype],
       [metadata_document_id],
       [document_subcode],
       [outgoing_file_row_subtype],
       [asi_type_code],
       [asi_action_code],
       [metadata_document_set],
       file_row_map_id as orig_file_row_map_id
into #tempFileRowMap
from file_row_map fileRowMap
where metadata_document_set_id = @currentMetadataDocumentSetId;

--Select * from #tempFileRowMap;

-- Copy the parent rows. The explicit column list keeps the insert correct
-- if the column order of file_row_map ever changes.
Insert into file_row_map (
       [edi_document_code],
       [functional_group],
       [description],
       [metadata_document_set_id],
       [document_name],
       [incoming_file_row_subtype],
       [metadata_document_id],
       [document_subcode],
       [outgoing_file_row_subtype],
       [asi_type_code],
       [asi_action_code],
       [metadata_document_set])
select [edi_document_code],
       [functional_group],
       [description],
       [metadata_document_set_id],
       [document_name],
       [incoming_file_row_subtype],
       [metadata_document_id],
       [document_subcode],
       [outgoing_file_row_subtype],
       [asi_type_code],
       [asi_action_code],
       [metadata_document_set]
from #tempFileRowMap;

--Show Results
Select * from file_row_map fileRowMap where fileRowMap.metadata_document_set_id = @newMetadataDocumentSetid;

--Update Detail
-- Stage the detail rows that belong to the copied parents. An INNER JOIN is
-- used so that only details of the source set are staged (a LEFT JOIN here
-- would stage every detail row in the table).
-- NOTE(review): file_row_map_id below is still the ORIGINAL parent key; it
-- has not yet been remapped to the newly generated parent keys.
Select d.[file_row_map_id],
       d.[file_row_column],
       d.[element_code],
       d.[element_metadata_id],
       d.[col_description],
       d.[example],
       d.[translate],
       d.[is_used],
       d.[is_mapped],
       d.[page_num],
       d.[subcode],
       d.[qualifier],
       d.[loop_code],
       d.[loop_subcode],
       d.[default_value],
       d.[delete_flag]
into #tempFileRowMapDetail
from [dbo].[file_row_map_detail] d
inner join #tempFileRowMap m
    on m.orig_file_row_map_id = d.file_row_map_id;

select * from #tempFileRowMapDetail
Simply use the OUTPUT clause to capture the exact primary key values generated for the parent table.
Let's build an example schema for your case:
-- Receives the identity value generated for each parent row, keyed by order name.
CREATE TABLE #ID_CAPTURE (PARENT_ID INT, ORDER_NME VARCHAR(20));

-- Staging data that is to be split into the parent and child tables.
CREATE TABLE #DUMMY_TABLE (ORDER_NME VARCHAR(20), ITEM_NME VARCHAR(20));

-- Target tables; both generate their own key through IDENTITY.
CREATE TABLE #ORDER_PARENT (ORDER_ID INT IDENTITY, ORDER_NME VARCHAR(20));
CREATE TABLE #ORDER_CHILD (CHILD_ID INT IDENTITY, ORDER_ID INT, ORDER_NME VARCHAR(20));

-- Two items for BILL1 and one item for BILL2.
INSERT INTO #DUMMY_TABLE (ORDER_NME, ITEM_NME)
VALUES ('BILL1', 'Oil'),
       ('BILL1', 'Gas'),
       ('BILL2', 'Diesel');
Now do Inserts in Parent & Child Tables
-- Create one parent per distinct order name; OUTPUT ... INTO records every
-- generated ORDER_ID next to its order name in #ID_CAPTURE.
INSERT INTO #ORDER_PARENT (ORDER_NME)
OUTPUT inserted.ORDER_ID, inserted.ORDER_NME
  INTO #ID_CAPTURE (PARENT_ID, ORDER_NME)
SELECT DISTINCT staged.ORDER_NME
FROM #DUMMY_TABLE AS staged;

-- Create the children, looking up each captured parent key by order name.
-- The item name is stored in the child table's ORDER_NME column.
INSERT INTO #ORDER_CHILD (ORDER_ID, ORDER_NME)
SELECT captured.PARENT_ID,
       staged.ITEM_NME
FROM #DUMMY_TABLE AS staged
INNER JOIN #ID_CAPTURE AS captured
    ON captured.ORDER_NME = staged.ORDER_NME;

SELECT PARENT_ID, ORDER_NME FROM #ID_CAPTURE;
SELECT CHILD_ID, ORDER_ID, ORDER_NME FROM #ORDER_CHILD;
There are other ways to get inserted identity values.
See the documentation for @@IDENTITY (Transact-SQL) and SCOPE_IDENTITY.
Try following approach:
-- Duplicates a parent row and its whole subtree under new IDs:
--   1. take the highest existing ID,
--   2. walk the subtree of @ParentID and give each row a new ID after it,
--   3. project the copied rows with both ID and ParentID translated.
-- Table variables and scalar variables use the @ prefix in T-SQL.
DECLARE @Table1 TABLE (
    ID INT NOT NULL PRIMARY KEY,
    ParentID INT NULL, -- FK
    [Desc] VARCHAR(50) NOT NULL
);
INSERT @Table1 (ID, ParentID, [Desc])
VALUES
    (1, NULL, 'A'),
    (2, 1, 'AA.1'),
    (3, 1, 'AA.2'),
    (4, NULL, 'B'),
    (5, 4, 'BB.1'),
    (6, 4, 'BB.2'),
    (7, 4, 'BB.3'),
    (8, 7, 'BBB.1');

-- Root of the subtree to duplicate.
DECLARE @ParentID INT = 4;

-- Highest ID currently in use; new IDs are allocated after it.
DECLARE @LastID INT = (SELECT TOP(1) ID FROM @Table1 x ORDER BY x.ID DESC)
IF @LastID IS NULL
BEGIN
    -- Only reachable when the table is empty.
    RAISERROR('Invalid call', 16, 1)
    --RETURN ?
END
SELECT @LastID AS LastID;
/*
LastID
-----------
8
*/

-- Old ID -> new ID for every row of the subtree.
DECLARE @RemapIDs TABLE (
    OldID INT NOT NULL PRIMARY KEY,
    [NewID] INT NOT NULL UNIQUE
);

WITH CteRecursion
AS (
    -- Anchor: the subtree root.
    SELECT 1 AS Lvl, crt.ID, crt.ParentID --, crt.[Desc]
    FROM @Table1 crt
    WHERE crt.ID = @ParentID
    UNION ALL
    -- Recursive step: children of rows already visited, one level deeper.
    SELECT cld.Lvl + 1 AS Lvl, crt.ID, crt.ParentID --, crt.[Desc]
    FROM @Table1 crt
    JOIN CteRecursion cld ON crt.ParentID = cld.ID
)
INSERT @RemapIDs (OldID, [NewID])
-- Number level by level; r.ID breaks ties so the assignment is deterministic.
SELECT r.ID, @LastID + ROW_NUMBER() OVER(ORDER BY r.Lvl, r.ID) AS [NewID]
FROM CteRecursion r;

--INSERT @Table1 (ID, ParentID, [Desc])
-- The LEFT JOIN leaves ParentID NULL for the copied root, whose original
-- parent (if any) lies outside the subtree and so has no entry in @RemapIDs.
SELECT nc.[NewID] AS ID, np.[NewID] AS ParentID, o.[Desc]
FROM @Table1 o -- old
JOIN @RemapIDs nc /*new child ID*/ ON o.ID = nc.OldID
LEFT JOIN @RemapIDs np /*new parent ID*/ ON o.ParentID = np.OldID
/*
ID          ParentID    Desc
----------- ----------- --------------------------------------------------
9           NULL        B
10          9           BB.1
11          9           BB.2
12          9           BB.3
13          12          BBB.1
*/
Note: with some minor changes, this should also work with multiple ParentID values.
Can anyone help me figure out what I am doing wrong with this query? I am trying to filter records from a table that contains emails sent out to clients, together with the status of each email. I need to eliminate all EmailIds that have statuses of both Sent(1) and Bounced(0). Anything other than these two statuses is considered Delivered(4). So the output should contain only EmailIds with a status of Delivered(4), for all those EmailIds that do not have statuses of both 1 and 0. In the example below, I should see EmailId 4 too, with a status of Delivered.
This is my sample setup. I really appreciate any help you can provide.
-- Lookup of status codes.
create table #status
(
    Id int,
    Name varchar(100)
)
insert into #status (Id, Name)
values (0, 'Bounced'), (1, 'Sent'), (2, 'Clicked'),
       (3, 'Opened'), (4, 'Delivered')

-- One row per (email, status) event; an email can appear several times.
create table #email
(
    EmailId int,
    Email varchar(100),
    StatusId int
)
-- Sample addresses use the normal user@domain form.
insert into #email (EmailId, email, StatusId)
values (1, 'rjoseph@gmail.com', 1), (1, 'rjoseph@gmail.com', 0),
       (2, 'nathan@comcast.net', 1), (2, 'nathan@comcast.net', 2),
       (2, 'nathan@comcast.net', 3), (3, 'nora@comcast.net', 1),
       (3, 'nora@comcast.net', 2), (3, 'nora@comcast.net', 3),
       (4, 'neha@comcast.net', 1)

-- The query being asked about.
-- NOTE: the filter is applied per row, so it only keeps EmailIds that have at
-- least one row with a status other than 1 or 0. EmailId 4, whose only row is
-- Sent(1), is therefore not returned - this is the behaviour in question.
select
    e.EmailId
into
    #temp
from
    #email e
    inner join #status st
        on st.Id = e.StatusId
where
    (e.StatusId not in (1,0))
group by
    e.EmailId

drop table #temp
drop table #email
drop table #status
This is kind of a kludgy way to get to this (you can do this without the temporary tables, but I'm doing that here to follow your own syntax). The first query grabs the rows which match 1 AND 0. The second query returns the email IDs which do not exist in the first query:
-- Step 1: collect the EmailIDs that have BOTH a Bounced(0) row and a Sent(1)
-- row. The EXISTS is correlated on EmailID so the Sent row must belong to the
-- same email; without the correlation it would be true for every row as soon
-- as any email at all had been sent.
SELECT bounced.EmailID
INTO #temp
FROM #email AS bounced
WHERE bounced.StatusID = 0
  AND EXISTS (SELECT 1
              FROM #email AS sent
              WHERE sent.EmailID = bounced.EmailID
                AND sent.StatusID = 1);

-- Step 2: return every EmailID that was not collected in step 1.
SELECT DISTINCT e.EmailID
FROM #email AS e
LEFT JOIN #temp AS t
    ON e.EmailID = t.EmailID
WHERE t.EmailID IS NULL;
BTW: The SELECT 1 FROM ... does not have anything to do with the StatusID #1. It may seem confusing because I used SELECT 1, but it could have been SELECT 5 or SELECT 'Z'. It's mostly meaningless.
Here's the same query without the temporary table:
-- Return every EmailID that does not have both a Bounced(0) row and a
-- Sent(1) row. Both subqueries are correlated on EmailID so the two statuses
-- must belong to the same email. NOT EXISTS is used instead of NOT IN so a
-- NULL EmailID in the table cannot empty the whole result.
SELECT DISTINCT e.EmailID
FROM #email AS e
WHERE NOT EXISTS (
    SELECT 1
    FROM #email AS bounced
    WHERE bounced.EmailID = e.EmailID
      AND bounced.StatusID = 0
      AND EXISTS (SELECT 1
                  FROM #email AS sent
                  WHERE sent.EmailID = bounced.EmailID
                    AND sent.StatusID = 1)
)
I would like to replace the numbers in the "Comments" column of @CommentsTable with the equivalent text from the @ModTable table, in a single SELECT and without using a UDF. Maybe with a CTE. I tried STUFF with REPLACE, but no luck.
Any suggestions would be a great help!
Sample:
-- Replacement rules: every occurrence of ModPos in a comment is to be
-- replaced by ModName. (Table variables use the @ prefix in T-SQL.)
DECLARE @ModTable TABLE
(
    ID INT,
    ModName VARCHAR(10),
    ModPos VARCHAR(10)
)
-- Comments whose embedded numbers are to be replaced.
DECLARE @CommentsTable TABLE
(
    ID INT,
    Comments VARCHAR(100)
)
INSERT INTO @CommentsTable (ID, Comments)
VALUES (1, 'MyFirst 5 Comments with 6'),
       (2, 'MySecond comments'),
       (3, 'MyThird comments 5')
-- Note that all rules share ID = 1 in this sample.
INSERT INTO @ModTable (ID, ModName, ModPos)
VALUES (1, '[FIVE]', '5'),
       (1, '[SIX]', '6'),
       (1, '[ONE]', '1'),
       (1, '[TWO]', '2')
-- <<REPLACED COMMENTS>> is a placeholder for the expression being asked for.
SELECT T1.ID, <<REPLACED COMMENTS>>
FROM @CommentsTable T1
GROUP BY T1.ID, T1.Comments
**Expected Result:**
ID Comments
1 MyFirst [FIVE] Comments with [SIX]
2 MySecond comments
3 MyThird comments [FIVE]
Create a cursor, iterate over @ModTable, and apply the replacements one at a time:
-- Applies every replacement rule from @ModTable to every comment, one rule
-- per cursor step. Written as T-SQL (DECLARE ... CURSOR, FETCH NEXT,
-- @@FETCH_STATUS); it must run in the same batch as the declarations of
-- @ModTable and @CommentsTable.
DECLARE @modpos varchar(100);
DECLARE @modname varchar(100);

-- Working copy of the comments, so replacements accumulate across rules.
DECLARE @Replaced TABLE (ID INT, Comments VARCHAR(100));
INSERT INTO @Replaced (ID, Comments)
SELECT ID, Comments
FROM @CommentsTable;

DECLARE replcursor CURSOR LOCAL FAST_FORWARD FOR
    SELECT ModPos, ModName FROM @ModTable;
OPEN replcursor;

FETCH NEXT FROM replcursor INTO @modpos, @modname;
WHILE @@FETCH_STATUS = 0
BEGIN
    -- Apply the current rule to all comments in one set-based statement.
    UPDATE @Replaced
       SET Comments = REPLACE(Comments, @modpos, @modname);

    FETCH NEXT FROM replcursor INTO @modpos, @modname;
END

CLOSE replcursor;
DEALLOCATE replcursor;

SELECT ID, Comments
FROM @Replaced
ORDER BY ID;
Of course, you can store the results in a temp table and get the results altogether in the end of loop.
You can use a while loop to iterate over the records and the mods. I slightly modified your @ModTable to have unique values for ID. If this is not your data structure, then you can use a window function like ROW_NUMBER() to get a unique value over which you can iterate.
Revised script example:
-- Nested WHILE loops: for each comment (outer loop) apply each replacement
-- rule (inner loop) and store the result in @revisedTable.
-- Both loops count down from the row count and look rows up by ID, so the
-- IDs of both tables must be unique and consecutive starting at 1.
DECLARE @ModTable TABLE
(
    ID INT,
    ModName VARCHAR(10),
    ModPos VARCHAR(10)
)
DECLARE @CommentsTable TABLE
(
    ID INT,
    Comments VARCHAR(100)
)
INSERT INTO @CommentsTable (ID, Comments)
VALUES (1, 'MyFirst 5 Comments with 6'),
       (2, 'MySecond comments'),
       (3, 'MyThird comments 5')
INSERT INTO @ModTable (ID, ModName, ModPos)
VALUES (1, '[FIVE]', '5'),
       (2, '[SIX]', '6'),
       (3, '[ONE]', '1'),
       (4, '[TWO]', '2')

declare @revisedTable table (id int, comments varchar(100))
declare @modcount int = (select count(*) from @ModTable)            -- ID of the rule being applied
declare @commentcount int = (select count(*) from @CommentsTable)   -- ID of the comment being processed
declare @currentcomment varchar(100) = ''

while @commentcount > 0
begin
    -- Restart at the last rule for every comment.
    set @modcount = (select count(*) from @ModTable)
    set @currentcomment = (select Comments from @CommentsTable where ID = @commentcount)

    while @modcount > 0
    begin
        -- Replace the rule's ModPos by its ModName in the working copy.
        set @currentcomment = REPLACE( @currentcomment,
            (SELECT TOP 1 ModPos FROM @ModTable WHERE ID = @modcount),
            (SELECT TOP 1 ModName FROM @ModTable WHERE ID = @modcount))
        set @modcount = @modcount - 1
    end

    INSERT INTO @revisedTable (id, comments)
    SELECT @commentcount, @currentcomment

    set @commentcount = @commentcount - 1
end

SELECT *
FROM @revisedTable
order by id
I think this will work, even though I generally avoid recursive queries. It assumes that you have consecutive IDs, though:
-- Recursive CTE that applies the rules of @ModTable one after another.
-- ConnectID is the ID of the last rule applied (0 = none yet); each step
-- joins to the rule with the next ID, so @ModTable.ID must be unique and
-- consecutive starting at 1.
;with Comments as
(
    -- Anchor: the untouched comments. The cast keeps the column type
    -- identical to the recursive member, as a recursive CTE requires.
    select c.ID,
           cast(c.Comments as varchar(1000)) as Comments,
           0 as ConnectID
    from @CommentsTable c
    union all
    -- Recursive step: apply the next rule to the result so far.
    select c.ID,
           cast(replace(c.Comments, m.ModPos, m.ModName) as varchar(1000)),
           m.ID
    from Comments c
    inner join @ModTable m on m.ID = c.ConnectID + 1
)
-- Keep only the rows on which the last rule has been applied.
select * from Comments
where ConnectID = (select max(ID) from @ModTable)
=> CLR Function()
As I have lot of records in "CommentsTable" and the "ModTable" would have multiple ModName for each comments, finally decided to go with CLR Function. Thanks all of you for the suggestions and pointers.
Let's say I have a table with an ID Identity column, some data, and a datestamp. Like this:
1 data 5/1/2013 12:30
2 data 5/2/2013 15:32
3 data 5/2/2013 16:45
4 data 5/3/2013 9:32
5 data 5/5/2013 8:21
6 data 5/4/2013 9:36
7 data 5/6/2013 11:42
How do I write a query that will show me the one record that is timestamped 5/4? The table has millions of records. I've done some searching, but I don't know what to call what I'm searching for. :/
-- Finds a record whose timestamp is out of sequence relative to its ID by
-- comparing each row's position by id with its position by timestamp.
-- (Table variables use the @ prefix in T-SQL.)
declare @t table(id int, bla char(4), timestamp datetime)
insert @t (id, bla, timestamp) values
(1,'data','5/1/2013 12:30'),
(2,'data','5/2/2013 15:32'),
(3,'data','5/2/2013 16:45'),
(4,'data','5/3/2013 9:32'),
(5,'data','5/5/2013 8:21'),
(6,'data','5/4/2013 9:36'),
(7,'data','5/6/2013 11:42')

select timestamp
from
(
    -- rn1: position when ordered by id; rn2: position when ordered by timestamp.
    select rn1 = row_number() over (order by id),
           rn2 = row_number() over (order by timestamp), timestamp
    from @t
) a
-- Rows in sequence have rn1 = rn2; a row that was merely pushed back one
-- place by the late record has rn1 = rn2 - 1. In the sample, what is left
-- is the late record itself (5/4).
where rn1 not in (rn2, rn2-1)
In SQL Server 2008 R2, this would be one way to do it:
-- Finds the record whose date is earlier than both of its neighbours by ID,
-- i.e. the one that was inserted out of chronological order (2013-05-04 in
-- the sample). (Table variables use the @ prefix in T-SQL.)
DECLARE @Table AS TABLE
(id INT, ladate DATETIME)
INSERT INTO @Table (id, ladate) VALUES (1, '2013-05-01')
INSERT INTO @Table (id, ladate) VALUES (2, '2013-05-02')
INSERT INTO @Table (id, ladate) VALUES (3, '2013-05-03')
INSERT INTO @Table (id, ladate) VALUES (4, '2013-05-05')
INSERT INTO @Table (id, ladate) VALUES (5, '2013-05-04')
INSERT INTO @Table (id, ladate) VALUES (6, '2013-05-06')
INSERT INTO @Table (id, ladate) VALUES (7, '2013-05-07')
INSERT INTO @Table (id, ladate) VALUES (8, '2013-05-08')
-- The records were added in ID order; the joins below pair rows by ID, not
-- by physical order, so no sorting is required.
-- T1 is the row after T2 and t3 is the row before T2. The first and last
-- rows have only one neighbour and are therefore never reported.
SELECT t2.ladate FROM @Table T1
INNER JOIN @Table T2 ON T1.Id = T2.Id + 1
INNER JOIN @Table t3 ON t2.id = t3.id + 1
WHERE t3.ladate > t2.ladate AND t2.ladate < t1.ladate
-- This assumes the IDs are consecutive (1,2,3,4,5...) with none missing. If
-- there are gaps, join on a ROW_NUMBER() ordered by ID instead of on the ID.
If I have data in the following format
id subid text
1 1 Hello
1 2 World
1 3 !
2 1 B
2 2 B
2 3 Q
And would like it in this format:
id fold
1 HelloWorld!
2 BBQ
How could I accomplish it in T-SQL?
I would strongly suggest against that. That is the sort of thing that should be handled in your application layer.
But... if you must:
Concatenating Row Values in Transact-SQL
a temp table and a cursor leap to mind...
Dear Downvoters: a temp table and a cursor have got to be at least as efficient as the recursive-query and custom-function solutions accepted above. Get over your fear of cursors, sometimes they are the most efficient solution. Sometimes they are the only solution. Deal with it.
EDIT: cursor-based solution below. Note that it has none of the limitations of the non-cursor (and more complicated) solutions proposed elsewhere, and performance is probably about the same (hard to tell from a six-row table of course).
and please, don't abandon the main for-each construct of sql just because some blogger says "it's bad"; use your own judgement and some common sense. I avoid cursors whenever possible, but not to the point where the solution is not robust.
--initial data table
create table #tmp (
    id int,
    subid int,
    txt varchar(256)
)
--populate with sample data from original question
insert into #tmp (id,subid,txt) values (1, 1, 'Hello')
insert into #tmp (id,subid,txt) values (1, 2, 'World')
insert into #tmp (id,subid,txt) values (1, 3, '!')
insert into #tmp (id,subid,txt) values (2, 1, 'B')
insert into #tmp (id,subid,txt) values (2, 2, 'B')
insert into #tmp (id,subid,txt) values (2, 3, 'Q')
--temp table for grouping results
create table #tmpgrp (
    id int,
    txt varchar(4000)
)
--cursor for looping through data
--rows arrive ordered by id then subid, so each id's pieces are contiguous
--and already in concatenation order
declare cur cursor local for
    select id, subid, txt from #tmp order by id, subid
--values of the row just fetched (T-SQL variables use the @ prefix)
declare @id int
declare @subid int
declare @txt varchar(256)
--id and accumulated text of the group currently being built
declare @curid int
declare @curtxt varchar(4000)
--set once at least one row has been fetched
declare @seen bit
set @seen = 0
open cur
fetch next from cur into @id, @subid, @txt
set @curid = @id
set @curtxt = ''
while @@FETCH_STATUS = 0 begin
    set @seen = 1
    --id changed: flush the finished group and start a new one
    if @curid <> @id begin
        insert into #tmpgrp (id,txt) values (@curid,@curtxt)
        set @curid = @id
        set @curtxt = ''
    end
    --a NULL piece contributes nothing instead of nulling the whole string
    set @curtxt = @curtxt + isnull(@txt,'')
    fetch next from cur into @id, @subid, @txt
end
--flush the last group; skipped when #tmp was empty so that no (NULL, '')
--row is written
if @seen = 1
    insert into #tmpgrp (id,txt) values (@curid,@curtxt)
close cur
deallocate cur
--show output
select * from #tmpgrp
--drop temp tables
drop table #tmp
drop table #tmpgrp
-- Table variable holding the sample rows (table variables use the @ prefix).
declare @tmp table (id int, subid int, txt varchar(256))
--populate with sample data from original question
insert into @tmp (id,subid,txt) values (1, 1, 'Hello')
insert into @tmp (id,subid,txt) values (1, 2, 'World')
insert into @tmp (id,subid,txt) values (1, 3, '!')
insert into @tmp (id,subid,txt) values (2, 1, 'B')
insert into @tmp (id,subid,txt) values (2, 2, 'B')
insert into @tmp (id,subid,txt) values (2, 3, 'Q')
-- Solution
-- For each id, FOR XML PATH('') over an unnamed column concatenates the
-- txt values of that id into one string:
--   * ORDER BY subid fixes the order in which the pieces are joined;
--   * '' + txt keeps the column unnamed without truncating the text;
--   * TYPE + .value() returns plain text, so characters such as & and <
--     are not left XML-escaped in the result.
select t1.id,
       fold = (select '' + t2.txt
               from @tmp t2
               where t2.id = t1.id
               order by t2.subid
               for xml path(''), type).value('.', 'varchar(max)')
from @tmp t1
group by t1.id
Wrap this in a function for a single execution...
-- Body of a scalar function: folds all values of myTable.text into a single
-- comma-separated string. (T-SQL variables use the @ prefix.)
DECLARE @returnValue varchar(4000)
-- On the first row @returnValue is NULL, so ISNULL falls back to the bare
-- value; on every later row the value is appended after ', '.
-- NOTE(review): there is no ORDER BY, so the order of the pieces is not
-- defined by this statement.
SELECT @returnValue = ISNULL(@returnValue + ', ' + myTable.text, myTable.text)
FROM myTable
RETURN @returnValue
For a small number of records this will work... any more than 5 or 10 is too many for a SQL function and it needs to be moved to app layer as others have suggested.