STUFF in SQL Server with 2 tables on primary ID

STUFF in SQL Server with 2 tables on primary ID - sql-server

I have 2 tables Contracts and Locations.
Contracts columns: CTX_ID, Parent_CTX_ID, Company_name
Locations columns: CTX_ID, State
I'm trying to create an output that concatenates the State values into one column for each Parent_CTX_ID.
Contracts table:
CTX_ID | Parent_CTX_ID | Company_Name
-------+---------------+-------------
1 | 100 | ABC
2 | 100 | ABC
3 | 100 | ABC
4 | 200 | DEF
5 | 200 | DEF
6 | 200 | DEF
Locations table:
CTX_ID | State
-------+------
1 | NJ
2 | PA
3 | DE
4 | NJ
5 | TX
6 | CA
Output I'm trying to get:
CTX_ID | Parent_CTX_ID | Company_Name | State | States
-------+---------------+--------------+-------+-----------
1 | 100 | ABC | NJ | NJ,PA,DE
2 | 100 | ABC | PA | NJ,PA,DE
3 | 100 | ABC | DE | NJ,PA,DE
4 | 200 | DEF | NJ | NJ,TX,CA
5 | 200 | DEF | TX | NJ,TX,CA
6 | 200 | DEF | CA | NJ,TX,CA
Current code:
-- Asker's current attempt: one row per parent_ctx_id with a comma-separated state list.
(SELECT DISTINCT
c.parent_ctx_id,
-- The inner SELECT has no WHERE clause tying it to c.parent_ctx_id, so it
-- concatenates every distinct state in Locations for every outer row.
STUFF((SELECT DISTINCT ',' + s.state
FROM Locations s
-- FOR XML PATH('') collapses the rows into one string; STUFF(..., 1, 1, '') removes the leading comma.
FOR XML PATH('')), 1, 1, '') as STATES
FROM
Contracts AS c
INNER JOIN
Locations AS s ON s.ctx_id = c.ctx_id)
Current output:
Parent_CTX_ID | States
--------------+---------------
100 | CA,DE,NJ,PA,TX
200 | CA,DE,NJ,PA,TX

-- One row per contract, plus a comma-separated list of every state that
-- belongs to the same Parent_CTX_ID.
SELECT
    P.CTX_ID,
    P.Parent_CTX_ID,
    P.Company_Name,
    L.State,
    STUFF(
        (
            SELECT ',' + s.State AS [text()]
            FROM Contracts AS c
            INNER JOIN Locations AS s
                ON s.CTX_ID = c.CTX_ID
            -- Correlate on the parent so each row only sees its siblings' states.
            WHERE c.Parent_CTX_ID = P.Parent_CTX_ID
            -- Without an ORDER BY the concatenation order is not guaranteed;
            -- ordering by CTX_ID reproduces the expected NJ,PA,DE / NJ,TX,CA lists.
            ORDER BY c.CTX_ID
            FOR XML PATH('')
        ),
        1, 1, ''  -- drop the leading comma
    ) AS States
FROM Contracts AS P
-- LEFT JOIN keeps contracts that have no Locations row (State is NULL for them).
LEFT JOIN Locations AS L
    ON L.CTX_ID = P.CTX_ID
SQL Fiddle

You can create a CTE for the new locations (with parent_ctx_id) and use that in your query like below:
-- Step 1: attach each location's Parent_CTX_ID so states can be grouped by parent.
;WITH NEW_LOCATIONS AS
(
    SELECT
        l.CTX_ID,
        l.[State],
        c.Parent_CTX_ID
    FROM #locations AS l
    INNER JOIN #contracts AS c
        ON c.CTX_ID = l.CTX_ID
)
-- Step 2: one row per Parent_CTX_ID with its distinct states as a comma-separated list.
,cte AS
(
    SELECT
        c.Parent_CTX_ID,
        STUFF(
            (
                SELECT DISTINCT ',' + s.[State]
                FROM NEW_LOCATIONS AS s
                WHERE s.Parent_CTX_ID = c.Parent_CTX_ID
                FOR XML PATH('')
            ),
            1, 1, ''  -- drop the leading comma
        ) AS States
    FROM #contracts AS c
    -- GROUP BY already yields one row per parent, so no outer DISTINCT is needed.
    GROUP BY c.Parent_CTX_ID
)
-- Step 3: put the per-parent list back on every contract row, next to its own state.
SELECT
    c2.CTX_ID,
    c.Parent_CTX_ID,
    c2.Company_Name,
    l.[State],
    c.States
FROM cte AS c
INNER JOIN #contracts AS c2
    ON c.Parent_CTX_ID = c2.Parent_CTX_ID
INNER JOIN #locations AS l
    ON l.CTX_ID = c2.CTX_ID
Please find the db<>fiddle here.

Related

Retrieve connected rows in SQL Server

I have this table in SQL Server:
+--------------+---------------------+
| AccountId | AccountIdAssociated |
+--------------+---------------------+
| 2 | 3 |
| 3 | 15 |
| 1 | 30 |
| 3 | 12 |
| 12 | 10 |
| 10 | 50 |
| 19 | 32 |
| 18 | 33 |
+--------------+---------------------+
As you can see, accounts 2, 3, 10, 12, 15, and 50 are connected to each other, either directly or transitively. How can I retrieve all of these connected AccountIds starting from only one number (let's say AccountId = 2)?

What you need here are a couple of rCTEs to "traverse" the hierarchical data:
-- Sample data: the question's table as an inline row set, including the
-- (10, 50) link that connects account 50 to the rest of the chain.
WITH VTE AS (
    SELECT
        V.AccountID,
        V.AccountIdAssociated
    FROM (VALUES (2, 3),
                 (3, 15),
                 (1, 30),
                 (3, 12),
                 (12, 10),
                 (10, 50),
                 (19, 32),
                 (18, 33)) AS V (AccountID, AccountIdAssociated)
),
-- Walks "up": starting from account 3's own rows, repeatedly adds rows whose
-- AccountIdAssociated points at an account already reached.
rCTEUp AS (
    SELECT
        V.AccountID,
        V.AccountIdAssociated
    FROM VTE AS V
    WHERE V.AccountID = 3
    UNION ALL
    SELECT
        V.AccountID,
        V.AccountIdAssociated
    FROM VTE AS V
    INNER JOIN rCTEUp AS r
        ON V.AccountIdAssociated = r.AccountID
),
-- Walks "down": starting from the same rows, repeatedly adds rows whose
-- AccountID is an associated account already reached.
rCTEDown AS (
    SELECT
        V.AccountID,
        V.AccountIdAssociated
    FROM VTE AS V
    WHERE V.AccountID = 3
    UNION ALL
    SELECT
        V.AccountID,
        V.AccountIdAssociated
    FROM VTE AS V
    INNER JOIN rCTEDown AS r
        ON V.AccountID = r.AccountIdAssociated
)
SELECT
    AccountID,
    AccountIdAssociated
FROM rCTEUp
UNION ALL
SELECT
    AccountID,
    AccountIdAssociated
FROM rCTEDown
-- Both walks share the same anchor rows; skip them here so they appear once.
WHERE AccountID != 3;

Merging multiple rows with same ID in SQL Server

What is the most efficient method to combine multiple rows of values with the same ID in SQL Server?
Original data table dbo.ProductCategory:
+-----------+----------+------+
| ProductID | CATID | AA |
+-----------+----------+------+
| 1 | 123 | A |
| 1 | 412 | B |
| 2 | 567 | C |
| 2 | 521 | A |
| 3 | 2 | D |
| 3 | 5 | A |
| 4 | 6 | C |
| 4 | 8 | E |
| 4 | 123 | A |
+-----------+----------+------+
And I'm trying to achieve the following result
+-----------+----------+------+
| ProductID | CATID | AA |
+-----------+----------+------+
| 1 | 123,412 | A,B |
| 2 | 567,521 | C,A |
| 3 | 2,5 | D,A |
| 4 | 6,8,123 | C,E,A|
+-----------+----------+------+

In SQL Server 2017+, you can use STRING_AGG
-- Collapse each product's rows into comma-separated CATID and AA lists.
-- No WITHIN GROUP (ORDER BY ...) is given, so the order inside each list is not guaranteed.
SELECT
    PC.ProductID,
    STRING_AGG(PC.CATID, ',') AS CATID,
    STRING_AGG(PC.AA, ',') AS AA
FROM dbo.ProductCategory AS PC  -- the table named in the question
GROUP BY PC.ProductID

Sample Data
-- Sample data in a local temporary table. DECLARE ... AS TABLE only accepts
-- @-prefixed table variables, so a #-prefixed name must be created with CREATE TABLE.
CREATE TABLE #Temp (ProductID INT, CATID INT, AA CHAR(2));
INSERT INTO #Temp (ProductID, CATID, AA)
SELECT 1, 123, 'A' UNION ALL
SELECT 1, 412, 'B' UNION ALL
SELECT 2, 567, 'C' UNION ALL
SELECT 2, 521, 'A' UNION ALL
SELECT 3, 2, 'D' UNION ALL
SELECT 3, 5, 'A' UNION ALL
SELECT 4, 6, 'C' UNION ALL
SELECT 4, 8, 'E' UNION ALL
SELECT 4, 123, 'A';
Using STUFF() In sql server
-- One row per ProductID with its CATID and AA values merged into ', '-separated lists.
SELECT
    o.ProductID,
    STUFF(
        (
            SELECT CONCAT(', ', i.CATID)
            FROM #Temp AS i
            WHERE i.ProductID = o.ProductID
            FOR XML PATH('')
        ),
        1, 2, ''  -- the separator ', ' is two characters, so strip two, not one
    ) AS CATID,
    STUFF(
        (
            SELECT CONCAT(', ', i.AA)
            FROM #Temp AS i
            WHERE i.ProductID = o.ProductID
            FOR XML PATH('')
        ),
        1, 2, ''
    ) AS AA
FROM #Temp AS o
GROUP BY o.ProductID
Result
ProductID CATID AA
------------------------------------
1 123, 412 A , B
2 567, 521 C , A
3 2, 5 D , A
4 6, 8, 123 C , E , A

Return column names based on which holds the maximum value in the record

I have a table with the following structure ...
+--------+------+------+------+------+------+
| ID | colA | colB | colC | colD | colE | [...] etc.
+--------+------+------+------+------+------+
| 100100 | 15 | 100 | 90 | 80 | 10 |
+--------+------+------+------+------+------+
| 100200 | 10 | 80 | 90 | 100 | 10 |
+--------+------+------+------+------+------+
| 100300 | 100 | 90 | 10 | 10 | 80 |
+--------+------+------+------+------+------+
I need to return a concatenated value of column names which hold the maximum 3 values per row ...
+--------+----------------------------------+
| ID | maxCols |
+--------+----------------------------------+
| 100100 | colB,colC,colD |
+--------+----------------------------------+
| 100200 | colD,colC,colB |
+--------+----------------------------------+
| 100300 | colA,colB,colE |
+--------+----------------------------------+
It's okay to not concatenate the column names, and have maxCol1 | maxCol2 | maxCol3 if that's simpler
The order of the columns is important when concatenating them
The number of columns is limited and not dynamic
The number of rows is many

You could use UNPIVOT and get TOP 3 for each ID
-- Turn every wide row into (ID, ColValue, ColName) rows so the columns can be ranked.
;WITH unpivoted AS
(
    SELECT
        ID,
        ColValue,
        ColName
    FROM #SampleData AS src
    UNPIVOT
    (
        ColValue FOR ColName IN ([colA], [colB], [colC], [colD], [colE])
    ) AS unp
)
-- For each ID, concatenate the names of its highest-valued columns.
SELECT
    sd.ID,
    top_cols.ColMax
FROM #SampleData AS sd
CROSS APPLY
(
    SELECT
        STUFF(
            (
                -- WITH TIES also returns any columns tied with the third-highest value.
                SELECT TOP 3 WITH TIES
                    ',' + u.ColName
                FROM unpivoted AS u
                WHERE u.ID = sd.ID
                ORDER BY u.ColValue DESC
                FOR XML PATH('')
            ),
            1, 1, ''  -- drop the leading comma
        ) AS ColMax
) AS top_cols
See demo here: http://rextester.com/CZCPU51785

Here is one trick to do it using Cross Apply and Table Valued Constructor
-- For each row, build a (value, column name) list inline, keep the three
-- largest values, and join their names into one comma-separated string.
SELECT
    Id,
    maxCols = STUFF(top_names.maxCols, 1, 1, '')  -- drop the leading comma
FROM Yourtable
CROSS APPLY (
    SELECT (
        SELECT TOP 3 ',' + col_name
        FROM (VALUES (colA, 'colA'),
                     (colB, 'colB'),
                     (colC, 'colC'),
                     (colD, 'colD'),
                     (colE, 'colE')) AS pairs (col_value, col_name)
        ORDER BY col_value DESC
        FOR XML PATH, TYPE
    ).value('.[1]', 'nvarchar(max)')
) AS top_names (maxCols)
If needed it can be made dynamic using Information_schema.Columns

How to pull the average sales per visit of the top 10% of customers from 3 segments in T-SQL?

I'm new to my job and I am not used to writing queries as complicated as the one I am being asked to write. Please help! I'm trying to pull the average sales per visit of the top 10% of customers from 3 customer segments. I'm stuck on pulling only the top 10% of customers. Here are two sample tables and what I have so far.
-- Asker's attempt; it does not run as written (see the notes below).
SELECT
cust_segment.segment,
-- visits is neither aggregated nor listed in GROUP BY.
AVG(cust_sales.sales) / cust_sales.visits AS "sales/vist"
FROM
cust_segment
INNER JOIN
cust_sales ON cust_segment.Customer = cust_sales.Customer
WHERE
-- The left operand is a table name rather than a column, the subquery returns
-- two columns, "cust_segement" is misspelled, and TOP has no ORDER BY.
cust_sales in (SELECT TOP 10 Percent cust_sales.sales, cust_segement.segment
FROM cust_segment
INNER JOIN cust_sales ON cust_segment.customer = cust_sales.customer)
GROUP BY
segment;
cust_segment
+-------------+---------+
| Customer | Segment |
+-------------+---------+
| 10000834678 | A |
| 10000467169 | A |
| 10000217202 | B |
| 10001562687 | C |
| 10000742574 | C |
| 10001577918 | A |
| 10000825179 | B |
| 10000019009 | B |
| 10001225606 | C |
| 10000473429 | A |
+-------------+---------+
cust_sales
+-------------+----------------+--------+
| Customer | Sales | Visits |
+-------------+----------------+--------+
| 10000834678 | $ 54.56 | 8 |
| 10000467169 | $ 27.61 | 7 |
| 10000217202 | $ 150.01 | 39 |
| 10001562687 | $ 39.59 | 8 |
| 10000742574 | $ 18.35 | 9 |
| 10001577918 | $ 23.72 | 4 |
| 10000825179 | $ 7.69 | 7 |
| 10000019009 | $ 94.41 | 47 |
| 10001225606 | $ 36.00 | 12 |
| 10000473429 | $ 5.76 | 6 |
+-------------+----------------+--------+
It should return:
+---------+-------------+
| Segment | Sales/Visit |
+---------+-------------+
| A | 6.82 |
| B | 3.846410256 |
| C | 4.94875 |
+---------+-------------+

This should do it:
-- Per segment: total sales divided by total visits, counting only the
-- customers in the top 10 percent of that segment when ranked by Sales.
SELECT
    s.Segment,
    SUM(cs.Sales) / SUM(cs.Visits) AS [Sales/Visit]
FROM cust_segment AS s
LEFT JOIN cust_sales AS cs
    ON s.Customer = cs.Customer
    -- Kept in the join condition: customers outside the top 10 percent simply
    -- get no sales row, so they add nothing to the sums.
    AND s.Customer IN (
        SELECT TOP 10 PERCENT top_sales.Customer
        FROM cust_sales AS top_sales
        INNER JOIN cust_segment AS same_seg
            ON same_seg.Segment = s.Segment
            AND same_seg.Customer = top_sales.Customer
        ORDER BY top_sales.Sales DESC
    )
GROUP BY s.Segment

-- Sample data from the question, supplied as inline CTEs.
;WITH cust_segment (Customer, Segment)
AS
(
    SELECT 10000834678, 'A' UNION ALL
    SELECT 10000467169, 'A' UNION ALL
    SELECT 10000217202, 'B' UNION ALL
    SELECT 10001562687, 'C' UNION ALL
    SELECT 10000742574, 'C' UNION ALL
    SELECT 10001577918, 'A' UNION ALL
    SELECT 10000825179, 'B' UNION ALL
    SELECT 10000019009, 'B' UNION ALL
    SELECT 10001225606, 'C' UNION ALL
    SELECT 10000473429, 'A'
)
,cust_sales (Customer, Sales, Visits)
AS
(
    SELECT 10000834678, 54.56, 8 UNION ALL
    SELECT 10000467169, 27.61, 7 UNION ALL
    SELECT 10000217202, 150.01, 39 UNION ALL
    SELECT 10001562687, 39.59, 8 UNION ALL
    SELECT 10000742574, 18.35, 9 UNION ALL
    SELECT 10001577918, 23.72, 4 UNION ALL
    SELECT 10000825179, 7.69, 7 UNION ALL
    SELECT 10000019009, 94.41, 47 UNION ALL
    SELECT 10001225606, 36.00, 12 UNION ALL
    SELECT 10000473429, 5.76, 6
)
-- Highest per-customer sales-per-visit ratio in each segment.
-- Grouping by Segment guarantees exactly one row per segment, even when two
-- segments happen to share the same maximum ratio.
-- NOTE(review): this picks the best ratio per segment rather than applying a
-- TOP 10 PERCENT by Sales; confirm that this is the intended definition.
SELECT
    per_customer.Segment,
    MAX(per_customer.[Sales/Visit]) AS [Sales/Visit]
FROM
(
    SELECT
        c.Customer,
        c.Segment,
        SUM(s.Sales) / SUM(s.Visits) AS [Sales/Visit]
    FROM cust_sales AS s
    INNER JOIN cust_segment AS c
        ON c.Customer = s.Customer
    GROUP BY
        c.Customer,
        c.Segment
) AS per_customer
GROUP BY per_customer.Segment
ORDER BY per_customer.Segment
Output
Segment|Sales/Visit
------------------
A 6.820000
B 3.846410
C 4.948750

Retrieving the most recent records within a query

I have the following tables:
tblPerson:
PersonID | Name
---------------------
1 | John Smith
2 | Jane Doe
3 | David Hoshi
tblLocation:
LocationID | Timestamp | PersonID | X | Y | Z | More Columns...
---------------------------------------------------------------
40 | Jan. 1st | 3 | 0 | 0 | 0 | More Info...
41 | Jan. 2nd | 1 | 1 | 1 | 0 | More Info...
42 | Jan. 2nd | 3 | 2 | 2 | 2 | More Info...
43 | Jan. 3rd | 3 | 4 | 4 | 4 | More Info...
44 | Jan. 5th | 2 | 0 | 0 | 0 | More Info...
I can produce an SQL query that gets the Location records for each Person like so:
-- Every location record together with the name of the person it belongs to.
SELECT
    loc.LocationID,
    loc.Timestamp,
    per.Name,
    loc.X,
    loc.Y,
    loc.Z
FROM tblLocation AS loc
INNER JOIN tblPerson AS per
    ON loc.PersonID = per.PersonID;
to produce the following:
LocationID | Timestamp | Name | X | Y | Z |
--------------------------------------------------
40 | Jan. 1st | David Hoshi | 0 | 0 | 0 |
41 | Jan. 2nd | John Smith | 1 | 1 | 0 |
42 | Jan. 2nd | David Hoshi | 2 | 2 | 2 |
43 | Jan. 3rd | David Hoshi | 4 | 4 | 4 |
44 | Jan. 5th | Jane Doe | 0 | 0 | 0 |
My issue is that we're only concerned with the most recent Location record. As such, we're only really interested in the following Rows: LocationID 41, 43, and 44.
The question is: How can we query these tables to give us the most recent data on a per-person basis? What special grouping needs to happen to produce the desired result?

MySQL (before version 8.0) doesn't have ranking/analytical/windowing functionality, so join against each person's latest timestamp instead:
-- Find each person's latest timestamp, then join back to fetch that full location row.
SELECT
    tl.locationid,
    tl.timestamp,
    tp.name,
    tl.X,
    tl.Y,
    tl.Z
FROM tblPerson AS tp
INNER JOIN tblLocation AS tl
    ON tl.personid = tp.personid
INNER JOIN (
    SELECT
        t.personid,
        MAX(t.timestamp) AS max_date
    FROM tblLocation AS t
    GROUP BY t.personid
) AS latest
    ON latest.personid = tl.personid
    AND latest.max_date = tl.timestamp
SQL Server 2005+ and Oracle 9i+ support analytics, so you could use:
-- Number each person's location rows from newest to oldest and keep the newest.
SELECT
    x.locationid,
    x.timestamp,
    x.name,
    x.X,
    x.Y,
    x.Z
FROM (
    SELECT
        tl.locationid,
        tl.timestamp,
        tp.name,
        tl.X,
        tl.Y,
        tl.Z,
        -- Partition by the person key, not the name: two different people
        -- who share a name must each keep their own latest row.
        ROW_NUMBER() OVER (
            PARTITION BY tp.personid
            ORDER BY tl.timestamp DESC
        ) AS rn
    FROM tblPerson AS tp
    INNER JOIN tblLocation AS tl
        ON tl.personid = tp.personid
) AS x
WHERE x.rn = 1
Using user variables to emulate ROW_NUMBER functionality on MySQL:
-- Emulates ROW_NUMBER() with MySQL user variables: rows are ordered by name
-- and newest timestamp first, and the counter restarts whenever the name changes.
-- User variables are written @name; a leading # would start a comment in MySQL.
SELECT
    x.locationid,
    x.timestamp,
    x.name,
    x.X,
    x.Y,
    x.Z
FROM (
    SELECT
        tl.locationid,
        tl.timestamp,
        tp.name,
        tl.X,
        tl.Y,
        tl.Z,
        CASE
            WHEN @name != tp.name THEN @rownum := 1
            ELSE @rownum := @rownum + 1
        END AS row_rank,
        -- Remember this row's name so the next row can detect a change of person.
        @name := tp.name AS prev_name
    FROM tblLocation tl
    JOIN tblPerson tp ON tp.personid = tl.personid
    -- One-row derived table that initialises the variables for this statement.
    JOIN (SELECT @rownum := NULL, @name := '') r
    ORDER BY tp.name, tl.timestamp DESC
) x
WHERE x.row_rank = 1

As @Mark Byers mentions, this problem comes up frequently on Stack Overflow.
Here's the solution I most frequently recommend, given your tables:
-- Keep a location row only when no other row for the same person is newer
-- (ties on timestamp are broken by the higher LocationId).
SELECT
    p.*,
    cur.*
FROM tblPerson AS p
INNER JOIN tblLocation AS cur
    ON p.PersonID = cur.PersonID
LEFT JOIN tblLocation AS newer
    ON p.PersonID = newer.PersonID
    AND (
        cur.timestamp < newer.timestamp
        OR (cur.timestamp = newer.timestamp AND cur.LocationId < newer.LocationId)
    )
-- No match on the outer join means nothing newer exists for this person.
WHERE newer.LocationID IS NULL;
To see other examples, follow the tag greatest-n-per-group, which I added to your question.

This is a classic 'max per group' question that comes up on Stack Overflow almost every day. There are many ways to solve it and you can find example solutions by searching Stack Overflow. Here is one way that you can do it in MySQL:
-- Numbers each person's location rows newest-first with MySQL user variables,
-- then joins back to fetch the full row for number 1.
-- User variables are written @rn; a leading # would start a comment in MySQL.
SELECT
    location.LocationId,
    location.Timestamp,
    person.Name,
    location.X,
    location.Y,
    location.Z
FROM (
    SELECT
        LocationID,
        -- Restart the counter whenever PersonID differs from the previous row's.
        @rn := CASE
                   WHEN @prev_PersonID = PersonID THEN @rn + 1
                   ELSE 1
               END AS rn,
        @prev_PersonID := PersonID
    FROM (SELECT @prev_PersonID := NULL) vars, tblLocation
    ORDER BY PersonID, Timestamp DESC
) T1
JOIN tblLocation location ON location.LocationID = T1.LocationId
JOIN tblPerson person ON person.PersonID = location.PersonID
WHERE T1.rn = 1

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight

STUFF in SQL Server with 2 tables on primary ID - sql-server

Related

Retrieve connected rows in SQL Server

Merging multiple rows with same ID in SQL Server

Return column names based on which holds the maximum value in the record

How to pull the average sales per visit of the top 10% of customers from 3 segments in T-SQL?

Retrieving the most recent records within a query

Categories

Resources