SQL Server : update columns with sum() and group by on a column - sql-server

I am trying to update a SQL Server table so that rows whose estimate name, region, market name, bcast date, len, creative and file_id are the same are collapsed into a single row, with the spend column summed.
For example:
mdl_drtv_part_b_master_id:
ESTIMATE NAME REGION MARKET NAME BCAST DATE LEN CREATIVE SPEND file_id create_date
451 4Q18 EAST CENTRAL EC PIT PA 2018-11-15 60 GET MORE - HYBRID 410.00 5862 2019-04-05 16:17:14.453
452 4Q18 EAST CENTRAL EC PIT PA 2018-11-15 60 Get More - Hybrid 350.00 5862 2019-04-05 16:17:14.453
1929 4Q18 EAST CENTRAL EC PIT PA 2018-11-15 60 GET MORE - HYBRID 646.00 5863 2019-04-05 16:18:51.490
I would like to get this as my output:
ESTIMATE NAME REGION MARKET NAME BCAST DATE LEN CREATIVE SPEND file_ID create_date
4Q18 EAST CENTRAL EC PIT PA 2018-11-15 60 GET MORE - HYBRID 760.00 5862 2019-04-05 16:17:14.453
4Q18 EAST CENTRAL EC PIT PA 2018-11-15 60 GET MORE - HYBRID 646.00 5863 2019-04-05 16:18:51.490
Here is my SQL select to get my output results:
-- Total SPEND per group of rows that share every descriptive column,
-- restricted to a single broadcast date and creative.
SELECT
    sl.[ESTIMATE NAME],
    sl.[REGION],
    sl.[MARKET NAME],
    sl.[BCAST DATE],
    sl.[LEN],
    sl.[CREATIVE],
    SUM(sl.SPEND),
    sl.file_ID,
    sl.[create_date]
FROM dbo.mdl_drtv_part_b_sl AS sl
WHERE sl.[bcast date] = '2018-11-15'
    -- AND sl.region LIKE 'ec%'
    AND sl.creative = 'GET MORE - HYBRID'
GROUP BY
    sl.[ESTIMATE NAME],
    sl.[REGION],
    sl.[MARKET NAME],
    sl.[BCAST DATE],
    sl.[LEN],
    sl.[CREATIVE],
    sl.file_ID,
    sl.[create_date]
Thank you in advance.

Here is one possible solution:
-- Overwrite SPEND on every row with the total SPEND of all rows that share the
-- same grouping columns. There is deliberately no WHERE clause: the whole
-- table is updated.
-- Note: "=" never matches NULL, so a row with a NULL in any grouping column
-- finds no matching rows and its SPEND becomes NULL.
update tgt
set SPEND =
(
    select sum(src.SPEND)
    from dbo.mdl_drtv_part_b_sl as src
    where src.[ESTIMATE NAME] = tgt.[ESTIMATE NAME]
      and src.[REGION]        = tgt.[REGION]
      and src.[MARKET NAME]   = tgt.[MARKET NAME]
      and src.[BCAST DATE]    = tgt.[BCAST DATE]
      and src.[LEN]           = tgt.[LEN]
      and src.[CREATIVE]      = tgt.[CREATIVE]
      and src.[file_ID]       = tgt.[file_ID]
      and src.[create_date]   = tgt.[create_date]
)
from dbo.mdl_drtv_part_b_sl as tgt
Then after updating the SPEND column, you can remove duplicates by using a window function:
-- Number the rows inside each group of identical grouping columns and delete
-- every row after the first, leaving one row per group.
;WITH numbered AS (
    SELECT
        ROW_NUMBER() OVER (
            PARTITION BY
                [ESTIMATE NAME],
                [REGION],
                [MARKET NAME],
                [BCAST DATE],
                [LEN],
                [CREATIVE],
                [file_ID],
                [create_date]
            ORDER BY [ESTIMATE NAME] DESC
        ) AS rn
    FROM dbo.mdl_drtv_part_b_sl
)
DELETE FROM numbered
WHERE rn > 1

You could try an UPDATE with a join on a subquery based on your select:
-- Set SPEND on each matching row to the group total computed by the subquery.
-- your_table is a placeholder for the table being updated.
-- Only rows for the filtered broadcast date and creative are touched, because
-- the inner join drops rows that have no matching group in the subquery.
UPDATE m
SET m.SPEND = t.sum_spend
FROM your_table AS m
INNER JOIN (
    SELECT
        [ESTIMATE NAME],
        [REGION],
        [MARKET NAME],
        [BCAST DATE],
        [LEN],
        [CREATIVE],
        SUM(SPEND) AS sum_spend,
        file_ID,
        [create_date]
    FROM dbo.mdl_drtv_part_b_sl
    WHERE [bcast date] = '2018-11-15'
        --and region like 'ec%'
        AND creative = 'GET MORE - HYBRID'
    GROUP BY
        [ESTIMATE NAME],
        [REGION],
        [MARKET NAME],
        [BCAST DATE],
        [LEN],
        [CREATIVE],
        file_ID,
        [create_date]
) AS t
    ON t.[ESTIMATE NAME] = m.[ESTIMATE NAME]
    AND t.[REGION] = m.[REGION]
    AND t.[MARKET NAME] = m.[MARKET NAME]
    AND t.[BCAST DATE] = m.[BCAST DATE]
    AND t.[LEN] = m.[LEN]
    AND t.[CREATIVE] = m.[CREATIVE]
    AND t.file_ID = m.file_ID
    AND t.[create_date] = m.[create_date]

Related

Percentage of Sales for each type of customer

I want to find the percentage of sales per week for each customer type. I can see the math, but I can't figure out how to write the query.
-- Number of sales per customer type and week of the year.
SELECT
    c.customerType                AS [Customer Type],
    DATEPART(WEEK, o.orderDate)   AS [Week of the year],
    COUNT(c.customerType)         AS [Number of sales]
FROM [dbo].[Order] AS o
INNER JOIN Customer AS c
    ON c.id = o.customerId
GROUP BY
    c.customerType,
    DATEPART(WEEK, o.orderDate)
This query outputs a count of each sale grouped by customer type.
CustomerType Week Number of Sales
------------------------------------
Cash 36 248
Corporate 36 10
Personal 36 5
Cash 37 113
Corporate 37 3
Personal 37 2
Cash 38 136
Corporate 38 7
Personal 38 2
Cash 39 138
Corporate 39 4
Personal 39 3
You can wrap your query and use a window function:
-- Share of each customer type in the sales count of its week.
-- The inner query counts sales per customer type, year and week; the outer
-- query divides each count by the total of the same year and week, so the
-- same week number in different years is never mixed together.
select
    t.[Customer Type],
    t.[Week of the year],
    t.[Number of sales],
    -- 100.0 forces decimal division; integer division would drop the fraction.
    (100.0 * t.[Number of sales])
        / sum(t.[Number of sales]) over (partition by t.[Year], t.[Week of the year])
        as [Percent of Total Sales]
from (
    select
        c.customerType as [Customer Type],
        datepart(year, o.orderDate) as [Year],
        datepart(week, o.orderDate) as [Week of the year],
        count(c.customerType) as [Number of sales]
    from [dbo].[Order] o
    join Customer c ON c.id = o.customerId
    group by c.customerType, datepart(week, o.orderDate), datepart(year, o.orderDate)
) t
Notes:
in SQLServer, better use brackets than quotes to define identifiers (quotes are usually reserved to strings)
the .0 in 100.0 is important : it forces SQLServer to perform decimal division (by default it would go for integer division, which is not what you want)
I added the year to the definition of the group; if your data spreads over several years, you probably don't want the same week in different years to be counted together
Side note: SQLServer is quite flexible about mixing window functions and aggregation. So this might also work:
-- Same result without a wrapping query: the window function runs over the
-- aggregated rows, so sum(count(...)) totals the per-type counts.
-- The partition uses year and week, matching the GROUP BY, so a week number
-- that occurs in several years is totalled separately for each year.
select
    c.customerType as [Customer Type],
    datepart(week, o.orderDate) as [Week of the year],
    count(c.customerType) as [Number of sales],
    -- 100.0 forces decimal division; integer division would drop the fraction.
    (100.0 * count(c.customerType))
        / sum(count(c.customerType)) over (
            partition by datepart(year, o.orderDate), datepart(week, o.orderDate)
          )
        as [Percent of Total Sales]
from [dbo].[Order] o
join Customer c ON c.id = o.customerId
group by c.customerType, datepart(week, o.orderDate), datepart(year, o.orderDate)

Join two tables by MRN and date1 >= MAX(date2)

I need to combine the data from two hospital activity reports. What happens is this: patients get admitted to a spinal department. Some of them are then referred to be put on ventilation. After a while the patient is discharged. Later, the same patient may or may not be re-referred to the spinal department, and may or may not be re-referred for ventilation. I am sent activity data in two reports:
Monthly Activity Report:
[MRN] [NHS Number] [Admission Date] [DoB] [Blah] [Blah]
Ventilation Report
[MRN] [Admission Date] [Ventilation Days] [Ventilation Type] [blah] [blah]
N.B. The Admission Date on the Ventilation Report is the date they are referred for ventilation. This may be the same day, or some date after they are referred into spinal dept.
What I need to achieve is this: join each row to the most immediate entry prior to the patient being referred to ventilation. I need to avoid duplicating rows, but I cannot join it to the most recent row in the Monthly Activity Report as this could easily be a subsequent referral and the other information will not be applicable.
By following the answer to a similar question on Stackoverflow, I came up with this code:
-- Attempt from the question: ranks each patient's MAR rows newest-first and
-- keeps only rank 1, so every ventilation row is paired with that patient's
-- most recent MAR entry.
SELECT [Year], [Month], MRN, [NHS Number],
    [Admission Date] AS [VD Admission Date],
    [Admit date] AS [MAR Admit Date], Days,
    [Ventilation Type], [Ventilation Route], [Ventilation Time],
    [Package of care class],
    [Para/Tetra/No deficit],
    [Social charge date commenced ] AS [Social charge date],
    [Discharge date]
FROM Spinal_Costing.Vented_Days VD
LEFT JOIN (SELECT *,
        ROW_NUMBER() OVER(PARTITION BY [Patient MRN] ORDER BY [Admit Date] DESC) AS row
    FROM Spinal_Costing.MAR
) MAR ON VD.MRN = MAR.[Patient MRN]
WHERE MAR.row = 1;
But this returns the most recent entry in MAR for each patient.
This can also be achieved with an apply that references the values in Vented_Days and simply returns a top 1 for each row. cross apply won't return null values whereas outer apply will:
-- Self-contained demo: for every ventilation row, fetch the latest MAR
-- admission for the same MRN that falls on or before the ventilation date.
-- Table variables must be named with a leading @.
declare @vd table(MRN int, AdmissionDate date);
declare @mar table(MRN int, AdmissionDate date);

insert into @vd values
 (1,'20190102')
,(1,'20190106')
,(2,'20190104')
,(3,'20190101');

insert into @mar values
 (1,'20190101')
,(1,'20190105')
,(2,'20190102');

select v.MRN
      ,v.AdmissionDate
      ,m.AdmissionDate
from @vd as v
    -- outer apply keeps ventilation rows with no earlier MAR row (NULL result);
    -- cross apply would drop them.
    outer apply (select top 1 m.AdmissionDate
                 from @mar as m
                 where v.MRN = m.MRN
                   and v.AdmissionDate >= m.AdmissionDate
                 order by m.AdmissionDate desc
                ) as m
order by v.MRN
        ,v.AdmissionDate;
Output
+-----+---------------+---------------+
| MRN | AdmissionDate | AdmissionDate |
+-----+---------------+---------------+
| 1 | 2019-01-02 | 2019-01-01 |
| 1 | 2019-01-06 | 2019-01-05 |
| 2 | 2019-01-04 | 2019-01-02 |
| 3 | 2019-01-01 | NULL |
+-----+---------------+---------------+
You were on the right track, you just need to add a JOIN to that derived table to limit the rows in the Spinal_Costing.MAR table to those that came at, or before discharge.
-- For each ventilation row, attach the latest MAR row of the same patient
-- whose admit date is on or before that ventilation row's discharge date.
SELECT
    [Year],
    [Month],
    MRN,
    [NHS Number],
    [Admission Date] AS [VD Admission Date],
    [Admit date] AS [MAR Admit Date],
    Days,
    [Ventilation Type],
    [Ventilation Route],
    [Ventilation Time],
    [Package of care class],
    [Para/Tetra/No deficit],
    [Social charge date commenced ] AS [Social charge date],
    [Discharge date]
FROM
    Spinal_Costing.Vented_Days VD
    LEFT JOIN
        (SELECT
            -- Only MAR columns are exposed, plus the ventilation admission date
            -- under a new name. Re-exposing every Vented_Days column here would
            -- make the unqualified columns of the outer SELECT ambiguous.
            M.*,
            V.[Admission Date] AS [Matched VD Admission Date],
            -- Rank per ventilation row, not per patient, so every ventilation
            -- row gets its own best match.
            -- NOTE(review): assumes (MRN, [Admission Date]) identifies a single
            -- Vented_Days row - confirm.
            ROW_NUMBER() OVER(PARTITION BY V.MRN, V.[Admission Date]
                              ORDER BY M.[Admit Date] DESC) AS row
        FROM Spinal_Costing.MAR AS M
            INNER JOIN Spinal_Costing.Vented_Days AS V
                ON V.MRN = M.[Patient MRN]
        -- NOTE(review): to match on the ventilation referral date instead,
        -- compare against V.[Admission Date] here.
        WHERE M.[Admit Date] <= V.[Discharge date]
        ) MAR
        ON VD.MRN = MAR.[Patient MRN]
        AND VD.[Admission Date] = MAR.[Matched VD Admission Date]
        -- Kept in ON rather than WHERE so the LEFT JOIN still returns
        -- ventilation rows that have no matching MAR row.
        AND MAR.row = 1;

Select old records in a table

I want the oldest row by date for each Distinct Number. I created this script but the problem is I keep on getting the newest record.
-- Attempt from the question: for each Number, pick the latest contact date
-- that is not the latest contact date of the row's Pseudo.
SELECT *
FROM [Data].[dbo].[IAPT] AS t1
WHERE t1.[Last Contact Date] IN (
    SELECT MAX(t2.[Last Contact Date])
    FROM [Data].[dbo].[IAPT] AS t2
    WHERE t2.[Number] = t1.[Number]
        AND t2.[Last Contact Date] NOT IN (
            SELECT MAX(t3.[Last Contact Date])
            FROM [Data].[dbo].[IAPT] AS t3
            WHERE t3.[Pseudo] = t1.[Pseudo]
        )
)
The Table:
Pseudo Number Last Contact Date
0X1 18 17/06/2013
0X1 18 16/04/2013
0X2 19 25/04/2013
0X2 19 16/07/2013
Desired Result:
Number Last Contact Date
1 16/04/2013
2 25/04/2013
Any help would be appreciated. Thank You
You should use the MIN function instead of the MAX function:
-- Oldest row per Number: keep the rows whose contact date equals the minimum
-- contact date found for the same Number.
-- The earlier NOT IN (MIN per Pseudo) exclusion is removed: it filtered out
-- the very minimum being looked for, so the sample data returned the newer row.
SELECT *
FROM [Data].[dbo].[IAPT] AS t1
WHERE t1.[Last Contact Date] = (
    SELECT MIN(t2.[Last Contact Date])
    FROM [Data].[dbo].[IAPT] AS t2
    WHERE t2.[Number] = t1.[Number]
)
You can use ROW_NUMBER with a PARTITION BY clause:
-- Number the rows of each Number from oldest to newest contact date and keep
-- the first one.
WITH ranked AS (
    SELECT
        Pseudo,
        Number,
        [Last Contact Date],
        ROW_NUMBER() OVER (
            PARTITION BY Number
            ORDER BY [Last Contact Date]
        ) AS rn
    FROM [Data].[dbo].[IAPT]
)
SELECT
    ranked.Pseudo,
    ranked.Number,
    ranked.[Last Contact Date]
FROM ranked
WHERE ranked.rn = 1
The first record within each Number partition is the one having the oldest date.
A simpler way is to group by the key columns and take the minimum date:
-- Earliest contact date for every (Pseudo, Number) pair.
SELECT
    PSEUDO,
    NUMBER,
    MIN([LAST CONTACT DATE])
FROM [DATA].[DBO].[IAPT]
GROUP BY
    PSEUDO,
    NUMBER

Get all funds which has at least minimum data points

I have two tables
1) Fund details
ID Symbol
-------------------
1 ABC
2 XYZ
2) Fund Price data
Fund_id date Price
-------------------------------------------
1 2014-07-01 00:00:00.000 25.25
1 2014-07-02 00:00:00.000 25.45
......
2 2014-07-01 00:00:00.000 75.25
2 2014-07-02 00:00:00.000 75.42
.......
Now what I want to achieve is:
Here I am fetching the monthly data of a particular Fund as below:
-- Monthly number of price rows for the fund with symbol 'ABC'.
SELECT
    YEAR(FD.[date])              AS [Year],
    MONTH(FD.[date])             AS [Month],
    DATENAME(MONTH, FD.[date])   AS [Month Name],
    COUNT(1)                     AS [Sales Count],
    F.Symbol
FROM FundData AS FD
INNER JOIN FundDetails AS F
    ON F.ID = FD.Fund_ID
WHERE F.Symbol = 'ABC'
GROUP BY
    YEAR(FD.[date]),
    MONTH(FD.[date]),
    DATENAME(MONTH, FD.[date]),
    F.Symbol
Output:
Year Month Month Name Sales Count Symbol
-------------------------------------------
2014 4 April 21 ABC
2014 5 May 21 ABC
2014 6 June 21 ABC
2014 7 July 3 ABC
.......
Total Rows: 301
So this is for only one particular fund, which returned 301 rows.
Now I want to get all the funds from the Fund details table which have fewer rows than a given count (e.g. 216), which I will pass as a parameter.
Use the following query:
-- Monthly row counts per fund, limited to funds that have at least one month
-- with no more than @YourParameter price rows.
-- Local variables must be named with a leading @.
DECLARE @YourParameter int = 10;

SELECT
    YEAR(FD.[date])              AS [Year],
    MONTH(FD.[date])             AS [Month],
    DATENAME(MONTH, FD.[date])   AS [Month Name],
    COUNT(1)                     AS [Sales Count],
    F.Symbol
FROM FundData AS FD
-- FundDetails carries ID and Symbol; FundData carries Fund_ID and date.
INNER JOIN FundDetails AS F
    ON F.ID = FD.Fund_ID
WHERE FD.Fund_ID IN (
    -- The dates live in FundData, so the per-month counts are taken there.
    SELECT z.Fund_ID
    FROM FundData AS z
    WHERE z.Fund_ID = FD.Fund_ID
    GROUP BY z.Fund_ID, YEAR(z.[date]), MONTH(z.[date])
    HAVING COUNT(*) <= @YourParameter
)
GROUP BY
    YEAR(FD.[date]),
    MONTH(FD.[date]),
    DATENAME(MONTH, FD.[date]),
    F.Symbol
I have fixed it:
-- Funds that have at least @YourParameter monthly data points.
-- The DECLARE ends with a semicolon because a statement that precedes WITH
-- must be terminated.
DECLARE @YourParameter int = 110;

WITH CTE AS
(
    -- One row per fund and calendar month that has price data.
    SELECT
        YEAR(FD.[date])              AS [Year],
        MONTH(FD.[date])             AS [Month],
        DATENAME(MONTH, FD.[date])   AS [Month Name],
        COUNT(1)                     AS [Sales Count],
        F.Symbol
    FROM FundData AS FD
    INNER JOIN FundDetails AS F
        ON F.ID = FD.Fund_ID
    GROUP BY
        F.Symbol,
        YEAR(FD.[date]),
        MONTH(FD.[date]),
        DATENAME(MONTH, FD.[date])
)
SELECT
    Symbol,
    COUNT(*) AS cnt
FROM CTE
GROUP BY Symbol
HAVING COUNT(*) >= @YourParameter;

Right Outer Join Issue in a CTE

Ultimately I'd like my end result to look like the following:
ReportingDate FundCode FundName AssetClass Rank Percentage
-------------------------------------------------------------------------
30/11/2012 1 Fund1 Bond 1 50
30/11/2012 1 Fund1 Equity 2 30
30/11/2012 1 Fund1 Balanced 3 0
30/11/2012 1 Fund1 Other 4 20
30/11/2012 2 Fund2 Equity 1 60
30/11/2012 2 Fund2 Bond 2 20
.......
Basically if there is no data for say Balanced like in the above example I would still like this to be returned in the data but with a percentage of 0.
To get this I created a table called #AssetClass and RIGHT OUTER JOIN this to my work table so that I could get all the AssetClass's returned even without data.
My script looks like this:
-- Script from the question: weight per reporting date, portfolio and asset
-- class, with every asset class kept through the RIGHT OUTER JOIN.
;WITH CTE AS
(
    SELECT
        -- Rows that exist only on the asset-class side have NULL work-table
        -- columns; those are filled with the overall maximum value.
        COALESCE(ReportingDate, MAX(ReportingDate) OVER (PARTITION BY (SELECT 1))) AS ReportingDate,
        COALESCE(PortfolioID, MAX(PortfolioID) OVER (PARTITION BY (SELECT 1))) AS PortfolioID,
        COALESCE(PortfolioNme, MAX(PortfolioNme) OVER (PARTITION BY (SELECT 1))) AS PortfolioNme,
        AC.AssetClass AS AssetClass,
        -- Fixed ordering: named classes first, then 'Other', then 'No Asset Class'.
        CASE AC.AssetClass
            WHEN 'No Asset Class' THEN 3
            WHEN 'Other' THEN 2
            ELSE 1
        END AS [Rank],
        CAST(SUM(ISNULL(Percentage, 0)) AS DECIMAL(22,1)) AS [Weight]
    FROM #Worktable AS WT
    RIGHT OUTER JOIN #AssetClass AS AC
        ON RTRIM(WT.AssetClass) = RTRIM(AC.AssetClass)
    GROUP BY
        WT.ReportingDate,
        WT.PortfolioID,
        WT.PortfolioNme,
        AC.AssetClass
)
SELECT
    CONVERT(VARCHAR, ReportingDate, 103) AS ReportingDate,
    PortfolioID AS FundCode,
    PortfolioNme AS FundName,
    AssetClass,
    RANK() OVER (
        PARTITION BY PortfolioID
        ORDER BY [Rank], [Weight] DESC
    ) AS [Rank],
    [Weight] AS Percentage
FROM CTE
ORDER BY ReportingDate, PortfolioID, [Rank], [Weight] DESC
My problem is that when I run this for one portfolio it works perfectly. When I run it for multiple portfolios, the final select seems to exclude anything where there is no data, so in the above example the Balanced row is not returned.
Is there an issue with my script or how I've right outer joined to #AssetClass? Is there something I'm missing or something I can improve upon in my script?
Possibly this can help you:
UPDATE 03.01.2013
-- Weight per reporting date, portfolio and asset class. Asset classes that
-- have no work-table rows for a portfolio are given a weight of 0.00.
;WITH CTE AS
(
SELECT DISTINCT WT.ReportingDate, WT.PortfolioID, WT.PortfolioNme,
AC.AssetClass,
-- Fixed ordering: named classes first, then 'Other', then 'No Asset Class'.
CASE WHEN AC.AssetClass = 'No Asset Class' THEN 3
WHEN AC.AssetClass = 'Other' THEN 2
ELSE 1 END AS [Rank],
-- AC.PortfolioID is NULL when the applied row came from an asset class with
-- no work-table row for this portfolio; such rows add 0.00 to the sum.
SUM(CASE WHEN AC.PortfolioID IS NULL THEN 0.00 ELSE WT.Percentage END)
OVER(PARTITION BY WT.ReportingDate, WT.PortfolioID, AC.AssetClass) AS [Weight]
FROM Worktable WT CROSS APPLY (
-- For the current work-table row: every asset class, left-joined to the
-- work-table rows of the same portfolio that have that asset class.
SELECT AC2.AssetClass, WT2.ReportingDate, WT2.PortfolioID,
WT2.AssetClass AS AssetClass2
FROM AssetClass AC2 LEFT JOIN Worktable WT2
ON RTRIM(AC2.AssetClass) = RTRIM(WT2.AssetClass)
AND WT2.PortfolioID = WT.PortfolioID
) AC
-- Keep the applied rows that match the current row's date, portfolio and
-- asset class, plus the rows for asset classes with no data (AssetClass2 NULL).
WHERE (WT.ReportingDate = AC.ReportingDate AND WT.PortfolioID = AC.PortfolioID AND WT.AssetClass = AC.AssetClass)
OR (AC.AssetClass2 IS NULL)
-- NOTE(review): GROUP BY includes WT.Percentage and is combined with DISTINCT
-- and a windowed SUM; presumably equal percentages within a partition collapse
-- into one row before summing - verify against real data.
GROUP BY WT.ReportingDate, WT.PortfolioID, WT.PortfolioNme,
AC.AssetClass, AC.PortfolioID, WT.Percentage
)
-- Final shape: date formatted with style 103, rank recomputed per portfolio.
SELECT CONVERT(VARCHAR, ReportingDate, 103) AS ReportingDate,
PortfolioID AS FundCode,
PortfolioNme AS FundName,
AssetClass,
RANK() OVER (PARTITION BY PortfolioID
ORDER BY [Rank], [Weight] DESC) AS [Rank],
[Weight] AS Percentage
FROM CTE
ORDER BY ReportingDate, PortfolioID, [Rank], [Weight] DESC

Resources