SQL aggregate function against concatenated string in pivot table - sql-server

I have a SQL table which stores the Status and Location of employees
empID date status location
---------------------------------------------------------
001 01.01.2014 1 1
001 02.01.2014 2 1
001 04.01.2014 1 3
....
055 01.01.2014 3 3
055 02.01.2014 4 2
Now I want to create a list with the employee id and days 1 to 31 as columns
and a concatenation of the Status and Location as the value. If I take max() of the status, it works, however, the location information is missing. If I try to do
SELECT *
FROM (SELECT (empId), left(datename(dd,[Date]),2)as [day_date], [Status] as status, [Location] as location, [Status] + '|' + [Location] as val FROM [dbo].[table]
WHERE [date] BETWEEN '2014-01-01' AND '2014-01-30') as s
PIVOT (MAX(val) FOR [day_date] IN ([1],[2],[3],[4],[5],[6],[7],[8],[9],[10],[11],[12],[13],[14],[15],[16],[17],[18],[19],[20],[21],[22],[23],[24],[25],[26],[27],[28],[29],[30],[31])
)AS p
order by empID
it fails because I can't use max() against a concatenated string.
My goal is:
empID 01 02 03 04 .... 30 31
---------------------------------------------------------
001 1|1 2|1 null 1|3 null null
055 3|4 4|2 null null null null

Cast [Status] and [Location] to a varchar before performing the concatenation. I have arbitrarily chosen 3 as the length of the varchar, but that may need to vary depending on what your maximum possible [Status] and [Location] values are.
Also, remove the [Status] and [Location] columns from the inner query.
-- Pivot each employee's daily "status|location" value into one column per
-- day of the month (1-31) for January 2014.
SELECT *
FROM (
    SELECT
        empId,
        -- DATENAME(dd, ...) yields the day number as text without a leading
        -- zero, matching the [1]..[31] names in the PIVOT list below.
        left(datename(dd, [Date]), 2) AS [day_date],
        -- Cast both parts to character data so that "+" concatenates
        -- instead of attempting a numeric conversion of '|'.
        cast([Status] AS varchar(3)) + '|' + cast([Location] AS varchar(3)) AS val
    FROM [dbo].[table]
    -- Half-open range: covers every day of January, including the 31st
    -- (the earlier BETWEEN ... '2014-01-30' filter dropped it even though
    -- the pivot exposes a [31] column), and is safe if [date] has a time part.
    WHERE [date] >= '20140101'
      AND [date] < '20140201'
) AS s
PIVOT (
    MAX(val) FOR [day_date] IN (
        [1],[2],[3],[4],[5],[6],[7],[8],[9],[10],
        [11],[12],[13],[14],[15],[16],[17],[18],[19],[20],
        [21],[22],[23],[24],[25],[26],[27],[28],[29],[30],[31]
    )
) AS p
ORDER BY empID

Related

How to set rows value as column in SQL Server?

I have a table tblTags (Date, Tagindex, Value)
The values in the table are:
Date Tagindex Value
---------------------------------
2017-10-21 0 21
2017-10-21 1 212
2017-10-21 2 23
2017-10-21 0 34
2017-10-21 1 52
2017-10-21 2 65
I want the result as :
Date 0 1 2
-------------------------------
2017-10-21 21 212 23
2017-10-21 34 52 65
For this I wrote the following query:
select *
from
(SELECT a.Date, a.Tagindex,a.value
FROM tblTag a) as p
pivot
(max(value)
for Tagindex in ( [tblTag])
) as pvt
But I get these errors:
Msg 8114, Level 16, State 1, Line 10
Error converting data type nvarchar to int.
Msg 473, Level 16, State 1, Line 10
The incorrect value "tblTag" is supplied in the PIVOT operator.
How to solve this issue.
I think you can use a query like this:
-- Turn Tagindex values 0/1/2 into columns using conditional aggregation.
-- Rows sharing the same Date and Tagindex are numbered so that the n-th
-- occurrence of each Tagindex lands on the same output row.
;WITH numbered_tags AS (
    SELECT
        [Date],
        [Tagindex],
        [Value],
        -- ORDER BY (SELECT 0) imposes no real ordering; the numbering within
        -- each (Date, Tagindex) group is therefore arbitrary.
        ROW_NUMBER() OVER (
            PARTITION BY [Date], [Tagindex]
            ORDER BY (SELECT 0)
        ) AS occurrence_no
    FROM tblTag
)
SELECT
    [Date],
    MAX(CASE WHEN [Tagindex] = 0 THEN [Value] END) AS [0],
    MAX(CASE WHEN [Tagindex] = 1 THEN [Value] END) AS [1],
    MAX(CASE WHEN [Tagindex] = 2 THEN [Value] END) AS [2]
FROM numbered_tags
GROUP BY
    [Date],
    occurrence_no;
SQL Server Fiddle Demo
SQL Server Fiddle Demo - with pivot
Note: In the above query I use the row_number() function to create a sequence number for each Date and Tagindex. The trick is using (select 0) as a placeholder expression in the order by part; however, it cannot be trusted to return rows in the order they were inserted. So, if you need a reliable result set, you need an extra field, such as a datetime or an auto-increment field, to order by.
Try this:
-- Self-contained demo: load sample tag readings into a table variable and
-- pivot the TagIndex values 0, 1 and 2 into columns.
-- Table variables are declared with "@"; "DECLARE #name TABLE" is not valid.
DECLARE @tblTag TABLE
(
    [Date] DATE
    ,[TagIndex] TINYINT
    ,[Value] INT
);

-- Each set of readings uses a different date, so PIVOT (which groups by the
-- remaining column, [Date]) returns one row per set.
INSERT INTO @tblTag ([Date], [TagIndex], [Value])
VALUES ('2017-10-21', 0, 21)
      ,('2017-10-21', 1, 212)
      ,('2017-10-21', 2, 23)
      ,('2017-10-22', 0, 34)
      ,('2017-10-22', 1, 52)
      ,('2017-10-22', 2, 65);

SELECT *
FROM @tblTag
PIVOT
(
    -- The IN list must name the pivoted values explicitly.
    MAX([Value]) FOR [TagIndex] IN ([0], [1], [2])
) PVT;
You need to say exactly which are the PIVOT columns. If you are going to have different values for the TagIndex and you cannot hard-code them, you need to use dynamic PIVOT.
Also, you need to be sure you have a way to group the tagIndex values in one row. For example, different date (as in my test data), ID column which is marking when a row is inserted or something else (group ID column or date added column).

Pivot in sql server: Vertical to Horizontal data [duplicate]

This question already has an answer here:
Pivot without aggregate function in MSSQL 2008 R2
(1 answer)
Closed 6 years ago.
Hi I have the following table that I would like to use the pivot function on:
Id|Number| Code
1 | 34 |abc12345
1 | 23 |xqwe6758
2 | 37 |ghut564hg
3 | 456 |ghut8695
3 | 39 |ghtuj678
3 | 22 |fsdifje12
And I want it to be displayed horizontally as the following:
Id| Code1 | Code2 | Code3
1 | abc12345 | xqwe6758 | null
2 |ghut564hg | null | null
3 |ghut8695 | ghtuj678 | fsdifje12
SELECT Id
,[Code1]
,[Code2]
,[Code3]
FROM(SELECT Id,Code
FROM [TableName]
)d
pivot(
max(Id)
for Code in([Code1],[Code2],[Code3])
)as piv;
This throws an invalid column name error on the Id column. Could someone help identify the error ?
You can use pivot as below:
-- Number each id's codes in alphabetical order, then pivot positions 1-3
-- into columns; ids with fewer than three codes get NULL in the rest.
;WITH ranked_codes AS (
    SELECT
        id,
        code,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY code) AS RowN
    FROM yourtable1
)
SELECT *
FROM ranked_codes
PIVOT (
    MAX(code) FOR RowN IN ([1], [2], [3])
) AS p
For varying columns you can use stuff to create columns list and then use dynamic SQL to run with varying columns... But it is available in various examples in SO itself...
Added my output:
Try this
-- Self-contained demo: build Code1..CodeN column names from a per-Id row
-- number and pivot the codes onto one row per Id.
-- Table variables are declared with "@"; "DECLARE #name TABLE" is not valid.
DECLARE @tbl TABLE (Id INT, Code VARCHAR(100));

INSERT INTO @tbl (Id, Code)
VALUES
 (1,'abc12345')
,(1,'xqwe6758')
,(2,'ghut564hg')
,(3,'ghut8695')
,(3,'ghtuj678')
,(3,'fsdifje12');

SELECT p.*
FROM
(
    SELECT Id
          -- Codes are numbered alphabetically within each Id, so Code1 is
          -- the alphabetically first code, not the first one inserted.
          ,'Code' + CAST(ROW_NUMBER() OVER (PARTITION BY Id ORDER BY Code) AS VARCHAR(10)) AS ColumnName
          ,Code
    FROM @tbl
) AS t
PIVOT
(
    MAX(Code) FOR ColumnName IN ([Code1], [Code2], [Code3] /*add as many as you need*/)
) AS p
The result
Id Code1 Code2 Code3
1 abc12345 xqwe6758 NULL
2 ghut564hg NULL NULL
3 fsdifje12 ghtuj678 ghut8695

sql server 2014 group by date extract several fields

I have a table that looks like this:
CREATE TABLE dbo.Mails (
ID int IDENTITY(1, 1) NOT NULL,
Reference nvarchar(20) COLLATE Latin1_General_CI_AS NULL,
Email nvarchar(70) NOT NULL,
ETS datetime NULL, --Estimated Time of Shipping
ATS datetime NULL, --Actual Time of Shipping
ReadOn datetime NULL,
Unsubscribed datetime NULL,
Bounced datetime NULL,
BouncedReason nvarchar(30) COLLATE Latin1_General_CI_AS NULL,
Active bit DEFAULT 1 NULL
)
I need to show info on a chart, and I need to group by Date.
therefore if I want to group details by ReadOn field for a certain campaign I build the following query
Select
CAST(readOn as date) [date],
COUNT(*) [read]
FROM Mails m
WHERE m.Reference=#Reference
GROUP BY CAST(readOn as date)
ORDER BY CAST(readOn as date) ASC
and I get something like this:
sDate read
NULL 360
2016-05-05 67
2016-05-06 123
2016-05-07 84
2016-05-08 62
2016-05-09 89
2016-05-10 17
2016-05-11 12
2016-05-12 8
2016-05-13 4
2016-05-14 4
But I would like to extract, in the same query, not only ReadOn field, but also other fields like ETS, ATS, Unsubscribed/Read & Unread and Bounced
and get something like this
sDate read ETS ATS Bounced Unsub./Read Unsub/Unread
NULL 360
2016-05-05 67 830 570 27 7 3
2016-05-06 123 0 260 4 9 5
2016-05-07 84 0 0 0 2 2
2016-05-08 62 0 0 0 2 4
2016-05-09 89 0 0 0 7 1
2016-05-10 17 0 0 0 5 6
2016-05-11 12 0 0 0 8 2
2016-05-12 8 0 0 0 1 3
2016-05-13 4 0 0 0 0 2
2016-05-14 4 0 0 0 0 2
Is there an easier way than building 6 different queries?
Can you at least indicate the path to follow?
Thanks
Joe
You can do it with some pre-processing and a PIVOT. In this example, I've put the query into a stored procedure, so that it is contained and easy to test. I'm doing the pre-processing in a CTE, to keep the main query tidy.
First, create the table and populate it.
-- Test fixture for the report below: a cut-down version of the question's
-- dbo.Mails table (the Email column is omitted here).
CREATE TABLE dbo.Mails
(
ID int IDENTITY(1, 1) NOT NULL,
Reference nvarchar(20) COLLATE Latin1_General_CI_AS NULL,
ETS datetime NULL, --Estimated Time of Shipping
ATS datetime NULL, --Actual Time of Shipping
ReadOn datetime NULL,
Unsubscribed datetime NULL,
-- NOTE(review): declared as bit here, whereas the question's table declares
-- Bounced as datetime.
Bounced bit DEFAULT 0 NULL,
BouncedReason nvarchar(30) COLLATE Latin1_General_CI_AS NULL,
Active bit DEFAULT 1 NULL
);
GO
-- Six sample mails for campaign 'ABC'. The last two rows carry an
-- Unsubscribed date: one with ReadOn set and one with ReadOn NULL.
INSERT INTO dbo.Mails (Reference, ETS, ATS, ReadOn, Unsubscribed, Bounced)
VALUES
(N'ABC', '2015-05-05', '2015-05-05', '2015-05-05', NULL, 0),
(N'ABC', '2015-05-06', '2015-05-07', '2015-05-08', NULL, 0),
(N'ABC', '2015-05-05', '2015-05-05', '2015-05-07', NULL, 0),
(N'ABC', '2015-05-07', '2015-05-08', '2015-05-09', NULL, 0),
(N'ABC', '2015-05-06', '2015-05-07', '2015-05-09', '2015-05-09', 0),
(N'ABC', '2015-05-06', '2015-05-07', NULL, '2015-05-08', 0);
Then create a stored procedure with a parameter #Reference. I'm using a CTE to create a two column row set, with Date and Type as the columns. Then, in the main SELECT statement it's being pivoted to give the result you want.
The row set produced by the CTE looks like this.
Note: I haven't included the Bounced column, because I'm not clear what the requirement is for that; it's not a date column. However you should be able to extend this example quite easily.
-- Reports, per calendar date, how many mails of one campaign were read,
-- scheduled (ETS), shipped (ATS), unsubscribed after reading, and
-- unsubscribed without reading.
--   @Reference: campaign reference to report on (matched against
--               dbo.Mails.Reference).
-- Parameters are prefixed with "@"; "#Reference" is not a valid parameter name.
CREATE PROCEDURE dbo.up_ReportMails
(
    @Reference nvarchar(20)
)
AS
-- Unpivot the date columns into (Date, Type) pairs, one row per event:
--   R = read, E = ETS, A = ATS,
--   U = unsubscribed and read, V = unsubscribed and never read.
WITH cte AS
(
    SELECT CAST(ReadOn AS date) AS [Date], 'R' AS [Type]
    FROM dbo.Mails
    WHERE Reference = @Reference AND ReadOn IS NOT NULL
    UNION ALL
    SELECT CAST(ETS AS date), 'E'
    FROM dbo.Mails
    WHERE Reference = @Reference AND ETS IS NOT NULL
    UNION ALL
    SELECT CAST(ATS AS date), 'A'
    FROM dbo.Mails
    WHERE Reference = @Reference AND ATS IS NOT NULL
    UNION ALL
    SELECT CAST(Unsubscribed AS date), 'U'
    FROM dbo.Mails
    WHERE Reference = @Reference AND Unsubscribed IS NOT NULL AND ReadOn IS NOT NULL
    UNION ALL
    SELECT CAST(Unsubscribed AS date), 'V'
    FROM dbo.Mails
    WHERE Reference = @Reference AND Unsubscribed IS NOT NULL AND ReadOn IS NULL
)
-- Count the events of each type per date and spread the types into columns.
SELECT
    [Date],
    [R] AS [Read],
    [E] AS [ETS],
    [A] AS [ATS],
    [U] AS [Unsub/Read],
    [V] AS [Unsub/Unread]
FROM
    (SELECT [Date], [Type]
     FROM cte) AS C
PIVOT
(
    COUNT([Type])
    FOR [Type] IN ([R], [E], [A], [U], [V])
) AS PivotTable
ORDER BY [Date];
Then we can test it.
-- Named parameters are passed as @name = value.
EXEC dbo.up_ReportMails @Reference = N'ABC';
I've tested this code and it works. Assuming you have a calendar table (if you don't have one, you can Google it and make one in about 10 minutes - they're very simple and will save you loads of time):
-- For every calendar date in the range, count how many mails have each of
-- their date columns falling on that date. Dates with no activity are kept
-- (LEFT JOIN) and reported as zeros.
-- Local variables are declared with "@". The bounds use the unambiguous
-- yyyymmdd format: the original '05/06/2016' is read as May 6 under a
-- month-first (mdy) date setting -- adjust if June 5 was intended.
DECLARE @StartDate date = '20160101';
DECLARE @EndDate date = '20160506';

SELECT C.BaseDate,
    -- SUM over no matching rows is NULL, hence the ISNULL(..., 0) wrappers.
    ISNULL(SUM(CASE WHEN C.BaseDate = CAST(M.ETS AS DATE) THEN 1 END), 0) AS [ETS],
    ISNULL(SUM(CASE WHEN C.BaseDate = CAST(M.ATS AS DATE) THEN 1 END), 0) AS [ATS],
    ISNULL(SUM(CASE WHEN C.BaseDate = CAST(M.ReadOn AS DATE) THEN 1 END), 0) AS [Read On],
    ISNULL(SUM(CASE WHEN C.BaseDate = CAST(M.Unsubscribed AS DATE) THEN 1 END), 0) AS [Unsubscribed],
    ISNULL(SUM(CASE WHEN C.BaseDate = CAST(M.Bounced AS DATE) THEN 1 END), 0) AS [Bounced]
FROM Calendar C
-- A mail joins to a calendar date when ANY of its date columns falls on it;
-- the CASE expressions above then decide which counters it contributes to.
LEFT OUTER JOIN Mails M
    ON C.BaseDate = CAST(M.ETS AS DATE)
    OR C.BaseDate = CAST(M.ATS AS DATE)
    OR C.BaseDate = CAST(M.ReadOn AS DATE)
    OR C.BaseDate = CAST(M.Unsubscribed AS DATE)
    OR C.BaseDate = CAST(M.Bounced AS DATE)
WHERE C.BaseDate BETWEEN @StartDate AND @EndDate
GROUP BY C.BaseDate
Basically what you're doing is selecting every date from the calendar table within your date range, and then joining it to your mails table if ANY of the datetimes match that date. The purpose of the left join is so that dates on which nothing occurs are still returned in your result set. They will all be zeros, but it's better for consistency and in case someone wants to calculate averages from your report.
Once you have all the dates - and all of the records that have a matching datetime, you just need to count how many, for each date, have a matching ETS, how many have a matching ATS, so on and so forth. Last, you group by the calendar date and you're all done.

SQL Query time spent between certain value

I have a database for all temperatures the last 10 years.
Now I want to find all periods where the temperature was above, e.g., 15 degrees.
Simplified example:
...
2015-05-10 12
2015-05-11 15 |
2015-05-12 16 |
2015-05-13 17 |
2015-05-14 16 |
2015-05-15 15 |
2015-05-16 12
2015-05-17 11
2015-05-18 15 |
2015-05-19 12
2015-05-20 18 |
...
So now I want to get all time periods like this:
Min Max
2015-05-11 2015-05-15
2015-05-18 2015-05-18
2015-05-20 2015-05-20
Any suggestion of how this query will look like ?
You could use CTE
-- Sample data: one temperature reading per day.
CREATE TABLE #Date (DateT datetime, Value int);

INSERT INTO #Date (DateT, Value)
VALUES ('2015-05-10',12),
       ('2015-05-11',15),
       ('2015-05-12',16),
       ('2015-05-13',17),
       ('2015-05-14',16),
       ('2015-05-15',15),
       ('2015-05-16',12),
       ('2015-05-17',11),
       ('2015-05-18',15),
       ('2015-05-19',12),
       ('2015-05-20',18);

-- A statement preceding WITH must be terminated with a semicolon, otherwise
-- the CTE raises a syntax error; hence the terminators above.
-- Number the qualifying days in date order. Within a run of consecutive
-- days the date and the row number advance together, so their difference
-- is constant for the run and changes whenever a day is skipped.
WITH t AS (
    SELECT DateT d, ROW_NUMBER() OVER (ORDER BY DateT) i
    FROM #Date
    WHERE Value >= 15
    GROUP BY DateT
)
-- DATEDIFF(day, i, d) is that per-run constant (the row number is
-- implicitly converted to a datetime); grouping by it yields one row per run.
SELECT MIN(d) AS DataStart,
       MAX(d) AS DataFinal,
       ROW_NUMBER() OVER (ORDER BY DATEDIFF(day, i, d)) AS RN
FROM t
GROUP BY DATEDIFF(day, i, d);
The RN column is optional; you could instead use:
SELECT MIN(d) as DataStart,MAX(d) as DataFinal
FROM t
GROUP BY DATEDIFF(day,i,d)
Here is a solution using a gaps and islands algorithm. It looks kind of bulky but it runs fast and scales great. It is also modular if you want to add a gap-allowed parameter and you can rewrite it to partition by some other columns and it still performs nicely.
Inspired by Peter Larssons post here: http://www.sqltopia.com/?page_id=83
-- Gaps-and-islands query: reports every run of consecutive dates whose
-- reading is >= 15 as one (Min, Max) row.
-- theSource: inline sample rows; Col1 is the date (given as a string
-- literal), Col2 is the reading.
WITH [theSource](Col1,Col2)
AS
(
SELECT Col1,Col2 FROM (VALUES
('2015-05-10',12),
('2015-05-11',15),
('2015-05-12',16),
('2015-05-13',17),
('2015-05-14',16),
('2015-05-15',15),
('2015-05-16',12),
('2015-05-17',11),
('2015-05-18',15),
('2015-05-19',12),
('2015-05-20',18)
) as x(Col1,Col2)
)
-- filteredSource: only the dates whose reading qualifies.
,filteredSource([Value])
AS
(
SELECT Col1 as [Value]
FROM theSource WHERE Col2 >= 15
)
-- cteSource: one row per run boundary, carrying the run's start and end.
,cteSource(RangeStart, RangeEnd)
AS (
SELECT RangeStart,
-- A single-day run has start = end on the same row. Otherwise the end is
-- taken from the next surviving row (LEAD), which is the row marking the
-- end of the run that this row starts.
CASE WHEN [RangeStart] = [RangeEnd] THEN [RangeEnd] ELSE LEAD([RangeEnd]) OVER (ORDER BY Value) END AS [RangeEnd]
FROM (
SELECT [Value],
-- RangeStart is NULL when the previous qualifying date plus one day reaches
-- this date (i.e. this date continues a run); otherwise this date starts a
-- run. On the first row LAG is NULL, so the ELSE branch applies.
CASE
WHEN DATEADD(DAY,1,LAG([Value]) OVER (ORDER BY [Value])) >= [Value] THEN NULL
ELSE [Value]
END AS RangeStart,
-- RangeEnd is NULL when the next qualifying date minus one day is this date
-- or earlier (i.e. the run continues); otherwise this date ends a run.
CASE
WHEN DATEADD(DAY,-1,LEAD([Value]) OVER (ORDER BY [Value])) <= [Value] THEN NULL
ELSE [Value]
END AS RangeEnd
FROM filteredSource
) AS d
-- Drop the interior dates of each run; only boundary rows remain.
WHERE RangeStart IS NOT NULL
OR RangeEnd IS NOT NULL
)
-- Keep only the rows that start a run; end-only rows were consumed by LEAD.
SELECT RangeStart AS [Min],
RangeEnd AS [Max]
FROM cteSource
WHERE RangeStart IS NOT NULL;

Converting rows to columns

I have a query with the columns 'Name', 'Amount', and 'ReasonId'. I want to sum the amount and put the reasons on one row to keep every name to a single line. There are about 50 distinct ReasonId's so I do not want to name the column the name of the ReasonId's. Instead, I would like to name the columns 'Reason1', 'Reason2', 'Reason3', and 'Reason4'. One single name can have up to 4 different reasons.
I have this:
Name Amount ReasonId
-------------------------
Bob $5 7
Bob $8 6
John $2 8
John $5 9
John $3 9
John $8 4
I want to produce the following:
Name Amount Reason1 Reason2 Reason3 Reason4
-----------------------------------------------------
Bob $13 7 6 NULL NULL
John $18 8 9 4 NULL
One way to do this is to use the dense_rank window function to number the rows, and then use conditional aggregation to put the reason in the correct columns.
I can't see anything that would give the specific order of the reason columns though, maybe there is some column missing that provides the order?
-- One row per name: total amount plus up to four distinct reason ids.
-- DENSE_RANK gives equal reason ids the same rank, so a reason repeated
-- for a name occupies a single reasonN column.
WITH ranked_reasons AS (
    SELECT
        name,
        reasonid,
        amount,
        DENSE_RANK() OVER (PARTITION BY name ORDER BY reasonid) AS reason_rank
    FROM your_table
)
SELECT
    name,
    SUM(amount) AS amount,
    MAX(CASE WHEN reason_rank = 1 THEN reasonid END) AS reason1,
    MAX(CASE WHEN reason_rank = 2 THEN reasonid END) AS reason2,
    MAX(CASE WHEN reason_rank = 3 THEN reasonid END) AS reason3,
    MAX(CASE WHEN reason_rank = 4 THEN reasonid END) AS reason4
FROM ranked_reasons
GROUP BY name
If you have some column that gives the order you want then change the order by clause used in the dense_rank function.
Sample SQL Fiddle (using PG as MSSQL seems to be offline).
The output from the query above would be:
| name | amount | reason1 | reason2 | reason3 | reason4 |
|------|--------|---------|---------|---------|---------|
| Bob | 13 | 6 | 7 | (null) | (null) |
| John | 18 | 4 | 8 | 9 | (null) |
You could also use a pivot to achieve this; if you know the columns you can enter them in the script, but if not, you can use dynamic sql (there are reasons why you might want to avoid the dynamic solution).
The advantage of this route is that you can enter the column list in a table, and then changes to that table will result in changes to your output with no change to the script involved. The disadvantages are all those associated with dynamic SQL.
In the interests of variation, here is a dynamic SQL solution using temp tables to hold your data, since a different possibility has been provided:
-- set up your data
-- set up your data
-- (#MyTab and #AllPossibleReasons are temp tables; the "#" prefix is intended)
CREATE TABLE #MyTab (Name VARCHAR(4), Amount INT, ReasonId INT);
CREATE TABLE #AllPossibleReasons (Id INT, Label VARCHAR(10));

INSERT #AllPossibleReasons (Id, Label)
VALUES
 (1,'Reason1')
,(2,'Reason2')
,(3,'Reason3')
,(4,'Reason4')
,(5,'Reason5')
,(6,'Reason6')
,(7,'Reason7')
,(8,'Reason8')
,(9,'Reason9');

-- Sample rows as given in the question (Bob: $5 reason 7, $8 reason 6).
INSERT #MyTab (Name, Amount, ReasonId)
VALUES
 ('Bob',5,7)
,('Bob',8,6)
,('John',2,8)
,('John',5,9)
,('John',3,9)
,('John',8,4);
-----------------------------------------------------------------------------
-- The actual query
-- Local variables are declared with "@"; "DECLARE #name" is not valid.
DECLARE @ReasonList VARCHAR(MAX) = '';
DECLARE @SQL VARCHAR(MAX);

-- Build ",[Reason1],[Reason2],..." from the lookup table. QUOTENAME brackets
-- each label so it is safe to splice into the statement as a column name.
SELECT @ReasonList = @ReasonList + ',' + QUOTENAME(Label)
FROM #AllPossibleReasons;

-- Strip the leading comma.
SET @ReasonList = SUBSTRING(@ReasonList, 2, LEN(@ReasonList));

-- Total.Value (the per-name total) is grouped rather than summed: summing it
-- would multiply the total by the number of rows sharing a reason (John has
-- reason 9 twice), and PIVOT would then split that name into several rows.
SET @SQL =
'SELECT Name,Value,' + @ReasonList + ' FROM
(SELECT
M.Name, SUM(M.Amount) AS This, Reason.Label, Total.Value
FROM
#MyTab AS M
INNER JOIN #AllPossibleReasons AS Reason ON M.ReasonId = Reason.Id
INNER JOIN (SELECT T.Name, SUM(T.Amount) AS Value
FROM #MyTab AS T GROUP BY T.Name) AS Total ON M.Name = Total.Name
GROUP BY M.Name, Reason.Label, Total.Value) AS Up
PIVOT (SUM(This) FOR Label IN (' + @ReasonList + ')) AS Pvt';

EXEC (@SQL);

DROP TABLE #AllPossibleReasons;
DROP TABLE #MyTab;
Working from the information in ListAGG in SQLSERVER, I came up with this somewhat ugly example:
-- Per name: total amount plus a space-separated list of the distinct
-- reason ids, built with the FOR XML PATH('') concatenation idiom.
WITH tbl1 AS (
    -- Set up initial data set
    SELECT name, amount, ReasonId
    FROM (VALUES
        ('Bob', 5, 7),
        ('Bob', 3, 4),
        ('Bob', 2, 1),
        ('Brian', 8, 2),
        ('Bob', 6, 4),
        ('Brian', 1, 3),
        ('Tim', 2, 2)
    ) AS sample_rows (name, amount, ReasonId)
),
tbl2 AS (
    -- Reason id as text with a trailing blank, which becomes the separator
    -- once the values are concatenated.
    SELECT
        name,
        amount,
        CAST(ReasonId AS varchar(30)) + ' ' AS ReasonId
    FROM tbl1
)
SELECT
    reason_lists.name,
    totals.Total,
    reason_lists.ReasonIds
FROM (
    SELECT DISTINCT
        t1.name,
        -- The '' + ... keeps the column unnamed so FOR XML PATH('') emits
        -- bare text; STUFF(..., 1, 0, ' ') removes nothing and prepends a blank.
        STUFF(
            (
                SELECT DISTINCT '' + t2.ReasonId
                FROM tbl2 AS t2
                WHERE t1.name = t2.name
                FOR XML PATH(''), TYPE
            ).value('.', 'NVARCHAR(MAX)'),
            1, 0, ' '
        ) AS ReasonIds
    FROM tbl2 AS t1
) AS reason_lists
INNER JOIN (
    SELECT name, SUM(amount) AS Total
    FROM tbl1
    GROUP BY name
) AS totals
    ON reason_lists.name = totals.name;
This converts TBL1 to the following:
name Total ReasonIds
Bob 16 1 4 7
Brian 9 2 3
Tim 2 2

Resources