Using date parameter in SQL Server CTE - sql-server

I'm trying to use a start and end date parameter in a T-SQL common table expression. I'm very new to SQL Server development and I'm unsure of what I'm missing in the query.
I can specify values for @startdate and @enddate and get correct results.
However, I'm trying to figure out how to make the two parameters open so a user can specify start and end date values. The query will be used in an SSRS report.
-- Tons produced per man-hour for each plant inside a date window.
-- The window is hard-coded below; the question is how to expose it as parameters.
DECLARE @startdate Datetime,
@enddate Datetime;
SET @startdate = '2017-02-09';
SET @enddate = '2017-02-10';
-- ManHours: SUM of tf_timecard.total_hr per plant_name for hourtype_id = 1 and the listed departments.
WITH ManHours AS
(
SELECT DISTINCT
a.plant_name AS Plant, SUM(tc.total_hr) AS TotalHours
FROM
area AS a
INNER JOIN
tf_department AS dep ON a.plant_id = dep.plant_id
INNER JOIN
tf_timecard AS tc ON dep.department_id = tc.department_id
WHERE
tc.timecard_dt BETWEEN @startdate AND @enddate
AND tc.department_id IN (266, 453, ...endlessly long list of IDs......)
AND tc.hourtype_id = 1
GROUP BY
a.plant_name),
-- Tonnage: SUM of tgltransaction.postqty per plant_name in the same window.
Tonnage AS
(
SELECT DISTINCT
a.plant_name AS Plant, SUM(tglt.postqty) AS TotalTonnage
FROM
area AS a
INNER JOIN
plantgl AS pgl ON a.plant_id = pgl.plant_id
INNER JOIN
tgltransaction AS tglt ON pgl.glacckey = tglt.glacctkey
WHERE
tglt.postdate BETWEEN @startdate AND @enddate
GROUP BY
a.plant_name
)
-- Plants with man-hours are kept even when they have no tonnage (LEFT OUTER JOIN).
SELECT DISTINCT
ManHours.Plant,
SUM(TotalTonnage) as 'Production Tons' ,
SUM(TotalHours) as 'Man Hours',
TotalTonnage / TotalHours AS TonsPerManHour
FROM
ManHours
LEFT OUTER JOIN
Tonnage ON ManHours.Plant = tonnage.Plant
GROUP BY
ManHours.Plant, ManHours.TotalHours, Tonnage.TotalTonnage

Below is an example of a stored procedure you could use. In addition, I provided two alternatives to the "endlessly long list of IDs" that is specified in the CTE. In my opinion, it is optimal to pull this logic out of the query and place it at the beginning of the stored procedure. This will enable you, or others, to easily go back and modify this list if / when it changes. Even better, I provided a TABLE VARIABLE (@ListOfDeptIdsFromTable) that you can use to actually retrieve this data as opposed to hard-coding a string.
-- Report: tons produced per man-hour for each plant within a date window.
-- Parameters:
--   @startdate  lower bound (inclusive) applied to tf_timecard.timecard_dt and tgltransaction.postdate
--   @enddate    upper bound (inclusive, BETWEEN semantics) applied to the same columns
-- Result set: one row per plant that has man-hours:
--   Plant, [Production Tons], [Man Hours], TonsPerManHour (NULL when hours are zero or tonnage is missing).
CREATE PROCEDURE Report
    @startdate DATETIME,
    @enddate DATETIME
AS
BEGIN
    SET NOCOUNT ON;

    -- Department filter, kept at the top so it is easy to maintain.
    -- Replace the placeholder with the real comma-separated list of IDs.
    DECLARE @ListOfDeptIds VARCHAR(MAX) = '266, 453, ...endlessly long list of IDs......';

    DECLARE @ListOfDeptIdsFromTable TABLE
    (
        department_id INT
    );

    -- A comma-separated string cannot be used directly as IN (@ListOfDeptIds):
    -- it would be compared as one single value. Split it into rows instead.
    INSERT INTO @ListOfDeptIdsFromTable (department_id)
    SELECT CAST(LTRIM(RTRIM(value)) AS INT)
    FROM STRING_SPLIT(@ListOfDeptIds, ',');

    -- Alternative: load the IDs from a lookup table instead of a hard-coded string:
    --   INSERT INTO @ListOfDeptIdsFromTable (department_id)
    --   SELECT department_id FROM <lookup table> WHERE <criteria>;

    -- Note: the statement preceding WITH must be terminated with a semicolon.
    WITH ManHours AS
    (
        -- Total hours per plant for hour type 1 and the selected departments.
        SELECT
            a.plant_name AS Plant,
            SUM(tc.total_hr) AS TotalHours
        FROM area AS a
        INNER JOIN tf_department AS dep
            ON a.plant_id = dep.plant_id
        INNER JOIN tf_timecard AS tc
            ON dep.department_id = tc.department_id
        WHERE tc.timecard_dt BETWEEN @startdate AND @enddate
          AND tc.department_id IN (SELECT department_id FROM @ListOfDeptIdsFromTable)
          AND tc.hourtype_id = 1
        GROUP BY a.plant_name
    ),
    Tonnage AS
    (
        -- Total posted quantity per plant.
        SELECT
            a.plant_name AS Plant,
            SUM(tglt.postqty) AS TotalTonnage
        FROM area AS a
        INNER JOIN plantgl AS pgl
            ON a.plant_id = pgl.plant_id
        INNER JOIN tgltransaction AS tglt
            ON pgl.glacckey = tglt.glacctkey
        WHERE tglt.postdate BETWEEN @startdate AND @enddate
        GROUP BY a.plant_name
    )
    -- Each CTE already yields exactly one row per plant, so no further
    -- DISTINCT / SUM / GROUP BY is needed here.
    SELECT
        ManHours.Plant,
        Tonnage.TotalTonnage AS [Production Tons],
        ManHours.TotalHours AS [Man Hours],
        -- NULLIF avoids a divide-by-zero error when a plant has zero hours.
        Tonnage.TotalTonnage / NULLIF(ManHours.TotalHours, 0) AS TonsPerManHour
    FROM ManHours
    LEFT OUTER JOIN Tonnage
        ON ManHours.Plant = Tonnage.Plant;
END
GO

Related

How to optimize below my SQL query shown here

This query is written for those users who did not log-in to the system between 1st July to 31 July.
However when we run the query in query analyzer then it's taking more than 2 minutes. But in application side giving error as 'Execution Timeout Expired. The timeout period elapsed prior to completion of the operation or the server is not responding'.
The query below takes 1st July 2022 as the start date, gets all the matching users, adds them to a table variable called '@TABLE_TEMP', and then moves on to the next date.
Again while loop runs and fetch users for 2nd July and so on until it reaches to 31st July.
Can anyone help on this to optimize the query using CTE or any other mechanism?
How can we avoid the WHILE loop for better performance?
-- Collects, one day at a time, the users that have no login on that day.
-- Each loop iteration builds a dynamic query for the current date and appends
-- its rows to @TABLE_TEMP.
DECLARE @TABLE_TEMP TABLE
(
Row int IDENTITY(1,1),
[UserId] int,
[UserName] nvarchar(100),
[StartDate] nvarchar(20),
[FirstLogin] nvarchar(20),
[LastLogout] nvarchar(20)
)
DECLARE @START_DATE datetime = '2022-07-01';
DECLARE @END_DATE datetime = '2022-07-31';
DECLARE @USER_ID nvarchar(max) = '1,2,3,4,5,6,7,8,9';
DECLARE @QUERY nvarchar(max) = '';
WHILE(@START_DATE < @END_DATE OR @START_DATE = @END_DATE)
BEGIN
-- USER is a reserved keyword in T-SQL, so the table name is bracketed.
-- ORDER BY uses the [UserName] alias defined in the select list.
SET @QUERY = 'SELECT
s.userid AS [UserId],
s.username AS [UserName],
''' + CAST(@START_DATE as nvarchar) + ''' AS [StartDate],
MAX(h.START_TIME) as [FirstLogin],
MAX(ISNULL(h.END_TIME, s.LAST_SEEN_TIME)) as [LastLogout]
FROM [USER] s
LEFT JOIN USER_LOGIN_HISTORY h ON h.userid = s.userid
LEFT JOIN TEMP_USER_INACTIVATION TUI ON TUI.userid = s.userid AND ('''+ CAST(@START_DATE as nvarchar) +''' BETWEEN ACTIVATED_DATE AND DEACTIVATD_DATE)
WHERE s.userid IN (' + @USER_ID + ')
AND h.userid NOT IN (SELECT userid FROM USER_LOGIN_HISTORY WHERE CAST(START_TIME AS DATE) = '''+ CONVERT(nvarchar,(CAST(@START_DATE AS DATE))) +''') AND ACTIVATED_DATE IS NOT NULL
GROUP BY s.userid, h.userid, s.username, s.last_seen_time
HAVING CAST(MAX(ISNULL(h.END_TIME, s.LAST_SEEN_TIME)) AS DATE) <> '''+ CONVERT(nvarchar,(CAST(@START_DATE AS DATE))) + '''
ORDER BY [UserName]'
INSERT INTO @TABLE_TEMP
EXEC(@QUERY)
-- Advance to the next calendar day.
SET @START_DATE = DATEADD(DD, 1, @START_DATE)
END
Without the query plan, it's hard to say for sure.
But there are some clear efficiencies to be had.
Firstly, there is no need for a WHILE loop. Create a Dates table which has every single date in it. Then you can simply join it.
Furthermore, do not inject the @USER_ID values. Instead, pass them through as a Table Valued Parameter. At the least, split what you have now into a temp table or table variable.
Do not cast values you want to join on. For example, to check if START_TIME falls on a certain date, you can do WHERE START_TIME >= BeginningOfDate AND START_TIME < BeginningOfNextDate.
The LEFT JOINs are suspicious, especially given you are filtering on those tables in the WHERE.
Use NOT EXISTS instead of NOT IN or you could get incorrect results
-- Set-based replacement for the day-by-day loop: one row per (date, user)
-- on which the user has no login, for every date in [@START_DATE, @END_DATE].
-- Requires a Dates table with one row per calendar date (column: Date).
DECLARE @START_DATE date = '2022-07-01';
DECLARE @END_DATE date = '2022-07-31';
DECLARE @USER_ID nvarchar(max) = '1,2,3,4,5,6,7,8,9';

-- Split the comma-separated ids into rows instead of injecting them into SQL text.
DECLARE @userIds TABLE (userId int PRIMARY KEY);
INSERT @userIds (userId)
SELECT CAST(value AS int)
FROM STRING_SPLIT(@USER_ID, ',');

SELECT
    s.userid AS [UserId],
    s.username AS [UserName],
    d.Date AS [StartDate],
    MAX(h.START_TIME) AS [FirstLogin],
    MAX(ISNULL(h.END_TIME, s.LAST_SEEN_TIME)) AS [LastLogout]
FROM Dates AS d
-- NOTE(review): ACTIVATED_DATE / DEACTIVATD_DATE are assumed to be columns of
-- TEMP_USER_INACTIVATION (they appear in that table's join condition in the
-- original loop) -- confirm. The date-range match makes this an inner join.
INNER JOIN TEMP_USER_INACTIVATION AS TUI
    ON d.Date BETWEEN TUI.ACTIVATED_DATE AND TUI.DEACTIVATD_DATE
-- USER is a reserved keyword in T-SQL and must be bracketed.
INNER JOIN [USER] AS s
    ON s.userid = TUI.userid
LEFT JOIN USER_LOGIN_HISTORY AS h
    ON h.userid = s.userid
WHERE s.userid IN (SELECT u.userId FROM @userIds AS u)
  AND d.Date BETWEEN @START_DATE AND @END_DATE
  -- No login that starts on this date. Correlated on s.userid (never NULL)
  -- rather than on the outer-joined h.userid; half-open range keeps the
  -- predicate on START_TIME index-friendly.
  AND NOT EXISTS (
        SELECT 1
        FROM USER_LOGIN_HISTORY AS ulh
        WHERE ulh.userid = s.userid
          AND ulh.START_TIME >= CAST(d.Date AS datetime)
          AND ulh.START_TIME < CAST(DATEADD(day, 1, d.Date) AS datetime)
      )
GROUP BY
    d.Date,
    s.userid,
    s.username,
    s.last_seen_time
HAVING CAST(MAX(ISNULL(h.END_TIME, s.LAST_SEEN_TIME)) AS date) <> d.Date
ORDER BY -- do you need this? remove if possible.
    s.username;
Better to collect dates in a table rather than running query in a loop. Use following query to collect dates between given date range:
-- Fills @dates with one row per calendar day of the month that starts at @month_start.
DECLARE @month_start DATE = DATEFROMPARTS(2022, 8, 1);
DECLARE @day INT = 1;
DECLARE @dates TABLE (datDate DATE);
-- EOMONTH gives the last day of the month, so the loop bound does not rely on
-- ISDATE parsing a string (which depends on the session language/date format).
WHILE @day <= DAY(EOMONTH(@month_start))
BEGIN
    INSERT INTO @dates (datDate)
    VALUES (DATEADD(DAY, @day - 1, @month_start));
    SET @day = @day + 1;
END
Then to get all dates where user did not login, you have to use Cartesian join and left join as illustrated below
-- Every (user, date) pair from @dates on which the user has no login row.
SELECT
    u.userID,
    u.userName,
    d.datDate AS notLoggedOn
FROM Users AS u
-- All users for all dates, e.g. 31 rows per user for August.
CROSS JOIN @dates AS d
-- Keep the pair only when that user has no login on that date. The check is
-- correlated on both the user and the date; comparing against a single
-- MAX(login_date) per user would report every day except the last login day.
WHERE NOT EXISTS (
        SELECT 1
        FROM USER_LOGIN_HISTORY AS h
        WHERE h.userID = u.userID
          AND h.login_date >= d.datDate
          AND h.login_date < DATEADD(DAY, 1, d.datDate)
      )
ORDER BY
    u.userID,
    d.datDate
From this query you will get every day of the month on which a user did not log in.
If there is no need to list every single date when user did not log in, then Cartesian join can be omitted. This will further improve the performance.
I hope this will help.

show the column names to go along with the sum

am just doing some practice in SQL Server. My below query sums a quantity column but I also want to return the corresponding item number column and item description column with it. each time I try it tells me the item_no column is ambiguous. I have done some reading as to what this means but am still not clear on it. What am I missing?
-- Query as asked in the question; it reproduces the "ambiguous column" error:
-- item_no exists in both fact_sales and dim_item (see the join condition) but
-- is not table-qualified in the SELECT list, and there is no GROUP BY.
declare @startdate int = '20161201'
declare @enddate int = '20170401'
SELECT sum(qty) as total_units, item_no as item_number from fact_sales
inner join dim_item
on fact_sales.item_no=dim_item.item_no
where vendor_id = 'roche' and date_key between @startdate and @enddate
order by fact_sales.item_no
You are missing couple of things
-- Total units sold per item for one vendor within a date_key range.
-- Integer literals are used directly instead of strings converted to INT.
DECLARE @startdate INT = 20161201;
DECLARE @enddate INT = 20170401;

SELECT
    SUM(fact_sales.qty) AS total_units,
    dim_item.item_no AS item_number,
    dim_item.item_discription
FROM fact_sales
INNER JOIN dim_item
    ON fact_sales.item_no = dim_item.item_no
-- NOTE(review): vendor_id and date_key are left unqualified because the
-- snippet does not show which table owns them -- qualify once confirmed.
WHERE vendor_id = 'roche'
  AND date_key BETWEEN @startdate AND @enddate
GROUP BY
    dim_item.item_no,
    dim_item.item_discription
-- Must order by a grouped column; fact_sales.item_no is not in the GROUP BY.
ORDER BY dim_item.item_no;
- A GROUP BY clause
- Prefixing each column name with its table name / table alias

"select" statement in stored Procedure is not working

I have tried implementing a stored procedure for displaying the result from different tables by using inner join but my problem is select statement is not returning any result but its printing some of the values as messages.
-- EmployeeReport: returns the time-log rows of one employee together with the
-- client and project names, and prints the in/out time and date values.
-- Parameter: @empid  Timelog.EmployeeId to report on.
alter proc EmployeeReport(@empid int)
as
begin
declare @inTime time(0)
declare @outTime time(0)
declare @fromDate date
declare @toDate date
-- Each variable is filled from a scalar subquery on Timelog.
-- NOTE(review): a scalar subquery fails if Timelog holds more than one row
-- for the employee -- confirm the expected cardinality.
set @inTime = (select CAST(InTime as time(0)) from Timelog where EmployeeId=@empid)
set @outTime = (select CAST(OutTime as time(0)) from Timelog where EmployeeId = @empid)
set @fromDate = (select cast (InTime as date) from Timelog where EmployeeId= @empid)
set @toDate = (select cast (outTime as date) from Timelog where EmployeeId= @empid)
-- Inner joins: a Timelog row is returned only when matching Employee,
-- Project and Client rows all exist.
select @fromDate as FromDate
,@toDate as ToDate
,c.Name as Client
,p.Name as Project
,@inTime as InTime
,@outTime as OutTime
,t.TotalTime
from Timelog t
inner join Employee e
on e.id = t.EmployeeId
inner join Project p
on p.Id = t.EmployeeProjectId
inner join Client c
on c.Id = p.ClientId
where t.EmployeeId = @empid
-- PRINT output goes to the Messages channel, not to the result set.
print @inTime
print @outTime
print @fromDate
print @toDate
end
I am attaching the output files what i am getting , please help me with this
Messeges getting printed:
No values returned or Selected:
Your initial declaration settings only select data from your TimeLog table, which clearly contains data. Because you are then inner joining from here to other tables, if those other tables have no data, nothing will be returned.
Either make sure that your Employee, Project and Client tables have data in them or change your joins to left instead of inner:
-- Replacement SELECT for the procedure body: LEFT JOINs keep the Timelog row
-- even when the matching Employee, Project or Client row is missing
-- (Client / Project are then returned as NULL).
select @fromDate as FromDate
,@toDate as ToDate
,c.Name as Client
,p.Name as Project
,@inTime as InTime
,@outTime as OutTime
,t.TotalTime
from Timelog t
left join Employee e
on e.id = t.EmployeeId
left join Project p
on p.Id = t.EmployeeProjectId
left join Client c
on c.Id = p.ClientId
-- Keep the original filter so only the requested employee's rows are returned.
where t.EmployeeId = @empid

Finetuning the SQL Query Performance

The following query takes about 3000ms to execute.
I could not fine-tune this query to give a considerable performance edge.
-- Rows of table2 created during the last year whose matching table1 row has ColumnName = 1.
declare @EndDate datetime;
declare @FromDate datetime;
set @EndDate = getdate();
-- Window start: exactly one year before @EndDate.
set @FromDate = DATEADD(year,-1, @EndDate);
SELECT [twc].ColumnId, [twc].ColumnName
FROM [table1] twcs with(nolock)
INNER JOIN [table2] twc with(nolock) ON [twc].ColumnId = [twcs].ColumnId
WHERE [twcs].[ColumnName] = 1 AND [twc].[CreateDate] between @FromDate and @EndDate;
This is your query:
-- The question's query restated without bracket quoting; @FromDate and
-- @EndDate are the variables declared in the question.
SELECT twc.ColumnId, twc.ColumnName
FROM table1 twcs with (nolock) INNER JOIN
table2 twc with (nolock)
ON twc.ColumnId = twcs.ColumnId
WHERE twcs.ColumnName = 1 AND twc.CreateDate between @FromDate and @EndDate;
I would suggest indexes such as the following:
table2(CreateDate, ColumnId)
table2(ColumnId, CreateDate)
table1(ColumnName, ColumnId)
table1(ColumnId, ColumnName)
Let the optimizer choose which to use.
With a query as simple and straight forward as that, you're likely looking at an issue where the tables need a new index to speed the return rather than script optimization.

SQL Server : multiple join query optimization

I've just started with Microsoft SQL Server and I'm facing a problem, which I believe it is an sql optimization issue. Could you please take a look (see below) and give me your feedback.
I have two tables defined as follows:
floatTable (DateAndTime datetime2(7),
TagIndex smallint,
Val float)
stringTable (DateAndTime datetime2(7),
TagIndex smallint,
Val float)
The SQL query which I have used to get the RESULT is (don't laugh):
-- Question's original query: one derived table per TagIndex, all inner-joined
-- on DateAndTime. Tags 0-7 (FloatTable) are the eight winches; tag 8 and tag 9
-- (StringTable) are Latitude and Longitude. Only timestamps that have a row
-- for every one of the ten tags are returned.
DECLARE @startDate DATETIME, @endDate DATETIME
SET @startDate = '20130312 9:00:00'
SET @endDate = '20130313 9:00:00'
USE TensionDB
SELECT t1.DateAndTime, t1.Val AS Winch_1,t2.Val AS Winch_2, t3.Val AS Winch_3, t4.Val AS Winch_4, t5.Val AS Winch_5,
t6.Val AS Winch_6, t7.Val AS Winch_7, t8.Val AS Winch_8, t9.Val AS Latitude, t10.Val AS Longitude
FROM
((SELECT DISTINCT DateAndTime ,Val FROM dbo.FloatTable
WHERE (DateAndTime BETWEEN @startDate AND @endDate) AND TagIndex = 0)t1
INNER JOIN (SELECT DISTINCT DateAndTime,Val FROM dbo.FloatTable
WHERE ( DateAndTime BETWEEN @startDate AND @endDate) AND TagIndex = 1)t2
ON t2.DateAndTime = t1.DateAndTime
INNER JOIN (SELECT DISTINCT DateAndTime,Val FROM dbo.FloatTable
WHERE (DateAndTime BETWEEN @startDate AND @endDate) AND TagIndex = 2)t3
ON t3.DateAndTime = t1.DateAndTime
INNER JOIN (SELECT DISTINCT DateAndTime,Val FROM dbo.FloatTable
WHERE (DateAndTime BETWEEN @startDate AND @endDate) AND TagIndex = 3)t4
ON t4.DateAndTime = t1.DateAndTime
INNER JOIN (SELECT DISTINCT DateAndTime,Val FROM dbo.FloatTable
WHERE (DateAndTime BETWEEN @startDate AND @endDate) AND TagIndex = 4)t5
ON t5.DateAndTime = t1.DateAndTime
INNER JOIN (SELECT DISTINCT DateAndTime,Val FROM dbo.FloatTable
WHERE (DateAndTime BETWEEN @startDate AND @endDate) AND TagIndex = 5)t6
ON t6.DateAndTime = t1.DateAndTime
INNER JOIN (SELECT DISTINCT DateAndTime,Val FROM dbo.FloatTable
WHERE (DateAndTime BETWEEN @startDate AND @endDate) AND TagIndex = 6)t7
ON t7.DateAndTime = t1.DateAndTime
INNER JOIN (SELECT DISTINCT DateAndTime,Val FROM dbo.FloatTable
WHERE (DateAndTime BETWEEN @startDate AND @endDate) AND TagIndex = 7)t8
ON t8.DateAndTime = t1.DateAndTime
INNER JOIN (SELECT DISTINCT DateAndTime, Val FROM dbo.StringTable
WHERE (DateAndTime BETWEEN @startDate AND @endDate) AND TagIndex = 8)t9
ON t9.DateAndTime = t1.DateAndTime
INNER JOIN (SELECT DISTINCT DateAndTime, Val FROM dbo.StringTable
WHERE (DateAndTime BETWEEN @startDate AND @endDate) AND TagIndex = 9)t10
ON t10.DateAndTime = t1.DateAndTime)
PROBLEM: The big problem is that even if I get the correct result, the query gets very slow for large amount of data. I'm pretty sure that there is another way to write the query, but for the moment I don't have any other idea.
Could you give me a hint please? Appreciate any help from your side.
Thank you in advance
@Kiril and @Patrick,
Using your hints and ideas I have rewritten my original query using a pivot table.
Unfortunately, I still have to use INNER JOIN, as the values(Val) in the stringTable are strings and values(Val) in floatTable are floats. To be honest, I have to perform more tests with both queries, as I can't see a real improvement (time wise), using pivot table; apart from the length of the query. One last thing, I have embedded the query in a stored procedure. Please find below the "final" code:
-- ================================================
-- Template generated from Template Explorer using:
-- Create Procedure (New Menu).SQL
--
-- Use the Specify Values for Template Parameters
-- command (Ctrl-Shift-M) to fill in the parameter
-- values below.
--
-- This block of comments will not be included in
-- the definition of the procedure.
-- ================================================
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- =============================================
-- Author: xxxx
-- Create date: xxxx
-- Description: retrieves tension data
-- =============================================
CREATE PROCEDURE getTension
    -- Inclusive bounds for DateAndTime (BETWEEN semantics). Both default to
    -- NULL; with a NULL bound the BETWEEN predicate matches no rows.
    @startDate datetime = NULL,
    @endDate datetime = NULL
AS
BEGIN
    -- SET NOCOUNT ON added to prevent extra result sets from
    -- interfering with SELECT statements.
    SET NOCOUNT ON;

    -- One row per timestamp: FloatTable tags 0-7 pivoted into Winch_1..Winch_8,
    -- joined to the StringTable rows for the same timestamp.
    -- Tag mapping follows the original multi-join query: TagIndex 8 is
    -- Latitude and TagIndex 9 is Longitude. The output column order
    -- (Longitude, then Latitude) is unchanged.
    SELECT DISTINCT pvt.DateAndTime
        , pvt.[0] AS Winch_1
        , pvt.[1] AS Winch_2
        , pvt.[2] AS Winch_3
        , pvt.[3] AS Winch_4
        , pvt.[4] AS Winch_5
        , pvt.[5] AS Winch_6
        , pvt.[6] AS Winch_7
        , pvt.[7] AS Winch_8
        , lon.Val AS Longitude
        , lat.Val AS Latitude
    FROM FloatTable
    PIVOT
        (MAX(Val)
         FOR TagIndex IN ([0],[1],[2],[3],[4],[5],[6],[7])
        ) AS pvt
    INNER JOIN StringTable AS lat
        ON lat.DateAndTime = pvt.DateAndTime AND lat.TagIndex = 8
    INNER JOIN StringTable AS lon
        ON lon.DateAndTime = pvt.DateAndTime AND lon.TagIndex = 9
    WHERE pvt.DateAndTime BETWEEN @startDate AND @endDate
    ORDER BY pvt.DateAndTime;
END
GO
Thanks again for your guidance
I suppose that using pivot table is going to be more efficient:
-- Pivot-based version of the tension query: each table is filtered to the
-- time window and pivoted on its own (so Val / TagIndex are never ambiguous
-- and the two Val columns are never mixed), then the results are joined on
-- DateAndTime.
-- Note: unlike the ten-way inner join, a timestamp that lacks some tag within
-- one table is still returned, with NULL in that tag's column.
DECLARE @startDate DATETIME, @endDate DATETIME;
SET @startDate = '20130312 9:00:00';
SET @endDate = '20130313 9:00:00';

WITH float_pvt AS
(
    SELECT p.DateAndTime, p.[0], p.[1], p.[2], p.[3], p.[4], p.[5], p.[6], p.[7]
    FROM (
        SELECT DateAndTime, TagIndex, Val
        FROM dbo.FloatTable
        WHERE DateAndTime BETWEEN @startDate AND @endDate
    ) AS src
    PIVOT (MAX(Val) FOR TagIndex IN ([0], [1], [2], [3], [4], [5], [6], [7])) AS p
),
string_pvt AS
(
    SELECT p.DateAndTime, p.[8], p.[9]
    FROM (
        SELECT DateAndTime, TagIndex, Val
        FROM dbo.StringTable
        WHERE DateAndTime BETWEEN @startDate AND @endDate
    ) AS src
    PIVOT (MAX(Val) FOR TagIndex IN ([8], [9])) AS p
)
SELECT fp.DateAndTime
    , fp.[0] AS Winch_1
    , fp.[1] AS Winch_2
    , fp.[2] AS Winch_3
    , fp.[3] AS Winch_4
    , fp.[4] AS Winch_5
    , fp.[5] AS Winch_6
    , fp.[6] AS Winch_7
    , fp.[7] AS Winch_8
    , sp.[8] AS Latitude
    , sp.[9] AS Longitude
FROM float_pvt AS fp
INNER JOIN string_pvt AS sp
    ON sp.DateAndTime = fp.DateAndTime
-- ORDER BY belongs to the outer query, not inside the PIVOT clause.
ORDER BY fp.DateAndTime;
I haven't tested it so you may have to tweak it a bit, in order to make it work.
Start putting DateAndTime and TagIndex in an index. That would make a difference.
On the other hand, your query could get a lot faster if you wouldn't need the repeating inner joins.
Can you replace the distinct by a single group by and use min or max?
Another option is using pivot tables.
Like this:
-- PIVOT accepts one aggregate plus a FOR <column> IN (...) list; the columns
-- not named in the PIVOT clause (here DateAndTime) become the grouping key.
select pvt.DateAndTime, pvt.[0], pvt.[1], pvt.[2], pvt.[3], pvt.[4], pvt.[5], pvt.[6], pvt.[7]
from (select DateAndTime, TagIndex, Val from FloatTable) as src
pivot (min(Val) for TagIndex in ([0],[1],[2],[3],[4],[5],[6],[7])) as pvt

Resources