Optimizing query with huge amount of data - sql-server

How can I optimize this query? I looked at the execution plan and created all the indexes. Every table holds a huge amount of data, and the query's execution time is very long. Looking at the query, could you please suggest where I can optimize it further?
To give a little background, the structure is as follows:
There are many companies
Each company can have multiple managers
Data is in pagination format
The filter applies to #parent_manager, but because #parent_manager stores the names in a comma-separated format, a second temp table, #parent_manager_filter, is created purely for filtering purposes
-- Temp table with one row per cid (enforced by the primary key); name and code
-- are declared as NVARCHAR(MAX).
CREATE TABLE #parent_manager
(
cid NUMERIC(18) PRIMARY KEY,
name NVARCHAR(MAX),
code NVARCHAR(MAX)
);
-- NOTE(review): cid is already the PRIMARY KEY above, so this second index on the
-- same single column looks redundant - confirm before keeping it.
CREATE INDEX cte_parent_manager ON #parent_manager(cid);
-- Temp table used only for filtering; it has no key, so cid may repeat.
CREATE TABLE #parent_manager_filter
(
cid NUMERIC(18),
name NVARCHAR(1000),
code NVARCHAR(1000)
);
-- NOTE(review): this is a nonclustered index, so the table itself stays a heap.
CREATE INDEX cte_parent_manager_filter_idx ON #parent_manager_filter(cid);
-- Fill #parent_manager with one row per managed company (cid): the names and
-- codes of all of its manager companies collapsed into comma-separated lists,
-- restricted to manager companies that belong to handler 5800657002370.
INSERT INTO #parent_manager (cid, name, code)
SELECT DISTINCT
    mgrc.cid,
    -- FOR XML PATH('') concatenates ', ' + value for every manager of this cid.
    -- STUFF(..., 1, 2, '') then removes the whole leading ', ' separator;
    -- removing only one character would leave a leading space in the list.
    name = CAST(STUFF((SELECT ', ' + CAST(c.company_name AS varchar(2000))
                       FROM manager_company mc
                       INNER JOIN company c ON (mc.mgr_cid = c.cid)
                       WHERE mc.cid = mgrc.cid
                         AND c.company_name IS NOT NULL
                       FOR XML PATH ('')), 1, 2, '') AS VARCHAR(2000)),
    code = CAST(STUFF((SELECT ', ' + CAST(c.code AS varchar(2000))
                       FROM manager_company mc
                       INNER JOIN company c ON (mc.mgr_cid = c.cid)
                       WHERE mc.cid = mgrc.cid
                         AND c.company_name IS NOT NULL
                       FOR XML PATH ('')), 1, 2, '') AS VARCHAR(2000))
FROM manager_company mgrc
INNER JOIN company c ON (mgrc.mgr_cid = c.cid)
INNER JOIN handler h ON (c.handlerId = h.handlerid)
WHERE h.handlerid = 5800657002370;
INSERT INTO #parent_manager_filter
SELECT DISTINCT
mc.cid,
c.company_name as name,
c.code as code
FROM
manager_company mc
INNER JOIN
company c ON (mc.mgr_cid = c.cid )
JOIN
handler h ON (h.handlerid = c.handlerid)
WHERE
h.handlerid = 5800657002370 ;
-- Returns rows 1-10 (ordered by company name) of the companies whose parent
-- manager name is in :managerList, with the total match count on every row.
--
-- The first CTE is named filtered_company rather than company: a CTE that
-- references its own name in its FROM clause is treated as a recursive
-- reference by SQL Server instead of a reference to the base table.
WITH filtered_company AS
(
    SELECT DISTINCT
        c.cid AS cid,
        c.company_name AS company_name, -- exposed so the ORDER BY in total_rows can resolve it
        parentManager.name AS MANAGER_NAME,
        parentManager.code AS code
    FROM company c
    LEFT JOIN #parent_manager parentManager
        ON (parentManager.cid = c.cid)
    -- The WHERE below rejects the NULL-extended rows of this join, so only
    -- companies with a matching filter row survive.
    LEFT JOIN #parent_manager_filter parentManagerFilter
        ON (parentManagerFilter.cid = c.cid)
    WHERE parentManagerFilter.name IN (:managerList)
),
total_rows AS
(
    SELECT
        COUNT(*) OVER () AS TOTALCOUNT,
        ROW_NUMBER() OVER (ORDER BY grid.company_name ASC) AS rnum,
        grid.*
    FROM filtered_company grid
)
SELECT *
FROM total_rows rnum
WHERE rnum >= 1
  AND rnum <= 10;
DROP TABLE #parent_manager;
DROP TABLE #parent_manager_filter;

If you are building up temp tables then I would make sure you don't miss a clustered index, else your temp table is simply a heap. You don't have one covering the filter table.
INSERT INTO #parent_manager_filter ...
CREATE CLUSTERED INDEX cte_parent_manager_filter On #parent_manager_filter(cid);

Related

How can I refer to a LAG() function column in SQL Server?

I have a query in which I use LAG function :
-- Tr: joins each DocDtls row to its Transactions rows via the document number.
WITH Tr AS
(
SELECT
DocDtls.Warehouse, Transactions.Code, DocDtls.zDate,
Transactions.ID, Transactions.QtyIn, Transactions.QtyOut,
Transactions.BalanceAfter
FROM
DocDtls
INNER JOIN
Transactions ON DocDtls.[PrimDocNum] = Transactions.[DocNum]
)
-- For each row, Prev_BlncAfter is the BalanceAfter of the previous row within the
-- same Warehouse/Code partition (ordered by Code, ID); the first row of a
-- partition gets the default 0.
SELECT
ID, Code, QtyIn, QtyOut, BalanceAfter,
LAG(BalanceAfter, 1, 0) OVER (PARTITION BY Warehouse, Code
ORDER BY Code, ID) Prev_BlncAfter
FROM
Tr;
It's working fine but when I try to add this column before FROM:
SUM(Prev_BlncAfter + QtyIn) - QtyOut AS NewBlncAfter
I get this error :
Msg 207, Level 16, State 1, Line 3
Invalid column name 'Prev_BlncAfter'
How can I fix this ? Thanks
You can create the LAG column inside the CTE instead of in the outer query. E.g.
-- Table variables standing in for the real DocDtls / Transactions tables.
-- Table variables are named with a leading @ (a leading # denotes a temp table
-- and is not valid in DECLARE ... TABLE).
declare @DocDtls table (Warehouse int, zDate date, [PrimDocNum] int);
declare @Transactions table (code int, id int, QtyIn int, QtyOut int, balanceafter int, [DocNum] int);

-- Compute the LAG column inside the CTE so that the outer query can refer to
-- Prev_BlncAfter by name.
with Tr As
(
    SELECT
          d.Warehouse
        , t.Code
        , d.zDate
        , t.ID
        , t.QtyIn
        , t.QtyOut
        , t.BalanceAfter
        , LAG(BalanceAfter, 1, 0) Over (partition by Warehouse, Code order by Code, ID) Prev_BlncAfter
    FROM @DocDtls d
    INNER JOIN @Transactions t ON d.[PrimDocNum] = t.[DocNum]
)
select ID, Code, QtyIn, QtyOut, BalanceAfter
     , SUM(Prev_BlncAfter + QtyIn) - QtyOut As NewBlncAfter
from Tr
group by ID, Code, QtyIn, QtyOut, BalanceAfter;
You can nest this query to refer to the newly added column from the outer scope, or add another CTE to the WITH clause, as you've done before, and reference the column afterwards:
with Tr As (
SELECT
DocDtls.Warehouse,
Transactions.Code,
DocDtls.zDate,
Transactions.ID,
Transactions.QtyIn,
Transactions.QtyOut,
Transactions.BalanceAfter
FROM
DocDtls
INNER JOIN Transactions ON DocDtls.[PrimDocNum] = Transactions.[DocNum]
),
formatted_tr as (
select
ID,
Code,
QtyIn,
QtyOut,
BalanceAfter,
LAG(BalanceAfter, 1, 0) Over (
partition by Warehouse,
Code
order by
Code,
ID
) Prev_BlncAfter
from
Tr
)
select
SUM(Prev_BlncAfter + QtyIn) - QtyOut As NewBlncAfter
from
formatted_tr
group by
ID, QtyOut
;
Based on comments , I combined the two answers to get what I need :
with Tr As (
SELECT
DocDtls.Warehouse,
Transactions.Code,
DocDtls.zDate,
Transactions.ID,
Transactions.QtyIn,
Transactions.QtyOut,
Transactions.BalanceAfter
FROM
DocDtls
INNER JOIN Transactions ON DocDtls.[PrimDocNum] = Transactions.[DocNum]
),
formatted_tr as (
select
ID,
Code,
QtyIn,
QtyOut,
BalanceAfter,
LAG(BalanceAfter, 1, 0) Over (
partition by Warehouse,
Code
order by
Code,zDate,ID
) Prev_BlncAfter
from
Tr
)
select ID,Code,QtyIn,QtyOut,BalanceAfter
,SUM(Prev_BlncAfter + QtyIn)-QtyOut As NewBlncAfter
from formatted_tr
group by ID,Code,QtyIn,QtyOut,BalanceAfter;
;

Convert PostgreSQL to MS SQL

I need help converting a PostgreSQL query to MS SQL.
Below is what I have done so far, but I am having issues with the function and array parts, which I do not think are allowed in MS SQL.
Is there something I need to do to change the function? It looks like the WHERE clause has an array in it too.
I have added the select statement for the #temp table, but when I create the #temp table I get errors saying incorrect syntax.
CREATE FUNCTION pm_aggregate_report
(
_facility_ids uuid[]
, _risk_ids uuid[] DEFAULT NULL::uuid[]
, _assignee_ids uuid[] DEFAULT NULL::uuid[]
, _start_date date DEFAULT NULL::date
, _end_date date DEFAULT NULL::date
)
RETURNS TABLE
(
facility character varying
, pm_id uuid, grouped_pm boolean
, risk_id uuid
, risk character varying
, pm_status_id uuid
, user_id uuid
, assignee text
, completed_by uuid
, total_labor bigint
)
CREATE TABLE #tmp_pm_aggregate
(
facility_id VARCHAR(126),
pm_id VARCHAR(126),
grouped_pm VARCHAR(126),
risk_id VARCHAR(126),
pm_status_id VARCHAR(126),
user_id VARCHAR(126),
completed_by VARCHAR(126)
)
SELECT DISTINCT
COALESCE(gp.facility_id, a.facility_id) as facility_id,
COALESCE(p.grouped_pm_id, p.id) as pm_id,
CASE WHEN p.grouped_pm_id IS NULL THEN false ELSE true END as grouped_pm,
COALESCE(gp.risk_id, a.risk_id) as risk_id,
COALESCE(gp.pm_status_id, p.pm_status_id) as pm_status_id,
COALESCE(gass.user_id, sass.user_id) as user_id,
COALESCE(gp.completed_by, p.completed_by) as completed_by
FROM pms p
JOIN assets a
ON p.asset_id = a.id
LEFT JOIN grouped_pms gp
ON p.grouped_pm_id = gp.id
LEFT JOIN assignees sass
ON p.id = sass.record_id
AND sass.type = 'single_pm'
LEFT JOIN assignees gass
ON p.grouped_pm_id = gass.record_id
AND gass.type = 'grouped_pm'
LEFT JOIN users u
ON (sass.user_id = u.id OR gass.user_id = u.id)
WHERE a.facility_id = ANY(_facility_ids)
AND NOT a.is_component
AND COALESCE(gp.pm_status_id, p.pm_status_id) in ('f9bdfc17-3bb5-4ec0-8477-24ef05ea3b9b', '06fc910c-3d07-4284-8f6e-8fb3873f5333')
AND COALESCE(gp.completion_date, p.completion_date) BETWEEN COALESCE(_start_date, '1/1/2000') AND COALESCE(_end_date, '1/1/3000')
AND COALESCE(gp.show_date, p.show_date) <= CURRENT_TIMESTAMP
AND COALESCE(gass.user_id, sass.user_id) IS NOT NULL
AND u.user_type_id != 'ec823d98-7023-4908-8006-2e33ddf2c11b'
AND (_risk_ids IS NULL OR COALESCE(gp.risk_id, a.risk_id) = ANY(_risk_ids)
AND (_assignee_ids IS NULL OR COALESCE(gass.user_id, sass.user_id) = ANY(_assignee_ids);
SELECT
f.name as facility,
t.pm_id,
t.grouped_pm,
t.risk_id,
r.name as risk,
t.pm_status_id,
t.user_id,
u.name_last + ', ' + u.name_first as assignee,
t.completed_by,
ISNULL(gwl.total_labor, swl.total_labor) as total_labor
FROM #tmp_pm_aggregate t
JOIN facilities f
ON t.facility_id = f.id
JOIN risks r
ON t.risk_id = r.id
JOIN users u
ON t.user_id = u.id
LEFT JOIN (SELECT wl.record_id, wl.user_id, SUM(wl.labor_time) as total_labor
FROM work_logs wl
WHERE wl.type = 'single_pm'
GROUP BY wl.record_id, wl.user_id) as swl
ON t.pm_id = swl.record_id
AND t.user_id = swl.user_id
AND t.grouped_pm = false
LEFT JOIN (SELECT wl.record_id, wl.user_id, SUM(wl.labor_time) as total_labor
FROM work_logs wl
WHERE wl.type = 'grouped_pm'
GROUP BY wl.record_id, wl.user_id) as gwl
ON t.pm_id = gwl.record_id
AND t.user_id = gwl.user_id
AND t.grouped_pm = true
ORDER BY facility,
assignee,
risk;
DROP TABLE #tmp_pm_aggregate;
You can create an inline Table Valued Function, and simply return a resultset from it. You do not need (and cannot use) a temp table, you do not declare the returned "rowset" shape.
For the array parameters, you can use a Table Type:
-- User-defined table type used to pass a list of GUIDs as a table-valued parameter.
-- A table type requires the AS TABLE keyword before the column list.
CREATE TYPE dbo.GuidList AS TABLE (value uniqueidentifier NOT NULL PRIMARY KEY);
Because the table parameters are actual tables, you must query them like this (NOT EXISTS (SELECT 1 FROM @risk_ids) OR ISNULL(gp.risk_id, a.risk_id) IN (SELECT r.value FROM @risk_ids r))
The parameters must start with @, and table-valued parameters must be declared READONLY
There is no boolean type, you must use bit
Always use deterministic date formats for literals. yyyymmdd works for dates. Do you need to take into account hours and minutes, because you haven't?
ISNULL generally performs better than COALESCE in SQL Server, as the compiler understands it better
You may want to pass a separate parameter showing whether you passed in anything for the optional table parameters
I suggest you look carefully at the actual query: why does it need DISTINCT? It performs poorly, and is usually a code-smell indicating poorly thought-out joins. Perhaps you need to combine the two joins on assignees, or perhaps you should use a row-numbering strategy somewhere.
-- Inline table-valued function returning the distinct PM rows (single or grouped)
-- that pass the facility, status, completion-date, show-date, assignee and
-- user-type filters below.
--
-- Parameters:
--   @facility_ids  facilities to include; a row is kept only when
--                  a.facility_id appears in this list
--   @risk_ids      optional risk filter; an empty table means "no filter"
--   @assignee_ids  optional assignee filter; an empty table means "no filter"
--   @start_date    lower completion-date bound; NULL falls back to 2000-01-01
--   @end_date      upper completion-date bound; NULL falls back to 3000-01-01
--
-- Parameters are prefixed with @, and table-valued parameters must be READONLY.
CREATE FUNCTION dbo.pm_aggregate_report
(
    @facility_ids dbo.GuidList READONLY
  , @risk_ids dbo.GuidList READONLY
  , @assignee_ids dbo.GuidList READONLY
  , @start_date date
  , @end_date date
)
RETURNS TABLE AS RETURN
SELECT DISTINCT -- why DISTINCT, perhaps rethink your joins
    ISNULL(gp.facility_id, a.facility_id) as facility_id,
    ISNULL(p.grouped_pm_id, p.id) as pm_id,
    -- SQL Server has no boolean type; bit is used instead
    CASE WHEN p.grouped_pm_id IS NULL THEN CAST(0 AS bit) ELSE CAST(1 AS bit) END as grouped_pm,
    ISNULL(gp.risk_id, a.risk_id) as risk_id,
    ISNULL(gp.pm_status_id, p.pm_status_id) as pm_status_id,
    ISNULL(gass.user_id, sass.user_id) as user_id,
    ISNULL(gp.completed_by, p.completed_by) as completed_by
FROM pms p
JOIN assets a
    ON p.asset_id = a.id
LEFT JOIN grouped_pms gp
    ON p.grouped_pm_id = gp.id
LEFT JOIN assignees sass
    ON p.id = sass.record_id
    AND sass.type = 'single_pm'
LEFT JOIN assignees gass
    ON p.grouped_pm_id = gass.record_id
    AND gass.type = 'grouped_pm'
LEFT JOIN users u
    ON (sass.user_id = u.id OR gass.user_id = u.id) -- is this doubling up your rows?
WHERE a.facility_id IN (SELECT f.value FROM @facility_ids f)
    AND a.is_component = 0
    AND ISNULL(gp.pm_status_id, p.pm_status_id) in ('f9bdfc17-3bb5-4ec0-8477-24ef05ea3b9b', '06fc910c-3d07-4284-8f6e-8fb3873f5333')
    AND ISNULL(gp.completion_date, p.completion_date) BETWEEN ISNULL(@start_date, '20000101') AND ISNULL(@end_date, '30000101') -- perhaps use >= AND <
    AND ISNULL(gp.show_date, p.show_date) <= CURRENT_TIMESTAMP
    AND ISNULL(gass.user_id, sass.user_id) IS NOT NULL
    AND u.user_type_id != 'ec823d98-7023-4908-8006-2e33ddf2c11b'
    -- An empty optional list disables its filter; otherwise the value must be in the list.
    AND (NOT EXISTS (SELECT 1 FROM @risk_ids) OR ISNULL(gp.risk_id, a.risk_id) IN (SELECT r.value FROM @risk_ids r))
    AND (NOT EXISTS (SELECT 1 FROM @assignee_ids) OR ISNULL(gass.user_id, sass.user_id) IN (SELECT aid.value FROM @assignee_ids aid));

Aggregate without duplicates

What I have going on here is:
Table 1: AOC_Model
AOC_ID int (Primary Key)
Model varchar(50)
Table 2: AOC_Chipset
AOC_CHIPSET_ID int (Primary Key)
CONTROLLER_ID int
CHIPSET_ID int
AOC_ID int
Table 3: Controller
CONTROLLER_ID int (Primary Key)
CONTROLLER varchar(10)
Table 4: Chipset
CONTROLLER_ID int (Primary Key)
CHIPSET_ID int (Primary Key)
CHIPSET varchar(50)
Table 5: Notes_Chipset
NOTES_CHIPSET_ID int (Primary Key)
CONTROLLER_ID int
CHIPSET_ID int
DATE date
NOTES varchar(800)
First I have a Chipset table that is joined to Controller via Controller_ID
Then I have AOC_Chipset which actually acts as Joining table between Controller and Chipset. AOC_Chipset is joined to Chipset via Controller_ID and Chipset_id
Then I have Notes_Chipset which is also joined to Chipset via Controller_ID and Chipset_id
And Finally, I have AOC_Model which is joined to AOC_Chipset via AOC_ID
I have many to many relationships going on here.
I could have one or two controllers assigned to the same AOC_ID.
I could have one or two chipsets assigned to the same controller.
I could have multiple Notes assigned to the same Chipset.
I created this query in SQL Server 2019:
SELECT
dbo.AOC_CHIPSET.AOC_ID,
string_agg(dbo.CONTROLLER.CONTROLLER, ', ') AS vControllers,
string_agg(dbo.CHIPSET.CHIPSET, ', ') AS vChipsets,
string_agg(dbo.NOTES_CHIPSET.DATE, ', ') AS vDate,
string_agg(dbo.NOTES_CHIPSET.NOTES, ', ') AS vNotes
FROM
dbo.AOC_CHIPSET
INNER JOIN
dbo.CHIPSET ON dbo.AOC_CHIPSET.CONTROLLER_ID = dbo.CHIPSET.CONTROLLER_ID
AND dbo.AOC_CHIPSET.CHIPSET_ID = dbo.CHIPSET.CHIPSET_ID
INNER JOIN
dbo.CONTROLLER ON dbo.CHIPSET.CONTROLLER_ID = dbo.CONTROLLER.CONTROLLER_ID
INNER JOIN
dbo.NOTES_CHIPSET ON dbo.CHIPSET.CONTROLLER_ID = dbo.NOTES_CHIPSET.CONTROLLER_ID
AND dbo.CHIPSET.CHIPSET_ID = dbo.NOTES_CHIPSET.CHIPSET_ID
AND dbo.CONTROLLER.CONTROLLER_ID = dbo.NOTES_CHIPSET.CONTROLLER_ID
GROUP BY
dbo.AOC_CHIPSET.AOC_ID
The problem is the result contains duplicates
I know I can use the DISTINCT, but I can't figure out where / how to place it in conjunction with string_agg?
I replaced the joins by sub-selects placed directly in the select list. This allows me to select distinct values per retrieved property. In fact, there are always 2 sub-selects per string to create. The inner one has a SELECT DISTINCT and the outer one makes the string_agg. The inner sub-select filters its rows with a WHERE-clause with AOC_ID matching the main SELECT
SELECT
ac.AOC_ID,
(SELECT string_agg(CONTROLLER, ', ') FROM
(SELECT DISTINCT CONTROLLER
FROM dbo.CONTROLLER co INNER JOIN dbo.AOC_CHIPSET ac1
ON ac1.CONTROLLER_ID = co.CONTROLLER_ID
WHERE ac1.AOC_ID = ac.AOC_ID) x) AS vControllers,
(SELECT string_agg(CHIPSET, ', ') FROM
(SELECT DISTINCT CHIPSET
FROM dbo.CHIPSET cs INNER JOIN dbo.AOC_CHIPSET ac2
ON ac2.CONTROLLER_ID = cs.CONTROLLER_ID AND ac2.CHIPSET_ID = cs.CHIPSET_ID
WHERE ac2.AOC_ID = ac.AOC_ID) y) AS vChipsets,
(SELECT string_agg([DATE], ', ') FROM
(SELECT DISTINCT [DATE]
FROM dbo.NOTES_CHIPSET nd INNER JOIN dbo.AOC_CHIPSET ac3
ON ac3.CONTROLLER_ID = nd.CONTROLLER_ID AND ac3.CHIPSET_ID = nd.CHIPSET_ID
WHERE ac3.AOC_ID = ac.AOC_ID) z) AS vDate,
(SELECT string_agg(NOTES, ', ') FROM
(SELECT DISTINCT NOTES
FROM dbo.NOTES_CHIPSET nd INNER JOIN dbo.AOC_CHIPSET ac4
ON ac4.CONTROLLER_ID = nd.CONTROLLER_ID AND ac4.CHIPSET_ID = nd.CHIPSET_ID
WHERE ac4.AOC_ID = ac.AOC_ID) z) AS vNotes
FROM
dbo.AOC_CHIPSET ac
GROUP BY
ac.AOC_ID
The SELECT DISTINCT in the sub-queries work, because the select-list does not contain any controller or chipset id. This would not be possible with joins on the outer level, since those require these ids.
I don't fully understand what the purpose/use of this query is, so forgive me if this doesn't help. But I don't think DISTINCT will be of much use to you with your AOC_ID being a unique value in each row of the duplicate data. If you got rid of the AOC_ID, then you could use DISTINCT and not have to worry about that data appearing twice. But again, if getting rid of the AOC_ID in this query is not an option, I'm as stumped as you are.
SELECT MAX(AOC_ID) as AOC_ID, vControllers, vChipsets, vDate, vNotes FROM
(
SELECT
dbo.AOC_CHIPSET.AOC_ID,
string_agg(dbo.CONTROLLER.CONTROLLER, ', ') AS vControllers,
string_agg(dbo.CHIPSET.CHIPSET, ', ') AS vChipsets,
string_agg(dbo.NOTES_CHIPSET.DATE, ', ') AS vDate,
string_agg(dbo.NOTES_CHIPSET.NOTES, ', ') AS vNotes
FROM
dbo.AOC_CHIPSET
INNER JOIN
dbo.CHIPSET ON dbo.AOC_CHIPSET.CONTROLLER_ID = dbo.CHIPSET.CONTROLLER_ID
AND dbo.AOC_CHIPSET.CHIPSET_ID = dbo.CHIPSET.CHIPSET_ID
INNER JOIN
dbo.CONTROLLER ON dbo.CHIPSET.CONTROLLER_ID = dbo.CONTROLLER.CONTROLLER_ID
INNER JOIN
dbo.NOTES_CHIPSET ON dbo.CHIPSET.CONTROLLER_ID = dbo.NOTES_CHIPSET.CONTROLLER_ID
AND dbo.CHIPSET.CHIPSET_ID = dbo.NOTES_CHIPSET.CHIPSET_ID
AND dbo.CONTROLLER.CONTROLLER_ID = dbo.NOTES_CHIPSET.CONTROLLER_ID
GROUP BY
dbo.AOC_CHIPSET.AOC_ID
) R
GROUP BY vControllers, vChipsets, vDate, vNotes

Unable to concatenate NULL values in SQL using CONCAT, COALESCE and ISNULL

I have a query with multiple joins where I want to combine records from two columns into one. If one column is empty then I want to show the other column's value as the result. I tried CONCAT, COALESCE and ISNULL, but no luck. What am I missing here?
My objective is to create one column that combines s.Script AS Original and FromAnotherTable from the query. The query below runs, but throws Invalid column name 'Original' and Invalid column name 'FromAnotherTable' when I try to use CONCAT, COALESCE or ISNULL.
SQL Query:
SELECT DISTINCT
c.Name AS CallCenter,
LTRIM(RTRIM(s.Name)) Name,
d.DNIS,
s.ScriptId,
s.Script AS Original,
(
SELECT TOP 5 CCSL.Line+'; '
FROM CallCenterScriptLine CCSL
WHERE CCSL.ScriptId = s.ScriptId
ORDER BY ScriptLineId FOR XML PATH('')
) AS FromAnotherTable,
--CONCAT(s.Script, SELECT TOP 5 CCSL.Line+'; ' FROM dbo.CallCenterScriptLine ccsl WHERE ccsl.ScriptId = s.ScriptId ORDER BY ccsl.ScriptLineId xml path(''))
--CONCAT(Original, FromAnotherTable) AS Option1,
--COALESCE(Original, '') + FromAnotherTable AS Option2,
--ISNULL(Original, '') + FromAnotherTable AS Option3,,
r.UnitName AS Store,
r.UnitNumber
FROM CallCenterScript s WITH (NOLOCK)
INNER JOIN CallCenterDNIS d WITH (NOLOCK) ON d.ScriptId = s.ScriptId
INNER JOIN CallCenter c WITH (NOLOCK) ON c.Id = s.CallCenterId
INNER JOIN CallCenterDNISRestaurant ccd WITH (NOLOCK) ON ccd.CallCenterDNISId = d.CallCenterDNISId
INNER JOIN dbo.Restaurant r WITH (NOLOCK) ON r.RestaurantID = ccd.CallCenterRestaurantId
WHERE c.Id = 5
AND (1 = 1)
AND (s.IsDeleted = 0 OR s.IsDeleted IS NULL)
ORDER BY DNIS ASC;
Output:
This works:
-- Minimal demonstration that all three approaches cope with a NULL operand.
-- Local variables are prefixed with @.
DECLARE @Column1 VARCHAR(50) = 'Foo',
        @Column2 VARCHAR(50) = NULL;
SELECT CONCAT(@Column1, @Column2);        -- CONCAT treats NULL as an empty string
SELECT COALESCE(@Column2, '') + @Column1; -- NULL replaced by '' before concatenating
SELECT ISNULL(@Column2, '') + @Column1;   -- same, using ISNULL
So I am not sure what I am missing in my original query.
Look at row 3 in the results you are getting. In your concatenated columns (Option1, 2, 3) you are getting the first script column twice. Not the first one + the second one like you expect.
The reason is because you've aliased your subquery "script" which is the same name as another column in your query, which makes it ambiguous.
Change the alias of the subquery and the problem should go away. I'm frankly surprised your query didn't raise an error.
EDIT: You can't use a column alias in another column's definition in the same level of the query. In other words, you can't do this:
SELECT
SomeColumn AS A
, (Subquery that returns a column) AS B
, A + B --this is not allowed
FROM ...
You can either create a CTE that returns the aliased columns and then concatenate them in the main query that selects from the CTE, or you have to use the original sources of the aliases, like so:
SELECT
SomeColumn AS A
, (Subquery that returns a column) AS B
, SomeColumn + (Subquery that returns a column) --this is fine
FROM ...
I took another approach where instead on creating separate column, I used ISNULL in my subQuery which returns my desired result.
Query:
SELECT DISTINCT
c.Name AS CallCenter,
LTRIM(RTRIM(s.Name)) Name,
d.DNIS,
s.ScriptId,
s.Script AS Original,
(
SELECT TOP 5 ISNULL(CCSL.Line, '')+'; ' + ISNULL(s.Script, '')
FROM CallCenterScriptLine CCSL
WHERE CCSL.ScriptId = s.ScriptId
ORDER BY ScriptLineId FOR XML PATH('')
) AS FromAnotherTable,
r.UnitName AS Store,
r.UnitNumber
FROM CallCenterScript s WITH (NOLOCK)
INNER JOIN CallCenterDNIS d WITH (NOLOCK) ON d.ScriptId = s.ScriptId
INNER JOIN CallCenter c WITH (NOLOCK) ON c.Id = s.CallCenterId
INNER JOIN CallCenterDNISRestaurant ccd WITH (NOLOCK) ON ccd.CallCenterDNISId = d.CallCenterDNISId
INNER JOIN dbo.Restaurant r WITH (NOLOCK) ON r.RestaurantID = ccd.CallCenterRestaurantId
WHERE c.Id = 5
AND (1 = 1)
AND (s.IsDeleted = 0 OR s.IsDeleted IS NULL)
ORDER BY DNIS ASC;
Here's a simplified example using table variables.
Instead of using a subquery for a field, it uses a CROSS APPLY.
And CONCAT in combination with STUFF is used to glue the strings together.
-- Sample data in table variables (table variables are named with a leading @).
declare @Foo table (fooID int identity(1,1) primary key, Script varchar(30));
declare @Bar table (barID int identity(1,1) primary key, fooID int, Line varchar(30));
insert into @Foo (Script) values
    ('Test1'), ('Test2'), (NULL);
insert into @Bar (fooID, Line) values
    (1, 'X'), (1, 'Y'), (2, NULL), (3, 'X'), (3, 'Y');

-- x.Lines holds the '; '-prefixed lines of each Foo row glued together.
-- STUFF removes the leading '; ' and CONCAT ignores a NULL Script or NULL Lines,
-- so either part may be missing without turning the result into NULL.
select
    f.fooID,
    f.Script,
    x.Lines,
    CONCAT(Script + '; ', STUFF(x.Lines, 1, 2, '')) as NewScript
from @Foo f
cross apply (
    select '; ' + b.Line
    from @Bar b
    where b.fooID = f.fooID
    FOR XML PATH('')
) x(Lines)
Result:
fooID Script Lines NewScript
----- ------- ------- -----------
1 Test1 ; X; Y Test1; X; Y
2 Test2 NULL Test2;
3 NULL ; X; Y X; Y

SQL Server CTE hierarchy keyword search

I've run into a tricky issue with recursive searching in an eCommerce shop stored procedure. Basically this single procedure will return all products factoring in basic filters and paging, and using a parent/child category table to perform recursive checks down the hierarchy. This works beautifully and the CTEs run extremely fast; however, the recent addition of a keyword search, which needs to search across the Category Name, Product Name, and Style Number, has caused dramas.
This seemed quite trivial at first as the 1st CTE already generates a table of all relevant categories in the hierarchy based on the supplied @categoryid and then joins onto the rest of the Product specific tables for all filtering. The Product Name and Style Number search works fine, but I cannot for the life of me get a Category Name search to work because it needs to search the category tree for any matches down the hierarchy tree starting from the top.
EDIT: I'm now thinking it may just be a lot easier to add a "tag" table against Products which stores all keyword related tags such as category name, product name and style etc and search directly against the tags.
For example a subset of the Category hierarchy looks like this:
Mens
- Polos
- Jerseys
- Pants
Womens
- Pants
- Shirts
- Polos
Supporters
- State Of Origin
- Mens
- Womens
- Kids
- Bulldogs
- Jerserys
- Pants
- Shirts
- Caps
- Warratahs
In my sample code below i am passing a search term of "origin mens" which should return all products within the "State of Origin" category that are also within the "Mens" category. The only thing it matches on is Product Names that start with "Origin" and nothing else because the category at the product level is not "State of Origin" as this is the parent. Any help here would be fantastic!
-- Builds the paging table of product ids that match the category hierarchy,
-- price/colour/size filters and the keyword search.
-- NOTE(review): @localityid is used below but not declared in this excerpt;
-- presumably it is a parameter of the enclosing procedure - confirm.

-- Variable Declarations (local variables are prefixed with @)
DECLARE @categoryid int
DECLARE @minprice int
DECLARE @maxprice int
DECLARE @sizefilter int
DECLARE @colourfilter int
DECLARE @searchstring varchar(255)
DECLARE @totalrows int
-- Variables values for testing
SET @categoryid = 0
SET @minprice = 0
SET @maxprice = 0
SET @sizefilter = 0
SET @colourfilter = 0
SET @searchstring = 'origin mens'
-- Setup paging table
DECLARE @indextable table (rownum int identity(1,1), recordid int);
BEGIN
    -- First run CTE recursively over all categories in hierarchy
    ;WITH categoryCTE AS (
        SELECT cat.id as CategoryId, cat.name as CategoryName
        FROM dbo.shopcategory AS cat
        WHERE (@categoryid = 0 OR cat.id = @categoryid)
          AND cat.isenabled = 1
        UNION ALL
        SELECT child.id as CategoryId, child.name as CategoryName
        FROM dbo.ShopCategory AS child
        INNER JOIN categoryCTE AS parent
            ON child.parentid = parent.CategoryId
        WHERE child.isenabled = 1
    ),
    -- Now join CTE onto products tables via linker product_shopcategory
    productsCTE AS (
        SELECT p.id, ppc.shopcategoryid, ppc.listorder as catlistorder
        FROM categoryCTE as cat
        INNER JOIN product_shopcategory ppc ON ppc.shopcategoryid = cat.CategoryId
        INNER JOIN product p ON ppc.productid = p.id
        INNER JOIN productlocality pl ON pl.productid = p.id
        -- ** SEARCH - Join List to Table function of keywords.
        -- Only the product's own category name is tested here, not its ancestors.
        INNER JOIN dbo.udf_parseList(@searchString, ' ') s
            ON (cat.CategoryName + p.Name + p.stylenumber LIKE '%' + s.array_Value + '%')
        LEFT JOIN product_quantity pq ON pq.productid = p.id AND pq.localityid = @localityid
        LEFT JOIN productcolour pc ON pc.productid = p.id
        LEFT JOIN productcolourswatch pcs ON pc.productcolourswatchid = pcs.id
        LEFT JOIN product_productsize pps ON pps.productid = p.id
        LEFT JOIN productsize ps ON pps.productsizeid = ps.id
        WHERE p.isenabled = 1
          AND pq.quantity > 1
          AND (pc.isenabled IS NULL OR pc.isenabled = 1)
          AND (@minprice = 0 OR pl.price >= @minprice)
          AND (@maxprice = 0 OR pl.price <= @maxprice)
          -- Colour Group Filters
          AND (@colourfilter = 0
               OR
               (pcs.swatchgroupid = @colourfilter AND (pq.productcolourid = pc.id AND pq.quantity > 0))
          )
          -- Size Group Filters
          AND (@sizefilter = 0
               OR
               (ps.sizegroupid = @sizefilter AND (pq.productsizeid = pps.productsizeid AND pq.quantity > 0))
          )
    )
    -- Create Paging table of results and strip out duplicates with group by
    -- (GROUP BY id already yields one row per id, so DISTINCT is not needed)
    INSERT INTO @indextable (recordid)
    SELECT id
    FROM productsCTE
    GROUP BY id
    ORDER BY id;
END
Finally solved it! I almost went down the path of creating a full tag table structure so that i could search directly against keyword tags rather than the direct data, however in trying to script a product tags table containing a nesting of the category hierarchy I found the solution which was quite simple.
In the solution procedure below I've created a new column in the CategoryCTE to hold a comma-delimited list of category names that is built recursively, and this then tracks the full tree for the supplied CategoryId. Now that I have a comma-delimited list of Category names, I can factor this into my 2nd CTE and perform a standard LIKE clause factoring in Product Name, Style Number, and Category Names. Finally, in order to make this search a little smarter, I made the keyword search inclusive of all keywords so that "mens origin" will only return products matching both of these keywords as opposed to any matches, and this was done using the NOT EXISTS clause.
Hope this helps someone else it performs very fast as well!
-- Builds the paging table of product ids that match the category hierarchy,
-- price/colour/size filters and ALL keywords of the search string.
-- NOTE(review): @localityid is used below but not declared in this excerpt;
-- presumably it is a parameter of the enclosing procedure - confirm.

-- Variable Declarations (local variables are prefixed with @)
DECLARE @categoryid int
DECLARE @minprice int
DECLARE @maxprice int
DECLARE @sizefilter int
DECLARE @colourfilter int
DECLARE @searchstring varchar(255)
DECLARE @totalrows int
-- Variables values for testing
SET @categoryid = 0
SET @minprice = 0
SET @maxprice = 0
SET @sizefilter = 0
SET @colourfilter = 0
SET @searchstring = 'origin mens'
-- Setup paging table
DECLARE @indextable table (rownum int identity(1,1), recordid int);
BEGIN
    -- First run CTE recursively over all categories in hierarchy inclusive of supplied categoryId.
    -- Tags accumulates the comma-delimited category names from the starting
    -- category down to the current one.
    ;WITH categoryCTE AS (
        SELECT cat.id as CategoryId, cat.name as CategoryName,
               CONVERT(varchar(255), cat.name) AS Tags
        FROM dbo.shopcategory AS cat
        WHERE (@categoryid = 0 OR cat.id = @categoryid)
          AND cat.isenabled = 1
        UNION ALL
        SELECT child.id as CategoryId, child.name as CategoryName,
               CONVERT(varchar(255),
                       parent.Tags + CONVERT(varchar(32), ',' + child.name)) AS Tags
        FROM dbo.ShopCategory AS child
        INNER JOIN categoryCTE AS parent
            ON child.parentid = parent.CategoryId
        WHERE child.isenabled = 1
    ),
    -- Now join CTE onto products tables via linker product_shopcategory
    productsCTE AS (
        SELECT p.id, ppc.shopcategoryid, ppc.listorder as catlistorder
        FROM categoryCTE as cat
        INNER JOIN product_shopcategory ppc ON ppc.shopcategoryid = cat.CategoryId
        INNER JOIN product p ON ppc.productid = p.id
        INNER JOIN productlocality pl ON pl.productid = p.id
        LEFT JOIN product_quantity pq ON pq.productid = p.id AND pq.localityid = @localityid
        LEFT JOIN productcolour pc ON pc.productid = p.id
        LEFT JOIN productcolourswatch pcs ON pc.productcolourswatchid = pcs.id
        LEFT JOIN product_productsize pps ON pps.productid = p.id
        LEFT JOIN productsize ps ON pps.productsizeid = ps.id
        WHERE p.isenabled = 1
          AND pq.quantity > 1
          AND (pc.isenabled IS NULL OR pc.isenabled = 1)
          AND pl.localityid = @localityid
          AND (@minprice = 0 OR pl.price >= @minprice)
          AND (@maxprice = 0 OR pl.price <= @maxprice)
          -- Keyword Search filter: keep the row only if no keyword fails to match.
          -- Each part is wrapped in ISNULL because a single NULL (e.g. no
          -- productcolour row) would make the whole concatenation NULL, the
          -- NOT LIKE test UNKNOWN, and the row would pass for any keyword.
          -- Parts are separated by a space so a keyword cannot match across
          -- two adjacent fields (keywords are split on spaces, so they never
          -- contain one).
          AND (@searchstring = '' OR NOT EXISTS
               (
                   SELECT NULL
                   FROM dbo.udf_parseList(@searchString, ' ')
                   WHERE ISNULL(cat.Tags, '') + ' ' + ISNULL(p.Name, '') + ' '
                         + ISNULL(p.stylenumber, '') + ' ' + ISNULL(pc.stylenumber, '')
                         NOT LIKE '%' + array_Value + '%'
               )
          )
          -- Colour Group Filters
          AND (@colourfilter = 0
               OR
               (pcs.swatchgroupid = @colourfilter AND (pq.productcolourid = pc.id AND pq.quantity > 0))
          )
          -- Size Group Filters
          AND (@sizefilter = 0
               OR
               (ps.sizegroupid = @sizefilter AND (pq.productsizeid = pps.productsizeid AND pq.quantity > 0))
          )
    )
    -- Create Paging table of results and strip out duplicates with group by
    -- (GROUP BY id already yields one row per id, so DISTINCT is not needed)
    INSERT INTO @indextable (recordid)
    SELECT id
    FROM productsCTE
    GROUP BY id
    ORDER BY id;
END

Resources