Convert PostgreSQL to MS SQL - sql-server

I need help converting a PostgreSQL query to MS SQL.
Below is what I have done so far, but I am having issues with the function and array areas, which I do not think are allowed in MS SQL.
Is there something I need to do to change the function? It also looks like the WHERE clause has an array in it.
I have added the SELECT statement for the #temp table, but when I create the #temp table I get errors saying "incorrect syntax".
CREATE FUNCTION pm_aggregate_report
(
_facility_ids uuid[]
, _risk_ids uuid[] DEFAULT NULL::uuid[]
, _assignee_ids uuid[] DEFAULT NULL::uuid[]
, _start_date date DEFAULT NULL::date
, _end_date date DEFAULT NULL::date
)
RETURNS TABLE
(
facility character varying
, pm_id uuid, grouped_pm boolean
, risk_id uuid
, risk character varying
, pm_status_id uuid
, user_id uuid
, assignee text
, completed_by uuid
, total_labor bigint
)
CREATE TABLE #tmp_pm_aggregate
(
facility_id VARCHAR(126),
pm_id VARCHAR(126),
grouped_pm VARCHAR(126),
risk_id VARCHAR(126),
pm_status_id VARCHAR(126),
user_id VARCHAR(126),
completed_by VARCHAR(126)
)
SELECT DISTINCT
COALESCE(gp.facility_id, a.facility_id) as facility_id,
COALESCE(p.grouped_pm_id, p.id) as pm_id,
CASE WHEN p.grouped_pm_id IS NULL THEN false ELSE true END as grouped_pm,
COALESCE(gp.risk_id, a.risk_id) as risk_id,
COALESCE(gp.pm_status_id, p.pm_status_id) as pm_status_id,
COALESCE(gass.user_id, sass.user_id) as user_id,
COALESCE(gp.completed_by, p.completed_by) as completed_by
FROM pms p
JOIN assets a
ON p.asset_id = a.id
LEFT JOIN grouped_pms gp
ON p.grouped_pm_id = gp.id
LEFT JOIN assignees sass
ON p.id = sass.record_id
AND sass.type = 'single_pm'
LEFT JOIN assignees gass
ON p.grouped_pm_id = gass.record_id
AND gass.type = 'grouped_pm'
LEFT JOIN users u
ON (sass.user_id = u.id OR gass.user_id = u.id)
WHERE a.facility_id = ANY(_facility_ids)
AND NOT a.is_component
AND COALESCE(gp.pm_status_id, p.pm_status_id) in ('f9bdfc17-3bb5-4ec0-8477-24ef05ea3b9b', '06fc910c-3d07-4284-8f6e-8fb3873f5333')
AND COALESCE(gp.completion_date, p.completion_date) BETWEEN COALESCE(_start_date, '1/1/2000') AND COALESCE(_end_date, '1/1/3000')
AND COALESCE(gp.show_date, p.show_date) <= CURRENT_TIMESTAMP
AND COALESCE(gass.user_id, sass.user_id) IS NOT NULL
AND u.user_type_id != 'ec823d98-7023-4908-8006-2e33ddf2c11b'
AND (_risk_ids IS NULL OR COALESCE(gp.risk_id, a.risk_id) = ANY(_risk_ids))
AND (_assignee_ids IS NULL OR COALESCE(gass.user_id, sass.user_id) = ANY(_assignee_ids));
SELECT
f.name as facility,
t.pm_id,
t.grouped_pm,
t.risk_id,
r.name as risk,
t.pm_status_id,
t.user_id,
u.name_last + ', ' + u.name_first as assignee,
t.completed_by,
ISNULL(gwl.total_labor, swl.total_labor) as total_labor
FROM #tmp_pm_aggregate t
JOIN facilities f
ON t.facility_id = f.id
JOIN risks r
ON t.risk_id = r.id
JOIN users u
ON t.user_id = u.id
LEFT JOIN (SELECT wl.record_id, wl.user_id, SUM(wl.labor_time) as total_labor
FROM work_logs wl
WHERE wl.type = 'single_pm'
GROUP BY wl.record_id, wl.user_id) as swl
ON t.pm_id = swl.record_id
AND t.user_id = swl.user_id
AND t.grouped_pm = false
LEFT JOIN (SELECT wl.record_id, wl.user_id, SUM(wl.labor_time) as total_labor
FROM work_logs wl
WHERE wl.type = 'grouped_pm'
GROUP BY wl.record_id, wl.user_id) as gwl
ON t.pm_id = gwl.record_id
AND t.user_id = gwl.user_id
AND t.grouped_pm = true
ORDER BY facility,
assignee,
risk;
DROP TABLE #tmp_pm_aggregate;

You can create an inline table-valued function and simply return a resultset from it. You do not need (and cannot use) a temp table, and you do not declare the returned "rowset" shape.
For the array parameters, you can use a Table Type:
CREATE TYPE dbo.GuidList AS TABLE (value uniqueidentifier NOT NULL PRIMARY KEY);
Because the table parameters are actual tables, you must query them like this: (NOT EXISTS (SELECT 1 FROM @risk_ids) OR ISNULL(gp.risk_id, a.risk_id) IN (SELECT r.value FROM @risk_ids r))
The parameters must start with @, and the table-valued parameters must be declared READONLY
There is no boolean type, you must use bit
Always use deterministic date formats for literals; yyyymmdd works for dates. Do you need to take hours and minutes into account? You haven't so far.
ISNULL generally performs better than COALESCE in SQL Server, as the compiler understands it better
You may want to pass a separate parameter showing whether you passed in anything for the optional table parameters
I suggest you look carefully at the actual query: why does it need DISTINCT? It performs poorly, and is usually a code-smell indicating poorly thought-out joins. Perhaps you need to combine the two joins on assignees, or perhaps you should use a row-numbering strategy somewhere.
CREATE FUNCTION dbo.pm_aggregate_report
(
@facility_ids dbo.GuidList READONLY
, @risk_ids dbo.GuidList READONLY
, @assignee_ids dbo.GuidList READONLY
, @start_date date
, @end_date date
)
RETURNS TABLE AS RETURN
SELECT DISTINCT -- why DISTINCT, perhaps rethink your joins
ISNULL(gp.facility_id, a.facility_id) as facility_id,
ISNULL(p.grouped_pm_id, p.id) as pm_id,
CASE WHEN p.grouped_pm_id IS NULL THEN CAST(0 AS bit) ELSE CAST(1 AS bit) END as grouped_pm,
ISNULL(gp.risk_id, a.risk_id) as risk_id,
ISNULL(gp.pm_status_id, p.pm_status_id) as pm_status_id,
ISNULL(gass.user_id, sass.user_id) as user_id,
ISNULL(gp.completed_by, p.completed_by) as completed_by
FROM pms p
JOIN assets a
ON p.asset_id = a.id
LEFT JOIN grouped_pms gp
ON p.grouped_pm_id = gp.id
LEFT JOIN assignees sass
ON p.id = sass.record_id
AND sass.type = 'single_pm'
LEFT JOIN assignees gass
ON p.grouped_pm_id = gass.record_id
AND gass.type = 'grouped_pm'
LEFT JOIN users u
ON (sass.user_id = u.id OR gass.user_id = u.id) -- is this doubling up your rows?
WHERE a.facility_id IN (SELECT f.value FROM @facility_ids f)
AND a.is_component = 0
AND ISNULL(gp.pm_status_id, p.pm_status_id) in ('f9bdfc17-3bb5-4ec0-8477-24ef05ea3b9b', '06fc910c-3d07-4284-8f6e-8fb3873f5333')
AND ISNULL(gp.completion_date, p.completion_date) BETWEEN ISNULL(@start_date, '20000101') AND ISNULL(@end_date, '30000101') -- perhaps use >= AND <
AND ISNULL(gp.show_date, p.show_date) <= CURRENT_TIMESTAMP
AND ISNULL(gass.user_id, sass.user_id) IS NOT NULL
AND u.user_type_id != 'ec823d98-7023-4908-8006-2e33ddf2c11b'
AND (NOT EXISTS (SELECT 1 FROM @risk_ids) OR ISNULL(gp.risk_id, a.risk_id) IN (SELECT r.value FROM @risk_ids r))
AND (NOT EXISTS (SELECT 1 FROM @assignee_ids) OR ISNULL(gass.user_id, sass.user_id) IN (SELECT aid.value FROM @assignee_ids aid));
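For completeness, a minimal calling sketch (the GUID value is just a placeholder; populate each list as needed, and leave the optional lists empty to mean "no filter"):
DECLARE @facility_ids dbo.GuidList;
DECLARE @risk_ids dbo.GuidList;      -- left empty: no risk filter
DECLARE @assignee_ids dbo.GuidList;  -- left empty: no assignee filter
INSERT INTO @facility_ids (value) VALUES ('00000000-0000-0000-0000-000000000000'); -- placeholder facility id
SELECT * FROM dbo.pm_aggregate_report(@facility_ids, @risk_ids, @assignee_ids, NULL, NULL);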

Related

Query works as expected, SSRS finds error?

This question was closed because someone thought it was the same issue as SSRS multi-value parameter using a stored procedure
But it is not. My report does not use a stored procedure and thus behaves differently. Also, that issue describes getting no results when multi-valued params are used, and that too is inaccurate for this scenario. So I'll try posting this again.
My report for the most part works. It is when I select more than one value from either of 2 specific params (@global, @manual) that I get this error: An expression of non-boolean type specified in a context where a condition is expected, near ','.
Here is the SQL:
DECLARE @STATE VARCHAR(2) = 'mn'
,@START DATE = '6/1/2020'
,@END DATE = '7/1/2020'
,@GLOBAL VARCHAR(50) = 'indigent fee'
,@MANUAL VARCHAR(100) = '''misc charges'',''discount'''
DROP TABLE IF EXISTS #customers, #test
SELECT DISTINCT ch.amount
,ch.vehicle_program_id
,c.customer_id
,ch.customer_charge_id
,ch.charge_type
INTO #customers
FROM customer c
JOIN customer_charge ch(NOLOCK) ON c.customer_id = ch.customer_id
JOIN service_history sh(NOLOCK) ON sh.customer_id = c.customer_id
JOIN header h(NOLOCK) ON h.service_history_id = sh.service_history_id
WHERE ch.entry_date BETWEEN @START
AND @END
AND ch.price_trigger_id IN (
16
,15
)
AND ch.source_type = 1
AND sh.service_type = 5
AND h.is_duplicate = 0;
WITH CTE_global
AS (
SELECT DISTINCT ch.charge_type
,'global' AS type
FROM customer_charge ch
JOIN store s ON ch.store_id = s.store_id
JOIN address a ON a.id = s.address_id
JOIN locality l ON a.locality_id = l.id
WHERE l.region = @state
AND ch.price_trigger_id = 16
UNION ALL
SELECT 'None'
,'global'
)
,CTE_manual
AS (
SELECT DISTINCT ch.charge_type
,'manual' AS type
FROM customer_charge ch
JOIN store s ON ch.store_id = s.store_id
JOIN address a ON a.id = s.address_id
JOIN locality l ON a.locality_id = l.id
WHERE l.region = @state
AND ch.price_trigger_id = 15
UNION ALL
SELECT 'None'
,'manual'
)
SELECT DISTINCT c.last_name
,c.first_name
,vp.account_no
,cust.charge_type
,cust.amount
,sh.service_date
,s.store_name_short
,GLOBAL = g.charge_type
,manual = m.charge_type
INTO #test
FROM vehicle_program vp(NOLOCK)
JOIN vehicle v(NOLOCK) ON v.vehicle_id = vp.vehicle_id
JOIN service_history sh(NOLOCK) ON sh.vehicle_program_id = vp.program_id
AND service_type = 5
JOIN customer c(NOLOCK) ON v.customer_id = c.customer_id
AND c.customer_id = sh.customer_id
JOIN store s(NOLOCK) ON vp.current_store_id = s.store_id
JOIN #customers cust ON cust.customer_id = c.customer_id
AND cust.vehicle_program_id = sh.vehicle_program_id
JOIN customer_condition cc(NOLOCK) ON c.customer_id = cc.customer_id
JOIN customer_charge ch(NOLOCK) ON ch.customer_id = c.customer_id
JOIN service_charge sc ON sc.service_history_id = sh.service_history_id
AND sc.customer_charge_id = cust.customer_charge_id
JOIN header h(NOLOCK) ON h.service_history_id = sh.service_history_id
JOIN CTE_global g ON g.charge_type = ch.charge_type
JOIN CTE_manual m ON m.charge_type = ch.charge_type
WHERE cc.state_of_conviction = @state
AND sh.service_date BETWEEN @START
AND @END
AND h.is_duplicate = 0
SELECT *
FROM #test
WHERE GLOBAL IN (
CASE
WHEN @global IN ('None')
THEN charge_type
WHEN @global NOT IN ('None')
THEN @global
END
)
OR manual IN (
CASE
WHEN @manual IN ('None')
THEN charge_type
WHEN @manual NOT IN ('None')
THEN @manual
END
)
For clarity, the last bit of the query contains some logic to make these two params optional: by selecting 'None', that param is basically rendered useless. It seems clear that the issue is with this last bit, specifically the WHERE clause using the CASE expression. When I remove it, I don't get the error, but of course I lose my logic. What's most confusing is that the error indicates an issue with a comma, but there's no comma in that part of the SQL. Any help is greatly appreciated.
Assuming users will only ever select 'None' from the list on its own and never with another value, the following should work.
WHERE (GLOBAL IN (@Global) OR @Global = 'None')
AND
(manual IN (@manual) OR @manual = 'None')
this question was closed because someone thought it was the same issue
It is a dupe, but you kind of have to read between the lines in the other answers to apply it to this scenario. The point is that SSRS replaces multi-select parameters with a delimited list of values in the query body itself, and this transformation can lead either to unexpectedly getting no results or to an illegal SQL query, depending on where the parameter marker appears in the original query.
I'll make it a bit clearer exactly what's going on. You can repro this behavior with this as your Data Set query:
drop table if exists #foo
create table #foo(charge_type varchar(200) , global varchar(200))
select *
from #foo
WHERE GLOBAL IN (
CASE
WHEN @global IN ('None')
THEN charge_type
WHEN @global NOT IN ('None')
THEN @global
END
)
And configure @global as a parameter that allows multi-select. When the user selects multiple values, SSRS transforms the query into:
drop table if exists #foo
create table #foo(charge_type varchar(200) , global varchar(200))
select *
from #foo
WHERE GLOBAL IN (
CASE
WHEN N'a',N'b' IN ('None')
THEN charge_type
WHEN N'a',N'b' NOT IN ('None')
THEN N'a',N'b'
END
)
Which fails with An expression of non-boolean type specified in a context where a condition is expected, near ','.
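A pattern that survives the expansion (a sketch, not tested against the original report) is to keep the parameter only inside IN lists, so that every occurrence still expands to a legal value list:
select *
from #foo
WHERE GLOBAL IN (@global)    -- expands to IN (N'a',N'b'), still legal SQL
OR 'None' IN (@global)       -- the "parameter switched off" check also stays inside an IN list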

SQL Server : Cross Join in GreenPlum (Pre-LATERAL version of Postgres)

I'm attempting to convert the following SQL Server query into a GreenPlum version of the query:
INSERT INTO #TMP1 (part_id, file_id, location, measure_date)
SELECT DISTINCT
pt.part_id, qf.file_id, qf.edl_desc, pt.measure_date
FROM
part pt WITH (NOLOCK)
INNER JOIN
file_model qm with (nolock) on qm.file_model_id = pt.file_model_id
INNER JOIN
file qf with (nolock) on qf.file_id = qm.file_id;
INSERT INTO #part_list (file_id, part_id, measure_date)
SELECT DISTINCT
t1.file_id, k.part_id, k.measure_date
FROM
#TMP1 t1 WITH (NOLOCK)
CROSS APPLY
(SELECT DISTINCT TOP (300)
t2.part_id, t2.measure_date
FROM
#TMP1 t2 WITH (NOLOCK)
WHERE
t1.file_id = t2.file_id and t1.location = t2.location
ORDER BY
t2.measure_date DESC) k
WHERE
t1.measure_date >= dateadd(day, -30, getdate());
The idea is that the final table contains up to the 300 most recent parts for every part program that was active (i.e. manufactured something) in the last 30 days.
Per the answers to this question, I am aware that LATERAL JOIN would do it, except my organization is using an older version of Postgres that does not have LATERAL, so I was left with implementing the following function instead:
CREATE FUNCTION BuildActiveParts(p_day INT, p_n INT)
RETURNS SETOF RECORD --TABLE (part_id bigint,file_id int, measure_date timestamp, location varchar(255))
AS $$
DECLARE
part_active RECORD;
part_list RECORD;
BEGIN
FOR part_active IN
SELECT DISTINCT qf.file_id, qf.location
FROM part pt
INNER JOIN file_model qm on qm.file_model_id = pt.file_model_id
INNER JOIN file qf on qf.file_id = qm.file_id WHERE pt.measure_date >= current_date - p_day LOOP
FOR part_list IN
SELECT DISTINCT pt.part_id, qf.file_id, pt.measure_date, qf.location
FROM part pt
INNER JOIN file_model qm on qm.file_model_id = pt.file_model_id
INNER JOIN file qf on qf.file_id = qm.file_id WHERE qf.file_id = part_active.file_id
AND qf.location = part_active.location
ORDER BY pt.measure_date DESC LIMIT p_n LOOP
RETURN NEXT part_list;
END LOOP;
END LOOP;
END
$$ LANGUAGE plpgsql;
-- Later used in:
--Build list of all active programs in last p_day days. This temporary table is a component of a larger function that produces a table based on this and other calculations, called daily.
-- Note: this insert yields 'function cannot execute because it accesses relation'
INSERT INTO TMP_part_list ( part_id, file_id, measure_date, location)
SELECT DISTINCT * FROM BuildActiveParts(p_day, p_n) AS active_parts (part_id int, file_id text, measure_date timestamp, location text )
;
Unfortunately, this function is used in inserts to another table (an unavoidable reality of my business requirements), so while the function returns nice happy results when run in isolation, I get a big angry function cannot execute on segment because it accesses relation when I try to use it for its intended purpose. While I've seen suggestions to the effect of "make a VIEW instead", that's not really an option because a view resulting from the script this functionality is a part of would take too long to query.
What can I do, beyond embarking on a months-long excursion through a jungle of red tape to convince my organization to update their stuff, to resolve this?
Edit: Here are some attempts based on comments:
Attempt with a function; it did not work because of "function cannot execute on segment because it accesses relation":
DROP FUNCTION IF EXISTS BuildRecentParts(TEXT, TEXT, INT);
CREATE FUNCTION BuildRecentParts(file_id TEXT, location_in TEXT, p_n INT)
RETURNS SETOF RECORD --TABLE (measure_date timestamp, part_id bigint)
AS $$
DECLARE
part_list RECORD;
BEGIN
FOR part_list IN
SELECT DISTINCT pt.measure_date, pt.part_id
FROM part pt
INNER JOIN file_model qm on qm.file_model_id = pt.file_model_id
INNER JOIN file qf on qf.file_id = qm.file_id
WHERE qf.file_id = file_id
AND qf.edl_desc = location_in
ORDER BY pt.measure_date DESC LIMIT p_n LOOP
RETURN NEXT part_list;
END LOOP;
END
$$ LANGUAGE plpgsql;
SELECT DISTINCT qf.file_id, qf.edl_desc, (SELECT pti.measure_date, pti.part_id FROM part pti
INNER JOIN file_model qmi on qmi.file_model_id = pti.file_model_id
INNER JOIN file qfi on qfi.file_id = qmi.file_id
WHERE qfi.file_id = qf.file_id
AND qfi.edl_desc = qf.edl_desc
ORDER BY pti.measure_date DESC LIMIT 300)
FROM part pt
INNER JOIN file_model qm on qm.file_model_id = pt.file_model_id
INNER JOIN file qf on qf.file_id = qm.file_id
WHERE pt.measure_date >= current_date - 30 ;
Attempt without a function; it will not work because the subquery returns multiple columns:
CREATE TEMPORARY TABLE TMP_TMP1 (part_id bigint, file_id varchar(255), location varchar(255), measure_date timestamp) DISTRIBUTED BY (part_id);
INSERT INTO TMP_TMP1 (part_id, file_id, location, measure_date)
SELECT DISTINCT pt.part_id, qf.file_id, qf.edl_desc, pt.measure_date
FROM part pt
INNER JOIN file_model qm on qm.file_model_id = pt.file_model_id
INNER JOIN file qf on qf.file_id = qm.file_id;
ANALYZE TMP_TMP1;
SELECT DISTINCT t1.file_id, t1.location, (SELECT t2.measure_date, t2.part_id FROM TMP_TMP1 t2
WHERE t2.file_id = t1.file_id
AND t2.location = t1.location
ORDER BY t2.measure_date DESC LIMIT 300)
FROM TMP_TMP1 t1
WHERE t1.measure_date >= current_date - 30;
I also attempted a recursive CTE, but found that it was unsupported.
Between the answers here and input from architects at my organization, we decided that we have hit a GreenPlum limitation that would be too costly to overcome; the logic that performs the cross join will be shifted to the R script that calls the stored procedure this functionality would have been part of.
Well, Greenplum doesn't have dirty reads, so you can't implement the nolock hint you have. That is probably a good thing, too. I would recommend removing it from SQL Server as well.
I think the best solution is to use an analytic (window) function here instead of that function, or even a correlated subquery, which Greenplum supports. This approach is also more efficient in SQL Server.
SELECT sub2.part_id, sub2.location, sub2.measure_date
FROM (
SELECT sub1.part_id, sub1.location, sub1.measure_date, row_number() over(partition by sub1.part_id order by sub1.measure_date desc) as rownum
FROM (
SELECT pt.part_id, qf.edl_desc as location, pt.measure_date
FROM part pt
INNER JOIN file_model qm on qm.file_model_id = pt.file_model_id
INNER JOIN file qf on qf.file_id = qm.file_id
WHERE pt.measure_date >= (now() - interval '30 days')
GROUP BY pt.part_id, qf.edl_desc, pt.measure_date
) AS sub1
) as sub2
WHERE sub2.rownum <= 300;
Now, I had to guess at your data, because it looks like you could get into trouble with your original query if you have multiple qf.qcc_file_desc values, since your original grouping includes this column. If you had multiple values, things would get ugly.
I'm also not 100% sure on the row_number function without knowing your data. It might be this instead:
row_number() over(partition by sub1.part_id, sub1.location order by sub1.measure_date desc)
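If the goal is the original CROSS APPLY semantics, up to 300 most recent parts per file/location combination, the partition would be by file and location rather than by part (a sketch; column names are taken from the question, and the 30-day filter is folded into the same scan, which simplifies the original two-step logic slightly):
SELECT sub.part_id, sub.file_id, sub.location, sub.measure_date
FROM (
SELECT pt.part_id, qf.file_id, qf.edl_desc AS location, pt.measure_date,
row_number() OVER (PARTITION BY qf.file_id, qf.edl_desc ORDER BY pt.measure_date DESC) AS rownum
FROM part pt
INNER JOIN file_model qm ON qm.file_model_id = pt.file_model_id
INNER JOIN file qf ON qf.file_id = qm.file_id
WHERE pt.measure_date >= (now() - interval '30 days')
) AS sub
WHERE sub.rownum <= 300;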

Update records SQL?

When I first started, this project seemed very simple: two tables, and field tbl1_USERMASTERID in Table 1 should be updated from field tbl2_USERMASTERID in Table 2. After I looked more deeply into Table 2, there is no unique ID that I can use as a key to join these two tables. The only way to match records from Table 1 and Table 2 is on FIRST_NAME, LAST_NAME and DOB. So I have to find records in Table 1 where:
tbl1_FIRST_NAME equals tbl2_FIRST_NAME
AND
tbl1_LAST_NAME equals tbl2_LAST_NAME
AND
tbl1_DOB equals tbl2_DOB
and then update the USERMASTERID field. I was afraid that this could cause some duplicates and some users would end up with a USERMASTERID that does not belong to them. So if I find more than one record matching on first name, last name and DOB, those records should not be updated; I would like to just skip them and leave them blank. That way I wouldn't populate an invalid USERMASTERID. I'm not sure of the best way to approach this problem: should I use SQL or ColdFusion (my server-side language)? Also, how do I detect more than one matching record?
Here is what I have so far:
UPDATE Table1 AS tbl1
LEFT OUTER JOIN Table2 AS tbl2
ON tbl1.dob = tbl2.dob
AND tbl1.fname = tbl2.fname
AND tbl1.lname = tbl2.lname
SET tbl1.usermasterid = tbl2.usermasterid
WHERE LTRIM(RTRIM(tbl1.usermasterid)) = ''
Here is query where I tried to detect duplicates:
SELECT DISTINCT
tbl1.FName,
tbl1.LName,
tbl1.dob,
COUNT(*) AS count
FROM Table1 AS tbl1
LEFT OUTER JOIN Table2 AS tbl2
ON tbl1.dob = tbl2.dob
AND tbl1.FName = tbl2.first
AND tbl1.LName = tbl2.last
WHERE LTRIM(RTRIM(tbl1.usermasterid)) = ''
AND LTRIM(RTRIM(tbl1.first)) <> ''
AND LTRIM(RTRIM(tbl1.last)) <> ''
AND LTRIM(RTRIM(tbl1.dob)) <> ''
GROUP BY tbl1.FName,tbl1.LName,tbl1.dob
Some data after I tested the query above:
First Last DOB Count
John Cook 2008-07-11 2
Kate Witt 2013-06-05 1
Deb Ruis 2016-01-22 1
Mike Bennet 2007-01-15 1
Kristy Cruz 1997-10-20 1
Colin Jones 2011-10-13 1
Kevin Smith 2010-02-24 1
Corey Bruce 2008-04-11 1
Shawn Maiers 2016-08-28 1
Alenn Fitchner 1998-05-17 1
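(A quick refinement of the detection query, since only combinations that match more than once are a problem: appending a HAVING clause lists just the ambiguous name/DOB combinations. A sketch, with the non-blank checks omitted for brevity.)
SELECT tbl1.FName, tbl1.LName, tbl1.dob, COUNT(*) AS count
FROM Table1 AS tbl1
LEFT OUTER JOIN Table2 AS tbl2
ON tbl1.dob = tbl2.dob
AND tbl1.FName = tbl2.first
AND tbl1.LName = tbl2.last
WHERE LTRIM(RTRIM(tbl1.usermasterid)) = ''
GROUP BY tbl1.FName, tbl1.LName, tbl1.dob
HAVING COUNT(*) > 1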
If anyone has an idea how I can prevent/skip updating duplicate records, or how to improve this query, please let me know. Thank you.
You could check for and avoid duplicate matches using WITH common_table_expression (Transact-SQL) along with row_number(), like so:
with cte as (
select
t.fname
, t.lname
, t.dob
, t.usermasterid
, NewUserMasterId = t2.usermasterid
, rn = row_number() over (partition by t.fname, t.lname, t.dob order by t2.usermasterid)
from table1 as t
inner join table2 as t2 on t.dob = t2.dob
and t.fname = t2.fname
and t.lname = t2.lname
and ltrim(rtrim(t.usermasterid)) = ''
)
--/* confirm these are the rows you want updated
select *
from cte as t
where t.NewUserMasterId != ''
and not exists (
select 1
from cte as i
where t.dob = i.dob
and t.fname = i.fname
and t.lname = i.lname
and i.rn>1
);
--*/
/* update those where only 1 usermasterid matches this record
update t
set t.usermasterid = t.NewUserMasterId
from cte as t
where t.NewUserMasterId != ''
and not exists (
select 1
from cte as i
where t.dob = i.dob
and t.fname = i.fname
and t.lname = i.lname
and i.rn>1
);
--*/
I use the cte to extract out the sub query for readability. Per the documentation, a common table expression (cte):
Specifies a temporary named result set, known as a common table expression (CTE). This is derived from a simple query and defined within the execution scope of a single SELECT, INSERT, UPDATE, or DELETE statement.
row_number() assigns a number to each row, starting at 1 within each partition of t.fname, t.lname, t.dob. Having the rows numbered allows us to check for the existence of duplicates with the not exists() clause and its ... and i.rn>1 condition.
You could use a CTE to filter out the duplicates from Table1 before joining:
; with CTE as (select *
, count(ID) over (partition by LastName, FirstName, DoB) as IDs
from Table1)
update a
set a.ID = b.ID
from Table2 a
left join CTE b
on a.FirstName = b.FirstName
and a.LastName = b.LastName
and a.Dob = b.Dob
and b.IDs = 1
This will work provided there are no exact duplicates (same demographics and same ID) in table 1. If there are exact duplicates, they will also be excluded from the join, but you can filter them out before the CTE to avoid this.
Please try the SQL below:
UPDATE Table1 AS tbl1
INNER JOIN Table2 AS tbl2
ON tbl1.dob = tbl2.dob
AND tbl1.fname = tbl2.fname
AND tbl1.lname = tbl2.lname
LEFT JOIN Table2 AS tbl3
ON tbl3.dob = tbl2.dob
AND tbl3.fname = tbl2.fname
AND tbl3.lname = tbl2.lname
AND tbl3.usermasterid <> tbl2.usermasterid
SET tbl1.usermasterid = tbl2.usermasterid
WHERE LTRIM(RTRIM(tbl1.usermasterid)) = ''
AND tbl3.usermasterid is null
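That UPDATE ... JOIN ... SET form is MySQL-style syntax; if the target is SQL Server (as in the first answer), the same self-anti-join idea would be written with UPDATE ... FROM, roughly like this (a sketch):
UPDATE tbl1
SET tbl1.usermasterid = tbl2.usermasterid
FROM Table1 AS tbl1
INNER JOIN Table2 AS tbl2
ON tbl1.dob = tbl2.dob
AND tbl1.fname = tbl2.fname
AND tbl1.lname = tbl2.lname
LEFT JOIN Table2 AS tbl3
ON tbl3.dob = tbl2.dob
AND tbl3.fname = tbl2.fname
AND tbl3.lname = tbl2.lname
AND tbl3.usermasterid <> tbl2.usermasterid
WHERE LTRIM(RTRIM(tbl1.usermasterid)) = ''
AND tbl3.usermasterid IS NULL;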

TSQL Group By & Count not aggregating as expected

I have a query that returns 6 rows, and I want to aggregate the information to provide a single row with a count of instances. The query without aggregates returns the correct data, but when I add a GROUP BY and COUNT to the query it returns 2 rows.
The underlying ID (SR01.ReportKey) shown in the first result has two records, so I think the GROUP BY is somehow using this field in the grouping.
NOTE: The ReportKey is not actually used in the query; I just had it in the first result for information purposes.
Question :
Any idea why the Group By is not grouping all the rows into a single result with a count of 6?
Without aggregate
Query :
SELECT
'Open' AS RecStatus,
ISNULL(UWZone.UWZoneID,'') AS ZoneID,
ISNULL(UWZone.UWZoneName,'') AS ZoneName,
Branch.BranchID,
ISNULL(Branch.BranchName,'') AS BranchName,
UW.UWID AS ServicingRep,
ISNULL(UW.UWName,'') + '/' + ISNULL(UA.UWName, '') AS RepName
FROM ProductivityRecommendations
INNER JOIN SR01 ON SR01.ReportKey = ProductivityRecommendations.ReportKey
LEFT JOIN UW ON SR01.Underwriter = UW.UWID
LEFT JOIN UW AS UA ON SR01.UA = UA.UWID
LEFT JOIN Branch ON SR01.ProdBranch = Branch.BranchID
LEFT JOIN UWZone ON UWZone.UWZoneAbbrev = Branch.UWZone
WHERE ISNULL(SR01.ServicingBranch,'-') <> '-'
AND ProductivityRecommendations.DateComplete BETWEEN @DateFrom AND @DateTo
AND ProductivityRecommendations.RecCriticality IN ('CRI', 'CCM')
AND ProductivityRecommendations.RecStatus IN ('N','O','U','A','R')
AND DateRecIssued IS NOT NULL
AND (@Zone IS NULL OR (UWZone.UWZoneID IN (SELECT val FROM ufn_SplitMax(@Zone ,','))))
AND (@Branch IS NULL OR (Branch.BranchID IN (SELECT val FROM ufn_SplitMax(@Branch ,','))))
AND (@RepID IS NULL OR (SR01.Underwriter IN(SELECT val FROM ufn_SplitMax(@RepID ,','))) OR @RepID IS NULL OR (SR01.UA IN(SELECT val FROM ufn_SplitMax(@RepID ,','))))
AND (@InsuredNumber IS NULL OR (ProductivityRecommendations.CustNum IN (SELECT val FROM ufn_SplitMax(@InsuredNumber ,','))))
Results :
Adding aggregates
Query :
SELECT
'Open' AS RecStatus,
ISNULL(UWZone.UWZoneID,'') AS ZoneID,
ISNULL(UWZone.UWZoneName,'') AS ZoneName,
Branch.BranchID,
ISNULL(Branch.BranchName,'') AS BranchName,
UW.UWID AS ServicingRep,
ISNULL(UW.UWName,'') + '/' + ISNULL(UA.UWName, '') AS RepName,
COUNT(ProductivityRecommendations.RecStatus) AS Requests
FROM ProductivityRecommendations
INNER JOIN SR01 ON SR01.ReportKey = ProductivityRecommendations.ReportKey
LEFT JOIN UW ON SR01.Underwriter = UW.UWID
LEFT JOIN UW AS UA ON SR01.UA = UA.UWID
LEFT JOIN Branch ON SR01.ProdBranch = Branch.BranchID
LEFT JOIN UWZone ON UWZone.UWZoneAbbrev = Branch.UWZone
WHERE ISNULL(SR01.ServicingBranch,'-') <> '-'
AND ProductivityRecommendations.DateComplete BETWEEN @DateFrom AND @DateTo
AND ProductivityRecommendations.RecCriticality IN ('CRI', 'CCM')
AND ProductivityRecommendations.RecStatus IN ('N','O','U','A','R')
AND DateRecIssued IS NOT NULL
AND (@Zone IS NULL OR (UWZone.UWZoneID IN (SELECT val FROM ufn_SplitMax(@Zone ,','))))
AND (@Branch IS NULL OR (Branch.BranchID IN (SELECT val FROM ufn_SplitMax(@Branch ,','))))
AND (@RepID IS NULL OR (SR01.Underwriter IN(SELECT val FROM ufn_SplitMax(@RepID ,','))) OR @RepID IS NULL OR (SR01.UA IN(SELECT val FROM ufn_SplitMax(@RepID ,','))))
AND (@InsuredNumber IS NULL OR (ProductivityRecommendations.CustNum IN (SELECT val FROM ufn_SplitMax(@InsuredNumber ,','))))
GROUP BY UWZone.UWZoneID, UWZone.UWZoneName, Branch.BranchID, Branch.BranchName, SR01.ServicingRep, UW.UWID, ISNULL(UW.UWName,'') + '/' + ISNULL(UA.UWName, '')
Results :
As @Johan said in a comment:
This should give you 1 row only in your described scenario:
GROUP BY
ISNULL(UWZone.UWZoneID,''),
ISNULL(UWZone.UWZoneName,''),
Branch.BranchID,
ISNULL(Branch.BranchName,'') ,
UW.UWID,
ISNULL(UW.UWName,''),
ISNULL(UA.UWName, '')
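The likely reason this matters: the SELECT wraps the columns in ISNULL(..., ''), but the original GROUP BY used the raw columns, so a NULL and an empty string land in different groups even though they display identically. A minimal repro with a hypothetical table variable:
DECLARE @t TABLE (ZoneName varchar(20) NULL);
INSERT INTO @t VALUES (NULL), ('');
-- Grouping by the raw column keeps NULL and '' apart: 2 rows
SELECT ISNULL(ZoneName, '') AS ZoneName, COUNT(*) AS Requests
FROM @t
GROUP BY ZoneName;
-- Grouping by the same expression used in the SELECT collapses them: 1 row
SELECT ISNULL(ZoneName, '') AS ZoneName, COUNT(*) AS Requests
FROM @t
GROUP BY ISNULL(ZoneName, '');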
Try a "select distinct" of the columns that you are grouping by, and also a LEN to see if they have spaces or other char that are not seen, and also test for NULLs. Then, you decide how to handel these columns, using ISNULL, COALESCE, CASE, and/or WHERE statements, depending on what you need.

SQL Server: breaking a multiple join query into smaller ones and creating a function

I have a complex query with multiple joins in it, which runs multiple times in my application. I want to write this query as a function, breaking it into smaller pieces inside the function. As a newbie, I have limited knowledge of SQL Server.
The following is the query:
SELECT
ts.lable as label,
ts.percentage as rate
FROM
TaxSet ts
JOIN
UserInfo u ON u.userID = ?
AND u.countryID = ts.countryId
AND (ts.stateId IS NULL OR ts.stateId = 0 OR LEN(ts.stateId) < 1)
JOIN
Users us ON u.userID = us.id
JOIN
Users p ON us.parentID = p.id
AND ts.ispID = p.id
JOIN
ProductType pt ON ts.productTypeID = pt.id
WHERE
startDate <= getutcdate()
AND getutcdate() <= endDate
AND pt.identifier = ?
AND ts.id NOT IN (SELECT eu.ispTaxSettingId
FROM ExemptUsers eu
WHERE eu.ExemptUserId = ?)
Now, how can I write a function by breaking this query into smaller ones?
Thanks in advance.
May I ask why you want to split it into functions? I reformatted your code and have put it into a stored procedure for now. My thinking is that you want to pass through an Identifier and a UserID, which are the parameters of your query.
I have modified the query and removed the NOT IN clause. It has been replaced by a LEFT JOIN to ExemptUsers on u.id = eu.ExemptUserID, and then an addition to the WHERE clause to ensure eu.ExemptUserID is NULL. This is basically a clearer way of saying "if the userID exists in table ExemptUsers, do not bring back results for that user".
In addition, I have removed the join to Users p, as I can't see that it was being used in any way, unless you want to ensure that the user has a parent?
CREATE PROCEDURE wsp_StoredProcName
(@UserID int,
@Identifier int)
AS
BEGIN
SELECT
ts.lable as label,
ts.percentage as rate
FROM
TaxSet ts
INNER JOIN UserInfo u ON u.userID = ts.UserID
AND u.countryID = ts.countryId
INNER JOIN Users us on u.userID = us.id
INNER JOIN ProductType pt on ts.productTypeID = pt.id
LEFT JOIN ExemptUsers eu on u.id = eu.ExemptUserID
WHERE
(
ts.UserID = @UserID
and pt.identifier = @Identifier
and startDate <= getutcdate()
and getutcdate() <= endDate
AND eu.ExemptUserID IS NULL
and
(
ts.stateId is null or ts.stateId = 0 or len(ts.stateId) < 1
)
)
END
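The procedure would then be called with the two parameters, e.g. (placeholder values):
EXEC wsp_StoredProcName @UserID = 42, @Identifier = 7;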
After all, you select from TaxSet where certain conditions must be met: the date range, the state, a relation to a particular user and its parent, a relation to a particular product type, and the non-existence of a particular exempt user. So use EXISTS and NOT EXISTS throughout your query in order to make it plain to the reader and the DBMS what you want to achieve. The more straightforward a query, the easier it often is for the optimizer to deal with it.
select
ts.lable as label,
ts.percentage as rate
from taxset ts
where getutcdate() between ts.startdate and ts.enddate
and (stateid is null or stateid between 0 and 9)
and exists
(
select *
from users u
join userinfo ui on ui.userid = u.id
where u.id = ?
and ui.countryid = ts.countryid
and u.parentid = ts.ispid
)
and exists
(
select *
from producttype pt
where pt.identifier = ?
and pt.id = ts.producttypeid
)
and not exists
(
select *
from exemptusers eu
where eu.exemptuserid = ?
and eu.isptaxsettingid = ts.id
);
As others have mentioned: when you split a query into smaller parts and execute them separately, it usually becomes slower, not faster. This is because a DBMS is built to do exactly this in the most efficient way internally. Of course, sometimes a DBMS's optimizer fails to find a good execution plan. You may want to look at the execution plan and check whether you find it appropriate.