joining on count and rank the result t sql - sql-server

Here's my Count_query:
Declare #yes_count decimal;
Declare #no_count decimal;
set #yes_count=(Select count(*) from Master_Data where Received_Data='Yes');
set #no_count=(Select count(*) from Master_Data where Received_Data='No');
select #yes_count As Yes_Count,#no_count as No_Count,(#yes_count/(#yes_count+#no_count)) As Submission_Count
I am having trouble making joins on these two queries
This is the rest of the query:
Select Distinct D.Member_Id,d.Name,d.Region_Name, D.Domain,e.Goal_Abbreviation,
e.Received_Data, case when Received_Data = 'Service Not Provided' then null
when Received_Data = 'No' then null else e.Improvement end as
Percent_Improvement , case when Received_Data = 'Service Not Provided' then null
when Received_Data = 'No' then null else e.Met_40_20 end as Met_40_20
FROM (
select distinct member_Domains.*,
(case when NoData.Member_Id is null then 'Participating' else ' ' end) as Participating
from
(
select distinct members.Member_Id, members.Name, Members.Region_Name,
case when Domains.Goal_Abbreviation = 'EED Reduction' then 'EED'
When Domains.Goal_Abbreviation = 'Pressure Ulcers' then 'PRU'
when Domains.Goal_Abbreviation = 'Readmissions' then 'READ' else Domains.Goal_Abbreviation end as Domain from
(select g.* from Program_Structure as ps inner join Goal as g on ps.Goal_Id = g.Goal_Id
and ps.Parent_Goal_ID = 0) as Domains
cross join
(select distinct hc.Member_ID, hc.Name,hc.Region_Name from zsheet as z
inner join Hospital_Customers$ as hc on z.CCN = hc.Mcare_Id) as Members
) as member_Domains
left outer join Z_Values_Hospitals as NoData on member_Domains.member_ID = NoData.Member_Id
and Member_Domains.Domain = noData.ReportName) D
Left Outer JOIN
(SELECT B.Member_ID, B.Goal_Abbreviation, B.minRate, C.maxRate, B.BLine, C.Curr_Quarter, B.S_Domain,
(CASE WHEN B.Member_ID IN
(SELECT member_id
FROM Null_Report
WHERE ReportName = B.S_Domain) THEN 'Service Not Provided' WHEN Curr_Quarter = 240 THEN 'Yes' ELSE 'No' END) AS Received_Data,
ROUND((CASE WHEN minRate = 0 AND maxRate = 0 THEN 0 WHEN minRate = 0 AND maxRate > 0 THEN 1 ELSE (((maxRate - minRate) / minRate) * 100) END), .2) AS Improvement,
(CASE WHEN ((CASE WHEN minRate = 0 AND maxRate = 0 THEN 0 WHEN minRate = 0 AND maxRate > 0 THEN 1 ELSE (maxRate - minRate) / minRate END)) <= - 0.4 OR
maxRate = 0 THEN 'Yes' WHEN ((CASE WHEN minRate = 0 AND maxRate = 0 THEN 0 WHEN minRate = 0 AND maxRate > 0 THEN 1 ELSE (maxRate - minRate) / minRate END))
<= - 0.2 OR maxRate = 0 THEN 'Yes' ELSE 'No' END) AS Met_40_20
FROM (SELECT tab.Member_ID, tab.Measure_Value AS minRate, tab.Goal_Abbreviation, A.BLine, tab.S_Domain
FROM Measure_Table_Description AS tab INNER JOIN
(SELECT DISTINCT
Member_ID AS new_memid, Goal_Abbreviation AS new_measure, MIN(Reporting_Period_ID) AS BLine, MAX(Reporting_Period_ID)
AS Curr_Quarter
FROM Measure_Table_Description
WHERE (Member_ID > 1) AND (Measure_Value IS NOT NULL) AND (Measure_ID LIKE '%O%')
GROUP BY Goal_Abbreviation, Member_ID) AS A ON tab.Member_ID = A.new_memid AND tab.Reporting_Period_ID = A.BLine AND
tab.Goal_Abbreviation = A.new_measure) AS B FULL OUTER JOIN
(SELECT tab.Member_ID, tab.Measure_Value AS maxRate, tab.Goal_Abbreviation, A_1.Curr_Quarter
FROM Measure_Table_Description AS tab INNER JOIN
(SELECT DISTINCT
Member_ID AS new_memid, Goal_Abbreviation AS new_measure,
MIN(Reporting_Period_ID) AS BLine, MAX(Reporting_Period_ID)
AS Curr_Quarter
FROM Measure_Table_Description AS Measure_Table_Description_1
WHERE (Member_ID >1) AND (Measure_Value IS NOT NULL) AND (Measure_ID LIKE '%O%')
GROUP BY Goal_Abbreviation, Member_ID) AS A_1 ON tab.Member_ID = A_1.new_memid
AND tab.Reporting_Period_ID = A_1.Curr_Quarter AND
tab.Goal_Abbreviation = A_1.new_measure) AS C ON B.Member_ID = C.Member_ID
WHERE (B.Goal_Abbreviation = C.Goal_Abbreviation) ) E ON D.Member_Id = E.Member_ID AND d.Domain = E.S_Domain
ORDER BY D.Domain,D.Member_ID
How do I get a count of the 'yes'/ (count(yes)+count(no)) for each member_ID as column1 and also display the rank of each member_ID against all the member_IDs in the result as column2. I have come up with a query that generates the count for the entire table, but how do I restrict it each Member_ID.
Thanks for your help.

I haven't taken the time to digest your provided query, but if abstracted to the concept of having an aggregate over a range of data repeated on each row, you should look at using windowing functions. There are other methods, such as using a CTE to do your aggregation and then JOINing back to your detailed data. That might work better for more complex calculations, but the window functions are arguably the more elegant option.
DECLARE #MasterData AS TABLE
(
MemberID varchar(50),
MemberAnswer int
);
INSERT INTO #MasterData (MemberID, MemberAnswer) VALUES ('Jim', 1);
INSERT INTO #MasterData (MemberID, MemberAnswer) VALUES ('Jim', 0);
INSERT INTO #MasterData (MemberID, MemberAnswer) VALUES ('Jim', 1);
INSERT INTO #MasterData (MemberID, MemberAnswer) VALUES ('Jim', 1);
INSERT INTO #MasterData (MemberID, MemberAnswer) VALUES ('Jane', 1);
INSERT INTO #MasterData (MemberID, MemberAnswer) VALUES ('Jane', 0);
INSERT INTO #MasterData (MemberID, MemberAnswer) VALUES ('Jane', 1);
-- Method 1, using windowing functions (preferred for performance and syntactical compactness)
SELECT
MemberID,
MemberAnswer,
CONVERT(numeric(19,4),SUM(MemberAnswer) OVER (PARTITION BY MemberID)) / CONVERT(numeric(19,4),COUNT(MemberAnswer) OVER (PARTITION BY MemberID)) AS PercentYes
FROM #MasterData;
-- Method 2, using a CTE
WITH MemberSummary AS
(
SELECT
MemberID,
SUM(MemberAnswer) AS MemberYes,
COUNT(MemberAnswer) AS MemberTotal
FROM #MasterData
GROUP BY MemberID
)
SELECT
md.MemberID,
md.MemberAnswer,
CONVERT(numeric(19,4),MemberYes) / CONVERT(numeric(19,4),MemberTotal) AS PercentYes
FROM #MasterData md
JOIN MemberSummary ms
ON md.MemberID = ms.MemberID;

First thought is: your query is much, much too complicated. I have spent about 10 minutes now trying to make sense of it and haven't gotten anywhere, so it's obviously going to pose a long-term maintenance challenge to those within your organization going forward as well. I would really recommend you try to find some way of simplifying it.
That said, here is a simplified, general example of how to query on a calculated value and rank the results:
CREATE TABLE member (member_id INT PRIMARY KEY);
CREATE TABLE master_data (
transaction_id INT PRIMARY KEY,
member_id INT FOREIGN KEY REFERENCES member(member_id),
received_data BIT
);
-- INSERT data here
; WITH member_data_counts AS (
SELECT
m.member_id,
(SELECT COUNT(*) FROM master_data d WHERE d.member_id = m.member_id AND d.received_data = 1) num_yes,
(SELECT COUNT(*) FROM master_data d WHERE d.member_id = m.member_id AND d.received_data = 0) num_no
FROM member m
), member_data_calc AS (
SELECT
*,
CASE
WHEN (num_yes + num_no) = 0 THEN NULL -- avoid division-by-zero error
ELSE num_yes / (num_yes + num_no)
END pct_yes
FROM member_data_counts
), member_data_rank AS (
SELECT *, RANK() OVER (ORDER BY pct_yes DESC) AS RankValue
FROM member_data_calc
)
SELECT *
FROM member_data_rank
ORDER BY RankValue ASC;

Related

Undefined error in SQL Server Code before "IF"

I'm obviously new to this and I need some help. I have this code in SQL Server Management Studio.
The error that I get is
Msg 156, Level 15, State 1, Line 38
Incorrect syntax near the keyword 'IF'
If I execute direct select statement instead of using "If" the query is successfully executed.
Please point out what I am missing and where is my mistake.
Thanks a lot.
declare #dateFrom [date] = cast(dateadd(day, -7, getdate()) as [date]);
declare #dateTo [date] = cast(dateadd(day, 28, getdate()) as [date]);
declare #prTypeCode [varchar](10) = null;
declare #ExcludeA [bit] = 0 ;
declare #ExcludeB [bit] = 0;
;with distribution as
(
select *
from [schema1].[Table1]
),
product as
(
select *
from [schema1].[Product]
),
fint as
(
select
*,
(case when CHARINDEX('0%', FIN_ CODE) > 0 then 1 else 0 end) as ZFlag
from [schema1].[FinTab]
),
deal as
(
select
*,
case when (AG_ID = 5 or DE_ID = 6) then 1
else 0 end as Ex_flag
from
[P_ BASE1].[ schema2].[table3]
),
dealersn as
(
select distinct *
from [P_ BASE1].[ schema2].[table4]
),
regionN as
(
select distinct *
from [P_ BASE1].[ schema2].[table5]
),
final as
(
select
d.PRODUCT_CODE, p.PR_NAME, p.PR_TYPE_CODE, p.PR_ACTIVE,
d.FIN_TABLE_CODE, ft.FT_NAME, ft.FT_ACTIVE,
cast(d.SND_TO_DATE as [date]) as SND_TO_DATE,
count(d.SN_ID) as SHOP_COUNT,
min(ft.ZFlag) as ZFlag,
min(ds.Ex_flag) as ExFlag, dn.SND_FULLNAME,rn.REG_NAME
from
distribution d
left outer join
product p on p.PRODUCT_CODE = d.PRODUCT_CODE
left outer join
fint ft on ft. FIN_ CODE = FIN_ CODE
left outer join
deal as ds On d.SN_ID = ds.SHOP_ID
left outer join
dealersn as dn on d.SN_ID = dn.SN_DATA_ID
left outer join
regionN as rn ON ds.REGION_ID = rn.REGION_ID
where
d.SND_TO_DATE between #dateFrom and #dateTo
and (#prTypeCode is null or p.PR_TYPE_CODE = #prTypeCode)
group by
rn.REG_NAME, ds.REGION_ID, ds.SHOP_ID, d.PRODUCT_CODE, p.PR_NAME,
p.PR_TYPE_CODE, p.PR_ACTIVE, d.FIN_TABLE_CODE, ft.FT_NAME,
ft.FT_ACTIVE, d.SND_TO_DATE,dn.SND_FULLNAME
)
IF (#ZeroPercent = 0 AND #LargeChainsOrTushev = 0)
BEGIN
SELECT DISTINCT *
FROM final
WHERE ZFlag = 0 AND ExFlag = 0
END
ELSE
IF #ZeroPercent = 1 AND #LargeChainsOrTushev = 0
BEGIN
SELECT DISTINCT *
FROM final
WHERE ZFlag = 1 AND ExFlag = 0
END
ELSE IF #ZeroPercent = 0 AND #LargeChainsOrTushev = 1
BEGIN
SELECT DISTINCT *
FROM final
WHERE ZFlag = 0 AND ExFlag = 1
END
ELSE
BEGIN
SELECT DISTINCT *
FROM final
END
You have a with statement that "defines" a bunch of tables, the last of which is called final. This is called a CTE (Common Table Expression). You can use CTEs for many reasons, but the important thing is, you have to use the CTE in a query. Instead you define the CTE and then...do nothing.
If you put a line like this:
select * from final
just after the CTE (before the IF), it will run.

SQL - Finding Gaps in Coverage

I am running this problem on SQL server
Here is my problem.
have something like this
Dataset A
FK_ID StartDate EndDate Type
1 10/1/2018 11/30/2018 M
1 12/1/2018 2/28/2019 N
1 3/1/2019 10/31/2019 M
I have a second data source I have no control over with data something like this:
Dataset B
FK_ID SpanStart SpanEnd Type
1 10/1/2018 10/15/2018 M
1 10/1/2018 10/25/2018 M
1 2/15/2019 4/30/2019 M
1 5/1/2019 10/31/2019 M
What I am trying to accomplish is to check to make sure every date within each TYPE M record in Dataset A has at least 1 record in Dataset B.
For example record 1 in Dataset A does NOT have coverage from 10/26/2018 through 11/30/2018. I really only care about when the coverage ends, in this case I want to return 10/26/2018 because it is the first date where the span has no coverage from Dataset B.
I've written a function that does this but it is pretty slow because it is cycling through each date within each M record and counting the number of records in Dataset B. It exits the loop when it finds the first one but I would really like to make this more efficient. I am sure I am not thinking about this properly so any suggestions anyone can offer would be helpful.
This is the section of code I'm currently running
else if #SpanType = 'M'
begin
set #CurrDate = #SpanStart
set #UncovDays = 0
while #CurrDate <= #SpanEnd
Begin
if (SELECT count(*)
FROM eligiblecoverage ec join eligibilityplan ep on ec.plandescription = ep.planname
WHERE ec.masterindividualid = #IndID
and ec.planbegindate <= #CurrDate and ec.planenddate >= #CurrDate
and ec.sourcecreateddate = #MaxDate
and ep.medicaidcoverage = 1) = 0
begin
SET #Result = concat('NON Starting ',format(#currdate, 'M/d/yyyy'))
BREAK
end
set #CurrDate = #CurrDate + 1
end
end
I am not married to having a function it just could not find a way to do this in queries that wasn't very very slow.
EDIT: Dataset B will never have any TYPEs except M so that is not a consideration
EDIT 2: The code offered by DonPablo does de-overlap the data but only in cases where there is an overlap at all. It reduces dataset B to:
FK_ID SpanStart SpanEnd Type
1 10/1/2018 10/25/2018 M
instead of
FK_ID SpanStart SpanEnd Type
1 10/1/2018 10/25/2018 M
1 2/15/2019 4/30/2019 M
1 5/1/2019 10/31/2019 M
I am still futzing around with it but it's a start.
I would approach this by focusing on B. My assumption is that any absent record would follow span_end in the table. So here is the idea:
Unpivot the dates in B (adding "1" to the end dates)
Add a flag if they are present with type "M".
Check to see if any not-present records are in the span for A.
Check the first and last dates as well.
So, this looks like:
with bdates as (
select v.dte,
(case when exists (select 1
from b b2
where v.dte between b2.spanstart and b2.spanend and
b2.type = 'M'
)
then 1 else 0
end) as in_b
from b cross apply
(values (spanstart), (dateadd(day, 1, spanend)
) v(dte)
where b.type = 'M' -- all we care about
group by v.dte -- no need for duplicates
)
select a.*,
(case when not exists (select 1
from b b2
where a.startdate between b2.spanstart and b2.spanend and
b2.type = 'M'
)
then 0
when not exists (select 1
from b b2
where a.enddate between b2.spanstart and b2.spanend and
b2.type = 'M'
)
when exists (select 1
from bdates bd
where bd.dte between a.startdate and a.enddate and
bd.in_b = 0
)
then 0
when exists (select 1
from b b2
where a.startdate between b2.spanstart and b2.spanend and
b2.type = 'M'
)
then 1
else 0
end)
from a;
What is this doing? Four validity checks:
Is the starttime valid?
Is the endtime valid?
Are any intermediate dates invalid?
Is there at least one valid record?
Start by framing the problem in smaller pieces, in a sequence of actions like I did in the comment.
See George Polya "How To Solve It" 1945
Then Google is your friend -- look at==> sql de-overlap date ranges into one record (over a million results)
UPDATED--I picked Merge overlapping dates in SQL Server
and updated it for our table and column names.
Also look at theory from 1983 Allen's Interval Algebra https://www.ics.uci.edu/~alspaugh/cls/shr/allen.html
Or from 2014 https://stewashton.wordpress.com/2014/03/11/sql-for-date-ranges-gaps-and-overlaps/
This is a primer on how to setup test data for this problem.
Finally determine what counts via Ranking the various pairs of A vs B --
bypass those totally Within, then work with earliest PartialOverlaps, lastly do the Precede/Follow items.
--from Merge overlapping dates in SQL Server
with SpanStarts as
(
select distinct FK_ID, SpanStart
from Coverage_B as t1
where not exists
(select * from Coverage_B as t2
where t2.FK_ID = t1.FK_ID
and t2.SpanStart < t1.SpanStart
and t2.SpanEnd >= t1.SpanStart)
),
SpanEnds as
(
select distinct FK_ID, SpanEnd
from Coverage_B as t1
where not exists
(select * from Coverage_B as t2
where t2.FK_ID = t1.FK_ID
and t2.SpanEnd > t1.SpanEnd
and t2.SpanStart <= t1.SpanEnd)
),
DeOverlapped_B as
(
Select FK_ID, SpanStart,
(select min(SpanEnd) from SpanEnds as e
where e.FK_ID = s.FK_ID
and SpanEnd >= SpanStart) as SpanEnd
from SpanStarts as s
)
Select * from DeOverlapped_B
Now we have something to feed into the next steps, and we can use the above as a CTE
======================================
with SpanStarts as
(
select distinct FK_ID, SpanStart
from Coverage_B as t1
where not exists
(select * from Coverage_B as t2
where t2.FK_ID = t1.FK_ID
and t2.SpanStart < t1.SpanStart
and t2.SpanEnd >= t1.SpanStart)
),
SpanEnds as
(
select distinct FK_ID, SpanEnd
from Coverage_B as t1
where not exists
(select * from Coverage_B as t2
where t2.FK_ID = t1.FK_ID
and t2.SpanEnd > t1.SpanEnd
and t2.SpanStart <= t1.SpanEnd)
),
DeOverlapped_B as
(
Select FK_ID, SpanStart,
(select min(SpanEnd) from SpanEnds as e
where e.FK_ID = s.FK_ID
and SpanEnd >= SpanStart) as SpanEnd
from SpanStarts as s
),
-- find A row's coverage
ACoverage as (
Select
a.*, b.SpanEnd, b.SpanStart,
Case
When SpanStart <= StartDate And StartDate <= SpanEnd
And SpanStart <= EndDate And EndDate <= SpanEnd
Then '1within' -- starts, equals, during, finishes
When EndDate < SpanStart
Or SpanEnd < StartDate
Then '3beforeAfter' -- preceeds, meets, preceeded, met
Else '2overlap' -- one or two ends hang over spanStart/End
End as relation
From Coverage_A a
Left Join DeOverlapped_B b
On a.FK_ID = b.FK_ID
Where a.Type = 'M'
)
Select
*
,Case
When relation1 = '2' And StartDate < SpanStart Then StartDate
When relation1 = '2' Then DateAdd(d, 1, SpanEnd)
When relation1 = '3' Then StartDate
End as UnCoveredBeginning
From (
Select
*
,SUBSTRING(relation,1,1) as relation1
,ROW_NUMBER() Over (Partition by A_ID Order by relation, SpanStart) as Rownum
from ACoverage
) aRNO
Where Rownum = 1
And relation1 <> '1'

How to get a windowed function working in WHERE clause

I know that there are quite many threads on "Windowed functions can only appear in the select or ORDER BY clases" topic, but I have read them through and just don't seem to get any of those trick to work for me. So here I go.
My Query you can see below the line. I have highlighted the part of the Query which is "causing" me issues or problems. The thing is that I would like to get
"LocalCurrentAmount not between -1.000000 and 1.000000"
in somehow. I have tried the CTE versioon, but somehow didn't work and my declare tables and then the details view started causing problems.
Would really appreciate your help!
Jaanis
declare #exceptions1 table (CustomerCode varchar(7), Exception_comment varchar(15));
insert into #exceptions1 (CustomerCode, Exception_comment)
VALUES
(3514437,'Exception'),(3500977,'Exception'),(3295142,'Exception'), ...
declare #exceptions2 table (CustomerCode2 varchar(7), Exception_comment2 varchar(15));
insert into #exceptions2 (CustomerCode2, Exception_comment2)
VALUES
(3390437,'VIP SE') ,(3390438,'VIP SE') ,(3390481,'VIP SE'), ...
declare #exceptions3 table (CustomerCode3 varchar(7), Exception_comment3 varchar(15));
insert into #exceptions1 (CustomerCode, Exception_comment)
VALUES
(1530350, 'DK Exception'), (1533834, 'DK Exception'), (1530002, 'DK Exception'), ...
with Details as
(
select ard.AccountsReceivableHeaderID, sum(ard.TransactionAmountOC) as DetailAmountOC , Max(DetailSequenceCode) as DetailSequenceCode, Max( ard.BatchDate) as BatchDate
from dmbase.fAccountsReceivableDetail ard
join dmbase.fAccountsReceivable arh on ard.AccountsReceivableHeaderID = arh.AccountsReceivableID
where ard.BatchDate <= getdate()
group by AccountsReceivableHeaderID
)
SELECT
comp.CompanyCode
,convert(varchar(10),ar.InvoiceDate, 103) as Invoice_date -- dd/MM/yyyy format
,case
when ar.IsCreditMemo = 'Y' then 'Memo (Credit/Debit)'
when ar.InvoiceCode = '0' then 'Payment'
else 'Invoice'
end as Description
,isnull(cm.SummaryInvoiceCode, ar.InvoiceSummaryCode) InvoiceSummaryCode
,case
when len(ar.InvoiceSequenceCode) = '1' then CONCAT(ar.InvoiceCode,'-000',ar.InvoiceSequenceCode)
when len(ar.InvoiceSequenceCode) = '2' then CONCAT(ar.InvoiceCode,'-00',ar.InvoiceSequenceCode)
when len(ar.InvoiceSequenceCode) = '3' then CONCAT(ar.InvoiceCode,'-0',ar.InvoiceSequenceCode)
else CONCAT(ar.InvoiceCode,'-',ar.InvoiceSequenceCode)
end as Invoice#
,**(ar.OriginalInvoiceAmountOC
+
case
when row_number() over (partition by AccountsReceivableID order by ar.InvoiceCode) = 1 then isnull(vat.vatAdjustment, 0)
else 0
end
+
coalesce(det.DetailAmountOC, 0)) * coalesce(cer.CurrencyExchangeRate, ar.CurrencyExchangeRate) AS LocalCurrentAmount**
,(ar.OriginalInvoiceAmountOC
+
case
when row_number() over (partition by AccountsReceivableID order by ar.InvoiceCode) = 1 then isnull(vat.vatAdjustment, 0)
else 0
end
+ coalesce(det.DetailAmountOC, 0)) AS CurrentAmount
,ar.OriginalInvoiceAmountOC
+
case
when row_number() over (partition by AccountsReceivableID order by ar.InvoiceCode) = 1 then isnull(vat.vatAdjustment, 0)
else 0
end as OriginalInvoiceAmountOC
,ar.InvoiceCurrencyCode
,cust.CustomerCode
,upper(cust.CustomerName) as CustomerName
from
dmbase.fAccountsReceivable ar
INNER JOIN dmbase.dlocation loc
ON loc.LocationID = ar.LocationID
INNER JOIN dmbase.dCustomer cust
ON cust.CustomerID = ar.CustomerID
LEFT JOIN dmbase.VatAdjustment vat
on ar.InvoiceCode = vat.contractNumber
and ar.InvoiceSequenceCode = vat.invoiceSequence
and cust.CustomerCode = vat.CustomerNumber
and loc.CompanyCode = vat.companyCode
inner join dmbase.dCompany comp
on (ar.CompanyID = comp.CompanyID)
left join dmbase.dAccountsReceivableInvoiceStatus aris
on (aris.ARInvoiceStatusAMID=ar.ARInvoiceStatusAMID)
left hash join Details det
on (ar.AccountsReceivableID = det.AccountsReceivableHeaderID)
left join dmbase.dCurrencyExchangeRate cer
on (comp.CompanyCode = cer.CompanyCode
and ar.InvoiceCurrencyCode = cer.CurrencyCode
and case ar.InvoiceDate when '1900-01-01' then getdate() else ar.InvoiceDate end between cer.ValidFrom and cer.ValidTo)
left join dmbase.fContractClosedHeader ccd
on ccd.ContractNumber = ar.InvoiceCode
and ccd.ContractSeqNumber = ar.InvoiceSequenceCode
and ccd.CompanyID = ar.CompanyID
and ccd.ContractNumber!='0'
and ccd.CreditMemoContractNumber != '0'
left join dmbase.fAccountsReceivableHeader cm
on ccd.CreditMemoContractNumber = cm.ContractNumber
and ccd.CreditMemoSequenceCode = cm.ContractSeqNumber
and cm.CompanyID = ccd.CompanyID
where
(aris.ARInvoiceStatusCode = 'OP' or (ar.LastPaymentDate >= getdate())
or (ar.TotalAdjustmentsAmountOC <> 0 and ar.CurrentAmountLC = 0
and (ar.OriginalInvoiceAmountOC + isnull(vat.vatAdjustment, 0) ) + coalesce(det.DetailAmountOC, 0) <> 0)
)
and ar.OriginalInvoiceAmountOC <= 0
and ar.IsCreditMemo = 'Y' -- ainult Memo (Credit/Debit)
and cust.InternalCustomerType = 'External'
and cust.CustomerName not in ('RR AJM', 'RAMIRENT', 'Ramirent')
and cust.CustomerName not like '%[7][0-9][0-9][0-9]%'
and ar.InvoiceDate <= EOMONTH(getdate(),-3
and cust.CustomerCode NOT IN
(select CustomerCode from #exceptions1
union
select CustomerCode2 from #exceptions2
union
select CustomerCode3 from #exceptions3)
order by Invoice_date
When using a Window function, like you said in your post, you can't use it in the WHERE clause. The common solution, therefore, is to use a CTE and reference it in the WHERE outside of it. I've used your CTE, however, without any kind of sample data this is a total guess. I've also left a lot of comments for you, and changed some other parts of the SQL, as some of the clauses you have will effect your query's performance.
WITH
Details AS
(SELECT ard.AccountsReceivableHeaderID,
SUM(ard.TransactionAmountOC) AS DetailAmountOC,
MAX(DetailSequenceCode) AS DetailSequenceCode,
MAX(ard.BatchDate) AS BatchDate
FROM dmbase.fAccountsReceivableDetail ard
JOIN dmbase.fAccountsReceivable arh ON ard.AccountsReceivableHeaderID = arh.AccountsReceivableID
WHERE ard.BatchDate <= GETDATE()
GROUP BY AccountsReceivableHeaderID),
Summary AS(
SELECT comp.CompanyCode,
CONVERT(varchar(10), ar.InvoiceDate, 103) AS Invoice_date, -- dd/MM/yyyy format
CASE
WHEN ar.IsCreditMemo = 'Y' THEN 'Memo (Credit/Debit)'
WHEN ar.InvoiceCode = '0' THEN 'Payment'
ELSE 'Invoice'
END AS Description,
ISNULL(cm.SummaryInvoiceCode, ar.InvoiceSummaryCode) AS InvoiceSummaryCode,
CASE
WHEN LEN(ar.InvoiceSequenceCode) = '1' THEN CONCAT(ar.InvoiceCode, '-000', ar.InvoiceSequenceCode)
WHEN LEN(ar.InvoiceSequenceCode) = '2' THEN CONCAT(ar.InvoiceCode, '-00', ar.InvoiceSequenceCode)
WHEN LEN(ar.InvoiceSequenceCode) = '3' THEN CONCAT(ar.InvoiceCode, '-0', ar.InvoiceSequenceCode)
ELSE CONCAT(ar.InvoiceCode, '-', ar.InvoiceSequenceCode)
END AS Invoice#,
(ar.OriginalInvoiceAmountOC + CASE
WHEN ROW_NUMBER() OVER (PARTITION BY AccountsReceivableID ORDER BY ar.InvoiceCode) = 1 THEN ISNULL(vat.vatAdjustment, 0)
ELSE 0
END + COALESCE(det.DetailAmountOC, 0)) * COALESCE(cer.CurrencyExchangeRate, ar.CurrencyExchangeRate) AS LocalCurrentAmount,
(ar.OriginalInvoiceAmountOC + CASE
WHEN ROW_NUMBER() OVER (PARTITION BY AccountsReceivableID ORDER BY ar.InvoiceCode) = 1 THEN ISNULL(vat.vatAdjustment, 0)
ELSE 0
END + COALESCE(det.DetailAmountOC, 0)) AS CurrentAmount,
ar.OriginalInvoiceAmountOC + CASE
WHEN ROW_NUMBER() OVER (PARTITION BY AccountsReceivableID ORDER BY ar.InvoiceCode) = 1 THEN ISNULL(vat.vatAdjustment, 0)
ELSE 0
END AS OriginalInvoiceAmountOC,
ar.InvoiceCurrencyCode,
cust.CustomerCode,
UPPER(cust.CustomerName) AS CustomerName
FROM dmbase.fAccountsReceivable ar
INNER JOIN dmbase.dlocation loc ON loc.LocationID = ar.LocationID
INNER JOIN dmbase.dCustomer cust ON cust.CustomerID = ar.CustomerID
LEFT JOIN dmbase.VatAdjustment vat ON ar.InvoiceCode = vat.contractNumber
AND ar.InvoiceSequenceCode = vat.invoiceSequence
AND cust.CustomerCode = vat.CustomerNumber
AND loc.CompanyCode = vat.companyCode
INNER JOIN dmbase.dCompany comp ON (ar.CompanyID = comp.CompanyID)
LEFT JOIN dmbase.dAccountsReceivableInvoiceStatus aris ON (aris.ARInvoiceStatusAMID = ar.ARInvoiceStatusAMID)
LEFT HASH JOIN Details det ON (ar.AccountsReceivableID = det.AccountsReceivableHeaderID)
LEFT JOIN dmbase.dCurrencyExchangeRate cer ON (comp.CompanyCode = cer.CompanyCode
AND ar.InvoiceCurrencyCode = cer.CurrencyCode
AND CASE ar.InvoiceDate WHEN '1900-01-01' THEN GETDATE()ELSE ar.InvoiceDate END BETWEEN cer.ValidFrom AND cer.ValidTo)
LEFT JOIN dmbase.fContractClosedHeader ccd ON ccd.ContractNumber = ar.InvoiceCode
AND ccd.ContractSeqNumber = ar.InvoiceSequenceCode
AND ccd.CompanyID = ar.CompanyID
AND ccd.ContractNumber != '0'
AND ccd.CreditMemoContractNumber != '0'
LEFT JOIN dmbase.fAccountsReceivableHeader cm ON ccd.CreditMemoContractNumber = cm.ContractNumber
AND ccd.CreditMemoSequenceCode = cm.ContractSeqNumber
AND cm.CompanyID = ccd.CompanyID
WHERE (aris.ARInvoiceStatusCode = 'OP'
OR (ar.LastPaymentDate >= GETDATE())
OR (ar.TotalAdjustmentsAmountOC <> 0
AND ar.CurrentAmountLC = 0
AND (ar.OriginalInvoiceAmountOC + ISNULL(vat.vatAdjustment, 0)) + COALESCE(det.DetailAmountOC, 0) <> 0)) --Why ISNULL for one, COALESCE for the other? Both will make the query non-SARGable
AND ar.OriginalInvoiceAmountOC <= 0
AND ar.IsCreditMemo = 'Y' -- ainult Memo (Credit/Debit)
AND cust.InternalCustomerType = 'External'
AND cust.CustomerName NOT IN ('RR AJM', 'RAMIRENT', 'Ramirent')
AND cust.CustomerName NOT LIKE '%[7][0-9][0-9][0-9]%' --A leading wildcard is going to perform slow
AND ar.InvoiceDate <= EOMONTH(DATEADD(DAY, -3, GETDATE())) --I have changed this from EOMONTH(GETDATE(), -3 (which made no sense)
AND NOT EXISTS (SELECT 1
FROM #exceptions1 E
WHERE E.CustomerCode = cust.CustomerCode) --Changed to EXISTS binned UNION you should use UNION ALL if you're doing something like this, it'll be quicker)
AND NOT EXISTS (SELECT 1
FROM #exceptions2 E
WHERE E.CustomerCode = cust.CustomerCode) --Changed to EXISTS binned UNION (you should use UNION ALL if you're doing something like this, it'll be quicker)
AND NOT EXISTS (SELECT 1
FROM #exceptions3 E
WHERE E.CustomerCode = cust.CustomerCode)) --Changed to EXISTS binned UNION you should use UNION ALL if you're doing something like this, it'll be quicker)
SELECT *
FROM Summary
WHERE LocalCurrentAmount NOT BETWEEN -1 AND 1
ORDER BY Invoice_date;
It's worth noting that the above sql is completely untested. I do not have access to your server, or data, so I'm purely relying on "my eye" to note any errors.

Amazon Redshift MAX_STEPS limitation

When I try to run a huge query over Amazon Redshift I get the following error:
"m_num_steps < MAX_STEPS - exceeded maximum plan steps of 100"
There are 4 tables at Redshift. The fact table has about 400 million rows. The query is a "WITH" clause with multiple joins, agregations, analytic and window functions.
The question is: there is a way to avoid this "MAX_STEPS = 100" limitation?
The query (simplified):
WITH
-- Regr
a27 as (
select id_uc, case when count(*) = 0 or VAR_POP(numero_de_meses) = 0 then null else trunc(((SUM(consumo_normalizado * numero_de_meses) - SUM(numero_de_meses) * SUM(consumo_normalizado) / count(*)) / count(*)) / VAR_POP(numero_de_meses), 2) end N2542, case when count(*) = 0 or STDDEV_POP(consumo_normalizado) = 0 or STDDEV_POP(numero_de_meses) = 0 then null else trunc(POWER(((SUM(consumo_normalizado * numero_de_meses) - SUM(numero_de_meses) * SUM(consumo_normalizado) / count(*)) / count(*)) / (STDDEV_POP(consumo_normalizado) * STDDEV_POP(numero_de_meses)), 2), 2) end N2554 from (
select id_uc,
rank() over (partition by canal.id_uc order by canal.data_referencia) numero_de_meses,
case
when stddev(consumo_faturado) over (partition by canal.id_uc) <> 0
then (consumo_faturado - avg(consumo_faturado) over (partition by canal.id_uc)) / stddev(consumo_faturado) over (partition by canal.id_uc)
else null end as consumo_normalizado
from fato_uc canal
where exists (select 1 from eventos_resultado_atual evt where canal.id_uc = evt.id_uc and canal.data_referencia between E_Fat_12_meses_atras and ultimo_faturamento)
) group by id_uc)
-- Regr
,a28 as (
select id_uc, case when count(*) = 0 or VAR_POP(numero_de_meses) = 0 then null else case when count(*) = 0 or VAR_POP(numero_de_meses) = 0 then null else trunc(((SUM(consumo_normalizado * numero_de_meses) - SUM(numero_de_meses) * SUM(consumo_normalizado) / count(*)) / count(*)) / VAR_POP(numero_de_meses), 2) end end N2545, case when count(*) = 0 or STDDEV_POP(consumo_normalizado) = 0 or STDDEV_POP(numero_de_meses) = 0 then null else trunc(POWER(((SUM(consumo_normalizado * numero_de_meses) - SUM(numero_de_meses) * SUM(consumo_normalizado) / count(*)) / count(*)) / (STDDEV_POP(consumo_normalizado) * STDDEV_POP(numero_de_meses)), 2), 2) end N2557 from (
select id_uc,
rank() over (partition by canal.id_uc order by canal.data_referencia) numero_de_meses,
case
when stddev(consumo_faturado) over (partition by canal.id_uc) <> 0
then (consumo_faturado - avg(consumo_faturado) over (partition by canal.id_uc)) / stddev(consumo_faturado) over (partition by canal.id_uc)
else null end as consumo_normalizado
from fato_uc canal
where exists (select 1 from eventos_resultado_atual evt where canal.id_uc = evt.id_uc and canal.data_referencia between E_Fat_12_meses_atras and E_2457)
) group by id_uc)
-- Corr
,a65 as (
select evt.id_uc, case when count(*) = 0 or STDDEV_POP(c1.avg_cons) = 0 or STDDEV_POP(c2.consumo_faturado) = 0 then null else trunc(((SUM(c1.avg_cons * c2.consumo_faturado) - SUM(c2.consumo_faturado) * SUM(c1.avg_cons) / count(*)) / count(*)) / (STDDEV_POP(c1.avg_cons) * STDDEV_POP(c2.consumo_faturado)), 2) end N2720
from eventos_resultado_atual evt inner join VIZ_FAT c1 on evt.id_uc = c1.id_uc
inner join fato_uc c2 on evt.id_uc = c2.id_uc and date_trunc('month', c1.mes_ref) = date_trunc('month', c2.data_referencia)
where c1.mes_ref between E_Fat_12_meses_atras and ultimo_faturamento
and c2.data_referencia between E_Fat_12_meses_atras and ultimo_faturamento
group by evt.id_uc)
-- Corr
,a66 as (
select evt.id_uc, case when count(*) = 0 or STDDEV_POP(c1.avg_cons) = 0 or STDDEV_POP(c2.consumo_faturado) = 0 then null else trunc(((SUM(c1.avg_cons * c2.consumo_faturado) - SUM(c2.consumo_faturado) * SUM(c1.avg_cons) / count(*)) / count(*)) / (STDDEV_POP(c1.avg_cons) * STDDEV_POP(c2.consumo_faturado)), 2) end N2723
from eventos_resultado_atual evt inner join VIZ_FAT c1 on evt.id_uc = c1.id_uc
inner join fato_uc c2 on evt.id_uc = c2.id_uc and date_trunc('month', c1.mes_ref) = date_trunc('month', c2.data_referencia)
where c1.mes_ref between E_Fat_12_meses_atras and E_2457
and c2.data_referencia between E_Fat_12_meses_atras and E_2457
group by evt.id_uc)
SELECT *
FROM eventos_resultado_atual
left join a27 on eventos_resultado_atual.id_uc = a27.id_uc
left join a28 on eventos_resultado_atual.id_uc = a28.id_uc
left join a65 on eventos_resultado_atual.id_uc = a65.id_uc
left join a66 on eventos_resultado_atual.id_uc = a66.id_uc
Your problem is with redshift and as always amazon have no solution,try breaking the query in little tasks, be careful with working with lot of data in redshift have serious issues with big data
My recommendation is to break your query into a few sub queries and insert the results into tables along the process. Please share query and I will help you break it up into a few queries that will run more efficient. If you can also share the redshift provided explain plain I I can go over how to use that to do what I would do...

paging over SELECT UNION super slow and killing my server

I have an SP that returns paged data from a query that contains a UNION. This is killing my DB and taking 30 seconds to run sometimes, am I missing something obvious here? What can I do to improve it's performance?
Tables Involved: Products, Categories, CategoryProducts
Goal:
Any Products that are not in a Category or have been deleted from a category UNION all Products currently in a category and page over them for a web service.
I have Indexes on all columns that I am joining on and there are 427,996 Products, 6148 Categories and 409,691 CategoryProducts in the database.
Here is my query that is taking between 6, and 30 seconds to run:
SELECT * FROM (
SELECT ROW_NUMBER() OVER(ORDER BY Products.ItemID, Products.ManufacturerID) AS RowNum, *
FROM
(
SELECT Products.*,
CategoryID = NULL, CategoryName = NULL,
CategoryProductID = NULL,
ContainerMinimumQuantity =
CASE COALESCE(Products.ContainerMinQty, 0)
WHEN 0 THEN Products.OrderMinimumQuantity
ELSE Products.ContainerMinQty
END
Products.IsDeleted,
SortOrder = NULL
FROM CategoryProducts RIGHT OUTER JOIN Products
ON CategoryProducts.ManufacturerID = Products.ManufacturerID
AND CategoryProducts.ItemID = Products.ItemID
WHERE (Products.ManufacturerID = #ManufacturerID)
AND (Products.ModifiedOn > #tStamp )
AND ((CategoryProducts.IsDeleted = 1) OR (CategoryProducts.IsDeleted IS NULL))
UNION
SELECT Products.*,
CategoryProducts.CategoryID , CategoryProducts.CategoryName,
CategoryProducts.CategoryProductID ,
ContainerMinimumQuantity =
CASE COALESCE(Products.ContainerMinQty, 0)
WHEN 0 THEN Products.OrderMinimumQuantity
ELSE Products.ContainerMinQty
END
CategoryProducts.IsDeleted,
CategoryProducts.SortOrder
FROM Categories INNER JOIN
CategoryProducts ON Categories.CategoryID = CategoryProducts.CategoryID INNER JOIN
Products ON CategoryProducts.ManufacturerID = Products.ManufacturerID
AND CategoryProducts.ItemID = Products.ItemID
WHERE (Products.ManufacturerID = #ManufacturerID)
AND (Products.ModifiedOn > #tStamp OR CategoryProducts.ModifiedOn > #tStamp))
AS Products) AS C
WHERE RowNum >= #StartRow AND RowNum <= #EndRow
Any insight would be greatly appreciated.
If I read your situation correctly, the only reason for having two distinct queries is treatment of missing/deleted CategoryProducts. I tried to address this issue by left join with IsDeleted = 0 to bring all deleted CategoryProducts to nulls, so I don't have to test them again. ModifiedOn part got another test for null for missing/deleted Categoryproducts you wish to retrieve.
select *
from (
SELECT
Products.*,
-- Following three columns will be null for deleted/missing categories
CategoryProducts.CategoryID,
CategoryProducts.CategoryName,
CategoryProducts.CategoryProductID ,
ContainerMinimumQuantity = COALESCE(nullif(Products.ContainerMinQty, 0),
Products.OrderMinimumQuantity),
CategoryProducts.IsDeleted,
CategoryProducts.SortOrder,
ROW_NUMBER() OVER(ORDER BY Products.ItemID,
Products.ManufacturerID) AS RowNum
FROM Products
LEFT JOIN CategoryProducts
ON CategoryProducts.ManufacturerID = Products.ManufacturerID
AND CategoryProducts.ItemID = Products.ItemID
-- Filter IsDeleted in join so we get nulls for deleted categories
-- And treat them the same as missing ones
AND CategoryProducts.IsDeleted = 0
LEFT JOIN Categories
ON Categories.CategoryID = CategoryProducts.CategoryID
WHERE Products.ManufacturerID = #ManufacturerID
AND (Products.ModifiedOn > #tStamp
-- Deleted/missing categories
OR CategoryProducts.ModifiedOn is null
OR CategoryProducts.ModifiedOn > #tStamp)
) C
WHERE RowNum >= #StartRow AND RowNum <= #EndRow
On a third look I don't see that Category is used at all except as a filter to CategoryProducts. If this is the case second LEFT JOIN should be changed to INNER JOIN and this section should be enclosed in parenthessis.

Resources