I'm trying to do an inner join by creating a new column with SUBSTRING and matching it against a column in another table, so that I can display the name stored in that other table.
The code is
-- Splits flh.FileName at its last underscore and tries to join the trailing
-- part to erc.FileName, restricted to the highest RecRunId.
SELECT
-- Text after the last '_' (reverse, cut at the first '_', reverse back).
-- NOTE(review): `flh[FIleName]` has no dot between the alias and the bracketed
-- column, unlike the `flh.[Filename]` form used in the next expression.
REVERSE(SUBSTRING(REVERSE(flh[FIleName]), 0, CHARINDEX('_', REVERSE(flh[FileName])))) AS FILENAMES,
-- Text before the last '_'.
SUBSTRING(flh.[Filename], 0, LEN(flh.[FileName]) - CHARINDEX('_', REVERSE(flh.[FileName])) + 1) AS DATE,
erc.DisplayName
FROM
[Recon_E].[ETL].[tblFileLoadHistory] flh
INNER JOIN
-- NOTE(review): `flh.filenames` names the FILENAMES alias defined in the SELECT
-- list above; this snippet shows no table column with that name.
Feed.dbo.tblEnfusionReRunConfig erc on flh.filenames = erc.FileName
WHERE
-- Keep only rows belonging to the highest RecRunId in the history table.
flh.RecRunId = (SELECT MAX(RecRunId)
FROM Recon_E.etl.tblFileLoadHistory)
I'm separating the date and the string in the SubString Select.
But I'm not able to give the alias FILENAMES to the new column and then use that alias in the join.
One solution is to use Common Table expressions or CTEs like below if you are on SQL 2005 or later.
-- Compute the derived columns inside a CTE so that the outer query can
-- reference FILENAMES by name in its join condition.
;WITH FileLoadHist AS
(
    SELECT
        -- Text after the last '_' in FileName: reverse the string, take
        -- everything before the first '_', then reverse it back.
        REVERSE(SUBSTRING(REVERSE(flh.[FileName]), 0, CHARINDEX('_', REVERSE(flh.[FileName])))) AS FILENAMES,
        -- Text before the last '_' in FileName.
        SUBSTRING(flh.[FileName], 0, LEN(flh.[FileName]) - CHARINDEX('_', REVERSE(flh.[FileName])) + 1) AS [DATE]
    FROM [Recon_E].[ETL].[tblFileLoadHistory] AS flh
    -- Only the rows belonging to the highest RecRunId.
    WHERE flh.RecRunId = (
        SELECT MAX(latest.RecRunId)
        FROM Recon_E.etl.tblFileLoadHistory AS latest
    )
)
-- Outer query: join the derived FILENAMES column to the config table.
SELECT
    hist.FILENAMES,
    hist.[DATE],
    erc.DisplayName
FROM FileLoadHist AS hist
INNER JOIN Feed.dbo.tblEnfusionReRunConfig AS erc
    ON hist.FILENAMES = erc.FileName;
Related
I am trying to extract strings from a column to show which are the FROM tables and which are the JOIN tables. The complete string consists of one FROM table and multiple JOIN tables. Here is a sample of a string:
FROM [TABLEOWNER] .[load_XT_Customer_test] load_XT_Customer_test
INNER JOIN [TABLEOWNER].[load_xt_orders_test] load_xt_orders_test ON load_xt_customer_test.customer_id=load_xt_orders_test.customer_id
INNER JOIN [TABLEOWNER].[load_XT_Orders] load_XT_Orders ON
load_xt_customer_test.customer_id=load_xt_orders.customer_id
INNER JOIN [TABLEOWNER].[load_xt_order_details_test] load_xt_order_details_test
ON load_xt_customer_test.customer_id=load_xt_order_details_test.customer_id
My problem here is that there is no unique character to separate the strings, how can I dynamically extract the single join tables?
I tested it with:
-- Attempt to pull three table names out of st_where using fixed offsets.
-- No FROM clause is shown in this excerpt.
SELECT st_where
-- FromTable: text between the first '] ' and the next space after it.
,SUBSTRING(st_where
,CHARINDEX('] ',st_where,1)+1
,ABS(CHARINDEX(' ',st_where,CHARINDEX('] ',st_where,1)+1)
-CHARINDEX(']',st_where,1)-1))
AS FromTable
-- JoinTable: starts right after the first 'JOIN [TABLEOWNER].[' (19 is the
-- length of that search string) and runs to the next space.
,SUBSTRING(st_where
,CHARINDEX('JOIN [TABLEOWNER].[',st_where,1)+19
,ABS(CHARINDEX(' ',st_where,CHARINDEX('JOIN [TABLEOWNER].[',st_where,1)+19)
-CHARINDEX('JOIN [TABLEOWNER].[',st_where,1)-20))
AS JoinTable
-- JoinTable2: located by a hard-coded offset (+59) from the first '='.
-- NOTE(review): this offset presumably only fits the sample string's exact lengths.
,SUBSTRING(st_where
,CHARINDEX('=',st_where,1)+59
,ABS(CHARINDEX(' ',st_where,CHARINDEX('=',st_where,1)+59)
-CHARINDEX('=',st_where,1)-60))
AS JoinTable2
This should get you started:
-- Sample input: one FROM table followed by several INNER JOIN tables.
DECLARE @st_where nvarchar(max) =
N'
FROM [TABLEOWNER] .[load_XT_Customer_test] load_XT_Customer_test
INNER JOIN [TABLEOWNER].[load_xt_orders_test] load_xt_orders_test ON load_xt_customer_test.customer_id=load_xt_orders_test.customer_id
INNER JOIN [TABLEOWNER].[load_XT_Orders] load_XT_Orders ON
load_xt_customer_test.customer_id=load_xt_orders.customer_id
INNER JOIN [TABLEOWNER].[load_xt_order_details_test] load_xt_order_details_test
ON load_xt_customer_test.customer_id=load_xt_order_details_test.customer_id
';

-- Return one row per table reference, e.g. [load_XT_Customer_test].
SELECT
    TableName =
        SUBSTRING
        (
            SS.[value],
            -- CharStart.pos points at the '.', so +1 starts on the '['
            CharStart.pos + 1,
            -- Length from '[' through ']' inclusive
            CharEnd.pos - CharStart.pos
        )
FROM STRING_SPLIT
(
    -- STRING_SPLIT only accepts a single-character separator, so replace
    -- the delimiter phrase with one character that is not expected in the
    -- text (or use a more general string splitter).
    REPLACE(@st_where, N'INNER JOIN', NCHAR(256)),
    NCHAR(256)
) AS SS
CROSS APPLY
(
    VALUES
    (
        -- Find the start of the table name
        CHARINDEX(N'.[', SS.[value], 1)
    )
) AS CharStart (pos)
CROSS APPLY
(
    VALUES
    (
        -- Find the end of the table name
        CHARINDEX(N']', SS.[value], CharStart.pos)
    )
) AS CharEnd (pos)
-- Skip segments that contain no bracketed table name at all.
WHERE CharStart.pos > 0
  AND CharEnd.pos > CharStart.pos;
db<>fiddle demo
The general idea is to split the input string on the phrase "INNER JOIN" then locate the table names using ".[" and "]" as delimiters.
I'm attempting to convert the following SQL Server query into a GreenPlum version of the query:
-- Step 1: stage every part together with its file and location.
-- [file] is bracketed because FILE is a reserved word in T-SQL.
INSERT INTO #TMP1 (part_id, file_id, location, measure_date)
SELECT DISTINCT
    pt.part_id,
    qf.file_id,
    qf.edl_desc,
    pt.measure_date
FROM part AS pt WITH (NOLOCK)
INNER JOIN file_model AS qm WITH (NOLOCK)
    ON qm.file_model_id = pt.file_model_id
INNER JOIN [file] AS qf WITH (NOLOCK)
    ON qf.file_id = qm.file_id;

-- Step 2: for each file/location seen in the last 30 days, keep its
-- 300 most recent parts.
INSERT INTO #part_list (file_id, part_id, measure_date)
SELECT DISTINCT
    t1.file_id,
    k.part_id,
    k.measure_date
FROM #TMP1 AS t1 WITH (NOLOCK)
CROSS APPLY
(
    SELECT DISTINCT TOP (300)
        t2.part_id,
        t2.measure_date
    FROM #TMP1 AS t2 WITH (NOLOCK)
    WHERE t1.file_id = t2.file_id
      AND t1.location = t2.location
    ORDER BY t2.measure_date DESC
) AS k
-- Negative offset: "active in the last 30 days" means measured on or after
-- 30 days ago; a positive offset would only match dates in the future.
WHERE t1.measure_date >= DATEADD(day, -30, GETDATE());
The idea here being that the final table contains the most recent up to 300 parts for all parts programs that are active (ie manufactured something) in the last 30 days.
Per the answers to this question, I am aware that LATERAL JOIN would do it, except my organization is using an older version of Postgres that does not have LATERAL, so I was left with implementing the following function instead:
CREATE FUNCTION BuildActiveParts(p_day INT, p_n INT)
RETURNS SETOF RECORD -- intended row shape: (part_id bigint, file_id int, measure_date timestamp, location varchar(255))
AS $$
DECLARE
    active_row RECORD;  -- one (file_id, location) pair with recent activity
    part_row   RECORD;  -- one part row emitted for the current pair
BEGIN
    -- Outer loop: every file/location with a part measured on or after
    -- current_date - p_day.
    FOR active_row IN
        SELECT DISTINCT
            qf.file_id,
            qf.location
        FROM part pt
        INNER JOIN file_model qm
            ON qm.file_model_id = pt.file_model_id
        INNER JOIN file qf
            ON qf.file_id = qm.file_id
        WHERE pt.measure_date >= current_date - p_day
    LOOP
        -- Inner loop: up to p_n rows for that file/location, newest first.
        FOR part_row IN
            SELECT DISTINCT
                pt.part_id,
                qf.file_id,
                pt.measure_date,
                qf.location
            FROM part pt
            INNER JOIN file_model qm
                ON qm.file_model_id = pt.file_model_id
            INNER JOIN file qf
                ON qf.file_id = qm.file_id
            WHERE qf.file_id = active_row.file_id
              AND qf.location = active_row.location
            ORDER BY pt.measure_date DESC
            LIMIT p_n
        LOOP
            RETURN NEXT part_row;
        END LOOP;
    END LOOP;
END
$$ LANGUAGE plpgsql;
-- Later usage: build the list of all programs active in the last p_day days.
-- This temporary table is one component of a larger function, called daily,
-- that produces a table from this and other calculations.
-- Note: this insert yields 'function cannot execute because it accesses relation'
INSERT INTO TMP_part_list (part_id, file_id, measure_date, location)
SELECT DISTINCT
    active_parts.part_id,
    active_parts.file_id,
    active_parts.measure_date,
    active_parts.location
FROM BuildActiveParts(p_day, p_n)
    AS active_parts (part_id int, file_id text, measure_date timestamp, location text);
Unfortunately, this function is used in inserts to another table (an unavoidable reality of my business requirements), so while the function returns nice happy results when run in isolation, I get a big angry function cannot execute on segment because it accesses relation when I try to use it for its intended purpose. While I've seen suggestions to the effect of "make a VIEW instead", that's not really an option because a view resulting from the script this functionality is a part of would take too long to query.
What can I do, beyond embarking on a months-long excursion through a jungle of red tape to convince my organization to update their stuff, to resolve this?
Edit: Here are some attempts based on comments:
Attempt with function, did not work because of function cannot execute on segment because it accesses relation:
DROP FUNCTION IF EXISTS BuildRecentParts(TEXT, TEXT, INT);

-- Returns up to p_n (measure_date, part_id) rows, newest first, for the
-- given file and location.
--   file_id     : file to look up (compared with file.file_id)
--   location_in : location to look up (compared with file.edl_desc)
--   p_n         : maximum number of rows to return
CREATE FUNCTION BuildRecentParts(file_id TEXT, location_in TEXT, p_n INT)
RETURNS SETOF RECORD -- intended row shape: (measure_date timestamp, part_id bigint)
AS $$
DECLARE
    part_list RECORD;
BEGIN
    FOR part_list IN
        SELECT DISTINCT
            pt.measure_date,
            pt.part_id
        FROM part pt
        INNER JOIN file_model qm
            ON qm.file_model_id = pt.file_model_id
        INNER JOIN file qf
            ON qf.file_id = qm.file_id
        -- The first parameter shares its name with the file_id column, so it
        -- is referenced positionally ($1) to keep the comparison unambiguous.
        WHERE qf.file_id = $1
          AND qf.edl_desc = location_in
        ORDER BY pt.measure_date DESC
        LIMIT p_n
    LOOP
        RETURN NEXT part_list;
    END LOOP;
END
$$ LANGUAGE plpgsql;
-- Attempt: fetch the recent parts for each active file/location through a
-- correlated subquery in the SELECT list.
-- NOTE: the parenthesised subquery selects two columns (measure_date, part_id)
-- and allows up to 300 rows, but it sits where a single value is expected.
SELECT DISTINCT qf.file_id, qf.edl_desc, (SELECT pti.measure_date, pti.part_id FROM part pti
INNER JOIN file_model qmi on qmi.file_model_id = pti.file_model_id
INNER JOIN file qfi on qfi.file_id = qmi.file_id
-- Correlate the inner query to the outer file and location.
WHERE qfi.file_id = qf.file_id
AND qfi.edl_desc = qf.edl_desc
ORDER BY pti.measure_date DESC LIMIT 300)
FROM part pt
INNER JOIN file_model qm on qm.file_model_id = pt.file_model_id
INNER JOIN file qf on qf.file_id = qm.file_id
-- Only files with a part measured in the last 30 days.
WHERE pt.measure_date >= current_date - 30 ;
Attempt without function, will not work because subquery has multiple columns:
-- Staging table holding every part with its file and location, distributed by part_id.
CREATE TEMPORARY TABLE TMP_TMP1 (part_id bigint, file_id varchar(255), location varchar(255), measure_date timestamp) DISTRIBUTED BY (part_id);
INSERT INTO TMP_TMP1 (part_id, file_id, location, measure_date)
SELECT DISTINCT pt.part_id, qf.file_id, qf.edl_desc, pt.measure_date
FROM part pt
INNER JOIN file_model qm on qm.file_model_id = pt.file_model_id
INNER JOIN file qf on qf.file_id = qm.file_id;
-- Refresh planner statistics after the bulk insert.
ANALYZE TMP_TMP1;
-- Same idea as a per-row lookup, but against the staging table.
-- NOTE: the parenthesised subquery returns two columns and up to 300 rows in
-- a position where a single value is expected.
SELECT DISTINCT t1.file_id, t1.location, (SELECT t2.measure_date, t2.part_id FROM TMP_TMP1 t2
WHERE t2.file_id = t1.file_id
AND t2.location = t1.location
ORDER BY t2.measure_date DESC LIMIT 300)
FROM TMP_TMP1 t1
WHERE t1.measure_date >= current_date - 30;
I also attempted a recursive CTE, but found that that was unsupported.
Between the answers here and input from architects at my organization, we decided that we have hit a Greenplum limitation that would be too costly to overcome. The logic that performs the CROSS APPLY-style join will be shifted to the R script that calls the stored procedure that this functionality would have been a part of.
Well, Greenplum doesn't have dirty reads so you can't implement the nolock hint you have. That is probably a good thing too. I would recommend removing that from SQL Server too.
I think the best solution is to use an Analytical function here instead of that function or even a correlated subquery which Greenplum supports. It is also more efficient in SQL Server to use this approach.
-- Number the rows within each partition from newest to oldest and keep the
-- first 300 of each.
SELECT
    sub2.part_id,
    sub2.location,
    sub2.measure_date
FROM (
    SELECT
        sub1.part_id,
        sub1.location,
        sub1.measure_date,
        ROW_NUMBER() OVER (
            PARTITION BY sub1.part_id
            ORDER BY sub1.measure_date DESC
        ) AS rownum
    FROM (
        -- Distinct part/location/date combinations from the last 30 days.
        SELECT
            pt.part_id,
            qf.edl_desc AS location,
            pt.measure_date
        FROM part pt
        INNER JOIN file_model qm
            ON qm.file_model_id = pt.file_model_id
        INNER JOIN file qf
            ON qf.file_id = qm.file_id
        WHERE pt.measure_date >= (now() - interval '30 days')
        GROUP BY
            pt.part_id,
            qf.edl_desc,
            pt.measure_date
    ) AS sub1
) AS sub2
WHERE sub2.rownum <= 300;
Now, I had to guess at your data, because it looks like you could get into trouble with your original query if you have multiple qf.edl_desc values, since your original grouping (the SELECT DISTINCT) includes this column. If you had multiple values, then things would get ugly.
I'm also not 100% sure on the row_number function without knowing your data. It might be this instead:
row_number() over(partition by sub1.part_id, sub1.location order by sub1.measure_date desc)
I’m using SQL Server 2008
I have joins written something like the following, where the first join is encapsulated in a ‘With as’ statement so that I can name the output table as ‘A’ and then reference the ‘A’ resulting table in the next select and Join seen beneath it.
This works perfectly fine. What I would like to do then is reference that second table for another select statement and join, but when I try to wrap it in a ‘With as’ statement as well, the editor does not accept it as legitimate syntax for the second instance of 'With as'.
How can I subset resulting tables to reference in further select and join statements? I do not have permission to write to the database, so I can not create permanent tables in the database.
Thank you.
-- A: every policy with the LINE of its base-account ('B') transaction, if any.
WITH A AS
(
    SELECT
        POL.[COMPANY_CODE],
        POL.[POLICY_NUMBER],
        POL.[STATUS_CODE],
        POL.ORIG_CLIENT_NUM,
        TA.LINE
    FROM [SamsReporting].[dbo].[POLICY] AS POL
    LEFT JOIN [SamsReporting].[dbo].[Transact] AS TA
        ON TA.POLICY_NUMBER = POL.POLICY_NUMBER
       AND TA.BASE_Account = 'B'
)
SELECT
    PM.POLICY_NUMBER,
    A.[COMPANY_CODE],
    A.[POLICY_NUMBER],
    -- A exposes STATUS_CODE; it has no [Policy Status] column, so the status
    -- is selected by its real name and relabelled here.
    A.[STATUS_CODE] AS [Policy Status],
    eApp.SourceCode
FROM A
LEFT JOIN Web.dbo.Pmetrics AS PM
    ON A.POLICY_NUMBER = PM.POLICY_NUMBER
LEFT JOIN DDP.pol.eAppStaging AS eApp
    ON A.POLICY_NUMBER = eApp.PolicyNumber
-- Filtering on eApp here removes the rows that found no eApp match.
WHERE eApp.SourceCode IN ('HAQ', 'PLS');
Common Table Expressions (CTEs) can build upon each other as you would like. For example, you can do this:
-- Each CTE after the first is introduced with a comma, not another WITH,
-- and may read from any CTE defined before it.
WITH CTE1 AS (SELECT * FROM Table1)
   , CTE2 AS (SELECT * FROM CTE1)
   , CTE3 AS (SELECT * FROM CTE2)
-- A CTE list must be followed by the statement that uses it.
SELECT * FROM CTE3;
You only need the WITH statement for the first CTE. After that just use the CTE name, as in my example.
Hope that helps,
Ash
Sounds like a syntax issue to me. Google CTE (Common Table Expression) and review some examples of how they are formed.
-- A: every policy with the LINE of its base-account ('B') transaction, if any.
WITH A AS
(
    SELECT
        POL.[COMPANY_CODE],
        POL.[POLICY_NUMBER],
        POL.[STATUS_CODE],
        POL.ORIG_CLIENT_NUM,
        TA.LINE
    FROM [SamsReporting].[dbo].[POLICY] AS POL
    LEFT JOIN [SamsReporting].[dbo].[Transact] AS TA
        ON TA.POLICY_NUMBER = POL.POLICY_NUMBER
       AND TA.BASE_Account = 'B'
),
-- B: builds on A. Column names inside a CTE must be unique, so the Pmetrics
-- policy number gets its own alias instead of a second POLICY_NUMBER.
B AS
(
    SELECT
        PM.POLICY_NUMBER AS PM_POLICY_NUMBER,
        A.[COMPANY_CODE],
        A.[POLICY_NUMBER],
        -- A exposes STATUS_CODE; it has no [Policy Status] column.
        A.[STATUS_CODE] AS [Policy Status],
        eApp.SourceCode
    FROM A
    LEFT JOIN Web.dbo.Pmetrics AS PM
        ON A.POLICY_NUMBER = PM.POLICY_NUMBER
    LEFT JOIN DDP.pol.eAppStaging AS eApp
        ON A.POLICY_NUMBER = eApp.PolicyNumber
    WHERE eApp.SourceCode IN ('HAQ', 'PLS')
)
SELECT *
FROM B
-- INNER JOIN some_table ON ...   -- further joins can reference B here
-- WHERE some_condition = 1       -- followed by any filter you need
;
Need help ensuring the below query doesn't return inaccurate results.
-- Counts item/history pairs with a billing event in the date range.
-- An item with several matching history rows is counted once per row.
SELECT @billed = COUNT(a.[counter])
FROM [dbo].cxitems AS a WITH (NOLOCK)
INNER JOIN [dbo].cxitemhist AS b WITH (NOLOCK)
    ON a.[counter] = b.cxlink
WHERE b.[eventtype] IN ('BILLED', 'REBILLED')
  AND b.[datetime] BETWEEN @begdate AND @enddate
The query is "mostly" accurate as is, however there is a slight possibility that cxitemhist table could contain more than 1 "billed" record for given date range. I only need to count item as "Billed" once during given date range.
You can join on a subquery that limits you to one row for each combination of fields used for the join:
-- Counts each item at most once: the derived table collapses the matching
-- history rows to one row per cxlink before the join.
SELECT @billed = COUNT(a.[counter])
FROM [dbo].cxitems AS a
INNER JOIN
(
    SELECT DISTINCT h.cxlink
    FROM [dbo].cxitemhist AS h
    WHERE h.[eventtype] IN ('BILLED', 'REBILLED')
      AND h.[datetime] BETWEEN @begdate AND @enddate
) AS b
    ON a.[counter] = b.cxlink
You can also use the APPLY operator instead of a join here, but you'll have to check against your data to see which gives better performance.
If you only need to count records from the cxitems table, that have any corresponding records from the cxitemhist table, you can use the exists clause with a subquery.
-- Counts each item once if it has at least one billing event in the date
-- range; EXISTS only tests for a match, so extra history rows add nothing.
SELECT @billed = COUNT(a.[counter])
FROM [dbo].cxitems AS a
WHERE EXISTS
(
    SELECT *
    FROM [dbo].cxitemhist AS b
    WHERE a.[counter] = b.cxlink
      AND b.[eventtype] IN ('BILLED', 'REBILLED')
      AND b.[datetime] BETWEEN @begdate AND @enddate
)
Cannot really say how this will affect performance, without specific data, though, but it should be comparably fast with your code.
I'm pulling my hair out over a subquery that I'm using to avoid about 100 duplicates (out of about 40k records). The records that are duplicated are showing up because they have 2 dates in h2.datecreated for a valid reason, so I can't just scrub the data.
I'm trying to get only the earliest date to return. The first subquery (the one that starts with "select distinct address_id", with the MIN) works fine on its own: no duplicates are returned. So it would seem that the left join (or just a plain join; I've tried that too) couldn't possibly see the second h2.datecreated, since it doesn't even show up in the subquery. But when I run the whole query, it returns 2 values for some ipc.mfgid's, one with the h2.datecreated that I want and one that I don't want.
I know it's got to be something really simple, or something that just isn't possible. It really seems like it should work! This is MSSQL. Thanks!
-- Lists each piece of equipment (ipc) with its address, ONT label, account
-- and install date. h2.datecreated is in the DISTINCT column list, so two
-- h2 rows with different dates produce two output rows.
select distinct ipc.mfgid as IPC, h2.datecreated,
-- NOTE(review): this CASE has no ELSE, so it yields NULL whenever ad.Address
-- is not null -- confirm that is intended.
case when ad.Address is null
then ad.buildingname end as Address, cast(trace.name as varchar)
+ '-' + cast(trace.Number as varchar) as ONT,
c.ACCOUNT_Id,
-- Install date: h.datecreated when present, otherwise h2.datecreated.
case when h.datecreated is not null then h.datecreated
else h2.datecreated end as Install
from equipmentjoin as ipc
left join historyjoin as h on ipc.id = h.EQUIPMENT_Id
-- LIKE pattern without wildcards.
and h.type like 'add'
left join circuitjoin as c on ipc.ADDRESS_Id = c.ADDRESS_Id
and c.GRADE_Code like '%hpna%'
-- h2 is grouped by equipment_id, address_id and comment but joined on
-- address_id only, so one address can match several h2 rows, each with its
-- own MIN(datecreated). Being an inner join on c.address_id, it also drops
-- rows for which the left join to c found no match.
join (select distinct address_id, equipment_id,
min(datecreated) as datecreated, comment
from history where comment like 'MAC: 5%' group by equipment_id, address_id, comment)
as h2 on c.address_id = h2.address_id
-- trace: pairs a grade-29 circuit's 'lead%' port with an '%olt%' port on a
-- circuit linked through the circuit-group ids.
left join (select car.id, infport.name, carport.number, car.PCIRCUITGROUP_Id
from circuit as car (NOLOCK)
join port as carport (NOLOCK) on car.id = carport.CIRCUIT_Id
and carport.name like 'lead%'
and car.GRADE_Id = 29
join circuit as inf (NOLOCK) on car.CCIRCUITGROUP_Id = inf.PCIRCUITGROUP_Id
join port as infport (NOLOCK) on inf.id = infport.CIRCUIT_Id
and infport.name like '%olt%' )
as trace on c.ccircuitgroup_id = trace.pcircuitgroup_id
join addressjoin as ad (NOLOCK) on ipc.address_id = ad.id
The typical approach to only getting the lowest row is one of the following. You didn't bother to specify what version of SQL Server you're using, what you want to do with ties, and I have little interest to try to work this into your complex query, so I'll show you an abstract simplification for different versions.
SQL Server 2000
-- Join the table back to its own per-group minimum, so that only the rows
-- carrying each group's lowest min_column value survive.
SELECT
    x.grouping_column,
    x.min_column,
    x.other_columns ...
FROM dbo.foo AS x
INNER JOIN
(
    -- One row per group: the group key and its smallest min_column.
    SELECT
        grouping_column,
        MIN(min_column) AS min_column
    FROM dbo.foo
    GROUP BY grouping_column
) AS y
    ON  x.grouping_column = y.grouping_column
    AND x.min_column = y.min_column;
SQL Server 2005+
-- Number the rows inside each group from lowest min_column upward, then keep
-- row 1 of every group.
;WITH x AS
(
    SELECT
        grouping_column,
        min_column,
        other_columns,
        -- PARTITION BY restarts the numbering for every grouping_column value;
        -- without it only one row in the whole table would get rn = 1.
        rn = ROW_NUMBER() OVER (
                 PARTITION BY grouping_column
                 ORDER BY min_column
             )
    FROM dbo.foo
)
SELECT
    grouping_column,
    min_column,
    other_columns
FROM x
WHERE rn = 1;
This subquery:
select distinct address_id, equipment_id,
min(datecreated) as datecreated, comment
from history where comment like 'MAC: 5%' group by equipment_id, address_id, comment
Probably will return multiple rows because the comment is not guaranteed to be the same.
Try this instead:
-- For each circuit address, pick only the earliest matching history row.
CROSS APPLY
(
    SELECT TOP (1)
        hist.DateCreated,
        hist.Comment -- Equipment_id is left out because it wasn't used
    FROM History AS hist
    WHERE hist.Address_ID = C.Address_ID
      AND hist.Comment LIKE 'MAC: 5%'
    ORDER BY hist.DateCreated
) H2
Switch that to OUTER APPLY in case you want rows that don't have a matching desired history entry.