Using Lateral Flatten and Join results in a syntax error - snowflake-cloud-data-platform

I'm running into an issue where I get a syntax error if I try to join and lateral flatten in the same query. I've created the following example:
Setup
CREATE TABLE "DEMO_DB"."PUBLIC"."MAIN_TABLE" (id number, department_id number, stuff variant);
insert into "DEMO_DB"."PUBLIC"."MAIN_TABLE" (id, department_id, stuff) SELECT 1, 1, PARSE_JSON('[{ "json_id": 1, "name": "Thing 1-One" }, { "json_id": 2, "name": "Thing 1-Two" }]');
insert into "DEMO_DB"."PUBLIC"."MAIN_TABLE" (id, department_id, stuff) SELECT 2, 1, PARSE_JSON('[{ "json_id": 1, "name": "Thing 2-One" }, { "json_id": 2, "name": "Thing 2-Two" }]');
insert into "DEMO_DB"."PUBLIC"."MAIN_TABLE" (id, department_id, stuff) SELECT 3, 2, PARSE_JSON('[{ "json_id": 1, "name": "Thing 3-One" }, { "json_id": 3, "name": "Thing 3-Three" }]');
And set up a table to join to:
CREATE TABLE "DEMO_DB"."PUBLIC"."DEPARTMENTS" (id number, name string);
insert into "DEMO_DB"."PUBLIC"."DEPARTMENTS" (id, name) values (1, 'First Department');
insert into "DEMO_DB"."PUBLIC"."DEPARTMENTS" (id, name) values (2, 'Second Department');
Examples
Basic lateral flatten works:
SELECT
MAIN_TABLE.id,
MAIN_TABLE.department_id,
my_stuff.value:json_id::number stuff_id,
my_stuff.value:name::string stuff_name
FROM
"DEMO_DB"."PUBLIC"."MAIN_TABLE",
lateral flatten ( input => MAIN_TABLE.stuff ) my_stuff
;
The Problem
"SQL compilation error: error line 9 at position 59 invalid identifier 'MAIN_TABLE.DEPARTMENT_ID'"
SELECT
MAIN_TABLE.id,
MAIN_TABLE.department_id,
DEPARTMENTS.name department_name,
my_stuff.value:json_id::number stuff_id,
my_stuff.value:name::string stuff_name
FROM
"DEMO_DB"."PUBLIC"."MAIN_TABLE",
lateral flatten ( input => MAIN_TABLE.stuff ) my_stuff
JOIN "DEMO_DB"."PUBLIC"."DEPARTMENTS" on (DEPARTMENTS.id = MAIN_TABLE.department_id);
More that works
-- basic join without lateral flatten
SELECT
MAIN_TABLE.id,
MAIN_TABLE.department_id,
DEPARTMENTS.name department_name -- ,
-- my_stuff.value:json_id::number stuff_id,
-- my_stuff.value:name::string stuff_name
FROM
"DEMO_DB"."PUBLIC"."MAIN_TABLE" -- ,
-- lateral flatten ( input => MAIN_TABLE.stuff ) my_stuff
JOIN "DEMO_DB"."PUBLIC"."DEPARTMENTS" on (DEPARTMENTS.id = MAIN_TABLE.department_id);
-- Using a table in the from clause with a where
SELECT
MAIN_TABLE.id,
MAIN_TABLE.department_id,
DEPARTMENTS.name department_name,
my_stuff.value:json_id::number stuff_id,
my_stuff.value:name::string stuff_name
FROM
"DEMO_DB"."PUBLIC"."MAIN_TABLE",
lateral flatten ( input => MAIN_TABLE.stuff ) my_stuff,
"DEMO_DB"."PUBLIC"."DEPARTMENTS"
WHERE (DEPARTMENTS.id = MAIN_TABLE.department_id);
-- Using an inline view to pull in the join data
SELECT
MAIN_TABLE.id,
MAIN_TABLE.department_id,
MAIN_TABLE.department_name,
my_stuff.value:json_id::number stuff_id,
my_stuff.value:name::string stuff_name
FROM
(
SELECT
MAIN_TABLE.*,
DEPARTMENTS.name department_name
FROM
"DEMO_DB"."PUBLIC"."MAIN_TABLE"
JOIN "DEMO_DB"."PUBLIC"."DEPARTMENTS" on (DEPARTMENTS.id = MAIN_TABLE.department_id)
) MAIN_TABLE,
lateral flatten ( input => MAIN_TABLE.stuff ) my_stuff;

This works:
-- DEPARTMENTS is joined to MAIN_TABLE before the comma, so MAIN_TABLE is
-- in scope for the ON clause; the lateral flatten is applied afterwards.
SELECT
    MAIN_TABLE.id,
    MAIN_TABLE.department_id,
    DEPARTMENTS.name AS department_name,
    my_stuff.value:json_id::number AS stuff_id,
    my_stuff.value:name::string AS stuff_name
FROM "DEMO_DB"."PUBLIC"."MAIN_TABLE"
INNER JOIN "DEMO_DB"."PUBLIC"."DEPARTMENTS"
    ON DEPARTMENTS.id = MAIN_TABLE.department_id,
LATERAL FLATTEN(input => MAIN_TABLE.stuff) my_stuff;
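The order matters for lateral joins: an explicit JOIN binds more tightly than a comma in the FROM clause, so in the failing query DEPARTMENTS is joined to the flatten output alone and MAIN_TABLE is not visible in that ON clause. Here is a usage note that discusses it: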
https://docs.snowflake.com/en/sql-reference/constructs/join-lateral.html#usage-notes

Related

CROSS APPLY in Oracle/SQL Server substitute in Snowflake

I am looking for the best alternatives for mapping CROSS APPLY to Snowflake.
Something like:
select department_name, employee_id, employee_name
from departments d
cross apply (select employee_id, employee_name
from employees e
where salary >= 2000
and e.department_id = d.department_id)
order by 1, 2, 3;
The ANSI SQL equivalent of CROSS APPLY is JOIN LATERAL:
select department_name, employee_id, employee_name
from departments d
join lateral (select employee_id, employee_name
from employees e
where salary >= 2000
and e.department_id = d.department_id)
order by 1, 2, 3;
Output:
and for OUTER APPLY is LEFT JOIN LATERAL () ON TRUE:
select department_name, employee_id, employee_name
from departments d
left join lateral (select employee_id, employee_name
from employees e
where salary >= 2000
and e.department_id = d.department_id) ON TRUE
order by 1, 2, 3;
Output:
For source data:
-- Sample departments for the JOIN LATERAL examples.
CREATE OR REPLACE TABLE departments(department_id INT, department_name TEXT,
department_location TEXT)
AS
-- UNION ALL: the literal rows are already distinct, so no de-duplication is needed.
SELECT 1, 'HR', 'London' UNION ALL
SELECT 2, 'SALES', 'Berlin' UNION ALL
SELECT 3, 'RESEARCH', 'Paris';
-- Sample employees for the JOIN LATERAL examples; department 3 has no employees.
CREATE OR REPLACE TABLE employees(employee_id INT, employee_name TEXT,
salary INT, department_id INT)
AS
-- UNION ALL: the literal rows are already distinct, so no de-duplication is needed.
SELECT 100, 'John', 2000, 1 UNION ALL
SELECT 101, 'Anna', 4000, 2;
Related: CROSS/OUTER APPLY in MySQL
What I have seen is that the same results can be achieved with a CROSS JOIN LATERAL
using this testdata from here:
create table departments (
department_id number(2) ,
department_name varchar2(14),
location varchar2(13)
);
insert into departments values (10,'ACCOUNTING','NEW YORK');
insert into departments values (20,'RESEARCH','DALLAS');
insert into departments values (30,'SALES','CHICAGO');
insert into departments values (40,'OPERATIONS','BOSTON');
create table employees (
employee_id number(4) ,
employee_name varchar2(10),
job varchar2(9),
manager_id number(4),
hiredate date,
salary number(7,2),
commission number(7,2),
department_id number(2)
);
insert into employees values (7369,'SMITH','CLERK',7902,to_date('17-12-1980','dd-mm-yyyy'),800,NULL,20);
insert into employees values (7499,'ALLEN','SALESMAN',7698,to_date('20-2-1981','dd-mm-yyyy'),1600,300,30);
insert into employees values (7521,'WARD','SALESMAN',7698,to_date('22-2-1981','dd-mm-yyyy'),1250,500,30);
insert into employees values (7566,'JONES','MANAGER',7839,to_date('2-4-1981','dd-mm-yyyy'),2975,NULL,20);
insert into employees values (7654,'MARTIN','SALESMAN',7698,to_date('28-9-1981','dd-mm-yyyy'),1250,1400,30);
insert into employees values (7698,'BLAKE','MANAGER',7839,to_date('1-5-1981','dd-mm-yyyy'),2850,NULL,30);
insert into employees values (7782,'CLARK','MANAGER',7839,to_date('9-6-1981','dd-mm-yyyy'),2450,NULL,10);
insert into employees values (7788,'SCOTT','ANALYST',7566,to_date('13-JUL-87','dd-mm-rr')-85,3000,NULL,20);
insert into employees values (7839,'KING','PRESIDENT',NULL,to_date('17-11-1981','dd-mm-yyyy'),5000,NULL,10);
insert into employees values (7844,'TURNER','SALESMAN',7698,to_date('8-9-1981','dd-mm-yyyy'),1500,0,30);
-- Four-digit year to match the 'yyyy' mask; a bare '87' would be read as year 0087, not 1987.
insert into employees values (7876,'ADAMS','CLERK',7788,to_date('13-6-1987', 'dd-mm-yyyy')-51,1100,NULL,20);
insert into employees values (7900,'JAMES','CLERK',7698,to_date('3-12-1981','dd-mm-yyyy'),950,NULL,30);
insert into employees values (7902,'FORD','ANALYST',7566,to_date('3-12-1981','dd-mm-yyyy'),3000,NULL,20);
insert into employees values (7934,'MILLER','CLERK',7782,to_date('23-1-1982','dd-mm-yyyy'),1300,NULL,10);
As mentioned in the question, the following:
select department_name, employee_id, employee_name
from departments d
cross join lateral (select employee_id, employee_name
from employees e
where salary >= 2000
and e.department_id = d.department_id)
order by 1, 2, 3;
is equivalent, but is it the best option?

Self join and get unique records between a date

I have my table and data as follows where I am trying to filter based on period and get results
CREATE TABLE testData
(
Id int,
period date,
value decimal(18,2)
)
INSERT INTO testData
VALUES (1, '2001-08-01', 400), (2, '2001-09-01', 400), (2, '2001-09-01', 300)
I have a fiddle that returns results, but not the expected ones; you can check it here: http://sqlfiddle.com/#!6/beb4c/5
This is my SQL query
SELECT
a.id,
[value] - (SELECT TOP 1 b.[value]
FROM testData b
WHERE b.period = a.period
ORDER BY b.id DESC) x
FROM
testData a
Output I am expecting is
1 2001-08-01 400
2 2001-09-01 100
Try this:
-- For each Id, take its first row and subtract the summed values of that Id's
-- remaining rows (the remaining rows are summed per Period).
WITH CTE
AS
(
    SELECT
        -- Value DESC breaks ties between rows that share an Id and Period, so the
        -- "first" row is deterministic: the largest value is the one subtracted from.
        SeqNo = ROW_NUMBER() OVER(PARTITION BY Id ORDER BY Period, Value DESC),
        Id,
        Period,
        Value
    FROM TestData
)
SELECT
    A.Id,
    A.Period,
    Value = ISNULL(A.Value,0) - ISNULL(Q.Value,0)
FROM CTE A
LEFT JOIN(
    -- Everything except the first row of each Id.
    SELECT
        B.Id,
        B.period,
        Value = SUM(B.Value)
    FROM CTE B
    WHERE B.SeqNo <> 1
    GROUP BY B.Period,B.Id
)Q
    ON A.Id = Q.Id
WHERE A.SeqNo = 1
Fiddle Link Here

Select row with max value with having clause

create table Users
(
ID int primary key,
Username char(13) not null,
Salary int,
DepartmentID int,
PCID int
);
insert into Users values (1, 'Jenson', 180000, 4,12);
insert into Users values (2, 'John', 161000, 2,11);
insert into Users values (3, 'Jack', 150000, 1,10);
insert into Users values (4, 'James', 150000, 3,9);
insert into Users values (5, 'Jeremy', 151000, 3,7);
-- Departments lookup table. No comma after the last column definition:
-- a trailing comma is non-standard and rejected by most engines.
create table Departments
(
ID int primary key,
Name char(13) not null
);
insert into Departments values (1, 'Programming');
insert into Departments values (2, 'Supply');
insert into Departments values (3, 'Medicine');
insert into Departments values (4, 'Economic');
insert into Departments values (5, 'Communication');
SELECT
s.dep_id as dep_id, s.Sum_Salary
FROM
(SELECT
d.ID AS dep_id, SUM(u.Salary) AS Sum_Salary
FROM
dbo.users u
INNER JOIN
Departments d ON u.DepartmentID = d.id
GROUP BY
d.ID) s
With the query above I can select dep_id and Sum_Salary.
How can I select the row with the max value of Sum_Salary, without using a CTE or similar approaches?
You can use TOP and ORDER BY for this:
SELECT TOP 1
d.ID AS dep_id,
sum(u.Salary) AS Sum_Salary
from dbo.users u
INNER JOIN Departments d ON u.DepartmentID=d.id
GROUP BY d.ID
order by Sum_Salary desc;
It'll return the top 1 row with maximum Sum_salary.
If you just want to find maximum sum_salary, use MAX:
SELECT
MAX(s.Sum_Salary)
FROM
(SELECT
SUM(u.Salary) AS Sum_Salary
FROM
dbo.users u
INNER JOIN
Departments d ON u.DepartmentID = d.id
GROUP BY
d.ID) s
WITH CTE AS
(
SELECT *,ROW_NUMBER() OVER( ORDER BY SUM_SALARY DESC) AS RN FROM (SELECT D.ID AS DEP_ID ,SUM(U.SALARY) AS SUM_SALARY FROM DBO.USERS U
INNER JOIN DEPARTMENTS D ON U.DEPARTMENTID=D.ID
GROUP BY D.ID )A
)
SELECT SUM_SALARY, RN
FROM CTE WHERE RN=1
OR
SELECT D.ID AS DEP_ID ,SUM(U.SALARY) AS SUM_SALARY FROM DBO.USERS U
INNER JOIN DEPARTMENTS D ON U.DEPARTMENTID=D.ID
GROUP BY D.ID
HAVING SUM(U.SALARY) = (SELECT TOP 1 SUM(U.SALARY) AS SUM_SALARY FROM DBO.USERS U
INNER JOIN DEPARTMENTS D ON U.DEPARTMENTID=D.ID
GROUP BY D.ID
ORDER BY SUM_SALARY DESC)

How to set an order when using CTE?

I have a table with 2 columns (id, childId). The data is as follows:
1, 2
3, 4
2, null
4, null
I'm using a CTE so that I get the child records:
-- Table variable holding the ids to start from (table variables are named with @).
DECLARE @id TABLE (id int);
INSERT INTO @id (id) SELECT 1;
INSERT INTO @id (id) SELECT 3;
WITH cte AS
(
    -- Anchor: the rows whose id was requested.
    SELECT id, childId
    FROM mytable
    WHERE
        id IN (SELECT id FROM @id)
    UNION ALL
    -- Recursive step: follow childId to the next row in the chain.
    SELECT b.id, b.childId
    FROM mytable b
    INNER JOIN cte
        ON b.id = cte.childId
)
-- No ORDER BY here, so the row order of the result is not guaranteed.
SELECT * FROM cte
The result always comes back as:
1, 2
3, 4
4, null
2, null
But I need the result to look like:
1, 2
2, null
3, 4
4, null
That is, first the anchor records then the records for the recursive sql for each anchor record.
Is this possible?
Add a static value in the anchor query. Then, in the recursive part, add a static value greater than the anchor's. Now use that static value in the ORDER BY.
Try this
WITH cte AS
(
    -- Anchor rows: rn = 0, and each anchor is the root of its own chain.
    SELECT id AS root_id, 0 AS rn, id, childId
    FROM mytable
    WHERE
        id IN (SELECT id FROM @id)
    UNION ALL
    -- Recursive rows: keep the chain's root and use an rn greater than the parent's.
    SELECT cte.root_id, cte.rn + 1, b.id, b.childId
    FROM mytable b
    INNER JOIN cte
        ON b.id = cte.childId
)
SELECT rn, id, childId
FROM cte
-- Ordering by rn alone lists every anchor before any child (1, 3, 2, 4);
-- ordering by root_id first keeps each anchor directly ahead of its own descendants.
ORDER BY root_id, rn, id
Also consider adding OPTION (MAXRECURSION N). By default, a recursive CTE is limited to 100 levels of recursion.
By Adding a Seq, the results will be displayed in the proper order/nesting
-- Table variable holding the ids to start from (table variables are named with @).
DECLARE @id TABLE (id int);
INSERT INTO @id (id) SELECT 1;
INSERT INTO @id (id) SELECT 3;
WITH cte AS
(
    -- Anchor: Seq starts as a fixed-width number (100000 + row number) so that
    -- sorting it as a string keeps numeric order.
    SELECT id, childId
          ,Seq = cast(100000+Row_Number() over (Order by id) as varchar(500))
    FROM mytable
    WHERE
        id IN (SELECT id FROM @id)
    UNION ALL
    -- Recursive step: append '.<number>' to the parent's Seq, so a child's Seq
    -- starts with its parent's Seq and sorts immediately after it.
    SELECT b.id, b.childId
          ,Seq = cast(concat(cte.Seq,'.',100000+Row_Number() over (Order by b.id)) as varchar(500))
    FROM mytable b
    INNER JOIN cte
        ON b.id = cte.childId
)
SELECT * FROM cte
Order By Seq

How to implement a ZIP JOIN in T-SQL?

Let say I have table #Foo:
Id Color
-- ----
1 Red
2 Green
3 Blue
4 NULL
And table #Bar:
Value
-----
1
2.5
I would like to create table Result using simple statement to get:
Id Color Value
-- ---- -----
1 Red 1
2 Green 2.5
3 Blue NULL
4 NULL NULL
What I have invented so far is:
WITH cte1
AS
(
SELECT [Id], [Color], ROW_NUMBER() OVER (ORDER BY [Id]) AS 'No'
FROM #Foo
),
cte2
AS
(
SELECT [Value], ROW_NUMBER() OVER (ORDER BY [Value]) AS 'No'
FROM #Bar
)
SELECT [Id], [Color], [Value]
FROM cte1 c1
FULL OUTER JOIN cte2 c2 ON c1.[No] = c2.[No]
Do you know a faster or more standard way to do a ZIP JOIN in T-SQL?
You can simply try this.
;WITH CTE AS
(
SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 1)) AS Id, Value FROM #Bar
)
SELECT F.Id, F.Color, CTE.Value
FROM #Foo F
LEFT JOIN CTE ON CTE.Id = F.Id
You can get rid of the CTEs and make your query shorter
by using subqueries, like this:
select Id,Color,Value from
(
SELECT [Id], [Color], ROW_NUMBER() OVER (ORDER BY [Id]) AS 'No'
FROM #Foo
)x full outer join
(
SELECT [Value], ROW_NUMBER() OVER (ORDER BY [Value]) AS 'No'
FROM #Bar
)y
on x.No=y.No
Will this suffice? (Admittedly, I may be misinterpreting the question.)
SELECT
F.ID AS ID,
F.Color AS Color,
B.Value AS Value
FROM #Foo F
LEFT OUTER JOIN #Bar B ON F.ID = FLOOR(B.Value)
--this DOES seem to return the correct output, but I'm not sure that my logic
--is what you are after
SELECT
F.ID AS ID,
F.Color AS Color,
B.Value AS Value
FROM
(
VALUES
(1,'Red'),(2,'Green'),(3,'Blue'),(4, NULL)
) AS F(ID, Color)
LEFT OUTER JOIN
(
VALUES
(1), (2.5)
) AS B(Value)
ON F.ID = FLOOR(B.Value)
Or are you wanting to essentially:
Sort #Foo by ID
Sort #Bar by Value
Match:
"First" row from #Foo with "First" row from #Bar
"Second" row from #Foo with "Second" row from #Bar
etc...
(Sorry, but I am not familiar with what a "ZIP JOIN" is.
I will look at the link provided by @RszardDzegan, though.)
you could try something like this:
-- Sample data in table variables (table variables are named with @).
DECLARE @Foo TABLE (Id INT, Color VARCHAR(10));
DECLARE @Bar TABLE (Value DECIMAL(2, 1));
INSERT INTO @Foo (Id, Color)
VALUES (1, 'Red'), (2, 'Green'), (3, 'Blue'), (4, NULL);
INSERT INTO @Bar (Value)
VALUES (1), (2.5);
WITH ECROSS
AS (
    -- Every Foo row paired with every Bar row; No1 ranks the pair by Foo.Id,
    -- No2 ranks it by Bar.Value.
    SELECT F.Id, F.Color, B.Value,
           DENSE_RANK() OVER (ORDER BY F.Id) AS No1,
           DENSE_RANK() OVER (ORDER BY B.Value) AS No2
    FROM @Foo F
    CROSS JOIN @Bar B
)
SELECT A.id, A.Color, B.Value
FROM ECROSS A
-- B is restricted to pairs whose two ranks agree (n-th Id with n-th Value);
-- Foo rows beyond the number of Bar rows get NULL.
LEFT JOIN ECROSS B ON A.No1 = B.No2
    AND A.No1 = B.No1
-- Collapses the duplicate A rows produced by the cross join.
GROUP BY A.id, A.Color, B.Value
-- Sample data in table variables (table variables are named with @); each gets
-- an identity column that numbers its rows as they are inserted.
DECLARE @Foo TABLE (pk_id int identity(1,1), Id INT, Color VARCHAR(10));
DECLARE @Bar TABLE (pk_id int identity(1,1), Value DECIMAL(2, 1));
INSERT INTO @Foo (Id, Color)
VALUES (1, 'Red'), (2, 'Green'), (3, 'Blue'), (4, NULL);
INSERT INTO @Bar (Value)
VALUES (1), (2.5);
-- Pair rows by identity value.
-- NOTE(review): assumes identity values follow the order of the VALUES list — confirm.
SELECT F.id, F.Color, B.Value
FROM @Foo F
LEFT JOIN @Bar B ON F.pk_id = B.pk_id
Try the following code. You just need to provide both data types in the same structure with a row number per group. With that you can use the PIVOT operator to produce the expected result.
WITH
CTE_FOO AS
(
SELECT
[Group]
,[Spread]
,[Aggregate]
FROM
(VALUES
(1, 1, N'Red' )
,(2, 1, N'Green')
,(3, 1, N'Blue' )
,(4, 1, NULL )
) AS FOO([Group], [Spread], [Aggregate])
),
CTE_BAR AS
(
SELECT
[Group]
,[Spread]
,CAST([Aggregate] AS nvarchar(max)) AS [Aggregate]
FROM
(VALUES
(1, 2, 1 )
,(2, 2, 2.5 )
) AS BAR([Group], [Spread], [Aggregate])
),
CTE_FOOBAR AS
(
SELECT [Group], [Spread], [Aggregate] FROM CTE_FOO
UNION ALL
SELECT [Group], [Spread], [Aggregate] FROM CTE_BAR
)
SELECT
[Group] AS [ID]
,[1] AS [Color]
,[2] AS [Value]
FROM
CTE_FOOBAR
PIVOT
(
MAX([Aggregate]) FOR [Spread] IN ([1], [2])
) AS PivotTable
You can skip creating new row numbers for #Foo, since its row numbers in this case are given.
Then the solution will become
SELECT F.Id,F.Color,newBar.Value from #Foo as F
LEFT JOIN
(
SELECT [Value], ROW_NUMBER() OVER (ORDER BY [Value]) AS 'No'
FROM #Bar
) newBar
on F.Id=newBar.No
This solution has been tested and proven. It gives you all values of #Foo and for each a sorted value of #Bar if there is one.

Resources