Snowflake - produced extra row with null values after count operation - snowflake-cloud-data-platform

I have two tables like:
-- Fixture: one row per (C_ID, I_A) pair. I_A is the value matched against t2.I_N in the join.
-- The 'l' row has no t2 row with I_N = 'l' in the t2 sample data.
create or replace table t1 (C_ID varchar, I_A varchar ) as select * from values
('4335b37a-242d-4b42-a5bf-92b0fbbb50b6','j'),
('4335b37a-242d-4b42-a5bf-92b0fbbb50b6','l'),
('4335b37a-242d-4b42-a5bf-92b0fbbb50b6','m'),
('4335b37a-242d-4b42-a5bf-92b0fbbb50b6','d')
;
-- Fixture: several I_ID rows per (C_ID, I_N) pair, each carrying a value I_V and timestamps.
-- SOURCE_DELETED_DATE and DELETED_DATE are NULL in every sample row; I_N only takes 'd', 'j', 'm'.
create or replace table t2 (C_ID varchar, I_ID varchar, I_N varchar, I_V varchar, UPDATED_DATE datetime, SOURCE_DELETED_DATE datetime, DELETED_DATE datetime, SYNC_DATE datetime ) as select * from values
('4335b37a-242d-4b42-a5bf-92b0fbbb50b6','ac12000371951f7e81719520b4bd031f','d','E','2020-06-19 04:35:00.386000000',NULL,NULL,'2022-07-06 08:10:33.239000000 -07:00'),
('4335b37a-242d-4b42-a5bf-92b0fbbb50b6','ac12000371951f7e81719520b4bd031f','j','C','2020-06-19 04:35:00.386000000',NULL,NULL,'2022-07-06 08:10:33.239000000 -07:00'),
('4335b37a-242d-4b42-a5bf-92b0fbbb50b6','ac12000371951f7e81719520b7700321','d','M','2020-06-19 04:35:00.476000000',NULL,NULL,'2022-07-06 08:10:33.239000000 -07:00'),
('4335b37a-242d-4b42-a5bf-92b0fbbb50b6','ac12000371951f7e81719520b7700321','j','VM','2020-06-19 04:35:00.476000000',NULL,NULL,'2022-07-06 08:10:33.239000000 -07:00'),
('4335b37a-242d-4b42-a5bf-92b0fbbb50b6','ac12000371951f7e81719520b7700321','m','dd','2020-06-19 04:35:00.476000000',NULL,NULL,'2022-07-06 08:10:33.239000000 -07:00'),
('4335b37a-242d-4b42-a5bf-92b0fbbb50b6','ac12000371951f7e81719520b7dc0323','d','En','2020-06-19 04:35:00.501000000',NULL,NULL,'2022-07-06 08:10:33.239000000 -07:00'),
('4335b37a-242d-4b42-a5bf-92b0fbbb50b6','ac12000371951f7e81719520b7dc0323','j','VPE','2020-06-19 04:35:00.501000000',NULL,NULL,'2022-07-06 08:10:33.239000000 -07:00'),
('4335b37a-242d-4b42-a5bf-92b0fbbb50b6','ac12000371951f7e81719520b7dc0323','m','dd','2020-06-19 04:35:00.501000000',NULL,NULL,'2022-07-06 08:10:33.239000000 -07:00'),
('4335b37a-242d-4b42-a5bf-92b0fbbb50b6','ac12000371951f7e81719520b84c0325','d','En','2020-06-19 04:35:00.420000000',NULL,NULL,'2022-07-06 08:10:33.239000000 -07:00'),
('4335b37a-242d-4b42-a5bf-92b0fbbb50b6','ac12000371951f7e81719520b84c0325','j','EN','2020-06-19 04:35:00.420000000',NULL,NULL,'2022-07-06 08:10:33.239000000 -07:00'),
('4335b37a-242d-4b42-a5bf-92b0fbbb50b6','ac12000371951f7e81719520b84c0325','m','j','2020-06-19 04:35:00.420000000',NULL,NULL,'2022-07-06 08:10:33.239000000 -07:00'),
('4335b37a-242d-4b42-a5bf-92b0fbbb50b6','ac12000371951f7e81719520b8b40327','d','M','2020-06-19 04:35:00.548000000',NULL,NULL,'2022-07-06 08:10:33.239000000 -07:00'),
('4335b37a-242d-4b42-a5bf-92b0fbbb50b6','ac12000371951f7e81719520b8b40327','j','PM','2020-06-19 04:35:00.548000000',NULL,NULL,'2022-07-06 08:10:33.239000000 -07:00'),
('4335b37a-242d-4b42-a5bf-92b0fbbb50b6','ac12000371951f7e81719520b8b40327','m','a','2020-06-19 04:35:00.548000000',NULL,NULL,'2022-07-06 08:10:33.239000000 -07:00');
-- Query from the question: joins each T1 row to its T2 rows and counts I_ID per group.
-- LEFT OUTER JOIN keeps a T1 row that has no T2 match, with every T2 column NULL.
-- All grouping keys below come from T2, so such rows collapse into one all-NULL group
-- whose count(T2.I_ID) is 0 (COUNT of a column skips NULLs).
SELECT
CONCAT(T2.C_ID, T2.I_ID, T2.I_V, T2.UPDATED_DATE) AS PK,
T2.I_N AS I_N,
T2.I_V AS I_V,
count(T2.I_ID) AS I_ID,
T2.UPDATED_DATE AS UPDATED_DATE,
T2.SYNC_DATE AS SYNC_DATE
FROM T1 AS T1
LEFT OUTER JOIN T2
ON T1.C_ID = T2.C_ID
AND T1.I_A = T2.I_N
-- T2.I_ID is itself a grouping key, so each group holds a single I_ID value.
GROUP BY T2.C_ID, T2.I_N, T2.I_ID, T2.I_V, T2.UPDATED_DATE, T2.SYNC_DATE;
Here I'm trying to get the count of distinct I_IDs for each (C_ID, I_N) pair.
Am I missing something? I did not expect a row of NULL values from this join.
My results

The table t1 contains the row ('4335b37a-242d-4b42-a5bf-92b0fbbb50b6','l'), which has no corresponding row in t2.
The SELECT statement:
SELECT CONCAT(T2.C_ID, T2.I_ID, T2.I_V, T2.UPDATED_DATE) AS PK,
reads its values from t2, and for that unmatched t1 row every t2 column is NULL.
Removing the GROUP BY and providing output for T1 join columns:
-- The question's join without aggregation, with the T1 join keys shown next to the
-- T2 columns so a T1 row without a T2 match (all T2 columns NULL) is visible.
SELECT
    T1.C_ID,
    T1.I_A,
    CONCAT(T2.C_ID, T2.I_ID, T2.I_V, T2.UPDATED_DATE) AS PK,
    T2.I_N,
    T2.I_V,
    T2.UPDATED_DATE,
    T2.SYNC_DATE
FROM T1
LEFT JOIN T2
    ON T2.C_ID = T1.C_ID
   AND T2.I_N = T1.I_A;
Output:

Please check your data and query:
-- Example fixture: parent table t1 keyed by id.
create or replace table t1 (id number, col1 number ) as select * from values
( 1 , 222 ),
( 2 , 333 ),
( 3 , 444 ),
( 4 , 555 );
-- Example fixture: child table t2; (id_t1, col1_) repeat t1's (id, col1), col2 differs per row.
-- Every t1 row has at least one child row here.
create or replace table t2 (id number, id_t1 number, col1_ number, col2 number ) as select * from values
( 1 , 1 , 222 , 41 ),
( 2 , 1 , 222 , 42 ),
( 3 , 2 , 333 , 43 ),
( 4 , 2 , 333 , 44 ),
( 5 , 2 , 333 , 45 ),
( 6 , 3 , 444 , 46 ),
( 7 , 4 , 555 , 47 );
-- Count the distinct T2.COL2 values attached to each T1 row.
-- Grouping keys come from T1, so the groups are driven by the preserved side of the join.
SELECT
    CONCAT(T1.ID, T1.COL1) AS PK,
    T1.COL1,
    COUNT(DISTINCT T2.COL2) AS COL2_COUNT
FROM T1
LEFT JOIN T2
    ON T2.ID_T1 = T1.ID
   AND T2.COL1_ = T1.COL1
GROUP BY
    CONCAT(T1.ID, T1.COL1),
    T1.COL1;
+------+------+------------+
| PK | COL1 | COL2_COUNT |
+------+------+------------+
| 1222 | 222 | 2 |
| 2333 | 333 | 3 |
| 4555 | 555 | 1 |
| 3444 | 444 | 1 |
+------+------+------------+
No NULLs in the result!

Related

Remove sql select duplicates

-- Pair every table1 row with the table2 rows that share its C1 but carry a different C2.
-- Each unordered pair therefore shows up twice, once in each direction.
SELECT
    T1.C1,
    T1.C2,
    T2.C2
FROM table1 AS T1
INNER JOIN table2 AS T2
    ON T2.C1 = T1.C1
   AND T1.C2 != T2.C2
WHERE T2.C1 != ''
Output:
| T1.C1 | T1.C2 | T2.C2 |
--------------------------
| 1 | A1 | B14 |
| 1 | B14 | A1 |
| 2 | A3 | B14 |
| 2 | B14 | A3 |
Simple SQL query to return all C1 that are in two different items.
How can I remove all the duplicates from the query to get this result:
| T1.C1 | T1.C2 | T2.C2 |
--------------------------
| 1 | A1 | B14 |
| 2 | A3 | B14 |
Instead of:
T1.C2 != T2.C2
use:
T1.C2 < T2.C2  -- strict: '<=' would also admit rows where T1.C2 = T2.C2, which '!=' excluded
This works as long as for each (T1.C2 < T2.C2) pair an equivalent (T1.C2 > T2.C2) pair exists, like in your sample data, e.g. for (A1, B14) pair (B14, A1) also exists.
Otherwise, you can use:
-- Keep exactly one row per unordered {T1.C2, T2.C2} pair within each C1.
-- The outer query can only see the derived table alias t, so it selects t.* columns.
SELECT t.C1, t.T1_C2, t.T2_C2
FROM (
    SELECT T1.C1,
           -- distinct aliases: a derived table cannot expose two columns both named C2
           T1.C2 AS T1_C2,
           T2.C2 AS T2_C2,
           ROW_NUMBER() OVER (
               -- normalise each pair to (smaller, larger) so (A1, B14) and (B14, A1)
               -- land in the same partition
               PARTITION BY T1.C1,
                            IIF(T1.C2 <= T2.C2, T1.C2, T2.C2),
                            IIF(T1.C2 <= T2.C2, T2.C2, T1.C2)
               ORDER BY T1.C2, T2.C2) AS rn
    FROM table1 T1
    JOIN table2 T2
      ON T1.C1 = T2.C1
     AND T1.C2 != T2.C2
     AND T2.C1 != '') AS t
WHERE t.rn = 1
If I understand what you're after, here is the code to generate this output:
T1xC2 hits
-------------------- -------------------
A1 A3,B14
A3 A1,B14
B14 A1,A3
Is Produced by:
-- Sample data for the query that follows.
-- '#' names are local temporary tables and are created with CREATE TABLE;
-- DECLARE ... TABLE only accepts '@' table variables.
CREATE TABLE #T1 (C1 int, C2 varchar(20));
CREATE TABLE #T2 (C1 int, C2 varchar(20));

INSERT INTO #T1 (C1, C2)
VALUES (1, 'A1'),
       (1, 'B14'),
       (2, 'A3'),
       (2, 'B14');

INSERT INTO #T2 (C1, C2)
VALUES (1, 'B14'),
       (1, 'A1'),
       (2, 'B14'),
       (2, 'A3');
-- For each #T1.C2 value, build a comma-separated list of the other C2 values it is paired with.
;WITH mix
AS (
-- mix: every #T1 row combined with every #T2 row (CROSS JOIN).
-- rnk is computed here but not referenced by the later steps of this query.
SELECT T1.C1 AS [T1xC1]
,T2.C1 AS [T2xC1]
,T1.C2 AS [T1xC2]
,T2.C2 AS [T2xC2]
,ROW_NUMBER() OVER (
ORDER BY (
SELECT NULL
)
) AS rnk
FROM #T1 T1
CROSS JOIN #t2 T2
)
,Groupwork
AS (
-- Groupwork: mix rows whose two C2 values differ and for which another mix row
-- exists with the same T1xC2 but a different T2xC2.
SELECT *
FROM mix m
WHERE EXISTS (
SELECT 1
FROM Mix m2
WHERE m.T1xC2 = m2.T1xC2
AND m.T2xC2 <> m2.T2xC2
AND m.T1xC2 <> m.t2xc2
)
)
,GroupRows
AS (
-- GroupRows: the distinct (T1xc1, T1xC2) combinations present in Groupwork.
SELECT DISTINCT T1xc1, T1xC2
FROM Groupwork
)
SELECT distinct T1xC2, x.hits
FROM GroupRows g
CROSS APPLY (
-- Concatenate ',' + T2xC2 for the current T1xC2 via FOR XML PATH(''),
-- then STUFF(..., 1, 1, '') removes the leading comma.
SELECT STUFF((
SELECT distinct ',' + g2.T2xC2
FROM Groupwork g2
WHERE g2.T1xC2 = g.T1xC2
FOR XML PATH('')
), 1, 1, '') hits
) x

Referencing outer table in an aggregate function in a subquery

I'm looking for a solution to particular query problem. I have a table Departments and table Employees designed like that:
Departments Employees
===================== ============================
ID | Name ID | Name | Surname | DeptID
--------------------- ----------------------------
1 | ADMINISTRATION 1 | X | Y | 2
2 | IT 2 | Z | Z | 1
3 | ADVERTISEMENT 3 | O | O | 1
4 | A | B | 3
I'd like to get list of all departments whose number of employees is smaller than number of employees working in Administration.
That was one of my ideas, but it did not work:
-- Question's attempt (reported above as not working).
-- The IN list is built from Employees.ID grouped by Employees.ID, not from Employees.DeptID,
-- and the HAVING clause references the outer Depts.Name inside an aggregate.
select * from Departments as Depts where Depts.ID in
(select Employees.ID from Employees group by Employees.ID
having count(Employees.ID) < count(case when Depts.Name='ADMINISTRATION' then 1 end));
Using GROUP BY and HAVING:
-- Departments whose headcount is smaller than that of ADMINISTRATION.
SELECT
    d.ID,
    d.Name
FROM Departments d
-- LEFT JOIN keeps departments that have no employees at all (COUNT(e.ID) = 0)
LEFT JOIN Employees e
    ON e.DeptID = d.ID
GROUP BY d.ID, d.Name
HAVING COUNT(e.ID) < (
    -- ADMINISTRATION headcount, resolved by department name rather than a hard-coded ID
    SELECT COUNT(*)
    FROM Employees ea
    INNER JOIN Departments da
        ON da.ID = ea.DeptID
    WHERE da.Name = 'ADMINISTRATION'
)
Try this,
-- Sample data for the query that follows.
-- '#' names are local temporary tables and are created with CREATE TABLE;
-- DECLARE ... TABLE only accepts '@' table variables.
CREATE TABLE #Departments (ID int, Name varchar(50));

INSERT INTO #Departments (ID, Name)
VALUES (1, 'ADMINISTRATION'),
       (2, 'IT'),
       (3, 'ADVERTISEMENT');

CREATE TABLE #Employees (ID int, Name varchar(50), Surname varchar(50), DeptID int);

INSERT INTO #Employees (ID, Name, Surname, DeptID)
VALUES (1, 'X', 'Y', 2),
       (2, 'Z', 'Z', 1),
       (3, 'O', 'O', 1),
       (4, 'A', 'B', 3)
;
-- Number the employees of every department other than ID 1 (ordered by id within the
-- department) and keep the rows whose number is below the employee count of department 1.
WITH ranked_employees AS (
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY deptid ORDER BY id) AS rn
    FROM #Employees
    WHERE deptid <> 1
)
SELECT *
FROM ranked_employees
WHERE rn < (
    SELECT COUNT(id) AS admincount
    FROM #Employees
    WHERE DeptID = 1
)

How to make query to count values from two tables

I have three tables: a main table 'maintable' and two sub-tables, 'table1' and 'table2'. The main table 'maintable' contains two columns, 'ID' and 'name', like this:
ID name
.... ......
1 Khalid
2 Jone
3 Steve
and the first sub table 'table1' contains 't1ID' and 'column' and 'ID' (foreign key) from 'maintable' like that:
t1ID column ID
...... ....... ....
1 Value 1
2 Value 1
3 Value 1
4 Value 2
and the second sub table 'table2' contains 't2ID' and 'column' and 'ID' (foreign key) from 'maintable' like that:
t2ID column ID
...... ....... ....
1 Value 2
2 Value 1
3 Value 1
4 Value 3
I want to write a query that returns the count of table1.ID as A and the count of table2.ID as B for each name, like this:
name A B
...... ... ...
khalid 3 2
Jone 1 1
Steve 0 1
Try this :
-- One output row per maintable row; A and B count the table1 / table2 rows sharing its ID.
SELECT
    m.name,
    (SELECT COUNT(a.ID) FROM table1 AS a WHERE a.ID = m.ID) AS A,
    (SELECT COUNT(b.ID) FROM table2 AS b WHERE b.ID = m.ID) AS B
FROM maintable AS m
Try this out:
-- Pre-aggregate each child table per ID, then attach both counts to every maintable row.
-- ISNULL turns the NULL produced by an unmatched LEFT JOIN into 0.
;WITH table1_counts AS (
    SELECT ID, COUNT(1) AS Cnt
    FROM table1
    GROUP BY ID
),
table2_counts AS (
    SELECT ID, COUNT(1) AS Cnt
    FROM table2
    GROUP BY ID
)
SELECT
    m.name,
    ISNULL(c1.Cnt, 0) AS A,
    ISNULL(c2.Cnt, 0) AS B
FROM maintable AS m
LEFT JOIN table1_counts AS c1
    ON c1.ID = m.ID
LEFT JOIN table2_counts AS c2
    ON c2.ID = m.ID
It can also be done with subqueries, but I like CTEs more (query is more readable).
Try this:
-- Step 1 (t0_t1): count table1 rows per table0 row.
-- Step 2: count table2 rows per table0 row, carrying the first count along.
with t0_t1 as (
  select
    t.id,
    t.nm,
    count(t1.id) as A
  from table0 t
  left join table1 t1 on t.id = t1.id
  group by t.id, t.nm
)
select t.nm, t.A, count(t2.id) as B
from t0_t1 t
left join table2 t2 on t.id = t2.id
-- t.id is part of the grouping so two different ids that share the same name and
-- the same A count are not merged into one row
group by t.id, t.nm, t.A
Example: http://sqlfiddle.com/#!6/341ff/10
-- Fixture for the example: three people in table0, child rows in table1 / table2 keyed by id.
create table table0 (id int, nm varchar(20));
insert into table0 (id, nm) values
  (1, 'Khalid'),
  (2, 'Jone'),
  (3, 'Steve');

create table table1 (t1id int, col varchar(20), id int);
insert into table1 (t1id, col, id) values
  (1, 'v', 1),
  (2, 'v', 1),
  (3, 'v', 1),
  (4, 'v', 2);

create table table2 (t2id int, col varchar(20), id int);
insert into table2 (t2id, col, id) values
  (1, 'v', 2),
  (2, 'v', 1),
  (3, 'v', 1),
  (4, 'v', 3);
Result:
| nm | A | B |
|--------|---|---|
| Steve | 0 | 1 |
| Jone | 1 | 1 |
| Khalid | 3 | 2 |

join by column type

I have 2 tables like the following.
id1 | val | type
1 2 type1
1 4 type2
2 9 type2
2 7 type1
id2|type1|type2
11 2 4
33 7 9
I need result like this
id1|id2
1 11
2 33
I need to check both type1 and type2 to relate id1 and id2. I tried the following query but it does not work.
-- Question's attempt (reported as not working).
-- Both parenthesised conditions are ANDed, so a single t1 row would need
-- type = 'type1' and type = 'type2' at the same time; no row can satisfy that.
select id1,id2 from t1 inner join t2 on (type='type1' and
t1.val=t2.type1)and (type='type2' and t1.val=t2.type2)
I believe this should give you what you want:
-- Match each t2 row to the t1 'type1' row and the t1 'type2' row that carry its two values,
-- and require both t1 rows to belong to the same id1.
SELECT
    first_type.id1,
    t2.id2
FROM t2
INNER JOIN t1 AS first_type
    ON first_type.type = 'type1'
   AND first_type.val = t2.type1
INNER JOIN t1 AS second_type
    ON second_type.type = 'type2'
   AND second_type.val = t2.type2
   AND second_type.id1 = first_type.id1
SQL Fiddle
MS SQL Server 2008 Schema Setup:
-- t1: one row per (id1, type) holding that type's value.
CREATE TABLE t1 (id1 INT, val INT, [type] VARCHAR(10));

INSERT INTO t1 (id1, val, [type]) VALUES
(1, 2, 'type1'),
(1, 4, 'type2'),
(2, 9, 'type2'),
(2, 7, 'type1');

-- t2: the same values laid out as one column per type.
-- type1/type2 are INT so they compare with t1.val without an implicit string conversion.
CREATE TABLE t2 (id2 INT, [type1] INT, [type2] INT);

INSERT INTO t2 (id2, [type1], [type2]) VALUES
(11, 2, 4),
(33, 7, 9);
Query 1:
-- Turn t2's type1 / type2 columns into (type, Val) rows, then match them to t1 on both
-- value and type. GROUP BY collapses the one-match-per-type rows into a single (id1, id2) row.
SELECT
    t1.id1,
    unpivoted.id2
FROM t1
INNER JOIN (
    SELECT *
    FROM t2 AS src
    UNPIVOT (Val FOR [type] IN ([type1], [type2])) AS up
) AS unpivoted
    ON unpivoted.Val = t1.val
   AND unpivoted.type = t1.type
GROUP BY
    t1.id1,
    unpivoted.id2
Results:
| id1 | id2 |
|-----|-----|
| 1 | 11 |
| 2 | 33 |

sql - how to query member of group with recursion (1 table with user and groups)

Following structure exists:
-- rel: one (entry, parent) edge per row; parent_id is NULL for an entry without a parent.
CREATE TABLE rel (
    entry_id  INT,
    parent_id INT
);

-- entries: display name for every entry id (users and groups share this table).
CREATE TABLE entries (
    entry_id INT,
    name     VARCHAR(44)
);
Following data exists:
-- Two groups (2, 3) and three users (1, 4, 5).
INSERT INTO entries (entry_id, name) VALUES
    (1, 'user 1'),
    (2, 'group 2'),
    (3, 'group 3'),
    (4, 'user 4'),
    (5, 'user 5');

-- Edges: group 3 and user 4 sit in group 2; users 1 and 5 sit in group 3; group 2 has no parent.
INSERT INTO rel (entry_id, parent_id) VALUES
    (3, 2),
    (4, 2),
    (1, 3),
    (5, 3),
    (2, NULL);
Result should look like:
group_id| group_name | member_id | member_name | level
2 | group 2 | 4 | user 4 | 0
2 | group 2 | 1 | user 1 | 1
2 | group 2 | 5 | user 5 | 1
3 | group 3 | 1 | user 1 | 0
3 | group 3 | 5 | user 5 | 0
I already tried stuff like the following but it's not returning the results I need:
-- Question's attempt, step 1: materialise every entry together with its parent id and
-- parent name into the temp table #tmpEntries, then display it.
SELECT
entries.entry_id,
entries.name,
rel.parent_id,
-- scalar lookup of the parent's name; parent_id here refers to the outer rel row
(SELECT name FROM entries WHERE entry_id=parent_id) AS parent_name
INTO
#tmpEntries
FROM
-- comma join, restricted to matching entry_id by the WHERE clause below
entries, rel
WHERE
rel.entry_id = entries.entry_id
;
SELECT * FROM #tmpEntries;
-- Question's attempt, step 2: recursive CTE over #tmpEntries.
-- The anchor already selects every #tmpEntries row (its parent_id filter is commented out),
-- and the recursive member re-emits #tmpEntries rows unchanged without carrying a level or
-- ancestor column, so after DISTINCT the output holds the same rows as #tmpEntries.
WITH MyCTE
AS (
SELECT
entry_id,
name,
parent_id,
--CAST('' AS VARCHAR(44)) AS
parent_name
FROM #tmpEntries
--WHERE parent_id IS NULL
UNION ALL
SELECT
#tmpEntries.entry_id,
#tmpEntries.name,
#tmpEntries.parent_id,
--MyCTE.name AS
#tmpEntries.parent_name
FROM #tmpEntries
-- children of rows already produced by the CTE
INNER JOIN MyCTE ON #tmpEntries.parent_id = MyCTE.entry_id
--WHERE #tmpEntries.parent_id IS NOT NULL
-- WHERE NOT EXISTS (SELECT entry_id FROM rel WHERE parent_id=#tmpEntries.entry_id)
)
SELECT DISTINCT *
FROM MyCTE
ORDER BY parent_id
;
-- Question's attempt, step 3: same recursive CTE as MyCTE, except that the recursive member
-- only keeps entries that are not the parent of any rel row.
-- The anchor still selects every #tmpEntries row and no level column is carried.
WITH MyCTE2
AS (
SELECT
entry_id,
name,
parent_id,
--CAST('' AS VARCHAR(44)) AS
parent_name
FROM #tmpEntries
--WHERE parent_id IS NULL
UNION ALL
SELECT
#tmpEntries.entry_id,
#tmpEntries.name,
#tmpEntries.parent_id,
--MyCTE.name AS
#tmpEntries.parent_name
FROM #tmpEntries
INNER JOIN MyCTE2 ON #tmpEntries.parent_id = MyCTE2.entry_id
--WHERE #tmpEntries.parent_id IS NOT NULL
-- keep only entries that have no children in rel
WHERE NOT EXISTS (SELECT entry_id FROM rel WHERE parent_id=#tmpEntries.entry_id)
)
SELECT DISTINCT *
FROM MyCTE2
ORDER BY parent_id
This works under the assumption that a group is anything that contains one or more members, i.e. an empty group will be treated as a simple member.
-- Walk rel upwards: start from every direct (member, parent) edge at level 0, then
-- re-attach the same member to each further ancestor, adding 1 to level per hop.
with ancestry as (
    select entry_id, parent_id, 0 as level
    from rel
    where parent_id is not null

    union all

    select a.entry_id, up.parent_id, a.level + 1 as level
    from ancestry a
    inner join rel up
        on up.entry_id = a.parent_id
)
select
    grp.entry_id as group_id,
    grp.name     as group_name,
    mem.entry_id as member_id,
    mem.name     as member_name,
    a.level
from ancestry a
inner join entries mem
    on mem.entry_id = a.entry_id
inner join entries grp
    on grp.entry_id = a.parent_id
-- drop entries that are themselves a parent of something in rel
where not exists (select * from rel r where r.parent_id = a.entry_id)
order by grp.entry_id, a.level, mem.entry_id
The where clause excludes nested groups that would otherwise appear as members.

Resources