majority vote TSQL - sql-server

I am trying to map sub categories to a 'majority' parent category. Looking at this example:
IF OBJECT_ID(N'tempdb..#Temp') IS NOT NULL DROP TABLE #Temp
CREATE TABLE #Temp
(
ID INT,
SubCategory NVARCHAR(100),
Category NVARCHAR(100)
)
INSERT INTO #Temp
SELECT 1, N'SC1', N'C1'
UNION
SELECT 2, N'SC1', N'C1'
UNION
SELECT 3, N'SC1', N'C1'
UNION
SELECT 4, N'SC1', N'C2'
UNION
SELECT 5, N'SC1', N'C2'
sub category SC1 can belong to 2 'parent' categories: C1, C2 but C1 is assigned more to SC1 than C2 so it represents the majority. I am currently using a correlated sub query to map sun categories to their majority parent categroy like this:
IF OBJECT_ID(N'tempdb..#Temp') IS NOT NULL DROP TABLE #Temp
CREATE TABLE #Temp
(
ID INT,
SubCategory NVARCHAR(100),
Category NVARCHAR(100)
)
INSERT INTO #Temp
SELECT 1, N'SC1', N'C1'
UNION
SELECT 2, N'SC1', N'C1'
UNION
SELECT 3, N'SC1', N'C1'
UNION
SELECT 4, N'SC1', N'C2'
UNION
SELECT 5, N'SC1', N'C2'
SELECT
Id,
OuterQuery.SubCategory,
Category = (SELECT TOP 1 Category FROM #Temp AS InnerQuery
WHERE InnerQuery.SubCategory = OuterQuery.SubCategory
GROUP BY InnerQuery.SubCategory, InnerQuery.Category
ORDER BY COUNT(DISTINCT Id) DESC)
FROM #Temp AS OuterQuery
Is this a correct way of achieving want I want to?

I would do it like this:
DECLARE #t TABLE (
ID INT,
SubCategory NVARCHAR(100),
Category NVARCHAR(100)
)
INSERT INTO #t
VALUES
( 1, N'SC1', N'C1'),
(2, N'SC1', N'C1'),
(3, N'SC1', N'C1'),
(4, N'SC1', N'C2'),
(5, N'SC1', N'C2'),
(6, N'SC2', N'C2'),
(7, N'SC2', N'C2'),
(8, N'SC2', N'C3');
WITH a AS (
SELECT
subcategory,
category,
COUNT(id) cnt
FROM #t
GROUP BY
SubCategory,
category
), b as (
SELECT
subcategory,
category,
ROW_NUMBER() OVER
(PARTITION BY subcategory ORDER BY cnt DESC) row
FROM a
)
SELECT
subcategory,
category AS Majority
FROM b
WHERE row = 1
This produces the result:
subcategory Majority
SC1 C1
SC2 C2

There's this way with just subqueries;
Test Data (I've increased the range for this)
IF OBJECT_ID(N'tempdb..#Temp') IS NOT NULL DROP TABLE #Temp
CREATE TABLE #Temp
(ID INT, SubCategory NVARCHAR(100), Category NVARCHAR(100))
INSERT INTO #Temp (ID, SubCategory, Category)
VALUES
(1, N'SC1', N'C1')
,(2, N'SC1', N'C1')
,(3, N'SC1', N'C1')
,(4, N'SC1', N'C2')
,(5, N'SC1', N'C2')
,(6, N'SC2', N'C3')
,(7, N'SC2', N'C3')
,(8, N'SC2', N'C2')
,(9, N'SC2', N'C2')
,(10, N'SC2', N'C2')
,(11, N'SC2', N'C2')
Now the query;
SELECT DISTINCT
t.SubCategory
,t.Category
FROM #Temp t
JOIN
(
SELECT
SubCategory
,Category
,COUNT(SubCategory) Ct
FROM #Temp
GROUP BY SubCategory, Category
) Cont
ON t.SubCategory = cont.SubCategory
AND t.Category = cont.Category
JOIN
(
SELECT
a.SubCategory
,MAX(a.Ct) MaxCount
FROM
(
SELECT
SubCategory
,Category
,COUNT(SubCategory) Ct
FROM #Temp
GROUP BY SubCategory, Category
) a
GROUP BY a.SubCategory
) maxi ON t.SubCategory = maxi.SubCategory
AND cont.Ct = maxi.MaxCount

Related

Stored procedure in SnowSQL to combine 2 tables and avoid duplicates

DECLARE #tmp1 TABLE (ItemCode INT, Item VARCHAR(30), Qty INT)
INSERT INTO #tmp1 (ItemCode, Item, Qty)
VALUES(1, 'Item1', 300),
(2, 'Item2', 500)
DECLARE #tmp2 TABLE (JobNo INT, ItemCode INT, Item VARCHAR(30), Qty INT)
INSERT INTO #tmp2 (JobNo, ItemCode, Item, Qty)
VALUES(1, 1, 'Item1', 150),
(2, 1, 'Item1', 150),
(3, 2, 'Item1', 50),
(4, 2, 'Item1', 75),
(5, 2, 'Item1', 125),
(6, 2, 'Item1', 100)
;WITH MyCTE AS
(
SELECT t1.ItemCode, t1.Item, t1.Qty, t2.JobNo, t2.ItemCode AS ItemCode2, t2.Item AS Item2, t2.Qty AS Qty2, t2.MySeed
FROM #tmp1 AS t1 INNER JOIN (
SELECT *, ROW_NUMBER() OVER(PARTITION BY ItemCode ORDER BY JobNo) AS [MySeed]
FROM #tmp2
) AS t2 ON t1.ItemCode = t2.ItemCode
WHERE t2.MySeed = 1
UNION ALL
SELECT NULL AS ItemCode, NULL AS Item, NULL AS Qty, t2.JobNo, t2.ItemCode AS ItemCode2, t2.Item AS Item2, t2.Qty AS Qty2, t2.MySeed
FROM #tmp1 AS t1 INNER JOIN (
SELECT *, ROW_NUMBER() OVER(PARTITION BY ItemCode ORDER BY JobNo) AS [MySeed]
FROM #tmp2
) AS t2 ON t1.ItemCode = t2.ItemCode
WHERE t2.MySeed > 1
)
SELECT *
FROM MyCTE
ORDER BY ItemCode2, MySeed
I found a sample query online as above which is straight forward as it combines table 1 and table 2 and it assumes in table 2 if myseed count is 1 then input null values.
But, I need a query in snowflake for the scenario where table 1 can have any number of entries for each id and table 2 can also have multiple entries for each id. I need to join both the tables without duplicates and input null in the rows when the one table has multiple entries and other table don't have multiple entries.
How to write in snowflake(SNOWSQL)?
I need a result like below to combine two tables and avoid duplicates in either one of the table

How to convert two rows into one column using pivot in sql

I want to convert two columns into one row using PIVOT FUNCTION in SQL database.
But I'm facing problems. I can convert 1 column into 1 row but not 2 columns into 1 row.
You can try this
Create Table Article (ArticleId Int, [Description] Varchar(10))
Insert Into Article Values (1, 'Test')
Create Table OrderForecast(ArticleId Int, [Week] Int, [Order] Int, Amount Int)
Insert Into OrderForecast Values (1, 51, 1, 0),(1, 52, 2, 150), (1, 1, 3, 0),(1, 2, 4, 200), (1, 3, 5,0)
Select ArticleId, [Description], Week51, Week52, Week1, Week2, Week3
from
(
select ArticleId, [Description], Amount, [Week]
from
(
SELECT OrderForecast.ArticleId, 'Week' + Convert(Varchar(10), OrderForecast.[Week]) as [Week], [Order], Amount,
Article.[Description] as [Description] FROM OrderForecast
Inner Join Article On OrderForecast.ArticleId = Article.ArticleId
)a
) d
pivot
(
max(Amount)
for [Week] in (Week51, Week52, Week1, Week2, Week3)
) piv;
You can find the demo Here

How to get the Previous & next row based on condition

I am trying to get the statement on fetching the previous and next rows of a selected row.
Declare #OderDetail table
(
Id int primary key,
OrderId int,
ItemId int,
OrderDate DateTime2,
Lookup varchar(15)
)
INSERT INTO #OderDetail
VALUES
(1, 10, 1, '2018-06-11', 'A'),
(2, 10, 2, '2018-06-11', 'BE'), --this
(3, 2, 1, '2018-06-04', 'DR'),
(4, 2, 2, '2018-06-04', 'D'), --this
(5, 3, 2, '2018-06-14', 'DD'), --this
(6, 4, 2, '2018-06-14', 'R');
DECLARE
#ItemId int = 2,
#orderid int = 10
Required output:
Input for the procedure is order id =10 and item id =2 and i need to check item-2 is in the any other order i.e only previous and next item of matched record/order as per order date
Is this what your after? (Updated to reflect edit [OrderDate] to question)
Declare #OderDetail table
(
Id int primary key,
OrderId int,
ItemId int,
OrderDate DateTime2,
Lookup varchar(15)
)
INSERT INTO #OderDetail
VALUES
(1, 10, 1, '2018-06-11', 'A'),
(2, 10, 2, '2018-06-11', 'BE'), --this
(3, 2, 1, '2018-06-04', 'DR'),
(4, 2, 2, '2018-06-04', 'D'), --this
(5, 3, 2, '2018-06-14', 'DD'), --this
(6, 4, 2, '2018-06-14', 'R');
declare #ItemId int=2 , #orderid int = 10;
Query
With cte As
(
Select ROW_NUMBER() OVER(ORDER BY OrderDate) AS RecN,
*
From #OderDetail Where ItemId=#ItemId
)
Select Id, OrderId, ItemId, [Lookup] From cte Where
RecN Between ((Select Top 1 RecN From cte Where OrderId = #orderid) -1) And
((Select Top 1 RecN From cte Where OrderId = #orderid) +1)
Order by id
Result:
Id OrderId ItemId Lookup
2 10 2 BE
4 2 2 D
5 3 2 DD
Another possible approach is to use LAG() and LEAD() functions, that return data from a previous and subsequent row form the same resul tset.
-- Table
DECLARE #OrderDetail TABLE (
Id int primary key,
OrderId int,
ItemId int,
OrderDate DateTime2,
Lookup varchar(15)
)
INSERT INTO #OrderDetail
VALUES
(1, 10, 1, '2018-06-11', 'A'),
(2, 10, 2, '2018-06-11', 'BE'), --this
(3, 2, 1, '2018-06-04', 'DR'),
(4, 2, 2, '2018-06-04', 'D'), --this
(5, 3, 2, '2018-06-14', 'DD'), --this
(6, 4, 2, '2018-06-14', 'R');
-- Item and order
DECLARE
#ItemId int = 2,
#orderid int = 10
-- Statement
-- Get previois and next ID for every order, grouped by ItemId, ordered by OrderDate
;WITH cte AS (
SELECT
Id,
LAG(Id, 1) OVER (PARTITION BY ItemId ORDER BY OrderDate) previousId,
LEAD(Id, 1) OVER (PARTITION BY ItemId ORDER BY OrderDate) nextId,
ItemId,
OrderId,
Lookup
FROM #OrderDetail
)
-- Select current, previous and next order
SELECT od.*
FROM cte
CROSS APPLY (SELECT * FROM #OrderDetail WHERE Id = cte.Id) od
WHERE (cte.OrderId = #orderId) AND (cte.ItemId = #ItemId)
UNION ALL
SELECT od.*
FROM cte
CROSS APPLY (SELECT * FROM #OrderDetail WHERE Id = cte.previousId) od
WHERE (cte.OrderId = #orderId) AND (cte.ItemId = #ItemId)
UNION ALL
SELECT od.*
FROM cte
CROSS APPLY (SELECT * FROM #OrderDetail WHERE Id = cte.nextId) od
WHERE (cte.OrderId = #orderId) AND (cte.ItemId = #ItemId)
Output:
Id OrderId ItemId OrderDate Lookup
2 10 2 11/06/2018 00:00:00 BE
4 2 2 04/06/2018 00:00:00 D
5 3 2 14/06/2018 00:00:00 DD
Update to given this data set: I see where you are going with this. Note that in SOME cases there IS no row before the given one - so it only returns 2 not 3. Here I updated the CTE version. Un-comment the OTHER row to see 3 not 2 as there is then one before the selected row with that Itemid.
Added a variable to demonstrate how this is better allowing you to get 1 before and after or 2 before/after if you change that number (i.e. pass a parameter) - and if less rows, or none are before or after it gets as many as it can within that constraint.
Data setup for all versions:
Declare #OderDetail table
(
Id int primary key,
OrderId int,
ItemId int,
OrderDate DateTime2,
Lookup varchar(15)
)
INSERT INTO #OderDetail
VALUES
(1, 10, 1, '2018-06-11', 'A'),
(2, 10, 2, '2018-06-11', 'BE'), --this
(3, 2, 1, '2018-06-04', 'DR'),
(4, 2, 2, '2018-06-04', 'D'), --this
(5, 3, 2, '2018-06-14', 'DD'), --this
(9, 4, 2, '2018-06-14', 'DD'),
(6, 4, 2, '2018-06-14', 'R'),
--(10, 10, 2, '2018-06-02', 'BE'), -- un-comment to see one before
(23, 4, 2, '2018-06-14', 'R');
DECLARE
#ItemId int = 2,
#orderid int = 2;
CTE updated version:
DECLARE #rowsBeforeAndAfter INT = 1;
;WITH cte AS (
SELECT
Id,
OrderId,
ItemId,
OrderDate,
[Lookup],
ROW_NUMBER() OVER (ORDER BY OrderDate,Id) AS RowNumber
FROM #OderDetail
WHERE
ItemId = #itemId -- all matches of this
),
myrow AS (
SELECT TOP 1
Id,
OrderId,
ItemId,
OrderDate,
[Lookup],
RowNumber
FROM cte
WHERE
ItemId = #itemId
AND OrderId = #orderid
)
SELECT
cte.Id,
cte.OrderId,
cte.ItemId,
cte.OrderDate,
cte.[Lookup],
cte.RowNumber
FROM ctE
INNER JOIN myrow
ON ABS(cte.RowNumber - myrow.RowNumber) <= #rowsBeforeAndAfter
ORDER BY OrderDate, OrderId;
You probably want the CTE method (See an original at the end of this) however:
Just to point out, this gets the proper results but is probably not what you are striving for since it is dependent on the row order and the item id not the actual row with those two values:
SELECT TOP 3
a.Id,
a.OrderId,
a.ItemId,
a.Lookup
FROM #OderDetail AS a
WHERE
a.ItemId = #ItemId
To fix that, you can use an ORDER BY and TOP 1 with a UNION, kind of ugly. (UPDATED with date sort and != on the id.)
SELECT
u.Id,
u.OrderId,
u.OrderDate,
u.ItemId,
u.Lookup
FROM (
SELECT
a.Id,
a.OrderId,
a.OrderDate,
a.ItemId,
a.Lookup
FROM #OderDetail AS a
WHERE
a.ItemId = #ItemId
AND a.OrderId = #orderid
UNION
SELECT top 1
b.Id,
b.OrderId,
b.OrderDate,
b.ItemId,
b.Lookup
FROM #OderDetail AS b
WHERE
b.ItemId = #ItemId
AND b.OrderId != #orderid
ORDER BY b.OrderDate desc, b.OrderId
UNION
SELECT top 1
b.Id,
b.OrderId,
b.OrderDate,
b.ItemId,
b.Lookup
FROM #OderDetail AS b
WHERE
b.ItemId = #ItemId
AND b.OrderId != #orderid
ORDER BY b.OrderDate asc, b.OrderId
) AS u
ORDER BY u.OrderDate asc, u.OrderId
I think its simple, you can check with min(Id) and Max(id) with left outer join or outer apply
like
Declare #ItemID int = 2
Select * From #OderDetail A
Outer Apply (
Select MIN(A2.Id) minID, MAX(A2.Id) maxID From #OderDetail A2
Where A2.ItemId =#ItemID
) I05
Outer Apply(
Select * From #OderDetail Where Id=minID-1
Union All
Select * From #OderDetail Where Id=maxID+1
) I052
Where A.ItemId =#ItemID Order By A.Id
Let me know if this helps you or you face any problem with it...
Regards,

Row based Condition in Left Join

Is there a way to write a row based condition in Left Join.
If some row not exists based on column condition, then it should take the next first row.
I have the structure below,
create table Report
(
id int,
name varchar(10)
)
create table ReportData
(
report_id int references report(id),
flag bit,
path varchar(50)
)
insert into Report values (1, 'a');
insert into Report values (2, 'b');
insert into Report values (3, 'c');
insert into ReportData values (1, 0, 'xx');
insert into ReportData values (2, 0, 'yy');
insert into ReportData values (2, 1, 'yy');
insert into ReportData values (3, 1, 'zz');
insert into ReportData values (3, 1, 'mm');
I need some output like
1 a 0 xx
2 b 0 yy
3 c 1 zz
You can use ROW_NUMBER for this:
;WITH ReportDate_Rn AS (
SELECT report_id, flag, path,
ROW_NUMBER() OVER (PARTITION BY report_id ORDER BY path) AS rn
FROM ReportDate
)
SELECT t1.id, t1.name, t2.flag, t2.path
FROM Report AS t1
JOIN ReportDate_Rn AS t2 ON t1.id = t2.report_id AND t2.rn = 1
The above query regards as first record of each report_id slice, the one having the alphabetically smallest path. You may amend the ORDER BY clause of the ROW_NUMBER() window function as you wish.
SELECT id,name,flag,path
FROM
(
SELECT Report.id,Report.name,ReportData.flag,ReportData.path,
row_number() over(partition by ReportData.report_id order by flag) as rownum
FROM Report
JOIN ReportData on Report.id = ReportData.report_id
) tmp
WHERE tmp.rownum=1
A simpler alternative to the left join, using rowid and rownum
SELECT id, name, flag, path
FROM report, reportdata
WHERE reportdata.rowid = (SELECT rowid
FROM reportdata
WHERE id = report_id
AND rownum = 1);
Without using row_numner() you can achieve this.
Have a look at this SQL Fiddle
select r.id, r.name, d.flag, d.path from report r
inner join reportdata d
on r.id = d.report_id group by d.report_id
PS: I wasn't believing the result - I was just building the query - haven't used d.report_id in the select clause and it worked. Will be updating this answer once I get the reason why this query worked :)
Use Partition BY:
declare #Report AS table
(
id int,
name varchar(10)
)
declare #ReportData AS table
(
report_id int ,
flag bit,
path varchar(50)
)
insert into #Report values (1, 'a');
insert into #Report values (2, 'b');
insert into #Report values (3, 'c');
insert into #ReportData values (1, 0, 'xx');
insert into #ReportData values (2, 0, 'yy');
insert into #ReportData values (2, 1, 'yy');
insert into #ReportData values (3, 1, 'zz');
insert into #ReportData values (3, 1, 'mm');
;WITH T AS
(
Select
R.id,
r.name,
RD.flag,
RD.path,
ROW_NUMBER () OVER(PARTITION BY R.id ORDER BY R.id) AS PartNo
FROM #Report R
LEFT JOIN #ReportData RD ON R.id=RD.report_id
)
SELECT
T.id,
T.name,
T.flag,
T.path
FROM T WHERE T.PartNo=1

How to find max of one column of all child in SQL

I have a chart like picture , that store it in table with KID , ParentID .
how can i get max MR for all child under parent.
example : for Node C ----> max ( MR(D) , MR(E) , MR(F) )
How can find Max(MR) for all child of node?
DECLARE #a TABLE
(
KID INT PRIMARY KEY,
ParentID INT,
MR INT
)
INSERT INTO #a (KID, ParentID, MR)
VALUES
(1, 0, 3), (2, 1, 1), (3, 1, 3),
(4, 3, 3), (5, 3, 5), (6, 5, 3)
;WITH cte AS
(
SELECT *
FROM #a
WHERE ParentID = 3
UNION ALL
SELECT t2.*
FROM cte t1
JOIN #a t2 ON t1.ParentID = t2.KID
)
SELECT MAX(MR)
FROM cte
OPTION (MAXRECURSION 0)
result -
5
Maybe you can use over clause
SELECT
ParentID,
MAX(MR) OVER(PARTITION BY ParentID)
FROM
Table

Resources