SQL - Combine rows - sql-server

SQL - Combine rows - sql-server

I have rows in a table that looks like this:
[date],[name],[duty],[holiday],[hdaypart],[sick],[sdaypart]
2015-04-27, person1, 1,0,NULL,0,NULL
2015-04-27, person1, 0 1,'fd',0,NULL
I would like to combine these rows to:
[date],[name],[duty],[holiday],[hdaypart],[sick],[sdaypart]
2015-04-27, person1, 1,1,'fd',0,NULL
The duty, holiday and sick columns as BIT columns.
Is there way to do this?
The one solution I can come up with is using subqueries, but it consumes a lot of time. A faster solution would be nice.
This is what I have now:
SELECT DISTINCT [name],[date],[region],[cluster]
,CASE WHEN (SELECT SUM(CONVERT(INT,callduty)) FROM planning AS t2
WHERE t1.[Date] = #datum AND t2.[Name] = t1.[name] AND t2.[Date] = t1.[date] ) > 0
THEN 1 ELSE 0 END AS [CallDuty]
,CASE WHEN (SELECT SUM(CONVERT(INT,holiday)) FROM planning AS t2
WHERE t1.[Date] = #datum AND t2.[Name] = t1.[name] AND t2.[Date] = t1.[date] ) > 0
THEN 1 ELSE 0 END AS [Holiday]
FROM planning AS t1
where t1.[Date] = #datum AND t1.[Name] like #naam
group by t1.[date],t1.[name], t1.Region, t1.cluster
order by t1.[name]

You seem to want to group by date and name and select either the maximum or the not null values within each group. MAX aggregate function is suitable for both of these selections:
SELECT [date],[name], MAX([duty]), MAX([holiday]),
MAX([hdaypart]), MAX([sick]), MAX([sdaypart])
FROM mytable
GROUP BY [date],[name]

By looking at your example, I assume that you want to get the maximum values for a specific user.
You could do this using a group by and max
select max([date]),[name],max([duty]),max([holiday]),max([hdaypart]),max([sick]),max([sdaypart])
from yourtable
group by name
This is not really pretty but should perform better than using subqueries.
EDIT:
If you have columns with bit sql types, use
max(cast([bitColumn] as int))
Adding the date column in the group by, as suggested by Giorgos Betsos, the result is
select [date],
[name],
max([duty]),
max([holiday]),
max(cast([hdaypart] as int)),
max(cast([sick] as int)),
max(cast([sdaypart] as int))
from yourtable
group by [date],[name]

declare #t table ([date] date,[name] varchar(10),[duty] varchar(10),[holiday] int,[hdaypart] varchar(10),[sick] int,[sdaypart]
int
)
insert into #t([date],[name],[duty],[holiday],[hdaypart],[sick],[sdaypart])values ('2015-04-27','person1',1,0,NULL,0,NULL),
('2015-04-27','person1',1,0,'fd',0,NULL)
select MAX([date]),MAX([name]),MAX([duty]),MAX([holiday]),MAX([hdaypart]), [sick],[sdaypart] from #t
group by sick,[sdaypart]
OR
select [date],[name],[duty],[holiday],MAX([hdaypart])AS H,[sick],[sdaypart] from #t
group by [date],[name],[duty],[holiday],[sick],[sdaypart]
UNION
select [date],[name],[duty],[holiday],MAX([hdaypart])AS H,[sick],[sdaypart] from #t
group by [date],[name],[duty],[holiday],[sick],[sdaypart]

CREATE TABLE #Combine
(
[date] date,
[name] VARCHAR(10),
[duty] CHAR(1),
[holiday] CHAR(1),
[hdaypart] CHAR(5),
[sick] CHAR(1),
[sdaypart] VARCHAR(10)
)
INSERT INTO #Combine VALUES('2015-04-27', 'person1', '1','0',NULL,'0',NULL),
('2015-04-27', 'person1', '0','1','fd','0',NULL)
SELECT MAX(Date) [date],MAX(name) [name],MAX(Duty) [duty],MAX(holiday) holiday,
MAX(hdaypart) hdaypart,max(sick) sick,max(sdaypart)sdaypart FROM #Combine

Related

Loops on SQL Server

I have the following query where I input a date and it give me the result. However, I need to run this for 60 different dates. Instead of running this 1 by 1, is there anyway to automate this so it runs each time on a different date?
IF OBJECT_ID('tempdb..#1') IS NOT NULL DROP TABLE #1
declare #d1 datetime = '2020-02-06'
select distinct [User] into #1
from [X].[dbo].[Table1]
where [status] = 'Success'
and [Date] = #d1;
select count(distinct [User])
from #1
inner join [Y].[dbo].[Table2]
on #1.[User] = [Y].[dbo].[Table2].User
where [Date2] between #d1 and #d1+1
and [Checkname] in ('Check1','Check2')

Loops are slow and generally a bad practice in the context of T-SQL. You can use something like this to get the count of users for a batch of dates:
DROP TABLE IF EXISTS #DataSource;
CREATE TABLE #DataSource
(
[Date] DATETIME
,[UsersCount] INT
);
INSERT INTO #DataSource ([Date])
VALUES ('2020-02-06')
,('2020-02-07')
,('2020-02-08');
IF OBJECT_ID('tempdb..#1') IS NOT NULL DROP TABLE #1
select distinct DS1.[Date]
,DS1.[User]
into #1
from [X].[dbo].[Table1] DS1
INNER JOIN #DataSource DS2
ON DS1.[Date] = DS2.[Date]
where DS1.[status] = 'Success';
select #1.[date]
,count(distinct [User])
from #1
inner join [Y].[dbo].[Table2]
on #1.[User] = [Y].[dbo].[Table2].User
where [Date2] between #1.[date] and #1.[date] + 1
and [Checkname] in ('Check1','Check2')
GROUP BY #1.[date]

First, I want to say that gotqn's answer is a good answer - however, I think there are a few more things in the original code that can be improved - so here is how I would probably do it:
Assuming the dates are consecutive, use a common table expression to calculate the dates using dateadd and row_number.
Then, use another common table expression to get the list of dates and users from table1,
and then select the date and count of distinct users for each date from that common table expression joined to table2:
DECLARE #StartDate Date = '2020-02-06';
WITH Dates AS
(
SELECT TOP (60) DATEADD(DAY, ROW_NUMBER() OVER(ORDER BY ##SPID) -1, #StartDate) As Date
FROM sys.objects
), CTE AS
(
SELECT t1.[User], t1.[Date]
FROM [X].[dbo].[Table1] AS t1
JOIN Dates
ON t1.[Date] = Dates.[Date]
WHERE [status] = 'Success'
)
SELECT cte.[Date], COUNT(DISTINCT [User])
FROM CTE
JOIN [Y].[dbo].[Table2] As t1
ON CTE.[User] = t1.[User]
AND t1.[Date2] >= CTE.[Date]
AND t1.[Date2] < DATEADD(Day, 1, CTE.[Date])
AND [Checkname] IN ('Check1','Check2')
GROUP BY cte.[Date]
If the dates are not consecutive, you can use a table variable to hold the dates instead of calculating them using a common table expression.

How to do SQL Range based calculations

I have two columns 'TotalAmount' and 'DefaultFees'
But there is a more granular table that says for the 'TotalAmount' ( which is 837.681561) between '0.010000' and '500.000000' fees should be '2.500000'
and for the rest of the total amount between '500.010000' and '800.000000' which is 300 fees should be '4.876000'
For the rest which is 37.681561, it should be default fees. There can be more rows to the Min/Max table
How can I achieve this in SQL
Expected Output

Although Isaac's answer is more clear, as an alternative solution, you can use a case-when statement with a subqueries as below.
Select TotalAmount,
case when TotalAmount >= (select max(MaximumAmount) from TableB) then DefaultFees
else (Select Fees
From TableB b
where a.TotalAmount<b.MaximumAmount and a.TotalAmount>=b.MinimumAmount
)
end as FinalFee
From TableA a
EDIT:
Based on your updated question, this is the query you are looking for:
CREATE TABLE #TableA (myVal INT, DefaultFee float)
INSERT INTO #TableA VALUES (837,3.66)
CREATE TABLE #TableB (minamount INT, maxamount INT, fees float)
INSERT INTO #TableB VALUES (0,500,2.5),(500,800,4.876)
;WITH cte AS (
SELECT ISNULL(b.minamount, (SELECT MAX(maxamount) FROM #tableB)) as [max]
from #TableA a
left join #TableB b on a.myVal between b.minamount and maxamount
)
SELECT maxamount, fees
FROM #TableB
WHERE maxamount <= (SELECT [max] FROM cte)
UNION
SELECT (a.myval - (SELECT [max] FROM cte)), a.DefaultFee
FROM #TableA a
ORDER BY 1 desc

How to select highest common value across groups

`Suppose I have a set of data with 2 fields - Type and Date. I am interested in finding (if exists) the the max common date across the various types. Is this easier to do in SQL or LINQ?
Given the data below the result should be 2018-02-01 as this is the max common date for all types. It there is no such date then no data is returned.
Type, Date
---------
1,2018-03-01
1,2018-02-01
1,2018-01-01
2,2018-02-01
2,2018-05-01
2,2018-01-01
3,2018-01-01
3,2018-03-01
3,2018-02-01

You could use:
SELECT TOP 1 [Date], COUNT(*) OVER(PARTITION BY Date) AS cnt
FROM tab
ORDER BY cnt DESC, [Date] DESC
DBFiddle Demo

This'll work if you have an unlimited or indeterminable number of Types:
CREATE TABLE #Sample ([Type] int, [DAte] date);
INSERT INTO #Sample
VALUES
(1,'20180301'),
(1,'20180201'),
(1,'20180101'),
(2,'20180201'),
(2,'20180501'),
(2,'20180101'),
(3,'20180101'),
(3,'20180301'),
(3,'20180201');
GO
WITH EntryCount AS(
SELECT [Type], [Date],
COUNT(*) OVER (PARTITION By [Date]) AS Entries
FROM #Sample)
SELECT MAX(Date)
FROM EntryCount EC
WHERE Ec.Entries = (SELECT COUNT(DISTINCT sq.[Type]) FROM #Sample sq);
GO
DROP TABLE #Sample;
Not sure how quick it'll be either though.

Example
Select Top 1 [Date]
from YourTable
Group By [Date]
Order By count([Type]) desc,[Date] desc
Returns
2018-02-01

This is not going to be very efficient not matter how you slice it because you have to compare across three groups. Assuming you have 3 types you could use a self join. Something like this.
select MAX(YourDate)
from YourTable yt
join YourTable yt2 on yt2.YourType = 2 and yt.YourDate = yt2.YourDate
join YourTable yt3 on yt3.YourType = 3 and yt.YourDate = yt3.YourDate
where yt.YourType = 1

Select all records for customers where mindate in 2015

I want to select all records for customers whose first order is from 2015. I want any orders they placed after 2015 too, but I DON'T want the records for customers whose first order was in 2016. I am ultimately trying to find the percentage of people who order more than twice, but I want to exclude the customers who were new in 2016.
This doesn't work because 'mindate' is an invalid column name but I'm not sure why or how else to try it.
Select
od.CustomerID, OrderID, OrderDSC, OrderDTS
From
OrderDetail OD
Join
(Select
OrderID, Min(orderdts) as mindate
From
OrderDetail
Where
mindate Between '2015-1-1' and '2015-12-31'
Group By Orderid) b on od.OrderID = b.OrderID

Because execution phases - it's seqency how is qry evaluated and by engine. In where clause your mindate not yet exists.
You can change mindate by orderdts:
select OrderID, min(orderdts) as mindate
from OrderDetail
where orderdts between '2015-1-1' and '2015-12-31'
group by Orderid
Second option is to use having statement - it's evaluated after group by.

What I di was select the distinct CustomerIDs that fall in between your daterange and did a left join with the table so it filters out anyone that doesn't fall in between your daterange.
SELECT * FROM
(Select DISTINCT(CustomerID) as CustomerID
FROM OrderDetail WHERE OrderDTS between '2015-1-1' AND '2015-12-31') oIDs
LEFT JOIN
OrderDetail OD
ON oIDs.CustomerID = OD.CustomerID

Try using the EXISTS clause. It is basically a sub-query. Below is an example you should be able to adapt.
create table Test (Id int, aDate datetime)
insert Test values (1,'04/04/2014')
insert Test values (1,'05/05/2015')
insert Test values (1,'06/06/2016')
insert Test values (2,'04/30/2016')
insert Test values (3,'02/27/2014')
select t.* from Test t
where
aDate>='01/01/2015'
and exists(select * from Test x where x.Id=t.Id and x.aDate >='01/01/2015' and x.aDate<'01/01/2016')

I don't know the orderdts data type but if it is datetime orders on 2015-12-31 will not be included (unless the order date is 2015-12-31 00:00:00.000. Note how this will skip the first record:
DECLARE #orders TABLE (CustomerID INT, orderDate DATETIME);
INSERT #orders VALUES (1, '2015-12-31 00:00:01.000'), (1, '2015-12-30'), (2, '2015-01-04');
SELECT * FROM #orders WHERE orderDate BETWEEN '2015-01-01' AND '2015-12-31';
In this case you would want the WHERE clause filter to look like:
WHERE orderDate BETWEEN '2015-01-01 00:00:00.000' AND '2015-12-31 23:59:59.999';
Or
WHERE CAST(orderDate AS date) BETWEEN '2015-01-01' AND '2015-12-31';
(the first example will almost certainly perform better).
Now, using this sample data:
-- Sample data
CREATE TABLE #LIST (LISTName varchar(10) NOT NULL);
INSERT #LIST
SELECT TOP (100) LEFT(newid(), 8)
FROM sys.all_columns a, sys.all_columns b;
-- You will probably want LISTName to be indexed
CREATE NONCLUSTERED INDEX nc_LISTName ON #LIST(LISTName);
You can implement Paul's solution like this:
DECLARE #LIST_Param varchar(8) = 'No List';
SELECT LISTName
FROM
(
SELECT distinct LISTName
FROM #LIST
UNION ALL
SELECT 'No List'
WHERE (SELECT COUNT(DISTINCT LISTName) FROM #LIST) < 1000000
) Distinct_LISTName
WHERE (#LIST_Param = 'No List' or #LIST_Param = LISTName);
Alternatively you can do this:
DECLARE #LIST_Param varchar(8) = 'No List';
WITH x AS
(
SELECT LISTName, c = COUNT(*)
FROM #LIST
WHERE (#LIST_Param = 'No List' or #LIST_Param = LISTName)
GROUP BY LISTName
),
c AS (SELECT s = SUM(c) FROM x)
SELECT LISTName
FROM x CROSS JOIN c
WHERE s < 1000000;

TSql equality on groups of rows

I have a table that contains information on groups. There can be any number of members in a group. There is a group identifier and then an element identifier. I want to be able to in a single statement determine whether or not a given set exists in the table
#groupTable is an example of the data that already exists in the database
#inputData is the data that I want to see if it already exists in #groupTable
declare #groupData table
(
groupIdentifier int,
elementIdentifier uniqueidentifier
)
insert into #groupData values
(1, 'dfce40b1-3719-4e4c-acfa-65f728677700'),
(1, '89e7e6be-cee8-40a7-8135-a54659e0d88c')
declare #inputData table
(
tempGroupIdentifier int,
elementIdentifier uniqueidentifier
)
insert into #inputData values
(42, 'dfce40b1-3719-4e4c-acfa-65f728677700'),
(42, '89e7e6be-cee8-40a7-8135-a54659e0d88c'),
(55, 'dfce40b1-3719-4e4c-acfa-65f728677700'),
(55, '2395a42c-94f4-4cda-a773-221b26ea5e44'),
(55, 'f22db9df-a1f4-4078-b74c-90e34376eff6')
Now I want to run a query that will show the relationship of the sets, showing which groupIdentifier is associated with which tempGroupIdentifier. If there is no matching set then I need to know that too.
desired output:
groupIdentifier, tempGroupIdentifier
1, 42
null, 55
Does anyone any suggestions on how to approach this problem?
I could probably pivot the rows and concat all elementIdentifiers into a giant string for each group that then do equality on, but that doesn't seem like a good solution.

SELECT DISTINCT
T1.tempgroupIdentifier, T2.GroupIdentifier
FROM
(
SELECT
COUNT(*) OVER (PARTITION BY tempgroupIdentifier) AS GroupCount,
ROW_NUMBER() OVER (PARTITION BY tempgroupIdentifier ORDER BY elementIdentifier) AS GroupRN,
tempgroupIdentifier, elementIdentifier
FROM
#inputData
) T1
LEFT JOIN
(
SELECT
COUNT(*) OVER (PARTITION BY GroupIdentifier) AS GroupCount,
ROW_NUMBER() OVER (PARTITION BY GroupIdentifier ORDER BY elementIdentifier) AS GroupRN,
GroupIdentifier, elementIdentifier
FROM
#groupData
) T2 ON T1.elementIdentifier = T2.elementIdentifier AND
T1.GroupCount = T2.GroupCount AND
T1.GroupRN = T2.GroupRN
Edit: this will also deal with the same value in a given set

SELECT
(
CASE WHEN matchCount = gdCount AND matchCount = idCount
THEN groupIdentifier
ELSE NULL
END) groupIdentifier,
cj.tempGroupIdentifier
FROM
(
SELECT gd.groupIdentifier, id.tempGroupIdentifier, COUNT(1) matchCount
FROM #groupData gd
CROSS JOIN #inputData id
WHERE id.elementIdentifier = gd.elementIdentifier
GROUP BY gd.groupIdentifier, id.tempGroupIdentifier) as cj
CROSS APPLY (SELECT COUNT(groupIdentifier) from #groupData gdca WHERE gdca.groupIdentifier = cj.groupIdentifier) as gdc(gdCount)
CROSS APPLY (SELECT COUNT(tempGroupIdentifier) from #inputData idca WHERE idca.tempGroupIdentifier = cj.tempGroupIdentifier) as idc(idCount)

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight

SQL - Combine rows - sql-server

Related

Loops on SQL Server

How to do SQL Range based calculations

How to select highest common value across groups

Select all records for customers where mindate in 2015

TSql equality on groups of rows

Categories

Resources