How to separate one column to multiple using conditions in SQL Server - sql-server

I want to separate one column into multiple columns based on a condition.
Table : emp
-- Sample table: [name] holds a hyphen-delimited list of products for each [id].
CREATE TABLE [dbo].[emp]
(
    [name] VARCHAR(200) NULL,
    [id]   INT          NULL
) ON [PRIMARY];
GO

-- Four sample rows carrying between two and five products each.
INSERT INTO [dbo].[emp] ([name], [id])
VALUES
    (N'lux-pen-oxo-mobile', 1),
    (N'pne-soap', 2),
    (N'hop-pen-mobile-soap-jad', 3),
    (N'pen-soap-box', 4);
Based on the above data I want output like below :
id |prod1 |prod2 |prod3 |prod4 | Prod5
1 |lux |pen |oxo |mobile |
2 |pne |soap | | |
3 |hop |pen |mobile |soap |jad
4 |pen |soap |box |
I tried like this:
-- Attempt: take the text before the first '-' as prod1, then try to reach the
-- later products by passing 2, 3, 4 and 5 as CHARINDEX's third argument.
select
id,
case
when charindex('-', name) > 0
then substring(name, 1, charindex('-', [name]) - 1)
end prod1, -- no ELSE branch: prod1 is NULL when the name contains no '-'
-- CHARINDEX's third argument is the character position at which the search
-- starts, not "find the Nth hyphen". Each SUBSTRING below is also given
-- len(name) as its length, so it returns the rest of the string rather than
-- a single product.
substring(name, charindex('-', [name], 2) + 1, len(name)) prod2,
substring(name, charindex('-', [name], 3) + 1, len(name)) prod3,
substring(name, charindex('-', [name], 4) + 1, len(name)) prod4,
substring(name, charindex('-', [name], 5) + 1, len(name)) prod4 -- NOTE(review): alias prod4 is repeated; prod5 was presumably intended
from
[emp]
This query is not returning the expected result.
Please tell me how to write a query to achieve this task in SQL Server.

You can do it using a CTE, as in the following example.
-- Wrap every hyphen-delimited item in its own <name> element, then read the
-- first five items back out of the XML by position. Positions that have no
-- element come back as NULL.
;WITH product_xml AS
(
    SELECT
        e.ID AS ID,
        CAST('<Names><name>'
             + REPLACE(e.Name, '-', '</name><name>')
             + '</name></Names>' AS XML) AS doc
    FROM [dbo].[emp] AS e
)
SELECT
    px.ID AS ID,
    px.doc.value('/Names[1]/name[1]', 'varchar(100)') AS prod1,
    px.doc.value('/Names[1]/name[2]', 'varchar(100)') AS prod2,
    px.doc.value('/Names[1]/name[3]', 'varchar(100)') AS prod3,
    px.doc.value('/Names[1]/name[4]', 'varchar(100)') AS prod4,
    px.doc.value('/Names[1]/name[5]', 'varchar(100)') AS prod5
FROM product_xml AS px
OUTPUT
+----+-------+-------+--------+--------+-------+
| ID | prod1 | prod2 | prod3 | prod4 | prod5 |
+----+-------+-------+--------+--------+-------+
| 1 | lux | pen | oxo | mobile | NULL |
+----+-------+-------+--------+--------+-------+
| 2 | pne | soap | NULL | NULL | NULL |
+----+-------+-------+--------+--------+-------+
| 3 | hop | pen | mobile | soap | jad |
+----+-------+-------+--------+--------+-------+
| 4 | pen | soap | box | NULL | NULL |
+----+-------+-------+--------+--------+-------+
If you want to replace NULL with '', you can change each column in the SELECT list as follows.
ISNULL(xmlname.value('/Names[1]/name[1]','varchar(100)'),'') AS prod1 ,
Live Demo

Related

SQL Server get missing records information

I have a question about SQL Server.
For each row, I need to report which columns do not have a value.
If one column has no value, the output should say that this column's value is not available.
If more than one column has no value, the output should say that those columns' values are not available.
The names of the missing columns should be concatenated into a single message.
Sample data :
-- Sample table in which every column is nullable, so any value may be missing.
CREATE TABLE [dbo].[EmpDetails]
(
    [Empid]    INT         NULL,
    [Empname]  VARCHAR(50) NULL,
    [Location] VARCHAR(50) NULL,
    [Deptid]   INT         NULL,
    [Deptname] VARCHAR(50) NULL
);

-- Eight sample rows; only the last one has a value in every column.
INSERT INTO [dbo].[EmpDetails] ([Empid], [Empname], [Location], [Deptid], [Deptname])
VALUES
    (1,    NULL,     N'che',  10,   N'hr'),
    (2,    N'hari',  N'pune', NULL, N'pm'),
    (3,    N'var',   NULL,    30,   NULL),
    (4,    NULL,     NULL,    NULL, N'hr'),
    (NULL, N'venu',  N'pune', NULL, NULL),
    (NULL, N'kumar', N'pune', 20,   NULL),
    (8,    'ravi',   NULL,    10,   N'hr'),
    (10,   N'k',     N'pune', 20,   N'hr');
Based on above data I want output like below :
empid | Empname | Location | Deptid| Deptname | Validate
------+---------+----------+-------+------------+---------------------------------------
1 | NULL |Che |10 | hr | Empname value is not available
2 | hari |pune |NULL | pm | Deptid value is not available
3 | var |NULL |30 | NULL | location and deptname values are not available
4 | NULL |NULL |NULL | hr | empname and location and deptid values are not available
NULL | venu |pune |NULL | NULL | empid and deptid and deptname values are not available
NULL | kumar |pune |20 | NULL | empid and deptname values are not available
8 | ravi |NULL |10 | hr | location value is not available
10 | k |pune |20 | hr |
I tried like below :
-- Attempt: emit the column name for every missing value and concatenate the pieces.
SELECT
empid, empname, location, deptid, deptname,
-- NOTE(review): Empid and Deptid are int columns, so comparing them with ''
-- relies on implicit conversion of '' to int; a stored value of 0 would
-- presumably be reported as missing too -- verify.
CASE
WHEN COALESCE(empid, '') = '' THEN 'Empid'
ELSE ''
END + ' '+
CASE
WHEN COALESCE(empname, '') = ''
THEN 'Empname'
ELSE ''
END + ' '+
CASE
WHEN COALESCE(Location, '') = ''
THEN 'Location'
ELSE ''
END + ' '+
CASE
WHEN COALESCE(Deptid, '') = ''
THEN 'Deptid'
ELSE ''
END + ' '+
CASE
WHEN COALESCE(Deptname, '') = ''
THEN 'Deptname'
ELSE ''
-- The ' ' separators and the trailing text are appended unconditionally, so a
-- row with no missing value still ends in 'value not available', there is no
-- "and" between names, and singular/plural wording is not distinguished.
END + ' ' +
+ 'value not available' AS Validate
FROM
[Test].[dbo].[EmpDetails]
But this query does not return the expected output.
Please tell me how to write query to achieve this task in SQL Server
Please try the following solution.
It is based on XML and XQuery.
It will work starting from SQL Server 2012 onwards.
SQL
-- DDL and sample data population, start
-- @tbl is a table variable: T-SQL variables take the @ sigil, whereas # names
-- a temp table created with CREATE TABLE. "DECLARE #tbl TABLE" does not parse.
DECLARE @tbl TABLE
(
    Empid int NULL,
    Empname varchar(50) NULL,
    Location varchar(50) NULL,
    Deptid int NULL,
    Deptname varchar(50) NULL
);
INSERT INTO @tbl (Empid, Empname, Location, Deptid, Deptname) VALUES
(1, NULL, N'che', 10, N'hr'),
(2, N'hari', N'pune', NULL, N'pm'),
(3, N'var', NULL, 30, NULL),
(4, NULL, NULL, NULL, N'hr'),
(NULL, N'venu', N'pune', NULL, NULL),
(NULL, N'kumar', N'pune', 20, NULL),
(8, 'ravi', NULL, 10, N'hr'),
(10, N'k', N'pune', 20, N'hr');
-- DDL and sample data population, end

-- t1(c): one XML document per row. <source> lists all five column names;
--        <target> holds only the row's non-NULL columns, because FOR XML PATH
--        leaves NULL-valued columns out.
-- t2(s): number of elements under <target>, i.e. how many columns have a
--        value (5 means nothing is missing).
-- Validate: the XQuery returns every <source> name that has no same-named
--        element under <target>; .value() yields those names separated by
--        single spaces and REPLACE turns each space into ' and '. The CASE
--        then picks the singular or plural suffix from s.
SELECT t.*
     , s
     , Validate = REPLACE(c.query('
           for $x in /root/source/*
           let $name := local-name($x)
           return if (/root/target/*[local-name(.)=$name]) then ()
           else $name
       ').value('.','VARCHAR(MAX)'),SPACE(1), ' and ') +
       CASE WHEN s=5 THEN ''
            WHEN s=4 THEN ' value is not available'
            WHEN s<4 THEN ' values are not available'
       END
FROM @tbl AS t
CROSS APPLY (SELECT TRY_CAST('<root><source><Empid/><Empname/><Location/><Deptid/><Deptname/></source>' +
    (SELECT Empid, Empname, Location, Deptid, Deptname
     FOR XML PATH(''), ROOT('target')) + '</root>' AS XML)) AS t1(c)
CROSS APPLY (SELECT c.value('count(/root/target/*)', 'INT')) AS t2(s);
Output
+-------+---------+----------+--------+----------+---+----------------------------------------------------------+
| Empid | Empname | Location | Deptid | Deptname | s | Validate |
+-------+---------+----------+--------+----------+---+----------------------------------------------------------+
| 1 | NULL | che | 10 | hr | 4 | Empname value is not available |
| 2 | hari | pune | NULL | pm | 4 | Deptid value is not available |
| 3 | var | NULL | 30 | NULL | 3 | Location and Deptname values are not available |
| 4 | NULL | NULL | NULL | hr | 2 | Empname and Location and Deptid values are not available |
| NULL | venu | pune | NULL | NULL | 2 | Empid and Deptid and Deptname values are not available |
| NULL | kumar | pune | 20 | NULL | 3 | Empid and Deptname values are not available |
| 8 | ravi | NULL | 10 | hr | 4 | Location value is not available |
| 10 | k | pune | 20 | hr | 5 | |
+-------+---------+----------+--------+----------+---+----------------------------------------------------------+

Retrieve line from ValidationDate Column

I am having difficulty writing a SQL script.
I have a table like this:
And I want to have a result like this:
I used the min and max functions but that doesn't work.
Do you have any idea?
Thank you for your help
MIN() and MAX() do appear to get you what you want. FYI, I have converted your dates to yyyy-MM-dd format.
-- Drop any leftover copy of the demo temp table so the script can be re-run.
IF OBJECT_ID('tempdb..#YourTable', 'U') IS NOT NULL
    DROP TABLE #YourTable;

CREATE TABLE #YourTable
(
    Business_Key INT         NOT NULL,
    [Name]       VARCHAR(10) NOT NULL,
    [Attribute]  VARCHAR(10) NOT NULL,
    ValidFrom    DATE        NOT NULL,
    ValidTo      DATE        NOT NULL,
    Primary_Key  INT         NOT NULL
);

INSERT INTO #YourTable (Business_Key, [Name], Attribute, ValidFrom, ValidTo, Primary_Key)
VALUES
    (1, 'Toto', 'Child', '2020-01-01', '2020-01-03', 1),
    (1, 'Toto', 'Child', '2020-01-03', '2020-01-10', 2),
    (1, 'Toto', 'Man',   '2020-01-10', '2020-01-15', 3),
    (2, 'Tata', 'Woman', '2020-01-01', '2020-01-15', 4),
    (3, 'Titi', 'Man',   '2020-01-01', '2020-01-15', 5),
    (3, 'Titi', 'Man',   '2020-01-05', '2020-01-17', 6);

-- Collapse all rows sharing Business_Key, Name and Attribute into one row that
-- carries the earliest ValidFrom, the latest ValidTo and the highest Primary_Key.
SELECT
    yt.Business_Key,
    yt.[Name],
    yt.[Attribute],
    MIN(yt.ValidFrom)   AS ValidFrom,
    MAX(yt.ValidTo)     AS ValidTo,
    MAX(yt.Primary_Key) AS Primary_Key
FROM #YourTable AS yt
GROUP BY
    yt.Business_Key,
    yt.[Name],
    yt.[Attribute]
Returns:
| Business_Key | Name | Attribute | ValidFrom | ValidTo | Primary_Key |
|--------------|------|-----------|------------|------------|-------------|
| 1 | Toto | Child | 2020-01-01 | 2020-01-10 | 2 |
| 1 | Toto | Man | 2020-01-10 | 2020-01-15 | 3 |
| 2 | Tata | Woman | 2020-01-01 | 2020-01-15 | 4 |
| 3 | Titi | Man | 2020-01-01 | 2020-01-17 | 6 |

Convert Rows to Columns - For every FieldName that exists I need a column for it

My question is very similar to Efficiently convert rows to columns in sql server. For every FieldName that exists, I need a column for it. The issue I am having is
I am creating many rows for each ID
I have an uncertain amount of columns. There are at least 2000 different FieldNames so I need something that is efficient
I need to have conditions based on if it's a string, numeric, or date field.
Original table:
-- Key/value style table: each row carries one named field for an Id, with the
-- value held in StringValue, DateValue and/or NumericValue.
CREATE TABLE [UWFieldTable]
(
    [FieldName]    nvarchar(25),
    [StringValue]  nvarchar(25),
    [DateValue]    date,
    [NumericValue] nvarchar(25),
    [Id]           nvarchar(5)
);

-- Explicit column list so the rows do not depend on the table's column order.
-- Dates use ISO 8601 (YYYY-MM-DD), which the date type parses the same way
-- under any DATEFORMAT / language setting (the original '1/9/2020' form does not).
INSERT INTO [UWFieldTable] ([FieldName], [StringValue], [DateValue], [NumericValue], [Id])
VALUES
    ('UWName',      'Kim',   NULL,         NULL,   'A1'),
    ('UWDate',      NULL,    '2020-01-09', NULL,   'A1'),
    ('UWNumber',    '3.3',   NULL,         '3.3',  'A2'),
    ('CloseName',   'Billy', NULL,         NULL,   'A2'),
    ('CloseDate',   NULL,    '2020-01-06', NULL,   'A3'),
    ('CloseNumber', '30.6',  NULL,         '30.6', 'A3'),
    ('UWDate',      NULL,    '2020-01-10', NULL,   'A3');
FieldName | StringValue | DateValue | NumericValue | Id |
-------------------------------------------------------------
UWName | Kim | NULL | NULL | A1 |
UWDate | NULL | 2020-01-09 | NULL | A1 |
UWNumber | 3.3 | NULL | 3.3 | A2 |
CloseName | Billy | NULL | NULL | A2 |
CloseDate | NULL | 2020-01-06 | NULL | A3 |
CloseNumber | 30.6 | NULL | 30.6 | A3 |
UWDate | NULL | 2020-01-10 | NULL | A3 |
...
Desired output:
Id | UWName | UWDate | UWNumber | CloseName | CloseDate | CloseNumber |
--------------------------------------------------------------------------------
A1 | Kim | 2020-01-09 | NULL | NULL | NULL | NULL |
A2 | NULL | NULL | 3.3 | Billy | NULL | NULL |
A3 | NULL | 2020-01-10 | NULL | NULL | 2020-01-06 | 30.6 |
Attempted code:
-- Attempted dynamic pivot (the asker's original logic, unchanged).
-- Local variables take the @ sigil; "DECLARE #cols" does not parse.
DECLARE @cols  AS NVARCHAR(MAX),
        @query AS NVARCHAR(MAX);

-- Build a comma-separated, bracket-quoted list of the distinct field names;
-- STUFF removes the leading comma.
SELECT @cols = STUFF((SELECT ',' + QUOTENAME([FieldName])
                      FROM [UWFieldTable]
                      GROUP BY [FieldName]
                      ORDER BY [FieldName]
                      FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)'), 1, 1, '');

-- NOTE(review): the inner "select *" also passes DateValue and NumericValue
-- into PIVOT, which groups by every column not named in the aggregate or the
-- FOR clause -- presumably the cause of the extra rows per Id described above.
-- Only StringValue is aggregated, so date-only fields come out as NULL.
SET @query = 'SELECT [Id], ' + @cols + ' from
(
select *
from [UWFieldTable]
) x
PIVOT
(
MAX([StringValue])
FOR [FieldName] in (' + @cols + ')
) p order by [Id]';

EXECUTE(@query);
Try this:
-- Step 1 (UNPIVOT): fold the three value columns into a single Val column.
--         DateValue is cast to nvarchar(25) first so that all three inputs
--         share one type.
-- Step 2 (PIVOT):   spread Val back out into one column per field name,
--         keeping the MAX value for each Id / field.
SELECT
    [Id],
    [UWName],
    [UWDate],
    [UWNumber],
    [CloseName],
    [CloseDate],
    [CloseNumber]
FROM
(
    SELECT
        [Id],
        [FieldName],
        Val
    FROM
    (
        SELECT
            [FieldName],
            [StringValue],
            CAST([DateValue] AS nvarchar(25)) AS DateValue,
            [NumericValue],
            [Id]
        FROM [UWFieldTable]
    ) AS typed_src
    UNPIVOT
    (
        Val FOR ColType IN ([StringValue], DateValue, [NumericValue])
    ) AS folded
) AS field_values
PIVOT
(
    MAX([Val])
    FOR [FieldName] IN ([UWName], [UWDate], [UWNumber], [CloseName], [CloseDate], [CloseNumber])
) AS by_field
ref: SQL Server Pivot on multiple fields
For a case like this, it is easier to use conditional CASE expressions with GROUP BY:
-- One output row per Id. Each column picks the value from the rows whose
-- FieldName matches it (the CASE yields NULL for every other row) and MAX
-- collapses those rows into a single value.
SELECT
    Id,
    MAX(CASE WHEN [FieldName] = 'UWName'      THEN [StringValue]  END) AS UWName,
    MAX(CASE WHEN [FieldName] = 'UWDate'      THEN [DateValue]    END) AS UWDate,
    MAX(CASE WHEN [FieldName] = 'UWNumber'    THEN [NumericValue] END) AS UWNumber,
    MAX(CASE WHEN [FieldName] = 'CloseName'   THEN [StringValue]  END) AS CloseName,
    MAX(CASE WHEN [FieldName] = 'CloseDate'   THEN [DateValue]    END) AS CloseDate,
    MAX(CASE WHEN [FieldName] = 'CloseNumber' THEN [NumericValue] END) AS CloseNumber
FROM [UWFieldTable]
GROUP BY
    Id

Compare historical rows (LAG rows) and combine changed values to single column

Compare historical rows (LAG rows based on ResultChngDt) and combine changed column values to single column. Looking for help in writing elegant/efficient SQL Server 2016 TSQL Code(without cursors).
I have a table with the structure and data like this:
+----+-------+--------------+---------------+--------+--------+--------------+
| ID | RepID | CollctedDate | CompletedDate | Result | Tcode | ResultChngDt |
+----+-------+--------------+---------------+--------+--------+--------------+
| 1 | 101 | 11/20/2017 | 12/13/2017 | | L-2190 | 12/13/2017 |
| 1 | 101 | 11/22/2017 | 12/15/2017 | POS | L-Afb | 1/5/2018 |
| 1 | 102 | 11/22/2017 | 12/15/2017 | | L-2191 | 12/15/2017 |
| 1 | 102 | 11/22/2017 | 12/15/2017 | POS | L-2192 | 12/31/2017 |
+----+-------+--------------+---------------+--------+--------+--------------+
I need to generate a report/result as follows:
+----+-------+---------------------------+--------------------------+--+
| ID | RepID | Previous | Current | |
+----+-------+---------------------------+--------------------------+--+
| 1 | 101 | CollctedDate:11/20/2017 | CollctedDate:11/22/2017 | |
| | | CompletedDate:12/13/2017 | CompletedDate:12/15/2017 | |
| | | Result: | Result:POS | |
| | | Tcode:L-2190 | Tcode:L-Afb | |
| 1 | 102 | CollctedDate:11/22/2017 | CollctedDate:11/22/2017 | |
| | | CompletedDate:12/15/2017 | CompletedDate:12/15/2017 | |
| | | Result: | Result:POS | |
| | | Tcode:L-2191 | Tcode:L-2192 | |
+----+-------+---------------------------+--------------------------+--+
-- History table: several rows per (ID, RepID), each stamped with the date on
-- which the result changed (ResultChngDt).
CREATE TABLE [dbo].[Table1]
(
    [ID]            INT         NULL,
    [RepID]         INT         NULL,
    [CollctedDate]  DATETIME    NULL,
    [CompletedDate] DATETIME    NULL,
    [Result]        VARCHAR(3)  NULL,
    [Tcode]         VARCHAR(10) NULL,
    [ResultChngDt]  DATETIME    NULL
) ON [PRIMARY];
GO

-- Date literals use the unseparated YYYYMMDD form, which DATETIME parses the
-- same way under any language / DATEFORMAT setting (mm/dd/yyyy strings fail or
-- are misread on sessions set to dmy).
INSERT INTO [dbo].[Table1] ([ID], [RepID], [CollctedDate], [CompletedDate], [Result], [Tcode], [ResultChngDt])
VALUES
    (1, 101, '20171120', '20171213', N'',    N'L-2190', '20171213'),
    (1, 101, '20171122', '20171215', N'POS', N'L-Afb',  '20180105'),
    (1, 102, '20171122', '20171215', N'',    N'L-2191', '20171215'),
    (1, 102, '20171122', '20171215', N'POS', N'L-2192', '20171231');
Here's my query for your question:
-- Pair the most recent row of every (ID, RepID) with the row that preceded it
-- in ResultChngDt order, and render both as labelled text.
--
-- Fixes relative to the earlier draft of this query:
--   * the unbalanced extra closing parenthesis after the final CTE is gone;
--   * it reads dbo.Table1 (the table defined in the question), not #temp;
--   * history order comes from ResultChngDt, because CollctedDate and
--     CompletedDate are identical for both RepID 102 rows and therefore do not
--     define an order;
--   * labels and dates match the requested report ('CollctedDate:', 'Tcode:',
--     mm/dd/yyyy).
WITH cte_LEADLAG AS (
    -- One formatted text value per history row; CONVERT style 101 = mm/dd/yyyy.
    SELECT
        ID,
        RepID,
        ResultChngDt,
        CONCAT(
            'CollctedDate:',   CONVERT(VARCHAR(10), CollctedDate, 101),
            ' CompletedDate:', CONVERT(VARCHAR(10), CompletedDate, 101),
            ' Result:',        Result,
            ' Tcode:',         Tcode
        ) AS RowText
    FROM dbo.Table1
),
cte_FINAL AS (
    SELECT
        ID,
        RepID,
        -- Text of the row immediately before this one within the same key.
        LAG(RowText) OVER (PARTITION BY ID, RepID ORDER BY ResultChngDt) AS [Previous],
        RowText AS [Current],
        -- rn = 1 marks the latest row of each key.
        ROW_NUMBER() OVER (PARTITION BY ID, RepID ORDER BY ResultChngDt DESC) AS rn
    FROM cte_LEADLAG
)
SELECT
    ID,
    RepID,
    [Previous],
    [Current]
FROM cte_FINAL
WHERE rn = 1
  AND [Previous] IS NOT NULL   -- a key with a single row has nothing to compare
ORDER BY ID, RepID;
Result:
-- Number each key's rows from newest (rn = 1) to oldest by ResultChngDt, then
-- join the newest row (d1) to the one just before it (d2) so that both
-- versions sit side by side in a single output row.
with data as (
    select
        ID, RepID, CollctedDate, CompletedDate, Result, Tcode, ResultChngDt,
        row_number() over (partition by ID, RepID order by ResultChngDt desc) as rn
    from dbo.Table1
)
select
    d1.ID,
    d1.RepID,
    d2.CollctedDate  as PrevCollctedDate,
    d2.CompletedDate as PrevCompletedDate,
    d2.Result        as PrevResult,
    d2.Tcode         as PrevTcode,
    d1.CollctedDate  as CurrCollctedDate,
    d1.CompletedDate as CurrCompletedDate,
    d1.Result        as CurrResult,
    d1.Tcode         as CurrTcode
from data as d1
left outer join data as d2
    -- The key columns must match as well as the row number; otherwise the
    -- newest row of one RepID is paired with the older rows of every RepID.
    on  d2.ID    = d1.ID
    and d2.RepID = d1.RepID
    and d2.rn    = d1.rn + 1
where d1.rn = 1 -- I suppose you only want the two most recent??
This gives you all the data you need in a single row. You can handle report formatting to suit whatever requirements you have in whatever tool you're using for that.

SQL DATEDIFF in an sql query

I have two tables Customers and Purchases:
Customers table:
+------------+-----------+----------+
| CustomerID | FirstName | Surname |
+------------+-----------+----------+
| 101 | Jeff | Smith |
| 102 | Alex | Jones |
| 103 | Pam | Clark |
| 104 | Zola | Lona |
| 105 | Simphele | Ndima |
| 106 | Andre | Williams |
| 107 | Wayne | Shelton |
| 108 | Bob | Banard |
| 109 | Ken | Davidson |
| 110 | Sally | Ivan |
+------------+-----------+----------+
Purchases table:
+------------+--------------+------------+-----------+
| PurchaseId | PurchaseDate | CustomerID | ProductID |
+------------+--------------+------------+-----------+
| 1 | 2012-08-15 | 105 | a510 |
| 2 | 2012-08-15 | 102 | a510 |
| 3 | 2012-08-15 | 103 | a506 |
| 4 | 2012-08-16 | 105 | a510 |
| 5 | 2012-08-17 | 106 | a507 |
| 6 | 2012-08-17 | 107 | a509 |
| 7 | 2012-08-18 | 108 | a502 |
| 8 | 2012-08-19 | 108 | a510 |
| 9 | 2012-08-19 | 109 | a502 |
| 10 | 2012-08-20 | 110 | a503 |
| 11 | 2012-08-21 | 101 | a510 |
| 12 | 2012-08-22 | 102 | a507 |
+------------+--------------+------------+-----------+
My question (which I have been struggling with for the last 2 days): create a query that will display all the customers who purchased products after five days or more, since their last purchase.
Desired outputs:
+-----------+------------------+
| Firstname | Daysdifference |
+-----------+------------------+
| Alex | 7 |
+-----------+------------------+
-- Pairs every purchase (p1) with every purchase of the same customer (p2),
-- including itself, and keeps the pairs in which p2 falls five or more days
-- after p1. The comparison is not limited to consecutive purchases.
-- NOTE(review): this reads from "customer"/"purchases", while the tables above
-- are introduced as Customers and Purchases -- confirm the intended names.
select c.FirstName, t.dif as Daysdifference from customer c
inner join
(
select p1.CustomerID,
datediff(day,p1.PurchaseDate,p2.PurchaseDate) as dif
from purchases p1
inner join purchases p2
on p1.CustomerID=p2.CustomerID
where datediff(day,p1.PurchaseDate,p2.PurchaseDate)>=5
) t
on t.CustomerID= c.CustomerID
Here you go:
-- Sample data held in table variables. Table variables take the @ sigil;
-- "DECLARE #Customers TABLE" does not parse (# names a temp table that is
-- created with CREATE TABLE instead).
DECLARE @Customers TABLE (CustomerID INT, FirstName VARCHAR(30), Surname VARCHAR(30));
DECLARE @Purchases TABLE (PurchaseId INT, PurchaseDate DATE, CustomerID INT, ProductID VARCHAR(10) );

-- Explicit column lists keep the inserts independent of column order.
INSERT INTO @Customers (CustomerID, FirstName, Surname) VALUES
(101,'Jeff ' , 'Smith '),
(102,'Alex ' , 'Jones '),
(103,'Pam ' , 'Clark '),
(104,'Zola ' , 'Lona '),
(105,'Simphele' , 'Ndima '),
(106,'Andre ' , 'Williams'),
(107,'Wayne ' , 'Shelton '),
(108,'Bob ' , 'Banard '),
(109,'Ken ' , 'Davidson'),
(110,'Sally ' , 'Ivan ');
INSERT INTO @Purchases (PurchaseId, PurchaseDate, CustomerID, ProductID) VALUES
(1, '2012-08-15' ,105, 'a510'),
(2, '2012-08-15' ,102, 'a510'),
(3, '2012-08-15' ,103, 'a506'),
(4, '2012-08-16' ,105, 'a510'),
(5, '2012-08-17' ,106, 'a507'),
(6, '2012-08-17' ,107, 'a509'),
(7, '2012-08-18' ,108, 'a502'),
(8, '2012-08-19' ,108, 'a510'),
(9, '2012-08-19' ,109, 'a502'),
(10,'2012-08-20' ,110, 'a503'),
(11,'2012-08-21' ,101, 'a510'),
(12,'2012-08-22' ,102, 'a507');

-- CTE: the day difference between every pair of purchases made by the same
-- customer (each purchase is also paired with itself, giving 0). The outer
-- query keeps the pairs that are five or more days apart.
-- NOTE(review): pairs are not restricted to consecutive purchases, so a
-- customer with three or more purchases can be reported for a gap that spans
-- an intermediate purchase.
WITH CTE AS (
    SELECT Pur1.CustomerID, DATEDIFF(DAY, Pur1.PurchaseDate, Pur2.PurchaseDate) Daysdifference
    FROM @Purchases Pur1 INNER JOIN @Purchases Pur2 ON Pur1.CustomerID = Pur2.CustomerID
)
SELECT Cus.FirstName, CTE.Daysdifference
FROM @Customers Cus INNER JOIN CTE ON Cus.CustomerID = CTE.CustomerID
WHERE CTE.Daysdifference >= 5;
Result:
+-----------+------------------+
| Firstname | Daysdifference |
+-----------+------------------+
| Alex | 7 |
+-----------+------------------+
Demo
You can solve it like this:
Create a ranking based on date desc and partitioned by customer id
Next check date diff between consecutive ranks to find those customers
Query below
-- Rank each customer's purchases from newest (1) to oldest, then pair every
-- purchase (older) with the next more recent one (newer) and keep the pairs
-- that are at least five days apart.
;WITH ranked_purchases AS
(
    SELECT
        p.CustomerID,
        p.PurchaseDate,
        ROW_NUMBER() OVER (PARTITION BY p.CustomerID ORDER BY p.PurchaseDate DESC) AS recency_rank
    FROM Purchases AS p
)
SELECT
    cust.FirstName AS Name,
    DATEDIFF(DAY, older.PurchaseDate, newer.PurchaseDate) AS Daysdifference
FROM Customers AS cust
INNER JOIN ranked_purchases AS older
    ON older.CustomerID = cust.CustomerID
INNER JOIN ranked_purchases AS newer
    ON  newer.CustomerID   = older.CustomerID
    AND newer.recency_rank = older.recency_rank - 1
    AND DATEDIFF(DAY, older.PurchaseDate, newer.PurchaseDate) >= 5
See working demo
Since SQL Server 2012 and the addition of the LAG & LEAD functions, there is no reason at all to do a self join for something like this...
Note: window functions such as LEAD can be extremely efficient compared to other methods, but they need the help of a proper index to perform at their best (note the additional POC index in the test script).
-- Test fixture: temp tables holding the question's sample customers and purchases.
CREATE TABLE #Customers (
CustomerID INT PRIMARY KEY,
FirstName VARCHAR(30),
Surname VARCHAR(30)
);
CREATE TABLE #Purchases (
PurchaseId INT PRIMARY KEY,
PurchaseDate DATE,
CustomerID INT,
ProductID VARCHAR(10)
);
-- Ten customers; customer 104 has no purchases in the data below.
INSERT INTO #Customers VALUES
(101,'Jeff ' , 'Smith '),
(102,'Alex ' , 'Jones '),
(103,'Pam ' , 'Clark '),
(104,'Zola ' , 'Lona '),
(105,'Simphele' , 'Ndima '),
(106,'Andre ' , 'Williams'),
(107,'Wayne ' , 'Shelton '),
(108,'Bob ' , 'Banard '),
(109,'Ken ' , 'Davidson'),
(110,'Sally ' , 'Ivan ');
-- Twelve purchases; customers 102, 105 and 108 each appear twice.
INSERT INTO #Purchases VALUES
(1, '2012-08-15' ,105, 'a510'),
(2, '2012-08-15' ,102, 'a510'),
(3, '2012-08-15' ,103, 'a506'),
(4, '2012-08-16' ,105, 'a510'),
(5, '2012-08-17' ,106, 'a507'),
(6, '2012-08-17' ,107, 'a509'),
(7, '2012-08-18' ,108, 'a502'),
(8, '2012-08-19' ,108, 'a510'),
(9, '2012-08-19' ,109, 'a502'),
(10,'2012-08-20' ,110, 'a503'),
(11,'2012-08-21' ,101, 'a510'),
(12,'2012-08-22' ,102, 'a507');
-- add POC index...
-- Keyed on (CustomerID, PurchaseDate): the same columns, in the same order, as
-- the PARTITION BY / ORDER BY of the LEAD() window in the query that follows.
CREATE NONCLUSTERED INDEX ix_POC ON #Purchases (CustomerID, PurchaseDate);
--===========================================================
-- For every purchase, LEAD() fetches the same customer's next purchase date;
-- the gap in days is NULL for a customer's last purchase, so those rows drop
-- out of the >= 5 filter.
WITH purchase_gaps AS
(
    SELECT
        p.CustomerID,
        DATEDIFF(
            DAY,
            p.PurchaseDate,
            LEAD(p.PurchaseDate, 1) OVER (PARTITION BY p.CustomerID ORDER BY p.PurchaseDate)
        ) AS Daysdifference
    FROM #Purchases AS p
)
SELECT
    c.FirstName,
    g.Daysdifference
FROM #Customers AS c
INNER JOIN purchase_gaps AS g
    ON g.CustomerID = c.CustomerID
WHERE g.Daysdifference >= 5;
Results...
FirstName Daysdifference
------------------------------ --------------
Alex 7

Resources