SQL - Most recent by date - database

I have a consumer table which has the columns - Email, AccountState and DateCreated
AccountState can have the values 1 (active), 2 (inactive) and 3 (archived)
A specific consumer can have multiple rows, one for each of the account states above.
What I am trying to do is construct a query which returns the following
A. A list of consumer records for each consumer (using email address)
B. Only the records which aren't the most recent (so if a specific email address has 3 records, 1 for each state, it would return the 2 which aren't the most recent)
Then once I have this list I want to set all these states to 3 as they need to be archived.
So for the example data shown here
Only rows 13 - 16 would be returned.
I have tried to do this using the query below but it isn't working.
-- Attempt from the question (reported as not working).
-- Keeps non-active rows (AccountState <> 1) whose DateCreated equals the
-- MAX(DateCreated) returned by the correlated subquery below.
-- NOTE(review): the subquery correlates on Id, not Email; if Id is unique per row
-- the MAX is simply that row's own DateCreated -- confirm against the schema.
-- NOTE(review): "=" keeps the newest row of each group, whereas the stated goal is
-- the rows that are NOT the most recent.
SELECT con.Email,
con.Id,
con.DateCreated AS DateRegistered,
con.DateLastActivity,
con.hasiPhone,
con.hasAndroid,
con.hasSMS,
con.CurrencyCode AS Currency,
con.AccountState
FROM Consumer con
WHERE con.AccountState <> 1
AND DateCreated =( SELECT MAX(DateCreated)
FROM Consumer con_most_recent
WHERE con_most_recent.AccountState <> 1
AND con_most_recent.Id = con.Id)
order by Email asc

-- Number each e-mail's non-active rows from newest to oldest, then return
-- every row except the newest one (rn = 1) for that e-mail.
;WITH x AS
(
    SELECT
        ROW_NUMBER() OVER (
            PARTITION BY EMail
            ORDER BY DateCreated DESC
        ) AS rn,
        Email,
        Id,
        DateCreated AS DateRegistered --, ... other columns
    FROM dbo.Consumer
    WHERE AccountState <> 1
)
SELECT
    Email,
    Id,
    DateRegistered --, ... other columns
FROM x
WHERE rn > 1
ORDER BY Email;
EDIT changing state for these rows
-- Archive (AccountState = 3) every non-active row that is not the newest
-- non-active row for its e-mail address.
;WITH x AS
(
    SELECT rn = ROW_NUMBER() OVER (PARTITION BY EMail ORDER BY DateCreated DESC),
           Email, Id, DateCreated AS DateRegistered,
           -- The column being updated must be exposed by the CTE; without it
           -- "UPDATE x SET AccountState = ..." fails with an invalid column name.
           AccountState
    FROM dbo.Consumer
    WHERE AccountState <> 1
)
UPDATE x
SET AccountState = 3
WHERE rn > 1;

Related

SQL Server : how to divide database records into even, random groups

tblNames
OrganizationID (int)
LastName (varchar)
...
GroupNumber (int)
GroupNumber is currently NULL for all records, I need an UPDATE statement to update this column.
I need to split up records on an OrganizationID level into even, random groups.
If there are < 20,000 records for an OrganizationID, I need 2 even, random groups. So records for that OrganizationID will have a GroupNumber of 1 or 2. There will be the same (or if odd number of records difference of only 1) number of records for GroupNumber = 1 and for GroupNumber = 2, and there will be no recognizable way to tell how a person got into a GroupNumber - i.e. LastNames that start with A-L are group 1, M-Z are group 2 would not be OK.
If there are > 20,000 records for an OrganizationID, I need 4 even, random groups. So records for that OrganizationID will have a GroupNumber values of 1, 2, 3, or 4. There will be the same (or if odd number of records difference of only 1) number of records for each GroupNumber, and there will be no recognizable way to tell how a person got into a GroupNumber - i.e. LastNames that start with A-F are group 1, G-L are group 2, etc. would not be OK.
There are only about 20 organizations, so I can run an update statement 20 times, once per organizationID if needed.
I have full control of the table so I can add keys or columns, but for now this is what it is.
Would appreciate any help.
Create row numbers randomly (with ROW_NUMBER and NEWID). Then get their modulo 2 or 4 depending on the record count to get buckets 0 to 1 or 0 to 3.
-- Assign each row a random bucket within its organization:
-- 2 buckets (0-1) when the organization has <= 20000 rows, otherwise 4 (0-3).
select
    organizationid, lastname, ...,
    case when cnt <= 20000 then rn % 2 else rn % 4 end as bucket
from
(
    select
        organizationid, lastname, ...,
        -- Random numbering restarts per organization so buckets are even per organization.
        row_number() over (partition by organizationid order by newid()) as rn,
        -- Row count per organization (not of the whole table) drives the 2-vs-4 decision.
        count(*) over (partition by organizationid) as cnt
    from mytable
) randomized;
UPDATE: I suppose the update statement would have to look something like this:
-- Write a random, evenly distributed group number (1-2 or 1-4) to every row,
-- computed independently for each organization.
with randomized as
(
    select
        groupnumber,
        -- Random numbering restarts per organization.
        row_number() over (partition by organizationid order by newid()) as rn,
        -- Per-organization row count decides between 2 and 4 groups.
        count(*) over (partition by organizationid) as cnt
    from mytable
)
-- The update target must match the CTE name exactly ("randomized").
update randomized
set groupnumber = case when cnt <= 20000 then rn % 2 else rn % 4 end + 1;
Another slightly different approach;
Setting up some fake data:
-- Recreate the scratch table from a clean slate.
IF OBJECT_ID('tempdb.dbo.#Orgs') IS NOT NULL
    DROP TABLE #Orgs;

CREATE TABLE #Orgs
(
    RID            INT IDENTITY(1,1) PRIMARY KEY CLUSTERED,
    OrganizationId INT,
    LastName       VARCHAR(36),
    GroupId        INT
);

-- Fill it with 40000 generated rows: OrganizationId cycles through 0..19999
-- and LastName is a random GUID. The cross join only exists to supply enough rows.
INSERT INTO #Orgs (OrganizationId, LastName)
SELECT TOP (40000)
    ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) % 20000,
    NEWID()
FROM sys.all_objects a
CROSS JOIN sys.all_objects b;
then using the rarely useful ntile() function to get as close to identically sized groups as possible. Sorting by newid() essentially sorts the data randomly (or as random as generating one guid to the next is).
-- Number of random, near-equal-sized groups to split the rows into.
-- T-SQL local variables take the @ prefix (# denotes temp tables).
declare @NumRandomGroups int = 4

-- ntile() over a random ordering (newid()) deals rows into @NumRandomGroups
-- groups; the result is joined back on RID to store it.
update o
set GroupId = x.GroupId
from #Orgs o
inner join (select RID, GroupId = ntile(@NumRandomGroups) over (order by newid())
            from #Orgs) x
    on o.RID = x.RID

-- Check the resulting group sizes.
select GroupId, count(1)
from #Orgs
group by GroupId

-- Inspect the individual assignments.
select *
from #Orgs
order by RID
You can then set @NumRandomGroups to whatever you want it to be based on the count of Organizations

TSQL : Find PAIR Sequence in a table

I have following table in T-SQL(there are other columns too but no identity column or primary key column):
Oid Cid
1 a
1 b
2 f
3 c
4 f
5 a
5 b
6 f
6 g
7 f
So in above example I would like to highlight that following Oid are duplicate when looking at Cid column values as "PAIRS":
Oid:
1 (1 matches Oid: 5)
2 (2 matches Oid: 4 and 7)
Please NOTE that Oid 2 match did not include Oid 6, since the pair of 6 has letter 'G' as well.
Is it possible to create a query without using While loop to highlight the "Oid" like above? along with how many other matches count exist in database?
I am trying to find the patterns within the dataset relating to these two columns. Thank you in Advance.
Here is a worked example - see comments for explanation:
--First set up your data in a table variable
--(table variables take the @ prefix; "declare #name table" is not valid T-SQL)
declare @oidcid table (Oid int, Cid char(1));
insert into @oidcid values
(1,'a'),
(1,'b'),
(2,'f'),
(3,'c'),
(4,'f'),
(5,'a'),
(5,'b'),
(6,'f'),
(6,'g'),
(7,'f');
--This cte gets a table with all of the cids in order, for each oid
--(one comma-terminated string per Oid, e.g. 'a,b,')
with cte as (
    select distinct Oid, (select Cid + ',' from @oidcid i2
                          where i2.Oid = i.Oid order by Cid
                          for xml path('')) Cids
    from @oidcid i
)
select Oid, cte.Cids
from cte
inner join (
    -- Here we get just the lists of cids that appear more than once
    select Cids, Count(Oid) as OidCount
    from cte group by Cids
    having Count(Oid) > 1 ) as gcte on cte.Cids = gcte.Cids
-- And when we list them, we are showing the oids with duplicate cids next to each other
Order by cte.Cids
-- List every pair of different Oids that share a Cid; o1.Oid < o2.Oid keeps
-- each pair once. "table" is the placeholder table name used in the original answer.
select o1.Cid, o1.Oid, o2.Oid
-- OVER must directly follow the aggregate; the "+ 1" is applied to the windowed count.
, count(*) over (partition by o1.Cid) + 1 as [cnt]
from table o1
join table o2
on o1.Cid = o2.Cid
and o1.Oid < o2.Oid
order by o1.Cid, o1.Oid, o2.Oid
Maybe Like this then:
-- For each row compute how many rows share its cid (maxRow) and how many share
-- its oid (maxRow2), then keep rows whose oid occurs once and whose cid occurs
-- more than once.
WITH CTE AS
(
    SELECT
        Cid,
        oid,
        ROW_NUMBER() OVER (PARTITION BY cid ORDER BY cid) AS RN,
        COUNT(*) OVER (PARTITION BY oid) AS maxRow2,
        COUNT(*) OVER (PARTITION BY cid) AS maxRow
    FROM oid
)
SELECT *
FROM CTE
WHERE maxRow2 = 1
  AND maxRow <> 1
ORDER BY oid

unique chat records sql

I have a database table with 5 columns, as follows:
message_id
user_id_send
user_id_rec
message_date
message_details
I am looking for a SQL Server query. I want to filter results on two columns (user_id_send, user_id_rec) for a given user ID, based on the following constraints:
Get the Latest Record (filtered on date or message_id)
Only Unique Records (1 - 2 , 2 - 1 are same so only one record will be returned which ever is the latest one)
Ordered by Descending based on message_id
SQL Query
The main purpose of this query is to get records of user_id to find out to whom he has sent messages and from whom he had received messages.
I have also attached the sheet for your reference.
Here is my try
-- Attempt from the question, part 1: messages sent by user 1.
-- DISTINCT applies to the whole select list (user_id_reciever plus every column
-- from *), not to user_id_reciever alone.
-- NOTE(review): column names here (user_id_sender / user_id_reciever) differ from
-- the column list given in the question -- confirm the real names.
WITH t
AS (SELECT *
FROM messages
WHERE user_id_sender = 1)
SELECT DISTINCT user_id_reciever,
*
FROM t;
-- Part 2: the same pattern for messages received by user 1.
WITH h
AS (SELECT *
FROM messages
WHERE user_id_reciever = 1)
SELECT DISTINCT user_id_sender,
*
FROM h;
-- Latest message of every conversation user 1 takes part in, newest conversation first.
;WITH tmpMsg AS (
    SELECT M2.message_id
          ,M2.user_id_receiver
          ,M2.user_id_sender
          ,M2.message_date
          ,M2.message_details
          -- One side of every filtered row is user 1, so receiver + sender is the
          -- same value for 1 -> N and N -> 1: both directions share a partition.
          -- message_id breaks ties between messages with an identical message_date.
          ,ROW_NUMBER() OVER (PARTITION BY M2.user_id_receiver + M2.user_id_sender
                              ORDER BY M2.message_date DESC, M2.message_id DESC) AS RowNum
    FROM messages M2
    WHERE M2.user_id_receiver = 1
       OR M2.user_id_sender = 1
)
SELECT T.message_id
      ,T.user_id_receiver
      ,T.user_id_sender
      ,T.message_date
      ,T.message_details
FROM tmpMsg T
WHERE T.RowNum <= 1
-- The question asks for the result in descending message_id order.
ORDER BY T.message_id DESC;
The above should fetch you the results you are looking for when you query for a particular user_id (replace the 1 with a parameter, e.g. @p_user_id). The user_id_receiver+user_id_sender in the PARTITION clause ensures that records with user id combinations such as 1 - 2, 2 - 1 are not selected twice.
Hope this helps.
-- Latest message received by user 1 from each sender ...
select * from
(
    select ROW_NUMBER() over (partition by user_id_sender
                              order by message_date DESC) as rowno,
           *
    from messages
    where user_id_receiver = 1
) T where T.rowno = 1
UNION ALL
-- ... plus the latest message sent by user 1 to each receiver.
select * from
(
    select ROW_NUMBER() over (partition by user_id_receiver
                              order by message_date DESC) as rowno,
           *
    from messages
    where user_id_sender = 1
) T where T.rowno = 1
Explanation: For each group of user_id_sender, it orders internally by message_date desc, and then adds row numbers, and we only want the first one (chronologically last). Then do the same for user_id_receiver, and union the results together to get 1 result set with all the desired rows. You can then add your own order by clause and additional where conditions at the end as required.
Of course, this only works for any 1 user_id at a time (replace =1 with @user_id).
To get a result from all user_id's at once, is a totally different query, so I hope this helps?

Select Records based on Count of Child Records Matching Specified Criteria

I have two Tables in an SQL 08 database:
Clients
Balances
ClientID (foreign key to clients)
isStartingBalance (bit)
Using SQL, I need to locate and update all Balance Records where the Client the Balance Record belongs to has only one balance record, and that balance record has isStartingBalance=0. I need to update those records so that isStartingBalance=1.
How might I go about doing this?
Thanks in advance.
You can use a subquery to find clients with only one balance row:
-- Flag the balance row of every client that owns exactly one balance row,
-- provided that row is not already flagged as the starting balance.
UPDATE Balances
SET isStartingBalance = 1
WHERE ClientID IN (
          -- clients with exactly one balance row
          SELECT ClientID
          FROM Balances
          GROUP BY ClientID
          HAVING COUNT(*) = 1
      )
  AND isStartingBalance = 0
Or more sensibly, you can update the oldest balance row for each customer:
-- Flag the first balance row of every client (ordered by dt) as the starting balance.
-- NOTE(review): dt is assumed to be a date column on Balances; the question does
-- not list it -- confirm the actual column name.
update bal
set isStartingBalance = 1
from (
    select row_number() over (
               partition by ClientID
               order by dt) as rn
         , *
    from Balances -- same table name as the question and the previous statement
) bal
where rn = 1
and isStartingBalance = 0
Example at SQL Fiddle.
An even more sensible solution would be to remove the isStartingBalance column entirely, and query for it when you need it:
-- Derive isStartingBalance on the fly instead of storing it: the first row per
-- client (ordered by dt) is reported as 1, every other row as 0.
select ClientID
     , dt
     , case when rn = 1 then 1 else 0 end as isStartingBalance
from (
    select ClientID
         , dt
         , row_number() over (
               partition by ClientID
               order by dt) as rn
    from Balances -- same table name as the question and the update statements
) SubQueryAlias

Check for applicable Group query for shopping cart

I have problem for one of discount check condition. I have tables structure as below:
Cart table (id, customerid, productid)
Group table (groupid, groupname, discountamount)
Group Products table (groupproductid, groupid, productid)
While placing an order, there will be multiple items in the cart. I want to check those items against the top-most group: does the cart contain all of the products that the group consists of?
Example:
If group 1 consists 2 products and those two products exists in cart table then group 1 discount should be returned.
please help
It's tricky, without having real table definitions nor sample data. So I've made some up:
-- Made-up schema: carts, discount groups and the products belonging to each group.
CREATE TABLE Carts (
    id         INT,
    customerid INT,
    productid  INT
);

CREATE TABLE Groups (
    groupid        INT,
    groupname      INT,
    discountamount INT
);

CREATE TABLE GroupProducts (
    groupproductid INT,
    groupid        INT,
    productid      INT
);

-- Customer 1 holds products 1, 2, 4; customer 2 holds products 2, 3.
INSERT INTO Carts (id, customerid, productid)
VALUES (1, 1, 1),
       (2, 1, 2),
       (3, 1, 4),
       (4, 2, 2),
       (5, 2, 3);

INSERT INTO Groups (groupid, groupname, discountamount)
VALUES (1, 1, 10),
       (2, 2, 15),
       (3, 3, 20);

-- Group 1 = {1, 5}, group 2 = {2, 4}, group 3 = {2, 3}.
INSERT INTO GroupProducts (groupproductid, groupid, productid)
VALUES (1, 1, 1),
       (2, 1, 5),
       (3, 2, 2),
       (4, 2, 4),
       (5, 3, 2),
       (6, 3, 3);
-- Relational division: report, per customer, every group whose products are
-- all present in that customer's cart.
;With MatchedProducts as (
    -- How many distinct products of each group the customer has in the cart.
    -- DISTINCT stops the same product appearing twice in a cart from inflating
    -- the count into a false match.
    select
        c.customerid, gp.groupid, COUNT(DISTINCT c.productid) as Cnt
    from
        Carts c
            inner join
        GroupProducts gp
            on
                c.productid = gp.productid
    group by
        c.customerid, gp.groupid
), GroupSizes as (
    -- Number of distinct products each group consists of.
    select groupid, COUNT(DISTINCT productid) as Cnt from GroupProducts group by groupid
), MatchingGroups as (
    -- A group matches when the customer holds as many of its products as it contains.
    select
        mp.*
    from
        MatchedProducts mp
            inner join
        GroupSizes gs
            on
                mp.groupid = gs.groupid and
                mp.Cnt = gs.Cnt
)
select * from MatchingGroups
Which produces this result:
customerid groupid Cnt
----------- ----------- -----------
1 2 2
2 3 2
What we're doing here is called "relational division" - if you want to search elsewhere for that term. In my current results, each customer only matches one group - if there are multiple matches, we need some tie-breaking conditions to determine which group to report. I prompted with two suggestions in comments (lowest groupid or highest discountamount). Your response of "added earlier" doesn't help - we don't have a column which contains the addition dates of groups. Rows have no inherent ordering in SQL.
We would do the tie-breaking in the definition of MatchingGroups and the final select:
-- Fragment: a revised MatchingGroups CTE plus final select, to be used after
-- CTEs named MatchedProducts and GroupSizes. It joins the two on groupid and Cnt
-- and numbers each customer's matching groups.
MatchingGroups as (
select
mp.*,
-- The ORDER BY is a placeholder and must be filled in with the chosen tie-break
-- criteria before this will run.
ROW_NUMBER() OVER (PARTITION BY mp.customerid ORDER BY /*Tie break criteria here */) as rn
from
MatchedProducts mp
inner join
GroupSizes gs
on
mp.groupid = gs.groupid and
mp.Cnt = gs.Cnt
)
-- Keep only the first-ranked matching group per customer.
select * from MatchingGroups where rn = 1

Resources