SQL Server or functionality in where condition - sql-server

I have an SQL query where I want to get the rows with values "all" or "female" in [gender] column and value "A" in [group] column. If there are 2 rows with [group] = A and [gender] = all and the other [group] = A and [gender] = female I want to get only the row with [gender] = all. Now I use:
where group=A and (gender=all or gender=female)
But I get both rows
In the example table below I want to get only the row: A all
But if I use the where group=A and (gender=all or gender=female) query I will get both rows for group A
group gender
A all
A female
B all
C female
C all

You can use something like row_number() to prioritize the various subsets of records you're looking at and then select only one record from each. From the wording of your question I assume there is some other field in the table on which you're "grouping" records together—in other words, a field whose every distinct value should produce at most one record in the result set whose group and gender values match your criteria. In the following example I've assumed that this field is called Category; if you share the actual schema of your table then I can improve the example, but this should suffice to illustrate the idea.
-- Sample data. Category is the assumed grouping key: each distinct Category
-- should contribute at most one row to the result.
-- Table variables must be named with a leading @ in T-SQL.
declare @SampleData table
(
    Category bigint,
    [Group]  char(1),
    Gender   varchar(16)
);

insert @SampleData (Category, [Group], Gender) values
    (1, 'A', 'Female'), -- include
    (2, 'B', 'Female'), -- exclude; wrong group
    (3, 'A', 'Female'), -- exclude; right group and gender but superseded by (3, 'A', 'All')
    (3, 'A', 'All'),    -- include
    (4, 'A', 'All'),    -- include
    (5, 'A', 'Male');   -- exclude; wrong gender

-- Number the qualifying rows inside each Category so that an 'All' row
-- (sort key 0) always comes before a 'Female' row (sort key 1).
with PrioritizedData as
(
    select
        D.*,
        [Priority] = row_number() over (
            partition by D.Category
            order by case D.Gender when 'All' then 0 else 1 end
        )
    from
        @SampleData D
    where
        D.[Group] = 'A' and
        D.Gender in ('Female', 'All')
)
-- Priority 1 is the preferred row of each Category.
select * from PrioritizedData P where P.[Priority] = 1;

You can use the RANK() window function with results partitioned by group and ordered by gender (this works because 'all' sorts alphabetically before 'female' or 'male'). If your ordering gets more complex than that, you'll have to find another way to order them.
/* TEST DATA */
; WITH a AS (
    SELECT 'A' AS thegroup, 'all' AS gender UNION ALL
    SELECT 'A' AS thegroup, 'all' AS gender UNION ALL
    SELECT 'A' AS thegroup, 'female' AS gender UNION ALL
    SELECT 'B' AS thegroup, 'all' AS gender UNION ALL
    SELECT 'C' AS thegroup, 'female' AS gender UNION ALL
    SELECT 'C' AS thegroup, 'all' AS gender UNION ALL
    SELECT 'D' AS thegroup, 'female' AS gender
)
/* THE QUERY */
SELECT b.*
FROM (
    SELECT
        thegroup,
        gender,
        /* Ranks rows inside each 'thegroup'; 'all' sorts before 'female'.
           RANK() gives tied rows the same rank, so duplicate rows are all kept. */
        RANK() OVER (PARTITION BY thegroup ORDER BY gender) AS rn
    FROM a
    /* Only these two genders are wanted; without this filter a group that
       holds only some other gender would return that row as rank 1. */
    WHERE gender IN ('all', 'female')
) b
WHERE b.rn = 1 /* Gets first group. */
  AND b.thegroup = 'A'

data script
-- Table variables must be named with a leading @ in T-SQL.
declare @data table ([group] char(1), [gender] varchar(16));
insert into @data ([group], [gender])
values ('A', 'all'), ('A', 'female'), ('B', 'all'), ('C', 'female'), ('C', 'all');
-- query
-- For every row, look up the preferred gender of its group ('all' first),
-- then collapse the repeated (group, gender) pairs with GROUP BY.
select
    [group]   = [d].[group]
    ,[gender] = [x].[gender]
from
    @data as [d]
cross apply
(
    select top 1 [i].[gender]
    from @data as [i]
    where [i].[group] = [d].[group]
    order by case when [i].[gender] = 'all' then 0 else 1 end asc
) as [x]
group by
    [d].[group]
    ,[x].[gender];

Related

Snowflake : IN operator

so I want something as below in my query
-- Query from the question; it errors because the subquery returns two columns (id, max(date)) while the left side of IN is the single column a.id.
select * from table a
where a.id in(select id, max(date) from table a group by id)
I am getting error here , as IN is equivalent to = .
how to do it?
example :
id
date
1
2022-31-01
1
2022-21-03
2
2022-01-01
2
2022-02-01
I need to get only one record based on date(max). The table has more columns than just id and date
so I need to something like this in snowflake
-- Same failing shape as above in the question: one column on the left of IN, two columns (id, max(date)) returned by the subquery.
select * from table a
where id in(select id,max(date) from table a group by id)
-----------------------
All solutions are working , if i select from table .
but i have case statement in view where duplicate records are coming
example :
-- View from the question: returns id, lastdatetime and every column of table a, plus a computed IND flag.
create or replace view v_test
as
select * from
(
select id,lastdatetime,*,
-- IND is 'Y' when timestamp lies strictly between start_date and end and move_date equals '9999-12-31'; otherwise 'N'.
case when start_date < timestamp and timestamp < end
and move_date = '9999-12-31' then 'Y'
else 'N' end as IND
from table a
) a
so if any one select view where IND= 'Y', more than 1 records are coming
what i want is to select latest records for ID where IND='Y' and max(lastdatetime)
how to incorporate this logic in view?
I think you are trying to get the latest record for each id?
-- Keep, for every id, only the row with the most recent date.
select a.*
from table a
qualify 1 = row_number() over (
    partition by a.id
    order by a.date desc
)
So if we look at your sub-select:
using this "data" for the examples:
-- Inline sample rows; the date strings are written year-day-month.
with data (id, _date) as (
    select
        column1,
        to_date(column2, 'yyyy-dd-mm')
    from values
        (1, '2022-31-01'),
        (1, '2022-21-03'),
        (2, '2022-01-01'),
        (2, '2022-02-01')
)
-- Latest date for each id.
select
    id,
    max(_date)
from data
group by id;
it gives:
ID
MAX(_DATE)
1
2022-03-21
2
2022-01-02
which makes it seem you want the "the last date, per id"
which can classically (ansi sql) be written:
-- Join every row back to its id's latest date; rows tied on that date are all returned.
select d.*
from data as d
inner join (
    select
        id,
        max(_date) as max_date
    from data
    group by id
) as c
    on  c.id = d.id
    and c.max_date = d._date
;
ID
_DATE
1
2022-03-21
2
2022-01-02
which gives you "all the rows values". BUT if you have many rows with the same last date, you will get those, in the output.
Another method is to use ROW_NUMBER to pick one and only one row per id, which is the style of answer Mike has given:
-- Sample rows; id 1 has two rows sharing its latest date.
with data (id, _date, extra) as (
    select
        column1,
        to_date(column2, 'yyyy-dd-mm'),
        column3
    from values
        (1, '2022-31-01', 'extra_a'),
        (1, '2022-21-03', 'extra_b_double_a'),
        (1, '2022-21-03', 'extra_b_double_b'),
        (2, '2022-01-01', 'extra_c'),
        (2, '2022-02-01', 'extra_d')
)
select *
from data
-- Exactly one row per id: the latest _date wins. extra is a tie-breaker so the
-- surviving row is deterministic when several rows share that latest date.
qualify row_number() over (partition by id order by _date desc, extra) = 1;
gives:
ID
_DATE
EXTRA
1
2022-03-21
extra_b_double_a
2
2022-01-02
extra_d
Now, if you want "all rows of the last day", your method works, although QUALIFY with a ranking function is faster. You can use RANK or DENSE_RANK (the example below uses DENSE_RANK):
-- Sample rows; id 1 has two rows sharing its latest date.
with data (id, _date, extra) as (
    select
        column1,
        to_date(column2, 'yyyy-dd-mm'),
        column3
    from values
        (1, '2022-31-01', 'extra_a'),
        (1, '2022-21-03', 'extra_b_double_a'),
        (1, '2022-21-03', 'extra_b_double_b'),
        (2, '2022-01-01', 'extra_c'),
        (2, '2022-02-01', 'extra_d')
)
-- Every row tied for an id's latest date gets rank 1, so all of them are kept.
select *
from data
qualify 1 = dense_rank() over (
    partition by id
    order by _date desc
);
ID
_DATE
EXTRA
1
2022-03-21
extra_b_double_a
1
2022-03-21
extra_b_double_b
2
2022-01-02
extra_d
Now the last thing that it almost seems you are asking for is "how do I find the ID with the most recent data (here 1) and get all rows for that ID":
-- Sample rows; id 1 owns the most recent date.
with data (id, _date, extra) as (
    select
        column1,
        to_date(column2, 'yyyy-dd-mm'),
        column3
    from values
        (1, '2022-31-01', 'extra_a'),
        (1, '2022-21-03', 'extra_b_double_a'),
        (1, '2022-21-03', 'extra_b_double_b'),
        (2, '2022-01-01', 'extra_c'),
        (2, '2022-02-01', 'extra_d')
)
select *
from data
-- Keep every row whose id equals the id found on the overall latest date.
-- The frame is spelled out so last_value always looks at the whole ordered
-- set instead of relying on the engine's implicit default frame.
qualify id = last_value(id) over (
    order by _date
    rows between unbounded preceding and unbounded following
);
Here is an example of how to use the in operator with a subquery:
-- Rows of table1 whose id also appears in table2.
select *
from table1 t1
where t1.id in (
    select t2.id
    from table2 t2
);
Usage of IN is possible to match on both columns:
-- Compare the (id, date) pair against each id's latest date.
select *
from tab AS a
where (a.id, a.date) in (
    select
        id,
        max(date)
    from tab
    group by id
);
For sample data:
-- Sample table built from inline values; the date strings are written year-day-month.
CREATE TABLE tab (id, date)
AS
SELECT
    column1,
    to_date(column2, 'yyyy-dd-mm')
FROM VALUES
    (1, '2022-31-01'),
    (1, '2022-21-03'),
    (2, '2022-01-01'),
    (2, '2022-02-01');
Output:

How to get those hotelid which are not having any entry after the date of particular product name?

For an example:
I have a table t1 like:
ID Name Date Product
1 A 20170101 AT
2 B 20170101 BT
1 A 20160102 A
2 B 20170106 D
3 C 20190101 F
3 C 20190101 GT
3 C 20190102 D
Now I want to get those hotel list which is not having any entry after the date of (AT,BT,GT) product.
From Above table:
I need hotelname A result because if we see 3rd entry on table for name A it is on the 20160102 which is less than the date of (A-T) product, it means there is no entry after that.
I would suggest to first retrieve all records with - in the product since this seems to be your criteria here, right? Then you could left join the result to the original table and by this evaluate the mentioned rule concerning the date. Following an example:
-- Sample table; [Date] is stored as an integer in yyyymmdd form.
CREATE TABLE t (
    [ID]      INT          NULL,
    [Name]    VARCHAR (80) NULL,
    [Date]    INT          NULL,
    [Product] VARCHAR (80) NULL
);

INSERT INTO t ([ID], [Name], [Date], [Product])
VALUES
    (1, 'A', 20170101, 'A-T'),
    (2, 'B', 20170101, 'B-T'),
    (1, 'A', 20160102, 'A'),
    (2, 'B', 20170106, 'D'),
    (3, 'C', 20190101, 'F'),
    (3, 'C', 20190101, 'G-T'),
    (3, 'C', 20190102, 'D');
-- Number each hotel's rows by date and count them: the row with rn = cn is
-- that hotel's latest entry.
-- NOTE(review): rows of one hotel that share the same [Date] are numbered in
-- an arbitrary order; add a tie-breaker if that matters for your data.
WITH cte AS (
    SELECT
        t.[Name],
        t.[Product],
        ROW_NUMBER() OVER (PARTITION BY t.[Name] ORDER BY t.[Date]) AS rn,
        COUNT(*)     OVER (PARTITION BY t.[Name])                   AS cn
    FROM t
)
-- A hotel qualifies when its latest entry is one of the hyphenated products.
-- The literals must match the stored values ('A-T', not 'AT'), otherwise the
-- query returns no rows.
SELECT [Name]
FROM cte
WHERE Product IN ('A-T', 'B-T', 'G-T')
  AND rn = cn
see SQL fiddle for details: http://sqlfiddle.com/#!18/dde24/12/1

Query to Find Max row based on N fields

Given the following data:
ID Name, Value, TimeStamp
1, 'A', 7.00, 21/12/2017
2, 'A', 5.00, 21/12/2017
3, 'A', 6.00, 20/12/2017
4, 'B', 1.00, 21/12/2017
Result I want is:
Name, Value, TimeStamp
'A', 5.00, 21/12/2017
'B', 1.00, 21/12/2017
I.e. group by Name and take value with latest TimeStamp, if 2 or more have the same TimeStamp take the one with latest ID
I did seem to find an answer that was similar to another post:
-- Rows of MyTable that carry the latest TimeStamp of their Name.
-- Columns are qualified because Name and TimeStamp exist on both sides of
-- the join, and the join refers to the derived table by its real alias (m).
SELECT t.ID, t.Name, t.Value, t.TimeStamp
FROM MyTable AS t
INNER JOIN (
    SELECT Name, MAX(TimeStamp) AS TimeStamp
    FROM MyTable
    GROUP BY Name
) AS m
    ON  t.Name = m.Name
    AND t.TimeStamp = m.TimeStamp
This gives me the max timestamp so to get the id, I can repeat the process, i.e. I can use:
-- Second pass over the previous result (the "..." stands for that query):
-- keep, per Name, the row with the highest ID.
WITH CTE AS (
...
)
SELECT c.Name, c.Value, c.TimeStamp
FROM CTE AS c
INNER JOIN (
    -- MAX(ID) needs an alias so the join below can refer to it as a.ID.
    SELECT Name, MAX(ID) AS ID
    FROM CTE
    GROUP BY Name
) AS a
    ON  c.Name = a.Name
    AND c.ID = a.ID
However, what happens if I now want to scale it up to 3 fields. Is there an easier way to do this, without experimenting I was thinking recursive CTE. Trying to avoid dynamic sql.
I think you may want to use the ROW_NUMBER function for this. Below is an example.
SQL Example
;WITH
-- Sample rows. TimeStamp is cast to date so the ORDER BY below compares
-- calendar dates; ordering the raw 'dd-Mon-yyyy' strings would sort by text.
test_data
AS
(
    SELECT tbl.* FROM (VALUES
      ( 1, 'A', 7.00, CAST('20171221' AS date))
    , ( 2, 'A', 5.00, CAST('20171221' AS date))
    , ( 3, 'A', 6.00, CAST('20171220' AS date))
    , ( 4, 'B', 1.00, CAST('20171221' AS date))
    ) tbl ([ID], [Name], [Value], [TimeStamp])
)
,
-- Number each Name's rows: newest TimeStamp first, highest ID first on ties.
-- Add further columns to the ORDER BY list to break ties on more fields.
test_data_order
AS
(
    SELECT
          [ID]
        , [Name]
        , [Value]
        , [TimeStamp]
        , EX_ROW_NUMBER = ROW_NUMBER() OVER
            (
                PARTITION BY
                    [Name]
                ORDER BY
                      [TimeStamp] DESC
                    , [ID] DESC
            )
    FROM
        test_data
)
-- Row 1 of each Name is the wanted row.
SELECT
    *
FROM
    test_data_order
WHERE
    EX_ROW_NUMBER = 1
db<>fiddle
Results
Try this
-- Per Name, keep the row with the latest TimeStamp, breaking ties by highest ID.
SELECT Name, Value, TimeStamp
FROM (
    SELECT ID, Name, Value, TimeStamp
         , ROW_NUMBER() OVER (PARTITION BY Name ORDER BY TimeStamp DESC, ID DESC) AS RowNum
    FROM MyTable -- the derived table needs a source; MyTable is the table used in the question
) b
WHERE RowNum = 1

Delete or Select Only row with same id but have values for certain fields

I would like to select/delete data with different rows but with same id.
For Example.
ID ColumnA
A Honda
A NULL
B Yamaha
B NULL
C NULL
C Merc
D NULL
E NULL
Output:
ID ColumnA
A Honda
B Yamaha
C Merc
D NULL
E NULL
I have already searched Google for a solution but found no answers. Any help is greatly appreciated.
You could use Row_number and TOP 1 WITH TIES
-- Table variables must be named with a leading @ in T-SQL.
DECLARE @SampleData AS TABLE
(
    ID      varchar(10),
    ColumnA varchar(20)
)

INSERT INTO @SampleData (ID, ColumnA)
VALUES
    ('A', 'Honda'),
    ('A', NULL),
    ('B', 'Yamaha'),
    ('B', NULL),
    ('C', NULL),
    ('C', 'Merc'),
    ('D', NULL),
    ('E', NULL)

-- Each ID's rows are numbered with ColumnA descending. TOP (1) WITH TIES then
-- returns every row tied on the lowest row number, i.e. row 1 of every ID.
SELECT TOP (1) WITH TIES
    sd.ID,
    sd.ColumnA
FROM @SampleData sd
ORDER BY Row_number() OVER (PARTITION BY sd.ID ORDER BY sd.ColumnA DESC)
Return
ID ColumnA
------------
A Honda
B Yamaha
C Merc
D NULL
E NULL
-- Inline sample rows.
;WITH cte (ID, ColumnA) AS
(
    SELECT v.ID, v.ColumnA
    FROM (VALUES
        ('A', 'Honda'),
        ('A', NULL),
        ('B', 'Yamaha'),
        ('B', NULL),
        ('C', NULL),
        ('C', 'Merc'),
        ('D', NULL),
        ('E', NULL)
    ) AS v (ID, ColumnA)
),
-- Number each ID's rows with ColumnA descending.
numbered AS
(
    SELECT
        c.ID,
        c.ColumnA,
        ROW_NUMBER() OVER (PARTITION BY c.ID ORDER BY c.ColumnA DESC) AS Seq
    FROM cte AS c
)
SELECT n.ID, n.ColumnA
FROM numbered AS n
WHERE n.Seq = 1
Output:
ID ColumnA
A Honda
B Yamaha
C Merc
D NULL
E NULL
try this:
-- Table variables must be named with a leading @ in T-SQL.
declare @tb table (ID varchar(50), ColumnA varchar(50))

insert into @tb (ID, ColumnA)
select 'A', 'Honda'  union all
select 'A', null     union all
select 'B', 'Yamaha' union all
select 'B', null     union all
select 'C', null     union all
select 'C', 'Merc'   union all
select 'D', null     union all
select 'E', null

-- a: number of rows per ID (every group has at least one row, so no HAVING is needed).
-- IDs with several rows keep only their non-NULL ColumnA rows;
-- IDs with a single row keep that row whatever its value.
select a.ID, b.ColumnA
from (
    select ID, count(1) as cnt
    from @tb
    group by ID
) as a
left join @tb as b
    on a.ID = b.ID
where (b.ColumnA is not null and a.cnt > 1)
   or a.cnt = 1
result:
A Honda
B Yamaha
C Merc
D NULL
E NULL
-- Rebuild the sample table from scratch.
drop table if exists dbo.Motorcycle;

create table dbo.Motorcycle (
    ID      char(1),
    ColumnA varchar(100)
);

insert into dbo.Motorcycle (ID, ColumnA)
values
    ('A', 'Honda'),
    ('A', null),
    ('B', 'Yamaha'),
    ('B', null),
    ('C', null),
    ('C', 'Merc'),
    ('D', null),
    ('E', null);

-- Number each ID's rows with ColumnA descending and keep the first one.
with ranked as (
    select
        m.ID,
        m.ColumnA,
        row_number() over (partition by m.ID order by m.ColumnA desc) as RBr
    from dbo.Motorcycle as m
)
select
    r.ID,
    r.ColumnA
from ranked as r
where r.RBr = 1

How can I select first inserted row in SQL Server?

This question may not be a good fit for Stack Overflow, but this is my problem and I do not know the syntax.
With this query I select the persons who had transactions, along with their date and time.
This is my query:
How can I write a query that selects only each person's first TransactionsTimeStamp?
I assume that you are looking for ranking functions like ROW_NUMBER, you could use them for example with a Common Table Expression (CTE):
-- Template (the "..." parts are placeholders to fill in): number each
-- FirstName/LastName pair's rows by TransactionsTimeStamp ascending,
-- then keep only row number 1.
WITH CTE AS
(
SELECT ..., RN = ROW_NUMBER() OVER (PARTITION BY FirstName, LastName
ORDER BY TransactionsTimeStamp ASC)
FROM dbo.TableName ... (join tables here)
)
SELECT ....
FROM CTE
WHERE RN = 1
... are the columns that you want to select, you can select all, as opposed to a GROUP BY.
But if you just want to select the TransactionsTimeStamp-column for every user:
-- Template (the "..." part is a placeholder for the joins): smallest TransactionsTimeStamp per FirstName/LastName pair.
SELECT MIN(TransactionsTimeStamp) AS TransactionsTimeStamp, FirstName, LastName
FROM dbo.tableName ... (join tables here)
GROUP BY FirstName, LastName
The problem in your query is that you are grouping by Date column. So you are getting all different Date values as a result. You should group only by FirstName and LastName and apply some aggregation functions to Date column.
If just Min date is needed then you can get that date using aggregate function like:
-- Table variables must be named with a leading @ in T-SQL.
DECLARE @test TABLE
    (
      first_name NVARCHAR(MAX) ,
      last_name NVARCHAR(MAX) ,
      transaction_date DATETIME
    )

INSERT INTO @test ( first_name, last_name, transaction_date )
VALUES ( 'A', 'B', '20150101' ),
       ( 'A', 'B', '20150120' ),
       ( 'C', 'D', '20150103' ),
       ( 'C', 'D', '20150119' )

-- One row per person with the earliest transaction date.
SELECT  first_name ,
        last_name ,
        MIN(transaction_date) AS min_transaction_date
FROM    @test
GROUP BY first_name ,
        last_name
Output:
first_name last_name min_transaction_date
A B 2015-01-01 00:00:00.000
C D 2015-01-03 00:00:00.000
-- Earliest date per firstname/lastname pair.
Select
    firstname,
    lastname,
    min(date) as minimum_date
from clubprofile_cp
group by
    firstname,
    lastname

Resources