Select distinct by unique column - sql-server

For example now I have a table like this:
Col1 Col2 Col3 Col4
1 1 1 1
11 2 3 44
111 2 3 444
1111 3 3 3
I have another table with the same structure, except that it has a unique index on (Col2, Col3). I want to select from the first table and insert into the second table, but skip any record whose (Col2, Col3) combination would violate that unique index. The new table should then contain data like:
Col1 Col2 Col3 Col4
1 1 1 1
11 2 3 44
1111 3 3 3
How can I do that ?
Currently I'm using MERGE INTO, but it is very slow when my table has millions of records.

Try this query, which works without a unique index/constraint:
Query:
-- Demo: keep one row per (Col2, Col3) pair without relying on a unique index.
-- A table variable is declared with an @ prefix; "DECLARE #temp TABLE" is not
-- valid T-SQL (# names are temp tables and are created with CREATE TABLE).
DECLARE @temp TABLE
(
      Col1 VARCHAR(10)
    , Col2 VARCHAR(10)
    , Col3 VARCHAR(10)
    , Col4 VARCHAR(10)
)

-- Sample rows: ('11', '2', '3', ...) and ('111', '2', '3', ...) collide on (Col2, Col3).
INSERT INTO @temp (Col1, Col2, Col3, Col4)
VALUES
      ('1', '1', '1', '1')
    , ('11', '2', '3', '44')
    , ('111', '2', '3', '444')
    , ('1111', '3', '3', '3')

-- One output row per (Col2, Col3).
-- MIN is evaluated independently for Col1 and Col4, and because the columns
-- are VARCHAR it compares them as strings, so the two values are not
-- guaranteed to come from the same source row.
SELECT
      Col1 = MIN(Col1)
    , Col2
    , Col3
    , Col4 = MIN(Col4)
FROM @temp
GROUP BY
      Col2
    , Col3
Output:
Col1 Col2 Col3 Col4
---------- ---------- ---------- ----------
1 1 1 1
11 2 3 44
1111 3 3 3

I doubt you'll be able to do much better than a merge, but you could try optimizing your merge query. Maybe post the query you are currently using? In any case, analytic (window) functions tend to be fast in these cases. Something along the following lines, for example:
-- Keep exactly one row per (Col2, Col3): the one with the smallest Col1.
-- ROW_NUMBER() is used instead of comparing Col1 with MIN(Col1) OVER (...):
-- when several rows in a group tie on the smallest Col1, the MIN comparison
-- returns all of them, which would still violate a unique index on
-- (Col2, Col3) in the target table. Col4 is only a tie-breaker so the pick
-- is as deterministic as the data allows.
-- NOTE: a group whose Col1 values are all NULL is kept here, whereas the
-- "Col1 = MinCol1" comparison would have dropped it.
SELECT Col1, Col2, Col3, Col4
FROM (
    SELECT Col1,
           Col2,
           Col3,
           Col4,
           ROW_NUMBER() OVER (PARTITION BY Col2, Col3 ORDER BY Col1, Col4) AS RowNum
    FROM someTable
) Temp
WHERE RowNum = 1

Related

TSQL -- find records in table with multiples in one column, and at least one specific occurrence of a value in another column

TSQL -- find records in table with multiples in one column, and at least one specific occurrence of a value in another column
If I have:
ourDB.dbo.ourTable with col1 and col2 and col 3
I want to find occurrences such that
* A value of col1 occurs multiple times
* At least one of those rows has col2 = 'Val1'.
TSQL -- find specific occurrence in table
So one would start with:
-- Starting point: only rows with col2 = 'Val1' survive the WHERE filter, and
-- of those only the (col1, col2, col3) groups containing more than one row
-- are returned.
-- T-SQL requires the clauses in this order: WHERE, GROUP BY, HAVING, ORDER BY
-- (HAVING cannot precede WHERE or GROUP BY).
SELECT col1, col2, col3
FROM ourDB.dbo.ourTable
WHERE col2 = 'Val1'
GROUP BY col1, col2, col3
HAVING COUNT(col1) > 1
ORDER BY col1, col2, col3
This would find the cases where col2 is always 'Val1', but how can it be generalized to col2 being 'Val1' at least once?
You must GROUP BY col1 only and with conditional aggregation you get all the col1 values you need:
-- Every row whose col1 value (a) appears on more than one row and
-- (b) has at least one row with col2 = 'Val1'.
SELECT *
FROM ourDB.dbo.ourTable
WHERE col1 IN (
    SELECT grp.col1
    FROM ourDB.dbo.ourTable AS grp
    GROUP BY grp.col1
    HAVING COUNT(*) > 1                                        -- col1 occurs multiple times
       AND COUNT(CASE WHEN grp.col2 = 'Val1' THEN 1 END) > 0   -- at least one 'Val1' row
)
ORDER BY col1, col2, col3
If you want only the rows with col2 = 'Val1':
-- Only the col2 = 'Val1' rows, restricted to col1 values that appear on more
-- than one row (and therefore have at least one 'Val1' row among them).
SELECT *
FROM ourDB.dbo.ourTable
WHERE col1 IN (
        SELECT grp.col1
        FROM ourDB.dbo.ourTable AS grp
        GROUP BY grp.col1
        HAVING COUNT(*) > 1                                        -- col1 occurs multiple times
           AND COUNT(CASE WHEN grp.col2 = 'Val1' THEN 1 END) > 0   -- at least one 'Val1' row
    )
  AND col2 = 'Val1'
ORDER BY col1, col3
Alternate method below, originally from a colleague, I modified it some.
NOTE: Not necessarily better than other (accepted) answer, just different approach.
-- GENERIC_JOIN_WITH_SPECIFIC_COUNTS_Q_v_0.sql
USE [ourDB]
-- Counts distinct titleid values over the rows whose col1 value
--   (a) occurs on more than one row, and
--   (b) has at least one row with col2 = 'Val1'.
-- Both subqueries are correlated on col1 so each condition is evaluated per
-- col1 value; an uncorrelated grouped scalar subquery would raise an error as
-- soon as more than one col1 value is duplicated.
-- NOTE(review): titleid is not one of the columns (col1, col2, col3) described
-- for ourTable; presumably it exists in the real table -- confirm, or count
-- col1 instead.
SELECT COUNT(DISTINCT t.titleid) -- also could use COUNT(*)
FROM ourTable AS t
WHERE
    -- more than one col1 occurrence
    1 < (
        SELECT COUNT(*)
        FROM ourTable AS dup
        WHERE dup.col1 = t.col1
    )
    -- at least one occurrence of col2 having 'Val1' for the same col1
    AND EXISTS (
        SELECT 1
        FROM ourTable AS hit
        WHERE hit.col1 = t.col1
          AND hit.col2 = 'Val1'
    )

Count of col1 over order by col2

-- Sample data: col1 contains a duplicate (10) and col2 contains NULLs, so the
-- two window counts below behave visibly differently.
-- Identifiers are written exactly as declared (t, col1, col2) so the script
-- also runs on databases with a case-sensitive collation.
create table t (col1 int, col2 varchar(10))
insert into t (col1, col2)
values
    (10, 'val'),
    (20, null),
    (10, 'val3'),
    (40, 'val1'),
    (50, null),
    (60, 'val')
--1. count of non-NULL col2 values, accumulated in col1 order
SELECT col1, col2, COUNT(col2) OVER (ORDER BY col1) FROM t
--2. count of non-NULL col1 values, accumulated in col2 order
SELECT col1, col2, COUNT(col1) OVER (ORDER BY col2) FROM t
I am not able to understand the output of the two SELECT queries above. Can anybody elaborate?
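The count column will contain the cumulative number of non-null values in col2, accumulated in col1 order. Rows that tie on col1 are counted together and share the same result, because with only ORDER BY in the OVER clause the default window frame is RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: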
-- Running count of non-NULL COL2 values in COL1 order.
-- The frame below is the one SQL Server applies by default when OVER has an
-- ORDER BY but no frame; it is written out to make the tie behaviour visible:
-- rows with equal COL1 are peers and receive the same count.
SELECT
    *,
    COUNT(COL2) OVER (
        ORDER BY COL1
        RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS [count]
FROM T
ORDER BY COL1
Results:
col1 col2 count --explanation
10 val3 2 --The first two values of col1 are the same, and the values of col2 are not null.
10 val 2 --The first two values of col1 are the same, and the values of col2 are not null.
20 NULL 2 --A new value of col1, but col2 is null so it's not counted.
40 val1 3 --A new value of col1, and col2 is not null.
50 NULL 3 --A new value of col1, but col2 is null so it's not counted.
60 val 4 --A new value of col1, and col2 is not null.
Basically the same, just reverse the columns:
-- Running count of non-NULL COL1 values in COL2 order.
-- The frame below is the one SQL Server applies by default when OVER has an
-- ORDER BY but no frame; rows with equal COL2 (including the NULLs, which
-- sort together) are peers and receive the same count.
SELECT
    *,
    COUNT(COL1) OVER (
        ORDER BY COL2
        RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS col1Count
FROM T
ORDER BY COL2
Results:
col1 col2 col1Count --explanation
20 NULL 2 --The first two values of col2 are the same (both null), and the values of col1 are not null.
50 NULL 2 --The first two values of col2 are the same (both null), and the values of col1 are not null.
60 val 4 --The next two values of col2 are the same (both 'val'), and the values of col1 are not null.
10 val 4 --The next two values of col2 are the same (both 'val'), and the values of col1 are not null.
40 val1 5 --A new value of col2, and col1 is not null.
10 val3 6 --A new value of col2, and col1 is not null.

Need the latest date from 2 columns in Oracle

As part of my project, I need to display the later of the two date columns in a third column of an Oracle table. If the dates are the same, it should display the later one based on the time. Please help me. Thanks in advance.
E.g.1:
col1 col2 col3
12-FEB-17 03:24:23 15-FEB-17 08:12:52 15-FEB-17 08:12:52
It should display the col2 data in col3, as it is 3 days later than col1.
E.g.2:
col1 col2 col3
02-FEB-17 03:01:57 02-FEB-17 03:01:59 02-FEB-17 03:01:59
It should display the col2 data in col3, as it is 2 seconds later than col1.
You can simply use the GREATEST() function. For example:
-- GREATEST returns the later of the two DATE values; Oracle DATEs carry a
-- time-of-day, so the second sample row is decided by the seconds.
WITH sample_dates AS (
    SELECT
        TO_DATE('12-FEB-2017 03:24:23', 'DD-MON-YYYY HH24:MI:SS') AS col1,
        TO_DATE('15-FEB-2017 08:12:52', 'DD-MON-YYYY HH24:MI:SS') AS col2
    FROM dual
    UNION ALL
    SELECT
        TO_DATE('02-FEB-2017 03:01:57', 'DD-MON-YYYY HH24:MI:SS'),
        TO_DATE('02-FEB-2017 03:01:59', 'DD-MON-YYYY HH24:MI:SS')
    FROM dual
)
SELECT
    col1,
    col2,
    GREATEST(col1, col2) AS col3
FROM sample_dates
COL1 COL2 COL3
-------------------- -------------------- --------------------
12-FEB-2017 03:24:23 15-FEB-2017 08:12:52 15-FEB-2017 08:12:52
02-FEB-2017 03:01:57 02-FEB-2017 03:01:59 02-FEB-2017 03:01:59
Use GREATEST, but with NVL. Without NVL, if either value is NULL then GREATEST returns NULL.
-- Later of col1 and col2. A NULL on either side is replaced by the sentinel
-- date 1901-01-01 before the comparison, so GREATEST does not return NULL.
SELECT
    col1,
    col2,
    GREATEST(
        NVL(col1, TO_DATE('1901-01-01', 'YYYY-MM-DD')),
        NVL(col2, TO_DATE('1901-01-01', 'YYYY-MM-DD'))
    ) AS col3
FROM your_table;
I really dislike using made up dates, because what if both are null? You would get 1901-01-01. Better yet, use COALESCE to return the non-null value:
-- Demo of a NULL-safe "latest date" without a made-up sentinel date.
-- Each COALESCE falls back to the other column, so GREATEST only sees a NULL
-- when both col1 and col2 are NULL.
WITH your_table AS (
    -- Four cases: col1 later, col2 later, col1 NULL, both NULL.
    SELECT 'COL1 Greater' AS test, SYSDATE AS col1, SYSDATE - 10 AS col2
    FROM DUAL
    UNION ALL
    SELECT 'COL2 Greater', SYSDATE AS col1, SYSDATE + 10 AS col2
    FROM DUAL
    UNION ALL
    SELECT 'COL1 null', NULL, SYSDATE AS col2
    FROM DUAL
    UNION ALL
    SELECT 'Both null', NULL, NULL
    FROM DUAL
)
SELECT
    test,
    TO_CHAR(col1, 'YYYY.MM.DD') AS col1,
    TO_CHAR(col2, 'YYYY.MM.DD') AS col2,
    TO_CHAR(
        GREATEST(COALESCE(col1, col2), COALESCE(col2, col1)),
        'YYYY.MM.DD'
    ) AS col3
FROM your_table;
TEST COL1 COL2 COL3
------------ ---------- ---------- ----------
COL1 Greater 2017.03.01 2017.02.19 2017.03.01
COL2 Greater 2017.03.01 2017.03.11 2017.03.11
COL1 null 2017.03.01 2017.03.01
Both null

Dynamic columns col1 Col2 Col3 Col4

I have a table that currently has 4 date columns; columns may be added or removed at regular intervals. I need a dynamic query that handles whatever columns exist at run time and performs a calculation on them.
Below is the structure of my table along with some sample records.
ID, col1, col2, col3, col4
1 '2016-08-09' '2016-08-09' '2016-08-09' '2016-08-09'
2 '2016-08-10' '2016-08-10' '2016-08-13' '2016-08-04'
I want a result in which each column shows its date difference, with a Total column appended that gives the sum of those differences for each ID. The result set should look like this:
ID col1 Col2 Col3 Col4 Total
1 6 6 6 6 24
2 6 7 8 8 29
Tested, works well. :)
--create table structure
create table #test1 (ID int, col1 date, col2 date, col3 date, col4 date)
go
--insert sample data (ID 3 has a NULL date to exercise the ISNULL handling below)
insert into #test1 (ID, col1, col2, col3, col4)
select 1, '2016-08-09', '2016-08-09', '2016-08-09', '2016-08-09'
union all
select 2, '2016-08-10', '2016-08-10', '2016-08-13', '2016-08-04'
union all
select 3, '2016-08-10', '2016-08-10', NULL, '2016-08-04';
--below is solution
-- The INSERT above ends with a semicolon because SQL Server requires the
-- statement preceding a WITH common table expression to be terminated.
-- The CTE computes each column's age in days relative to the current date;
-- the outer query adds them up, treating a NULL difference as 0 so one
-- missing date does not turn the whole total into NULL.
with cte_test1 (id, dif1, dif2, dif3, dif4) as
(
    select id,
           datediff(day, col1, getdate()) as dif1,
           datediff(day, col2, getdate()) as dif2,
           datediff(day, col3, getdate()) as dif3,
           datediff(day, col4, getdate()) as dif4
    from #test1
)
select id, dif1, dif2, dif3, dif4,
       ISNULL(dif1,0)+ISNULL(dif2,0)+ISNULL(dif3,0)+ISNULL(dif4,0) as difTotal
from cte_test1
RESULT:

Update to get check_order

I have a table with values,
col1 col2 col3
1 0 ABA
1 0 ABB
1 0 ABC
2 0 BBA
2 0 BBB
2 0 BBC
I am trying to update the table so that col2 numbers the repetitions of each col1 value. In this case each col1 value is repeated 3 times, so col2 should be incremented by 1 for each successive row within the same col1 value.
Required output after the update table
col1 col2 col3
1 1 ABA
1 2 ABB
1 3 ABC
2 1 BBA
2 2 BBB
2 3 BBC
A simple row_number() numbering should work:
-- Number the rows inside each col1 group (in col3 order) and write that
-- number back to col2 by updating through the CTE.
;with numbered as (
    select
        col2,
        row_number() over (partition by col1 order by col3) as seq
    from tbl
)
update numbered
set col2 = seq
Where
tbl: is your table name
partition by col1: resets the row numbering for each col1 group
order by col3: is the basis for numbering within a col1 group
Assuming you are intending col3 to be in non-descending order, this should do it:
-- Set col2 to the row's position within its col1 group: the number of rows
-- in the same group whose col3 is less than or equal to this row's col3.
UPDATE target
SET col2 = (
        SELECT COUNT(*)
        FROM MyTable AS peer
        WHERE peer.col1 = target.col1
          AND peer.col3 <= target.col3
    )
FROM MyTable AS target
You will get duplicates in col2, if there are duplicates in col3 for a particular col1 value.
In case you are interested, here is a pretty verbose (and more expensive execution wise) solution using a ranking function. It has the same issue (i.e., the count gets repeated) for duplicates in col1/col3, as the previous solution:
-- Set col2 to the dense rank of the row's col3 within its col1 group.
UPDATE UpdatedTable
SET col2 = (
        -- In the following query, DISTINCT merges rank dups caused by col3 dups
        -- SELECT TOP(1) GroupRank would also work.
        SELECT DISTINCT Ranked.GroupRank
        FROM (
            -- Rank every row of the current row's col1 group by col3.
            SELECT
                src.col3,
                DENSE_RANK() OVER (PARTITION BY src.col1 ORDER BY src.col3) AS GroupRank
            FROM MyTable AS src
            WHERE src.col1 = UpdatedTable.col1
        ) AS Ranked
        WHERE Ranked.col3 = UpdatedTable.col3
    )
FROM MyTable AS UpdatedTable

Resources