I am trying to clean up a data set similar in structure to the following table:
dataSource
| ID_dec | ID_base | name | field1 | field2 | field3 |
| 1.01 | 1 | AAA | Cat | Brown | Domesticated |
| 1.02 | 1 | AAA | Cat | Brown | Domesticated |
| 1.03 | 1 | AAA | Feline | NULL | Dom. |
| 1.04 | 1 | AAA | Beautiful cat | NULL | NULL |
| 1.05 | 1 | AAA | NULL | Light Brown | NULL |
| 2.01 | 2 | BBB | Dog | Black | Wild |
| 2.02 | 2 | BBB | Barker | NULL | NULL |
| 3.01 | 3 | CCC | Bird | Yellow | Domesticated |
| 4.01 | 4 | DDD | Snake | NULL | NULL |
| 4.02 | 4 | DDD | NULL | Green | NULL |
| 4.03 | 4 | DDD | NULL | Forest Green | NULL |
| 4.04 | 4 | DDD | NULL | Green | Wild |
| 4.05 | 4 | DDD | NULL | NULL | Wild |
I want to pull the longest string of each combination of field[N] and ID_base, like so:
result
| ID_base | name | field1 | field2 | field3 |
| 1 | AAA | Beautiful cat | Light Brown | Domesticated |
| 2 | BBB | Barker | Black | Wild |
| 3 | CCC | Bird | Yellow | Domesticated |
| 4 | DDD | Snake | Forest Green | Wild |
This has been asked before, but only for a single field. The following SQL gets me the desired result, but feels inefficient when scaled up to the real data set of 37 fields and 5665 rows (4029 ID_bases, with at most 10 ID_decs per ID_base):
-- For every id_base, return the longest non-NULL value of field1, field2 and
-- field3. Each derived table (b, c, d) handles exactly one field:
--   z : the maximum LEN() of that field within the id_base
--   y : the rows whose value has that length; MAX() picks one value when
--       several values share the maximum length.
-- The outer LEFT JOINs keep an id_base even when a field is NULL on all rows.
SELECT DISTINCT
    a.id_base,
    a.name,
    b.result AS field1,
    c.result AS field2,
    d.result AS field3
FROM dataSource a
LEFT JOIN
(
    SELECT y.id_base, MAX(y.field1) AS result
    FROM dataSource y
    INNER JOIN
    (
        SELECT id_base, MAX(LEN(field1)) AS leng
        FROM dataSource
        GROUP BY id_base
    ) z
        ON y.id_base = z.id_base
       AND LEN(y.field1) = z.leng
    GROUP BY y.id_base
) b
    ON a.id_base = b.id_base
LEFT JOIN
(
    SELECT y.id_base, MAX(y.field2) AS result
    FROM dataSource y
    INNER JOIN
    (
        SELECT id_base, MAX(LEN(field2)) AS leng
        FROM dataSource
        GROUP BY id_base
    ) z
        ON y.id_base = z.id_base
       -- compare the length of field2 itself (not field1) with its own maximum
       AND LEN(y.field2) = z.leng
    GROUP BY y.id_base
) c
    ON a.id_base = c.id_base
LEFT JOIN
(
    SELECT y.id_base, MAX(y.field3) AS result
    FROM dataSource y
    INNER JOIN
    (
        SELECT id_base, MAX(LEN(field3)) AS leng
        FROM dataSource
        GROUP BY id_base
    ) z
        ON y.id_base = z.id_base
       -- compare the length of field3 itself (not field1) with its own maximum
       AND LEN(y.field3) = z.leng
    GROUP BY y.id_base
) d
    ON a.id_base = d.id_base
What is the best way to go about this query?
-- Step 1 (CTE a): one row per (id_base, name) carrying the maximum length
-- of each field within that group.
-- Step 2: for each field, fetch a value whose length equals that maximum.
-- The helper lengths l1..l3 are used only for the lookups and are not returned.
WITH a AS (
    SELECT
        id_base,
        name,
        MAX(LEN(field1)) AS l1,
        MAX(LEN(field2)) AS l2,
        MAX(LEN(field3)) AS l3
    FROM datasource
    GROUP BY id_base, name
)
SELECT
    a.id_base,
    a.name,
    -- ORDER BY makes the pick deterministic when several values share the
    -- maximum length (TOP 1 without ORDER BY returns an arbitrary one).
    (SELECT TOP 1 s.field1 FROM datasource s
      WHERE s.id_base = a.id_base AND LEN(s.field1) = a.l1
      ORDER BY s.field1 DESC) AS field1,
    (SELECT TOP 1 s.field2 FROM datasource s
      WHERE s.id_base = a.id_base AND LEN(s.field2) = a.l2
      ORDER BY s.field2 DESC) AS field2,
    (SELECT TOP 1 s.field3 FROM datasource s
      WHERE s.id_base = a.id_base AND LEN(s.field3) = a.l3
      ORDER BY s.field3 DESC) AS field3
FROM a
Another simpler variation:
-- One row per (id_base, name); each field is looked up independently by
-- sorting that id_base's rows by value length, longest first.
-- In SQL Server NULL sorts lowest, so with DESC a NULL value is only returned
-- when the field is NULL on every row of the id_base.
SELECT
    t.id_base,
    t.name,
    -- the second sort key breaks ties between equally long values
    (SELECT TOP 1 s.field1 FROM dataSource s
      WHERE s.id_base = t.id_base
      ORDER BY LEN(s.field1) DESC, s.field1 DESC) AS field1,
    (SELECT TOP 1 s.field2 FROM dataSource s
      WHERE s.id_base = t.id_base
      ORDER BY LEN(s.field2) DESC, s.field2 DESC) AS field2,
    (SELECT TOP 1 s.field3 FROM dataSource s
      WHERE s.id_base = t.id_base
      ORDER BY LEN(s.field3) DESC, s.field3 DESC) AS field3
FROM (SELECT DISTINCT id_base, name FROM dataSource) t
-- Joins three copies of dataSource (t1, t2, t3) on ID_base. Each ON clause
-- additionally requires a copy's field length to equal the maximum length of
-- that field for its ID_base; COALESCE then takes the first non-NULL value
-- across the three copies for every output column.
-- NOTE(review): a FULL JOIN also returns rows from either side that fail the
-- ON predicate (with NULLs for the other side), so rows that do not hold a
-- longest value can still appear in the output. Confirm the result really is
-- one row per ID_base before relying on this query.
Select coalesce(t1.ID_base, t2.ID_base, t3.ID_base) base,
coalesce(t1.Name, t2.Name, t3.Name) Name,
coalesce(t1.field1, t2.field1, t3.field1) field1,
coalesce(t1.field2, t2.field2, t3.field2) field2,
coalesce(t1.field3, t2.field3, t3.field3) field3
from dataSource t1
-- t1 must carry the longest field1 and t2 the longest field2 to be paired
full join dataSource t2 on t2.ID_base = t1.ID_base
and len(t1.field1) = (Select Max(len(field1)) from dataSource
where ID_base = t1.ID_base)
and len(t2.field2) = (Select Max(len(field2)) from dataSource
where ID_base = t2.ID_base)
-- t3 must carry the longest field3; it is matched to t1 on ID_base only
full join dataSource t3 on t3.ID_base = t1.ID_base
and len(t3.field3) = (Select Max(len(field3)) from dataSource
where ID_base = t3.ID_base)
Related
I have a table with below structure:
+-------+-----------+--------+----------+--------+
| RefNo | TranType | Code | Remarks | Amount |
+-------+-----------+--------+----------+--------+
| 1 | BD | 400201 | abcc dfr | 200 |
| 1 | BD | 400202 | abcc dfr | 200 |
| 2 | BD | 400204 | defrt | 300 |
| 2 | BD | 400205 | defrt | 300 |
+-------+-----------+--------+----------+--------+
I need to transpose these values to the below format:
+-------+--------+--------+----------+----------+--------+
| RefNo | Code1 | Code2 | TranType | Remarks | Amount |
+-------+--------+--------+----------+----------+--------+
| 1 | 400201 | 400202 | BD | abcc dfr | 200 |
| 2 | 400204 | 400205 | BD | defrt | 300 |
+-------+--------+--------+----------+----------+--------+
You don't need to use PIVOT, you can do it using a simple query.
-- Pairs the rows of each RefNo by self-joining on RefNo; the strict
-- inequality keeps each pair once, with the smaller code as Code1 and the
-- larger as Code2. A RefNo with a single row yields no output row, and one
-- with more than two rows yields one output row per pair of codes.
SELECT
    t1.refno,
    t1.code     AS Code1,
    t2.code     AS Code2,
    t1.trantype,
    t1.remarks,           -- required by the requested output layout
    t1.amount
FROM #table t1
INNER JOIN #table t2
    ON t1.refno = t2.refno
   AND t1.code < t2.code
Online Demo
You can try the following query.
-- Number the rows of each RefNo by ascending Code, then fold position 1 and
-- position 2 into the Code1 / Code2 columns with conditional aggregation.
;WITH numbered AS
(
    SELECT
        src.RefNo,
        src.Code,
        src.TranType,
        src.Remarks,
        src.Amount,
        ROW_NUMBER() OVER (PARTITION BY src.RefNo ORDER BY src.Code) AS rowno
    FROM Table1 AS src
)
SELECT
    r.RefNo,
    -- ELSE 0 means a RefNo without a row at that position reports 0
    MAX(CASE WHEN r.rowno = 1 THEN r.Code ELSE 0 END) AS Code1,
    MAX(CASE WHEN r.rowno = 2 THEN r.Code ELSE 0 END) AS Code2,
    r.TranType,
    r.Remarks,
    r.Amount
FROM numbered AS r
GROUP BY
    r.RefNo,
    r.TranType,
    r.Remarks,
    r.Amount
I'm having a serious problem with one of my import tables. I've imported an Excel file to a SQL Server table. The table ImportExcelFile now looks like this (simplified):
+----------+-------------------+-----------+------------+--------+--------+-----+---------+
| ImportId | Excelfile | SheetName | Field1 | Field2 | Field3 | ... | Field10 |
+----------+-------------------+-----------+------------+--------+--------+-----+---------+
| 1 | C:\Temp\Test.xlsx | Sheet1 | Age / Year | 2010 | 2011 | | 2018 |
| 2 | C:\Temp\Test.xlsx | Sheet1 | 0 | Value1 | Value2 | | Value9 |
| 3 | C:\Temp\Test.xlsx | Sheet1 | 1 | Value1 | Value2 | | Value9 |
| 4 | C:\Temp\Test.xlsx | Sheet1 | 2 | Value1 | Value2 | | Value9 |
| 5 | C:\Temp\Test.xlsx | Sheet1 | 3 | Value1 | Value2 | | Value9 |
| 6 | C:\Temp\Test.xlsx | Sheet1 | 4 | Value1 | Value2 | | Value9 |
| 7 | C:\Temp\Test.xlsx | Sheet1 | 5 | NULL | NULL | | NULL |
+----------+-------------------+-----------+------------+--------+--------+-----+---------+
I now want to insert those values from Field1 to Field10 to the table AgeYear(in my original table there are about 70 columns and 120 rows). The first row (Age / Year, 2010, 2011, ...) is the header row. The column Field1 is the leading column. I want to save the values in the following format:
+-----------+-----+------+--------+
| SheetName | Age | Year | Value |
+-----------+-----+------+--------+
| Sheet1 | 0 | 2010 | Value1 |
| Sheet1 | 0 | 2011 | Value2 |
| ... | ... | ... | ... |
| Sheet1 | 0 | 2018 | Value9 |
| Sheet1 | 1 | 2010 | Value1 |
| Sheet1 | 1 | 2011 | Value2 |
| ... | ... | ... | ... |
| Sheet1 | 1 | 2018 | Value9 |
| ... | ... | ... | ... |
+-----------+-----+------+--------+
I've tried the following query:
-- Builds and runs a dynamic UNPIVOT over the imported sheet.
-- @columns (declared elsewhere) holds the comma-separated column list,
-- e.g. 'Field1, Field2, Field3, Field4, ...'; it is concatenated into the
-- statement text, so it must come from a trusted source.
-- Local variables use the @ sigil; # denotes temporary tables.
DECLARE @sql NVARCHAR(MAX) =
';WITH cte AS
(
SELECT i.SheetName,
-- order by ImportId so that rn = 1 is the first imported row (the header row)
ROW_NUMBER() OVER(PARTITION BY i.SheetName ORDER BY i.ImportId) AS rn,
' + @columns + '
FROM dbo.ImportExcelFile i
WHERE i.Sheetname LIKE ''Sheet1''
)
SELECT SheetName,
age Age,
y.[Year]
FROM cte
CROSS APPLY
(
SELECT Field1 age
FROM dbo.ImportExcelFile
WHERE SheetName LIKE ''Sheet1''
AND ISNUMERIC(Field1) = 1
) a (age)
UNPIVOT
(
[Year] FOR [Years] IN (' + @columns + ')
) y
WHERE rn = 1'
EXEC (@sql)
So far I'm getting the desired ages and years. My problem is that I don't know how I could get the values. With UNPIVOT I don't get the NULL values. Instead it fills the whole table with the same values even if they are NULL in the source table.
Could you please help me?
Perhaps an alternative approach. This is not dynamic, but with the help of a CROSS APPLY and a JOIN...
The drawback is that you'll have to define the 70 fields.
Example
-- unpivoted : one row per (imported row, value column), produced with
--             CROSS APPLY (VALUES ...); Item is the column name, Value its content.
-- header_row: the unpivoted cells of ImportId = 1, whose values are the years.
-- The final SELECT pairs every data cell (ImportId > 1) with the header cell
-- of the same column to obtain its year.
;WITH unpivoted AS
(
    SELECT
        src.ImportId,
        src.SheetName,
        src.Field1 AS Age,
        cell.Item,
        cell.Value
    FROM ImportExcelFile AS src
    CROSS APPLY
    (
        VALUES ('Field2',  src.Field2),
               ('Field3',  src.Field3),
               ('Field10', src.Field10)
    ) AS cell (Item, Value)
),
header_row AS
(
    SELECT ImportId, SheetName, Age, Item, Value
    FROM unpivoted
    WHERE ImportId = 1
)
SELECT
    d.SheetName,
    TRY_CONVERT(int, d.Age)   AS [Age],
    TRY_CONVERT(int, h.Value) AS [Year],
    d.Value                   AS [Value]
FROM unpivoted AS d
INNER JOIN header_row AS h
    ON d.Item = h.Item
WHERE d.ImportId > 1
Returns
1) Suppose i have a table like this:-
| id | color_code | fruit |
|:------|--------------|----------------:|
| 1 | 000001 | apple |
| 2 | 000001 | apple |
| 3 | 000001 | apple |
| 4 | 000002 | lemon |
| 5 | 000002 | lemon |
| 6 | 000003 | grapes |
| 7 | 000003 | grapes |
How can i group by the fruit column according to the color_code column in sql server?
like this i suppose:-
| id | color_code | fruit | group_concat(id) |
|:------|--------------|-----------------|---------------------|
| 1 | 000001 | apple | 1,2,3 |
| 4 | 000002 | lemon | 4,5 |
| 6 | 000003 | grapes | 6,7 |
2) What if i have 3 tables (like shown below) which require join, how can i achieve this?
series_no table:
| id | desc_seriesno |
|:------|----------------:|
| 7040 | AU1011 |
| 7041 | AU1022 |
| 7042 | AU1033 |
| 7043 | AU1044 |
| 7044 | AU1055 |
| 7045 | AU1066 |
brand table:
| id | desc_brand |
|:------|----------------:|
| 1020 | Audi |
| 1021 | Bentley |
| 1022 | Ford |
| 1023 | BMW |
| 1024 | Mazda |
| 1025 | Toyota |
car_info table:
| seriesno_id | brand_id | color |
|:---------------|------------|--------:|
| 7040 | 1020 | white |
| 7040 | 1020 | black |
| 7040 | 1020 | pink |
| 7041 | 1021 | yellow |
| 7041 | 1021 | brown |
| 7042 | 1022 | purple |
| 7042 | 1022 | black |
| 7042 | 1022 | green |
| 7043 | 1023 | blue |
| 7044 | 1024 | red |
| 7045 | 1025 | maroon |
| 7045 | 1025 | white |
this is my current query with sql server 2014:-
-- Attempt to list each series/brand pair once.
-- B.desc_brand is selected but is neither listed in GROUP BY nor wrapped in an
-- aggregate, which SQL Server rejects for a grouped query; no COUNT is
-- selected either, so the per-pair count from the desired output is missing.
SELECT SN.id AS seriesid, B.id AS brandid, B.desc_brand
FROM [db1].[dbo].[series_no] SN
LEFT JOIN [db1].[dbo].[car_info] CI
ON CI.seriesno_id = SN.id
-- RIGHT JOIN keeps every brand row, including brands with no car_info match
RIGHT JOIN [db1].[dbo].[brand] B
ON B.id = CI.brand_id
GROUP BY SN.id, B.id
ORDER BY SN.id ASC
but unfortunately it gave me an error since i cannot group by similar string this way.
i want it to be like this:-
| seriesid | brandid | desc_brand | count |
|:-----------|------------|---------------|-------|
| 7040 | 1020 | Audi | 3 |
| 7041 | 1021 | Bentley | 2 |
| 7042 | 1022 | Ford | 3 |
| 7043 | 1023 | BMW | 1 |
| 7044 | 1024 | Mazda | 1 |
| 7045 | 1025 | Toyota | 2 |
1 Fruit Color
Assuming the table name is FruitColor, you can get the desired output by the following query -
-- One row per (color_code, fruit) with the smallest id of the group and a
-- comma-separated list of all ids in the group.
-- STUFF(..., 1, 1, '') removes the leading comma produced by the
-- FOR XML PATH('') concatenation.
SELECT MIN(fc.id) AS id
     , fc.color_code
     , fc.fruit
     , group_concat_id = STUFF((SELECT ',' + CAST(fci.id AS VARCHAR(30))
                                FROM FruitColor AS fci
                                WHERE fci.fruit = fc.fruit
                                  AND fci.color_code = fc.color_code
                                -- without ORDER BY the order of the ids in
                                -- the list is not guaranteed
                                ORDER BY fci.id
                                FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)'), 1, 1, '')
FROM FruitColor AS fc
GROUP BY fc.color_code, fc.fruit
ORDER BY id;
The MIN() selects the first id of the group.
Since there is no default GROUP_CONCAT function like in MySql in SQL Server, you have to use the STUFF function and FOR XML PATH. To learn more about group concat you can visit this link https://sqlperformance.com/2014/08/t-sql-queries/sql-server-grouped-concatenation
You can customize the WHERE clause to match only by color_code.
2. You can have several options for this -
Option (a): Show counts for series with brands
-- Number of car_info rows per (series, brand). Grouping by desc_brand as well
-- allows it to be selected without an aggregate.
SELECT
    ci.seriesno_id AS seriesid,
    ci.brand_id    AS brandid,
    b.desc_brand,
    COUNT(*)       AS [count]
FROM db1.dbo.car_info AS ci
LEFT JOIN db1.dbo.brand AS b
    ON b.id = ci.brand_id
GROUP BY ci.seriesno_id, ci.brand_id, b.desc_brand;
Here you don't need to use the series table if you want to show counts for cars having brand(s).
You may not need to use the RIGHT JOIN on the brand table because if brand table contains a record which
is not in car_info table, then seriesno_id would be null.
Option (b): Show counts for all the series with or without a brand
-- Every series, with or without cars. The LEFT JOINs keep a series that has
-- no car_info rows; for such a series brandid and desc_brand are NULL.
SELECT
    sn.id                 AS seriesid,
    ci.brand_id           AS brandid,
    b.desc_brand,
    -- count matched car_info rows: COUNT(*) would report 1 for a series
    -- without cars because the outer join still produces one row for it
    COUNT(ci.seriesno_id) AS [count]
FROM db1.dbo.series_no AS sn
LEFT JOIN db1.dbo.car_info AS ci
    ON ci.seriesno_id = sn.id
LEFT JOIN db1.dbo.brand AS b
    ON b.id = ci.brand_id
GROUP BY sn.id, ci.brand_id, b.desc_brand;
Option (c): The work around for selecting a column which is not in a GROUP BY
-- Same counts as grouping by desc_brand, but desc_brand is wrapped in MAX()
-- so it can be selected without being part of the GROUP BY. This is only
-- meaningful when every brand_id maps to exactly one desc_brand.
SELECT
    ci.seriesno_id    AS seriesid,
    ci.brand_id       AS brandid,
    MAX(b.desc_brand) AS desc_brand,
    COUNT(*)          AS [count]
FROM db1.dbo.car_info AS ci
LEFT JOIN db1.dbo.brand AS b
    ON b.id = ci.brand_id
GROUP BY ci.seriesno_id, ci.brand_id;
Here, if we are certain that each brand contains only one desc_brand, we can use an aggregate on it.
This is because applying an aggregate to a single value returns that value. I used MAX here.
Personally I would go with option (a) as it makes more sense.
Update on GROUP BY exception for desc_brand being NTEXT...
Cast desc_brand to NVARCHAR to avoid the exception.
CAST(desc_brand AS NVARCHAR(200))
Also I highly recommend using VARCHAR / NVARCHAR instead of any TEXT, CHAR etc. because they usually occupy more memory.
-- Inner query: one row per (color_code, fruit) with a comma-separated list of
-- its ids, sorted ascending.
-- Outer query: id is the first entry of that list, i.e. the smallest id.
SELECT
    -- take everything up to the first comma; SUBSTRING(..., 1, 1) would keep
    -- only the first character and truncate ids with more than one digit
    id = LEFT(x.group_concat, CHARINDEX(',', x.group_concat + ',') - 1),
    x.color_code,
    x.fruit,
    x.group_concat
FROM (
    SELECT DISTINCT
        m.color_code,
        m.fruit,
        group_concat = STUFF((SELECT ',' + CONVERT(varchar(10), md.id)
                              FROM [Test_1].[dbo].[Stuff] md
                              WHERE m.fruit = md.fruit
                                AND m.color_code = md.color_code
                              -- fixes the order of the list so its first
                              -- entry is the smallest id
                              ORDER BY md.id
                              FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)'), 1, 1, '')
    FROM [Test_1].[dbo].[Stuff] m
) x
use below code ..
-- One row per distinct (color_code, fruit) with the ids of all matching rows
-- joined by commas; STUFF strips the leading comma of the concatenation.
SELECT DISTINCT
    outer_row.color_code,
    outer_row.fruit,
    group_concat = STUFF(
        (
            SELECT ',' + CAST(inner_row.id AS varchar(10))
            FROM dbo.tablename AS inner_row
            WHERE inner_row.fruit = outer_row.fruit
              AND inner_row.color_code = outer_row.color_code
            FOR XML PATH(''), TYPE
        ).value('.', 'NVARCHAR(MAX)'),
        1, 1, '')
FROM dbo.tablename AS outer_row
for second :
-- Number of car_info rows per (series, brand), smallest count first.
-- desc_brand is part of the GROUP BY so it can be selected directly.
SELECT
    SN.id                 AS seriesid,
    B.id                  AS brandid,
    B.desc_brand,
    -- count matched car_info rows: the RIGHT JOIN keeps brands without cars,
    -- and COUNT(*) would report 1 for them instead of 0
    COUNT(CI.seriesno_id) AS [count]
FROM [db1].[dbo].[series_no] SN
LEFT JOIN [db1].[dbo].[car_info] CI
    ON CI.seriesno_id = SN.id
RIGHT JOIN [db1].[dbo].[brand] B
    ON B.id = CI.brand_id
GROUP BY SN.id, B.id, B.desc_brand
ORDER BY [count] ASC
I have four tables - tblBase, tblLookup, tblData and tblData2
tblBase
+---------+----------+-----------+------------+
| Base_ID | Base_Num | Base_Type | Base_Date |
+---------+----------+-----------+------------+
| 1 | 1234 | ABC | 01/05/2016 |
| 2 | 3456 | DEF | 02/05/2016 |
| 3 | 7890 | GHI | 03/05/2016 |
+---------+----------+-----------+------------+
tblLookup
+-----------+-------------+
| Lookup_ID | Lookup_Name |
+-----------+-------------+
| 1 | Apple |
| 2 | Orange |
| 3 | Banana |
+-----------+-------------+
tblData
+-----------+----------+------------+
| Data_Name | Data_Num | Data_Date |
+-----------+----------+------------+
| Apple | 1234 | 02/05/2016 |
| Orange | 3456 | 03/05/2016 |
| Guava | 5937 | 04/05/2016 |
+-----------+----------+------------+
tblData2
+------------+-----------+------------+
| Data2_Name | Data2_Num | Data2_Date |
+------------+-----------+------------+
| Grapes | 3953 | 02/05/2016 |
| Orange | 3456 | 03/05/2016 |
| Banana | 7890 | 04/05/2016 |
| Banana | 1473 | 07/05/2016 |
+------------+-----------+------------+
I am trying to get the Data_Date from tblData or tblData2 (wherever the data exists), joined with tblBase where Base_Num matches. As the common columns exist in tblLookup, I need to join all four tables.
For example, Base_ID = 3, Base_Num = 7890, should pick up Data_Date from tblData2, as both Base_ID (Banana) and Base_Num (7890) matches.
I tried doing INNER JOIN however it did not give the desired result.
I'm looking for a resulting table like this:
+---------+----------+-----------+------------+-------------------+
| Base_ID | Base_Num | Base_Type | Base_Date | Desired_Data_Date |
+---------+----------+-----------+------------+-------------------+
| 1 | 1234 | ABC | 01/05/2016 | 02/05/2016 |
| 2 | 3456 | DEF | 02/05/2016 | 03/05/2016 |
| 3 | 7890 | GHI | 03/05/2016 | 04/05/2016 |
+---------+----------+-----------+------------+-------------------+
You may try Left Join
-- Every base row that has a lookup entry, with the matching row (by number)
-- from tblData and from tblData2 side by side. The LEFT JOINs keep the base
-- row when only one of the two data tables, or neither, has a match.
SELECT
    B.Base_ID,
    B.Base_Num,
    B.Base_Type,
    B.Base_Date,
    D1.Data_Name  AS Data1,
    D1.Data_Date  AS DESIRED_DATE1,
    D2.Data2_Name AS Data2,
    D2.Data2_Date AS DESIRED_DATE2
FROM tblBase B
JOIN tblLookup L
    ON L.Lookup_ID = B.Base_ID
LEFT JOIN tblData D1
    ON D1.Data_Num = B.Base_Num
LEFT JOIN tblData2 D2
    ON D2.Data2_Num = B.Base_Num
-- WHERE <Condition>   -- add any further row filter here
-- Self-contained repro: the four tables are declared as table variables
-- (@ sigil), loaded with the sample data, then queried.
declare @tblbase table (Base_ID int, Base_Num int, Base_Type varchar(3), Base_Date varchar(10))
Insert into @tblbase
values
( 1 , 1234 , 'ABC', '01/05/2016'),
( 2 , 3456 , 'DEF', '02/05/2016'),
( 3 , 7890 , 'GHI', '03/05/2016')
declare @tblLookup table (Lookup_ID int, Lookup_Name varchar(10))
insert into @tblLookup
values
( 1 , 'Apple' ),
( 2 , 'Orange'),
( 3 , 'Banana')
declare @tbldata table (Data_Name varchar(10), Data_Num int, Data_Date varchar(10))
Insert into @tbldata
values
( 'Apple' , 1234 , '02/05/2016'),
( 'Orange' , 3456 , '03/05/2016'),
( 'Guava' , 5937 , '04/05/2016')
declare @tbldata2 table (Data_Name varchar(10), Data_Num int, Data_Date varchar(10))
Insert into @tbldata2
values
( 'Grapes', 3953 , '02/05/2016'),
( 'Orange' , 3456 , '03/05/2016'),
( 'Banana' , 7890 , '04/05/2016'),
( 'Banana' , 1473 , '07/05/2016')
/*
Expected result
+---------+----------+-----------+------------+-------------------+
| Base_ID | Base_Num | Base_Type | Base_Date | Desired_Data_Date |
+---------+----------+-----------+------------+-------------------+
| 1 | 1234 | ABC | 01/05/2016 | 02/05/2016 |
| 2 | 3456 | DEF | 02/05/2016 | 03/05/2016 |
| 3 | 7890 | GHI | 03/05/2016 | 04/05/2016 |
+---------+----------+-----------+------------+-------------------+
*/
-- A data row qualifies when its name equals the base row's lookup name and
-- its number equals the base number.
select
    bt.Base_ID,
    bt.Base_Num,
    bt.Base_Type,
    bt.Base_Date,
    u.Data_Date as Desired_data_date
from @tblbase bt
join @tblLookup lu
    on lu.Lookup_ID = bt.Base_ID
join
(
    -- UNION (not UNION ALL) is deliberate: a row present in both data tables,
    -- such as ('Orange', 3456, '03/05/2016'), must be returned only once
    select t1.Data_Name, t1.Data_Num, t1.Data_Date from @tbldata t1
    union
    select t2.Data_Name, t2.Data_Num, t2.Data_Date from @tbldata2 t2
) u
    on u.Data_Name = lu.Lookup_Name
   and u.Data_Num = bt.Base_Num
order by bt.Base_Date
Hello I have a temp table (#tempResult) that contains results like the following...
-----------------------------------------
| DrugAliasID | Dosage1 | Unit1 | rowID |
-----------------------------------------
| 322 | 10 | MG | 1 |
| 322 | 50 | ML | 2 |
| 441 | 20 | ML | 3 |
| 443 | 15 | ML | 4 |
-----------------------------------------
I'm looking to get the results to be like the following, pivoting the rows that have the same DrugAliasID.
--------------------------------------------------
| DrugAliasID | Dosage1 | Unit1 | Dosage2 | Unit2 |
--------------------------------------------------
| 322 | 10 | MG | 50 | ML |
| 441 | 20 | ML | NULL | NULL |
| 443 | 15 | ML | NULL | NULL |
--------------------------------------------------
So far I have a solution that isn't using pivot. I'm not too good with pivot and was wondering if anyone knew how to use it in this scenario. Or solve it some other way. Thanks
-- Attempt to put the first and second dosage of each drugAliasID on one row
-- by joining #tempResult to itself and to its per-drug MIN / MAX rowID.
-- NOTE(review): trmin and trmax are both joined to tr on the same rowID, so
-- all three aliases refer to the same row. tr1 then requires that row to hold
-- the group's MIN(RowID) and tr2 requires it to hold the group's MAX(RowID).
-- Assuming RowID is unique, at most one row per drugAliasID can survive the
-- joins, so HAVING count(...) > 1 looks like it filters out every group.
-- Confirm against real data.
SELECT
tr.drugAliasID,
MIN(trmin.dosage1) AS dosage1,
MIN(trmin.unit1) AS unit1,
MIN(trmax.dosage1) AS dosage2,
MIN(trmax.unit1) AS unit2
FROM
#tempResult tr
JOIN
-- same row as tr (matched on rowID and drugAliasID)
#tempResult trmin ON trmin.RowID = tr.rowid AND trmin.drugAliasID = tr.drugAliasID
JOIN
-- also the same row as tr
#tempResult trmax ON trmax.RowID = tr.rowid AND trmax.drugAliasID = tr.drugAliasID
JOIN
-- tr1: smallest rowID per drugAliasID
(SELECT
MIN(RowID) AS rowid,
drugAliasID
FROM
#tempResult
GROUP BY
drugAliasID) tr1 ON tr1.rowid = trmin.RowID
JOIN
-- tr2: largest rowID per drugAliasID
(SELECT
MAX(RowID) AS rowid,
drugAliasID
FROM
#tempResult
GROUP BY
drugAliasID) tr2 ON tr2.rowid = tr.RowID
GROUP BY
tr.drugAliasID
HAVING
count(tr.drugAliasID) > 1
Assuming your version of SQL Server supports the use of CTEs, you can simplify your query thus:
-- Number the rows of each drugaliasid by rowid, then place row 1 and row 2
-- side by side. The LEFT JOIN keeps drugs that have only one row; their
-- dosage2 / unit2 are NULL.
;WITH ranked AS
(
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY drugaliasid ORDER BY rowid) AS rn
    FROM #tempResult
)
SELECT
    first_row.drugaliasid,
    first_row.dosage1,
    first_row.unit1,
    second_row.dosage1 AS dosage2,
    second_row.unit1   AS unit2
FROM ranked AS first_row
LEFT JOIN ranked AS second_row
    ON  second_row.drugaliasid = first_row.drugaliasid
    AND second_row.rn = 2
    AND first_row.rn = 1
WHERE first_row.rn = 1
Demo
This will give you the desired result, without having to use the pivot keyword.