Bigquery - Transpose Array into columns - arrays

The following query returns a list of key/value pairs:
SELECT ["name:apple", "color:red"] as fruit;
result:
Is there a way to transpose the data so the results would be:
Update
I'm looking for a generalized solution, where the key and value and the array length of the results are unknown i.e: SELECT ["key0:val0", "key1:val1"...] as data;

This should be a quick way to achieve your results:
#standardSQL
with items as (
select ["name:apple", "color:red"] p union all
select ["name:orange", "color:orange"] UNION ALL
select ["name:grapes", "color:green"]
),
arrayed as (
select
array_agg(
struct(
if(split(p, ":")[offset(0)] = 'name', split(p, ":")[offset(1)], '') as name,
if(split(p, ":")[offset(0)] = 'color', split(p, ":")[offset(1)], '') as color
)
) item from items, unnest(p) p
)
select
array((select i.name from unnest(item) i where i.name != '')) as name,
array((select i.color from unnest(item) i where i.color != '')) as color
from arrayed

I guess the BigQuery way would be using sub-selects on the array:
WITH t AS (SELECT * FROM UNNEST([
struct(['name:apple','color:red'] AS fruit),
struct(['name:pear','color:purple'] AS fruit)
]) )
SELECT
(SELECT SPLIT(f, ':')[SAFE_OFFSET(1)] FROM t.fruit f WHERE SPLIT(f, ':')[SAFE_OFFSET(0)]='name') AS name,
(SELECT SPLIT(f, ':')[SAFE_OFFSET(1)] FROM t.fruit f WHERE SPLIT(f, ':')[SAFE_OFFSET(0)]='color') AS color
FROM t

Not sure if there's a more succinct way to do this but this works
WITH CTE AS (
SELECT ["name:apple", "color:red"] as fruit
UNION ALL
SELECT ["name:pear", "color:green"]
),
CTE2 AS (
SELECT row_number() over () as rowNumber, fruit
FROM CTE
)
SELECT max(if(REGEXP_CONTAINS(fruit,'name:'),replace(fruit,'name:',''),null)) name,
max(if(REGEXP_CONTAINS(fruit,'color:'),replace(fruit,'color:',''),null)) color
FROM CTE2,
UNNEST(fruit) as fruit
GROUP BY rowNumber

Related

How can I refer to a LAG() function column in SQL Server?

I have a query in which I use LAG function :
WITH Tr AS
(
SELECT
DocDtls.Warehouse, Transactions.Code, DocDtls.zDate,
Transactions.ID, Transactions.QtyIn, Transactions.QtyOut,
Transactions.BalanceAfter
FROM
DocDtls
INNER JOIN
Transactions ON DocDtls.[PrimDocNum] = Transactions.[DocNum]
)
SELECT
ID, Code, QtyIn, QtyOut, BalanceAfter,
LAG(BalanceAfter, 1, 0) OVER (PARTITION BY Warehouse, Code
ORDER BY Code, ID) Prev_BlncAfter
FROM
Tr;
It's working fine but when I try to add this column before FROM:
SUM(Prev_BlncAfter + QtyIn) - QtyOut AS NewBlncAfter
I get this error :
Msg 207, Level 16, State 1, Line 3
Invalid column name 'Prev_BlncAfter'
How can I fix this ? Thanks
You can create the LAG column inside the CTE instead of in the outer query. E.g.
declare #DocDtls table (Warehouse int, zDate date, [PrimDocNum] int);
declare #Transactions table (code int, id int, QtyIn int, QtyOut int, balanceafter int, [DocNum] int)
;with Tr As
(
SELECT
d.Warehouse
, t.Code
, d.zDate
, t.ID
, t.QtyIn
, t.QtyOut
, t.BalanceAfter
,LAG(BalanceAfter,1,0) Over (partition by Warehouse,Code order by Code,ID) Prev_BlncAfter
FROM #DocDtls d
INNER JOIN #Transactions t ON d.[PrimDocNum] = t.[DocNum]
)
select ID,Code,QtyIn,QtyOut,BalanceAfter
,SUM(Prev_BlncAfter + QtyIn)-QtyOut As NewBlncAfter
from Tr
group by ID,Code,QtyIn,QtyOut,BalanceAfter;
You can nest this query to refer the newly added column from the outer scope, or create another with like you've done before for referencing it afterwards:
with Tr As (
SELECT
DocDtls.Warehouse,
Transactions.Code,
DocDtls.zDate,
Transactions.ID,
Transactions.QtyIn,
Transactions.QtyOut,
Transactions.BalanceAfter
FROM
DocDtls
INNER JOIN Transactions ON DocDtls.[PrimDocNum] = Transactions.[DocNum]
),
formatted_tr as (
select
ID,
Code,
QtyIn,
QtyOut,
BalanceAfter,
LAG(BalanceAfter, 1, 0) Over (
partition by Warehouse,
Code
order by
Code,
ID
) Prev_BlncAfter
from
Tr
)
select
SUM(Prev_BlncAfter + QtyIn) - QtyOut As NewBlncAfter
from
formatted_tr
group by
ID, QtyOut
;
Based on comments , I combined the two answers to get what I need :
with Tr As (
SELECT
DocDtls.Warehouse,
Transactions.Code,
DocDtls.zDate,
Transactions.ID,
Transactions.QtyIn,
Transactions.QtyOut,
Transactions.BalanceAfter
FROM
DocDtls
INNER JOIN Transactions ON DocDtls.[PrimDocNum] = Transactions.[DocNum]
),
formatted_tr as (
select
ID,
Code,
QtyIn,
QtyOut,
BalanceAfter,
LAG(BalanceAfter, 1, 0) Over (
partition by Warehouse,
Code
order by
Code,zDate,ID
) Prev_BlncAfter
from
Tr
)
select ID,Code,QtyIn,QtyOut,BalanceAfter
,SUM(Prev_BlncAfter + QtyIn)-QtyOut As NewBlncAfter
from formatted_tr
group by ID,Code,QtyIn,QtyOut,BalanceAfter;
;

SQL Server Query for required result

I am using SQL Server with my application.
The Table data is as following :
And I want result in following format:
I have tried with split function but its not working properly.
Is it possible to get such a result.
Please suggest.
Thank you.
Try this. I did not manage to get a single Not Req, it is like this ("Not Req/Not Req").
drop table if exists dbo.TableB;
create table dbo.TableB (
OldSPC varchar(100)
, old_freq varchar(100)
, NewSPC varchar(100)
, new_freq varchar(100)
);
insert into dbo.TableB(OldSPC, old_freq, NewSPC, new_freq)
values ('ADH,BAP', '7,7', 'ADH,BAP', '7,7')
, ('Not Req', 'Not Req', 'ADH,BAP', '7,7')
, ('BAP,EXT,ADL', '35,7,42', 'BAP,EXT,BAP,ADL', '21,7,35,42');
select
tt1.OldSPCOldFreq
, tt2.NewSPCNewFreq
from (
select
t.OldSPC, t.old_freq, t.NewSPC, t.new_freq
, STRING_AGG(t1.value + '/' + t2.value, ',') OldSPCOldFreq
from dbo.TableB t
cross apply (
select
ROW_NUMBER () over (order by t.OldSPC) as Rbr
, ss.value
from string_split (t.OldSPC, ',') ss
) t1
cross apply (
select
ROW_NUMBER () over (order by t.old_freq) as Rbr
, ss.value
from string_split (t.old_freq, ',') ss
) t2
where t1.Rbr = t2.Rbr
group by t.OldSPC, t.old_freq, t.NewSPC, t.new_freq
) tt1
inner join (
select
t.OldSPC, t.old_freq, t.NewSPC, t.new_freq
, STRING_AGG(t3.value + '/' + t4.value, ',') NewSPCNewFreq
from dbo.TableB t
cross apply (
select
ROW_NUMBER () over (order by t.NewSPC) as Rbr
, ss.value
from string_split (t.NewSPC, ',') ss
) t3
cross apply (
select
ROW_NUMBER () over (order by t.new_freq) as Rbr
, ss.value
from string_split (t.new_freq, ',') ss
) t4
where t3.Rbr = t4.Rbr
group by t.OldSPC, t.old_freq, t.NewSPC, t.new_freq
) tt2 on tt1.OldSPC = tt2.OldSPC
and tt1.old_freq = tt2.old_freq
and tt1.NewSPC = tt2.NewSPC
and tt1.new_freq = tt2.new_freq
As mentioned in comments, it might be easier for you to do it on front end, but it could be done in SQL Server as well.
Partial Rextester Demo
I didn't replicate your whole scenario but got it for 2 columns. To do it first of all, you need a unique identifier for each row. I am using a sequence number (1,2,3...).
Now refer to this answer, which uses recursive subquery to split csv to rows. Then I used XML PATH to change columns back to csv.
This is the query which is doing it for OLD SPC and OLD FREQ.
;with tmp(SEQ,OldSPCItem,OldSPC,OLD_FREQ_item,OLD_FREQ) as (
select SEQ, LEFT(OldSPC, CHARINDEX(',',OldSPC+',')-1),
STUFF(OldSPC, 1, CHARINDEX(',',OldSPC+','), ''),
LEFT(OLD_FREQ, CHARINDEX(',',OLD_FREQ+',')-1),
STUFF(OLD_FREQ, 1, CHARINDEX(',',OLD_FREQ+','), '')
from table1
union all
select SEQ, LEFT(OldSPC, CHARINDEX(',',OldSPC+',')-1),
STUFF(OldSPC, 1, CHARINDEX(',',OldSPC+','), ''),
LEFT(OLD_FREQ, CHARINDEX(',',OLD_FREQ+',')-1),
STUFF(OLD_FREQ, 1, CHARINDEX(',',OLD_FREQ+','), '')
from tmp
where OldSPC > ''
)
select seq,STUFF( (SELECT ',' + CONCAT(OldSPCItem,'/',OLD_FREQ_item) FROM TMP I
WHERE I.seq = O.seq FOR XML PATH('')),1,1,'') OLD_SPC_OLD_FREQ
from tmp O
GROUP BY seq
;
It will give you this output
+-----+------------------+
| seq | OLD_SPC_OLD_FREQ |
+-----+------------------+
| 1 | ADH/7,BAP/9 |
| 2 | NOT REQ/NOT REQ |
+-----+------------------+
What do you have to do now
- Find a way to generate a sequence number to uniquely identify each row. If you can use any column, use that instead of SEQ.
Similarly add logic for NEW_SPC and NEW_FREQ. (just copy paste LEFT and STUFF like in OLD_FREQ and change it for NEW_SPC and NEW_FREQ.
Replace multiple NOT REQ/ with '', so you will get only one NOT REQ. You can do it with replace function.
If you face any issue/error while doing so, add it to the Rexterster Demo and share the URL, we will check that.

Adding results from two queries

I'm using MS-SQL 2008 R2.
I have 2 Queries which are returning the required results.
But I need to add the two results from each queries to provide a final value [Enterprise Value]. I'm sure this is very straight forward but I'm going round in circles on this, have tried incorporating SUM which I think is the right approach?
Here is the full query as it currently stands:
declare #d1 datetime='2015-12-22'
(select
c.fs_perm_sec_id,
((c.p_price * s.p_com_shs_out)/1000) as [Enterprise Value]
from fp_v1.fp_basic_bd c
left join edm_v1.edm_security_entity_map e
on e.fs_perm_sec_id= c.fs_perm_sec_id
left join fp_v1.fp_basic_sho s
on s.fs_perm_sec_id = c.fs_perm_sec_id
and c.date=#d1
where s."date" =
(
select MAX(s2."date")
from fp_v1.fp_basic_sho s2
where s2.fs_perm_sec_id=c.fs_perm_sec_id
and s2."date" <= c."date"
)
and c."date"=#d1
and e.termination_date is null
and c.fs_perm_sec_id = 'GPHC8W-S-GB')
UNION ALL
select
ff.fs_perm_sec_id,
((FF_debt + ff_pfd_stk + ff_min_int_accum) - FF.ff_cash_st) as [Enterprise Value]
from ff_v2.ff_basic_af_v2 FF
where FF."date" =
( select MAX(FF2."date")
from ff_v2.ff_basic_af_v2 FF2
where FF2.fs_perm_sec_id=FF.fs_perm_sec_id
and FF.date <= FF2.date
)
and FF.fs_perm_sec_id =('GPHC8W-S-GB')
When inserting a "UNION ALL" between the two queries I get the following results:
fs_perm_sec_id Enterprise Value
GPHC8W-S-GB 9270.5204655
GPHC8W-S-GB 835
What I would like to achieve is a sum of the two values brought onto one row, i.e.:
fs_perm_sec_id Enterprise Value
GPHC8W-S-GB 10105.52
Thanks for your help.
Final SQL:
declare #d1 datetime='2015-12-23'
Select fs_perm_sec_id, SUM([Enterprise Value]) AS 'Enterprise Value'
from
(
(select
c.fs_perm_sec_id,
((c.p_price * s.p_com_shs_out)/1000) as [Enterprise Value]
from fp_v1.fp_basic_bd c
left join edm_v1.edm_security_entity_map e
on e.fs_perm_sec_id= c.fs_perm_sec_id
left join fp_v1.fp_basic_sho s
on s.fs_perm_sec_id = c.fs_perm_sec_id
and c.date=#d1
where s."date" =
(
select MAX(s2."date")
from fp_v1.fp_basic_sho s2
where s2.fs_perm_sec_id=c.fs_perm_sec_id
and s2."date" <= c."date"
)
and c."date"=#d1
and e.termination_date is null
and c.fs_perm_sec_id in ('FT9TC5-S-GB','GPHC8W-S-GB','R85KLC-S-US'))
UNION ALL
select
ff.fs_perm_sec_id,
((FF_debt + ff_pfd_stk + ff_min_int_accum) - FF.ff_cash_st) as [Enterprise Value]
from ff_v2.ff_basic_af_v2 FF
where FF."date" =
( select MAX(FF2."date")
from ff_v2.ff_basic_af_v2 FF2
where FF2.fs_perm_sec_id=FF.fs_perm_sec_id
and FF.date <= FF2.date
)
and FF.fs_perm_sec_id in ('FT9TC5-S-GB','GPHC8W-S-GB','R85KLC-S-US')) t
group by t.fs_perm_sec_id
just use the Derived Table and Group by
Select fs_perm_sec_id,
SUM(Enterprise Value) EnterpriseValue
from (**your whole code**)
GROUP BY fs_perm_sec_id
use group by
How to use group by with union in t-sql
SELECT id,sum(*)
FROM ( SELECT id,
time
FROM dbo.a
UNION
SELECT id,
time
FROM dbo.b
)
GROUP BY id
DECLARE
#d1 DATE = '20151222'
, #fs_perm_sec_id VARCHAR(100) = 'GPHC8W-S-GB'
SELECT #fs_perm_sec_id, SUM([Enterprise Value])
FROM (
SELECT [Enterprise Value]
FROM (
SELECT
c.fs_perm_sec_id
, (c.p_price * s.p_com_shs_out) / 1000 AS [Enterprise Value]
, RowNum = ROW_NUMBER() OVER (ORDER BY s.[date] DESC)
from fp_v1.fp_basic_bd c
join fp_v1.fp_basic_sho s on s.fs_perm_sec_id = c.fs_perm_sec_id
left join edm_v1.edm_security_entity_map e on e.fs_perm_sec_id= c.fs_perm_sec_id
where c.[date] = #d1
and e.termination_date is null
and c.fs_perm_sec_id = #fs_perm_sec_id
) t
WHERE t.RowNum = 1
UNION ALL
SELECT FF_debt + ff_pfd_stk + ff_min_int_accum - ff_cash_st
FROM (
SELECT
ff.fs_perm_sec_id
, FF_debt
, ff_pfd_stk
, ff_min_int_accum
, FF.ff_cash_st
, RowNum = ROW_NUMBER() OVER (ORDER BY FF.[date] DESC)
FROM ff_v2.ff_basic_af_v2 FF
WHERE FF.[date] =
AND FF.fs_perm_sec_id = #fs_perm_sec_id
) t
WHERE t.RowNum = 2
) t

SQL server: WHERE in xml field

Example: I have table TableA with 3 records:
record 1:
id = 1, value = '<Employee id='1' name='Employee1'></Employee><Employee id='2' name='Employee2'></Employee>'
record 2:
id = 2, value = '<Employee id='1' name='Employee1'></Employee><Employee id='2' name='Employee2'></Employee><Employee id='3' name='Employee3'></Employee>'
record 3:
id = 3, value = '<Employee id='1' name='Employee1'></Employee><Employee id='2' name='Employee2'></Employee><Employee id='3' name='Employee3'></Employee><Employee id='4' name='Employee4'></Employee>'
the query:
SELECT * FROM TableA
WHERE...
How can I put the where clause to get only record 1?
Many thanks,
The problem with the data is that it doesn't contain well formed xml - you will need to wrap it before you can use the xml tools in Sql like xquery.
SELECT *
FROM
(
SELECT
Nodes.node.value('(./#id)[1]', 'int') AS EmployeeId,
Nodes.node.value('(./#name)[1]', 'varchar(50)') AS EmployeeName
FROM
(
SELECT CAST('<xml>' + value + '</xml>' AS Xml) As WrappedXml
FROM TableA
) AS x
cross apply x.WrappedXml.nodes('//Employee') as Nodes(node)
) as y
WHERE
y.EmployeeId = 1;
Inner select -wraps the xml
Middle select - standard xquery
Outer select - where filter
You haven't clarified what you mean w.r.t. get only record 1, but if you mean just the first element of each row (which coincidentally also has id = 1), you can use ROW_NUMBER() to assign a sequence:
SELECT *
FROM
(
SELECT
Nodes.node.value('(./#id)[1]', 'int') AS EmployeeId,
Nodes.node.value('(./#name)[1]', 'varchar(50)') AS EmployeeName,
ROW_NUMBER() OVER (PARTITION BY x.Id ORDER BY ( SELECT 1 )) SequenceId
FROM
(
SELECT Id, CAST('<xml>' + value + '</xml>' AS Xml) As WrappedXml
FROM TableA
) AS x
cross apply x.WrappedXml.nodes('//Employee') as Nodes(node)
) as y
WHERE SequenceId = 1;
Both Fiddles here
I tried with this query and It returns record 1 as I expect:
SELECT * FROM TableA
WHERE value.exist('(Employee[#id = 1])') = 1 and value.exist('(Employee[#id = 2])') = 1 AND value.value('count(Employee[#id])', 'int') = 2
Do you have any comments for this query? Should I use it? :)

LINQ (to Oracle) - Row_Number() Over Partition By

This is a possible duplicate of other Partition By + Rank questions but I found most of those questions/answers to be too specific to their particular business logic. What I'm looking for is a more general LINQ version of the following type of query:
SELECT id,
field1,
field2,
ROW_NUMBER() OVER (PARTITION BY id
ORDER BY field1 desc) ROWNUM
FROM someTable;
A very common thing we do with this is to wrap it like in something like this:
SELECT id,
field1,
field2
FROM (SELECT id,
field1,
field2,
ROW_NUMBER() OVER (PARTITION BY id
ORDER BY field1 desc) ROWNUM
FROM someTable)
WHERE ROWNUM = 1;
Which returns the row containing the highest value in field1 for each id. Changing the order by to asc of course would return the lowest value or changing the rank to 2 will get the second highest/lowest value etc, etc. Is there a way to write a LINQ query that can be executed server side that gives us the same sort of functionality? Ideally, one that as performant as the above.
Edit:
I've tried numerous different solutions after scouring the web and they all end up giving me the same problem that Reed's answer below does because the SQL generated includes an APPLY.
A couple examples I tried:
from p in db.someTable
group p by p.id into g
let mostRecent = g.OrderByDescending(o => o.field1).FirstOrDefault()
select new {
g.Key,
mostRecent
};
db.someTable
.GroupBy(g => g.id, (a, b) => b.OrderByDescending(o => o.field1).Take(1))
.SelectMany(m => m);
Both of these result in very similar, if not identical, SQL code which uses an OUTER APPLY that Oracle does not support.
You should be able to do something like:
var results = someTable
.GroupBy(row => row.id)
.Select(group => group.OrderByDescending(r => r.id).First());
If you wanted the third highest value, you could do something like:
var results = someTable
.GroupBy(row => row.id)
.Select(group => group.OrderByDescending(r => r.id).Skip(2).FirstOrDefault())
.Where(r => r != null); // Remove the groups that don't have 3 items
an alternative way, by using a subquery which separately gets the maximum field1 for each ID.
SELECT a.*
FROM someTable a
INNER JOIN
(
SELECT id, max(field1) max_field
FROM sometable
GROUP BY id
) b ON a.id = b.ID AND
a.field1 = b.max_field
when converted to LINQ:
from a in someTable
join b in
(
from o in someTable
group o by new {o.ID} into g
select new
{
g.Key.ID,
max_field = g.Max(p => p.field1)
}
) on new {a.ID, a.field1} equals new {b.ID, field1 = b.max_field}
select a

Resources