Accessing JSON Array of strings - arrays

My problem is that I have a JSON column with a key which contains an array of string values. If I access it with array notation:
-- JSON Key = "infoProvided":["FullName","Contact Information"]
select top 1 json_value(textarea,'$.infoProvided[0]') from checklist
-- result
(No column name)
FullName
-- expected
(No column name)
FullName,Contact Information
I need to get all possible values in the array. Thanks in advance.

Note: I had to edit your JSON as it was missing the open and closing curly braces.
DECLARE #json VARCHAR(1000) = '{"infoProvided":["FullName","Contact Information"]}';
SELECT * FROM OPENJSON ( #json, '$.infoProvided' );
Returns
+-----+---------------------+------+
| key | value | type |
+-----+---------------------+------+
| 0 | FullName | 1 |
| 1 | Contact Information | 1 |
+-----+---------------------+------+
SELECT STRING_AGG ( [value], ',' ) AS InfoList FROM OPENJSON ( #json, '$.infoProvided' );
Returns
+------------------------------+
| InfoList |
+------------------------------+
| FullName,Contact Information |
+------------------------------+
SELECT * FROM OPENJSON ( #json, '$' );
Returns
+--------------+------------------------------------+------+
| key | value | type |
+--------------+------------------------------------+------+
| infoProvided | ["FullName","Contact Information"] | 4 |
+--------------+------------------------------------+------+
SELECT JSON_QUERY ( #json, '$.infoProvided' );
Returns
+------------------------------------+
| (No column name) |
+------------------------------------+
| ["FullName","Contact Information"] |
+------------------------------------+

Related

How to identify valid records based on column values in snowflake

I have a table as below
I want output like below
This means I have few predefined pairs, example
if one employee is coming from both HR_INTERNAL and HR_EXTERNAL, take only that record which is from HR_INTERNAL
if one employee is coming from both SALES_INTERNAL and SALES_EXTERNAL, take only that record which is from SALES_INTERNAL
etc.
Is there a way to achieve this?
I used ROW_NUMBER to rank
ROW_NUMBER() OVER(PARTITION BY "EMPID" ORDER BY SOURCESYSTEM ASC) AS RANK_GID
I just put them on a table like this:
create or replace table predefined_pairs ( pairs ARRAY );
insert into predefined_pairs select [ 'HR_INTERNAL', 'HR_EXTERNAL' ] ;
insert into predefined_pairs select [ 'SALES_INTERNAL', 'SALES_EXTERNAL' ] ;
Then I use the following query to produce the output you wanted:
select s.sourcesystem, s.empid,
CASE WHEN COUNT(1) OVER(PARTITION BY EMPID) = 1 THEN 'ValidRecord'
WHEN p.pairs[0] IS NULL THEN 'ValidRecord'
WHEN p.pairs[0] = s.sourcesystem THEN 'ValidRecord'
ELSE 'InvalidRecord'
END RecordValidity
from source s
left join predefined_pairs p on array_contains( s.sourcesystem::VARIANT, p.pairs ) ;
+-------------------+--------+----------------+
| SOURCESYSTEM | EMPID | RECORDVALIDITY |
+-------------------+--------+----------------+
| HR_INTERNAL | EMP001 | ValidRecord |
| HR_EXTERNAL | EMP001 | InvalidRecord |
| SALES_INTERNAL | EMP002 | ValidRecord |
| SALES_EXTERNAL | EMP002 | InvalidRecord |
| HR_EXTERNAL | EMP004 | ValidRecord |
| SALES_INTERNAL | EMP005 | ValidRecord |
| PURCHASE_INTERNAL | EMP003 | ValidRecord |
+-------------------+--------+----------------+

Logic in SQL Server to create a derived column based on comparing two comma separated columns

Want to create logic in SQL Server to create a derived column based on comparing two comma-separated columns.
Sample table data -
Create table ##table1 (ID INT Identity Primary Key, FulfillmentChannelStatus varchar(255),RoleAlternateSourcingChannel varchar (255))
insert into ##table1 values ('Filled,Open,In-process','Internal,Recruiter,Contractor')
,('Open,In-process,New','Contractor,Internal,Recruiter')
,('New,Filled','Contractor,Recruiter ')
,('Filled','Recruiter')
,('Open,New,Filled','Internal,Recruiter,Contractor')
,('Filled,Filled,Filled','Internal,Contractor,Recruiter')
,('Open ,Filled, In-proces','Contractor,Internal,Recruiter')
,('Filled','Others')
,('Cancelled,Filled','Contractor,Recruiter')
,('Cancelled, Filled, Cancel - In Process','Contractor,Recruiter,Internal')
Logic for new column--
--select * from ##tble
DECLARE #separator CHAR(1) = ','
SELECT
[Role Id],[RoleAlternateSourcingChannel],[FulfillmentChannelStatus] , [Filled fulfil] = x.value('(/root/r[sql:column("t.pos")]/text())[1]', 'VARCHAR(10)')
into ##temp FROM ##tble
CROSS APPLY (SELECT x = TRY_CAST('<root><r><![CDATA[' +
REPLACE([FulfillmentChannelStatus], #separator, ']]></r><r><![CDATA[') +
']]></r></root>' AS XML)
.query('
for $x in /root/r[text()="Filled"][1]
return count(root/r[. << $x]) + 1
').value('text()[1]','INT')) AS t(pos)
CROSS APPLY (SELECT TRY_CAST('<root><r><![CDATA[' +
REPLACE([RoleAlternateSourcingChannel], #separator, ']]></r><r><![CDATA[') +
']]></r></root>' AS XML)) AS t2(x)
Scenario: I have two comma-separated columns
1 . I need to calculate values for only "Filled" values (in column [Role Alternate Sourcing Channel])
2. In 1st first row- I have a Filled value for Internal Scheduling in column ([RoleAlternateSourcingChannel]) so in the output column – it will be Internal.
3. In 2nd row – I don’t have any Filled so the output will be Null.
4. in 3rd row - I have a Filled value for Recruiter so the output will be Recruiter.
And so on…
5.In Row 6 for all value is filled so the output will be a recruiter. because preference of Recruiter>Internal>Contractor
Other than Recruiter/Internal/ Contractor all filled values will be Null.
The position of Filled value is not fixed. It can be anywhere such as - either at 1st place/position or 2nd place or in 3rd place.
Expected Output -
|+----+------------------------+----------------------------+---------------+
| ID |FulfillmentChannelStatus|RoleAlternateSourcingChannel| Filled fulfil |
+----+------------------------+----------------------------+---------------+
| 1 | Filled,Open,In-process | Internal,Recruiter,Contractor | Internal |
| 2 | Open,In-process,New | Contractor,Internal,Recruiter | NULL |
| 3 | New,Filled | Contractor,Recruiter | Recruiter |
| 4 | Filled | Recruiter | Recruiter |
| 5 | Open,New,Filled | Internal,Recruiter,Contractor | Contractor |
| 6 | Filled,Filled,Filled | Internal,Contractor,Recruiter | Recruiter |
| 7 | Open ,Filled, In-process| Contractor,Internal,Recruiter | Internal |
| 8 | Filled | Others | Null
| 9 | Cancelled, Filled, Cancel - In Procecess|Contractor,Internal,Recruiter | Internal
| 10| Cancelled, Filled| Internal,Recruiter| Recruiter
+----+------------------------+-------+--------+----------------------------+
**Question:** I tried Query2, For all other cases it is working fine now but for Row 9 and 10 O/P is Null but it should be Internal and Recruiter respectively.
A minimal reproducible example ##1-4 is not provided.
Shooting from the hip.
Please try the following solution based on XQuery.
XML and XQuery data model is based on ordered sequences, exactly what we need.
You moved the goalposts in the middle of the game.
I made just the "Recruiter","Internal","Contractor" as a legitimate
values for the RoleAlternateSourcingChannel column. Everything
else is filtered out.
I don't see any easy way to handle the preference of
Recruiter>Internal>Contractor for the row #6.
SQL #1
-- DDL and sample data population, start
DECLARE #tbl TABLE (ID INT IDENTITY PRIMARY KEY, FulfillmentChannelStatus VARCHAR(255), RoleAlternateSourcingChannel VARCHAR(255));
INSERT INTO #tbl (FulfillmentChannelStatus, RoleAlternateSourcingChannel) VALUES
('Filled,Open,In-process', 'Internal,Recruiter,Contractor'),
('Open,In-process,New', 'Contractor,Internal,Recruiter'),
('New,Filled', 'Contractor,Recruiter'),
('Filled', 'Recruiter'),
('Open,New,Filled', 'Internal,Recruiter,Contractor'),
('Filled,Filled,Filled', 'Internal,Contractor,Recruiter'),
('Open,Filled,In-process', 'Contractor,Internal,Recruiter'),
('Filled', 'Others');
-- DDL and sample data population, end
DECLARE #separator CHAR(1) = ',';
SELECT tbl.*
, Result = x.value('(/root/r[sql:column("t.pos")]/text())[1]', 'VARCHAR(10)')
FROM #tbl AS tbl
CROSS APPLY (SELECT x = TRY_CAST('<root><r><![CDATA[' +
REPLACE(FulfillmentChannelStatus, #separator, ']]></r><r><![CDATA[') +
']]></r></root>' AS XML)
.query('
if (count(/root/r[text()="Filled"]) eq 1) then
for $x in /root/r[text()="Filled"]
return count(root/r[. << $x]) + 1
else ()
').value('text()[1]','INT')) AS t(pos)
CROSS APPLY (SELECT TRY_CAST('<root><r><![CDATA[' +
REPLACE(RoleAlternateSourcingChannel, #separator, ']]></r><r><![CDATA[') +
']]></r></root>' AS XML).query('<root>
{
for $x in /root/r[text()=("Recruiter","Internal","Contractor")]
return $x
}
</root>
')) AS t2(x);
Output
+----+--------------------------+-------------------------------+------------+
| ID | FulfillmentChannelStatus | RoleAlternateSourcingChannel | Result |
+----+--------------------------+-------------------------------+------------+
| 1 | Filled,Open,In-process | Internal,Recruiter,Contractor | Internal |
| 2 | Open,In-process,New | Contractor,Internal,Recruiter | NULL |
| 3 | New,Filled | Contractor,Recruiter | Recruiter |
| 4 | Filled | Recruiter | Recruiter |
| 5 | Open,New,Filled | Internal,Recruiter,Contractor | Contractor |
| 6 | Filled,Filled,Filled | Internal,Contractor,Recruiter | NULL |
| 7 | Open,Filled,In-process | Contractor,Internal,Recruiter | Internal |
| 8 | Filled | Others | NULL |
+----+--------------------------+-------------------------------+------------+
SQL #2
DB fiddle
-- DDL and sample data population, start
DECLARE #tbl TABLE (ID INT IDENTITY PRIMARY KEY, FulfillmentChannelStatus VARCHAR(255), RoleAlternateSourcingChannel VARCHAR(255));
INSERT INTO #tbl (FulfillmentChannelStatus, RoleAlternateSourcingChannel) VALUES
('Filled,Open,In-process', 'Internal,Recruiter,Contractor'),
('Open,In-process,New', 'Contractor,Internal,Recruiter'),
('New,Filled', 'Contractor,Recruiter'),
('Filled', 'Recruiter'),
('Open,New,Filled', 'Internal,Recruiter,Contractor'),
('Filled,Filled,Filled', 'Internal,Contractor,Recruiter'),
('Open,Filled,In-process', 'Contractor,Internal,Recruiter'),
('Filled', 'Others'),
('Cancelled,Filled','Contractor,Recruiter'),
('Cancelled, Filled, Cancel - In Process','Contractor,Recruiter,Internal');
-- DDL and sample data population, end
DECLARE #separator CHAR(1) = ',';
;WITH rs AS
(
SELECT ID, x
FROM #tbl
CROSS APPLY (SELECT TRY_CAST('<root>' +
'<source><r><![CDATA[' + REPLACE(REPLACE(FulfillmentChannelStatus,SPACE(1),''), #separator, ']]></r><r><![CDATA[') +
']]></r></source>' +
'<target><r><![CDATA[' + REPLACE(REPLACE(RoleAlternateSourcingChannel,SPACE(1),''), #separator, ']]></r><r><![CDATA[') +
']]></r></target>' +
'</root>' AS XML).query('<root>
{
for $x in /root/source/r
let $pos := count(root/source/r[. << $x]) + 1
return <r>
<s>{data($x)}</s><t>{data(/root/target/r[$pos])}</t>
</r>
}
</root>')) AS t(x)
), cte AS
(
SELECT ID
, c.value('(s/text())[1]', 'VARCHAR(30)') AS source
, c.value('(t/text())[1]', 'VARCHAR(30)') AS [target]
FROM rs
CROSS APPLY x.nodes('/root/r') AS t(c)
), cte2 AS
(
SELECT *
, ROW_NUMBER() OVER (PARTITION BY ID ORDER BY
CASE [target]
WHEN 'Recruiter' THEN 1
WHEN 'Internal' THEN 2
WHEN 'Contractor' THEN 3
END) AS seq
FROM cte
WHERE source = 'Filled'
AND [target] IN ('Recruiter','Internal','Contractor')
)
SELECT t.*
, c.[target] --, c.seq
FROM #tbl AS t
LEFT OUTER JOIN cte2 AS c ON c.ID = t.ID
WHERE c.seq = 1 OR c.seq is NULL
ORDER BY t.ID;
Output
+----+----------------------------------------+-------------------------------+------------+
| ID | FulfillmentChannelStatus | RoleAlternateSourcingChannel | target |
+----+----------------------------------------+-------------------------------+------------+
| 1 | Filled,Open,In-process | Internal,Recruiter,Contractor | Internal |
| 2 | Open,In-process,New | Contractor,Internal,Recruiter | NULL |
| 3 | New,Filled | Contractor,Recruiter | Recruiter |
| 4 | Filled | Recruiter | Recruiter |
| 5 | Open,New,Filled | Internal,Recruiter,Contractor | Contractor |
| 6 | Filled,Filled,Filled | Internal,Contractor,Recruiter | Recruiter |
| 7 | Open,Filled,In-process | Contractor,Internal,Recruiter | Internal |
| 8 | Filled | Others | NULL |
| 9 | Cancelled,Filled | Contractor,Recruiter | Recruiter |
| 10 | Cancelled, Filled, Cancel - In Process | Contractor,Recruiter,Internal | Recruiter |
+----+----------------------------------------+-------------------------------+------------+

Split string and output into rows

I want to split a string based on delimiter ',' and put the results into rows. Hence, I'm trying to use SPLIT_TO_TABLE function in Snowflake, but not working successfully.
I used the regexp_replace to clean the string. How can I output this into rows for each id?
SELECT value,
TRIM(regexp_replace(value, '[{}_]', ' ')) AS extracted
Here is the sample data:
+--------+------------------------------------+
| id | value |
+--------+------------------------------------+
| fsaf12 | {Other Questions,Missing Document} |
| sfas11 | {Other} |
+--------+------------------------------------+
Expected result:
+--------+------------------+
| id | extracted |
+--------+------------------+
| fsaf12 | Other Questions |
| fsaf12 | Missing Document |
| sfas11 | Others |
+--------+------------------+
Adding another way to split the data and output it as rows :
SELECT b,TRIM(regexp_replace(splitvalue, '[{}_]', '')) AS extracted from
(SELECT b, C.value::string AS splitvalue
FROM split,
LATERAL FLATTEN(input=>split(a, ',')) C);
where a and b are the columns in table "split" and data is as follows :
A
B
{First,Second}
row1
{Third,Fourth}
row2
HEre is answer , used replace function instead of regexp_replace
WITH DATATABLE(ID ,VALUEA ) AS
(
SELECT * FROM VALUES ('fsaf12','{Other Questions,Missing Document}'),('sfas11',' {Other} ')
)
SELECT ID, REPLACE(REPLACE(VALUE,'{',''),'}','') SPLITTED_VALUE FROM DATATABLE , LATERAL SPLIT_TO_TABLE (VALUEA,',') ;

Extract into multiple columns from JSON with PostgreSQL

I have a column item_id that contains data in JSON (like?) structure.
+----------+---------------------------------------------------------------------------------------------------------------------------------------+
| id | item_id |
+----------+---------------------------------------------------------------------------------------------------------------------------------------+
| 56711 | {itemID":["0530#2#1974","0538\/2#2#1974","0538\/3#2#1974","0538\/18#2#1974","0539#2#1974"]}" |
| 56712 | {itemID":["0138528#2#4221","0138529#2#4221","0138530#2#4221","0138539#2#4221","0118623\/2#2#4220"]}" |
| 56721 | {itemID":["2704\/1#1#1356"]}" |
| 56722 | {itemID":["0825\/2#2#3349","0840#2#3349","0844\/10#2#3349","0844\/11#2#3349","0844\/13#2#3349","0844\/14#2#3349","0844\/15#2#3349"]}" |
| 57638 | {itemID":["0161\/1#2#3364","0162\/1#2#3364","0163\/2#2#3364"]}" |
| 57638 | {itemID":["109#1#3364","110\/1#1#3364"]}" |
+----------+---------------------------------------------------------------------------------------------------------------------------------------+
I need the last four digits before every comma (if there is) and the last 4 digits distincted and separated into individual colums.
The distinct should happen across id as well, so only one result row with id: 57638 is permitted.
Here is a fiddle with a code draft that is not giving the right answer.
The desired result should look like this:
+----------+-----------+-----------+
| id | item_id_1 | item_id_2 |
+----------+-----------+-----------+
| 56711 | 1974 | |
| 56712 | 4220 | 4221 |
| 56721 | 1356 | |
| 56722 | 3349 | |
| 57638 | 3364 | 3365 |
+----------+-----------+-----------+
There can be quite a lot 'item_id_%' column in the results.
with the_table (id, item_id) as (
values
(56711, '{"itemID":["0530#2#1974","0538\/2#2#1974","0538\/3#2#1974","0538\/18#2#1974","0539#2#1974"]}'),
(56712, '{"itemID":["0138528#2#4221","0138529#2#4221","0138530#2#4221","0138539#2#4221","0118623\/2#2#4220"]}'),
(56721, '{"itemID":["2704\/1#1#1356"]}'),
(56722, '{"itemID":["0825\/2#2#3349","0840#2#3349","0844\/10#2#3349","0844\/11#2#3349","0844\/13#2#3349","0844\/14#2#3349","0844\/15#2#3349"]}'),
(57638, '{"itemID":["0161\/1#2#3364","0162\/1#2#3364","0163\/2#2#3364"]}'),
(57638, '{"itemID":["109#1#3365","110\/1#1#3365"]}')
)
select id
,(array_agg(itemid)) [1] itemid_1
,(array_agg(itemid)) [2] itemid_2
from (
select distinct id
,split_part(replace(json_array_elements(item_id::json -> 'itemID')::text, '"', ''), '#', 3)::int itemid
from the_table
order by 1
,2
) t
group by id
DEMO
You can unnest the json array, get the last 4 characters of each element as a number, then do conditional aggregation:
select
id,
max(val) filter(where rn = 1) item_id_1,
max(val) filter(where rn = 2) item_id_2
from (
select
id,
right(val, 4)::int val,
dense_rank() over(partition by id order by right(val, 4)::int) rn
from mytable t
cross join lateral jsonb_array_elements_text(t.item_id -> 'itemID') as x(val)
) t
group by id
You can add more conditional max()s to the outer query to handle more possible values.
Demo on DB Fiddle:
id | item_id_1 | item_id_1
----: | --------: | --------:
56711 | 1974 | null
56712 | 4220 | 4221
56721 | 1356 | null
56722 | 3349 | null
57638 | 3364 | 3365

Select unique rows with where in clause (SQL Server)

I am trying to return one json per unique value in a column.
However, when I try to select from a sub-query the entire result set is returned.
Example Table
| json | column |
|--------|--------|
| {obj1} | 1 |
| {obj2} | 1 |
| {obj3} | 2 |
Example Query
select distinct
[json]
from someTable
where [column] in (
select distinct
[column]
from someTable
)
Actual Output
| json |
|--------|
| {obj1} |
| {obj2} |
| {obj3} |
Expected Output
| json |
|--------|
| {obj1} |
| {obj3} |
How can I select just 1 json per unique column value?
If doesn't Matter output Related To Column is obj1 Or obj2 you must Use Group By Like this :
SELECT
Min(JSon) AS Json ,Column
FROM
someTable
Group By column
If you want the 1st [json] of each column then use row_number():
select t.[json] from (
select
[json],
row_number() over (partition by column order by id) rn
from someTable
) t
where t.rn = 1

Resources