Transform table with custom UDF and save the result in Snowflake - snowflake-cloud-data-platform

I'm looking for a way to transform a table and save the result to another table in Snowflake. While transforming the table, I want to modify some columns using a UDF and then save the result into an interim table.
Steps:
1) read tableA
2) transform tableA (change some columns using custom UDF)
3) save result to tableB
Is it possible to do that on Snowflake side?
UPD
TableA
+----------+---------+--------------------------+---------------+--------+--------+-----+
|first_name|last_name|email |address |address2|address3|dob |
+----------+---------+--------------------------+---------------+--------+--------+-----+
|Mike |Johnson |mike#gmail.com |1650 Jackson St|Apt 704 |null |09/10|
|John |Dunn |john101#gmail.com |320 Day St |null |null |01/20|
|Bill |Rinitin |rinitin#gmail.com |108 Marietta Dr|null |null |04/11|
|Chris |Talbot |chris2#gmail.com |401 Harrison St|null |null |07/17|
|Teresa |Middleton|teresa.middleton#yahoo.com|181 Fremont St |Unit 62C|null |11/21|
+----------+---------+--------------------------+---------------+--------+--------+-----+
TableB
+----------+---------+--------------------------+---------------+--------+--------+-----+
|first_name|last_name|email |address |address2|address3|dob |
+----------+---------+--------------------------+---------------+--------+--------+-----+
|M*** |J****** |m***#gmail.com |108 Marietta Dr|null |null |09/**|
|J*** |D*** |j******#gmail.com |181 Fremont St |Unit 62C|null |01/**|
|B*** |R****** |r******#gmail.com |1650 Jackson St|Apt 704 |null |04/**|
|C**** |T***** |c*****#gmail.com |320 Day St |null |null |07/**|
|T***** |M********|te****.*********#yahoo.com|401 Harrison St|null |null |11/**|
+----------+---------+--------------------------+---------------+--------+--------+-----+

You just need to use INSERT INTO… SELECT…
This is standard SQL and nothing specific to Snowflake

If the transformation is permanent (i.e. it is fine to lose the original data), CREATE TABLE AS SELECT (CTAS) or INSERT ... SELECT would be a good option.
If you want to keep the original data but want to mask sensitive data only for specific people, you can leverage the Dynamic Data Masking feature:
Understanding Dynamic Data Masking — Snowflake Documentation:
https://docs.snowflake.com/en/user-guide/security-column-ddm-intro.html
Dynamic Data Masking is a feature allowing you to apply dynamic transformations with a query or a UDF to a table based on the current context (account, user, role, tag, etc.) by attaching a "Masking Policy" that you can arbitrarily define.
Below are examples of how you can mask the data with CTAS and Dynamic Data Masking.
Sample data/masking UDF preparation
-- Sample source table holding the unmasked rows used by the examples.
-- Every column, including dob (e.g. '09/10'), is stored as text.
create or replace table data (
    first_name varchar,
    last_name  varchar,
    email      varchar,
    address    varchar,
    address2   varchar,
    address3   varchar,
    dob        varchar
);

-- The explicit column list keeps the insert independent of column order.
-- The last row must not be followed by a comma before the terminating semicolon.
insert into data
    (first_name, last_name, email, address, address2, address3, dob)
values
    ('Mike', 'Johnson', 'mike#gmail.com', '1650 Jackson St', 'Apt 704', null, '09/10'),
    ('John', 'Dunn', 'john101#gmail.com', '320 Day St', null, null, '01/20'),
    ('Bill', 'Rinitin', 'rinitin#gmail.com', '108 Marietta Dr', null, null, '04/11'),
    ('Chris', 'Talbot', 'chris2#gmail.com', '401 Harrison St', null, null, '07/17'),
    ('Teresa', 'Middleton', 'teresa.middleton#yahoo.com', '181 Fremont St', 'Unit 62C', null, '11/21')
;
-- Masks a name: the first character is kept and each remaining character
-- is replaced by '*' (e.g. 'Mike' -> 'M***').
create or replace function udf_mask_name (name varchar)
    returns varchar
    language sql
as
$$
    select substr(name, 1, 1) || repeat('*', length(name) - 1)
$$;
-- Masks the part of an e-mail address before the '#' separator: its first
-- character is kept and the rest is replaced by '*'. The part after the
-- separator is returned unchanged (e.g. 'mike#gmail.com' -> 'm***#gmail.com').
create or replace function udf_mask_email (email varchar)
    returns varchar
    language sql
as
$$
    select concat(
        substr(split_part(email, '#', 1), 1, 1),
        repeat('*', length(split_part(email, '#', 1)) - 1),
        '#',
        split_part(email, '#', 2)
    )
$$;
-- Masks the second '/'-separated part of a dob string: the first part is kept
-- and every character of the second part becomes '*' (e.g. '09/10' -> '09/**').
create or replace function udf_mask_dob (dob varchar)
    returns varchar
    language sql
as
$$
    select concat(
        split_part(dob, '/', 1),
        '/',
        repeat('*', length(split_part(dob, '/', 2)))
    )
$$;
Example 1: CTAS
-- Materializes a masked copy of data with CTAS.
-- Names, email and dob pass through the masking UDFs; the three address
-- columns are copied unchanged.
create or replace table data_masked (
    first_name varchar,
    last_name  varchar,
    email      varchar,
    address    varchar,
    address2   varchar,
    address3   varchar,
    dob        varchar
)
as
select
    udf_mask_name(first_name) as first_name,
    udf_mask_name(last_name)  as last_name,
    udf_mask_email(email)     as email,
    address,
    address2,
    address3,
    udf_mask_dob(dob)         as dob
from data;

-- Inspect the masked copy.
select * from data_masked;
/*
FIRST_NAME LAST_NAME EMAIL ADDRESS ADDRESS2 ADDRESS3 DOB
M*** J****** m***#gmail.com 1650 Jackson St Apt 704 09/**
J*** D*** j******#gmail.com 320 Day St 01/**
B*** R****** r******#gmail.com 108 Marietta Dr 04/**
C**** T***** c*****#gmail.com 401 Harrison St 07/**
T***** M******** t***************#yahoo.com 181 Fremont St Unit 62C 11/**
*/
Example 2: Dynamic Data Masking
-- Masking policy for name columns: the ACCOUNTADMIN role gets the stored
-- value, every other role gets the output of udf_mask_name.
create or replace masking policy mp_mask_name
    as (val varchar) returns varchar ->
        case current_role()
            when 'ACCOUNTADMIN' then val
            else udf_mask_name(val)
        end
;
-- Masking policy for e-mail columns: the ACCOUNTADMIN role gets the stored
-- value, every other role gets the output of udf_mask_email.
create or replace masking policy mp_mask_email
    as (val varchar) returns varchar ->
        case current_role()
            when 'ACCOUNTADMIN' then val
            else udf_mask_email(val)
        end
;
-- Masking policy for dob columns: the ACCOUNTADMIN role gets the stored
-- value, every other role gets the output of udf_mask_dob.
create or replace masking policy mp_mask_dob
    as (val varchar) returns varchar ->
        case current_role()
            when 'ACCOUNTADMIN' then val
            else udf_mask_dob(val)
        end
;
-- Attach the masking policies to the sensitive columns of data.
-- mp_mask_name is reused for both name columns.
alter table data modify column first_name set masking policy mp_mask_name;
alter table data modify column last_name set masking policy mp_mask_name;
alter table data modify column email set masking policy mp_mask_email;
alter table data modify column dob set masking policy mp_mask_dob;
-- Query as ACCOUNTADMIN: the policies return the stored value for this role.
use role accountadmin;
select * from data;
/*
FIRST_NAME LAST_NAME EMAIL ADDRESS ADDRESS2 ADDRESS3 DOB
Mike Johnson mike#gmail.com 1650 Jackson St Apt 704 09/10
John Dunn john101#gmail.com 320 Day St 01/20
Bill Rinitin rinitin#gmail.com 108 Marietta Dr 04/11
Chris Talbot chris2#gmail.com 401 Harrison St 07/17
Teresa Middleton teresa.middleton#yahoo.com 181 Fremont St Unit 62C 11/21
*/
-- Query as a role other than ACCOUNTADMIN: the policies return the UDF-masked values.
-- NOTE(review): nonadminrole is presumably a placeholder for an existing role with SELECT on data — confirm.
use role nonadminrole;
select * from data;
/*
FIRST_NAME LAST_NAME EMAIL ADDRESS ADDRESS2 ADDRESS3 DOB
M*** J****** m***#gmail.com 1650 Jackson St Apt 704 09/**
J*** D*** j******#gmail.com 320 Day St 01/**
B*** R****** r******#gmail.com 108 Marietta Dr 04/**
C**** T***** c*****#gmail.com 401 Harrison St 07/**
T***** M******** t***************#yahoo.com 181 Fremont St Unit 62C 11/**
*/

Related

How to create database within a database(postgres)?

Actually I'm noob and stuck on this problem for a week. I will try explaining it.
I have table for USER,
and a table for product
I want to store data of every user for every product. Like if_product_bought, num_of_items, and all.
So the only solution I can think of is a database within a database, that is, creating a copy of the products inside a database named after the user and storing the data there.
If this is possible, how can I do it? Or is there a better solution?
Thanks in advance
You actually don't create a database within a database (or a table within a table) when you use PostgreSQL or any other SQL RDBMS.
You use tables, and JOIN them. You normally would have an orders table, together with an items_x_orders table, on top of your users and items.
This is a very simplified scenario:
-- Users; user_id is the key that orders.user_id references.
CREATE TABLE users (
    user_id   integer NOT NULL,  -- could be SERIAL to generate ids automatically
    user_name text,
    PRIMARY KEY (user_id)
);
-- Items that can be ordered; item_id is the key that items_x_orders.item_id references.
CREATE TABLE items (
    item_id             integer       NOT NULL,  -- could be SERIAL to generate ids automatically
    item_description    text          NOT NULL,
    item_unit           text          NOT NULL,  -- unit label, e.g. 'kg' or 'box'
    item_standard_price numeric(10,2) NOT NULL,  -- undiscounted price
    PRIMARY KEY (item_id)
);
-- Order header: one row per order, linked to the user who placed it.
CREATE TABLE orders
(
    order_id   INTEGER /* SERIAL */ NOT NULL PRIMARY KEY,
    user_id    INTEGER NOT NULL REFERENCES users(user_id),
    -- CURRENT_DATE already has type DATE, so the default needs no implicit
    -- timestamp-to-date cast (which DEFAULT now() relied on).
    order_date DATE NOT NULL DEFAULT CURRENT_DATE,
    other_data TEXT
) ;
-- Order lines: which items belong to an order, and in what quantity.
CREATE TABLE items_x_orders (
    order_id              integer       NOT NULL REFERENCES orders (order_id),
    item_id               integer       NOT NULL REFERENCES items (item_id),
    -- Only zero is rejected; negative quantities pass this check (use "> 0" to forbid them).
    item_quantity         numeric(10,2) NOT NULL CHECK (item_quantity <> 0),
    item_percent_discount numeric(5,2)  NOT NULL DEFAULT 0.0,
    other_data            text,
    -- An item is not supposed to appear more than once in an order,
    -- which makes (order_id, item_id) the natural key of this table.
    PRIMARY KEY (order_id, item_id)
);
This is all based on the so-called Relational Model. What you were thinking about is something else, called a Hierarchical model, or a document model as used in some NoSQL databases (where you store your data as a JSON or XML hierarchical structure).
You would fill those tables with data like:
-- Sample user.
INSERT INTO users (user_id, user_name)
VALUES (1, 'Alice Cooper');

-- Sample items with their unit and standard price.
INSERT INTO items (item_id, item_description, item_unit, item_standard_price)
VALUES
    (1, 'Oranges', 'kg', 0.75),
    (2, 'Cookies', 'box', 1.25),
    (3, 'Milk', '1l carton', 0.90);

-- One order for user 1; order_date and other_data take their column defaults.
INSERT INTO orders (order_id, user_id)
VALUES (100, 1);
-- Order lines for order 100.
-- item_percent_discount is a percentage (the price query divides it by 100.0),
-- so a 5 % discount is stored as 5.00, not 0.05.
INSERT INTO items_x_orders
(order_id, item_id, item_quantity, item_percent_discount, other_data)
VALUES
(100, 1, 2.5, 0.00, NULL),
(100, 2, 3.0, 0.00, 'I don''t want Oreo'),
(100, 3, 1.0, 5.00, 'Make it promo milk') ;
And then you would produce queries like the following one, where you JOIN all relevant tables:
-- One row per order line, with the ordering user and the item details.
-- items_price = quantity * standard price reduced by the percentage discount,
-- cast to two decimals.
SELECT
    u.user_name,
    i.item_description,
    ixo.item_quantity,
    i.item_unit,
    i.item_standard_price,
    ixo.item_percent_discount,
    CAST(ixo.item_quantity * (i.item_standard_price * (1-ixo.item_percent_discount/100.0)) AS DECIMAL(10,2)) AS items_price
FROM items_x_orders AS ixo
INNER JOIN orders AS o
    ON o.order_id = ixo.order_id
INNER JOIN items AS i
    ON i.item_id = ixo.item_id
INNER JOIN users AS u
    ON u.user_id = o.user_id ;
...and get these results:
user_name | item_description | item_quantity | item_unit | item_standard_price | item_percent_discount | items_price
:----------- | :--------------- | ------------: | :-------- | ------------------: | --------------------: | ----------:
Alice Cooper | Oranges | 2.50 | kg | 0.75 | 0.00 | 1.88
Alice Cooper | Cookies | 3.00 | box | 1.25 | 0.00 | 3.75
Alice Cooper | Milk | 1.00 | 1l carton | 0.90 | 5.00 | 0.86
You can get all the code and test at dbfiddle here

SQL Server 2005 : ALTER COLUMN AUTO_INCREMENT and set started ID of AUTO_INCREMENT

I have searched and tried for 2 hours, but I still can't solve this.
I want to migrate data from another database to my database. I created a table tbl_animal and inserted some values:
-- Table as migrated from the other database: id is a plain INT with no
-- identity property and no primary key yet.
CREATE TABLE tbl_animal (
    id   INT          NOT NULL,
    name VARCHAR(150) NOT NULL
);

-- Existing rows keep the ids they had in the source database.
INSERT INTO tbl_animal (id, name)
VALUES (111, 'dog');

INSERT INTO tbl_animal (id, name)
VALUES (222, 'bird');
and data will be like this
______________
id | name|
--------------
111 | dog |
222 | bird |
--------------
Then I want to set ID to AUTO_INCREMENT PRIMARY KEY and SET AUTO_INCREMENT to start from 223
So if I run
-- id is deliberately omitted: the goal is for it to be generated automatically (223 for this row).
INSERT INTO tbl_animal (name) VALUES ('fish')
data will be like this
______________
id | name|
--------------
111 | dog |
222 | bird |
223 | fish |
--------------
I try so many solutions, but still can't. The last query that I have tried is
-- Fails with error 156: the IDENTITY property cannot be added to an existing column via ALTER COLUMN.
ALTER TABLE tbl_animal
ALTER COLUMN id int NOT NULL IDENTITY(1,1) PRIMARY KEY;
The above query throws error code (156):
Incorrect syntax near the keyword 'IDENTITY'
Thanks for help me
Adding identity to an existing column is not possible. All is not lost however. If you can create the table again you can do this kind of thing pretty easily. Something like this.
-- Recreate the table with id as an IDENTITY primary key, reload the existing
-- rows with their original ids, then continue numbering after the highest one.
CREATE TABLE tbl_animal
(
    id   INT IDENTITY(1, 1) NOT NULL PRIMARY KEY,
    name VARCHAR(150) NOT NULL
);

-- IDENTITY_INSERT must be ON to supply explicit values for the identity column.
SET IDENTITY_INSERT tbl_animal ON;
INSERT INTO tbl_animal (id, name) VALUES (111, 'dog');
INSERT INTO tbl_animal (id, name) VALUES (222, 'bird');
SET IDENTITY_INSERT tbl_animal OFF;

-- Reseed to the highest existing id (222) so the next generated id is 223.
DBCC CHECKIDENT('tbl_animal', RESEED, 222);

-- id is generated by the identity property.
INSERT INTO tbl_animal (name) VALUES ('fish');

SELECT id, name
FROM tbl_animal
ORDER BY id;
If however you cannot recreate the table this is quite a bit more complicated. If that is the case let me know and I can help you.

How to render & insert alphabets row within the MS sql server records?

Database Records:
I want to represent as record set using stored procedure.
I have many records likewise in MS SQL DataBase.
The listing should be grouped alphabetically (A, B, C ... Z).
The letter rows should be inserted automatically into the output returned from the SQL table.
I want below output from procedure..
How it will possible using Stored Procedure..?
You can use LEFT and UNION for this, though you will still get a three-column row for the rows that contain only the first letter:
Create and populate sample table (Please save us this step in your future questions)
-- Sample data held in a table variable. Table variables are declared with
-- the @ prefix; # denotes a temporary table and cannot be DECLAREd.
DECLARE @T AS TABLE
(
    Name varchar(20),
    Location varchar(20),
    CreatedOn date
)
-- Explicit column list so the insert does not depend on column order.
INSERT INTO @T (Name, Location, CreatedOn) VALUES
('Alex macwan', 'New york', '2015-12-10'),
('Jone Dinee', 'Denmark', '2016-05-01'),
('Jolly llb', 'USA', '2016-01-02'),
('Amin Mark', 'India', '2015-01-08'),
('Ben Denis', 'Brazil', '2015-10-02')
The query:
-- First branch: the real rows. Second branch: one header row per initial letter.
-- UNION (not UNION ALL) is intentional: it collapses initials shared by several
-- names into a single header row.
-- ORDER BY Name places each one-letter header directly before the names starting with it.
SELECT Name, Location, CreatedOn
FROM @T
UNION
SELECT LEFT(Name, 1), NULL, NULL
FROM @T
ORDER BY Name
Results:
Name Location CreatedOn
-------------------- -------------------- ----------
A NULL NULL
Alex macwan New york 2015-12-10
Amin Mark India 2015-01-08
B NULL NULL
Ben Denis Brazil 2015-10-02
J NULL NULL
Jolly llb USA 2016-01-02
Jone Dinee Denmark 2016-05-01

SQL Server - Update Column with Handing Duplicate and Unique Rows Based Upon Timestamp

I'm working with SQL Server 2005 and looking to export some data from a table I have. However, prior to doing that I need to update a status column based upon a field called "VisitNumber", which can contain multiple entries with the same value. I have a table set up in the following manner. There are more columns to it, but I am just putting in what's relevant to my issue.
ID Name MyReport VisitNumber DateTimeStamp Status
-- --------- -------- ----------- ----------------------- ------
1 Test John Test123 123 2014-01-01 05.00.00.000
2 Test John Test456 123 2014-01-01 07.00.00.000
3 Test Sue Test123 555 2014-01-02 08.00.00.000
4 Test Ann Test123 888 2014-01-02 09.00.00.000
5 Test Ann Test456 888 2014-01-02 10.00.00.000
6 Test Ann Test789 888 2014-01-02 11.00.00.000
Field Notes
ID column is a unique ID in incremental numbers
MyReport is a text value and can actually be thousands of characters. Shortened for simplicity. In my scenario the text would be completely different
Rest of fields are varchar
My Goal
I need to address putting in a status of "F" for two conditions:
* If there is only one VisitNumber, update the status column of "F"
* If there is more than one visit number, only put "F" for the one based upon the earliest timestamp. For the other ones, put in a status of "A"
So going back to my table, here is the expectation
ID Name MyReport VisitNumber DateTimeStamp Status
-- --------- -------- ----------- ----------------------- ------
1 Test John Test123 123 2014-01-01 05.00.00.000 F
2 Test John Test456 123 2014-01-01 07.00.00.000 A
3 Test Sue Test123 555 2014-01-02 08.00.00.000 F
4 Test Ann Test123 888 2014-01-02 09.00.00.000 F
5 Test Ann Test456 888 2014-01-02 10.00.00.000 A
6 Test Ann Test789 888 2014-01-02 11.00.00.000 A
I was thinking I could handle this by splitting each types of duplicates/triplicates+ (2,3,4,5). Then updating every other (or every 3,4,5 rows). Then delete those from the original table and combine them together to export the data in SSIS. But I am thinking there is a much more efficient way of handling it.
Any thoughts? I can accomplish this by updating the table directly in SQL for this status column and then export normally through SSIS. Or if there is some way I can manipulate the column for the exact conditions I need, I can do it all in SSIS. I am just not sure how to proceed with this.
-- Sets status to 'F' on the earliest row of each VisitNumber and to 'A' on
-- every later row. A VisitNumber that occurs once only has rn = 1, so it gets 'F'.
-- The leading semicolon guards against an unterminated preceding statement.
;WITH ranked AS
(
    SELECT
        [status],
        ROW_NUMBER() OVER (
            PARTITION BY VisitNumber
            -- ID breaks ties between equal timestamps, so exactly one row per
            -- VisitNumber is chosen and the choice is repeatable.
            ORDER BY DateTimeStamp, ID
        ) AS rn
    FROM MyTable
)
UPDATE ranked
SET [status] = (CASE WHEN rn = 1 THEN 'F' ELSE 'A' END);
I put together a test script to check the results. For your purposes, use the update statements and replace the temp table with your table name.
-- Test fixture mirroring the question's table; status starts out NULL.
-- id is declared NOT NULL PRIMARY KEY (the question states it is a unique
-- incremental id), which also keeps "id not in (select id ...)" filters free of NULLs.
create table #temp1 (id int not null primary key, [name] varchar(50), myreport varchar(50), visitnumber varchar(50), dts datetime, [status] varchar(1))
-- Rows 3-6 use 2014-01-02 so the fixture matches the dates shown in the question.
insert into #temp1 (id,[name],myreport,visitnumber, dts) values (1,'Test John','Test123','123','2014-01-01 05:00')
insert into #temp1 (id,[name],myreport,visitnumber, dts) values (2,'Test John','Test456','123','2014-01-01 07:00')
insert into #temp1 (id,[name],myreport,visitnumber, dts) values (3,'Test Sue','Test123','555','2014-01-02 08:00')
insert into #temp1 (id,[name],myreport,visitnumber, dts) values (4,'Test Ann','Test123','888','2014-01-02 09:00')
insert into #temp1 (id,[name],myreport,visitnumber, dts) values (5,'Test Ann','Test456','888','2014-01-02 10:00')
insert into #temp1 (id,[name],myreport,visitnumber, dts) values (6,'Test Ann','Test789','888','2014-01-02 11:00')
-- State before the updates.
select * from #temp1;
-- Step 1: set status 'F' on every row whose dts is the earliest for its visitnumber.
UPDATE #temp1
SET status = 'F'
WHERE id IN (
    SELECT id
    FROM #temp1 AS cur
    INNER JOIN (
        SELECT visitnumber, MIN(dts) AS mindts
        FROM #temp1
        GROUP BY visitnumber
    ) AS earliest
        ON cur.visitnumber = earliest.visitnumber
        AND cur.dts = earliest.mindts
)
-- Step 2: set status 'A' on every row that is not the earliest for its visitnumber.
UPDATE #temp1
SET status = 'A'
WHERE id NOT IN (
    SELECT id
    FROM #temp1 AS cur
    INNER JOIN (
        SELECT visitnumber, MIN(dts) AS mindts
        FROM #temp1
        GROUP BY visitnumber
    ) AS earliest
        ON cur.visitnumber = earliest.visitnumber
        AND cur.dts = earliest.mindts
)
-- State after both updates, then clean up the temp table.
SELECT * FROM #temp1;
DROP TABLE #temp1
Hope this helps

Insert into Table B from Table A using Distinct

I have 2 tables
Table A
NameID FirstName MiddleName LastName Addr1 Addr2 Phn1 Phn2 City State
NULL Micheal Calvin Dodson 12 23 1234 123 XYZ ABC
NULL John NULL Keith NULL NULL 2344 NULL SQE FDG
NULL John NULL Keith NULL NULL 2344 NULL SQE FDG
NULL William Stephen NULL 45 NULL NULL NULL HJD ABC
NULL Victor NULL Anthony NULL NULL NULL NULL NULL NULL
Table B
NameID FirstName MiddleName LastName Addr1 Addr2 Phn1 Phn2 City State Zip Email Gender...
I need to get the distinct records of (FirstName,MiddleName,LastName) of Table A and insert the same details along with the other fields matching with Table A into Table B.
My Table B has NameID as an identity column. So after inserting a unique record into Table B, I need to get that NameID and insert it back into Table A, as shown below:
TABLE A
Table A
NameID FirstName MiddleName LastName Addr1 Addr2 Phn1 Phn2 City State
1 Micheal Calvin Dodson 12 23 1234 123 XYZ ABC
2 John NULL Keith NULL NULL 2344 NULL SQE FDG
2 John NULL Keith NULL NULL 2344 NULL SQE FDG
3 William Stephen NULL 45 NULL NULL NULL HJD ABC
4 Victor NULL Anthony NULL NULL NULL NULL NULL NULL
TABLE B
NameID FirstName MiddleName LastName Addr1 Addr2 Phn1 Phn2 City State Zip Email Gender...
1 Micheal Calvin Dodson 12 23 1234 123 XYZ ABC NULL NULL NULL
2 John NULL Keith NULL NULL 2344 NULL SQE FDG NULL NULL NULL
3 William Stephen NULL 45 NULL NULL NULL HJD ABC NULL NULL NULL
4 Victor NULL Anthony NULL NULL NULL NULL NULL NULL NULL NULL NULL
Can you please help me with this. Im not able to get this query right. Code in SQL Server 2008
Thanks in advance,
Sunitha
I think the easiest way to do this is with two queries. The first problem is handling duplicates in TableA. The following query selects an arbitrary row for each name combination:
-- Copies one row per distinct (FirstName, MiddleName, LastName) from TableA into TableB.
-- NameID is omitted because it is TableB's identity column and is generated on insert.
-- Ordering by a partition column makes seqnum arbitrary within a group, so an
-- arbitrary duplicate supplies the remaining column values.
insert into TableB (FirstName, MiddleName, LastName, Addr1, Addr2, Phn1, Phn2, City, State)
select FirstName, MiddleName, LastName, Addr1, Addr2, Phn1, Phn2, City, State
from (select a.*,
             row_number() over (partition by FirstName, MiddleName, LastName order by FirstName) as seqnum
      from TableA a
     ) a
where seqnum = 1
Then, update the original table:
-- Copies the NameId generated in TableB back to the TableA rows with the same
-- name parts. Plain "=" never matches when a name part is NULL on either side.
UPDATE TableA
SET NameId = (
        SELECT MAX(b.NameId)
        FROM TableB AS b
        WHERE b.FirstName = TableA.FirstName
          AND b.MiddleName = TableA.MiddleName
          AND b.LastName = TableA.LastName
    )
WHERE NameId IS NULL
If your fields contain NULL values (rather than blanks), you can use coalesce() for the join conditions:
-- Same back-fill of NameId, but NULL name parts are mapped to the sentinel
-- '<null>' on both sides so that NULL matches NULL.
UPDATE TableA
SET NameId = (
        SELECT MAX(b.NameId)
        FROM TableB AS b
        WHERE COALESCE(b.FirstName, '<null>') = COALESCE(TableA.FirstName, '<null>')
          AND COALESCE(b.MiddleName, '<null>') = COALESCE(TableA.MiddleName, '<null>')
          AND COALESCE(b.LastName, '<null>') = COALESCE(TableA.LastName, '<null>')
    )
WHERE NameId IS NULL
-- Captures the identity values generated by the insert into TableB.
-- Table variables are declared with the @ prefix; # denotes a temporary
-- table and cannot be DECLAREd.
DECLARE @results TABLE
(
    NameID INT,
    FirstName VARCHAR(32), -- guessing on data types for these columns
    MiddleName VARCHAR(32),
    LastName VARCHAR(32)
);

-- x numbers the rows within each (FirstName, MiddleName, LastName) group;
-- ORDER BY (SELECT NULL) means the numbering inside a group is arbitrary.
;WITH x AS
(
    SELECT FirstName, MiddleName, LastName,
        rn = ROW_NUMBER() OVER (PARTITION BY FirstName, MiddleName, LastName
                                ORDER BY (SELECT NULL)
                               ) --, ... other columns ...
    FROM dbo.TableA
)
-- Insert one row per distinct name and record each generated NameID via OUTPUT.
INSERT dbo.TableB
(
    FirstName, MiddleName, LastName --, ... other columns ...
)
OUTPUT
    inserted.NameID, inserted.FirstName,
    inserted.MiddleName, inserted.LastName
INTO @results
SELECT FirstName, MiddleName, LastName --, ... other columns ...
FROM x WHERE rn = 1;

-- Write the generated NameID back to every matching TableA row.
-- COALESCE(..., '') lets NULL name parts match each other (NULL and '' compare equal here).
UPDATE a SET NameID = r.NameID
FROM dbo.TableA AS a
INNER JOIN @results AS r
    ON COALESCE(a.FirstName,'') = COALESCE(r.FirstName,'')
    AND COALESCE(a.MiddleName,'') = COALESCE(r.MiddleName,'')
    AND COALESCE(a.LastName,'') = COALESCE(r.LastName,'');

Resources