filmov
tv
How to Implement Slowly Changing Dimension(SCD) Type 2 Using Insert and Update Commands in Snowflake

Показать описание
In this video , I am going to show you how to implement Slowly Changing Dimension(SCD) Type 2 Using Insert and Update Commands in Snowflake.
⌚Timestamps:
00:00 Intro
00:16 SCD Type2
06:16 Demo on SCD Type2 - Day 1 file processing
17:13 Demo on SCD Type2 - Day 2 file processing
20:38 Demo on SCD Type2 - Day 3 file processing
23:05 Outro
use role accountadmin;
use database demo_db;
use schema public;
create or replace TRANSIENT TABLE STG_EMP (
HK_EMPID VARCHAR(16777216),
CHANGE_HK VARCHAR(16777216),
EMPID NUMBER(38,0),
NAME VARCHAR(30),
SALARY NUMBER(20,2),
DESIGNATION VARCHAR(16777216),
OFFICE VARCHAR(30),
INSERT_TS TIMESTAMP_NTZ(9),
FILE_NAME VARCHAR(200),
FILE_ROW_NUMBER NUMBER(38,0)
);
create or replace TABLE EMP (
hk_empid varchar,
Change_HK varchar,
empID NUMBER(38,0),
NAME VARCHAR(30),
SALARY NUMBER(20,2),
DESIGNATION VARCHAR(16777216),
OFFICE VARCHAR(30),
active_indicator varchar(1),
effective_start_date date,
effective_end_date date,
INSERT_TS TIMESTAMP_NTZ(9),
UPDATE_TS TIMESTAMP_NTZ(9),
FILE_NAME VARCHAR(200),
FILE_ROW_NUMBER NUMBER(38,0),
dss_version number
);
use role accountadmin;
use database demo_db;
use schema public;
remove @mycsvstage;
truncate table STG_EMP;
COPY INTO STG_EMP(
empid
,name
,salary
,Designation
,office
,file_name
,file_row_number
,insert_ts
,hk_empid
,change_hk
)
FROM (
SELECT t.$1,t.$2,t.$3,t.$4,t.$5
,metadata$filename
,metadata$file_row_number
,CAST(current_timestamp as timestamp)
,CAST(MD5(
NVL(CAST($1 as varchar),'null') ) as varchar
) as hk_empid
,CAST(MD5(
NVL(CAST($2 as varchar),'null') || '||'||
NVL(CAST($3 as varchar),'null') || '||'||
NVL(CAST($4 as varchar),'null') || '||'||
NVL(CAST($5 as varchar),'null')
) as varchar
) as change_hk
from @mycsvstage/ t )
on_error = 'skip_file'
;
insert into EMP (
hk_empid
,change_hk
,empid
,name
,salary
,Designation
,office
,active_indicator
,effective_start_date
,effective_end_date
,file_name
,file_row_number
,insert_ts
,update_ts
,dss_version
)
SELECT
,s.Designation
,'Y'
,current_date()-1
,to_date('9999-12-31','YYYY-MM-DD')
,update_ts
from (
SELECT
,s.Designation
,CAST(current_timestamp as timestamp) update_ts
FROM stg_emp s qualify rn=1
) s
LEFT OUTER JOIN (
,max(dss_version) dss_version
from EMP a group by hk_empid
) as current_rows
where not exists(
select 1
from EMP e
);
update emp e
set
from (
SELECT
,s.Designation
,dss_version
FROM emp s where active_indicator ='Y' qualify rn !=1
) S
;
#snowflake#datacloud#database#datawarehouse#vcklytech
⌚Timestamps:
00:00 Intro
00:16 SCD Type2
06:16 Demo on SCD Type2 - Day 1 file processing
17:13 Demo on SCD Type2 - Day 2 file processing
20:38 Demo on SCD Type2 - Day 3 file processing
23:05 Outro
use role accountadmin;
use database demo_db;
use schema public;
create or replace TRANSIENT TABLE STG_EMP (
HK_EMPID VARCHAR(16777216),
CHANGE_HK VARCHAR(16777216),
EMPID NUMBER(38,0),
NAME VARCHAR(30),
SALARY NUMBER(20,2),
DESIGNATION VARCHAR(16777216),
OFFICE VARCHAR(30),
INSERT_TS TIMESTAMP_NTZ(9),
FILE_NAME VARCHAR(200),
FILE_ROW_NUMBER NUMBER(38,0)
);
create or replace TABLE EMP (
hk_empid varchar,
Change_HK varchar,
empID NUMBER(38,0),
NAME VARCHAR(30),
SALARY NUMBER(20,2),
DESIGNATION VARCHAR(16777216),
OFFICE VARCHAR(30),
active_indicator varchar(1),
effective_start_date date,
effective_end_date date,
INSERT_TS TIMESTAMP_NTZ(9),
UPDATE_TS TIMESTAMP_NTZ(9),
FILE_NAME VARCHAR(200),
FILE_ROW_NUMBER NUMBER(38,0),
dss_version number
);
use role accountadmin;
use database demo_db;
use schema public;
remove @mycsvstage;
truncate table STG_EMP;
COPY INTO STG_EMP(
empid
,name
,salary
,Designation
,office
,file_name
,file_row_number
,insert_ts
,hk_empid
,change_hk
)
FROM (
SELECT t.$1,t.$2,t.$3,t.$4,t.$5
,metadata$filename
,metadata$file_row_number
,CAST(current_timestamp as timestamp)
,CAST(MD5(
NVL(CAST($1 as varchar),'null') ) as varchar
) as hk_empid
,CAST(MD5(
NVL(CAST($2 as varchar),'null') || '||'||
NVL(CAST($3 as varchar),'null') || '||'||
NVL(CAST($4 as varchar),'null') || '||'||
NVL(CAST($5 as varchar),'null')
) as varchar
) as change_hk
from @mycsvstage/ t )
on_error = 'skip_file'
;
insert into EMP (
hk_empid
,change_hk
,empid
,name
,salary
,Designation
,office
,active_indicator
,effective_start_date
,effective_end_date
,file_name
,file_row_number
,insert_ts
,update_ts
,dss_version
)
SELECT
,s.Designation
,'Y'
,current_date()-1
,to_date('9999-12-31','YYYY-MM-DD')
,update_ts
from (
SELECT
,s.Designation
,CAST(current_timestamp as timestamp) update_ts
FROM stg_emp s qualify rn=1
) s
LEFT OUTER JOIN (
,max(dss_version) dss_version
from EMP a group by hk_empid
) as current_rows
where not exists(
select 1
from EMP e
);
update emp e
set
from (
SELECT
,s.Designation
,dss_version
FROM emp s where active_indicator ='Y' qualify rn !=1
) S
;
#snowflake#datacloud#database#datawarehouse#vcklytech
Комментарии