filmov
tv
Effortless Data Loading: Mastering Snowflake's COPY INTO Command for Seamless Table Population!
Показать описание
In this comprehensive video tutorial, unlock the power of Snowflake's COPY INTO command as we guide you through the seamless process of loading data files into your tables. Whether you're a beginner or an experienced user, this step-by-step guide will equip you with the skills to effortlessly populate your tables with data from various sources. Explore best practices, tips, and tricks for optimizing performance and ensuring data integrity. Join us as we demystify the COPY INTO command and empower you to streamline your data loading workflows in Snowflake like never before.
------------------------------------------------------------------------
SQL Scripts
------
-- copy command
list @MY_DB.MY_STAGES_SCHEMA.MY_STAGE;
CREATE OR REPLACE TABLE MY_DB.PUBLIC.LOAN_PAYMENT (
Loan_ID STRING,
loan_status STRING,
Principal STRING,
terms STRING,
effective_date STRING,
due_date STRING,
paid_off_time STRING,
past_due_days STRING,
age STRING,
education STRING,
Gender STRING);
//Loading the data from internal stage
select * from MY_DB.PUBLIC.LOAN_PAYMENT;
COPY INTO MY_DB.PUBLIC.LOAN_PAYMENT
FROM @MY_DB.MY_STAGES_SCHEMA.MY_STAGE
file_format = (type = csv field_delimiter = ',' skip_header=1)
SELECT * FROM MY_DB.PUBLIC.LOAN_PAYMENT;
TRUNCATE TABLE MY_DB.PUBLIC.LOAN_PAYMENT;
COPY INTO MY_DB.PUBLIC.LOAN_PAYMENT
FROM @MY_DB.MY_STAGES_SCHEMA.MY_STAGE
file_format = (type = csv field_delimiter = ',' skip_header=1)
SELECT * FROM MY_DB.PUBLIC.LOAN_PAYMENT;
list @MY_DB.MY_STAGES_SCHEMA.MY_STAGE;
COPY INTO MY_DB.PUBLIC.LOAN_PAYMENT
FILE_FORMAT = (TYPE = CSV FIELD_DELIMITER = ',' SKIP_HEADER = 1);
//Validate
SELECT * FROM MY_DB.PUBLIC.LOAN_PAYMENT;
-----------------------------------------------------------------------------------------
---Python Scripts --------------------------------------
# conda install Faker
import csv
from faker import Faker
import random
from datetime import datetime, timedelta
fake = Faker()
# Define loan statuses
loan_statuses = ['PAIDOFF', 'COLLECTION', 'COLLECTION_PAIDOFF']
# Define education levels
education_levels = ['High School or Below', 'College', 'Bechalor', 'Master or Above']
# Define genders
genders = ['male', 'female']
# Generate 50000 loan payment records
num_records = 4000
"due_date", "paid_off_time", "past_due_days", "age", "education", "Gender"])
for i in range(num_records):
paid_off_time = ''
past_due_days = ''
if loan_status == 'PAIDOFF':
elif loan_status == 'COLLECTION_PAIDOFF':
past_due_days = str((paid_off_time - due_date).days)
elif loan_status == 'COLLECTION':
paid_off_time, past_due_days, age, education, gender])
print("Dataset generation completed.")
------------------------------------------------------------------------------------
#Snowflake #DataLoading #TablePopulation #COPYINTO #DataIntegration #DataManagement #DataWarehousing #CloudComputing #DataAnalytics #ETL #DataEngineering #TechTutorial #DataPipeline #DataIngestion #DataProcessing #DataWorkflow
------------------------------------------------------------------------
SQL Scripts
------
-- copy command
list @MY_DB.MY_STAGES_SCHEMA.MY_STAGE;
CREATE OR REPLACE TABLE MY_DB.PUBLIC.LOAN_PAYMENT (
Loan_ID STRING,
loan_status STRING,
Principal STRING,
terms STRING,
effective_date STRING,
due_date STRING,
paid_off_time STRING,
past_due_days STRING,
age STRING,
education STRING,
Gender STRING);
//Loading the data from internal stage
select * from MY_DB.PUBLIC.LOAN_PAYMENT;
COPY INTO MY_DB.PUBLIC.LOAN_PAYMENT
FROM @MY_DB.MY_STAGES_SCHEMA.MY_STAGE
file_format = (type = csv field_delimiter = ',' skip_header=1)
SELECT * FROM MY_DB.PUBLIC.LOAN_PAYMENT;
TRUNCATE TABLE MY_DB.PUBLIC.LOAN_PAYMENT;
COPY INTO MY_DB.PUBLIC.LOAN_PAYMENT
FROM @MY_DB.MY_STAGES_SCHEMA.MY_STAGE
file_format = (type = csv field_delimiter = ',' skip_header=1)
SELECT * FROM MY_DB.PUBLIC.LOAN_PAYMENT;
list @MY_DB.MY_STAGES_SCHEMA.MY_STAGE;
COPY INTO MY_DB.PUBLIC.LOAN_PAYMENT
FILE_FORMAT = (TYPE = CSV FIELD_DELIMITER = ',' SKIP_HEADER = 1);
//Validate
SELECT * FROM MY_DB.PUBLIC.LOAN_PAYMENT;
-----------------------------------------------------------------------------------------
---Python Scripts --------------------------------------
# conda install Faker
import csv
from faker import Faker
import random
from datetime import datetime, timedelta
fake = Faker()
# Define loan statuses
loan_statuses = ['PAIDOFF', 'COLLECTION', 'COLLECTION_PAIDOFF']
# Define education levels
education_levels = ['High School or Below', 'College', 'Bechalor', 'Master or Above']
# Define genders
genders = ['male', 'female']
# Generate 50000 loan payment records
num_records = 4000
"due_date", "paid_off_time", "past_due_days", "age", "education", "Gender"])
for i in range(num_records):
paid_off_time = ''
past_due_days = ''
if loan_status == 'PAIDOFF':
elif loan_status == 'COLLECTION_PAIDOFF':
past_due_days = str((paid_off_time - due_date).days)
elif loan_status == 'COLLECTION':
paid_off_time, past_due_days, age, education, gender])
print("Dataset generation completed.")
------------------------------------------------------------------------------------
#Snowflake #DataLoading #TablePopulation #COPYINTO #DataIntegration #DataManagement #DataWarehousing #CloudComputing #DataAnalytics #ETL #DataEngineering #TechTutorial #DataPipeline #DataIngestion #DataProcessing #DataWorkflow
Комментарии