DuckDb ROCKS!! Dbeaver,Data Cleaning,and Cyclistic (vid 02)

preview_player
Показать описание
#cyclisticbikeshare , #duckdb, #DBeaver, #datacleaning
Importing CSV files into Duckdb:
Рекомендации по теме
Комментарии
Автор

-- Begin Data Cleaning

DROP TABLE IF EXISTS analytics;
CREATE TABLE analytics AS
-- (1) Extract start_date from started_at, and end_date from ended_at
SELECT CAST(started_at as date) as start_date,
CAST(ended_at as date) as end_date,
-- (2)Extract Start Hour from started_at
EXTRACT(hour from started_at) as start_hour,
-- (3)Extract Number for day of week. (0=Sunday)
EXTRACT(dow from started_at) as day_of_week,
-- (4) Round Starting Longitude (start_lng), and Starting Latitude (start_lat)
round(start_lat, 2) as start_latitude,
round(start_lng, 2) as start_longitude, *
FROM rides
-- Extract same day rides
WHERE start_date = end_date AND
-- Only keep rides that place between 6A - 6P (18:00)
start_hour >=6 and start_hour <=18;

--- Update start_station_name with Bike Type if start_station_name

SELECT count(*) FROM analytics WHERE start_station_name IS NULL

-- Check which rideable_type has the most NA(a)
SELECT rideable_type, start_station_name, count(*)
FROM analytics
WHERE start_station_name IS NULL
group by rideable_type, start_station_name
ORDER BY rideable_type, start_station_name ;

UPDATE analytics
SET start_station_name = rideable_type
WHERE start_station_name IS NULL;

CyclisticBikeShareProject-vb