Update code and data
This commit is contained in:
@@ -1,12 +0,0 @@
|
||||
-- Generates the DDL for staging.factfinder_import into create_script.sql and
-- then runs that file. The table has geo_id, geo_id2, geo_display plus the
-- column pairs s01/s01_perc .. s51/s51_perc, all varchar(255).

-- Set the output format explicitly: \a and \t are toggles, so running the
-- script twice in one session would flip back to aligned output with headers
-- and corrupt the generated file.
\pset format unaligned
\pset tuples_only on
\cd /temp
CREATE SCHEMA IF NOT EXISTS staging;
-- ORDER BY i makes the generated column order deterministic.
SELECT 'CREATE TABLE staging.factfinder_import(geo_id varchar(255)
, geo_id2 varchar(255), geo_display varchar(255)
, ' || string_agg('s' || lpad(i::text, 2, '0')
    || ' varchar(255), s' || lpad(i::text, 2, '0') || '_perc varchar(255) ', ',' ORDER BY i) || ');' AS create_sql
FROM generate_series(1, 51) AS i
-- \g <file> executes the query above and writes its result to the file.
\g create_script.sql
\i create_script.sql
|
||||
19
ch04/build_stats.psql
Normal file
19
ch04/build_stats.psql
Normal file
@@ -0,0 +1,19 @@
|
||||
-- Builds table_stats: one row per entry of information_schema.tables in
-- schema pg_catalog, holding the qualified relation name and its row count.
-- Works by generating a SQL script into create_script.sql and then running it.

-- Explicit settings instead of the \a / \t toggles so the script is repeatable
-- within one session.
\pset format unaligned
\pset tuples_only on
\cd /tmp
-- Run the DROP before redirecting output: \o also captures command tags, which
-- would otherwise end up inside the generated script.
DROP TABLE IF EXISTS table_stats;
-- \o redirects the results of all following queries (not just one, as \g does).
\o create_script.sql
SELECT
'CREATE TABLE table_stats (
table_name varchar(255), count bigint);';
-- %1$I / %2$I quote the schema and table as identifiers for the FROM clause;
-- %3$L embeds the qualified name as a string literal for the table_name column.
-- Each generated statement ends with ";" so the script can hold many of them.
SELECT format('INSERT INTO table_stats(
table_name, count
)
VALUES (%3$L,
(SELECT COUNT(1)
FROM %1$I.%2$I
) );', table_schema, table_name, format('%I.%I', table_schema, table_name))
FROM information_schema.tables
WHERE table_schema = 'pg_catalog';
-- Close the output file before executing it.
\o
\i create_script.sql
|
||||
@@ -1,29 +1,64 @@
|
||||
-- Stages ACS S2502 column metadata and raw data in schema staging.
-- NOTE(review): several statements below appear twice with small differences
-- (two \cd, two metadata \copy, two "|| string_agg(" lines, two ALTER TABLE).
-- This looks like two revisions of the script merged together - confirm which
-- variant of each pair should remain.
\connect postgresql_book
|
||||
-- Both \cd paths are absolute, so the second one determines the working directory.
\cd /postgresql_up_and_running_4e_code_data/raw/ACSST1Y2024.S2502
|
||||
|
||||
\cd /postgresql_up_and_running_4e_code_data/raw/ACS.S2502
|
||||
CREATE SCHEMA IF NOT EXISTS staging;
|
||||
DROP TABLE IF EXISTS staging.lu_acs_columns;
|
||||
CREATE TABLE staging.lu_acs_columns(column_name text, label text);
|
||||
-- Both \copy commands append to the same table; file names are resolved
-- relative to the directory set by \cd above.
\copy staging.lu_acs_columns FROM ACSST1Y2024.S2502-Column-Metadata.csv CSV HEADER
|
||||
\copy staging.lu_acs_columns FROM ACSST5Y2023.S2502-Column-Metadata.csv CSV HEADER
|
||||
|
||||
-- Register any column names not yet present in census.lu_acs_types.
INSERT INTO census.lu_acs_types(id, label)
|
||||
SELECT column_name, label
|
||||
FROM staging.lu_acs_columns
|
||||
WHERE column_name NOT IN(SELECT a.id FROM census.lu_acs_types AS a);
|
||||
|
||||
DROP TABLE IF EXISTS staging.acs_data_raw;
|
||||
-- <4>
|
||||
-- Generates a CREATE TABLE statement with one text column per row of
-- staging.lu_acs_columns; \gexec then executes the generated statement.
-- NOTE(review): two consecutive "|| string_agg(" lines follow but only one
-- closing "', ',')" - as written the parentheses do not balance.
SELECT 'CREATE TABLE staging.acs_data_raw('
|
||||
|| string_agg( quote_ident( lower(column_name) )
|
||||
|| string_agg( quote_ident(column_name )
|
||||
|| ' text', ',') || ');'
|
||||
FROM staging.lu_acs_columns
|
||||
\gexec
|
||||
ALTER TABLE staging.acs_data_raw ADD COLUMN unknown text;
|
||||
\gexec <5>
|
||||
|
||||
\copy staging.acs_data_raw FROM ACSST1Y2024.S2502-Data.csv WITH (format 'csv', HEADER)
|
||||
ALTER TABLE staging.acs_data_raw ADD COLUMN unknown text; -- <6>
|
||||
|
||||
CREATE SCHEMA IF NOT EXISTS census;
|
||||
DROP TABLE IF EXISTS census.acs_data;
|
||||
-- Builds the text of a CREATE TABLE census.acs_data AS SELECT statement: each
-- column becomes NULLIF(col, 'N') cast to text for GEO_ID/NAME and to numeric
-- otherwise. NOTE(review): no FROM clause for column_name and no terminator
-- (";" or \gexec) is visible for this SELECT in this section - confirm.
SELECT 'CREATE TABLE census.acs_data AS
|
||||
SELECT '
|
||||
|| string_agg( 'NULLIF(' || quote_ident( lower(column_name) ) || ', ''N'') '
|
||||
|| CASE WHEN column_name IN('GEO_ID', 'NAME') THEN '::text' ELSE '::numeric' END
|
||||
|| ' AS '
|
||||
|| quote_ident( lower(column_name) ) , ',' ) || '
|
||||
FROM staging.acs_data_raw
|
||||
WHERE geo_id <> ''Geography'';'
|
||||
-- 8 to 9 secs, 85396 rows
|
||||
\copy staging.acs_data_raw FROM ACSST5Y2023.S2502-Data.csv WITH (format 'csv', HEADER)
|
||||
|
||||
-- <7> takes 14 - 24 minutes
-- Unpivots every column of staging.acs_data_raw into (key, value) pairs via
-- to_jsonb / jsonb_each_text and loads the numeric-looking values into
-- census.acs_facts for year 2023.
INSERT INTO census.acs_facts(acs_type_id, tract_id, yr, val)
SELECT kv.key AS acs_type_id, r."GEO_ID" AS tract_id, 2023 AS yr,
    kv.value::numeric AS val
FROM staging.acs_data_raw AS r
CROSS JOIN LATERAL jsonb_each_text(to_jsonb(r)) AS kv
-- Accept only strings that are valid numeric literals: an optional leading
-- minus, then digits with an optional fraction, or a bare fraction. A plain
-- character-class test would also let through "-", "." or "1-2", which make
-- the ::numeric cast raise an error and abort the whole INSERT.
WHERE kv.value ~ '^-?([0-9]+\.?[0-9]*|\.[0-9]+)$';
|
||||
|
||||
-- Loads the 2020 five-year ACS S2502 files: column metadata first, then the
-- raw data into an all-text staging table, then numeric facts into
-- census.acs_facts.
DROP TABLE IF EXISTS staging.lu_acs_columns;
CREATE TABLE staging.lu_acs_columns(column_name text, label text);
\copy staging.lu_acs_columns FROM ACSST5Y2020.S2502-Column-Metadata.csv CSV HEADER

-- Register any column names not yet present in census.lu_acs_types.
INSERT INTO census.lu_acs_types(id, label)
SELECT column_name, label
FROM staging.lu_acs_columns
WHERE column_name NOT IN(SELECT a.id FROM census.lu_acs_types AS a);

DROP TABLE IF EXISTS staging.acs_data_raw;
-- <4> <5>
-- Generate a CREATE TABLE statement with one text column per metadata row;
-- \gexec executes the generated statement. The callout marker is kept in this
-- comment rather than after \gexec, where psql would treat it as a stray
-- argument.
SELECT 'CREATE TABLE staging.acs_data_raw('
    || string_agg( quote_ident(column_name )
    || ' text', ',') || ');'
FROM staging.lu_acs_columns
\gexec

ALTER TABLE staging.acs_data_raw ADD COLUMN unknown text; -- <6>

\copy staging.acs_data_raw FROM ACSST5Y2020.S2502-Data.csv WITH (format 'csv', HEADER)

-- <7> takes 14 - 24 minutes
-- Generates one INSERT per data column (all except GEO_ID and NAME) and runs
-- each via \gexec. %1$L / %1$I embed the column name as a literal and as an
-- identifier respectively. %2$L embeds the numeric-literal pattern; it is
-- anchored at both ends so values with trailing non-numeric characters, or
-- bare "-" / ".", are skipped instead of making the ::numeric cast fail.
SELECT format('INSERT INTO census.acs_facts(acs_type_id, tract_id, yr, val)
SELECT %1$L, r."GEO_ID" AS tract_id, 2020 AS yr, r.%1$I::numeric AS val
FROM staging.acs_data_raw AS r
WHERE r.%1$I ~ %2$L;', column_name, '^-?([0-9]+\.?[0-9]*|\.[0-9]+)$')
FROM staging.lu_acs_columns
WHERE column_name NOT IN('GEO_ID', 'NAME')
\gexec
|
||||
Reference in New Issue
Block a user