-- Sample SQL DDL with complex lineage relationships -- E-commerce Data Warehouse Schema -- ============================================ -- RAW LAYER - Source tables -- ============================================ CREATE TABLE raw.customers ( customer_id INTEGER PRIMARY KEY, email VARCHAR(255) NOT NULL, first_name VARCHAR(100), last_name VARCHAR(100), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, country VARCHAR(50), segment VARCHAR(50) ); CREATE TABLE raw.orders ( order_id INTEGER PRIMARY KEY, customer_id INTEGER REFERENCES raw.customers(customer_id), order_date DATE NOT NULL, total_amount DECIMAL(10,2), currency VARCHAR(3) DEFAULT 'USD', status VARCHAR(20), shipping_address_id INTEGER ); CREATE TABLE raw.products ( product_id INTEGER PRIMARY KEY, product_name VARCHAR(255) NOT NULL, category VARCHAR(100), subcategory VARCHAR(100), brand VARCHAR(100), price DECIMAL(10,2), cost DECIMAL(10,2) ); CREATE TABLE raw.order_items ( order_item_id INTEGER PRIMARY KEY, order_id INTEGER REFERENCES raw.orders(order_id), product_id INTEGER REFERENCES raw.products(product_id), quantity INTEGER NOT NULL, unit_price DECIMAL(10,2), discount_percent DECIMAL(5,2) DEFAULT 0 ); -- ============================================ -- STAGING LAYER - Cleaned data -- ============================================ CREATE VIEW staging.stg_customers AS SELECT customer_id, LOWER(TRIM(email)) as email, INITCAP(first_name) as first_name, INITCAP(last_name) as last_name, DATE(created_at) as signup_date, UPPER(country) as country, COALESCE(segment, 'Unknown') as segment FROM raw.customers WHERE email IS NOT NULL; -- LINEAGE: raw.customers -> staging.stg_customers CREATE VIEW staging.stg_orders AS SELECT order_id, customer_id, order_date, total_amount, currency, CASE WHEN status IN ('completed', 'shipped', 'delivered') THEN 'Fulfilled' WHEN status IN ('pending', 'processing') THEN 'In Progress' ELSE 'Other' END as order_status FROM raw.orders WHERE order_date >= '2024-01-01'; -- LINEAGE: raw.orders -> staging.stg_orders CREATE VIEW staging.stg_products AS SELECT product_id, product_name, category, subcategory, brand, price, cost, (price - cost) / NULLIF(price, 0) * 100 as margin_percent FROM raw.products WHERE price > 0; -- LINEAGE: raw.products -> staging.stg_products CREATE VIEW staging.stg_order_items AS SELECT order_item_id, order_id, product_id, quantity, unit_price, discount_percent, quantity * unit_price * (1 - discount_percent/100) as line_total FROM raw.order_items; -- LINEAGE: raw.order_items -> staging.stg_order_items -- ============================================ -- INTERMEDIATE LAYER - Business logic -- ============================================ CREATE TABLE intermediate.int_customer_orders AS SELECT c.customer_id, c.email, c.first_name, c.last_name, c.signup_date, c.country, c.segment, COUNT(DISTINCT o.order_id) as total_orders, SUM(o.total_amount) as total_spent, MIN(o.order_date) as first_order_date, MAX(o.order_date) as last_order_date, AVG(o.total_amount) as avg_order_value FROM staging.stg_customers c LEFT JOIN staging.stg_orders o ON c.customer_id = o.customer_id GROUP BY c.customer_id, c.email, c.first_name, c.last_name, c.signup_date, c.country, c.segment; -- LINEAGE: staging.stg_customers, staging.stg_orders -> intermediate.int_customer_orders CREATE TABLE intermediate.int_order_details AS SELECT o.order_id, o.customer_id, o.order_date, o.order_status, oi.product_id, p.product_name, p.category, p.brand, oi.quantity, oi.unit_price, oi.line_total, p.margin_percent FROM staging.stg_orders o JOIN staging.stg_order_items oi ON o.order_id = oi.order_id JOIN staging.stg_products p ON oi.product_id = p.product_id; -- LINEAGE: staging.stg_orders, staging.stg_order_items, staging.stg_products -> intermediate.int_order_details -- ============================================ -- MARTS LAYER - Dimensional model -- ============================================ CREATE TABLE marts.dim_customers AS SELECT customer_id, email, first_name || ' ' || last_name as full_name, signup_date, country, segment, total_orders, total_spent, first_order_date, last_order_date, avg_order_value, CASE WHEN total_spent > 10000 THEN 'Platinum' WHEN total_spent > 5000 THEN 'Gold' WHEN total_spent > 1000 THEN 'Silver' ELSE 'Bronze' END as customer_tier, DATEDIFF(day, signup_date, first_order_date) as days_to_first_order FROM intermediate.int_customer_orders; -- LINEAGE: intermediate.int_customer_orders -> marts.dim_customers CREATE TABLE marts.dim_products AS SELECT product_id, product_name, category, subcategory, brand, price, cost, margin_percent, CASE WHEN margin_percent > 50 THEN 'High Margin' WHEN margin_percent > 25 THEN 'Medium Margin' ELSE 'Low Margin' END as margin_tier FROM staging.stg_products; -- LINEAGE: staging.stg_products -> marts.dim_products CREATE TABLE marts.fct_orders AS SELECT od.order_id, od.customer_id, od.product_id, od.order_date, od.order_status, od.quantity, od.unit_price, od.line_total, od.margin_percent, dc.customer_tier, dp.margin_tier, dp.category as product_category FROM intermediate.int_order_details od JOIN marts.dim_customers dc ON od.customer_id = dc.customer_id JOIN marts.dim_products dp ON od.product_id = dp.product_id; -- LINEAGE: intermediate.int_order_details, marts.dim_customers, marts.dim_products -> marts.fct_orders -- ============================================ -- REPORTING LAYER - Analytics views -- ============================================ CREATE VIEW reporting.rpt_daily_sales AS SELECT order_date, product_category, COUNT(DISTINCT order_id) as num_orders, SUM(quantity) as units_sold, SUM(line_total) as gross_revenue, AVG(line_total) as avg_order_value FROM marts.fct_orders GROUP BY order_date, product_category; -- LINEAGE: marts.fct_orders -> reporting.rpt_daily_sales CREATE VIEW reporting.rpt_customer_ltv AS SELECT customer_id, full_name, customer_tier, country, total_orders, total_spent as lifetime_value, avg_order_value, days_to_first_order, DATEDIFF(day, first_order_date, last_order_date) as customer_lifespan_days, total_spent / NULLIF(DATEDIFF(month, first_order_date, last_order_date), 0) as monthly_value FROM marts.dim_customers WHERE total_orders > 0; -- LINEAGE: marts.dim_customers -> reporting.rpt_customer_ltv CREATE VIEW reporting.rpt_product_performance AS SELECT dp.product_id, dp.product_name, dp.category, dp.brand, dp.margin_tier, COUNT(DISTINCT fo.order_id) as times_ordered, SUM(fo.quantity) as total_units_sold, SUM(fo.line_total) as total_revenue, AVG(fo.margin_percent) as avg_margin FROM marts.dim_products dp LEFT JOIN marts.fct_orders fo ON dp.product_id = fo.product_id GROUP BY dp.product_id, dp.product_name, dp.category, dp.brand, dp.margin_tier; -- LINEAGE: marts.dim_products, marts.fct_orders -> reporting.rpt_product_performance -- ============================================ -- SUMMARY: Lineage Flow -- ============================================ -- raw.customers -> staging.stg_customers -> intermediate.int_customer_orders -> marts.dim_customers -> reporting.rpt_customer_ltv -- raw.orders -> staging.stg_orders -> intermediate.int_customer_orders -- raw.orders -> staging.stg_orders -> intermediate.int_order_details -> marts.fct_orders -> reporting.rpt_daily_sales -- raw.products -> staging.stg_products -> intermediate.int_order_details -- raw.products -> staging.stg_products -> marts.dim_products -> marts.fct_orders -- raw.order_items -> staging.stg_order_items -> intermediate.int_order_details