Lineage-graph-accelerator / samples /warehouse_lineage_sample.json
aamanlamba's picture
Phase 2: Enhanced lineage extraction with export to data catalogs
0510038
{
"warehouse": {
"platform": "Snowflake",
"account": "xy12345.us-east-1",
"database": "ANALYTICS_DW"
},
"lineage": {
"datasets": [
{
"id": "raw.customers",
"type": "table",
"database": "ANALYTICS_DW",
"schema": "RAW",
"name": "CUSTOMERS",
"description": "Raw customer data from CRM",
"columns": [
{"name": "CUSTOMER_ID", "type": "NUMBER", "isPrimaryKey": true},
{"name": "EMAIL", "type": "VARCHAR", "pii": true},
{"name": "NAME", "type": "VARCHAR"},
{"name": "CREATED_AT", "type": "TIMESTAMP_NTZ"},
{"name": "SOURCE_SYSTEM", "type": "VARCHAR"}
],
"tags": ["pii", "raw"],
"owner": "data-platform-team"
},
{
"id": "raw.transactions",
"type": "table",
"database": "ANALYTICS_DW",
"schema": "RAW",
"name": "TRANSACTIONS",
"description": "Raw transaction events from payment gateway",
"columns": [
{"name": "TRANSACTION_ID", "type": "VARCHAR", "isPrimaryKey": true},
{"name": "CUSTOMER_ID", "type": "NUMBER", "isForeignKey": true, "references": "raw.customers.CUSTOMER_ID"},
{"name": "AMOUNT", "type": "NUMBER"},
{"name": "CURRENCY", "type": "VARCHAR"},
{"name": "TRANSACTION_DATE", "type": "DATE"},
{"name": "STATUS", "type": "VARCHAR"}
],
"tags": ["financial", "raw"],
"owner": "data-platform-team"
},
{
"id": "raw.products",
"type": "table",
"database": "ANALYTICS_DW",
"schema": "RAW",
"name": "PRODUCTS",
"description": "Product catalog from inventory system"
},
{
"id": "staging.customers_cleaned",
"type": "view",
"database": "ANALYTICS_DW",
"schema": "STAGING",
"name": "CUSTOMERS_CLEANED",
"description": "Deduplicated and cleaned customer records",
"transformation": "DEDUP + CLEAN + VALIDATE",
"owner": "analytics-engineering"
},
{
"id": "staging.transactions_enriched",
"type": "view",
"database": "ANALYTICS_DW",
"schema": "STAGING",
"name": "TRANSACTIONS_ENRICHED",
"description": "Transactions with currency conversion and status mapping",
"transformation": "ENRICH + CONVERT + MAP"
},
{
"id": "marts.dim_customer",
"type": "table",
"database": "ANALYTICS_DW",
"schema": "MARTS",
"name": "DIM_CUSTOMER",
"description": "Customer dimension with SCD Type 2",
"transformation": "SCD_TYPE_2 + AGGREGATE"
},
{
"id": "marts.fct_transaction",
"type": "table",
"database": "ANALYTICS_DW",
"schema": "MARTS",
"name": "FCT_TRANSACTION",
"description": "Transaction fact table with dimensions"
},
{
"id": "reporting.customer_360",
"type": "view",
"database": "ANALYTICS_DW",
"schema": "REPORTING",
"name": "CUSTOMER_360",
"description": "Complete customer view for BI tools"
},
{
"id": "reporting.revenue_dashboard",
"type": "materialized_view",
"database": "ANALYTICS_DW",
"schema": "REPORTING",
"name": "REVENUE_DASHBOARD",
"description": "Aggregated revenue metrics for executive dashboard",
"refresh_schedule": "DAILY at 06:00 UTC"
},
{
"id": "external.crm_export",
"type": "external_table",
"location": "s3://company-exports/crm/",
"description": "CRM data export to S3"
},
{
"id": "external.bi_semantic_layer",
"type": "semantic_model",
"platform": "Looker",
"description": "Looker semantic model for self-service analytics"
}
],
"relationships": [
{
"source": "raw.customers",
"target": "staging.customers_cleaned",
"type": "transform",
"job": "dbt_staging_customers",
"schedule": "hourly"
},
{
"source": "raw.transactions",
"target": "staging.transactions_enriched",
"type": "transform",
"job": "dbt_staging_transactions"
},
{
"source": "staging.customers_cleaned",
"target": "marts.dim_customer",
"type": "transform",
"job": "dbt_marts_dim_customer"
},
{
"source": "staging.transactions_enriched",
"target": "marts.fct_transaction",
"type": "transform"
},
{
"source": "raw.products",
"target": "marts.fct_transaction",
"type": "reference"
},
{
"source": "marts.dim_customer",
"target": "marts.fct_transaction",
"type": "reference"
},
{
"source": "marts.dim_customer",
"target": "reporting.customer_360",
"type": "transform"
},
{
"source": "marts.fct_transaction",
"target": "reporting.customer_360",
"type": "transform"
},
{
"source": "marts.fct_transaction",
"target": "reporting.revenue_dashboard",
"type": "aggregate"
},
{
"source": "marts.dim_customer",
"target": "reporting.revenue_dashboard",
"type": "reference"
},
{
"source": "reporting.customer_360",
"target": "external.crm_export",
"type": "export",
"job": "airflow_crm_sync"
},
{
"source": "reporting.revenue_dashboard",
"target": "external.bi_semantic_layer",
"type": "publish",
"job": "looker_sync"
}
],
"jobs": [
{
"id": "dbt_staging_customers",
"type": "dbt",
"schedule": "0 * * * *",
"description": "Hourly customer staging refresh"
},
{
"id": "dbt_staging_transactions",
"type": "dbt",
"schedule": "0 * * * *"
},
{
"id": "dbt_marts_dim_customer",
"type": "dbt",
"schedule": "0 2 * * *"
},
{
"id": "airflow_crm_sync",
"type": "airflow",
"schedule": "0 6 * * *"
},
{
"id": "looker_sync",
"type": "api",
"schedule": "0 7 * * *"
}
]
},
"notes": "Sample Snowflake data warehouse lineage with multi-layer architecture (raw, staging, marts, reporting) and external system integrations."
}