Commit
·
322f5cd
1
Parent(s):
60ac2eb
added tests, removed mermaid dependency - lineage extractor
Browse files
- app.py +42 -12
- tests/test_app.py +38 -0
app.py
CHANGED
|
@@ -8,6 +8,27 @@ import json
|
|
| 8 |
import os
|
| 9 |
from typing import Optional, Tuple
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
# Note: This is a template. You'll need to integrate with your actual agent backend.
|
| 12 |
# This could be through an API, Claude SDK, or other agent framework.
|
| 13 |
|
|
@@ -30,8 +51,9 @@ def extract_lineage_from_text(
|
|
| 30 |
# TODO: Integrate with your agent backend
|
| 31 |
# This is where you'd call your agent with the metadata_parser and graph_visualizer workers
|
| 32 |
|
|
|
|
| 33 |
return (
|
| 34 |
-
|
| 35 |
f"Processed {source_type} metadata. Found X nodes and Y relationships."
|
| 36 |
)
|
| 37 |
|
|
@@ -55,8 +77,9 @@ def extract_lineage_from_bigquery(
|
|
| 55 |
"""
|
| 56 |
# TODO: Integrate with BigQuery and your agent backend
|
| 57 |
|
|
|
|
| 58 |
return (
|
| 59 |
-
|
| 60 |
f"Extracted lineage from BigQuery project: {project_id}"
|
| 61 |
)
|
| 62 |
|
|
@@ -76,8 +99,9 @@ def extract_lineage_from_url(
|
|
| 76 |
"""
|
| 77 |
# TODO: Integrate with URL fetching and your agent backend
|
| 78 |
|
|
|
|
| 79 |
return (
|
| 80 |
-
|
| 81 |
f"Extracted lineage from URL: {url}"
|
| 82 |
)
|
| 83 |
|
|
@@ -95,6 +119,12 @@ with gr.Blocks(title="Lineage Graph Extractor", theme=gr.themes.Soft()) as demo:
|
|
| 95 |
- **URLs/APIs**: Fetch metadata from web endpoints
|
| 96 |
- **dbt, Airflow, Snowflake**: Through MCP integration (when configured)
|
| 97 |
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
with gr.Tabs():
|
| 100 |
# Tab 1: Text/File Input
|
|
@@ -119,9 +149,9 @@ with gr.Blocks(title="Lineage Graph Extractor", theme=gr.themes.Soft()) as demo:
|
|
| 119 |
extract_btn_text = gr.Button("Extract Lineage", variant="primary")
|
| 120 |
|
| 121 |
with gr.Column():
|
| 122 |
-
output_viz_text = gr.
|
| 123 |
-
|
| 124 |
-
|
| 125 |
)
|
| 126 |
output_summary_text = gr.Textbox(
|
| 127 |
label="Summary",
|
|
@@ -160,9 +190,9 @@ with gr.Blocks(title="Lineage Graph Extractor", theme=gr.themes.Soft()) as demo:
|
|
| 160 |
extract_btn_bq = gr.Button("Extract Lineage", variant="primary")
|
| 161 |
|
| 162 |
with gr.Column():
|
| 163 |
-
output_viz_bq = gr.
|
| 164 |
-
|
| 165 |
-
|
| 166 |
)
|
| 167 |
output_summary_bq = gr.Textbox(
|
| 168 |
label="Summary",
|
|
@@ -191,9 +221,9 @@ with gr.Blocks(title="Lineage Graph Extractor", theme=gr.themes.Soft()) as demo:
|
|
| 191 |
extract_btn_url = gr.Button("Extract Lineage", variant="primary")
|
| 192 |
|
| 193 |
with gr.Column():
|
| 194 |
-
output_viz_url = gr.
|
| 195 |
-
|
| 196 |
-
|
| 197 |
)
|
| 198 |
output_summary_url = gr.Textbox(
|
| 199 |
label="Summary",
|
|
|
|
| 8 |
import os
|
| 9 |
from typing import Optional, Tuple
|
| 10 |
|
| 11 |
+
|
| 12 |
+
def render_mermaid(viz_code: str) -> str:
|
| 13 |
+
"""Wrap mermaid source in HTML and initialize mermaid when the HTML is inserted.
|
| 14 |
+
|
| 15 |
+
This function also escapes angle brackets in the source to avoid HTML injection
|
| 16 |
+
while keeping the mermaid syntax intact.
|
| 17 |
+
"""
|
| 18 |
+
# Escape HTML-sensitive characters
|
| 19 |
+
safe_viz = viz_code.replace("<", "&lt;").replace(">", "&gt;")
|
| 20 |
+
# Script will wait for mermaid to be available then initialize diagrams.
|
| 21 |
+
init_script = (
|
| 22 |
+
"<script>"
|
| 23 |
+
"(function(){"
|
| 24 |
+
"function run(){"
|
| 25 |
+
" if(window.mermaid){ mermaid.init(undefined, document.querySelectorAll('.mermaid')); }"
|
| 26 |
+
" else { setTimeout(run,50); }"
|
| 27 |
+
" } run();})();"
|
| 28 |
+
"</script>"
|
| 29 |
+
)
|
| 30 |
+
return f"<div class=\"mermaid\">{safe_viz}</div>{init_script}"
|
| 31 |
+
|
| 32 |
# Note: This is a template. You'll need to integrate with your actual agent backend.
|
| 33 |
# This could be through an API, Claude SDK, or other agent framework.
|
| 34 |
|
|
|
|
| 51 |
# TODO: Integrate with your agent backend
|
| 52 |
# This is where you'd call your agent with the metadata_parser and graph_visualizer workers
|
| 53 |
|
| 54 |
+
viz = "graph TD\n A[Sample Node] --> B[Output Node]"
|
| 55 |
return (
|
| 56 |
+
render_mermaid(viz),
|
| 57 |
f"Processed {source_type} metadata. Found X nodes and Y relationships."
|
| 58 |
)
|
| 59 |
|
|
|
|
| 77 |
"""
|
| 78 |
# TODO: Integrate with BigQuery and your agent backend
|
| 79 |
|
| 80 |
+
viz = "graph TD\n A[BigQuery Table] --> B[Destination Table]"
|
| 81 |
return (
|
| 82 |
+
render_mermaid(viz),
|
| 83 |
f"Extracted lineage from BigQuery project: {project_id}"
|
| 84 |
)
|
| 85 |
|
|
|
|
| 99 |
"""
|
| 100 |
# TODO: Integrate with URL fetching and your agent backend
|
| 101 |
|
| 102 |
+
viz = "graph TD\n A[API Source] --> B[Data Pipeline]"
|
| 103 |
return (
|
| 104 |
+
render_mermaid(viz),
|
| 105 |
f"Extracted lineage from URL: {url}"
|
| 106 |
)
|
| 107 |
|
|
|
|
| 119 |
- **URLs/APIs**: Fetch metadata from web endpoints
|
| 120 |
- **dbt, Airflow, Snowflake**: Through MCP integration (when configured)
|
| 121 |
""")
|
| 122 |
+
# Load Mermaid.js once (hidden). We set startOnLoad=false and will initialize
|
| 123 |
+
# individual diagrams after inserting them into the DOM.
|
| 124 |
+
mermaid_loader = gr.HTML(
|
| 125 |
+
value='''<script src="https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js"></script>\n<script>mermaid.initialize({startOnLoad:false});</script>''',
|
| 126 |
+
visible=False
|
| 127 |
+
)
|
| 128 |
|
| 129 |
with gr.Tabs():
|
| 130 |
# Tab 1: Text/File Input
|
|
|
|
| 149 |
extract_btn_text = gr.Button("Extract Lineage", variant="primary")
|
| 150 |
|
| 151 |
with gr.Column():
|
| 152 |
+
output_viz_text = gr.HTML(
|
| 153 |
+
value="",
|
| 154 |
+
label="Lineage Visualization"
|
| 155 |
)
|
| 156 |
output_summary_text = gr.Textbox(
|
| 157 |
label="Summary",
|
|
|
|
| 190 |
extract_btn_bq = gr.Button("Extract Lineage", variant="primary")
|
| 191 |
|
| 192 |
with gr.Column():
|
| 193 |
+
output_viz_bq = gr.HTML(
|
| 194 |
+
value="",
|
| 195 |
+
label="Lineage Visualization"
|
| 196 |
)
|
| 197 |
output_summary_bq = gr.Textbox(
|
| 198 |
label="Summary",
|
|
|
|
| 221 |
extract_btn_url = gr.Button("Extract Lineage", variant="primary")
|
| 222 |
|
| 223 |
with gr.Column():
|
| 224 |
+
output_viz_url = gr.HTML(
|
| 225 |
+
value="",
|
| 226 |
+
label="Lineage Visualization"
|
| 227 |
)
|
| 228 |
output_summary_url = gr.Textbox(
|
| 229 |
label="Summary",
|
tests/test_app.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import unittest
|
| 2 |
+
|
| 3 |
+
from app import (
|
| 4 |
+
render_mermaid,
|
| 5 |
+
extract_lineage_from_text,
|
| 6 |
+
extract_lineage_from_bigquery,
|
| 7 |
+
extract_lineage_from_url,
|
| 8 |
+
)
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class TestLineageExtractors(unittest.TestCase):
|
| 12 |
+
def test_render_mermaid_wraps_and_inits(self):
|
| 13 |
+
viz = "graph TD\n A --> B"
|
| 14 |
+
html = render_mermaid(viz)
|
| 15 |
+
self.assertIn('<div class="mermaid">', html)
|
| 16 |
+
self.assertIn('graph TD', html)
|
| 17 |
+
self.assertIn('mermaid.init', html)
|
| 18 |
+
|
| 19 |
+
def test_extract_lineage_from_text_returns_html_and_summary(self):
|
| 20 |
+
html, summary = extract_lineage_from_text("", "dbt Manifest", "Mermaid")
|
| 21 |
+
self.assertIsInstance(html, str)
|
| 22 |
+
self.assertIsInstance(summary, str)
|
| 23 |
+
self.assertIn('<div class="mermaid">', html)
|
| 24 |
+
self.assertIn('Processed', summary)
|
| 25 |
+
|
| 26 |
+
def test_extract_lineage_from_bigquery_returns_html_and_summary(self):
|
| 27 |
+
html, summary = extract_lineage_from_bigquery("proj", "SELECT 1", "key", "Mermaid")
|
| 28 |
+
self.assertIn('<div class="mermaid">', html)
|
| 29 |
+
self.assertIn('Extracted lineage', summary)
|
| 30 |
+
|
| 31 |
+
def test_extract_lineage_from_url_returns_html_and_summary(self):
|
| 32 |
+
html, summary = extract_lineage_from_url("https://example.com", "Mermaid")
|
| 33 |
+
self.assertIn('<div class="mermaid">', html)
|
| 34 |
+
self.assertIn('Extracted lineage from URL', summary)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
if __name__ == '__main__':
|
| 38 |
+
unittest.main()
|