aamanlamba committed on
Commit
322f5cd
·
1 Parent(s): 60ac2eb

added tests, removed mermaid dependency - lineage extractor

Browse files
Files changed (2) hide show
  1. app.py +42 -12
  2. tests/test_app.py +38 -0
app.py CHANGED
@@ -8,6 +8,27 @@ import json
8
  import os
9
  from typing import Optional, Tuple
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  # Note: This is a template. You'll need to integrate with your actual agent backend.
12
  # This could be through an API, Claude SDK, or other agent framework.
13
 
@@ -30,8 +51,9 @@ def extract_lineage_from_text(
30
  # TODO: Integrate with your agent backend
31
  # This is where you'd call your agent with the metadata_parser and graph_visualizer workers
32
 
 
33
  return (
34
- "graph TD\n A[Sample Node] --> B[Output Node]",
35
  f"Processed {source_type} metadata. Found X nodes and Y relationships."
36
  )
37
 
@@ -55,8 +77,9 @@ def extract_lineage_from_bigquery(
55
  """
56
  # TODO: Integrate with BigQuery and your agent backend
57
 
 
58
  return (
59
- "graph TD\n A[BigQuery Table] --> B[Destination Table]",
60
  f"Extracted lineage from BigQuery project: {project_id}"
61
  )
62
 
@@ -76,8 +99,9 @@ def extract_lineage_from_url(
76
  """
77
  # TODO: Integrate with URL fetching and your agent backend
78
 
 
79
  return (
80
- "graph TD\n A[API Source] --> B[Data Pipeline]",
81
  f"Extracted lineage from URL: {url}"
82
  )
83
 
@@ -95,6 +119,12 @@ with gr.Blocks(title="Lineage Graph Extractor", theme=gr.themes.Soft()) as demo:
95
  - **URLs/APIs**: Fetch metadata from web endpoints
96
  - **dbt, Airflow, Snowflake**: Through MCP integration (when configured)
97
  """)
 
 
 
 
 
 
98
 
99
  with gr.Tabs():
100
  # Tab 1: Text/File Input
@@ -119,9 +149,9 @@ with gr.Blocks(title="Lineage Graph Extractor", theme=gr.themes.Soft()) as demo:
119
  extract_btn_text = gr.Button("Extract Lineage", variant="primary")
120
 
121
  with gr.Column():
122
- output_viz_text = gr.Code(
123
- label="Lineage Visualization",
124
- language="mermaid"
125
  )
126
  output_summary_text = gr.Textbox(
127
  label="Summary",
@@ -160,9 +190,9 @@ with gr.Blocks(title="Lineage Graph Extractor", theme=gr.themes.Soft()) as demo:
160
  extract_btn_bq = gr.Button("Extract Lineage", variant="primary")
161
 
162
  with gr.Column():
163
- output_viz_bq = gr.Code(
164
- label="Lineage Visualization",
165
- language="mermaid"
166
  )
167
  output_summary_bq = gr.Textbox(
168
  label="Summary",
@@ -191,9 +221,9 @@ with gr.Blocks(title="Lineage Graph Extractor", theme=gr.themes.Soft()) as demo:
191
  extract_btn_url = gr.Button("Extract Lineage", variant="primary")
192
 
193
  with gr.Column():
194
- output_viz_url = gr.Code(
195
- label="Lineage Visualization",
196
- language="mermaid"
197
  )
198
  output_summary_url = gr.Textbox(
199
  label="Summary",
 
8
  import os
9
  from typing import Optional, Tuple
10
 
11
+
12
def render_mermaid(viz_code: str) -> str:
    """Wrap Mermaid source in an HTML container followed by an init script.

    The source is HTML-escaped (``&``, ``<`` and ``>``) before being embedded
    so it cannot inject markup. Escaping ``&`` as well as the angle brackets
    keeps text such as ``R&D`` or a literal ``&lt;`` intact when the browser
    decodes the element's text content.

    Args:
        viz_code: Mermaid diagram source.

    Returns:
        An HTML fragment: a ``<div class="mermaid">`` holding the escaped
        source, followed by a ``<script>`` that retries every 50 ms until
        ``window.mermaid`` exists and then calls ``mermaid.init`` on every
        ``.mermaid`` element in the document.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    # quote=False: the text lands in an element body, not an attribute value,
    # so quotes are harmless and Mermaid labels that use them stay readable.
    safe_viz = escape(viz_code, quote=False)

    # Script will wait for mermaid to be available, then initialize diagrams.
    # NOTE(review): browsers do not execute <script> elements inserted via
    # innerHTML -- confirm the hosting component actually runs this snippet.
    init_script = (
        "<script>"
        "(function(){"
        "function run(){"
        " if(window.mermaid){ mermaid.init(undefined, document.querySelectorAll('.mermaid')); }"
        " else { setTimeout(run,50); }"
        " } run();})();"
        "</script>"
    )
    return f"<div class=\"mermaid\">{safe_viz}</div>{init_script}"
31
+
32
  # Note: This is a template. You'll need to integrate with your actual agent backend.
33
  # This could be through an API, Claude SDK, or other agent framework.
34
 
 
51
  # TODO: Integrate with your agent backend
52
  # This is where you'd call your agent with the metadata_parser and graph_visualizer workers
53
 
54
+ viz = "graph TD\n A[Sample Node] --> B[Output Node]"
55
  return (
56
+ render_mermaid(viz),
57
  f"Processed {source_type} metadata. Found X nodes and Y relationships."
58
  )
59
 
 
77
  """
78
  # TODO: Integrate with BigQuery and your agent backend
79
 
80
+ viz = "graph TD\n A[BigQuery Table] --> B[Destination Table]"
81
  return (
82
+ render_mermaid(viz),
83
  f"Extracted lineage from BigQuery project: {project_id}"
84
  )
85
 
 
99
  """
100
  # TODO: Integrate with URL fetching and your agent backend
101
 
102
+ viz = "graph TD\n A[API Source] --> B[Data Pipeline]"
103
  return (
104
+ render_mermaid(viz),
105
  f"Extracted lineage from URL: {url}"
106
  )
107
 
 
119
  - **URLs/APIs**: Fetch metadata from web endpoints
120
  - **dbt, Airflow, Snowflake**: Through MCP integration (when configured)
121
  """)
122
+ # Load Mermaid.js once (hidden). We set startOnLoad=false and will initialize
123
+ # individual diagrams after inserting them into the DOM.
124
+ mermaid_loader = gr.HTML(
125
+ value='''<script src="https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js"></script>\n<script>mermaid.initialize({startOnLoad:false});</script>''',
126
+ visible=False
127
+ )
128
 
129
  with gr.Tabs():
130
  # Tab 1: Text/File Input
 
149
  extract_btn_text = gr.Button("Extract Lineage", variant="primary")
150
 
151
  with gr.Column():
152
+ output_viz_text = gr.HTML(
153
+ value="",
154
+ label="Lineage Visualization"
155
  )
156
  output_summary_text = gr.Textbox(
157
  label="Summary",
 
190
  extract_btn_bq = gr.Button("Extract Lineage", variant="primary")
191
 
192
  with gr.Column():
193
+ output_viz_bq = gr.HTML(
194
+ value="",
195
+ label="Lineage Visualization"
196
  )
197
  output_summary_bq = gr.Textbox(
198
  label="Summary",
 
221
  extract_btn_url = gr.Button("Extract Lineage", variant="primary")
222
 
223
  with gr.Column():
224
+ output_viz_url = gr.HTML(
225
+ value="",
226
+ label="Lineage Visualization"
227
  )
228
  output_summary_url = gr.Textbox(
229
  label="Summary",
tests/test_app.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import unittest
2
+
3
+ from app import (
4
+ render_mermaid,
5
+ extract_lineage_from_text,
6
+ extract_lineage_from_bigquery,
7
+ extract_lineage_from_url,
8
+ )
9
+
10
+
11
class TestLineageExtractors(unittest.TestCase):
    """Smoke tests for the Mermaid renderer and the three lineage extractors."""

    def test_render_mermaid_wraps_and_inits(self):
        """Rendered markup holds the container div, the source and the init call."""
        markup = render_mermaid("graph TD\n A --> B")
        for fragment in ('<div class="mermaid">', 'graph TD', 'mermaid.init'):
            self.assertIn(fragment, markup)

    def test_extract_lineage_from_text_returns_html_and_summary(self):
        """Text extraction yields a (markup, summary) pair of strings."""
        markup, summary = extract_lineage_from_text("", "dbt Manifest", "Mermaid")
        # Both members of the pair must be plain strings.
        self.assertIsInstance(markup, str)
        self.assertIsInstance(summary, str)
        self.assertIn('<div class="mermaid">', markup)
        self.assertIn('Processed', summary)

    def test_extract_lineage_from_bigquery_returns_html_and_summary(self):
        """BigQuery extraction yields Mermaid markup and a summary line."""
        result = extract_lineage_from_bigquery("proj", "SELECT 1", "key", "Mermaid")
        markup, summary = result
        self.assertIn('<div class="mermaid">', markup)
        self.assertIn('Extracted lineage', summary)

    def test_extract_lineage_from_url_returns_html_and_summary(self):
        """URL extraction yields Mermaid markup and a summary naming the URL source."""
        result = extract_lineage_from_url("https://example.com", "Mermaid")
        markup, summary = result
        self.assertIn('<div class="mermaid">', markup)
        self.assertIn('Extracted lineage from URL', summary)


# Allow running this file directly as a script.
if __name__ == '__main__':
    unittest.main()