Commit
·
7501b6e
1
Parent(s):
96007fc
Add preset MCP servers from HuggingFace for metadata integration
Browse files
- Add 4 preset MCP servers: HuggingFace Hub, Ragmint, Web Search, MCP Tools
- Dropdown to select preset servers with auto-populated URLs
- Display server descriptions and available tools
- Add fetch_metadata_from_mcp function for MCP integration
- Improved test_mcp_connection with multiple endpoint fallbacks
- Enhanced MCP configuration UI with descriptions
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
app.py
CHANGED
|
@@ -40,6 +40,34 @@ SAMPLE_FILES = {
|
|
| 40 |
|
| 41 |
EXPORT_FORMATS = ["OpenLineage", "Collibra", "Purview", "Alation"]
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
# ============================================================================
|
| 44 |
# Mermaid Rendering
|
| 45 |
# ============================================================================
|
|
@@ -340,13 +368,101 @@ def test_mcp_connection(server_url: str, api_key: str) -> str:
|
|
| 340 |
headers = {}
|
| 341 |
if api_key:
|
| 342 |
headers["Authorization"] = f"Bearer {api_key}"
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
except Exception as e:
|
| 347 |
return f"Error contacting MCP server: {e}"
|
| 348 |
|
| 349 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
# ============================================================================
|
| 351 |
# Export Functions
|
| 352 |
# ============================================================================
|
|
@@ -504,19 +620,43 @@ with gr.Blocks(
|
|
| 504 |
|
| 505 |
# MCP Server Configuration (collapsible)
|
| 506 |
with gr.Accordion("MCP Server Configuration (Optional)", open=False):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 507 |
with gr.Row():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 508 |
mcp_server = gr.Textbox(
|
| 509 |
label="MCP Server URL",
|
| 510 |
-
placeholder="https://your-mcp-server.hf.space/mcp",
|
| 511 |
-
info="
|
|
|
|
| 512 |
)
|
|
|
|
| 513 |
mcp_api_key = gr.Textbox(
|
| 514 |
-
label="API Key",
|
| 515 |
-
placeholder="
|
| 516 |
-
type="password"
|
|
|
|
| 517 |
)
|
| 518 |
-
test_btn = gr.Button("Test Connection", size="sm")
|
|
|
|
| 519 |
mcp_status = gr.Textbox(label="Connection Status", interactive=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 520 |
test_btn.click(fn=test_mcp_connection, inputs=[mcp_server, mcp_api_key], outputs=[mcp_status])
|
| 521 |
|
| 522 |
# Main Tabs
|
|
|
|
| 40 |
|
| 41 |
EXPORT_FORMATS = ["OpenLineage", "Collibra", "Purview", "Alation"]
|
| 42 |
|
| 43 |
+
# Preset MCP servers hosted on HuggingFace that can provide metadata.
#
# Each entry is keyed by a short identifier and carries:
#   name        - human-readable label for the server
#   url         - endpoint URL used to reach the server
#   description - one-line summary of what the server offers
#   tools       - tool names advertised for the server
MCP_PRESETS = {
    "huggingface_hub": {
        "name": "HuggingFace Hub (Models & Datasets)",
        "url": "https://huggingface.co/mcp",
        "description": "Search HF models, datasets, and spaces metadata",
        "tools": [
            "search_models",
            "search_datasets",
            "search_spaces",
            "get_model_info",
        ],
    },
    "ragmint": {
        "name": "Ragmint RAG Pipeline",
        "url": "https://mcp-1st-birthday-ragmint-mcp-server.hf.space/gradio_api/mcp/sse",
        "description": "RAG pipeline optimization and data retrieval",
        "tools": ["optimize_rag", "retrieve_documents"],
    },
    "web_search": {
        "name": "Web Search MCP",
        "url": "https://agents-mcp-hackathon-search-web-mcp-server.hf.space/gradio_api/mcp/sse",
        "description": "Search web for data and metadata",
        "tools": ["search_web", "fetch_page"],
    },
    "mcp_tools": {
        "name": "MCP Tools (Demo)",
        "url": "https://abidlabs-mcp-tools.hf.space/gradio_api/mcp/sse",
        "description": "Demo MCP server with various utility tools",
        "tools": ["prime_factors", "generate_image", "image_orientation"],
    },
}
|
| 70 |
+
|
| 71 |
# ============================================================================
|
| 72 |
# Mermaid Rendering
|
| 73 |
# ============================================================================
|
|
|
|
| 368 |
headers = {}
|
| 369 |
if api_key:
|
| 370 |
headers["Authorization"] = f"Bearer {api_key}"
|
| 371 |
+
# Try multiple health endpoints
|
| 372 |
+
for endpoint in ["/health", "/gradio_api/mcp/schema", ""]:
|
| 373 |
+
test_url = server_url.rstrip("/").replace("/sse", "").replace("/mcp", "") + endpoint
|
| 374 |
+
try:
|
| 375 |
+
resp = requests.get(test_url, headers=headers, timeout=10)
|
| 376 |
+
if resp.status_code == 200:
|
| 377 |
+
return f"Connected to MCP server: {resp.status_code} OK"
|
| 378 |
+
except:
|
| 379 |
+
continue
|
| 380 |
+
return f"MCP server responded but may not be fully accessible"
|
| 381 |
except Exception as e:
|
| 382 |
return f"Error contacting MCP server: {e}"
|
| 383 |
|
| 384 |
|
| 385 |
+
def get_preset_url(preset_key: str) -> str:
    """Look up the endpoint URL registered for a preset MCP server.

    Returns the ``"url"`` entry of the matching ``MCP_PRESETS`` record, or an
    empty string when ``preset_key`` is not a known preset.
    """
    # Unknown keys (including the empty string) map to an empty URL.
    if preset_key not in MCP_PRESETS:
        return ""
    return MCP_PRESETS[preset_key]["url"]
|
| 390 |
+
|
| 391 |
+
|
| 392 |
+
def get_preset_description(preset_key: str) -> str:
    """Build the display text for a preset MCP server.

    The text is the preset's description, a blank line, and a comma-separated
    list of its tool names. Returns an empty string when ``preset_key`` is not
    a key of ``MCP_PRESETS``.
    """
    if preset_key not in MCP_PRESETS:
        return ""

    entry = MCP_PRESETS[preset_key]
    # A preset without a "tools" list renders an empty tool listing.
    tool_names = ", ".join(entry.get("tools", []))
    return f"{entry['description']}\n\nAvailable tools: {tool_names}"
|
| 399 |
+
|
| 400 |
+
|
| 401 |
+
def fetch_metadata_from_mcp(server_url: str, api_key: str, query: str) -> Tuple[str, str]:
    """Query an MCP server and wrap the reply as lineage JSON.

    Sends a JSON-RPC 2.0 ``tools/call`` request for a tool named ``search``
    with ``query`` as its only argument. When the server answers HTTP 200 with
    a JSON object, that object's ``"result"`` (or the whole object when there
    is no ``"result"`` key) is embedded as ``"metadata"`` in a two-node
    lineage graph. Any other response produces a three-node template graph
    that only illustrates the server -> query -> result flow.

    Args:
        server_url: MCP endpoint URL. A trailing ``/sse`` path segment is
            removed before posting. Blank or ``None`` is rejected.
        api_key: Optional token, sent as ``Authorization: Bearer <api_key>``
            when non-empty.
        query: Search text forwarded to the tool; ``None`` is treated as "".

    Returns:
        A ``(lineage_json, status_message)`` tuple. ``lineage_json`` is an
        empty string when no URL was supplied or when an exception occurred.
    """
    from urllib.parse import urlparse

    # Treat None / whitespace-only input the same as "no URL given".
    server_url = (server_url or "").strip()
    if not server_url:
        return "", "Please select or enter an MCP server URL first."
    query = query or ""

    try:
        headers = {"Content-Type": "application/json"}
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"

        # The standard MCP protocol uses JSON-RPC; ask the server to run a tool.
        payload = {
            "jsonrpc": "2.0",
            "method": "tools/call",
            "params": {
                "name": "search",
                "arguments": {"query": query},
            },
            "id": 1,
        }

        # Drop only a trailing "/sse" segment. A blanket str.replace would
        # also corrupt URLs that merely contain "/sse" elsewhere in the path
        # (e.g. ".../ssession/...").
        trimmed = server_url.rstrip("/")
        if trimmed.endswith("/sse"):
            base_url = trimmed[: -len("/sse")]
        else:
            base_url = server_url
        resp = requests.post(base_url, json=payload, headers=headers, timeout=30)

        if resp.status_code == 200:
            try:
                data = resp.json()
            except ValueError:
                # json.JSONDecodeError subclasses ValueError; catching the
                # base class also covers decoders that raise plain ValueError.
                data = None

            if isinstance(data, dict):
                # Wrap the server's answer in a minimal lineage graph.
                lineage_data = {
                    "nodes": [
                        {"id": "mcp_source", "type": "source", "name": f"MCP: {query}"},
                        {"id": "mcp_result", "type": "table", "name": "Query Result"},
                    ],
                    "edges": [
                        {"from": "mcp_source", "to": "mcp_result"},
                    ],
                    "metadata": data.get("result", data),
                }
                return (
                    json.dumps(lineage_data, indent=2),
                    f"Fetched metadata from MCP server for query: {query}",
                )

        # Fallback: create sample lineage showing the MCP connection.
        host = urlparse(server_url).netloc or server_url
        # Only mark the label as truncated when text was actually cut off.
        query_label = query if len(query) <= 30 else f"{query[:30]}..."
        sample_lineage = {
            "nodes": [
                {"id": "mcp_server", "type": "source", "name": host},
                {"id": "query", "type": "model", "name": f"Query: {query_label}"},
                {"id": "result", "type": "table", "name": "MCP Result"},
            ],
            "edges": [
                {"from": "mcp_server", "to": "query"},
                {"from": "query", "to": "result"},
            ],
        }
        return (
            json.dumps(sample_lineage, indent=2),
            "Created lineage template for MCP query. Connect to the MCP server to fetch real metadata.",
        )

    except Exception as e:
        # UI boundary: report the failure as status text instead of raising.
        return "", f"Error fetching from MCP server: {str(e)}"
|
| 464 |
+
|
| 465 |
+
|
| 466 |
# ============================================================================
|
| 467 |
# Export Functions
|
| 468 |
# ============================================================================
|
|
|
|
| 620 |
|
| 621 |
# MCP Server Configuration (collapsible)
|
| 622 |
with gr.Accordion("MCP Server Configuration (Optional)", open=False):
|
| 623 |
+
gr.Markdown("""
|
| 624 |
+
**Connect to MCP Servers on HuggingFace** to fetch metadata and enhance lineage extraction.
|
| 625 |
+
Select a preset server or enter a custom URL.
|
| 626 |
+
""")
|
| 627 |
with gr.Row():
|
| 628 |
+
mcp_preset = gr.Dropdown(
|
| 629 |
+
choices=[
|
| 630 |
+
("-- Select Preset --", ""),
|
| 631 |
+
("HuggingFace Hub (Models & Datasets)", "huggingface_hub"),
|
| 632 |
+
("Ragmint RAG Pipeline", "ragmint"),
|
| 633 |
+
("Web Search MCP", "web_search"),
|
| 634 |
+
("MCP Tools (Demo)", "mcp_tools"),
|
| 635 |
+
],
|
| 636 |
+
label="Preset MCP Servers",
|
| 637 |
+
value="",
|
| 638 |
+
scale=2
|
| 639 |
+
)
|
| 640 |
mcp_server = gr.Textbox(
|
| 641 |
label="MCP Server URL",
|
| 642 |
+
placeholder="https://your-mcp-server.hf.space/gradio_api/mcp/sse",
|
| 643 |
+
info="Or enter a custom MCP server URL",
|
| 644 |
+
scale=3
|
| 645 |
)
|
| 646 |
+
with gr.Row():
|
| 647 |
mcp_api_key = gr.Textbox(
|
| 648 |
+
label="API Key (Optional)",
|
| 649 |
+
placeholder="API key if required",
|
| 650 |
+
type="password",
|
| 651 |
+
scale=2
|
| 652 |
)
|
| 653 |
+
test_btn = gr.Button("Test Connection", size="sm", scale=1)
|
| 654 |
+
mcp_description = gr.Textbox(label="Server Description", interactive=False, lines=2)
|
| 655 |
mcp_status = gr.Textbox(label="Connection Status", interactive=False)
|
| 656 |
+
|
| 657 |
+
# Wire up preset selection to update URL and description
|
| 658 |
+
mcp_preset.change(fn=get_preset_url, inputs=[mcp_preset], outputs=[mcp_server])
|
| 659 |
+
mcp_preset.change(fn=get_preset_description, inputs=[mcp_preset], outputs=[mcp_description])
|
| 660 |
test_btn.click(fn=test_mcp_connection, inputs=[mcp_server, mcp_api_key], outputs=[mcp_status])
|
| 661 |
|
| 662 |
# Main Tabs
|