aamanlamba Claude committed on
Commit
7501b6e
·
1 Parent(s): 96007fc

Add preset MCP servers from HuggingFace for metadata integration

Browse files

- Add 4 preset MCP servers: HuggingFace Hub, Ragmint, Web Search, MCP Tools
- Dropdown to select preset servers with auto-populated URLs
- Display server descriptions and available tools
- Add fetch_metadata_from_mcp function for MCP integration
- Improve test_mcp_connection with multiple endpoint fallbacks
- Enhance MCP configuration UI with descriptions

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +149 -9
app.py CHANGED
@@ -40,6 +40,34 @@ SAMPLE_FILES = {
40
 
41
  EXPORT_FORMATS = ["OpenLineage", "Collibra", "Purview", "Alation"]
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  # ============================================================================
44
  # Mermaid Rendering
45
  # ============================================================================
@@ -340,13 +368,101 @@ def test_mcp_connection(server_url: str, api_key: str) -> str:
340
  headers = {}
341
  if api_key:
342
  headers["Authorization"] = f"Bearer {api_key}"
343
- resp = requests.get(server_url.replace("/mcp", "/health").replace("/api", "/health"),
344
- headers=headers, timeout=10)
345
- return f"MCP server responded: {resp.status_code} {resp.reason}"
 
 
 
 
 
 
 
346
  except Exception as e:
347
  return f"Error contacting MCP server: {e}"
348
 
349
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
350
  # ============================================================================
351
  # Export Functions
352
  # ============================================================================
@@ -504,19 +620,43 @@ with gr.Blocks(
504
 
505
  # MCP Server Configuration (collapsible)
506
  with gr.Accordion("MCP Server Configuration (Optional)", open=False):
 
 
 
 
507
  with gr.Row():
 
 
 
 
 
 
 
 
 
 
 
 
508
  mcp_server = gr.Textbox(
509
  label="MCP Server URL",
510
- placeholder="https://your-mcp-server.hf.space/mcp",
511
- info="Connect to a HuggingFace-hosted MCP server for enhanced processing"
 
512
  )
 
513
  mcp_api_key = gr.Textbox(
514
- label="API Key",
515
- placeholder="Optional API key",
516
- type="password"
 
517
  )
518
- test_btn = gr.Button("Test Connection", size="sm")
 
519
  mcp_status = gr.Textbox(label="Connection Status", interactive=False)
 
 
 
 
520
  test_btn.click(fn=test_mcp_connection, inputs=[mcp_server, mcp_api_key], outputs=[mcp_status])
521
 
522
  # Main Tabs
 
40
 
41
  EXPORT_FORMATS = ["OpenLineage", "Collibra", "Purview", "Alation"]
42
 
43
# Preset MCP Servers on HuggingFace that can provide metadata.
#
# Maps a short preset key (the value emitted by the "Preset MCP Servers"
# dropdown) to a record with four fields:
#   name        - human-readable label for the server
#   url         - endpoint URL; get_preset_url() returns this value
#   description - one-line summary shown by get_preset_description()
#   tools       - tool names listed after the description in the UI
# NOTE(review): the tool names are static strings; nothing in this file
# verifies them against the live servers - confirm before relying on them.
MCP_PRESETS = {
    "huggingface_hub": {
        "name": "HuggingFace Hub (Models & Datasets)",
        "url": "https://huggingface.co/mcp",
        "description": "Search HF models, datasets, and spaces metadata",
        "tools": ["search_models", "search_datasets", "search_spaces", "get_model_info"]
    },
    "ragmint": {
        "name": "Ragmint RAG Pipeline",
        "url": "https://mcp-1st-birthday-ragmint-mcp-server.hf.space/gradio_api/mcp/sse",
        "description": "RAG pipeline optimization and data retrieval",
        "tools": ["optimize_rag", "retrieve_documents"]
    },
    "web_search": {
        "name": "Web Search MCP",
        "url": "https://agents-mcp-hackathon-search-web-mcp-server.hf.space/gradio_api/mcp/sse",
        "description": "Search web for data and metadata",
        "tools": ["search_web", "fetch_page"]
    },
    "mcp_tools": {
        "name": "MCP Tools (Demo)",
        "url": "https://abidlabs-mcp-tools.hf.space/gradio_api/mcp/sse",
        "description": "Demo MCP server with various utility tools",
        "tools": ["prime_factors", "generate_image", "image_orientation"]
    }
}
70
+
71
  # ============================================================================
72
  # Mermaid Rendering
73
  # ============================================================================
 
368
  headers = {}
369
  if api_key:
370
  headers["Authorization"] = f"Bearer {api_key}"
371
+ # Try multiple health endpoints
372
+ for endpoint in ["/health", "/gradio_api/mcp/schema", ""]:
373
+ test_url = server_url.rstrip("/").replace("/sse", "").replace("/mcp", "") + endpoint
374
+ try:
375
+ resp = requests.get(test_url, headers=headers, timeout=10)
376
+ if resp.status_code == 200:
377
+ return f"Connected to MCP server: {resp.status_code} OK"
378
+ except:
379
+ continue
380
+ return f"MCP server responded but may not be fully accessible"
381
  except Exception as e:
382
  return f"Error contacting MCP server: {e}"
383
 
384
 
385
def get_preset_url(preset_key: str) -> str:
    """Look up the endpoint URL of a preset MCP server.

    Args:
        preset_key: Key into ``MCP_PRESETS`` (e.g. ``"ragmint"``).

    Returns:
        The preset's ``"url"`` value, or an empty string when the key is
        not a known preset.
    """
    entry = MCP_PRESETS.get(preset_key)
    return entry["url"] if entry is not None else ""
390
+
391
+
392
def get_preset_description(preset_key: str) -> str:
    """Build the description text shown for a preset MCP server.

    Args:
        preset_key: Key into ``MCP_PRESETS``.

    Returns:
        The preset's description followed by a blank line and a
        comma-separated list of its tools, or an empty string when the key
        is not a known preset.
    """
    entry = MCP_PRESETS.get(preset_key)
    if entry is None:
        return ""

    # A preset without a "tools" field simply renders an empty tool list.
    tool_names = ", ".join(entry.get("tools", []))
    summary = entry["description"]
    return f"{summary}\n\nAvailable tools: {tool_names}"
399
+
400
+
401
def fetch_metadata_from_mcp(server_url: str, api_key: str, query: str) -> Tuple[str, str]:
    """Fetch metadata from an MCP server and return it for lineage visualization.

    Posts a JSON-RPC 2.0 ``tools/call`` request for a tool named ``search``
    with ``{"query": query}`` as its arguments, then wraps the reply in a
    small nodes/edges lineage document.

    NOTE(review): the tool name ``search`` is hard-coded and does not appear
    in any preset's ``tools`` list in ``MCP_PRESETS`` - confirm which tool
    each server actually exposes.

    Args:
        server_url: MCP endpoint URL. A trailing ``/sse`` path segment is
            removed before posting.
        api_key: Optional token; when non-empty it is sent as an
            ``Authorization: Bearer`` header.
        query: Free-text query forwarded to the tool.

    Returns:
        A ``(lineage_json, status_message)`` tuple. ``lineage_json`` is an
        empty string when no URL was given, when the server reports a
        JSON-RPC error, or when the request raises. When the server answers
        with a non-200 status or a body that is not a JSON object, a
        placeholder lineage describing the connection is returned instead.
    """
    from urllib.parse import urlparse

    # Treat None and whitespace-only input the same as "nothing entered".
    server_url = (server_url or "").strip()
    if not server_url:
        return "", "Please select or enter an MCP server URL first."
    query = query or ""

    try:
        headers = {"Content-Type": "application/json"}
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"

        # For Gradio MCP servers, try to call a tool.
        # The standard MCP protocol uses JSON-RPC.
        payload = {
            "jsonrpc": "2.0",
            "method": "tools/call",
            "params": {
                "name": "search",
                "arguments": {"query": query}
            },
            "id": 1
        }

        # Drop only a trailing "/sse" segment; a blanket str.replace would
        # also mangle hosts or paths that merely contain "/sse".
        trimmed = server_url.rstrip("/")
        if trimmed.endswith("/sse"):
            base_url = trimmed[: -len("/sse")]
        else:
            base_url = server_url
        resp = requests.post(base_url, json=payload, headers=headers, timeout=30)

        if resp.status_code == 200:
            try:
                data = resp.json()
            except ValueError:
                # json.JSONDecodeError is a ValueError subclass; catching the
                # base class also covers other JSON backends.
                data = None

            if isinstance(data, dict):
                # A JSON-RPC reply carries either "result" or "error"; do not
                # present an error object as successfully fetched metadata.
                if "error" in data and "result" not in data:
                    error = data["error"]
                    detail = error.get("message", error) if isinstance(error, dict) else error
                    return "", f"MCP server returned an error: {detail}"

                result = data.get("result", data)
                # Create a simple lineage from the response.
                lineage_data = {
                    "nodes": [
                        {"id": "mcp_source", "type": "source", "name": f"MCP: {query}"},
                        {"id": "mcp_result", "type": "table", "name": "Query Result"}
                    ],
                    "edges": [
                        {"from": "mcp_source", "to": "mcp_result"}
                    ],
                    "metadata": result
                }
                return (
                    json.dumps(lineage_data, indent=2),
                    f"Fetched metadata from MCP server for query: {query}",
                )

        # Fallback: create sample lineage showing the MCP connection.
        host = urlparse(server_url).netloc or server_url
        # Only add an ellipsis when the query was actually cut short.
        query_label = query if len(query) <= 30 else f"{query[:30]}..."
        sample_lineage = {
            "nodes": [
                {"id": "mcp_server", "type": "source", "name": host},
                {"id": "query", "type": "model", "name": f"Query: {query_label}"},
                {"id": "result", "type": "table", "name": "MCP Result"}
            ],
            "edges": [
                {"from": "mcp_server", "to": "query"},
                {"from": "query", "to": "result"}
            ]
        }
        return (
            json.dumps(sample_lineage, indent=2),
            "Created lineage template for MCP query. Connect to the MCP server to fetch real metadata.",
        )

    except Exception as e:
        # UI callback boundary: report any failure as a status message
        # instead of letting it propagate.
        return "", f"Error fetching from MCP server: {str(e)}"
464
+
465
+
466
  # ============================================================================
467
  # Export Functions
468
  # ============================================================================
 
620
 
621
  # MCP Server Configuration (collapsible)
622
  with gr.Accordion("MCP Server Configuration (Optional)", open=False):
623
+ gr.Markdown("""
624
+ **Connect to MCP Servers on HuggingFace** to fetch metadata and enhance lineage extraction.
625
+ Select a preset server or enter a custom URL.
626
+ """)
627
  with gr.Row():
628
+ mcp_preset = gr.Dropdown(
629
+ choices=[
630
+ ("-- Select Preset --", ""),
631
+ ("HuggingFace Hub (Models & Datasets)", "huggingface_hub"),
632
+ ("Ragmint RAG Pipeline", "ragmint"),
633
+ ("Web Search MCP", "web_search"),
634
+ ("MCP Tools (Demo)", "mcp_tools"),
635
+ ],
636
+ label="Preset MCP Servers",
637
+ value="",
638
+ scale=2
639
+ )
640
  mcp_server = gr.Textbox(
641
  label="MCP Server URL",
642
+ placeholder="https://your-mcp-server.hf.space/gradio_api/mcp/sse",
643
+ info="Or enter a custom MCP server URL",
644
+ scale=3
645
  )
646
+ with gr.Row():
647
  mcp_api_key = gr.Textbox(
648
+ label="API Key (Optional)",
649
+ placeholder="API key if required",
650
+ type="password",
651
+ scale=2
652
  )
653
+ test_btn = gr.Button("Test Connection", size="sm", scale=1)
654
+ mcp_description = gr.Textbox(label="Server Description", interactive=False, lines=2)
655
  mcp_status = gr.Textbox(label="Connection Status", interactive=False)
656
+
657
+ # Wire up preset selection to update URL and description
658
+ mcp_preset.change(fn=get_preset_url, inputs=[mcp_preset], outputs=[mcp_server])
659
+ mcp_preset.change(fn=get_preset_description, inputs=[mcp_preset], outputs=[mcp_description])
660
  test_btn.click(fn=test_mcp_connection, inputs=[mcp_server, mcp_api_key], outputs=[mcp_status])
661
 
662
  # Main Tabs