Spaces:

gmkdigitalmedia
/

CTapi-raw

Paused

CTapi-raw / demo_option_b_flow.py

Your Name

Deploy Option B: Query Parser + RAG + 355M Ranking

45cf63e about 1 month ago

10.8 kB

	"""
	Demo: Option B Pipeline Flow (Without Real Data)

	Shows exactly how Option B processes your physician query
	"""

	import json
	from datetime import datetime

	# Title banner: an 80-column rule above and below the heading, then a blank line.
	print("=" * 80, "OPTION B PIPELINE DEMO", "=" * 80, "", sep="\n")

	# Sample physician question that every later step of the demo refers to.
	query = "what should a physician considering prescribing ianalumab for sjogren's disease know"

	# Echo the question, followed by a blank separator line.
	print("📝 PHYSICIAN QUERY:", f" {query}", "", sep="\n")

	# ===========================================================================
	# STEP 1: QUERY PARSER LLM (Llama-70B)
	# ===========================================================================
	print("=" * 80, "STEP 1: QUERY PARSER LLM (Llama-70B)", "=" * 80, sep="\n")
	print("⏱️ Time: ~3 seconds", "💰 Cost: $0.001", "", sep="\n")

	# Canned stand-in for the parser's answer: no model is called in this demo.
	# Each entity type is named first, then assembled in the order the JSON
	# output relies on.
	drug_aliases = [
	    "ianalumab",
	    "VAY736",  # Research code for ianalumab
	    "anti-BAFF-R antibody",
	]
	disease_aliases = [
	    "Sjögren's syndrome",
	    "Sjögren syndrome",
	    "Sjogren's disease",
	    "Sjogren disease",
	    "primary Sjögren's syndrome",
	    "sicca syndrome",
	]
	company_aliases = [
	    "Novartis",  # Ianalumab manufacturer
	    "Novartis Pharmaceuticals",
	]
	endpoint_terms = [
	    "safety",
	    "efficacy",
	    "dosing",
	    "contraindications",
	    "clinical outcomes",
	]
	parsed_entities = {
	    "drugs": drug_aliases,
	    "diseases": disease_aliases,
	    "companies": company_aliases,
	    "endpoints": endpoint_terms,
	    "search_terms": "ianalumab VAY736 Sjögren syndrome Sjogren disease efficacy safety prescribing",
	}

	# Only the first three disease aliases are shown; the trailing "..." marks
	# the rest of the list as omitted.
	diseases_preview = parsed_entities["diseases"][:3]
	print(
	    "🔍 EXTRACTED ENTITIES:",
	    f" Drugs: {parsed_entities['drugs']}",
	    f" Diseases: {diseases_preview}...",
	    f" Companies: {parsed_entities['companies']}",
	    f" Endpoints: {parsed_entities['endpoints']}",
	    "",
	    "🎯 OPTIMIZED SEARCH QUERY:",
	    f" {parsed_entities['search_terms']}",
	    "",
	    sep="\n",
	)

	# ===========================================================================
	# STEP 2: RAG SEARCH (BM25 + Semantic + Inverted Index)
	# ===========================================================================
	def shorten_title(title, limit=80):
	    """Return *title* cut to *limit* characters for one-line display.

	    The "..." suffix is added only when characters were actually dropped,
	    so a title that already fits is shown unchanged.
	    """
	    if len(title) <= limit:
	        return title
	    return title[:limit] + "..."


	print("=" * 80)
	print("STEP 2: RAG SEARCH")
	print("=" * 80)
	print("⏱️ Time: ~2 seconds")
	print("💰 Cost: $0 (local)")
	print()

	# Simulated search results: the narration below is fixed text, no index is queried.
	print("🔎 SEARCH PROCESS:")
	print(" 1. Inverted Index: Found 'ianalumab' in 8 trials (O(1) lookup)")
	print(" 2. Semantic Search: Computed similarity for 500,000+ trials")
	print(" 3. Hybrid Scoring: Combined keyword + semantic scores")
	print()

	# Hard-coded candidates, listed in descending hybrid_score order.
	candidate_trials = [
	    {
	        "nct_id": "NCT02962895",
	        "title": "A Randomized, Double-blind, Placebo-controlled Study of Ianalumab in Patients With Sjögren's Syndrome",
	        "hybrid_score": 0.856,
	        "snippet": "Phase 2 study evaluating efficacy and safety of ianalumab (VAY736) in primary Sjögren's syndrome...",
	    },
	    {
	        "nct_id": "NCT03334851",
	        "title": "Extension Study of Ianalumab in Sjögren's Syndrome",
	        "hybrid_score": 0.823,
	        "snippet": "Open-label extension to evaluate long-term safety and efficacy of ianalumab in Sjögren's syndrome...",
	    },
	    {
	        "nct_id": "NCT02808364",
	        "title": "Safety and Tolerability Study of Ianalumab in Sjögren's Syndrome",
	        "hybrid_score": 0.791,
	        "snippet": "Phase 2a study assessing safety, tolerability, and pharmacokinetics of ianalumab...",
	    },
	]

	print(f"✅ FOUND: {len(candidate_trials)} highly relevant trials")
	print()
	for i, trial in enumerate(candidate_trials, 1):
	    print(f" {i}. {trial['nct_id']}")
	    print(f" Hybrid Score: {trial['hybrid_score']:.3f}")
	    print(f" {shorten_title(trial['title'])}")
	    print()  # blank line after each trial

	# ===========================================================================
	# STEP 3: 355M PERPLEXITY RANKING
	# ===========================================================================
	print("=" * 80)
	print("STEP 3: 355M PERPLEXITY RANKING")
	print("=" * 80)
	print("⏱️ Time: ~2-5 seconds (GPU) or ~15-30 seconds (CPU)")
	print("💰 Cost: $0 (local model)")
	print()

	print("🧠 355M CLINICAL TRIAL GPT ANALYSIS:")
	print(" For each trial, calculates: 'How natural is this query-trial pairing?'")
	print()

	# Simulated perplexity scores: each entry copies a candidate and adds fixed
	# ranking fields; nothing is computed by a model here.
	ranked_trials = [
	    {
	        **candidate_trials[0],
	        "perplexity": 12.4,  # Lower = more relevant
	        "perplexity_score": 0.890,
	        # NOTE(review): this value was labelled "70% hybrid + 30% perplexity",
	        # but 0.7 * 0.856 + 0.3 * 0.890 = 0.866, not 0.923. Confirm the
	        # intended formula or the sample values (all three entries).
	        "combined_score": 0.923,
	        "rank_before": 1,
	        "rank_after": 1,
	    },
	    {
	        **candidate_trials[1],
	        "perplexity": 15.8,
	        "perplexity_score": 0.863,
	        "combined_score": 0.893,
	        "rank_before": 2,
	        "rank_after": 2,
	    },
	    {
	        **candidate_trials[2],
	        "perplexity": 18.2,
	        "perplexity_score": 0.846,
	        "combined_score": 0.871,
	        "rank_before": 3,
	        "rank_after": 3,
	    },
	]

	for i, trial in enumerate(ranked_trials, 1):
	    print(f" {i}. {trial['nct_id']}")
	    print(f" Perplexity: {trial['perplexity']:.1f} (lower = better)")
	    print(f" Hybrid Score: {trial['hybrid_score']:.3f}")
	    print(f" Combined Score: {trial['combined_score']:.3f}")
	    print(f" Rank: {trial['rank_before']} → {trial['rank_after']}")
	    print()  # blank line after each trial

	# ===========================================================================
	# STEP 4: STRUCTURED JSON OUTPUT
	# ===========================================================================
	print("=" * 80, "STEP 4: STRUCTURED JSON OUTPUT", "=" * 80, sep="\n")
	print("⏱️ Time: instant", "💰 Cost: $0", "", sep="\n")

	# Fixed sample timings; each one appears twice in the payload.
	parsing_seconds = 3.1
	total_seconds = 8.2

	# One record per ranked trial. Only the id, title, snippet and scores come
	# from the ranked entry; the other study fields are the same literal text
	# for every trial.
	trial_records = []
	for ranked in ranked_trials:
	    score_details = {
	        "relevance_score": ranked["combined_score"],
	        "hybrid_score": ranked["hybrid_score"],
	        "perplexity": ranked["perplexity"],
	        "perplexity_score": ranked["perplexity_score"],
	        "rank_before_355m": ranked["rank_before"],
	        "rank_after_355m": ranked["rank_after"],
	        "ranking_method": "355m_perplexity",
	    }
	    trial_records.append(
	        {
	            "nct_id": ranked["nct_id"],
	            "title": ranked["title"],
	            "status": "Completed",
	            "phase": "Phase 2",
	            "conditions": "Primary Sjögren's Syndrome",
	            "interventions": "Ianalumab (VAY736)",
	            "sponsor": "Novartis Pharmaceuticals",
	            "enrollment": "160 participants",
	            "primary_outcome": "Change in ESSDAI score at Week 24",
	            "description": ranked["snippet"],
	            "scoring": score_details,
	            "url": f"https://clinicaltrials.gov/study/{ranked['nct_id']}",
	        }
	    )

	# Final structured response, assembled section by section in output order.
	final_response = {
	    "query": query,
	    "processing_time": total_seconds,
	    "query_analysis": {
	        "extracted_entities": parsed_entities,
	        "optimized_search": parsed_entities["search_terms"],
	        "parsing_time": parsing_seconds,
	    },
	    "results": {
	        "total_found": len(candidate_trials),
	        "returned": len(ranked_trials),
	        "top_relevance_score": ranked_trials[0]["combined_score"],
	    },
	    "trials": trial_records,
	    "benchmarking": {
	        "query_parsing_time": parsing_seconds,
	        "rag_search_time": 2.3,
	        "355m_ranking_time": 2.8,
	        "total_processing_time": total_seconds,
	    },
	}

	# Only the first 1000 characters of the pretty-printed JSON are shown.
	response_preview = json.dumps(final_response, indent=2)[:1000]
	print("📦 STRUCTURED JSON RESPONSE:", response_preview + "...", "", sep="\n")

	# ===========================================================================
	# WHAT THE CLIENT DOES WITH THIS DATA
	# ===========================================================================
	print("=" * 80, "WHAT CHATBOT COMPANIES DO WITH THIS JSON", "=" * 80, "", sep="\n")

	# Scripted example of the answer a client's LLM could write from the JSON.
	# Only the trial count is read from ranked_trials; every other line is
	# fixed text. Empty strings stand for blank output lines.
	light_rule = "─" * 80
	physician_response = [
	    "🤖 CLIENT'S LLM (GPT-4, Claude, etc.) GENERATES:",
	    "",
	    light_rule,
	    "PHYSICIAN RESPONSE (Generated by Client's LLM):",
	    light_rule,
	    "",
	    "Based on current clinical trial data, physicians considering prescribing",
	    "ianalumab for Sjögren's disease should be aware of the following:",
	    "",
	    "Clinical Evidence:",
	    f"- {len(ranked_trials)} major clinical trials have evaluated ianalumab in Sjögren's syndrome",
	    "",
	    "Primary Trial (NCT02962895):",
	    "- Phase 2, randomized, double-blind, placebo-controlled study",
	    "- 160 participants with primary Sjögren's syndrome",
	    "- Primary endpoint: Change in ESSDAI (disease activity) score at Week 24",
	    "- Status: Completed",
	    "- Sponsor: Novartis Pharmaceuticals",
	    "",
	    "Drug Information:",
	    "- Generic name: Ianalumab",
	    "- Research code: VAY736",
	    "- Mechanism: Anti-BAFF-R (B-cell activating factor receptor) antibody",
	    "",
	    "Key Considerations:",
	    "1. Safety profile from completed Phase 2 trials available",
	    "2. Long-term extension study (NCT03334851) provides extended safety data",
	    "3. Efficacy measured by ESSDAI score reduction",
	    "4. Appropriate for patients with primary Sjögren's syndrome",
	    "",
	    "Additional Resources:",
	    "- NCT02962895: https://clinicaltrials.gov/study/NCT02962895",
	    "- NCT03334851: https://clinicaltrials.gov/study/NCT03334851",
	    "- NCT02808364: https://clinicaltrials.gov/study/NCT02808364",
	    "",
	    "Note: This information is based on clinical trial data. Please refer",
	    "to the complete prescribing information and consult current clinical",
	    "guidelines before prescribing.",
	    light_rule,
	    "",
	]
	print("\n".join(physician_response))

	# ===========================================================================
	# SUMMARY
	# ===========================================================================
	# Closing recap, emitted one line at a time. Empty strings stand for blank
	# output lines.
	heavy_rule = "=" * 80
	summary_lines = (
	    heavy_rule,
	    "OPTION B SUMMARY",
	    heavy_rule,
	    "",
	    "✅ WHAT OPTION B PROVIDES:",
	    " • Fast query parsing with entity extraction (Llama-70B)",
	    " • Accurate trial retrieval (Hybrid RAG)",
	    " • Clinical relevance ranking (355M perplexity)",
	    " • Structured JSON output with all trial data",
	    "",
	    "⏱️ TOTAL TIME: ~8 seconds (with GPU) or ~20-25 seconds (CPU)",
	    "💰 TOTAL COST: $0.001 per query",
	    "",
	    "❌ WHAT OPTION B DOESN'T DO:",
	    " • Does NOT generate text responses",
	    " • Does NOT use 355M for text generation (prevents hallucinations)",
	    " • Does NOT include 3-agent orchestration",
	    "",
	    "🎯 WHY THIS IS PERFECT:",
	    " • Chatbot companies control response generation",
	    " • Your API focuses on accurate search & ranking",
	    " • Fast, cheap, and reliable",
	    " • No hallucinations (355M only scores, doesn't generate)",
	    "",
	    heavy_rule,
	)
	for summary_line in summary_lines:
	    print(summary_line)

	# Save to file
	# The path is named once so the file written and the message printed below
	# cannot drift apart. It is relative, so it resolves against the current
	# working directory.
	output_path = "demo_option_b_output.json"

	# json.dump keeps its default ensure_ascii=True, so non-ASCII text is written
	# as \uXXXX escapes and the bytes on disk are unchanged. The explicit
	# encoding only stops the result depending on the platform's locale.
	with open(output_path, "w", encoding="utf-8") as f:
	    json.dump(final_response, f, indent=2)

	print()
	print(f"💾 Full JSON response saved to: {output_path}")
	print()