# CTapi-raw / demo_option_b_flow.py
# Your Name
# Deploy Option B: Query Parser + RAG + 355M Ranking
# 45cf63e
"""
Demo: Option B Pipeline Flow (Without Real Data)
Shows exactly how Option B processes your physician query
"""
import json
from datetime import datetime
# Opening banner: rule, title, rule, then a blank spacer line.
for banner_line in ("=" * 80, "OPTION B PIPELINE DEMO", "=" * 80, ""):
    print(banner_line)

# Sample physician question; the later sections of the demo echo it back
# and embed it in the final JSON response.
query = "what should a physician considering prescribing ianalumab for sjogren's disease know"
print("📝 PHYSICIAN QUERY:", f" {query}", "", sep="\n")
# ===========================================================================
# STEP 1: QUERY PARSER LLM (Llama-70B)
# ===========================================================================
# Section header followed by the advertised latency and cost of this stage.
for header_line in (
    "=" * 80,
    "STEP 1: QUERY PARSER LLM (Llama-70B)",
    "=" * 80,
    "⏱️ Time: ~3 seconds",
    "💰 Cost: $0.001",
    "",
):
    print(header_line)

# Hard-coded stand-in for the parser's reply -- no model is called in this demo.
# Key order matters: the dict is embedded verbatim in the final JSON response.
parsed_entities = dict(
    drugs=[
        "ianalumab",
        "VAY736",  # research code for ianalumab
        "anti-BAFF-R antibody",
    ],
    diseases=[
        "Sjögren's syndrome",
        "Sjögren syndrome",
        "Sjogren's disease",
        "Sjogren disease",
        "primary Sjögren's syndrome",
        "sicca syndrome",
    ],
    companies=[
        "Novartis",  # ianalumab manufacturer
        "Novartis Pharmaceuticals",
    ],
    endpoints=[
        "safety",
        "efficacy",
        "dosing",
        "contraindications",
        "clinical outcomes",
    ],
    search_terms="ianalumab VAY736 Sjögren syndrome Sjogren disease efficacy safety prescribing",
)

# Entity report; the disease list is previewed with its first three aliases only.
print(
    "🔍 EXTRACTED ENTITIES:",
    f" Drugs: {parsed_entities['drugs']}",
    f" Diseases: {parsed_entities['diseases'][:3]}...",
    f" Companies: {parsed_entities['companies']}",
    f" Endpoints: {parsed_entities['endpoints']}",
    "",
    sep="\n",
)

# The flattened keyword string that the search step is shown to consume.
print("🎯 OPTIMIZED SEARCH QUERY:", f" {parsed_entities['search_terms']}", "", sep="\n")
# ===========================================================================
# STEP 2: RAG SEARCH (BM25 + Semantic + Inverted Index)
# ===========================================================================
def shorten_title(title, limit=80):
    """Return *title* limited to *limit* characters for display.

    The "..." marker is appended only when characters were actually cut,
    so a title that already fits is shown unchanged.
    """
    if len(title) <= limit:
        return title
    return title[:limit] + "..."


# Section header followed by the advertised latency and cost of this stage.
print("=" * 80)
print("STEP 2: RAG SEARCH")
print("=" * 80)
print("⏱️ Time: ~2 seconds")
print("💰 Cost: $0 (local)")
print()

# Simulated search results: the narration below is static text, no search runs.
print("🔎 SEARCH PROCESS:")
print(" 1. Inverted Index: Found 'ianalumab' in 8 trials (O(1) lookup)")
print(" 2. Semantic Search: Computed similarity for 500,000+ trials")
print(" 3. Hybrid Scoring: Combined keyword + semantic scores")
print()

# Hard-coded candidates, listed in descending hybrid_score order.
# Later sections copy these dicts into the ranking and the JSON response.
candidate_trials = [
    {
        "nct_id": "NCT02962895",
        "title": "A Randomized, Double-blind, Placebo-controlled Study of Ianalumab in Patients With Sjögren's Syndrome",
        "hybrid_score": 0.856,
        "snippet": "Phase 2 study evaluating efficacy and safety of ianalumab (VAY736) in primary Sjögren's syndrome...",
    },
    {
        "nct_id": "NCT03334851",
        "title": "Extension Study of Ianalumab in Sjögren's Syndrome",
        "hybrid_score": 0.823,
        "snippet": "Open-label extension to evaluate long-term safety and efficacy of ianalumab in Sjögren's syndrome...",
    },
    {
        "nct_id": "NCT02808364",
        "title": "Safety and Tolerability Study of Ianalumab in Sjögren's Syndrome",
        "hybrid_score": 0.791,
        "snippet": "Phase 2a study assessing safety, tolerability, and pharmacokinetics of ianalumab...",
    },
]

print(f"✅ FOUND: {len(candidate_trials)} highly relevant trials")
print()

# One short entry per candidate, numbered from 1, with a blank line after each.
for i, trial in enumerate(candidate_trials, 1):
    print(f" {i}. {trial['nct_id']}")
    print(f" Hybrid Score: {trial['hybrid_score']:.3f}")
    # Previously "..." was appended unconditionally, which made short titles
    # look truncated; the helper adds it only when the title exceeds 80 chars.
    print(f" {shorten_title(trial['title'])}")
    print()
# ===========================================================================
# STEP 3: 355M PERPLEXITY RANKING
# ===========================================================================
# Section header followed by the advertised latency and cost of this stage.
print("=" * 80)
print("STEP 3: 355M PERPLEXITY RANKING")
print("=" * 80)
print("⏱️ Time: ~2-5 seconds (GPU) or ~15-30 seconds (CPU)")
print("💰 Cost: $0 (local model)")
print()
print("🧠 355M CLINICAL TRIAL GPT ANALYSIS:")
print(" For each trial, calculates: 'How natural is this query-trial pairing?'")
print()

# Simulated perplexity scores: every number below is hard-coded, no model runs.
# Each entry copies the matching Step 2 candidate (via ** unpacking) and layers
# the ranking fields on top of it.
# NOTE(review): the combined_score literals are not reproduced by the
# "70% hybrid + 30% perplexity" weighting mentioned below (for the first trial
# 0.7 * 0.856 + 0.3 * 0.890 = 0.866, not 0.923) -- confirm which is intended.
ranked_trials = [
    {
        **candidate_trials[0],
        "perplexity": 12.4,  # Lower = more relevant
        "perplexity_score": 0.890,
        "combined_score": 0.923,  # 70% hybrid + 30% perplexity
        "rank_before": 1,
        "rank_after": 1,
    },
    {
        **candidate_trials[1],
        "perplexity": 15.8,
        "perplexity_score": 0.863,
        "combined_score": 0.893,
        "rank_before": 2,
        "rank_after": 2,
    },
    {
        **candidate_trials[2],
        "perplexity": 18.2,
        "perplexity_score": 0.846,
        "combined_score": 0.871,
        "rank_before": 3,
        "rank_after": 3,
    },
]

# One entry per ranked trial, numbered from 1, with a blank line after each.
for i, trial in enumerate(ranked_trials, 1):
    print(f" {i}. {trial['nct_id']}")
    print(f" Perplexity: {trial['perplexity']:.1f} (lower = better)")
    print(f" Hybrid Score: {trial['hybrid_score']:.3f}")
    print(f" Combined Score: {trial['combined_score']:.3f}")
    # The two ranks used to be printed back-to-back, so 1 and 1 read as "11";
    # an explicit arrow separates the before and after positions.
    print(f" Rank: {trial['rank_before']} → {trial['rank_after']}")
    print()
# ===========================================================================
# STEP 4: STRUCTURED JSON OUTPUT
# ===========================================================================
# Section header followed by the advertised latency and cost of this stage.
print(
    "=" * 80,
    "STEP 4: STRUCTURED JSON OUTPUT",
    "=" * 80,
    "⏱️ Time: instant",
    "💰 Cost: $0",
    "",
    sep="\n",
)

# Build one API record per ranked trial. Only the id, title, snippet and score
# fields come from the ranked entry; status, phase, sponsor, enrollment, etc.
# are the same literal values for every trial.
trial_records = []
for ranked in ranked_trials:
    scoring_details = {
        "relevance_score": ranked["combined_score"],
        "hybrid_score": ranked["hybrid_score"],
        "perplexity": ranked["perplexity"],
        "perplexity_score": ranked["perplexity_score"],
        "rank_before_355m": ranked["rank_before"],
        "rank_after_355m": ranked["rank_after"],
        "ranking_method": "355m_perplexity",
    }
    trial_records.append(
        {
            "nct_id": ranked["nct_id"],
            "title": ranked["title"],
            "status": "Completed",
            "phase": "Phase 2",
            "conditions": "Primary Sjögren's Syndrome",
            "interventions": "Ianalumab (VAY736)",
            "sponsor": "Novartis Pharmaceuticals",
            "enrollment": "160 participants",
            "primary_outcome": "Change in ESSDAI score at Week 24",
            "description": ranked["snippet"],
            "scoring": scoring_details,
            "url": f"https://clinicaltrials.gov/study/{ranked['nct_id']}",
        }
    )

# Final structured response. Timings are fixed demo figures, not measurements.
final_response = {
    "query": query,
    "processing_time": 8.2,
    "query_analysis": {
        "extracted_entities": parsed_entities,
        "optimized_search": parsed_entities["search_terms"],
        "parsing_time": 3.1,
    },
    "results": {
        "total_found": len(candidate_trials),
        "returned": len(ranked_trials),
        "top_relevance_score": ranked_trials[0]["combined_score"],
    },
    "trials": trial_records,
    "benchmarking": {
        "query_parsing_time": 3.1,
        "rag_search_time": 2.3,
        "355m_ranking_time": 2.8,
        "total_processing_time": 8.2,
    },
}

# Show only the first 1000 characters of the pretty-printed JSON as a preview.
response_preview = json.dumps(final_response, indent=2)[:1000]
print("📦 STRUCTURED JSON RESPONSE:")
print(response_preview + "...")
print()
# ===========================================================================
# WHAT THE CLIENT DOES WITH THIS DATA
# ===========================================================================
# Section header introducing the illustrative downstream answer.
section_header = (
    "=" * 80,
    "WHAT CHATBOT COMPANIES DO WITH THIS JSON",
    "=" * 80,
    "",
    "🤖 CLIENT'S LLM (GPT-4, Claude, etc.) GENERATES:",
    "",
)
print("\n".join(section_header))

# Static example of the answer a client's LLM might write from the JSON.
# Only the trial count is computed; everything else is fixed demo text.
physician_response = (
    "─" * 80,
    "PHYSICIAN RESPONSE (Generated by Client's LLM):",
    "─" * 80,
    "",
    "Based on current clinical trial data, physicians considering prescribing",
    "ianalumab for Sjögren's disease should be aware of the following:",
    "",
    "**Clinical Evidence:**",
    f"- {len(ranked_trials)} major clinical trials have evaluated ianalumab in Sjögren's syndrome",
    "",
    "**Primary Trial (NCT02962895):**",
    "- Phase 2, randomized, double-blind, placebo-controlled study",
    "- 160 participants with primary Sjögren's syndrome",
    "- Primary endpoint: Change in ESSDAI (disease activity) score at Week 24",
    "- Status: Completed",
    "- Sponsor: Novartis Pharmaceuticals",
    "",
    "**Drug Information:**",
    "- Generic name: Ianalumab",
    "- Research code: VAY736",
    "- Mechanism: Anti-BAFF-R (B-cell activating factor receptor) antibody",
    "",
    "**Key Considerations:**",
    "1. Safety profile from completed Phase 2 trials available",
    "2. Long-term extension study (NCT03334851) provides extended safety data",
    "3. Efficacy measured by ESSDAI score reduction",
    "4. Appropriate for patients with primary Sjögren's syndrome",
    "",
    "**Additional Resources:**",
    "- NCT02962895: https://clinicaltrials.gov/study/NCT02962895",
    "- NCT03334851: https://clinicaltrials.gov/study/NCT03334851",
    "- NCT02808364: https://clinicaltrials.gov/study/NCT02808364",
    "",
    "**Note:** This information is based on clinical trial data. Please refer",
    "to the complete prescribing information and consult current clinical",
    "guidelines before prescribing.",
    "─" * 80,
    "",
)
print("\n".join(physician_response))
# ===========================================================================
# SUMMARY
# ===========================================================================
# Closing recap: what the pipeline provides, what it leaves out, and why.
summary_lines = (
    "=" * 80,
    "OPTION B SUMMARY",
    "=" * 80,
    "",
    "✅ WHAT OPTION B PROVIDES:",
    " • Fast query parsing with entity extraction (Llama-70B)",
    " • Accurate trial retrieval (Hybrid RAG)",
    " • Clinical relevance ranking (355M perplexity)",
    " • Structured JSON output with all trial data",
    "",
    "⏱️ TOTAL TIME: ~8 seconds (with GPU) or ~20-25 seconds (CPU)",
    "💰 TOTAL COST: $0.001 per query",
    "",
    "❌ WHAT OPTION B DOESN'T DO:",
    " • Does NOT generate text responses",
    " • Does NOT use 355M for text generation (prevents hallucinations)",
    " • Does NOT include 3-agent orchestration",
    "",
    "🎯 WHY THIS IS PERFECT:",
    " • Chatbot companies control response generation",
    " • Your API focuses on accurate search & ranking",
    " • Fast, cheap, and reliable",
    " • No hallucinations (355M only scores, doesn't generate)",
    "",
    "=" * 80,
)
print("\n".join(summary_lines))

# Persist the full (untruncated) response to the current working directory,
# overwriting any file left by a previous run.
output_path = "demo_option_b_output.json"
with open(output_path, "w") as out_file:
    json.dump(final_response, out_file, indent=2)

print()
print(f"💾 Full JSON response saved to: {output_path}")
print()