JatsTheAIGen committed on
Commit
fad63bf
·
1 Parent(s): f5d3311

feat: Add comprehensive step-by-step system readiness logging

Browse files

- Add detailed 7-step startup process in main.py with component verification
- Enhance orchestration initialization with 6-step process and health checks
- Add step-by-step logging for Flask API initialization
- Include component status summaries and service endpoint information
- Add final 'SYSTEM READY' confirmation after all components are initialized
- Improve container log readability with clear step indicators and status icons
- Log configuration details (ZeroGPU, local models, database paths)
- Add component health verification before marking system as ready

Files changed (4) hide show
  1. app.py +89 -13
  2. flask_api.py +14 -3
  3. flask_api_standalone.py +29 -7
  4. main.py +103 -22
app.py CHANGED
@@ -2068,37 +2068,113 @@ def initialize_orchestrator():
2068
  logger.debug(f"Could not load ZeroGPU config: {e}")
2069
 
2070
  # Initialize LLM Router
2071
- logger.info("Step 1/6: Initializing LLM Router...")
 
 
 
 
 
 
 
 
 
 
2072
  llm_router = LLMRouter(hf_token, use_local_models=True, zero_gpu_config=zero_gpu_config)
2073
- logger.info("βœ“ LLM Router initialized")
 
2074
 
2075
  # Initialize Agents
2076
- logger.info("Step 2/6: Initializing Agents...")
 
2077
  agents = {
2078
  'intent_recognition': create_intent_agent(llm_router),
2079
- 'response_synthesis': create_synthesis_agent(llm_router),
2080
- 'safety_check': create_safety_agent(llm_router),
2081
  }
 
 
 
 
 
2082
 
2083
- # Add skills identification agent
 
 
 
 
2084
  skills_agent = create_skills_identification_agent(llm_router)
2085
  agents['skills_identification'] = skills_agent
2086
- logger.info("βœ“ Skills identification agent initialized")
2087
 
2088
- logger.info(f"βœ“ Initialized {len(agents)} agents")
2089
 
2090
  # Initialize Context Manager (with LLM router for context generation)
2091
- logger.info("Step 3/6: Initializing Context Manager...")
 
 
2092
  context_manager = EfficientContextManager(llm_router=llm_router)
2093
- logger.info("βœ“ Context Manager initialized")
 
2094
 
2095
  # Initialize Orchestrator
2096
- logger.info("Step 4/6: Initializing Orchestrator...")
 
2097
  orchestrator = MVPOrchestrator(llm_router, context_manager, agents)
2098
- logger.info("βœ“ Orchestrator initialized")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2099
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2100
  logger.info("=" * 60)
2101
- logger.info("ORCHESTRATION SYSTEM READY")
 
 
 
2102
  logger.info("=" * 60)
2103
 
2104
  except Exception as e:
 
2068
  logger.debug(f"Could not load ZeroGPU config: {e}")
2069
 
2070
  # Initialize LLM Router
2071
+ logger.info("[ORCHESTRATION STEP 1/6] Initializing LLM Router...")
2072
+ logger.info(" β†’ Checking inference backend configuration...")
2073
+ if zero_gpu_config and zero_gpu_config.get("enabled"):
2074
+ logger.info(f" β†’ ZeroGPU API: {zero_gpu_config.get('base_url', 'N/A')}")
2075
+ if zero_gpu_config.get("per_user_mode"):
2076
+ logger.info(" β†’ Mode: Per-user (multi-tenant)")
2077
+ else:
2078
+ logger.info(" β†’ Mode: Service account (single-tenant)")
2079
+ else:
2080
+ logger.info(" β†’ ZeroGPU API: Disabled (using local/HF fallback)")
2081
+ logger.info(" β†’ Local models: Enabled (lazy loading)")
2082
  llm_router = LLMRouter(hf_token, use_local_models=True, zero_gpu_config=zero_gpu_config)
2083
+ logger.info(" βœ“ LLM Router initialized")
2084
+ logger.info(" βœ“ Inference routing configured")
2085
 
2086
  # Initialize Agents
2087
+ logger.info("[ORCHESTRATION STEP 2/6] Initializing AI Agents...")
2088
+ logger.info(" β†’ Creating intent recognition agent...")
2089
  agents = {
2090
  'intent_recognition': create_intent_agent(llm_router),
 
 
2091
  }
2092
+ logger.info(" βœ“ Intent recognition agent ready")
2093
+
2094
+ logger.info(" β†’ Creating response synthesis agent...")
2095
+ agents['response_synthesis'] = create_synthesis_agent(llm_router)
2096
+ logger.info(" βœ“ Response synthesis agent ready")
2097
 
2098
+ logger.info(" β†’ Creating safety check agent...")
2099
+ agents['safety_check'] = create_safety_agent(llm_router)
2100
+ logger.info(" βœ“ Safety check agent ready")
2101
+
2102
+ logger.info(" β†’ Creating skills identification agent...")
2103
  skills_agent = create_skills_identification_agent(llm_router)
2104
  agents['skills_identification'] = skills_agent
2105
+ logger.info(" βœ“ Skills identification agent ready")
2106
 
2107
+ logger.info(f" βœ“ All {len(agents)} agents initialized successfully")
2108
 
2109
  # Initialize Context Manager (with LLM router for context generation)
2110
+ logger.info("[ORCHESTRATION STEP 3/6] Initializing Context Manager...")
2111
+ logger.info(" β†’ Setting up session database...")
2112
+ logger.info(f" β†’ Database path: {settings.db_path if hasattr(settings, 'db_path') else '/tmp/sessions.db'}")
2113
  context_manager = EfficientContextManager(llm_router=llm_router)
2114
+ logger.info(" βœ“ Context Manager initialized")
2115
+ logger.info(" βœ“ Session storage ready")
2116
 
2117
  # Initialize Orchestrator
2118
+ logger.info("[ORCHESTRATION STEP 4/6] Initializing Orchestrator...")
2119
+ logger.info(" β†’ Connecting LLM Router, Context Manager, and Agents...")
2120
  orchestrator = MVPOrchestrator(llm_router, context_manager, agents)
2121
+ logger.info(" βœ“ Orchestrator initialized")
2122
+ logger.info(" βœ“ Request processing pipeline ready")
2123
+
2124
+ # Component Health Verification
2125
+ logger.info("[ORCHESTRATION STEP 5/6] Verifying component health...")
2126
+ health_status = {}
2127
+
2128
+ # Check LLM Router
2129
+ if llm_router:
2130
+ health_status['LLM Router'] = True
2131
+ logger.info(" βœ“ LLM Router: Healthy")
2132
+ else:
2133
+ health_status['LLM Router'] = False
2134
+ logger.error(" βœ— LLM Router: Not available")
2135
+
2136
+ # Check Agents
2137
+ if agents and len(agents) == 4:
2138
+ health_status['Agents'] = True
2139
+ logger.info(f" βœ“ Agents: All {len(agents)} agents healthy")
2140
+ else:
2141
+ health_status['Agents'] = False
2142
+ logger.error(f" βœ— Agents: Only {len(agents) if agents else 0} agents available")
2143
 
2144
+ # Check Context Manager
2145
+ if context_manager:
2146
+ health_status['Context Manager'] = True
2147
+ logger.info(" βœ“ Context Manager: Healthy")
2148
+ else:
2149
+ health_status['Context Manager'] = False
2150
+ logger.error(" βœ— Context Manager: Not available")
2151
+
2152
+ # Check Orchestrator
2153
+ if orchestrator:
2154
+ health_status['Orchestrator'] = True
2155
+ logger.info(" βœ“ Orchestrator: Healthy")
2156
+ else:
2157
+ health_status['Orchestrator'] = False
2158
+ logger.error(" βœ— Orchestrator: Not available")
2159
+
2160
+ logger.info(" βœ“ Component health verification complete")
2161
+
2162
+ # Final Readiness Confirmation
2163
+ logger.info("[ORCHESTRATION STEP 6/6] Final readiness confirmation...")
2164
+ all_healthy = all(health_status.values())
2165
+ if all_healthy:
2166
+ logger.info(" βœ“ All components healthy")
2167
+ else:
2168
+ failed_components = [comp for comp, status in health_status.items() if not status]
2169
+ logger.warning(f" ⚠ Some components not healthy: {', '.join(failed_components)}")
2170
+
2171
+ logger.info("=" * 60)
2172
+ logger.info("βœ… ORCHESTRATION SYSTEM READY")
2173
  logger.info("=" * 60)
2174
+ logger.info("Component Status Summary:")
2175
+ for component, status in health_status.items():
2176
+ status_icon = "βœ“" if status else "βœ—"
2177
+ logger.info(f" {status_icon} {component}: {'Ready' if status else 'Not Available'}")
2178
  logger.info("=" * 60)
2179
 
2180
  except Exception as e:
flask_api.py CHANGED
@@ -43,6 +43,9 @@ def initialize_orchestrator():
43
  logger.info("=" * 60)
44
  logger.info("INITIALIZING FLASK API ORCHESTRATOR")
45
  logger.info("=" * 60)
 
 
 
46
 
47
  # Import from app.py (this won't trigger app.py's launch code)
48
  # The orchestrator is already initialized in app.py's module-level code
@@ -51,13 +54,21 @@ def initialize_orchestrator():
51
  _orchestrator = orchestrator
52
  _process_message_async = process_message_async
53
 
 
54
  if _orchestrator is not None:
55
- logger.info("βœ“ AI ORCHESTRATOR READY (shared with Gradio)")
 
 
 
 
56
  else:
57
- logger.warning("⚠ Orchestrator not available - some features may be limited")
 
 
 
 
58
 
59
  _initialized = True
60
- logger.info("=" * 60)
61
 
62
  except Exception as e:
63
  logger.error(f"Failed to initialize orchestrator: {e}", exc_info=True)
 
43
  logger.info("=" * 60)
44
  logger.info("INITIALIZING FLASK API ORCHESTRATOR")
45
  logger.info("=" * 60)
46
+ logger.info("[FLASK API] Loading orchestrator from app.py...")
47
+ logger.info(" β†’ Orchestrator is shared with Gradio interface")
48
+ logger.info(" β†’ No duplicate initialization needed")
49
 
50
  # Import from app.py (this won't trigger app.py's launch code)
51
  # The orchestrator is already initialized in app.py's module-level code
 
54
  _orchestrator = orchestrator
55
  _process_message_async = process_message_async
56
 
57
+ logger.info("[FLASK API] Verifying orchestrator status...")
58
  if _orchestrator is not None:
59
+ logger.info(" βœ“ Orchestrator loaded successfully")
60
+ logger.info(" βœ“ Request processing available")
61
+ logger.info("=" * 60)
62
+ logger.info("βœ… AI ORCHESTRATOR READY (shared with Gradio)")
63
+ logger.info("=" * 60)
64
  else:
65
+ logger.warning(" ⚠ Orchestrator not available")
66
+ logger.warning(" ⚠ Some features may be limited")
67
+ logger.info("=" * 60)
68
+ logger.warning("⚠ FLASK API RUNNING IN DEGRADED MODE")
69
+ logger.info("=" * 60)
70
 
71
  _initialized = True
 
72
 
73
  except Exception as e:
74
  logger.error(f"Failed to initialize orchestrator: {e}", exc_info=True)
flask_api_standalone.py CHANGED
@@ -93,28 +93,50 @@ def initialize_orchestrator():
93
  logger.debug(f"Could not load ZeroGPU config: {e}")
94
 
95
  # Initialize LLM Router with local model loading enabled
96
- logger.info("Initializing LLM Router with local GPU model loading...")
 
 
 
 
 
 
97
  llm_router = LLMRouter(hf_token, use_local_models=True, zero_gpu_config=zero_gpu_config)
 
98
 
99
- logger.info("Initializing Agents...")
 
100
  agents = {
101
  'intent_recognition': create_intent_agent(llm_router),
102
  'response_synthesis': create_synthesis_agent(llm_router),
103
  'safety_check': create_safety_agent(llm_router),
104
  'skills_identification': create_skills_identification_agent(llm_router)
105
  }
 
106
 
107
- logger.info("Initializing Context Manager...")
 
108
  context_manager = EfficientContextManager(llm_router=llm_router)
 
109
 
110
- logger.info("Initializing Orchestrator...")
 
111
  orchestrator = MVPOrchestrator(llm_router, context_manager, agents)
 
112
 
 
113
  orchestrator_available = True
 
 
 
 
 
 
 
114
  logger.info("=" * 60)
115
- logger.info("βœ“ AI ORCHESTRATOR READY")
116
- logger.info(" - Local GPU models enabled")
117
- logger.info(" - MAX_WORKERS: 4")
 
118
  logger.info("=" * 60)
119
 
120
  return True
 
93
  logger.debug(f"Could not load ZeroGPU config: {e}")
94
 
95
  # Initialize LLM Router with local model loading enabled
96
+ logger.info("[FLASK API STEP 1/5] Initializing LLM Router...")
97
+ logger.info(" β†’ Configuring inference backend...")
98
+ if zero_gpu_config and zero_gpu_config.get("enabled"):
99
+ logger.info(f" β†’ ZeroGPU API: {zero_gpu_config.get('base_url', 'N/A')}")
100
+ else:
101
+ logger.info(" β†’ ZeroGPU API: Disabled (using local/HF fallback)")
102
+ logger.info(" β†’ Local models: Enabled (lazy loading)")
103
  llm_router = LLMRouter(hf_token, use_local_models=True, zero_gpu_config=zero_gpu_config)
104
+ logger.info(" βœ“ LLM Router initialized")
105
 
106
+ logger.info("[FLASK API STEP 2/5] Initializing AI Agents...")
107
+ logger.info(" β†’ Creating 4 specialized agents...")
108
  agents = {
109
  'intent_recognition': create_intent_agent(llm_router),
110
  'response_synthesis': create_synthesis_agent(llm_router),
111
  'safety_check': create_safety_agent(llm_router),
112
  'skills_identification': create_skills_identification_agent(llm_router)
113
  }
114
+ logger.info(f" βœ“ All {len(agents)} agents initialized")
115
 
116
+ logger.info("[FLASK API STEP 3/5] Initializing Context Manager...")
117
+ logger.info(" β†’ Setting up session storage...")
118
  context_manager = EfficientContextManager(llm_router=llm_router)
119
+ logger.info(" βœ“ Context Manager initialized")
120
 
121
+ logger.info("[FLASK API STEP 4/5] Initializing Orchestrator...")
122
+ logger.info(" β†’ Connecting all components...")
123
  orchestrator = MVPOrchestrator(llm_router, context_manager, agents)
124
+ logger.info(" βœ“ Orchestrator initialized")
125
 
126
+ logger.info("[FLASK API STEP 5/5] Verifying system readiness...")
127
  orchestrator_available = True
128
+ if llm_router and context_manager and orchestrator and agents:
129
+ logger.info(" βœ“ All components verified")
130
+ else:
131
+ logger.warning(" ⚠ Some components may not be fully initialized")
132
+
133
+ logger.info("=" * 60)
134
+ logger.info("βœ… AI ORCHESTRATOR READY (Flask API)")
135
  logger.info("=" * 60)
136
+ logger.info("Configuration:")
137
+ logger.info(" β€’ Local GPU models: Enabled (lazy loading)")
138
+ logger.info(" β€’ MAX_WORKERS: 4")
139
+ logger.info(" β€’ ZeroGPU API: " + ("Enabled" if (zero_gpu_config and zero_gpu_config.get("enabled")) else "Disabled"))
140
  logger.info("=" * 60)
141
 
142
  return True
main.py CHANGED
@@ -169,51 +169,132 @@ def start_flask_background():
169
 
170
  def main():
171
  """Main entry point for HF Spaces"""
172
- logger.info("πŸš€ Starting AI Research Assistant MVP")
173
- logger.info("=" * 60)
 
174
 
175
- # Check for HF Token
 
176
  hf_token = os.getenv('HF_TOKEN')
177
- if not hf_token:
178
- logger.warning("HF_TOKEN not found in environment. Some features may be limited.")
 
 
179
 
180
- # Start Flask API in background (for external integrations)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  flask_thread = start_flask_background()
 
 
 
 
182
 
183
- # Import the already-configured interface from app.py
184
- # This imports the interface without triggering app.py's launch code
185
- # (app.py's launch only runs when app.py is executed directly, not when imported)
186
- logger.info("Creating interface from app.py (all handlers already initialized)...")
187
  demo, components = create_mobile_optimized_interface()
 
 
188
 
189
- logger.info("βœ“ Interface created with all API endpoints and handlers")
 
 
 
 
 
 
 
 
 
 
190
 
191
- # Detect if running on HF Spaces
192
- # HF Spaces provides its own public URL, so share is not needed
193
  try:
194
  from spaces import GPU
195
  is_hf_spaces = True
196
- logger.info("βœ“ Detected Hugging Face Spaces - using built-in public URL")
 
197
  except ImportError:
198
  is_hf_spaces = False
199
- logger.info("βœ“ Local deployment - will create Gradio share link if needed")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
 
201
  # Launch configuration
202
- # Note: show_api=True is already set in app.py's interface creation,
203
- # but we ensure it here as well for clarity
204
  launch_config = {
205
  'server_name': '0.0.0.0',
206
  'server_port': 7860,
207
  'share': not is_hf_spaces, # Only create share link when not on HF Spaces
208
- 'show_api': True, # Enable API endpoints (also set in app.py, but explicit here)
209
  'debug': False
210
  }
211
 
212
- logger.info("=" * 60)
213
- logger.info("βœ… Application ready for launch")
 
 
 
 
 
 
 
 
 
 
214
  if flask_thread:
215
- logger.info("βœ“ Flask API running in background")
216
- logger.info("=" * 60)
 
 
 
 
217
  return demo.launch(**launch_config)
218
 
219
  if __name__ == "__main__":
 
169
 
170
  def main():
171
  """Main entry point for HF Spaces"""
172
+ logger.info("=" * 70)
173
+ logger.info("πŸš€ SYSTEM STARTUP - AI Research Assistant MVP")
174
+ logger.info("=" * 70)
175
 
176
+ # Step 1: Environment Configuration Check
177
+ logger.info("[STEP 1/7] Checking environment configuration...")
178
  hf_token = os.getenv('HF_TOKEN')
179
+ if hf_token:
180
+ logger.info(" βœ“ HF_TOKEN configured")
181
+ else:
182
+ logger.warning(" ⚠ HF_TOKEN not found - some features may be limited")
183
 
184
+ # Check ZeroGPU configuration
185
+ use_zero_gpu = os.getenv('USE_ZERO_GPU', 'false').lower() == 'true'
186
+ if use_zero_gpu:
187
+ zero_gpu_url = os.getenv('ZERO_GPU_API_URL', '')
188
+ if zero_gpu_url:
189
+ logger.info(f" βœ“ ZeroGPU API enabled: {zero_gpu_url}")
190
+ else:
191
+ logger.warning(" ⚠ ZeroGPU enabled but URL not configured")
192
+ else:
193
+ logger.info(" βœ“ ZeroGPU API disabled (using local/HF fallback)")
194
+
195
+ logger.info(" βœ“ Environment check complete")
196
+
197
+ # Step 2: Initialize Orchestration System (via app.py import)
198
+ logger.info("[STEP 2/7] Initializing orchestration system...")
199
+ logger.info(" β†’ This step initializes: LLM Router, Agents, Context Manager, Orchestrator")
200
+ logger.info(" β†’ Detailed logs available in orchestration initialization section")
201
+
202
+ # Step 3: Start Flask API in background
203
+ logger.info("[STEP 3/7] Starting Flask API server...")
204
  flask_thread = start_flask_background()
205
+ if flask_thread:
206
+ logger.info(" βœ“ Flask API started successfully")
207
+ else:
208
+ logger.warning(" ⚠ Flask API not started (continuing with Gradio only)")
209
 
210
+ # Step 4: Create Gradio Interface
211
+ logger.info("[STEP 4/7] Creating Gradio interface...")
212
+ logger.info(" β†’ Loading UI components and API endpoints...")
 
213
  demo, components = create_mobile_optimized_interface()
214
+ logger.info(" βœ“ Gradio interface created")
215
+ logger.info(" βœ“ API endpoints registered")
216
 
217
+ # Step 5: Verify Component Status
218
+ logger.info("[STEP 5/7] Verifying component status...")
219
+ if components and not components.get('mock_mode'):
220
+ logger.info(" βœ“ All components initialized successfully")
221
+ if 'orchestrator' in components and components['orchestrator']:
222
+ logger.info(" βœ“ Orchestrator available")
223
+ else:
224
+ logger.warning(" ⚠ Orchestrator not available (degraded mode)")
225
+ else:
226
+ logger.warning(" ⚠ Running in mock/fallback mode")
227
+ logger.info(" βœ“ Component verification complete")
228
 
229
+ # Step 6: Detect Deployment Environment
230
+ logger.info("[STEP 6/7] Detecting deployment environment...")
231
  try:
232
  from spaces import GPU
233
  is_hf_spaces = True
234
+ logger.info(" βœ“ Detected Hugging Face Spaces")
235
+ logger.info(" βœ“ Using built-in public URL (no share link needed)")
236
  except ImportError:
237
  is_hf_spaces = False
238
+ logger.info(" βœ“ Detected local deployment")
239
+ logger.info(" β†’ Will create Gradio share link if needed")
240
+ logger.info(" βœ“ Environment detection complete")
241
+
242
+ # Step 7: Final System Readiness Check
243
+ logger.info("[STEP 7/7] Performing final system readiness check...")
244
+ readiness_checks = []
245
+
246
+ # Check Gradio interface
247
+ if demo:
248
+ readiness_checks.append(("Gradio Interface", True))
249
+ logger.info(" βœ“ Gradio interface ready")
250
+ else:
251
+ readiness_checks.append(("Gradio Interface", False))
252
+ logger.error(" βœ— Gradio interface not ready")
253
+
254
+ # Check Flask API
255
+ if flask_thread and flask_thread.is_alive():
256
+ readiness_checks.append(("Flask API", True))
257
+ logger.info(" βœ“ Flask API running")
258
+ else:
259
+ readiness_checks.append(("Flask API", False))
260
+ logger.warning(" ⚠ Flask API not running (optional)")
261
+
262
+ # Check Orchestrator
263
+ if components and components.get('orchestrator'):
264
+ readiness_checks.append(("Orchestrator", True))
265
+ logger.info(" βœ“ Orchestrator available")
266
+ else:
267
+ readiness_checks.append(("Orchestrator", False))
268
+ logger.warning(" ⚠ Orchestrator not available (degraded mode)")
269
 
270
  # Launch configuration
 
 
271
  launch_config = {
272
  'server_name': '0.0.0.0',
273
  'server_port': 7860,
274
  'share': not is_hf_spaces, # Only create share link when not on HF Spaces
275
+ 'show_api': True, # Enable API endpoints
276
  'debug': False
277
  }
278
 
279
+ # Final System Ready Message
280
+ logger.info("=" * 70)
281
+ logger.info("βœ… SYSTEM READY - All components initialized")
282
+ logger.info("=" * 70)
283
+ logger.info("Component Status:")
284
+ for component, status in readiness_checks:
285
+ status_icon = "βœ“" if status else "⚠"
286
+ logger.info(f" {status_icon} {component}: {'Ready' if status else 'Not Available'}")
287
+ logger.info("")
288
+ logger.info("Service Endpoints:")
289
+ logger.info(" β€’ Gradio UI: http://0.0.0.0:7860")
290
+ logger.info(" β€’ Gradio API: http://0.0.0.0:7860/api/")
291
  if flask_thread:
292
+ logger.info(" β€’ Flask API: http://0.0.0.0:5001/api/chat")
293
+ logger.info(" β€’ Flask Health: http://0.0.0.0:5001/health")
294
+ logger.info("")
295
+ logger.info("πŸš€ Launching application...")
296
+ logger.info("=" * 70)
297
+
298
  return demo.launch(**launch_config)
299
 
300
  if __name__ == "__main__":