|
|
|
|
|
""" |
|
|
Pure Flask API for Hugging Face Spaces |
|
|
No Gradio - Just Flask REST API |
|
|
Uses local GPU models for inference |
|
|
""" |
|
|
|
|
|
from flask import Flask, request, jsonify |
|
|
from flask_cors import CORS |
|
|
from flask_limiter import Limiter |
|
|
from flask_limiter.util import get_remote_address |
|
|
import logging |
|
|
import sys |
|
|
import os |
|
|
import asyncio |
|
|
from pathlib import Path |
|
|
from logging.handlers import RotatingFileHandler |
|
|
|
|
|
|
|
|
# Clamp OMP/MKL thread-count env vars to a sane positive integer before any
# numeric library is imported (they read these at import time).
omp_threads = os.getenv('OMP_NUM_THREADS', '4')
try:
    omp_int = int(omp_threads)
except (ValueError, TypeError):
    # Non-numeric value: fall back to 4 threads for both OpenMP and MKL.
    os.environ['OMP_NUM_THREADS'] = '4'
    os.environ['MKL_NUM_THREADS'] = '4'
    logger_basic = logging.getLogger(__name__)
    logger_basic.warning("Invalid OMP_NUM_THREADS, defaulting to 4")
else:
    if omp_int <= 0:
        # Zero/negative thread counts are meaningless; normalize to 4.
        omp_int = 4
        logger_basic = logging.getLogger(__name__)
        logger_basic.warning("OMP_NUM_THREADS must be positive, defaulting to 4")
    os.environ['OMP_NUM_THREADS'] = str(omp_int)
    os.environ['MKL_NUM_THREADS'] = str(omp_int)
|
|
|
|
|
|
|
|
# Pick a writable directory for log files, degrading gracefully:
# $LOG_DIR (default /tmp/logs) -> ~/.logs -> system temp dir.
log_dir = os.getenv('LOG_DIR', '/tmp/logs')
try:
    # mode=0o700: owner-only access, since logs may contain sensitive data.
    os.makedirs(log_dir, exist_ok=True, mode=0o700)
except OSError:
    # Primary location not writable (e.g. read-only filesystem).
    # NOTE: os.path.expanduser('~') returns the input unchanged when it cannot
    # expand, so it is effectively always truthy; kept for compatibility.
    log_dir = os.path.expanduser('~/.logs') if os.path.expanduser('~') else '/tmp'
    try:
        os.makedirs(log_dir, exist_ok=True)
    except OSError:
        # Fix: the fallback makedirs could previously raise uncaught and kill
        # module import; use the system temp dir as a guaranteed last resort.
        import tempfile
        log_dir = tempfile.gettempdir()
|
|
|
|
|
|
|
|
# Console logging is configured first; the rotating file handler is attached
# afterwards (best-effort) so startup messages always reach stdout.
_console_handler = logging.StreamHandler(sys.stdout)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[_console_handler],
)
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
# Attach a rotating file handler (10 MB per file, 5 backups) to the module
# logger. Any filesystem failure is non-fatal: console logging remains.
try:
    log_file = os.path.join(log_dir, 'app.log')
    file_handler = RotatingFileHandler(log_file, maxBytes=10 * 1024 * 1024, backupCount=5)
    _file_format = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
    )
    file_handler.setFormatter(_file_format)
    file_handler.setLevel(logging.INFO)
    logger.addHandler(file_handler)

    # On POSIX, restrict the log file to the owning user (best-effort;
    # chmod is skipped on Windows where the flag model differs).
    if os.name != 'nt':
        try:
            os.chmod(log_file, 0o600)
        except OSError:
            pass
    logger.info(f"Logging to file: {log_file}")
except (OSError, PermissionError) as e:
    logger.warning(f"Could not create log file: {e}. Using console logging only.")
|
|
|
|
|
|
|
|
def sanitize_log_data(data):
    """Return a copy of *data* with likely-sensitive values masked.

    Dict entries whose key contains any of token/password/secret/key/auth/
    api_key (case-insensitive substring match) have their value replaced by
    '***REDACTED***'. Nested dicts and lists are sanitized recursively;
    every other value is passed through unchanged.
    """
    sensitive_markers = ('token', 'password', 'secret', 'key', 'auth', 'api_key')

    if isinstance(data, list):
        return [sanitize_log_data(item) for item in data]
    if not isinstance(data, dict):
        return data

    cleaned = {}
    for field, value in data.items():
        lowered = field.lower()
        if any(marker in lowered for marker in sensitive_markers):
            cleaned[field] = '***REDACTED***'
        elif isinstance(value, (dict, list)):
            cleaned[field] = sanitize_log_data(value)
        else:
            cleaned[field] = value
    return cleaned
|
|
|
|
|
|
|
|
# Make project-local packages (src.*) importable regardless of the working
# directory this module is launched from (e.g. on Hugging Face Spaces).
project_root = Path(__file__).parent
sys.path.insert(0, str(project_root))

app = Flask(__name__)
# NOTE(review): CORS() with no arguments allows all origins. Presumably
# intentional for a public Space API — confirm before production hardening.
CORS(app)
|
|
|
|
|
|
|
|
# Optional per-client-IP rate limiting, toggled via RATE_LIMIT_ENABLED
# (default: enabled). Routes check `limiter` for None before decorating.
rate_limit_enabled = os.getenv('RATE_LIMIT_ENABLED', 'true').lower() == 'true'
if rate_limit_enabled:
    limiter = Limiter(
        app=app,
        key_func=get_remote_address,  # rate-limit key = caller's remote IP
        default_limits=["200 per day", "50 per hour", "10 per minute"],
        storage_uri="memory://",  # per-process only; not shared across workers
        headers_enabled=True  # expose X-RateLimit-* headers to clients
    )
    logger.info("Rate limiting enabled")
else:
    limiter = None
    logger.warning("Rate limiting disabled - NOT recommended for production")
|
|
|
|
|
|
|
|
@app.after_request
def set_security_headers(response):
    """
    Add comprehensive security headers to all responses.

    Implements OWASP-recommended security headers for enhanced protection
    against common web vulnerabilities.
    """
    security_headers = {
        # Content-type sniffing / framing / XSS protections
        'X-Content-Type-Options': 'nosniff',
        'X-Frame-Options': 'DENY',
        'X-XSS-Protection': '1; mode=block',
        'Strict-Transport-Security': 'max-age=31536000; includeSubDomains',
        'Content-Security-Policy': "default-src 'self'",
        'Referrer-Policy': 'strict-origin-when-cross-origin',
        # Feature and cross-origin isolation policies
        'Permissions-Policy': 'geolocation=(), microphone=(), camera=()',
        'Cross-Origin-Resource-Policy': 'same-origin',
        'Cross-Origin-Opener-Policy': 'same-origin',
        'X-Permitted-Cross-Domain-Policies': 'none',
    }
    for header_name, header_value in security_headers.items():
        response.headers[header_name] = header_value
    return response
|
|
|
|
|
|
|
|
# Module-level orchestrator state, shared across all requests in this process.
orchestrator = None  # MVPOrchestrator instance once initialization succeeds
orchestrator_available = False  # True only after a successful initialization
initialization_attempted = False  # set on first initialize_orchestrator() call
initialization_error = None  # human-readable reason for the last failed init
|
|
|
|
|
def initialize_orchestrator():
    """Initialize the AI orchestrator with local GPU models.

    Imports the agent/router/orchestrator stack, wires it together and
    publishes the result through the module-level globals. Returns True on
    success; on failure returns False and records a human-readable reason
    in ``initialization_error``. Safe to call repeatedly — each call
    retries from scratch.
    """
    global orchestrator, orchestrator_available, initialization_attempted, initialization_error

    initialization_attempted = True
    initialization_error = None

    # Resolve GatedRepoError BEFORE the main try-block. Previously it was
    # imported inside the try, so any failure occurring before that import
    # made the ``except GatedRepoError`` clause below raise NameError and
    # mask the original error. The class has moved between huggingface_hub
    # modules across versions, so probe both known locations.
    try:
        from huggingface_hub.exceptions import GatedRepoError
    except ImportError:
        try:
            from huggingface_hub.utils import GatedRepoError
        except ImportError:
            # Degrade: gated-repo failures then hit the generic handler.
            GatedRepoError = Exception

    try:
        logger.info("=" * 60)
        logger.info("INITIALIZING AI ORCHESTRATOR (Local GPU Models)")
        logger.info("=" * 60)

        # Project-local imports are deferred so a broken dependency yields a
        # logged initialization failure instead of killing module import.
        from src.agents.intent_agent import create_intent_agent
        from src.agents.synthesis_agent import create_synthesis_agent
        from src.agents.safety_agent import create_safety_agent
        from src.agents.skills_identification_agent import create_skills_identification_agent
        from src.llm_router import LLMRouter
        from src.orchestrator_engine import MVPOrchestrator
        from src.context_manager import EfficientContextManager

        logger.info("✓ Imports successful")

        # HF_TOKEN is only required for downloading gated models.
        hf_token = os.getenv('HF_TOKEN', '')
        if not hf_token:
            logger.warning("HF_TOKEN not set - may be needed for gated model access")
        else:
            logger.info("HF_TOKEN available (for model download only)")

        logger.info("Initializing LLM Router (local models only, no API fallback)...")
        try:
            llm_router = LLMRouter(hf_token=hf_token, use_local_models=True)
            logger.info("✓ LLM Router initialized (local models only)")
        except Exception as e:
            logger.error(f"❌ Failed to initialize LLM Router: {e}", exc_info=True)
            logger.error("This is a critical error - local models are required")
            raise

        logger.info("Initializing Agents...")
        try:
            # All agents share the single router instance.
            agents = {
                'intent_recognition': create_intent_agent(llm_router),
                'response_synthesis': create_synthesis_agent(llm_router),
                'safety_check': create_safety_agent(llm_router),
                'skills_identification': create_skills_identification_agent(llm_router)
            }
            logger.info("✓ All agents initialized")
        except Exception as e:
            logger.error(f"❌ Failed to initialize agents: {e}", exc_info=True)
            raise

        logger.info("Initializing Context Manager...")
        try:
            context_manager = EfficientContextManager(llm_router=llm_router)
            logger.info("✓ Context Manager initialized")
        except Exception as e:
            logger.error(f"❌ Failed to initialize Context Manager: {e}", exc_info=True)
            raise

        logger.info("Initializing Orchestrator...")
        try:
            orchestrator = MVPOrchestrator(llm_router, context_manager, agents)
            logger.info("✓ Orchestrator initialized")
        except Exception as e:
            logger.error(f"❌ Failed to initialize Orchestrator: {e}", exc_info=True)
            raise

        orchestrator_available = True
        logger.info("=" * 60)
        logger.info("✓ AI ORCHESTRATOR READY")
        logger.info(" - Local GPU models enabled" if llm_router.use_local_models else " - API-only mode (local models disabled)")
        logger.info(" - MAX_WORKERS: 4")
        logger.info("=" * 60)

        return True

    except GatedRepoError as e:
        # NOTE: if GatedRepoError degraded to Exception above, this clause
        # catches everything — matching the original fallback behavior.
        logger.error("=" * 60)
        logger.error("❌ GATED REPOSITORY ERROR DURING INITIALIZATION")
        logger.error("=" * 60)
        logger.error(f"Error: {e}")
        logger.error("")
        logger.error("SOLUTION:")
        logger.error("1. Visit the model repository on Hugging Face")
        logger.error("2. Click 'Agree and access repository'")
        logger.error("3. Wait for approval (usually instant)")
        logger.error("4. Ensure HF_TOKEN is set with your access token")
        logger.error("")
        logger.error("NOTE: API fallback has been removed. Local models are required.")
        logger.error("=" * 60)
        orchestrator_available = False
        initialization_error = f"GatedRepoError: {str(e)}"
        return False
    except Exception as e:
        logger.error("=" * 60)
        logger.error("❌ FAILED TO INITIALIZE ORCHESTRATOR")
        logger.error("=" * 60)
        logger.error(f"Error type: {type(e).__name__}")
        logger.error(f"Error message: {str(e)}")
        logger.error("=" * 60)
        logger.error("Full traceback:", exc_info=True)
        orchestrator_available = False
        initialization_error = f"{type(e).__name__}: {str(e)}"
        return False
|
|
|
|
|
|
|
|
@app.route('/', methods=['GET'])
def root():
    """API information: name, version, readiness and endpoint catalogue."""
    info = {
        'name': 'AI Assistant Flask API',
        'version': '1.0',
        'status': 'running',
        'orchestrator_ready': orchestrator_available,
        'features': {
            'local_gpu_models': True,
            'max_workers': 4,
            'hardware': 'NVIDIA T4 Medium',
        },
        'endpoints': {
            'health': 'GET /api/health',
            'chat': 'POST /api/chat',
            'initialize': 'POST /api/initialize',
            'context_mode_get': 'GET /api/context/mode',
            'context_mode_set': 'POST /api/context/mode',
        },
    }
    return jsonify(info)
|
|
|
|
|
|
|
|
@app.route('/api/health', methods=['GET'])
def health_check():
    """Health check endpoint with detailed diagnostics."""
    payload = {
        'status': 'healthy' if orchestrator_available else 'unhealthy',
        'orchestrator_ready': orchestrator_available,
        'initialization_attempted': initialization_attempted,
    }

    if orchestrator_available:
        return jsonify(payload)

    # Not ready: explain why as precisely as the recorded state allows.
    if initialization_error:
        payload['error'] = initialization_error
        payload['message'] = 'Initialization failed. Check logs for details.'
    elif initialization_attempted:
        payload['message'] = 'Initialization completed but orchestrator not available'
    else:
        payload['message'] = 'Initialization not yet attempted'
        payload['help'] = 'Try POST /api/initialize to trigger initialization'

    return jsonify(payload)
|
|
|
|
|
|
|
|
@app.route('/api/chat', methods=['POST'])
@limiter.limit("10 per minute") if limiter else lambda f: f
def chat():
    """
    Process chat message

    POST /api/chat
    {
        "message": "user message",
        "history": [[user, assistant], ...],
        "session_id": "session-123",
        "user_id": "user-456"
    }

    Returns:
    {
        "success": true,
        "message": "AI response",
        "history": [...],
        "reasoning": {...},
        "performance": {...}
    }
    """
    try:
        data = request.get_json()

        # ---- request validation (fail fast, before any model work) ----
        if not data or 'message' not in data:
            return jsonify({
                'success': False,
                'error': 'Message is required'
            }), 400

        message = data['message']

        if not isinstance(message, str):
            return jsonify({
                'success': False,
                'error': 'Message must be a string'
            }), 400

        message = message.strip()
        if not message:
            return jsonify({
                'success': False,
                'error': 'Message cannot be empty'
            }), 400

        # Reject oversized payloads to bound inference cost per request.
        MAX_MESSAGE_LENGTH = 10000
        if len(message) > MAX_MESSAGE_LENGTH:
            return jsonify({
                'success': False,
                'error': f'Message too long. Maximum length is {MAX_MESSAGE_LENGTH} characters'
            }), 400

        history = data.get('history', [])
        session_id = data.get('session_id')
        user_id = data.get('user_id', 'anonymous')
        context_mode = data.get('context_mode')

        logger.info(f"Chat request - User: {user_id}, Session: {session_id}")
        logger.info(f"Message length: {len(message)} chars, preview: {message[:100]}...")

        if not orchestrator_available or orchestrator is None:
            logger.warning("Chat request received but orchestrator not ready")
            return jsonify({
                'success': False,
                'error': 'Orchestrator not ready',
                'message': 'AI system is initializing. Please try again in a moment.',
                'help': 'If this persists, check logs for initialization errors or try POST /api/initialize'
            }), 503

        # Associate the session with the requesting user for context storage.
        if session_id:
            orchestrator.set_user_id(session_id, user_id)

        # Optional per-request context-mode override ('fresh' or 'relevant').
        if context_mode and hasattr(orchestrator.context_manager, 'set_context_mode'):
            if context_mode in ['fresh', 'relevant']:
                orchestrator.context_manager.set_context_mode(session_id, context_mode, user_id)
                logger.info(f"Context mode set to '{context_mode}' for session {session_id}")
            else:
                logger.warning(f"Invalid context_mode '{context_mode}', ignoring. Use 'fresh' or 'relevant'")

        # Run the async pipeline to completion on a private event loop.
        # Fix: asyncio.run() creates AND fully tears down the loop (including
        # async-generator shutdown); the previous new_event_loop()/
        # set_event_loop() pattern left a closed loop installed as the
        # thread's current event loop after the request finished.
        result = asyncio.run(
            orchestrator.process_request(
                session_id=session_id or f"session-{user_id}",
                user_input=message
            )
        )

        # ---- response unpacking ----
        if isinstance(result, dict):
            response_text = result.get('response', '') or result.get('final_response', '')
            reasoning = result.get('reasoning', {})
            performance = result.get('performance', {})

            if performance:
                logger.info("=" * 60)
                logger.info("PERFORMANCE METRICS")
                logger.info("=" * 60)
                logger.info(f"Processing Time: {performance.get('processing_time', 0)}ms")
                logger.info(f"Tokens Used: {performance.get('tokens_used', 0)}")
                logger.info(f"Agents Used: {performance.get('agents_used', 0)}")
                logger.info(f"Confidence Score: {performance.get('confidence_score', 0)}%")
                agent_contribs = performance.get('agent_contributions', [])
                if agent_contribs:
                    logger.info("Agent Contributions:")
                    for contrib in agent_contribs:
                        logger.info(f" - {contrib.get('agent', 'Unknown')}: {contrib.get('percentage', 0)}%")
                logger.info(f"Safety Score: {performance.get('safety_score', 0)}%")
                logger.info("=" * 60)
            else:
                logger.warning("⚠️ No performance metrics in response!")
                logger.debug(f"Result keys: {list(result.keys())}")
                logger.debug(f"Result metadata keys: {list(result.get('metadata', {}).keys())}")

                # Some pipeline versions nest the metrics under metadata.
                metadata = result.get('metadata', {})
                if 'performance_metrics' in metadata:
                    performance = metadata['performance_metrics']
                    logger.info("✓ Found performance metrics in metadata")
        else:
            # Unexpected result type: degrade gracefully with stub metrics.
            response_text = str(result)
            reasoning = {}
            performance = {
                "processing_time": 0,
                "tokens_used": 0,
                "agents_used": 0,
                "confidence_score": 0,
                "agent_contributions": [],
                "safety_score": 80,
                "error": "Response format error"
            }

        updated_history = history + [[message, response_text]]

        logger.info(f"✓ Response generated (length: {len(response_text)})")

        return jsonify({
            'success': True,
            'message': response_text,
            'history': updated_history,
            'reasoning': reasoning,
            'performance': performance
        })

    except Exception as e:
        logger.error(f"Chat error: {e}", exc_info=True)
        # NOTE(review): str(e) may leak internal details to clients; kept for
        # backward compatibility — consider returning a generic message.
        return jsonify({
            'success': False,
            'error': str(e),
            'message': 'Error processing your request. Please try again.'
        }), 500
|
|
|
|
|
|
|
|
@app.route('/api/initialize', methods=['POST'])
@limiter.limit("5 per minute") if limiter else lambda f: f
def initialize():
    """Manually trigger (re-)initialization of the orchestrator."""
    if initialize_orchestrator():
        return jsonify({
            'success': True,
            'message': 'Orchestrator initialized successfully'
        })
    return jsonify({
        'success': False,
        'message': 'Initialization failed. Check logs for details.'
    }), 500
|
|
|
|
|
|
|
|
@app.route('/api/context/mode', methods=['GET'])
def get_context_mode():
    """
    Get current context mode for a session

    GET /api/context/mode?session_id=session-123

    Returns:
    {
        "success": true,
        "session_id": "session-123",
        "context_mode": "fresh" | "relevant",
        "description": {
            "fresh": "No user context included - starts fresh each time",
            "relevant": "Only relevant user context included based on relevance classification"
        }
    }
    """
    try:
        session_id = request.args.get('session_id')
        if not session_id:
            return jsonify({
                'success': False,
                'error': 'session_id query parameter is required'
            }), 400

        # Context mode queries require a live orchestrator with a context
        # manager that actually supports the feature.
        if not orchestrator_available or orchestrator is None:
            return jsonify({
                'success': False,
                'error': 'Orchestrator not ready'
            }), 503
        if not hasattr(orchestrator.context_manager, 'get_context_mode'):
            return jsonify({
                'success': False,
                'error': 'Context mode not available'
            }), 503

        current_mode = orchestrator.context_manager.get_context_mode(session_id)
        return jsonify({
            'success': True,
            'session_id': session_id,
            'context_mode': current_mode,
            'description': {
                'fresh': 'No user context included - starts fresh each time',
                'relevant': 'Only relevant user context included based on relevance classification'
            }
        })

    except Exception as e:
        logger.error(f"Get context mode error: {e}", exc_info=True)
        return jsonify({
            'success': False,
            'error': str(e)
        }), 500
|
|
|
|
|
@app.route('/api/context/mode', methods=['POST'])
def set_context_mode():
    """
    Set context mode for a session

    POST /api/context/mode
    {
        "session_id": "session-123",
        "mode": "fresh" | "relevant",
        "user_id": "user-456" (optional)
    }

    Returns:
    {
        "success": true,
        "session_id": "session-123",
        "context_mode": "fresh" | "relevant",
        "message": "Context mode set successfully"
    }
    """
    try:
        data = request.get_json()
        if not data:
            return jsonify({
                'success': False,
                'error': 'Request body is required'
            }), 400

        session_id = data.get('session_id')
        mode = data.get('mode')
        user_id = data.get('user_id', 'anonymous')

        # Validate fields one at a time so the client gets a precise error.
        if not session_id:
            return jsonify({
                'success': False,
                'error': 'session_id is required'
            }), 400
        if not mode:
            return jsonify({
                'success': False,
                'error': 'mode is required'
            }), 400
        if mode not in ['fresh', 'relevant']:
            return jsonify({
                'success': False,
                'error': "mode must be 'fresh' or 'relevant'"
            }), 400

        if not orchestrator_available or orchestrator is None:
            return jsonify({
                'success': False,
                'error': 'Orchestrator not ready'
            }), 503
        if not hasattr(orchestrator.context_manager, 'set_context_mode'):
            return jsonify({
                'success': False,
                'error': 'Context mode not available'
            }), 503

        applied = orchestrator.context_manager.set_context_mode(session_id, mode, user_id)
        if applied:
            return jsonify({
                'success': True,
                'session_id': session_id,
                'context_mode': mode,
                'message': 'Context mode set successfully'
            })
        return jsonify({
            'success': False,
            'error': 'Failed to set context mode'
        }), 500

    except Exception as e:
        logger.error(f"Set context mode error: {e}", exc_info=True)
        return jsonify({
            'success': False,
            'error': str(e)
        }), 500
|
|
|
|
|
|
|
|
|
|
|
# Module-import banner: this file starts orchestrator initialization eagerly
# on import (see _start_initialization below) so Gunicorn workers warm up.
logger.info("=" * 60)
logger.info("FLASK API MODULE LOADED")
logger.info("=" * 60)
logger.info("Initializing orchestrator on module import...")

# Guard so the heavy model initialization is started at most once per
# process, even if this module is imported from multiple entry points.
import threading
_init_lock = threading.Lock()
_init_started = False
|
|
|
|
|
def _start_initialization():
    """Kick off orchestrator initialization on a daemon thread (idempotent).

    Only the first call spawns the worker; later calls return immediately,
    so model loading happens at most once per process. Running in the
    background keeps module import (and server startup) non-blocking.
    """
    global _init_started
    with _init_lock:
        if _init_started:
            return
        _init_started = True

    def init_worker():
        # Failures are logged only; POST /api/initialize can retry later.
        try:
            logger.info("Background initialization thread started")
            initialize_orchestrator()
        except Exception as e:
            logger.error(f"Background initialization failed: {e}", exc_info=True)

    worker = threading.Thread(target=init_worker, daemon=True, name="OrchInit")
    worker.start()
    logger.info("Initialization thread started (non-blocking)")


_start_initialization()
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    logger.info("=" * 60)
    logger.info("STARTING PURE FLASK API (Direct Execution)")
    logger.info("=" * 60)

    import time

    # Give the background initializer up to 30 seconds before falling back
    # to a blocking, in-process initialization attempt.
    if not orchestrator_available:
        logger.info("Waiting for background initialization to complete...")
        for i in range(30):
            if orchestrator_available:
                break
            time.sleep(1)
            if i % 5 == 0:
                logger.info(f"Still waiting... ({i}s)")

        if not orchestrator_available:
            logger.warning("Orchestrator not ready after wait, attempting direct initialization...")
            initialize_orchestrator()

    # 7860 is the default port expected by Hugging Face Spaces.
    port = int(os.getenv('PORT', 7860))

    logger.info(f"Starting Flask on port {port}")
    logger.info("Endpoints available:")
    for endpoint in (
        "  GET /",
        "  GET /api/health",
        "  POST /api/chat",
        "  POST /api/initialize",
        "  GET /api/context/mode",
        "  POST /api/context/mode",
    ):
        logger.info(endpoint)
    logger.info("=" * 60)

    logger.warning("⚠️ Using Flask development server - NOT for production!")
    logger.warning("⚠️ Use Gunicorn for production: gunicorn flask_api_standalone:app")
    logger.info("=" * 60)

    app.run(host='0.0.0.0', port=port, debug=False, threaded=True)
|
|
|
|
|
|