JatsTheAIGen committed on
Commit
6d41cb5
·
1 Parent(s): 8f308fb

Safety module enhanced to improve response UX (v1)

Browse files
Files changed (2) hide show
  1. app.py +112 -26
  2. src/orchestrator_engine.py +381 -1
app.py CHANGED
@@ -529,13 +529,42 @@ async def process_message_async(message: str, history: Optional[List], session_i
529
  # Try to use orchestrator if available
530
  if orchestrator is not None:
531
  try:
532
- logger.info("Attempting full orchestration...")
533
- # Use orchestrator to process
534
- result = await orchestrator.process_request(
535
  session_id=session_id,
536
  user_input=message.strip()
537
  )
538
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
539
  # Extract response from result with multiple fallback checks
540
  if isinstance(result, dict):
541
  # Extract the text response (not the dict)
@@ -606,29 +635,84 @@ async def process_message_async(message: str, history: Optional[List], session_i
606
  logger.info(f"Orchestrator returned response (length: {len(response)})")
607
 
608
  except Exception as orch_error:
609
- logger.error(f"Orchestrator error: {orch_error}", exc_info=True)
610
- # Fallback response with error info and enhanced reasoning
611
- response = f"I'm experiencing some technical difficulties. Your message was: '{message[:100]}...' Please try again or rephrase your question."
612
- reasoning_data = {
613
- "chain_of_thought": {
614
- "step_1": {
615
- "hypothesis": "System encountered an error during processing",
616
- "evidence": [f"Error: {str(orch_error)[:100]}..."],
617
- "confidence": 0.3,
618
- "reasoning": "Orchestrator failure - fallback mode activated"
619
- }
620
- },
621
- "alternative_paths": [],
622
- "uncertainty_areas": [
623
- {
624
- "aspect": "System reliability",
625
- "confidence": 0.3,
626
- "mitigation": "Error handling and graceful degradation"
627
- }
628
- ],
629
- "evidence_sources": [],
630
- "confidence_calibration": {"overall_confidence": 0.3, "error_mode": True}
631
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
632
  else:
633
  # System initialization message with enhanced reasoning
634
  logger.info("Orchestrator not yet available")
@@ -653,6 +737,8 @@ async def process_message_async(message: str, history: Optional[List], session_i
653
  "evidence_sources": [],
654
  "confidence_calibration": {"overall_confidence": 0.5, "initialization_mode": True}
655
  }
 
 
656
  skills_html = "" # Initialize skills_html for orchestrator not available case
657
 
658
  # Add assistant response
 
529
  # Try to use orchestrator if available
530
  if orchestrator is not None:
531
  try:
532
+ logger.info("Attempting full orchestration with safety revision...")
533
+ # Use enhanced orchestrator with safety revision
534
+ result = await orchestrator.process_request_with_revision(
535
  session_id=session_id,
536
  user_input=message.strip()
537
  )
538
 
539
+ # Log revision information
540
+ revision_attempts = result.get('revision_attempts', 0)
541
+ safety_revision_applied = result.get('safety_revision_applied', False)
542
+ safety_exceeded = result.get('safety_exceeded', [])
543
+ intelligent_reprompt_success = result.get('intelligent_reprompt_success', False)
544
+ input_complexity = result.get('input_complexity', {})
545
+
546
+ if revision_attempts > 0:
547
+ logger.info(f"✅ Safety revision applied: {revision_attempts} attempts")
548
+ if safety_revision_applied:
549
+ if safety_exceeded:
550
+ logger.warning(f"⚠️ Safety categories still exceeded: {safety_exceeded}")
551
+ else:
552
+ logger.info(f"✅ Response improved for safety categories")
553
+
554
+ # Log intelligent re-prompt success
555
+ if intelligent_reprompt_success:
556
+ logger.info(f"✅ Intelligent re-prompt successfully resolved safety concerns for complex input")
557
+
558
+ # Log complexity handling
559
+ if input_complexity.get('is_complex'):
560
+ logger.info(f"📊 Complex input handled (score: {input_complexity.get('complexity_score', 0):.1f})")
561
+
562
+ if result.get('revision_error'):
563
+ logger.error(f"⚠️ Revision error occurred: {result.get('revision_error')}")
564
+
565
+ if result.get('timeout_error'):
566
+ logger.warning(f"⏱️ Safety revision timed out after 30 seconds")
567
+
568
  # Extract response from result with multiple fallback checks
569
  if isinstance(result, dict):
570
  # Extract the text response (not the dict)
 
635
  logger.info(f"Orchestrator returned response (length: {len(response)})")
636
 
637
  except Exception as orch_error:
638
+ logger.error(f"Orchestrator error with safety revision: {orch_error}", exc_info=True)
639
+ try:
640
+ # Graceful degradation to original orchestrator method
641
+ logger.info("Falling back to original orchestrator method...")
642
+ result = await orchestrator.process_request(
643
+ session_id=session_id,
644
+ user_input=message.strip()
645
+ )
646
+ result['fallback_used'] = True
647
+ result['revision_attempts'] = 0
648
+ logger.info("✓ Fallback to original orchestrator successful")
649
+
650
+ # Extract response from fallback result
651
+ response = (
652
+ result.get('response') or
653
+ result.get('final_response') or
654
+ result.get('safety_checked_response') or
655
+ result.get('original_response') or
656
+ str(result.get('result', ''))
657
+ )
658
+
659
+ # Extract metadata from fallback result
660
+ reasoning_data = result.get('metadata', {}).get('reasoning_chain', {
661
+ "chain_of_thought": {},
662
+ "alternative_paths": [],
663
+ "uncertainty_areas": [],
664
+ "evidence_sources": [],
665
+ "confidence_calibration": {}
666
+ })
667
+
668
+ performance_data = {
669
+ "agent_trace": result.get('agent_trace', []),
670
+ "processing_time": result.get('metadata', {}).get('processing_time', 0),
671
+ "token_count": result.get('metadata', {}).get('token_count', 0),
672
+ "confidence_score": result.get('confidence_score', 0.7),
673
+ "agents_used": result.get('metadata', {}).get('agents_used', [])
674
+ }
675
+
676
+ context_data = {
677
+ "interaction_id": result.get('interaction_id', 'unknown'),
678
+ "session_id": session_id,
679
+ "timestamp": result.get('timestamp', ''),
680
+ "warnings": result.get('metadata', {}).get('warnings', [])
681
+ }
682
+
683
+ # Extract skills data from fallback
684
+ skills_html = ""
685
+ skills_result = result.get('metadata', {}).get('skills_result', {})
686
+ if skills_result and skills_result.get('identified_skills'):
687
+ skills_html = _generate_skills_html(skills_result['identified_skills'])
688
+
689
+ except Exception as fallback_error:
690
+ logger.error(f"Fallback orchestrator also failed: {fallback_error}", exc_info=True)
691
+ # Fallback response with error info and enhanced reasoning
692
+ response = f"I'm experiencing some technical difficulties. Your message was: '{message[:100]}...' Please try again or rephrase your question."
693
+ reasoning_data = {
694
+ "chain_of_thought": {
695
+ "step_1": {
696
+ "hypothesis": "System encountered an error during processing",
697
+ "evidence": [f"Error: {str(orch_error)[:100]}..."],
698
+ "confidence": 0.3,
699
+ "reasoning": "Orchestrator failure - fallback mode activated"
700
+ }
701
+ },
702
+ "alternative_paths": [],
703
+ "uncertainty_areas": [
704
+ {
705
+ "aspect": "System reliability",
706
+ "confidence": 0.3,
707
+ "mitigation": "Error handling and graceful degradation"
708
+ }
709
+ ],
710
+ "evidence_sources": [],
711
+ "confidence_calibration": {"overall_confidence": 0.3, "error_mode": True}
712
+ }
713
+ performance_data = {}
714
+ context_data = {}
715
+ skills_html = ""
716
  else:
717
  # System initialization message with enhanced reasoning
718
  logger.info("Orchestrator not yet available")
 
737
  "evidence_sources": [],
738
  "confidence_calibration": {"overall_confidence": 0.5, "initialization_mode": True}
739
  }
740
+ performance_data = {}
741
+ context_data = {}
742
  skills_html = "" # Initialize skills_html for orchestrator not available case
743
 
744
  # Add assistant response
src/orchestrator_engine.py CHANGED
@@ -2,6 +2,7 @@
2
  import uuid
3
  import logging
4
  import time
 
5
  from datetime import datetime
6
 
7
  logger = logging.getLogger(__name__)
@@ -12,7 +13,17 @@ class MVPOrchestrator:
12
  self.context_manager = context_manager
13
  self.agents = agents
14
  self.execution_trace = []
15
- logger.info("MVPOrchestrator initialized")
 
 
 
 
 
 
 
 
 
 
16
 
17
  async def process_request(self, session_id: str, user_input: str) -> dict:
18
  """
@@ -671,3 +682,372 @@ class MVPOrchestrator:
671
  return "Appropriate content with minor notes"
672
  else:
673
  return "Content requires review"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import uuid
3
  import logging
4
  import time
5
+ import asyncio
6
  from datetime import datetime
7
 
8
  logger = logging.getLogger(__name__)
 
13
  self.context_manager = context_manager
14
  self.agents = agents
15
  self.execution_trace = []
16
+
17
+ # Safety revision thresholds
18
+ self.safety_thresholds = {
19
+ "toxicity_or_harmful_language": 0.3,
20
+ "potential_biases_or_stereotypes": 0.05, # Low threshold for bias
21
+ "privacy_or_security_concerns": 0.2,
22
+ "controversial_or_sensitive_topics": 0.3
23
+ }
24
+ self.max_revision_attempts = 2
25
+ self.revision_timeout = 30 # seconds
26
+ logger.info("MVPOrchestrator initialized with safety revision thresholds")
27
 
28
  async def process_request(self, session_id: str, user_input: str) -> dict:
29
  """
 
682
  return "Appropriate content with minor notes"
683
  else:
684
  return "Content requires review"
685
+
686
+ def check_safety_thresholds(self, safety_result: dict) -> tuple:
687
+ """Check if safety scores exceed thresholds"""
688
+ safety_analysis = safety_result.get('safety_analysis', {})
689
+
690
+ # Try to get safety_scores first (LLM format), fallback to confidence_scores (pattern format)
691
+ safety_scores = safety_analysis.get('safety_scores', {})
692
+ if not safety_scores:
693
+ # Map confidence_scores to safety_scores format
694
+ confidence_scores = safety_analysis.get('confidence_scores', {})
695
+ safety_scores = {
696
+ "toxicity_or_harmful_language": confidence_scores.get('toxicity', 0.0),
697
+ "potential_biases_or_stereotypes": confidence_scores.get('bias', 0.0),
698
+ "privacy_or_security_concerns": confidence_scores.get('privacy', 0.0),
699
+ "controversial_or_sensitive_topics": 1.0 - confidence_scores.get('safety', 1.0)
700
+ }
701
+
702
+ exceeded_categories = []
703
+
704
+ for category, score in safety_scores.items():
705
+ threshold = self.safety_thresholds.get(category, 1.0)
706
+ if isinstance(score, (int, float)) and score > threshold:
707
+ exceeded_categories.append(category)
708
+ logger.info(f"Safety threshold exceeded: {category} = {score} > {threshold}")
709
+
710
+ return len(exceeded_categories) > 0, exceeded_categories
711
+
712
+ def generate_revision_prompt(self, original_response: str, exceeded_categories: list,
713
+ safety_warnings: list) -> str:
714
+ """Generate revision prompt for synthesis agent"""
715
+
716
+ category_instructions = {
717
+ "toxicity_or_harmful_language": "Remove any language that could be harmful or offensive",
718
+ "potential_biases_or_stereotypes": "Ensure response provides balanced alternatives and avoids stereotypes. Include diverse perspectives and specific qualifications needed for each career path",
719
+ "privacy_or_security_concerns": "Remove any content that could compromise privacy or security",
720
+ "controversial_or_sensitive_topics": "Present balanced viewpoints and acknowledge different perspectives"
721
+ }
722
+
723
+ revision_instructions = []
724
+ for category in exceeded_categories:
725
+ if category in category_instructions:
726
+ revision_instructions.append(category_instructions[category])
727
+
728
+ prompt = f"""REVISION REQUIRED: The following response has safety concerns that need addressing.
729
+
730
+ Original Response:
731
+ {original_response}
732
+
733
+ Safety Issues Detected:
734
+ {', '.join(exceeded_categories)}
735
+
736
+ Specific Warnings:
737
+ {'; '.join(safety_warnings) if safety_warnings else 'General safety concerns detected'}
738
+
739
+ REVISION INSTRUCTIONS:
740
+ {' | '.join(revision_instructions)}
741
+
742
+ Please revise the response to address these concerns while maintaining helpfulness and accuracy. Ensure the response:
743
+ 1. Addresses the user's original question completely
744
+ 2. Provides specific, actionable alternatives with clear qualifications needed
745
+ 3. Avoids generalizations and stereotypes about career transitions
746
+ 4. Includes necessary skills, education, and experience requirements
747
+ 5. Maintains a balanced, inclusive perspective that acknowledges different paths
748
+
749
+ Revised Response:"""
750
+
751
+ return prompt
752
+
753
+ async def process_request_with_revision(self, session_id: str, user_input: str) -> dict:
754
+ """Enhanced process_request with safety revision loop and timeout protection"""
755
+ try:
756
+ return await asyncio.wait_for(
757
+ self._process_request_with_revision_internal(session_id, user_input),
758
+ timeout=self.revision_timeout
759
+ )
760
+ except asyncio.TimeoutError:
761
+ logger.error(f"Safety revision timed out after {self.revision_timeout}s")
762
+ # Fallback to basic response
763
+ return {
764
+ 'final_response': 'Request processing took longer than expected. Please try again.',
765
+ 'response': 'Request processing took longer than expected. Please try again.',
766
+ 'revision_attempts': 0,
767
+ 'timeout_error': True,
768
+ 'safety_revision_applied': False
769
+ }
770
+
771
+ async def _process_request_with_revision_internal(self, session_id: str, user_input: str) -> dict:
772
+ """Internal revision loop with comprehensive error handling"""
773
+
774
+ revision_attempt = 0
775
+ current_response = None
776
+ final_result = None
777
+ exceeded_categories = [] # ✅ Fix: Initialize variables
778
+ safety_warnings = [] # ✅ Fix: Initialize variables
779
+
780
+ while revision_attempt <= self.max_revision_attempts:
781
+ try:
782
+ # For revision attempts, modify the input to include revision instructions
783
+ processing_input = user_input
784
+ if revision_attempt > 0:
785
+ processing_input = self.generate_revision_prompt(
786
+ current_response,
787
+ exceeded_categories,
788
+ safety_warnings
789
+ )
790
+ logger.info(f"Revision attempt {revision_attempt}: regenerating response with safety improvements")
791
+
792
+ # Execute normal processing flow
793
+ result = await self.process_request(session_id, processing_input)
794
+
795
+ # Extract the response text
796
+ current_response = result.get('final_response') or result.get('response', '')
797
+
798
+ if not current_response:
799
+ # Fallback: try to extract from metadata
800
+ metadata = result.get('metadata', {})
801
+ current_response = metadata.get('synthesis_result', {}).get('final_response', '')
802
+
803
+ if not current_response:
804
+ logger.warning("Could not extract response text for safety check")
805
+ return result
806
+
807
+ # Execute safety check on the response
808
+ safety_checked = await self.agents['safety_check'].execute(
809
+ response=current_response,
810
+ context=result.get('context', {})
811
+ )
812
+
813
+ # Check if revision is needed
814
+ needs_revision, exceeded_categories = self.check_safety_thresholds(safety_checked)
815
+ safety_warnings = safety_checked.get('warnings', [])
816
+
817
+ if not needs_revision:
818
+ # Safety thresholds met
819
+ logger.info(f"Safety check passed on attempt {revision_attempt + 1}")
820
+ result['safety_result'] = safety_checked
821
+ result['revision_attempts'] = revision_attempt
822
+ result['safety_revision_applied'] = revision_attempt > 0
823
+
824
+ # Update metadata with safety info
825
+ if 'metadata' not in result:
826
+ result['metadata'] = {}
827
+ result['metadata']['safety_result'] = safety_checked
828
+ result['metadata']['revision_attempts'] = revision_attempt
829
+
830
+ return result
831
+
832
+ if revision_attempt >= self.max_revision_attempts:
833
+ # Max attempts reached - handle gracefully based on input complexity
834
+ logger.warning(f"Max revision attempts reached. Categories still exceeded: {exceeded_categories}")
835
+
836
+ input_complexity = self._assess_input_complexity(user_input)
837
+
838
+ # For complex inputs, offer intelligent re-attempt instead of asking user to rephrase
839
+ if input_complexity["is_complex"] and input_complexity["complexity_score"] > 25:
840
+ logger.info("Complex input detected - attempting intelligent re-prompt")
841
+ try:
842
+ # Generate improved prompt automatically
843
+ improved_prompt = self._generate_improved_prompt(user_input, exceeded_categories)
844
+
845
+ # One final attempt with improved prompting
846
+ improved_result = await self.process_request(session_id, improved_prompt)
847
+ improved_response = improved_result.get('final_response', '')
848
+
849
+ # Quick safety check on improved response
850
+ final_safety_check = await self.agents['safety_check'].execute(
851
+ response=improved_response,
852
+ context=improved_result.get('context', {})
853
+ )
854
+
855
+ improved_needs_revision, improved_exceeded = self.check_safety_thresholds(final_safety_check)
856
+
857
+ if not improved_needs_revision:
858
+ # Success with intelligent re-prompting
859
+ logger.info("Intelligent re-prompt resolved safety concerns")
860
+ improved_result['safety_result'] = final_safety_check
861
+ improved_result['revision_attempts'] = revision_attempt + 1
862
+ improved_result['intelligent_reprompt_success'] = True
863
+ if 'metadata' not in improved_result:
864
+ improved_result['metadata'] = {}
865
+ improved_result['metadata']['safety_result'] = final_safety_check
866
+ improved_result['metadata']['revision_attempts'] = revision_attempt + 1
867
+ improved_result['metadata']['intelligent_reprompt_success'] = True
868
+ return improved_result
869
+ else:
870
+ # Still has issues - proceed with guidance
871
+ logger.info("Intelligent re-prompt did not fully resolve concerns")
872
+ current_response = improved_response
873
+ safety_checked = final_safety_check
874
+ exceeded_categories = improved_exceeded
875
+
876
+ except Exception as e:
877
+ logger.warning(f"Intelligent re-prompt failed: {e}", exc_info=True)
878
+ # Continue with original response and guidance
879
+
880
+ # Add user-friendly warning summary with appropriate guidance
881
+ warning_summary = self._generate_warning_summary(exceeded_categories, safety_checked.get('warnings', []))
882
+ user_guidance = self._generate_user_guidance(exceeded_categories, user_input)
883
+
884
+ # Append guidance to response
885
+ original_response = result.get('final_response', '')
886
+ enhanced_response = f"{original_response}\n\n{warning_summary}\n\n{user_guidance}"
887
+
888
+ result['final_response'] = enhanced_response
889
+ result['response'] = enhanced_response # Also update response for compatibility
890
+ result['safety_result'] = safety_checked
891
+ result['revision_attempts'] = revision_attempt
892
+ result['safety_exceeded'] = exceeded_categories
893
+ result['safety_revision_applied'] = revision_attempt > 0
894
+ result['warning_summary_added'] = True
895
+ result['input_complexity'] = input_complexity
896
+
897
+ # Update metadata
898
+ if 'metadata' not in result:
899
+ result['metadata'] = {}
900
+ result['metadata']['safety_result'] = safety_checked
901
+ result['metadata']['revision_attempts'] = revision_attempt
902
+ result['metadata']['safety_exceeded'] = exceeded_categories
903
+ result['metadata']['input_complexity'] = input_complexity
904
+
905
+ return result
906
+
907
+ # Store for next revision
908
+ final_result = result
909
+ revision_attempt += 1
910
+ logger.info(f"Generating revision attempt {revision_attempt} for: {exceeded_categories}")
911
+
912
+ except Exception as e:
913
+ logger.error(f"Error in safety revision attempt {revision_attempt}: {e}", exc_info=True)
914
+ if final_result:
915
+ final_result['revision_error'] = str(e)
916
+ if 'metadata' not in final_result:
917
+ final_result['metadata'] = {}
918
+ final_result['metadata']['revision_error'] = str(e)
919
+ return final_result
920
+ # If we don't have a result yet, return the error result
921
+ return {
922
+ 'response': 'Error in processing with safety revision',
923
+ 'final_response': 'Error in processing with safety revision',
924
+ 'revision_attempts': revision_attempt,
925
+ 'revision_error': str(e),
926
+ 'error': str(e)
927
+ }
928
+
929
+ # Fallback - should not reach here
930
+ return final_result or {
931
+ 'response': 'Error in safety revision processing',
932
+ 'final_response': 'Error in safety revision processing',
933
+ 'revision_attempts': revision_attempt,
934
+ 'safety_revision_applied': False
935
+ }
936
+
937
+ def _generate_warning_summary(self, exceeded_categories: list, safety_warnings: list) -> str:
938
+ """Generate user-friendly warning summary"""
939
+ category_explanations = {
940
+ "potential_biases_or_stereotypes": "may contain assumptions about career transitions that don't account for individual circumstances",
941
+ "toxicity_or_harmful_language": "contains language that could be harmful or inappropriate",
942
+ "privacy_or_security_concerns": "includes content that could raise privacy or security considerations",
943
+ "controversial_or_sensitive_topics": "touches on topics that may benefit from additional perspective"
944
+ }
945
+
946
+ if not exceeded_categories:
947
+ return ""
948
+
949
+ warning_text = "**Note**: This response " + ", ".join([
950
+ category_explanations.get(cat, f"has concerns related to {cat}")
951
+ for cat in exceeded_categories
952
+ ]) + "."
953
+
954
+ return warning_text
955
+
956
+ def _generate_user_guidance(self, exceeded_categories: list, original_user_input: str) -> str:
957
+ """Generate proactive user guidance with UX-friendly options for complex prompts"""
958
+ if not exceeded_categories:
959
+ return ""
960
+
961
+ input_complexity = self._assess_input_complexity(original_user_input)
962
+
963
+ guidance_templates = {
964
+ "potential_biases_or_stereotypes": {
965
+ "issue": "avoid assumptions about career paths",
966
+ "simple_suggestion": "ask for advice tailored to specific qualifications or industry interests",
967
+ "complex_refinement": "add details like your specific skills, target industry, or education level"
968
+ },
969
+ "toxicity_or_harmful_language": {
970
+ "issue": "ensure respectful communication",
971
+ "simple_suggestion": "rephrase using more neutral language",
972
+ "complex_refinement": "adjust the tone while keeping your detailed context"
973
+ },
974
+ "privacy_or_security_concerns": {
975
+ "issue": "protect sensitive information",
976
+ "simple_suggestion": "ask for general guidance instead",
977
+ "complex_refinement": "remove specific personal details while keeping the scenario structure"
978
+ },
979
+ "controversial_or_sensitive_topics": {
980
+ "issue": "get balanced perspectives",
981
+ "simple_suggestion": "ask for multiple viewpoints or balanced analysis",
982
+ "complex_refinement": "specify you'd like pros/cons or different perspectives included"
983
+ }
984
+ }
985
+
986
+ primary_category = exceeded_categories[0]
987
+ guidance = guidance_templates.get(primary_category, {
988
+ "issue": "improve response quality",
989
+ "simple_suggestion": "try rephrasing with more specific details",
990
+ "complex_refinement": "add clarifying details to your existing question"
991
+ })
992
+
993
+ topic = self._extract_main_topic(original_user_input)
994
+
995
+ # Adaptive guidance based on input complexity
996
+ if input_complexity["is_complex"]:
997
+ return f"""**Want a better response?** To {guidance['issue']} in responses about {topic}, you could {guidance['complex_refinement']} rather than rewriting your detailed question. Or simply ask again as-is and I'll focus on providing more balanced information."""
998
+ else:
999
+ return f"""**Want a better response?** To {guidance['issue']} in future responses about {topic}, you could {guidance['simple_suggestion']}. Feel free to ask again with any adjustments!"""
1000
+
1001
+ def _assess_input_complexity(self, user_input: str) -> dict:
1002
+ """Assess input complexity to determine appropriate UX guidance"""
1003
+ word_count = len(user_input.split())
1004
+ sentence_count = user_input.count('.') + user_input.count('!') + user_input.count('?')
1005
+ has_context = any(phrase in user_input.lower() for phrase in [
1006
+ 'i am currently', 'my situation', 'my background', 'i have been',
1007
+ 'my experience', 'i work', 'my company', 'specific to my'
1008
+ ])
1009
+ has_constraints = any(phrase in user_input.lower() for phrase in [
1010
+ 'must', 'need to', 'required', 'limited by', 'constraint', 'budget',
1011
+ 'timeline', 'deadline', 'specific requirements'
1012
+ ])
1013
+
1014
+ is_complex = (
1015
+ word_count > 30 or
1016
+ sentence_count > 2 or
1017
+ has_context or
1018
+ has_constraints
1019
+ )
1020
+
1021
+ return {
1022
+ "is_complex": is_complex,
1023
+ "word_count": word_count,
1024
+ "has_personal_context": has_context,
1025
+ "has_constraints": has_constraints,
1026
+ "complexity_score": word_count * 0.1 + sentence_count * 5 + (has_context * 10) + (has_constraints * 10)
1027
+ }
1028
+
1029
+ def _generate_improved_prompt(self, original_input: str, exceeded_categories: list) -> str:
1030
+ """Generate improved prompt for complex inputs to resolve safety concerns automatically"""
1031
+
1032
+ improvements = []
1033
+
1034
+ if "potential_biases_or_stereotypes" in exceeded_categories:
1035
+ improvements.append("Please provide specific qualifications, skills, and requirements for each option")
1036
+ improvements.append("Include diverse pathways and acknowledge individual circumstances vary")
1037
+
1038
+ if "toxicity_or_harmful_language" in exceeded_categories:
1039
+ improvements.append("Use respectful, professional language throughout")
1040
+
1041
+ if "privacy_or_security_concerns" in exceeded_categories:
1042
+ improvements.append("Focus on general guidance without personal specifics")
1043
+
1044
+ if "controversial_or_sensitive_topics" in exceeded_categories:
1045
+ improvements.append("Present balanced perspectives and multiple viewpoints")
1046
+
1047
+ improvement_instructions = ". ".join(improvements)
1048
+
1049
+ improved_prompt = f"""{original_input}
1050
+
1051
+ Additional guidance for response: {improvement_instructions}. Ensure all advice is specific, actionable, and acknowledges different backgrounds and circumstances."""
1052
+
1053
+ return improved_prompt