JatsTheAIGen committed on
Commit
6d41cb5
·
1 Parent(s): 8f308fb

Safety module enhanced to improve response UX (v1)

Browse files
Files changed (2) hide show
  1. app.py +112 -26
  2. src/orchestrator_engine.py +381 -1
app.py CHANGED
@@ -529,13 +529,42 @@ async def process_message_async(message: str, history: Optional[List], session_i
529
  # Try to use orchestrator if available
530
  if orchestrator is not None:
531
  try:
532
- logger.info("Attempting full orchestration...")
533
- # Use orchestrator to process
534
- result = await orchestrator.process_request(
535
  session_id=session_id,
536
  user_input=message.strip()
537
  )
538
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
539
  # Extract response from result with multiple fallback checks
540
  if isinstance(result, dict):
541
  # Extract the text response (not the dict)
@@ -606,29 +635,84 @@ async def process_message_async(message: str, history: Optional[List], session_i
606
  logger.info(f"Orchestrator returned response (length: {len(response)})")
607
 
608
  except Exception as orch_error:
609
- logger.error(f"Orchestrator error: {orch_error}", exc_info=True)
610
- # Fallback response with error info and enhanced reasoning
611
- response = f"I'm experiencing some technical difficulties. Your message was: '{message[:100]}...' Please try again or rephrase your question."
612
- reasoning_data = {
613
- "chain_of_thought": {
614
- "step_1": {
615
- "hypothesis": "System encountered an error during processing",
616
- "evidence": [f"Error: {str(orch_error)[:100]}..."],
617
- "confidence": 0.3,
618
- "reasoning": "Orchestrator failure - fallback mode activated"
619
- }
620
- },
621
- "alternative_paths": [],
622
- "uncertainty_areas": [
623
- {
624
- "aspect": "System reliability",
625
- "confidence": 0.3,
626
- "mitigation": "Error handling and graceful degradation"
627
- }
628
- ],
629
- "evidence_sources": [],
630
- "confidence_calibration": {"overall_confidence": 0.3, "error_mode": True}
631
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
632
  else:
633
  # System initialization message with enhanced reasoning
634
  logger.info("Orchestrator not yet available")
@@ -653,6 +737,8 @@ async def process_message_async(message: str, history: Optional[List], session_i
653
  "evidence_sources": [],
654
  "confidence_calibration": {"overall_confidence": 0.5, "initialization_mode": True}
655
  }
 
 
656
  skills_html = "" # Initialize skills_html for orchestrator not available case
657
 
658
  # Add assistant response
 
529
  # Try to use orchestrator if available
530
  if orchestrator is not None:
531
  try:
532
+ logger.info("Attempting full orchestration with safety revision...")
533
+ # Use enhanced orchestrator with safety revision
534
+ result = await orchestrator.process_request_with_revision(
535
  session_id=session_id,
536
  user_input=message.strip()
537
  )
538
 
539
+ # Log revision information
540
+ revision_attempts = result.get('revision_attempts', 0)
541
+ safety_revision_applied = result.get('safety_revision_applied', False)
542
+ safety_exceeded = result.get('safety_exceeded', [])
543
+ intelligent_reprompt_success = result.get('intelligent_reprompt_success', False)
544
+ input_complexity = result.get('input_complexity', {})
545
+
546
+ if revision_attempts > 0:
547
+ logger.info(f"✅ Safety revision applied: {revision_attempts} attempts")
548
+ if safety_revision_applied:
549
+ if safety_exceeded:
550
+ logger.warning(f"⚠️ Safety categories still exceeded: {safety_exceeded}")
551
+ else:
552
+ logger.info(f"✅ Response improved for safety categories")
553
+
554
+ # Log intelligent re-prompt success
555
+ if intelligent_reprompt_success:
556
+ logger.info(f"✅ Intelligent re-prompt successfully resolved safety concerns for complex input")
557
+
558
+ # Log complexity handling
559
+ if input_complexity.get('is_complex'):
560
+ logger.info(f"📊 Complex input handled (score: {input_complexity.get('complexity_score', 0):.1f})")
561
+
562
+ if result.get('revision_error'):
563
+ logger.error(f"⚠️ Revision error occurred: {result.get('revision_error')}")
564
+
565
+ if result.get('timeout_error'):
566
+ logger.warning(f"⏱️ Safety revision timed out after 30 seconds")
567
+
568
  # Extract response from result with multiple fallback checks
569
  if isinstance(result, dict):
570
  # Extract the text response (not the dict)
 
635
  logger.info(f"Orchestrator returned response (length: {len(response)})")
636
 
637
  except Exception as orch_error:
638
+ logger.error(f"Orchestrator error with safety revision: {orch_error}", exc_info=True)
639
+ try:
640
+ # Graceful degradation to original orchestrator method
641
+ logger.info("Falling back to original orchestrator method...")
642
+ result = await orchestrator.process_request(
643
+ session_id=session_id,
644
+ user_input=message.strip()
645
+ )
646
+ result['fallback_used'] = True
647
+ result['revision_attempts'] = 0
648
+ logger.info("✓ Fallback to original orchestrator successful")
649
+
650
+ # Extract response from fallback result
651
+ response = (
652
+ result.get('response') or
653
+ result.get('final_response') or
654
+ result.get('safety_checked_response') or
655
+ result.get('original_response') or
656
+ str(result.get('result', ''))
657
+ )
658
+
659
+ # Extract metadata from fallback result
660
+ reasoning_data = result.get('metadata', {}).get('reasoning_chain', {
661
+ "chain_of_thought": {},
662
+ "alternative_paths": [],
663
+ "uncertainty_areas": [],
664
+ "evidence_sources": [],
665
+ "confidence_calibration": {}
666
+ })
667
+
668
+ performance_data = {
669
+ "agent_trace": result.get('agent_trace', []),
670
+ "processing_time": result.get('metadata', {}).get('processing_time', 0),
671
+ "token_count": result.get('metadata', {}).get('token_count', 0),
672
+ "confidence_score": result.get('confidence_score', 0.7),
673
+ "agents_used": result.get('metadata', {}).get('agents_used', [])
674
+ }
675
+
676
+ context_data = {
677
+ "interaction_id": result.get('interaction_id', 'unknown'),
678
+ "session_id": session_id,
679
+ "timestamp": result.get('timestamp', ''),
680
+ "warnings": result.get('metadata', {}).get('warnings', [])
681
+ }
682
+
683
+ # Extract skills data from fallback
684
+ skills_html = ""
685
+ skills_result = result.get('metadata', {}).get('skills_result', {})
686
+ if skills_result and skills_result.get('identified_skills'):
687
+ skills_html = _generate_skills_html(skills_result['identified_skills'])
688
+
689
+ except Exception as fallback_error:
690
+ logger.error(f"Fallback orchestrator also failed: {fallback_error}", exc_info=True)
691
+ # Fallback response with error info and enhanced reasoning
692
+ response = f"I'm experiencing some technical difficulties. Your message was: '{message[:100]}...' Please try again or rephrase your question."
693
+ reasoning_data = {
694
+ "chain_of_thought": {
695
+ "step_1": {
696
+ "hypothesis": "System encountered an error during processing",
697
+ "evidence": [f"Error: {str(orch_error)[:100]}..."],
698
+ "confidence": 0.3,
699
+ "reasoning": "Orchestrator failure - fallback mode activated"
700
+ }
701
+ },
702
+ "alternative_paths": [],
703
+ "uncertainty_areas": [
704
+ {
705
+ "aspect": "System reliability",
706
+ "confidence": 0.3,
707
+ "mitigation": "Error handling and graceful degradation"
708
+ }
709
+ ],
710
+ "evidence_sources": [],
711
+ "confidence_calibration": {"overall_confidence": 0.3, "error_mode": True}
712
+ }
713
+ performance_data = {}
714
+ context_data = {}
715
+ skills_html = ""
716
  else:
717
  # System initialization message with enhanced reasoning
718
  logger.info("Orchestrator not yet available")
 
737
  "evidence_sources": [],
738
  "confidence_calibration": {"overall_confidence": 0.5, "initialization_mode": True}
739
  }
740
+ performance_data = {}
741
+ context_data = {}
742
  skills_html = "" # Initialize skills_html for orchestrator not available case
743
 
744
  # Add assistant response
src/orchestrator_engine.py CHANGED
@@ -2,6 +2,7 @@
2
  import uuid
3
  import logging
4
  import time
 
5
  from datetime import datetime
6
 
7
  logger = logging.getLogger(__name__)
@@ -12,7 +13,17 @@ class MVPOrchestrator:
12
  self.context_manager = context_manager
13
  self.agents = agents
14
  self.execution_trace = []
15
- logger.info("MVPOrchestrator initialized")
 
 
 
 
 
 
 
 
 
 
16
 
17
  async def process_request(self, session_id: str, user_input: str) -> dict:
18
  """
@@ -671,3 +682,372 @@ class MVPOrchestrator:
671
  return "Appropriate content with minor notes"
672
  else:
673
  return "Content requires review"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import uuid
3
  import logging
4
  import time
5
+ import asyncio
6
  from datetime import datetime
7
 
8
  logger = logging.getLogger(__name__)
 
13
  self.context_manager = context_manager
14
  self.agents = agents
15
  self.execution_trace = []
16
+
17
+ # Safety revision thresholds
18
+ self.safety_thresholds = {
19
+ "toxicity_or_harmful_language": 0.3,
20
+ "potential_biases_or_stereotypes": 0.05, # Low threshold for bias
21
+ "privacy_or_security_concerns": 0.2,
22
+ "controversial_or_sensitive_topics": 0.3
23
+ }
24
+ self.max_revision_attempts = 2
25
+ self.revision_timeout = 30 # seconds
26
+ logger.info("MVPOrchestrator initialized with safety revision thresholds")
27
 
28
  async def process_request(self, session_id: str, user_input: str) -> dict:
29
  """
 
682
  return "Appropriate content with minor notes"
683
  else:
684
  return "Content requires review"
685
+
686
+ def check_safety_thresholds(self, safety_result: dict) -> tuple:
687
+ """Check if safety scores exceed thresholds"""
688
+ safety_analysis = safety_result.get('safety_analysis', {})
689
+
690
+ # Try to get safety_scores first (LLM format), fallback to confidence_scores (pattern format)
691
+ safety_scores = safety_analysis.get('safety_scores', {})
692
+ if not safety_scores:
693
+ # Map confidence_scores to safety_scores format
694
+ confidence_scores = safety_analysis.get('confidence_scores', {})
695
+ safety_scores = {
696
+ "toxicity_or_harmful_language": confidence_scores.get('toxicity', 0.0),
697
+ "potential_biases_or_stereotypes": confidence_scores.get('bias', 0.0),
698
+ "privacy_or_security_concerns": confidence_scores.get('privacy', 0.0),
699
+ "controversial_or_sensitive_topics": 1.0 - confidence_scores.get('safety', 1.0)
700
+ }
701
+
702
+ exceeded_categories = []
703
+
704
+ for category, score in safety_scores.items():
705
+ threshold = self.safety_thresholds.get(category, 1.0)
706
+ if isinstance(score, (int, float)) and score > threshold:
707
+ exceeded_categories.append(category)
708
+ logger.info(f"Safety threshold exceeded: {category} = {score} > {threshold}")
709
+
710
+ return len(exceeded_categories) > 0, exceeded_categories
711
+
712
+ def generate_revision_prompt(self, original_response: str, exceeded_categories: list,
713
+ safety_warnings: list) -> str:
714
+ """Generate revision prompt for synthesis agent"""
715
+
716
+ category_instructions = {
717
+ "toxicity_or_harmful_language": "Remove any language that could be harmful or offensive",
718
+ "potential_biases_or_stereotypes": "Ensure response provides balanced alternatives and avoids stereotypes. Include diverse perspectives and specific qualifications needed for each career path",
719
+ "privacy_or_security_concerns": "Remove any content that could compromise privacy or security",
720
+ "controversial_or_sensitive_topics": "Present balanced viewpoints and acknowledge different perspectives"
721
+ }
722
+
723
+ revision_instructions = []
724
+ for category in exceeded_categories:
725
+ if category in category_instructions:
726
+ revision_instructions.append(category_instructions[category])
727
+
728
+ prompt = f"""REVISION REQUIRED: The following response has safety concerns that need addressing.
729
+
730
+ Original Response:
731
+ {original_response}
732
+
733
+ Safety Issues Detected:
734
+ {', '.join(exceeded_categories)}
735
+
736
+ Specific Warnings:
737
+ {'; '.join(safety_warnings) if safety_warnings else 'General safety concerns detected'}
738
+
739
+ REVISION INSTRUCTIONS:
740
+ {' | '.join(revision_instructions)}
741
+
742
+ Please revise the response to address these concerns while maintaining helpfulness and accuracy. Ensure the response:
743
+ 1. Addresses the user's original question completely
744
+ 2. Provides specific, actionable alternatives with clear qualifications needed
745
+ 3. Avoids generalizations and stereotypes about career transitions
746
+ 4. Includes necessary skills, education, and experience requirements
747
+ 5. Maintains a balanced, inclusive perspective that acknowledges different paths
748
+
749
+ Revised Response:"""
750
+
751
+ return prompt
752
+
753
+ async def process_request_with_revision(self, session_id: str, user_input: str) -> dict:
754
+ """Enhanced process_request with safety revision loop and timeout protection"""
755
+ try:
756
+ return await asyncio.wait_for(
757
+ self._process_request_with_revision_internal(session_id, user_input),
758
+ timeout=self.revision_timeout
759
+ )
760
+ except asyncio.TimeoutError:
761
+ logger.error(f"Safety revision timed out after {self.revision_timeout}s")
762
+ # Fallback to basic response
763
+ return {
764
+ 'final_response': 'Request processing took longer than expected. Please try again.',
765
+ 'response': 'Request processing took longer than expected. Please try again.',
766
+ 'revision_attempts': 0,
767
+ 'timeout_error': True,
768
+ 'safety_revision_applied': False
769
+ }
770
+
771
+ async def _process_request_with_revision_internal(self, session_id: str, user_input: str) -> dict:
772
+ """Internal revision loop with comprehensive error handling"""
773
+
774
+ revision_attempt = 0
775
+ current_response = None
776
+ final_result = None
777
+ exceeded_categories = [] # ✅ Fix: Initialize variables
778
+ safety_warnings = [] # ✅ Fix: Initialize variables
779
+
780
+ while revision_attempt <= self.max_revision_attempts:
781
+ try:
782
+ # For revision attempts, modify the input to include revision instructions
783
+ processing_input = user_input
784
+ if revision_attempt > 0:
785
+ processing_input = self.generate_revision_prompt(
786
+ current_response,
787
+ exceeded_categories,
788
+ safety_warnings
789
+ )
790
+ logger.info(f"Revision attempt {revision_attempt}: regenerating response with safety improvements")
791
+
792
+ # Execute normal processing flow
793
+ result = await self.process_request(session_id, processing_input)
794
+
795
+ # Extract the response text
796
+ current_response = result.get('final_response') or result.get('response', '')
797
+
798
+ if not current_response:
799
+ # Fallback: try to extract from metadata
800
+ metadata = result.get('metadata', {})
801
+ current_response = metadata.get('synthesis_result', {}).get('final_response', '')
802
+
803
+ if not current_response:
804
+ logger.warning("Could not extract response text for safety check")
805
+ return result
806
+
807
+ # Execute safety check on the response
808
+ safety_checked = await self.agents['safety_check'].execute(
809
+ response=current_response,
810
+ context=result.get('context', {})
811
+ )
812
+
813
+ # Check if revision is needed
814
+ needs_revision, exceeded_categories = self.check_safety_thresholds(safety_checked)
815
+ safety_warnings = safety_checked.get('warnings', [])
816
+
817
+ if not needs_revision:
818
+ # Safety thresholds met
819
+ logger.info(f"Safety check passed on attempt {revision_attempt + 1}")
820
+ result['safety_result'] = safety_checked
821
+ result['revision_attempts'] = revision_attempt
822
+ result['safety_revision_applied'] = revision_attempt > 0
823
+
824
+ # Update metadata with safety info
825
+ if 'metadata' not in result:
826
+ result['metadata'] = {}
827
+ result['metadata']['safety_result'] = safety_checked
828
+ result['metadata']['revision_attempts'] = revision_attempt
829
+
830
+ return result
831
+
832
+ if revision_attempt >= self.max_revision_attempts:
833
+ # Max attempts reached - handle gracefully based on input complexity
834
+ logger.warning(f"Max revision attempts reached. Categories still exceeded: {exceeded_categories}")
835
+
836
+ input_complexity = self._assess_input_complexity(user_input)
837
+
838
+ # For complex inputs, offer intelligent re-attempt instead of asking user to rephrase
839
+ if input_complexity["is_complex"] and input_complexity["complexity_score"] > 25:
840
+ logger.info("Complex input detected - attempting intelligent re-prompt")
841
+ try:
842
+ # Generate improved prompt automatically
843
+ improved_prompt = self._generate_improved_prompt(user_input, exceeded_categories)
844
+
845
+ # One final attempt with improved prompting
846
+ improved_result = await self.process_request(session_id, improved_prompt)
847
+ improved_response = improved_result.get('final_response', '')
848
+
849
+ # Quick safety check on improved response
850
+ final_safety_check = await self.agents['safety_check'].execute(
851
+ response=improved_response,
852
+ context=improved_result.get('context', {})
853
+ )
854
+
855
+ improved_needs_revision, improved_exceeded = self.check_safety_thresholds(final_safety_check)
856
+
857
+ if not improved_needs_revision:
858
+ # Success with intelligent re-prompting
859
+ logger.info("Intelligent re-prompt resolved safety concerns")
860
+ improved_result['safety_result'] = final_safety_check
861
+ improved_result['revision_attempts'] = revision_attempt + 1
862
+ improved_result['intelligent_reprompt_success'] = True
863
+ if 'metadata' not in improved_result:
864
+ improved_result['metadata'] = {}
865
+ improved_result['metadata']['safety_result'] = final_safety_check
866
+ improved_result['metadata']['revision_attempts'] = revision_attempt + 1
867
+ improved_result['metadata']['intelligent_reprompt_success'] = True
868
+ return improved_result
869
+ else:
870
+ # Still has issues - proceed with guidance
871
+ logger.info("Intelligent re-prompt did not fully resolve concerns")
872
+ current_response = improved_response
873
+ safety_checked = final_safety_check
874
+ exceeded_categories = improved_exceeded
875
+
876
+ except Exception as e:
877
+ logger.warning(f"Intelligent re-prompt failed: {e}", exc_info=True)
878
+ # Continue with original response and guidance
879
+
880
+ # Add user-friendly warning summary with appropriate guidance
881
+ warning_summary = self._generate_warning_summary(exceeded_categories, safety_checked.get('warnings', []))
882
+ user_guidance = self._generate_user_guidance(exceeded_categories, user_input)
883
+
884
+ # Append guidance to response
885
+ original_response = result.get('final_response', '')
886
+ enhanced_response = f"{original_response}\n\n{warning_summary}\n\n{user_guidance}"
887
+
888
+ result['final_response'] = enhanced_response
889
+ result['response'] = enhanced_response # Also update response for compatibility
890
+ result['safety_result'] = safety_checked
891
+ result['revision_attempts'] = revision_attempt
892
+ result['safety_exceeded'] = exceeded_categories
893
+ result['safety_revision_applied'] = revision_attempt > 0
894
+ result['warning_summary_added'] = True
895
+ result['input_complexity'] = input_complexity
896
+
897
+ # Update metadata
898
+ if 'metadata' not in result:
899
+ result['metadata'] = {}
900
+ result['metadata']['safety_result'] = safety_checked
901
+ result['metadata']['revision_attempts'] = revision_attempt
902
+ result['metadata']['safety_exceeded'] = exceeded_categories
903
+ result['metadata']['input_complexity'] = input_complexity
904
+
905
+ return result
906
+
907
+ # Store for next revision
908
+ final_result = result
909
+ revision_attempt += 1
910
+ logger.info(f"Generating revision attempt {revision_attempt} for: {exceeded_categories}")
911
+
912
+ except Exception as e:
913
+ logger.error(f"Error in safety revision attempt {revision_attempt}: {e}", exc_info=True)
914
+ if final_result:
915
+ final_result['revision_error'] = str(e)
916
+ if 'metadata' not in final_result:
917
+ final_result['metadata'] = {}
918
+ final_result['metadata']['revision_error'] = str(e)
919
+ return final_result
920
+ # If we don't have a result yet, return the error result
921
+ return {
922
+ 'response': 'Error in processing with safety revision',
923
+ 'final_response': 'Error in processing with safety revision',
924
+ 'revision_attempts': revision_attempt,
925
+ 'revision_error': str(e),
926
+ 'error': str(e)
927
+ }
928
+
929
+ # Fallback - should not reach here
930
+ return final_result or {
931
+ 'response': 'Error in safety revision processing',
932
+ 'final_response': 'Error in safety revision processing',
933
+ 'revision_attempts': revision_attempt,
934
+ 'safety_revision_applied': False
935
+ }
936
+
937
+ def _generate_warning_summary(self, exceeded_categories: list, safety_warnings: list) -> str:
938
+ """Generate user-friendly warning summary"""
939
+ category_explanations = {
940
+ "potential_biases_or_stereotypes": "may contain assumptions about career transitions that don't account for individual circumstances",
941
+ "toxicity_or_harmful_language": "contains language that could be harmful or inappropriate",
942
+ "privacy_or_security_concerns": "includes content that could raise privacy or security considerations",
943
+ "controversial_or_sensitive_topics": "touches on topics that may benefit from additional perspective"
944
+ }
945
+
946
+ if not exceeded_categories:
947
+ return ""
948
+
949
+ warning_text = "**Note**: This response " + ", ".join([
950
+ category_explanations.get(cat, f"has concerns related to {cat}")
951
+ for cat in exceeded_categories
952
+ ]) + "."
953
+
954
+ return warning_text
955
+
956
+ def _generate_user_guidance(self, exceeded_categories: list, original_user_input: str) -> str:
957
+ """Generate proactive user guidance with UX-friendly options for complex prompts"""
958
+ if not exceeded_categories:
959
+ return ""
960
+
961
+ input_complexity = self._assess_input_complexity(original_user_input)
962
+
963
+ guidance_templates = {
964
+ "potential_biases_or_stereotypes": {
965
+ "issue": "avoid assumptions about career paths",
966
+ "simple_suggestion": "ask for advice tailored to specific qualifications or industry interests",
967
+ "complex_refinement": "add details like your specific skills, target industry, or education level"
968
+ },
969
+ "toxicity_or_harmful_language": {
970
+ "issue": "ensure respectful communication",
971
+ "simple_suggestion": "rephrase using more neutral language",
972
+ "complex_refinement": "adjust the tone while keeping your detailed context"
973
+ },
974
+ "privacy_or_security_concerns": {
975
+ "issue": "protect sensitive information",
976
+ "simple_suggestion": "ask for general guidance instead",
977
+ "complex_refinement": "remove specific personal details while keeping the scenario structure"
978
+ },
979
+ "controversial_or_sensitive_topics": {
980
+ "issue": "get balanced perspectives",
981
+ "simple_suggestion": "ask for multiple viewpoints or balanced analysis",
982
+ "complex_refinement": "specify you'd like pros/cons or different perspectives included"
983
+ }
984
+ }
985
+
986
+ primary_category = exceeded_categories[0]
987
+ guidance = guidance_templates.get(primary_category, {
988
+ "issue": "improve response quality",
989
+ "simple_suggestion": "try rephrasing with more specific details",
990
+ "complex_refinement": "add clarifying details to your existing question"
991
+ })
992
+
993
+ topic = self._extract_main_topic(original_user_input)
994
+
995
+ # Adaptive guidance based on input complexity
996
+ if input_complexity["is_complex"]:
997
+ return f"""**Want a better response?** To {guidance['issue']} in responses about {topic}, you could {guidance['complex_refinement']} rather than rewriting your detailed question. Or simply ask again as-is and I'll focus on providing more balanced information."""
998
+ else:
999
+ return f"""**Want a better response?** To {guidance['issue']} in future responses about {topic}, you could {guidance['simple_suggestion']}. Feel free to ask again with any adjustments!"""
1000
+
1001
+ def _assess_input_complexity(self, user_input: str) -> dict:
1002
+ """Assess input complexity to determine appropriate UX guidance"""
1003
+ word_count = len(user_input.split())
1004
+ sentence_count = user_input.count('.') + user_input.count('!') + user_input.count('?')
1005
+ has_context = any(phrase in user_input.lower() for phrase in [
1006
+ 'i am currently', 'my situation', 'my background', 'i have been',
1007
+ 'my experience', 'i work', 'my company', 'specific to my'
1008
+ ])
1009
+ has_constraints = any(phrase in user_input.lower() for phrase in [
1010
+ 'must', 'need to', 'required', 'limited by', 'constraint', 'budget',
1011
+ 'timeline', 'deadline', 'specific requirements'
1012
+ ])
1013
+
1014
+ is_complex = (
1015
+ word_count > 30 or
1016
+ sentence_count > 2 or
1017
+ has_context or
1018
+ has_constraints
1019
+ )
1020
+
1021
+ return {
1022
+ "is_complex": is_complex,
1023
+ "word_count": word_count,
1024
+ "has_personal_context": has_context,
1025
+ "has_constraints": has_constraints,
1026
+ "complexity_score": word_count * 0.1 + sentence_count * 5 + (has_context * 10) + (has_constraints * 10)
1027
+ }
1028
+
1029
+ def _generate_improved_prompt(self, original_input: str, exceeded_categories: list) -> str:
1030
+ """Generate improved prompt for complex inputs to resolve safety concerns automatically"""
1031
+
1032
+ improvements = []
1033
+
1034
+ if "potential_biases_or_stereotypes" in exceeded_categories:
1035
+ improvements.append("Please provide specific qualifications, skills, and requirements for each option")
1036
+ improvements.append("Include diverse pathways and acknowledge individual circumstances vary")
1037
+
1038
+ if "toxicity_or_harmful_language" in exceeded_categories:
1039
+ improvements.append("Use respectful, professional language throughout")
1040
+
1041
+ if "privacy_or_security_concerns" in exceeded_categories:
1042
+ improvements.append("Focus on general guidance without personal specifics")
1043
+
1044
+ if "controversial_or_sensitive_topics" in exceeded_categories:
1045
+ improvements.append("Present balanced perspectives and multiple viewpoints")
1046
+
1047
+ improvement_instructions = ". ".join(improvements)
1048
+
1049
+ improved_prompt = f"""{original_input}
1050
+
1051
+ Additional guidance for response: {improvement_instructions}. Ensure all advice is specific, actionable, and acknowledges different backgrounds and circumstances."""
1052
+
1053
+ return improved_prompt