aiqtech committed
Commit
1090499
·
verified ·
1 Parent(s): efa312a

Update app.py

Files changed (1)
  1. app.py +241 -173
app.py CHANGED
@@ -1,15 +1,18 @@
  """
- Multi-Agent RAG-Enhanced LLM System for Hugging Face Spaces
  Supervisor -> Creative generator -> Critic -> Supervisor (final pass)
- A system that produces high-quality answers through a 4-stage pipeline
  """
 
  import os
  import json
  import time
- from typing import Optional, List, Dict, Any, Tuple
  from datetime import datetime
  from enum import Enum
 
  import requests
  import gradio as gr
@@ -43,11 +46,10 @@ class AgentResponse(BaseModel):
      metadata: Optional[Dict] = None
 
 
- class FinalResponse(BaseModel):
-     final_answer: str
-     agent_responses: List[AgentResponse]
-     search_results: Optional[List[Dict]] = None
-     processing_time: float
 
 
  # ============================================================================
@@ -107,7 +109,7 @@ class BraveSearchClient:
 
 
  # ============================================================================
- # Fireworks LLM client
  # ============================================================================
 
  class FireworksClient:
@@ -124,14 +126,15 @@ class FireworksClient:
      }
 
      def chat(self, messages: List[Dict], **kwargs) -> str:
-         """Chat with the LLM"""
          payload = {
              "model": kwargs.get("model", "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507"),
              "messages": messages,
              "max_tokens": kwargs.get("max_tokens", 4096),
              "temperature": kwargs.get("temperature", 0.7),
              "top_p": kwargs.get("top_p", 1.0),
-             "top_k": kwargs.get("top_k", 40)
          }
 
          try:
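For reference, a minimal sketch of how the `chat()` method shown in this hunk might be exercised. The messages and sampling overrides are illustrative, and it assumes the client's constructor picks up the Fireworks credentials the same way the rest of app.py does.

```python
# Hypothetical usage sketch of FireworksClient.chat(); values are illustrative.
client = FireworksClient()  # assumes FIREWORKS_API_KEY is configured as elsewhere in app.py

reply = client.chat(
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Summarize RAG in one sentence."},
    ],
    temperature=0.3,   # overrides the 0.7 default shown above
    max_tokens=512,    # overrides the 4096 default shown above
)
print(reply)
```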
@@ -150,14 +153,55 @@ class FireworksClient:
 
          except Exception as e:
              return f"Error: {str(e)}"
 
 
  # ============================================================================
- # Multi-agent system
  # ============================================================================
 
- class MultiAgentSystem:
-     """4-stage multi-agent processing system"""
 
      def __init__(self, llm_client: FireworksClient, search_client: BraveSearchClient):
          self.llm = llm_client
@@ -176,13 +220,7 @@ class MultiAgentSystem:
  1. Identify the core intent of the question
  2. Extract the key information from the search results
  3. Define the main elements the answer must cover
- 4. Lay out the logical flow and structure
-
- Output format:
- - Question analysis: [core intent]
- - Key points to include: [items]
- - Answer structure: [logical flow]
- - How to use the search results: [which information to use, and how]"""
          },
 
          AgentRole.CREATIVE: {
@@ -194,13 +232,7 @@ class MultiAgentSystem:
  1. Follow the supervisor's structure, but expand on it creatively
  2. Use examples, analogies, and storytelling
  3. Add explanations that are easy to follow from the user's point of view
- 4. Include practical, concrete advice
- 5. Offer original perspectives and insights
-
- Caveats:
- - Be creative without compromising accuracy
- - Reorganize the search results creatively
- - Include content that invites user engagement"""
          },
 
          AgentRole.CRITIC: {
@@ -208,24 +240,11 @@ class MultiAgentSystem:
              "system_prompt": """You are the critic agent.
  Review the creative generator's answer and suggest improvements.
 
- Role:
- 1. Verify factual accuracy
- 2. Check logical consistency
- 3. Point out potentially misleading wording
- 4. Identify missing important information
- 5. Give concrete directions for improvement
-
  Evaluation criteria:
  - Accuracy: correctness of facts and data
  - Completeness: whether the question is answered sufficiently
  - Clarity: whether the explanation is easy to understand
- - Usefulness: whether the information is actually helpful
- - Reliability: whether verifiable sources are included
-
- Output format:
- ✅ Strengths: [what was done well]
- ⚠️ Needs improvement: [issues and how to fix them]
- 💡 Additional suggestions: [what to add]"""
          },
 
          AgentRole.FINALIZER: {
@@ -233,24 +252,11 @@ class MultiAgentSystem:
              "system_prompt": """You are the final supervisor.
  Synthesize every agent's input to produce the final answer.
 
- Role:
- 1. Build on the creative generator's answer,
- 2. incorporate the critic's feedback,
- 3. keep the supervisor's initial structure, and
- 4. produce a logical, easy-to-understand final answer
-
  Final answer criteria:
  - A balance of accuracy and creativity
  - Clear structure and logical flow
  - Practical, useful information
- - A user-friendly tone
- - Explicit citation of search-result sources
-
- Must include:
- 1. Core answer (a direct response)
- 2. Detailed explanation (background and context)
- 3. Practical advice (where applicable)
- 4. References (based on the search results)"""
          }
      }
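The removed prompt text above belongs to the `agent_configs` mapping that both the old and the new class build in `__init__`. A rough sketch of its shape, inferred only from the lookups used later in this diff (`agent_configs[role]["system_prompt"]` and `["temperature"]`); the temperature values here are illustrative, not the ones in app.py.

```python
# Sketch of the agent_configs structure implied by the lookups in this file.
# Temperatures are illustrative placeholders.
agent_configs = {
    AgentRole.SUPERVISOR: {"temperature": 0.3, "system_prompt": "You are the supervisor agent. ..."},
    AgentRole.CREATIVE:   {"temperature": 0.9, "system_prompt": "You are the creative generator. ..."},
    AgentRole.CRITIC:     {"temperature": 0.2, "system_prompt": "You are the critic agent. ..."},
    AgentRole.FINALIZER:  {"temperature": 0.5, "system_prompt": "You are the final supervisor. ..."},
}

# Later stages read it like this (as seen in process_with_agents / process_with_streaming):
prompt = agent_configs[AgentRole.CRITIC]["system_prompt"]
temp = agent_configs[AgentRole.CRITIC]["temperature"]
```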
@@ -265,24 +271,46 @@ class MultiAgentSystem:
  [Search result {i}]
  Title: {result.get('title', 'N/A')}
  URL: {result.get('url', 'N/A')}
- Content: {result.get('description', 'N/A')}
- Posted: {result.get('age', 'N/A')}""")
 
          return "\n".join(formatted)
 
-     def process_with_agents(
          self,
          query: str,
          search_results: List[Dict],
-         config: Dict
-     ) -> FinalResponse:
-         """Run the multi-agent pipeline"""
 
-         start_time = time.time()
-         agent_responses = []
          search_context = self._format_search_results(search_results)
 
-         # Stage 1: Supervisor - set the direction
          supervisor_prompt = f"""
  User question: {query}
 
@@ -291,21 +319,29 @@ URL: {result.get('url', 'N/A')}
 
  Based on the information above, lay out the direction and structure of the answer."""
 
-         supervisor_response = self.llm.chat(
              messages=[
                  {"role": "system", "content": self.agent_configs[AgentRole.SUPERVISOR]["system_prompt"]},
                  {"role": "user", "content": supervisor_prompt}
              ],
              temperature=self.agent_configs[AgentRole.SUPERVISOR]["temperature"],
              max_tokens=config.get("max_tokens", 1000)
-         )
 
-         agent_responses.append(AgentResponse(
-             role=AgentRole.SUPERVISOR,
-             content=supervisor_response
-         ))
 
-         # Stage 2: Creative generator - produce the creative answer
          creative_prompt = f"""
  User question: {query}
 
@@ -317,21 +353,29 @@ URL: {result.get('url', 'N/A')}
 
  Using the guidance and information above, generate a creative and useful answer."""
 
-         creative_response = self.llm.chat(
              messages=[
                  {"role": "system", "content": self.agent_configs[AgentRole.CREATIVE]["system_prompt"]},
                  {"role": "user", "content": creative_prompt}
              ],
              temperature=self.agent_configs[AgentRole.CREATIVE]["temperature"],
              max_tokens=config.get("max_tokens", 2000)
-         )
 
-         agent_responses.append(AgentResponse(
-             role=AgentRole.CREATIVE,
-             content=creative_response
-         ))
 
-         # Stage 3: Critic - review and suggest improvements
          critic_prompt = f"""
  Original question: {query}
 
@@ -343,21 +387,39 @@ URL: {result.get('url', 'N/A')}
 
  Review the answer above and suggest improvements."""
 
-         critic_response = self.llm.chat(
              messages=[
                  {"role": "system", "content": self.agent_configs[AgentRole.CRITIC]["system_prompt"]},
                  {"role": "user", "content": critic_prompt}
              ],
              temperature=self.agent_configs[AgentRole.CRITIC]["temperature"],
              max_tokens=config.get("max_tokens", 1000)
-         )
 
-         agent_responses.append(AgentResponse(
-             role=AgentRole.CRITIC,
-             content=critic_response
-         ))
 
-         # Stage 4: Final supervisor - synthesize and finalize
          final_prompt = f"""
  User question: {query}
 
@@ -373,45 +435,34 @@ URL: {result.get('url', 'N/A')}
  Search results:
  {search_context}
 
- Synthesize all of the feedback and generate the final answer.
- Incorporate the critic's feedback to produce an improved version."""
 
-         final_response = self.llm.chat(
              messages=[
                  {"role": "system", "content": self.agent_configs[AgentRole.FINALIZER]["system_prompt"]},
                  {"role": "user", "content": final_prompt}
              ],
              temperature=self.agent_configs[AgentRole.FINALIZER]["temperature"],
              max_tokens=config.get("max_tokens", 3000)
-         )
-
-         agent_responses.append(AgentResponse(
-             role=AgentRole.FINALIZER,
-             content=final_response
-         ))
-
-         processing_time = time.time() - start_time
-
-         return FinalResponse(
-             final_answer=final_response,
-             agent_responses=agent_responses,
-             search_results=search_results,
-             processing_time=processing_time
-         )
 
 
  # ============================================================================
- # Gradio UI
  # ============================================================================
 
  def create_gradio_interface():
-     """Create the Gradio interface"""
 
      # Initialize clients
      try:
          llm_client = FireworksClient()
          search_client = BraveSearchClient()
-         multi_agent_system = MultiAgentSystem(llm_client, search_client)
          system_ready = True
      except Exception as e:
          print(f"⚠️ System initialization error: {e}")
@@ -419,7 +470,7 @@ def create_gradio_interface():
          search_client = None
          system_ready = False
 
-     def process_query(
          message: str,
          history: List[Dict],
          use_search: bool,
@@ -427,17 +478,34 @@ def create_gradio_interface():
          search_count: int,
          temperature: float,
          max_tokens: int
-     ) -> Tuple[List[Dict], str, str]:
-         """Handle a query"""
 
          if not message or not system_ready:
-             return history, "", ""
 
          try:
              # Run the search
              search_results = []
              if use_search and search_client and search_client.api_key:
                  search_results = search_client.search(message, count=search_count)
 
              # Settings
              config = {
@@ -445,74 +513,58 @@ def create_gradio_interface():
                  "max_tokens": max_tokens
              }
 
-             # Multi-agent processing
-             response = multi_agent_system.process_with_agents(
-                 query=message,
-                 search_results=search_results,
-                 config=config
-             )
 
-             # Format the agents' reasoning
              agent_thoughts = ""
-             if show_agent_thoughts:
-                 agent_thoughts = "## 🤖 Agent reasoning\n\n"
-
-                 for agent_resp in response.agent_responses:
-                     role_emoji = {
-                         AgentRole.SUPERVISOR: "👔",
-                         AgentRole.CREATIVE: "🎨",
-                         AgentRole.CRITIC: "🔍",
-                         AgentRole.FINALIZER: "✅"
-                     }
-
-                     role_name = {
-                         AgentRole.SUPERVISOR: "Supervisor (initial structuring)",
-                         AgentRole.CREATIVE: "Creative generator",
-                         AgentRole.CRITIC: "Critic",
-                         AgentRole.FINALIZER: "Final supervisor"
-                     }
-
-                     agent_thoughts += f"### {role_emoji[agent_resp.role]} {role_name[agent_resp.role]}\n"
-                     agent_thoughts += f"{agent_resp.content[:500]}...\n\n"
 
-             # Format the search results
-             search_display = ""
-             if search_results:
-                 search_display = "## 📚 References\n\n"
-                 for i, result in enumerate(search_results, 1):
-                     search_display += f"**{i}. [{result['title']}]({result['url']})**\n"
-                     search_display += f" {result['description'][:100]}...\n\n"
-
-             # Append the processing time
-             final_answer = response.final_answer
-             final_answer += f"\n\n---\n⏱️ *Processing time: {response.processing_time:.2f}s*"
 
-             # Update the history (OpenAI message format)
-             history.append({"role": "user", "content": message})
-             history.append({"role": "assistant", "content": final_answer})
 
-             return history, agent_thoughts, search_display
 
          except Exception as e:
              error_msg = f"❌ Error: {str(e)}"
-             history.append({"role": "user", "content": message})
-             history.append({"role": "assistant", "content": error_msg})
-             return history, "", ""
 
      # Gradio interface
      with gr.Blocks(
-         title="Multi-Agent RAG System",
          theme=gr.themes.Soft(),
          css="""
          .gradio-container {
              max-width: 1400px !important;
              margin: auto !important;
          }
          """
      ) as demo:
          gr.Markdown("""
-         # 🧠 Multi-Agent RAG System
-         ### High-quality answers through four-stage agent collaboration
 
          **Pipeline:** Supervisor (structuring) → Creative generator (creative answer) → Critic (verification) → Final supervisor (synthesis)
          """)
@@ -530,18 +582,20 @@ def create_gradio_interface():
                  chatbot = gr.Chatbot(
                      height=500,
                      label="💬 Conversation",
-                     type="messages"  # OpenAI-style message format
                  )
 
                  msg = gr.Textbox(
                      label="Enter your question",
-                     placeholder="Type your question... (multiple agents collaborate on the answer)",
                      lines=3
                  )
 
                  with gr.Row():
                      submit = gr.Button("🚀 Send", variant="primary")
                      clear = gr.Button("🔄 Reset")
 
                  # Agent reasoning
                  with gr.Accordion("🤖 Agent reasoning", open=False):
@@ -569,7 +623,7 @@ def create_gradio_interface():
                      search_count = gr.Slider(
                          minimum=1,
                          maximum=10,
-                         value=10,
                          step=1,
                          label="Number of search results"
                      )
@@ -579,13 +633,14 @@ def create_gradio_interface():
                          maximum=1,
                          value=0.6,
                          step=0.1,
-                         label="Temperature"
                      )
 
                      max_tokens = gr.Slider(
                          minimum=500,
                          maximum=4000,
-                         value=4000,
                          step=100,
                          label="Max Tokens"
                      )
@@ -593,11 +648,16 @@ def create_gradio_interface():
                  gr.Markdown("""
                  ### 📊 System info
 
-                 **Agent roles:**
                  - 👔 **Supervisor**: structure design
                  - 🎨 **Creative**: creative generation
                  - 🔍 **Critic**: verification/improvement
                  - ✅ **Final**: synthesis/completion
                  """)
 
                  # Examples
@@ -612,9 +672,9 @@ def create_gradio_interface():
              inputs=msg
          )
 
-         # Event bindings
-         submit.click(
-             process_query,
              inputs=[msg, chatbot, use_search, show_agent_thoughts,
                      search_count, temperature, max_tokens],
              outputs=[chatbot, agent_thoughts, search_sources]
@@ -624,8 +684,8 @@ def create_gradio_interface():
              msg
          )
 
-         msg.submit(
-             process_query,
              inputs=[msg, chatbot, use_search, show_agent_thoughts,
                      search_count, temperature, max_tokens],
              outputs=[chatbot, agent_thoughts, search_sources]
@@ -635,8 +695,16 @@ def create_gradio_interface():
              msg
          )
 
          clear.click(
-             lambda: ([], None, None),
              None,
              [chatbot, agent_thoughts, search_sources]
          )
@@ -651,10 +719,10 @@ def create_gradio_interface():
  if __name__ == "__main__":
      print("""
      ╔══════════════════════════════════════════════════════════════╗
-     ║ 🧠 Multi-Agent RAG-Enhanced LLM System 🧠                      ║
      ║                                                                ║
      ║ Supervisor → Creative generator → Critic → Final supervisor   ║
-     ║ High-quality answers through 4-stage collaboration            ║
      ╚══════════════════════════════════════════════════════════════╝
      """)

  """
+ Multi-Agent RAG-Enhanced LLM System for Hugging Face Spaces with Streaming
  Supervisor -> Creative generator -> Critic -> Supervisor (final pass)
+ A system that produces high-quality answers through a 4-stage pipeline, with streaming output
  """
 
  import os
  import json
  import time
+ import asyncio
+ from typing import Optional, List, Dict, Any, Tuple, Generator, AsyncGenerator
  from datetime import datetime
  from enum import Enum
+ import threading
+ import queue
 
  import requests
  import gradio as gr
 
      metadata: Optional[Dict] = None
 
 
+ class StreamingResponse(BaseModel):
+     chunk: str
+     agent_role: Optional[AgentRole] = None
+     is_complete: bool = False
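A quick, illustrative sketch of how the new `StreamingResponse` model could represent streamed chunks. Note that the rest of this diff streams plain strings and tuples rather than these objects, so the model appears to be a helper/placeholder; the values below are made up.

```python
# Illustrative only: constructing StreamingResponse objects for a streamed answer.
chunks = [
    StreamingResponse(chunk="The answer starts here", agent_role=AgentRole.FINALIZER),
    StreamingResponse(chunk=" and continues...", agent_role=AgentRole.FINALIZER),
    StreamingResponse(chunk="", agent_role=AgentRole.FINALIZER, is_complete=True),
]
full_text = "".join(c.chunk for c in chunks)
```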
 
 
  # ============================================================================
 
  # ============================================================================
+ # Fireworks LLM client (streaming supported)
  # ============================================================================
 
  class FireworksClient:
 
      }
 
      def chat(self, messages: List[Dict], **kwargs) -> str:
+         """Chat with the LLM (non-streaming)"""
          payload = {
              "model": kwargs.get("model", "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507"),
              "messages": messages,
              "max_tokens": kwargs.get("max_tokens", 4096),
              "temperature": kwargs.get("temperature", 0.7),
              "top_p": kwargs.get("top_p", 1.0),
+             "top_k": kwargs.get("top_k", 40),
+             "stream": False
          }
 
          try:
 
          except Exception as e:
              return f"Error: {str(e)}"
+
+     def chat_stream(self, messages: List[Dict], **kwargs) -> Generator[str, None, None]:
+         """Chat with the LLM (streaming)"""
+         payload = {
+             "model": kwargs.get("model", "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507"),
+             "messages": messages,
+             "max_tokens": kwargs.get("max_tokens", 4096),
+             "temperature": kwargs.get("temperature", 0.7),
+             "top_p": kwargs.get("top_p", 1.0),
+             "top_k": kwargs.get("top_k", 40),
+             "stream": True
+         }
+
+         try:
+             response = requests.post(
+                 self.base_url,
+                 headers={**self.headers, "Accept": "text/event-stream"},
+                 data=json.dumps(payload),
+                 stream=True,
+                 timeout=60
+             )
+             response.raise_for_status()
+
+             for line in response.iter_lines():
+                 if line:
+                     line_str = line.decode('utf-8')
+                     if line_str.startswith("data: "):
+                         data_str = line_str[6:]
+                         if data_str == "[DONE]":
+                             break
+                         try:
+                             data = json.loads(data_str)
+                             if "choices" in data and len(data["choices"]) > 0:
+                                 delta = data["choices"][0].get("delta", {})
+                                 if "content" in delta:
+                                     yield delta["content"]
+                         except json.JSONDecodeError:
+                             continue
+
+         except Exception as e:
+             yield f"Error: {str(e)}"
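For orientation, a minimal sketch of how the new `chat_stream()` generator is meant to be consumed; the message content is a placeholder. Each server-sent-event line from the endpoint looks roughly like `data: {"choices": [{"delta": {"content": "..."}}]}`, which is what the parser above extracts token by token.

```python
# Hypothetical consumer of FireworksClient.chat_stream(); prints tokens as they arrive.
client = FireworksClient()

partial = ""
for token in client.chat_stream(
    messages=[{"role": "user", "content": "Explain streaming responses briefly."}],
    max_tokens=256,
):
    partial += token
    print(token, end="", flush=True)  # render incrementally, as the Gradio UI below does
print()
```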
  # ============================================================================
+ # Multi-agent system (streaming supported)
  # ============================================================================
 
+ class MultiAgentSystemStreaming:
+     """4-stage multi-agent pipeline with streaming support"""
 
      def __init__(self, llm_client: FireworksClient, search_client: BraveSearchClient):
          self.llm = llm_client
 
  1. Identify the core intent of the question
  2. Extract the key information from the search results
  3. Define the main elements the answer must cover
+ 4. Lay out the logical flow and structure"""
          },
 
          AgentRole.CREATIVE: {
 
  1. Follow the supervisor's structure, but expand on it creatively
  2. Use examples, analogies, and storytelling
  3. Add explanations that are easy to follow from the user's point of view
+ 4. Include practical, concrete advice"""
          },
 
          AgentRole.CRITIC: {
              "system_prompt": """You are the critic agent.
  Review the creative generator's answer and suggest improvements.
 
  Evaluation criteria:
  - Accuracy: correctness of facts and data
  - Completeness: whether the question is answered sufficiently
  - Clarity: whether the explanation is easy to understand
+ - Usefulness: whether the information is actually helpful"""
          },
 
          AgentRole.FINALIZER: {
              "system_prompt": """You are the final supervisor.
  Synthesize every agent's input to produce the final answer.
 
  Final answer criteria:
  - A balance of accuracy and creativity
  - Clear structure and logical flow
  - Practical, useful information
+ - A user-friendly tone"""
          }
      }
 
  [Search result {i}]
  Title: {result.get('title', 'N/A')}
  URL: {result.get('url', 'N/A')}
+ Content: {result.get('description', 'N/A')}""")
 
          return "\n".join(formatted)
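As a concrete illustration of what `_format_search_results` now produces, each hit becomes a small block like the following (the title, URL and description are made up):

```python
# Example of the search context string built above (illustrative values).
example_context = """[Search result 1]
Title: What is retrieval-augmented generation?
URL: https://example.com/rag
Content: A short description returned by the search API..."""
```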
+     def process_with_streaming(
          self,
          query: str,
          search_results: List[Dict],
+         config: Dict,
+         show_agent_thoughts: bool = False
+     ) -> Generator[Tuple[str, str], None, None]:
+         """Run the multi-agent pipeline with streaming output"""
 
          search_context = self._format_search_results(search_results)
+         accumulated_response = ""
+         agent_thoughts_display = ""
+
+         # Agent role emojis
+         role_emoji = {
+             AgentRole.SUPERVISOR: "👔",
+             AgentRole.CREATIVE: "🎨",
+             AgentRole.CRITIC: "🔍",
+             AgentRole.FINALIZER: "✅"
+         }
+
+         role_name = {
+             AgentRole.SUPERVISOR: "Supervisor",
+             AgentRole.CREATIVE: "Creative generator",
+             AgentRole.CRITIC: "Critic",
+             AgentRole.FINALIZER: "Final supervisor"
+         }
+
+         # Collected agent responses
+         agent_responses = {}
+
+         # Stage 1: Supervisor
+         if show_agent_thoughts:
+             agent_thoughts_display += f"### {role_emoji[AgentRole.SUPERVISOR]} {role_name[AgentRole.SUPERVISOR]} is analyzing...\n\n"
+             yield accumulated_response, agent_thoughts_display
 
          supervisor_prompt = f"""
  User question: {query}
 
  Based on the information above, lay out the direction and structure of the answer."""
 
+         supervisor_response = ""
+         for chunk in self.llm.chat_stream(
              messages=[
                  {"role": "system", "content": self.agent_configs[AgentRole.SUPERVISOR]["system_prompt"]},
                  {"role": "user", "content": supervisor_prompt}
              ],
              temperature=self.agent_configs[AgentRole.SUPERVISOR]["temperature"],
              max_tokens=config.get("max_tokens", 1000)
+         ):
+             supervisor_response += chunk
+             if show_agent_thoughts:
+                 # Show the supervisor's response live (first 300 characters only)
+                 display_text = supervisor_response[:300] + ("..." if len(supervisor_response) > 300 else "")
+                 agent_thoughts_display = f"### {role_emoji[AgentRole.SUPERVISOR]} {role_name[AgentRole.SUPERVISOR]}\n\n{display_text}\n\n"
+                 yield accumulated_response, agent_thoughts_display
 
+         agent_responses[AgentRole.SUPERVISOR] = supervisor_response
+
+         # Stage 2: Creative generator
+         if show_agent_thoughts:
+             agent_thoughts_display += f"### {role_emoji[AgentRole.CREATIVE]} {role_name[AgentRole.CREATIVE]} is generating...\n\n"
+             yield accumulated_response, agent_thoughts_display
 
          creative_prompt = f"""
  User question: {query}
 
  Using the guidance and information above, generate a creative and useful answer."""
 
+         creative_response = ""
+         for chunk in self.llm.chat_stream(
              messages=[
                  {"role": "system", "content": self.agent_configs[AgentRole.CREATIVE]["system_prompt"]},
                  {"role": "user", "content": creative_prompt}
              ],
              temperature=self.agent_configs[AgentRole.CREATIVE]["temperature"],
              max_tokens=config.get("max_tokens", 2000)
+         ):
+             creative_response += chunk
+             if show_agent_thoughts:
+                 display_text = creative_response[:400] + ("..." if len(creative_response) > 400 else "")
+                 prev_supervisor = f"### {role_emoji[AgentRole.SUPERVISOR]} {role_name[AgentRole.SUPERVISOR]}\n\n{supervisor_response[:200]}...\n\n"
+                 agent_thoughts_display = prev_supervisor + f"### {role_emoji[AgentRole.CREATIVE]} {role_name[AgentRole.CREATIVE]}\n\n{display_text}\n\n"
+                 yield accumulated_response, agent_thoughts_display
+
+         agent_responses[AgentRole.CREATIVE] = creative_response
 
+         # Stage 3: Critic
+         if show_agent_thoughts:
+             agent_thoughts_display += f"### {role_emoji[AgentRole.CRITIC]} {role_name[AgentRole.CRITIC]} is reviewing...\n\n"
+             yield accumulated_response, agent_thoughts_display
 
          critic_prompt = f"""
  Original question: {query}
 
  Review the answer above and suggest improvements."""
 
+         critic_response = ""
+         for chunk in self.llm.chat_stream(
              messages=[
                  {"role": "system", "content": self.agent_configs[AgentRole.CRITIC]["system_prompt"]},
                  {"role": "user", "content": critic_prompt}
              ],
              temperature=self.agent_configs[AgentRole.CRITIC]["temperature"],
              max_tokens=config.get("max_tokens", 1000)
+         ):
+             critic_response += chunk
+             if show_agent_thoughts:
+                 display_text = critic_response[:300] + ("..." if len(critic_response) > 300 else "")
+                 # Summaries of the previous agents
+                 prev_content = f"### {role_emoji[AgentRole.SUPERVISOR]} {role_name[AgentRole.SUPERVISOR]}\n{supervisor_response[:150]}...\n\n"
+                 prev_content += f"### {role_emoji[AgentRole.CREATIVE]} {role_name[AgentRole.CREATIVE]}\n{creative_response[:200]}...\n\n"
+                 agent_thoughts_display = prev_content + f"### {role_emoji[AgentRole.CRITIC]} {role_name[AgentRole.CRITIC]}\n\n{display_text}\n\n"
+                 yield accumulated_response, agent_thoughts_display
 
+         agent_responses[AgentRole.CRITIC] = critic_response
 
+         # Stage 4: Final supervisor - now stream the final answer
+         if show_agent_thoughts:
+             # Final summary of every agent's reasoning
+             final_thoughts = "## 🤖 Agent collaboration complete\n\n"
+             for role in [AgentRole.SUPERVISOR, AgentRole.CREATIVE, AgentRole.CRITIC]:
+                 final_thoughts += f"### {role_emoji[role]} {role_name[role]}\n"
+                 final_thoughts += f"{agent_responses[role][:250]}...\n\n"
+
+             final_thoughts += f"### {role_emoji[AgentRole.FINALIZER]} {role_name[AgentRole.FINALIZER]} is generating the final answer...\n\n"
+             agent_thoughts_display = final_thoughts
+             yield accumulated_response, agent_thoughts_display
+
+         # Final answer prompt
          final_prompt = f"""
  User question: {query}
 
  Search results:
  {search_context}
 
+ Synthesize all of the feedback and generate the final answer."""
 
+         # Stream the final answer
+         accumulated_response = ""
+         for chunk in self.llm.chat_stream(
              messages=[
                  {"role": "system", "content": self.agent_configs[AgentRole.FINALIZER]["system_prompt"]},
                  {"role": "user", "content": final_prompt}
              ],
              temperature=self.agent_configs[AgentRole.FINALIZER]["temperature"],
              max_tokens=config.get("max_tokens", 3000)
+         ):
+             accumulated_response += chunk
+             yield accumulated_response, agent_thoughts_display
 
 
 
 
 
 
 
 
 
 
 
  # ============================================================================
+ # Gradio UI (streaming supported)
  # ============================================================================
 
  def create_gradio_interface():
+     """Create the Gradio interface (with streaming)"""
 
      # Initialize clients
      try:
          llm_client = FireworksClient()
          search_client = BraveSearchClient()
+         multi_agent_system = MultiAgentSystemStreaming(llm_client, search_client)
          system_ready = True
      except Exception as e:
          print(f"⚠️ System initialization error: {e}")
 
          search_client = None
          system_ready = False
 
+     def process_query_streaming(
          message: str,
          history: List[Dict],
          use_search: bool,
 
          search_count: int,
          temperature: float,
          max_tokens: int
+     ):
+         """Handle a query with streaming output"""
 
          if not message or not system_ready:
+             yield history, "", ""
+             return
 
          try:
              # Run the search
              search_results = []
+             search_display = ""
+
              if use_search and search_client and search_client.api_key:
+                 # Show a "searching" placeholder
+                 history_with_message = history + [
+                     {"role": "user", "content": message},
+                     {"role": "assistant", "content": "🔍 Searching..."}
+                 ]
+                 yield history_with_message, "", ""
+
                  search_results = search_client.search(message, count=search_count)
+
+                 # Format the search results
+                 if search_results:
+                     search_display = "## 📚 References\n\n"
+                     for i, result in enumerate(search_results, 1):
+                         search_display += f"**{i}. [{result['title']}]({result['url']})**\n"
+                         search_display += f" {result['description'][:100]}...\n\n"
 
              # Settings
              config = {
 
                  "max_tokens": max_tokens
              }
 
+             # Append the user message
+             current_history = history + [{"role": "user", "content": message}]
 
+             # Multi-agent streaming
+             assistant_message = ""
              agent_thoughts = ""
 
+             for response_chunk, thoughts_chunk in multi_agent_system.process_with_streaming(
+                 query=message,
+                 search_results=search_results,
+                 config=config,
+                 show_agent_thoughts=show_agent_thoughts
+             ):
+                 assistant_message = response_chunk
+                 agent_thoughts = thoughts_chunk
+
+                 # Update the history
+                 updated_history = current_history + [{"role": "assistant", "content": assistant_message}]
+
+                 yield updated_history, agent_thoughts, search_display
 
+             # Mark the answer as complete
+             final_message = assistant_message + "\n\n---\n✨ *Answer complete*"
+             final_history = current_history + [{"role": "assistant", "content": final_message}]
 
+             yield final_history, agent_thoughts, search_display
 
          except Exception as e:
              error_msg = f"❌ Error: {str(e)}"
+             error_history = history + [
+                 {"role": "user", "content": message},
+                 {"role": "assistant", "content": error_msg}
+             ]
+             yield error_history, "", ""
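Since the handler above yields whole history lists, a brief reminder of the message format that `gr.Chatbot(type="messages")` consumes: a flat list of role/content dicts, which is why the code appends one user dict and one assistant dict per turn. The values below are illustrative.

```python
# Illustrative: the OpenAI-style "messages" history used by gr.Chatbot(type="messages").
turn = [
    {"role": "user", "content": "What is RAG?"},
    {"role": "assistant", "content": "Retrieval-augmented generation combines search with an LLM ..."},
]
# The streaming handler re-yields history + [an updated assistant dict] as the answer grows.
```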
      # Gradio interface
      with gr.Blocks(
+         title="Multi-Agent RAG System with Streaming",
          theme=gr.themes.Soft(),
          css="""
          .gradio-container {
              max-width: 1400px !important;
              margin: auto !important;
          }
+         .message {
+             font-size: 1.1em !important;
+         }
          """
      ) as demo:
          gr.Markdown("""
+         # 🧠 Multi-Agent RAG System (Streaming)
+         ### Four-stage agent collaboration with real-time streaming output
 
          **Pipeline:** Supervisor (structuring) → Creative generator (creative answer) → Critic (verification) → Final supervisor (synthesis)
          """)
 
                  chatbot = gr.Chatbot(
                      height=500,
                      label="💬 Conversation",
+                     type="messages",
+                     show_copy_button=True
                  )
 
                  msg = gr.Textbox(
                      label="Enter your question",
+                     placeholder="Type your question... (the answer streams in real time)",
                      lines=3
                  )
 
                  with gr.Row():
                      submit = gr.Button("🚀 Send", variant="primary")
                      clear = gr.Button("🔄 Reset")
+                     stop = gr.Button("⏹️ Stop", variant="stop")
 
                  # Agent reasoning
                  with gr.Accordion("🤖 Agent reasoning", open=False):
 
                      search_count = gr.Slider(
                          minimum=1,
                          maximum=10,
+                         value=5,
                          step=1,
                          label="Number of search results"
                      )
 
                          maximum=1,
                          value=0.6,
                          step=0.1,
+                         label="Temperature",
+                         info="Lower = more consistent, higher = more creative"
                      )
 
                      max_tokens = gr.Slider(
                          minimum=500,
                          maximum=4000,
+                         value=2000,
                          step=100,
                          label="Max Tokens"
                      )
 
                  gr.Markdown("""
                  ### 📊 System info
 
+                 **🎭 Agent roles:**
                  - 👔 **Supervisor**: structure design
                  - 🎨 **Creative**: creative generation
                  - 🔍 **Critic**: verification/improvement
                  - ✅ **Final**: synthesis/completion
+
+                 **✨ Features:**
+                 - Real-time streaming output
+                 - Multi-stage verification
+                 - RAG-grounded accuracy
                  """)
 
          # Examples
 
              inputs=msg
          )
 
+         # Event bindings (streaming)
+         submit_event = submit.click(
+             process_query_streaming,
              inputs=[msg, chatbot, use_search, show_agent_thoughts,
                      search_count, temperature, max_tokens],
              outputs=[chatbot, agent_thoughts, search_sources]
 
              msg
          )
 
+         msg_event = msg.submit(
+             process_query_streaming,
              inputs=[msg, chatbot, use_search, show_agent_thoughts,
                      search_count, temperature, max_tokens],
              outputs=[chatbot, agent_thoughts, search_sources]
 
              msg
          )
 
+         # Stop button
+         stop.click(
+             None,
+             None,
+             None,
+             cancels=[submit_event, msg_event]
+         )
+
          clear.click(
+             lambda: ([], "", ""),
              None,
              [chatbot, agent_thoughts, search_sources]
          )
 
  if __name__ == "__main__":
      print("""
      ╔══════════════════════════════════════════════════════════════╗
+     ║ 🧠 Multi-Agent RAG System with Streaming Output 🧠             ║
      ║                                                                ║
      ║ Supervisor → Creative generator → Critic → Final supervisor   ║
+     ║ High-quality answers via real-time streaming                  ║
      ╚══════════════════════════════════════════════════════════════╝
      """)
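A closing note on the stop button added in this commit: Gradio cancels an in-flight generator when the events listed in `cancels=` are interrupted. A minimal, self-contained sketch of the same pattern (widget names and the dummy generator are illustrative, not from app.py):

```python
import time
import gradio as gr

def slow_stream(prompt: str):
    # Stand-in for the streaming pipeline: emit a growing answer.
    text = ""
    for word in ("thinking", "about", prompt, "...", "done"):
        text += word + " "
        time.sleep(0.5)
        yield text

with gr.Blocks() as demo:
    box = gr.Textbox(label="Prompt")
    out = gr.Textbox(label="Answer")
    run = gr.Button("Run")
    stop = gr.Button("Stop", variant="stop")

    run_event = run.click(slow_stream, inputs=box, outputs=out)
    # Clicking Stop cancels the running streaming event, as app.py does for the submit/msg events.
    stop.click(None, None, None, cancels=[run_event])

demo.launch()
```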