Spaces:

aiqtech
/

rag

Running

App Files Files Community

aiqtech committed on Aug 21

Commit

1b3ced8

verified ·

1 Parent(s): 371c534

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -73

app.py CHANGED Viewed

@@ -351,7 +351,7 @@ class QualityChecker:
 class OptimizedStreaming:
     """스트리밍 버퍼 최적화"""
-    def __init__(self, chunk_size: int = 100, flush_interval: float = 0.1):
         self.chunk_size = chunk_size
         self.flush_interval = flush_interval
         self.buffer = ""
@@ -421,7 +421,7 @@ class SpeedOptimizedMultiAgentSystem:
             AgentRole.FINALIZER: """[최종통합]
 모든의견 종합→최적답변
 명확구조+실용정보+창의균형
-바로 핵심 내용부터 시작. 불필요한 헤더나 마크업 없이."""
         }
     async def parallel_process_agents(
@@ -449,7 +449,7 @@ class SpeedOptimizedMultiAgentSystem:
         try:
             # === 1단계: 감독자 + 검색 병렬 실행 ===
             if show_progress:
-                agent_thoughts = "### 🚀 병렬 처리 시작\n"
                 agent_thoughts += "👔 감독자 분석 + 🔍 추가 검색 동시 진행...\n\n"
                 yield accumulated_response, agent_thoughts
@@ -474,12 +474,12 @@ class SpeedOptimizedMultiAgentSystem:
             async for chunk in self.streaming.buffer_and_yield(supervisor_task):
                 supervisor_response += chunk
                 if show_progress and len(supervisor_response) < 300:
-                    agent_thoughts = f"### 👔 감독자 분석\n{supervisor_response[:300]}...\n\n"
                     yield accumulated_response, agent_thoughts
             # === 2단계: 창의성 + 비평 준비 병렬 ===
             if show_progress:
-                agent_thoughts += "### 🎨 창의성 생성자 + 🔍 비평자 준비...\n\n"
                 yield accumulated_response, agent_thoughts
             # 창의성 생성 시작
@@ -524,7 +524,7 @@ class SpeedOptimizedMultiAgentSystem:
                 if show_progress:
                     display_creative = creative_response[:400] + "..." if len(creative_response) > 400 else creative_response
-                    agent_thoughts = f"### 🎨 창의성 생성자\n{display_creative}\n\n"
                     yield accumulated_response, agent_thoughts
             # 비평자 결과 대기
@@ -532,7 +532,7 @@ class SpeedOptimizedMultiAgentSystem:
                 critic_response = await critic_task
                 if show_progress:
-                    agent_thoughts += f"### 🔍 비평자 검토\n{critic_response[:200]}...\n\n"
                     yield accumulated_response, agent_thoughts
             # === 3단계: 품질 체크 및 조기 종료 ===
@@ -545,7 +545,7 @@ class SpeedOptimizedMultiAgentSystem:
                 accumulated_response = creative_response
                 if show_progress:
-                    agent_thoughts += f"### ✅ 품질 충족 (점수: {quality_score:.2f})\n조기 완료!\n"
                 # 캐시 저장
                 self.cache.set(query, {
@@ -558,7 +558,7 @@ class SpeedOptimizedMultiAgentSystem:
             # === 4단계: 최종 통합 (스트리밍) ===
             if show_progress:
-                agent_thoughts += "### ✅ 최종 통합 중...\n\n"
                 yield accumulated_response, agent_thoughts
             final_prompt = f"""
@@ -566,7 +566,7 @@ class SpeedOptimizedMultiAgentSystem:
 창의성답변: {creative_response}
 비평피드백: {critic_response}
 감독자구조: {supervisor_response}
-최종통합→완벽답변"""
             final_task = self.llm.chat_stream_async(
                 messages=[
@@ -577,36 +577,38 @@ class SpeedOptimizedMultiAgentSystem:
                 max_tokens=2500
             )
-            # 최종 답변 스트리밍
             accumulated_response = ""
-            temp_buffer = ""  # 임시 버퍼 추가
-            async for chunk in self.streaming.buffer_and_yield(final_task):
-                temp_buffer += chunk
-                # "| --- # 🌱 **최종통합 답변:" 부분 제거
-                if "| --- # 🌱 **최종통합 답변:" in temp_buffer:
-                    # 해당 텍스트 이후의 내용만 추출
-                    parts = temp_buffer.split("| --- # 🌱 **최종통합 답변:")
-                    if len(parts) > 1:
-                        temp_buffer = parts[1]
-                # "**–오류: ---" 부분이 나타나면 그 전까지만 사용
-                if "**–오류: ---" in temp_buffer:
-                    temp_buffer = temp_buffer.split("**–오류: ---")[0]
-                # 정리된 내용을 accumulated_response에 추가
-                accumulated_response = temp_buffer
                 yield accumulated_response, agent_thoughts
-            # 최종 정리 - 혹시 남아있을 수 있는 불필요한 부분 제거
-            if "| --- # 🌱 **최종통합 답변:" in accumulated_response:
-                accumulated_response = accumulated_response.split("| --- # 🌱 **최종통합 답변:")[1]
-            if "**–오류: ---" in accumulated_response:
-                accumulated_response = accumulated_response.split("**–오류: ---")[0]
-            # 앞뒤 공백 제거
             accumulated_response = accumulated_response.strip()
             # 처리 시간 추가
@@ -671,7 +673,7 @@ def create_optimized_gradio_interface():
         show_agent_thoughts: bool,
         search_count: int
     ):
-        """최적화된 쿼리 처리 - 동기 버전"""
         if not message:
             yield history, "", ""
@@ -682,23 +684,7 @@ def create_optimized_gradio_interface():
             import nest_asyncio
             nest_asyncio.apply()
         except ImportError:
-            pass  # nest_asyncio가 없어도 진행
-        def run_async_function(coro):
-            """비동기 함수를 동기적으로 실행하는 헬퍼"""
-            try:
-                loop = asyncio.get_event_loop()
-                if loop.is_running():
-                    # 이미 실행 중인 루프가 있으면 새 스레드에서 실행
-                    import concurrent.futures
-                    with concurrent.futures.ThreadPoolExecutor() as executor:
-                        future = executor.submit(asyncio.run, coro)
-                        return future.result()
-                else:
-                    return loop.run_until_complete(coro)
-            except RuntimeError:
-                # 루프가 없으면 새로 생성
-                return asyncio.run(coro)
         try:
             # 검색 수행 (동기화)
@@ -714,12 +700,15 @@ def create_optimized_gradio_interface():
                 yield history_with_message, "", ""
                 # 비동기 검색을 동기적으로 실행
-                search_results = run_async_function(
-                    system.search.search_async(message, count=search_count)
-                )
                 if search_results:
-                    search_display = "## 📚 참고 자료\n\n"
                     for i, result in enumerate(search_results[:3], 1):
                         search_display += f"**{i}. [{result['title'][:50]}]({result['url']})**\n"
                         search_display += f"   {result['description'][:100]}...\n\n"
@@ -727,26 +716,37 @@ def create_optimized_gradio_interface():
             # 사용자 메시지 추가
             current_history = history + [{"role": "user", "content": message}]
-            # 병렬 처리 실행을 동기적으로 수집
-            async def collect_responses():
-                responses = []
                 async for response, thoughts in system.parallel_process_agents(
                     query=message,
                     search_results=search_results,
                     show_progress=show_agent_thoughts
                 ):
-                    responses.append((response, thoughts))
-                return responses
-            # 모든 응답 수집
-            all_responses = run_async_function(collect_responses())
-            # 수집된 응답을 yield
-            for response, thoughts in all_responses:
-                updated_history = current_history + [
-                    {"role": "assistant", "content": response}
-                ]
-                yield updated_history, thoughts, search_display
         except Exception as e:
             error_history = history + [
@@ -754,6 +754,12 @@ def create_optimized_gradio_interface():
                 {"role": "assistant", "content": f"❌ 오류: {str(e)}"}
             ]
             yield error_history, "", ""
     # Gradio 인터페이스
     with gr.Blocks(
@@ -768,7 +774,7 @@ def create_optimized_gradio_interface():
     ) as demo:
         gr.Markdown("""
         # ⚡ 고속 Multi-Agent RAG System
-        ### 복잡한 질문도 5초 이내 처리 목표
         **최적화 기술:**
         - 🚀 병렬 처리: 에이전트 동시 실행
@@ -802,7 +808,7 @@ def create_optimized_gradio_interface():
                     search_sources = gr.Markdown()
             with gr.Column(scale=1):
-                gr.Markdown("### ⚙️ 설정")
                 use_search = gr.Checkbox(
                     label="🔍 웹 검색 사용",
@@ -823,7 +829,7 @@ def create_optimized_gradio_interface():
                 )
                 gr.Markdown("""
-                ### ⚡ 최적화 상태
                 **활성화된 최적화:**
                 - ✅ 병렬 처리

 class OptimizedStreaming:
     """스트리밍 버퍼 최적화"""
+    def __init__(self, chunk_size: int = 20, flush_interval: float = 0.05):
         self.chunk_size = chunk_size
         self.flush_interval = flush_interval
         self.buffer = ""
             AgentRole.FINALIZER: """[최종통합]
 모든의견 종합→최적답변
 명확구조+실용정보+창의균형
+바로 핵심 내용부터 시작. 불필요한 헤더나 마크업 없이. 마크다운 헤더(#, ##, ###) 사용 금지."""
         }
     async def parallel_process_agents(
         try:
             # === 1단계: 감독자 + 검색 병렬 실행 ===
             if show_progress:
+                agent_thoughts = "🚀 병렬 처리 시작\n"
                 agent_thoughts += "👔 감독자 분석 + 🔍 추가 검색 동시 진행...\n\n"
                 yield accumulated_response, agent_thoughts
             async for chunk in self.streaming.buffer_and_yield(supervisor_task):
                 supervisor_response += chunk
                 if show_progress and len(supervisor_response) < 300:
+                    agent_thoughts = f"👔 감독자 분석\n{supervisor_response[:300]}...\n\n"
                     yield accumulated_response, agent_thoughts
             # === 2단계: 창의성 + 비평 준비 병렬 ===
             if show_progress:
+                agent_thoughts += "🎨 창의성 생성자 + 🔍 비평자 준비...\n\n"
                 yield accumulated_response, agent_thoughts
             # 창의성 생성 시작
                 if show_progress:
                     display_creative = creative_response[:400] + "..." if len(creative_response) > 400 else creative_response
+                    agent_thoughts = f"🎨 창의성 생성자\n{display_creative}\n\n"
                     yield accumulated_response, agent_thoughts
             # 비평자 결과 대기
                 critic_response = await critic_task
                 if show_progress:
+                    agent_thoughts += f"🔍 비평자 검토\n{critic_response[:200]}...\n\n"
                     yield accumulated_response, agent_thoughts
             # === 3단계: 품질 체크 및 조기 종료 ===
                 accumulated_response = creative_response
                 if show_progress:
+                    agent_thoughts += f"✅ 품질 충족 (점수: {quality_score:.2f})\n조기 완료!\n"
                 # 캐시 저장
                 self.cache.set(query, {
             # === 4단계: 최종 통합 (스트리밍) ===
             if show_progress:
+                agent_thoughts += "✅ 최종 통합 중...\n\n"
                 yield accumulated_response, agent_thoughts
             final_prompt = f"""
 창의성답변: {creative_response}
 비평피드백: {critic_response}
 감독자구조: {supervisor_response}
+최종통합→완벽답변. 마크다운 헤더(#, ##, ###) 사용 금지."""
             final_task = self.llm.chat_stream_async(
                 messages=[
                 max_tokens=2500
             )
+            # 최종 답변 스트리밍 - 개선된 필터링
             accumulated_response = ""
+            unwanted_header_found = False
+            async for chunk in final_task:  # buffer_and_yield 제거하여 즉시 스트리밍
+                # 불필요한 헤더 체크
+                if not unwanted_header_found:
+                    accumulated_response += chunk
+                    # 헤더 패턴 감지
+                    if "| --- # 🌱 **최종통합 답변:" in accumulated_response:
+                        unwanted_header_found = True
+                        # 헤더 이후 내용만 추출
+                        parts = accumulated_response.split("| --- # 🌱 **최종통합 답변:")
+                        if len(parts) > 1:
+                            accumulated_response = parts[1].lstrip()
+                    # 오류 푸터 감지 및 제거
+                    if "**–오류: ---" in accumulated_response:
+                        accumulated_response = accumulated_response.split("**–오류: ---")[0]
+                else:
+                    # 헤더를 찾은 후에는 바로 추가
+                    accumulated_response += chunk
+                    # 오류 푸터 실시간 체크
+                    if "**–오류: ---" in accumulated_response:
+                        accumulated_response = accumulated_response.split("**–오류: ---")[0]
+                # 실시간 스트리밍 yield
                 yield accumulated_response, agent_thoughts
+            # 최종 정리
             accumulated_response = accumulated_response.strip()
             # 처리 시간 추가
         show_agent_thoughts: bool,
         search_count: int
     ):
+        """최적화된 쿼리 처리 - 실시간 스트리밍 버전"""
         if not message:
             yield history, "", ""
             import nest_asyncio
             nest_asyncio.apply()
         except ImportError:
+            pass
         try:
             # 검색 수행 (동기화)
                 yield history_with_message, "", ""
                 # 비동기 검색을 동기적으로 실행
+                async def search_wrapper():
+                    return await system.search.search_async(message, count=search_count)
+                loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(loop)
+                search_results = loop.run_until_complete(search_wrapper())
                 if search_results:
+                    search_display = "📚 참고 자료\n\n"
                     for i, result in enumerate(search_results[:3], 1):
                         search_display += f"**{i}. [{result['title'][:50]}]({result['url']})**\n"
                         search_display += f"   {result['description'][:100]}...\n\n"
             # 사용자 메시지 추가
             current_history = history + [{"role": "user", "content": message}]
+            # 실시간 스트리밍을 위한 비동기 처리
+            async def stream_responses():
+                """실시간 스트리밍 제너레이터"""
                 async for response, thoughts in system.parallel_process_agents(
                     query=message,
                     search_results=search_results,
                     show_progress=show_agent_thoughts
                 ):
+                    yield response, thoughts
+            # 새 이벤트 루프에서 실시간 스트리밍
+            loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(loop)
+            # 비동기 제너레이터를 동기적으로 순회
+            gen = stream_responses()
+            while True:
+                try:
+                    # 다음 항목 가져오기
+                    task = asyncio.ensure_future(gen.__anext__(), loop=loop)
+                    response, thoughts = loop.run_until_complete(task)
+                    # 실시간 업데이트
+                    updated_history = current_history + [
+                        {"role": "assistant", "content": response}
+                    ]
+                    yield updated_history, thoughts, search_display
+                except StopAsyncIteration:
+                    break
         except Exception as e:
             error_history = history + [
                 {"role": "assistant", "content": f"❌ 오류: {str(e)}"}
             ]
             yield error_history, "", ""
+        finally:
+            # 루프 정리
+            try:
+                loop.close()
+            except:
+                pass
     # Gradio 인터페이스
     with gr.Blocks(
     ) as demo:
         gr.Markdown("""
         # ⚡ 고속 Multi-Agent RAG System
+        **복잡한 질문도 5초 이내 처리 목표**
         **최적화 기술:**
         - 🚀 병렬 처리: 에이전트 동시 실행
                     search_sources = gr.Markdown()
             with gr.Column(scale=1):
+                gr.Markdown("**⚙️ 설정**")
                 use_search = gr.Checkbox(
                     label="🔍 웹 검색 사용",
                 )
                 gr.Markdown("""
+                **⚡ 최적화 상태**
                 **활성화된 최적화:**
                 - ✅ 병렬 처리