tecuts committed · verified
Commit db547a3 · 1 Parent(s): 90e4830

Update app.py

Files changed (1)
  1. app.py +63 -69
app.py CHANGED
@@ -177,15 +177,14 @@ available_tools = [
     }
 ]
 
-
 # --- Streaming Response Generator ---
 async def generate_streaming_response(messages: List[Dict], use_search: bool, temperature: float):
     """Generate streaming response with optional search"""
 
     try:
-        # --- Stage 1: Initial call to see if the model wants to use a tool ---
+        # Initial LLM call with streaming
         llm_kwargs = {
-            "model": "unsloth/Qwen3-30B-A3B-GGUF",
+            "model": "unsloth/Qwen3-30B-A3B-GGUF",
             "temperature": temperature,
             "messages": messages,
             "max_tokens": 2000,
@@ -196,105 +195,100 @@ async def generate_streaming_response(messages: List[Dict], use_search: bool, te
             llm_kwargs["tools"] = available_tools
             llm_kwargs["tool_choice"] = "auto"
 
-        stream = client.chat.completions.create(**llm_kwargs)
-
+        source_links = []
         response_content = ""
         tool_calls_data = []
-
-        # Accumulate the response from the first stream
+
+        # First streaming call
+        stream = client.chat.completions.create(**llm_kwargs)
+
         for chunk in stream:
             delta = chunk.choices[0].delta
 
+            # Handle content streaming
             if delta.content:
                 content_chunk = delta.content
                 response_content += content_chunk
-                # Don't yield content yet, wait to see if a tool is called
+                yield f"data: {json.dumps({'type': 'content', 'data': content_chunk})}\n\n"
 
-            # This logic for accumulating tool calls is complex but correct
+            # Handle tool calls
            if delta.tool_calls:
                 for tool_call in delta.tool_calls:
                     if len(tool_calls_data) <= tool_call.index:
-                        tool_calls_data.extend([{"id": "", "function": {"name": "", "arguments": ""}} for _ in range(tool_call.index + 1 - len(tool_calls_data))])
+                        tool_calls_data.extend([{"id": "", "function": {"name": "", "arguments": ""}}
+                                                for _ in range(tool_call.index + 1 - len(tool_calls_data))])
+
                     if tool_call.id:
                         tool_calls_data[tool_call.index]["id"] = tool_call.id
                     if tool_call.function.name:
                         tool_calls_data[tool_call.index]["function"]["name"] = tool_call.function.name
                     if tool_call.function.arguments:
                         tool_calls_data[tool_call.index]["function"]["arguments"] += tool_call.function.arguments
-
-        # --- Stage 2: Decide what to do based on the model's response ---
-
-        # If the model returned tool calls, execute them
-        if tool_calls_data:
+
+        # Process tool calls if any
+        if tool_calls_data and any(tc["function"]["name"] for tc in tool_calls_data):
             yield f"data: {json.dumps({'type': 'status', 'data': 'Searching...'})}\n\n"
-
-            # 1. Append the assistant's request to use a tool to the message history
-            messages.append({
-                "role": "assistant",
-                "content": response_content or None,  # Can be empty
-                "tool_calls": tool_calls_data
-            })
-
-            # Execute all tool calls concurrently
-            search_tasks = {}
+
+            # Execute searches concurrently for speed
+            search_tasks = []
             for tool_call in tool_calls_data:
-                if tool_call["function"]["name"] == "Google Search":
+                if tool_call["function"]["name"] == "google_search":
                     try:
                         args = json.loads(tool_call["function"]["arguments"])
                         query = args.get("query", "").strip()
                         if query:
-                            # Map tool_call_id to the task
-                            search_tasks[tool_call["id"]] = google_search_tool_async(query)
+                            search_tasks.append(google_search_tool_async(query))
                     except json.JSONDecodeError:
                         continue
 
-            search_results_by_id = await asyncio.gather(*search_tasks.values(), return_exceptions=True)
-            tool_ids = list(search_tasks.keys())
-
-            source_links = []
-
-            # 2. Append the results of EACH tool call to the message history
-            for i, results in enumerate(search_results_by_id):
-                tool_call_id = tool_ids[i]
-                if isinstance(results, list):
-                    search_context = format_search_results_compact(results)
-                    # Gather source links to send to the client
-                    for result in results:
-                        source_links.append({"title": result["source_title"], "url": result["url"], "domain": result["domain"]})
-                else:  # Handle search error
-                    search_context = "Error performing search."
+            # Run searches concurrently
+            if search_tasks:
+                search_results_list = await asyncio.gather(*search_tasks, return_exceptions=True)
 
-                messages.append({
-                    "role": "tool",
-                    "tool_call_id": tool_call_id,
-                    "content": search_context
-                })
-
-            # 3. Make the SECOND call to the LLM with the complete context
-            yield f"data: {json.dumps({'type': 'status', 'data': 'Generating response...'})}\n\n"
-            final_stream = client.chat.completions.create(
-                model="unsloth/Qwen3-30B-A3B-GGUF",
-                temperature=temperature,
-                messages=messages,  # Send the fully updated message history
-                max_tokens=2000,
-                stream=True
-            )
-
-            for chunk in final_stream:
-                if chunk.choices[0].delta.content:
-                    content = chunk.choices[0].delta.content
-                    yield f"data: {json.dumps({'type': 'content', 'data': content})}\n\n"
-
-        # If no tool calls were made, just stream the initial response
-        else:
-            yield f"data: {json.dumps({'type': 'content', 'data': response_content})}\n\n"
+                # Combine all search results
+                all_results = []
+                for results in search_results_list:
+                    if isinstance(results, list):
+                        all_results.extend(results)
+                        for result in results:
+                            source_links.append({
+                                "title": result["source_title"],
+                                "url": result["url"],
+                                "domain": result["domain"]
+                            })
+
+                # Format search results
+                if all_results:
+                    search_context = format_search_results_compact(all_results)
+
+                    # Create new message with search context
+                    search_messages = messages + [{
+                        "role": "system",
+                        "content": f"{search_context}\n\nPlease provide a comprehensive response based on the search results above."
+                    }]
+
+                    yield f"data: {json.dumps({'type': 'status', 'data': 'Generating response...'})}\n\n"
+
+                    # Generate final response with search context
+                    final_stream = client.chat.completions.create(
+                        model="unsloth/Qwen3-30B-A3B-GGUF",
+                        temperature=temperature,
+                        messages=search_messages,
+                        max_tokens=2000,
+                        stream=True
+                    )
+
+                    for chunk in final_stream:
+                        if chunk.choices[0].delta.content:
+                            content = chunk.choices[0].delta.content
+                            yield f"data: {json.dumps({'type': 'content', 'data': content})}\n\n"
 
-        # --- Stage 3: Finalize the stream ---
+        # Send sources and completion
         if source_links:
             yield f"data: {json.dumps({'type': 'sources', 'data': source_links})}\n\n"
 
         yield f"data: {json.dumps({'type': 'done', 'data': {'search_used': bool(source_links)}})}\n\n"
-
+
     except Exception as e:
         logger.error(f"Streaming error: {e}")
         yield f"data: {json.dumps({'type': 'error', 'data': str(e)})}\n\n"