tecuts committed
Commit b136fd6 · verified · 1 Parent(s): f2561e7

Update app.py

Files changed (1)
  1. app.py +69 -63
app.py CHANGED
@@ -177,14 +177,15 @@ available_tools = [
     }
 ]

+
 # --- Streaming Response Generator ---
 async def generate_streaming_response(messages: List[Dict], use_search: bool, temperature: float):
     """Generate streaming response with optional search"""

     try:
-        # Initial LLM call with streaming
+        # --- Stage 1: Initial call to see if the model wants to use a tool ---
         llm_kwargs = {
-            "model": "unsloth/Qwen3-30B-A3B-GGUF",
+            "model": "unsloth/Qwen3-30B-A3B-GGUF",
             "temperature": temperature,
             "messages": messages,
             "max_tokens": 2000,
@@ -195,100 +196,105 @@ async def generate_streaming_response(messages: List[Dict], use_search: bool, te
             llm_kwargs["tools"] = available_tools
             llm_kwargs["tool_choice"] = "auto"

-        source_links = []
-        response_content = ""
-        tool_calls_data = []
-
-        # First streaming call
         stream = client.chat.completions.create(**llm_kwargs)

+        response_content = ""
+        tool_calls_data = []
+        source_links = []
+
+        # Accumulate the response from the first stream
         for chunk in stream:
             delta = chunk.choices[0].delta

-            # Handle content streaming
             if delta.content:
                 content_chunk = delta.content
                 response_content += content_chunk
-                yield f"data: {json.dumps({'type': 'content', 'data': content_chunk})}\n\n"
+                # Don't yield content yet, wait to see if a tool is called

-            # Handle tool calls
+            # This logic for accumulating tool calls is complex but correct
             if delta.tool_calls:
                 for tool_call in delta.tool_calls:
                     if len(tool_calls_data) <= tool_call.index:
-                        tool_calls_data.extend([{"id": "", "function": {"name": "", "arguments": ""}}
-                            for _ in range(tool_call.index + 1 - len(tool_calls_data))])
-
+                        tool_calls_data.extend([{"id": "", "function": {"name": "", "arguments": ""}} for _ in range(tool_call.index + 1 - len(tool_calls_data))])
                     if tool_call.id:
                         tool_calls_data[tool_call.index]["id"] = tool_call.id
                     if tool_call.function.name:
                         tool_calls_data[tool_call.index]["function"]["name"] = tool_call.function.name
                     if tool_call.function.arguments:
                         tool_calls_data[tool_call.index]["function"]["arguments"] += tool_call.function.arguments
-
-        # Process tool calls if any
-        if tool_calls_data and any(tc["function"]["name"] for tc in tool_calls_data):
+
+        # --- Stage 2: Decide what to do based on the model's response ---
+
+        # If the model returned tool calls, execute them
+        if tool_calls_data:
             yield f"data: {json.dumps({'type': 'status', 'data': 'Searching...'})}\n\n"
-
-            # Execute searches concurrently for speed
-            search_tasks = []
+
+            # 1. Append the assistant's request to use a tool to the message history
+            messages.append({
+                "role": "assistant",
+                "content": response_content or None,  # Can be empty
+                "tool_calls": tool_calls_data
+            })
+
+            # Execute all tool calls concurrently
+            search_tasks = {}
             for tool_call in tool_calls_data:
-                if tool_call["function"]["name"] == "google_search":
+                if tool_call["function"]["name"] == "google_search":
                     try:
                         args = json.loads(tool_call["function"]["arguments"])
                         query = args.get("query", "").strip()
                         if query:
-                            search_tasks.append(google_search_tool_async(query))
+                            # Map tool_call_id to the task
+                            search_tasks[tool_call["id"]] = google_search_tool_async(query)
                     except json.JSONDecodeError:
                         continue

-            # Run searches concurrently
-            if search_tasks:
-                search_results_list = await asyncio.gather(*search_tasks, return_exceptions=True)
-
-                # Combine all search results
-                all_results = []
-                for results in search_results_list:
-                    if isinstance(results, list):
-                        all_results.extend(results)
-                        for result in results:
-                            source_links.append({
-                                "title": result["source_title"],
-                                "url": result["url"],
-                                "domain": result["domain"]
-                            })
+            search_results_by_id = await asyncio.gather(*search_tasks.values(), return_exceptions=True)
+            tool_ids = list(search_tasks.keys())
+
+
+            # 2. Append the results of EACH tool call to the message history
+            for i, results in enumerate(search_results_by_id):
+                tool_call_id = tool_ids[i]
+                if isinstance(results, list):
+                    search_context = format_search_results_compact(results)
+                    # Gather source links to send to the client
+                    for result in results:
+                        source_links.append({"title": result["source_title"], "url": result["url"], "domain": result["domain"]})
+                else:  # Handle search error
+                    search_context = "Error performing search."

-                # Format search results
-                if all_results:
-                    search_context = format_search_results_compact(all_results)
-
-                    # Create new message with search context
-                    search_messages = messages + [{
-                        "role": "system",
-                        "content": f"{search_context}\n\nPlease provide a comprehensive response based on the search results above."
-                    }]
-
-                    yield f"data: {json.dumps({'type': 'status', 'data': 'Generating response...'})}\n\n"
-
-                    # Generate final response with search context
-                    final_stream = client.chat.completions.create(
-                        model="unsloth/Qwen3-30B-A3B-GGUF",
-                        temperature=temperature,
-                        messages=search_messages,
-                        max_tokens=2000,
-                        stream=True
-                    )
-
-                    for chunk in final_stream:
-                        if chunk.choices[0].delta.content:
-                            content = chunk.choices[0].delta.content
-                            yield f"data: {json.dumps({'type': 'content', 'data': content})}\n\n"
+                messages.append({
+                    "role": "tool",
+                    "tool_call_id": tool_call_id,
+                    "content": search_context
+                })
+
+            # 3. Make the SECOND call to the LLM with the complete context
+            yield f"data: {json.dumps({'type': 'status', 'data': 'Generating response...'})}\n\n"
+            final_stream = client.chat.completions.create(
+                model="unsloth/Qwen3-30B-A3B-GGUF",
+                temperature=temperature,
+                messages=messages,  # Send the fully updated message history
+                max_tokens=2000,
+                stream=True
+            )
+
+            for chunk in final_stream:
+                if chunk.choices[0].delta.content:
+                    content = chunk.choices[0].delta.content
+                    yield f"data: {json.dumps({'type': 'content', 'data': content})}\n\n"
+
+        # If no tool calls were made, just stream the initial response
+        else:
+            yield f"data: {json.dumps({'type': 'content', 'data': response_content})}\n\n"

-        # Send sources and completion
+        # --- Stage 3: Finalize the stream ---
         if source_links:
             yield f"data: {json.dumps({'type': 'sources', 'data': source_links})}\n\n"

         yield f"data: {json.dumps({'type': 'done', 'data': {'search_used': bool(source_links)}})}\n\n"
-
+
     except Exception as e:
         logger.error(f"Streaming error: {e}")
         yield f"data: {json.dumps({'type': 'error', 'data': str(e)})}\n\n"