"""Tools and graph nodes for the agent: web/Wikipedia/Arxiv search, YouTube
transcripts, Whisper audio transcription, Excel analysis via PandasAI, and
helpers that detect/undo reversed questions."""

import os
import re
import io
from contextlib import redirect_stdout, redirect_stderr

import pandas as pd
import whisper
from dotenv import load_dotenv
from langchain_community.tools import ArxivQueryRun, DuckDuckGoSearchRun, WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_core.messages import HumanMessage
from langchain_core.tools import tool
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_openai import AzureChatOpenAI
from langchain_perplexity import ChatPerplexity
from langchain_tavily import TavilySearch
from llama_index.readers.pandas_ai import PandasAIReader
from pandasai.llm.openai import OpenAI
from youtube_transcript_api import YouTubeTranscriptApi

from .state import State

# wikipedia_tool = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
load_dotenv()

# Azure-hosted chat model; all connection settings come from the environment
# (loaded above via load_dotenv). temperature=0.0 keeps answers deterministic.
llm = AzureChatOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),  # Corrected variable name
    openai_api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    deployment_name=os.getenv("AZURE_OPENAI_DEPLOYMENT"),  # Corrected variable name
    openai_api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    temperature=0.0,
)

# Hugging Face text-generation endpoint used for the lightweight
# "is the question reversed?" classification. do_sample=False -> greedy
# decoding, so the classification is reproducible.
hf_endpoint = HuggingFaceEndpoint(
    # repo_id="microsoft/Phi-3-mini-4k-instruct",
    repo_id="Qwen/Qwen3-4B",
    task="text-generation",
    max_new_tokens=1000,
    do_sample=False,
)
llm_hf = ChatHuggingFace(llm=hf_endpoint, verbose=True)


@tool
def get_youtube_transcript(video_id: str) -> list:
    """Fetches the transcript of a YouTube video.

    Args:
        video_id: The YouTube video id (the ``v=`` query-string parameter).

    Returns:
        The raw transcript data as returned by ``FetchedTranscript.to_raw_data()``
        (a list of snippet dicts).
    """
    ytt_api = YouTubeTranscriptApi()
    fetched_transcript = ytt_api.fetch(video_id)
    return fetched_transcript.to_raw_data()


@tool
def transcript_mp3_audio(task_id: str) -> str:
    """Transcribes an MP3 audio file using a speech-to-text model.

    The audio file is expected at ``results/<task_id>.mp3``.

    Args:
        task_id: Identifier used as the audio file's base name.

    Returns:
        The transcribed text.
    """
    file_path = f"{task_id}.mp3"  # Assuming the file is named with the task_id
    file_path = os.path.join('results', file_path)
    # Other Whisper sizes are available: tiny, base, small, medium, large.
    model = whisper.load_model("base")
    result = model.transcribe(file_path)
    return result["text"]


@tool
def load_and_analyze_excel_file(question: str, task_id: str) -> str:
    """Loads an Excel file and analyzes it using PandasAI.

    The spreadsheet is expected at ``results/<task_id>.xlsx``.

    Args:
        question: Natural-language question to ask about the data.
        task_id: Identifier used as the spreadsheet's base name.

    Returns:
        PandasAI's conversational answer.
    """
    file_path = f"{task_id}.xlsx"  # Assuming the file is named with the task_id
    file_path = os.path.join('results', file_path)
    df = pd.read_excel(file_path)
    # NOTE(review): `llm` is a LangChain chat model, while PandasAIReader's
    # `pandas_llm` is typically a pandasai LLM wrapper (cf. the unused
    # `pandasai.llm.openai.OpenAI` import above) — confirm this works at runtime.
    reader = PandasAIReader(pandas_llm=llm)
    response = reader.run_pandas_ai(
        df,
        question,
        is_conversational_answer=True
    )
    return response


def reverse_text(state: State) -> State:
    """Graph node: reverses the question text and resets the reversed flag.

    Returns a state update containing the un-reversed question both as the
    ``question`` field and as a fresh user message.
    """
    reversed_text = state["question"]
    print(f"Reversing text: {reversed_text}")
    text = reversed_text[::-1]
    message = [
        {"role": "user", "content": text},
    ]
    return {
        "is_question_reversed": False,
        "messages": message,
        "question": text,
    }


def is_question_reversed(state: State) -> State:
    """Graph node: asks an LLM whether the question is written backwards.

    Sets ``is_question_reversed`` in the returned state update so that
    ``route_question`` can branch on it.
    """
    # NOTE(review): assumes the last message exposes a `.content` attribute
    # (a LangChain message object) — plain dict messages would break here.
    question = state["messages"][-1].content  # Get the last user message

    # Prepare our prompt for the LLM
    prompt = f"""
    You are given a question. Determine whether it is written normally or in reverse (backwards).
    Only answer with 'normal' or 'reversed'.
    Question: {question}
    Answer:
    """

    # Call the LLM
    messages = [HumanMessage(content=prompt)]
    response = llm_hf.invoke(messages)

    # Simple logic to parse the response (in a real app, you'd want more robust parsing)
    response_text = response.content.lower()
    # Strip the model's chain-of-thought block before matching on the answer:
    # Qwen3 emits its reasoning wrapped in <think>...</think>.
    # NOTE(review): the original pattern r".*?" was a no-op (lazy match of the
    # empty string); the tag literals were presumably lost — confirm against
    # the model's raw output.
    response_text = re.sub(r"<think>.*?</think>", "", response_text, flags=re.DOTALL).strip()
    is_reversed = "reversed" in response_text and "normal" not in response_text

    # Update messages for tracking; when reversed, the history is replaced
    # rather than extended (the reverse_text node rebuilds it anyway).
    if is_reversed:
        new_messages = [
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": response_text}
        ]
    else:
        new_messages = state.get("messages", []) + [
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": response_text}
        ]

    # Return state updates
    return {
        "is_question_reversed": is_reversed,
        "messages": new_messages,
        "question": question,
    }


def route_question(state: State) -> str:
    """Conditional-edge selector: returns the branch name based on whether the
    question was classified as reversed."""
    if state["is_question_reversed"]:
        return "question_reversed"
    else:
        return "question_not_reversed"


@tool
def load_and_execute_python_file(task_id: str) -> str:
    """
    Reads a Python file from ``results/<task_id>.py``, executes it, and
    returns everything it printed (stdout followed by stderr).
    """
    file_path = f"{task_id}.py"  # Assuming the file is named with the task_id
    file_path = os.path.join('results', file_path)
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            python_code = f.read()
    except FileNotFoundError:
        return f"Errore: file '{file_path}' non trovato."

    stdout_buffer = io.StringIO()
    stderr_buffer = io.StringIO()

    # Redirect stdout and stderr to capture all output
    with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer):
        try:
            # SECURITY: exec() runs arbitrary code from the results directory —
            # only use with trusted task files.
            exec(python_code, {"__name__": "__main__"})
        except Exception as e:
            # Any exception is caught and written to the stderr buffer
            print(f"Errore durante l'esecuzione: {e}", file=stderr_buffer)

    # Combine stdout and stderr
    output = stdout_buffer.getvalue()
    errors = stderr_buffer.getvalue()
    return output + errors


def get_avaiable_tools():
    """Returns the list of tools exposed to the agent.

    (Name keeps the historical "avaiable" spelling so existing callers
    continue to work.)
    """
    web_search_tool = TavilySearch(max_results=5)
    arxiv_search_tool = ArxivQueryRun()
    wikipedia_tool = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
    return [
        wikipedia_tool,
        arxiv_search_tool,
        web_search_tool,
        get_youtube_transcript,
        transcript_mp3_audio,
        load_and_analyze_excel_file,
        load_and_execute_python_file
    ]