# porla — commit fde6ed4:
# Add functionality to load and execute Python files; refactor test script
# for output capture.
import os
import re
import whisper
import io
import pandas as pd
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.tools import ArxivQueryRun
from langchain_core.messages import HumanMessage
from dotenv import load_dotenv
from langchain_openai import AzureChatOpenAI
from langchain_perplexity import ChatPerplexity
from langchain_core.tools import tool
from langchain_tavily import TavilySearch
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
from youtube_transcript_api import YouTubeTranscriptApi
from pandasai.llm.openai import OpenAI
from llama_index.readers.pandas_ai import PandasAIReader
from contextlib import redirect_stdout, redirect_stderr
from .state import State
# wikipedia_tool = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
load_dotenv()  # read API keys/endpoints (AZURE_*, etc.) from a local .env file
# Initialize our LLM
# Primary LLM: Azure OpenAI chat deployment, configured entirely via env vars.
llm = AzureChatOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),  # Corrected variable name
    openai_api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    deployment_name=os.getenv("AZURE_OPENAI_DEPLOYMENT"),  # Corrected variable name
    openai_api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    temperature=0.0,  # deterministic output
)
# Secondary LLM: Hugging Face endpoint (used by is_question_reversed below).
hf_endpoint = HuggingFaceEndpoint(
    # repo_id="microsoft/Phi-3-mini-4k-instruct",
    repo_id="Qwen/Qwen3-4B",
    task="text-generation",
    max_new_tokens=1000,
    do_sample=False,  # greedy decoding — no sampling randomness
)
llm_hf = ChatHuggingFace(llm=hf_endpoint, verbose=True)
@tool
def get_youtube_transcript(video_id: str) -> list:
    """Fetches the transcript of a YouTube video."""
    # Delegate to the youtube-transcript-api client and return the raw
    # transcript as a plain list of dicts.
    return YouTubeTranscriptApi().fetch(video_id).to_raw_data()
@tool
def transcript_mp3_audio(task_id: str) -> str:
    """Transcribes an MP3 audio file using a speech-to-text model."""
    # The audio is expected at results/<task_id>.mp3.
    audio_path = os.path.join('results', f"{task_id}.mp3")
    # "base" balances speed and accuracy; tiny/small/medium/large also work.
    stt_model = whisper.load_model("base")
    return stt_model.transcribe(audio_path)["text"]
@tool
def load_and_analyze_excel_file(question: str, task_id: str) -> str:
    """Loads an Excel file and analyzes it using PandasAI."""
    # The spreadsheet is expected at results/<task_id>.xlsx.
    excel_path = os.path.join('results', f"{task_id}.xlsx")
    frame = pd.read_excel(excel_path)
    # Hand the natural-language question to PandasAI over the dataframe,
    # asking for a conversational (free-text) answer.
    reader = PandasAIReader(pandas_llm=llm)
    return reader.run_pandas_ai(frame, question, is_conversational_answer=True)
def reverse_text(state: State) -> State:
    """Reverses the input text."""
    # NOTE: the incoming question is assumed to be written backwards;
    # reversing it restores the readable form.
    backwards = state["question"]
    print(f"Reversing text: {backwards}")
    restored = backwards[::-1]
    return {
        "is_question_reversed": False,
        "messages": [{"role": "user", "content": restored}],
        "question": restored,
    }
def is_question_reversed(state: State) -> State:
    """Checks if the question is reversed through an LLM."""
    # The most recent message is taken to be the user's question.
    question = state["messages"][-1].content  # Get the last user message
    # Prepare our prompt for the LLM
    prompt = f"""
You are given a question. Determine whether it is written normally or in reverse (backwards).
Only answer with 'normal' or 'reversed'.
Question: {question}
Answer:
"""
    # Call the LLM (the Hugging Face chat model configured at module level)
    messages = [HumanMessage(content=prompt)]
    response = llm_hf.invoke(messages)
    # Simple logic to parse the response (in a real app, you'd want more robust parsing)
    response_text = response.content.lower()
    # Strip any <think>...</think> reasoning block the model may emit
    # before its one-word verdict.
    response_text = re.sub(r"<think>.*?</think>", "", response_text, flags=re.DOTALL).strip()
    # Reversed only if the verdict contains "reversed" and never "normal",
    # guarding against answers that mention both words.
    is_reversed = "reversed" in response_text and "normal" not in response_text
    # Update messages for tracking
    if is_reversed:
        # NOTE(review): on the reversed path the prior history is dropped and
        # replaced with just this exchange — presumably so reverse_text() can
        # start fresh; confirm this is intended.
        new_messages = [
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": response_text}
        ]
    else:
        # Normal path: append this exchange to the existing history.
        new_messages = state.get("messages", []) + [
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": response_text}
        ]
    # Return state updates
    return {
        "is_question_reversed": is_reversed,
        "messages": new_messages,
        "question": question,
    }
def route_question(state: State) -> str:
    """Determine the next step based on whether the question is reversed or not."""
    # Branch key consumed by the graph's conditional edge.
    return "question_reversed" if state["is_question_reversed"] else "question_not_reversed"
@tool
def load_and_execute_python_file(task_id: str) -> str:
    """
    Reads a Python file, executes it, and prints the result.
    """
    # The script is expected at results/<task_id>.py.
    script_path = os.path.join('results', f"{task_id}.py")
    try:
        with open(script_path, 'r', encoding='utf-8') as handle:
            python_code = handle.read()
    except FileNotFoundError:
        return f"Errore: file '{script_path}' non trovato."
    out_buf = io.StringIO()
    err_buf = io.StringIO()
    # Redirect stdout and stderr so every byte the script emits is captured.
    with redirect_stdout(out_buf), redirect_stderr(err_buf):
        try:
            # SECURITY: exec() runs arbitrary code from disk — only acceptable
            # for trusted, task-generated files.
            exec(python_code, {"__name__": "__main__"})
        except Exception as exc:
            # Any exception is caught and surfaced through the stderr buffer.
            print(f"Errore durante l'esecuzione: {exc}", file=err_buf)
    # Combined captured stdout followed by captured stderr.
    return out_buf.getvalue() + err_buf.getvalue()
def get_avaiable_tools():
    """Returns a list of available tools.

    Note: the function name keeps the historical "avaiable" misspelling for
    backward compatibility; new code should use the get_available_tools alias.
    """
    # Search tools are constructed on each call rather than at import time.
    web_search_tool = TavilySearch(max_results=5)
    arxiv_search_tool = ArxivQueryRun()
    wikipedia_tool = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
    return [
        wikipedia_tool,
        arxiv_search_tool,
        web_search_tool,
        get_youtube_transcript,
        transcript_mp3_audio,
        load_and_analyze_excel_file,
        load_and_execute_python_file,
    ]

# Correctly spelled alias; the misspelled original is kept for existing callers.
get_available_tools = get_avaiable_tools