# porla — commit fde6ed4:
# Add functionality to load and execute Python files; refactor test script
# for output capture.
import os
import re
import whisper
import io
import pandas as pd
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.tools import ArxivQueryRun
from langchain_core.messages import HumanMessage
from dotenv import load_dotenv
from langchain_openai import AzureChatOpenAI
from langchain_perplexity import ChatPerplexity
from langchain_core.tools import tool
from langchain_tavily import TavilySearch
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
from youtube_transcript_api import YouTubeTranscriptApi
from pandasai.llm.openai import OpenAI
from llama_index.readers.pandas_ai import PandasAIReader
from contextlib import redirect_stdout, redirect_stderr
from .state import State
# wikipedia_tool = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
load_dotenv()  # read API keys/endpoints (AZURE_*, etc.) from a local .env file
# Initialize our LLM
# Primary LLM: Azure OpenAI chat deployment, configured entirely via env vars.
llm = AzureChatOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),  # Corrected variable name
    openai_api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    deployment_name=os.getenv("AZURE_OPENAI_DEPLOYMENT"),  # Corrected variable name
    openai_api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    temperature=0.0,  # deterministic output
)
# Secondary LLM: Hugging Face endpoint (used by is_question_reversed below).
hf_endpoint = HuggingFaceEndpoint(
    # repo_id="microsoft/Phi-3-mini-4k-instruct",
    repo_id="Qwen/Qwen3-4B",
    task="text-generation",
    max_new_tokens=1000,
    do_sample=False,  # greedy decoding — no sampling randomness
)
llm_hf = ChatHuggingFace(llm=hf_endpoint, verbose=True)
@tool
def get_youtube_transcript(video_id: str) -> list:
    """Fetches the transcript of a YouTube video."""
    # Delegate to the youtube-transcript-api client and return the raw
    # transcript as a plain list of dicts.
    return YouTubeTranscriptApi().fetch(video_id).to_raw_data()
@tool
def transcript_mp3_audio(task_id: str) -> str:
    """Transcribes an MP3 audio file using a speech-to-text model."""
    # The audio is expected at results/<task_id>.mp3.
    audio_path = os.path.join('results', f"{task_id}.mp3")
    # "base" balances speed and accuracy; tiny/small/medium/large also work.
    stt_model = whisper.load_model("base")
    return stt_model.transcribe(audio_path)["text"]
@tool
def load_and_analyze_excel_file(question: str, task_id: str) -> str:
    """Loads an Excel file and analyzes it using PandasAI."""
    # The spreadsheet is expected at results/<task_id>.xlsx.
    excel_path = os.path.join('results', f"{task_id}.xlsx")
    frame = pd.read_excel(excel_path)
    # Hand the natural-language question to PandasAI over the dataframe,
    # asking for a conversational (free-text) answer.
    reader = PandasAIReader(pandas_llm=llm)
    return reader.run_pandas_ai(frame, question, is_conversational_answer=True)
def reverse_text(state: State) -> State:
    """Reverses the input text."""
    # NOTE: the incoming question is assumed to be written backwards;
    # reversing it restores the readable form.
    backwards = state["question"]
    print(f"Reversing text: {backwards}")
    restored = backwards[::-1]
    return {
        "is_question_reversed": False,
        "messages": [{"role": "user", "content": restored}],
        "question": restored,
    }
def is_question_reversed(state: State) -> State:
    """Checks if the question is reversed through an LLM."""
    # The most recent message is taken to be the user's question.
    question = state["messages"][-1].content  # Get the last user message
    # Prepare our prompt for the LLM
    prompt = f"""
You are given a question. Determine whether it is written normally or in reverse (backwards).
Only answer with 'normal' or 'reversed'.
Question: {question}
Answer:
"""
    # Call the LLM (the Hugging Face chat model configured at module level)
    messages = [HumanMessage(content=prompt)]
    response = llm_hf.invoke(messages)
    # Simple logic to parse the response (in a real app, you'd want more robust parsing)
    response_text = response.content.lower()
    # Strip any <think>...</think> reasoning block the model may emit
    # before its one-word verdict.
    response_text = re.sub(r"<think>.*?</think>", "", response_text, flags=re.DOTALL).strip()
    # Reversed only if the verdict contains "reversed" and never "normal",
    # guarding against answers that mention both words.
    is_reversed = "reversed" in response_text and "normal" not in response_text
    # Update messages for tracking
    if is_reversed:
        # NOTE(review): on the reversed path the prior history is dropped and
        # replaced with just this exchange — presumably so reverse_text() can
        # start fresh; confirm this is intended.
        new_messages = [
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": response_text}
        ]
    else:
        # Normal path: append this exchange to the existing history.
        new_messages = state.get("messages", []) + [
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": response_text}
        ]
    # Return state updates
    return {
        "is_question_reversed": is_reversed,
        "messages": new_messages,
        "question": question,
    }
def route_question(state: State) -> str:
    """Determine the next step based on whether the question is reversed or not."""
    # Branch key consumed by the graph's conditional edge.
    return "question_reversed" if state["is_question_reversed"] else "question_not_reversed"
@tool
def load_and_execute_python_file(task_id: str) -> str:
    """
    Reads a Python file, executes it, and prints the result.
    """
    # The script is expected at results/<task_id>.py.
    script_path = os.path.join('results', f"{task_id}.py")
    try:
        with open(script_path, 'r', encoding='utf-8') as handle:
            python_code = handle.read()
    except FileNotFoundError:
        return f"Errore: file '{script_path}' non trovato."
    out_buf = io.StringIO()
    err_buf = io.StringIO()
    # Redirect stdout and stderr so every byte the script emits is captured.
    with redirect_stdout(out_buf), redirect_stderr(err_buf):
        try:
            # SECURITY: exec() runs arbitrary code from disk — only acceptable
            # for trusted, task-generated files.
            exec(python_code, {"__name__": "__main__"})
        except Exception as exc:
            # Any exception is caught and surfaced through the stderr buffer.
            print(f"Errore durante l'esecuzione: {exc}", file=err_buf)
    # Combined captured stdout followed by captured stderr.
    return out_buf.getvalue() + err_buf.getvalue()
def get_avaiable_tools():
    """Returns a list of available tools.

    Note: the function name keeps the historical "avaiable" misspelling for
    backward compatibility; new code should use the get_available_tools alias.
    """
    # Search tools are constructed on each call rather than at import time.
    web_search_tool = TavilySearch(max_results=5)
    arxiv_search_tool = ArxivQueryRun()
    wikipedia_tool = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
    return [
        wikipedia_tool,
        arxiv_search_tool,
        web_search_tool,
        get_youtube_transcript,
        transcript_mp3_audio,
        load_and_analyze_excel_file,
        load_and_execute_python_file,
    ]

# Correctly spelled alias; the misspelled original is kept for existing callers.
get_available_tools = get_avaiable_tools