Final_Assignment_Template

Build error

File size: 6,212 Bytes

import os
import re
import whisper
import io

import pandas as pd
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.tools import ArxivQueryRun

from langchain_core.messages import HumanMessage
from dotenv import load_dotenv
from langchain_openai import AzureChatOpenAI
from langchain_perplexity import ChatPerplexity
from langchain_core.tools import tool
from langchain_tavily import TavilySearch

from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
from youtube_transcript_api import YouTubeTranscriptApi

from pandasai.llm.openai import OpenAI
from llama_index.readers.pandas_ai import PandasAIReader
from contextlib import redirect_stdout, redirect_stderr


from .state import State

# wikipedia_tool = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())

# load_dotenv() runs first so that the os.getenv() lookups below can see
# whatever it loaded into the process environment.
load_dotenv()


# Primary chat model (Azure OpenAI deployment). Each connection setting is read
# from the environment variable mapped to its constructor keyword here.
_AZURE_ENV_BY_KWARG = {
    "azure_endpoint": "AZURE_OPENAI_ENDPOINT",
    "openai_api_version": "AZURE_OPENAI_API_VERSION",
    "deployment_name": "AZURE_OPENAI_DEPLOYMENT",
    "openai_api_key": "AZURE_OPENAI_API_KEY",
}
llm = AzureChatOpenAI(
    **{kwarg: os.getenv(env_name) for kwarg, env_name in _AZURE_ENV_BY_KWARG.items()},
    temperature=0.0,
)

# Secondary chat model served through a Hugging Face endpoint; in this file it
# is used by is_question_reversed().
# Alternative repo_id kept for reference: "microsoft/Phi-3-mini-4k-instruct"
hf_endpoint = HuggingFaceEndpoint(
    repo_id="Qwen/Qwen3-4B",
    task="text-generation",
    max_new_tokens=1000,
    do_sample=False,
)
llm_hf = ChatHuggingFace(llm=hf_endpoint, verbose=True)

@tool
def get_youtube_transcript(video_id: str) -> list:
    """Fetches the transcript of a YouTube video."""
    # NOTE: the docstring above is kept verbatim on purpose; LangChain's @tool
    # decorator uses it as the tool description, so its wording is runtime text.
    #
    # video_id: identifier handed straight to YouTubeTranscriptApi.fetch().
    # Returns whatever to_raw_data() yields for the fetched transcript.
    return YouTubeTranscriptApi().fetch(video_id).to_raw_data()

@tool
def transcript_mp3_audio(task_id: str) -> str:
    """Transcribes an MP3 audio file using a speech-to-text model."""
    # NOTE: the docstring above is kept verbatim on purpose; LangChain's @tool
    # decorator uses it as the tool description, so its wording is runtime text.
    #
    # task_id: the audio is expected at results/<task_id>.mp3.
    # Returns the "text" entry of Whisper's transcription result, or an error
    # message string when the file does not exist.

    # The file is assumed to be named after the task_id.
    file_path = os.path.join('results', f"{task_id}.mp3")

    # Report a missing file the same way load_and_execute_python_file does,
    # instead of letting the failure surface from inside Whisper's audio
    # loading, so the agent receives a readable message.
    if not os.path.isfile(file_path):
        return f"Errore: file '{file_path}' non trovato."

    # Other model sizes that can be used: tiny, base, small, medium, large.
    model = whisper.load_model("base")
    result = model.transcribe(file_path)
    return result["text"]

@tool
def load_and_analyze_excel_file(question: str, task_id: str) -> str:
    """Loads an Excel file and analyzes it using PandasAI."""
    # NOTE: the docstring above is kept verbatim on purpose; LangChain's @tool
    # decorator uses it as the tool description, so its wording is runtime text.
    #
    # question: natural-language query passed to PandasAI together with the data.
    # task_id:  the workbook is expected at results/<task_id>.xlsx.
    # Returns the PandasAI answer, or an error message string when the workbook
    # does not exist.

    # The file is assumed to be named after the task_id.
    file_path = os.path.join('results', f"{task_id}.xlsx")

    # Report a missing file the same way load_and_execute_python_file does,
    # rather than letting FileNotFoundError escape from the tool.
    try:
        df = pd.read_excel(file_path)
    except FileNotFoundError:
        return f"Errore: file '{file_path}' non trovato."

    # The module-level Azure chat model is the LLM handed to PandasAI.
    reader = PandasAIReader(pandas_llm=llm)
    return reader.run_pandas_ai(df, question, is_conversational_answer=True)



def reverse_text(state: State) -> State:
    """Flip the stored question back to front and restart the conversation with it.

    Reads ``state["question"]``, logs it to stdout, and reverses it character
    by character.

    Returns:
        A state update whose ``question`` is the flipped text, whose
        ``messages`` holds a single user message carrying that text, and
        whose ``is_question_reversed`` flag is ``False``.
    """
    original = state["question"]
    print(f"Reversing text: {original}")

    flipped = original[::-1]
    return {
        "is_question_reversed": False,
        "messages": [{"role": "user", "content": flipped}],
        "question": flipped,
    }

def is_question_reversed(state: State) -> State:
    """Ask the Hugging Face chat model whether the latest message is written backwards.

    The ``content`` of the last entry in ``state["messages"]`` is taken as the
    question and sent to ``llm_hf`` inside a classification prompt.

    Returns:
        A state update with:
        - ``is_question_reversed``: ``True`` only when the cleaned reply
          contains "reversed" and does not contain "normal";
        - ``messages``: the prompt/reply exchange alone when the question is
          reversed, otherwise the existing messages followed by the exchange;
        - ``question``: the text that was classified.
    """
    # The most recent message is the one being classified.
    question = state["messages"][-1].content

    prompt = f"""
    You are given a question. Determine whether it is written normally or in reverse (backwards).
    Only answer with 'normal' or 'reversed'.

    Question: {question}
    Answer: 
    """

    reply = llm_hf.invoke([HumanMessage(content=prompt)])

    # Lower-case the reply, then drop any <think>...</think> section so only
    # the text outside it is inspected.
    verdict = re.sub(
        r"<think>.*?</think>", "", reply.content.lower(), flags=re.DOTALL
    ).strip()

    # Simple keyword parsing: a reply that mentions both words counts as normal.
    is_reversed = "normal" not in verdict and "reversed" in verdict

    # Record the classification exchange for tracking.
    exchange = [
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": verdict},
    ]
    # A reversed question keeps only this exchange; otherwise the exchange is
    # appended to the messages already in the state.
    history = [] if is_reversed else state.get("messages", [])

    return {
        "is_question_reversed": is_reversed,
        "messages": history + exchange,
        "question": question,
    }

def route_question(state: State) -> str:
    """Pick the next branch label from the ``is_question_reversed`` flag.

    Returns:
        ``"question_reversed"`` when ``state["is_question_reversed"]`` is
        truthy, ``"question_not_reversed"`` otherwise.
    """
    return (
        "question_reversed"
        if state["is_question_reversed"]
        else "question_not_reversed"
    )

@tool
def load_and_execute_python_file(task_id: str) -> str:
    """
    Reads a Python file, executes it, and prints the result.
    """
    # NOTE: the docstring above is kept verbatim on purpose; LangChain's @tool
    # decorator uses it as the tool description, so its wording is runtime text.
    #
    # task_id: the script is expected at results/<task_id>.py.
    # Returns everything the script wrote to stdout followed by everything it
    # wrote to stderr (including the error line added below on failure), or an
    # error message string when the file does not exist. Nothing is printed by
    # this function itself.

    # The file is assumed to be named after the task_id.
    file_path = os.path.join('results', f"{task_id}.py")
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            python_code = f.read()
    except FileNotFoundError:
        return f"Errore: file '{file_path}' non trovato."

    stdout_buffer = io.StringIO()
    stderr_buffer = io.StringIO()

    # Redirect stdout and stderr so that all output of the script is captured.
    with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer):
        try:
            # SECURITY: exec() runs the file's contents with the full privileges
            # of this process and no sandbox. Only use this on files from a
            # trusted source.
            exec(python_code, {"__name__": "__main__"})
        except SystemExit as exit_request:
            # sys.exit()/exit() raise SystemExit, which is not a subclass of
            # Exception; without this handler it would propagate out of the
            # tool and could stop the host process. A missing or zero exit code
            # is a normal termination, anything else is reported as an error.
            if exit_request.code not in (None, 0):
                print(
                    f"Errore durante l'esecuzione: SystemExit: {exit_request.code}",
                    file=stderr_buffer,
                )
        except Exception as e:
            # Any other exception is caught and reported in the stderr buffer.
            print(f"Errore durante l'esecuzione: {e}", file=stderr_buffer)

    # Combine stdout and stderr, in that order.
    return stdout_buffer.getvalue() + stderr_buffer.getvalue()


def get_avaiable_tools():
    """Build the list of tools offered to the agent.

    Three search tools (Tavily web search capped at 5 results, arXiv,
    Wikipedia) are instantiated on every call; the remaining entries are the
    tool functions defined in this module.

    Returns:
        A new list ordered as: Wikipedia, arXiv, web search, YouTube
        transcript, MP3 transcription, Excel analysis, Python file execution.
    """
    # Instantiate in this order (web search, arXiv, Wikipedia); the returned
    # list uses a different order.
    tavily = TavilySearch(max_results=5)
    arxiv = ArxivQueryRun()
    wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())

    search_tools = [wikipedia, arxiv, tavily]
    module_tools = [
        get_youtube_transcript,
        transcript_mp3_audio,
        load_and_analyze_excel_file,
        load_and_execute_python_file,
    ]
    return search_tools + module_tools