import json
import os
import re

import numpy as np
import pandas as pd
import gradio as gr
from transformers import pipeline
from sentence_transformers import SentenceTransformer
from huggingface_hub import login

# Sanity check: this Space must provide an HF token
hf_token = os.getenv("V2_TOKEN")
if hf_token is None:
    raise RuntimeError("V2_TOKEN environment variable is not set in this Space.")

# Explicit login
login(token=hf_token)

# --- Configuration ---
print("Loading RAG system on your device...")

# Knowledge base files (FILE_PATH is the raw source; this Space reads the preloaded JSON)
FILE_PATH = "data.jsonl"
PRELOAD_FILE_PATH = "preload-data.json"

# Load the preloaded, pre-chunked documents
if not os.path.exists(PRELOAD_FILE_PATH):
    raise FileNotFoundError(f"{PRELOAD_FILE_PATH} not found in this Space.")
print(f"Found preloaded data! Using {PRELOAD_FILE_PATH}...")
with open(PRELOAD_FILE_PATH, "r", encoding="utf-8") as f:
    data = json.load(f)

documents = data  # list of chunked document strings

# Embed the whole corpus once at startup
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedding_model.encode(documents, convert_to_numpy=True)

# Keep documents and their embeddings together in a pandas DataFrame
df = pd.DataFrame(
    {
        "Document": documents,
        "Embedding": list(embeddings),  # one vector per row
    }
)

# Load the LLM pipeline
llm = pipeline(
    "text-generation",
    model="google/gemma-3-4b-it",  # Note: a 4B model may exceed a small Space's storage
    token=hf_token,
)


def clean_query_with_llm(query):
    """Rewrite the user's message into a short search query for retrieval."""
    prompt_content = f"""
Below is a new question asked by the user that needs to be answered by searching in a knowledge base.
You have access to an SFU IT Knowledge Base index with hundreds of chunked documents.
Generate a search query based on the user's question.
If you cannot generate a search query, return just the number 0.

User's Question: {query}

Search Query:
"""
    response = llm(
        prompt_content, max_new_tokens=100, do_sample=False, return_full_text=False
    )
    return response[0]["generated_text"].strip()


# Retrieval with pandas
def retrieve_with_pandas(query: str, top_k: int = 5):
    """
    Embed the query, compute cosine similarity to each document,
    and return the top_k most similar documents (as a DataFrame).
    """
    query_embedding = embedding_model.encode([query])[0]

    def cosine_sim(x):
        x = np.array(x)
        return float(
            np.dot(query_embedding, x)
            / (np.linalg.norm(query_embedding) * np.linalg.norm(x))
        )

    # Score on a copy so the shared DataFrame is not mutated on every request
    scored = df.assign(Similarity=df["Embedding"].apply(cosine_sim))
    results = scored.sort_values(by="Similarity", ascending=False).head(top_k)
    return results[["Document", "Similarity"]]
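

# Optional vectorized variant (a sketch, not wired into the app): the per-row
# .apply() above makes one Python-level call per document. Because the corpus
# embeddings fit in a single matrix, the same cosine similarities can be
# computed in one NumPy pass. `retrieve_vectorized` is illustrative only.
def retrieve_vectorized(query: str, top_k: int = 5) -> pd.DataFrame:
    query_vec = embedding_model.encode([query], convert_to_numpy=True)[0]
    doc_matrix = np.vstack(df["Embedding"].to_list())  # shape: (n_docs, dim)
    sims = (doc_matrix @ query_vec) / (
        np.linalg.norm(doc_matrix, axis=1) * np.linalg.norm(query_vec)
    )
    top_idx = np.argsort(sims)[::-1][:top_k]  # best matches first
    return pd.DataFrame(
        {
            "Document": df["Document"].to_numpy()[top_idx],
            "Similarity": sims[top_idx],
        }
    )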


def generate_with_rag(query, top_k=5):
    # Normalize the product name "goSFU" regardless of how the user cased it
    query = re.sub(r"gosfu", "goSFU", query, flags=re.IGNORECASE)

    # Retrieve: rewrite the message into a search query, falling back to the
    # raw message when the rewriter returns its "cannot generate" sentinel (0)
    search_query = clean_query_with_llm(query)
    if search_query.strip() == "0":
        search_query = query
    results = retrieve_with_pandas(search_query, top_k=top_k)

    # Join the retrieved documents into a single context string
    # (each doc separated by a divider)
    context_str = "\n\n---\n\n".join(results["Document"].tolist())

    # Build a clean prompt
    prompt_content = f"""
You are an SFU IT helpdesk chatbot. Your task is to answer SFU IT related questions such as accessing various technology services or general troubleshooting.
Below is a new question asked by the user, along with article chunks related to the user's question.
If the user asked a question, answer it with short step-by-step instructions, considering all the articles below.
If there are links in the articles, provide those links in your answer.
If the user asked a question and the answer is not in the contexts, say that you're sorry that you can't help them and suggest contacting SFU IT at 778-782-8888 or submitting an inquiry ticket at https://www.sfu.ca/information-systems/get-help.html
If the user DID NOT ask a question, be friendly and ask how you can help them.
Do not recommend, suggest, or provide any advice on anything that is not related to SFU or SFU IT.
If the user asked something relating to mental health or is seeking medical advice, redirect them to SFU Health & Counselling at https://www.sfu.ca/students/health.html
Do not ask the user any follow-up questions after answering them.

Question: {query}

-- Start of Articles --
{context_str}
-- End of Articles --

Answer:"""

    # Call the LLM
    response = llm(
        prompt_content, max_new_tokens=500, do_sample=False, return_full_text=False
    )
    return response[0]["generated_text"].strip()


def chat_fn(message, history):
    """Gradio ChatInterface callback."""
    return generate_with_rag(message, top_k=5)


demo = gr.ChatInterface(
    fn=chat_fn,
    title="SFU IT Chatbot",
    description="Enter your question and the SFU IT Chatbot will try to answer using retrieved SFU IT knowledge.",
)

if __name__ == "__main__":
    demo.launch()  # pass share=True for a public link
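

# A minimal retrieval smoke test (a sketch; the sample question is made up).
# Useful for checking the index without starting Gradio, e.g.:
#   python -c "import app; app.smoke_test()"   # assumes this file is app.py
def smoke_test():
    print(retrieve_with_pandas("How do I reset my SFU computing ID password?", top_k=3))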