import json
import os

import gradio as gr
import numpy as np
import pandas as pd
from huggingface_hub import login
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# Sanity check: the Space must provide a Hugging Face token
hf_token = os.getenv("V2_TOKEN")
if hf_token is None:
    raise RuntimeError("V2_TOKEN environment variable is not set in this Space.")

# Explicit login
login(token=hf_token)

# --- Configuration ---
print("Loading RAG system on your device...")

# Knowledge base locations
FILE_PATH = "data.jsonl"
PRELOAD_FILE_PATH = "preload-data"

# Load the knowledge base, preferring the preloaded document cache
if os.path.exists(PRELOAD_FILE_PATH):
    print(f"Found preloaded data! Using {PRELOAD_FILE_PATH}...")
    with open(PRELOAD_FILE_PATH, "r", encoding="utf-8") as f:
        documents = json.load(f)
elif os.path.exists(FILE_PATH):
    print(f"No preloaded data found. Using {FILE_PATH}...")
    data = pd.read_json(path_or_buf=FILE_PATH, lines=True)
    documents = list(data["text"])
else:
    # Dummy data for testing if neither file exists yet
    print(f"Warning: {FILE_PATH} not found. Creating dummy data.")
    documents = ["To reset your password, visit password.sfu.ca and click 'Forgot Password'."]

# Cache the document list so later startups can skip parsing the JSONL file
if not os.path.exists(PRELOAD_FILE_PATH):
    print(f"Creating {PRELOAD_FILE_PATH}...")
    with open(PRELOAD_FILE_PATH, "w", encoding="utf-8") as fp:
        json.dump(documents, fp)

# Embed every document once at startup
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedding_model.encode(documents, convert_to_numpy=True)

# Keep documents and embeddings together in a pandas DataFrame
df = pd.DataFrame(
    {
        "Document": documents,
        "Embedding": list(embeddings),  # store each row's vector as a list
    }
)

# Load the LLM pipeline. Flan-T5 is an encoder-decoder model, so it needs
# the "text2text-generation" task, not "text-generation".
llm = pipeline(
    "text2text-generation",
    model="google/flan-t5-xl",  # Might not have enough storage ngl
    token=hf_token,
)


# Retrieve with pandas
def retrieve_with_pandas(query: str, top_k: int = 10):
    """
    Embed the query, compute cosine similarity to each document,
    and return the top_k most similar documents (as a DataFrame).
    """
    query_embedding = embedding_model.encode([query])[0]

    def cosine_sim(x):
        x = np.array(x)
        return float(
            np.dot(query_embedding, x)
            / (np.linalg.norm(query_embedding) * np.linalg.norm(x))
        )

    df["Similarity"] = df["Embedding"].apply(cosine_sim)
    results = df.sort_values(by="Similarity", ascending=False).head(top_k)
    return results[["Document", "Similarity"]]
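
# --- Optional: vectorized retrieval sketch ---
# The row-by-row .apply above makes one Python-level call per document. This
# is a minimal sketch of the same cosine-similarity ranking done as a single
# NumPy matrix product (a drop-in alternative, not wired into the app; it
# reuses the `embeddings` array and `documents` list built above):
def retrieve_vectorized(query: str, top_k: int = 10) -> pd.DataFrame:
    query_embedding = embedding_model.encode([query], convert_to_numpy=True)[0]
    doc_matrix = np.asarray(embeddings)  # shape: (n_docs, dim)
    sims = (doc_matrix @ query_embedding) / (
        np.linalg.norm(doc_matrix, axis=1) * np.linalg.norm(query_embedding)
    )
    top_idx = np.argsort(sims)[::-1][:top_k]  # indices of the best matches
    return pd.DataFrame(
        {"Document": [documents[i] for i in top_idx], "Similarity": sims[top_idx]}
    )
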
def generate_with_rag(query, top_k=5):
    # Retrieve the most relevant documents for this query
    docs = retrieve_with_pandas(query, top_k=top_k)
    context_series = docs["Document"]

    # Join the retrieved documents into a single context string,
    # with a divider between each article
    context_str = "\n\n---\n\n".join(context_series.tolist())

    # Build a clean prompt
    input_text = f"""You are an IT helpdesk assistant.
If the user asked a question, answer it with detailed step-by-step instructions, considering all the articles below.
If the user asked a question and the answer is not in the articles, say you don't know and suggest contacting SFU IT.
If the user did NOT ask a question, be friendly and ask how you can help them.

Question: {query}

-- Start of Articles --
{context_str}
-- End of Articles --

Answer:"""

    # Call the LLM. The text2text-generation pipeline returns only the
    # generated text, so no return_full_text flag is needed.
    response = llm(input_text, max_new_tokens=1024, do_sample=False)
    return response[0]["generated_text"].strip()


def chat_fn(message, history):
    """Chat interface callback."""
    return generate_with_rag(message, top_k=2)


demo = gr.ChatInterface(
    fn=chat_fn,
    title="SFU IT Chatbot",
    description="Enter your question and the SFU IT Chatbot will try to answer using retrieved SFU IT knowledge.",
)

if __name__ == "__main__":
    demo.launch()  # pass share=True for a public link
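
# --- Optional: command-line smoke test sketch ---
# A minimal way to exercise the RAG pipeline without the Gradio UI (a
# hypothetical helper, not part of the original app; the sample query is
# made up). Import this module in a Python shell and call smoke_test().
def smoke_test(query: str = "How do I reset my password?") -> None:
    """Print the top retrieved articles and the generated answer for one query."""
    print(retrieve_with_pandas(query, top_k=3))
    print(generate_with_rag(query, top_k=3))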