import json
import os

import gradio as gr
import numpy as np
import pandas as pd
from huggingface_hub import login
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# Sanity check: the Space must provide a Hugging Face token
hf_token = os.getenv("V2_TOKEN")
if hf_token is None:
    raise RuntimeError("V2_TOKEN environment variable is not set in this Space.")

# Explicit login
login(token=hf_token)

# --- Configuration ---
print("Loading RAG system on your device...")

# Knowledge base locations
FILE_PATH = "data.jsonl"
PRELOAD_FILE_PATH = "preload-data"

# Load the knowledge base, preferring the preloaded document cache
if os.path.exists(PRELOAD_FILE_PATH):
    print(f"Found preloaded data! Using {PRELOAD_FILE_PATH}...")
    with open(PRELOAD_FILE_PATH, "r", encoding="utf-8") as f:
        documents = json.load(f)
elif os.path.exists(FILE_PATH):
    print(f"No preloaded data found. Using {FILE_PATH}...")
    data = pd.read_json(path_or_buf=FILE_PATH, lines=True)
    documents = list(data["text"])
else:
    # Dummy data for testing if neither file exists yet
    print(f"Warning: {FILE_PATH} not found. Creating dummy data.")
    documents = ["To reset your password, visit password.sfu.ca and click 'Forgot Password'."]

# Cache the document list so later startups can skip parsing the JSONL file
if not os.path.exists(PRELOAD_FILE_PATH):
    print(f"Creating {PRELOAD_FILE_PATH}...")
    with open(PRELOAD_FILE_PATH, "w", encoding="utf-8") as fp:
        json.dump(documents, fp)

# Embed every document once at startup
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedding_model.encode(documents, convert_to_numpy=True)

# Keep documents and embeddings together in a pandas DataFrame
df = pd.DataFrame(
    {
        "Document": documents,
        "Embedding": list(embeddings),  # store each row's vector as a list
    }
)

# Load the LLM pipeline. Flan-T5 is an encoder-decoder model, so it needs
# the "text2text-generation" task, not "text-generation".
llm = pipeline(
    "text2text-generation",
    model="google/flan-t5-xl",  # Might not have enough storage ngl
    token=hf_token,
)


# Retrieve with pandas
def retrieve_with_pandas(query: str, top_k: int = 10):
    """
    Embed the query, compute cosine similarity to each document,
    and return the top_k most similar documents (as a DataFrame).
    """
    query_embedding = embedding_model.encode([query])[0]

    def cosine_sim(x):
        x = np.array(x)
        return float(
            np.dot(query_embedding, x)
            / (np.linalg.norm(query_embedding) * np.linalg.norm(x))
        )

    df["Similarity"] = df["Embedding"].apply(cosine_sim)
    results = df.sort_values(by="Similarity", ascending=False).head(top_k)
    return results[["Document", "Similarity"]]
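
# --- Optional: vectorized retrieval sketch ---
# The row-by-row .apply above makes one Python-level call per document. This
# is a minimal sketch of the same cosine-similarity ranking done as a single
# NumPy matrix product (a drop-in alternative, not wired into the app; it
# reuses the `embeddings` array and `documents` list built above):
def retrieve_vectorized(query: str, top_k: int = 10) -> pd.DataFrame:
    query_embedding = embedding_model.encode([query], convert_to_numpy=True)[0]
    doc_matrix = np.asarray(embeddings)  # shape: (n_docs, dim)
    sims = (doc_matrix @ query_embedding) / (
        np.linalg.norm(doc_matrix, axis=1) * np.linalg.norm(query_embedding)
    )
    top_idx = np.argsort(sims)[::-1][:top_k]  # indices of the best matches
    return pd.DataFrame(
        {"Document": [documents[i] for i in top_idx], "Similarity": sims[top_idx]}
    )
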
def generate_with_rag(query, top_k=5):
    # Retrieve the most relevant documents for this query
    docs = retrieve_with_pandas(query, top_k=top_k)
    context_series = docs["Document"]

    # Join the retrieved documents into a single context string,
    # with a divider between each article
    context_str = "\n\n---\n\n".join(context_series.tolist())

    # Build a clean prompt
    input_text = f"""You are an IT helpdesk assistant.
If the user asked a question, answer it with detailed step-by-step instructions, considering all the articles below.
If the user asked a question and the answer is not in the articles, say you don't know and suggest contacting SFU IT.
If the user did NOT ask a question, be friendly and ask how you can help them.

Question: {query}

-- Start of Articles --
{context_str}
-- End of Articles --

Answer:"""

    # Call the LLM. The text2text-generation pipeline returns only the
    # generated text, so no return_full_text flag is needed.
    response = llm(input_text, max_new_tokens=1024, do_sample=False)
    return response[0]["generated_text"].strip()


def chat_fn(message, history):
    """Chat interface callback."""
    return generate_with_rag(message, top_k=2)


demo = gr.ChatInterface(
    fn=chat_fn,
    title="SFU IT Chatbot",
    description="Enter your question and the SFU IT Chatbot will try to answer using retrieved SFU IT knowledge.",
)

if __name__ == "__main__":
    demo.launch()  # pass share=True for a public link
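
# --- Optional: command-line smoke test sketch ---
# A minimal way to exercise the RAG pipeline without the Gradio UI (a
# hypothetical helper, not part of the original app; the sample query is
# made up). Import this module in a Python shell and call smoke_test().
def smoke_test(query: str = "How do I reset my password?") -> None:
    """Print the top retrieved articles and the generated answer for one query."""
    print(retrieve_with_pandas(query, top_k=3))
    print(generate_with_rag(query, top_k=3))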