import json
import os
import re

import numpy as np
import pandas as pd
import gradio as gr
from transformers import pipeline
from sentence_transformers import SentenceTransformer
from huggingface_hub import login

# Sanity check: this Space must provide an HF token
hf_token = os.getenv("V2_TOKEN")
if hf_token is None:
    raise RuntimeError("V2_TOKEN environment variable is not set in this Space.")

# Explicit login
login(token=hf_token)

# --- Configuration ---
print("Loading RAG system on your device...")

# Knowledge base files (FILE_PATH is the raw source; this Space reads the preloaded JSON)
FILE_PATH = "data.jsonl"
PRELOAD_FILE_PATH = "preload-data.json"

# Load the preloaded, pre-chunked documents
if not os.path.exists(PRELOAD_FILE_PATH):
    raise FileNotFoundError(f"{PRELOAD_FILE_PATH} not found in this Space.")
print(f"Found preloaded data! Using {PRELOAD_FILE_PATH}...")
with open(PRELOAD_FILE_PATH, "r", encoding="utf-8") as f:
    data = json.load(f)

documents = data  # list of chunked document strings

# Embed the whole corpus once at startup
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedding_model.encode(documents, convert_to_numpy=True)

# Keep documents and their embeddings together in a pandas DataFrame
df = pd.DataFrame(
    {
        "Document": documents,
        "Embedding": list(embeddings),  # one vector per row
    }
)

# Load the LLM pipeline
llm = pipeline(
    "text-generation",
    model="google/gemma-3-4b-it",  # Note: a 4B model may exceed a small Space's storage
    token=hf_token,
)


def clean_query_with_llm(query):
    """Rewrite the user's message into a short search query for retrieval."""
    prompt_content = f"""
Below is a new question asked by the user that needs to be answered by searching in a knowledge base.
You have access to an SFU IT Knowledge Base index with hundreds of chunked documents.
Generate a search query based on the user's question.
If you cannot generate a search query, return just the number 0.

User's Question: {query}

Search Query:
"""
    response = llm(
        prompt_content, max_new_tokens=100, do_sample=False, return_full_text=False
    )
    return response[0]["generated_text"].strip()


# Retrieval with pandas
def retrieve_with_pandas(query: str, top_k: int = 5):
    """
    Embed the query, compute cosine similarity to each document,
    and return the top_k most similar documents (as a DataFrame).
    """
    query_embedding = embedding_model.encode([query])[0]

    def cosine_sim(x):
        x = np.array(x)
        return float(
            np.dot(query_embedding, x)
            / (np.linalg.norm(query_embedding) * np.linalg.norm(x))
        )

    # Score on a copy so the shared DataFrame is not mutated on every request
    scored = df.assign(Similarity=df["Embedding"].apply(cosine_sim))
    results = scored.sort_values(by="Similarity", ascending=False).head(top_k)
    return results[["Document", "Similarity"]]
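

# Optional vectorized variant (a sketch, not wired into the app): the per-row
# .apply() above makes one Python-level call per document. Because the corpus
# embeddings fit in a single matrix, the same cosine similarities can be
# computed in one NumPy pass. `retrieve_vectorized` is illustrative only.
def retrieve_vectorized(query: str, top_k: int = 5) -> pd.DataFrame:
    query_vec = embedding_model.encode([query], convert_to_numpy=True)[0]
    doc_matrix = np.vstack(df["Embedding"].to_list())  # shape: (n_docs, dim)
    sims = (doc_matrix @ query_vec) / (
        np.linalg.norm(doc_matrix, axis=1) * np.linalg.norm(query_vec)
    )
    top_idx = np.argsort(sims)[::-1][:top_k]  # best matches first
    return pd.DataFrame(
        {
            "Document": df["Document"].to_numpy()[top_idx],
            "Similarity": sims[top_idx],
        }
    )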


def generate_with_rag(query, top_k=5):
    # Normalize the product name "goSFU" regardless of how the user cased it
    query = re.sub(r"gosfu", "goSFU", query, flags=re.IGNORECASE)

    # Retrieve: rewrite the message into a search query, falling back to the
    # raw message when the rewriter returns its "cannot generate" sentinel (0)
    search_query = clean_query_with_llm(query)
    if search_query.strip() == "0":
        search_query = query
    results = retrieve_with_pandas(search_query, top_k=top_k)

    # Join the retrieved documents into a single context string
    # (each doc separated by a divider)
    context_str = "\n\n---\n\n".join(results["Document"].tolist())

    # Build a clean prompt
    prompt_content = f"""
You are an SFU IT helpdesk chatbot. Your task is to answer SFU IT related questions such as accessing various technology services or general troubleshooting.
Below is a new question asked by the user, along with article chunks related to the user's question.
If the user asked a question, answer it with short step-by-step instructions, considering all the articles below.
If there are links in the articles, provide those links in your answer.
If the user asked a question and the answer is not in the contexts, say that you're sorry that you can't help them and suggest contacting SFU IT at 778-782-8888 or submitting an inquiry ticket at https://www.sfu.ca/information-systems/get-help.html
If the user DID NOT ask a question, be friendly and ask how you can help them.
Do not recommend, suggest, or provide any advice on anything that is not related to SFU or SFU IT.
If the user asked something relating to mental health or is seeking medical advice, redirect them to SFU Health & Counselling at https://www.sfu.ca/students/health.html
Do not ask the user any follow-up questions after answering them.

Question: {query}

-- Start of Articles --
{context_str}
-- End of Articles --

Answer:"""

    # Call the LLM
    response = llm(
        prompt_content, max_new_tokens=500, do_sample=False, return_full_text=False
    )
    return response[0]["generated_text"].strip()


def chat_fn(message, history):
    """Gradio ChatInterface callback."""
    return generate_with_rag(message, top_k=5)


demo = gr.ChatInterface(
    fn=chat_fn,
    title="SFU IT Chatbot",
    description="Enter your question and the SFU IT Chatbot will try to answer using retrieved SFU IT knowledge.",
)

if __name__ == "__main__":
    demo.launch()  # pass share=True for a public link
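

# A minimal retrieval smoke test (a sketch; the sample question is made up).
# Useful for checking the index without starting Gradio, e.g.:
#   python -c "import app; app.smoke_test()"   # assumes this file is app.py
def smoke_test():
    print(retrieve_with_pandas("How do I reset my SFU computing ID password?", top_k=3))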