import json
import os
import re

import gradio as gr
import numpy as np
import pandas as pd
import torch
from huggingface_hub import login
from sentence_transformers import SentenceTransformer
from transformers import pipeline
# Sanity check: the Space must provide a Hugging Face token.
hf_token = os.getenv("V2_TOKEN")
if hf_token is None:
    raise RuntimeError("V2_TOKEN environment variable is not set in this Space.")

# Explicit login (needed to download gated models such as Gemma).
login(token=hf_token)
# --- Configuration ---
print("Loading RAG system on your device...")

# Knowledge base files
FILE_PATH = "data.jsonl"
PRELOAD_FILE_PATH = "preload-data.json"
# Load data: prefer the preloaded JSON list, fall back to the raw JSONL file.
# (Assumption: preload-data.json holds a JSON array of document strings, and
# each line of data.jsonl is one JSON-encoded document string.)
if os.path.exists(PRELOAD_FILE_PATH):
    print(f"Found Preloaded Data! Using {PRELOAD_FILE_PATH}...")
    with open(PRELOAD_FILE_PATH, "r", encoding="utf-8") as f:
        documents = json.load(f)
else:
    print(f"No preloaded data found. Reading {FILE_PATH}...")
    with open(FILE_PATH, "r", encoding="utf-8") as f:
        documents = [json.loads(line) for line in f if line.strip()]
# Embeddings: encode every document once at startup.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedding_model.encode(documents, convert_to_numpy=True)
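# Note: all-MiniLM-L6-v2 produces 384-dimensional vectors, so `embeddings`
# has shape (n_docs, 384).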
# Keep documents and embeddings together in a pandas DataFrame.
df = pd.DataFrame(
    {
        "Document": documents,
        "Embedding": list(embeddings),  # store each row's vector as a list entry
    }
)
# Load the LLM pipeline. Note: a ~4B-parameter model may exceed the storage
# available on smaller Spaces; swap in a smaller instruct model if needed.
llm = pipeline(
    "text-generation",
    model="google/gemma-3-4b-it",
    token=hf_token,
    device=0 if torch.cuda.is_available() else -1,  # GPU if available, else CPU
)
def clean_query_with_llm(query):
    """Rewrite the user's question into a short search query for retrieval."""
    prompt_content = f"""
Below is a new question asked by the user that needs to be answered by searching a knowledge base.
You have access to an SFU IT Knowledge Base index with hundreds of chunked documents.
Generate a search query based on the user's question.
If you cannot generate a search query, return just the number 0.

User's Question:
{query}

Search Query:
"""
    response = llm(
        prompt_content,
        max_new_tokens=100,
        do_sample=False,          # greedy decoding: deterministic output
        return_full_text=False,   # return only the newly generated text
    )
    return response[0]["generated_text"].strip()
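# Illustrative usage (this input/output pair is hypothetical, not a fixture):
# clean_query_with_llm("hey, my wifi keeps dropping in the library")
# -> e.g. "SFU library Wi-Fi connection troubleshooting"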
# Retrieve with pandas
def retrieve_with_pandas(query: str, top_k: int = 5):
    """
    Embed the query, compute cosine similarity to each document,
    and return the top_k most similar documents (as a DataFrame).
    """
    query_embedding = embedding_model.encode([query])[0]
    query_norm = np.linalg.norm(query_embedding)  # compute once, reuse per row

    def cosine_sim(x):
        x = np.array(x)
        return float(np.dot(query_embedding, x) / (query_norm * np.linalg.norm(x)))

    df["Similarity"] = df["Embedding"].apply(cosine_sim)
    results = df.sort_values(by="Similarity", ascending=False).head(top_k)
    return results[["Document", "Similarity"]]
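# Optional vectorized variant (a sketch, not wired into the app): it reuses the
# module-level `embeddings` array from startup and yields the same ranking as
# retrieve_with_pandas without a per-row Python loop.
def retrieve_vectorized(query: str, top_k: int = 5):
    q = embedding_model.encode([query], convert_to_numpy=True)[0]
    sims = embeddings @ q / (np.linalg.norm(embeddings, axis=1) * np.linalg.norm(q))
    top_idx = np.argsort(sims)[::-1][:top_k]  # indices of the most similar docs
    return pd.DataFrame(
        {"Document": [documents[i] for i in top_idx], "Similarity": sims[top_idx]}
    )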
def generate_with_rag(query, top_k=5):
    # goSFU-specific cleaning: normalize any casing of "gosfu" to "goSFU"
    query = re.sub(r"gosfu", "goSFU", query, flags=re.IGNORECASE)

    # Retrieve: rewrite the question into a search query, then fetch documents.
    # The rewriter returns "0" when it cannot generate a query; fall back to
    # the raw question in that case (assumption: better than searching for "0").
    search_query = clean_query_with_llm(query)
    if search_query.strip() == "0":
        search_query = query
    results = retrieve_with_pandas(search_query, top_k=top_k)

    # Turn the Series into a single string of text
    # (each doc separated by a divider)
    context_str = "\n\n---\n\n".join(results["Document"].tolist())
    # Build a clean prompt
    prompt_content = f"""
You are an SFU IT helpdesk chatbot.
Your task is to answer SFU IT-related questions, such as accessing various technology services or general troubleshooting.
Below is a new question asked by the user, along with article chunks related to that question.
If the user asked a question, answer it with short step-by-step instructions, considering all of the articles below.
If there are links in the articles, provide those links in your answer.
If the user asked a question and the answer is not in the articles, say that you're sorry you can't help them and suggest contacting SFU IT at 778-782-8888 or submitting an inquiry ticket at https://www.sfu.ca/information-systems/get-help.html
If the user DID NOT ask a question, be friendly and ask how you can help them.
Do not recommend, suggest, or provide any advice on anything that is not related to SFU or SFU IT.
If the user asked something relating to mental health or is seeking medical advice, redirect them to SFU Health & Counselling at https://www.sfu.ca/students/health.html
Do not ask the user any follow-up questions after answering them.

Question:
{query}

-- Start of Articles --
{context_str}
-- End of Articles --

Answer:"""
    # Call the LLM
    response = llm(
        prompt_content,
        max_new_tokens=500,
        do_sample=False,
        return_full_text=False,
    )
    return response[0]["generated_text"].strip()
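# Optional local smoke test (the question below is only illustrative):
# print(generate_with_rag("How do I connect to eduroam Wi-Fi at SFU?"))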
def chat_fn(message, history):
    """
    Gradio ChatInterface callback: answer each message with the RAG pipeline.
    """
    answer = generate_with_rag(message, top_k=5)
    return answer
demo = gr.ChatInterface(
    fn=chat_fn,
    title="SFU IT Chatbot",
    description="Enter your question and the SFU IT Chatbot will try to answer using retrieved SFU IT knowledge.",
)

if __name__ == "__main__":
    demo.launch()  # pass share=True here for a temporary public link