import json
import os

import numpy as np
import pandas as pd
import torch
import gradio as gr
from transformers import pipeline
from sentence_transformers import SentenceTransformer
from huggingface_hub import login
# Sanity check: the token must be provided as a Space secret
hf_token = os.getenv("V2_TOKEN")
if hf_token is None:
    raise RuntimeError("V2_TOKEN environment variable is not set in this Space.")

# Explicit login so authenticated Hub downloads work
login(token=hf_token)

# --- Configuration ---
print("Loading RAG system...")

# Knowledge base paths
FILE_PATH = "data.jsonl"
PRELOAD_FILE_PATH = "preload-data"

# Load the knowledge base, preferring the preloaded document list if present
if os.path.exists(PRELOAD_FILE_PATH):
    print(f"Found preloaded data! Using {PRELOAD_FILE_PATH}...")
    with open(PRELOAD_FILE_PATH, "r", encoding="utf-8") as f:
        documents = json.load(f)
elif os.path.exists(FILE_PATH):
    print(f"No preloaded data found. Using {FILE_PATH}...")
    data = pd.read_json(FILE_PATH, lines=True)
    documents = list(data["text"])
    # Cache the plain document list so later restarts skip the JSONL parse
    print(f"Creating {PRELOAD_FILE_PATH}...")
    with open(PRELOAD_FILE_PATH, "w", encoding="utf-8") as fp:
        json.dump(documents, fp)
else:
    # Dummy data for testing if you don't have the file yet
    print(f"Warning: {FILE_PATH} not found. Creating dummy data.")
    documents = ["To reset your password, visit password.sfu.ca and click 'Forgot Password'."]

# Embed every document once at startup
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedding_model.encode(documents, convert_to_numpy=True)

# Keep documents and their embeddings together in a pandas DataFrame
df = pd.DataFrame(
    {
        "Document": documents,
        "Embedding": list(embeddings),  # one vector per row
    }
)
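
# Optional sketch (not from the original): SentenceTransformer.encode accepts
# normalize_embeddings=True, which L2-normalizes each vector so that cosine
# similarity in the retrieval step reduces to a plain dot product.
#
# embeddings = embedding_model.encode(
#     documents, convert_to_numpy=True, normalize_embeddings=True
# )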

# Load the LLM pipeline. flan-t5 is an encoder-decoder model, so the
# correct pipeline task is "text2text-generation", not "text-generation".
llm = pipeline(
    "text2text-generation",
    model="google/flan-t5-xl",  # large checkpoint; may exceed the Space's storage
    token=hf_token,
)
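
# Note (assumption, not in the original): if the Space runs out of disk or
# memory, a smaller checkpoint such as "google/flan-t5-base" can be swapped
# in by changing the model string above; quality drops but the app stays up.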

# --- Retrieval with pandas ---
def retrieve_with_pandas(query: str, top_k: int = 10):
    """
    Embed the query, compute cosine similarity to each document,
    and return the top_k most similar documents as a DataFrame.
    """
    query_embedding = embedding_model.encode([query])[0]

    def cosine_sim(x):
        x = np.array(x)
        return float(
            np.dot(query_embedding, x)
            / (np.linalg.norm(query_embedding) * np.linalg.norm(x))
        )

    df["Similarity"] = df["Embedding"].apply(cosine_sim)
    results = df.sort_values(by="Similarity", ascending=False).head(top_k)
    return results[["Document", "Similarity"]]
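
# Sketch (alternative, hypothetical helper not in the original): the per-row
# .apply above makes one Python call per document; stacking the embeddings
# once lets numpy score the whole corpus in a single matrix-vector product.
#
# doc_matrix = np.vstack(df["Embedding"].to_list())  # shape (n_docs, dim)
# doc_norms = np.linalg.norm(doc_matrix, axis=1)
#
# def retrieve_vectorized(query: str, top_k: int = 10):
#     q = embedding_model.encode([query])[0]
#     sims = doc_matrix @ q / (doc_norms * np.linalg.norm(q))
#     top = np.argsort(-sims)[:top_k]
#     out = df.iloc[top][["Document"]].copy()
#     out["Similarity"] = sims[top]
#     return out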

def generate_with_rag(query, top_k=5):
    # Retrieve the top_k most similar documents
    docs = retrieve_with_pandas(query, top_k=top_k)
    context_series = docs["Document"]

    # Join the documents into a single context string,
    # with a divider between articles
    context_str = "\n\n---\n\n".join(context_series.tolist())

    # Build a clean prompt
    input_text = f"""You are an IT helpdesk assistant.
If the user asked a question, answer it with detailed step-by-step instructions, considering all the articles below.
If the user asked a question and the answer is not in the articles, say you don't know and suggest contacting SFU IT.
If the user DID NOT ask a question, be friendly and ask how you can help.

Question:
{query}

-- Start of Articles --
{context_str}
-- End of Articles --

Answer:"""
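
    # Note (assumption about model limits, not from the original): flan-t5
    # checkpoints were trained on fairly short inputs (on the order of 512
    # tokens), so very long article lists can degrade answers; keeping top_k
    # small helps the whole prompt stay within a usable length.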

    # Call the LLM. return_full_text is a text-generation-only option, so it
    # is dropped here; text2text-generation never echoes the prompt anyway.
    response = llm(
        input_text,
        max_new_tokens=1024,
        do_sample=False,
    )
    return response[0]["generated_text"].strip()
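
# Example (hypothetical query, for manual testing; not part of the app flow):
# print(generate_with_rag("How do I reset my SFU password?", top_k=3))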

def chat_fn(message, history):
    """
    gr.ChatInterface callback. `history` is supplied by Gradio but unused:
    each answer is grounded in freshly retrieved articles, not chat state.
    """
    return generate_with_rag(message, top_k=2)

demo = gr.ChatInterface(
    fn=chat_fn,
    title="SFU IT Chatbot",
    description="Enter your question and the SFU IT Chatbot will try to answer using retrieved SFU IT knowledge.",
)

if __name__ == "__main__":
    demo.launch()  # pass share=True for a public link